#!/usr/bin/perl
###################################################################
# Building a Web Site Index by Keywords to Search
# with JavaScript SiteSearch Engine, see:
# http://members.tripod.com/~schick/sitesuche.html
#
# Version 2.2 29May99, Perl Version 5.001 Win32
# written by Arnold Schick (Copyright)
#
# This Perl script is free for domestic use
# and is provided "as is" (use at your own risk)
#
# Copy this Perl script, key.cgi, into
# the cgi-bin directory on your local machine
# (file: http://members.tripod.com/~schick/key.txt)
# together with the related Perl script: spage.cgi
# (file: http://members.tripod.com/~schick/spage.txt)
#
# Copy the HTML document with the FORM element for these
# scripts into your Web subdirectory
# (file: http://members.tripod.com/~schick/collect.html)
#
# Names: Perl script   : key.cgi
#        Perl script   : spage.cgi
#        HTML document : collect.html
# call:  /cgi-bin/key.cgi from HTML document
#
# note: this script creates the files that are
#       named in the FORM of the related
#       HTML document; deleting files is
#       also possible.
#
# -----------------------------------------------------------------
# Reviewer overview of the script body
# -----------------------------------------------------------------
# NOTE(review): this copy looks like the output of an HTML-to-text
# conversion -- TODO confirm against the author's key.txt before
# running or editing it:
#   * line breaks are collapsed, so every "#" comment below also
#     comments out the statements that follow it on the same physical
#     line (including "sub create_JavaScript_Search {");
#   * text between "<" and ">" appears to be missing: the readline
#     operators are empty ("@allInfos = ;", "while () {", "@page = ;",
#     "@allLines = ;"), the index() search strings for the title and
#     HEAD tags are "", most printed markup is blank, and the
#     keyword-extraction code in the per-file loop is cut off in the
#     middle of an s/// expression.
#
# Flow, as far as it is visible here:
#   1. Reads CONTENT_LENGTH bytes from STDIN, decodes %XX and "+",
#      then splits on "&" and "=". Every "ftype" field is collected
#      in @ftype, all other fields go to %tokens. Decoding happens
#      before splitting, so a value holding an encoded "&" or "=" is
#      cut at that character.
#   2. temp_file: when the named file exists, its "indexfile,dirname,
#      home" rows are split into three arrays, the current request's
#      values are appended, and each array is de-duplicated and
#      reverse-sorted on its own before the file is rewritten.
#      NOTE(review): sorting the columns independently means a row
#      no longer has to pair the values that were stored together,
#      and the rewrite loop always emits $c+1 rows ($c = rows read)
#      even when de-duplication left fewer entries -- confirm intent.
#      When the file does not exist it is created with the current
#      row followed by ",,".
#   3. Prints the Content-type header and the page head. With "index"
#      or "appindex" set but no "indexfile" it prints "missing Index
#      File" and jumps to the label "fin".
#   4. delindex: reopens the index file for writing and prints a
#      single newline. all: @ftype is replaced by ("*"), which makes
#      the generated pattern /\.*$/i.
#   5. dirname: a trailing "/" or "\" is removed, a trailing ":"
#      becomes ":\", the directory is read with readdir, and a "\" is
#      appended when missing (Win32-style paths).
#   6. File filter: builds Perl source of the form
#      "/\.TYPE$/i || /\.TYPE$/i" from @ftype and string-evals it
#      inside grep for every directory entry.
#      NOTE(review): TYPE comes straight from the request, so any
#      Perl code placed in "ftype" is evaluated. The banner's warning
#      not to install this on a public server is the only guard.
#   7. Stores (stat)[9] per file/subdirectory in %fset/%dset and
#      orders the lists by the "sort" field: newest, oldest, abc, zyx
#      (the name sorts compare uc() of the names).
#   8. home: "!$tokens{'home'} == """ parses as (!home) == "", a
#      numeric comparison that is true exactly when home is a true
#      value. Then a leading and a trailing "/" are added if missing
#      and "//" is collapsed. $path is picked from the "path" field
#      (local -> dirname, server -> home, no -> "").
#   9. appindex: scans the index file for "new makeArray(" to recover
#      the previous count into $last_index, then increments it.
#      NOTE(review): index() returns -1 (a true value) when the text
#      is absent and 0 (false) when it starts the line, so
#      "if ( index(...) )" does not test "line contains" -- confirm.
#      index: rewrites the index file with a "// JavaScript
#      SiteSearch Index File, created: ..." line, forces appindex to
#      "on" and sets $last_index to 1.
#  10. Prints the subdirectory and file tables; when neither list has
#      entries it prints "no files found" and jumps to "fin".
#  11. Per file that passes -T: scans the lines for title and
#      keywords up to the end of the HEAD section (search strings
#      lost, see above). For every page with a title and appindex
#      set it appends
#        indexedPage[$k]=new index("HOME+FILE","TITLE","KEYWORDS");
#      to the index file; $k counts these entries.
#  12. appindex afterwards: reads the index file back, drops leading
#      "var indexed..." lines, strips leading whitespace, replaces
#      every run of backslashes with two backslashes, and rewrites the
#      file with "var indexedPage = new makeArray($k-1);" first.
#  13. Prints the summary counts, calls create_JavaScript_Search when
#      index/appindex is set, prints the footer and clears $posted.
#  14. sub create_JavaScript_Search: takes no arguments, derives
#      $spage from $tokens{'indexfile'} with three substitutions plus
#      "\search.html", and prints the search-page form (markup lost).
#
# NOTE(review): every open() is the 2-argument form with a name taken
# from the request, so a name can carry its own mode characters.
# NOTE(review): $a and $b are used as plain counters although the
# sort blocks in this script use the same package variables.
# -----------------------------------------------------------------
# # # note: Don't place this script into a public cgi-bin directory # on a public Web server, it's a maintain tool of # your own Web Site files on a local machine (PC) # # if you use Apache Web server on WIN32, use the directive # #!perl in place of #!/usr/bin/perl at top of this script # # # For offline using, see below (nothing for beginners) ################################################################### $now = time(); $now = localtime($now); @date = split(/ /,$now); $content_length = $ENV{'CONTENT_LENGTH'}; read(STDIN, $posted, $content_length); $posted =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; $posted =~ s/\+/ /g; # for offline use: uncomment the following line and try that Perl script on command line, like: c:\perl5\perl.exe key.cgi > content.html #$posted="dirname=d:\\my_web_root&sort=newest&path=no&index=on&indexfile=c:\\tmp\my_index.js&home=/&ftype=htm&ftype=html"; # the parameters can be changed for your own conditions ie. parameters: dirname=... indexfile=... home=... ftype=... ftype=... 
# path uses the values: no (no path), server (takes value from home), or local (takes value from dirname) for file listing # sort uses the values: newest, abc, zyx, or oldest for file and subdirectory listing @fields = split (/&/, $posted); @ftype=(); foreach $name (@fields) { ($n, $w) = split(/=/, $name); if ($n eq 'ftype') { @ftype = (@ftype, $w); } else { $tokens{$n} = $w; } } @allInfos=(); if ( ($tokens{'temp_file'}) and (-e $tokens{'temp_file'}) ) { #find selected information from the Start Page open(INFILE, "<$tokens{'temp_file'}"); comp; @allInfos = ; close(INFILE); if (@allInfos) { $c=0; foreach $line (@allInfos) { $line =~ tr/\n\r\f\t\b/ /s; $line =~ tr/ //d; ($if, $lf, $sp) = split(/,/, $line); push(@indexfiles, $if); push(@localfiles, $lf); push(@serverpath, $sp); $c++; } push(@indexfiles, $tokens{'indexfile'} ); push(@localfiles, $tokens{'dirname'} ); push(@serverpath, $tokens{'home'} ) ; @indexfiles = reverse sort keys %{{map {$_,1} @indexfiles }}; #remove duplicates @localfiles = reverse sort keys %{{map {$_,1} @localfiles }}; #remove duplicates @serverpath = reverse sort keys %{{map {$_,1} @serverpath }}; #remove duplicates } open(OUTFILE, ">$tokens{'temp_file'}"); #save selected information from the Start Page for ($b=0; $b <= $c; $b++) { print OUTFILE "$indexfiles[$b],", "$localfiles[$b],", "$serverpath[$b]\n"; } close(OUTFILE); } elsif ($tokens{'temp_file'}) { open(OUTFILE, ">$tokens{'temp_file'}"); #save selected information from the Start Page print OUTFILE "$tokens{'indexfile'},", "$tokens{'dirname'},", "$tokens{'home'}\n"; print OUTFILE ",,"; close(OUTFILE); } print "Content-type: text/html", "\n\n"; print "\n\n"; print "\n"; print "\n"; print "Files in Subdirectory $tokens{'dirname'}\n"; print "\n"; if ( (!$tokens{'indexfile'}) and ($tokens{'index'} or $tokens{'appindex'}) ) { print "missing Index File
\n"; goto fin; } if ($tokens{'delindex'}) { open(OUTFILE, ">$tokens{'indexfile'}") || (print "Can't open $tokens{'indexfile'}: $!\n"); print OUTFILE "\n"; close(OUTFILE); } if ($tokens{'all'}) { @ftype=(); $ftype[0] = "*"; } print "\n"; print "Collection of Files in $tokens{'dirname'} by selected Filetype: @ftype
\n"; print "\n"; $tokens{'dirname'} =~ s/\\$|\/$//; $tokens{'dirname'} =~ s/\:$/\:\\/; if ($tokens{'dirname'} and -d "$tokens{'dirname'}") { @files =(); opendir(DIR,$tokens{'dirname'}) || (print "Can't open $tokens{'dirname'}: $!\n"); @files = readdir DIR; closedir(DIR); } else { print "missing Subdirectory Name or given Name $tokens{'dirname'} is not a Subdirectory
\n"; } if ($tokens{'dirname'} !~ m/.\\$/) { $tokens{'dirname'} = ($tokens{'dirname'} . "\\"); } # define an array for all wanted filetypes and grep $latest = pop @ftype; @typelist = (); foreach $m ( @ftype ) { (@typelist) = (@typelist, "/\\.$m\$\/i ||"); } (@typelist) = (@typelist, "/\\.$latest\$\/i"); @dateien = (); foreach $m ( @files ) { # find files for listing and find subdirectories if ((-f "$tokens{'dirname'}$m") and (grep { eval "@typelist" } $m) ) { (@dateien) = (@dateien, $m); } if ((-d "$tokens{'dirname'}$m") and ($m ne ".") and ($m ne ".." )) { (@dirs) = (@dirs, $m); } } @files = @dateien; if (@files or @dirs) { # prepare something to sort by date, use hashes fset and dset for date sorting foreach $m (@files) { $p = "$tokens{'dirname'}" . "$m"; (($fset{$m}) = (stat($p))[9]) or (print "can't stat file $m
\n"); } foreach $m (@dirs) { $p = "$tokens{'dirname'}" . "$m"; (($dset{$m}) = (stat($p))[9]) or (print "can't stat directory $m
\n"); } if ($tokens{'sort'} =~ m/newest/i) { @files = reverse sort{ $fset{$a} <=> $fset{$b} } keys %fset; @dirs = reverse sort{ $dset{$a} <=> $dset{$b} } keys %dset; } if ($tokens{'sort'} =~ m/oldest/i) { @files = sort{ $fset{$a} <=> $fset{$b} } keys %fset; @dirs = sort{ $dset{$a} <=> $dset{$b} } keys %dset; } if ($tokens{'sort'} =~ m/abc/i) { @files = sort{ uc($a) cmp uc($b) } @files; @dirs = sort{ uc($a) cmp uc($b) } @dirs; } if ($tokens{'sort'} =~ m/zyx/i) { @files = reverse sort{ uc($a) cmp uc($b) } @files; @dirs = reverse sort{ uc($a) cmp uc($b) } @dirs; } if (!$tokens{'home'} == "") { if ($tokens{'home'} !~ m/.\/$/) { $tokens{'home'} = ($tokens{'home'} . "/"); } if ($tokens{'home'} !~ m/^\//) { $tokens{'home'} = ("/" . $tokens{'home'}); } $tokens{'home'} =~ s/\/\//\//g; } if ($tokens{'path'} =~ m/local/i) { $path = $tokens{'dirname'}; } if ($tokens{'path'} =~ m/server/i) { $path = $tokens{'home'}; } if ($tokens{'path'} =~ m/no/i) { $path = ""; } if ($tokens{'appindex'}) { open(INFILE, "<$tokens{'indexfile'}") || ( print "Can't open Index File $tokens{'indexfile'}: $!, new one created
\n" ); while () { chomp; if ( index($_,"new makeArray(") ) { $last_index = substr($_, index($_,"(")+1, index($_,")") - index($_,"(")-1); last; } } close(INFILE); $last_index++; } if ($tokens{'index'}) { open(OUTFILE, ">$tokens{'indexfile'}") || (print "Can't open Index File $tokens{'indexfile'}: $!\n"); print OUTFILE "\/\/ JavaScript SiteSearch Index File, created: @date\n"; close(OUTFILE); $tokens{'appindex'} = "on"; $last_index = 1; } print "$tokens{'sort'} first
Date: @date

to navigate to these pages save this page to $tokens{'dirname'} and load it into the Web Browser
or an HTML Editor to maintain titles or keywords within the \<HEAD></HEAD> section.

\n"; print "\n"; if (@dirs) { print "\n"; $a = 1; foreach $i ( @dirs ) { if ($a < 10) { $an = "\  \  " . $a; } else { if (($a > 9) and ($a < 100)) { $an = "\  " . $a; } else { $an = $a; } } $p = "$tokens{'dirname'}" . "$i"; (($mtime) = (stat($p))[9]) or (print "no subdirectory stats
\n"); print "\n"; $a++; } } if (@files) { print "\n"; $a = 1; foreach $i ( @files ) { if ($a < 10) { $anz = "\  \  " . $a; } else { if (($a > 9) and ($a < 100)) { $anz = "\  " . $a; } else { $anz = $a; } } $p = "$tokens{'dirname'}" . "$i"; (($mtime) = (stat($p))[9]) or (print "no file stats
\n"); print "\n"; $a++; } } print "
    Subdirectorylast modified
$an. $path$i  ", scalar localtime $mtime, "

    Filename
last modified
$anz. $path$i  ", scalar localtime $mtime, "

\n"; } else { print "no files found

"; goto fin;} $k += $last_index; $anzahl = 0; $anz = 0; $a = 0; $b = 0; @lines = (); @allkws = (); if ($tokens{'index'} or $tokens{'appindex'}) { print "

\n"; } print "    List of Files with TITLE and HEADer Information:
\n"; print "\n"; foreach $i ( @files ) { if ($tokens{'index'} or $tokens{'appindex'}) { $checkbox = ""; } else { $checkbox = "==>"; } $fname = $tokens{'dirname'} . $i; $anzahl ++; if ($anzahl < 10) { $anz = "\ \ \ \ " . $anzahl; } else { if (($anzahl > 9) and ($anzahl < 100)) { $anz = "\ \ " . $anzahl; } else { $anz = $anzahl; } } if (-T $fname) { open (EINGABE, $fname) || print "can't open $i: $!\n"; chomp; @page = ; close(EINGABE) ; $ln = 0; foreach $line (@page) { # process each line within the HEADer $pos = index(uc($line), ""); if ($ln gt -1 ) { $a++; } $kwline =~ tr/ \n\r\f\t\b/ /s; # change some whitespaces to one blank $kwline =~ s/'|"|>|,$|^\s+|\s+$|\|,$|^\s+|\s+$|\ -1) { $pos1 = index(uc($line), ""); $pos2 = index(uc($line), ""); if ($pos1 > -1) { $title = substr($line,$pos1+7,$pos2-$pos1-7); $title =~ tr/ \n\r\f\t\b/ /s; # change some whitespaces to one blank $title =~ s/^\s+|\s+$|\"//g; # change whitespaces at the beginning and at the end, also dquotes } } # looking for the end tag of HEAD: if ($pos > -1) { if (!@keyw) { @keyw = ("no keywords"); } (@lines) = ( @lines, $title ); if (!$title) { $title = "no title"; } print "\n"; print "\n"; foreach $m (@keyw) { $m =~ s/^\s+|\s+$//g; # change whitespaces at the beginning and at the end $m =~ s/, | ,|,/\",\"/g; # change commas between keywords with quotes for index file } if ($title !~ m/no title/s) { # indexing if title present if ($tokens{'appindex'}) { open(OUTFILE, ">>$tokens{'indexfile'}") || (print "Can't open Index File $tokens{'indexfile'}: $!\n"); print OUTFILE "indexedPage[$k]=new index(\"$tokens{'home'}$i\",\"$title\",\"@keyw\");\n"; close(OUTFILE); } $k++; # counts index on index file } (@allkws) = (@allkws, @keyw); @keyw = (); $title= ""; last; # HEADer processed, jump to the next file } } if ($pos == -1) { print "\n"; } } } print "
$checkboxFile $anz: Page Title: $title
==>File $anz: Keywords: @keyw
==>File $anz: has no HEAD section
\n"; foreach $key (@allkws) { @key = split (/,/ , $key); (@keys) = ( @keys, @key); } if ($tokens{'appindex'}) { @allLines=(); open(INFILE, "<$tokens{'indexfile'}") || (print "Can't open Index File $tokens{'indexfile'} $!\n"); chomp; @allLines = ; close(INFILE); if ($allLines[0] =~ m/^var indexed/ig) { $f = shift(@allLines); } if ($allLines[1] =~ m/^var indexed/ig) { $f = shift(@allLines); } foreach $line (@allLines) { # clean up read file $line =~ s/^\s+//; # remove whitespaces at beginning $line =~ s/\\+/\\\\/g; # change all backslashes with double-backslashes } $number = $k-1; open(OUTFILE, ">$tokens{'indexfile'}") || (print "Can't open Index File $tokens{'indexfile'} $!\n"); print OUTFILE "var indexedPage = new makeArray($number);\n"; close(OUTFILE); open(OUTFILE, ">>$tokens{'indexfile'}") || (print "Can't open Index File $tokens{'indexfile'} $!\n"); print OUTFILE "@allLines\n"; close(OUTFILE); } $n = $k - $last_index; $an=@dirs; if ($an == 1) { $an = "1 Subdirectory"; } else { $an = "$an Subdirectories"; } if ($anzahl == 1) { $anzahl = "1 File"; } else { $anzahl = "$anzahl Files"; } if ($n == 1) { $n = "1 Web document"; } else { $n = "$n Web documents"; } if ($a == 1) { $a = "1 Web document"; } else { $a = "$a Web documents"; } if ($b == 1) { $b = "1 keyword"; } else { $b = "$b keywords"; } print "
$an found
$anzahl found
     $n with titles found
     $a with total $b found

\n"; if ($tokens{'appindex'}) { print "     $n with titles indexed, $n added to $tokens{'indexfile'}.
\n"; } if ($tokens{'delindex'}) { print "existing JavaScript SiteSearch Index File $tokens{'indexfile'} deleted -if $tokens{'indexfile'} was there
\n"; } if ($tokens{'index'}) { print "new JavaScript SiteSearch Index File $tokens{'indexfile'} created, existing file was deleted -if $tokens{'indexfile'} was existing
\n"; } if ($tokens{'appindex'}) { print "continue with index: $last_index

\n"; } if ( (!$tokens{'appindex'}) and (!$tokens{'index'}) ) { print "
\nYou can save this page on the Web browser (with Save As on File menu)
to: $tokens{'dirname'}content.html
and that is then your Table of Content in that work subdirectory.
\n"; } if ($tokens{'index'} or $tokens{'appindex'}) { print "
The produced JavaScript SiteSearch Index File is saved to $tokens{'indexfile'}
and can be included into this JavaScript SiteSearch source.
\n"; &create_JavaScript_Search; print "
\n"; } print "

created with Perl Script written by Arnold Schick

\n"; fin : ; print "back\n"; print "\n"; # finally, clean up, be sure for the next run: $posted=""; sub create_JavaScript_Search { $spage = $tokens{'indexfile'}; # find dirname: $spage =~ s/.\w+$//i; # remove all chars from end until . (filetype) $spage =~ s/.\.$//; # remove period $spage =~ s/.\w+$//i; # remove filename $spage = $spage . "\\search.html"; # append new filename print "
Or Take the checked files and create the Site Search Page with this tool:
\n"; if ($last_index > 1) { print "\n"; } print "\n"; print "      Search Page:        
\n"; print "
\n      Results to Frame:  \n"; print "   (Name of Frame -if frames are inuse)
\n"; print "
\n"; print "
\n"; } ################ End of this Perl Script #############################################