#!/usr/bin/perl
###############################################################################
#
# IC-SiteSearch 1.7
#
# Program name:         IC-SiteSearch
# Program version:      1.7
# Program name defined: 'Internet Coders Site Search' version 1.7
# Program purpose:      Providing a way for customers to search your website.
#
#
# Written by:           shawn@internetcoders.com
# Downloaded at:        www.internetcoders.com
# Date released:        05/27/2002
# Last modified:        never
#
# System requirements:  Linux,Perl
#
# Liability:     InternetCoders.com and or all individuals of InternetCoders.com
#                are in no way liable for anything that this program may do while
#                in use modified/unmodified. You the person choosing to run this
#                program take full responsibilities for this code and everything
#                it does. It's simple, you don't want to take responsibility for
#                this code, then don't use it
#
# Support:       If you have any questions about this program please refer to
#                the documentation/instructions that came with the program. Since
#                this program was free there is no e-mail or phone support. However
#                support is available if you pay for it, or if you pay for
#                customizations. To pay for support or customizations please visit
#                our website at http://www.internetcoders.com
#
# Distribution:  Since you did not write this code, you are not allowed to distribute
#                it modified or unmodified. If you want someone to have this code then
#                tell them to download it from InternetCoders.com, if it is
#                not on the InternetCoders.com website at that time then that
#                person is out of luck and will not be able to download it until
#                we post it again, unless you get permission from InternetCoders.com
#
# Modifications: You are allowed to fully modify any of this code you like. Please
#                refer to the Distribution section above before giving away copies
#                of this code whether modified or unmodified.
#
###############################################################################

use strict;
use warnings;

###############################################################################
# PROGRAM VARIABLES TO CONFIGURE BELOW
###############################################################################

# This should be the directory/path that you would like to have this program
# search. Put the exact path starting with the root directory /.
# Be sure to add a / at the end or else the program will not display the found
# results from the search properly.
our $searchDirectory = '/home/httpd/cliff69/nobucks.com/';

# This should point to the same directory as above, only as it would be typed
# in a web browser as a url. It is used to create the links to the pages
# listed in the search results. Be sure to add the trailing / at the end or
# else the program will not display the found results from the search.
our $hyperLink = 'http://www.nobucks.com/';

# This is the color that you would like the hypertext links to be. These are
# the links that point to the pages returned from the search results.
our $hypertextLinkColor = 'blue';

# This is where you specify the types of files that you would like to have
# searched by this program. Separate your extensions with a pipe symbol, which
# looks like this | . Each entry is matched literally against the END of the
# file name (it is not a regular expression).
our $searchFileTypes = '.html|.htm|.txt';

# This is the width of the search text field.
our $formFieldSize = '25';

# This is the text that you would like to have next to the search field box.
# The default word that shows up is Search.
our $searchFieldText = 'Search Site ';

# To exclude the html tags from being searched set this variable to 1. If you
# want this program to search the html tags as well then set this variable to
# 0. Keep in mind that html tags contain words and this may throw off your
# search results. Also keep in mind that tags are removed one line at a time,
# so a tag that is split over several lines (or is missing its closing >) is
# only partly removed and your search may not be as accurate.
# 1 turns html remove on. 0 turns remove off.
our $removeHtml = '0';

# Point this to the header file if you have one that you would like to have
# displayed. Leave this blank if you do not have a header file.
our $header = '';

# Point this to the footer file if you have one that you would like to have
# displayed. Leave this blank if you do not have a footer file.
our $footer = '';

# Decoded request parameters, filled in by PARSE().
our %form;

###############################################################################
# PARSE INCOMING DATA AND FIGURE OUT WHAT TO DO WITH IT
###############################################################################
PARSE();
print "Content-type:text/html\n\n";
if ($form{search} || $form{submit}) {
    SEARCH_FILES();
}
else {
    DISPLAY_SEARCH_FIELD();
}

###############################################################################
# PROGRAM SUBROUTINES BELOW
###############################################################################

# Emits the search form as a series of JavaScript document.write() calls.
# Takes no arguments and returns nothing useful; output goes to STDOUT.
sub DISPLAY_SEARCH_FIELD {
    foreach my $part (_search_form_parts()) {
        print 'document.write("', _escape_js($part), '");', "\n";
    }
    return;
}

# Runs the search described by $form{search} over the files in
# $searchDirectory and prints the header file, the search form, the result
# list (or a "no matches" message) and the footer file to STDOUT.
# Exits through ERROR() when the search directory cannot be opened.
sub SEARCH_FILES {
    # display the header
    print _slurp_optional($header);

    opendir(my $dir, $searchDirectory)
        or ERROR("ERROR: Unable to open the directory:'$searchDirectory' for searching. Please make sure that the path to the directory is correct.");
    my @entries = readdir($dir);
    closedir($dir);

    # Keep only plain files whose name ends with one of the configured
    # extensions. The extensions are quoted so that '.' means a literal dot,
    # and anchored so that e.g. '.htm' does not accept 'page.shtml.bak'.
    my $extPattern = join '|',
        map { quotemeta } grep { length } split /\|/, $searchFileTypes;
    my @validFiles;
    if (length $extPattern) {
        @validFiles = grep { /(?:$extPattern)\z/ && -f "$searchDirectory$_" }
            @entries;
    }
    my $numSearched = scalar @validFiles;

    # Any of the whitespace-separated words may match (logical OR). Each word
    # is quoted so user input is never interpreted as a regular expression,
    # and must stand alone: not directly preceded or followed by a word
    # character. Empty words (from leading whitespace) are dropped because an
    # empty alternative would match every line.
    my $query = defined $form{search} ? $form{search} : '';
    my @searchWords = grep { length } split /\s+/, $query;
    my $searchRegex;
    if (@searchWords) {
        my $alternatives = join '|', map { quotemeta } @searchWords;
        $searchRegex = qr/(?<!\w)(?:$alternatives)(?!\w)/i;
    }

    # Count, per file, the number of lines containing at least one word.
    # Results are kept as [count, file name] pairs.
    my @fileMatches;
    if (defined $searchRegex) {
        foreach my $fileName (@validFiles) {
            open(my $fh, '<', "$searchDirectory$fileName") or next;
            my $timesFound = 0;
            while (my $text = <$fh>) {
                # remove html if requested: complete tags, plus a tag left
                # open at the end of the line
                $text =~ s/<[^>]*>?//g if $removeHtml eq '1';
                $timesFound++ if $text =~ $searchRegex;
            }
            close($fh);
            push @fileMatches, [ $timesFound, $fileName ] if $timesFound != 0;
        }
    }

    # Most matching lines first; ties are ordered by file name.
    @fileMatches = sort {
        $b->[0] <=> $a->[0] or lc($a->[1]) cmp lc($b->[1])
    } @fileMatches;

    # print search box
    print join('', _search_form_parts()), "\n";

    # NOTE(review): the markup of the messages below was lost from the
    # recovered source; only the text survived. The <br> tags and the result
    # link are reconstructed from the configuration comments - confirm.
    if (!@fileMatches) {
        # if there were no results then print this message
        print "searched $numSearched page(s)<br>\n",
              "No matches were found.<br>\n",
              "<br>\n",
              "<br>\n";
    }
    else {
        # if there were positive results found then print the results
        print "searched $numSearched page(s)<br>\n",
              "Your search returned the following pages.<br>\n",
              "<br>\n",
              "<br>\n";
        my $color = _escape_html($hypertextLinkColor);
        foreach my $match (@fileMatches) {
            my ($numberResults, $fileName) = @{$match};
            my $href  = _escape_html($hyperLink . _escape_uri($fileName));
            my $label = _escape_html($fileName);
            print "Words matched:$numberResults File:",
                  "<a href=\"$href\"><font color=\"$color\">$label</font></a>",
                  "<br>\n";
        }
    }

    # display the footer
    print _slurp_optional($footer);
    return;
}

# This is the error subroutine: prints its arguments (joined by spaces) to
# STDOUT and terminates the program.
sub ERROR {
    print "@_";
    exit;
}

# Decodes the request parameters into %form. The POST body (if any) is read
# first and the query string second, so a query-string value overrides a
# posted value of the same name.
sub PARSE {
    my $MAX_POST_BYTES = 65536;    # refuse to buffer arbitrarily large bodies

    my $postData = '';
    my $method = defined $ENV{REQUEST_METHOD} ? uc $ENV{REQUEST_METHOD} : '';
    if ($method eq 'POST') {
        my $length = defined $ENV{CONTENT_LENGTH} ? $ENV{CONTENT_LENGTH} : '';
        $length = 0 unless $length =~ /\A\d+\z/;
        $length = $MAX_POST_BYTES if $length > $MAX_POST_BYTES;
        read(STDIN, $postData, $length) if $length > 0;
        $postData = '' unless defined $postData;
    }
    my $queryString = defined $ENV{QUERY_STRING} ? $ENV{QUERY_STRING} : '';

    foreach my $source ($postData, $queryString) {
        next unless length $source;
        foreach my $pair (split /\&/, $source) {
            # limit of 2 keeps any '=' that is part of the value
            my ($name, $value) = split /=/, $pair, 2;
            next unless defined $name && length $name;
            $value = '' unless defined $value;
            $value =~ s|\+| |g;
            $value =~ s|%([a-fA-F0-9][a-fA-F0-9])|pack("C", hex($1))|ge;
            # NOTE(review): the pattern of this substitution was lost from
            # the recovered source (it read "s///g"); presumably it removed
            # server-side-include comments from the input - confirm.
            $value =~ s/<!--.*?-->//gs;
            $form{$name} = $value;
        }
    }
    return;
}

# Returns the pieces of the search form: opening tag, label text, input
# fields, closing tag.
# NOTE(review): the original form markup was lost from the recovered source;
# the field names 'search' and 'submit' are the ones the main program tests,
# the rest (method, action via SCRIPT_NAME, button label) is reconstructed -
# confirm.
sub _search_form_parts {
    my $action = _escape_html(
        defined $ENV{SCRIPT_NAME} ? $ENV{SCRIPT_NAME} : '');
    my $size = _escape_html($formFieldSize);
    return (
        "<form method='get' action='$action'>",
        $searchFieldText,
        "<input type='text' name='search' size='$size'>"
            . "<input type='submit' name='submit' value='Search'>",
        "</form>",
    );
}

# Returns the contents of the file at $path, or '' when $path is empty or the
# file cannot be read (header and footer files are optional).
sub _slurp_optional {
    my ($path) = @_;
    return '' unless defined $path && length $path;
    open(my $fh, '<', $path) or return '';
    my $content = do { local $/; <$fh> };
    close($fh);
    return defined $content ? $content : '';
}

# Returns $text with the HTML-significant characters replaced by entities.
sub _escape_html {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# Returns $text made safe for use inside a double-quoted JavaScript string.
sub _escape_js {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/\\/\\\\/g;
    $text =~ s/"/\\"/g;
    $text =~ s/\r?\n/\\n/g;
    $text =~ s{</}{<\\/}g;    # never let "</script>" end the enclosing script
    return $text;
}

# Returns $text percent-encoded for use as a single URL path segment.
sub _escape_uri {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord($1))/ge;
    return $text;
}