#!/usr/bin/perl ############################################## ############################################## # SCRIPT NAME: Page_Search_and_Display.cgi # FUNCTION: A search engine and display script ############################################## # MINE: Molecular INformation Explorer # Copyright 2000 Dawn Field. All rights reserved. # The CGI-PERL scripts belonging to MINE # may be used and modified freely, but I do # request that this copyright notice remain attached # to this file/source code. If you make modifications # please do not distribute unless # you fully document the modifications. # and an END statement to close a table so the error message will print to screen if the # program fails while trying to write the contents of a table END { &table_bottom(); } use CGI; require "CGI-MINE.pl"; ################# # START EACH MINE CGI SCRIPT ################# # this redirects the error messages to the user's screen # and is useful for debugging CGI scripts! open (STDERR, ">&STDOUT"); # print errors to screen $| = 1; # flush the print buffer continuously # make a new query object using CGI.pm module $query = new CGI; # print the required header and start the web page print $query->header; print $query->start_html(-title=>'MINE Search Engine', -BGCOLOR=>'white' ); # Each time a script is invoked for the first time (or $action undef), # log the visit in the custom MINE server log (see CGI-MINE.pl) # (put this after the header incase an error message is printed) # check value of $action $action = $query->param('action'); if ($action eq undef) {&log();} # SECURITY VARIABLE $write_permission = 1; # set to 1 to allow saving of files to disk # use a function in CGI-MINE.pl to see if we should allow # this IP full permission to submit files # even if $write_permission = 0; &check_permission($write_permission); ####### # START THE WEBPAGE ####### # print the MINE menu &menu; # clear out the temp_output file for the .results of the search -UNLESS the action is to copy this 
file if ($action ne "Copy") { # OPEN THE temp.html output file open (TEMP, ">temp_output") || die "can't open the file temp_output"; system (`chmod 644 temp_output`); } ############ # START FORM ############ &table_top(); print $query->startform(); # make a url link to the MINE_Preferences file $source = "MINE_Preferences"; # START must be aligned LEFT print $table_start = < START ############ PRINT FORM DISPLAY OPTIONS ############ print "Display Options

"; print "Select a search display option:

"; # name all the methods for changing format that are handled below in the script @display_formats = ( 'file_names','entries', 'defined_fields', 'fasta', 'report'); # make nice labels corresponding to each of the values in @display_formats %labels = ( "file_names" => "File names", "entries"=> "Contents", "defined_fields"=> "Defined fields", "report" => "Report Format", "fasta"=> "Fasta Format" ); # make options menu (only one option can be selected by user) print $query->popup_menu(-name=>'display_format', -values=>[@display_formats], -labels=>\%labels, -default=>'defined_fields'); print "

"; # print all the values in the database # $ext = ".db"; # &get_files(); # @files = @database; $temp_query = new CGI; # get the fields from the preference file that makes the submit and edit forms # open a file and read in the information to create customized menus # USER's PREFERENCES! # preferences file is defined by these rules: # all entries separated by || (record separator) # the first entry on each line is a variable and can have no spaces, other entries can # value of button || descriptive line to go on menu undef(@prefs); # open the MINE_Preferences file first: this is the one that might be edited # if it can't be found, open the default file MINE_Preferences.default &check_pref_default(); print $message; open (IN, "$pref_file") || die "Sorry, can't open the file $pref_file for reading: $!"; LINE: while ($line = ) { chomp($line); # if a line is read that starts with a #, it is a comment, skip it # skip line if it's a comment next LINE if $line =~ /#/; next LINE if $line =~ /(\s)*#/; push (@prefs, $line); } foreach $prefs (@prefs) { @button_value = split(/\|\|/, $prefs); push(@names, $button_value[0]); # here make a hash for the labels } push(@names, "seq"); push(@names, "gc"); push(@names, "ga"); push(@names, "seq_len"); # TEXT must align LEFT print $var = < in the $database_name: TEXT print "

"; print $query->checkbox_group(-name=>'defined_fields', -values=>[@names], -linebreak=>'true', -labels=>\%labels); print "
"; # print a 'clear' button at end of form: script self-calls print $query->defaults('Clear'); print ""; ############ PRINT FORM SEARCH OPTIONS ############ print "Advanced Search
"; print "

Search Options See the preferences for creating this menu in the preferences file: $source

"; # write a form to get a user input sequence print $query->startform(); ############### PRINT FORM DIRECTIONS FOR SEARCHING ############### # TEXT must align LEFT print $var = <

field (e.g. file name) comparison (e.g. contains) pattern (e.g. IVET)

Search queries can be joined together to form progressively more complex queries. For example this search would only search through "FILES whose name contained the letters SBW" AND would only match those entries that contain "SEQUENCES of a LENGTH equal or longer than 400 bp" AND "G+C contents that are below 60%":

1. the_file_names contain SBW
2. Click the Search, Search and Display, or MORE button
3. seq_len >= 400
4. Click the Search, Search and Display, or MORE button
5. gc <= 60
6. Click the Search or Search and Display button to see results
7. Etc. Etc. Click Clear when finished
TEXT ############### END PRINT FORM DIRECTIONS FOR SEARCHING ############### # START must align LEFT print $table_start = < START print "Field
"; print "Comparison
"; print "Pattern
"; print $query->popup_menu(-name=>'search_on', -values=>['the_file_names', @names, 'whole_file'], -labels=>\%labels, -default=>'file'); print "

You can only use the comparisions \'contains\' and \'does_not_contain\' when searching a \"whole file\"."; print ""; @comparisons = ("contains", "does_not_contain", "=", "<=", ">=", "defined", "undefined","seen"); print $query->radio_group(-name=>'comparison', -values=>[@comparisons], -default=>'contains', -linebreak=>'true', -labels=>\%labels); print ""; print $query->textfield(-name=>'pattern', -size=>20, -maxlength=>80); print "

The comparisons \"defined\", \"undefined\" and \"seen\" do not take a pattern. All patterns are case sensitive. (\"Fields with the symbol \"?\" count as \"defined\" fields.)"; print ""; print "

"; print <\"Search\" = show number of matching files only
\"Search & Display\" = show selected contents of all matching files
\"MORE\" = add another query before searching
\"Clear\" = stop concatenating queries and start afresh
DIRECTIONS # print button to do search print $query->submit('action', 'Search'); # print button to do search print $query->submit('action', 'Search & Display'); # print button for option to add parameters to search print $query->submit('action', 'MORE Search Parameters'); # print a 'clear' button to clean out MORE_archive_file print $query->submit('action', 'Clear Search List'); print ""; # end the table &table_bottom(); print $query->endform(); ################# # END FORM ################# ################# # PROCESS FORM ################ # start a table for formatting &table_top(); # take action on the values sent by the form above $action = $query ->param('action') || undef; # get the fields selected for the 'defined fields' option # query named $temp_query! @defined_fields = $temp_query ->param('defined_fields') || undef; # get the name of the display format selected $display_format = $query ->param('display_format') || undef; # get the search query file, if MAKE NOVEL search, do nothing $search_query_file = $query ->param('search_query_file') || undef; # print "Use $search_query_file

"; if ($search_query_file != "Make a Novel Search Query" && ($search_query_file)) { # copy the contents of this file into the file that contains the active search query open (QUERY, "$search_query_file") || die "can't open your search query file: $search_query_file"; open (MORE, ">MORE_archive_file") || die "can't open the MORE_archive_file!"; while (QUERY) { print MORE $_; } } # get the name of the file to copy results to if it exists $copyfile = $query ->param('copyfile') || undef; # clear the search list in file MORE_archive_file is Clear is selected (delete file) ######## If "Clear" ######## if ($action =~ /Clear/ || $action eq undef) { unlink ("MORE_archive_file"); print "No search queries in Search List

"; undef (@s_parameter); } ######## END If "Clear" ######## ######## If "Search" ######## if ($action eq "Search" || $action eq "Search & Display" || $action =~ /MORE/) { # get the field (whole file or file names option) to search on $search_on = $query ->param('search_on') || undef; # get the comparision to use $comparison = $query ->param('comparison') || undef; # remind us what the comparisions could be # @comparisons = ("defined", "undefined", "contains", "does not contain", "=", "<=", ">="); # get the comparision to use $pattern = $query ->param('pattern') || undef; ######### WRITE THE QUERY TO THE "MORE_archive_file" file # write the parameters to file for saving open (MORE, ">>MORE_archive_file"); $search_parameter = "$search_on $comparison $pattern"; print MORE "$search_parameter\n"; $search_parameter = undef; close (MORE); ######### ######### WRITE THE FULL QUERY TO SCREEN # print to screen the existing search parameters # open for reading only open (MORE, "MORE_archive_file"); # the searches are fast enough that this is printed with the final results undef (@s_parameters); while () { push (@s_parameters, $_); } print ""; close (MORE); ######### undef (@search_results); undef (@keep_search_results); if ($action eq "Search" || $action eq "Search & Display") { ###### Do the SEARCH Starting Here ###### # put a loop around search parameters and put them into the right variables %seen = (); # a hash to keep track of all the repeat motifs found # when using seen, this is the array that groups the files seen into categories of uniqueness undef(@group_file); $ext = ".db"; &get_files(); @search_results = @database; # loop over the search parameters # FOREACH QUERY TAKEN FROM THE "MORE_archive_file" QUERY: foreach $s_parameter (@s_parameters) { if (@keep_search_results) { undef (@search_results); @search_results = @keep_search_results; } ($search_on, $comparison, $pattern) = split (/\s/, $s_parameter); # the search parameters need to be put into the query $query 
->param ('search_on', $search_on); $query ->param ('comparison', $comparison); $query ->param ('pattern', $pattern); # get the field (whole file or file names option) to search on $search_on = $query ->param('search_on') || undef; # get the comparision to use $comparison = $query ->param('comparison') || undef; # get the comparision to use $pattern = $query ->param('pattern') || undef; # FOREACH FILE TO BE SEARCHED SEARCH: foreach $file (@search_results) { # open the file to restore the query open (IN, $file) || die "can't open the file $file"; $search_for_in_query = new CGI(IN); # SEARCH WHOLE_FILE if ($search_on eq "whole_file") { if ( $comparison eq "seen" || $comparison eq "defined" || $comparison eq "undefined" || $comparison eq "=" || $comparison eq ">=" || $comparison eq "<=" ) { print "Sorry, you can only use the contains and does not contain comparision options when searching the whole contents of a file

"; &table_bottom(); undef (@search_results); last QUERY; } close (IN); &search_whole_file($file); next SEARCH; } # SEARCH FILE NAMES # If we want to search the file names, we need to append this value to the query if ($search_on eq "the_file_names") { $search_for_in_query ->append(-name=>'the_file_names', -value =>$file); } if ($search_on =~ /db/ || $search_on =~ /html/) { $search_for_in_query ->append(-name=>'files_to_search', -value =>$search_on); } # SEEN if ($comparison eq "seen") { if ($search_for_in_query->param($search_on)) { $seen = $search_for_in_query ->param($search_on); ++$seen{$seen}; $file_names_seen{$seen} = $file_names_seen{$seen}.":$file"; } } # DEFINED if ($comparison eq "defined") { # defined if not ? if ($search_for_in_query ->param($search_on) && $search_for_in_query ->param($search_on) !~ /\?/) { push (@new_search_results, $file); } } # UNDEFINED if ($comparison eq "undefined") { # undefined if blank and not ? if (!$search_for_in_query ->param($search_on) || $search_for_in_query ->param($search_on) =~ /\?/){ push (@new_search_results, $file); } } # CONTAINS if ($comparison eq "contains") { if ($search_for_in_query ->param($search_on) =~ $pattern){push (@new_search_results, $file);} } # DOES NOT CONTAIN if ($comparison eq "does_not_contain") { if ($search_for_in_query ->param($search_on) !~ $pattern){push (@new_search_results, $file);} } # EQUALS - does this work for numbers and text? if ($comparison eq "=") { if ($search_for_in_query ->param($search_on) eq ( $pattern)){push (@new_search_results, $file);} } # >= GREATER THAN - does this work for numbers and text? alphabetic? if ($comparison eq ">=") { if ($search_for_in_query ->param($search_on) >= $pattern){push (@new_search_results, $file);} } # <= LESS THAN - does this work for numbers and text? alphabetic? 
if ($comparison eq "<=") { if ($search_for_in_query ->param($search_on) <= $pattern){push (@new_search_results, $file);} } } # end foreach $file (@search_results) undef (@search_results); @search_results = @new_search_results; undef (@new_search_results); $no_matches = "no"; $the_match = "$search_on $comparison $pattern"; if (@search_results) { print "

There are ".@search_results." files that match the search query:

\"$the_match\"

"; } else { if (!%seen) { print "

$the_match: No matches found.

"; } $no_matches = "yes"; } undef(@keep_search_results); @keep_search_results = @search_results; } # end if foreach $s_parameter (@s_parameters) { } # end if # ACTUALLY DO THE SEARCH ###### END Do the SEARCH Starting Here ###### } # end if Do Search Now ###### END Search ###### ###### START DISPLAYING SEARCH RESULTS: "SEEN" ###### # AFTER THE SEARCH, print out a list of UNIQUE items SEEN if ($comparison eq "seen" && %seen) { # Print in sorted order the types of motifs seen and their frequencies print "Unique items seen:
"; # this syntax sorts the hash using the 'values' (these are the frequencies of each motif) foreach $key (sort {$seen{$b} <=> $seen{$a} } keys %seen) { $count++; print " $count $key=$seen{$key}
"; } foreach $key (sort %file_names_seen) { if ($key !~ /^:/) {print "

Unique item: $key
";} @file_names_seen = split (/:/,$file_names_seen{$key}); foreach $file_names_seen (@file_names_seen) { print "$file_names_seen
"; } } } ###### CONTINUE DISPLAYING SEARCH RESULTS###### ######## If "Display ALL Entries" && "Search & Display" ######## if ($action eq "Display ALL Entries" || $action eq "Search & Display") { # this means if there are no search results, show all as if displaying the # whole database without a search, what about searches with no hits! if (!@search_results) { $ext = ".db"; &get_files(); @search_results = @database; } if (!@search_results && ($no_matches eq "yes")) {@search_results = undef;} ######## If "Display ALL Entries" ######## if ($action eq "Display ALL Entries") { $ext = ".db"; &get_files(); @search_results = @database; print "The contents of search results @search_results"; } # no temp file for list of file names, or dump to screen if ($display_format ne "entries" && $display_format ne "file_names") { # GIVE THE manager the option to save search results # if ($write_permission) { print "
"; print "Save search results:

"; print "

"; # make a url link to the temp_output output file $source = "temp_output"; print "

Current search results: $source (not an html file, but pure text)

"; print "

"; # write a small form just to ask the user if they'd like to save the results to # a file with a more memorable name # put the file name through a taint check first, no metacharacters allowed!!! # write a form to get a user input file name ############ # START FORM ############ print $query->startform(); print "
"; %labels = ( "html" => "Save file as HTML instead of pure text (will add an html tag PRE to top of file)" ); print $query->checkbox_group(-name=>'html_file', -values=>['html'], -labels=>\%labels ); print "
"; print "
"; print $query->submit('action','Copy'); print " to a permanent file named: ",$query->textfield('copyfile',''),"\n"; print "
"; print " (.results or .results.html
will be added automatically, depending on whether you select to save
as a text file or an html file"; # add optional comments to the results file - this will change how excel sees the file... # end the form print $query->endform(); # } # end if $write_permission = 1 } # end ################# # END FORM ################# print "
"; # reminds us of the display options found in the $display_format variable # @display_formats = ( 'file_names','entries', 'defined_fields', 'fasta', 'report'); # write some specific comments for different types of display options # DISPLAY AS LIST OF FILE NAMES if ($display_format eq "file_names") { # use the &get_log(".db") function from CGI_MINE.pl # to produce links to ALL the existing database files # if there are search results to be display, display only these files if (@search_results && $no_matches eq "no") {&get_log_array(@search_results);} # else if there was a search, but no results, display nothing elsif (@search_results && $no_matches eq "yes") { # do nothing, no matches found } # elseif no search has been done and the whole database should be displayed elsif (!@search_results) { # this option currently doesn't save to file! &get_log(".db");} } # DISPLAY ALL ENTRIES if ($display_format eq "entries") { # use the &get_dump(".db") function from CGI-MINE.pl # to "dump" the contents of ALL the existing database files # if there are search results to be display, display only these files if (@search_results && $no_matches eq "no") { &get_dump_array(@search_results); # this option currently doesn't save to file! 
} # else if there was a search, but no results, display nothing elsif (@search_results && $no_matches eq "yes") { # do nothing, no matches found } # elseif no search has been done and the whole database should be displayed elsif (!@search_results) {&get_dump(".db")} } # DISPLAY DEFINED_FIELDS, FASTA, REPORT if (($display_format eq "fasta" ||$display_format eq "report" || $display_format eq "defined_fields")&& $no_matches eq "no") { # The two above DISPLAY options are simple # The three below need to loop over all the files in the @search_results # array to pull out information # if the array containing search results exist, use it, otherwise # if it dosn't exist (tested here) pick up all the files in the database if (!@search_results) { @search_results = <*.db>; $ext = ".db"; &get_files(); @search_results = @database; } # to have nice format use PRE here, will tidy it up print "

";

			# "report" layout only: write a single header row naming each column
			# before any per-file rows are produced.
			if ($display_format eq "report") {
				# columns are the fields ticked on the submitted form
				@defined_fields = $query->param('defined_fields');

				# every cell is written twice: once to the page and once to the
				# TEMP handle (the temp_output copy of the results)
				my $emit_cell = sub {
					my ($cell) = @_;
					print $cell;
					print TEMP $cell;
				};

				$emit_cell->("Matching Files\t*");
				$emit_cell->("$_\t*") for @defined_fields;
			}


			foreach $file (@search_results) {
				# open the file to restore the query
       				open (IN, $file) || die "can't open the file $file";
       				$search_query = new CGI(IN);
			        # get the sequence from the file
       				$seq = $search_query->param('seq');

				# OPTION DISPLAY ONLY DEFINED FIELDS
				# For the record currently loaded in $search_query, print one
				# "field = value" line per value of every field ticked on the form.
				# Output goes both to the page and to the TEMP handle.
				if ($display_format eq "defined_fields") {
					# the ticked fields come from the submitted form ($query),
					# not from the stored record ($search_query)
					@defined_fields  = $query->param('defined_fields');
					print "\n$file: \n";
					print TEMP "\n$file:\n";
					foreach $show_field (@defined_fields) {
						# fetch all values once, in list context
						@get_field = $search_query->param($show_field);
						# A field counts as entered when its first value is defined
						# and non-empty. A plain truth test here would misreport a
						# legitimate stored value of "0" as NOT ENTERED.
						if (@get_field && defined $get_field[0] && length $get_field[0]) {
							foreach (@get_field) {
								print "$show_field = $_\n";
								print TEMP "$show_field = $_\n";
							}
						}
						else {
							print "$show_field = NOT ENTERED\n";
							print TEMP "$show_field = NOT ENTERED\n";
						} # end if field has a value
					} # end foreach $show_field (@defined_fields)
				}

			# DISPLAY FASTA FORMAT
		        if ($display_format eq "fasta") {
			        # get the id from the file
       				$id = $search_query->param('id') ||$search_query->param('Accession');
			        # get the sequence from the file
       				$seq = $search_query->param('seq');
				# get the definition from the file
       				$def = $search_query->param('def') || $search_query->param('Definition');
				# wrap the sequence into lines with 70 characters
				&wrap_output($seq, (length $seq), $wrapped_seq);
				print ">File: $file ID: $id DEF: $def
$wrapped_seq
"; print TEMP ">File:$file ID: $id DEF: $def\n$wrapped_seq"; } # OPTION FORMAT REPORT if ($display_format eq "report") { $report_count++; print "\n$report_count*$file*"; print TEMP "\n$report_count*$file*"; foreach $show_field (@defined_fields) { if ($search_query->param($show_field)) { @get_field = $search_query->param($show_field); foreach (@get_field) { print "$_*"; print TEMP "$_*"; } } else { print "NOT ENTERED\t"; print TEMP "NOT ENTERED\t"; } # end if ($search_query->param($show_field)) } # end foreach $show_field (@defined_fields) } } # end foreach $file (@search_results) # close the output file temp.html close(TEMP); } # end if ($display_format eq "fasta" ||$display_format eq "report" || $display_format eq "defined_fields") } # end if ($action eq "Display Now" || "Display) # finish the table formatting &table_bottom(); ######## If "Copy" ######## ######################################################### # if COPY results to file is selected, write the content of # the temp.html file to the new file name (a permanent file) ######################################################### if ($action eq "Copy") { &table_top; # this function causes the script to die if the passed name if bad (could use warn later) &clean_name($copyfile); # if the script hasn't died...you will see the below.... # this only exists if a clean version $html_file = $query->param('html_file'); if ($html_file) { open (USER_IN, ">$copyfile.results.html"); print USER_IN "

"; print "The new file has been made: $copyfile.results.html

"; } else { open (USER_IN, ">$copyfile.results"); print "The new file has been made: $copyfile.results

"; } open (TEMP_OUTPUT, "temp_output") || die "can't open the temp_output file"; while () { print USER_IN $_; } &table_bottom(); } ######## END If "Copy" ######## ################# # END PROCESS FORM ################# # PRINT BOTTOM OF EACH WEB PAGE # if $show_source is set to 1 show a link # at the bottom of each script to the source # code - pass the name of this script to the # function in CGI-MINE.pl if ($show_source) { $script_name = $query->script_name(); &source ($script_name); } # attach the MINE copywrite &mine_cp; # finish off the body of the html page print $query->end_html; ################# # END WEBPAGE ################# ##################### # FUNCTIONS ##################### sub search_whole_file { $file = @_[0]; # IN file handle contains the file to search open (IN, $file) || die "can't open the file $file"; my $contents = ""; while () { $contents .= $_; } close (IN); # CONTAINS if ($comparison eq "contains") { if ($contents =~ /$pattern/i){push (@new_search_results, $file);} } # DOES NOT CONTAIN if ($comparison eq "does_not_contain") { if ($contents !~ /$pattern/i){push (@new_search_results, $file);} } }