#!/usr/bin/perl
##############################################
##############################################
# SCRIPT NAME: Page_Analysis.cgi
# FUNCTION: Do batch analysis on all files
#           and define new field in entries
##############################################
# MINE: Molecular INformation Explorer
# Copyright 2000 Dawn Field. All rights reserved.
# The CGI-PERL scripts belonging to MINE
# may be used and modified freely, but I do
# request that this copyright notice remain attached
# to this file/source code. If you make modifications
# please do not distribute unless
# you fully document the modifications.
##############################################
# MODIFICATIONS (documented as requested above):
#  - strict/warnings enabled; names that may be shared with CGI-MINE.pl
#    are kept as package globals via "our".
#  - Three-argument open on lexical filehandles; every handle is closed,
#    and the entry file is closed before it is rewritten.
#  - G+C / G+A: an empty sequence now stores 0.0 instead of re-using the
#    value computed for the previous file, and the stored value is the
#    same one-decimal rounding that is printed (previously it was cut
#    with a regex that truncated and skipped values below 10).
#  - Blast reports are recognised by a name ending in "blast.html".
#  - Requested analyses are checked against the %labels table.
#  - The closing message reports write failures instead of always
#    claiming success.
#  - NOTE(review): the reviewed copy of this script had lost its HTML
#    markup.  Line breaks in page output are emitted as <br>; values
#    written to the database files keep the text exactly as found.
#    Compare against a pristine copy and restore richer markup if any.
##############################################

use strict;
use warnings;
use CGI;
require "CGI-MINE.pl";

# Package globals.  These are deliberately NOT lexicals: the helpers called
# below (get_files, menu, source, ...) are not defined in this file --
# presumably they come from CGI-MINE.pl -- and may read or set them.
our ($debug, $query, $action, %labels, $ext, @database, @files,
     $show_source, $script_name, @top_blast_extensions);

# Since you are batch editing your database, be extra careful when
# changing this script.
# If you set $debug to 1, you will dump the contents of each file
# to screen AFTER it has been changed.  This is useful for
# checking that you haven't clobbered any values in your database
# and that you aren't accumulating multiple values for any one field.
$debug = 0;

#################
# START EACH MINE CGI SCRIPT
#################

# Redirect error messages to the user's screen (useful for debugging CGI).
open(STDERR, ">&STDOUT");    # print errors to screen
$| = 1;                      # flush the print buffer continuously

# Make a new query object using the CGI.pm module.
$query = CGI->new;

# Print the required header and start the web page.
print $query->header;
print $query->start_html('MINE Analysis');

# Each time the script is invoked without an action, log the visit in the
# custom MINE server log.  This comes after the header in case an error
# message is printed.
$action = $query->param('action');
if (!defined $action || $action eq '') {
    # The leading "&" is required: a plain log() would call Perl's builtin
    # logarithm function instead of the user-defined sub.
    &log();
}

#######
# START THE WEBPAGE
#######

# Print the MINE menu, then start the basic table used for formatting.
menu();
table_top();

# NOTE(review): this sentence reads as if "Links" was a hyperlink whose
# markup is missing from the reviewed copy; the target is not recoverable
# from this file.
print "<br>Links to additional web-based Analyses of single sequences. "
    . "Also check the Link MINE menu for lists of databases websites, "
    . "and tools dedicated to the data-mining of complete genomes and proteomes<br>";

# Main title of the page.
print "Batch Analyses of All MINE Entries<br><br>";

############
# START FORM
############
print $query->startform();

print <<'MESSAGE';
 G+C adds the percentage of G+C for the entire sequence, as does G+A for the G+A content. Sequence length is the length of the entire sequence. The "add links to all .db files" option searches for all other files in the database with similar names and adds hyperlinks to each of them into the main entry. If blast reports are detected among these files (all files ending in "blast.html"), this option also writes the top two matches into the main entry file.
<br><br>
The two benefits of this script are 1) that this extra information becomes viewable in individual sequence entry files (see Database Log), and 2) is therefore searchable using the MINE Search Engine. Using the Search Engine one can generate reports for example of the gc content, sequence length, and top blast matches for each sequence which can be saved for viewing in the Database Log or for export, for example as an Excel Workbook.
<br><br>
MESSAGE

# Analysis key => description shown next to its checkbox.  Only keys present
# here are accepted from the request further down.
%labels = (
    "gc"      => "G+C content (as a percentage of total nucleotides in a sequence)",
    "ga"      => "G+A content (as a percentage of total nucleotides in a sequence)",
    "seq_len" => "Sequence length",
    "add_links_to_files" => "add links to all .db files",
    # "TOP_TblastX_vs_Genbank" => "add top TblastX hits against all genbank DNA documents to all QBR*.db and SBW*.dbfiles"
);

print $query->checkbox_group(
    -name      => 'analyses_to_do',
    # add 'TOP_TblastX_vs_Genbank' here (and its label above) to offer it
    -values    => ['gc', 'ga', 'seq_len', 'add_links_to_files'],
    -linebreak => 'true',
    -labels    => \%labels,
);

# Add some space between the options and the buttons.
print "<br><br>";

# Button to start the analysis, and a 'clear' button (script self-calls).
print $query->submit('action', 'Do Analysis');
print $query->defaults('Clear');
print $query->endform();
table_bottom();
#################
# END FORM
#################

# Nucleotide-composition analyses: analysis key => display name and a
# counter returning how many characters of the sequence are in the set.
my %composition = (
    gc => { name => 'G+C', count => sub { my ($s) = @_; return ($s =~ tr/GCgc//); } },
    ga => { name => 'G+A', count => sub { my ($s) = @_; return ($s =~ tr/GAga//); } },
);

# Scan an open blast report.  For every line containing the text
# "Sequences producing significant alignments", skip the line after it and
# collect the following $per_group lines (unchomped; '' where the file ends
# early).  Returns a list of array references, one per header line found.
sub _pa_blast_hit_groups {
    my ($fh, $per_group) = @_;
    my @groups;
    while (defined(my $line = <$fh>)) {
        next unless $line =~ /Sequences producing significant alignments/;
        my $blank = <$fh>;    # line between the header and the first match
        my @hits;
        for (1 .. $per_group) {
            my $hit = <$fh>;
            $hit = '' unless defined $hit;
            push @hits, $hit;
        }
        push @groups, \@hits;
    }
    return @groups;
}

##############
# PROCESS FORM
#############
if (defined $action && $action eq "Do Analysis") {

    # start a table for formatting
    table_top();

    # Newer CGI.pm releases warn when param() is called in list context
    # (and STDERR goes to the page), so use multi_param() where it exists.
    my @requested = $query->can('multi_param')
        ? $query->multi_param('analyses_to_do')
        : $query->param('analyses_to_do');

    # Accept only analyses this script actually offers.
    my @analyses_to_do = grep { defined $_ && exists $labels{$_} } @requested;

    # Remind the user what was selected, using the descriptive labels.
    print "You have selected to calculate:<br><br>";
    foreach my $analysis (@analyses_to_do) {
        print "$labels{$analysis}<br>";
    }
    print "<br><br>";    # this tidies up the format a bit

    # Get all the files in the database (all end with .db).
    # USE the .dbtest files for DEBUGGING!! while DEVELOPING NEW OPTIONS
    # MISTAKES WILL OVERWRITE YOUR DATABASE
    $ext = ".db";
    get_files();         # passes back array @database filled with files
    @files = @database;

    my $write_failures = 0;

    foreach my $file (@files) {

        # Restore the saved query from the entry file, then close the file
        # straight away because it is rewritten at the end of this loop.
        open(my $in, '<', $file) or die "can't open the file $file: $!";
        my $temp_query = CGI->new($in);
        close $in;

        my $seq = $temp_query->param('seq');
        $seq = '' unless defined $seq;
        my $seq_len = length $seq;

        foreach my $analysis (@analyses_to_do) {

            ##########  IF "gc" or "ga" ##########
            if (exists $composition{$analysis}) {
                my $spec = $composition{$analysis};

                # Guard against dividing by zero; an empty sequence is 0%.
                my $percent = $seq_len >= 1
                    ? ($spec->{count}->($seq) / $seq_len) * 100
                    : 0;

                print "<br>$file\t$spec->{name} content = ";
                printf("%8.1f", $percent);

                # Replace any earlier value so fields do not accumulate.
                $temp_query->delete($analysis);
                $temp_query->append(-name  => $analysis,
                                    -value => sprintf("%.1f", $percent));
            }

            ##########  IF "seq_len" ##########
            elsif ($analysis eq "seq_len") {
                print "<br>$file\tLength =\t ";
                print "$seq_len";
                $temp_query->delete($analysis);
                $temp_query->append(-name => $analysis, -value => $seq_len);
            }

            ##########  IF "add_links_to_files" ##########
            elsif ($analysis eq "add_links_to_files") {
                # NOTE(review): this pattern also matches $file itself,
                # although the on-page help speaks of "other files" --
                # confirm whether the entry should list itself.
                my @add_files = glob("$file*");
                print "<br>$file has these associated files: ";
                $temp_query->delete($analysis);

                foreach my $add_file (@add_files) {
                    my @hit_lines;

                    # Blast reports contribute their two top matches.
                    if ($add_file =~ /blast\.html$/) {
                        open(my $report, '<', $add_file)
                            or die "can't open the blast_reports $add_file to get best matches: $!\n";
                        foreach my $group (_pa_blast_hit_groups($report, 2)) {
                            my ($best, $second) = @{$group};
                            chomp($best);
                            chomp($second);
                            push @hit_lines, " Best Match: " . $best;
                            push @hit_lines, " Second Best Match: " . $second . "\n";
                        }
                        close $report;
                    }

                    # NOTE(review): the help text promises hyperlinks, but
                    # the value found in the reviewed copy is the bare file
                    # name followed by the hits; any anchor markup was
                    # probably lost -- restore it from a pristine copy.
                    my $add_link = $add_file . "@hit_lines";
                    print " $add_file ";
                    $temp_query->append(-name => $analysis, -value => $add_link);
                }
            }

            ##########  IF "TOP_TblastX_vs_Genbank" ##########
            # Disabled by default (its label is commented out above).
            # @top_blast_extensions is not set in this file; presumably it
            # is defined in CGI-MINE.pl -- verify before enabling.
            elsif ($analysis eq "TOP_TblastX_vs_Genbank") {
                next unless $file =~ /(?:SBW|QBR).....db/;
                next unless @top_blast_extensions;

                my $first_hit_line;    # reset per file, never carried over
                foreach my $extension (@top_blast_extensions) {
                    my $blast_file = $file . "_" . $extension;
                    next unless -e $blast_file;

                    print "$blast_file is the genbank file: ";
                    open(my $blast_in, '<', $blast_file)
                        or die "can't open file $blast_file: $!";
                    foreach my $group (_pa_blast_hit_groups($blast_in, 1)) {
                        $first_hit_line = $group->[0];
                        print "$first_hit_line<br><br>";
                    }
                    close $blast_in;
                }

                # Replace the old value; store a new one only if a hit was
                # actually found for this file.
                $temp_query->delete($analysis);
                if (defined $first_hit_line) {
                    $temp_query->append(-name  => $analysis,
                                        -value => $first_hit_line);
                }
            }
        }    # end foreach $analysis (@analyses_to_do)

        if ($debug) { print $temp_query->dump(); }

        # SAVE NEW VALUES TO FILE
        if (open(my $out, '>', $file)) {
            $temp_query->save($out);
            # Buffered write errors only surface at close time.
            unless (close $out) {
                print "Error: couldn't write to file $file: $!\n";
                $write_failures++;
            }
        }
        else {
            print "Error: couldn't write to file $file: $!\n";
            $write_failures++;
        }
    }    # end foreach $file (@files)

    if ($write_failures) {
        print "\n\nAnalysis Finished, but $write_failures file(s) could not be written; see the errors above.";
    }
    else {
        print "\n\nAnalysis Finished. Fields added successfully to all files.";
    }

    # finish the table formatting
    table_bottom();
}    # end if ($action eq "Do Analysis")
#################
# END PROCESS FORM
#################

# PRINT BOTTOM OF EACH WEB PAGE
# If $show_source is set to 1, show a link at the bottom of each script to
# the source code: pass the name of this script to the source() helper.
if ($show_source) {
    $script_name = $query->script_name();
    source($script_name);
}

# ATTACH the MINE copyright notice
mine_cp();
print $query->end_html;
#################
# END WEBPAGE
#################