#!/usr/bin/perl
##############################################
##############################################
# SCRIPT NAME: Page_Analysis.cgi
# FUNCTION: Do batch analysis on all files
# and define new field in entries
##############################################
# MINE: Molecular INformation Explorer
# Copyright 2000 Dawn Field. All rights reserved.
# The CGI-PERL scripts belonging to MINE
# may be used and modified freely, but I do
# request that this copyright notice remain attached
# to this file/source code. If you make modifications
# please do not distribute unless
# you fully document the modifications.

use CGI;
require "CGI-MINE.pl";

# Since you are batch editing your database, be extra careful when
# changing this script.
# If you set $debug to 1, you will dump the contents of each file
# to screen AFTER it has been changed. This is useful for
# checking that you haven't clobbered any values in your database
# and that you aren't accumulating multiple values for any one field.
$debug = 0;

#################
# START EACH MINE CGI SCRIPT
#################

# Redirect error messages to the user's screen; useful for debugging
# CGI scripts.
open (STDERR, ">&STDOUT"); # print errors to screen
$| = 1;                    # flush the print buffer continuously

# Make a new query object using the CGI.pm module.
$query = CGI->new;

# Print the required header and start the web page.
print $query->header;
print $query->start_html('MINE Analysis');

# Each time a script is invoked for the first time (no 'action' value yet),
# log the visit in the custom MINE server log (see CGI-MINE.pl).
# This comes after the header in case an error message is printed.
$action = $query->param('action');

# "No action" means undefined or empty. The & sigil on log() is required:
# without it Perl would call the builtin log() (natural logarithm) instead
# of the logging routine supplied by CGI-MINE.pl.
if (!defined $action || $action eq '') { &log(); }

#######
# START THE WEBPAGE
#######

# Print the MINE menu.
&menu;

# Start the basic gray table used for formatting MINE pages.
&table_top();
print "\n";
############
# START FORM
############
print $query->startform(); # start the form

# Introductory text shown above the analysis checkboxes. The heredoc runs
# up to the MESSAGE terminator line.
# NOTE(review): the text appears to start mid-thought ("The two benefits
# ..."), so its opening sentences may be missing -- TODO confirm against
# the original script.
print <<MESSAGE;
The two benefits of this script are 1) that this extra information becomes viewable in individual sequence entry files (see
Database Log), and 2)
is therefore searchable using the MINE
Search Engine. Using the Search Engine one can
generate reports for example of the gc content,
sequence length, and top blast matches for each sequence which can be saved for viewing in the
Database Log or for export, for example as an Excel Workbook.
MESSAGE
# Description shown next to each analysis checkbox, keyed by the short
# analysis name that is submitted with the form. The same hash is used
# later to echo the user's selection back in readable form.
%labels = (
    gc                 => "G+C content (as a percentage of total nucleotides in a sequence)",
    ga                 => "G+A content (as a percentage of total nucleotides in a sequence)",
    seq_len            => "Sequence length",
    add_links_to_files => "add links to all .db files",
    # Disabled option:
    # "TOP_TblastX_vs_Genbank" => "add top TblastX hits against all genbank DNA documents to all QBR*.db and SBW*.dbfiles"
);

# One checkbox per analysis, one per line. The order is spelled out here
# because a hash does not keep its keys in a fixed order.
# ('TOP_TblastX_vs_Genbank' is left out while that option is disabled.)
print $query->checkbox_group(
    -name      => 'analyses_to_do',
    -values    => [ qw(gc ga seq_len add_links_to_files) ],
    -linebreak => 'true',
    -labels    => \%labels,
);
# Spacer between the checkbox menu and the buttons.
print " ";

# Form controls, in display order:
#   - submit button named 'action' that starts the analysis
#   - 'Clear' button that resets the form (the script calls itself)
#   - closing tag of the form
print $query->submit('action', 'Do Analysis'),
      $query->defaults('Clear'),
      $query->endform();

# Finish the formatting table started with &table_top().
&table_bottom();
#################
# END FORM
#################
##############
# PROCESS FORM
#############
# Read the 'action' parameter again. The submit button of the form is
# named 'action' and labelled "Do Analysis", so this value tells us
# whether the analysis button has been pressed.
$action = $query->param('action');
################# If "Do Analysis" #################
if ($action eq "Do Analysis") {
# start a table for formatting the output of this run
&table_top();
# the analyses ticked in the 'analyses_to_do' checkbox group
@analyses_to_do = $query->param('analyses_to_do');
# Remind the user which analyses they selected, using the %labels hash
# created above to print a text description of each one,
# e.g. instead of just printing "gc", print the more informative
# "G+C content (as a percentage of total nucleotides in a sequence)".
print "You have selected to calculate: ";
# NOTE(review): the loop below looks damaged. The `undef (@hit_lines);`,
# `} # end of while` and stray `";` lines do not fit a loop over
# @analyses_to_do and resemble lines from the blast-report code further
# down -- TODO restore this loop from the original script.
foreach $analysis (@analyses_to_do) {
print "$labels{$analysis} ";
undef (@hit_lines);
} # end of while
";
}
print ""; # this tidies up the format a bit
# Collect every file of the database; they all end with .db.
#
# WHILE DEVELOPING NEW OPTIONS, debug against the .dbtest files instead:
# MISTAKES WILL OVERWRITE YOUR DATABASE.
#######
$ext = '.db';        # the pattern the file names must end with
&get_files();        # finds the files ending in $ext; passes back @database
@files = @database;  # own copy, so @database stays local to CGI-MINE.pl
# LOOP OVER each of the files to process it
foreach $file (@files) {
# Open the entry file and rebuild a CGI query object from its contents,
# so the stored fields can be read and changed through param()/delete()/
# append().
# NOTE(review): this is a two-argument open on a bareword handle, so a
# file name with leading/trailing whitespace or mode characters would be
# misread, and the die message does not include $!. Consider
# open(IN, '<', $file) once the rest of this loop (not visible here) has
# been checked for other uses of IN.
open (IN, $file) || die "can't open the file $file";
$temp_query = new CGI(IN);
# get the sequence from the file (the 'seq' field of the entry)
$seq = $temp_query->param('seq');
# loop over the types of analysis the user ticked in the form
foreach $analysis (@analyses_to_do) {
########## IF "gc" ##########
# Compute the G+C percentage of $seq, print it, and store it in the entry
# under the field name held in $analysis ("gc").
if ($analysis eq "gc") {
    # Count the G and C letters (either case). tr/// with an empty
    # replacement list only counts; it leaves $seq unchanged.
    $cnt     = ($seq =~ tr/GCgc//);
    $seq_len = length $seq;

    # Recompute for every file. An empty sequence gives 0 (and avoids a
    # division by zero) instead of keeping the percentage calculated for
    # the previous file.
    $per_CG = ($seq_len >= 1) ? ($cnt / $seq_len) * 100 : 0;

    # Keep one decimal place. The same rounded value is shown on screen
    # and written to the entry, whatever the number of digits before the
    # decimal point.
    $per_CG = sprintf("%.1f", $per_CG);

    print "\n$file\tG+C content = ";
    printf("%8.1f", $per_CG);

    # Drop any value from an earlier run so the field does not accumulate
    # multiple values (deleting a missing field is harmless).
    $temp_query->delete($analysis);

    # Add the new value to the entry.
    $temp_query->append(-name => $analysis, -value => $per_CG);
}
########## IF "ga" ##########
# Compute the G+A percentage of $seq, print it, and store it in the entry
# under the field name held in $analysis ("ga").
if ($analysis eq "ga") {
    # Count the G and A letters (either case). tr/// with an empty
    # replacement list only counts; it leaves $seq unchanged.
    $cnt     = ($seq =~ tr/GAga//);
    $seq_len = length $seq;

    # Recompute for every file. An empty sequence gives 0 (and avoids a
    # division by zero) instead of keeping the percentage calculated for
    # the previous file.
    $per_GA = ($seq_len >= 1) ? ($cnt / $seq_len) * 100 : 0;

    # Keep one decimal place. The same rounded value is shown on screen
    # and written to the entry, whatever the number of digits before the
    # decimal point.
    $per_GA = sprintf("%.1f", $per_GA);

    print "\n$file\tG+A content = ";
    printf("%8.1f", $per_GA);

    # Drop any value from an earlier run so the field does not accumulate
    # multiple values (deleting a missing field is harmless).
    $temp_query->delete($analysis);

    # Add the new value to the entry.
    $temp_query->append(-name => $analysis, -value => $per_GA);
}
########## IF "seq_len" ##########
# Report the length of $seq and store it in the entry under the field
# name held in $analysis ("seq_len").
if ($analysis eq 'seq_len') {
    $seq_len = length $seq;

    # File name and length, tab separated, on a line of their own.
    print "\n$file\tLength =\t ", "$seq_len";

    # Replace rather than accumulate: remove the field first (deleting a
    # field that does not exist is harmless), then add the fresh value.
    $temp_query->delete($analysis);
    $temp_query->append(
        -name  => $analysis,
        -value => $seq_len,
    );
} # end if ($analysis eq "seq_len")
########## IF "add_links_to_files" ##########
# Look for files whose names start with the name of the entry file and
# process each of them. Blast reports get extra handling to pull out
# their top matches.
if ($analysis eq "add_links_to_files" ) {
# file glob: every path that begins with $file (this includes $file itself)
@add_files = <$file*>;
print "
$file has these associated files: ";
# if the name already exists, delete it (if it doesn't exist, perl won't complain)
$temp_query->delete($analysis);
foreach $add_file (@add_files) {
# Check if the file is a blast report, if so, get the two top matches
# NOTE(review): the string on the right of =~ is used as a regex, so the
# "." matches any character and the test is unanchored; /blast\.html/ is
# presumably what was meant -- TODO confirm.
if ($add_file =~ "blast.html") {
# start from an empty list of hit lines for this report
undef (@hit_lines);
# NOTE(review): two-argument open on a bareword handle; the die message
# does not include $!.
open (GET_HITS, "$add_file") || die "can't open the blast_reports $add_file to get best matches\n";
while ($line = ";
push (@hit_lines, $line); # array containing top matches
}
} # end of while
$blast_line";
# push (@hit_lines, $blast_line);
}
print "@hit_lines