~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The FCHECK source code provided here is for informational purposes only. There are a number of modules and subroutines called by FCHECK which are necessary for FCHECK to function. These are not provided here. No support will be provided to anyone attempting to get FCHECK running locally. This version was created on August 28, 2001. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #!/usr/local/bin/perl -w -I/home/seabass/PERL/lib #+fcheck_3k.pl+admin+Checks that files meet the SeaBASS format and reports errors/warnings # Original development: Brian D. Schieber, SAIC/GSC, 10/1997 # # Mods: # 01/30/98 - BDS, mod /column_headers to /parameters for files parsed. # 03/26/98 - BDS, go back to using [DEG] on lat lon, and [GMT] on times to avoid confusion seen. # 05/20/98 - BDS, remove /begin_data and /end_data@ markers # 05/26/98 - BDS, add /cloud_percent, /wave_height, /wind_speed per J. Mueller, also add /linked_files for list of related filenames # 06/03/98 - BDS, changing /linked_files to /unique_link_id to simplify things, hopefully. Added /cruise per suggestion by Ru Morrison, BBOP. # 06/04/98 - BDS, changing /unique_link_id to /station # 06/08/98 - BDS, trap for duplication of field names, issue warning. # 06/09/98 - BDS, need /sequence_number to assure files are uniquely date named. # 06/18/98 - BDS, modify code to "collect" errors then put them out at one time. # 06/22/98 - BDS, handle /end_header w/o "\@" # 06/23/98 - BDS, decided to take /sequence_number back out, handling that internally. # 06/26/98 - BDS, dissallow file paths to /documents and /calibration_files # 06/30/98 - BDS, new datatype, 'matchup' for surface matchup values # 08/31/98 - BDS, catch times w/o hh:mm:ss format. # 12/28/98 - BDS, added to diagnostic comments for data section columns not matching fields. # 06/01/99 - BDS, get stricter on date and time range checking. # 06/11/99 - BDS, the 'help' option call on a email subject line does not seem to work. # 06/28/99 - BDS, added optional field "/date_stamp=" to track when submitter made file. # 08/09/99 - BDS, now forwarding messages to Jeremy. # 09/20/99 - SWB, changed "/date_stamp=" to "/data_status=" ; modified output to only report warnings and errors not to regurgitate the file. # 10/06/99 - SWB, added a check on the value of "/data_status="; added warning info to those using "/date_stamp=" # 02/04/00 - SWB, modifided to check for valid fields, and to check if units provided match those accepted for the field. # 02/07/00 - SWB, modified output 'look'. # 04/18/00 - SWB, added option to FTP files to seabass then launch FCHECK # 04/21/00 - SWB, added log file for email or FTP options # 06/16/00 - SWB, made fieldnames check case insensitive # 07/07/00 - SWB, fixed bug that caused erroneous error reports if date or time errors were detected # 07/07/00 - SWB, Added failure condition if filename has spaces... # 07/11/00 - SWB, Added failure condition if header fields contain quotes or apostrophes # 07/26/00 - SWB, fixed minor bug that prevented error checking on units if fieldnames were not as they are in the standard fields list (i.e. case sensitive), this bug popped up after the switch to case insensitivity... # 08/17/00 - SWB, Modified to accept internal headers without complaint (e.g. /received) # 10/19/00 - SWB, Modified to check date/time as an entity, not to separately check for consistency between start and end date and time. # 12/08/00 - SWB, Modified duplicate fieldname check to output what fields are duplicated in the error message. # 01/04/01 - SWB, Modified header check to complain if brackets exist in any field other than the time and location fields AND to complain if ranges exist in header values. # 07/02/01 - SWB, Modified datablock test to check for double dots, hyphens, and alpha characters # 08/27/01 - SWB, Added error if depth, lat, or lon is a missing value. Added logic to NOT check for negative values if checking for valid ranges... # use Text::Wrap; use Date::Manip; use MIME::Entity; use File::Copy; use File::Basename; use SB2K::ProcSB qw(rd_hdr data_hash get_delimiter field2nmwl); use strict; $Text::Wrap::columns=80; my $version = "3.0"; sub usage { my $usemsg = <<"EndUsage"; -------------------------------------------------------------------- FCHECK - check format of data to be submitted to SeaBASS archive. Using PERL, test the syntax of a potential SeaBASS ASCII file for adherence to the SeaBASS file format specifications. This file works from the command line or via email trapping. A detailed description of the SeaBASS file format is available at: http://seabass.gsfc.nasa.gov/seabass_submit.html Send questions to the SeaBASS administrator at seabass\@seabass.gsfc.nasa.gov. -------------------------------------------------------------------- Usage: Command line: fcheck.pl filename or send filename as non-attached email to fcheck\@seabass.gsfc.nasa.gov or FTP files to seabass.gsfc.nasa.gov:incoming/fcheck/ then email fcheck\@seabass.gsfc.nasa.gov with "FTP:" as the subject -------------------------------------------------------------------- Original development: Brian D. Schieber, SAIC/GSC, 10/1997 Current 'Keeper of the Code' : Sean W. Bailey, Futuretech Corporation Version 3 is a major rewrite of the code to incorporate hash array processing and implement checks on the data block. Mods: None ... so far ;) -------------------------------------------------------------------- EndUsage return ($usemsg); } # Main Controls my $file = "/tmp/fcheck.data"; my $arg = $ARGV[0]; if ($arg) { copy("$arg","/tmp/$arg.fcheck"); $file = "/tmp/$arg.fcheck"; } my ($tmpfile,$path) = &fileparse($file); #$parsedfile = "/tmp/$tmpfile.parsed"; my $reportfile = "/tmp/$tmpfile.report"; my $logfile = "/home/fcheck/log/fcheck.log"; my $from = ""; my $subject = ""; my $sendmail = "no"; my $num = my $ftp = my $snderr = my $errors = my $warnings = 0; my @ftpfiles; my (@error,@warnings,@uniterr); my $item; my $m = 1; my $subdir = ""; my $pre1 = ""; my $pre2 = "\t"; my $method_used = 'email'; my $cc_email = "jeremy\@seabass.gsfc.nasa.gov"; my $date_time = &datetime(); # get mod time of FCHECK for use in version stamp open(REPORT, ">$reportfile") || die "Can't open file for write: $reportfile"; &lock('REPORT','write'); my $moddate_time = &modtime(); # get mod time of FCHECK for use in version stamp print REPORT "FCHECK Ver. $version last modified: $moddate_time\n"; ################################################################################ # Process mail header part of $file # Accept local, non-mail input if ($arg) { $from = "seabass"; $sendmail="no"; goto START_HEADER; } open(IN, "$file") || die "Can't open file to read: $file"; &lock('IN','read'); my $line = ; # allow a few lines of comments at top while ($line =~ /^\!/) { $line = } # Trap mail input if ($line =~ /^From/) # First line should always say "From " on 1st line { if ($line =~ /fcheck/) { exit; }; # avoid self looping (my $skip,$from) = split(/ /,$line); $sendmail = "yes"; } # Open in/out/log files if ($sendmail eq 'yes'){ if (! -e $logfile) { open(LOG, ">$logfile") || die "Can't open file for write: $logfile"; } else { open(LOG, ">>$logfile") || die "Can't open file for append: $logfile"; } &lock('LOG','write'); } my $count=0; while (){ chomp($line=$_); s/\t//g; # Process subject line... if ($line =~ /^Subject/){ if ($line =~ /help/i) { print REPORT "!\n! FCHECK help information \n!\n"; print REPORT &usage; close(REPORT); &sendparsed($from,'FCHECK Help Information',0); exit; } if ($line =~ /ftp/i){ my @line = split /:/,$line; $subdir = pop(@line); $subdir =~ s/^\s*(.*?)\s*$/$1/; $ftp = 1; opendir FTP, "/home/ftp/incoming/fcheck/$subdir"; @ftpfiles = grep !/^\.\.?$/, readdir FTP; closedir FTP; my $num = $#ftpfiles; $num++; print REPORT "\n################################################################################\n"; print REPORT "#\t\t\t\t\tNOTICE:\t\t\t\t\t#\n"; print REPORT "#\tUsing the FTP option for FCHECK does NOT constitute a submission\t#\n"; print REPORT "#\tof the data to SeaBASS, even if the files pass FCHECK's scrutiny.\t#\n"; print REPORT "#\tThe files you FTP'd to seabass:incoming/fcheck/$subdir have been\t\t#\n"; print REPORT "#\tremoved from the FTP site once FCHECK processes them.\t\t\t#\n"; print REPORT "################################################################################\n\n"; print REPORT " NUMBER OF FILES TO CHECK : $num from SeaBASS FTP : /incoming/fcheck/$subdir\n"; print REPORT " Could not find files to process. Please make sure your files are in a subdirectory of the incoming/fcheck directory on the SeaBASS FTP site\n" if $num == 0; } } last if ($line =~ /\/begin_header/); $count++; &perror("'/begin_header' not found within $count lines in file") if ($count > 70); } if ($ftp == 1){ &unlock('IN'); close(IN); $num = $#ftpfiles; } print "sendmail: $sendmail from: $from directory:$subdir\n"; ######################### # Look for start header START_HEADER: for ($m=0;$m<$num+1;$m++){ if ($ftp == 1){ open(IN,"/home/ftp/incoming/fcheck/$subdir/$ftpfiles[$m]") || die "Can't open $ftpfiles[$m]: $!"; } else{open(IN,$file)|| die "Can't open $file: $!"} &lock('IN','read'); $file = $ftpfiles[$m] if ($ftp == 1); print REPORT "################################################################################\n" if ($ftp == 1); print REPORT "File: $file\n" if ($ftp == 1); my $numerr = 0; $method_used = 'ftp' if ($ftp == 1); &perror("Filename has spaces! Please rename file to remove spaces.") if ($file =~ /\s+/); print LOG "$date_time\t$from\t$file\t$method_used\t" if ($sendmail eq 'yes'); ######################## # Parse header my %monthstr = (1=>'January',2=>'February',3=>'March',4=>'April',5=>'May',6=>'June', 7=>'July',8=>'August',9=>'September',10=>'October',11=>'November',12=>'December'); my %hdrs = &rd_hdr(*IN); my @labels = keys %hdrs; my @spaces; my @datelabels = ('start_date','end_date'); my @timelabels = ('start_time','end_time'); &perror("Header /begin_header is missing! Please begin the header block with '/begin_header'.") if ($hdrs{'begin_header'} == 0); &perror("Header /end_header is missing! Please end the header block with '/end_header'.") if ($hdrs{'end_header'} == 0); foreach my $label (@labels){ next if ($label =~ /(comments|begin_header|end_header|GMT|DEG|sl_fields|sl_units)/); my $test_value = $hdrs{$label}; $test_value =~ s/\.//g; &perror("Quotes and apostrophes are not allowed in header fields!") if ($hdrs{$label} =~ /\"|\'|\`/); push(@spaces,$label) if ($hdrs{$label} =~ /\s+/); $hdrs{$label} =~ s/\s+//g; &perror("No value detected for $label") if ($hdrs{$label} eq ""); &perror("More than one decimal place in $label is not acceptable!") if (($hdrs{$label} =~ /\..*\./) && ($label =~ /(depth|cloud|wind|wave)/)); &perror("Header /$label=$hdrs{$label}: Braces and Brackets are not allowed in any header field except for the time and location headers") if (($hdrs{$label} =~ /(\[|\]|\{|\})/) && ($hdrs{$label} !~ /(latitude|longitude|time)/)); if (($label =~ /(depth|cloud|wind|wave)/) && ($hdrs{$label} !~ /^$hdrs{'missing'}$/)){ &perror("Header $label=$hdrs{$label}: Ranges are not acceptable in header values") if ( ($hdrs{$label} !~ /^none.?$|^na.?$/i) && ($test_value =~ /[0-9].*\-/)); &perror("Header $label=$hdrs{$label}: Must be numeric and non-negative. Do not append units.") if ( ($hdrs{$label} !~ /^none.?$|^na.?$/i) && ((($test_value =~ /\d+.\D/) &&($test_value !~ /(\d+)-/)) || ($test_value < 0.))); } &perror("Header /$label: $hdrs{$label}\nFilenames should not contain file paths.") if (($hdrs{$label} =~ /\//) && ($label =~ /(file|documents)/)); &perror("Header /$label cannot be 'none' or 'NA' or eq to your missing value!") if ( ($hdrs{$label} =~ /^none.?$|^na.?$|^$hdrs{'missing'}.?$/i) && ($label =~ /(documents|calibration|contact|cruise|experiment|investigators|affiliations)/)); &perror("Header /$label:\n[$hdrs{$label}] not decimal in [-180.0,180.0] range") if (($label =~ /longitude/) && (($hdrs{$label} < -180.0) || ($hdrs{$label} > 180.0))); &perror("Header /$label:\n[$hdrs{$label}] not decimal in [-90.0,90.0] range") if (($label =~ /latitude/) && (($hdrs{$label} < -90.0) || ($hdrs{$label} > 90.0))); } &perror("Header has spaces!\nPlease remove spaces, or replace with an underscore ('_') if header value is a text string. (Spaces are acceptable in comments)") if ($#spaces >= 0); my $missing = $hdrs{'missing'}; $missing =~ s/\.//; &perror("Illegal missing value: $hdrs{'missing'}\nMissing values should be numeric and non-zero") if ( ($missing =~ /\D$/) || (int($missing) == 0)); &perror("Only one missing value is allowed.") if ($hdrs{'missing'} =~ /,/); unless ($hdrs{'delimiter'} =~ /^(comma|space|tab|colon|semi-colon)$/i){ &perror("Header /delimiter:\n[$hdrs{'delimiter'}] is not one of the following: space, comma, tab, colon, or semi-colon"); } &perror("Header /experiment=$hdrs{'experiment'}:\nPlease use a /experiment descriptor other than '$hdrs{'experiment'}'. Preferrably one which describes the activity and separates it from others. Please note that /experiment and /cruise are used to describe the general research and specific event, respectively. For example: /experiment=AMT, /cruise=AMT7. Do NOT use the acronym SIMBIOS in the experiment or cruise name.") if ($hdrs{'experiment'} =~ /SIMBIOS/i); &perror("Header /experiment=$hdrs{'cruise'}:\nPlease use a /experiment descriptor other than '$hdrs{'cruise'}'. Preferrably one which describes the activity and separates it from others. Please note that /experiment and /cruise are used to describe the general research and specific event, respectively. For example: /experiment=AMT, /cruise=AMT7. Do NOT use the acronym SIMBIOS in the experiment or cruise name.") if ($hdrs{'cruise'} =~ /SIMBIOS/i); &perror("Header /cruise should not be the same as /experiment, please make /cruise a 'subset' of /experiment. See specs.") if ($hdrs{'experiment'} eq $hdrs{'cruise'}); &pwarning("Header /parameters is no longer required, as /fields have been standardized.") if (exists $hdrs{'parameters'}); &perror("Header /data_file_name should have only one file listed [$hdrs{'data_file_name'}]") if ($hdrs{'data_file_name'} =~ /[,;]/); &pwarning("Header /original_file_name is no longer required, the file name should be entered as /data_file_name.") if (exists $hdrs{'original_file_name'}); &perror("Header /data_type:\n[$hdrs{'data_type'}] One field only for /data_type (comma detected)") if ($hdrs{'data_type'} =~ /,/); if($hdrs{'data_type'} !~ /^(cast|drifter|matchup|pigment|above_water|mooring|flow_thru|sunphoto|scan|ancillary)$/){ &perror("Header /data_type:\n[$hdrs{'data_type'}] data type not 'cast', 'drifter', 'pigment', 'flow_thru', 'matchup', 'above_water', 'mooring', 'sunphoto', or 'scan'"); } &pwarning("Header /data_status should be 'preliminary','update', or 'final'!") if ((exists $hdrs{'data_status'})&& ($hdrs{'data_status'} !~ /^(preliminary|update|final)$/i)); my @gmtlst = ('start_time','end_time'); my @deglst = ('north_latitude','south_latitude','east_longitude','west_longitude'); my %udeg = my %ideg = (); foreach my $e (@deglst,@labels){$udeg{$e}++ && $ideg{$e}++} my @isect_deg = keys %ideg; my %ugmt = my %igmt = (); foreach my $e (@gmtlst,@labels){$ugmt{$e}++ && $igmt{$e}++} my @isect_gmt = keys %igmt; &perror("Location headers need to end with '[DEG]' (ex: 66.23[DEG]) ") if ($hdrs{'DEG'} < $#isect_deg+1); &perror("Time headers need to end with '[GMT]' (ex: 16:25:22[GMT]) ") if ($hdrs{'GMT'} < $#isect_gmt+1); &pwarning("! WARNING: east_longitude: [$hdrs{'east_longitude'}] < west_longitude [$hdrs{'west_longitude'}], implies crossing of the dateline! If you did not do so, please check these values.") if ( $hdrs{'east_longitude'} < $hdrs{'west_longitude'}); if (exists $hdrs{'north_latitude'} && $hdrs{'south_latitude'}){ &perror("Header /south_latitude:\n /south_latitude=$hdrs{'south_latitude'} > /north_latitude=$hdrs{'north_latitude'}:\nSouth cannot be greater than North!") if ($hdrs{'south_latitude'} > $hdrs{'north_latitude'}); } if (exists $hdrs{'measurement_depth'} && exists $hdrs{'data_type'}){ &pwarning("Measurement depth should be 0.0 for /data_type=$hdrs{'data_type'}") if ( ($hdrs{'data_type'} =~ /(sunphoto|above_water)/) && ($hdrs{'measurement_depth'} != 0.0)); } foreach my $datelabel (@datelabels){ my $isleap = &Date_LeapYear(substr($hdrs{$datelabel},0,4)); if (substr($hdrs{$datelabel},0,4) < 1975) { &perror("Header /$datelabel:\n[$hdrs{$datelabel}] year < 1975, this is considered an error. Perhaps it is not?"); } if (length($hdrs{$datelabel}) != 8) { &perror("Header /$datelabel:\n[$hdrs{$datelabel}] length incorrect (format is YYYYMMDD)"); } my $month = substr($hdrs{$datelabel},4,2); my $day = substr($hdrs{$datelabel},6,2); my $intmonth = int($month); &perror("Header /$datelabel:\n[$hdrs{$datelabel}] value [$month] not '01' - '12' (format is YYYYMMDD)") if (($month > 12) || ($month < 1)); if ($month == 2){ my $febdays = 28; if ($isleap){$febdays = 29} &perror("Header /$datelabel:\n[$hdrs{$datelabel}] value [$day] exceeds the maximum number of days for $monthstr{$intmonth}! (format is YYYYMMDD)") if ($day > $febdays); } elsif ($month =~ /^(01|03|05|07|08|10|12)/){ &perror("Header /$datelabel:\n[$hdrs{$datelabel}] value [$day] exceeds the maximum number of days for $monthstr{$intmonth}! (format is YYYYMMDD)") if ($day > 31); } else{ &perror("Header /$datelabel:\n[$hdrs{$datelabel}] value [$day] exceeds the maximum number of days for $monthstr{$intmonth}! (format is YYYYMMDD)") if ($day > 30); } } foreach my $timelabel (@timelabels){ my $timetest = $hdrs{$timelabel}; $timetest =~ s/://g; &perror("Header /$timelabel:\n[$hdrs{$timelabel}] form incorrect, format is (for max time) 23:59:59, min 00:00:00") if ( length($timetest) != 6); &perror("Header /$timelabel:\n[$hdrs{$timelabel}] only numerics and ':' allowed in time field (ex: 12:01:33)") if( $hdrs{$timelabel} =~ /[a-z]/i); my ($hour,$minute,$second) = split /:/,$hdrs{$timelabel}; if ($hour < 0) { &perror("Header /$timelabel:\n[$hdrs{$timelabel}] Hour < 0, max time is 23:59:59, min is 00:00:00")} if ($hour > 23) { &perror("Header /$timelabel:\n[$hdrs{$timelabel}] Hour > 23, max time is 23:59:59, min is 00:00:00")} if ($minute < 0) { &perror("Header /$timelabel:\n[$hdrs{$timelabel}] Minute < 0")} if ($minute > 59) { &perror("Header /$timelabel:\n[$hdrs{$timelabel}] Minute > 59")} if ($second < 0) { &perror("Header /$timelabel:\n[$hdrs{$timelabel}] Second < 0")} if ($second > 59) { &perror("Header /$timelabel:\n[$hdrs{$timelabel}] Second > 59")} } my $sdt = join(' ',$hdrs{'start_date'},$hdrs{'start_time'}); my $edt = join(' ',$hdrs{'end_date'},$hdrs{'end_time'}); my $start = &ParseDate($sdt); my $end = &ParseDate($edt); my $flag = &Date_Cmp($start,$end); if ($flag>0) { &perror("Start date/time [$sdt] greater than end_date/time [$edt]"); } &perror("Header /fields should be a single line!\n") if (exists $hdrs{'sl_fields'}); &perror("Header /units should be a single line!\n") if (exists $hdrs{'sl_units'}); my $numfields = my @fields = split /,/,$hdrs{'fields'}; my $numunits = my @units = split /,/,$hdrs{'units'}; my $fields = $hdrs{'fields'}; $fields =~ tr/A-Z/a-z/; # check for duplicate field names in header my @fields1=@fields; my $duplicate_warning = 0; my @dups; while (my $pop1 = pop @fields1) { my $duplicate_fields = 0; my @fields2 = @fields; while (my $pop2 = pop @fields2) { if ($pop1 eq $pop2) { $duplicate_fields++; if ($duplicate_fields > 1) { $duplicate_warning=1; push(@dups,$pop1); } } } } my %seen; if ($duplicate_warning) { my @uniqdups = sort (grep { ! $seen{$_} ++ } @dups); &perror("Header /fields:\nDuplication of field names found in file. Please rename the following fields uniquely:\n\t@uniqdups"); } my $delimiter = &get_delimiter(%hdrs); if ($ftp == 0){ if ($arg) {print REPORT "File: $arg"} else{print REPORT "File: $hdrs{'data_file_name'}"} } if ($numunits != $numfields) { &perror("Header /units:\n # of units not matching # of fields [$numunits,$numfields]"); } ¶ms_check($hdrs{'fields'},$hdrs{'units'}); ################################## # Check for required headers my @required_entry = ( "affiliations", "investigators", "contact", "experiment", "cruise", "data_file_name", "data_type", "west_longitude", "east_longitude", "north_latitude", "south_latitude", "start_date", "end_date", "start_time", "end_time", "delimiter", "calibration_files", "documents", "missing", "water_depth", "fields", "units"); my $hdrlst = join(' ',@labels); my $entries = @required_entry; for(my $ii=0;$ii<$entries;$ii++){ if ($hdrlst !~ /$required_entry[$ii]/) { &perror("Required header label \"/$required_entry[$ii]\" not provided."); } } # Do same for warning headers my @optional_entry = ( "station", "cloud_percent", "wave_height", "data_status", "wind_speed", "measurement_depth", "secchi_depth" ); $entries = @optional_entry; for(my $ii=0;$ii<$entries;$ii++) { if ($hdrlst !~ /$optional_entry[$ii]/) { &pwarning("Optional header \"/$optional_entry[$ii]\" not provided."); } } push(my @accepted_entry,@required_entry,@optional_entry); undef %seen; my @labelsonly; @seen{@accepted_entry}=(); foreach my $item (@labels){ next if ($item =~ /^(comments|DEG|GMT|sl_fields|sl_units|begin_header|end_header|parameters|original_file_name|received)$/); push(@labelsonly,$item) unless exists $seen{$item}; } foreach my $unrecognized (@labelsonly){ &pwarning("The header /$unrecognized is not recognized by FCHECK. Please remove or put in comments ('!')."); } ################################## # Process data block my %badlines; my $linecounter=0; my @Lufields = grep /l[u|w]\d+/i, split /,/,$fields; my @Esfields = grep /e[s|d]\d+/i, split /,/,$fields; my @AOTfields = grep /aot/i, split /,/,$fields; my @timefields = ('jd','year','month','day','hour','minute','second'); my @negs; while (){ chomp(my $line = $_); $line =~ s/\s+/ /g; # remove multiple spaces to one $line =~ s/^\s//; # remove first character if its a space $line =~ s/^\t//; # remove first character if its a tab $line =~ s/^M//g; # remove DOS linefeeds next if ($line =~ /^$/); # skip (and remove from output) any blank lines next if (length($line) <= 2); # assume datalines with only 2 chars are invalid last if ($line =~ /^--/); # end if -- detected, signature or attachment... if ($line =~ /^\/begin_data/) { &perror("ERROR: '/begin_data' and '/end_data\@' markers are no longer supported. Please remove."); last; } if ($line =~ /^\/end_data/) { &perror("ERROR: '/begin_data' and '/end_data\@' markers are no longer supported. Please remove."); last; } $linecounter++; my @datafields = split(/$delimiter/, $line); my $numdatafields = @datafields; if ($numdatafields != $numfields) { $badlines{$linecounter} = $numdatafields; print "Bad data line:\t$linecounter\n"; } unless ($numdatafields != $numfields) { my %data = &data_hash($fields,$hdrs{'missing'},@datafields); #print map {"$_=>$data{$_}\n"} keys %data; #check date/time info foreach my $datafield (@fields){ $datafield =~ tr/A-Z/a-z/; if (($datafield =~ /^(depth|lat|lon)/i)&&($data{$datafield} =~ /NULL/)){ &perror("Missing value not allowed for $datafield!\n"); next; } next if ($datafield =~ /^(angstrom|lt|wt|lat|lon|pitch|roll|tilt|quality|relaz|sample|sst|station|time)/i); next if ($datafield =~ /gnd/i); next if ($data{$datafield} =~ /NULL/); push(@negs,$datafield) if (($data{$datafield} < 0.0) && ($datafield !~ /^(lu|lw|es|ed|aot)/)); &perror("More than one decimal point detected in $datafield!\n") if ($data{$datafield} =~ /\..*\./); &perror("Non-numeric characters detected in $datafield!\n") if (($data{$datafield} =~ /[a-zA-Z]/) && ($data{$datafield} !~ /(e|\+|\-)/i)); if ($data{$datafield} =~ /[0-9].*\-/){ my $junkfield = $data{$datafield}; $junkfield =~ s/e[(\-|+)]//i; &perror("Hypenated value detected in $datafield!\n") if ($junkfield =~ /[0-9].*\-/); } } foreach my $compfield (@timefields){ if (exists $data{$compfield}){ my $compvalue = $data{$compfield}; $compvalue =~ s/^0//; &pwarning("The fields : year, month, day, hour, minute, second, and jd should be integers") if ( $compvalue ne int($data{$compfield})) } } if ((exists $data{'jd'}) && ($data{'jd'} > 366)){&perror("Check your 'jd' field, detected values > 366")} if ((exists $data{'year'}) && ($data{'year'} < 1975)){&perror("Check your 'year' field, detected values < 1975")} if ((exists $data{'month'}) && ($data{'month'} > 12)){&perror("Check your month field, detected values > 12")} if ((exists $data{'month'}) && ($data{'month'} < 1)){&perror("Check your month field, detected values < 1")} if ((exists $data{'day'}) && (exists $data{'month'}) && (exists $data{'year'})){ my $isleap = &Date_LeapYear(int($data{'year'})); next if ((int($data{'month'}) < 1) || (int($data{'month'}) >12)); if ($data{'month'} == 2){ my $febdays = 28; if ($isleap){$febdays = 29} &perror("Check your 'day' field, detected erroneous values") if ( (int($data{'day'}) > $febdays) || (int($data{'day'}) < 1)) ; } elsif (int($data{'month'}) =~ /([1 3 5 7 8 10 12])/){ &perror("Check your 'day' field, detected erroneous values") if ( (int($data{'day'}) > 31) || (int($data{'day'}) < 1)); } else{ &perror("Check your 'day' field, detected erroneous values") if ( (int($data{'day'}) > 30) || (int($data{'day'}) < 1)); } } if ((exists $data{'hour'}) && ($data{'hour'} > 23)){&perror("Check your 'hour' field, detected values > 23")} if ((exists $data{'minute'}) && ($data{'minute'} > 59)){&perror("Check your month field, detected values > 59")} if ((exists $data{'second'}) && ($data{'second'} > 59)){&perror("Check your seconds field, detected values > 59")} #check depth if ((exists $data{'depth'}) && ($data{'depth'} !~ /NULL/)){&perror("Depths should be positive values") if ($data{'depth'} < 0.)} #check lat/lon if ((exists $data{'lat'}) && (($data{'lat'} < -90.)||($data{'lat'} > 90.))){ &perror("Check your 'lat' field, found erroneous values"); } if ((exists $data{'lon'}) && (($data{'lon'} < -180.)||($data{'lon'} > 180.))){ &perror("Check your 'lon' field, found erroneous values"); } #check lu/lw if ($#Lufields >= 0){ foreach my $lu (@Lufields){ next if ($data{$lu} eq 'NULL'); if (($data{$lu} < 0.)||($data{$lu} > 5.)){&pwarning("The '$lu' field has values outside expected range [0.0 - 5.]")} } } #check es/ew if ($#Esfields >= 0){ foreach my $es (@Esfields){ next if ($data{$es} eq 'NULL'); if (($data{$es} < 0.)||($data{$es} > 250.)){&pwarning("The '$es' field has values outside expected range [0.0 - 250.0]")} } } #check aot/ew if ($#AOTfields >= 0){ foreach my $aot (@AOTfields){ next if ($data{$aot} eq 'NULL'); if (($data{$aot} < 0.005)||($data{$aot} > 2.)){&pwarning("The '$aot' field has values outside expected range [0.005 - 2.0]")} } } } } my %see_negs = (); foreach $item (@negs) { $see_negs{$item}++; } my @uniq_negs = keys %see_negs; my @sort_negs = sort { $b cmp $a } @uniq_negs; &pwarning("Negative value detected in data block for field(s):\n@sort_negs\n") if ($#negs >= 0); if (%badlines){ my @errlines = keys %badlines; my $numerrlines = @errlines; my @errlinenum; if (($numerrlines >= 0) && ($numerrlines < 5)) { foreach my $err (@errlines){push(@errlinenum,$badlines{$err})} &perror("ERROR: at data line(s) @errlines, number of fields [@errlinenum] not equal to header specification [$numfields] Please check the following possible reasons for the error: * Is the /delimiter= [\"$delimiter\"] specified correctly (for example: the /delimiter=space but 'tabs' are separating the data)? * Is the line wrapped by your mailer so that part of the data line is actually on another line? * Are there blank lines at the end of the data? * Is your .signature attached to the end of the data?"); } else{ &perror("ERROR: multiple data lines differ in column count from the number of fields listed in the header specification [$numfields] Please check the following possible reasons for the error: * Is the /delimiter= [\"$delimiter\"] specified correctly (for example: the /delimiter=space but 'tabs' are separating the data)? * Is the line wrapped by your mailer so that part of the data line is actually on another line? * Are there blank lines at the end of the data? * Is your .signature attached to the end of the data?"); } } &unlock('IN'); close(IN); ################################## # Print errors $numerr = $#error +1; my ($numwarn,$warnnum,@uniq_err,@sort_err,@uniq_warn,@sort_warn); if ($numerr > 0){ my %see_err = (); foreach $item (@error) { $see_err{$item}++; } @uniq_err = keys %see_err; @sort_err = sort { $b cmp $a } @uniq_err; $numerr = $#sort_err +1; } if ($warnings) { my %seewarn = (); foreach $item (@warnings) { $seewarn{$item}++; } @uniq_warn = keys %seewarn; @sort_warn = sort { $b cmp $a } @uniq_warn; $numwarn = $#uniq_warn +1; $warnnum = 0; } if ($warnings) { print REPORT "\nThis file has passed the FCHECK program but $numwarn warnings were issued.\n" unless ($errors); print LOG "WARNINGS\n" unless (($errors) || ($sendmail eq 'no')); print REPORT "\nThis file has failed the FCHECK program.\n" if ($numerr > 0); print LOG "FAILED\n" if (($errors) && ($sendmail eq 'yes')); print REPORT "\n$numwarn warnings were issued and $numerr errors were found.\n" if ($numerr > 0); print REPORT "\n********************************* WARNINGS: ***********************************\n"; foreach my $warn (@sort_warn){ $warnnum++; print REPORT wrap($pre1,$pre2,"$warnnum) $warn\n"); } } else { print REPORT "\n This file has passed the FCHECK program.\n" unless ($numerr); print LOG "PASSED\n" unless (($numerr) || ($sendmail eq 'no')); print REPORT "\nThis file has failed the FCHECK program.\n" if ($numerr > 0); print LOG "FAILED\n" if (($numerr) && ($sendmail eq 'yes')); print REPORT "\n$numerr errors were found.\n" if ($numerr > 0); } if ($numerr > 0){ my $errnum = 0; print REPORT "\n"; print REPORT "********************************* ERRORS: ************************************\n"; foreach my $errstr (@sort_err){ $errnum++; print REPORT wrap($pre1,$pre2,"$errnum) $errstr\n"); } } $snderr = 1 if ($errors); undef $errors; undef @error; undef @warnings; undef $warnings; }#end loop for multiple files &unlock('REPORT'); close(REPORT); &unlock('LOG') if ($sendmail eq 'yes'); close(LOG) if ($sendmail eq 'yes'); if ($sendmail eq 'yes'){ if ($snderr) { &sendparsed($from,'Error from FCHECK!',1)} else{ &sendparsed($from,'Automated Reply from FCHECK',0)} } else{ my @args = ("cat",$reportfile); system(@args) == 0 or die "system @args failed :?"; } unlink $reportfile; unlink $file; exit; # End main ########################################## # Exiting message # sub sendparsed { my ($from,$mailsubject,$errmsg)=@_; # Create the top-level, and set up the mail headers: my $message = <<"--"; _______________________________________________________________________ This message is an automated reply from FCHECK. See the SeaBASS web pages for detailed information, examples and RECENT CHANGES to the data specification: http://seabass.gsfc.nasa.gov/seabass_submit.html Specifically, see the example described in: http://seabass.gsfc.nasa.gov/example_header.html FCHECK has checked your file and provided results below. _______________________________________________________________________ -- if ($errmsg){ $message .= <<"--"; Please see header descriptions for missing values at http://seabass.gsfc.nasa.gov/example_header.html A detailed description of the SeaBASS file format is available at: http://seabass.gsfc.nasa.gov/seabass_submit.html Note that some errors may confuse FCHECK, causing false error reporting. Please revise the files with errors and try again. If you suspect that the errors detected are not true errors, please let the seabass admin know. (seabass\@seabass.gsfc.nasa.gov) _______________________________________________________________________ -- } open (PARSEDFILE,"$reportfile") || die "Can't open $reportfile\n"; while (){ chomp; $message .= "$_\n"; } my $top = build MIME::Entity Type => my $mailtype, From => 'fcheck@seabass.gsfc.nasa.gov', To => $from, CC => $cc_email, Subject => $mailsubject, Encoding => 'quoted-printable', Data => $message; # Send it! open MAIL, "| /usr/lib/sendmail -t -i" or die "open: $!"; $top->print(\*MAIL); close MAIL; } ########################################## # Error messages added up in a queue # sub perror { my $arg = shift @_; push @error, $arg; $errors = 1; } ########################################## # Warning messages added up in a queue # sub pwarning { my $arg = shift @_; push @warnings, $arg; $warnings = 1; } ########################################## # Last mod time sub modtime { my @mtime = stat $0; my $mtime = localtime($mtime[9]); my $modtime = &UnixDate($mtime,"%b %e %Y %T"); return $modtime; } ########################################## # Last mod time sub datetime { my $mtime = localtime(time); my $date_time = &UnixDate($mtime,"%b %e %Y %T"); return $date_time; } ########################################## # Lock and unlock filehandles sub lock { my($handle,$type) = @_; #print "Locking $handle\n"; my $lockstat = 1; if ($type eq 'read') {$lockstat = 1} else{$lockstat = 2} # print "$type -> $lockstat\n"; my $flockstatus = flock($handle, $lockstat); #seek $handle, 0, 2; #print "Lock status...$flockstatus\n" } sub unlock { my ($handle) = $_[0]; #print "UNLOCKING...$handle\n"; my $flockstatus = flock($handle, 8); #print "Unlock status...$flockstatus\n" } ########################################## # params_check # - check parameters list to see if it matches master list # sub params_check { my $fields = shift; my $units = shift; my $notstdstr; undef $notstdstr; my $fname = "/home/seabass/development/seabass_std_fields.lst"; my $uname = "/home/seabass/development/seabass_std_units.lst"; my $web_list = 'http://seabass.gsfc.nasa.gov/cgi-bin/stdfields.cgi'; my @in_params = split(/,/,$fields); my @diff_fields = my @in_fields = split(/,/,$fields); my @in_units = split(/,/,$units); my @param; my %fnu=(); my %stdunits=(); # Read provided fields/units list into hash %fnu for (my $i=0;$i<$#in_fields+1;$i++){ my $key = $in_fields[$i]; $key =~ tr/A-Z/a-z/; $key =~ s/\d//g; $key =~ s/\.//g; $key =~ s/\s+$//g; $key =~ s/^\s+//g; $key =~ s/chl_cc/chl_c1c2/g; $fnu{$key} = $in_units[$i]; } # Read standard fields list into array @param open(FIELDS, "$fname") || die "Can't open file to read: $fname"; while (){ chomp(my $record = $_); next if ($record =~ /^#/); my ($key, $value) = split(/==/,$record); $value =~ s/^\s+//g; $key =~ s/^\s+//g; $key =~ s/\s+$//g; $value =~ s/\s+$//g; push @param, $key; } close(FIELDS); # Read standard fields/units list into hash %stdunits open(UNITS, "$uname") || die "Can't open file to read: $uname"; while (){ chomp(my $record = $_); next if ($record =~ /^#/); my ($value, $key) = split(/==/,$record); $value =~ s/^\s+//g; $key =~ s/^\s+//g; $key =~ s/\s+$//g; $value =~ s/\s+$//g; $key =~ tr/A-Z/a-z/; $key =~ s/###.#//g; $stdunits{$key} = $value; } close(UNITS); # Find list of stdfields in provided fields my @commonkeys = (); foreach (keys %fnu){ push(@commonkeys, $_) if exists $stdunits{$_}; } my @union = my @isect = my @diff = (); my %union = my %isect = (); my %count = (); my %seen = (); my @notstd = (); foreach my $item (@param) { $item =~ tr/A-Z/a-z/; $item =~ s/###.#//g; $item =~ s/^\s+//g; $item =~ s/\s+$//g; $seen{$item} = 1; } foreach my $item (@in_fields) { my $infield = $item; $item =~ tr/A-Z/a-z/; $item =~ s/\d//g; $item =~ s/\.//g; $item =~ s/^\s+//g; $item =~ s/\s+$//g; $item =~ s/chl_cc/chl_c1c2/g; unless ($seen{$item}){ push(@notstd, $infield); } } foreach my $e (@param) { $e =~ tr/A-Z/a-z/; $e =~ s/###.#//g; $e =~ s/^\s+//g; $e =~ s/\s+$//g; $union{$e}=1 } foreach my $e (@diff_fields){ $item = $e; $e =~ tr/A-Z/a-z/; $e =~ s/\d//g; $e =~ s/\.//g; $e =~ s/^\s+//g; $e =~ s/\s+$//g; $e =~ s/chl_cc/chl_c1c2/g; if ( $union{$e}){$isect{$item} = 1} $union{$e} = 1; } @union = keys %union; @isect = keys %isect; my @list; if ($#notstd ge 0) { $notstdstr = "is(are) not found in the names list at $web_list\n"; $notstdstr .= "\nThis may be due to one of the following:\n"; $notstdstr .= "a) The fieldname is incorrectly formatted [Lw_490 rather than the required Lw490]\n"; $notstdstr .= "b) The fieldname is not typical for standard SeaBASS submission, i.e. it's new to us! "; $notstdstr .= "If the fieldname does not have an equivalent standardized name, please contact the "; $notstdstr .= "SeaBASS administrator to discuss how to submit this information\n"; &perror("Header /fields:\n@notstd $notstdstr"); } # Check that units match fields! foreach my $key (@commonkeys){ my $std = $stdunits{$key}; my $provided = $fnu{$key}; $std =~ s/^\s*(.*?)\s*$/$1/; $provided =~ s/^\s*(.*?)\s*$/$1/; my $match = matchunit($provided,$std); push(@uniterr,$key) unless ($match); push(@list,("The units of \"$key\" should be $std, not \"$provided\".\n")) unless ($match); } if ($#uniterr ge 0) { perror("Header /units:\n@list"); } undef @list; undef @notstd; undef @uniterr; } sub matchunit { my ($provided,$std) = @_; my @match = split /\// , $std; my $ok=1; $ok = 0 if ($provided =~ /\*/); foreach my $subunit (@match){ $subunit =~ s/\^//g; $subunit =~ s/\d//g; $subunit =~ s/chl_cc/chl_c1c2/g; $ok = 0 unless ($provided =~ /$subunit/i); } if ($std eq 'mg/m^3'){$ok = 1 if ($provided eq 'ug/l')} return($ok); }