Wednesday, March 10, 2010

Example of Splitting a File Based on Value

#!/usr/bin/perl -w
use strict;
use diagnostics;

#- 1 --------------------------------------------------------------------------
# Show the operator a boxed description of what the script does before any
# questions are asked.  Each banner row is emitted followed by a space and a
# line break; one blank line precedes the box and one follows it.
my @BannerLines = (
    '+-----------------------------------------------------------------+',
    '| splitdat.pl: |',
    '| Break a file into two output files based on values in an |',
    '| operator-supplied column(s). |',
    '| |',
    '| The output will be written to the directory where the input |',
    '| file resides. |',
    '| |',
    '| Note: Type in the case (upper or lower) that the filename and |',
    '| ---- value is in. |',
    '+-----------------------------------------------------------------+',
);

print "\n";
print "$_ \n" for @BannerLines;
print "\n";

#------------------------------------------------------------------------------
# Ask which directory to work in and make it the current directory, so that
# the file names typed in afterwards are resolved relative to it.
print "Directory Name: ('.' = Current)\t";    # Ask for directory
my $Directory = <STDIN>;                      # Store what's typed in

# <STDIN> returns undef at end of input (e.g. redirected from an empty file);
# stop with a clear message instead of passing undef to chomp/chdir.
defined $Directory
    or die "No directory name was entered \n";
chomp($Directory);                            # Get rid of line break

# Report which directory was rejected and the operating system's reason.
chdir($Directory)
    or die "Invalid Directory '$Directory': $! \n";

#- 2 --------------------------------------------------------------------------
# Ask the operator a question and return the reply without its line break.
#   $question - text printed as the prompt (no line break is added)
# Dies if STDIN has reached end of input, because every answer is required.
sub prompt_for {
    my ($question) = @_;
    print $question;
    my $answer = <STDIN>;
    defined $answer
        or die "No input received (end of input reached) \n";
    chomp($answer);
    return $answer;
}

my $InFile = prompt_for("Input File:\t\t\t");

# Three-argument open with an explicit mode: characters such as '>' or '|'
# in the typed name are treated as part of the name, never as a mode or a
# command to run.
open(my $Input, '<', $InFile)
    or die "The file $InFile could not be opened for reading: $! \n";

#- 3a -------------------------------------------------------------------------
my $Out1File = prompt_for("Primary Output File:\t\t");

# Opening for writing truncates the file, so refuse to reuse the input name.
# This compares the names as typed; two different spellings of the same path
# are not detected.
$Out1File ne $InFile
    or die "The primary output file must not be the input file \n";

open(my $Output1, '>', $Out1File)
    or die "The file $Out1File could not be opened for writing: $! \n";

#- 3b -------------------------------------------------------------------------
my $Out2File = prompt_for("Secondary Output File: \t\t");

$Out2File ne $InFile
    or die "The secondary output file must not be the input file \n";
$Out2File ne $Out1File
    or die "The two output files must have different names \n";

open(my $Output2, '>', $Out2File)
    or die "The file $Out2File could not be opened for writing: $! \n";

#- 4 --------------------------------------------------------------------------
my $StartPosition = prompt_for("Start Position to Search: \t");
$StartPosition =~ s/\A\s+|\s+\z//g;           # Tolerate stray blanks

# The operator counts columns from 1.  Anything else (0, negative, text)
# would make substr count from the end of the line or use a garbage offset.
$StartPosition =~ /\A[1-9][0-9]*\z/
    or die "The start position must be a whole number of 1 or more \n";
$StartPosition = $StartPosition - 1;          # substr counts from 0

#- 5 --------------------------------------------------------------------------
my $SearchValue = prompt_for("Value to search for: \t\t");
my $ValueLength = length($SearchValue);       # Width of the column to compare

# An empty value would match every record, which splits nothing.
$ValueLength > 0
    or die "The value to search for must not be empty \n";

#- 6 --------------------------------------------------------------------------
my $InCount   = 0;                            # Records read
my $Out1Count = 0;                            # Records written to primary
my $Out2Count = 0;                            # Records written to secondary

#- 7 --------------------------------------------------------------------------
while (my $TheLine = <$Input>) {              # Read the input file
    chomp($TheLine);
    $InCount++;

    # A record too short to contain the whole column cannot match; checking
    # the length first keeps substr from being asked for text past the end
    # of the line.
    my $Matches = length($TheLine) >= $StartPosition + $ValueLength
        && substr($TheLine, $StartPosition, $ValueLength) eq $SearchValue;

    # Records are written back exactly as read (no padding is appended), so
    # the two outputs together contain the same data as the input.
    if ($Matches) {
        $Out1Count++;
        print {$Output1} "$TheLine\n"
            or die "Could not write to $Out1File: $! \n";
    }
    else {
        $Out2Count++;
        print {$Output2} "$TheLine\n"
            or die "Could not write to $Out2File: $! \n";
    }
}                                             # End of while(<$Input>) loop

# Buffered write errors (for example a full disk) can surface only when the
# handle is closed, so check the result before reporting success.
close($Input)
    or die "Could not close $InFile: $! \n";
close($Output1)
    or die "Could not finish writing $Out1File: $! \n";
close($Output2)
    or die "Could not finish writing $Out2File: $! \n";

#------------------------------------------------------------------------------
print "\n";                                   # Print the results when done
print "************************************* \n";
print "* $0 COMPLETED SUCCESSFULLY \n";
print "************************************* \n\n";
print "Record Counts: \n\n";
print "Input -- $InFile: \t $InCount \n";
print "Primary Output -- $Out1File: \t $Out1Count \n";
print "Secondary Output -- $Out2File: \t $Out2Count \n\n";

No comments:

Post a Comment