Wednesday, March 10, 2010

Example of Matching and Moving Data

#!/usr/bin/perl -w
use strict;
use diagnostics;

# Startup banner: a blank line, then a boxed description of what the script
# does, followed by one more blank line.  The text is kept as data so the
# box is emitted by a single print.
{
    my $BannerRule = "+----------------------------------------------------------------------------------+ \n";
    my @BannerBody = (
        "| movedat.pl: | \n",
        "| Move data from one file to another based on matching columns. A third file is | \n",
        "| created with all data from the first file and the columns specified from the 2nd.| \n",
        "| | \n",
        "| The output will be written to the directory where the input file resides. | \n",
        "| | \n",
        "| Note: Type in the case (upper or lower) that the filename and value is in | \n",
    );

    print "\n", $BannerRule, @BannerBody, $BannerRule, "\n";
}

#- 1 ---------------------------------------------------------------------------------------------
# Ask for the working directory and make it the current directory, so the
# file names entered afterwards are resolved relative to it.
print "Directory Name: ('.' = Current)\t";                       # Ask for directory name
my $Directory = <STDIN>;                                          # Store what's typed in

# <STDIN> returns undef at end of input (e.g. Ctrl-D or an exhausted pipe);
# stop with a clear message instead of carrying undef forward.
defined($Directory) or die "No directory name was entered \n";
chomp($Directory);                                                # Get rid of line break

# Report the operating-system reason ($!) so "does not exist" can be told
# apart from "permission denied".
chdir($Directory) or die "Invalid Directory '$Directory': $! \n"; # change to the directory

#- 2 --------------------------------------------------------------------------------------------
# Ask for the primary input file and open it for reading on INPUT1.
print "Primary Input File:\t\t";                                  # Ask for the 1st input file
my $In1File = <STDIN>;                                            # Store what's typed in
defined($In1File) or die "No primary input file name was entered \n";
chomp($In1File);                                                  # Get rid of line break

# Two-argument open ignored surrounding whitespace in the name; trim it
# explicitly so names typed with a stray space keep working.
$In1File =~ s/\A\s+|\s+\z//g;

# Three-argument open: the typed text is always treated as a plain file
# name, never as a mode or a command (a name such as ">x" or "cmd |" would
# otherwise truncate a file or run a program).
open(INPUT1, '<', $In1File)
    or die "$In1File could not be opened for reading: $! \n";

#- 3 --------------------------------------------------------------------------------------------
# Ask for the secondary input file and open it for reading on INPUT2.
print "Secondary Input File: \t\t";                               # Ask for the 2nd input file
my $In2File = <STDIN>;                                            # Store what's typed in
defined($In2File) or die "No secondary input file name was entered \n";
chomp($In2File);                                                  # Get rid of line break

# Two-argument open ignored surrounding whitespace in the name; trim it
# explicitly so names typed with a stray space keep working.
$In2File =~ s/\A\s+|\s+\z//g;

# Three-argument open: the typed text is always treated as a plain file
# name, never as a mode or a command (a name such as ">x" or "cmd |" would
# otherwise truncate a file or run a program).
open(INPUT2, '<', $In2File)
    or die "$In2File could not be opened for reading: $! \n";

#- 4 ---------------------------------------------------------------------------------------------
# Ask for the output file and open it for writing on OUTPUT.  An existing
# file of that name is truncated.
print "Output File: \t\t\t";                                      # Ask for the output file
my $OutFile = <STDIN>;                                            # Store what's typed in
defined($OutFile) or die "No output file name was entered \n";
chomp($OutFile);                                                  # Get rid of line break

# Two-argument open ignored surrounding whitespace in the name; trim it
# explicitly so names typed with a stray space keep working.
$OutFile =~ s/\A\s+|\s+\z//g;

# Three-argument open keeps the mode separate from the name.  With the old
# ">$OutFile" form a name beginning with ">" silently became append mode and
# one beginning with "&" was treated as a filehandle to duplicate.
open(OUTPUT, '>', $OutFile)                                       # Try to open
    or die "$OutFile could not be opened for writing: $! \n\n";

#- 5a ---------------------------------------------------------------------------------------------
# Ask for the first column (1-based, as typed by the user) of the field that
# is compared between the two input files.
print "\nStart Position to Match: \t";                            # Get beginning match position
my $StartMatchPos = <STDIN>;                                      # Store what's typed in
defined($StartMatchPos) or die "No start position to match was entered \n";
chomp($StartMatchPos);                                            # Get rid of line break

# Accept only a whole number (surrounding blanks allowed).  Anything else
# would turn into a negative or meaningless substr offset further down.
($StartMatchPos) = $StartMatchPos =~ /\A\s*([0-9]+)\s*\z/;
die "Start Position to Match must be a whole number of 1 or more \n"
    unless defined($StartMatchPos) && $StartMatchPos >= 1;
$StartMatchPos = $StartMatchPos - 1;                              # Starts from 0 so subtract 1

#- 5b ---------------------------------------------------------------------------------------------
# Ask for the last column (1-based, inclusive) of the match field.
print "End Position to Match: \t\t";                              # Get last position to match
my $EndMatchPos = <STDIN>;                                        # Store what's typed in
defined($EndMatchPos) or die "No end position to match was entered \n";
chomp($EndMatchPos);                                              # Get rid of line break

($EndMatchPos) = $EndMatchPos =~ /\A\s*([0-9]+)\s*\z/;
die "End Position to Match must be a whole number of 1 or more \n"
    unless defined($EndMatchPos) && $EndMatchPos >= 1;
$EndMatchPos = $EndMatchPos - 1;                                  # Starts from 0 so subtract 1

# An end before the start would give substr a zero or negative length, which
# it does not treat as an error, so reject it here.
die "End Position to Match must not be before the Start Position \n"
    if $EndMatchPos < $StartMatchPos;

my $MatchLen = $EndMatchPos - $StartMatchPos + 1;                 # Calculate the length for substr

#- 6a ----------------------------------------------------------------------------------------------
# Ask for the first column (1-based, as typed by the user) of the field that
# is copied from the secondary file onto the end of each output record.
print "\nStart Position to Move: \t";                             # Display the 1st column to move
my $StartMovePos = <STDIN>;                                       # Store what's typed in
defined($StartMovePos) or die "No start position to move was entered \n";
chomp($StartMovePos);                                             # Get rid of line break

# Accept only a whole number (surrounding blanks allowed).  Anything else
# would turn into a negative or meaningless substr offset further down.
($StartMovePos) = $StartMovePos =~ /\A\s*([0-9]+)\s*\z/;
die "Start Position to Move must be a whole number of 1 or more \n"
    unless defined($StartMovePos) && $StartMovePos >= 1;
$StartMovePos = $StartMovePos - 1;                                # Starts from 0 so subtract 1

#- 6b ----------------------------------------------------------------------------------------------
# Ask for the last column (1-based, inclusive) of the field to move.
print "End Position to Move: \t\t";                               # Display the last column to move
my $EndMovePos = <STDIN>;                                         # Store what's typed in
defined($EndMovePos) or die "No end position to move was entered \n";
chomp($EndMovePos);                                               # Get rid of line break

($EndMovePos) = $EndMovePos =~ /\A\s*([0-9]+)\s*\z/;
die "End Position to Move must be a whole number of 1 or more \n"
    unless defined($EndMovePos) && $EndMovePos >= 1;
$EndMovePos = $EndMovePos - 1;                                    # Starts from 0 so subtract 1

# An end before the start would give substr a zero or negative length, which
# it does not treat as an error, so reject it here.
die "End Position to Move must not be before the Start Position \n"
    if $EndMovePos < $StartMovePos;

my $MoveLen = $EndMovePos - $StartMovePos + 1;                    # Calculate the length for substr

#- 7 -----------------------------------------------------------------------------------------------
# Filler text appended to a primary record when the secondary file has no
# matching record.  It is cut down to the width of the moved field when it is
# longer; a shorter value is used as typed (it is not padded).
print "\nValue to Insert if no Match: \t";
chomp(my $MissingVal = <STDIN>);                                  # read the reply, drop the line break

if (length($MissingVal) > $MoveLen) {
    print "Missing Value too long. Will be truncated.";           # tell the user before cutting
    $MissingVal = substr($MissingVal, 0, $MoveLen);               # keep only the first $MoveLen characters
}
#- 8 ----------------------------------------------------------------------------------------------
my $In1Count = 0;                                   # Records read from the primary file
my $OutCount = 0;                                   # Records written to the output file

# Extract a fixed-width field from a record.  A record too short to contain
# the field yields the empty string: this is what the comparison and the
# output effectively used before, but without a "substr outside of string"
# warning for every short record.
my $GetField = sub {
    my ($Rec, $Pos, $Len) = @_;
    no warnings 'substr';
    my $Val = substr($Rec, $Pos, $Len);
    return defined($Val) ? $Val : '';
};

#- 9 ----------------------------------------------------------------------------------------------
# Read the secondary file ONCE and remember, for every match value, the move
# value of the FIRST record carrying it.  This gives the same result as
# rescanning the secondary file from the top for each primary record (the
# first match wins) without re-reading the file for every line.  The
# distinct match values of the secondary file are held in memory.
my %MoveValFor;
while (my $Rec2 = <INPUT2>) {
    chomp($Rec2);                                   # Get rid of the line break
    my $Match2 = $GetField->($Rec2, $StartMatchPos, $MatchLen);
    next if exists $MoveValFor{$Match2};            # an earlier record already owns this value
    $MoveValFor{$Match2} = $GetField->($Rec2, $StartMovePos, $MoveLen);
}

#- 10 --------------------------------------------------------------------------------------------
# Copy every primary record to the output, followed by the move value of its
# matching secondary record, or by $MissingVal when there is no match.
while (my $Rec1 = <INPUT1>) {
    chomp($Rec1);                                   # Get rid of the line break
    $In1Count++;                                    # increment input count

    my $Match1  = $GetField->($Rec1, $StartMatchPos, $MatchLen);
    my $MoveVal = exists $MoveValFor{$Match1} ? $MoveValFor{$Match1} : $MissingVal;

    print OUTPUT "$Rec1$MoveVal\n"
        or die "Error writing to $OutFile: $! \n";
    $OutCount++;                                    # increment the output count
}                                                   # End of while(<INPUT1>) loop

close(INPUT1);
close(INPUT2);

# Buffered write errors (disk full, quota, ...) only show up when the handle
# is closed, so check before any success message is printed.
close(OUTPUT) or die "Error closing $OutFile: $! \n";
#- 11 ---------------------------------------------------------------------------------------------
# Closing report: a starred box naming the script ($0), then the number of
# records read from the primary file and written to the output file.
{
    my $StarRule = "************************************* \n";

    print "\n", $StarRule, "* $0 COMPLETED SUCCESSFULLY \n", $StarRule, "\n";
    print "Record Counts: \n\n";
    printf "Input -- %s: \t %s \n",  $In1File, $In1Count;
    printf "Output -- %s: \t %s \n", $OutFile, $OutCount;
}

No comments:

Post a Comment