Wednesday, March 10, 2010

Counting Example

#!/usr/bin/perl -w
use strict;
use diagnostics;

# Show the program banner.
print "\n";
print "+---------------------------------------------------------------+ \n";
print "| Count characters and/or words and/or lines in a file. | \n";
print "+---------------------------------------------------------------+ \n\n";

#- 1 --------------------------------------------------------------------------
# Ask for filename
#------------------------------------------------------------------------------

print "File Name: ";                        # Display the question
my $TheFile = <STDIN>;                      # Store what's typed in
die "No file name was entered. \n"          # Input ended before an answer
    unless defined $TheFile;
chomp($TheFile);                            # Get rid of line break

# Three-argument open: the typed name is always treated as a plain file to
# read, even if it starts with '>' or ends with '|'.  $! reports the real
# reason for a failure (missing file, no permission, ...).
open(INFILE, '<', $TheFile)
    or die "The file $TheFile could not be opened: $! \n";

#- 2 --------------------------------------------------------------------------
# Do they want to count characters
#------------------------------------------------------------------------------
# Ask whether characters should be counted.  The question is repeated until
# the answer is 'y' or 'n' (upper or lower case).
my $CountChars;
CHARQUESTION:
{
    print "Count Characters? (y/n) ";       # Display the question
    $CountChars = <STDIN>;                  # Store what's typed in
    die "\nInput ended before the question was answered. \n"
        unless defined $CountChars;         # Without this, EOF would loop forever
    chomp($CountChars);                     # Get rid of line break
    $CountChars =~ tr/A-Z/a-z/;             # Change 'Y/N' to 'y/n'

    # Both comparisons are spelled out: "eq 'y' or 'n'" is always true
    # because the bare string 'n' is a true value.
    unless ($CountChars eq 'y' or $CountChars eq 'n') {
        print "You must enter Y or N. \n";
        redo CHARQUESTION;                  # Ask this same question again
    }
}
my $CharCount = 0;                          # Initialize the counter

#- 3 --------------------------------------------------------------------------
# Do they want to count words?
#------------------------------------------------------------------------------
# Ask whether words should be counted.  The question is repeated until the
# answer is 'y' or 'n' (upper or lower case).
my $CountWords;
WORDQUESTION:
{
    print "Count Words? (y/n) ";            # Display the question
    $CountWords = <STDIN>;                  # Store what's typed in
    die "\nInput ended before the question was answered. \n"
        unless defined $CountWords;         # Without this, EOF would loop forever
    chomp($CountWords);                     # Get rid of line break
    $CountWords =~ tr/A-Z/a-z/;             # Change 'Y/N' to 'y/n'

    # Both comparisons are spelled out: "eq 'y' or 'n'" is always true
    # because the bare string 'n' is a true value.
    unless ($CountWords eq 'y' or $CountWords eq 'n') {
        print "You must enter Y or N. \n";
        redo WORDQUESTION;                  # Ask this same question again
    }
}
my $WordCount = 0;                          # Initialize the counter

#- 4 --------------------------------------------------------------------------
# Do they want to count lines? (records)
#------------------------------------------------------------------------------
# Ask whether lines should be counted.  The question is repeated until the
# answer is 'y' or 'n' (upper or lower case).
my $CountLines;
LINEQUESTION:
{
    print "Count Lines? (y/n) ";            # Display the question
    $CountLines = <STDIN>;                  # Store what's typed in
    die "\nInput ended before the question was answered. \n"
        unless defined $CountLines;         # Without this, EOF would loop forever
    chomp($CountLines);                     # Get rid of line break
    $CountLines =~ tr/A-Z/a-z/;             # Change 'Y/N' to 'y/n'

    # Both comparisons are spelled out: "eq 'y' or 'n'" is always true
    # because the bare string 'n' is a true value.
    unless ($CountLines eq 'y' or $CountLines eq 'n') {
        print "You must enter Y or N. \n";
        redo LINEQUESTION;                  # Ask this same question again
    }
}
my $LineCount = 0;                          # Initialize the counter

#- 5 --------------------------------------------------------------------------
# While there are records in the file...
#-----------------------------------------------------------------------------
# Read the file one line at a time and update the three counters:
#   $LineCount - every line read, blank or not
#   $CharCount - characters on each non-blank line, line break excluded
#   $WordCount - runs of non-whitespace characters, so repeated spaces and
#                tabs between words do not inflate the total
# The handle name must match the one that was opened (INFILE, upper case);
# handle names are case-sensitive.
while (my $TheLine = <INFILE>) {
    $LineCount++;                           # Count Lines

    chomp($TheLine);                        # Get rid of the line break
    next if $TheLine eq "";                 # Nothing more to count on a blank line

    $CharCount += length($TheLine);         # Add this line's length

    # split ' ' discards leading whitespace and splits on any whitespace run,
    # so the number of pieces is the number of words on the line.
    my @Words = split ' ', $TheLine;
    $WordCount += scalar @Words;
}
close(INFILE);                              # Finished reading
#- 6 --------------------------------------------------------------------------
# Write A Small Sub Routine To Do The Same (from wondersky.com)
#------------------------------------------------------------------------------
# Insert thousands separators into a number: comma_me(1234567) gives
# "1,234,567".  Only the leading run of digits (after an optional minus sign)
# is grouped; whatever follows it is returned untouched.
sub comma_me {
    my ($Number) = @_;

    # Each successful pass splits the last three digits off the leading digit
    # run; stop as soon as a pass finds nothing left to split.
    while ($Number =~ s/^(-?\d+)(\d{3})/$1,$2/) {
        # All the work happens in the substitution itself.
    }

    return $Number;
}
#------------------------------------------------------------------------------
# All finished, so print out the results
#------------------------------------------------------------------------------

# Closing banner, followed by one total for each count the user asked for.
print "\n",
      "************************************* \n",
      "* $0 COMPLETED SUCCESSFULLY \n",
      "************************************* \n\n",
      "Counts For $TheFile: \n\n";

# A total is shown only when its question was answered 'y'.
print "Number of characters: ", comma_me($CharCount), "\n" if $CountChars eq "y";
print "Number of words : ", comma_me($WordCount), "\n" if $CountWords eq "y";
print "Number of lines : ", comma_me($LineCount), "\n" if $CountLines eq "y";
print "\n";

Sorting Example

#!/usr/bin/perl -w
use strict;
use diagnostics;

# Show the program banner.
print "\n";
print "+-----------------------------------------------------------------+ \n";
print "| uniqsort.pl: | \n";
print "| This program will list the unique values found in the column(s) | \n";
print "| specified by the user from the file named. The output is sorted | \n";
print "| and shows a record count per value. | \n";
print "| | \n";
print "| Note: Type in the case (upper or lower) that the filename in. | \n";
print "| ---- | \n";
print "+-----------------------------------------------------------------+ \n\n";
#- 1 --------------------------------------------------------------------------
print "Directory Name: ('.' = Current) \t" ; # Ask for directory
my $Directory = <STDIN>;                    # Store what's typed in
die "No directory name was entered. \n"     # Input ended before an answer
    unless defined $Directory;
chomp($Directory);                          # Get rid of line break

chdir($Directory)                           # change to the directory
    or die "Invalid Directory \n";          # or if invalid, die
#- 2 --------------------------------------------------------------------------
print "Input File: \t\t\t\t";               # Ask for filename
my $InputFile = <STDIN>;                    # Store what's typed in
die "No input file name was entered. \n"    # Input ended before an answer
    unless defined $InputFile;
chomp($InputFile);                          # Get rid of line break

# Three-argument open: the typed name is always treated as a plain file to
# read, even if it starts with '>' or ends with '|'.  $! gives the reason
# when the open fails.
open(INPUT, '<', $InputFile)
    or die "The file $InputFile could not be opened: $! \n";
#- 3 --------------------------------------------------------------------------
# Ask for a 1-based column number and hand it back as the 0-based offset that
# substr() expects.  $Lowest is the smallest acceptable answer; anything that
# is not a whole number of at least $Lowest stops the program, because a
# negative offset or length would make substr() pick the wrong characters.
my $AskColumn = sub {
    my ($Prompt, $Label, $Lowest) = @_;
    print $Prompt;                          # Display the question
    my $Answer = <STDIN>;                   # Store what's typed in
    $Answer = '' unless defined $Answer;    # Input ended before an answer
    $Answer =~ s/^\s+|\s+$//g;              # Drop the line break and stray blanks
    die "$Label must be a whole number of $Lowest or more. \n"
        unless $Answer =~ /\A\d+\z/ && $Answer >= $Lowest;
    return $Answer - 1;                     # Starts from 0 so subtract 1
};

my $StartPosition = $AskColumn->("Start Position: \t\t\t", 'Start Position', 1);
#- 4 --------------------------------------------------------------------------
# The end column may not come before the start column.
my $EndPosition = $AskColumn->("End Position: \t\t\t\t", 'End Position',
                               $StartPosition + 1);

my $ValueLength = $EndPosition
                - $StartPosition + 1;       # Calculate the length for substr
#- 5 --------------------------------------------------------------------------
my $InCount = 0;                            # Number of records read
my %CountFor;                               # Column value => number of records

#- 6 --------------------------------------------------------------------------
# Read through the input file and tally the value found in the chosen
# columns.  A hash lookup finds the running count for a value directly, so
# the list of values seen so far never has to be searched.
while (my $TheLine = <INPUT>) {
    chomp($TheLine);                        # Get rid of the line break
    $InCount++;                             # increment input count

    # A record too short to reach the start column is tallied under the
    # empty value; substr() would return undef for it.
    my $SubVal = length($TheLine) > $StartPosition
               ? substr($TheLine, $StartPosition, $ValueLength)
               : '';
    $CountFor{$SubVal}++;
}
close(INPUT);                               # Finished reading

#- 7 --------------------------------------------------------------------------
print "\n";
print "****************************** \n";  # All finished, so print results
print "* $0 COMPLETED SUCCESSFULLY \n";
print "****************************** \n\n";
print "# Record: $InputFile: $InCount\n";

#- 8 --------------------------------------------------------------------------
# Build one "value - count" line per unique value.
#------------------------------------------------------------------------------
my @SortList = map { $_ . ' - ' . $CountFor{$_} } keys %CountFor;

#- 9 --------------------------------------------------------------------------
# Print the lines in sorted (string) order.  The empty first element of the
# join puts a blank line between the record total and the list.
print join("\n", '', sort(@SortList));
print "\n";

Example of Splitting a File Based on Value

#!/usr/bin/perl -w
use strict;
use diagnostics;

#- 1 --------------------------------------------------------------------------
# Show the program banner.
print "\n";
print "+-----------------------------------------------------------------+ \n";
print "| splitdat.pl: | \n";
print "| Break a file into two output files based on values in an | \n";
print "| operator-supplied column(s). | \n";
print "| | \n";
print "| The output will be written to the directory where the input | \n";
print "| file resides. | \n";
print "| | \n";
print "| Note: Type in the case (upper or lower) that the filename and | \n";
print "| ---- value is in. | \n";
print "+-----------------------------------------------------------------+ \n\n";

#------------------------------------------------------------------------------
print "Directory Name: ('.' = Current)\t";  # Ask for directory
my $Directory = <STDIN>;                    # Store what's typed in
die "No directory name was entered. \n"     # Input ended before an answer
    unless defined $Directory;
chomp($Directory);                          # Get rid of line break

chdir($Directory)                           # change to the directory
    or die "Invalid Directory \n";          # or if invalid, die

#- 2 --------------------------------------------------------------------------
print "Input File:\t\t\t";                  # Ask for filename
my $InFile = <STDIN>;                       # Store what's typed in
die "No input file name was entered. \n"    # Input ended before an answer
    unless defined $InFile;
chomp($InFile);                             # Get rid of line break

# Three-argument open: the typed name is always treated as a plain file to
# read, even if it starts with '>' or ends with '|'.  $! gives the reason
# when the open fails.
open(INPUT, '<', $InFile)
    or die "The file $InFile could not be opened: $! \n";

#- 3a -------------------------------------------------------------------------
print "Primary Output File:\t\t";           # Ask for 1st output name
my $Out1File = <STDIN>;                     # Store what's typed in
die "No primary output file name was entered. \n"
    unless defined $Out1File;               # Input ended before an answer
chomp($Out1File);                           # Get rid of line break

# Opening a file for writing empties it first, so writing over the input
# file would destroy the data before it is read.
die "The primary output file must not be the input file. \n"
    if $Out1File eq $InFile;

#- 3.1a -------------------------------------------------------------------
# Three-argument open keeps the mode ('>') apart from the name, so the name
# is used exactly as typed.
open(OUTPUT1, '>', $Out1File)               # Try to open
    or die "The file $Out1File could not be opened for writing \n";

#- 3b -------------------------------------------------------------------------
print "Secondary Output File: \t\t";        # Ask for 2nd output name
my $Out2File = <STDIN>;                     # Store what's typed in
die "No secondary output file name was entered. \n"
    unless defined $Out2File;               # Input ended before an answer
chomp($Out2File);                           # Get rid of line break

die "The secondary output file must not be the input file. \n"
    if $Out2File eq $InFile;
# Two handles writing to one file would overwrite each other's records.
die "The two output files must have different names. \n"
    if $Out2File eq $Out1File;

#- 3.1b -------------------------------------------------------------------
# The name is passed on its own, without the blank and tab that used to be
# tacked on after it.
open(OUTPUT2, '>', $Out2File)               # Try to open
    or die "The file $Out2File could not be opened for writing \n";

#- 4 --------------------------------------------------------------------------
print "Start Position to Search: \t";       # Get beginning of column
my $StartPosition = <STDIN>;                # Store what's typed in
$StartPosition = ''                         # Input ended before an answer
    unless defined $StartPosition;
$StartPosition =~ s/^\s+|\s+$//g;           # Drop the line break and stray blanks

# Columns are numbered from 1.  Anything else would become a negative offset,
# which substr() counts from the end of the record instead of the start.
die "Start Position to Search must be a whole number of 1 or more. \n"
    unless $StartPosition =~ /\A\d+\z/ && $StartPosition >= 1;
$StartPosition = $StartPosition - 1;        # Starts from 0 so subtract 1

#- 5 --------------------------------------------------------------------------
print "Value to search for: \t\t";          # Get the value we're splitting on
my $SearchValue = <STDIN>;                  # Store what's typed in
die "\nNo search value was entered. \n"     # Input ended before an answer
    unless defined $SearchValue;
chomp($SearchValue);                        # Get rid of line break
my $ValueLength = length($SearchValue);     # Get the length of the column

#- 6 --------------------------------------------------------------------------
my $InCount   = 0;                          # Initialize counters
my $Out1Count = 0;
my $Out2Count = 0;

#- 7 --------------------------------------------------------------------------
# Read the input file and copy each record to one of the two output files:
# records holding the search value at the chosen position go to the primary
# file, every other record goes to the secondary file.  Records are written
# back exactly as read (plus the line break), with no added blank, so the
# output files keep the input's record layout.
while (my $TheLine = <INPUT>) {
    chomp($TheLine);                        # Get rid of the line break
    $InCount++;                             # increment input count

    # A record too short to reach the start position is compared as an empty
    # value; substr() would return undef for it.
    my $ColumnValue = length($TheLine) > $StartPosition
                    ? substr($TheLine, $StartPosition, $ValueLength)
                    : '';

    if ($ColumnValue eq $SearchValue) {
        $Out1Count++;                       # Increment primary output counter
        print OUTPUT1 "$TheLine\n";         # write the line to the file
    }
    else {
        $Out2Count++;                       # increment output count
        print OUTPUT2 "$TheLine\n";         # write the line to the file
    }
}
close(INPUT);                               # Finished reading

# Output is buffered, so a failed write (a full disk, for example) may only
# show up when the file is closed.
close(OUTPUT1) or die "The file $Out1File could not be written completely: $! \n";
close(OUTPUT2) or die "The file $Out2File could not be written completely: $! \n";
#------------------------------------------------------------------------------
# Closing banner and the record count for each of the three files.
my @Report = (
    "\n",
    "************************************* \n",
    "* $0 COMPLETED SUCCESSFULLY \n",
    "************************************* \n\n",
    "Record Counts: \n\n",
    "Input -- $InFile: \t $InCount \n",
    "Primary Output -- $Out1File: \t $Out1Count \n",
    "Secondary Output -- $Out2File: \t $Out2Count \n\n",
);
print @Report;

Example of Matching and Moving Data

#!/usr/bin/perl -w
use strict;
use diagnostics;

# Show the program banner.
print "\n";
print "+----------------------------------------------------------------------------------+ \n";
print "| movedat.pl: | \n";
print "| Move data from one file to another based on matching columns. A third file is | \n";
print "| created with all data from the first file and the columns specified from the 2nd.| \n";
print "| | \n";
print "| The output will be written to the directory where the input file resides. | \n";
print "| | \n";
print "| Note: Type in the case (upper or lower) that the filename and value is in | \n";
print "+----------------------------------------------------------------------------------+ \n\n";

#- 1 ---------------------------------------------------------------------------------------------
print "Directory Name: ('.' = Current)\t";                  # Ask for directory name
my $Directory = <STDIN>;                                    # Store what's typed in
die "No directory name was entered. \n"                     # Input ended before an answer
    unless defined $Directory;
chomp($Directory);                                          # Get rid of line break
chdir($Directory) or die "Invalid Directory \n";            # change to the directory

#- 2 --------------------------------------------------------------------------------------------
# Both input files use three-argument open, so a typed name is always treated
# as a plain file to read, even if it starts with '>' or ends with '|'.
print "Primary Input File:\t\t";                            # Ask for the 1st input file
my $In1File = <STDIN>;                                      # Store what's typed in
die "No primary input file name was entered. \n"
    unless defined $In1File;
chomp($In1File);                                            # Get rid of line break
open(INPUT1, '<', $In1File)                                 # Try to open
    or die "$In1File could not be opened: $! \n";

#- 3 --------------------------------------------------------------------------------------------
print "Secondary Input File: \t\t";                         # Ask for the 2nd input file
my $In2File = <STDIN>;                                      # Store what's typed in
die "No secondary input file name was entered. \n"
    unless defined $In2File;
chomp($In2File);                                            # Get rid of line break
open(INPUT2, '<', $In2File)                                 # Try to open
    or die "$In2File could not be opened: $! \n";

#- 4 ---------------------------------------------------------------------------------------------
print "Output File: \t\t\t";                                # Ask for the output file
my $OutFile = <STDIN>;                                      # Store what's typed in
die "No output file name was entered. \n"
    unless defined $OutFile;
chomp($OutFile);                                            # Get rid of line break

# Opening a file for writing empties it first, so writing over either input
# file would destroy its data before it is read.
die "The output file must not be one of the input files. \n"
    if $OutFile eq $In1File or $OutFile eq $In2File;

open(OUTPUT, '>', $OutFile)                                 # Try to open
    or die "$OutFile could not be opened for writing \n\n";

#- 5a ---------------------------------------------------------------------------------------------
# Ask for a 1-based column number and hand it back as the 0-based offset that
# substr() expects.  $Lowest is the smallest acceptable answer; anything that
# is not a whole number of at least $Lowest stops the program, because a
# negative offset or length would make substr() pick the wrong characters.
my $AskColumn = sub {
    my ($Prompt, $Label, $Lowest) = @_;
    print $Prompt;                                          # Display the question
    my $Answer = <STDIN>;                                   # Store what's typed in
    $Answer = '' unless defined $Answer;                    # Input ended before an answer
    $Answer =~ s/^\s+|\s+$//g;                              # Drop line break and stray blanks
    die "$Label must be a whole number of $Lowest or more. \n"
        unless $Answer =~ /\A\d+\z/ && $Answer >= $Lowest;
    return $Answer - 1;                                     # Starts from 0 so subtract 1
};

my $StartMatchPos = $AskColumn->("\nStart Position to Match: \t",
                                 'Start Position to Match', 1);

#- 5b ---------------------------------------------------------------------------------------------
# An end column may not come before its start column.
my $EndMatchPos = $AskColumn->("End Position to Match: \t\t",
                               'End Position to Match', $StartMatchPos + 1);

my $MatchLen = $EndMatchPos - $StartMatchPos + 1;           # Calculate the length for substr

#- 6a ----------------------------------------------------------------------------------------------
my $StartMovePos = $AskColumn->("\nStart Position to Move: \t",
                                'Start Position to Move', 1);

#- 6b ----------------------------------------------------------------------------------------------
my $EndMovePos = $AskColumn->("End Position to Move: \t\t",
                              'End Position to Move', $StartMovePos + 1);

my $MoveLen = $EndMovePos - $StartMovePos + 1;              # Calculate the length for substr

#- 7 -----------------------------------------------------------------------------------------------
print "\nValue to Insert if no Match: \t";                  # Ask for value if no match
my $MissingVal = <STDIN>;                                   # Store what's typed in
$MissingVal = '' unless defined $MissingVal;                # Input ended: insert nothing
chomp($MissingVal);                                         # Get rid of line break
if ($MoveLen < length($MissingVal)) {                       # Value wider than the moved columns
    print "Missing Value too long. Will be truncated. \n";  # Note if too long
    $MissingVal = substr($MissingVal, 0, $MoveLen);         # Get the truncated value
}
#- 8 ----------------------------------------------------------------------------------------------
my $In1Count = 0;                                           # Initialize counters
my $OutCount = 0;

# Cut $Len characters out of $Rec starting at offset $Start.  A record too
# short to reach $Start gives an empty value; substr() would return undef.
my $Column = sub {
    my ($Rec, $Start, $Len) = @_;
    return length($Rec) > $Start ? substr($Rec, $Start, $Len) : '';
};

#- 9 ----------------------------------------------------------------------------------------------
# Read the 2nd file once and remember, for each match value, the columns to
# move.  Only the first record carrying a given match value is kept, which is
# the record a top-to-bottom search of the file would find.
my %MoveValFor;
while (my $Rec2 = <INPUT2>) {
    chomp($Rec2);                                           # Get rid of the line break
    my $Sub2MatchVal = $Column->($Rec2, $StartMatchPos, $MatchLen);
    next if exists $MoveValFor{$Sub2MatchVal};              # An earlier record already won
    $MoveValFor{$Sub2MatchVal} = $Column->($Rec2, $StartMovePos, $MoveLen);
}
close(INPUT2);                                              # Finished reading

#- 10 --------------------------------------------------------------------------------------------
# Copy every record of the 1st file to the output, followed by the columns
# moved from its matching record or, with no match, by the missing value.
while (my $Rec1 = <INPUT1>) {
    chomp($Rec1);                                           # Get rid of the line break
    $In1Count++;                                            # increment input count
    my $Sub1MatchVal = $Column->($Rec1, $StartMatchPos, $MatchLen);

    my $Appended = exists $MoveValFor{$Sub1MatchVal}
                 ? $MoveValFor{$Sub1MatchVal}
                 : $MissingVal;
    print OUTPUT "$Rec1$Appended\n";                        # write to the 3rd file
    $OutCount++;                                            # increment the output count
}
close(INPUT1);                                              # Finished reading

# Output is buffered, so a failed write (a full disk, for example) may only
# show up when the file is closed.
close(OUTPUT) or die "$OutFile could not be written completely: $! \n";
#- 11 ---------------------------------------------------------------------------------------------
# Closing banner and the record counts for the primary input and the output.
my @Report = (
    "\n",
    "************************************* \n",
    "* $0 COMPLETED SUCCESSFULLY \n",
    "************************************* \n\n",
    "Record Counts: \n\n",
    "Input -- $In1File: \t $In1Count \n",
    "Output -- $OutFile: \t $OutCount \n",
);
print @Report;

Miscellaneous Notes


perl -v            the version of perl running
                   (most recent from www.activestate.com)
perl -c            check syntax but don't run
perl -w            check program more thoroughly than -c
perl -e "code;"    run one line program
perl -i            use perl to edit files
perl -d            perl's debugger

#!/usr/bin/perl -w    first line in code on Unix

Variable Names: Case-sensitive. Any length. Letters (A-Z, a-z), digits, and underscores. Can't start with a digit.

#        comment -- starts anywhere on the line
=        set value on left to value on right
;        statement end.
         not required at the end of a block.
{}       beginning and end of block. can be embedded.
.        concatenation
             open(INFILE,$TheFile)
                 or die "The file $TheFile could ".
                        "not be found. \n";
+        add values on right and store in variable on the left
             $variable = $variable + 1;
$0       the name of the program that is running
$_       the current line
             $TheLine = $_;       Save the line's contents
\n       line break
++       Increment value
             $LineCount++;
\t       tab
hex      \x42 = B
octal    \102 = B
chr      $CapB = chr(66);

STDIN     my $SearchValue = <STDIN>;
              What user has typed
tr        $DoSearch =~ tr/A-Z/a-z/;
              translate value to lowercase
print     print "some text and $variable \n";
length    $varLength = length($variable2);
open      open(filehandle,$filename)
              or die("message to display if not found \n");
              Open file. If not able, post message
substr    $var = substr($Line,$PosFromZero,$Len);
              substring
chomp     chomp($TheLine);
              get rid of the line break
seek      seek(INDB,0,0);
              Tells Perl where in the file to start
split     ($Var1, $Var2) = split(/\t/, $TheRec);
              split a string based on a value into separate variables

While Loop
    while(<INFILE>) # while there are records in the file
    { $TheLine = $_; # save the line's contents
      chomp($TheLine); # get rid of the line break

      if($TheLine eq "")
      {next}; # don't count if the line is a blank
      $LineCount++; # increment the line count
    }; # end of block

    while(1) # loop forever (1=true)
    { print "Y or N"; # show this line on the screen.
      my $Search = <STDIN>; # save what's typed
    }; # end of block
Until Loop
    until ($var1 == $var2)
    { if ($var3 eq $var4)
      { do some statements }
    }
If
    if ($variable eq "value")
    {block of statements};
    if ($TheLine eq "")
    {next} ; # don't count if the line is a blank
    if ($Cntr == 0)
    {print "zero"};
Unless
    unless($DoSearch eq 'i') # If not 'i',
    { print "Enter I or Q.\n";
      next; # Go back to 'while' for next try
    }
For
    for ($i = 33;
         $i <=126;
         $i++)
    {statements}
Last
    if($SrchVal eq 'q') {last}
Next
    if($TheLine eq "") {next};



' vs "
'    does not interpret the text (except for the escapes \' and \\)
"    interprets variables and escapes such as \n

print 'Lynn\'s programs are on usr\\user\\ltobias.';
Lynn's programs are on usr\user\ltobias.

$Shout = "Help!";
print "Please come when I shout \"$Shout\". \n";
Please come when I shout "Help!".

$Value = "something";
print "\$Value is $Value. \n";
$Value is something.