#!/usr/bin/perl -w
use strict;
use diagnostics;
print "\n";
print "+---------------------------------------------------------------+ \n";
print "| Count characters and/or words and/or lines in a file. | \n";
print "+---------------------------------------------------------------+ \n\n";
#- 1 --------------------------------------------------------------------------
# Ask for the file name and open it for reading.
#------------------------------------------------------------------------------
print "File Name: ";                         # Display the question
my $TheFile = <STDIN>;                       # Store what's typed in
defined($TheFile)                            # STDIN closed before an answer
    or die "No file name was entered. \n";
chomp($TheFile);                             # Get rid of line break
# Three-argument open with a lexical handle: the typed name is always treated
# as a plain file name, never as a mode or a command to run.
open(my $InFile, '<', $TheFile)
    or die "The file $TheFile could not be found. \n";

#------------------------------------------------------------------------------
# ask_yes_no($Question)
#   Prints $Question and reads one line from STDIN, repeating until the
#   answer is Y or N in either case.  Returns the answer lower-cased
#   ('y' or 'n').  Dies if STDIN ends before a valid answer is given, so a
#   closed input stream cannot cause an endless prompt loop.
#------------------------------------------------------------------------------
sub ask_yes_no {
    my ($Question) = @_;
    while (1) {
        print $Question;                     # Display the question
        my $Answer = <STDIN>;                # Store what's typed in
        defined($Answer)
            or die "Unexpected end of input. \n";
        chomp($Answer);                      # Get rid of line break
        $Answer =~ tr/A-Z/a-z/;              # Change 'Y/N' to 'y/n'
        # Both sides of the 'or' need their own comparison; a bare 'n' is
        # always true and would accept every answer.
        return $Answer if $Answer eq 'y' or $Answer eq 'n';
        print "You must enter Y or N. \n";   # Anything else: ask again
    }
}

#- 2 --------------------------------------------------------------------------
# Do they want to count characters?
#------------------------------------------------------------------------------
my $CountChars = ask_yes_no("Count Characters? (y/n) ");
my $CharCount  = 0;                          # Initialize the counter
#- 3 --------------------------------------------------------------------------
# Do they want to count words?
#------------------------------------------------------------------------------
my $CountWords = ask_yes_no("Count Words? (y/n) ");
my $WordCount  = 0;                          # Initialize the counter
#- 4 --------------------------------------------------------------------------
# Do they want to count lines? (records)
#------------------------------------------------------------------------------
my $CountLines = ask_yes_no("Count Lines? (y/n) ");
my $LineCount  = 0;                          # Initialize the counter
#- 5 --------------------------------------------------------------------------
# While there are records in the file...
#------------------------------------------------------------------------------
while (my $TheLine = <$InFile>) {
    $LineCount++;                            # Every record counts as a line
    chomp($TheLine);                         # Get rid of the line break
    next if $TheLine eq "";                  # Blank line: nothing else to count
    $CharCount += length($TheLine);          # Line break is not counted
    # split ' ' breaks on runs of whitespace and ignores leading blanks, so
    # repeated spaces, tabs and padding do not inflate the word count.
    my @Words = split ' ', $TheLine;
    $WordCount += scalar @Words;
}                                            # End of read loop
close($InFile);
#- 6 --------------------------------------------------------------------------
# Small subroutine that inserts thousands separators (commas) into a number
# before it is printed (from wondersky.com)
#------------------------------------------------------------------------------
# comma_me($Number)
#   Returns $Number as a string with a comma inserted before each group of
#   three digits in the leading run of digits (an optional leading minus sign
#   is kept).  Text after that leading run, such as a decimal part, is left
#   untouched.
sub comma_me {
    my ($Number) = @_;
    # Each successful substitution peels the last three digits off the leading
    # digit run; repeat until no run of four or more digits remains.
    while ($Number =~ s/^(-?\d+)(\d{3})/$1,$2/) {
    }
    return $Number;
}
#------------------------------------------------------------------------------
# Processing is done: print the completion banner, then only those counts
# the user answered 'y' for, each formatted with thousands separators.
#------------------------------------------------------------------------------
print "\n",
      "************************************* \n",
      "* $0 COMPLETED SUCCESSFULLY \n",
      "************************************* \n\n",
      "Counts For $TheFile: \n\n";
print "Number of characters: ", comma_me($CharCount), "\n"
    if $CountChars eq "y";
print "Number of words : ", comma_me($WordCount), "\n"
    if $CountWords eq "y";
print "Number of lines : ", comma_me($LineCount), "\n"
    if $CountLines eq "y";
print "\n";
Wednesday, March 10, 2010
Counting Example
Sorting Example
#!/usr/bin/perl -w
use strict;
use diagnostics;
print "\n";
print "+-----------------------------------------------------------------+ \n";
print "| uniqsort.pl: | \n";
print "| This program will list the unique values found in the column(s) | \n";
print "| specified by the user from the file named. The output is sorted | \n";
print "| and shows a record count per value. | \n";
print "| | \n";
print "| Note: Type in the case (upper or lower) that the filename in. | \n";
print "| ---- | \n";
print "+-----------------------------------------------------------------+ \n\n";
#- 1 --------------------------------------------------------------------------
# Ask for the working directory and change to it.
#------------------------------------------------------------------------------
print "Directory Name: ('.' = Current) \t" ;    # Ask for directory
my $Directory = <STDIN>;                        # Store what's typed in
chomp($Directory);                              # Get rid of line break
chdir($Directory)                               # change to the directory
    or die "Invalid Directory \n";              # or if invalid, die
#- 2 --------------------------------------------------------------------------
# Ask for the input file and open it for reading.
#------------------------------------------------------------------------------
print "Input File: \t\t\t\t";                   # Ask for filename
my $InputFile = <STDIN>;                        # Store what's typed in
chomp($InputFile);                              # Get rid of line break
# Three-argument open with a lexical handle: the typed name can only ever be
# read as a file, never interpreted as an open mode or a piped command.
open(my $Input, '<', $InputFile)
    or die "The file $InputFile could not be found. \n";
#- 3 --------------------------------------------------------------------------
# Ask for the first column (1-based) of the value to tally.
#------------------------------------------------------------------------------
print "Start Position: \t\t\t";                 # Ask for the beginning column
my $StartPosition = <STDIN>;                    # Store what's typed in
chomp($StartPosition);                          # Get rid of line break
$StartPosition =~ /\A[1-9]\d*\z/                # 0 or text would mis-index substr
    or die "Start Position must be a whole number of 1 or more. \n";
$StartPosition = $StartPosition - 1;            # Starts from 0 so subtract 1
#- 4 --------------------------------------------------------------------------
# Ask for the last column (1-based) and derive the substring length.
#------------------------------------------------------------------------------
print "End Position: \t\t\t\t";                 # Ask for the ending column
my $EndPosition = <STDIN>;                      # Store what's typed in
chomp($EndPosition);                            # Get rid of line break
$EndPosition =~ /\A[1-9]\d*\z/
    or die "End Position must be a whole number of 1 or more. \n";
$EndPosition = $EndPosition - 1;                # Starts from 0 so subtract 1
$EndPosition >= $StartPosition
    or die "End Position must not be before Start Position. \n";
my $ValueLength = $EndPosition
                - $StartPosition + 1;           # Calculate the length for substr
#- 5 --------------------------------------------------------------------------
my $InCount = 0;                                # Number of records read
my %CountFor;                                   # column value => record count
#- 6 --------------------------------------------------------------------------
# Read through the input file and tally the value found in the columns.
# A hash lookup replaces the linear scan of a value list for every record.
#------------------------------------------------------------------------------
while (my $TheLine = <$Input>) {
    chomp($TheLine);                            # Get rid of the line break
    $InCount++;                                 # increment input count
    # A record that ends before the start column has no value there; tally
    # it under the empty string rather than letting substr return undef.
    my $SubVal = length($TheLine) > $StartPosition
               ? substr($TheLine, $StartPosition, $ValueLength)
               : "";
    $CountFor{$SubVal}++;
}                                               # End of read loop
close($Input);
#- 7 --------------------------------------------------------------------------
print "\n";
print "****************************** \n";     # All finished, so print results
print "* $0 COMPLETED SUCCESSFULLY \n";
print "****************************** \n\n";
print "# Record: $InputFile: $InCount\n";
#- 8 --------------------------------------------------------------------------
# Build one "value - count" line per unique value.  The finished lines are
# what get sorted (not the bare values), so the order matches a plain string
# sort of the printed text.
#------------------------------------------------------------------------------
my @SortList = map { $_ . ' - ' . $CountFor{$_} } keys %CountFor;
#- 9 --------------------------------------------------------------------------
# The leading "" keeps one blank line between the record total and the list.
print join("\n", "", sort(@SortList));          # Print the sorted list
print "\n";
Example of Splitting a File Based on Value
#!/usr/bin/perl -w
use strict;
use diagnostics;
#- 1 --------------------------------------------------------------------------
print "\n";
print "+-----------------------------------------------------------------+ \n";
print "| splitdat.pl: | \n";
print "| Break a file into two output files based on values in an | \n";
print "| operator-supplied column(s). | \n";
print "| | \n";
print "| The output will be written to the directory where the input | \n";
print "| file resides. | \n";
print "| | \n";
print "| Note: Type in the case (upper or lower) that the filename and | \n";
print "| ---- value is in. | \n";
print "+-----------------------------------------------------------------+ \n\n";
#------------------------------------------------------------------------------
# Ask for the working directory and change to it.
#------------------------------------------------------------------------------
print "Directory Name: ('.' = Current)\t";      # Ask for directory
my $Directory = <STDIN>;                        # Store what's typed in
chomp($Directory);                              # Get rid of line break
chdir($Directory)                               # change to the directory
    or die "Invalid Directory \n";              # or if invalid, die
#- 2 --------------------------------------------------------------------------
# Ask for the input file and open it for reading.  All opens below use the
# three-argument form with lexical handles so a typed name is only ever
# treated as a file name, never as an open mode or a piped command.
#------------------------------------------------------------------------------
print "Input File:\t\t\t";                      # Ask for filename
my $InFile = <STDIN>;                           # Store what's typed in
chomp($InFile);                                 # Get rid of line break
open(my $Input, '<', $InFile)                   # Open the file if found.
    or die "The file $InFile could not be found. \n";
#- 3a -------------------------------------------------------------------------
# Primary output: receives the records whose column matches the value.
#------------------------------------------------------------------------------
print "Primary Output File:\t\t";               # Ask for 1st output name
my $Out1File = <STDIN>;                         # Store what's typed in
chomp($Out1File);                               # Get rid of line break
open(my $Output1, '>', $Out1File)               # Try to open
    or die "The file $Out1File could not be opened for writing \n";
#- 3b -------------------------------------------------------------------------
# Secondary output: receives every other record.  The file is opened under
# exactly the name typed, with no stray blank or tab appended to it.
#------------------------------------------------------------------------------
print "Secondary Output File: \t\t";            # Ask for 2nd output name
my $Out2File = <STDIN>;                         # Store what's typed in
chomp($Out2File);                               # Get rid of line break
open(my $Output2, '>', $Out2File)               # Try to open
    or die "The file $Out2File could not be opened for writing \n";
#- 4 --------------------------------------------------------------------------
# Ask for the first column (1-based) of the value to test.
#------------------------------------------------------------------------------
print "Start Position to Search: \t";           # Get beginning of column
my $StartPosition = <STDIN>;                    # Store what's typed in
chomp($StartPosition);                          # Get rid of line break
$StartPosition =~ /\A[1-9]\d*\z/                # 0 or text would mis-index substr
    or die "Start Position must be a whole number of 1 or more. \n";
$StartPosition = $StartPosition - 1;            # Starts from 0 so subtract 1
#- 5 --------------------------------------------------------------------------
# Ask for the value to split on; its length is the width of the column.
#------------------------------------------------------------------------------
print "Value to search for: \t\t";              # Get the value we're splitting on
my $SearchValue = <STDIN>;                      # Store what's typed in
chomp($SearchValue);                            # Get rid of line break
my $ValueLength = length($SearchValue);         # Get the length of the column
#- 6 --------------------------------------------------------------------------
my $InCount   = 0;                              # Initialize counters
my $Out1Count = 0;
my $Out2Count = 0;
#- 7 --------------------------------------------------------------------------
# Read the input file and route each record to one of the two outputs.
#------------------------------------------------------------------------------
while (my $TheLine = <$Input>) {
    chomp($TheLine);                            # Get rid of the line break
    $InCount++;                                 # increment input count
    # A record that ends before the start column has no value there; compare
    # the empty string rather than letting substr return undef.
    my $ColumnVal = length($TheLine) > $StartPosition
                  ? substr($TheLine, $StartPosition, $ValueLength)
                  : "";
    # Records are written back exactly as read (no trailing blank added), so
    # column widths in the output files match the input file.
    if ($ColumnVal eq $SearchValue) {
        $Out1Count++;                           # Increment primary output counter
        print {$Output1} "$TheLine\n";          # write the line to the file
    }
    else {                                      # If another value, write to output2
        $Out2Count++;                           # increment output count
        print {$Output2} "$TheLine\n";          # write the line to the file
    }
}                                               # End of read loop
close($Input);
# Buffered write errors (e.g. disk full) only surface at close, so check it.
close($Output1)
    or die "The file $Out1File could not be written completely: $! \n";
close($Output2)
    or die "The file $Out2File could not be written completely: $! \n";
#------------------------------------------------------------------------------
print "\n";                                     # Print the
print "************************************* \n";   # results when done
print "* $0 COMPLETED SUCCESSFULLY \n";
print "************************************* \n\n";
print "Record Counts: \n\n";
print "Input -- $InFile: \t $InCount \n";
print "Primary Output -- $Out1File: \t $Out1Count \n";
print "Secondary Output -- $Out2File: \t $Out2Count \n\n";
Example of Matching and Moving Data
#!/usr/bin/perl -w
use strict;
use diagnostics;
print "\n";
print "+----------------------------------------------------------------------------------+ \n";
print "| movedat.pl: | \n";
print "| Move data from one file to another based on matching columns. A third file is | \n";
print "| created with all data from the first file and the columns specified from the 2nd.| \n";
print "| | \n";
print "| The output will be written to the directory where the input file resides. | \n";
print "| | \n";
print "| Note: Type in the case (upper or lower) that the filename and value is in | \n";
print "+----------------------------------------------------------------------------------+ \n\n";
#- 1 --------------------------------------------------------------------------
# Ask for the working directory and change to it.
#------------------------------------------------------------------------------
print "Directory Name: ('.' = Current)\t";      # Ask for directory name
my $Directory = <STDIN>;                        # Store what's typed in
chomp($Directory);                              # Get rid of line break
chdir($Directory) or die "Invalid Directory \n";    # change to the directory
#- 2 --------------------------------------------------------------------------
# Open the files.  All opens use the three-argument form with lexical handles
# so a typed name is only ever treated as a file name, never as an open mode
# or a piped command.
#------------------------------------------------------------------------------
print "Primary Input File:\t\t";                # Ask for the 1st input file
my $In1File = <STDIN>;                          # Store what's typed in
chomp($In1File);                                # Get rid of line break
open(my $Input1, '<', $In1File) or die "$In1File not found. \n";
#- 3 --------------------------------------------------------------------------
print "Secondary Input File: \t\t";             # Ask for the 2nd input file
my $In2File = <STDIN>;                          # Store what's typed in
chomp($In2File);                                # Get rid of line break
open(my $Input2, '<', $In2File) or die "$In2File not found. \n";
#- 4 --------------------------------------------------------------------------
print "Output File: \t\t\t";                    # Ask for the output file
my $OutFile = <STDIN>;                          # Store what's typed in
chomp($OutFile);                                # Get rid of line break
open(my $Output, '>', $OutFile)                 # Try to open
    or die "$OutFile could not be opened for writing \n\n";
#- 5a -------------------------------------------------------------------------
# Columns (1-based, same in both files) that must be equal for a match.
#------------------------------------------------------------------------------
print "\nStart Position to Match: \t";          # Get beginning match position
my $StartMatchPos = <STDIN>;                    # Store what's typed in
chomp($StartMatchPos);                          # Get rid of line break
$StartMatchPos =~ /\A[1-9]\d*\z/                # 0 or text would mis-index substr
    or die "Start Position to Match must be a whole number of 1 or more. \n";
$StartMatchPos = $StartMatchPos - 1;            # Starts from 0 so subtract 1
#- 5b -------------------------------------------------------------------------
print "End Position to Match: \t\t";            # Get last position to match
my $EndMatchPos = <STDIN>;                      # Store what's typed in
chomp($EndMatchPos);                            # Get rid of line break
$EndMatchPos =~ /\A[1-9]\d*\z/
    or die "End Position to Match must be a whole number of 1 or more. \n";
$EndMatchPos = $EndMatchPos - 1;                # Starts from 0 so subtract 1
$EndMatchPos >= $StartMatchPos
    or die "End Position to Match must not be before the Start Position. \n";
my $MatchLen = $EndMatchPos - $StartMatchPos + 1;   # Calculate the length for substr
#- 6a -------------------------------------------------------------------------
# Columns (1-based) of the 2nd file to append to each record of the 1st.
#------------------------------------------------------------------------------
print "\nStart Position to Move: \t";           # Display the 1st column to move
my $StartMovePos = <STDIN>;                     # Store what's typed in
chomp($StartMovePos);                           # Get rid of line break
$StartMovePos =~ /\A[1-9]\d*\z/
    or die "Start Position to Move must be a whole number of 1 or more. \n";
$StartMovePos = $StartMovePos - 1;              # Starts from 0 so subtract 1
#- 6b -------------------------------------------------------------------------
print "End Position to Move: \t\t";             # Display the last column to move
my $EndMovePos = <STDIN>;                       # Store what's typed in
chomp($EndMovePos);                             # Get rid of line break
$EndMovePos =~ /\A[1-9]\d*\z/
    or die "End Position to Move must be a whole number of 1 or more. \n";
$EndMovePos = $EndMovePos - 1;                  # Starts from 0 so subtract 1
$EndMovePos >= $StartMovePos
    or die "End Position to Move must not be before the Start Position. \n";
my $MoveLen = $EndMovePos - $StartMovePos + 1;  # Calculate the length for substr
#- 7 --------------------------------------------------------------------------
# Filler appended when a record of the 1st file has no match in the 2nd.
#------------------------------------------------------------------------------
print "\nValue to Insert if no Match: \t";      # Ask for value if no match
my $MissingVal = <STDIN>;                       # Store what's typed in
chomp($MissingVal);                             # Get rid of line break
if ($MoveLen < length($MissingVal)) {           # Print message if value too long
    print "Missing Value too long. Will be truncated.";   # Note if too long
    $MissingVal = substr($MissingVal, 0, $MoveLen);       # Get the truncated value
}
#- 8 --------------------------------------------------------------------------
my $In1Count = 0;                               # Initialize counters
my $OutCount = 0;
#- 9 --------------------------------------------------------------------------
# Read the 2nd file once and remember, for each match value, the columns to
# move.  Only the first record seen for a match value is kept, which is the
# record a top-to-bottom scan of the file would find.  This holds one entry
# per distinct match value in memory instead of rereading the 2nd file for
# every record of the 1st.
#------------------------------------------------------------------------------
my %MoveValFor;                                 # match value => columns to move
while (my $Rec2 = <$Input2>) {
    chomp($Rec2);                               # Get rid of the line break
    # A record that ends before a requested column has no value there; use
    # the empty string rather than letting substr return undef.
    my $Sub2MatchVal = length($Rec2) > $StartMatchPos
                     ? substr($Rec2, $StartMatchPos, $MatchLen)
                     : "";
    next if exists $MoveValFor{$Sub2MatchVal};  # first match wins
    $MoveValFor{$Sub2MatchVal} = length($Rec2) > $StartMovePos
                               ? substr($Rec2, $StartMovePos, $MoveLen)
                               : "";
}
close($Input2);
#- 10 -------------------------------------------------------------------------
# Write every record of the 1st file followed by the moved columns from its
# match in the 2nd file, or by the filler value when there is no match.
#------------------------------------------------------------------------------
while (my $Rec1 = <$Input1>) {
    chomp($Rec1);                               # Get rid of the line break
    $In1Count++;                                # increment input count
    my $Sub1MatchVal = length($Rec1) > $StartMatchPos
                     ? substr($Rec1, $StartMatchPos, $MatchLen)
                     : "";
    my $MoveVal = exists $MoveValFor{$Sub1MatchVal}
                ? $MoveValFor{$Sub1MatchVal}
                : $MissingVal;
    print {$Output} "$Rec1$MoveVal\n";          # one output record per input record
    $OutCount++;                                # increment the output count
}                                               # End of read loop
close($Input1);
# Buffered write errors (e.g. disk full) only surface at close, so check it.
close($Output)
    or die "$OutFile could not be written completely: $! \n";
#- 11 -------------------------------------------------------------------------
print "\n";                                     # Print the
print "************************************* \n";   # results when done
print "* $0 COMPLETED SUCCESSFULLY \n";
print "************************************* \n\n";
print "Record Counts: \n\n";
print "Input -- $In1File: \t $In1Count \n";
print "Output -- $OutFile: \t $OutCount \n";
Miscellaneous Notes
perl -v | the version of perl running (most recent from www.activestate.com) |
perl -c | check syntax but don't run |
perl -w | turn on warnings (reports questionable code that -c alone does not) |
perl -e "code;" | run one line program |
perl -i | use perl to edit files |
perl -d | perl's debugger |
#!/usr/bin/perl -w | first line in code on Unix |
Variable Names: Case-sensitive. Any Length. Letters (A-Z, a-z), digits, and underscores. Can't start with a number.
# | comment -- starts anywhere on the line | |
= | set value on left to value on right | |
; | statement end. not required at the end of a block. | |
{} | beginning and end of block. can be embedded. | |
. | open(INFILE,$TheFile) or die "The file $TheFile could ". | concatenation |
+ | $variable = $variable + 1; | add values on right and store in variable on the left |
$0 | the name of the program that is running | |
$_ | $TheLine = $_; | Save the line's contents |
\n | line break | |
++ | $LineCount++; | Increment value |
\t | tab | |
hex | \x42 = B | |
octal | \102 = B | |
chr | $CapB = chr(66); | |
STDIN | my $SearchValue = <STDIN>; | What user has typed |
tr | $DoSearch =~ tr/A-Z/a-z/; | translate value to lowercase |
print "some text and $variable \n"; | ||
length | $varLength = length($variable2); | |
open | open(filehandle,$filename) or die("message to display if not found \n"); | Open file. If not able, post message |
substr | $var = substr($Line,$PosFromZero,$Len); | substring |
chomp | chomp($TheLine); | get rid of the line break |
seek | seek(INDB,0,0); | Tells Perl where in the file to start |
split | ($Var1, $Var2) = split(/\t/, $TheRec); | split a string based on a value into separate variables |
While Loop | while(<INFILE>) while(1) while(my $Search = <STDIN>) |
Until Loop | until ($var1 == $var2) |
If | if ($variable eq "value") if ($TheLine eq "") if ($Cntr == 0) |
Unless | unless($DoSearch eq 'i') { print "Enter I or Q.\n"; next; } |
For | for ($i = 33; $i <=126; $i++) |
Last | if($SrchVal eq 'q') {last} |
Next | if($TheLine eq "") {next}; |
' vs "
'
"
print 'Lynn\'s programs are on usr\\user\\ltobias.';
Lynn's programs are on usr\user\ltobias.
$Shout = "Help!";
print "Please come when I shout \"$Shout\". \n";
Please come when I shout "Help!".
$Value = "something";
print "\$Value is $Value. \n";
$Value is something.
Subscribe to:
Posts (Atom)