We had a problem with a client sending us data that was not consistent, so I decided to write a function that could handle many regexes.
We put the regexes into an array, iterate through the array, and break out of the loop as soon as one of them matches.
If no regex matches, we die.
<code>
# figureOutFileFormat( $fileHash, $batchName, $file )
#
# Extracts the date and the account number from a file name and records them
# in $fileHash->{batches}{$batchName}{$file} under the keys year, month, day
# and acct. Leading zeros are removed from the account number (an account
# made up only of zeros is kept as "0").
#
# Several file name layouts are supported. Each layout is described by a
# regex with exactly four capture groups plus the meaning of those groups in
# capture order; the layouts are tried in turn and the first match wins.
#
# Parameters:
#   $fileHash  - hash reference that receives the parsed values
#   $batchName - key of the batch the file belongs to
#   $file      - file name to parse
#
# Prints a one-line summary of what was found to STDOUT.
# Dies if the file name matches none of the known layouts.
sub figureOutFileFormat {
    my ( $fileHash, $batchName, $file ) = @_;

    # "fields" names the capture groups of "regex" in order, so the layout is
    # identified by which regex matched rather than by guessing from the
    # length of the first capture (which misreads short account numbers).
    my @formats = (
        {
            fields => [qw(year month day acct)],
            regex  => qr/
                ^                # e.g. 20140715DDA_0610004666.PDF
                (\d{4})          # year
                (\d{2})          # month
                (\d{2})          # day
                [[:alpha:]]*     # DDA or SAV or nothing
                .                # one separator character, such as _ or \.
                (\d+)            # account number
                \.pdf
            /xi,
        },
        {
            fields => [qw(acct month day year)],
            regex  => qr/
                ^                # e.g. 0000000401003876-S-01042015.pdf.PDF
                (\d+)            # account number
                .*               # any separator text
                [[:alpha:]]*     # DDA or SAV or nothing
                .*               # any separator text
                (\d{2})          # month
                (\d{2})          # day
                (\d{4})          # year
                \.pdf
            /xi,
        },
    );

    foreach my $format (@formats) {

        # In list context a successful match returns the captured groups;
        # a failed match returns the empty list.
        my @captures = ( $file =~ $format->{regex} );
        next unless @captures;

        my %part;
        @part{ @{ $format->{fields} } } = @captures;
        my ( $year, $month, $day, $acct ) = @part{qw(year month day acct)};

        # remove leading zeros, but always leave at least one digit
        $acct =~ s/^0+(?=\d)//;

        my $entry = $fileHash->{batches}->{$batchName}->{$file} ||= {};
        @{$entry}{qw(month year day acct)} = ( $month, $year, $day, $acct );

        print "Account : $acct Date : ${year}-${month}-${day}\n";

        # first matching layout wins
        return;
    }

    # None of the known layouts matched.
    die "Could not find date or account number from the file name : $file\n";
}
</code>