Sort a list of files based on values taken from each file name. We had a task where we needed to process a number of files in a certain order. This order was run, district, copy, copy total. This code is from tobyink. You rock!
<code>
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
# Sort file names by run, then district, then copy, then total.
#
# Example name:
#
#   ASR0004994_8958_ETSTexas_EOC052017P_0517_Candidate_RRD_178901_02_04_Spr17_Initial_201705040951_41043.zip
#
# Fields pulled out of it by the pattern in sort_by_name_fields():
#
#   run      = 8958    (four digits right after the leading token)
#   district = 178901  (six digits)
#   copy     = 02      (two digits)
#   total    = 04      (two digits; together with copy: "copy 02 of 04")
#
my @files = qw(
    ASR0005336_8950_ETSTexas_EOC052017P_0517_Candidate_RRD_178904_01_02_Spr17_Initial_201705040952_41044.zip
    ASR0004520_8960_ETSTexas_EOC052017P_0517_Candidate_RRD_178901_04_04_Spr17_Initial_201705040952_41045.zip
    ASR0004994_8958_ETSTexas_EOC052017P_0517_Candidate_RRD_178901_02_04_Spr17_Initial_201705040951_41043.zip
    ASR0005336_8950_ETSTexas_EOC052017P_0517_Candidate_RRD_178904_02_02_Spr17_Initial_201705040952_41044.zip
    ASR0005154_8957_ETSTexas_EOC052017P_0517_Candidate_RRD_178901_01_04_Spr17_Initial_201705040951_41042.zip
    ASR0005336_8959_ETSTexas_EOC052017P_0517_Candidate_RRD_178901_03_04_Spr17_Initial_201705040952_41044.zip
    ASR0005336_8972_ETSTexas_EOC052017P_0517_Candidate_RRD_178902_01_01_Spr17_Initial_201705040952_41044.zip
);

# Positions inside each [ filename, run, district, copy, total ] record.
# Naming them keeps the sort comparison below readable.
use constant {
    IX_FILENAME => 0,
    IX_RUN      => 1,
    IX_DISTRICT => 2,
    IX_COPY     => 3,
    IX_TOTAL    => 4,
};

# sort_by_name_fields(@names)
#
# Returns the given file names ordered numerically by run, then district,
# then copy, then total (a decorate / sort / undecorate pass, so each name
# is parsed only once). Call it in list context.
#
# Dies, naming the offending file, if a name does not contain the expected
# fields. Letting such a name through would compare undefined values and
# silently sort it as if every field were 0.
sub sort_by_name_fields {
    my @names = @_;

    # Each field is captured exactly once. A quantifier on a capture group
    # would keep only its last repetition, so a longer run of digits must
    # fail to match rather than be truncated to its final four digits.
    my $fields_re = qr{
        \A [A-Z0-9]+      # leading token, e.g. ASR0004994
        _ (\d{4})         # run
        _ ETS .*          # everything up to the last district/copy/total triple
        _ (\d{6})         # district
        _ (\d{2})         # copy
        _ (\d{2})         # total
    }xi;

    # Decorate: pair every name with the fields parsed out of it.
    my @records;
    for my $name (@names) {
        my @fields = $name =~ $fields_re;
        die "Cannot extract run/district/copy/total from file name '$name'\n"
            unless @fields;
        push @records, [ $name, @fields ];
    }

    # Sort on the fields, then undecorate back to plain names. Each <=>
    # yields 0 on a tie, so 'or' falls through to the next, less
    # significant field.
    return
        map  { $_->[IX_FILENAME] }
        sort {
               $a->[IX_RUN]      <=> $b->[IX_RUN]
            or $a->[IX_DISTRICT] <=> $b->[IX_DISTRICT]
            or $a->[IX_COPY]     <=> $b->[IX_COPY]
            or $a->[IX_TOTAL]    <=> $b->[IX_TOTAL]
        }
        @records;
}

my @sorted = sort_by_name_fields(@files);

# Show the resulting order.
print Dumper(\@sorted);
</code>