-
Notifications
You must be signed in to change notification settings - Fork 2
/
group.pl
executable file
·58 lines (52 loc) · 1.54 KB
/
group.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/bin/env perl
=pod
Author: Shijian Sky Zhang
E-mail: [email protected]
=cut
use 5.012;
use warnings;
use Getopt::Long;
use File::Basename;
use List::Util qw/sum/;
# Print the command-line help text to STDOUT and terminate the script.
# Takes no arguments and never returns: it always ends with exit(-1),
# whether it was reached via -h/--help or via an option-parsing failure.
sub usage {
    # Show only the file name of the running script, not its full path.
    my $program = basename($0);

    my $helpText = <<"END_HELP";
Usage: perl $program INPUT >OUTPUT
If INPUT isn't specified, input from STDIN
Option:
-g --group INT The column to do the group to
-v --value INT The column to do the group from
-b --by STR Group by ['join'], 'sum' or 'count'
-h --help Print this help information
END_HELP

    print $helpText;
    exit(-1);
}
# ---------------------------------------------------------------------------
# Main program.
#
# Reads tab-separated rows from INPUT (first positional argument, or STDIN
# when it is absent or '-'), groups them by the 1-based column --group, and
# aggregates the 1-based column --value within each group using --by:
#   join  - comma-separated list of the values (default)
#   sum   - numeric sum of the values
#   count - number of rows in the group
# One row is printed per group, in order of first appearance in the input.
# The other columns of that row come from the LAST input row seen for the
# group; the value column is replaced by the aggregated result.
# ---------------------------------------------------------------------------
my ($groupCol, $valueCol, $by);
GetOptions(
    'g|group=i' => \$groupCol,
    'v|value=i' => \$valueCol,
    'b|by=s'    => \$by,
    'h|help'    => sub { usage() },
) || usage();

# The help text advertises 'join' as the default aggregation method.
$by //= 'join';

# Both column options are mandatory. Without this check a missing or zero
# column would turn into a negative array index and silently read from the
# end of each row.
for my $option ([group => $groupCol], [value => $valueCol]) {
    my ($name, $column) = @$option;
    next if defined $column && $column >= 1;
    warn "Option --$name is required and must be a column number >= 1\n";
    usage();
}

# Dispatch table replacing the experimental given/when construct.
my %aggregatorFor = (
    sum   => sub { sum(@_) },
    count => sub { scalar @_ },
    join  => sub { join ',', @_ },
);
my $aggregate = $aggregatorFor{$by};
unless ($aggregate) {
    warn "Unknown --by method '$by'; expected 'join', 'sum' or 'count'\n";
    usage();
}

# Three-argument open with a lexical handle: the file name can no longer be
# interpreted as a mode or a pipe. '-' (or no argument) still means STDIN.
my $inputName = defined $ARGV[0] ? $ARGV[0] : '-';
my $in;
if ($inputName eq '-') {
    $in = \*STDIN;
}
else {
    open $in, '<', $inputName or die "Can't read file ($inputName): $!";
}

my %groupHash;     # group key => { value => [values...], fields => [last row] }
my @groupOrder;    # group keys in order of first appearance
while (my $line = <$in>) {
    chomp $line;
    my @fields  = split /\t/, $line;
    my $groupID = $fields[$groupCol - 1];
    my $value   = $fields[$valueCol - 1];

    push @groupOrder, $groupID unless exists $groupHash{$groupID};
    push @{ $groupHash{$groupID}{value} }, $value;
    # Later rows overwrite earlier ones, so the last row of a group wins.
    $groupHash{$groupID}{fields} = \@fields;
}
close $in if $inputName ne '-';

for my $groupID (@groupOrder) {
    my $group  = $groupHash{$groupID};
    my @fields = @{ $group->{fields} };
    $fields[$valueCol - 1] = $aggregate->(@{ $group->{value} });
    say join "\t", @fields;
}