forked from Ming-Lian/Bioinfo_LeetCode
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MatrixMaker.pl
86 lines (68 loc) · 2.06 KB
/
MatrixMaker.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl -w
use Getopt::Long;
use POSIX;
## **********************************
## ** Parse command-line arguments **
## **********************************
# Options (collected into %opts):
#   -i       input directory holding the per-sample quant files (required)
#   -o       path of the merged matrix file to write            (required)
#   -suffix  file-name suffix selecting which files to merge    (optional;
#            normalised to the empty string when not given)
my %opts = ();
# GetOptions returns false when the command line could not be parsed
# (e.g. an unknown option); remember that so we do not run on bad input.
my $args_ok = GetOptions(\%opts, "i:s", "o:s", "suffix:s");
# Make sure later code never interpolates an undefined suffix.
$opts{suffix} = '' unless defined $opts{suffix};
# Count required options that are absent or empty. `length` is used rather
# than plain truthiness so that a literal "0" is accepted as a value.
my $missing_required = grep { !defined $opts{$_} || !length $opts{$_} } qw(i o);
if (!$args_ok or $missing_required) {
    # Usage goes to STDERR and the exit status is non-zero so that calling
    # scripts/pipelines can detect that nothing was done.
    print STDERR "
Description:
This script is used to merge serveral quant files into a a matrix
USAGE: perl $0
-i input dir
-o output file
-suffix the suffix of input file in input dir\n\n";
    exit 1;
}
## ***************
## ** Load data **
## ***************
# Reads every regular file in the input directory whose name ends with the
# requested suffix and fills two structures used when writing the matrix:
#   @SampleList              sample ids (file name minus suffix), in load order
#   %Hash_Feat2Sample2Quant  {feature}{sample id} => second column of the line
print STDERR "\tread dir and load quantitative data ... \n";
# Collect the names of all entries in the input directory.
my $indir = $opts{i};
opendir(my $dir_handle, $indir) or die "can't opendir $indir: $!";
# readdir order is unspecified; sort so the sample (column) order is stable.
my @files = sort readdir($dir_handle);
closedir $dir_handle;
# A missing -suffix means "do not filter by suffix"; never interpolate undef.
my $suffix = defined $opts{suffix} ? $opts{suffix} : '';
my $file_count = 0;
my %Hash_Feat2Sample2Quant = ();
my @SampleList = ();
foreach my $file (@files) {
    # Only handle names ending in the suffix. \Q...\E makes the suffix match
    # literally, so the "." in ".txt" is not treated as a regex wildcard.
    next unless $file =~ /\Q$suffix\E\z/;
    my $path = "$indir/$file";
    # Skip ".", ".." and any sub-directory or other non-regular entry.
    next unless -f $path;
    $file_count++;
    # Derive the sample id by stripping the suffix in Perl instead of shelling
    # out to `basename` (which broke on names with spaces/metacharacters).
    # Like basename, keep the name unchanged when it consists only of the suffix.
    my $sampleId = $file;
    if (length($suffix) and length($file) > length($suffix)) {
        $sampleId = substr($file, 0, length($file) - length($suffix));
    }
    push @SampleList, $sampleId;
    print STDERR "\t\t$file_count: $file\n";
    open(my $in, '<', $path) or die "can't open $path: $!";
    while (my $line = <$in>) {
        # Strip the line ending, including a CR from Windows-style files.
        $line =~ s/\r?\n\z//;
        my @row = split /\t/, $line;
        # Blank lines (or lines made only of tabs) carry no feature name.
        next unless @row;
        # Column 1 is the feature name, column 2 its value for this sample.
        $Hash_Feat2Sample2Quant{$row[0]}{$sampleId} = $row[1];
    }
    close $in;
}
print STDERR "\tin total $file_count files loaded; \n\n";
print STDERR "\twarning: no file in $indir ends with '$suffix'\n" if $file_count == 0;
## ****************
## ** Write data **
## ****************
# Writes one tab-separated row per feature: the feature name followed by its
# value for every sample in @SampleList (0 when a sample lacks that feature).
print STDERR "\tnow ready to generate output ... \n";
## --- open OUT file for output --
# Three-argument open: the output path is never parsed for a mode prefix.
open(my $out, '>', $opts{o}) or die "can't open $opts{o} for writing: $!";
print {$out} join("\t", "GENE", @SampleList), "\n"; ## header row of the matrix
# Number of feature rows written so far (drives the progress messages).
my $feat_total = 0;
# Iterate over sorted feature names so the row order is reproducible
# between runs (plain hash iteration order is randomised).
foreach my $feat (sort keys %Hash_Feat2Sample2Quant) {
    my $quant_of = $Hash_Feat2Sample2Quant{$feat};
    $feat_total++;
    # One cell per sample, in header order; absent samples are reported as 0.
    my @current_row = map { exists $quant_of->{$_} ? $quant_of->{$_} : 0 } @SampleList;
    print {$out} join("\t", $feat, @current_row), "\n";
    if ($feat_total % 10000 == 0) {
        print STDERR "\t\t$feat_total features have been written into outfile\n";
    }
}
# Buffered write errors (e.g. disk full) only surface at close, so check it.
close($out) or die "can't close $opts{o}: $!";
print STDERR "\tall jobs done; \n\n";