forked from LUMC/ribosome-profiling-analysis-framework
-
Notifications
You must be signed in to change notification settings - Fork 0
/
wig2batchfile.php
executable file
·108 lines (101 loc) · 3.78 KB
/
wig2batchfile.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/php
<?php
/*******************************************************************************
*
* WIG2BATCHFILE converts wiggle files to the Mutalyzer batch format, such that
* the position converter can be used.
*
* Created : 2013-04-10
* Modified : 2015-04-07
* Version : 0.31
*
* Copyright : 2013-2015 Leiden University Medical Center; http://www.LUMC.nl/
* Programmer : Ing. Ivo F.A.C. Fokkema <[email protected]>
*
* Changelog : 0.2 2013-08-13
* While generating the batchfile, the wiggle file is already
* filtered for low coverage. This results in faster conversion,
* smaller files, and also less calculation time for Mutalyzer.
* 0.3 2014-07-10
* Fixed problem with skipping the chrom=NC_* header, adding its
* positions to the last used chromosome (usually chrY).
* 0.31 2015-04-07
* Script doesn't allow for passing more than one argument anymore
* since this feature is never used, and can actually lead to
* confusion.
*
*
* This work is licensed under the Creative Commons
* Attribution-NonCommercial-ShareAlike 4.0 International License. To view a
* copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/4.0/
* or send a letter to:
* Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
*
*************/
// Script-wide settings.
//   version      : Shown in the banner below. Keep this in sync with the
//                  "Version" field and the changelog in the file header.
//   min_coverage : Positions with a coverage lower than this value are not
//                  written to the batch file.
$_SETT =
    array(
        'version' => '0.31',
        'min_coverage' => 3,
    );

// Print the banner, so the version in use is visible in the script's output.
echo 'WIG2BATCHFILE v.' . $_SETT['version'] . "\n";
// Command line handling: exactly one argument is accepted, the wiggle file.
$aFiles = $_SERVER['argv'];
$sScriptName = array_shift($aFiles); // The first element is the script itself.
if (count($aFiles) != 1) {
    echo 'Usage: ' . $sScriptName . ' WIG_FILE' . "\n\n";
    // die('message') would exit with status 0; use a non-zero exit status so
    //  that calling scripts can detect the failure.
    exit(1);
}

// Check if all files can be read.
foreach ($aFiles as $sFile) {
    // is_readable() also returns true for a readable directory, which can not
    //  be parsed as a wiggle file, so reject those here as well.
    if (!is_readable($sFile) || is_dir($sFile)) {
        echo 'Unable to open ' . $sFile . '.' . "\n";
        exit(1);
    }
}
// NOTE: The script accepts only one argument, so the common prefix computed
//  below is simply that file's name. The code is kept generic, so that it
//  still works if multiple input files are ever allowed again.
// Find prefix for file(s), to have an output file that matches the name.
$nPrefixLength = min(array_map('strlen', $aFiles)); // Length of the shortest file name in argument list.
$sPrefix = substr($aFiles[0], 0, $nPrefixLength); // Limit prefix already to length of shortest file name.
foreach ($aFiles as $sFile) {
    for ($i = 0; $i < $nPrefixLength; $i ++) {
        // Use square brackets for string offsets; the $sString{$i} syntax is
        //  deprecated since PHP 7.4 and a parse error since PHP 8.0.
        if ($sPrefix[$i] !== $sFile[$i]) {
            // No match! Shorten the prefix to the part that still matches.
            $sPrefix = substr($sPrefix, 0, $i);
            $nPrefixLength = strlen($sPrefix);
            break; // Go to next file.
        }
    }
}

// Strip trailing separators, so the name doesn't end up with e.g. "._" in it.
$sPrefix = rtrim($sPrefix, '._-');
// Compare with '' explicitly; a prefix of "0" is a valid name but evaluates to
//  false, which would drop both the prefix and the separator.
$sFileNameOut = $sPrefix . ($sPrefix === ''? '' : '_') . 'mutalyzer_batchfile.txt';
// Open the output file. Warnings are suppressed; we print our own message.
$fOut = @fopen($sFileNameOut, 'w');
if (!$fOut) {
    echo 'Unable to open file for writing.' . "\n\n";
    exit(1); // Non-zero exit status, so calling scripts can detect the failure.
}

// $nLines counts the lines written to the batch file, $nFiltered counts the
//  positions left out because their coverage is below the minimum.
$nLines = $nFiltered = 0;
foreach ($aFiles as $sFile) {
    // Read the input line by line instead of loading it completely using
    //  file(); wiggle files can be very large.
    $fIn = @fopen($sFile, 'r');
    if (!$fIn) {
        fclose($fOut);
        echo 'Unable to open ' . $sFile . '.' . "\n";
        exit(1);
    }

    $sChrom = ''; // Current chromosome; empty while positions should be ignored.
    while (($sLine = fgets($fIn)) !== false) {
        // Strip the line ending (also \r\n), otherwise it ends up in the
        //  chromosome name or in the coverage value.
        $sLine = rtrim($sLine, "\r\n");

        if (preg_match('/^variableStep chrom=(.+)$/', $sLine, $aRegs)) {
            // Chromosome found.
            // Only chr1, chr2, ..., chrX and chrY are accepted. For now, chrM
            //  is deliberately ignored. For all other sequences (such as
            //  chrom=NC_*), $sChrom is emptied such that their positions are
            //  not added to the previously used chromosome.
            if (!preg_match('/^chr([0-9]+|[XY])$/', $aRegs[1])) {
                echo 'Unrecognized chromosome: ' . $aRegs[1] . "\n";
                $sChrom = '';
            } else {
                $sChrom = $aRegs[1];
            }
            continue;
        }

        if ($sChrom === '') {
            // No (recognized) chromosome is active; ignore this line.
            continue;
        }

        // A data line consists of a position and a coverage, tab separated.
        $aFields = explode("\t", $sLine);
        if (count($aFields) < 2) {
            // Not a data line (empty line, track header, ...); without this
            //  check list() would read a non-existing array element.
            continue;
        }
        $nPos = trim($aFields[0]);
        $nCoverage = trim($aFields[1]);
        if (!preg_match('/^[0-9]+$/', $nPos) || !is_numeric($nCoverage)) {
            // Malformed data line. Skip it; comparing a non-numeric coverage
            //  to the threshold gives different results between PHP versions.
            continue;
        }

        if ($nCoverage >= $_SETT['min_coverage']) {
            // Each position is written as a single-nucleotide deletion, one
            //  per line, using \r\n line endings.
            fputs($fOut, $sChrom . ':g.' . $nPos . 'del' . "\r\n");
            $nLines ++;
        } else {
            $nFiltered ++;
        }
    }
    fclose($fIn);
}
fclose($fOut);

echo 'Done, ' . $nLines . ' lines written (' . $nFiltered . ' filtered, coverage too low).' . "\n";
exit(0);
?>