-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.nf
115 lines (84 loc) · 2.68 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env nextflow

// Default input locations, resolved relative to the pipeline's project directory.
params.samples = "${workflow.projectDir}/test_data/wild_isolates.csv"
params.isd_inventory = "${workflow.projectDir}/isd-inventory.csv"

// Output directory: use params.out when it is set, otherwise build a name
// from params.day.
// NOTE(review): params.out and params.day are not assigned in this file;
// presumably they come from nextflow.config or the command line -- confirm.
if (params.out == null) {
    params.outdir = "noaa_analysis_${params.day}"
} else {
    params.outdir = params.out
}

// Help/debug runs use the bundled test CSV instead of the full sample sheet.
// Logical `||` (not bitwise `|`) so that an unset (null) or non-Boolean
// params.help / params.debug is evaluated with Groovy truth instead of
// raising an error.
if (params.help || params.debug) {
    params.in = "${workflow.projectDir}/test_data/noaa_test.csv"
} else {
    params.in = params.samples
}
// Entry workflow.
// Input: a CSV (params.in) with the columns "isotype", "latitude",
// "longitude" and "isolation_date".
// Steps: split the CSV into one TSV per isotype (with elevation added via the
// geonames package), look up station data for every per-isotype file, then
// merge all per-isotype results into a single table.
workflow {
    // One per-isotype *.raw.tsv file per channel item.
    isolate_table = Channel.fromPath(params.in)
    split_WI(isolate_table)

    // Pair every per-isotype file with the station inventory location.
    inventory = Channel.of(params.isd_inventory)
    strain_with_inventory = split_WI.out.flatten().combine(inventory)
    findStations(strain_with_inventory)

    // Gather all station results into one sorted list and merge them.
    joinData(findStations.out.toSortedList())
}
// Splits the input sample sheet into one TSV per isotype.
// The embedded R script reads the CSV, drops rows that are missing latitude,
// longitude or isolation_date, adds the columns abslat, abslong and elevation
// (elevation is fetched row by row with geonames::GNsrtm3), and writes
// "<isotype>.raw.tsv" for every unique isotype value.
process split_WI {
// NOTE(review): 'xs' is presumably a resource label defined in the pipeline config -- confirm.
label 'xs'
input:
// CSV file to split; interpolated into the R script below as ${infile}.
path(infile)
output:
// Every per-isotype file written by the script.
path("*.raw.tsv")
// In the script below, "\$" is escaped so that Nextflow passes a literal "$"
// through to R instead of treating it as a Groovy variable reference.
// NOTE(review): the geonames username is hard-coded, and the elevation lookup
// presumably needs network access at run time -- confirm.
"""
#!/usr/bin/env Rscript --vanilla
library(dplyr)
library(tidyr)
library(readr)
options(geonamesUsername="katiesevans")
library(geonames)
# read in raw data and filter out strains with no location
# add absolute value of latitude and longitude
# right now, add elevation because it is not in my file, can remove later
wi <- read.csv("${infile}") %>%
tidyr::drop_na(latitude, longitude, isolation_date) %>%
dplyr::rowwise() %>%
dplyr::mutate(abslat = abs(latitude),
abslong = abs(longitude),
elevation = geonames::GNsrtm3(latitude, longitude)\$srtm3)
# make new csv file for each strain
for(i in unique(wi\$isotype)) {
df <- wi %>%
dplyr::filter(isotype == i)
readr::write_tsv(df, paste0(i, ".raw.tsv"))
}
"""
}
// process downloadStations {
// publishDir "${workflow.projectDir}/", mode: 'copy'
// output:
// file("isd-inventory.csv")
// """
// wget 'ftp://ftp.ncdc.noaa.gov/pub/data/noaa/isd-inventory.csv'
// """
// }
// For each strain, finds the closest weather station and downloads the relevant data.
// Runs bin/find_stations.R once per per-strain file emitted by split_WI.
process findStations {
// NOTE(review): "sm" is presumably a resource label defined in the pipeline config -- confirm.
label "sm"
// Label each task with the name of the strain file it is processing.
tag { wi_location }
input:
// wi_location: one per-strain TSV; isd_inventory: the station inventory CSV.
tuple path(wi_location), path(isd_inventory)
output:
// Result file(s) written by find_stations.R.
path("*.noaa.tsv")
// Arguments passed to find_stations.R, in order: strain file, inventory file,
// params.months, params.events, params.important_trait, and the path to the
// helper script bin/get_isd_station_data_fix.R.
// NOTE(review): params.months, params.events and params.important_trait are
// not assigned in this file; presumably they come from nextflow.config or the
// command line -- confirm.
"""
Rscript --vanilla "${workflow.projectDir}/bin/find_stations.R" "${wi_location}" "${isd_inventory}" "${params.months}" "${params.events}" "${params.important_trait}" "${workflow.projectDir}/bin/get_isd_station_data_fix.R"
"""
}
// Concatenates all per-strain station tables into a single "phenotypes.tsv",
// which is copied into params.outdir.
process joinData {
    publishDir "${params.outdir}/", mode: 'copy'

    // Runs on the local executor with the container disabled.
    // NOTE(review): presumably because the inputs arrive as a plain value
    // (not staged), so the script must be able to read the original file
    // locations directly -- confirm.
    executor "local"
    container null

    input:
    // List of the per-strain result files (the workflow passes a sorted list).
    val(strain_file)

    output:
    // `path` replaces the deprecated `file` output qualifier, matching the
    // other processes in this pipeline.
    path("phenotypes.tsv")

    // awk keeps line 1 of the first file (NR==1) and skips line 1 of every
    // later file (FNR>1), so the header appears only once in the output.
    """
    # use this to only print the header of the first line
    awk 'FNR>1 || NR==1' ${strain_file.join(" ")} > phenotypes.tsv
    """
}