Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: adjust final report; add full region scanning in CSM #34

Merged
merged 23 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .test/integration/module_CSM/config1_kmers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ CSM_regions_files:
"3ss": "/home/runner/work/MAPP/MAPP/.test/integration/module_CSM/3ss.bed"
"5ss": "/home/runner/work/MAPP/MAPP/.test/integration/module_CSM/5ss.bed"

# paths to BED-formatted files with additional regions to be scanned in full (without sliding windows)
CSM_additional_regions_files:
"full_region_test": "/home/runner/work/MAPP/MAPP/.test/integration/module_CSM/full_region.bed"

# sliding window configuration: window size and slide step
CSM_window_size: "50"
CSM_window_step: "25"
Expand Down
3 changes: 3 additions & 0 deletions .test/integration/module_CSM/config2_pwms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ CSM_regions_files:
"3ss": "/home/runner/work/MAPP/MAPP/.test/integration/module_CSM/3ss.bed"
"5ss": "/home/runner/work/MAPP/MAPP/.test/integration/module_CSM/5ss.bed"

# paths to BED-formatted files with additional regions to be scanned in full (without sliding windows)
CSM_additional_regions_files: {}

# sliding window configuration: window size and slide step
CSM_window_size: "50"
CSM_window_step: "25"
Expand Down
Empty file.
24 changes: 22 additions & 2 deletions modules/CREATE_SITECOUNT_MATRICES/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,16 @@ def CSM_get_abspath(p):
"""
return(p if p[0] == os.sep else os.path.abspath(p))

def CSM_region_helper(wildcards):
    """
    Return the path to the BED file registered for a region ID.

    The ID is read from ``wildcards.CSM_region_id`` and looked up first in
    ``config["CSM_regions_files"]`` (regions analyzed over windows) and
    then in ``config["CSM_additional_regions_files"]`` (regions analyzed
    in full).

    Raises a KeyError naming the offending ID and the searched config
    sections when the ID is registered in neither mapping.
    """
    region_id = wildcards.CSM_region_id

    windowed_regions = config["CSM_regions_files"]
    if region_id in windowed_regions:
        return windowed_regions[region_id]

    # The additional-regions section is optional: a missing or null entry
    # is treated as "no additional regions" so that the error below always
    # reports the unknown region ID rather than the absent section.
    full_regions = config.get("CSM_additional_regions_files") or {}
    if region_id in full_regions:
        return full_regions[region_id]

    raise KeyError(
        "Region ID '{}' not found in config sections 'CSM_regions_files' "
        "or 'CSM_additional_regions_files'".format(region_id)
    )

##############################################################################
### Target rule with final output of the pipeline
##############################################################################
Expand All @@ -117,6 +127,17 @@ rule CSM_all:
CSM_outdir = CSM_generate_all_matrices_wildcards()["output_dir"],
CSM_region_id = CSM_generate_all_matrices_wildcards()["region_IDs"],
CSM_window_id = CSM_generate_all_matrices_wildcards()["matrix_IDs"]
),
LIST_additional_sitecount_matrices_links = expand(
os.path.join(
"{CSM_outdir}",
"{CSM_region_id}",
"{CSM_window_id}",
"matrix.tsv"
),
CSM_outdir = config["CSM_outdir"],
CSM_region_id = config["CSM_additional_regions_files"].keys(),
CSM_window_id = "FULL"
)

##############################################################################
Expand Down Expand Up @@ -177,8 +198,7 @@ rule CSM_extract_window_coord_and_sequence:
"{CSM_outdir}",
"CSM_outdir"
),
BED_region = lambda wildcards: \
config["CSM_regions_files"][wildcards.CSM_region_id],
BED_region = lambda wildcards: CSM_region_helper(wildcards),
FASTA_genome = config["CSM_genomic_sequence"],
SCRIPT_ = os.path.join(
config["CSM_scripts_dir"],
Expand Down
3 changes: 3 additions & 0 deletions modules/CREATE_SITECOUNT_MATRICES/configs/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ CSM_regions_files:
"3ss": ""
"5ss": ""

# paths to BED-formatted files with additional regions to be scanned in full (without sliding windows)
CSM_additional_regions_files: {}

# sliding window configuration: window size and slide step
CSM_window_size: "50"
CSM_window_step: "25"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,82 +69,87 @@ def parse_arguments():
def main():
"""Main body of the script."""

# test that the input BED-file indeed contains coordinates for single sites
outdir = options.outdir

with open(options.bed) as bed_file:
bed_lines = bed_file.read().splitlines()
for line in bed_lines:
line_parsed = line.split("\t")
assert int(line_parsed[2]) - int(line_parsed[1]) == 1

outdir = options.outdir

# parse the relative coordinates from the encoded window name
coords = [i[1:].split("to") for i in options.window.split(".")]
coords = [coords[0][0], coords[0][1], coords[1][0], coords[1][1]]

# Construct bash commands to extract region coordinates into bed
if int(coords[0]) and not int(coords[3]): # whole window upstream the site
assert not int(coords[2])
beg = coords[0]
end = coords[1]
command = (
"cat "
+ options.bed
+ ' | awk -F "\\t" \
\'{{ if ($6 == "+") print $1"\\t"$2-'
+ beg
+ '"\\t"$2-'
+ end
+ '"\\t"$4"\\t"$5"\\t"$6; else if ($6 == "-") print $1"\\t"$3+'
+ end
+ '"\\t"$3+'
+ beg
+ '"\\t"$4"\\t"$5"\\t"$6}}\' \
1> '
+ os.path.join(outdir, "coordinates.bed")
)
if options.window == "FULL":
command = "cp " + options.bed + " " + os.path.join(outdir, "coordinates.bed")

else:
# test that the input BED-file indeed contains coordinates for single sites
for line in bed_lines:
line_parsed = line.split("\t")
assert int(line_parsed[2]) - int(line_parsed[1]) == 1

# parse the relative coordinates from the encoded window name
coords = [i[1:].split("to") for i in options.window.split(".")]
coords = [coords[0][0], coords[0][1], coords[1][0], coords[1][1]]

# Construct bash commands to extract region coordinates into bed
if int(coords[0]) and not int(coords[3]): # whole window upstream the site
assert not int(coords[2])
beg = coords[0]
end = coords[1]
command = (
"cat "
+ options.bed
+ ' | awk -F "\\t" \
\'{{ if ($6 == "+") print $1"\\t"$2-'
+ beg
+ '"\\t"$2-'
+ end
+ '"\\t"$4"\\t"$5"\\t"$6; else if ($6 == "-") print $1"\\t"$3+'
+ end
+ '"\\t"$3+'
+ beg
+ '"\\t"$4"\\t"$5"\\t"$6}}\' \
1> '
+ os.path.join(outdir, "coordinates.bed")
)

elif int(coords[3]) and not int(coords[0]): # whole window downstream the site
assert not int(coords[1])
beg = coords[2]
end = coords[3]
command = (
"cat "
+ options.bed
+ ' | awk -F "\\t" \
\'{{ if ($6 == "+") print $1"\\t"$2+'
+ beg
+ '"\\t"$2+'
+ end
+ '"\\t"$4"\\t"$5"\\t"$6; else if ($6 == "-") print $1"\\t"$3-'
+ end
+ '"\\t"$3-'
+ beg
+ '"\\t"$4"\\t"$5"\\t"$6}}\' \
1> '
+ os.path.join(outdir, "coordinates.bed")
)
elif int(coords[3]) and not int(coords[0]): # whole window downstream the site
assert not int(coords[1])
beg = coords[2]
end = coords[3]
command = (
"cat "
+ options.bed
+ ' | awk -F "\\t" \
\'{{ if ($6 == "+") print $1"\\t"$2+'
+ beg
+ '"\\t"$2+'
+ end
+ '"\\t"$4"\\t"$5"\\t"$6; else if ($6 == "-") print $1"\\t"$3-'
+ end
+ '"\\t"$3-'
+ beg
+ '"\\t"$4"\\t"$5"\\t"$6}}\' \
1> '
+ os.path.join(outdir, "coordinates.bed")
)

else: # window goes through the site
assert not int(coords[1]) and not int(coords[2])
beg = coords[0]
end = coords[3]
command = (
"cat "
+ options.bed
+ ' | awk -F "\\t" \
\'{{ if ($6 == "+") print $1"\\t"$2-'
+ beg
+ '"\\t"$2+'
+ end
+ '"\\t"$4"\\t"$5"\\t"$6; else if ($6 == "-") print $1"\\t"$3-'
+ end
+ '"\\t"$3+'
+ beg
+ '"\\t"$4"\\t"$5"\\t"$6}}\' \
1> '
+ os.path.join(outdir, "coordinates.bed")
)
else: # window goes through the site
assert not int(coords[1]) and not int(coords[2])
beg = coords[0]
end = coords[3]
command = (
"cat "
+ options.bed
+ ' | awk -F "\\t" \
\'{{ if ($6 == "+") print $1"\\t"$2-'
+ beg
+ '"\\t"$2+'
+ end
+ '"\\t"$4"\\t"$5"\\t"$6; else if ($6 == "-") print $1"\\t"$3-'
+ end
+ '"\\t"$3+'
+ beg
+ '"\\t"$4"\\t"$5"\\t"$6}}\' \
1> '
+ os.path.join(outdir, "coordinates.bed")
)

# call bash cat->awk command to extract absolute coordinates of the window
os.system(command)
Expand Down
97 changes: 49 additions & 48 deletions modules/PREPARE_TANDEM_PAS/images/rulegraph.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions scripts/create-main-config-file.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ def main():
"pas": "{tandem_pas_representative_sites_coordinates}"
"3ss": "{_3ss_coordinates}"
"5ss": "{_5ss_coordinates}"
CSM_additional_regions_files: {{}}
CSM_window_step: {int(template["window_size"]/2)}
CSM_matrix_type: "{template["matrix_type"]}"
CSM_kmer_min: {template["k_min"]}
Expand Down
Loading