Skip to content

Commit

Permalink
Cole/update args (#85) (#456)
Browse files Browse the repository at this point in the history
  • Loading branch information
Colelyman authored Jul 18, 2024
1 parent 44f692e commit 8d92972
Show file tree
Hide file tree
Showing 11 changed files with 49 additions and 1,331 deletions.
14 changes: 0 additions & 14 deletions CRISPResso.py

This file was deleted.

13 changes: 12 additions & 1 deletion CRISPResso2/CRISPRessoShared.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import gzip
import json
import sys
import textwrap
import importlib.util
from pathlib import Path

Expand Down Expand Up @@ -134,8 +135,18 @@ def set_console_log_level(logger, level, debug=False):
break


class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that honours explicit line breaks in ``R|`` help text.

    Help strings that start with the marker ``R|`` keep the newlines they
    contain (useful for bulleted lists); every other help string is
    formatted exactly as the stock argparse formatter would.
    """

    def _split_lines(self, text, width):
        """Split ``text`` into display lines that are at most ``width`` wide.

        Args:
            text: The help string. A leading ``R|`` marker switches on
                newline-preserving mode and is stripped from the output.
            width: Maximum width of each returned line.

        Returns:
            A list of single-line strings (no embedded newlines). The list
            always holds at least one element in ``R|`` mode.
        """
        if not text.startswith('R|'):
            return super()._split_lines(text, width)

        lines = []
        for paragraph in text[2:].splitlines():
            # Return one list entry per physical line instead of a single
            # string with embedded newlines and a hard-coded 24-space
            # indent: argparse indents each returned line to the help
            # column itself, so a fixed indent only lines up when that
            # column happens to be 24 and wastes width otherwise.
            # textwrap.wrap() yields [] for blank input; keep the blank line.
            lines.extend(textwrap.wrap(paragraph, width) or [''])
        # argparse indexes the first returned line, so never return [].
        return lines or ['']


def getCRISPRessoArgParser(tool, parser_title="CRISPResso Parameters"):
parser = argparse.ArgumentParser(description=parser_title, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser = argparse.ArgumentParser(description=parser_title, formatter_class=CustomHelpFormatter)
parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)

# Getting the directory of the current script
Expand Down
25 changes: 17 additions & 8 deletions CRISPResso2/args.json
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,15 @@
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"expected_hdr_amplicon_seq": {
"name": "Expected HDR Amplicon Sequence:",
"name": "Expected HDR Amplicon Sequence",
"keys": ["-e", "--expected_hdr_amplicon_seq"],
"help": "Amplicon sequence expected after HDR",
"type": "str",
"default": "",
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"coding_seq": {
"name": "Exon Specification Coding Sequence/s:",
"name": "Exon Specification Coding Sequence/s",
"keys": ["-c", "--coding_seq"],
"help": "Subsequence/s of the amplicon sequence covering one or more coding sequences for frameshift analysis. If more than one (for example, split by intron/s), please separate by commas.",
"type": "str",
Expand All @@ -121,23 +121,23 @@
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"min_average_read_quality": {
"name": "Minimum Average Read Quality(phred33 Scale)",
"name": "Minimum Average Read Quality (phred33 Scale)",
"keys": ["-q", "--min_average_read_quality"],
"help": "Minimum average quality score (phred33) to keep a read",
"type": "int",
"default": 0,
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"min_single_bp_quality": {
"name": "Minimum Single bp Quality(phred33 Scale)",
"name": "Minimum Single bp Quality (phred33 Scale)",
"keys": ["-s", "--min_single_bp_quality"],
"help": "Minimum single bp score (phred33) to keep a read",
"type": "int",
"default": 0,
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"min_bp_quality_or_N": {
"name": "Minimum bp Quality or N(phred33 Scale)",
"name": "Minimum bp Quality or N (phred33 Scale)",
"keys": ["--min_bp_quality_or_N"],
"help": "Bases with a quality score (phred33) less than this value will be set to 'N'",
"type": "int",
Expand Down Expand Up @@ -340,6 +340,7 @@
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"needleman_wunsch_aln_matrix_loc": {
"name": "Needleman Wunsch Alignment Matrix Location",
"keys": ["--needleman_wunsch_aln_matrix_loc"],
"help": "Location of the matrix specifying substitution scores in the NCBI format (see ftp://ftp.ncbi.nih.gov/blast/matrices/)",
"type": "str",
Expand Down Expand Up @@ -395,6 +396,7 @@
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"allele_plot_pcts_only_for_assigned_reference": {
"name": "Allele Plot Percentages Only for Assigned Reference",
"keys": ["--allele_plot_pcts_only_for_assigned_reference"],
"help": "If set, in the allele plots, the percentages will show the percentage as a percent of reads aligned to the assigned reference. Default behavior is to show percentage as a percent of all reads.",
"action": "store_true",
Expand Down Expand Up @@ -586,12 +588,14 @@
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"crispresso1_mode": {
"name": "CRISPResso 1 Mode",
"keys": ["--crispresso1_mode"],
"help": "Parameter usage as in CRISPResso 1",
"action": "store_true",
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"dsODN": {
"name": "dsODN",
"keys": ["--dsODN"],
"type": "str",
"help": "Label reads with the dsODN sequence provided",
Expand All @@ -617,6 +621,7 @@
"tools": ["Core", "Batch", "Pooled", "WGS"]
},
"n_processes": {
"name": "Number of Processes",
"keys": ["-p", "--n_processes"],
"type": "str",
"help": "Specify the number of processes to use for analysis. Please use with caution since increasing this parameter will significantly increase the memory required to run CRISPResso. Can be set to 'max'.",
Expand All @@ -631,6 +636,7 @@
"tools": ["Core", "Batch", "Pooled"]
},
"bam_chr_loc": {
"name": "BAM Chromosome Location",
"keys": ["--bam_chr_loc"],
"type": "str",
"help": "Chromosome location in bam for reads to process. For example: 'chr1:50-100' or 'chrX'.",
Expand Down Expand Up @@ -677,6 +683,7 @@
"tools": ["Batch"]
},
"crispresso_command": {
"name": "CRISPResso Command",
"keys": ["--crispresso_command"],
"help": "CRISPResso command to call",
"type": "str",
Expand All @@ -685,7 +692,7 @@
},
"amplicons_file": {
"keys": ["-f", "--amplicons_file"],
"help": "Amplicons description file. This file is a tab-delimited text file with up to 14 columns (2 required): amplicon_name: an identifier for the amplicon (must be unique). amplicon_seq: amplicon sequence used in the experiment. guide_seq (OPTIONAL): sgRNA sequence used for this amplicon without the PAM sequence. Multiple guides can be given separated by commas and not spaces. expected_hdr_amplicon_seq (OPTIONAL): expected amplicon sequence in case of HDR. coding_seq (OPTIONAL): Subsequence(s) of the amplicon corresponding to coding sequences. If more than one separate them by commas and not spaces. prime_editing_pegRNA_spacer_seq (OPTIONAL): pegRNA spacer sgRNA sequence used in prime editing. The spacer should not include the PAM sequence. The sequence should be given in the RNA 5'->3' order, so for Cas9, the PAM would be on the right side of the given sequence. prime_editing_nicking_guide_seq (OPTIONAL): Nicking sgRNA sequence used in prime editing. The sgRNA should not include the PAM sequence. The sequence should be given in the RNA 5'->3' order, so for Cas9, the PAM would be on the right side of the sequence. prime_editing_pegRNA_extension_seq (OPTIONAL): Extension sequence used in prime editing. The sequence should be given in the RNA 5'->3' order, such that the sequence starts with the RT template including the edit, followed by the Primer-binding site (PBS). prime_editing_pegRNA_scaffold_seq (OPTIONAL): If given, reads containing any of this scaffold sequence before extension sequence (provided by --prime_editing_extension_seq) will be classified as 'Scaffold-incorporated'. The sequence should be given in the 5'->3' order such that the RT template directly follows this sequence. A common value ends with 'GGCACCGAGUCGGUGC'. prime_editing_pegRNA_scaffold_min_match_length (OPTIONAL): Minimum number of bases matching scaffold sequence for the read to be counted as 'Scaffold-incorporated'. 
If the scaffold sequence matches the reference sequence at the incorporation site, the minimum number of bases to match will be minimally increased (beyond this parameter) to disambiguate between prime-edited and scaffold-incorporated sequences. prime_editing_override_prime_edited_ref_seq (OPTIONAL): If given, this sequence will be used as the prime-edited reference sequence. This may be useful if the prime-edited reference sequence has large indels or the algorithm cannot otherwise infer the correct reference sequence. quantification_window_coordinates (OPTIONAL): Bp positions in the amplicon sequence specifying the quantification window. This parameter overrides values of the '--quantification_window_center', '-- cleavage_offset', '--window_around_sgrna' or '-- window_around_sgrna' values. Any indels/substitutions outside this window are excluded. Indexes are 0-based, meaning that the first nucleotide is position 0. Ranges are separated by the dash sign like 'start-stop', and multiple ranges can be separated by the underscore (_). A value of 0 disables this filter. (can be comma-separated list of values, corresponding to amplicon sequences given in --amplicon_seq e.g. 5-10,5-10_20-30 would specify the 5th-10th bp in the first reference and the 5th-10th and 20th-30th bp in the second reference) (default: None) quantification_window_size (OPTIONAL): Defines the size (in bp) of the quantification window extending from the position specified by the '--cleavage_offset' or '--quantification_window_center' parameter in relation to the provided guide RNA sequence(s) (--sgRNA). Mutations within this number of bp from the quantification window center are used in classifying reads as modified or unmodified. A value of 0 disables this window and indels in the entire amplicon are considered. Default is 1, 1bp on each side of the cleavage position for a total length of 2bp. 
quantification_window_center (OPTIONAL): Center of quantification window to use within respect to the 3' end of the provided sgRNA sequence. Remember that the sgRNA sequence must be entered without the PAM. For cleaving nucleases, this is the predicted cleavage position. The default is -3 and is suitable for the Cas9 system. For alternate nucleases, other cleavage offsets may be appropriate, for example, if using Cpf1 this parameter would be set to 1. For base editors, this could be set to -17.",
"help": "R|Amplicons description file. This file is a tab-delimited text file with up to 14 columns (2 required):\n - amplicon_name: an identifier for the amplicon (must be unique).\n - amplicon_seq: amplicon sequence used in the experiment.\n - guide_seq (OPTIONAL): sgRNA sequence used for this amplicon without the PAM sequence. Multiple guides can be given separated by commas and not spaces.\n - expected_hdr_amplicon_seq (OPTIONAL): expected amplicon sequence in case of HDR.\n - coding_seq (OPTIONAL): Subsequence(s) of the amplicon corresponding to coding sequences. If more than one separate them by commas and not spaces.\n - prime_editing_pegRNA_spacer_seq (OPTIONAL): pegRNA spacer sgRNA sequence used in prime editing. The spacer should not include the PAM sequence. The sequence should be given in the RNA 5'->3' order, so for Cas9, the PAM would be on the right side of the given sequence.\n - prime_editing_nicking_guide_seq (OPTIONAL): Nicking sgRNA sequence used in prime editing. The sgRNA should not include the PAM sequence. The sequence should be given in the RNA 5'->3' order, so for Cas9, the PAM would be on the right side of the sequence.\n - prime_editing_pegRNA_extension_seq (OPTIONAL): Extension sequence used in prime editing. The sequence should be given in the RNA 5'->3' order, such that the sequence starts with the RT template including the edit, followed by the Primer-binding site (PBS).\n - prime_editing_pegRNA_scaffold_seq (OPTIONAL): If given, reads containing any of this scaffold sequence before extension sequence (provided by --prime_editing_extension_seq) will be classified as 'Scaffold-incorporated'. The sequence should be given in the 5'->3' order such that the RT template directly follows this sequence. A common value ends with 'GGCACCGAGUCGGUGC'.\n - prime_editing_pegRNA_scaffold_min_match_length (OPTIONAL): Minimum number of bases matching scaffold sequence for the read to be counted as 'Scaffold-incorporated'. 
If the scaffold sequence matches the reference sequence at the incorporation site, the minimum number of bases to match will be minimally increased (beyond this parameter) to disambiguate between prime-edited and scaffold-incorporated sequences.\n - prime_editing_override_prime_edited_ref_seq (OPTIONAL): If given, this sequence will be used as the prime-edited reference sequence. This may be useful if the prime-edited reference sequence has large indels or the algorithm cannot otherwise infer the correct reference sequence.\n - quantification_window_coordinates (OPTIONAL): Bp positions in the amplicon sequence specifying the quantification window. This parameter overrides values of the '--quantification_window_center', '--cleavage_offset', '--window_around_sgrna' or '--window_around_sgrna' values. Any indels/substitutions outside this window are excluded. Indexes are 0-based, meaning that the first nucleotide is position 0. Ranges are separated by the dash sign like 'start-stop', and multiple ranges can be separated by the underscore (_). A value of 0 disables this filter. (can be comma-separated list of values, corresponding to amplicon sequences given in --amplicon_seq e.g. 5-10,5-10_20-30 would specify the 5th-10th bp in the first reference and the 5th-10th and 20th-30th bp in the second reference) (default: None)\n - quantification_window_size (OPTIONAL): Defines the size (in bp) of the quantification window extending from the position specified by the '--cleavage_offset' or '--quantification_window_center' parameter in relation to the provided guide RNA sequence(s) (--sgRNA). Mutations within this number of bp from the quantification window center are used in classifying reads as modified or unmodified. A value of 0 disables this window and indels in the entire amplicon are considered. 
Default is 1, 1bp on each side of the cleavage position for a total length of 2bp.\n - quantification_window_center (OPTIONAL): Center of quantification window to use within respect to the 3' end of the provided sgRNA sequence. Remember that the sgRNA sequence must be entered without the PAM. For cleaving nucleases, this is the predicted cleavage position. The default is -3 and is suitable for the Cas9 system. For alternate nucleases, other cleavage offsets may be appropriate, for example, if using Cpf1 this parameter would be set to 1. For base editors, this could be set to -17.",
"type": "str",
"default": "",
"tools": ["Pooled"]
Expand All @@ -711,6 +718,7 @@
"tools": ["Pooled"]
},
"min_reads_to_use_region_pooled": {
"name": "Minimum Reads to Use Region",
"keys": ["--min_reads_to_use_region"],
"help": "Minimum number of reads that align to a region to perform the CRISPResso analysis",
"type": "float",
Expand Down Expand Up @@ -777,7 +785,7 @@
},
"region_file": {
"keys": ["-f", "--region_file"],
"help": "Regions description file. A BED format file containing the regions to analyze, one per line. The REQUIRED columns are: chr_id(chromosome name), bpstart(start position), bpend(end position), the optional columns are:name (an unique indentifier for the region), guide_seq, expected_hdr_amplicon_seq, coding_seq, see CRISPResso help for more details on these last 3 parameters)",
"help": "R|Regions description file. A BED format file containing the regions to analyze, one per line. The REQUIRED columns are:\n - chr_id (chromosome name)\n - bpstart (start position)\n - bpend (end position)\n\nThe optional columns are:\n - name (a unique identifier for the region)\n - guide_seq\n - expected_hdr_amplicon_seq\n - coding_seq\nSee CRISPResso --help for more details on these last 3 parameters",
"type": "str",
"required": true,
"tools": ["WGS"]
Expand All @@ -791,6 +799,7 @@
"tools": ["WGS"]
},
"min_reads_to_use_region_wgs": {
"name": "Minimum Reads to Use Region",
"keys": ["--min_reads_to_use_region"],
"help": "Minimum number of reads that align to a region to perform the CRISPResso analysis for WGS",
"type": "float",
Expand Down Expand Up @@ -834,7 +843,7 @@
},
"use_matplotlib": {
"keys": ["--use_matplotlib"],
"help": "Use matplotlib for plotting instead of plotl/d3 when CRISPRessoPro is installed",
"help": "Use matplotlib for plotting instead of plotly/d3 when CRISPRessoPro is installed",
"action": "store_true",
"tools": ["Core", "Batch", "Pooled", "WGS", "Compare"]
}
Expand Down
14 changes: 0 additions & 14 deletions CRISPRessoAggregate.py

This file was deleted.

14 changes: 0 additions & 14 deletions CRISPRessoBatch.py

This file was deleted.

14 changes: 0 additions & 14 deletions CRISPRessoCompare.py

This file was deleted.

14 changes: 0 additions & 14 deletions CRISPRessoMeta.py

This file was deleted.

14 changes: 0 additions & 14 deletions CRISPRessoPooled.py

This file was deleted.

14 changes: 0 additions & 14 deletions CRISPRessoPooledWGSCompare.py

This file was deleted.

14 changes: 0 additions & 14 deletions CRISPRessoWGS.py

This file was deleted.

Loading

0 comments on commit 8d92972

Please sign in to comment.