Skip to content

Commit

Permalink
Check to see that no amplicons and their reverse complement exist in …
Browse files Browse the repository at this point in the history
…the input regions
  • Loading branch information
kclem committed Nov 27, 2024
1 parent bb82606 commit 36cdb8b
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions CRISPResso2/CRISPRessoPooledCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,13 @@ def main():
duplicated_entries = df_template.amplicon_seq[df_template.amplicon_seq.duplicated()]
raise Exception('The amplicon sequences must be distinct! (Duplicated entries: ' + str(duplicated_entries.values) + ')')

# Check that no amplicon sequence is the reverse complement of a *different*
# amplicon sequence in the region file.
# Beware: amp_seqs is a numpy array of dtype str, and adding the arrays
# (amp_seqs + rc_amp_seqs) would concatenate the strings, not the arrays, so the
# reverse complements are kept in their own containers.
amp_seqs = df_template.amplicon_seq.values
rc_amp_seqs = [CRISPRessoShared.reverse_complement(amp_seq) for amp_seq in amp_seqs]
# Set membership keeps the scan linear in the number of amplicons instead of
# re-scanning the whole list for every sequence.
rc_amp_seq_set = set(rc_amp_seqs)
for seq, rc_seq in zip(amp_seqs, rc_amp_seqs):
    # A sequence that equals its own reverse complement only matches itself here;
    # there is no second entry for the user to remove, so it is not an error.
    if seq == rc_seq:
        continue
    if seq in rc_amp_seq_set:
        raise Exception('Amplicon sequences must be distinct! The amplicon sequence %s is the reverse complement of another amplicon sequence in the region file. Please provide only one of the two sequences.' % seq)

# Every amplicon name must appear exactly once: compare the number of distinct
# names against the number of rows and report the repeated names on mismatch.
distinct_name_count = len(df_template.amplicon_name.unique())
if distinct_name_count != df_template.shape[0]:
    repeated_name_mask = df_template.amplicon_name.duplicated()
    repeated_names = df_template.amplicon_name[repeated_name_mask]
    raise Exception('The amplicon names must be distinct! (Duplicated names: ' + str(repeated_names.values) + ')')
Expand Down

0 comments on commit 36cdb8b

Please sign in to comment.