From 2bd096f4976f5e17e539ba6a8be3443008989c4a Mon Sep 17 00:00:00 2001 From: Ali Ghaffaari Date: Thu, 26 Jul 2018 12:42:50 +0200 Subject: [PATCH 1/5] Add docstring for ksnper command short description --- vcfy/ksnper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vcfy/ksnper.py b/vcfy/ksnper.py index e4c4dce..5728d24 100644 --- a/vcfy/ksnper.py +++ b/vcfy/ksnper.py @@ -113,6 +113,9 @@ def write_csv(output, vcf_file, ref_file, k, dialect='unix'): default='unix', show_default=True, help="Use this CSV dialect.") def cli(**kwargs): + """Report the number of SNPs in all k-mers. Specify the k and the VCF file, + it reports number of SNPS occurred in each k-mer. + """ write_csv(kwargs.pop('output'), kwargs.pop('vcf'), kwargs.pop('reference'), From 2b0a3b197a63f951b94c4d83dd75b587643033aa Mon Sep 17 00:00:00 2001 From: Ali Ghaffaari Date: Thu, 26 Jul 2018 12:49:50 +0200 Subject: [PATCH 2/5] Fix `write_csv` function to get VCF file by path The first positional argument of the `Reader` class of the `vcf` module is a file object (`fsock`); a file name should instead be provided through the `filename` keyword argument.
--- vcfy/ksnper.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vcfy/ksnper.py b/vcfy/ksnper.py index 5728d24..6b53720 100644 --- a/vcfy/ksnper.py +++ b/vcfy/ksnper.py @@ -93,7 +93,10 @@ def write_csv(output, vcf_file, ref_file, k, dialect='unix'): quoting=csv.QUOTE_NONE) csv_writer.writeheader() - vcf_reader = vcf.Reader(vcf_file) + if isinstance(vcf_file, str): + vcf_reader = vcf.Reader(filename=vcf_file) + else: + vcf_reader = vcf.Reader(vcf_file) if ref_file is None: ref_file = open(vcf_reader.metadata['reference'], 'r') bv = compute_snpbv(vcf_reader, reflen(ref_file)) From e9f9d1c3efc015f83b680512ec1900bc346017b9 Mon Sep 17 00:00:00 2001 From: Ali Ghaffaari Date: Thu, 26 Jul 2018 12:55:23 +0200 Subject: [PATCH 3/5] Add an argument to specify compression status The compression status (whether the input VCF file is compressed or not) is automatically determined from the input file name. When the input VCF file is streamed from standard input, there is no file name to inspect, so this status should be provided explicitly. --- vcfy/ksnper.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/vcfy/ksnper.py b/vcfy/ksnper.py index 6b53720..75d637e 100644 --- a/vcfy/ksnper.py +++ b/vcfy/ksnper.py @@ -71,7 +71,7 @@ def ksnpcounts(snpbv, k): yield kcount -def write_csv(output, vcf_file, ref_file, k, dialect='unix'): +def write_csv(output, vcf_file, ref_file, k, dialect='unix', compressed=None): """Write CSV file. Args: @@ -86,6 +86,9 @@ def write_csv(output, vcf_file, ref_file, k, dialect='unix'): The length of the k-mer. dialect : str This string specifies the dialect of the output CSV file. + compressed : bool + Whether input VCF is compressed or not. It is determined by file + extension if it is not specified.
""" csv_writer = csv.DictWriter(output, fieldnames=['k', 'count'], @@ -94,9 +97,9 @@ def write_csv(output, vcf_file, ref_file, k, dialect='unix'): csv_writer.writeheader() if isinstance(vcf_file, str): - vcf_reader = vcf.Reader(filename=vcf_file) + vcf_reader = vcf.Reader(filename=vcf_file, compressed=compressed) else: - vcf_reader = vcf.Reader(vcf_file) + vcf_reader = vcf.Reader(vcf_file, compressed=compressed) if ref_file is None: ref_file = open(vcf_reader.metadata['reference'], 'r') bv = compute_snpbv(vcf_reader, reflen(ref_file)) From a14c3f393e6bc600089b9aea4ccd3c3fbd18de3e Mon Sep 17 00:00:00 2001 From: Ali Ghaffaari Date: Thu, 26 Jul 2018 13:00:44 +0200 Subject: [PATCH 4/5] Work with compressed input VCF file --- vcfy/ksnper.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/vcfy/ksnper.py b/vcfy/ksnper.py index 75d637e..ccc3e7f 100644 --- a/vcfy/ksnper.py +++ b/vcfy/ksnper.py @@ -10,6 +10,7 @@ :license: MIT, see LICENSE for more details. """ +import sys import csv import click @@ -108,13 +109,15 @@ def write_csv(output, vcf_file, ref_file, k, dialect='unix', compressed=None): @click.command() -@click.argument('vcf', type=click.File('r'), default="-") +@click.argument('vcf', type=str, default="-") @click.option('-o', '--output', type=click.File('w'), default="-", help="Write to this file instead of standard output.") @click.option('-r', '--reference', type=click.File('r'), default=None, help=("Reference genome FASTA file. It will be inferred from VCF " "header, if not specified.")) @click.option('-k', type=int, required=True, help="The value of k.") +@click.option('-c', is_flag=True, default=None, + help="Set if the input VCF is compressed") @click.option('-d', '--dialect', type=click.Choice(csv.list_dialects()), default='unix', show_default=True, help="Use this CSV dialect.") @@ -122,8 +125,10 @@ def cli(**kwargs): """Report the number of SNPs in all k-mers. 
Specify the k and the VCF file, it reports number of SNPS occurred in each k-mer. """ + stdin_fsock = sys.stdin.buffer if kwargs['c'] else sys.stdin write_csv(kwargs.pop('output'), - kwargs.pop('vcf'), + kwargs.pop('vcf') if kwargs['vcf'] != '-' else stdin_fsock, kwargs.pop('reference'), kwargs.pop('k'), - kwargs.pop('dialect')) + kwargs.pop('dialect'), + kwargs.pop('c')) From 6aad489a1d7f39dec85cc5690be58217fa5faa6b Mon Sep 17 00:00:00 2001 From: Ali Ghaffaari Date: Thu, 26 Jul 2018 13:05:00 +0200 Subject: [PATCH 5/5] Bump to version number v0.0.6 --- vcfy/release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcfy/release.py b/vcfy/release.py index 0632bae..3edd8e5 100644 --- a/vcfy/release.py +++ b/vcfy/release.py @@ -38,7 +38,7 @@ __license__ = 'MIT' # Release -__version__ = '0.0.5' +__version__ = '0.0.6' __status__ = DS_PREALPHA # Package data