diff --git a/genome_grist/copy_local_genomes.py b/genome_grist/copy_local_genomes.py index cc1fa72c..03c97aa5 100644 --- a/genome_grist/copy_local_genomes.py +++ b/genome_grist/copy_local_genomes.py @@ -41,8 +41,11 @@ def main(): record_name = record.name break - record_name = record_name.split(' ', 1) - ident, remainder = record_name + ident, *remainder = record_name.split(' ', 1) + if remainder: # is list, needs to be string + remainder = remainder[0] + else: + remainder = ident print(f"read identifer '{ident}' and name '{remainder}'") diff --git a/genome_grist/notebooks/report-gather.ipynb b/genome_grist/notebooks/report-gather.ipynb index 877fae8c..3b23ed06 100644 --- a/genome_grist/notebooks/report-gather.ipynb +++ b/genome_grist/notebooks/report-gather.ipynb @@ -82,7 +82,14 @@ "\n", "# connect gather_df to all_df and left_df using 'genome_id'\n", "def fix_name(x):\n", - " return \"_\".join(x.split('_')[:2]).split('.')[0]\n", + " # pick off first space-delimited name as identifier\n", + " x = x.split(' ')[0]\n", + " \n", + " # eliminate stuff after the period, too.\n", + " x = x.split('.')[0]\n", + " \n", + " return x\n", + " #return \"_\".join(x.split('_')[:2]).split('.')[0]\n", "\n", "gather_df['genome_id'] = gather_df['name'].apply(fix_name)\n", "names_df['genome_id'] = names_df['ident'].apply(fix_name)" diff --git a/genome_grist/notebooks/report-mapping.ipynb b/genome_grist/notebooks/report-mapping.ipynb index 2818deff..bd4fbec6 100644 --- a/genome_grist/notebooks/report-mapping.ipynb +++ b/genome_grist/notebooks/report-mapping.ipynb @@ -78,7 +78,15 @@ "\n", "# connect gather_df to all_df and left_df using 'genome_id'\n", "def fix_name(x):\n", - " return \"_\".join(x.split('_')[:2]).split('.')[0]\n", + " # pick off first space-delimited name as identifier\n", + " x = x.split(' ')[0]\n", + " \n", + " # eliminate stuff after the period, too.\n", + " x = x.split('.')[0]\n", + " \n", + " return x\n", + " #return \"_\".join(x.split('_')[:2]).split('.')[0]\n", + "\n", "\n", "gather_df['genome_id'] = gather_df['name'].apply(fix_name)\n", "names_df['genome_id'] = names_df['ident'].apply(fix_name)"