From f767bfad74bc3c5e688864349c78060b4727c4d8 Mon Sep 17 00:00:00 2001 From: "Arjan Draisma (wur)" <74908173+adraismawur@users.noreply.github.com> Date: Wed, 17 Jul 2024 16:09:30 +0200 Subject: [PATCH] Run bigscape v2 (#251) * add BiG-SCAPE 2 to dependencies * add example config for bigscape 2 * implement running BiG-SCAPE * fix bigscape2 dependency * copy db file properly * remove cluster arg * run ruff formatter * fix ruff check issues * ensure str for mypy static type checking * Move configuration to correct file * use os.path.join instead of string concat * fix merge mistake * remove extra bigscape 2 files * add missing library * add validator for bigscape version * add test for bigscape version * fix typo * add simple run testing * add test to check for nonextent input path * add info to docstring * add exception on invalid version number * move log to after validation * add version info to log * use specific exception * rework return codes and exceptions * add wrong version test * add invalid path test for v2 * specify exception * fix tests not correctly running * change imports to reflect style in other tests * specify exception type * add minimal test data * add real data tests * remove class * force string for mypy * Apply suggestions from code review Co-authored-by: Cunliang Geng * add exceptions to docstring * add docstring to tests * use tmp path instead of data path * add missing typing * add explanation of cluster mode * parameterize tests * remove two gbks * better documentation * skip tests with dataset * do not check output code within run * move log * add test with incorrect parameters for runtime exception * remove temporary nplinker.toml * add stderr to error log * add import needed for skipping test on CI * Apply suggestions from code review Co-authored-by: Cunliang Geng * expand docstring * Apply suggestions from code review Co-authored-by: Cunliang Geng * fix ruff complaints --------- Co-authored-by: Cunliang Geng --- 
bin/install-nplinker-deps | 15 + src/nplinker/arranger.py | 31 +- src/nplinker/config.py | 1 + src/nplinker/data/nplinker.toml | 10 +- src/nplinker/genomics/bigscape/runbigscape.py | 107 +- src/nplinker/nplinker_default.toml | 2 + .../JK1_GCF_00/JCM_4504.region33.gbk | 1730 +++++++++++++++++ .../JK1_GCF_01/JCM_4529.region35.gbk | 1627 ++++++++++++++++ tests/unit/genomics/test_runbigscape.py | 68 + tests/unit/test_config.py | 1 + 10 files changed, 3565 insertions(+), 27 deletions(-) create mode 100755 tests/unit/data/bigscape/minimal_dataset/JK1_GCF_00/JCM_4504.region33.gbk create mode 100755 tests/unit/data/bigscape/minimal_dataset/JK1_GCF_01/JCM_4529.region35.gbk create mode 100644 tests/unit/genomics/test_runbigscape.py diff --git a/bin/install-nplinker-deps b/bin/install-nplinker-deps index ee8d2eee..98b362aa 100755 --- a/bin/install-nplinker-deps +++ b/bin/install-nplinker-deps @@ -125,6 +125,7 @@ pip install -q -U pip setuptools echo "🔥 Start installing BigScape ..." [[ -d BiG-SCAPE ]] || git clone https://github.com/medema-group/BiG-SCAPE.git cd BiG-SCAPE + git reset --hard git config --add advice.detachedHead false # disable advice git config pull.ff only git checkout master @@ -136,6 +137,20 @@ echo "🔥 Start installing BigScape ..." chmod 775 Annotated_MIBiG_reference ln -sf $LIB_PATH/BiG-SCAPE/bigscape.py $PY_PATH/bin cd .. + # blob size limit to remove large files left in history + [[ -d BiG-SCAPE-v2 ]] || git clone -b dev --filter=blob:limit=10m https://github.com/medema-group/BiG-SCAPE.git BiG-SCAPE-v2 + cd BiG-SCAPE-v2 + git config --add advice.detachedHead false + git checkout 99a4c2e4923bb50e175b2e619c2cee0a14918789 # Commits on Jun 14, 2024 + pip install click + pip install sqlalchemy + pip install pyhmmer + pip install tqdm + chmod 754 bigscape.py + ln -sf $LIB_PATH/BiG-SCAPE-v2/bigscape.py $PY_PATH/bin/bigscape-v2.py + cd .. 
+ + echo -e "✅ BigScape installed successfully\n" #--- Install FastTree (not support Windows, required by BigScape) diff --git a/src/nplinker/arranger.py b/src/nplinker/arranger.py index 732954cf..24e0e56b 100644 --- a/src/nplinker/arranger.py +++ b/src/nplinker/arranger.py @@ -304,21 +304,34 @@ def _run_bigscape(self) -> None: default BiG-SCAPE directory. """ self.bigscape_running_output_dir.mkdir(exist_ok=True, parents=True) + + version = self.config.bigscape.version + run_bigscape( self.antismash_dir, self.bigscape_running_output_dir, self.config.bigscape.parameters, + version, ) - for f in glob( - str( - self.bigscape_running_output_dir - / "network_files" - / "*" - / "mix" - / "mix_clustering_c*.tsv" + + if version == 1: + for f in glob( + str( + self.bigscape_running_output_dir + / "network_files" + / "*" + / "mix" + / "mix_clustering_c*.tsv" + ) + ): + shutil.copy(f, self.bigscape_dir) + elif version == 2: + shutil.copy( + self.bigscape_running_output_dir / "data_sqlite.db", + self.bigscape_dir, ) - ): - shutil.copy(f, self.bigscape_dir) + else: + raise ValueError(f"Invalid BiG-SCAPE version: {version}") def arrange_strain_mappings(self) -> None: """Arrange the strain mappings file. diff --git a/src/nplinker/config.py b/src/nplinker/config.py index c75c3ddf..d1ade8ba 100644 --- a/src/nplinker/config.py +++ b/src/nplinker/config.py @@ -73,6 +73,7 @@ def load_config(config_file: str | PathLike) -> Dynaconf: # BigScape Validator("bigscape.parameters", required=True, is_type_of=str), Validator("bigscape.cutoff", required=True, is_type_of=str), + Validator("bigscape.version", required=True, is_type_of=int), # Scoring ## `scoring.methods` must be a list of strings and must contain at least one of the ## supported scoring methods. 
diff --git a/src/nplinker/data/nplinker.toml b/src/nplinker/data/nplinker.toml index e0209418..d4238673 100644 --- a/src/nplinker/data/nplinker.toml +++ b/src/nplinker/data/nplinker.toml @@ -22,7 +22,7 @@ podp_id = "" # The default value is "INFO". level = "INFO" # The log file to append log messages. -# The value is optional. +# The value is optional. # If not set or use empty string, log messages will not be written to a file. # The file will be created if it does not exist. Log messages will be appended to the file if it exists. file = "path/to/logfile" @@ -43,6 +43,9 @@ version = "3.1" [bigscape] # The parameters to use for running BiG-SCAPE. +# Version of BiG-SCAPE to run. Make sure to change the parameters property below as well +# when changing versions. +version = 1 # Required bigscape parameters are `--mix`, `--include_singletons` and `--cutoffs`. NPLinker needs # them to run the analysis properly. # Parameters that must NOT exist: `--inputdir`, `--outputdir`, `--pfam_dir`. NPLinker will @@ -51,6 +54,11 @@ version = "3.1" # `mibig.version` to the version of mibig in bigscape. # The default value is "--mibig --clans-off --mix --include_singletons --cutoffs 0.30". parameters = "--mibig --clans-off --mix --include_singletons --cutoffs 0.30" +# for version 2, use the following parameters string: +# parameters = "--mibig_version 3.1 --include_singletons --gcf_cutoffs 0.30" +# Note that BiG-SCAPE v2 has subcommands. NPLinker requires the "cluster" subcommand and its parameters to be used. +# NPLinker will automatically set the following parameters: `--pfam_path`, `--inputdir` and `--outputdir`. So, do not set them here. +# BiG-SCAPE v2 also runs a --mix analysis by default, and does not need this to be included. # Which bigscape cutoff to use for NPLinker analysis. # There might be multiple cutoffs in bigscape output. # Note that this value must be a string. 
diff --git a/src/nplinker/genomics/bigscape/runbigscape.py b/src/nplinker/genomics/bigscape/runbigscape.py index 2e10ba10..eb80c249 100644 --- a/src/nplinker/genomics/bigscape/runbigscape.py +++ b/src/nplinker/genomics/bigscape/runbigscape.py @@ -4,6 +4,7 @@ import subprocess import sys from os import PathLike +from typing import Literal logger = logging.getLogger(__name__) @@ -15,33 +16,105 @@ def run_bigscape( antismash_path: str | PathLike, output_path: str | PathLike, extra_params: str, -): - bigscape_py_path = "bigscape.py" - logger.info( - f'run_bigscape: input="{antismash_path}", output="{output_path}", extra_params={extra_params}"' - ) + version: Literal[1, 2] = 1, +) -> bool: + """Runs BiG-SCAPE to cluster BGCs. + + The behavior of this function is slightly different depending on the version of + BiG-SCAPE that is set to run using the configuration file. + Mostly this means a different set of parameters is used between the two versions. + + The AntiSMASH output directory should be a directory that contains GBK files. + The directory can contain subdirectories, in which case BiG-SCAPE will search + recursively for GBK files. E.g.: + + ``` + example_folder + ├── organism_1 + │  ├── organism_1.region001.gbk + │  ├── organism_1.region002.gbk + │  ├── organism_1.region003.gbk + │  ├── organism_1.final.gbk <- skipped! + │  └── ... + ├── organism_2 + │  ├── ... + └── ... + ``` + + By default, only GBK Files with "cluster" or "region" in the filename are + accepted. GBK Files with "final" in the filename are excluded. + + Args: + antismash_path: Path to the antismash output directory. + output_path: Path to the output directory where BiG-SCAPE will write its results. + extra_params: Additional parameters to pass to BiG-SCAPE. + version: The version of BiG-SCAPE to run. Must be 1 or 2. + + Returns: + True if BiG-SCAPE ran successfully, False otherwise. + + Raises: + ValueError: If an unexpected BiG-SCAPE version number is specified. 
+ FileNotFoundError: If the antismash_path does not exist or if the BiG-SCAPE python + script could not be found. + RuntimeError: If BiG-SCAPE fails to run. + + Examples: + >>> from nplinker.genomics.bigscape import run_bigscape + >>> run_bigscape(antismash_path="./antismash", output_path="./output", + ... extra_params="--help", version=1) + """ + # switch to correct version of BiG-SCAPE + if version == 1: + bigscape_py_path = "bigscape.py" + elif version == 2: + bigscape_py_path = "bigscape-v2.py" + else: + raise ValueError("Invalid BiG-SCAPE version number. Expected: 1 or 2.") try: subprocess.run([bigscape_py_path, "-h"], capture_output=True, check=True) except Exception as e: - raise Exception(f"Failed to find/run bigscape.py (path={bigscape_py_path}, err={e})") from e + raise FileNotFoundError( + f"Failed to find/run BiG-SCAPE executable program (path={bigscape_py_path}, err={e})" + ) from e if not os.path.exists(antismash_path): - raise Exception(f'antismash_path "{antismash_path}" does not exist!') + raise FileNotFoundError(f'antismash_path "{antismash_path}" does not exist!') + + logger.info(f"Running BiG-SCAPE version {version}") + logger.info( + f'run_bigscape: input="{antismash_path}", output="{output_path}", extra_params={extra_params}"' + ) - # configure the IO-related parameters, including pfam_dir - args = [bigscape_py_path, "-i", antismash_path, "-o", output_path, "--pfam_dir", PFAM_PATH] + # assemble arguments. first argument is the python file + args = [bigscape_py_path] + + # version 2 points to specific Pfam file, version 1 points to directory + # version 2 also requires the cluster subcommand + if version == 1: + args.extend(["--pfam_dir", PFAM_PATH]) + elif version == 2: + args.extend(["cluster", "--pfam_path", os.path.join(PFAM_PATH, "Pfam-A.hmm")]) + + # add input and output paths. 
these are unchanged + args.extend(["-i", str(antismash_path), "-o", str(output_path)]) # append the user supplied params, if any if len(extra_params) > 0: args.extend(extra_params.split(" ")) logger.info(f"BiG-SCAPE command: {args}") - result = subprocess.run(args, stdout=sys.stdout, stderr=sys.stderr, check=True) - logger.info(f"BiG-SCAPE completed with return code {result.returncode}") - # use subprocess.CompletedProcess.check_returncode() to test if the BiG-SCAPE - # process exited successfully. This throws an exception for non-zero returncodes - # which will indicate to the PODPDownloader module that something went wrong. - result.check_returncode() - - return True + result = subprocess.run(args, stdout=sys.stdout, stderr=sys.stderr) + + # return true on any non-error return code + if result.returncode == 0: + logger.info(f"BiG-SCAPE completed with return code {result.returncode}") + return True + + # otherwise log details and raise a runtime error + logger.error(f"BiG-SCAPE failed with return code {result.returncode}") + logger.error(f"output: {str(result.stdout)}") + logger.error(f"stderr: {str(result.stderr)}") + + raise RuntimeError(f"Failed to run BiG-SCAPE with error code {result.returncode}") diff --git a/src/nplinker/nplinker_default.toml b/src/nplinker/nplinker_default.toml index 4d88540a..037895bc 100644 --- a/src/nplinker/nplinker_default.toml +++ b/src/nplinker/nplinker_default.toml @@ -9,7 +9,9 @@ to_use = true version = "3.1" [bigscape] +version = 1 parameters = "--mibig --clans-off --mix --include_singletons --cutoffs 0.30" + cutoff = "0.30" [scoring] diff --git a/tests/unit/data/bigscape/minimal_dataset/JK1_GCF_00/JCM_4504.region33.gbk b/tests/unit/data/bigscape/minimal_dataset/JK1_GCF_00/JCM_4504.region33.gbk new file mode 100755 index 00000000..2d779dfa --- /dev/null +++ b/tests/unit/data/bigscape/minimal_dataset/JK1_GCF_00/JCM_4504.region33.gbk @@ -0,0 +1,1730 @@ +LOCUS NZ_BMUJ01000032 33697 bp DNA linear CON 19-APR-2022 +DEFINITION 
Streptomyces plicatus strain JCM 4504 sequence032, whole genome + shotgun sequence. +ACCESSION NZ_BMUJ01000032 +VERSION NZ_BMUJ01000032 +KEYWORDS . +SOURCE Streptomyces plicatus + ORGANISM Streptomyces plicatus + Bacteria; Actinobacteria; Streptomycetales; Streptomycetaceae; + Streptomyces; Streptomyces rochei group. +COMMENT REFSEQ INFORMATION: The reference sequence is identical to + BMUJ01000032.1. + The annotation was added by the NCBI Prokaryotic Genome Annotation + Pipeline (PGAP). Information about PGAP can be found here: + https://www.ncbi.nlm.nih.gov/genome/annotation_prok/ + ##antiSMASH-Data-START## + Version :: 6.1.1 + Run date :: 2023-02-07 14:24:16 + Original ID :: NZ_BMUJ01000032.1 + NOTE: This is a single cluster extracted from a larger record! + Orig. start :: 0 + Orig. end :: 33697 + ##antiSMASH-Data-END## +FEATURES Location/Qualifiers + gene <1..508 + /locus_tag="IE211_RS35055" + /old_locus_tag="GCM10010301_70910" + /pseudo="" + source 1..33697 + /culture_collection="JCM:4504" + /db_xref="taxon:1922" + /mol_type="genomic DNA" + /organism="Streptomyces plicatus" + /strain="JCM 4504" + /submitter_seqid="sequence032" + /type_material="type strain of Streptomyces plicatus" + protocluster 1..33697 + /aStool="rule-based-clusters" + /category="PKS" + /contig_edge="True" + /core_location="[5971:21826]" + /cutoff="20000" + /detection_rule="(t2ks and t2clf)" + /neighbourhood="35000" + /product="T2PKS" + /protocluster_number="1" + /tool="antismash" + proto_core 5972..21826 + /aStool="rule-based-clusters" + /tool="antismash" + /cutoff="20000" + /detection_rule="(t2ks and t2clf)" + /neighbourhood="35000" + /product="T2PKS" + /protocluster_number="1" + cand_cluster 1..33697 + /candidate_cluster_number="1" + /contig_edge="True" + /detection_rules="(t2ks and t2clf)" + /kind="single" + /product="T2PKS" + /protoclusters="1" + /tool="antismash" + region 1..33697 + /candidate_cluster_numbers="1" + /contig_edge="True" + /product="T2PKS" + /region_number="1" + 
/rules="(t2ks and t2clf)" + /tool="antismash" + CDS <1..508 + /codon_start=2 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019058637.1" + /locus_tag="IE211_RS35055" + /note="incomplete; too short partial abutting assembly gap; + missing N-terminus; Derived by automated computational + analysis using gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_70910" + /product="methylmalonyl-CoA carboxyltransferase" + /pseudo="" + /transl_table=11 + /translation="MCDAFNIPLVTFLDVPGFLPGVDQEHGGIIRHGAKLLYAYCNATV + PRISLILRKAYGGAYIVMDSQSIGADLTYAWPTNEIAVMGAEGAANVIFRRQIAEAEDP + DAVRARMVKEYRAELMHPYYAAERGLVDDVIDPAETREVLIESLAMLRTKDADVPSRKH + GNPPQ" + gene 520..1152 + /locus_tag="IE211_RS35060" + /old_locus_tag="GCM10010301_70920" + CDS 520..1152 + /GO_function="GO:0003677 - DNA binding [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_006344625.1" + /locus_tag="IE211_RS35060" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_70920" + /product="TetR family transcriptional regulator" + /protein_id="WP_019330965.1" + /transl_table=11 + /translation="MARQERAVRTREALIRSAAEIFHDEGFHAAALTTISSRAGVSNGA + LHFHFASKAALADAVEEAAADVLRAVVGRWDGGPPGVLQCLVNATHELACALQNDVVLR + AGFELSREAGRQPRTDLRLCWQNWVTDMVGRAGRGGELRESVAPESAVAAVVAATSGFE + VLGMRNQAWLSRSTVAQFWLLLLPALAPAPHAGLWQAEGSWTGTATG" + gene 1554..2369 + /locus_tag="IE211_RS35065" + /old_locus_tag="GCM10010301_70930" + CDS 1554..2369 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330964.1" + /locus_tag="IE211_RS35065" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_70930" + /product="AfsR/SARP family transcriptional regulator" + /protein_id="WP_031022565.1" + /transl_table=11 + /translation="MDIEVLGALSVREHGVSVVPTAPKPRQVLALLALNADQVVPVAAL + VDELWGENPPRSARTTLQTYVLQLRELMAQALAHGPDERCTAKDILATVPGGYRLQTRG + GHVDYREFDQRAGLGYRAMDAEDYAGAARRLADALALWNGQALTDIQAGLRIDTEVKRL + EEARLCALDQRIEADLRLGRHRELLSELTVLVNQYRMHESLHGQFMLALHRSGRRGEAL + NVYQRLRSTLVHELGLEPSAALSRLQRSILMARPETPAAAGGSGRLVTR" + gene complement(2385..3299) + /locus_tag="IE211_RS35070" + /old_locus_tag="GCM10010301_70940" + CDS complement(2385..3299) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330963.1" + /locus_tag="IE211_RS35070" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_70940" + /product="AfsR/SARP family transcriptional regulator" + /protein_id="WP_246559116.1" + /transl_table=11 + /translation="MQIHDERFGVRIVPAGAKQRALLGALLVRAGQAVPAECLVEELWG + GHPPVNAANALQAHVARLRRLLPAPGPGGPRHVWLRTSPLGYTLSLGPAATDAQRFHRL + VGQGRELAATAPGRAVEVLREALALWRGPALQGSGQGPICSAEAALLEESRLRALEVLY + DVCLRADRCAEITGELEELVAVHPLRERFHEQLMTALHRCGRRAEALSTYDRARRRLAR + DLGIGPGQVLSRRREAILRSCEPAADSADRAGAGTAGQGGDELGVLREELVRLRGHVEL + LRRQQRELSEQVARLTAHHPCGP" + gene 4117..4938 + /locus_tag="IE211_RS35075" + /old_locus_tag="GCM10010301_70960" + CDS 4117..4938 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_007268976.1" + /locus_tag="IE211_RS35075" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_70960" + /product="AfsR/SARP family transcriptional regulator" + /protein_id="WP_031022570.1" + /transl_table=11 + /translation="MEIKVLGALNAEFEGISVVPSAGKPRQILALLALYPGRVVPVPTL + MEEIWGTDLPQSSMTTLQTYILQLRRLLGTAMGPDVPGSAKDVLATRYGGYLLQIPAEA + VDAFTYERLVTEGRQAYEDGEDERAATVLRRALDLWDGPALVDVRVGPVLEIEAMRLEQ + SRLVARERRIDADLRLGRHVELIAELTDLIARHPQHEGLHSQAMVALYRSGRQAAALDV + YRRLRQRLIDELGVEPSPQLQRLHQAMLAVDPRLDIVAGPRRTSTFDLYAA" + gene 5196..5975 + /locus_tag="IE211_RS35080" + /old_locus_tag="GCM10010301_70970" + CDS 5196..5975 + /NRPS_PKS="Domain: Thioesterase (15-242). E-value: 4.5e-38. + Score: 123.7. Matches aSDomain: + nrpspksdomains_IE211_RS35080_Thioesterase.1" + /NRPS_PKS="type: other" + /codon_start=1 + /inference="COORDINATES: protein motif:HMM:NF013166.2" + /locus_tag="IE211_RS35080" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_70970" + /product="alpha/beta fold hydrolase" + /protein_id="WP_046249271.1" + /transl_table=11 + /translation="MASRSRDREAGTARITLTCLAHAGAGVASYRGWAAAVGPGIDVAA + LPLPGRDSRRREPRLTERAGLLADFLPTLLQTARRGPYALYGHSMGALVGYTLTRALAD + SGLPPLFLAVGACPPPHTTTVLADAADLPDEDLLPLLDEIGSLPPGASASPGGLWRRTF + LPVLRDDLRLARSLRNAALDPVTGGPLDVPVLVFAGRDDPLAAPAALRHWQQWTTNLIE + LHTVAGGHFFASSSSLAQHVGRACRGHVTALPTGGGR" + aSDomain 5241..5921 + /aSDomain="Thioesterase" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35080_Thioesterase.1" + /evalue="4.50E-38" + /label="IE211_RS35080_Thioesterase.1" + /locus_tag="IE211_RS35080" + /protein_end="242" + /protein_start="15" + /score="123.7" + /tool="antismash" + /translation="TLTCLAHAGAGVASYRGWAAAVGPGIDVAALPLPGRDSRRREPRL + TERAGLLADFLPTLLQTARRGPYALYGHSMGALVGYTLTRALADSGLPPLFLAVGACPP + PHTTTVLADAADLPDEDLLPLLDEIGSLPPGASASPGGLWRRTFLPVLRDDLRLARSLR + NAALDPVTGGPLDVPVLVFAGRDDPLAAPAALRHWQQWTTNLIELHTVAGGHFFASSSS + LAQHV" + CDS_motif 5433..5501 + 
/aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE211_RS35080_0001" + /evalue="3.80E-07" + /label="NRPS-te1" + /locus_tag="IE211_RS35080" + /protein_end="102" + /protein_start="79" + /score="22.6" + /tool="antismash" + /translation="RGPYALYGHSMGALVGYTLTRAL" + gene 5972..7243 + /locus_tag="IE211_RS35085" + /old_locus_tag="GCM10010301_70980" + CDS 5972..7243 + /NRPS_PKS="Domain: PKS_KS(Iterative-KS) (3-396). E-value: + 1.7e-49. Score: 160.7. Matches aSDomain: + nrpspksdomains_IE211_RS35085_PKS_KS.1" + /NRPS_PKS="type: PKS/NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic (rule-based-clusters) T2PKS: + t2ks" + /gene_kind="biosynthetic" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_005321633.1" + /locus_tag="IE211_RS35085" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_70980" + /product="beta-ketoacyl-[acyl-carrier-protein] synthase + family protein" + /protein_id="WP_192342468.1" + /sec_met_domain="t2ks (E-value: 2.2e-202, bitscore: 663.6, + seeds: 25, tool: rule-based-clusters)" + /transl_table=11 + /translation="MNRVVITGIGVVAPGAVGTADFWDLLTVGRTATRRVTLFDACGYR + SRVAAEVDFTPAAHGFDLADTERLDRAAQFALVAAREAVADSGVADRIGRNPLRTGVSL + GSAIGCTTSLATQYAILSDCGTTWTLDHTEAAESLYDYFVPSSLAATVARDRGAQGPVA + LVSSGCTSGLDAVGHGADLIREGSADIVVAGGTEAPIVPIAMACFDRLRLTSSRNDDPA + TASRPFDRTRDGFVLGEGAAVLVLEELEHARRRGARPYAELSAVTAHSSAHHMTGLRPG + ALEMADAIRAALDQARLNPADVDYISAHGAGTRHNDRHETHALKESLGGSAHRVPVSSI + KSMIGHALGAAGALDLAASALAIRHDTVPPTANLHEPDPTCDLDYTPLFAREQRTSTVL + TVASGFGGFHTAAVLTRPRLKEAA" + aSDomain 5981..7159 + /aSDomain="PKS_KS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35085_PKS_KS.1" + /domain_subtype="Iterative-KS" + /evalue="1.70E-49" + /label="IE211_RS35085_PKS_KS.1" + /locus_tag="IE211_RS35085" + /protein_end="396" + /protein_start="3" + 
/score="160.7" + /tool="antismash" + /translation="VVITGIGVVAPGAVGTADFWDLLTVGRTATRRVTLFDACGYRSRV + AAEVDFTPAAHGFDLADTERLDRAAQFALVAAREAVADSGVADRIGRNPLRTGVSLGSA + IGCTTSLATQYAILSDCGTTWTLDHTEAAESLYDYFVPSSLAATVARDRGAQGPVALVS + SGCTSGLDAVGHGADLIREGSADIVVAGGTEAPIVPIAMACFDRLRLTSSRNDDPATAS + RPFDRTRDGFVLGEGAAVLVLEELEHARRRGARPYAELSAVTAHSSAHHMTGLRPGALE + MADAIRAALDQARLNPADVDYISAHGAGTRHNDRHETHALKESLGGSAHRVPVSSIKSM + IGHALGAAGALDLAASALAIRHDTVPPTANLHEPDPTCDLDYTPLFAREQRTS" + gene 7240..8505 + /locus_tag="IE211_RS35090" + /old_locus_tag="GCM10010301_70990" + CDS 7240..8505 + /NRPS_PKS="Domain: PKS_KS(Iterative-KS) (85-417). E-value: + 2.9e-25. Score: 80.8. Matches aSDomain: + nrpspksdomains_IE211_RS35090_PKS_KS.1" + /NRPS_PKS="type: PKS/NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic (rule-based-clusters) T2PKS: + t2clf" + /gene_kind="biosynthetic" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_007269014.1" + /locus_tag="IE211_RS35090" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_70990" + /product="ketosynthase chain-length factor" + /protein_id="WP_193450422.1" + /sec_met_domain="t2clf (E-value: 2.7e-172, bitscore: 564.0, + seeds: 26, tool: rule-based-clusters)" + /transl_table=11 + /translation="MTKATQAPEPARPAGAEGPRQTLVTGIGVAAPNGLGTRAWWDAVL + CGRTGLGPITRFDASGYPVRIAGEIPGFVDEDHIPSRLLPSTDRGTRIALVAAEEALRD + ANVSPADLPAYGAGVITASSAGGAEFGERGLAALWSKGAQHVSAYQSFASFHAAAPAQI + SIRHRLRGHGSTVVSEQAGGIDALARARRRIRDGACLMVTGGIDSTLCAWGWAAHLADG + RLSPATEPARAYRPFAATADGHAVGEGGALLVLEDARAAARRGATGYGVIAGCAATFDG + PDRPTLRQAAELALADAGLAPEHVDVVFADGAAERRADLVESQALCALFGPYGVPVTVP + KTMTGRLGAGGSALDVAAALLALREKVVPPTTGTGRVADDCPLDLVTGAPRELPRLRVA + LVLARGRGGFNSAAVLQAPQTE" + aSDomain 7495..8490 + /aSDomain="PKS_KS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35090_PKS_KS.1" + /domain_subtype="Iterative-KS" + /evalue="2.90E-25" + /label="IE211_RS35090_PKS_KS.1" + /locus_tag="IE211_RS35090" + /protein_end="417" + /protein_start="85" + /score="80.8" + /tool="antismash" + /translation="STDRGTRIALVAAEEALRDANVSPADLPAYGAGVITASSAGGAEF + GERGLAALWSKGAQHVSAYQSFASFHAAAPAQISIRHRLRGHGSTVVSEQAGGIDALAR + ARRRIRDGACLMVTGGIDSTLCAWGWAAHLADGRLSPATEPARAYRPFAATADGHAVGE + GGALLVLEDARAAARRGATGYGVIAGCAATFDGPDRPTLRQAAELALADAGLAPEHVDV + VFADGAAERRADLVESQALCALFGPYGVPVTVPKTMTGRLGAGGSALDVAAALLALREK + VVPPTTGTGRVADDCPLDLVTGAPRELPRLRVALVLARGRGGFNSAAVLQA" + gene complement(8863..9099) + /locus_tag="IE211_RS35095" + /old_locus_tag="GCM10010301_71000" + CDS complement(8863..9099) + /codon_start=1 + /inference="COORDINATES: protein motif:HMM:NF039972.1" + /locus_tag="IE211_RS35095" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71000" + /product="hypothetical protein" + /protein_id="WP_193450423.1" + /transl_table=11 + /translation="MRRFRPRRCIRGLWEGLVAYGRLCLAGETDRYDPPPHPRIRWHRP + PPGHPERVRDDMPLTDLERRLARELTDEDHDVR" + gene 9413..10297 + /locus_tag="IE211_RS35100" + /old_locus_tag="GCM10010301_71010" + CDS 9413..10297 + /NRPS_PKS="Domain: Polyketide_cyc (3-131). E-value: + 2.6e-13. Score: 42.2. Matches aSDomain: + nrpspksdomains_IE211_RS35100_Polyketide_cyc.1" + /NRPS_PKS="Domain: Polyketide_cyc2 (146-253). E-value: + 1e-07. Score: 24.3. Matches aSDomain: + nrpspksdomains_IE211_RS35100_Polyketide_cyc2.1" + /NRPS_PKS="type: other" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330954.1" + /locus_tag="IE211_RS35100" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71010" + /product="SRPBCC family protein" + /protein_id="WP_193450424.1" + /transl_table=11 + /translation="MTAPAPSGVLYGLIADATVWPLFFPPSVHVEQLDFDGTRERLRMW + AVAGDRISSWVSHRRLDVGQRQVEFRQERPSAPVETMTGLWTVEPLGDGSRVTLEHAFT + VIGDAPADAAWTERVTRANSRAQLQRLAWLAERWTRLDDLVMSFEDTVRVNVPAELVFD + FLYRAGDWPDDLAGTRPLTVQEDTPGIQVLALDGRSATGGEAVRISFPAAGRLVHKHTR + TSGPLAAYTGEWTIEPQPGAGLDVTVRHDVLLNDDAALDQDAARRACDEVGRAGRRLLE + HAVRHASDAVRVL" + aSDomain 9422..9805 + /aSDomain="Polyketide_cyc" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35100_Polyketide_cyc.1" + /domain_subtype="Polyketide_cyc" + /evalue="2.60E-13" + /label="IE211_RS35100_Polyketide_cyc.1" + /locus_tag="IE211_RS35100" + /protein_end="131" + /protein_start="3" + /score="42.2" + /tool="antismash" + /translation="PAPSGVLYGLIADATVWPLFFPPSVHVEQLDFDGTRERLRMWAVA + GDRISSWVSHRRLDVGQRQVEFRQERPSAPVETMTGLWTVEPLGDGSRVTLEHAFTVIG + DAPADAAWTERVTRANSRAQLQRL" + aSModule 9422..10171 + /domains="nrpspksdomains_IE211_RS35100_Polyketide_cyc.1" + 
/domains="nrpspksdomains_IE211_RS35100_Polyketide_cyc2.1" + /incomplete + /locus_tags="IE211_RS35100" + /tool="antismash" + /type="unknown" + aSDomain 9851..10171 + /aSDomain="Polyketide_cyc" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35100_Polyketide_cyc2.1" + /domain_subtype="Polyketide_cyc2" + /evalue="1.00E-07" + /label="IE211_RS35100_Polyketide_cyc2.1" + /locus_tag="IE211_RS35100" + /protein_end="253" + /protein_start="146" + /score="24.3" + /tool="antismash" + /translation="SFEDTVRVNVPAELVFDFLYRAGDWPDDLAGTRPLTVQEDTPGIQ + VLALDGRSATGGEAVRISFPAAGRLVHKHTRTSGPLAAYTGEWTIEPQPGAGLDVTVRH + DVL" + gene 10294..12162 + /locus_tag="IE211_RS35105" + /old_locus_tag="GCM10010301_71020" + CDS 10294..12162 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330953.1" + /locus_tag="IE211_RS35105" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71020" + /product="acyl-CoA dehydrogenase" + /protein_id="WP_193450425.1" + /transl_table=11 + /translation="MTAALDTAAGPTGQTPAPRSTTPTPEHTTPTPEPEHTTRAPAPES + QHTTPERECAARVPEPERAARLEAALGDPFDPANPHGHLALVRADDTREAPHATEALLT + EHGLSAEFVPHDLGGRLRDLEELARVLRPLFRRDLALGYGFGITSLFAASSVWTAGDPH + QRAALADVLLGGGRVAIVHREVAHANAILRREVRAQRPAGGGFLLNGSKDAVMNADRTD + TFVVYARTSAGSGSASHSVLLLPGPPASGEVRRLARVEMPGMRGARFHGLRLADVRLPD + SALVGSLGEGVTLALRSFQISHCLIPGTVLAGVDSVLRLAVRAATENRPDGRPARRWHK + ALSGVFADLLACDAMAVTGLRALSLVPQHAHLLAAAVKYTMPDLLREDLEELAAVLGAR + GYDRGPLYGGFQKLARDLPVAGLGHSGTAVCQAVLVPQLPALARTAWFRTAEPSAALFL + PGAPLPPLDHRRLTHSGTDDPLTATLIGAAERLAARTGTQPLHAALAALARALVEELRV + LRARCAALPAAGSTVFDPLACALADRYALLLCAAACLGVWEGQADGDGFLADPAWAVLV + LSRIGRRLGIAVPETPADAEQAVLAEALGRCRHGRSLDLYDTPLAG" + gene 12253..13062 + /locus_tag="IE211_RS35110" + /old_locus_tag="GCM10010301_71030" + CDS 12253..13062 + /GO_function="GO:0008897 - holo-[acyl-carrier-protein] + synthase activity [Evidence IEA]" + /NRPS_PKS="Domain: ACPS (134-195). E-value: 7e-12. Score: + 37.1. Matches aSDomain: + nrpspksdomains_IE211_RS35110_ACPS.1" + /NRPS_PKS="type: other" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_007389988.1" + /locus_tag="IE211_RS35110" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71030" + /product="4'-phosphopantetheinyl transferase superfamily + protein" + /protein_id="WP_029394636.1" + /transl_table=11 + /translation="MERVTCAAPLHVPRPHGPWPAVREDLFRHGNALVCTTWSEWLPSV + LTTPRLRELLGDDWQRYRRTRDAAVRYRFAASRMLIKYTAAAALAVPPEYLDLAYRLGG + RPYLRGFDQIELSLSHTGDVMAVGLSRIGRIGVDVEPAERPVRLDLLETQVFTPAEARE + LAELPEGERTAHALRLWTLKEAYSKALGQGLRFGFKEFGFRQGRLSAPDGSRVTRDEWG + FATYPVMDRFLLSVACHNAGLSTAGDTSVGTMLDQGFLSAMTDTGQQ" + aSDomain 12655..12837 + /aSDomain="ACPS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35110_ACPS.1" + /evalue="7.00E-12" + /label="IE211_RS35110_ACPS.1" + /locus_tag="IE211_RS35110" + /protein_end="195" + /protein_start="134" + /score="37.1" + /tool="antismash" + /translation="IGVDVEPAERPVRLDLLETQVFTPAEARELAELPEGERTAHALRL + WTLKEAYSKALGQGLR" + gene 13267..14757 + /locus_tag="IE211_RS35115" + /old_locus_tag="GCM10010301_71040" + CDS 13267..14757 + /NRPS_PKS="Domain: Condensation_DCL (38-314). E-value: + 1.6e-48. Score: 157.2. Matches aSDomain: + nrpspksdomains_IE211_RS35115_Condensation_DCL.1" + /NRPS_PKS="type: NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic-additional + (rule-based-clusters) Condensation" + /gene_kind="biosynthetic-additional" + /inference="COORDINATES: protein motif:HMM:NF012873.2" + /locus_tag="IE211_RS35115" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71040" + /product="condensation domain-containing protein" + /protein_id="WP_051890455.1" + /sec_met_domain="Condensation (E-value: 3.4e-30, bitscore: + 96.2, seeds: 42, tool: rule-based-clusters)" + /transl_table=11 + /translation="MAWHSRPRALRGPGTARPPGVPALWPHLDLLGDFGGRRGAGRHVE + QLVWRWHGPLDTERFTAAWQSVVDRESVLRAALAPGPRPHLVLHEHAHGDVVRHRAGGA + GWDRLLERDRRRGLDPSRPCPLRVTLVERTDDPAGAGPVTRVVLTFHHALLDAWSVCLL + MQELCRAYLAGGELPGGERRPDLRDWAGWLQRQDPAGARDFWRGTVPDGPVAVLPARPG + PRTRQRGRGRTEVRLSPAEAERLHRWAALRAVPDSSALETVWALLLYRAAGPGGAATVG + FGVTVSGRGITLDCAERLPGPLRNCLPMVVRVDPGETVGRLLTALRDRALDMAAYEWVS + TRRIHRWTGRCPDGELLQSVVSVDRLPRPPGNLRNELADAGIALEPEPAHGACPDLPVA + LLVRPGGDGRLTFCVDHDRNRISDADARLLAGHCARLLRHLPGTDEATTNGAVLDVLAG + EALPRIAPRPSRPRPAGSWLRPRSTSSGAAVDRAASHP" + aSDomain 13381..14208 + /aSDomain="Condensation" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35115_Condensation_DCL.1 + " + /domain_subtype="Condensation_DCL" + /evalue="1.60E-48" + /label="IE211_RS35115_Condensation_DCL.1" + /locus_tag="IE211_RS35115" + /protein_end="314" + /protein_start="38" + /score="157.2" + /tool="antismash" + /translation="GAGRHVEQLVWRWHGPLDTERFTAAWQSVVDRESVLRAALAPGPR + PHLVLHEHAHGDVVRHRAGGAGWDRLLERDRRRGLDPSRPCPLRVTLVERTDDPAGAGP + VTRVVLTFHHALLDAWSVCLLMQELCRAYLAGGELPGGERRPDLRDWAGWLQRQDPAGA + RDFWRGTVPDGPVAVLPARPGPRTRQRGRGRTEVRLSPAEAERLHRWAALRAVPDSSAL + ETVWALLLYRAAGPGGAATVGFGVTVSGRGITLDCAERLPGPLRNCLPMVVRVD" + CDS_motif 13396..13497 + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE211_RS35115_0001" + /evalue="4.80E-10" + /label="C2_DCL_024-062" + /locus_tag="IE211_RS35115" + /protein_end="77" + /protein_start="43" + /score="31.7" + /tool="antismash" + /translation="VEQLVWRWHGPLDTERFTAAWQSVVDRESVLRAA" + CDS_motif 13714..13779 + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + 
/domain_id="nrpspksmotif_IE211_RS35115_0002" + /evalue="4.90E-08" + /label="C3_DCL_135-156" + /locus_tag="IE211_RS35115" + /protein_end="171" + /protein_start="149" + /score="25.5" + /tool="antismash" + /translation="FHHALLDAWSVCLLMQELCRAY" + CDS_motif 14107..14193 + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE211_RS35115_0003" + /evalue="6.10E-06" + /label="C5_DCL_263-294" + /locus_tag="IE211_RS35115" + /protein_end="309" + /protein_start="280" + /score="18.7" + /tool="antismash" + /translation="GFGVTVSGRGITLDCAERLPGPLRNCLPM" + CDS_motif 14185..14244 + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE211_RS35115_0004" + /evalue="3.90E+01" + /label="C2_LCL_024-062" + /locus_tag="IE211_RS35115" + /protein_end="326" + /protein_start="306" + /score="-3.2" + /tool="antismash" + /translation="LPMVVRVDPGETVGRLLTAL" + gene 14792..14938 + /locus_tag="IE211_RS35120" + /old_locus_tag="GCM10010301_71050" + CDS 14792..14938 + /codon_start=1 + /inference="COORDINATES: ab initio prediction:GeneMarkS-2+" + /locus_tag="IE211_RS35120" + /note="Derived by automated computational analysis using + gene prediction method: GeneMarkS-2+." + /old_locus_tag="GCM10010301_71050" + /product="hypothetical protein" + /protein_id="WP_019330950.1" + /transl_table=11 + /translation="MHEGDDERHEGDTARCARAVHGGPPPLRLAGAEEREDEDGRVIVR + SID" + gene 15232..15648 + /locus_tag="IE211_RS35125" + /old_locus_tag="GCM10010301_71060" + CDS 15232..15648 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330949.1" + /locus_tag="IE211_RS35125" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71060" + /product="pyridoxamine 5'-phosphate oxidase family protein" + /protein_id="WP_193450426.1" + /transl_table=11 + /translation="MPSMPRPDAVTVPDSVQAFLTGTALVAAFTTMRPDGTPHVAPVRF + TWDSDAQLARVMTVRSSRKARNLLATPGAPVALCQVDGFRWVTLEGTGTVVTDPERVAL + GARLYAKRYWSAPPTPSDRVVIEIAVDRVLSLNA" + gene complement(15887..16369) + /locus_tag="IE211_RS35130" + /old_locus_tag="GCM10010301_71070" + CDS complement(15887..16369) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_007389987.1" + /locus_tag="IE211_RS35130" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71070" + /product="nuclear transport factor 2 family protein" + /protein_id="WP_052731589.1" + /transl_table=11 + /translation="MTSSLTTDQSASVSAAESSAQVAGLLHRYLVSLDDERLDDAWTAG + LFTEDAVVAFPVSRHEGADGMAEYHRSALSAFAATQHLGSPAVVDVDGDRAVFRANLIS + THVHHPHHTPPEGDLPPLFATGTFVNGEARRTARGWRLSLLAFRLLWADGSPPPAR" + gene 16536..18008 + /locus_tag="IE211_RS35135" + /old_locus_tag="GCM10010301_71080" + CDS 16536..18008 + /GO_function="GO:0071949 - FAD binding [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_007385270.1" + /locus_tag="IE211_RS35135" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71080" + /product="FAD-dependent monooxygenase" + /protein_id="WP_029394635.1" + /transl_table=11 + /translation="MEFYDSDVIVVGAGPTGLMLAGELRLAGVSVVVLDKLSEPIQESR + ALGFSARTIEEFAQRGLMDRFGEVGVIPVGHFGGVPLDYRVIEGGSYGARGIPQARTEG + VLGGWARELGADIRRGCEVTGIEQTDASVTVTAAGADGPFSLRARHVVGCDGARSIVRK + LAGIGFPGTEPAIELRFADLAGVALRPRFSGERVAGGMVMVIPMGPDRCRVIYFDSSEP + LRTSPDPITFDEVAQTWQRLTGEDVSGATPLWVSSTTDVSRQADRYRHGRVFLAGDAAH + IHLPIGAQGMSAGVQDAVNLGWKLALDIKGQAPEGLLDTYHAERHPVGARILTNTLAQR + ILYLGGDEITPMREVLAELMGAHESVQRHLAGMVTGLDIRHDVGEGDHPLLGRRLPDRE + LVVDGEKTPFYALLRTARPVLLELGGDHGLRTAAAGWADRVDLVAAEFDGCEAPVDGIL + VRPDGYVAWVAGLGAGPDGLTAALGRWFGPTA" + gene 18065..18763 + /locus_tag="IE211_RS35140" + /old_locus_tag="GCM10010301_71090" + CDS 18065..18763 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_018383121.1" + /locus_tag="IE211_RS35140" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71090" + /product="antibiotic biosynthesis monooxygenase" + /protein_id="WP_019330946.1" + /transl_table=11 + /translation="MPIISAEDKHLTVLNLFTTDTPEKQAKLIEEMTKIVNAAAYEGWM + SSTVHSGVDGYGTLNFIQWRSGEDLEKRYAGEEFKHRTLPVFGEITTSIRLMQNEVAHT + LTSDALGGKIEIGPDRDDYTVFTLFPVTPEGQDEAVDALGPGQAFLADVPGFRAHVVLK + GLRARGLEGSFVISYSQWDSKEAFEVYRDQAPEEQADARKAAVARVRAVVTGEPYLNTY + RVVHTRSAGE" + gene 19021..19350 + /locus_tag="IE211_RS35145" + /old_locus_tag="GCM10010301_71100" + CDS 19021..19350 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019767153.1" + /locus_tag="IE211_RS35145" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71100" + /product="TcmI family type II polyketide cyclase" + /protein_id="WP_019330945.1" + /transl_table=11 + /translation="MHSTLIVARMAATSSNDVAQLFADFDATEMPHRMGTRRRQLFSYR + GLYFHLQDFDEDNGGELIEAAKADPRFVRISEDLKPFIEAYDPTTWRSPADAMATRFYS + WEASR" + gene 19347..20618 + /locus_tag="IE211_RS35150" + /old_locus_tag="GCM10010301_71110" + CDS 19347..20618 + /NRPS_PKS="Domain: PKS_KS(Iterative-KS) (5-395). E-value: + 3.7e-53. Score: 172.7. Matches aSDomain: + nrpspksdomains_IE211_RS35150_PKS_KS.1" + /NRPS_PKS="type: PKS/NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic (rule-based-clusters) T2PKS: + t2ks" + /gene_kind="biosynthetic" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_007385254.1" + /locus_tag="IE211_RS35150" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71110" + /product="beta-ketoacyl-[acyl-carrier-protein] synthase + family protein" + /protein_id="WP_029394634.1" + /sec_met_domain="t2ks (E-value: 2.3e-248, bitscore: 815.1, + seeds: 25, tool: rule-based-clusters)" + /transl_table=11 + /translation="MSGRRVVITGIEVIAPGGVGRENFWNLLSNGRTATRGITFFDPAP + FRSRVAAEADFDPYEHGLTPQEVRRLDRAAQFAVVASRGAVADSGLDIPSLDPHRVGVT + VGSAVGATMGLDQEYRVVSDGGRLDTVDHTYAVPHLYDYMVPSSFAAEVAWAVGAEGPS + TVVSTGCTSGIDSVGYAVELVREGSADVVIAGSSDAPISPITMACFDAIKATTPRHDEP + ECASRPFDKTRNGFVLGEGTAFFVLEELDSARKRGAHIYAEIAGYATRSNAYHMTGLRP + DGVEMAEAIDLALGEARLNPQSIDYINAHGSGTKQNDRHETAAFKRSLGDHAYRTPVSS + IKSMVGHSLGAIGSIEIAASALAMEYDVVPPTANLHTPDPECDLDYVPLVARDQLIDAV + LTVGSGFGGFQSAMVLATPERSLV" + aSDomain 19362..20531 + /aSDomain="PKS_KS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35150_PKS_KS.1" + /domain_subtype="Iterative-KS" + /evalue="3.70E-53" + /label="IE211_RS35150_PKS_KS.1" + /locus_tag="IE211_RS35150" + /protein_end="395" + /protein_start="5" + /score="172.7" + /tool="antismash" + 
/translation="VVITGIEVIAPGGVGRENFWNLLSNGRTATRGITFFDPAPFRSRV + AAEADFDPYEHGLTPQEVRRLDRAAQFAVVASRGAVADSGLDIPSLDPHRVGVTVGSAV + GATMGLDQEYRVVSDGGRLDTVDHTYAVPHLYDYMVPSSFAAEVAWAVGAEGPSTVVST + GCTSGIDSVGYAVELVREGSADVVIAGSSDAPISPITMACFDAIKATTPRHDEPECASR + PFDKTRNGFVLGEGTAFFVLEELDSARKRGAHIYAEIAGYATRSNAYHMTGLRPDGVEM + AEAIDLALGEARLNPQSIDYINAHGSGTKQNDRHETAAFKRSLGDHAYRTPVSSIKSMV + GHSLGAIGSIEIAASALAMEYDVVPPTANLHTPDPECDLDYVPLVARDQL" + gene 20615..21826 + /locus_tag="IE211_RS35155" + /old_locus_tag="GCM10010301_71120" + CDS 20615..21826 + /NRPS_PKS="Domain: PKS_KS(Iterative-KS) (48-401). E-value: + 9.4e-31. Score: 98.9. Matches aSDomain: + nrpspksdomains_IE211_RS35155_PKS_KS.1" + /NRPS_PKS="type: PKS/NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic (rule-based-clusters) T2PKS: + t2clf" + /gene_kind="biosynthetic" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330943.1" + /locus_tag="IE211_RS35155" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71120" + /product="ketosynthase chain-length factor" + /protein_id="WP_030970827.1" + /sec_met_domain="t2clf (E-value: 2e-209, bitscore: 686.3, + seeds: 26, tool: rule-based-clusters)" + /transl_table=11 + /translation="MTASVVVTGLGVVSPNGMGVKDYWAATLGGKHGIGRITRFDPTGY + PARLAGQIEDFDAEELLPSRLLPQTDRVTRLALVAADWALADAGADPAHLPEFDMGVIT + ASAAGGFEFGQGELQALWSQGSQYVSAYQSFAWFYAVNSGQISIRNGMKGPSGVVVSEG + AGGLDAVAQARRQIRRGTPLIVTGGVDASICPWGWVAQLACGRLTTSDEPDHAYLPFDR + DANGYVPGEGGAILIAEDADAARARGVRPYGEIAGYGATIDPRPGSGREPNLAKAIETA + LADADVNAADIDVVFADGAGDPAGDLAEARAVSTVFGDRGVPVTVPKTMTGRLYSGGAP + LDLAAAFLALRDGVIPPTVHIDPCADYPLDLVLGEPRPAELRTALVLARGAGGFNSAMV + VRAA" + aSDomain 20759..21817 + /aSDomain="PKS_KS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35155_PKS_KS.1" + /domain_subtype="Iterative-KS" + /evalue="9.40E-31" + /label="IE211_RS35155_PKS_KS.1" + /locus_tag="IE211_RS35155" + /protein_end="401" + /protein_start="48" + /score="98.9" + /tool="antismash" + /translation="LAGQIEDFDAEELLPSRLLPQTDRVTRLALVAADWALADAGADPA + HLPEFDMGVITASAAGGFEFGQGELQALWSQGSQYVSAYQSFAWFYAVNSGQISIRNGM + KGPSGVVVSEGAGGLDAVAQARRQIRRGTPLIVTGGVDASICPWGWVAQLACGRLTTSD + EPDHAYLPFDRDANGYVPGEGGAILIAEDADAARARGVRPYGEIAGYGATIDPRPGSGR + EPNLAKAIETALADADVNAADIDVVFADGAGDPAGDLAEARAVSTVFGDRGVPVTVPKT + MTGRLYSGGAPLDLAAAFLALRDGVIPPTVHIDPCADYPLDLVLGEPRPAELRTALVLA + RGAGGFNSAMVVR" + CDS_motif 21080..21103 + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE211_RS35155_0001" + /evalue="9.90E+01" + /label="PKSI-KS_m4" + /locus_tag="IE211_RS35155" + /protein_end="163" + /protein_start="155" + /score="-2.9" + /tool="antismash" + /translation="SGVVVSEG" + CDS_motif 21272..21310 + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE211_RS35155_0002" + /evalue="1.20E-04" + /label="PKSI-KS_m4" + /locus_tag="IE211_RS35155" + 
/protein_end="232" + /protein_start="219" + /score="15.0" + /tool="antismash" + /translation="FDRDANGYVPGEG" + gene 21895..22164 + /locus_tag="IE211_RS35160" + /old_locus_tag="GCM10010301_71130" + CDS 21895..22164 + /NRPS_PKS="Domain: ACP (12-83). E-value: 9.1e-13. Score: + 40.2. Matches aSDomain: nrpspksdomains_IE211_RS35160_ACP.1" + /NRPS_PKS="type: other" + /codon_start=1 + /gene_functions="biosynthetic-additional + (rule-based-clusters) PP-binding" + /gene_kind="biosynthetic-additional" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_015037163.1" + /locus_tag="IE211_RS35160" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71130" + /product="acyl carrier protein" + /protein_id="WP_019330942.1" + /sec_met_domain="PP-binding (E-value: 1.7e-08, bitscore: + 26.1, seeds: 164, tool: rule-based-clusters)" + /transl_table=11 + /translation="MASKSFTLDDLKRTLREAAGVAEGVDLDGDILDTEFEVIGYESLA + LLEAGSLIEREYGISLDEEAVGEANTPRSFIEVVNAQLAPAKAA" + aSDomain 21931..22143 + /aSDomain="ACP" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35160_ACP.1" + /evalue="9.10E-13" + /label="IE211_RS35160_ACP.1" + /locus_tag="IE211_RS35160" + /protein_end="83" + /protein_start="12" + /score="40.2" + /tool="antismash" + /translation="RTLREAAGVAEGVDLDGDILDTEFEVIGYESLALLEAGSLIEREY + GISLDEEAVGEANTPRSFIEVVNAQL" + gene 22177..22965 + /gene="fabG" + /locus_tag="IE211_RS35165" + /old_locus_tag="GCM10010301_71140" + CDS 22177..22965 + /EC_number="1.1.1.100" + /GO_function="GO:0016491 - oxidoreductase activity + [Evidence IEA]" + /NRPS_PKS="Domain: PKS_KR (8-164). E-value: 7.1e-11. Score: + 34.2. 
Matches aSDomain: + nrpspksdomains_IE211_RS35165_PKS_KR.1" + /NRPS_PKS="type: other" + /codon_start=1 + /gene="fabG" + /gene_functions="biosynthetic-additional + (rule-based-clusters) adh_short" + /gene_kind="biosynthetic-additional" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330941.1" + /locus_tag="IE211_RS35165" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71140" + /product="3-oxoacyl-ACP reductase FabG" + /protein_id="WP_030970829.1" + /sec_met_domain="adh_short (E-value: 6.5e-36, bitscore: + 115.1, seeds: 230, tool: rule-based-clusters)" + /transl_table=11 + /translation="MTDTTTQRVAVVTGATSGIGLASARLLGRQGHQVFIGARNAENVA + ATVKELQGEGIDADGTVVDVRDTESVNAWIQAAVDRFGSVDVVVNNAGRSGGGPTADIA + DELWDDVIDTNLNSVFRVTRAALTIGGLRAKDRGRIINVASTAGKQGVVLGAPYSASKH + GVVGFTKALGNELAPTGITVNAVCPGYVETPMAQRVRQGYAAAYDTSEDAILEKFQAKI + PLGRYSTPEEVAGLVGYLASDTAASITSQALNVCGGLGNF" + aSDomain 22201..22668 + /aSDomain="PKS_KR" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35165_PKS_KR.1" + /evalue="7.10E-11" + /label="IE211_RS35165_PKS_KR.1" + /locus_tag="IE211_RS35165" + /protein_end="164" + /protein_start="8" + /score="34.2" + /tool="antismash" + /translation="VAVVTGATSGIGLASARLLGRQGHQVFIGARNAENVAATVKELQG + EGIDADGTVVDVRDTESVNAWIQAAVDRFGSVDVVVNNAGRSGGGPTADIADELWDDVI + DTNLNSVFRVTRAALTIGGLRAKDRGRIINVASTAGKQGVVLGAPYSASKHG" + CDS_motif 22207..22269 + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE211_RS35165_0001" + /evalue="6.00E-05" + /label="PKSI-KR_m1" + /locus_tag="IE211_RS35165" + /protein_end="31" + /protein_start="10" + /score="15.7" + /tool="antismash" + /translation="VVTGATSGIGLASARLLGRQG" + gene 23006..23947 + /locus_tag="IE211_RS35170" + /old_locus_tag="GCM10010301_71150" + CDS 23006..23947 + /NRPS_PKS="Domain: Polyketide_cyc2 (4-146). 
E-value: + 2.8e-16. Score: 52.0. Matches aSDomain: + nrpspksdomains_IE211_RS35170_Polyketide_cyc2.1" + /NRPS_PKS="Domain: Polyketide_cyc2 (156-309). E-value: + 1.8e-08. Score: 26.7. Matches aSDomain: + nrpspksdomains_IE211_RS35170_Polyketide_cyc2.2" + /NRPS_PKS="type: other" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330940.1" + /locus_tag="IE211_RS35170" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71150" + /product="aromatase/cyclase" + /protein_id="WP_193450427.1" + /transl_table=11 + /translation="MTTREVEHEITIGAPADAVYQLLADVTNWPRIFPPTIHVDRTEAD + GDHERIHIWATANGQAKEWTSHRTLDRENLTITFRQEIPAAPVKHMGGTWIIEPLADDR + SRVRLLHDYSAIGDDPHDLLWIEQAVDKNSTSELAALKVNVEAAHAAAEELTFSFADTV + QIDGAAKDVFDFINEAQLWAERLPHVAVVRLSEDTPGLQELEMDTRAKDGSVHTTKSYR + VVFPHHKIAYKQVTLPALMTLHTGEWTFTEGDEATTASSQHTVTLNTANIARILGQEAT + VADARAYVHTALSTNSRATLAHAKAYAEQKKG" + aSDomain 23018..23443 + /aSDomain="Polyketide_cyc" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35170_Polyketide_cyc2.1" + /domain_subtype="Polyketide_cyc2" + /evalue="2.80E-16" + /label="IE211_RS35170_Polyketide_cyc2.1" + /locus_tag="IE211_RS35170" + /protein_end="146" + /protein_start="4" + /score="52.0" + /tool="antismash" + /translation="EVEHEITIGAPADAVYQLLADVTNWPRIFPPTIHVDRTEADGDHE + RIHIWATANGQAKEWTSHRTLDRENLTITFRQEIPAAPVKHMGGTWIIEPLADDRSRVR + LLHDYSAIGDDPHDLLWIEQAVDKNSTSELAALKVNVE" + aSModule 23018..23932 + /domains="nrpspksdomains_IE211_RS35170_Polyketide_cyc2.1" + /domains="nrpspksdomains_IE211_RS35170_Polyketide_cyc2.2" + /incomplete + /locus_tags="IE211_RS35170" + /tool="antismash" + /type="unknown" + aSDomain 23474..23932 + /aSDomain="Polyketide_cyc" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE211_RS35170_Polyketide_cyc2.2" + /domain_subtype="Polyketide_cyc2" 
+ /evalue="1.80E-08" + /label="IE211_RS35170_Polyketide_cyc2.2" + /locus_tag="IE211_RS35170" + /protein_end="309" + /protein_start="156" + /score="26.7" + /tool="antismash" + /translation="FSFADTVQIDGAAKDVFDFINEAQLWAERLPHVAVVRLSEDTPGL + QELEMDTRAKDGSVHTTKSYRVVFPHHKIAYKQVTLPALMTLHTGEWTFTEGDEATTAS + SQHTVTLNTANIARILGQEATVADARAYVHTALSTNSRATLAHAKAYAE" + gene 24008..25426 + /locus_tag="IE211_RS35175" + /old_locus_tag="GCM10010301_71160" + CDS 24008..25426 + /GO_function="GO:0071949 - FAD binding [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_018511397.1" + /locus_tag="IE211_RS35175" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71160" + /product="FAD-dependent monooxygenase" + /protein_id="WP_051890464.1" + /transl_table=11 + /translation="MMLAGELAHGGVGVVVVEKRRAPSTESRASTLHARTMEILDSRSL + LPEFGDPPNEPRGHFGGIPLDLTLPSSHPGQWKVPQTRTEVILGEWALSLGAELQCKHE + LTALDDSGDLVEAEAAGPDGRTLRLRCRYLVACDGEESTVRRLIGADFPGRDATRELLR + ADVAGIDIPGRRFERLEHGLAIAARRPDGVTRVMVHEFGSAARARPHGDASFEEITAVW + KRVTGEDISGGTPLWANAFGDASRQLTRYRHGRVLFAGDAAHRQMPVGGQALNLGMQDA + FNLGWKLALVVRGKAPQTLLDSYHDERHEVGRQVLANIRAQSLLLLGGPEVEPLRDLLT + ELIGQEDVRRRLAGMISGLDVRYDVGGPAHPLLGARLPCTEVRARRRLLTTTHLVRSGG + GVLLDLTGRPGRPPAVLDGWADRVTALDAQPSPGSSLQGTDRVLVRPDGHVAWAGPGTD + GLAEALTRWFGPPR" + gene 25529..27043 + /locus_tag="IE211_RS35180" + /old_locus_tag="GCM10010301_71170" + CDS 25529..27043 + /GO_function="GO:0071949 - FAD binding [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330938.1" + /locus_tag="IE211_RS35180" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71170" + /product="FAD-dependent monooxygenase" + /protein_id="WP_193450428.1" + /transl_table=11 + /translation="MEGTAADTDVIVVGAGPTGLMLAGELRLGGARVVVIEKLAAPTGQ + SRGLGFTARAMETFDERGLLPRFGQGETLATSPVGHFGGAQFDFTVLEDAHFGARGIPQ + GDTEAVLEGWAGELGADIRRGWEFVSLTDGFLDGDGVEITVRTPQGEERTLRASYLAGC + DGGSSRVRRAAGFDFPGTDATQGMYLADITGVELTPRFLGERLNNGMVMAAPLSQGVWR + IIVCPDGRPAHDRERTVTFEEVAAAWQDITGEDISHGGASWVSSFTNATRQASEYRRGR + VFLAGDAAHIHLPAGGQGLSTGVQDAANLGWKLASVIRGDAPRELLDTYHAERHPVGAR + LLMNTRAQGIVFLGGAESDPLRELMAELVRYDDVKRHLAGIVSHLDIRYDLADTATGPT + HPLLGRRMPPRLLVGADGETRIARLLHAGHGVLLDLADDETVRATAAGHADRVDVVTAV + AKPTDGPDALAGATAVLIRPDGYVAWTGTCAQGLETALERWCGPPR" + gene complement(27262..28224) + /locus_tag="IE211_RS35185" + /old_locus_tag="GCM10010301_71180" + CDS complement(27262..28224) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330937.1" + /locus_tag="IE211_RS35185" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71180" + /product="acetylserotonin O-methyltransferase" + /protein_id="WP_234311021.1" + /transl_table=11 + /translation="MVTRAVHVAAELKVAEALAEGPLSADELAGRVGADADALGRVLRL + LASNGVFATRPDGAFELTPMADALRADHPMSMRGIALLMGHPIHWEDWSGFPETVVTGE + PALPKLRGMHAFEFLTKNAEYGQVFFQGMGSMSASETEPILAAYDFSQFGTVVDFCGGQ + GALLAGILGAAPGCEGVLFDPRVEENGAAEFLAAQGVADRTKRVAGDLFDVPPGGADAY + VLKHIVHDWPEEQALRILRNVRAAIKPGGKLLIAEMVIPEQGDQPHSGKLVDLWLMLLV + GGRERTPGQYADLLARAGFRLERVVETAAAISLVEAVPV" + gene 28591..29706 + /locus_tag="IE211_RS35190" + /old_locus_tag="GCM10010301_71190" + CDS 28591..29706 + /codon_start=1 + /inference="COORDINATES: protein motif:HMM:NF033206.1" + /locus_tag="IE211_RS35190" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71190" + /product="ScyD/ScyE family protein" + /protein_id="WP_193450429.1" + /transl_table=11 + /translation="MANSRNSWTKILLAAGAAGAVAVPLTAGPVQAQPQAHRHGHTHTR + TEATVTVVASQLNNPRGVTALGDGGVLVAEAGAGLADCPVDQTCVGTTGSVYKVKGSFQ + GRVATGLASTAKGVAPGAPISANGPSDVVPDRFGGYVVVSGLGGTTESRAALGEGAQTL + GTVFRTRDHKVLADLTDHETRLNPDGGDVHANPWRLARSGSGYLATDAGANTVVRGNAD + GTTATEYLLPKNELPTGAAETVPTGIAKAADGTVYVADMSGGRVGASRVWKIAPGRQPE + ILATGMTNLIDLDLDRDGDLIALSYSAAALAGPPQPGALFEIDADSGAVTEIPTGDQLK + QPTGVAVDPCGKVYVTNNTLGTNGQLVRVNR" + gene 29956..31071 + /locus_tag="IE211_RS35195" + /old_locus_tag="GCM10010301_71200" + CDS 29956..31071 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_003962403.1" + /locus_tag="IE211_RS35195" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71200" + /product="inositol-3-phosphate synthase" + /protein_id="WP_127437809.1" + /transl_table=11 + /translation="MTSADETRTGVWLVGARGSVATTAVSGCAALAAGLLPPTGMVTET + PPFADCGLPALASLVFGGHDTATTPLPKRAEELAAQGVLPPWLPTAVQGELAAADEHIR + PGGPVPGDRRATEELIADFATDLRTFARTTGVARTVVVNVASTEPDPAHGAWPASSLYA + AAALRAGCPYVNFTPSTGLSHPQLAGAARASGLPYAGRDGKTGQTLLRSVLGPMFAQRA + LAVRAWSGTNLLGGGDGAALADPAAAAAKNAGKERVLTDTLGTRVEGEVHIDDVPALGD + WKTAWDHVAFDGFLGTRMVLQTIWQGCDSALAAPLVLDLARLLARAHERGLSGPLGELG + FYFKDPDAEGSALAEQYTRLLTLADRLGGTR" + gene 31068..32021 + /locus_tag="IE211_RS35200" + /old_locus_tag="GCM10010301_71210" + CDS 31068..32021 + /GO_component="GO:0016021 - integral component of membrane + [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330933.1" + /locus_tag="IE211_RS35200" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71210" + /product="UbiA family prenyltransferase" + /protein_id="WP_193450430.1" + /transl_table=11 + /translation="MSAQRPAHGPARDGHLHAWAELLRAPAALTVPGDVLAGTAAAGTR + PTGRTALAAGASLCLYEAGMALNDWADREEDATARPHRPLPSGRVRPGAALAAAGLLSA + AGLALAACAGRRALAVAGPLAATVWAYDLGLKHTPAGPAAMAAARSLDLLLGAAAGPGA + VRRAIVPAAFLGSHTLAVSLVSRRETEGGSSTAPLTALAAAGALTTVLAGRPTAHPAPD + ASTGAPTPATPADKATRAVRAALAASYAATFARPLAHAALNPSPELTQRAVGAGVRATI + ALQSGLMARAGAPGTGVLTAALAPLAAHLARKVSTT" + gene 32018..32863 + /locus_tag="IE211_RS35205" + /old_locus_tag="GCM10010301_71220" + CDS 32018..32863 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_018384637.1" + /locus_tag="IE211_RS35205" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010301_71220" + /product="sugar phosphate isomerase/epimerase" + /protein_id="WP_019330932.1" + /transl_table=11 + /translation="MSPLRLGYGTNGLTDLRLEDALRLLADLGYDGVGLTLDHMHLDPL + APDLAARTRHVARQLHRLGLAVTVETGARYVLDPRRKHGPSLLDDDPDARWARVRLLIR + SVRVAADLGAHAVHCFSGPRPAGLDQDTAWKRLADALGPVLDAAEDTGVPLAVEPEPGH + LLATLTDFHRLRTELGDPEPLGLTLDIGHCQCLEPLPPADCVRAAAPWLRHVQIEDMRR + GVHEHLPFGDGEIDFPPVLDALAATGYQGLTVVELPRHSHAGPELAAQSMRFLRNGGTR + " + gene 32860..33459 + /locus_tag="IE211_RS35210" + /old_locus_tag="GCM10010301_71230" + CDS 32860..33459 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019890542.1" + /locus_tag="IE211_RS35210" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010301_71230" + /product="EboA domain-containing protein" + /protein_id="WP_079126814.1" + /transl_table=11 + /translation="MTVVHTPPDADLPGEFAGLGTDARRWLTAARASATARDTDWELRF + AEAGRRCGTEHADAARVLLLTAARPDAETVTLLYHRGTAAERRAVLLALDGLDTEPAHA + LPLVEDALRANDTTLLAAALGPYAARHLDAHQWRHAVLKCLFTGVPVATVANLAARARA + DAELARMLRAYAAERTAAHRDIPADLDRVLALTQEQ" + gene 33461..>33697 + /locus_tag="IE211_RS35215" + /pseudo="" + CDS 33461..>33697 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_016327783.1" + /locus_tag="IE211_RS35215" + /note="incomplete; too short partial abutting assembly gap; + missing C-terminus; Derived by automated computational + analysis using gene prediction method: Protein Homology." + /product="hydrolase TatD" + /pseudo="" + /transl_table=11 + /translation="LRIFDPHIHMTSRTTDDYEAMHAAGVRAVVEPAFWLGQPRTSPAS + FRDYFDALLGWEPFRAAQYGIAHHCTIALNPKEA" +ORIGIN + 1 gatgtgcgac gccttcaaca tccccctcgt caccttcctg gacgtccccg gcttcctgcc + 61 gggtgtggac caggagcacg gcgggatcat ccgtcacggg gccaagctgc tgtacgcgta + 121 ctgcaacgcg accgtcccgc ggatctcgct gatcctgcgc aaggcctacg gcggcgccta + 181 catcgtgatg gactcccagt ccatcggcgc cgacctcacc tacgcctggc cgaccaacga + 241 gatcgcggtg atgggcgcgg agggcgccgc caacgtcatc ttccgccggc agatcgccga + 301 ggccgaggac cccgacgcgg tgcgcgcccg catggtcaag gagtacaggg ccgagctgat + 361 gcacccctac tacgcggccg aacgcggcct ggtcgacgac gtcatcgacc ccgccgagac + 421 ccgcgaggtg ctgatcgagt ccctggcgat gctgcgcacc aaggacgccg acgtgccctc + 481 ccgcaagcac ggcaaccccc cgcagtgacg gagtgatcga tggccagaca ggaacgtgcg + 541 gtgcgtacgc gggaagccct gatcaggtct gcggccgaga tcttccacga cgagggcttc + 601 cacgccgccg cgctcaccac gatcagctcc agggcggggg tgagcaacgg cgcactgcac + 661 ttccacttcg ccagcaaggc ggcgctggcg gacgcggtgg aggaagcggc cgcagacgtg + 721 ctgcgggccg tcgtcggccg gtgggacggg ggaccgcccg gggttctgca gtgcctggtg + 781 aacgccaccc atgagctggc ctgcgccctg cagaacgacg tggtgctgcg ggcgggtttc + 841 gagctgagcc gggaggccgg ccggcagccg cggaccgacc tgcggctgtg ctggcagaac + 901 
tgggtcaccg acatggtcgg ccgggccggg cgcggcggcg agctgcgcga gagcgtggcg + 961 ccggagagcg ccgtggccgc cgtggtcgcc gccaccagcg gcttcgaggt gctcggcatg + 1021 cgcaaccagg cgtggctgtc ccgcagtacg gtcgcccagt tctggctgct gctcctgccc + 1081 gcgctcgcgc cggcgccgca cgccgggctc tggcaggccg agggctcctg gaccgggacc + 1141 gccaccggat aattaaagac cgtacggtct cctttggggg cgccgaccgg acgcccgaat + 1201 cgggcattcg ggaaacggcg ccccttttcc ctgtgcgggg cttccccgtc ccgcgcgatc + 1261 cgcgccgatg gtttccagtc aatcgctccg gtgctggaca gggcgcggtt tcgcgcaaca + 1321 ctggcgaatc atgggcgggt tgagtgatga agggcctgtt cggagccggg ttgtgggcag + 1381 atgttcgatc agggtttgac tggatctggg cgatgccggg gcggcgcgtc gagtccacat + 1441 tgacaaaccg actgagctgt ttttttatcg ggactgcagc cgcgccgggt cccggtcagc + 1501 gggagacgcg caggccggcc accggcctgc ggccttgaaa acatggggga aacgtggaca + 1561 tcgaagtgct gggtgcgctg tcggtgcgcg agcacggggt gtcggtcgtg ccgaccgcac + 1621 ccaagccgcg ccaggtcctg gccctgctcg cgctcaacgc cgaccaggtg gtcccggtgg + 1681 ccgcgctcgt cgacgaactg tggggggaga acccgccgcg cagcgcgcgg accaccctgc + 1741 agacctacgt cctgcagctg cgcgagctga tggcccaggc gctggcccac ggccccgacg + 1801 aacgctgcac cgccaaggac atcctcgcca cggttcccgg cggctaccgc ctgcagacac + 1861 gcggcggcca cgtcgactac cgcgagttcg accagcgcgc cggcctcggc taccgggcca + 1921 tggacgccga ggactacgcc ggcgcggccc gccggctggc cgacgcgctc gcgctgtgga + 1981 acgggcaggc cctgaccgac atccaggccg gcctgcgcat cgacacggag gtcaagcggc + 2041 tggaggaggc ccggctgtgc gcgctcgacc agcgcatcga ggccgacctg cggctcggac + 2101 gccaccggga actgctgtcg gaactgacgg tcctggtcaa ccagtaccgc atgcacgaga + 2161 gcctgcacgg ccagttcatg ctggccctgc accgctcggg ccggcgcggt gaggccctca + 2221 acgtctacca gcggctgcgc agcacactcg tgcacgaact gggcctggaa ccctccgccg + 2281 cgctcagccg cctgcagcgc tccatcctca tggcccgccc cgagacgccc gccgccgccg + 2341 gcggcagcgg ccgcctcgtc acccgctgac ccggccgccg ttcctcacgg gccgcacgga + 2401 tggtgtgccg tgagccgggc gacctgctcg gacagctccc gctgctgccg gcgcagcagc + 2461 tccacgtggc cgcgcagccg caccagctcc tcccggagca ccccgagttc gtccccgccc + 2521 tgcccggccg tcccggcccc 
cgcccggtcg gcggagtccg ccgccggctc acaagagcgc + 2581 agaatcgctt ctctgcgacg gctgagcacc tggcccggcc cgatacccag atcacgggcc + 2641 agccgccggc gcgccctgtc gtaggtgctc agcgcctcgg cccggcggcc gcagcggtgc + 2701 agcgccgtca tcagctgctc gtggaaccgc tcacgcagcg ggtgcacggc gaccagctcc + 2761 tccagctcac cggtgatctc cgcacagcgg tccgcccgca gacagacgtc gtagagcacc + 2821 tccagggcac gcagccggct ctcctccagc agcgccgcct cggccgaaca gatgggcccc + 2881 tggccgctgc cctgcagagc gggcccgcgc cacagcgcca gagcctcacg cagcacctcc + 2941 accgcccggc ccggggcggt ggcggccagc tcgcgcccct ggcccaccag acggtggaag + 3001 cgctgcgcgt ccgtcgccgc cggccccagg ctcagggtgt agcccagcgg ggaggtccgc + 3061 agccacacat ggcgcggccc gcccgggccc ggggccggca gcagccgccg cagccgggcc + 3121 acatgggcct gcagcgcgtt ggcggcgttg accggagggt gcccgcccca cagctcctcc + 3181 acgaggcact cggcgggcac cgcctgcccg gcccgcacca gcagcgcgcc cagcagcgca + 3241 cgctgcttcg cacccgcggg aacgatccgt acgccgaaac gctcgtcgtg gatctgcacg + 3301 gaaccaagaa tccggaactc catctcgccc ttgcccggga agcacccgat tccgcatccg + 3361 ccggcccgga tcacggcccc ggtattccct cacgaatagc aggtgcactt ccttccgcac + 3421 ggactctatc cgcccatggt ttccggcacg ctattctcca gccggccccg taccggcgtg + 3481 ggcgattcct cgagagttct tcagcccccg cggccctggc ccgcccgaaa aggacaggac + 3541 accgcgaccg aaaagcccgc cccgcccgga cagccggtga cacccacccg ccgcccgcca + 3601 ggacaggaaa cccggtaccg cccgccagga caggaaaccc tccgccggcg gccggaacag + 3661 ggaaccctcc gccaccggcc gggacagggg acccggcgcc gccgcccggg acagggcatc + 3721 ctccgccgcc ggacggccgg ccgcgaccgt ccccggacgg ccggcaccgc tccccggccc + 3781 ccgcacgcgc cgggcgacgc cgtcaccgcg gccctcgccg cggctctcgc cgtggccgga + 3841 aaaacaaccg aaacgaatac gcccccgacc gtaccgggcg caaaatcacc acccgtgagc + 3901 ggtaacccgg cgacagtccg gacaaagccg cagcctcctg tcttccaggg gcgggtcaag + 3961 cataactaga gccgccctcg cgcgccgatc agagaaccgt ccggttcctc ttgccgcgga + 4021 gtggcgggaa tgacgcggac aggcgcaagg aggcagtatc aagtagctcc gtcaacggct + 4081 cagaaccagt cgccccgaaa cattgcggag ggaattatgg aaataaaagt cctgggtgcg + 4141 ttgaacgccg aattcgaggg aatctcggtc gtccccagtg 
cgggcaagcc ccggcagatc + 4201 ctggcgctgc tcgccctgta ccccggccgg gtggtgccgg tgcccaccct catggaggag + 4261 atctggggca ccgacctgcc gcagagttcg atgaccacgc tgcagacgta catcctccag + 4321 ctgcgccgcc tgctgggcac ggccatgggc cccgacgtgc cgggctcggc caaggacgtc + 4381 ctggccaccc gctacggcgg gtacctgctg cagatacccg ccgaggcggt cgacgcgttc + 4441 acctacgagc ggctcgtgac cgaaggacga caggcctacg aggacggcga ggacgagcgg + 4501 gcggccacgg tgctgcgccg ggcgctcgac ctgtgggacg gccccgccct ggtcgacgtg + 4561 cgggtcgggc ccgtcctgga gatcgaggcg atgcgcctgg agcagagccg gctggtggcc + 4621 cgcgagcgcc gcatcgacgc cgacctgcgg ctgggccggc acgtcgaact catcgccgaa + 4681 ctcaccgacc tgatcgcccg ccacccccag cacgaagggc tgcactccca ggccatggtg + 4741 gcgctctacc gctccggcag gcaggccgcc gccctcgacg tctaccgcag gctccgccag + 4801 cggctcatcg acgaactcgg cgtcgaaccc tccccgcaac tgcagcggct gcaccaggcg + 4861 atgctcgccg tcgacccgcg cctggacatc gtggcgggcc cccggcgcac ctccacgttc + 4921 gacctgtacg cggcatgagg cgccggtgcc ctcgccgccc ggccggaccc ggcaggccgg + 4981 cccccgcggg gtcggcggcc gccgcggccg ccggccgggg ccgccgcccg cctcgccgcg + 5041 gggttgagcg ccgcacgagc cgcgcgggcg acgctcggtg cgcaggggca cccacccccc + 5101 ggaccggcgc cgggacggca ccgcgtggca cgcgccacgc cggcggcgcc cgcccggatg + 5161 gggccggacc ggaccgttca caaggagggc gacgcatggc ttcccgctcc agggaccgtg + 5221 aggcgggcac cgcacggatc acactcacct gcctcgccca cgccggagcg ggcgtggcga + 5281 gctaccgcgg ctgggccgcg gcggtcggac ccggcatcga cgtggccgcc ctgccgctcc + 5341 cgggccgcga cagccgccgc cgcgaaccac gcctgaccga acgcgccggc ctgctcgccg + 5401 acttcctgcc gaccctgctt cagacggcac gccgcggccc ctacgcgctc tacggacaca + 5461 gcatgggagc cctcgtcggc tacacgctca cccgggccct ggccgactcc ggcctgcccc + 5521 cgctgttcct ggccgtcggc gcctgcccgc ccccgcacac caccaccgtc ctggcggacg + 5581 cggcggacct gcccgacgag gacctgctgc cgctgctcga cgagatcggc tccctgccgc + 5641 cgggcgcctc cgcctccccc ggcggactgt ggcggcgcac cttcctgccc gtcctgcgcg + 5701 acgacctgcg cctggcccga tcgctgcgca acgccgccct ggacccggtc accggagggc + 5761 cgctggacgt gcccgtcctg gtcttcgcgg gccgcgacga cccgctcgcc gcacccgccg + 5821 
ccctgcgcca ctggcagcag tggaccacca acctgatcga actgcacacc gtcgccgggg + 5881 gacacttctt cgcaagctcc tcgagcctgg cccagcacgt cggccgggcc tgccgcggcc + 5941 acgtgaccgc gctgcccaca ggaggcggcc ggtgaaccgc gtcgtgatca ccgggatcgg + 6001 cgtcgtcgcc cccggcgcgg tgggcaccgc cgacttctgg gacctgctca ccgtcggccg + 6061 caccgccacc cgccgcgtca ccctcttcga cgcctgcggc taccgctccc gcgtcgccgc + 6121 cgaggtcgac ttcacccccg ccgcccacgg attcgacctc gccgacaccg aacgcctgga + 6181 ccgcgcggca cagttcgcgc tggtcgccgc acgcgaagcc gtcgccgaca gcggcgtcgc + 6241 ggaccgcatc ggccgcaacc ccctgcgcac cggcgtcagc ctgggcagcg ccatcggctg + 6301 caccacgagc ctggccaccc agtacgccat cctcagcgac tgcggcacca cctggaccct + 6361 cgaccacacc gaggccgccg aatccctcta cgactacttc gtccccagct ccctggccgc + 6421 caccgtcgcc cgcgaccggg gcgcacaggg ccccgtcgcc ctcgtctcca gcggctgcac + 6481 ctccggcctg gacgccgtcg gccacggcgc cgacctgatc cgggaaggca gcgccgacat + 6541 cgtcgtcgcc ggcggaacgg aagcacccat cgtgcccatc gccatggcct gcttcgaccg + 6601 cctgcgcctc accagctccc gcaacgacga ccccgccacc gccagccgcc ccttcgaccg + 6661 cacccgcgac ggattcgtgc tcggcgaggg cgccgccgta ctggtcctgg aagaactcga + 6721 acacgcccgc cgccgaggcg cccgccccta cgcggaactg tccgccgtca ccgcccacag + 6781 cagcgcccac cacatgacgg gactgcgccc cggggcactg gagatggccg acgccatccg + 6841 cgccgccctc gaccaggcac ggctgaaccc cgccgacgtc gactacatca gcgcccacgg + 6901 cgcgggaacc cggcacaacg accggcacga gacacacgcc ctcaaggaaa gcctgggcgg + 6961 cagcgcccac cgcgtgcccg tcagctccat caagtcgatg atcgggcacg ccctgggcgc + 7021 cgccggcgcc ctggacctgg ccgccagcgc cctggccatc cggcacgaca ccgtcccgcc + 7081 caccgccaac ctgcacgaac ccgaccccac ctgcgacctc gactacaccc ccctgttcgc + 7141 ccgggaacag cgcaccagca ccgtcctcac cgtcgccagc ggcttcggcg gcttccacac + 7201 ggccgccgtc ctcacccggc cccggctcaa ggaggcggca tgaccaaggc gacccaggcc + 7261 cccgaaccgg cccggcccgc cggcgccgag ggcccccgcc agaccctggt caccggcatc + 7321 ggagtcgccg cacccaacgg cctgggcacc agagcctggt gggacgccgt gctgtgcggg + 7381 cgcaccggac tgggtcccat cacccgcttc gacgcctccg gctaccccgt acgcatcgcc + 7441 ggcgagatcc ccggcttcgt 
cgacgaggac cacatcccca gcagactgct gccctccacc + 7501 gaccgcggca cccgcatcgc cctggtcgcg gccgaagaag cactgcgcga cgcgaacgtg + 7561 agcccggccg acctgcccgc atacggcgcc ggcgtgatca ccgccagctc cgcgggcggc + 7621 gccgaattcg gcgaacgggg actggccgcg ctgtggagca aaggcgccca gcacgtcagc + 7681 gcctaccagt ccttcgcgtc cttccacgcg gcagcccccg cacagatctc catccggcac + 7741 cggctgcgcg gccacggctc gaccgtcgtc agcgaacagg ccggcggcat cgacgcactc + 7801 gcccgcgccc ggcggcggat ccgcgacggg gcatgcctca tggtcaccgg cgggatcgac + 7861 tccacactgt gcgcatgggg ctgggccgcg cacctggcgg acggccggct cagccccgcc + 7921 accgaacccg cccgggccta ccggcccttc gcggccacgg ccgacggcca cgcggtcggc + 7981 gagggcggcg ccctactggt cctggaggac gcccgggccg ccgcccgccg gggcgccacc + 8041 ggctacggcg tcatcgccgg ctgcgccgcc accttcgacg gccccgaccg ccccacactg + 8101 cgccaggccg cggaactcgc cctggccgac gccggcctgg cccccgaaca cgtggacgtg + 8161 gtcttcgccg acggcgccgc cgagcggcgc gccgacctcg tcgagagcca ggcgctgtgc + 8221 gcgctgttcg gaccctacgg agtaccggtc accgtgccga agacgatgac cgggcggctg + 8281 ggcgcgggcg gctcggccct ggacgtggca gccgcgctgc tcgccctgcg cgagaaggtc + 8341 gtacccccga ccaccggaac cggacgcgtc gccgacgact gcccgctgga cctggtcacc + 8401 ggggccccac gggaactgcc ccggctgcga gtggcgctgg tactggcccg cggacggggc + 8461 ggcttcaact ccgccgcagt gctccaggcc cctcagacgg agtgacggcc cgcccctaat + 8521 gagacggagc ggaacacacc ggcggccagg aacggcccgc ggccccgcgg accgcgcaaa + 8581 gggaaacacc cccggcgccg actccccggc cctcctggct ccccggcccg cccgggctcc + 8641 cggccttccg gctcctggcc tcctggcctt ccggcctccc ggccttccga gctcccggcc + 8701 ctcccgggct cccggcctcc cgggctcccg gcctcccggg tccctggcct tccggccttc + 8761 ccggtctctt ggcctcccgg ctcccggccc ccggccgttc gtctctgccg gctgcccggc + 8821 gttccgccgc cccgctcggg gcacgcgtgg tggcgtcctg gcctagcgga cgtcgtggtc + 8881 ctcgtcggtc agttcgcggg ccaggcgccg ctccaggtcg gtcagaggca tgtcgtcgcg + 8941 cacccgttcc gggtgtcccg gcggcggccg gtgccaccgg atgcgcgggt gcggaggggg + 9001 gtcgtaccgg tccgtctcgc cggccaggca gagccgcccg taggcgacca ggccctccca + 9061 caggccgcgg atgcatcggc gcgggcggaa gcgtctcatc 
cctgccacct cccggatcgc + 9121 ttctggggcc gcaggccacc gtcggtcacc gggctcgagg acccgacggt tcccgctgga + 9181 gcgcgtgccc ctgggcgccg ctccagcggg acacgagcgc gctaccggca catccctagc + 9241 ggttccggac agggtcgaga aaagccgcag tacgacgggc ccggccgcac cggcccgccc + 9301 cggtggaccc ggcacgcgtg cggcccagcg gggaccggcc cggcggcgac gccttggacg + 9361 acggagggtg tggacgatgc cggatgcgcg agtgcaccgt actgcctgtg agatgacggc + 9421 cccggccccg agcggagtgc tctacggcct gatcgcggac gccaccgtgt ggccgctgtt + 9481 cttccctccc agcgtccacg tggaacaact ggacttcgac gggacacggg aacggctgcg + 9541 catgtgggcc gtggcgggcg accggatcag ctcctgggtc tcccaccgcc gcctggacgt + 9601 cggacagcgg caggtggagt tccgccagga acggccctcc gccccggtcg agacgatgac + 9661 cggcctgtgg accgtcgagc ccctgggcga cggctcccgg gtgaccctgg aacacgcctt + 9721 caccgtcatc ggcgacgctc cggccgacgc ggcctggacc gagcgggtca cccgcgccaa + 9781 cagccgcgcc cagctccagc gcctggcctg gctcgccgag cgctggaccc ggctggacga + 9841 cctcgtgatg tccttcgagg acaccgtccg ggtcaacgtg cccgccgagc tggtcttcga + 9901 cttcctctac cgggccggcg actggcccga cgacctcgcc ggcacccgcc ccctgacggt + 9961 gcaggaggac acccccggca tccaggtcct cgccctggac ggccggtcgg ccaccggcgg + 10021 cgaggcggtg cgcatcagct tccccgccgc cggccgcctc gtgcacaaac acacccgcac + 10081 atccggaccg ctggccgcgt acaccggcga gtggaccatc gagccccagc ccggagccgg + 10141 cctcgacgtc accgtgcggc acgacgtgct gctcaacgac gacgccgcac tggaccagga + 10201 cgccgcccgg cgggcgtgcg acgaggtcgg ccgggccggc cgccgcctgc tggagcacgc + 10261 cgtgcgccac gcctccgacg cggtgcgggt cctgtgaccg ccgccctcga cacggccgcc + 10321 gggccgacgg ggcagacccc cgccccgcgc agcaccaccc ccacacccga acacaccacc + 10381 cccacacccg aacccgaaca caccacccgc gcacccgccc ccgaatccca acacaccacc + 10441 cccgaacgcg agtgcgccgc ccgcgtaccc gaacccgagc gcgccgcccg gctggaggcc + 10501 gcgctcggcg accccttcga cccggccaac ccgcacggac acctcgccct ggtccgggcc + 10561 gacgacaccc gcgaggcacc acacgccacc gaggcgctcc tgaccgagca cggcctgtcc + 10621 gccgagttcg tcccccacga cctcggcgga cgcctgaggg acctcgaaga gctggcccgc + 10681 gtgctgcgcc cgctcttccg ccgcgacctg gccctcggct acggcttcgg 
catcacctcg + 10741 ctgttcgccg cgtcctcggt gtggaccgcg ggcgaccccc accagcgcgc ggccctcgcg + 10801 gacgtcctgc tcggcggagg ccgggtcgcg atcgtgcacc gggaggtggc acacgccaac + 10861 gccatcctgc gccgcgaggt ccgcgcacaa cgccctgcgg gcggcggctt cctgctcaac + 10921 ggcagcaagg acgccgtcat gaacgccgac cgcaccgaca ccttcgtcgt ctacgcccgc + 10981 acctccgccg gctccggctc cgccagccac tcggtgctcc tgctgcccgg accacccgcc + 11041 tccggagaag tgcgccggct ggcgcgggtg gagatgcccg gcatgcgcgg ggcccgcttc + 11101 cacggactgc gcctggccga cgtacgactg cccgacagcg ccctggtcgg ctcgctcggc + 11161 gagggcgtca ccctggccct gcgcagcttc cagatcagcc actgcctcat cccgggcacg + 11221 gtgctcgcgg gcgtggacag cgtcctgcgg ctcgcggtgc gcgccgccac cgagaaccgg + 11281 cccgacggac ggcccgcccg ccgctggcac aaggcactca gcggggtctt cgcggacctg + 11341 ctcgcctgcg acgccatggc cgtcacggga ctgcgggcgc tcagcctcgt accccagcac + 11401 gcccatctgc tcgcggcggc ggtcaaatac accatgccgg acctgctgcg cgaggacctg + 11461 gaagaactcg ccgccgtgct cggcgcccgc ggctacgacc gcggcccgct gtacggcggc + 11521 ttccagaaac tcgcccgcga cctgcccgtg gccggactcg gccactcggg aacggccgtc + 11581 tgccaggcgg tgctcgtacc ccagctgccg gccctggcac gcacggcatg gttccggacc + 11641 gccgaaccga gcgccgcact gttcctgccg ggcgcgccgc tgccaccgct cgaccaccgc + 11701 aggctgacgc actccgggac cgacgacccg ctcacggcca ccctgatcgg ggccgccgaa + 11761 cggctggccg cacggacggg gacacaacca ctgcacgccg ccctcgccgc gctggcccgc + 11821 gccctggtgg aggagctgcg ggtgctgcgc gcgcgctgcg cggccctgcc ggccgccggg + 11881 agcaccgtgt tcgacccgct ggcctgcgcc ctggccgacc ggtacgccct gctgctgtgc + 11941 gccgccgcct gcctcggggt ctgggaggga caggcggacg gtgacggctt cctcgccgac + 12001 ccggcctggg cggtgctcgt cctcagccgc atcggccgca gactcggcat cgccgtaccc + 12061 gagacaccgg cggacgcgga acaggcggtc ctggccgagg cgctgggacg ctgccggcac + 12121 ggccgcagtc tcgacctgta cgacacccca ctggccggct gaccgagacg gcacggcaag + 12181 acgcccccca cctcggcccg ccccccccaa agggggagcg ggccggcgag aaccacacgc + 12241 agggagacga cgatggagcg ggtcacatgt gccgcgcccc ttcacgtgcc gcggccccac + 12301 ggcccctggc ccgcggtgcg cgaggacctc ttccggcacg gcaacgcact 
ggtctgcacg + 12361 acgtggagcg aatggctgcc cagcgtgctg accaccccgc ggctgcggga actgctcggc + 12421 gacgactggc agcgctaccg gcgtacccgc gacgccgcgg tgcgctaccg gttcgccgcc + 12481 tcccgcatgc tgatcaagta cacggcggcc gccgccctgg ccgtcccgcc cgagtacctg + 12541 gacctggcct accggctggg cggccggccc tacctgcgcg gcttcgacca gatcgaactg + 12601 agcctgagcc acaccgggga cgtcatggcc gtcggcctga gccgcatcgg ccggatcggg + 12661 gtggacgtgg aaccggccga gcggcccgta cggctggacc tgctcgagac ccaggtcttc + 12721 acaccggccg aggcccggga actggccgag ctgcccgaag gcgagcggac cgcccacgca + 12781 ctgcgcctgt ggaccctgaa ggaggcctac agcaaggccc tcgggcaggg actgcggttc + 12841 ggcttcaagg agttcggctt ccggcagggc cggctgagcg cacccgacgg cagccgggtc + 12901 acccgcgacg agtggggctt cgccacctac cccgtcatgg accgcttcct gctcagcgtg + 12961 gcctgccaca acgccggact gagcaccgcc ggggacacct ccgtggggac catgctggac + 13021 caggggttcc tgtcggcgat gacggacacg ggacagcagt agcgggcgtt ccggtacggc + 13081 cccaccgctg cagaagcgtt tctccgcccg gtcgcgagca gcgtcagcgg gccgtcaggc + 13141 ccatggcagc caccggtgcc acgatgccgg gggagtaccg cgccccctca acgggcccgc + 13201 cgctccccga gaaccgtgcg cacggccggc ccgtgcggga ggaggcagcg acgaggaggg + 13261 cgtcccatgg cgtggcacag ccgcccgcgt gccctgcggg gccccggcac ggcccgtccg + 13321 ccgggcgtac cggccctgtg gccccacctc gacctgctgg gcgacttcgg cgggcgccgt + 13381 ggcgccgggc gccacgtcga gcagctcgtc tggcgctggc acggcccgct ggacaccgag + 13441 cggttcaccg cggcctggca gtcggtcgtc gaccgcgaga gcgtgctgcg ggccgccctg + 13501 gcccccgggc cccggccgca cctggtcctg cacgagcacg cccacggcga cgtcgtgcgc + 13561 caccgtgcgg gcggcgccgg atgggaccgg ctgctggagc gggaccgccg gcgcggcctc + 13621 gaccccagcc gcccctgccc gctgcgcgtc accctcgtgg agcgcaccga cgacccggcc + 13681 ggtgccgggc cggtgacccg ggtggtcctc accttccacc acgcgctgct ggacgcgtgg + 13741 agcgtgtgcc tgctgatgca ggagctgtgc cgggcctacc tcgccggcgg cgagctgccc + 13801 ggcggcgagc gccgccccga cctgcgcgac tgggcgggct ggctccagcg gcaggacccc + 13861 gccggagccc gggacttctg gcggggcacc gtgcccgacg gaccggtcgc cgtgctgccc + 13921 gcccggcccg gcccgcgcac ccgccagcgg ggccggggca ggaccgaggt 
acggctgagc + 13981 cccgccgagg ccgaacggct ccaccgctgg gccgccctgc gcgccgtacc cgactccagc + 14041 gccctggaga cggtctgggc gctgctgctg taccgcgcgg ccgggcccgg cggggccgcg + 14101 acggtgggct tcggcgtcac cgtctccggc cgcggcatca ccctggactg cgccgagcgg + 14161 ctgcccgggc cgctgcgcaa ctgcctgccg atggtggtcc gcgtggaccc cggcgagacg + 14221 gtcggccggc tgctgacggc cctgcgggac cgggcgctgg acatggccgc ctacgaatgg + 14281 gtctccaccc gccggatcca ccgctggacg ggccgctgcc ccgacgggga actgctgcag + 14341 agcgtggtct cggtggacag acttccgcgc ccgccgggca acctgaggaa cgaactcgcc + 14401 gacgccggca tcgcgctgga gccggaaccg gcgcacggcg cctgccccga cctgcccgtc + 14461 gccctgctgg tccgtcccgg cggcgacggc cgcctcacct tctgcgtcga ccacgaccgc + 14521 aaccggatct ccgacgccga cgcccgcctg ctggccgggc actgcgcccg gctgctgcgg + 14581 cacctgcccg gcaccgacga ggccaccacc aacggggccg tgctggacgt gctcgccggt + 14641 gaggcactgc cgcgcatcgc gccgcggccc tcaaggccgc gaccggccgg gtcctggctc + 14701 cggccgcgct ccacttcctc cggggcggcc gtcgaccggg ccgcgagcca cccttgacac + 14761 tctgggccga ccgatcaccg aggaggtgcc catgcacgaa ggcgacgacg aacggcacga + 14821 gggtgacacg gcccgatgcg ccagggcggt ccacggcggc ccgccgcccc tgcggctggc + 14881 gggcgcggag gagcgcgagg acgaggacgg acgcgtcatc gtccgcagca tcgactagcc + 14941 cggacacccg aaacgacgca cgacgcacag cgcacgaccc acgggcgcac agcgcacagc + 15001 gcacgaccca cgggcgcaca gcgcacggcg cacagcgcac ggcgcacagc gcacggcgca + 15061 cggcggaggc gggcggcggg ccgggcgggg ggagccgggt ccagcgaggg tcgagcggcc + 15121 cccggcaagc tggccccgga cccacggccg cccccgacgc ccgcgcgaca gccgcccgga + 15181 gcacgggaag cgcccacgca cgcagcaccc tctggaagcg aggacccgcc catgccgtcg + 15241 atgccgcgcc cggacgccgt gacggtgccc gactccgtcc aggcgttcct gaccggtacg + 15301 gcgctcgtcg ccgcgttcac gacgatgcgg ccggacggca caccgcacgt ggcccccgtg + 15361 cgcttcacct gggactcgga cgcccagctc gcgcgggtga tgacggtgcg ctcctcccgc + 15421 aaggcccgca acctgctggc cacgcccggc gccccggtgg cgctctgcca ggtggacggc + 15481 ttccgctggg tcacgctgga ggggaccggc acggtcgtga ccgaccccga acgggtggcg + 15541 ctcggagcac ggctgtacgc caagcggtac tggtccgccc cgccgacccc 
gtccgaccgg + 15601 gtggtcatcg agatcgcggt cgaccgcgtc ctcagcctga acgcctgaac gcctgaacgc + 15661 ctgccccgcc ggctccgcac caccaccccc accacccacc acccatcacc gcccccatcg + 15721 ccccccaccg cccccgccgc cccggaccgc gtccggggcg gcacgcgcgt gtaccgcggc + 15781 gcggcgccgc cggacgtccc ggcgccgccg caccacggcc gcggtcaccc caccgcgccc + 15841 gcggtcgccc caccgcggct cggggtcacc gcaccgcggc ccggggtcac cgggccgggg + 15901 gaggggagcc gtccgcccac agcagccgga aggccagcag gctcagccgc cagccccgcg + 15961 cggtgcggcg cgcctcgccg ttcacgaacg tgcccgtcgc gaagagcggg ggcaggtccc + 16021 cctcgggagg cgtgtgatgc gggtggtgca catgggtgga gatcaggttc gcccggaaca + 16081 cggcccggtc cccgtccacg tcgaccacgg ccggagaacc gaggtgctgg gtggccgcga + 16141 acgccgacag cgccgaccgg tggtactcgg ccatgccgtc ggcgccctcg tgccggctga + 16201 ccgggaacgc gacgaccgcg tcctcggtga acaggccggc ggtccaggcg tcgtcgagcc + 16261 gctcgtcgtc gagcgagacc agataccggt gcagcagacc ggcgacctgt gcgctcgact + 16321 cggcggcgga cacggatgcg gattgatcgg tcgtcagact ggaggtcatg ccagagaatg + 16381 ccgtcacccc acgcgcccgg ggcaagaccg cttgtcgaag ctctgacgta ccagacgttt + 16441 ttctggcatt gactgcacaa gacctgtaac aacgcctatt tacagccctc gtaagccctc + 16501 gcactattga tggaacacca gtgagaagag ggcagatgga attctacgat tcagatgtca + 16561 ttgtcgtggg agccggtccc accggtctta tgctcgcagg tgaattgagg ctcgctggag + 16621 tctcggtggt ggttctcgac aaactttccg agccgattca ggaatcccgc gccctgggtt + 16681 tctcggcgcg gaccatcgag gaattcgcgc agcgcgggct gatggaccgg ttcggcgagg + 16741 tcggagtcat cccggtcggc cacttcggcg gcgtcccgct cgactaccgg gtgatcgagg + 16801 gcggttcgta cggggcgcgc ggcatcccgc aggcccgcac cgagggcgtc ctgggcggct + 16861 gggcgcgcga gctgggcgcc gacatccgcc gcgggtgcga ggtcacgggc atcgagcaga + 16921 ccgacgcctc ggtgaccgtc accgccgcgg gcgccgacgg ccccttctcc ctgcgcgccc + 16981 gccacgtggt gggctgcgac ggtgcccgca gcatcgtgcg caagctcgcg ggcatcggct + 17041 tccccggcac cgagccggcc atcgagctgc gcttcgccga cctggccgga gtggcgctgc + 17101 ggccccggtt cagcggggag cgcgtcgccg gcggcatggt catggtcatc ccgatgggcc + 17161 cggaccgctg ccgcgtcatc tacttcgaca gctccgagcc gctgcgcacc 
agcccggacc + 17221 cgatcacctt cgacgaggtc gcccagacct ggcagcgcct gaccggcgag gacgtcagcg + 17281 gcgccacccc gctgtgggtc agctccacca cggacgtcag ccgccaggcc gaccggtacc + 17341 gccacggccg cgtcttcctg gccggcgacg ccgcgcacat ccacctgccg atcggcgcgc + 17401 agggcatgag cgcgggcgtg caggacgccg tgaacctcgg ctggaagctc gccctcgaca + 17461 tcaagggcca ggcgcccgaa gggctgctcg acacctacca cgccgagcgc caccccgtcg + 17521 gggcccgcat cctgaccaac accctcgccc agcgcatcct ctacctcggc ggcgacgaga + 17581 tcacgccgat gcgcgaggtg ctcgccgagc tgatgggcgc ccacgaatcc gtccagcgcc + 17641 acctggccgg catggtcacc ggcctggaca tccggcacga cgtcggcgaa ggcgaccacc + 17701 ccctgctcgg ccggcgcctg ccggaccggg aactggtcgt cgacggtgag aagaccccgt + 17761 tctacgcgct gctgcgcacc gcacgccccg tgctcctgga actcgggggc gaccacggcc + 17821 tgcgcaccgc ggccgccggc tgggccgacc gggtcgacct cgtcgcggcc gagttcgacg + 17881 gctgcgaggc ccccgtggac ggcatcctcg tccgccccga cggctacgtc gcctgggtcg + 17941 ccggcctcgg cgccgggccg gacggtctca ccgccgccct cggccgctgg ttcggcccca + 18001 ccgcctgacc gtcgcgggcc gcgcagcgac acccaccgca cccaccaagg aaagcgaagg + 18061 acccatgccc atcatctccg ccgaggacaa gcacctcacc gtcctgaacc tgttcaccac + 18121 ggacactccc gagaagcagg ccaagctgat cgaggagatg acgaagatcg tcaacgcggc + 18181 cgcgtacgag ggctggatgt cctccaccgt ccactcgggc gtcgacggct acggcaccct + 18241 caacttcatc cagtggcgca gcggcgagga cctcgagaag cgctacgcgg gcgaggagtt + 18301 caagcaccgc acgctcccgg tcttcggcga gatcaccacc tcgatccggc tgatgcagaa + 18361 cgaggtcgcc cacacgctga cctcggacgc cctcggcggc aagatcgaga tcggaccgga + 18421 ccgcgacgac tacaccgtct tcaccctctt cccggtcacc cccgaggggc aggacgaggc + 18481 cgtcgacgcc ctcggccccg gccaggcctt cctcgccgac gtgcccggct tccgcgccca + 18541 cgtcgtgctc aagggcctgc gcgcccgcgg cctggaggga tccttcgtca tctcctactc + 18601 ccagtgggac agcaaggagg ccttcgaggt ctaccgcgac caggcccccg aggagcaggc + 18661 cgacgcccgc aaggccgccg tggcccgcgt ccgcgccgtc gtcaccggcg agccctacct + 18721 caacacctac cgggtcgtgc acacgcgctc tgccggcgag tgagcccggc gcgccggacg + 18781 gcgcacggga cccgcacgcc ttcggacctc gtccccggca cccgggggcg 
caggcccgaa + 18841 ggcgtgctcg tgtgcacccg ccgcttcggc tcaggcggga ctcaggcagg ccccgtgcgg + 18901 ggcgctaccg ccaggccgca ccgtcgaagg cggtggcgcc gcagcccccc gggccgccgg + 18961 gccgggccgg tccgcccgct tcgagaaccc ttccgcgcaa ccgccgagga gctaatccct + 19021 atgcacagca cgctgatcgt cgcccggatg gcggccacct cgagcaacga cgtggcccag + 19081 ttgttcgccg acttcgacgc caccgagatg ccgcaccgca tgggcacacg gcgccgccag + 19141 ctcttctcct accggggcct gtacttccac ctccaggact tcgacgagga caacggcggt + 19201 gaactgatcg aggccgccaa ggccgacccg cgcttcgtgc ggatcagcga ggacctcaag + 19261 cccttcatcg aggcctacga ccccacgacc tggcgctcgc cggccgacgc gatggccacg + 19321 cgcttctaca gctgggaggc ctcccgttga gcgggcgacg cgttgtgatc accgggatcg + 19381 aggtgatcgc ccccggcggt gtcggcaggg agaacttctg gaacctgctg agcaacggcc + 19441 gcaccgcgac acggggcatc accttcttcg accccgcccc cttccgctcc cgggtggccg + 19501 ccgaagcgga cttcgacccc tacgagcacg gcctgacccc gcaggaggtc cgccgcctgg + 19561 accgggccgc gcagttcgcc gtcgtcgcct cacgcggcgc cgtcgccgac agcggcctcg + 19621 acatcccctc cctggacccg caccgcgtgg gcgtcaccgt cggcagcgcc gtcggcgcca + 19681 cgatgggcct ggaccaggag taccgggtgg tcagcgacgg gggacggctg gacacggtcg + 19741 accacaccta cgcggtcccg cacctgtacg actacatggt gcccagctcc ttcgccgccg + 19801 aggtcgcctg ggcggtgggg gccgaaggcc ccagcaccgt ggtctccacc ggctgcacct + 19861 ccggcatcga ctccgtcggc tacgccgtcg aactggtccg cgagggatcg gccgacgtcg + 19921 tgatcgccgg ctcctccgac gcgccgatct caccgatcac catggcctgc ttcgacgcga + 19981 tcaaggcgac caccccgcgc cacgacgaac ccgagtgcgc ctcccggccg ttcgacaaga + 20041 cccgcaacgg attcgtcctc ggcgagggaa ccgccttctt cgtcctggag gaactcgaca + 20101 gcgcccgcaa gcgcggcgcc cacatctacg ccgagatcgc cggctacgcc acccgctcca + 20161 acgcctacca catgacgggc ctgcgccccg acggcgtgga gatggccgag gcgatcgacc + 20221 tggccctggg cgaggcccgg ctgaacccgc agtccatcga ctacatcaac gcccacggct + 20281 cgggcaccaa gcagaacgac cggcacgaga cggccgcgtt caagcgcagc ctcggcgacc + 20341 acgcctaccg caccccggtc agctccatca agtcgatggt cgggcactcg ctcggcgcga + 20401 tcggctccat cgagatcgcc gcctcggcac tcgccatgga gtacgacgtc 
gtcccgccca + 20461 ccgccaacct gcacaccccc gaccccgagt gcgacctcga ctacgtgccc ctggtcgccc + 20521 gcgaccagct gatcgacgcg gtcctcacgg tcggcagcgg attcggcggc ttccagagcg + 20581 ccatggtgct cgccaccccc gaaaggagcc tcgtatgacc gcctccgtgg tggtgaccgg + 20641 cctgggcgtc gtctcaccca acggcatggg ggtgaaggac tactgggcgg ccaccctggg + 20701 cggcaagcac ggcatcggcc gcatcacccg cttcgacccc accggctacc cggcccgtct + 20761 ggccgggcag atcgaggact tcgacgccga ggaactgctg cccagccggc tgctgccgca + 20821 gaccgaccgc gtcacccggc tggccctggt ggccgccgac tgggcactcg cggacgccgg + 20881 cgccgacccc gcgcacctgc ccgagttcga catgggcgtc atcacggcct ccgccgcggg + 20941 cggcttcgag ttcggccagg gcgaactgca ggccctgtgg agccagggca gccagtacgt + 21001 ctccgcctac cagtccttcg cctggttcta cgccgtcaac agcggccaga tctccatccg + 21061 caacggcatg aagggcccct ccggcgtcgt cgtcagcgaa ggcgcgggcg gcctggacgc + 21121 cgtcgcgcag gcccgccggc agatccgccg gggcaccccg ctgatcgtca ccggcggcgt + 21181 cgacgcctcc atctgcccct ggggctgggt ggcccagctg gcctgcggcc ggctcaccac + 21241 cagcgacgaa cccgaccacg cctacctgcc cttcgaccgc gacgcgaacg gctacgtccc + 21301 cggagagggc ggcgcgatcc tcatcgccga ggacgccgac gccgcacgcg cccgcggcgt + 21361 ccgcccctac ggcgagatcg ccggctacgg agccaccatc gacccccggc ccggcagcgg + 21421 acgcgaaccc aacctggcca aggccatcga gacggcactg gccgacgccg acgtgaacgc + 21481 cgccgacatc gacgtggtct tcgccgacgg cgccggcgac ccggccggcg acctcgccga + 21541 ggcccgcgcc gtcagcacgg tcttcggcga ccggggcgtg ccggtgacgg tgcccaagac + 21601 catgaccggg cgcctgtact ccggcggcgc gcccctggac ctggcggccg cgttcctcgc + 21661 cctgcgcgac ggcgtcatcc cgcccaccgt gcacatcgac ccgtgcgccg actaccccct + 21721 cgacctggtc ctgggcgaac cccgcccggc cgagctgcgc accgccctgg tcctggcccg + 21781 gggagccggc ggcttcaact ccgccatggt cgtgcgcgcc gcctgaggac ccccgcgcac + 21841 cgcacccgac cgacgcaccg caaccgccac cacgtactga cgaaaggacc caccatggcc + 21901 agcaagtcct tcaccctcga cgacctcaag cgcaccctgc gggaggccgc gggcgtcgcc + 21961 gagggcgtgg acctggacgg cgacatcctc gacaccgagt tcgaggtgat cggctacgag + 22021 tccctcgccc tgctggaggc cggcagcctc atcgagcgcg agtacggcat 
ctccctggac + 22081 gaggaggccg tcggcgaggc caacacgccg cgcagcttca tcgaggtcgt caacgcgcag + 22141 ctcgcgcccg ccaaggccgc ctgaaggagc cccaccatga ccgacaccac cacccagcgc + 22201 gtcgccgtcg tcaccggcgc caccagcggc atcggcctgg cctccgcccg gctcctcggc + 22261 cggcagggcc accaagtctt catcggcgcc cgcaacgccg agaacgtcgc cgccaccgtc + 22321 aaggaactcc agggcgaggg catcgacgcg gacggcacgg tcgtcgacgt ccgcgacacc + 22381 gagtccgtca acgcctggat ccaggccgcc gtcgaccgct tcggcagcgt cgacgtcgtc + 22441 gtcaacaacg ccggccgctc cggcggcggc cccaccgccg acatcgcgga cgagctgtgg + 22501 gacgacgtga tcgacaccaa cctcaacagc gtcttccgcg tcacccgcgc cgccctgacc + 22561 atcggcggcc tgcgcgccaa ggaccgcggc cggatcatca acgtcgcctc caccgcgggc + 22621 aagcagggcg tcgtcctggg cgccccgtac tcggcgtcca agcacggcgt cgtcggcttc + 22681 accaaggcac tgggcaacga gctggccccc accggcatca ccgtcaacgc ggtctgcccc + 22741 ggctacgtcg agaccccgat ggcccagcgc gtgcgccagg gatacgccgc cgcctacgac + 22801 acctccgagg acgccatcct cgagaagttc caggcgaaga tccccctcgg ccgctactcc + 22861 acccccgagg aggtcgccgg cctcgtcggc tacctggcct ccgacaccgc cgcgtccatc + 22921 acctcgcagg ccctcaacgt ctgcggcggc ctcggcaact tctgacgcac ccggcacccg + 22981 acccctttcc cgaggagtga gcctcatgac cacacgtgag gtcgagcacg agatcacgat + 23041 cggcgcaccg gccgacgccg tctaccagct gctcgcggac gtgaccaact ggccgcgcat + 23101 cttcccgccc accatccacg tggaccgcac cgaggccgac ggcgaccacg aacgcatcca + 23161 catctgggcg accgccaacg gccaggccaa ggagtggacc tcgcaccgca cgctcgaccg + 23221 cgagaacctg accatcacct tccgccagga gatccccgcc gccccggtca agcacatggg + 23281 cggcacctgg atcatcgagc cgctcgccga cgaccggtcg cgggtgcggc tcctgcacga + 23341 ctacagcgcc atcggcgacg acccgcacga cctgctgtgg atcgagcagg ccgtggacaa + 23401 gaacagcacc tccgagctgg ccgccctgaa ggtcaacgtc gaggccgcgc acgccgccgc + 23461 cgaggagctg acgttctcct tcgccgacac cgtgcagatc gacggcgccg ccaaggacgt + 23521 cttcgacttc atcaacgagg cccagctgtg ggccgaacgg ctcccgcacg tcgccgtggt + 23581 gcgcctgagc gaggacaccc ccggcctgca ggagctggag atggacaccc gcgccaagga + 23641 cggctcggtg cacaccacca agtcctaccg ggtcgtcttc ccccaccaca 
agatcgccta + 23701 caagcaggtc accctgcccg cgctgatgac cctgcacacc ggcgaatgga ccttcaccga + 23761 gggcgacgag gcgaccaccg cctcctccca gcacaccgtc accctcaaca ccgccaacat + 23821 cgcccgcatc ctcggccagg aagccaccgt cgccgacgcc cgcgcctacg tccacacggc + 23881 cctgtccacc aacagccgcg ccaccctcgc ccacgccaag gcctacgccg agcagaagaa + 23941 gggctgaacc gtggcagcgg acgccctgac caccgacgtc gtcgtcgtcg gagccggccc + 24001 cgtcgggatg atgctcgccg gggaactggc ccacggcggc gtcggcgtgg tggtcgtgga + 24061 gaagcgacgc gctcccagca ccgagtcccg ggcctccacc ctgcacgccc gcacgatgga + 24121 gatcctcgac agccggagcc tgctgcccga gttcggcgac ccgccgaacg agccgcgcgg + 24181 ccacttcggg ggcatcccgc tggatctgac gctgccctcc tcccaccccg gccagtggaa + 24241 ggtgccgcag accaggaccg aggtgatcct gggggagtgg gcgctgtcac tgggcgccga + 24301 actgcagtgc aagcacgaac tgaccgcgct cgacgacagc ggcgacctgg tggaggccga + 24361 agccgccggc ccggacggac ggacgctgcg gctgcgctgc cgctacctgg tcgcctgcga + 24421 cggcgaggag agcaccgtac gccgcctgat cggcgccgac ttccccggca gggacgcgac + 24481 gcgggaactg ctgcgcgccg acgtcgccgg catcgacatc ccgggccggc gcttcgaacg + 24541 cctggagcac ggcctggcga tcgccgcccg ccgccccgac ggggtgaccc gggtgatggt + 24601 ccacgagttc ggctccgccg cgcgggcccg cccgcacggc gacgcctcct tcgaggagat + 24661 caccgcggtg tggaagcggg tcaccggcga ggacatcagc ggcggcaccc ccctgtgggc + 24721 caacgccttc ggcgacgcct cccgccagct gacccgctac cggcacggcc gcgtcctctt + 24781 cgcgggcgac gccgcccacc ggcagatgcc ggtcggcggc caggccctca acctcggcat + 24841 gcaggacgcc ttcaacctgg gctggaaact ggccctggtg gtacgcggca aggcaccgca + 24901 gaccctcctc gacagctacc acgacgaacg tcacgaggtc ggccggcagg tcctggccaa + 24961 catccgcgcc cagtcgctgc tgctgctcgg cggaccggag gtagagccgc tgcgcgacct + 25021 gctgacggag ttgatcgggc aggaggacgt acgccgtcgc ctggccggca tgatcagcgg + 25081 cctggacgtg cgctacgacg tcggcggccc cgcccacccg ctgctcgggg cccggctgcc + 25141 gtgcaccgag gtgcgggcac gccgacgcct gctcaccacc acccacctgg tgcgctcggg + 25201 cggcggcgtc ctgctggacc tgaccggccg gcccggccgg ccgccggcag tcctcgacgg + 25261 ctgggcggac cgcgtcaccg cactggacgc ccagccctcg ccgggcagtt 
cactgcaggg + 25321 caccgaccgt gtcctggtcc gccccgacgg ccacgtggcc tgggccggcc cgggcaccga + 25381 cggcctcgcc gaggcactca cccgctggtt cggacctccc cgctgacctg ccgtgcgccc + 25441 cggcccggcg gggacgaccg gcaccgcacg acgcgccgcc ccccaccgcc gggccccgaa + 25501 ccgaccccac tgccaggaag gaccacccat ggaagggaca gcggcggaca ccgacgtgat + 25561 cgtcgtcggt gccggcccga ccggactgat gctcgcgggc gaactgcgcc tgggcggggc + 25621 ccgtgtcgtc gtcatcgaga agctggccgc ccccaccggg cagtcccgcg gcctgggctt + 25681 caccgcccgc gccatggaga cgttcgacga acgcgggctg ctgccccggt tcggccaggg + 25741 agagaccctg gccaccagcc ccgtaggaca cttcggcggc gcccagttcg acttcaccgt + 25801 cctcgaggac gcccacttcg gggcccgggg catcccccag ggcgacaccg aggcggtcct + 25861 ggagggctgg gcgggcgaac tcggcgcgga catccggcgc ggctgggagt tcgtctccct + 25921 caccgacggc ttcctggacg gcgacggcgt cgagatcacc gtacgcaccc cgcagggcga + 25981 ggaacgcacc ctgcgcgcct cctacctggc gggctgcgac ggcggctcca gccgggtccg + 26041 cagggcggcc ggcttcgact tccccggaac cgacgccacc cagggcatgt acctggcgga + 26101 catcaccggt gtggagctga ccccgcgctt cctcggagag cgcctgaaca acggcatggt + 26161 gatggcggca ccgctctccc agggcgtgtg gcgcatcatc gtctgccccg acggccgccc + 26221 cgcgcacgac cgtgagcgga ccgtcacctt cgaggaggtc gccgccgcct ggcaggacat + 26281 caccggcgag gacatcagcc acggcggcgc gagctgggtc agctccttca ccaacgccac + 26341 ccggcaggcc tccgagtacc ggcgcggccg cgtcttcctg gcgggcgacg ccgcccacat + 26401 ccacctgccg gccggcggcc agggcctgag caccggcgtg caggacgccg ccaacctcgg + 26461 ctggaagctg gcctcggtga tccgcggcga cgccccccgg gaactgctgg acacctacca + 26521 cgccgaacgc cacccggtcg gcgcccggct gctgatgaac acccgcgccc agggcatcgt + 26581 cttcctcggc ggcgccgagt ccgacccact gcgcgagctg atggccgagc tggtccgcta + 26641 cgacgacgtc aaacgccacc tggccggcat cgtcagccac ctggacatcc gctacgacct + 26701 cgccgacacc gcgaccggcc ccacccaccc gctgctggga cgccggatgc cgccgcggct + 26761 gctcgtcggc gcggacggcg aaacccgcat cgcccgcctc ctgcacgccg ggcacggcgt + 26821 gctgctcgac ctcgccgacg acgagacggt acgcgcgacc gcggccggcc acgcggaccg + 26881 ggtggacgtg gtcaccgccg tcgccaagcc caccgacggc ccggacgccc 
tcgccggcgc + 26941 caccgccgtg ctgatccgcc ccgacggcta cgtcgcctgg acgggcacct gcgcgcaggg + 27001 cctggaaacg gccctggaac ggtggtgcgg cccgccccgc tgacccacac cccccccaac + 27061 ggcccccggc gtgcgcgcac gccgggggcc ttcgtccgcc gcccggcacc ccccgccgcc + 27121 cggcaccccc cgccgcccgg cgctccccgc gggcctgccc gccggcgccc ccgcccaccc + 27181 ggccgccgct gcccggccgc acacaccgga gggccccgcc gcgttccctc atcacgcgac + 27241 ggggccctcc ctccacggtg gtcacacggg gacggcctcc acgagggaga tcgccgccgc + 27301 cgtctccacg acccgctcca gacggaaccc ggcacgggcc aggagatcgg cgtactggcc + 27361 gggagtacgc tcacggccac cgaccaggag catcagccac aggtcgacca gcttcccgga + 27421 gtgcggctgg tcaccctgct ccggtatcac catctcggcg atcagcagct tgccgcccgg + 27481 cttgatcgcc gcccgcacgt tgcgcaggat ccgcagggcc tgctcctcgg gccagtcgtg + 27541 cacgatgtgc ttgaggacgt aggcgtcggc gccgcccggc ggcacgtcga acaggtcgcc + 27601 ggccacccgc ttcgtccggt cggccacacc ctgcgcggcc aggaactccg cagccccgtt + 27661 ctcctcgacc cgcgggtcga acagaacgcc ctcgcacccg ggcgccgcac ccaggatgcc + 27721 cgcgagcaac gcgccctggc cgccgcagaa gtccaccacc gtcccgaact gcgagaagtc + 27781 gtacgccgcc aggatcggct ccgtctccga ggcggacatg ctgcccatgc cctggaagaa + 27841 cacctggccg tactccgcgt tcttcgtcag gaactcgaag gcgtgcatac cgcgcagctt + 27901 cggcagcgcc ggctcgccgg tgaccaccgt ctccgggaag ccgctccagt cctcccagtg + 27961 gatcgggtgg cccatcagca gcgcgatgcc ccgcatcgac atcggatggt cggcgcgcag + 28021 cgcgtccgcc atcggcgtca gctcgaaggc accgtccggg cgcgtggcga acacaccgtt + 28081 gctcgccagc aggcgcagca cccggccgag cgcgtcggcg tccgcaccga cccgcccggc + 28141 gagctcgtcg gcggacagcg gaccctcggc gagcgcctcg gccaccttca gttcggcggc + 28201 gacgtgcacc gcacgcgtga ccatgacacc catgatcaat tccagcagag cgaacggcgg + 28261 aggcgcgagt tcccggctct gccgctgcag atccgctcgt gctttctccg cttcccgtac + 28321 gacgtgcgga ggcggcaatt cgggcatgag tttcctccat ggcaggagcg caaaagtctg + 28381 tcccaaccgc cggaagggac cgggactaca acgatatgga aaaaggtcga cgcctcctgg + 28441 agggagactc gaataccgct ggttcacggg tgcacccact ttttcctgcg gaaaatttga + 28501 cgcaataaaa ccggcgactg ccctagggtg gcgctgattt cgcatccgct 
ctccgggaaa + 28561 aggggggagc attggaaacc gaggagagaa atggcgaact cgcgtaactc ctggacgaag + 28621 atacttctgg cggcgggcgc ggccggcgcc gtcgccgtcc cgctgaccgc cggccccgtc + 28681 caggcccagc cccaggccca ccgccacggt cacacccaca cccgtaccga ggcgaccgtg + 28741 accgtcgtgg cctcccagct gaacaacccg cgcggcgtca ccgcgctggg cgacggcggc + 28801 gtgctggtcg ccgaggccgg cgccggactg gccgactgcc cggtcgacca gacgtgcgtg + 28861 ggcaccaccg gctccgtcta caaggtcaag ggcagcttcc agggccgcgt cgccaccggc + 28921 ctcgcctcca cggccaaggg cgtcgccccg ggcgccccga tctccgccaa cggccccagc + 28981 gacgtcgtgc ccgaccggtt cggcggctac gtcgtcgtca gcggcctcgg cggcaccacc + 29041 gagtcgcgcg ccgcgctggg cgagggcgcc cagaccctgg gcacggtctt ccgcacccgc + 29101 gaccacaagg tgctcgccga cctcaccgac cacgagacgc ggctgaaccc cgacggcggc + 29161 gacgtgcacg ccaacccgtg gcggctcgcg cgcagcggca gcggctacct ggccaccgac + 29221 gcgggcgcca acaccgtcgt acgcggcaac gccgacggca ccaccgccac cgagtacctc + 29281 ctgcccaaga acgaactgcc caccggcgcc gccgagaccg tacccaccgg catcgccaag + 29341 gccgccgacg gcaccgtgta cgtcgccgac atgagcggcg gccgggtcgg cgcctcccgc + 29401 gtctggaaga tcgccccggg ccggcagccc gagatcctcg ccaccggcat gaccaacctc + 29461 atcgacctgg acctggaccg ggacggcgac ctgatcgccc tgtcctacag cgccgccgcg + 29521 ctggccggcc cgccgcagcc gggcgccctg ttcgagatcg acgccgacag cggcgcggtc + 29581 accgagatcc ccaccggcga ccagctcaag cagcccaccg gcgtcgccgt cgacccctgc + 29641 ggcaaggtgt acgtcaccaa caacacgctc ggcaccaacg gccagctggt ccgcgtcaac + 29701 cgctgacccc cgcaggaggg tgcgccgcgc acacccccct gccaggcccc gccggtcccg + 29761 cacccctgcg accggcgggg cctcgctgtg cccggcgccg gacgcccgcg gccgatggga + 29821 atccgctcgg cggccgctcg acagccgacg gagggcgacg cccccgcaca gcccggctcg + 29881 gcatcctggc cgggcatcca cttgccggac aacacattcg acgctcaggg aggctgcccc + 29941 gtgacgcaag atgccatgac ctcggccgac gagacccgga ccggcgtgtg gctcgtcgga + 30001 gcccgcggtt cggtcgccac gacggcggtg tcgggctgcg cggcgctggc ggcaggactg + 30061 ctgccaccca ccggcatggt caccgagacg ccccccttcg ccgactgcgg cctgccggcc + 30121 ctggcctccc tcgtcttcgg cggccacgac acggcgacca cccccctgcc 
caaacgcgcc + 30181 gaggaactgg ccgcccaagg agtactgccg ccctggctgc cgaccgccgt ccagggggaa + 30241 ctggccgccg cggacgaaca catccgcccc ggaggaccgg tacccggcga ccgtcgcgcc + 30301 accgaagagc tgatagccga cttcgccacc gacctgcgca ccttcgcccg caccaccggc + 30361 gtcgcccgca cggtcgtcgt caacgtcgcc tccaccgaac cggaccccgc acacggcgcg + 30421 tggccggcca gctccctgta cgccgcggcg gccctgcggg cgggctgccc ctacgtcaac + 30481 ttcacgccgt cgaccgggct gagccacccc cagctggcgg gcgcggcccg cgcctcgggc + 30541 ctgccgtacg cgggccgcga cggcaagacc gggcagaccc tgctgcgttc ggtgctgggg + 30601 ccgatgttcg cccagcgggc actggcggta cgggcctggt ccggcacgaa cctgctgggc + 30661 ggcggcgacg gcgccgccct cgccgacccc gccgccgccg cggcgaagaa cgccggcaag + 30721 gaacgcgtcc tcaccgacac cctcggcacc cgggtcgaag gcgaagtgca catcgacgac + 30781 gtccccgccc tcggggactg gaagaccgcc tgggaccacg tcgccttcga cggcttcctc + 30841 ggcacccgca tggtcctgca gaccatctgg cagggctgcg actccgccct cgccgcaccg + 30901 ctcgtcctcg acctggcccg cctgctcgcc cgcgcccacg agcggggcct gtccggcccg + 30961 ctgggcgaac tcggcttcta cttcaaggac cccgacgccg aaggctccgc cctggccgag + 31021 cagtacaccc gactgctcac cctcgccgac cggctcggag gaaccaggtg agcgctcagc + 31081 ggcccgccca cggcccggcc cgcgacgggc acctccacgc ctgggccgaa ctgctgcgcg + 31141 cccccgccgc gctcaccgtc cccggtgacg tcctcgccgg caccgcggcc gccggcacac + 31201 ggcccaccgg acgcaccgcc ctcgcggccg gcgcctcact gtgcctgtac gaggcgggca + 31261 tggcactcaa cgactgggcg gaccgcgagg aggacgccac cgcccggccc caccgccccc + 31321 tgccgtccgg ccgcgtccgg cccggcgccg ccctcgccgc cgcgggcctc ctctccgccg + 31381 cgggcctggc cctcgccgca tgcgcgggac gacgggccct cgcggtcgcc ggccccctgg + 31441 ccgccaccgt atgggcctac gacctgggcc tgaaacacac cccggcggga cccgcggcga + 31501 tggccgccgc ccgctccctg gacctgctgc tgggcgcggc cgccggcccg ggcgccgtgc + 31561 gacgggcgat cgtcccggcg gcgttcctcg gcagccacac cctggccgtc tccctcgtct + 31621 cccgccgcga gaccgagggc ggctccagca ccgcccccct cacggccctc gccgccgcgg + 31681 gcgccctcac caccgtcctc gccggccgcc ccaccgccca ccccgcgccc gacgccagca + 31741 ccggcgcacc gacccccgcg acaccggccg acaaggccac ccgcgccgta 
cgcgccgccc + 31801 tggccgcttc ctacgccgcc accttcgccc gccccctggc ccacgccgcg ctcaacccct + 31861 ccccggaact gacacagcgg gccgtcggcg ccggcgtccg cgccacgatc gccctgcaga + 31921 gcggactgat ggcccgggcc ggcgcacccg gcaccggcgt cctcaccgcc gccctggcac + 31981 ccctcgccgc gcacctggcc cggaaagtga gcaccacatg agcccgctgc gcctcggcta + 32041 cggcaccaac gggctgaccg acctgcgcct ggaggacgcc ctgcgcctgc tggccgacct + 32101 cggctacgac ggcgtcggac tcaccctcga ccacatgcac ctcgacccgc tcgcccccga + 32161 cctcgcggcc cgcacccgcc acgtcgcccg gcaactgcac cgcctcggcc tcgcggtcac + 32221 cgtcgagacc ggcgcccgct acgtcctcga cccgcgccgc aagcacggac ccagcctgct + 32281 cgacgacgac ccggacgccc gctgggcacg cgtacgcctg ctgatccgct cggtacgcgt + 32341 cgccgccgac ctcggcgcgc acgccgtgca ctgcttcagc ggcccacggc ccgcgggcct + 32401 ggaccaggac accgcctgga aacgcctggc cgacgccctc ggccccgtcc tggacgccgc + 32461 ggaggacacc ggcgtccccc tggcggtcga acccgagccc ggccacctgc tggccacact + 32521 gaccgacttc caccgcctgc gcaccgaact gggcgacccc gaaccactgg gactgaccct + 32581 cgacatcggg cactgccagt gcctggagcc cctgccgccc gccgactgcg tacgggccgc + 32641 cgcgccctgg ctgcggcacg tgcagatcga ggacatgcgc cgcggcgtcc acgaacacct + 32701 ccccttcggg gacggcgaga tcgacttccc gcccgtactc gacgccctcg ccgccaccgg + 32761 ctaccagggc ctgaccgtcg tcgaactgcc ccggcactcc cacgccggac ccgaactggc + 32821 cgcacagtcg atgcggttcc tgcgcaacgg agggacgaga tgaccgtcgt acacaccccg + 32881 ccggacgcgg acctgcccgg cgagttcgcc ggcctcggca ccgacgcacg ccgctggctc + 32941 accgccgccc gcgcctcggc aaccgcccgg gacaccgact gggaactgcg cttcgccgag + 33001 gcgggccgcc gctgcggcac cgaacacgcc gacgccgccc gcgtactgct gctcacggcg + 33061 gcacgccccg acgcggagac cgtgaccctg ctgtaccacc gggggacagc cgccgaacgg + 33121 cgtgcggtcc tgctcgccct ggacggactc gacaccgaac ccgcccacgc cctgccgctg + 33181 gtcgaagacg ccctgcgcgc caacgacacc accctgctcg ccgccgccct cggcccctac + 33241 gcggcccggc acctggacgc ccaccaatgg cggcacgccg tactcaagtg cctgttcacc + 33301 ggcgtgcccg tggccaccgt ggcaaacctg gcggcccgcg cccgcgcaga cgcggaactg + 33361 gccagaatgc tgcgcgccta cgccgccgag cgcaccgccg cccaccgaga 
catcccggcc + 33421 gacctcgacc gcgtgctcgc cctgacccag gagcagtgac ttgcgcatct tcgaccccca + 33481 catccacatg acgtcccgga ccaccgacga ctacgaagcc atgcatgccg cgggtgtccg + 33541 tgccgtggtc gagcccgcct tctggctggg gcagccccgc acttctccgg cctccttccg + 33601 tgactacttc gacgcgttgc tgggctggga gcccttccgt gcggcgcagt acgggatcgc + 33661 ccatcactgc acgatcgcgt tgaacccgaa ggaggcg +// diff --git a/tests/unit/data/bigscape/minimal_dataset/JK1_GCF_01/JCM_4529.region35.gbk b/tests/unit/data/bigscape/minimal_dataset/JK1_GCF_01/JCM_4529.region35.gbk new file mode 100755 index 00000000..8e7c3b14 --- /dev/null +++ b/tests/unit/data/bigscape/minimal_dataset/JK1_GCF_01/JCM_4529.region35.gbk @@ -0,0 +1,1627 @@ +LOCUS NZ_BMUN01000045 30675 bp DNA linear CON 19-APR-2022 +DEFINITION Streptomyces vinaceusdrappus strain JCM 4529 sequence45, whole + genome shotgun sequence. +ACCESSION NZ_BMUN01000045 +VERSION NZ_BMUN01000045 +KEYWORDS . +SOURCE Streptomyces vinaceusdrappus + ORGANISM Streptomyces vinaceusdrappus + Bacteria; Actinobacteria; Streptomycetales; Streptomycetaceae; + Streptomyces; Streptomyces rochei group. +COMMENT REFSEQ INFORMATION: The reference sequence is identical to + BMUN01000045.1. + The annotation was added by the NCBI Prokaryotic Genome Annotation + Pipeline (PGAP). Information about PGAP can be found here: + https://www.ncbi.nlm.nih.gov/genome/annotation_prok/ + ##antiSMASH-Data-START## + Version :: 6.1.1 + Run date :: 2023-02-07 14:34:49 + Original ID :: NZ_BMUN01000045.1 + NOTE: This is a single cluster extracted from a larger record! + Orig. start :: 0 + Orig. end :: 30675 + ##antiSMASH-Data-END## +FEATURES Location/Qualifiers + gene complement(<1..260) + /locus_tag="IE238_RS36835" + CDS complement(<1..260) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_011031019.1" + /locus_tag="IE238_RS36835" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /product="alkaline phosphatase family protein" + /protein_id="WP_193517002.1" + /transl_table=11 + /translation="MTPTTSSGGPVPLLVLDVVGLTPRLLDHMPHLKRLGQSGSRAPLG + TVLPAVTCAAQSTFLTGTYPSEHGIVGNGWYFRELGDVLLWR" + source 1..30675 + /culture_collection="JCM:4529" + /db_xref="taxon:67376" + /mol_type="genomic DNA" + /organism="Streptomyces vinaceusdrappus" + /strain="JCM 4529" + /submitter_seqid="sequence45" + /type_material="type strain of Streptomyces + vinaceusdrappus" + protocluster 1..30675 + /aStool="rule-based-clusters" + /category="PKS" + /contig_edge="True" + /core_location="[13949:29716]" + /cutoff="20000" + /detection_rule="(t2ks and t2clf)" + /neighbourhood="35000" + /product="T2PKS" + /protocluster_number="1" + /tool="antismash" + proto_core 13950..29716 + /aStool="rule-based-clusters" + /tool="antismash" + /cutoff="20000" + /detection_rule="(t2ks and t2clf)" + /neighbourhood="35000" + /product="T2PKS" + /protocluster_number="1" + cand_cluster 1..30675 + /candidate_cluster_number="1" + /contig_edge="True" + /detection_rules="(t2ks and t2clf)" + /kind="single" + /product="T2PKS" + /protoclusters="1" + /tool="antismash" + region 1..30675 + /candidate_cluster_numbers="1" + /contig_edge="True" + /product="T2PKS" + /region_number="1" + /rules="(t2ks and t2clf)" + /tool="antismash" + gene complement(257..1432) + /gene="eboE" + /locus_tag="IE238_RS36840" + /old_locus_tag="GCM10010308_74000" + CDS complement(257..1432) + /codon_start=1 + /gene="eboE" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_003994192.1" + /locus_tag="IE238_RS36840" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74000" + /product="metabolite traffic protein EboE" + /protein_id="WP_193517003.1" + /transl_table=11 + /translation="MRFRHPDGSTVHLAYCTNVHPAETLDGVLAQLRDHCEPVRRRLGR + DRLGIGLWLAKDAAHALATDPSALRGLRTELDRRGLEVVTLNGFPYQGFGAEEVKYRVY + KPDWAHPERLEHTTALARVLAGLLPDDVSEGSVSTLPLAWRTAWDETRADKARTALATL + GERLDTLHELTGRSIRIGLEPEPGCIVETTRDAIAPLGAIGHDRIGVCVDTCHLATSFE + DPEEALDALEAAGIRIVKSQLSAALHAEHPSRLEVRDALAAFAEPRFLHQTRTTTATGG + LRGTDDLDEALAAGGPLPDSAPWRAHFHVPLHADPAAPLTSTLPVLKSALSRLVGGARP + LTRHLEVETYTWQALPAQLRPRGRAQLTDGIAAELMLARDLLTDLGLKELP" + gene complement(1436..2311) + /locus_tag="IE238_RS36845" + /old_locus_tag="GCM10010308_74010" + CDS complement(1436..2311) + /GO_function="GO:0016788 - hydrolase activity, acting on + ester bonds [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_004980911.1" + /locus_tag="IE238_RS36845" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74010" + /product="TatD family hydrolase" + /protein_id="WP_127438561.1" + /transl_table=11 + /translation="MRIFDPHIHMTSRTTDDYEAMYAAGVRAVVEPAFWLGQPRTSAAS + FCDYFDALLGWEPFRAAQYGIAHHCTIALNPKEANDPRCTPVLAELPRYLVKDRVVAVG + EIGYDSMTPAEDTALAAQLQLAADHGLPALVHTPHRDKLAGLRRTLDVVRESALPTDRV + LVDHLNETTVKEAKDSGAWLGFSVYPDTKMDEARMVALLREYGPEKVLVNSAADWGRSD + PLKTRKVGDLMLEEGFGEDDVDRVLWRNPVAFYGLSGRLDLDVTATAPTHEGNSVLRGA + PAAEPLPTGA" + gene complement(2313..2912) + /locus_tag="IE238_RS36850" + /old_locus_tag="GCM10010308_74020" + CDS complement(2313..2912) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019890542.1" + /locus_tag="IE238_RS36850" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74020" + /product="EboA domain-containing protein" + /protein_id="WP_127438562.1" + /transl_table=11 + /translation="MTVVHTPPDADLPGEFAGLGTDARRWLTAARASATARDTDWELRF + AEAGRRCGTEHADAARVLLLAAARPDAETVTLLYHRGTAAERRAVLLALDGLDTEPAHA + LPLVEDALRANDTTLLAAALGPYAARHLDAHQWRHAVLKCLFTGVPVATVANLAARARA + DAELARMLRAYAAERTAAHRDIPADLDRVLALTQEQ" + gene complement(2909..3754) + /locus_tag="IE238_RS36855" + /old_locus_tag="GCM10010308_74030" + CDS complement(2909..3754) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330932.1" + /locus_tag="IE238_RS36855" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74030" + /product="sugar phosphate isomerase/epimerase" + /protein_id="WP_193517004.1" + /transl_table=11 + /translation="MSPLRLGYGTNGLTDLRLEDALRLLADLGYDGVGLTLDHMHLDPL + APDLAARTRHVARQLHRLGLAVTVETGARYVLDPRRKHGPSLLDDDPDARWARIRLLIR + SVRVAADLGAHAVHCFSGPRPAGLDQDTAWKRLADALGPVLDAAEDTGVPLAVEPEPGH + LLATLTDFHRLRTELGDPEPLGLTLDIGHCQCLEPLPPADCVRAAAPWLRHVQIEDMRR + GVHEHLPFGDGEIDFPPVLDALAATGYQGLTVVELPRHSHAGPELAAQSMRFLRNGGTR + " + gene complement(3751..4707) + /locus_tag="IE238_RS36860" + /old_locus_tag="GCM10010308_74040" + CDS complement(3751..4707) + /GO_component="GO:0016021 - integral component of membrane + [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330933.1" + /locus_tag="IE238_RS36860" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74040" + /product="UbiA family prenyltransferase" + /protein_id="WP_193517005.1" + /transl_table=11 + /translation="MSAQRPAHGPARDGHLHAWAELLRAPAALTVPGDVLAGTAAAGTR + PTGRTALAAGASLCLYEAGMALNDWADREEDATARPHRPLPSGRVRPGAALAAAGLLSA + AGLALAACAGRRALAVAGPLAATVWAYDLGLKHTPAGPAAMAAARSLDLLLGAAAGPGA + VRRAIVPAAFLGSHTLAVSLVSRRETEGGSSTAPLTALAAAGALTTVLAGRPTAHPAPD + ASTGAPTPATPPADKATRAVRAALAASYAATFARPLAHAALNPSPELTQRAVGAGVRAT + IALQSGLMARAGAPGTGVLTAALAPLAAHLARKVSTT" + gene complement(4704..5819) + /locus_tag="IE238_RS36865" + /old_locus_tag="GCM10010308_74050" + CDS complement(4704..5819) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_003962403.1" + /locus_tag="IE238_RS36865" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74050" + /product="inositol-3-phosphate synthase" + /protein_id="WP_127437809.1" + /transl_table=11 + /translation="MTSADETRTGVWLVGARGSVATTAVSGCAALAAGLLPPTGMVTET + PPFADCGLPALASLVFGGHDTATTPLPKRAEELAAQGVLPPWLPTAVQGELAAADEHIR + PGGPVPGDRRATEELIADFATDLRTFARTTGVARTVVVNVASTEPDPAHGAWPASSLYA + AAALRAGCPYVNFTPSTGLSHPQLAGAARASGLPYAGRDGKTGQTLLRSVLGPMFAQRA + LAVRAWSGTNLLGGGDGAALADPAAAAAKNAGKERVLTDTLGTRVEGEVHIDDVPALGD + WKTAWDHVAFDGFLGTRMVLQTIWQGCDSALAAPLVLDLARLLARAHERGLSGPLGELG + FYFKDPDAEGSALAEQYTRLLTLADRLGGTR" + gene complement(6069..7184) + /locus_tag="IE238_RS36870" + /old_locus_tag="GCM10010308_74060" + CDS complement(6069..7184) + /codon_start=1 + /inference="COORDINATES: protein motif:HMM:NF033206.1" + /locus_tag="IE238_RS36870" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74060" + /product="ScyD/ScyE family protein" + /protein_id="WP_193450429.1" + /transl_table=11 + /translation="MANSRNSWTKILLAAGAAGAVAVPLTAGPVQAQPQAHRHGHTHTR + TEATVTVVASQLNNPRGVTALGDGGVLVAEAGAGLADCPVDQTCVGTTGSVYKVKGSFQ + GRVATGLASTAKGVAPGAPISANGPSDVVPDRFGGYVVVSGLGGTTESRAALGEGAQTL + GTVFRTRDHKVLADLTDHETRLNPDGGDVHANPWRLARSGSGYLATDAGANTVVRGNAD + GTTATEYLLPKNELPTGAAETVPTGIAKAADGTVYVADMSGGRVGASRVWKIAPGRQPE + ILATGMTNLIDLDLDRDGDLIALSYSAAALAGPPQPGALFEIDADSGAVTEIPTGDQLK + QPTGVAVDPCGKVYVTNNTLGTNGQLVRVNR" + gene 7551..8513 + /locus_tag="IE238_RS36875" + /old_locus_tag="GCM10010308_74070" + CDS 7551..8513 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330937.1" + /locus_tag="IE238_RS36875" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74070" + /product="acetylserotonin O-methyltransferase" + /protein_id="WP_234311021.1" + /transl_table=11 + /translation="MVTRAVHVAAELKVAEALAEGPLSADELAGRVGADADALGRVLRL + LASNGVFATRPDGAFELTPMADALRADHPMSMRGIALLMGHPIHWEDWSGFPETVVTGE + PALPKLRGMHAFEFLTKNAEYGQVFFQGMGSMSASETEPILAAYDFSQFGTVVDFCGGQ + GALLAGILGAAPGCEGVLFDPRVEENGAAEFLAAQGVADRTKRVAGDLFDVPPGGADAY + VLKHIVHDWPEEQALRILRNVRAAIKPGGKLLIAEMVIPEQGDQPHSGKLVDLWLMLLV + GGRERTPGQYADLLARAGFRLERVVETAAAISLVEAVPV" + gene complement(8732..10246) + /locus_tag="IE238_RS36880" + /old_locus_tag="GCM10010308_74080" + CDS complement(8732..10246) + /GO_function="GO:0071949 - FAD binding [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330938.1" + /locus_tag="IE238_RS36880" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74080" + /product="FAD-dependent monooxygenase" + /protein_id="WP_193517006.1" + /transl_table=11 + /translation="MEGTAADTDVIVVGAGPTGLMLAGELRLGGARVVVIEKLAAPTGQ + SRGLGFTARAMETFDERGLLPRFGQGETLATSPVGHFGGAQFDFTVLEDAHFGARGIPQ + GDTEAVLEGWAGELGADIRRGWEFVSLTDGFLDGDGVEITVRTPQGEERTLRASYLAGC + DGGSSRVRRAAGFDFPGTDATQGMYLADITGVELTPRFLGERLNNGMVMAAPLSQGVWR + IIVCPDGRPAHDRERTVTFEEVAAAWQDITGEDISHGGASWVSSFTNATRQASEYRRGR + VFLAGDAAHIHLPAGGQGLSTGVQDAANLGWKLASVIRGDAPRELLDTYHAERHPVGAR + LLMNTRAQGIVFLGGAESDPLRELMAELVRYDDVKRHLAGIVSHLDIRYDLAGTASGPT + HPLLGRRMPPRLLVGADGETRIARLLHAGHGVLLDLADDETVRATAAGHADRVDVVTAV + AKPTDGPDALAGATAVLIRPDGYVAWTGTCAQGLETALERWFGPPR" + gene complement(10349..11824) + /locus_tag="IE238_RS36885" + /old_locus_tag="GCM10010308_74090" + CDS complement(10349..11824) + /GO_function="GO:0071949 - FAD binding [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330939.1" + /locus_tag="IE238_RS36885" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74090" + /product="FAD-dependent monooxygenase" + /protein_id="WP_127443921.1" + /transl_table=11 + /translation="MAADALTTDVVVVGAGPVGMMLAGELAHGGVGVVVVEKRRAPSTE + SRASTLHARTMEILDSRSLLPEFGDPPNEPRGHFGGIPLDLTLPSSHPGQWKVPQTRTE + VILGEWALSLGAELQCKHELTALDDSGDLVEAEAAGPDGRTLRLRCRYLVACDGEESTV + RRLIGADFPGRDATRELLRADVAGIDIPGRRFERLEHGLAIAARRPDGVTRVMVHEFGS + AARARPHGDASFEEITAVWKRVTGEDISGGTPLWANAFGDASRQLTRYRHGRVLFAGDA + AHRQMPVGGQALNLGMQDAFNLGWKLALVVRGKAPQTLLDSYHDERHEVGRQVLANIRA + QSLLLLGGPEVEPLRDLLTELIGQEDVRRRLAGMISGLDVRYDVGGPAHPLLGARLPCT + EVRARRRLLTTTHLVRSGGGVLLDLTGRPGRPPAVLDGWADRVTALDAQPLPGSSLQGT + DRVLVRPDGHVAWAGPGTDGLAEALTRWFGPPR" + gene complement(11828..12772) + /locus_tag="IE238_RS36890" + /old_locus_tag="GCM10010308_74100" + CDS complement(11828..12772) + /NRPS_PKS="Domain: Polyketide_cyc2 (4-146). E-value: + 2.7e-16. Score: 52.1. 
Matches aSDomain: + nrpspksdomains_IE238_RS36890_Polyketide_cyc2.1" + /NRPS_PKS="Domain: Polyketide_cyc2 (157-310). E-value: + 2.1e-08. Score: 26.5. Matches aSDomain: + nrpspksdomains_IE238_RS36890_Polyketide_cyc2.2" + /NRPS_PKS="type: other" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330940.1" + /locus_tag="IE238_RS36890" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74100" + /product="aromatase/cyclase" + /protein_id="WP_193517007.1" + /transl_table=11 + /translation="MTTREVEHEITIGAPADAVYQLLADVTNWPRIFPPTIHVDRTEAD + GDHERIHIWATANGQAKEWTSRRTLDRENLTITFRQEIPAAPVKHMGGTWIIEPLADDR + SRVRLLHDYSAIGDDPHDLLWIEQAVDKNSTSELAALKVNVEAAHAAATEELTFSFADT + VQIDGAAKDVFDFINEAQLWAERLPHVAVVRLSEDTPGLQELEMDTRAKDGSVHTTKSY + RVVFPHHKIAYKQVTLPALMTLHTGEWTFTEGDEATTASSQHTVTLNTANIARILGQDA + TVADARAYVHTALSTNSRATLAHAKAYAEQKKG" + aSDomain complement(11843..12301) + /aSDomain="Polyketide_cyc" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36890_Polyketide_cyc2.2" + /domain_subtype="Polyketide_cyc2" + /evalue="2.10E-08" + /label="IE238_RS36890_Polyketide_cyc2.2" + /locus_tag="IE238_RS36890" + /protein_end="310" + /protein_start="157" + /score="26.5" + /tool="antismash" + /translation="FSFADTVQIDGAAKDVFDFINEAQLWAERLPHVAVVRLSEDTPGL + QELEMDTRAKDGSVHTTKSYRVVFPHHKIAYKQVTLPALMTLHTGEWTFTEGDEATTAS + SQHTVTLNTANIARILGQDATVADARAYVHTALSTNSRATLAHAKAYAE" + aSModule 11843..12760 + /domains="nrpspksdomains_IE238_RS36890_Polyketide_cyc2.1" + /domains="nrpspksdomains_IE238_RS36890_Polyketide_cyc2.2" + /incomplete + /locus_tags="IE238_RS36890" + /tool="antismash" + /type="unknown" + aSDomain complement(12335..12760) + /aSDomain="Polyketide_cyc" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36890_Polyketide_cyc2.1" + 
/domain_subtype="Polyketide_cyc2" + /evalue="2.70E-16" + /label="IE238_RS36890_Polyketide_cyc2.1" + /locus_tag="IE238_RS36890" + /protein_end="146" + /protein_start="4" + /score="52.1" + /tool="antismash" + /translation="EVEHEITIGAPADAVYQLLADVTNWPRIFPPTIHVDRTEADGDHE + RIHIWATANGQAKEWTSRRTLDRENLTITFRQEIPAAPVKHMGGTWIIEPLADDRSRVR + LLHDYSAIGDDPHDLLWIEQAVDKNSTSELAALKVNVE" + gene complement(12813..13601) + /gene="fabG" + /locus_tag="IE238_RS36895" + /old_locus_tag="GCM10010308_74110" + CDS complement(12813..13601) + /EC_number="1.1.1.100" + /GO_function="GO:0016491 - oxidoreductase activity + [Evidence IEA]" + /NRPS_PKS="Domain: PKS_KR (8-164). E-value: 7.1e-11. Score: + 34.2. Matches aSDomain: + nrpspksdomains_IE238_RS36895_PKS_KR.1" + /NRPS_PKS="type: other" + /codon_start=1 + /gene="fabG" + /gene_functions="biosynthetic-additional + (rule-based-clusters) adh_short" + /gene_kind="biosynthetic-additional" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330941.1" + /locus_tag="IE238_RS36895" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74110" + /product="3-oxoacyl-ACP reductase FabG" + /protein_id="WP_030970829.1" + /sec_met_domain="adh_short (E-value: 5.9e-36, bitscore: + 115.1, seeds: 230, tool: rule-based-clusters)" + /transl_table=11 + /translation="MTDTTTQRVAVVTGATSGIGLASARLLGRQGHQVFIGARNAENVA + ATVKELQGEGIDADGTVVDVRDTESVNAWIQAAVDRFGSVDVVVNNAGRSGGGPTADIA + DELWDDVIDTNLNSVFRVTRAALTIGGLRAKDRGRIINVASTAGKQGVVLGAPYSASKH + GVVGFTKALGNELAPTGITVNAVCPGYVETPMAQRVRQGYAAAYDTSEDAILEKFQAKI + PLGRYSTPEEVAGLVGYLASDTAASITSQALNVCGGLGNF" + aSDomain complement(13110..13577) + /aSDomain="PKS_KR" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36895_PKS_KR.1" + /evalue="7.10E-11" + /label="IE238_RS36895_PKS_KR.1" + /locus_tag="IE238_RS36895" + /protein_end="164" + /protein_start="8" + /score="34.2" + /tool="antismash" + /translation="VAVVTGATSGIGLASARLLGRQGHQVFIGARNAENVAATVKELQG + EGIDADGTVVDVRDTESVNAWIQAAVDRFGSVDVVVNNAGRSGGGPTADIADELWDDVI + DTNLNSVFRVTRAALTIGGLRAKDRGRIINVASTAGKQGVVLGAPYSASKHG" + CDS_motif complement(13509..13571) + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE238_RS36895_0001" + /evalue="6.00E-05" + /label="PKSI-KR_m1" + /locus_tag="IE238_RS36895" + /protein_end="31" + /protein_start="10" + /score="15.7" + /tool="antismash" + /translation="VVTGATSGIGLASARLLGRQG" + gene complement(13614..13883) + /locus_tag="IE238_RS36900" + /old_locus_tag="GCM10010308_74120" + CDS complement(13614..13883) + /NRPS_PKS="Domain: ACP (12-83). E-value: 9.1e-13. Score: + 40.2. 
Matches aSDomain: nrpspksdomains_IE238_RS36900_ACP.1" + /NRPS_PKS="type: other" + /codon_start=1 + /gene_functions="biosynthetic-additional + (rule-based-clusters) PP-binding" + /gene_kind="biosynthetic-additional" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_015037163.1" + /locus_tag="IE238_RS36900" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74120" + /product="acyl carrier protein" + /protein_id="WP_019330942.1" + /sec_met_domain="PP-binding (E-value: 1.5e-08, bitscore: + 26.1, seeds: 164, tool: rule-based-clusters)" + /transl_table=11 + /translation="MASKSFTLDDLKRTLREAAGVAEGVDLDGDILDTEFEVIGYESLA + LLEAGSLIEREYGISLDEEAVGEANTPRSFIEVVNAQLAPAKAA" + aSDomain complement(13635..13847) + /aSDomain="ACP" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36900_ACP.1" + /evalue="9.10E-13" + /label="IE238_RS36900_ACP.1" + /locus_tag="IE238_RS36900" + /protein_end="83" + /protein_start="12" + /score="40.2" + /tool="antismash" + /translation="RTLREAAGVAEGVDLDGDILDTEFEVIGYESLALLEAGSLIEREY + GISLDEEAVGEANTPRSFIEVVNAQL" + gene complement(13950..15161) + /locus_tag="IE238_RS36905" + /old_locus_tag="GCM10010308_74130" + CDS complement(13950..15161) + /NRPS_PKS="Domain: PKS_KS(Iterative-KS) (48-401). E-value: + 9.4e-31. Score: 98.9. Matches aSDomain: + nrpspksdomains_IE238_RS36905_PKS_KS.1" + /NRPS_PKS="type: PKS/NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic (rule-based-clusters) T2PKS: + t2clf" + /gene_kind="biosynthetic" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330943.1" + /locus_tag="IE238_RS36905" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74130" + /product="ketosynthase chain-length factor" + /protein_id="WP_030970827.1" + /sec_met_domain="t2clf (E-value: 1.8e-209, bitscore: 686.3, + seeds: 26, tool: rule-based-clusters)" + /transl_table=11 + /translation="MTASVVVTGLGVVSPNGMGVKDYWAATLGGKHGIGRITRFDPTGY + PARLAGQIEDFDAEELLPSRLLPQTDRVTRLALVAADWALADAGADPAHLPEFDMGVIT + ASAAGGFEFGQGELQALWSQGSQYVSAYQSFAWFYAVNSGQISIRNGMKGPSGVVVSEG + AGGLDAVAQARRQIRRGTPLIVTGGVDASICPWGWVAQLACGRLTTSDEPDHAYLPFDR + DANGYVPGEGGAILIAEDADAARARGVRPYGEIAGYGATIDPRPGSGREPNLAKAIETA + LADADVNAADIDVVFADGAGDPAGDLAEARAVSTVFGDRGVPVTVPKTMTGRLYSGGAP + LDLAAAFLALRDGVIPPTVHIDPCADYPLDLVLGEPRPAELRTALVLARGAGGFNSAMV + VRAA" + aSDomain complement(13959..15017) + /aSDomain="PKS_KS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36905_PKS_KS.1" + /domain_subtype="Iterative-KS" + /evalue="9.40E-31" + /label="IE238_RS36905_PKS_KS.1" + /locus_tag="IE238_RS36905" + /protein_end="401" + /protein_start="48" + /score="98.9" + /tool="antismash" + /translation="LAGQIEDFDAEELLPSRLLPQTDRVTRLALVAADWALADAGADPA + HLPEFDMGVITASAAGGFEFGQGELQALWSQGSQYVSAYQSFAWFYAVNSGQISIRNGM + KGPSGVVVSEGAGGLDAVAQARRQIRRGTPLIVTGGVDASICPWGWVAQLACGRLTTSD + EPDHAYLPFDRDANGYVPGEGGAILIAEDADAARARGVRPYGEIAGYGATIDPRPGSGR + EPNLAKAIETALADADVNAADIDVVFADGAGDPAGDLAEARAVSTVFGDRGVPVTVPKT + MTGRLYSGGAPLDLAAAFLALRDGVIPPTVHIDPCADYPLDLVLGEPRPAELRTALVLA + RGAGGFNSAMVVR" + CDS_motif complement(14466..14504) + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE238_RS36905_0002" + /evalue="1.20E-04" + /label="PKSI-KS_m4" + /locus_tag="IE238_RS36905" + /protein_end="232" + /protein_start="219" + /score="15.0" + /tool="antismash" + /translation="FDRDANGYVPGEG" + CDS_motif complement(14673..14696) + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE238_RS36905_0001" + /evalue="9.90E+01" + 
/label="PKSI-KS_m4" + /locus_tag="IE238_RS36905" + /protein_end="163" + /protein_start="155" + /score="-2.9" + /tool="antismash" + /translation="SGVVVSEG" + gene complement(15158..16429) + /locus_tag="IE238_RS36910" + /old_locus_tag="GCM10010308_74140" + CDS complement(15158..16429) + /NRPS_PKS="Domain: PKS_KS(Iterative-KS) (5-395). E-value: + 3.7e-53. Score: 172.7. Matches aSDomain: + nrpspksdomains_IE238_RS36910_PKS_KS.1" + /NRPS_PKS="type: PKS/NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic (rule-based-clusters) T2PKS: + t2ks" + /gene_kind="biosynthetic" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_007385254.1" + /locus_tag="IE238_RS36910" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74140" + /product="beta-ketoacyl-[acyl-carrier-protein] synthase + family protein" + /protein_id="WP_029394634.1" + /sec_met_domain="t2ks (E-value: 2.1e-248, bitscore: 815.1, + seeds: 25, tool: rule-based-clusters)" + /transl_table=11 + /translation="MSGRRVVITGIEVIAPGGVGRENFWNLLSNGRTATRGITFFDPAP + FRSRVAAEADFDPYEHGLTPQEVRRLDRAAQFAVVASRGAVADSGLDIPSLDPHRVGVT + VGSAVGATMGLDQEYRVVSDGGRLDTVDHTYAVPHLYDYMVPSSFAAEVAWAVGAEGPS + TVVSTGCTSGIDSVGYAVELVREGSADVVIAGSSDAPISPITMACFDAIKATTPRHDEP + ECASRPFDKTRNGFVLGEGTAFFVLEELDSARKRGAHIYAEIAGYATRSNAYHMTGLRP + DGVEMAEAIDLALGEARLNPQSIDYINAHGSGTKQNDRHETAAFKRSLGDHAYRTPVSS + IKSMVGHSLGAIGSIEIAASALAMEYDVVPPTANLHTPDPECDLDYVPLVARDQLIDAV + LTVGSGFGGFQSAMVLATPERSLV" + aSDomain complement(15245..16414) + /aSDomain="PKS_KS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36910_PKS_KS.1" + /domain_subtype="Iterative-KS" + /evalue="3.70E-53" + /label="IE238_RS36910_PKS_KS.1" + /locus_tag="IE238_RS36910" + /protein_end="395" + /protein_start="5" + /score="172.7" + /tool="antismash" + /translation="VVITGIEVIAPGGVGRENFWNLLSNGRTATRGITFFDPAPFRSRV + 
AAEADFDPYEHGLTPQEVRRLDRAAQFAVVASRGAVADSGLDIPSLDPHRVGVTVGSAV + GATMGLDQEYRVVSDGGRLDTVDHTYAVPHLYDYMVPSSFAAEVAWAVGAEGPSTVVST + GCTSGIDSVGYAVELVREGSADVVIAGSSDAPISPITMACFDAIKATTPRHDEPECASR + PFDKTRNGFVLGEGTAFFVLEELDSARKRGAHIYAEIAGYATRSNAYHMTGLRPDGVEM + AEAIDLALGEARLNPQSIDYINAHGSGTKQNDRHETAAFKRSLGDHAYRTPVSSIKSMV + GHSLGAIGSIEIAASALAMEYDVVPPTANLHTPDPECDLDYVPLVARDQL" + gene complement(16426..16755) + /locus_tag="IE238_RS36915" + /old_locus_tag="GCM10010308_74150" + CDS complement(16426..16755) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019767153.1" + /locus_tag="IE238_RS36915" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74150" + /product="TcmI family type II polyketide cyclase" + /protein_id="WP_019330945.1" + /transl_table=11 + /translation="MHSTLIVARMAATSSNDVAQLFADFDATEMPHRMGTRRRQLFSYR + GLYFHLQDFDEDNGGELIEAAKADPRFVRISEDLKPFIEAYDPTTWRSPADAMATRFYS + WEASR" + gene complement(17013..17711) + /locus_tag="IE238_RS36920" + /old_locus_tag="GCM10010308_74160" + CDS complement(17013..17711) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330946.1" + /locus_tag="IE238_RS36920" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74160" + /product="antibiotic biosynthesis monooxygenase" + /protein_id="WP_193517008.1" + /transl_table=11 + /translation="MPIISAEDKHLTVLNLFTTDTPEKQAKLIEEMTKIVNAAAYEGWM + SSTVHSGVDGYGTLNFIQWRSGEDLEKRYAGEEFKHRTLPVFGEITTSIRLMQNEVAHT + LTSDALGGKIEIGPDRDDYTVFTLFPVTPEGQDEAVDALGPGQAFLADVPGFRAHVVLR + GLRARGLEGSFVISYSQWDSKEAFEVYRDQAPEEQADARKAAVARVRAVVTGEPYLNTY + RVVHTRSAGE" + gene complement(17768..19186) + /locus_tag="IE238_RS36925" + /old_locus_tag="GCM10010308_74170" + CDS complement(17768..19186) + /GO_function="GO:0071949 - FAD binding [Evidence IEA]" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_004948222.1" + /locus_tag="IE238_RS36925" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74170" + /product="FAD-dependent monooxygenase" + /protein_id="WP_233535396.1" + /transl_table=11 + /translation="MLAGELRLAGVSVVVLDKLSEPIQESRALGFSARTIEEFAQRGLM + DRFGEVGVIPVGHFGGVPLDYRVIEGGSYGARGIPQARTEGVLGGWARELGADIRRGCE + VTGIEQTDASVTVTAAGADGPFSLRARHVVGCDGARSIVRKLAGIGFPGTEPAIELRFA + DLAGVALRPRFSGERVAGGMVMVIPMGPDRCRVIYFDSSEPLRTSPDPITFDEVAQTWQ + RLTGEDVSGATPLWVSSTTDVSRQADRYRHGRVFLAGDAAHIHLPIGAQGMSAGVQDAV + NLGWKLALDIKGQAPEGLLDTYHAERHPVGARILTNTLAQRILYLGGDEITPMREVLAE + LMGAHESVQRHLAGMVTGLDIRHDVGEGDHPLLGRRLPDRELVVDGEKTPFYALLRTAR + PVLLELGGDHGLRTAAAGWADRVDLVAAEFDGCEAPVDGILVRPDGYVAWVAGLGAGPD + GLTAALGRWFGPSA" + gene 19407..19889 + /locus_tag="IE238_RS36930" + /old_locus_tag="GCM10010308_74180" + CDS 19407..19889 + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_007389987.1" + /locus_tag="IE238_RS36930" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74180" + /product="nuclear transport factor 2 family protein" + /protein_id="WP_050990831.1" + /transl_table=11 + /translation="MTSSLTTDQSASVSAAESSAQVAGLLHRYLVSLDDERLDDAWTAG + LFTEDAVVAFPVSRHEGADGMAEYHRSALSAFAATQHLGSPAVVDVDGDRAVFRANLIS + THVHHPRHTPPEGDLPPLFATGTFVNGEARRTARGWRLSLLAFRLLWADGSPPPAR" + gene complement(20105..20521) + /locus_tag="IE238_RS36935" + /old_locus_tag="GCM10010308_74190" + CDS complement(20105..20521) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330949.1" + /locus_tag="IE238_RS36935" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74190" + /product="pyridoxamine 5'-phosphate oxidase family protein" + /protein_id="WP_030970823.1" + /transl_table=11 + /translation="MPSMPRPDAVTVPDSVQAFLTGTALVAAFTTMRPDGTPHVAPVRF + TWDADAQLARVMTVRSSRKARNLLATPGAPVALCQVDGFRWVTLEGTGTVVTDPERVAL + GARLYAKRYWSAPPTPSDRVVIEIAVDRVLSLNA" + gene complement(20758..20904) + /locus_tag="IE238_RS36940" + /old_locus_tag="GCM10010308_74200" + CDS complement(20758..20904) + /codon_start=1 + /inference="COORDINATES: ab initio prediction:GeneMarkS-2+" + /locus_tag="IE238_RS36940" + /note="Derived by automated computational analysis using + gene prediction method: GeneMarkS-2+." + /old_locus_tag="GCM10010308_74200" + /product="hypothetical protein" + /protein_id="WP_019330950.1" + /transl_table=11 + /translation="MHEGDDERHEGDTARCARAVHGGPPPLRLAGAEEREDEDGRVIVR + SID" + gene complement(20939..22429) + /locus_tag="IE238_RS36945" + /old_locus_tag="GCM10010308_74210" + CDS complement(20939..22429) + /NRPS_PKS="Domain: Condensation_DCL (38-314). E-value: + 1.3e-48. Score: 157.5. 
Matches aSDomain: + nrpspksdomains_IE238_RS36945_Condensation_DCL.1" + /NRPS_PKS="type: NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic-additional + (rule-based-clusters) Condensation" + /gene_kind="biosynthetic-additional" + /inference="COORDINATES: protein motif:HMM:NF012873.2" + /locus_tag="IE238_RS36945" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74210" + /product="condensation domain-containing protein" + /protein_id="WP_193517009.1" + /sec_met_domain="Condensation (E-value: 3.4e-30, bitscore: + 96.0, seeds: 42, tool: rule-based-clusters)" + /transl_table=11 + /translation="MAWHSRPRALRGPGTARPPGVPALWPHLDLLGDFGGRRGAGRHVE + QLVWRWHGPLDTERFTAAWQSVVDRESVLRAALAPGPRPHLVLHEHAHGDVVRHRAGGA + GWDRLLERDRRRGLDPSRPCPLRVTLVERTDDPAGAGPVTRVVLTFHHALLDAWSVCLL + MQELCRAYLAGGELPGGERRPDLRDWAGWLQRQDPAGARDFWRGAVPDGPVAVLPARPG + PRTRQRGRGRTEVRLSPAEAERLHRWAALRAVPDSSALETVWALLLYRAAGPGGAATVG + FGVTVSGRGITLDCAERLPGPLRNCLPMVVRVDPGETVGRLLTALRDRALDMAAYEWVS + TRRIHRWTGRCPDGELLQSVVSVDSLPRPPGNLRNELAGAGIALEPEPAHGACPDLPVA + LLVRPGGDGRLTFCVDHDRNRISDADARLLAGHCARLLRHLPGTDEATTNGAVLDVLAG + EALPRIAPRPSRPRPAGSWLRPRSTSSGAAVDRAASHP" + CDS_motif complement(21452..21511) + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE238_RS36945_0004" + /evalue="3.90E+01" + /label="C2_LCL_024-062" + /locus_tag="IE238_RS36945" + /protein_end="326" + /protein_start="306" + /score="-3.2" + /tool="antismash" + /translation="LPMVVRVDPGETVGRLLTAL" + aSDomain complement(21488..22315) + /aSDomain="Condensation" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36945_Condensation_DCL.1 + " + /domain_subtype="Condensation_DCL" + /evalue="1.30E-48" + /label="IE238_RS36945_Condensation_DCL.1" + /locus_tag="IE238_RS36945" + /protein_end="314" + /protein_start="38" + /score="157.5" + /tool="antismash" + 
/translation="GAGRHVEQLVWRWHGPLDTERFTAAWQSVVDRESVLRAALAPGPR + PHLVLHEHAHGDVVRHRAGGAGWDRLLERDRRRGLDPSRPCPLRVTLVERTDDPAGAGP + VTRVVLTFHHALLDAWSVCLLMQELCRAYLAGGELPGGERRPDLRDWAGWLQRQDPAGA + RDFWRGAVPDGPVAVLPARPGPRTRQRGRGRTEVRLSPAEAERLHRWAALRAVPDSSAL + ETVWALLLYRAAGPGGAATVGFGVTVSGRGITLDCAERLPGPLRNCLPMVVRVD" + CDS_motif complement(21503..21589) + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE238_RS36945_0003" + /evalue="6.10E-06" + /label="C5_DCL_263-294" + /locus_tag="IE238_RS36945" + /protein_end="309" + /protein_start="280" + /score="18.7" + /tool="antismash" + /translation="GFGVTVSGRGITLDCAERLPGPLRNCLPM" + CDS_motif complement(21917..21982) + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE238_RS36945_0002" + /evalue="4.90E-08" + /label="C3_DCL_135-156" + /locus_tag="IE238_RS36945" + /protein_end="171" + /protein_start="149" + /score="25.5" + /tool="antismash" + /translation="FHHALLDAWSVCLLMQELCRAY" + CDS_motif complement(22199..22300) + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE238_RS36945_0001" + /evalue="4.80E-10" + /label="C2_DCL_024-062" + /locus_tag="IE238_RS36945" + /protein_end="77" + /protein_start="43" + /score="31.7" + /tool="antismash" + /translation="VEQLVWRWHGPLDTERFTAAWQSVVDRESVLRAA" + gene complement(22634..23443) + /locus_tag="IE238_RS36950" + /old_locus_tag="GCM10010308_74220" + CDS complement(22634..23443) + /GO_function="GO:0008897 - holo-[acyl-carrier-protein] + synthase activity [Evidence IEA]" + /NRPS_PKS="Domain: ACPS (134-195). E-value: 7e-12. Score: + 37.1. Matches aSDomain: + nrpspksdomains_IE238_RS36950_ACPS.1" + /NRPS_PKS="type: other" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_007389988.1" + /locus_tag="IE238_RS36950" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74220" + /product="4'-phosphopantetheinyl transferase superfamily + protein" + /protein_id="WP_029394636.1" + /transl_table=11 + /translation="MERVTCAAPLHVPRPHGPWPAVREDLFRHGNALVCTTWSEWLPSV + LTTPRLRELLGDDWQRYRRTRDAAVRYRFAASRMLIKYTAAAALAVPPEYLDLAYRLGG + RPYLRGFDQIELSLSHTGDVMAVGLSRIGRIGVDVEPAERPVRLDLLETQVFTPAEARE + LAELPEGERTAHALRLWTLKEAYSKALGQGLRFGFKEFGFRQGRLSAPDGSRVTRDEWG + FATYPVMDRFLLSVACHNAGLSTAGDTSVGTMLDQGFLSAMTDTGQQ" + aSDomain complement(22859..23041) + /aSDomain="ACPS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36950_ACPS.1" + /evalue="7.00E-12" + /label="IE238_RS36950_ACPS.1" + /locus_tag="IE238_RS36950" + /protein_end="195" + /protein_start="134" + /score="37.1" + /tool="antismash" + /translation="IGVDVEPAERPVRLDLLETQVFTPAEARELAELPEGERTAHALRL + WTLKEAYSKALGQGLR" + gene complement(23532..25412) + /locus_tag="IE238_RS36955" + /old_locus_tag="GCM10010308_74230" + CDS complement(23532..25412) + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330953.1" + /locus_tag="IE238_RS36955" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74230" + /product="acyl-CoA dehydrogenase" + /protein_id="WP_193517010.1" + /transl_table=11 + /translation="MTAALDTAAGPTGQTPAPRSTTPTPAPEHTTPTPAPEHTTRAPAP + EPEHTTPTPECERAARAPEPERAARLEAALGDPFDPANPHGHLALVRADDTREAPHATE + ALLTEHGLSAEFVPHDLGGRLRDLEELARVLRPLFRRDLALGYGFGITSLFAASSVWTA + GDPHQRAALADVLLGGGRVAIVHREVAHANAILRREVRAQRPAGGGFLLNGSKDAVMNA + DRTDTFVVYARTSAGSGSASHSVLLLPGPPASGEVRRLARVEMPGMRGARFHGLRLADV + RLPDSALVGSLGEGVTLALRSFQISHCLIPGTVLAGVDSVLRLAVRAATENRPDGRPAR + RWHKALSGVFADLLACDAMAVTGLRALSLVPQHAHLLAAAVKYTMPDLLREDLEELAAV + LGARGYDRGPLYGGFQKLARDLPVAGLGHSGTAVCQAVLVPQLPALARTAWFRTAEPSA + ALFLPGAPLPPLDHRRLTHSGTDDPLTATLIGAAERLAARTGTQPLHAALAALARALVE + ELRVLRARCAALPAAGSTVFDPLACALADRYALLLCAAACLGVWEGQADGDGFLADPAW + AVLVLSRIGRRLGIAVPETPADAEQAVLAEALGRCRHGRSLDLYDTPLAG" + gene complement(25409..26293) + /locus_tag="IE238_RS36960" + /old_locus_tag="GCM10010308_74240" + CDS complement(25409..26293) + /NRPS_PKS="Domain: Polyketide_cyc (3-131). E-value: + 9.4e-13. Score: 40.4. Matches aSDomain: + nrpspksdomains_IE238_RS36960_Polyketide_cyc.1" + /NRPS_PKS="Domain: Polyketide_cyc2 (146-253). E-value: + 1e-07. Score: 24.3. Matches aSDomain: + nrpspksdomains_IE238_RS36960_Polyketide_cyc2.1" + /NRPS_PKS="type: other" + /codon_start=1 + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_019330954.1" + /locus_tag="IE238_RS36960" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74240" + /product="SRPBCC family protein" + /protein_id="WP_078912213.1" + /transl_table=11 + /translation="MTAPAPSGVLYGLIADATVWPLFFPPSVHVEQLDFDGTRERLRMW + AVAGDRISSWVSHRRLDVGQRRVEFRQERPSAPVETMTGLWTVEPLGDGSRVTLEHAFT + VVGDAPADAAWTERVTRANSRAQLQRLAWLAERWTRLDDLVMSFEDTVRVNVPAELVFD + FLYRAGDWPDDLAGTRPLTVQEDTPGIQVLALDGRSATGGEAVRISFPAAGRLVHKHTR + TSGPLAAYTGEWTIEPQPGAGLDVTVRHDVLLNDDAALDQDAARRACDEVGRAGRRLLE + HAVRHASDAVRVL" + aSDomain complement(25535..25855) + /aSDomain="Polyketide_cyc" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36960_Polyketide_cyc2.1" + /domain_subtype="Polyketide_cyc2" + /evalue="1.00E-07" + /label="IE238_RS36960_Polyketide_cyc2.1" + /locus_tag="IE238_RS36960" + /protein_end="253" + /protein_start="146" + /score="24.3" + /tool="antismash" + /translation="SFEDTVRVNVPAELVFDFLYRAGDWPDDLAGTRPLTVQEDTPGIQ + VLALDGRSATGGEAVRISFPAAGRLVHKHTRTSGPLAAYTGEWTIEPQPGAGLDVTVRH + DVL" + aSModule 25535..26284 + /domains="nrpspksdomains_IE238_RS36960_Polyketide_cyc.1" + /domains="nrpspksdomains_IE238_RS36960_Polyketide_cyc2.1" + /incomplete + /locus_tags="IE238_RS36960" + /tool="antismash" + /type="unknown" + aSDomain complement(25901..26284) + /aSDomain="Polyketide_cyc" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36960_Polyketide_cyc.1" + /domain_subtype="Polyketide_cyc" + /evalue="9.40E-13" + /label="IE238_RS36960_Polyketide_cyc.1" + /locus_tag="IE238_RS36960" + /protein_end="131" + /protein_start="3" + /score="40.4" + /tool="antismash" + /translation="PAPSGVLYGLIADATVWPLFFPPSVHVEQLDFDGTRERLRMWAVA + GDRISSWVSHRRLDVGQRRVEFRQERPSAPVETMTGLWTVEPLGDGSRVTLEHAFTVVG + DAPADAAWTERVTRANSRAQLQRL" + gene 26608..26844 + /locus_tag="IE238_RS36965" + /old_locus_tag="GCM10010308_74250" + CDS 26608..26844 + /codon_start=1 + /inference="COORDINATES: protein motif:HMM:NF039972.1" + 
/locus_tag="IE238_RS36965" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74250" + /product="hypothetical protein" + /protein_id="WP_031022584.1" + /transl_table=11 + /translation="MRRFRPRRCVRGLWEGLVAYGRLCLAGETDRYDHPPRPRIRWHRP + PPGHPERVRDDMPLTDLERRLARELADEDHDVR" + gene complement(27171..28448) + /locus_tag="IE238_RS36970" + /old_locus_tag="GCM10010308_74260" + CDS complement(27171..28448) + /NRPS_PKS="Domain: PKS_KS(Iterative-KS) (89-421). E-value: + 3e-25. Score: 80.8. Matches aSDomain: + nrpspksdomains_IE238_RS36970_PKS_KS.1" + /NRPS_PKS="type: PKS/NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic (rule-based-clusters) T2PKS: + t2clf" + /gene_kind="biosynthetic" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_018846562.1" + /locus_tag="IE238_RS36970" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74260" + /product="ketosynthase chain-length factor" + /protein_id="WP_193517011.1" + /sec_met_domain="t2clf (E-value: 2.5e-172, bitscore: 564.0, + seeds: 26, tool: rule-based-clusters)" + /transl_table=11 + /translation="MTKATQAPEPARPAPTPAGAEGPRQTLVTGIGVAAPNGLGTRAWW + DAVLCGRTGLGPITRFDASGYPVRIAGEIPGFVDEDHIPSRLLPSTDRGTRIALVAAEE + ALRDANVSPADLPAYGAGVITASSAGGAEFGERGLAALWSKGAQHVSAYQSFASFHAAA + PAQISIRHRLRGHGSTVVSEQAGGIDALARARRRIRDGACLMVTGGIDSTLCAWGWAAH + LADGRLSPATEPARAYRPFAATADGHAVGEGGALLVLEDARAAARRGATGYGVIAGCAA + TFDGPDRPTLRQAAELALADAGLAPEHVDVVFADGAAERRADLVESQALCALFGPYGVP + VTVPKTMTGRLGAGGSALDVAAALLALREKVVPPTTGTGRVADDCPLDLVTGAPRELPR + LRVALVLARGRGGFNSAAVLQAPQTQ" + aSDomain complement(27186..28181) + /aSDomain="PKS_KS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36970_PKS_KS.1" + /domain_subtype="Iterative-KS" + /evalue="3.00E-25" + /label="IE238_RS36970_PKS_KS.1" + /locus_tag="IE238_RS36970" 
+ /protein_end="421" + /protein_start="89" + /score="80.8" + /tool="antismash" + /translation="STDRGTRIALVAAEEALRDANVSPADLPAYGAGVITASSAGGAEF + GERGLAALWSKGAQHVSAYQSFASFHAAAPAQISIRHRLRGHGSTVVSEQAGGIDALAR + ARRRIRDGACLMVTGGIDSTLCAWGWAAHLADGRLSPATEPARAYRPFAATADGHAVGE + GGALLVLEDARAAARRGATGYGVIAGCAATFDGPDRPTLRQAAELALADAGLAPEHVDV + VFADGAAERRADLVESQALCALFGPYGVPVTVPKTMTGRLGAGGSALDVAAALLALREK + VVPPTTGTGRVADDCPLDLVTGAPRELPRLRVALVLARGRGGFNSAAVLQA" + gene complement(28445..29716) + /locus_tag="IE238_RS36975" + /old_locus_tag="GCM10010308_74270" + CDS complement(28445..29716) + /NRPS_PKS="Domain: PKS_KS(Iterative-KS) (3-396). E-value: + 4.4e-49. Score: 159.3. Matches aSDomain: + nrpspksdomains_IE238_RS36975_PKS_KS.1" + /NRPS_PKS="type: PKS/NRPS-like protein" + /codon_start=1 + /gene_functions="biosynthetic (rule-based-clusters) T2PKS: + t2ks" + /gene_kind="biosynthetic" + /inference="COORDINATES: similar to AA + sequence:RefSeq:WP_005321633.1" + /locus_tag="IE238_RS36975" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." 
+ /old_locus_tag="GCM10010308_74270" + /product="beta-ketoacyl-[acyl-carrier-protein] synthase + family protein" + /protein_id="WP_193517012.1" + /sec_met_domain="t2ks (E-value: 2.3e-202, bitscore: 663.4, + seeds: 25, tool: rule-based-clusters)" + /transl_table=11 + /translation="MNRVVITGIGVVAPGAVGTAGFWDLLTVGRTATRRVTLFDACGYR + SRVAAEVDFTPAAHGFDLADTERLDRAAQFALVAAREAVADSGVADRIGRNPLRTGVSL + GSAIGCTTSLATQYAILSDCGTTWTLDHTEAAESLYDYFVPSSLAATVARDRGAQGPVA + LVSSGCTSGLDAVGHGADLIREGSADIVVAGGTEAPIVPIAMACFDRLRLTSSRNDDPA + TASRPFDRTRDGFVLGEGAAVLVLEELEHARRRGARPYAELSAITAHSSAHHMTGLRPG + ALEMADAIRAALDQARLNPADVDYISAHGAGTRHNDRHETHALKESLGGSAHRVPVSSI + KSMIGHALGAAGALDLAASALAIRHDTVPPTANLHEPDPTCDLDYTPLFAREQRTSTVL + TVASGFGGFHTAAVLTRPQLKEAA" + aSDomain complement(28529..29707) + /aSDomain="PKS_KS" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36975_PKS_KS.1" + /domain_subtype="Iterative-KS" + /evalue="4.40E-49" + /label="IE238_RS36975_PKS_KS.1" + /locus_tag="IE238_RS36975" + /protein_end="396" + /protein_start="3" + /score="159.3" + /tool="antismash" + /translation="VVITGIGVVAPGAVGTAGFWDLLTVGRTATRRVTLFDACGYRSRV + AAEVDFTPAAHGFDLADTERLDRAAQFALVAAREAVADSGVADRIGRNPLRTGVSLGSA + IGCTTSLATQYAILSDCGTTWTLDHTEAAESLYDYFVPSSLAATVARDRGAQGPVALVS + SGCTSGLDAVGHGADLIREGSADIVVAGGTEAPIVPIAMACFDRLRLTSSRNDDPATAS + RPFDRTRDGFVLGEGAAVLVLEELEHARRRGARPYAELSAITAHSSAHHMTGLRPGALE + MADAIRAALDQARLNPADVDYISAHGAGTRHNDRHETHALKESLGGSAHRVPVSSIKSM + IGHALGAAGALDLAASALAIRHDTVPPTANLHEPDPTCDLDYTPLFAREQRTS" + gene complement(29713..30492) + /locus_tag="IE238_RS36980" + /old_locus_tag="GCM10010308_74280" + CDS complement(29713..30492) + /NRPS_PKS="Domain: Thioesterase (15-242). E-value: 7.3e-38. + Score: 123.1. 
Matches aSDomain: + nrpspksdomains_IE238_RS36980_Thioesterase.1" + /NRPS_PKS="type: other" + /codon_start=1 + /inference="COORDINATES: protein motif:HMM:NF013166.2" + /locus_tag="IE238_RS36980" + /note="Derived by automated computational analysis using + gene prediction method: Protein Homology." + /old_locus_tag="GCM10010308_74280" + /product="alpha/beta fold hydrolase" + /protein_id="WP_193517013.1" + /transl_table=11 + /translation="MASRSRDREAGTARITLTCLAHAGAGVASYRGWSAAVGPGIDVAA + LPLPGRDSRRREPRLTERAGLLADFLPTLLQTARRGPYALYGHSMGALVGYTLTRALAD + SGLPPLFLAVGACPPPHTTSVLADAADLPDEDLLPLLDEIGSLPPGASASPGGLWRRTF + LPVLRDDLRLARSLRNAALDPVTGGPLDVPVLVFAGRDDPLAAPAALRHWQQWTTNLIE + LHTVAGGHFFASSSSLAQHVGRACRGHVTALPTGGGR" + aSDomain complement(29767..30447) + /aSDomain="Thioesterase" + /aSTool="nrps_pks_domains" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain_id="nrpspksdomains_IE238_RS36980_Thioesterase.1" + /evalue="7.30E-38" + /label="IE238_RS36980_Thioesterase.1" + /locus_tag="IE238_RS36980" + /protein_end="242" + /protein_start="15" + /score="123.1" + /tool="antismash" + /translation="TLTCLAHAGAGVASYRGWSAAVGPGIDVAALPLPGRDSRRREPRL + TERAGLLADFLPTLLQTARRGPYALYGHSMGALVGYTLTRALADSGLPPLFLAVGACPP + PHTTSVLADAADLPDEDLLPLLDEIGSLPPGASASPGGLWRRTFLPVLRDDLRLARSLR + NAALDPVTGGPLDVPVLVFAGRDDPLAAPAALRHWQQWTTNLIELHTVAGGHFFASSSS + LAQHV" + CDS_motif complement(30187..30255) + /aSTool="nrps_pks_domains" + /database="abmotifs" + /detection="hmmscan" + /domain_id="nrpspksmotif_IE238_RS36980_0001" + /evalue="3.80E-07" + /label="NRPS-te1" + /locus_tag="IE238_RS36980" + /protein_end="102" + /protein_start="79" + /score="22.6" + /tool="antismash" + /translation="RGPYALYGHSMGALVGYTLTRAL" +ORIGIN + 1 cgccacagca ggacgtcgcc gagttcgcgg aagtaccagc cgttgccgac gatgccgtgc + 61 tcggacgggt aggtcccggt gaggaaggtg gactgggcgg cgcaggtgac ggcgggcagg + 121 acggtgccca gcggggcgcg ggagccggac tggccgagcc gtttgaggtg gggcatgtgg + 181 tccaggagcc ggggggtgag gccgacgacg tccaggacca gcagcgggac cggccccccg + 241 
gaagaggtgg tgggggtcat ggcagctcct tcaggccgag gtcggtcagc aggtcacggg + 301 cgagcatcag ctccgcggcg atcccgtcgg tgagctgggc ccggccccgg ggccgcaact + 361 gggccggcaa cgcctgccag gtgtaggtct ccacctccag atgccgggtc agcggacgcg + 421 ccccgcccac cagccgggac agagccgact tcagcaccgg cagagtcgag gtgagcggcg + 481 cggccgggtc cgcgtgcagg gggacgtgga agtgcgcgcg ccagggcgcc gagtcgggca + 541 gcggtccgcc ggcggccagg gcctcgtcca ggtcatccgt gccacgcagc ccgcccgtgg + 601 cagtggtggt gcgggtctgg tgcaggaagc ggggctcggc gaaggcggcg agagcgtcgc + 661 gtacttcgag acgagaaggg tgctcggcgt gcagggcggc ggagagctgg gacttcacga + 721 ttcggatccc ggccgcctcc agggcgtcca gcgcttcctc cggatcctcg aaggaggtgg + 781 cgaggtggca ggtgtcgaca cagacaccga ttcggtcgtg tccgatcgcg cccagcggag + 841 cgatagcgtc ccgggtcgtc tccacgatgc agccgggttc gggttccagg ccgatgcgga + 901 tggagcggcc ggtcagctcg tgaagggtgt ccagacgctc gccgagcgtg gccagcgccg + 961 tgcgggcctt gtcggcgcgg gtctcgtccc aggcggtgcg ccaggccagc ggcagcgtgg + 1021 agacggagcc ctcgctcaca tcgtcgggca gcagtccggc gaggacgcgg gccagggcgg + 1081 tggtgtgctc caggcgttcg gggtgagccc agtccggttt gtagacgcgg tacttgacct + 1141 cttcggcgcc gaagccctgg taggggaagc cgttgagggt gacgacctcc agtccccgcc + 1201 ggtcgagttc ggtgcgcagt ccgcgcagcg cggaggggtc ggtggccagg gcgtgggcgg + 1261 cgtctttggc gagccacagc ccgatgccga gacggtcgcg gcccaggcgg cggcggaccg + 1321 gctcgcagtg gtcgcggagc tgggcgagga ccccgtcgag ggtttcggcg ggatggacgt + 1381 tggtgcagta ggcgaggtgg acggtggagc cgtccgggtg ccggaaccgc accgttcacg + 1441 ctcccgtcgg cagcggctcg gcggccgggg cgccgcgcag gacggagttg ccctcgtggg + 1501 tgggcgcggt ggccgtgacg tcgaggtcga ggcgtccgct gaggccgtag aaggcgacgg + 1561 ggttgcgcca cagcacccgg tccacgtcgt cctcgccgaa gccctcctcg agcatcaggt + 1621 caccgacctt gcgggtcttg agcgggtcgc tcctgcccca gtccgcggcc gagttcacca + 1681 gcaccttctc cgggccgtac tcacgcagca gagcgaccat gcgggcctcg tccatcttgg + 1741 tgtccggata gacggagaac cccagccagg caccgctgtc cttggcctcc ttgaccgtgg + 1801 tctcgttgag gtggtcgacc agcacccggt ccgtgggcag cgcggactcg cgcaccacgt + 1861 ccagcgtgcg gcgcagcccg gcgagcttgt 
cccggtgcgg ggtgtgcacc agcgccggca + 1921 gcccgtggtc ggcggcgagc tgcaactggg cggccagcgc ggtgtcctcg gccggggtca + 1981 tcgagtcgta gccgatctcc cccaccgcca ccacccggtc cttgaccaga tagcgcggca + 2041 gctcggccag caccggagtg cagcgcggat cgttcgcctc cttcgggttc aacgcgatcg + 2101 tgcagtgatg ggcgatcccg tactgcgccg cacggaacgg ctcccagccc agcaacgcgt + 2161 cgaagtagtc acagaaggag gctgcagagg tgcggggctg ccccagccag aaggcgggct + 2221 cgaccacggc acggacaccc gcggcgtaca tggcttcgta gtcgtcggtg gtccgggacg + 2281 tcatgtggat gtgggggtcg aagatgcgca agtcactgct cctgggtcag ggcgagcacg + 2341 cggtcgaggt cggccgggat gtctcggtgg gcggcggtgc gctcggcggc gtaggcgcgc + 2401 agcattctgg ccagttccgc gtctgcgcgg gcgcgggccg ccaggtttgc cacggtggcc + 2461 acgggcacgc cggtgaacag gcacttgagt acggcgtgcc gccattggtg ggcgtccagg + 2521 tgccgggccg cgtaggggcc gagggcggcg gcgagcaggg tggtgtcgtt ggcgcgcagg + 2581 gcgtcttcga ccagcggcag agcgtgggcg ggttcggtgt cgagtccgtc cagggcgagc + 2641 aggaccgcac gccgttcggc ggctgtcccc cggtggtaca gcagggtcac ggtctccgcg + 2701 tcggggcgtg ccgccgccag cagcagtacg cgggcggcgt cggcgtgttc ggtgccgcag + 2761 cggcggcccg cctcggcgaa gcgcagttcc cagtcggtgt cccgggcggt tgccgaggcg + 2821 cgggcggcgg tgagccagcg gcgtgcgtcg gtgccgaggc cggcgaactc gccgggcagg + 2881 tccgcgtccg gcggggtgtg tacgacggtc atctcgtccc tccgttgcgc aggaaccgca + 2941 tcgactgtgc ggccagttcg ggtccggcgt gggagtgccg gggcagttcg acgacggtca + 3001 ggccctggta gccggtggcg gcgagggcgt cgagtacggg cgggaagtcg atctcgccgt + 3061 ccccgaaggg gaggtgttcg tggacgccgc ggcgcatgtc ctcgatctgc acgtgccgca + 3121 gccagggcgc ggcggcccgt acgcagtcgg cgggcggcag gggctccagg cactggcagt + 3181 gcccgatgtc gagggtcagt cccagtggtt cggggtcgcc cagttcggtg cgcaggcggt + 3241 ggaagtcggt cagtgtggcc agcaggtggc cgggctcggg ttcgaccgcc agggggacgc + 3301 cggtgtcctc cgcggcgtcc aggacggggc cgagggcgtc ggccaggcgt ttccaggcgg + 3361 tgtcctggtc caggcccgcg ggccgtgggc cgctgaagca gtgcacggcg tgcgcgccga + 3421 ggtcggcggc gacgcgtacc gagcggatca gcaggcgtat gcgtgcccag cgggcgtccg + 3481 ggtcgtcgtc gagcaggctg ggtccgtgct tgcggcgcgg gtcgaggacg 
tagcgggcgc + 3541 cggtctcgac ggtgaccgcg aggccgaggc ggtgcagttg ccgggcgacg tggcgggtgc + 3601 gggccgcgag gtcgggggcg agcgggtcga ggtgcatgtg gtcgagggtg agtccgacgc + 3661 cgtcgtagcc gaggtcggcc agcaggcgca gggcgtcctc caggcgcagg tcggtcagcc + 3721 cgttggtgcc gtagccgagg cgcagcgggc tcatgtggtg ctcactttcc gggccaggtg + 3781 cgcggcgagg ggtgccaggg cggcggtgag gacgccggtg ccgggtgcgc cggcccgggc + 3841 catcagtccg ctctgcaggg cgatcgtggc gcggacgccg gcgccgacgg cccgctgtgt + 3901 cagttccggg gaggggttga gcgcggcgtg ggccaggggg cgggcgaagg tggcggcgta + 3961 ggaagcggcc agggcggcgc gtacggcgcg ggtggccttg tcggccggtg gtgtcgcggg + 4021 agtcggtgcg ccggtgctgg cgtcgggcgc ggggtgggcg gtggggcggc cggcgaggac + 4081 ggtggtgagg gcgcccgcgg cggcgagggc cgtgaggggg gcggtgctgg agccgccctc + 4141 ggtctcgcgg cgggagacga gggagacggc cagggtgtgg ctgccgagga acgccgccgg + 4201 gacgatcgcc cgtcgcacgg cgcccgggcc ggcggccgcg cccagcagca ggtccaggga + 4261 gcgggcggcg gccatcgccg cgggtcccgc cggggtgtgt ttcaggccca ggtcgtaggc + 4321 ccatacggtg gcggccaggg ggccggcgac cgcgagggcc cgtcgtcccg cgcatgcggc + 4381 gagggccagg cccgcggcgg agaggaggcc cgcggcggcg agggcggcgc cgggccggac + 4441 gcggccggac ggcagggggc ggtggggccg ggcggtggcg tcctcctcgc ggtccgccca + 4501 gtcgttgagt gccatgcccg cctcgtacag gcacagtgag gcgccggccg cgagggcggt + 4561 gcgtccggtg ggccgtgtgc cggcggccgc ggtgccggcg aggacgtcac cggggacggt + 4621 gagcgcggcg ggggcgcgca gcagttcggc ccaggcgtgg aggtgcccgt cgcgggccgg + 4681 gccgtgggcg ggccgctgag cgctcacctg gttcctccga gccggtcggc gagggtgagc + 4741 agtcgggtgt actgctcggc cagggcggag ccttcggcgt cggggtcctt gaagtagaag + 4801 ccgagttcgc ccagcgggcc ggacaggccc cgctcgtggg cgcgggcgag caggcgggcc + 4861 aggtcgagga cgagcggtgc ggcgagggcg gagtcgcagc cctgccagat ggtctgcagg + 4921 accatgcggg tgccgaggaa gccgtcgaag gcgacgtggt cccaggcggt cttccagtcc + 4981 ccgagggcgg ggacgtcgtc gatgtgcact tcgccttcga cccgggtgcc gagggtgtcg + 5041 gtgaggacgc gttccttgcc ggcgttcttc gccgcggcgg cggcggggtc ggcgagggcg + 5101 gcgccgtcgc cgccgcccag caggttcgtg ccggaccagg cccgtaccgc cagtgcccgc + 5161 tgggcgaaca 
tcggccccag caccgaacgc agcagggtct gcccggtctt gccgtcgcgg + 5221 cccgcgtacg gcaggcccga ggcgcgggcc gcgcccgcca gctgggggtg gctcagcccg + 5281 gtcgacggcg tgaagttgac gtaggggcag cccgcccgca gggccgccgc ggcgtacagg + 5341 gagctggccg gccacgcgcc gtgtgcgggg tccggttcgg tggaggcgac gttgacgacg + 5401 accgtgcggg cgacgccggt ggtgcgggcg aaggtgcgca ggtcggtggc gaagtcggct + 5461 atcagctctt cggtggcgcg acggtcgccg ggtaccggtc ctccggggcg gatgtgttcg + 5521 tccgcggcgg ccagttcccc ctggacggcg gtcggcagcc agggcggcag tactccttgg + 5581 gcggccagtt cctcggcgcg tttgggcagg ggggtggtcg ccgtgtcgtg gccgccgaag + 5641 acgagggagg ccagggccgg caggccgcag tcggcgaagg ggggcgtctc ggtgaccatg + 5701 ccggtgggtg gcagcagtcc tgccgccagc gccgcgcagc ccgacaccgc cgtcgtggcg + 5761 accgaaccgc gggctccgac gagccacacg ccggtccggg tctcgtcggc cgaggtcatg + 5821 gcatcttgcg tcacggggca gcctccctga gcgtcgaatg tgttgtccgg caagtggatg + 5881 cccggccagg atgccgagcc gggctgtgcg ggggcgtcgc cctccgtcgg ctgtcgagcg + 5941 gccgccgagc ggattcccat cggccgcggg cgtccggcgc cgggcacagc gaggccccgc + 6001 cggtcgcagg ggtgcgggac cggcggggcc tggcaggggg gtgtgcgcgg cgcaccctcc + 6061 tgcgggggtc agcggttgac gcggaccagc tggccgttgg tgccgagcgt gttgttggtg + 6121 acgtacacct tgccgcaggg gtcgacggcg acgccggtgg gctgcttgag ctggtcgccg + 6181 gtggggatct cggtgaccgc gccgctgtcg gcgtcgatct cgaacagggc gcccggctgc + 6241 ggcgggccgg ccagcgcggc ggcgctgtag gacagggcga tcaggtcgcc gtcccggtcc + 6301 aggtccaggt cgatgaggtt ggtcatgccg gtggcgagga tctcgggctg ccggcccggg + 6361 gcgatcttcc agacgcggga ggcgccgacc cggccgccgc tcatgtcggc gacgtacacg + 6421 gtgccgtcgg cggccttggc gatgccggtg ggtacggtct cggcggcgcc ggtgggcagt + 6481 tcgttcttgg gcaggaggta ctcggtggcg gtggtgccgt cggcgttgcc gcgtacgacg + 6541 gtgttggcgc ccgcgtcggt ggccaggtag ccgctgccgc tgcgcgcgag ccgccacggg + 6601 ttggcgtgca cgtcgccgcc gtcggggttc agccgcgtct cgtggtcggt gaggtcggcg + 6661 agcaccttgt ggtcgcgggt gcggaagacc gtgcccaggg tctgggcgcc ctcgcccagc + 6721 gcggcgcgcg actcggtggt gccgccgagg ccgctgacga cgacgtagcc gccgaaccgg + 6781 tcgggcacga cgtcgctggg gccgttggcg 
gagatcgggg cgcccggggc gacgcccttg + 6841 gccgtggagg cgaggccggt ggcgacgcgg ccctggaagc tgcccttgac cttgtagacg + 6901 gagccggtgg tgcccacgca cgtctggtcg accgggcagt cggccagtcc ggcgccggcc + 6961 tcggcgacca gcacgccgcc gtcgcccagc gcggtgacgc cgcgcgggtt gttcagctgg + 7021 gaggccacga cggtcacggt cgcctcggta cgggtgtggg tgtgaccgtg gcggtgggcc + 7081 tggggctggg cctggacggg gccggcggtc agcgggacgg cgacggcgcc ggccgcgccc + 7141 gccgccagaa gtatcttcgt ccaggagtta cgcgagttcg ccatttctct cctcggtttc + 7201 caatgctccc cccttttccc ggagagcgga tgcgaaatca gcgccaccct agggcagtcg + 7261 ccggttttat tgcgtcaaat tttccgcagg aaaaagtggg tgcacccgtg aaccagcggt + 7321 attcgagtct ccctccagga ggcgtcgacc tttttccata tcgttgtagt cccggtccct + 7381 tccggcggtt gggacagact tttgcgctcc tgccatggag gaaactcatg cccgaattgc + 7441 cgcctccgca cgtcgtacgg gaagcggaga aagcacgagc ggatctgcag cggcagagcc + 7501 gggaactcgc gcctccgccg ttcgctctgc tggaattgat catgggtgtc atggtcacgc + 7561 gtgcggtgca cgtcgccgcc gaactgaagg tggccgaggc gctcgccgag ggtccgctgt + 7621 ccgccgacga gctcgccggg cgggtcggtg cggacgccga cgcgctcggc cgggtgctgc + 7681 gcctgctggc gagcaacggt gtgttcgcca cgcgcccgga cggtgccttc gagctgacgc + 7741 cgatggcgga cgcgctgcgc gccgaccatc cgatgtcgat gcggggcatc gcgctgctga + 7801 tgggccaccc gatccactgg gaggactgga gcggcttccc ggagacggtg gtcaccggcg + 7861 agccggcgct gccgaagctg cgcggtatgc acgccttcga gttcctgacg aagaacgcgg + 7921 agtacggcca ggtgttcttc cagggcatgg gcagcatgtc cgcctcggag acggagccga + 7981 tcctggcggc gtacgacttc tcgcagttcg ggacggtggt ggacttctgc ggcggccagg + 8041 gcgcgttgct cgcgggcatc ctgggtgcgg cgcccgggtg cgagggcgtt ctgttcgacc + 8101 cgcgggtcga ggagaacggg gctgcggagt tcctggccgc gcagggtgtg gccgaccgga + 8161 cgaagcgggt ggccggcgac ctgttcgacg tgccgccggg cggcgccgac gcctacgtcc + 8221 tcaagcacat cgtgcacgac tggcccgagg agcaggccct gcggatcctg cgcaacgtgc + 8281 gggcggcgat caagccgggc ggcaagctgc tgatcgccga gatggtgata ccggagcagg + 8341 gtgaccagcc gcactccggg aagctggtcg acctgtggct gatgctcctg gtcggtggcc + 8401 gtgagcgtac tcccggccag tacgccgatc tcctggcccg tgccgggttc 
cgtctggagc + 8461 gggtcgtgga gacggcggcg gcgatctccc tcgtggaggc cgtccccgtg tgaccaccgt + 8521 ggagggaggg ccccgtcgcg tgatgaggga acgcggcggg gccctccggt gtgtgcggcc + 8581 gggcagcggc ggccgggtgg gcgggggcgc cggcgggcag gcccgcgggg agcgccgggc + 8641 ggcggggggt gccgggcggc ggggggtgcc gggcggcgga cgaaggcccc cggcgtgcgc + 8701 gcacgccggg ggccgctaga gggggtgtgg gtcagcgggg cgggccgaac caccgttcca + 8761 gggccgtttc caggccctgc gcgcaggtgc ccgtccaggc gacgtagccg tcggggcgga + 8821 tcagcacggc ggtggcgccg gcgagggcgt ccgggccgtc ggtgggcttg gcgacggcgg + 8881 tgaccacgtc cacccggtcc gcgtggccgg ccgcggtcgc gcgcaccgtc tcgtcgtcgg + 8941 cgaggtcgag cagcacgccg tgcccggcgt gcaggaggcg ggcgatgcgg gtttcgccgt + 9001 ccgcgccgac gagcagccgc ggcggcatcc ggcgtcccag cagcgggtgg gtggggccgc + 9061 tcgcggtgcc ggcgaggtcg tagcggatgt ccaggtggct gacgatgccg gccaggtggc + 9121 gtttgacgtc gtcgtagcgg accagctcgg ccatcagctc gcgcagcggg tcggactcgg + 9181 cgccgccgag gaagacgatg ccctgggcgc gggtgttcat cagcagccgg gcgccgaccg + 9241 ggtggcgttc ggcgtggtag gtgtccagca gttcccgggg ggcgtcgccg cggatcaccg + 9301 aggccagctt ccagccgagg ttggcggcgt cctgcacgcc ggtgctcagg ccctggccgc + 9361 cggccggcag gtggatgtgg gcggcgtcgc ccgccaggaa gacgcggccc cgccggtact + 9421 cggaggcctg ccgggtggcg ttggtgaagg agctgaccca gctcgcgccg ccgtggctga + 9481 tgtcctcgcc ggtgatgtcc tgccaggcgg cggcgacctc ctcgaaggtg acggtccgct + 9541 cacggtcgtg cgcggggcgg ccgtcggggc agacgatgat gcgccacaca ccctgggaga + 9601 gcggtgccgc catcaccatg ccgttgttca ggcgctctcc gaggaagcgc ggggtcagct + 9661 ccacaccggt gatgtccgcc aggtacatgc cctgggtggc gtcggttccg gggaagtcga + 9721 acccggccgc cctgcggacc cggctggagc cgccgtcgca gcccgccagg taggaggcgc + 9781 gcagggtgcg ttcctcgccc tgcggggtgc gtacggtgat ctcgacgccg tcgccgtcca + 9841 ggaagccgtc ggtgagggag acgaactccc agccgcgccg gatgtccgcg ccgagttcgc + 9901 ccgcccagcc ctccaggacc gcctcggtgt cgccctgggg gatgccccgg gccccgaagt + 9961 gggcgtcctc gaggacggtg aagtcgaact gggcgccgcc gaagtgtcct acggggctgg + 10021 tggccagggt ctctccctgg ccgaaccggg gcagcagccc gcgttcgtcg aacgtctcca + 10081 
tggcgcgggc ggtgaagccc aggccgcggg actgcccggt gggggcggcc agcttctcga + 10141 tgacgacgac acgggccccg cccaggcgca gttcgcccgc gagcatcagt ccggtcgggc + 10201 cggcaccgac gacgatcacg tcggtgtccg ccgctgtccc ttccatgggt ggtccttcct + 10261 ggcagtgggg tcggttcggg gcccggcggt ggggggcggc gcgtggtgcg gtgccggtcg + 10321 tccccgccgg gccggggcgc acggcaggtc agcggggagg tccgaaccag cgggtgagtg + 10381 cctcggcgag gccgtcggtg cccgggccgg cccaggccac gtggccgtcg gggcggacca + 10441 ggacacggtc ggtgccctgc agtgaactgc ccggcaaggg ctgggcgtcc agtgcggtga + 10501 cgcggtccgc ccagccgtcg aggactgccg gcggccggcc gggccggccg gtcaggtcca + 10561 gcaggacgcc gccgcccgag cgcaccaggt gggtggtggt gagcaggcgt cggcgtgccc + 10621 gcacctcggt gcacggcagc cgggccccga gcagcgggtg ggcggggccg ccgacgtcgt + 10681 agcgcacgtc caggccgctg atcatgccgg ccaggcgacg gcgtacgtcc tcctgcccga + 10741 tcaactccgt cagcaggtcg cgcaacggct cgacctccgg tccgccgagc agcagcagcg + 10801 actgggcgcg gatgttggcc aggacctgcc ggccgacctc gtgacgttcg tcgtggtagc + 10861 tgtcgaggag ggtctgcggt gccttgccgc gtaccaccag ggccagtttc cagcccaggt + 10921 tgaaggcgtc ctgcatgccg aggttgaggg cctggccgcc gaccggcatc tgccggtggg + 10981 cggcgtcgcc cgcgaagagg acgcggccgt gccggtagcg ggtcagctgg cgggaggcgt + 11041 cgccgaaggc gttggcccac aggggggtgc cgccgctgat gtcctcgccg gtgacccgct + 11101 tccacaccgc ggtgatctcc tcgaaggagg cgtcgccgtg cgggcgggcc cgcgcggcgg + 11161 agccgaactc gtggaccatc acccgggtca ccccgtcggg gcggcgggcg gcgatcgcca + 11221 ggccgtgctc caggcgttcg aagcgccggc ccgggatgtc gatgccggcg acgtcggcgc + 11281 gcagcagttc ccgcgtcgcg tccctgccgg ggaagtcggc gccgatcagg cggcgtacgg + 11341 tgctctcctc gccgtcgcag gcgaccaggt agcggcagcg cagccgcagc gtccgtccgt + 11401 ccgggccggc ggcttcggcc tccaccaggt cgccgctgtc gtcgagcgcg gtcagttcgt + 11461 gcttgcactg cagttcggcg cccagtgaca gcgcccactc ccccaggatc acctcggtcc + 11521 tggtctgcgg caccttccac tggccggggt gggaggaggg cagtgtcaga tccagcggga + 11581 tgcccccgaa gtggccgcgc ggctcgttcg gcgggtcgcc gaactcgggc agcaggctcc + 11641 ggctgtcgag gatctccatc gtgcgggcgt gcagggtgga ggcccgggac tcggtgctgg + 11701 
gagcgcgtcg cttctccacg accaccacgc cgacgccgcc gtgggccagt tccccggcga + 11761 gcatcatccc gacggggccg gctccgacga cgacgacgtc ggtggtcagg gcgtccgctg + 11821 ccacggttca gcccttcttc tgctcggcgt aggccttggc gtgggcgagg gtggcgcggc + 11881 tgttggtgga cagggccgtg tggacgtagg cgcgggcgtc ggcgacggtg gcgtcctggc + 11941 cgaggatgcg ggcgatgttg gcggtgttga gggtgacggt gtgctgggag gaggcggtgg + 12001 tcgcctcgtc gccctcggtg aaggtccatt cgccggtgtg cagggtcatc agcgcgggca + 12061 gggtgacctg cttgtaggcg atcttgtggt gggggaagac gacccggtag gacttggtgg + 12121 tgtgcaccga gccgtccttg gcgcgggtgt ccatctccag ctcctgcagg ccgggggtgt + 12181 cctcgctcag gcgcaccacg gcgacgtgcg ggagccgttc ggcccacagc tgggcctcgt + 12241 tgatgaagtc gaagacgtcc ttggcggcgc cgtcgatctg cacggtgtcg gcgaaggaga + 12301 acgtcagctc ctcggtggcg gcggcgtgcg cggcctcgac gttgaccttc agggcggcca + 12361 gctcggaggt gctgttcttg tccacggcct gctcgatcca cagcaggtcg tgcgggtcgt + 12421 cgccgatggc gctgtagtcg tgcaggagcc gcacccgcga ccggtcgtcg gcgagcggct + 12481 cgatgatcca ggtgccgccc atgtgcttga ccggggcggc cgggatctcc tggcggaagg + 12541 tgatggtcag gttctcgcgg tcgagcgtgc ggcgcgaggt ccactccttg gcctggccgt + 12601 tggcggtcgc ccagatgtgg atgcgttcgt ggtcgccgtc ggcctcggtg cggtcgacgt + 12661 ggatggtggg cgggaagatg cgcggccagt tggtcacgtc cgcgagcagc tggtagacgg + 12721 cgtcggccgg tgcgccgatc gtgatctcgt gctcgacctc acgtgtggtc atgaggctca + 12781 ctcctcgggg aaggggtcgg gtgccgggtg cgtcagaagt tgccgaggcc gccgcagacg + 12841 ttgagggcct gcgaggtgat ggacgcggcg gtgtcggagg ccaggtagcc gacgaggccg + 12901 gcgacctcct cgggggtgga gtagcggccc agggggatct tcgcctggaa cttctcgagg + 12961 atggcgtcct cggaggtgtc gtaggcggcg gcgtatccct ggcgcacgcg ctgggccatc + 13021 ggggtctcga cgtagccggg gcagaccgcg ttgacggtga tgccggtggg ggccagctcg + 13081 ttgcccagtg ccttggtgaa gccgacgacg ccgtgcttgg acgccgagta cggggcgccc + 13141 aggacgacgc cctgcttgcc cgcggtggag gcgacgttga tgatccggcc gcggtccttg + 13201 gcgcgcaggc cgccgatggt cagggcggcg cgggtgacgc ggaagacgct gttgaggttg + 13261 gtgtcgatca cgtcgtccca cagctcgtcc gcgatgtcgg cggtggggcc gccgccggag + 13321 
cggccggcgt tgttgacgac gacgtcgacg ctgccgaagc ggtcgacggc ggcctggatc + 13381 caggcgttga cggactcggt gtcgcggacg tcgacgaccg tgccgtccgc gtcgatgccc + 13441 tcgccctgga gttccttgac ggtggcggcg acgttctcgg cgttgcgggc gccgatgaag + 13501 acttggtggc cctgccggcc gaggagccgg gcggaggcca ggccgatgcc gctggtggcg + 13561 ccggtgacga cggcgacgcg ctgggtggtg gtgtcggtca tggtggggct ccttcaggcg + 13621 gccttggcgg gcgcgagctg cgcgttgacg acctcgatga agctgcgcgg cgtgttggcc + 13681 tcgccgacgg cctcctcgtc cagggagatg ccgtactcgc gctcgatgag gctgccggcc + 13741 tccagcaggg cgagggactc gtagccgatc acctcgaact cggtgtcgag gatgtcgccg + 13801 tccaggtcca cgccctcggc gacgcccgcg gcctcccgca gggtgcgctt gaggtcgtcg + 13861 agggtgaagg acttgctggc catggtgggt cctttcgtca gtacgtggtg gcggttgcgg + 13921 tgcgtcggtc gggcggtgtg cgggggtcct caggcggcgc gcacgaccat ggcggagttg + 13981 aagccgccgg ctccccgggc caggaccagg gcggtgcgca gctcggccgg gcggggttcg + 14041 cccaggacca ggtcgagggg gtagtcggcg cacgggtcga tgtgcacggt gggcgggatg + 14101 acgccgtcgc gcagggcgag gaacgcggcc gccaggtcca ggggcgcgcc gccggagtac + 14161 aggcgcccgg tcatggtctt gggcaccgtc accggcacgc cccggtcgcc gaagaccgtg + 14221 ctgacggcgc gggcctcggc gaggtcgccg gccgggtcgc cggcgccgtc ggcgaagacc + 14281 acgtcgatgt cggcggcgtt cacgtcggcg tcggccagtg ccgtctcgat ggccttggcc + 14341 aggttgggtt cgcgtccgct gccgggccgg gggtcgatgg tggctccgta gccggcgatc + 14401 tcgccgtagg ggcggacgcc gcgggcgcgt gcggcgtcgg cgtcctcggc gatgaggatc + 14461 gcgccgccct ctccggggac gtagccgttc gcgtcgcggt cgaagggcag gtaggcgtgg + 14521 tcgggttcgt cgctggtggt gagccggccg caggccagct gggccaccca gccccagggg + 14581 cagatggagg cgtcgacgcc gccggtgacg atcagcgggg tgccccggcg gatctgccgg + 14641 cgggcctgcg cgacggcgtc caggccgccc gcgccttcgc tgacgacgac gccggagggg + 14701 cccttcatgc cgttgcggat ggagatctgg ccgctgttga cggcgtagaa ccaggcgaag + 14761 gactggtagg cggagacgta ctggctgccc tggctccaca gggcctgcag ttcgccctgg + 14821 ccgaactcga agccgcccgc ggcggaggcc gtgatgacgc ccatgtcgaa ctcgggcagg + 14881 tgcgcggggt cggcgccggc gtccgcgagt gcccagtcgg cggccaccag ggccagccgg + 14941 
gtgacgcggt cggtctgcgg cagcagccgg ctgggcagca gttcctcggc gtcgaagtcc + 15001 tcgatctgcc cggccagacg ggccgggtag ccggtggggt cgaagcgggt gatgcggccg + 15061 atgccgtgct tgccgcccag ggtggccgcc cagtagtcct tcacccccat gccgttgggt + 15121 gagacgacgc ccaggccggt caccaccacg gaggcggtca tacgaggctc ctttcggggg + 15181 tggcgagcac catggcgctc tggaagccgc cgaatccgct gccgaccgtg aggaccgcgt + 15241 cgatcagctg gtcgcgggcg accaggggca cgtagtcgag gtcgcactcg gggtcggggg + 15301 tgtgcaggtt ggcggtgggc gggacgacgt cgtactccat ggcgagtgcc gaggcggcga + 15361 tctcgatgga gccgatcgcg ccgagcgagt gcccgaccat cgacttgatg gagctgaccg + 15421 gggtgcggta ggcgtggtcg ccgaggctgc gcttgaacgc ggccgtctcg tgccggtcgt + 15481 tctgcttggt gcccgagccg tgggcgttga tgtagtcgat ggactgcggg ttcagccggg + 15541 cctcgcccag ggccaggtcg atcgcctcgg ccatctccac gccgtcgggg cgcaggcccg + 15601 tcatgtggta ggcgttggag cgggtggcgt agccggcgat ctcggcgtag atgtgggcgc + 15661 cgcgcttgcg ggcgctgtcg agttcctcca ggacgaagaa ggcggttccc tcgccgagga + 15721 cgaatccgtt gcgggtcttg tcgaacggcc gggaggcgca ctcgggttcg tcgtggcgcg + 15781 gggtggtcgc cttgatcgcg tcgaagcagg ccatggtgat cggtgagatc ggcgcgtcgg + 15841 aggagccggc gatcacgacg tcggccgatc cctcgcggac cagttcgacg gcgtagccga + 15901 cggagtcgat gccggaggtg cagccggtgg agaccaccgt gctggggcct tcggccccca + 15961 ccgcccaggc gacctcggcg gcgaaggagc tgggcaccat gtagtcgtac aggtgcggga + 16021 ccgcgtaggt gtggtcgacc gtgtccagcc gtcccccgtc gctgaccacc cggtactcct + 16081 ggtccaggcc catcgtggcg ccgacggcgc tgccgacggt gacgcccacg cggtgcgggt + 16141 ccagggaggg gatgtcgagg ccgctgtcgg cgacggcgcc gcgcgaggcg acgacggcga + 16201 actgcgcggc ccggtccagc cggcggacct cctgcggggt caggccgtgc tcgtaggggt + 16261 cgaagtccgc ttcggcggcc acccgggagc ggaagggggc ggggtcgaag aaggtgatgc + 16321 cccgtgtcgc ggtacggccg ttgctcagca ggttccagaa gttctccctg ccgacaccgc + 16381 cgggggcgat cacctcgatc ccggtgatca caacgcgtcg cccgctcaac gggaggcctc + 16441 ccagctgtag aagcgcgtgg ccatcgcgtc ggccggcgag cgccaggtcg tggggtcgta + 16501 ggcctcgatg aagggcttga ggtcctcgct gatccgcacg aagcgcgggt cggccttggc + 16561 
ggcctcgatc agttcaccgc cgttgtcctc gtcgaagtcc tggaggtgga agtacaggcc + 16621 ccggtaggag aagagctggc ggcgccgtgt gcccatgcgg tgcggcatct cggtggcgtc + 16681 gaagtcggcg aacaactggg ccacgtcgtt gctcgaggtg gccgccatcc gggcgacgat + 16741 cagcgtgctg tgcataggga ttagctcctc ggcggttgcg cggaagggtt ctcgaagcgg + 16801 gcggaccggc ccggcccggc ggcccggggg gctgcggcgc caccgccttc gacggtgcgg + 16861 cctggcggta gcgccccgca cggggcctgc ctgagtcccg cctgagccga agcggcgggt + 16921 gcacacgagc acgccttcgg gcctgcgccc ccgggtgccg gggacgaggt ccgaaggcgt + 16981 gcgggtcccg tgcgccgtcc ggcgcgccgg gctcactcgc cggcagagcg cgtgtgcacg + 17041 acccggtagg tgttgaggta gggctcgccg gtgacgacgg cgcggacgcg ggccacggcg + 17101 gccttgcggg cgtcggcctg ctcctcgggg gcctggtcgc ggtagacctc gaaggcctcc + 17161 ttgctgtccc actgggagta ggagatgacg aaggatccct ccaggccgcg ggcgcgcaga + 17221 cccctgagca cgacgtgggc gcggaagccg ggcacgtcgg cgaggaaggc ctggccgggg + 17281 ccgagggcgt ccacggcctc gtcctgcccc tcgggggtga ccgggaagag ggtgaagacg + 17341 gtgtagtcgt cgcggtccgg gccgatctcg atcttgccgc cgagggcgtc cgaggtcagc + 17401 gtgtgggcga cctcgttctg catcagccgg atcgaggtgg tgatctcgcc gaagaccggg + 17461 agcgtgcggt gcttgaactc ctcgcccgcg tagcgcttct cgaggtcctc gccgctgcgc + 17521 cactggatga agttgagggt gccgtagccg tcgacgcccg agtggacggt ggaggacatc + 17581 cagccctcgt acgcggccgc gttgacgatc ttcgtcatct cctcgatcag cttggcctgc + 17641 ttctcgggag tgtccgtggt gaacaggttc aggacggtga ggtgcttgtc ctcggcggag + 17701 atgatgggca tgggtccttc gctttccttg gtgggtgcgg tggatgtcgc tgcgcggccc + 17761 gcgacggtca ggcgctgggg ccgaaccagc ggccgagggc ggcggtgaga ccgtccggcc + 17821 cggcgccgag gccggcgacc caggcgacgt agccgtcggg gcggacgagg atgccgtcca + 17881 cgggggcctc gcagccgtcg aactcggccg cgacgaggtc gacccggtcg gcccagccgg + 17941 cggccgcggt gcgcaggccg tggtcgcccc cgagttccag gagcacgggg cgtgcggtgc + 18001 gcagcagcgc gtagaacggg gtcttctcac cgtcgacgac cagttcccgg tccggcaggc + 18061 gccggccgag cagggggtgg tcgccttcgc cgacgtcgtg ccggatgtcc aggccggtga + 18121 ccatgccggc caggtggcgc tggacggatt cgtgggcgcc catcagctcg gcgagcacct + 18181 
cgcgcatcgg cgtgatctcg tcgccgccga ggtagaggat gcgctgggcg agggtgttgg + 18241 tcaggatgcg ggccccgacg gggtggcgct cggcgtggta ggtgtcgagc agcccttcgg + 18301 gcgcctggcc cttgatgtcg agggcgagct tccagccgag gttcacggcg tcctgcacgc + 18361 ccgcgctcat gccctgcgcg ccgatcggca ggtggatgtg cgcggcgtcg ccggccagga + 18421 agacgcggcc gtggcggtac cggtcggcct ggcggctgac gtccgtggtg gagctgaccc + 18481 acagcggggt ggcgccgctg acgtcctcgc cggtcaggcg ctgccaggtc tgggcgacct + 18541 cgtcgaaggt gatcgggtcc gggctggtgc gcagcggctc ggagctgtcg aagtagatga + 18601 cgcggcagcg gtccgggccc atcgggatga ccatgaccat gccgccggcg acgcgctccc + 18661 cgctgaaccg gggccgcagc gccactccgg ccaggtcggc gaagcgcagc tcgatggccg + 18721 gctcggtgcc ggggaagccg atgcccgcga gcttgcgcac gatgctgcgg gcaccgtcgc + 18781 agcccaccac gtggcgggcg cgcagggaga aggggccgtc ggcgcccgcg gcggtgacgg + 18841 tcaccgaggc gtcggtctgc tcgatgcccg tgacctcgca cccgcggcgg atgtcggcgc + 18901 ccagctcgcg cgcccagccg cccaggacgc cctcggtgcg ggcctgcggg atgccgcgcg + 18961 ccccgtacga accgccctcg atcacccggt agtcgagcgg gacgccgccg aagtggccga + 19021 ccgggatgac tccgacctcg ccgaaccggt ccatcagccc gcgctgcgcg aattcctcga + 19081 tggtccgcgc cgagaaaccc agggcgcggg attcctgaat cggctcggaa agtttgtcga + 19141 gaaccaccac cgagactcca gcgagcctca attcacctgc gagcataaga ccggtgggac + 19201 cggctcccac gacaatgaca tctgaatcgt agaattccat ctgccctctt ctcactggtg + 19261 ttccatcaat agtgcgaggg cttacgaggg ctgtaaatag gcgttgttac aggtcttgtg + 19321 cagtcaatgc cagaaaaacg tctggtacgt cagagcttcg acaagcggtc ttgccccggg + 19381 cgcgtcgggt gacggcattc tctggcatga cctccagtct gacgaccgat caatccgcat + 19441 ccgtgtccgc cgccgagtcg agcgcacagg tcgccggtct gctgcaccgg tatctggtct + 19501 cgctcgacga cgagcggctc gacgacgcct ggaccgccgg cctgttcacc gaggacgcgg + 19561 tcgtcgcgtt cccggtcagc cggcacgagg gcgccgacgg catggccgag taccaccggt + 19621 cggcgctgtc ggcgttcgcg gccacccagc acctcggttc tccggccgtg gtcgacgtgg + 19681 acggggaccg ggccgtgttc cgggcgaacc tgatctccac ccatgtgcac cacccgcgcc + 19741 acacgcctcc cgagggggac ctgcccccgc tcttcgcgac gggcacgttc gtgaacggcg + 19801 
aggcgcgccg caccgcgcgg ggctggcggc tgagcctgct ggccttccgg ctgctgtggg + 19861 cggacggctc ccctcccccg gcccggtgac cccggccgcg gtgcggtgac cccgagccgc + 19921 ggtggggcga ccgcgggcgc ggtggggtga ccccgagccg cggtggggtg accgcggccg + 19981 tggtgcggcg gcgccgggcc gggggagggg agccgcgccg cggtacacgc gcgtgccgcc + 20041 ccggacgcgg tccggggcgg cgggggcggt gggggtggtg gtgcggagcc ggcggggcag + 20101 gcgttcaggc gttcaggctc aggacgcggt cgaccgcgat ctcgatgacc acccggtcgg + 20161 acggggtcgg cggggcggac cagtaccgct tggcgtacag ccgtgccccg agcgccaccc + 20221 gttcggggtc ggtcacgacc gtgccggtcc cctccagcgt gacccagcgg aagccgtcca + 20281 cctggcagag cgccaccggg gcgccgggcg tggccagcag gttgcgggcc ttgcgggagg + 20341 agcgcaccgt catcacccgc gcgagctggg cgtccgcgtc ccaggtgaag cgtacggggg + 20401 ccacgtgcgg tgtgccgtcc ggccgcatcg tcgtgaacgc ggcgacgagc gccgtaccgg + 20461 tcaggaacgc ctggacggag tcgggcaccg tcacggcgtc cgggcgcggc atcgacggca + 20521 tgggcgggtc ctcgcttcca gagggtgttg cgtgcgtggg cgcttcccgt gctccgggcg + 20581 gctgtcgcgc gggcgtcggg ggcggccgtg ggtccggggc cagcttgccg ggggccgctc + 20641 gaccctcgct ggacccggct ccccccgccc ggcccgccgc ccgcctccgc cgtgcgccgt + 20701 gtgtcatgcg ctgtgctccc gtgggtcgtg cgtcgtgcgt cgtttcgggt gtccgggcta + 20761 gtcgatgctg cggacgatga cgcgtccgtc ctcgtcctcg cgctcctccg cgcccgccag + 20821 ccgcaggggc ggcgggccgc cgtggaccgc cctggcgcat cgggccgtgt caccctcgtg + 20881 ccgttcgtcg tcgccttcgt gcatgggcac ctcctcggtg atcggtcggc ccagagtgtc + 20941 aagggtggct cgcggcccgg tcgacggccg ccccggagga agtggagcgc ggccggagcc + 21001 aggacccggc cggtcgcggc ctcgagggcc gcggcgcgat gcgcggcagt gcctcaccgg + 21061 cgagcacgtc cagcacggcc ccgttggtgg tggcctcgtc ggtgccgggc agatgccgca + 21121 gcagccgggc gcagtgcccg gccagcaggc gggcgtcggc gtcggagatc cggttgcggt + 21181 cgtggtcgac gcagaaggtg aggcggccgt cgccgccggg acggaccagc agggcgacgg + 21241 gcaggtcggg gcaggcgccg tgcgccggtt ccggctccag cgcgatgccg gcgccggcga + 21301 gttcgttcct caggttgccc ggcgggcgcg gaaggctgtc caccgagacc acgctctgca + 21361 gcagttcccc gtcggggcag cggcccgtcc agcggtggat ccggcgggtg gagacccatt + 21421 
cgtaggcggc catgtccagc gcccggtccc gcagggccgt cagcagccgg ccgaccgtct + 21481 cgccggggtc cacgcggacc accatcggca ggcagttgcg cagcggcccg ggcagccgct + 21541 cggcgcagtc cagggtgatg ccgcggccgg agacggtgac gccgaagccc accgtcgcgg + 21601 ccccgccggg cccggccgcg cggtacagca gcagcgccca gaccgtctcc agggcgctgg + 21661 agtcgggtac ggcgcgcagg gcggcccagc ggtggagccg ttcggcctcg gcggggctca + 21721 gccgtacctc ggtcctgccc cggccccgct ggcgggtgcg cgggccgggc cgggcgggca + 21781 gcacggcgac cggcccgtcg ggcacggcgc cccgccagaa gtcccgggct ccagcggggt + 21841 cctgccgctg gagccagccc gcccagtcgc gcaggtcggg gcggcgctcg ccgccgggca + 21901 gctcgccgcc ggcgaggtag gcccggcaca gctcctgcat cagcaggcac acgctccacg + 21961 cgtccagcag cgcgtggtgg aaggtgagga ccacccgggt caccggcccg gcaccggccg + 22021 ggtcgtcggt gcgctccacg agggtgacac gcagcgggca ggggcggctg ggatcgaggc + 22081 cgcgccggcg gtcccgctcc agcagccggt cccatccggc gccgcccgca cggtggcgca + 22141 cgacgtcgcc gtgggcgtgc tcgtgcagga ccaggtgcgg ccggggcccc ggggccaggg + 22201 cggcccgcag cacgctctcg cggtcgacga ccgactgcca ggccgcggtg aaccgctcgg + 22261 tgtccagcgg gccgtgccag cgccagacga gctgctcgac gtggcgcccg gcgccacggc + 22321 gcccgccgaa gtcgcccagc aggtcgaggt ggggccacag ggccggtacg cccggcggac + 22381 gggccgtgcc ggggccccgc agggcacgcg ggcggctgtg ccacgccatg ggacgccctc + 22441 ctcgtcgctg cctcctcccg cacgggccgg ccgtgcgcac ggttctcggg gagcggcggg + 22501 ccccttgagg gggcgcggta ctcccccggc atcgtggcac cggtggctgc catgggcctg + 22561 acggcccgct gacgctgctc gcgaccgggc ggagaaacgc ttctgcagcg gtggggccgt + 22621 accggaacgc ccgctactgc tgtcccgtgt ccgtcatcgc cgacaggaac ccctggtcca + 22681 gcatggtccc cacggaggtg tccccggcgg tgctcagtcc ggcgttgtgg caggccacgc + 22741 tgagcaggaa gcggtccatg acggggtagg tggcgaagcc ccactcgtcg cgggtgaccc + 22801 ggctgccgtc gggtgcgctc agccggccct gccggaagcc gaactccttg aagccgaacc + 22861 gcagtccctg cccgagggct ttgctgtagg cctccttcag ggtccacagg cgcagtgcgt + 22921 gggcggtccg ctctccttcg ggcagttcgg ccagttcccg ggcctcggcc ggtgtgaaga + 22981 cctgggtctc gagcaggtcc agccgtacgg gccgctcggc cggttccacg tccaccccga + 23041 
tgcggccgat gcggctcagg ccgacggcca tgacgtcccc ggtgtggctc aggctcagtt + 23101 cgatctggtc gaagccgcgc aggtagggcc ggccgcccag ccggtaggcc aggtccaggt + 23161 actcgggcgg gacggccagg gcggcggccg ccgtgtactt gatcagcatg cgggaggcgg + 23221 cgaaccggta gcgcaccgcg gcgtcgcggg tacgccggta gcgctgccag tcgtcgccga + 23281 gcagttcccg cagccgcggg gtggtcagca cgctgggcag ccattcgctc cacgtcgtgc + 23341 agaccagtgc gttgccgtgc cggaagaggt cctcgcgcac cgcgggccag gggccgtggg + 23401 gccgcggcac gtgaaggggc gcggcacatg tgacccgctc catcgtcgtc tccctgcgtg + 23461 tggttctcgc cggcccgctc ccccgttggg gggggcgggc cgaggggggg cgtcttcccg + 23521 tgccgtctcg gtcagccggc cagtggggtg tcgtacaggt cgagactgcg gccgtgccgg + 23581 cagcgtccca gcgcctcggc caggaccgcc tgttccgcgt ccgccggtgt ctcgggtacg + 23641 gcgatgccga gtctgcggcc gatgcggctg aggacgagca ccgcccaggc cgggtcggcg + 23701 aggaagccgt caccgtccgc ctgtccctcc cagaccccga ggcaggcggc ggcgcacagc + 23761 agcagggcgt agcggtcggc cagggcgcag gccagcgggt cgaacacggt gctcccggcg + 23821 gccggcaggg ccgcgcagcg cgcgcgcagc acccgcagct cctccaccag ggcgcgggcc + 23881 agcgcggcga gggcggcgtg cagtggttgt gtccccgtcc gtgcggccag ccgttcggcg + 23941 gccccgatca gggtggccgt gagcgggtcg tcggtcccgg agtgtgtcag cctgcggtgg + 24001 tcgagcggtg gcagcggcgc gcccggcagg aacagtgcgg cgctcggttc ggcggtccgg + 24061 aaccatgccg tgcgtgccag ggccggcagc tggggtacga gcaccgcctg gcagacggcc + 24121 gttcccgagt ggccgagtcc ggccacgggc aggtcgcggg cgagtttctg gaagccgccg + 24181 tacagcgggc cgcggtcgta gccgcgggcg ccgagcacgg cggcgagttc ttccaggtcc + 24241 tcgcgcagca ggtccggcat ggtgtatttg accgccgccg cgagcagatg ggcgtgctgg + 24301 ggtacgaggc tgagcgcccg cagtcccgtg acggccatgg cgtcgcaggc gagcaggtcc + 24361 gcgaagaccc cgctgagtgc cttgtgccag cggcgggcgg gccgtccgtc gggccggttc + 24421 tcggtggcgg cgcgcaccgc gagccgcagg acgctgtcca cgcccgcgag caccgtgccc + 24481 gggatgaggc agtggctgat ctggaagctg cgcagggcca gggtgacgcc ctcgccgagc + 24541 gagccgacca gggcgctgtc gggcagtcgt acgtcggcca ggcgcagtcc gtggaagcgt + 24601 gctccgcgca tgccgggcat ctccacccgc gccagccggc gcacttctcc ggaggcgggt + 24661 
ggtccgggca gcaggagcac cgagtggctg gcggagccgg agccggcgga ggtgcgggcg + 24721 tagacgacga aggtgtcggt gcggtcggcg ttcatgacgg cgtccttgct gccgttgagc + 24781 aggaagccgc cgcccgcagg gcgttgtgcg cggacctcgc ggcgcaggat ggcgttggcg + 24841 tgtgccacct cccggtgcac gatcgcgacc cggcctccgc cgagcaggac gtccgcgagg + 24901 gccgcgcgct ggtgggggtc gcccgcggtc cacaccgagg acgcggcgaa cagcgaggtg + 24961 atgccgaagc cgtagccgag ggccaggtcg cggcggaaga gcgggcgcag cacgcgggcc + 25021 agctcttcga ggtccctcag gcgtccgccg aggtcgtggg ggacgaactc ggcggacagg + 25081 ccgtgctcgg tcaggagcgc ctcggtggcg tgtggtgcct cgcgggtgtc gtcggcccgg + 25141 accagggcga ggtgtccgtg cgggttggcc gggtcgaagg ggtcgccgag cgcggcctcc + 25201 agccgggcgg cgcgctcggg ttcgggtgcg cgggcggcgc gctcgcattc gggtgtggga + 25261 gtggtgtgtt cgggttcggg ggcgggtgcg cgggtggtgt gttcgggtgc gggtgtgggg + 25321 gtggtgtgtt cgggtgcggg tgtgggggtg gtgctgcgcg gggcgggggt ctgccccgtc + 25381 ggcccggcgg ccgtgtcgag ggcggcggtc acaggacccg caccgcgtcg gaggcgtggc + 25441 gcacggcgtg ctccagcagg cggcggccgg cccggccgac ctcgtcgcac gcccgccggg + 25501 cggcgtcctg gtccagtgcg gcgtcgtcgt tgagcagcac gtcgtgccgc acggtgacgt + 25561 cgaggccggc tccgggctgg ggctcgatgg tccactcgcc ggtgtacgcg gccagcggtc + 25621 cggatgtgcg ggtgtgtttg tgcacgaggc ggccggcggc ggggaagctg atgcgcaccg + 25681 cctcgccgcc ggtggccgac cggccgtcca gggcgaggac ctggatgccg ggggtgtcct + 25741 cctgcaccgt cagggggcgg gtgccggcga ggtcgtcggg ccagtcgccg gcccggtaga + 25801 ggaagtcgaa gaccagctcg gcgggcacgt tgacccggac ggtgtcctcg aaggacatca + 25861 cgaggtcgtc cagccgggtc cagcgctcgg cgagccaggc caggcgctgg agctgggcgc + 25921 ggctgttggc gcgggtgacc cgctcggtcc aggccgcgtc ggccggagcg tcgccgacga + 25981 cggtgaaggc gtgttccagg gtcacccggg agccgtcgcc caggggctcg acggtccaca + 26041 ggccggtcat cgtctcgacc ggggcggagg gccgttcctg gcggaactcc acccgccgct + 26101 gtccgacgtc caggcggcgg tgggagaccc aggagctgat ccggtcgccc gccacggccc + 26161 acatgcgcag ccgttcccgt gtcccgtcga agtccagttg ttccacgtgg acgctgggag + 26221 ggaagaacag cggccacacg gtggcgtccg cgatcaggcc gtagagcact ccgctcgggg + 26281 
ccggggccgt catctcacag gcagtacggt gcactcgcgc atccggcatc gtccacaccc + 26341 tccgtcgtcc aaggcgtcgc cgccgggccg gtccccgctg ggccgcacgc gtgccgggtc + 26401 caccgggggc gggccggtgc ggccgggccc gtcgtactgc ggcttttctc gaccctgtcc + 26461 ggaaccgcta gggatgtgcc ggtagcgcgc tcgtgtcccg ctggagcggc gcccaggggc + 26521 acgcgctcca gcgggaaccg tcgggtcctc gagcccggtg accgacggtg gcctgcggcc + 26581 ccagaaacga tccgggaggt ggcagggatg aggcgcttcc gcccgcgccg atgcgtccgt + 26641 ggcctgtggg agggtctggt cgcctacggg cggctctgcc tggccggcga gacggaccgg + 26701 tacgaccacc ctccgcgccc gcggatccgg tggcaccggc cgccgccggg acacccggaa + 26761 cgggtgcgcg acgacatgcc tctgaccgac ctggagcggc gcctggcccg cgaactggcc + 26821 gacgaggacc acgacgtccg ctaggcgggg acgccaccac gcgtgccgcg ggcggggcgg + 26881 cggaacgccg ggcagccggg agagacggaa ggccgggggc cgggagccgg gaggccaaga + 26941 gaccgggaag gccggaaggc caggggcccg ggaggccggg agcccgggag ggccgggagg + 27001 ccggaagggc cgggaggcca ggagccggaa ggccgggagc ccgggcgggc cggggagcca + 27061 ggagggccgg ggagtcggcg ccgggggtgt ttccctttgc gcggtccgcg gggccgcggg + 27121 ccgttcctgg ccgccggtgt gttccgctcc gtctcgttag gggcgggccg tcactgcgtc + 27181 tgaggggcct ggagcactgc ggcggagttg aagccgcccc gtccgcgggc cagtaccagc + 27241 gccactcgca gccggggcag ttcccgtggg gctccggtga ccaggtccag cgggcagtcg + 27301 tcggcgacgc gtccggttcc ggtggtcggg gggacgactt tctcgcgcag ggcgagcagc + 27361 gcggctgcca cgtccagggc cgagccgccc gcgcccagcc gcccggtcat cgtcttcggc + 27421 acggtgaccg gtactccgta gggtccgaac agcgcgcaca gcgcctggct ctcgacgagg + 27481 tcggcgcgcc gctcggcggc gccgtcggcg aagaccacgt ccacgtgttc gggggccagg + 27541 ccggcgtcgg ccagggcgag ttccgcggcc tggcgcagtg tggggcggtc ggggccgtcg + 27601 aaggtggcgg cgcagccggc gatgacgccg tagccggtgg cgccccggcg ggcggcggcc + 27661 cgggcgtctt ccaggacgag cagggcgccg ccttcgccga ccgcgtggcc gtcggcggtg + 27721 gccgcgaagg gccggtaggc ccgggcgggt tcggtggcgg ggctgagccg gccgtccgcc + 27781 aggtgtgcgg cccagcccca tgcgcacagt gtggagtcga tcccgccggt gaccatgagg + 27841 catgccccgt cgcggatccg ccgccgggcg cgggcgagtg cgtcgatgcc gccggcctgt + 27901 
tcgctgacga cggtcgagcc gtggccgcgc agccggtgcc ggatggagat ctgtgcgggg + 27961 gctgccgcgt ggaaggacgc gaaggactgg taggcgctga cgtgctgggc gcctttgctc + 28021 cacagtgcgg ccagtccccg ttcgccgaat tcggcgccgc ccgcggagct ggcggtgatc + 28081 acgccggcgc cgtatgcggg caggtcggcc gggctcacgt tggcgtcgcg cagtgcttct + 28141 tcggccgcga ccagggcgat gcgggtgccg cggtcggtgg agggcagcag tctgctgggg + 28201 atgtggtcct cgtcgacgaa gccggggatc tcgccggcga tgcgtacggg gtagccggag + 28261 gcgtcgaagc gggtgatggg gcccagtccg gtgcgcccgc acagcacggc gtcccaccag + 28321 gctctggtgc ccaggccgtt gggtgcggcg actccgatgc cggtgaccag ggtctggcgg + 28381 gggccttcgg cgccggcggg ggtgggggcg ggtcgggccg gttcgggggc ctgggtcgcc + 28441 ttggtcatgc ggcctccttg agctgtggcc gggtgaggac ggcggccgtg tggaagccgc + 28501 cgaagccgct ggcgacggtg aggacggtgc tggtgcgctg ttcccgggcg aacagggggg + 28561 tgtagtcgag gtcgcaggtg gggtcgggtt cgtggaggtt ggcggtgggc gggacggtgt + 28621 cgtgccggat ggccagggcg ctggcggcca ggtccagggc gccggcggcg cccagggcgt + 28681 gcccgatcat cgacttgatg gagctgacgg gcacgcggtg ggcgctgccg cccaggcttt + 28741 ccttgagggc gtgtgtctcg tgccggtcgt tgtgccgggt tcccgcgccg tgggcgctga + 28801 tgtagtcgac gtcggcgggg ttcagccgtg cctggtcgag ggcggcgcgg atggcgtcgg + 28861 ccatctccag tgccccgggg cgcagtcccg tcatgtggtg ggcgctgctg tgggcggtga + 28921 tggcggacag ttccgcgtag gggcgggcgc cgcggcggcg ggcgtgttcg agttcttcca + 28981 ggaccagtac ggcggcgccc tcgccgagca cgaatccgtc gcgggtgcgg tcgaaggggc + 29041 ggctggcggt ggcggggtcg tcgttgcggg agctggtgag gcgcaggcgg tcgaagcagg + 29101 ccatggcgat gggcacgatg ggtgcttccg ttccgccggc gacgacgatg tcggcgctgc + 29161 cttcccggat caggtcggcg ccgtggccga cggcgtccag gccggaggtg cagccgctgg + 29221 agacgagggc gacggggccc tgtgcgcccc ggtcgcgggc gacggtggcg gccagggagc + 29281 tggggacgaa gtagtcgtag agggattcgg cggcctcggt gtggtcgagg gtccaggtgg + 29341 tgccgcagtc gctgaggatg gcgtactggg tggccaggct cgtggtgcag ccgatggcgc + 29401 tgcccaggct gacgccggtg cgcagggggt tgcggccgat gcggtccgcg acgccgctgt + 29461 cggcgacggc ttcgcgtgcg gcgaccagcg cgaactgtgc cgcgcggtcc aggcgttcgg + 29521 
tgtcggcgag gtcgaatccg tgggcggcgg gggtgaagtc gacctcggcg gcgacgcggg + 29581 agcggtagcc gcaggcgtcg aagagggtga cgcggcgggt ggcggtgcgg ccgacggtga + 29641 gcaggtccca gaagccggcg gtgcccaccg cgccgggggc gacgacgccg atcccggtga + 29701 tcacgacgcg gttcaccggc cgcctcccgt gggcagcgcg gtcacgtggc cgcggcaggc + 29761 ccggccgacg tgctgggcca ggctcgagga gcttgcgaag aagtgtcccc cggcgacggt + 29821 gtgcagttcg atcaggttgg tggtccactg ctgccagtgg cgcagggcgg cgggtgcggc + 29881 gagcgggtcg tcgcggcccg cgaagaccag gacgggcacg tccagcggcc ctccggtgac + 29941 cgggtccagg gcggcgttgc gcagcgatcg ggccaggcgc aggtcgtcgc gcaggacggg + 30001 caggaaggtg cgccgccaca gtccgccggg ggaggcggag gcgcccggcg gcagggagcc + 30061 gatctcgtcg agcagcggca gcaggtcctc gtcgggcagg tccgccgcgt ccgccaggac + 30121 ggaggtggtg tgcgggggcg ggcaggcgcc gacggccagg aacagcgggg gcaggccgga + 30181 gtcggccagg gcccgggtga gcgtgtagcc gacgagggct cccatgctgt gtccgtagag + 30241 cgcgtagggg ccgcggcgtg ccgtctgcag caaggtcggc aggaagtcgg cgagcaggcc + 30301 ggcgcgttcg gtcaggcgtg gttcgcggcg gcggctgtcg cggcccggga gcggcagggc + 30361 ggccacgtcg atgccgggtc cgaccgccgc ggaccagccg cggtagctcg ccacgcccgc + 30421 tccggcgtgg gcgaggcagg tgagtgtgat ccgtgcggtg cccgcctcac ggtccctgga + 30481 gcgggaagcc atgcgtcgcc ctccttgtga acggtccggt ccggccccat ccgggcgggc + 30541 gccgccggcg tggcgcgtgc cacgcggtgc cgtcccggcg ccggtccggg gggtgggtgc + 30601 ccctgcgcac cgagcgtcgc ccgcgcggct cgtgcggcgc tcaaccccgc ggcgaggcgg + 30661 gcggcggcct cggcg +// diff --git a/tests/unit/genomics/test_runbigscape.py b/tests/unit/genomics/test_runbigscape.py new file mode 100644 index 00000000..b10fa18f --- /dev/null +++ b/tests/unit/genomics/test_runbigscape.py @@ -0,0 +1,68 @@ +import os +import pytest +from nplinker.genomics import bigscape +from .. 
import DATA_DIR + + +@pytest.mark.parametrize("version", [1, 2]) +def test_run_bigscape(tmp_path, version): + """Test whether BiG-SCAPE runs at all using the --help command""" + result = bigscape.run_bigscape( + antismash_path=tmp_path, + output_path=tmp_path, + extra_params="--help", + version=version, + ) + + assert result is True + + +@pytest.mark.skipif(os.getenv('GITHUB_ACTIONS') == 'true', reason="The test is time-consuming on CI") +@pytest.mark.parametrize("version", [1, 2]) +def test_run_bigscape_small_dataset(tmp_path, version): + result = bigscape.run_bigscape( + antismash_path=DATA_DIR / "bigscape" / "minimal_dataset", + output_path=tmp_path, + extra_params="", + version=version, + ) + + assert result is True + + +def test_run_bigscape_wrong_version(tmp_path): + with pytest.raises(ValueError) as e: + bigscape.run_bigscape( + antismash_path=DATA_DIR, + output_path=tmp_path, + extra_params="--help", + version=3, + ) + + assert "version" in e.value.args[0] + + +@pytest.mark.parametrize("version", [1, 2]) +def test_input_path_not_exist(tmp_path, version): + with pytest.raises(FileNotFoundError) as e: + bigscape.run_bigscape( + antismash_path=tmp_path / "not_exist", + output_path=tmp_path, + extra_params="", + version=version, + ) + + assert "antismash_path" in e.value.args[0] + + +@pytest.mark.parametrize("version", [1, 2]) +def test_bad_parameters(tmp_path, version): + with pytest.raises(RuntimeError) as e: + bigscape.run_bigscape( + antismash_path=tmp_path, + output_path=tmp_path, + extra_params="--this-is-not-a-real-argument", + version=version, + ) + + assert "BiG-SCAPE" in e.value.args[0] diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 8ced67c3..ce2f24b7 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -25,5 +25,6 @@ def test_config(tmp_path): == "--mibig --clans-off --mix --include_singletons --cutoffs 0.30" ) assert config.bigscape.cutoff == "0.30" + assert config.bigscape.version == 1 assert 
config.scoring.methods == ["metcalf"]