From 671730975d1d8b39059651a623748ff7a1fdbbfc Mon Sep 17 00:00:00 2001 From: Vyacheslav Brover Date: Tue, 25 Apr 2023 14:35:53 -0400 Subject: [PATCH 1/3] PD-4583 Process files ending with '.gz', dependence on gunzip, on failure no output file (-o) is created --- amrfinder.cpp | 80 ++++++++++++++++++++++++++++++-------------- amrfinder_update.cpp | 21 +++--------- common.hpp | 6 ++++ tsv.cpp | 23 ++----------- tsv.hpp | 7 ++++ version.txt | 2 +- 6 files changed, 74 insertions(+), 65 deletions(-) diff --git a/amrfinder.cpp b/amrfinder.cpp index 97c7839..cf78c47 100644 --- a/amrfinder.cpp +++ b/amrfinder.cpp @@ -30,8 +30,12 @@ * AMRFinder * * Dependencies: NCBI BLAST, HMMer +* gunzip * * Release changes: +* 04/24/2023 Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip +* 04/19/2023 On failure no output file (-o) is created +* 3.11.12 04/13/2023 Application::makeKey() * PD-4548 fasta_check.cpp prohibits '\t' (not any '\'), and all restrictions are only for nucleotide sequences * 3.11.11 04/13/2023 PD-4566 --hmmer_bin * 3.11.10 04/12/2023 PD-4548 fasta_check.cpp prohibits ';', '.', '~' in the last position of a sequence identifier @@ -470,6 +474,22 @@ struct ThisApplication : ShellApplication t. qc (); t. saveFile (tmp + "/" + tmpSuf); } + + + + string uncompress (const string &quotedFName, + const string &suffix) const + { + const string res (shellQuote (tmp + "/" + suffix)); + ASSERT (quotedFName != res); + const string s (unQuote (quotedFName)); + if (isRight (s, ".gz")) + { + exec ("gunzip -c " + quotedFName + " > " + res); + return res; + } + return quotedFName; + } @@ -553,8 +573,11 @@ struct ThisApplication : ShellApplication } if (! output. empty ()) + { try { OFStream f (output); } catch (...) { throw runtime_error ("Cannot create output file " + shellQuote (output)); } + removeFile (output); + } // For timing... @@ -641,13 +664,9 @@ struct ThisApplication : ShellApplication if (! dbDir. items. empty () && dbDir. items. 
back () == "latest") { prog2dir ["amrfinder_update"] = execDir; - string blast_bin_par; - if (! blast_bin. empty ()) - blast_bin_par = " --blast_bin " + shellQuote (blast_bin); - string hmmer_bin_par; - if (! hmmer_bin. empty ()) - hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin); - exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + blast_bin_par + hmmer_bin_par + exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + + makeKey ("blast_bin", blast_bin) + + makeKey ("hmmer_bin", hmmer_bin) + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + logFName, logFName); } else @@ -759,18 +778,27 @@ struct ThisApplication : ShellApplication for (const string& include : includes) stderr << " - include " << include << '\n'; + } + + + // Quoted names + const string prot_flat = uncompress (prot, "prot_flat"); + const string dna_flat = uncompress (dna, "dna_flat"); + const string gff_flat = uncompress (gff, "gff_flat"); + + { StringVector emptyFiles; - if (! emptyArg (prot) && ! getFileSize (unQuote (prot))) emptyFiles << prot; - if (! emptyArg (dna) && ! getFileSize (unQuote (dna))) emptyFiles << dna; - if (! emptyArg (gff) && ! getFileSize (unQuote (gff))) emptyFiles << gff; + if (! emptyArg (prot) && ! getFileSize (unQuote (prot_flat))) emptyFiles << prot; + if (! emptyArg (dna) && ! getFileSize (unQuote (dna_flat))) emptyFiles << dna; + if (! emptyArg (gff) && ! getFileSize (unQuote (gff_flat))) emptyFiles << gff; for (const string& emptyFile : emptyFiles) { const Warning warning (stderr); stderr << "Empty file: " << emptyFile; } } - + // organism --> organism1 string organism1; @@ -853,7 +881,7 @@ struct ThisApplication : ShellApplication bool lcl = false; if (gffType == Gff::pgap && ! emptyArg (dna)) // PD-3347 { - LineInput f (unQuote (dna)); + LineInput f (unQuote (dna_flat)); while (f. nextLine ()) if (isLeft (f. 
line, ">")) { @@ -881,20 +909,20 @@ struct ThisApplication : ShellApplication { string gff_prot_match; string gff_dna_match; - if (getFileSize (unQuote (prot))) + if (getFileSize (unQuote (prot_flat))) { findProg ("blastp"); findProg ("hmmsearch"); - string prot1 (prot); // Protein FASTA with no dashes in the sequences + string prot1 (prot_flat); // Protein FASTA with no dashes in the sequences size_t nProt = 0; size_t protLen_max = 0; size_t protLen_total = 0; - if (! fastaCheck (prot, true, qcS, logFName, nProt, protLen_max, protLen_total)) + if (! fastaCheck (prot_flat, true, qcS, logFName, nProt, protLen_max, protLen_total)) { prot1 = shellQuote (tmp + "/prot"); OFStream outF (unQuote (prot1)); - LineInput f (unQuote (prot)); + LineInput f (unQuote (prot_flat)); while (f. nextLine ()) { trimTrailing (f. line); @@ -941,13 +969,13 @@ struct ThisApplication : ShellApplication string dnaPar; if (! emptyArg (dna)) { - dnaPar = " -dna " + dna; + dnaPar = " -dna " + dna_flat; if (gffType == Gff::pseudomonasdb) gff_dna_match = " -gff_dna_match " + tmp + "/dna_match"; } try { - exec (fullProg ("gff_check") + gff + annotS + " -prot " + prot1 + dnaPar + gff_prot_match + gff_dna_match + qcS + " -log " + logFName, logFName); + exec (fullProg ("gff_check") + gff_flat + annotS + " -prot " + prot1 + dnaPar + gff_prot_match + gff_dna_match + qcS + " -log " + logFName, logFName); } catch (...) { @@ -1004,19 +1032,19 @@ struct ThisApplication : ShellApplication amr_report_blastp = "-blastp " + tmp + "/blastp -hmmsearch " + tmp + "/hmmsearch -hmmdom " + tmp + "/dom"; if (! emptyArg (gff)) - amr_report_blastp += " -gff " + gff + gff_prot_match + gff_dna_match + annotS; + amr_report_blastp += " -gff " + gff_flat + gff_prot_match + gff_dna_match + annotS; } if (! emptyArg (dna)) { const bool blastn = ! organism1. 
empty () && fileExists (db + "/AMR_DNA-" + organism1); - if (getFileSize (unQuote (dna))) + if (getFileSize (unQuote (dna_flat))) { size_t nDna = 0; size_t dnaLen_max = 0; size_t dnaLen_total = 0; - EXEC_ASSERT (fastaCheck (dna, false, qcS, logFName, nDna, dnaLen_max, dnaLen_total)); + EXEC_ASSERT (fastaCheck (dna_flat, false, qcS, logFName, nDna, dnaLen_max, dnaLen_total)); const string blastx (/*"tblastn"*/ dnaLen_max > 100000 ? "tblastn" : "blastx"); // PAR // SB-3643 stderr. section ("Running " + blastx); @@ -1028,14 +1056,14 @@ struct ThisApplication : ShellApplication const string blastx_par (blastp_par + " -word_size 3 -query_gencode " + to_string (gencode)); ASSERT (threads_max >= 1); if (blastx == "blastx") - exec (fullProg ("blastx") + " -query " + dna + " -db " + tmp + "/db/AMRProt" + " " + exec (fullProg ("blastx") + " -query " + dna_flat + " -db " + tmp + "/db/AMRProt" + " " + blastx_par + " " BLAST_FMT " " + get_num_threads_param ("blastx", min (nDna, dnaLen_total / 10002)) + " -out " + tmp + "/blastx > /dev/null 2> " + tmp + "/blastx-err", tmp + "/blastx-err"); else { ASSERT (blastx == "tblastn"); findProg ("makeblastdb"); - exec (fullProg ("makeblastdb") + " -in " + dna + " -out " + tmp + "/nucl" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.log", tmp + "/makeblastdb.log"); + exec (fullProg ("makeblastdb") + " -in " + dna_flat + " -out " + tmp + "/nucl" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.log", tmp + "/makeblastdb.log"); if (threads_max > 1) { createDirectory (tmp + "/AMRProt_chunk"); @@ -1061,7 +1089,7 @@ struct ThisApplication : ShellApplication findProg ("blastn"); stderr. section ("Running blastn"); const Chronometer_OnePass cop ("blastn", cerr, false, qc_on && ! 
quiet); - exec (fullProg ("blastn") + " -query " + dna + " -db " + tmp + "/db/AMR_DNA-" + organism1 + " -evalue 1e-20 -dust no -max_target_seqs 10000 " + exec (fullProg ("blastn") + " -query " + dna_flat + " -db " + tmp + "/db/AMR_DNA-" + organism1 + " -evalue 1e-20 -dust no -max_target_seqs 10000 " + get_num_threads_param ("blastn", min (nDna, dnaLen_total / 2500000)) + " " BLAST_FMT " -out " + tmp + "/blastn > " + logFName + " 2> " + tmp + "/blastn-err", tmp + "/blastn-err"); } } @@ -1188,7 +1216,7 @@ struct ThisApplication : ShellApplication if (! emptyArg (dna_out)) { prepare_fasta_extract (StringVector {"Contig id", "Start", "Stop", "Strand", "Gene symbol", "Sequence name"}, "dna_out", false); - exec (fullProg ("fasta_extract") + dna + " " + tmp + "/dna_out" + qcS + " -log " + logFName + " > " + dna_out, logFName); + exec (fullProg ("fasta_extract") + dna_flat + " " + tmp + "/dna_out" + qcS + " -log " + logFName + " > " + dna_out, logFName); } if (! emptyArg (dnaFlank5_out)) { @@ -1204,7 +1232,7 @@ struct ThisApplication : ShellApplication t. saveHeader = false; t. qc (); t. saveFile (tmp + "/dnaFlank5_out"); - exec (fullProg ("fasta_extract") + dna + " " + tmp + "/dnaFlank5_out" + qcS + " -log " + logFName + " > " + dnaFlank5_out, logFName); + exec (fullProg ("fasta_extract") + dna_flat + " " + tmp + "/dnaFlank5_out" + qcS + " -log " + logFName + " > " + dnaFlank5_out, logFName); } diff --git a/amrfinder_update.cpp b/amrfinder_update.cpp index b43b0bb..e7a1c0c 100644 --- a/amrfinder_update.cpp +++ b/amrfinder_update.cpp @@ -481,24 +481,11 @@ Requirement: the database directory contains subdirectories named by database ve createLatestLink (mainDirS, load_data_version); - #if 1 prog2dir ["amrfinder_index"] = execDir; - string blast_bin_par; - if (! blast_bin. empty ()) - blast_bin_par = " --blast_bin " + shellQuote (blast_bin); - string hmmer_bin_par; - if (! hmmer_bin. 
empty ()) - hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin); - exec (fullProg ("amrfinder_index") + shellQuote (latestDir) + blast_bin_par + hmmer_bin_par + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + tmp + "/amrfinder_index.err", tmp + "/amrfinder_index.err"); - #else - stderr << "Indexing" << "\n"; - exec (fullProg ("hmmpress") + " -f " + shellQuote (latestDir + "AMR.LIB") + " > /dev/null 2> " + tmp + "/hmmpress.err", tmp + "/hmmpress.err"); - setSymlink (latestDir, tmp + "/db", true); - exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMRProt" + " -dbtype prot -logfile " + tmp + "/makeblastdb.AMRProt", tmp + "/makeblastdb.AMRProt"); - exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMR_CDS" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.AMR_CDS", tmp + "/makeblastdb.AMR_CDS"); - for (const string& dnaPointMut : dnaPointMuts) - exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMR_DNA-" + dnaPointMut + " -dbtype nucl -logfile " + tmp + "/makeblastdb.AMR_DNA-" + dnaPointMut, tmp + "/makeblastdb.AMR_DNA-" + dnaPointMut); - #endif + exec (fullProg ("amrfinder_index") + shellQuote (latestDir) + + makeKey ("blast_bin", blast_bin) + + makeKey ("hmmer_bin", hmmer_bin) + + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + tmp + "/amrfinder_index.err", tmp + "/amrfinder_index.err"); } }; diff --git a/common.hpp b/common.hpp index a2daed8..851048f 100644 --- a/common.hpp +++ b/common.hpp @@ -4247,6 +4247,12 @@ struct Application : Singleton, Root {} string getInstruction () const; virtual string getHelp () const; + string makeKey (const string &param, + const string &value) const + { if (value. empty ()) + return noString; + return " -" + ifS (gnu, "-") + param + " " + shellQuote (value); + } public: int run (int argc, const char* argv []); diff --git a/tsv.cpp b/tsv.cpp index 554754e..9ff8a74 100644 --- a/tsv.cpp +++ b/tsv.cpp @@ -434,25 +434,7 @@ void TextTable::group (const StringVector &by, if (! header [col2num (s)]. 
numeric) throw runtime_error ("Summation column " + strQuote (s) + " is not numeric"); -#if 0 - const auto lt = [&byIndex,this] (const StringVector &a, const StringVector &b) - { for (const ColNum i : byIndex) - switch (this->compare (a, b, i)) - { case -1: return true; - case 1: return false; - } - // Tie resolution - FFOR (size_t, i, a. size ()) - switch (this->compare (a, b, i)) - { case -1: return true; - case 1: return false; - } - return false; - }; - Common_sp::sort (rows, lt); -#else sort (by); -#endif RowNum i = 0; FFOR_START (RowNum, j, 1, rows. size ()) @@ -524,18 +506,17 @@ void TextTable::merge (RowNum toRowNum, for (const ColNum i : aggr) { - constexpr char sep = ','; if (from [i]. empty ()) continue; if (to [i]. empty ()) to [i] = from [i]; else { - StringVector vec (to [i], sep, true); + StringVector vec (to [i], aggr_sep, true); vec << from [i]; vec. sort (); vec. uniq (); - to [i] = vec. toString (string (1, sep)); + to [i] = vec. toString (string (1, aggr_sep)); } } } diff --git a/tsv.hpp b/tsv.hpp index e12ef12..b3e308c 100644 --- a/tsv.hpp +++ b/tsv.hpp @@ -84,6 +84,7 @@ struct TextTable : Named // no_index <=> no column typedef size_t RowNum; // no_index <=> no row + static constexpr char aggr_sep {','}; // PAR struct Error : runtime_error @@ -164,6 +165,12 @@ struct TextTable : Named const Vector &maxV, const Vector &aggr); public: + static StringVector aggr2values (const string &aggr) + { StringVector v (aggr, aggr_sep, true); + v. sort (); + v. 
uniq (); + return v; + } void colNumsRow2values (const Vector &colNums, RowNum row_num, StringVector &values) const; diff --git a/version.txt b/version.txt index 2d4715b..3b564fa 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -3.11.11 +3.11.12 From d61f6a1fbaee074437d3a67cae5ce20d28e424ae Mon Sep 17 00:00:00 2001 From: Vyacheslav Brover Date: Thu, 4 May 2023 14:55:31 -0400 Subject: [PATCH 2/3] PD-4596 Prohibit ASCII characters only between 0x00 and 0x1F in GFF files; Process files ending with .gz, see https://github.com/ncbi/amr/issues/61, dependence on gunzip (optional); On failure no empty output file (-o) is created --- amr_report.cpp | 16 ---------------- amrfinder.cpp | 16 ++++++++-------- common.cpp | 4 ++-- version.txt | 2 +- 4 files changed, 11 insertions(+), 27 deletions(-) diff --git a/amr_report.cpp b/amr_report.cpp index 0a49a3b..4018c01 100644 --- a/amr_report.cpp +++ b/amr_report.cpp @@ -910,22 +910,6 @@ struct BlastAlignment : Alignment } string getMethod (const Locus &cds) const { //IMPLY (refExactlyMatched () && ! mutation_all. get (), ! isMutationProt ()) - #if 0 - string method (fromHmm - ? "HMM" - : refExactlyMatched () - ? alleleReported () - ? "ALLELE" - : "EXACT" // PD-776 - : isMutationProt () - ? "POINT" - : partial () - ? truncated (cds) - ? "PARTIAL_CONTIG_END" // PD-2267 - : "PARTIAL" - : "BLAST" - ); - #endif string method (fromHmm ? 
"HMM" : isMutationProt () diff --git a/amrfinder.cpp b/amrfinder.cpp index cf78c47..e036d85 100644 --- a/amrfinder.cpp +++ b/amrfinder.cpp @@ -30,11 +30,12 @@ * AMRFinder * * Dependencies: NCBI BLAST, HMMer -* gunzip +* gunzip (optional) * * Release changes: -* 04/24/2023 Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip -* 04/19/2023 On failure no output file (-o) is created +* 3.11.13 05/04/2023 PD-4596 Prohibit ASCII characters only between 0x00 and 0x1F in GFF files +* 04/24/2023 PD-4583 Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip (optional) +* 04/19/2023 On failure no empty output file (-o) is created * 3.11.12 04/13/2023 Application::makeKey() * PD-4548 fasta_check.cpp prohibits '\t' (not any '\'), and all restrictions are only for nucleotide sequences * 3.11.11 04/13/2023 PD-4566 --hmmer_bin @@ -891,6 +892,9 @@ struct ThisApplication : ShellApplication } + const bool blastn = ! emptyArg (dna) && ! organism1. empty () && fileExists (db + "/AMR_DNA-" + organism1); + + // Create files for amr_report string amr_report_blastp; string amr_report_blastx; @@ -1038,7 +1042,6 @@ struct ThisApplication : ShellApplication if (! emptyArg (dna)) { - const bool blastn = ! organism1. empty () && fileExists (db + "/AMR_DNA-" + organism1); if (getFileSize (unQuote (dna_flat))) { size_t nDna = 0; @@ -1155,10 +1158,7 @@ struct ThisApplication : ShellApplication + ifS (suppress_common, " -suppress_prot " + tmp + "/suppress_prot") + nameS + qcS + " " + parm + " -log " + logFName + " > " + tmp + "/amr", logFName); } - if ( ! emptyArg (dna) - && ! organism1. empty () - && fileExists (db + "/AMR_DNA-" + organism1) - ) + if (blastn) { const Chronometer_OnePass cop ("dna_mutation", cerr, false, qc_on && ! quiet); const string mutation_allS (mutation_all. empty () ? 
"" : ("-mutation_all " + tmp + "/mutation_all.dna")); diff --git a/common.cpp b/common.cpp index 9e8cfcd..2d975c6 100644 --- a/common.cpp +++ b/common.cpp @@ -874,8 +874,8 @@ void reverse (string &s) string unpercent (const string &s) { for (const char c : s) - if (! printable (c)) - throwf (FUNC "Non-printable character: " + to_string (int (c))); + if (between (c, '\0', ' ') /*! printable (c)*/) + throwf (FUNC "Non-printable character: " + to_string (uchar (c))); string r; constexpr size_t hex_pos_max = 2; diff --git a/version.txt b/version.txt index 3b564fa..c70edfa 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -3.11.12 +3.11.13 From d4379409598811ea158af9e3815b73e7afd9ebc3 Mon Sep 17 00:00:00 2001 From: Vyacheslav Brover Date: Sat, 6 May 2023 18:14:16 -0400 Subject: [PATCH 3/3] PD-4598 Add CURL error messages to AMRFinder --- Makefile | 3 +- amrfinder.cpp | 2 + amrfinder_update.cpp | 128 +++------------------------------ common.cpp | 12 ++-- common.hpp | 40 +++++++++-- curl_easy.cpp | 167 +++++++++++++++++++++++++++++++++++++++++++ curl_easy.hpp | 90 +++++++++++++++++++++++ version.txt | 2 +- 8 files changed, 311 insertions(+), 133 deletions(-) create mode 100644 curl_easy.cpp create mode 100644 curl_easy.hpp diff --git a/Makefile b/Makefile index f6a5429..6cb8d39 100644 --- a/Makefile +++ b/Makefile @@ -92,6 +92,7 @@ release: clean make all common.o: common.hpp common.inc +curl_easy.o: curl_easy.hpp common.hpp common.inc gff.o: gff.hpp common.hpp common.inc alignment.o: alignment.hpp alignment.hpp common.inc @@ -106,7 +107,7 @@ amrfinder: $(amrfinderOBJS) $(CXX) -o $@ $(amrfinderOBJS) -pthread $(DBDIR) amrfinder_update.o: common.hpp common.inc -amrfinder_updateOBJS=amrfinder_update.o common.o +amrfinder_updateOBJS=amrfinder_update.o common.o curl_easy.o amrfinder_update: $(amrfinder_updateOBJS) @if [ "$(TEST_UPDATE)" != "" ] ; \ then \ diff --git a/amrfinder.cpp b/amrfinder.cpp index e036d85..66f6eb1 100644 --- a/amrfinder.cpp +++ b/amrfinder.cpp @@ 
-33,6 +33,8 @@ * gunzip (optional) * * Release changes: +* 05/06/2023 PD-4598 error messages in curl_easy.cpp +* 3.11.14 05/05/2023 extern "C" { #include } * 3.11.13 05/04/2023 PD-4596 Prohibit ASCII characters only between 0x00 and 0x1F in GFF files * 04/24/2023 PD-4583 Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip (optional) * 04/19/2023 On failure no empty output file (-o) is created diff --git a/amrfinder_update.cpp b/amrfinder_update.cpp index e7a1c0c..849b1bb 100644 --- a/amrfinder_update.cpp +++ b/amrfinder_update.cpp @@ -29,34 +29,24 @@ * File Description: * Updating of AMRFinder data * -* Dependencies: curl.{h,c} -* * Release changes: see amrfinder.cpp * */ -#define HTTPS // Otherwise: FTP +#define HTTPS 1 // 0: FTP -#ifdef _MSC_VER - #error "UNIX is required" -#endif - #undef NDEBUG #include "common.inc" -#include -#include #include "common.hpp" using namespace Common_sp; - - - -string curMinor; +#include "curl_easy.hpp" +using namespace CURL_sp; @@ -66,111 +56,10 @@ namespace -struct Curl -{ - CURL* eh {nullptr}; - - - Curl () - { eh = curl_easy_init (); - QC_ASSERT (eh); - #ifndef HTTPS - curl_easy_setopt (eh, CURLOPT_FTP_USE_EPSV, 0); - #endif - } - ~Curl () - { curl_easy_cleanup (eh); } - - - void download (const string &url, - const string &fName); - string read (const string &url); -}; - - - -size_t write_stream_cb (char* ptr, - size_t size, - size_t nMemb, - void* userData) -{ - ASSERT (ptr); - ASSERT (size == 1); - ASSERT (userData); - - OFStream& f = * static_cast (userData); - FOR (size_t, i, nMemb) - f << ptr [i];; - - return nMemb; -} - - - -void Curl::download (const string &url, - const string &fName) -{ - ASSERT (! url. empty ()); - ASSERT (! fName. empty ()); - - { - OFStream f (fName); - curl_easy_setopt (eh, CURLOPT_URL, url. 
c_str ()); - curl_easy_setopt (eh, CURLOPT_WRITEFUNCTION, write_stream_cb); - curl_easy_setopt (eh, CURLOPT_WRITEDATA, & f); - if (curl_easy_perform (eh)) - throw runtime_error ("CURL: Cannot download from " + url); - } - - ifstream f (fName); - string s; - f >> s; - if (s == " (userData); - FOR (size_t, i, nMemb) - s += ptr [i];; - - return nMemb; -} - - - -string Curl::read (const string &url) -{ - ASSERT (! url. empty ()); - - string s; s. reserve (1024); // PAR - curl_easy_setopt (eh, CURLOPT_URL, url. c_str ()); - curl_easy_setopt (eh, CURLOPT_WRITEFUNCTION, write_string_cb); - curl_easy_setopt (eh, CURLOPT_WRITEDATA, & s); - if (curl_easy_perform (eh)) - throw runtime_error ("CURL: Cannot read from " + url); - - return s; -} - -// - - - #ifdef TEST_UPDATE #define URL "https://ftp.ncbi.nlm.nih.gov/pathogen/Technical/AMRFinder_technical/test_database/" #else - #ifdef HTTPS + #if HTTPS #define URL "https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/" #else #define URL "ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/" @@ -191,7 +80,7 @@ string getLatestMinor (Curl &curl) Vector vers; for (string& line : dir) - #ifdef HTTPS + #if HTTPS if (isLeft (line, " +// char* + +inline const char* nvl (const char* s, + const char* nullS = "-") + { return s ? 
s : nullS; } + + + // string extern const string noString; @@ -4022,11 +4030,19 @@ struct NumberItemGenerator : ItemGenerator struct SoftwareVersion : Root { - uint major {0}; - uint minor {0}; + uint major {0}; // there is ::major() + uint minor {0}; // there is ::minor() uint patch {0}; + SoftwareVersion () = default; + SoftwareVersion (uint major_arg, + uint minor_arg, + uint patch_arg) + { major = major_arg; + minor = minor_arg; + patch = patch_arg; + } explicit SoftwareVersion (const string &fName); explicit SoftwareVersion (istream &is, bool minorOnly = false); @@ -4034,7 +4050,7 @@ struct SoftwareVersion : Root void init (string &&s, bool minorOnly); public: - void saveText (ostream &os) const override + void saveText (ostream &os) const final { os << major << '.' << minor << '.' << patch; } @@ -4055,18 +4071,28 @@ struct SoftwareVersion : Root struct DataVersion : Root { - uint year {0}; + uint year {0}; uint month {0}; - uint day {0}; - uint num {0}; + uint day {0}; + uint num {0}; + DataVersion () = default; + DataVersion (uint year_arg, + uint month_arg, + uint day_arg, + uint num_arg) + : year (year_arg) + , month (month_arg) + , day (day_arg) + , num (num_arg) + {} explicit DataVersion (const string &fName); explicit DataVersion (istream &is); private: void init (string &&s); public: - void saveText (ostream &os) const override + void saveText (ostream &os) const final { os << year << '-' << std::setfill ('0') << std::setw (2) << month << '-' << std::setfill ('0') << std::setw (2) << day diff --git a/curl_easy.cpp b/curl_easy.cpp new file mode 100644 index 0000000..e40b852 --- /dev/null +++ b/curl_easy.cpp @@ -0,0 +1,167 @@ +// curl_easy.cpp + +/*=========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. 
It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* Author: Vyacheslav Brover +* +* File Description: +* curl_easy functions +* +*/ + + +#undef NDEBUG +#include "common.inc" + +#include "curl_easy.hpp" +using namespace Common_sp; + + + +namespace CURL_sp +{ + + + +SoftwareVersion getLibVersion () +{ + if (const curl_version_info_data* ver = curl_version_info (CURLVERSION_NOW)) + { + const uint major = (ver->version_num >> 16) & 0xff; + const uint minor = (ver->version_num >> 8) & 0xff; + const uint patch = ver->version_num & 0xff; + return SoftwareVersion (major, minor, patch); + } + return SoftwareVersion (); +} + + + + +// Curl + +namespace +{ + size_t write_stream_cb (char* ptr, + size_t size, + size_t nMemb, + void* userData) + { + ASSERT (ptr); + ASSERT (size == 1); + ASSERT (userData); + + OFStream& f = * static_cast <OFStream*> (userData); + FOR (size_t, i, nMemb) + f << ptr [i]; + + return nMemb; + } + + + + size_t write_string_cb (char* ptr, + size_t size, + size_t nMemb, + void* userData) + { + ASSERT (ptr); + ASSERT (size == 1); + ASSERT (userData); + + string& s = * static_cast <string*> (userData); + FOR (size_t, 
i, nMemb) + s += ptr [i]; + + return nMemb; + } +} + + + +void Curl::download (const string &url, + const string &fName) +{ + ASSERT (! fName. empty ()); + + { + OFStream f (fName); + curl_easy_setopt (eh, CURLOPT_WRITEFUNCTION, write_stream_cb); + curl_easy_setopt (eh, CURLOPT_WRITEDATA, & f); + process (url, "download"); + } + + ifstream f (fName); + string s; + f >> s; + if (s == " +extern "C" +{ + #include + // Linking requires: -lcurl +} + +#include "common.hpp" +using namespace Common_sp; + + + + +namespace CURL_sp +{ + + +SoftwareVersion getLibVersion (); + + + +struct Curl +{ + CURL* eh {nullptr}; + + + Curl () + : eh (curl_easy_init ()) + { if (! eh) + throw runtime_error ("Cannot initialize curl_easy"); + } + ~Curl () + { curl_easy_cleanup (eh); } + + + void download (const string &url, + const string &fName); + string read (const string &url); +private: + void process (const string &url, + const string &error_msg_action); +}; + + + + +} // namespace + + diff --git a/version.txt b/version.txt index c70edfa..641602f 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -3.11.13 +3.11.14