From 671730975d1d8b39059651a623748ff7a1fdbbfc Mon Sep 17 00:00:00 2001
From: Vyacheslav Brover <vyacheslav.brover@nih.gov>
Date: Tue, 25 Apr 2023 14:35:53 -0400
Subject: [PATCH 1/3] PD-4583 Process files ending with '.gz', dependence on
 gunzip, on failure no output file (-o) is created

---
 amrfinder.cpp        | 80 ++++++++++++++++++++++++++++++--------------
 amrfinder_update.cpp | 21 +++---------
 common.hpp           |  6 ++++
 tsv.cpp              | 23 ++-----------
 tsv.hpp              |  7 ++++
 version.txt          |  2 +-
 6 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/amrfinder.cpp b/amrfinder.cpp
index 97c7839..cf78c47 100644
--- a/amrfinder.cpp
+++ b/amrfinder.cpp
@@ -30,8 +30,12 @@
 *   AMRFinder
 *
 * Dependencies: NCBI BLAST, HMMer
+*               gunzip
 *
 * Release changes:
+*           04/24/2023          Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip
+*           04/19/2023          On failure no output file (-o) is created
+*   3.11.12 04/13/2023          Application::makeKey()
 *                      PD-4548  fasta_check.cpp prohibits '\t' (not any '\'), and all restrictions are only for nucleotide sequences
 *   3.11.11 04/13/2023 PD-4566  --hmmer_bin
 *   3.11.10 04/12/2023 PD-4548  fasta_check.cpp prohibits ';', '.', '~' in the last position of a sequence identifier
@@ -470,6 +474,22 @@ struct ThisApplication : ShellApplication
     t. qc ();
     t. saveFile (tmp + "/" + tmpSuf);
   }
+  
+  
+  
+  string uncompress (const string &quotedFName,  // shell-quoted name of an input file
+                     const string &suffix) const  // file name under tmp that receives the decompressed copy
+  {  // Return: quoted name of the file to read: quotedFName itself unless its name ends with ".gz"
+    const string res (shellQuote (tmp + "/" + suffix));
+    ASSERT (quotedFName != res);  // the input must differ from the file the redirection below writes to
+    const string s (unQuote (quotedFName));
+    if (isRight (s, ".gz"))  // decision is made by the file name only, the content is not inspected
+    {
+      exec ("gunzip -c " + quotedFName + " > " + res);  // -c: gunzip writes to stdout, which is redirected into res
+      return res;
+    }
+    return quotedFName;  // no ".gz" ending: the name is passed through unchanged
+  }
 
 
 
@@ -553,8 +573,11 @@ struct ThisApplication : ShellApplication
 		}
 
 		if (! output. empty ())
+		{
 		  try { OFStream f (output); }
 		    catch (...) { throw runtime_error ("Cannot create output file " + shellQuote (output)); }
+      removeFile (output);
+		}
 
     
     // For timing... 
@@ -641,13 +664,9 @@ struct ThisApplication : ShellApplication
       if (! dbDir. items. empty () && dbDir. items. back () == "latest")
       {
         prog2dir ["amrfinder_update"] = execDir;
-        string blast_bin_par;
-        if (! blast_bin. empty ())
-          blast_bin_par = "  --blast_bin " + shellQuote (blast_bin);
-        string hmmer_bin_par;
-        if (! hmmer_bin. empty ())
-          hmmer_bin_par = "  --hmmer_bin " + shellQuote (hmmer_bin);
-  		  exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + blast_bin_par + hmmer_bin_par
+  		  exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") 
+  		          + makeKey ("blast_bin", blast_bin)  
+  		          + makeKey ("hmmer_bin", hmmer_bin)  
   		          + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + logFName, logFName);
       }
       else
@@ -759,18 +778,27 @@ struct ThisApplication : ShellApplication
 
       for (const string& include : includes)
         stderr << "  - include " << include << '\n';
+    }
+    
+    
+    // Quoted names
+    const string prot_flat = uncompress (prot, "prot_flat");
+    const string dna_flat  = uncompress (dna,  "dna_flat");
+    const string gff_flat  = uncompress (gff,  "gff_flat");
+      
 
+    {
       StringVector emptyFiles;
-      if (! emptyArg (prot) && ! getFileSize (unQuote (prot)))  emptyFiles << prot;
-      if (! emptyArg (dna)  && ! getFileSize (unQuote (dna)))   emptyFiles << dna;
-      if (! emptyArg (gff)  && ! getFileSize (unQuote (gff)))   emptyFiles << gff;      
+      if (! emptyArg (prot) && ! getFileSize (unQuote (prot_flat)))  emptyFiles << prot;
+      if (! emptyArg (dna)  && ! getFileSize (unQuote (dna_flat)))   emptyFiles << dna;
+      if (! emptyArg (gff)  && ! getFileSize (unQuote (gff_flat)))   emptyFiles << gff;      
       for (const string& emptyFile : emptyFiles)
       {
         const Warning warning (stderr);
         stderr << "Empty file: " << emptyFile;
       }
     }
-      
+
 
 	  // organism --> organism1
 	  string organism1;
@@ -853,7 +881,7 @@ struct ThisApplication : ShellApplication
     bool lcl = false;
     if (gffType == Gff::pgap && ! emptyArg (dna))  // PD-3347
     {
-      LineInput f (unQuote (dna));
+      LineInput f (unQuote (dna_flat));
       while (f. nextLine ())
         if (isLeft (f. line, ">"))
         {
@@ -881,20 +909,20 @@ struct ThisApplication : ShellApplication
   		{
   			string gff_prot_match;
  			  string gff_dna_match;
-  			if (getFileSize (unQuote (prot)))
+  			if (getFileSize (unQuote (prot_flat)))
   			{
     			findProg ("blastp");  			
     			findProg ("hmmsearch");
     			
-    			string prot1 (prot);  // Protein FASTA with no dashes in the sequences
+    			string prot1 (prot_flat);  // Protein FASTA with no dashes in the sequences
           size_t nProt = 0;
           size_t protLen_max = 0;
           size_t protLen_total = 0;
-          if (! fastaCheck (prot, true, qcS, logFName, nProt, protLen_max, protLen_total))
+          if (! fastaCheck (prot_flat, true, qcS, logFName, nProt, protLen_max, protLen_total))
           {
             prot1 = shellQuote (tmp + "/prot");
             OFStream outF (unQuote (prot1));
-            LineInput f (unQuote (prot)); 
+            LineInput f (unQuote (prot_flat)); 
             while (f. nextLine ())
             {
               trimTrailing (f. line);
@@ -941,13 +969,13 @@ struct ThisApplication : ShellApplication
     			  string dnaPar;
     			  if (! emptyArg (dna))
     			  {
-    			    dnaPar = " -dna " + dna;
+    			    dnaPar = " -dna " + dna_flat;
     			    if (gffType == Gff::pseudomonasdb)
     			      gff_dna_match = " -gff_dna_match " + tmp + "/dna_match";
     			  }
     			  try 
     			  {
-    			    exec (fullProg ("gff_check") + gff + annotS + " -prot " + prot1 + dnaPar + gff_prot_match + gff_dna_match + qcS + " -log " + logFName, logFName);
+    			    exec (fullProg ("gff_check") + gff_flat + annotS + " -prot " + prot1 + dnaPar + gff_prot_match + gff_dna_match + qcS + " -log " + logFName, logFName);
     			  }
     			  catch (...)
     			  {
@@ -1004,19 +1032,19 @@ struct ThisApplication : ShellApplication
 
   		  amr_report_blastp = "-blastp " + tmp + "/blastp  -hmmsearch " + tmp + "/hmmsearch  -hmmdom " + tmp + "/dom";
   			if (! emptyArg (gff))
-  			  amr_report_blastp += "  -gff " + gff + gff_prot_match + gff_dna_match + annotS;
+  			  amr_report_blastp += "  -gff " + gff_flat + gff_prot_match + gff_dna_match + annotS;
   		}  		
 
   		
   		if (! emptyArg (dna))
   		{
   		  const bool blastn = ! organism1. empty () && fileExists (db + "/AMR_DNA-" + organism1);
-  		  if (getFileSize (unQuote (dna)))
+  		  if (getFileSize (unQuote (dna_flat)))
     		{
           size_t nDna = 0;
           size_t dnaLen_max = 0;
           size_t dnaLen_total = 0;
-          EXEC_ASSERT (fastaCheck (dna, false, qcS, logFName, nDna, dnaLen_max, dnaLen_total));
+          EXEC_ASSERT (fastaCheck (dna_flat, false, qcS, logFName, nDna, dnaLen_max, dnaLen_total));
           const string blastx (/*"tblastn"*/ dnaLen_max > 100000 ? "tblastn" : "blastx");  // PAR  // SB-3643
 
     			stderr. section ("Running " + blastx);
@@ -1028,14 +1056,14 @@ struct ThisApplication : ShellApplication
         		const string blastx_par  (blastp_par + "  -word_size 3  -query_gencode " + to_string (gencode));
       			ASSERT (threads_max >= 1);
       			if (blastx == "blastx")
-        			exec (fullProg ("blastx") + "  -query " + dna + " -db " + tmp + "/db/AMRProt" + "  "
+        			exec (fullProg ("blastx") + "  -query " + dna_flat + " -db " + tmp + "/db/AMRProt" + "  "
             			  + blastx_par + " " BLAST_FMT " " + get_num_threads_param ("blastx", min (nDna, dnaLen_total / 10002))
             			  + " -out " + tmp + "/blastx > /dev/null 2> " + tmp + "/blastx-err", tmp + "/blastx-err");
             else
             {
               ASSERT (blastx == "tblastn");
         			findProg ("makeblastdb");
-           	  exec (fullProg ("makeblastdb") + " -in " + dna + " -out " + tmp + "/nucl" + "  -dbtype nucl  -logfile " + tmp + "/makeblastdb.log", tmp + "/makeblastdb.log");  
+           	  exec (fullProg ("makeblastdb") + " -in " + dna_flat + " -out " + tmp + "/nucl" + "  -dbtype nucl  -logfile " + tmp + "/makeblastdb.log", tmp + "/makeblastdb.log");  
         			if (threads_max > 1)
         			{
           		  createDirectory (tmp + "/AMRProt_chunk");
@@ -1061,7 +1089,7 @@ struct ThisApplication : ShellApplication
       			findProg ("blastn");
       			stderr. section ("Running blastn");
        			const Chronometer_OnePass cop ("blastn", cerr, false, qc_on && ! quiet);
-      			exec (fullProg ("blastn") + " -query " + dna + " -db " + tmp + "/db/AMR_DNA-" + organism1 + " -evalue 1e-20  -dust no  -max_target_seqs 10000  " 
+      			exec (fullProg ("blastn") + " -query " + dna_flat + " -db " + tmp + "/db/AMR_DNA-" + organism1 + " -evalue 1e-20  -dust no  -max_target_seqs 10000  " 
       			      + get_num_threads_param ("blastn", min (nDna, dnaLen_total / 2500000)) + " " BLAST_FMT " -out " + tmp + "/blastn > " + logFName + " 2> " + tmp + "/blastn-err", tmp + "/blastn-err");
       		}
     		}
@@ -1188,7 +1216,7 @@ struct ThisApplication : ShellApplication
     if (! emptyArg (dna_out))
     {
       prepare_fasta_extract (StringVector {"Contig id", "Start", "Stop", "Strand", "Gene symbol", "Sequence name"}, "dna_out", false);
-      exec (fullProg ("fasta_extract") + dna + " " + tmp + "/dna_out" + qcS + " -log " + logFName + " > " + dna_out, logFName);  
+      exec (fullProg ("fasta_extract") + dna_flat + " " + tmp + "/dna_out" + qcS + " -log " + logFName + " > " + dna_out, logFName);  
     }
     if (! emptyArg (dnaFlank5_out))
     {
@@ -1204,7 +1232,7 @@ struct ThisApplication : ShellApplication
       t. saveHeader = false;
       t. qc ();
       t. saveFile (tmp + "/dnaFlank5_out");
-      exec (fullProg ("fasta_extract") + dna + " " + tmp + "/dnaFlank5_out" + qcS + " -log " + logFName + " > " + dnaFlank5_out, logFName);  
+      exec (fullProg ("fasta_extract") + dna_flat + " " + tmp + "/dnaFlank5_out" + qcS + " -log " + logFName + " > " + dnaFlank5_out, logFName);  
     }
 
 		
diff --git a/amrfinder_update.cpp b/amrfinder_update.cpp
index b43b0bb..e7a1c0c 100644
--- a/amrfinder_update.cpp
+++ b/amrfinder_update.cpp
@@ -481,24 +481,11 @@ Requirement: the database directory contains subdirectories named by database ve
     createLatestLink (mainDirS, load_data_version);
   
 
-  #if 1
     prog2dir ["amrfinder_index"] = execDir;
-    string blast_bin_par;
-    if (! blast_bin. empty ())
-      blast_bin_par = "  --blast_bin " + shellQuote (blast_bin);
-    string hmmer_bin_par;
-    if (! hmmer_bin. empty ())
-      hmmer_bin_par = "  --hmmer_bin " + shellQuote (hmmer_bin);
-	  exec (fullProg ("amrfinder_index") + shellQuote (latestDir) + blast_bin_par + hmmer_bin_par + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + tmp + "/amrfinder_index.err", tmp + "/amrfinder_index.err"); 
-  #else    
-    stderr << "Indexing" << "\n";
-    exec (fullProg ("hmmpress") + " -f " + shellQuote (latestDir + "AMR.LIB") + " > /dev/null 2> " + tmp + "/hmmpress.err", tmp + "/hmmpress.err");
-    setSymlink (latestDir, tmp + "/db", true);
-	  exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMRProt" + "  -dbtype prot  -logfile " + tmp + "/makeblastdb.AMRProt", tmp + "/makeblastdb.AMRProt");  
-	  exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMR_CDS" + "  -dbtype nucl  -logfile " + tmp + "/makeblastdb.AMR_CDS", tmp + "/makeblastdb.AMR_CDS");  
-    for (const string& dnaPointMut : dnaPointMuts)
-  	  exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMR_DNA-" + dnaPointMut + "  -dbtype nucl  -logfile " + tmp + "/makeblastdb.AMR_DNA-" + dnaPointMut, tmp + "/makeblastdb.AMR_DNA-" + dnaPointMut);
-  #endif
+	  exec (fullProg ("amrfinder_index") + shellQuote (latestDir) 
+	          + makeKey ("blast_bin", blast_bin)   
+	          + makeKey ("hmmer_bin", hmmer_bin)  
+	          + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + tmp + "/amrfinder_index.err", tmp + "/amrfinder_index.err"); 
   }
 };
 
diff --git a/common.hpp b/common.hpp
index a2daed8..851048f 100644
--- a/common.hpp
+++ b/common.hpp
@@ -4247,6 +4247,12 @@ struct Application : Singleton<Application>, Root
     {}
   string getInstruction () const;
   virtual string getHelp () const;
+  string makeKey (const string &param,  // option name without leading dashes
+                  const string &value) const  // option value; empty <=> the option is omitted
+    { if (value. empty ())  // Return: command-line fragment "  -[-]param <quoted value>", or noString
+        return noString;
+      return "  -" + ifS (gnu, "-") + param + " " + shellQuote (value);  // second '-' is added only if gnu
+    }
 public:
   int run (int argc, 
            const char* argv []);
diff --git a/tsv.cpp b/tsv.cpp
index 554754e..9ff8a74 100644
--- a/tsv.cpp
+++ b/tsv.cpp
@@ -434,25 +434,7 @@ void TextTable::group (const StringVector &by,
     if (! header [col2num (s)]. numeric)
       throw runtime_error ("Summation column " + strQuote (s) + " is not numeric");
 
-#if 0
-  const auto lt = [&byIndex,this] (const StringVector &a, const StringVector &b) 
-                    { for (const ColNum i : byIndex) 
-                        switch (this->compare (a, b, i))
-                        { case -1: return true;
-                          case  1: return false;
-                        }
-                      // Tie resolution
-                      FFOR (size_t, i, a. size ())
-                        switch (this->compare (a, b, i))
-                        { case -1: return true;
-                          case  1: return false;
-                        }
-                      return false;
-                    };
-  Common_sp::sort (rows, lt);
-#else
   sort (by);
-#endif
   
   RowNum i = 0;  
   FFOR_START (RowNum, j, 1, rows. size ())
@@ -524,18 +506,17 @@ void TextTable::merge (RowNum toRowNum,
 
   for (const ColNum i : aggr)
   {
-    constexpr char sep = ',';
     if (from [i]. empty ())
       continue;
     if (to [i]. empty ())
       to [i] = from [i];
     else
     {
-      StringVector vec (to [i], sep, true);
+      StringVector vec (to [i], aggr_sep, true);
       vec << from [i];
       vec. sort ();
       vec. uniq ();
-      to [i] = vec. toString (string (1, sep));
+      to [i] = vec. toString (string (1, aggr_sep));
     }
   }
 }
diff --git a/tsv.hpp b/tsv.hpp
index e12ef12..b3e308c 100644
--- a/tsv.hpp
+++ b/tsv.hpp
@@ -84,6 +84,7 @@ struct TextTable : Named
     // no_index <=> no column
   typedef  size_t  RowNum;
     // no_index <=> no row
+  static constexpr char aggr_sep {','};  // PAR
     
     
   struct Error : runtime_error
@@ -164,6 +165,12 @@ struct TextTable : Named
               const Vector<ColNum> &maxV,
               const Vector<ColNum> &aggr);
 public:
+  static StringVector aggr2values (const string &aggr)  // Return: sorted values of aggr without duplicates
+    { StringVector v (aggr, aggr_sep, true);  // aggr is split by aggr_sep; NOTE(review): meaning of "true" is not visible here - confirm
+      v. sort ();
+      v. uniq ();  // applied after sort(), as in TextTable::merge()
+      return v;
+    }
   void colNumsRow2values (const Vector<ColNum> &colNums,
                           RowNum row_num,
                           StringVector &values) const;
diff --git a/version.txt b/version.txt
index 2d4715b..3b564fa 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-3.11.11
+3.11.12

From d61f6a1fbaee074437d3a67cae5ce20d28e424ae Mon Sep 17 00:00:00 2001
From: Vyacheslav Brover <vyacheslav.brover@nih.gov>
Date: Thu, 4 May 2023 14:55:31 -0400
Subject: [PATCH 2/3] PD-4596 Prohibit ASCII characters only between 0x00 and
 0x1F in GFF files; Process files ending with .gz, see
 https://github.com/ncbi/amr/issues/61, dependence on gunzip (optional); On
 failure no empty output file (-o) is created

---
 amr_report.cpp | 16 ----------------
 amrfinder.cpp  | 16 ++++++++--------
 common.cpp     |  4 ++--
 version.txt    |  2 +-
 4 files changed, 11 insertions(+), 27 deletions(-)

diff --git a/amr_report.cpp b/amr_report.cpp
index 0a49a3b..4018c01 100644
--- a/amr_report.cpp
+++ b/amr_report.cpp
@@ -910,22 +910,6 @@ struct BlastAlignment : Alignment
     }    
 	string getMethod (const Locus &cds) const
 	  { //IMPLY (refExactlyMatched () && ! mutation_all. get (), ! isMutationProt ())
-    #if 0
-	    string method (fromHmm
-	                     ? "HMM"
-	                     : refExactlyMatched () 
-          	             ? alleleReported () 
-          	               ? "ALLELE"
-          	               : "EXACT"  // PD-776
-      	                 : isMutationProt ()
-      	                   ? "POINT"
-          	               : partial ()
-          	                 ? truncated (cds)
-          	                   ? "PARTIAL_CONTIG_END"  // PD-2267
-          	                   : "PARTIAL"
-        	                   : "BLAST"
-        	           );
-    #endif
 	    string method (fromHmm
 	                     ? "HMM"
 	                     : isMutationProt ()
diff --git a/amrfinder.cpp b/amrfinder.cpp
index cf78c47..e036d85 100644
--- a/amrfinder.cpp
+++ b/amrfinder.cpp
@@ -30,11 +30,12 @@
 *   AMRFinder
 *
 * Dependencies: NCBI BLAST, HMMer
-*               gunzip
+*               gunzip (optional)
 *
 * Release changes:
-*           04/24/2023          Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip
-*           04/19/2023          On failure no output file (-o) is created
+*   3.11.13 05/04/2023 PD-4596  Prohibit ASCII characters only between 0x00 and 0x1F in GFF files
+*           04/24/2023 PD-4583  Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip (optional)
+*           04/19/2023          On failure no empty output file (-o) is created
 *   3.11.12 04/13/2023          Application::makeKey()
 *                      PD-4548  fasta_check.cpp prohibits '\t' (not any '\'), and all restrictions are only for nucleotide sequences
 *   3.11.11 04/13/2023 PD-4566  --hmmer_bin
@@ -891,6 +892,9 @@ struct ThisApplication : ShellApplication
     }
     
 
+	  const bool blastn = ! emptyArg (dna) && ! organism1. empty () && fileExists (db + "/AMR_DNA-" + organism1);
+
+
     // Create files for amr_report    
     string amr_report_blastp;	
  		string amr_report_blastx;
@@ -1038,7 +1042,6 @@ struct ThisApplication : ShellApplication
   		
   		if (! emptyArg (dna))
   		{
-  		  const bool blastn = ! organism1. empty () && fileExists (db + "/AMR_DNA-" + organism1);
   		  if (getFileSize (unQuote (dna_flat)))
     		{
           size_t nDna = 0;
@@ -1155,10 +1158,7 @@ struct ThisApplication : ShellApplication
       		  + ifS (suppress_common, " -suppress_prot " + tmp + "/suppress_prot")  
       		  + nameS + qcS + " " + parm + " -log " + logFName + " > " + tmp + "/amr", logFName);
   	}
-		if (   ! emptyArg (dna) 
-		    && ! organism1. empty ()
-		    && fileExists (db + "/AMR_DNA-" + organism1)
-		   )
+		if (blastn)
 		{
  			const Chronometer_OnePass cop ("dna_mutation", cerr, false, qc_on && ! quiet);
       const string mutation_allS (mutation_all. empty () ? "" : ("-mutation_all " + tmp + "/mutation_all.dna")); 
diff --git a/common.cpp b/common.cpp
index 9e8cfcd..2d975c6 100644
--- a/common.cpp
+++ b/common.cpp
@@ -874,8 +874,8 @@ void reverse (string &s)
 string unpercent (const string &s)
 {
   for (const char c : s)
-  	if (! printable (c))
-  		throwf (FUNC "Non-printable character: " + to_string (int (c)));
+  	if (between (c, '\0', ' ') /*! printable (c)*/)
+  		throwf (FUNC "Non-printable character: " + to_string (uchar (c)));
 
   string r;
   constexpr size_t hex_pos_max = 2;
diff --git a/version.txt b/version.txt
index 3b564fa..c70edfa 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-3.11.12
+3.11.13

From d4379409598811ea158af9e3815b73e7afd9ebc3 Mon Sep 17 00:00:00 2001
From: Vyacheslav Brover <vyacheslav.brover@nih.gov>
Date: Sat, 6 May 2023 18:14:16 -0400
Subject: [PATCH 3/3] PD-4598 Add CURL error messages to AMRFinder

---
 Makefile             |   3 +-
 amrfinder.cpp        |   2 +
 amrfinder_update.cpp | 128 +++------------------------------
 common.cpp           |  12 ++--
 common.hpp           |  40 +++++++++--
 curl_easy.cpp        | 167 +++++++++++++++++++++++++++++++++++++++++++
 curl_easy.hpp        |  90 +++++++++++++++++++++++
 version.txt          |   2 +-
 8 files changed, 311 insertions(+), 133 deletions(-)
 create mode 100644 curl_easy.cpp
 create mode 100644 curl_easy.hpp

diff --git a/Makefile b/Makefile
index f6a5429..6cb8d39 100644
--- a/Makefile
+++ b/Makefile
@@ -92,6 +92,7 @@ release: clean
 	make all
 
 common.o:	common.hpp common.inc
+curl_easy.o: curl_easy.hpp common.hpp common.inc
 gff.o: gff.hpp common.hpp common.inc
 alignment.o:	alignment.hpp alignment.hpp common.inc
 
@@ -106,7 +107,7 @@ amrfinder:	$(amrfinderOBJS)
 	$(CXX) -o $@ $(amrfinderOBJS) -pthread $(DBDIR)
 
 amrfinder_update.o:  common.hpp common.inc 
-amrfinder_updateOBJS=amrfinder_update.o common.o
+amrfinder_updateOBJS=amrfinder_update.o common.o curl_easy.o
 amrfinder_update:      $(amrfinder_updateOBJS) 
 	@if [ "$(TEST_UPDATE)" != "" ]  ; \
 	then  \
diff --git a/amrfinder.cpp b/amrfinder.cpp
index e036d85..66f6eb1 100644
--- a/amrfinder.cpp
+++ b/amrfinder.cpp
@@ -33,6 +33,8 @@
 *               gunzip (optional)
 *
 * Release changes:
+*           05/06/2023 PD-4598  error messages in curl_easy.cpp
+*   3.11.14 05/05/2023          extern "C" { #include <curl.h> }
 *   3.11.13 05/04/2023 PD-4596  Prohibit ASCII characters only between 0x00 and 0x1F in GFF files
 *           04/24/2023 PD-4583  Process files ending with ".gz", see https://github.com/ncbi/amr/issues/61, dependence on gunzip (optional)
 *           04/19/2023          On failure no empty output file (-o) is created
diff --git a/amrfinder_update.cpp b/amrfinder_update.cpp
index e7a1c0c..849b1bb 100644
--- a/amrfinder_update.cpp
+++ b/amrfinder_update.cpp
@@ -29,34 +29,24 @@
 * File Description:
 *   Updating of AMRFinder data
 *
-* Dependencies: curl.{h,c}
-*
 * Release changes: see amrfinder.cpp
 *
 */
 
 
 
-#define HTTPS  // Otherwise: FTP
+#define HTTPS 1   // 0: FTP
 
 
 
-#ifdef _MSC_VER
-  #error "UNIX is required"
-#endif
-   
 #undef NDEBUG 
 #include "common.inc"
 
-#include <unistd.h>
-#include <curl/curl.h>
 
 #include "common.hpp"
 using namespace Common_sp;
-
-
-
-string curMinor;
+#include "curl_easy.hpp"
+using namespace CURL_sp;
 
 
 
@@ -66,111 +56,10 @@ namespace
 
 
 
-struct Curl
-{
-  CURL* eh {nullptr};
-
-
-  Curl ()
-    { eh = curl_easy_init ();
-      QC_ASSERT (eh);
-    #ifndef HTTPS
-      curl_easy_setopt (eh, CURLOPT_FTP_USE_EPSV, 0);
-    #endif
-    }
- ~Curl ()
-   { curl_easy_cleanup (eh); }
-
-
-  void download (const string &url,
-                 const string &fName);
-  string read (const string &url);
-};
-
-	
-
-size_t write_stream_cb (char* ptr,
-                        size_t size, 
-                        size_t nMemb, 
-                        void* userData)
-{
-  ASSERT (ptr);
-  ASSERT (size == 1);
-  ASSERT (userData);
-  
-  OFStream& f = * static_cast <OFStream*> (userData);
-  FOR (size_t, i, nMemb)
-    f << ptr [i];;
-  
-  return nMemb;
-}
-
-
- 	
-void Curl::download (const string &url,
-                     const string &fName) 
-{
-  ASSERT (! url. empty ());  
-  ASSERT (! fName. empty ());  
-  
-  {
-    OFStream f (fName);
-    curl_easy_setopt (eh, CURLOPT_URL, url. c_str ());
-    curl_easy_setopt (eh, CURLOPT_WRITEFUNCTION, write_stream_cb);
-    curl_easy_setopt (eh, CURLOPT_WRITEDATA, & f);
-    if (curl_easy_perform (eh))
-      throw runtime_error ("CURL: Cannot download from " + url);
-  }
-  
-  ifstream f (fName);
-  string s;
-  f >> s;
-  if (s == "<?xml")
-    throw runtime_error ("Cannot download " + strQuote (fName));
-}
-
-
-
-size_t write_string_cb (char* ptr,
-                        size_t size, 
-                        size_t nMemb, 
-                        void* userData)
-{
-  ASSERT (ptr);
-  ASSERT (size == 1);
-  ASSERT (userData);
-  
-  string& s = * static_cast <string*> (userData);
-  FOR (size_t, i, nMemb)
-    s += ptr [i];;
-  
-  return nMemb;
-}
-
-
- 	
-string Curl::read (const string &url)
-{
-  ASSERT (! url. empty ());  
-  
-  string s;  s. reserve (1024);  // PAR  
-  curl_easy_setopt (eh, CURLOPT_URL, url. c_str ());
-  curl_easy_setopt (eh, CURLOPT_WRITEFUNCTION, write_string_cb);
-  curl_easy_setopt (eh, CURLOPT_WRITEDATA, & s);
-  if (curl_easy_perform (eh))
-    throw runtime_error ("CURL: Cannot read from " + url);
-  
-  return s;
-}
-
-//
-
-
-
 #ifdef TEST_UPDATE
   #define URL "https://ftp.ncbi.nlm.nih.gov/pathogen/Technical/AMRFinder_technical/test_database/"
 #else
-  #ifdef HTTPS
+  #if HTTPS
     #define URL "https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/"  
   #else
     #define URL "ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/"  
@@ -191,7 +80,7 @@ string getLatestMinor (Curl &curl)
     
   Vector<SoftwareVersion> vers;  
   for (string& line : dir)
-  #ifdef HTTPS
+  #if HTTPS
     if (isLeft (line, "<a href="))
   	  try 
   	  {
@@ -247,7 +136,7 @@ string getLatestDataVersion (Curl &curl,
     
   Vector<DataVersion> dataVersions;  
   for (string& line : dir)
-  #ifdef HTTPS
+  #if HTTPS
     if (isLeft (line, "<a href="))
       try
       {
@@ -308,6 +197,9 @@ void fetchAMRFile (Curl &curl,
 
 struct ThisApplication : ShellApplication
 {
+  string curMinor;
+
+
   ThisApplication ()
     : ShellApplication ("Update the database for AMRFinder from " URL "\n\
 Requirement: the database directory contains subdirectories named by database versions.\
@@ -361,7 +253,7 @@ Requirement: the database directory contains subdirectories named by database ve
     const Verbose vrb (qc_on);
     
 
-    Curl curl;    
+    Curl curl;
         
     
     // FTP site files
diff --git a/common.cpp b/common.cpp
index 2d975c6..3d76ce4 100644
--- a/common.cpp
+++ b/common.cpp
@@ -244,10 +244,10 @@ namespace
     *os << endl
         << msg << endl << endl
       #ifndef _MSC_VER
-  	    << "HOSTNAME: " << (hostname ? hostname : "?") << endl
-  	    << "SHELL: " << (shell ? shell : "?") << endl
-  	    << "PWD: " << (pwd ? pwd : "?") << endl
-  	    << "PATH: " << (path ? path : "?") << endl
+  	    << "HOSTNAME: " << nvl (hostname, "?") << endl
+  	    << "SHELL: "    << nvl (shell,    "?") << endl
+  	    << "PWD: "      << nvl (pwd,      "?") << endl
+  	    << "PATH: "     << nvl (path,     "?") << endl
       #endif
   	    << "Progam name:  " << programName << endl
   	    << "Command line: " << getCommandLine () << endl;
@@ -482,8 +482,8 @@ string pad (const string &s,
     
   switch (right)
   {
-    case efalse: return sp + s; 
-    case etrue:  return s + sp; 
+    case efalse: return s + sp; 
+    case etrue:  return sp + s; 
     case enull:  
       {
         const size_t half = sp. size () / 2;
diff --git a/common.hpp b/common.hpp
index 851048f..d147238 100644
--- a/common.hpp
+++ b/common.hpp
@@ -1017,6 +1017,14 @@ struct List : list<T>
 
 
 
+// char*
+
+inline const char* nvl (const char* s,  // may be nullptr
+                        const char* nullS = "-")  // substitute for a null s
+  { return s ? s : nullS; }  // Return: s if it is not null, otherwise nullS
+  	
+  	  	
+
 // string
 
 extern const string noString;
@@ -4022,11 +4030,19 @@ struct NumberItemGenerator : ItemGenerator
 
 struct SoftwareVersion : Root
 {
-  uint major {0};
-  uint minor {0};
+  uint major {0};  // there is ::major()
+  uint minor {0};  // there is ::minor()
   uint patch {0};
   
 
+  SoftwareVersion () = default;
+  SoftwareVersion (uint major_arg,  // Constructs a version directly from its three numeric components
+                   uint minor_arg,
+                   uint patch_arg)
+    { major = major_arg;  // NOTE(review): assigned in the body, not in an initializer list - presumably because of ::major()/::minor() noted at the fields; confirm
+      minor = minor_arg;
+      patch = patch_arg;
+    } 
   explicit SoftwareVersion (const string &fName);
   explicit SoftwareVersion (istream &is,
                             bool minorOnly = false);
@@ -4034,7 +4050,7 @@ struct SoftwareVersion : Root
   void init (string &&s,
              bool minorOnly);
 public:
-  void saveText (ostream &os) const override
+  void saveText (ostream &os) const final
     { os << major << '.' << minor << '.' << patch; }   
     
     
@@ -4055,18 +4071,28 @@ struct SoftwareVersion : Root
 
 struct DataVersion : Root
 {
-  uint year {0};
+  uint year  {0};
   uint month {0};
-  uint day {0};
-  uint num {0};
+  uint day   {0};
+  uint num   {0};
   
 
+  DataVersion () = default;
+  DataVersion (uint year_arg,  // Constructs a data version directly from its four numeric components
+               uint month_arg,  // no range checking is done here
+               uint day_arg,
+               uint num_arg)  // NOTE(review): looks like a sequence number within a date - confirm
+    : year  (year_arg)
+    , month (month_arg)
+    , day   (day_arg)
+    , num   (num_arg)
+    {} 
   explicit DataVersion (const string &fName);
   explicit DataVersion (istream &is);
 private:
   void init (string &&s);
 public:
-  void saveText (ostream &os) const override
+  void saveText (ostream &os) const final
     { os << year 
          << '-' << std::setfill ('0') << std::setw (2) << month 
          << '-' << std::setfill ('0') << std::setw (2) << day 
diff --git a/curl_easy.cpp b/curl_easy.cpp
new file mode 100644
index 0000000..e40b852
--- /dev/null
+++ b/curl_easy.cpp
@@ -0,0 +1,167 @@
+// curl_easy.cpp
+
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE                          
+*               National Center for Biotechnology Information
+*                                                                          
+*  This software/database is a "United States Government Work" under the   
+*  terms of the United States Copyright Act.  It was written as part of    
+*  the author's official duties as a United States Government employee and 
+*  thus cannot be copyrighted.  This software/database is freely available 
+*  to the public for use. The National Library of Medicine and the U.S.    
+*  Government have not placed any restriction on its use or reproduction.  
+*                                                                          
+*  Although all reasonable efforts have been taken to ensure the accuracy  
+*  and reliability of the software and data, the NLM and the U.S.          
+*  Government do not and cannot warrant the performance or results that    
+*  may be obtained by using this software or data. The NLM and the U.S.    
+*  Government disclaim all warranties, express or implied, including       
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.                                                                
+*                                                                          
+*  Please cite the author in any work or product based on this material.   
+*
+* ===========================================================================
+*
+* Author: Vyacheslav Brover
+*
+* File Description:
+*   curl_easy functions
+*
+*/
+
+
+#undef NDEBUG
+#include "common.inc"
+
+#include "curl_easy.hpp"
+using namespace Common_sp;
+
+
+
+namespace CURL_sp
+{
+  
+  
+  
+// Version of the libcurl library in use; a default (0.0.0) SoftwareVersion if libcurl returns no version information
+SoftwareVersion getLibVersion ()
+{
+  const curl_version_info_data* info = curl_version_info (CURLVERSION_NOW);
+  if (! info)
+    return SoftwareVersion ();
+  const auto packed = info->version_num;  // one byte per component, from high to low: major, minor, patch
+  return SoftwareVersion ( (packed >> 16) & 0xff
+                         , (packed >> 8)  & 0xff
+                         ,  packed        & 0xff);
+}
+
+
+  
+
+// Curl
+
+namespace 
+{
+  // libcurl write callback (see CURLOPT_WRITEFUNCTION in Curl::download()): writes the received chunk to the OFStream passed as userData
+  size_t write_stream_cb (char* ptr,
+                          size_t size, 
+                          size_t nMemb, 
+                          void* userData)
+  {
+    ASSERT (ptr);
+    ASSERT (size == 1);  // hence the chunk is exactly nMemb bytes long
+    ASSERT (userData);
+    
+    OFStream& f = * static_cast <OFStream*> (userData);
+    f. write (ptr, static_cast <std::streamsize> (nMemb));  // one bulk write instead of nMemb single-character insertions
+    // Returning a count different from nMemb makes libcurl abort the transfer, so a failed file write is no longer reported as success
+    return f. good () ? nMemb : 0;
+  }
+
+
+
+  // libcurl write callback (see CURLOPT_WRITEFUNCTION in Curl::read()): appends the received chunk to the string passed as userData
+  size_t write_string_cb (char* ptr,
+                          size_t size, 
+                          size_t nMemb, 
+                          void* userData)
+  {
+    ASSERT (ptr);
+    ASSERT (size == 1);  // hence the chunk is exactly nMemb bytes long
+    ASSERT (userData);
+    
+    string& s = * static_cast <string*> (userData);
+    s. append (ptr, nMemb);  // one bulk append instead of nMemb single-character appends
+    
+    return nMemb;  // the whole chunk has been consumed
+  }
+}
+
+
+ 	
+// Downloads url into the file fName; throws runtime_error if the transfer fails (see process()) or the saved file starts with the token "<?xml"
+void Curl::download (const string &url,
+                     const string &fName) 
+{
+  ASSERT (! fName. empty ());  
+  {
+    OFStream f (fName);  // lives only in this scope, so it is gone before the file is re-opened for reading below
+    curl_easy_setopt (eh, CURLOPT_WRITEFUNCTION, write_stream_cb);
+    curl_easy_setopt (eh, CURLOPT_WRITEDATA, & f);
+    process (url, "download");
+  }
+  // NOTE(review): an XML document in place of the requested data is presumably a server error page - confirm
+  ifstream f (fName);
+  string s;
+  f >> std::setw (6) >> s;  // 6 characters suffice to tell the exact token "<?xml" from a longer one; an unbounded ">>" could load a whole whitespace-free (binary) file into memory
+  if (s == "<?xml")
+    throw runtime_error ("Cannot download " + strQuote (fName));
+}
+
+
+
+// Fetches url and returns everything received as one string; failures are thrown by process()
+string Curl::read (const string &url)
+{
+  string received;  received. reserve (1024);  // PAR  
+  curl_easy_setopt (eh, CURLOPT_WRITEDATA, & received);
+  curl_easy_setopt (eh, CURLOPT_WRITEFUNCTION, write_string_cb);
+  process (url, "read");
+  return received;
+}
+
+
+
+// Performs the transfer of url with the options already set on eh.
+// On failure throws runtime_error naming error_msg_action, url, the CURLcode, the error text and the libcurl version
+void Curl::process (const string &url,
+                    const string &error_msg_action)
+{
+  QC_ASSERT (! url. empty ());  
+  char err [CURL_ERROR_SIZE + 1] = "";
+  curl_easy_setopt (eh, CURLOPT_ERRORBUFFER, err);
+  curl_easy_setopt (eh, CURLOPT_URL, url. c_str ());
+  if (isLeft (url, "ftp://"))
+    curl_easy_setopt (eh, CURLOPT_FTP_USE_EPSV, 0L);  // curl_easy_setopt() is variadic and reads this option as a long
+
+  const CURLcode cc = curl_easy_perform (eh);
+  curl_easy_setopt (eh, CURLOPT_ERRORBUFFER, nullptr);  // err is a local array: the handle must not keep pointing to it after this function ends
+  if (cc)
+  {
+    const SoftwareVersion ver (CURL_sp::getLibVersion ());
+    throw runtime_error ("CURL: Cannot " + error_msg_action
+                         + "\n  from " + url 
+                         + "\n  code=" + to_string (cc) 
+                         + "\n  error: " + (err [0] ? err : curl_easy_strerror (cc))  // libcurl may leave the buffer empty
+                         + "\n  version: " + ver. str ()
+                         );
+  }
+}
+
+
+
+
+}  // namespace
+
+
diff --git a/curl_easy.hpp b/curl_easy.hpp
new file mode 100644
index 0000000..2b05162
--- /dev/null
+++ b/curl_easy.hpp
@@ -0,0 +1,90 @@
+// curl_easy.hpp
+
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE                          
+*               National Center for Biotechnology Information
+*                                                                          
+*  This software/database is a "United States Government Work" under the   
+*  terms of the United States Copyright Act.  It was written as part of    
+*  the author's official duties as a United States Government employee and 
+*  thus cannot be copyrighted.  This software/database is freely available 
+*  to the public for use. The National Library of Medicine and the U.S.    
+*  Government have not placed any restriction on its use or reproduction.  
+*                                                                          
+*  Although all reasonable efforts have been taken to ensure the accuracy  
+*  and reliability of the software and data, the NLM and the U.S.          
+*  Government do not and cannot warrant the performance or results that    
+*  may be obtained by using this software or data. The NLM and the U.S.    
+*  Government disclaim all warranties, express or implied, including       
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.                                                                
+*                                                                          
+*  Please cite the author in any work or product based on this material.   
+*
+* ===========================================================================
+*
+* Author: Vyacheslav Brover
+*
+* Dependencies: curl.{h,c}
+*
+* File Description:
+*   curl_easy functions
+*
+*/
+
+
+#ifdef _MSC_VER
+  #error "UNIX is required"
+#endif
+
+
+#include <unistd.h>
+extern "C" 
+{
+  #include <curl/curl.h>
+    // Linking requires:  -lcurl
+}
+
+#include "common.hpp"
+using namespace Common_sp;
+
+
+
+
+namespace CURL_sp
+{
+  
+  
+SoftwareVersion getLibVersion ();
+
+  
+
+// Owner of a libcurl "easy" handle: created in the constructor, released in the destructor
+struct Curl
+{
+  CURL* eh {nullptr};
+  Curl ()
+    : eh (curl_easy_init ())
+    { if (! eh)
+        throw runtime_error ("Cannot initialize curl_easy");
+    }
+ ~Curl ()
+   { curl_easy_cleanup (eh); }
+  Curl (const Curl&) = delete;  // not copyable: a copy would make curl_easy_cleanup() run twice on the same handle
+  Curl& operator= (const Curl&) = delete;
+
+  void download (const string &url,
+                 const string &fName);
+  string read (const string &url);
+private:
+  void process (const string &url,
+                const string &error_msg_action);
+};
+
+	
+
+
+}  // namespace
+
+
diff --git a/version.txt b/version.txt
index c70edfa..641602f 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-3.11.13
+3.11.14