Skip to content

Commit

Permalink
Enable use of cloud reference files (#1804)
Browse files Browse the repository at this point in the history
Support references in the cloud, and cloud-enable a few more tools.
  • Loading branch information
cmnbroad authored Feb 5, 2024
1 parent a638ce9 commit 1abb61f
Show file tree
Hide file tree
Showing 28 changed files with 1,564 additions and 429 deletions.
11 changes: 10 additions & 1 deletion src/main/java/picard/cmdline/CommandLineProgram.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.argumentcollections.RequiredReferenceArgumentCollection;
import picard.nio.PathProvider;
import picard.nio.PicardHtsPath;
import picard.util.RExecutor;

import java.io.File;
Expand Down Expand Up @@ -333,7 +334,15 @@ protected boolean parseArgs(final String[] argv) {
if (!ret) {
return false;
}
REFERENCE_SEQUENCE = referenceSequence.getReferenceFile();

// Set the REFERENCE_SEQUENCE for tools that are not nio-aware and still depend on a File object
// (as opposed to using the preferred method of calling referenceSequence.getHtsPath()). Note
// that if the URI scheme for the reference is something other than "file", the REFERENCE_SEQUENCE
// object created by this code path won't be valid - but we still have to set it here in case
// the tool tries to access REFERENCE_SEQUENCE directly (such tools will subsequently fail given
// a non-local file anyway, but this prevents them from immediately throwing an NPE).
final PicardHtsPath refHtsPath = referenceSequence.getHtsPath();
REFERENCE_SEQUENCE = ReferenceArgumentCollection.getFileSafe(refHtsPath, Log.getInstance(this.getClass()));

// The TMP_DIR setting section below was moved from instanceMain() to here due to timing issues
// related to checking whether R is installed. Certain programs, such as CollectInsertSizeMetrics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,33 @@
package picard.cmdline.argumentcollections;

import htsjdk.samtools.Defaults;
import htsjdk.samtools.util.Log;
import org.broadinstitute.barclay.argparser.Argument;
import picard.cmdline.StandardOptionDefinitions;
import picard.nio.PicardHtsPath;

import java.io.File;
import java.nio.file.Path;

/**
* Picard default argument collection for an optional reference.
*/
public class OptionalReferenceArgumentCollection implements ReferenceArgumentCollection {
private final static Log log = Log.getInstance(OptionalReferenceArgumentCollection.class);

@Argument(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc = "Reference sequence file.", common = true, optional = true)
public File REFERENCE_SEQUENCE = Defaults.REFERENCE_FASTA;
public PicardHtsPath REFERENCE_SEQUENCE = Defaults.REFERENCE_FASTA == null ? null : new PicardHtsPath(Defaults.REFERENCE_FASTA.getAbsolutePath());

/**
* @return The reference provided by the user, if any, or the default defined by {@code htsjdk.samtools.Defaults.REFERENCE_FASTA}
*/
@Override
public File getReferenceFile() {
return ReferenceArgumentCollection.getFileSafe(REFERENCE_SEQUENCE, log);
}

@Override
public Path getReferencePath() { return REFERENCE_SEQUENCE == null ? null: REFERENCE_SEQUENCE.toPath(); }

@Override
public PicardHtsPath getHtsPath() {
return REFERENCE_SEQUENCE;
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,68 @@
package picard.cmdline.argumentcollections;

import java.io.File;
import java.nio.file.Path;

import htsjdk.samtools.util.Log;
import picard.nio.PicardBucketUtils;
import picard.nio.PicardHtsPath;

/**
* Base interface for a reference argument collection.
*/
public interface ReferenceArgumentCollection {
/**
* @return The reference provided by the user, if any, or the default, if any.
* This method is retained for backward compatibility with legacy tools that have not been updated to support PicardHtsPath input files.
* The preferred methods for accessing the reference file provided on the command line is either getHtsPath() or getReferencePath().
*
* TODO: update tools that call this method to use getHtsPath()
*
* @return The reference provided by the user, or the default defined by {@code htsjdk.samtools.Defaults.REFERENCE_FASTA}. May be null.
*/
File getReferenceFile();

/**
* This method first checks if the PicardHtsPath is null, thereby avoiding NPE that results from getHtsPath.toPath().
* Use this for providing input to methods that expect the Path to be null when the reference is absent e.g. SamReaderFactory.referenceSequence().
*
* @return The reference provided by the user or the default as an nio Path. May be null.
*/
default Path getReferencePath(){
return getHtsPath() == null ? null : getHtsPath().toPath();
}

/**
* Returns a PicardHtsPath for the reference input. Maybe be null. Implementers of this interface should override with an appropriate implementation.
* This currently does not support remote paths set via HtsJdk.Defaults.REFERENCE_FASTA, since that uses a `File` object.
*
* @return The reference provided by the user, if any, or the default, if any, as a PicardHtsPath. May be null.
*/
default PicardHtsPath getHtsPath(){
return getReferenceFile() == null ? null : new PicardHtsPath(getReferenceFile());
}

/**
* @return A "safe" File object for any reference path.
*
* For files that reside on a local file system, this returns a valid File object. Files that reside on
* a non-local file system can't be accessed via a File object, so return a placeholder File object that
* will defer failure until the File object is actually accessed. This allows code paths that blindly propagate
* the value returned by calls to getReferenceFile to not get an NPE, and to fail gracefully downstream
* with an error message that includes the reference file specifier.
*/
static File getFileSafe(final PicardHtsPath picardPath, final Log log) {
if (picardPath == null) {
return null;
} else if (picardPath.getScheme().equals(PicardBucketUtils.FILE_SCHEME)) {
// file on a local file system
return picardPath.toPath().toFile();
} else {
log.warn(String.format(
"The reference specified by %s cannot be used as a local file object",
picardPath.getRawInputString()));
// toPath().toFile() would throw here, so use the File constructor to create a placeholder
// object and defer failure until downstream code attempts to actually use the File object
return new File(picardPath.getRawInputString());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,31 @@

package picard.cmdline.argumentcollections;

import htsjdk.samtools.util.Log;
import org.broadinstitute.barclay.argparser.Argument;
import picard.cmdline.StandardOptionDefinitions;
import picard.nio.PicardHtsPath;

import java.io.File;
import java.nio.file.Path;

/**
* Argument collection for references that are required (and not common).
*/
public class RequiredReferenceArgumentCollection implements ReferenceArgumentCollection {
private final static Log log = Log.getInstance(RequiredReferenceArgumentCollection.class);

@Argument(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc = "Reference sequence file.", common = false)
public File REFERENCE_SEQUENCE;
@Argument(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc = "Reference sequence file.")
public PicardHtsPath REFERENCE_SEQUENCE;

/**
* @return The reference provided by the user.
*/
public File getReferenceFile() {
return REFERENCE_SEQUENCE;
};
return ReferenceArgumentCollection.getFileSafe(REFERENCE_SEQUENCE, log);
}

@Override
public Path getReferencePath() { return REFERENCE_SEQUENCE == null ? null: REFERENCE_SEQUENCE.toPath(); }

@Override
public PicardHtsPath getHtsPath() { return REFERENCE_SEQUENCE; }

}
4 changes: 2 additions & 2 deletions src/main/java/picard/nio/DeleteRecursive.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

/**
*
* Copied from GATK; to be removed once the original GATK code is ported to htsjdk
* Adapted from GATK.
*
* Class to hold a set of {@link Path} to be delete on the JVM exit through a shutdown hook.
*
Expand Down Expand Up @@ -58,7 +58,7 @@ static void runHooks() {
Collections.reverse(toBeDeleted);
for (Path path : toBeDeleted) {
try {
GATKIOUtils.deleteRecursively(path);
PicardIOUtils.deleteRecursively(path);
} catch (final Exception e) {
// do nothing if itcannot be deleted, because it is a shutdown hook
LOG.debug(e, "Could not recursively delete ", path.toString(), " during JVM shutdown because we encountered the following exception:");
Expand Down
160 changes: 0 additions & 160 deletions src/main/java/picard/nio/GATKBucketUtils.java

This file was deleted.

Loading

0 comments on commit 1abb61f

Please sign in to comment.