Skip to content

Commit

Permalink
Merge pull request #274 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
merge from devel to master to create Release 2.20.0
  • Loading branch information
mgcam authored Jun 12, 2020
2 parents baeeb18 + 683d371 commit cd9388f
Show file tree
Hide file tree
Showing 15 changed files with 347 additions and 14 deletions.
6 changes: 6 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
Release 2.20.0

- When archiving logs, skip large .err files in archive/tmp_.. directories.
- PacBio tweak to support Traction barcode identifiers and loading of older analyses.

Release 2.19.0

- PacBio run loading: add options, as in other scripts, to support loading
of older runs.
- PacBio tweak to analysis loading to support traction barcode identifiers.

Release 2.18.0

Expand Down
26 changes: 21 additions & 5 deletions bin/npg_pacbio_analysis_monitor.pl
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,23 @@
use Getopt::Long;
use Log::Log4perl qw[:levels];
use Pod::Usage;

use Readonly;

use WTSI::DNAP::Warehouse::Schema;
use WTSI::NPG::iRODS;
use WTSI::NPG::HTS::PacBio::Sequel::AnalysisMonitor;

our $VERSION = '';

Readonly::Scalar my $DEFAULT_INTERVAL_DAYS => 14;
Readonly::Scalar my $DEFAULT_OLDER_THAN_DAYS => 0;

my $api_uri;
my $collection;
my $debug;
my $interval = $DEFAULT_INTERVAL_DAYS;
my $log4perl_config;
my $older_than = $DEFAULT_OLDER_THAN_DAYS;
my $pipeline_name;
my $task_name;
my $verbose;
Expand All @@ -30,7 +35,9 @@
'help' => sub {
pod2usage(-verbose => 2, -exitval => 0);
},
'interval=i' => \$interval,
'logconf=s' => \$log4perl_config,
'older-than|older_than=i' => \$older_than,
'pipeline-name|pipeline_name=s' => \$pipeline_name,
'task-name|task_name=s' => \$task_name,
'api-uri|api_uri=s' => \$api_uri,
Expand All @@ -50,8 +57,11 @@
my $irods = WTSI::NPG::iRODS->new;
my $wh_schema = WTSI::DNAP::Warehouse::Schema->connect;

my @init_args = (irods => $irods,
mlwh_schema => $wh_schema);
my @init_args = (interval => $interval,
irods => $irods,
mlwh_schema => $wh_schema,
older_than => $older_than,
);
if ($collection) {
push @init_args, dest_collection => $collection;
}
Expand Down Expand Up @@ -96,8 +106,8 @@ =head1 NAME
=head1 SYNOPSIS
npg_pacbio_analysis_monitor
[--collection <path>] [--debug] [--logconf <path>]
[--pipeline_name <name>] [--task_name <name>]
[--collection <path>] [--debug] [--interval days] [--logconf <path>]
[--older-than days] [--pipeline_name <name>] [--task_name <name>]
[--api-uri] [--verbose]
Options:
Expand All @@ -106,7 +116,13 @@ =head1 SYNOPSIS
--debug Enable debug level logging. Optional, defaults to
false.
--help Display help.
--interval Interval of time in days for analysis loading.
Optional, defaults to 14.
--logconf A log4perl configuration file. Optional.
--older-than
--older_than Only consider analyses older than a specified number of
days. Optional, defaults to 0 days.
--pipeline-name
--pipeline_name The SMRT Link pipeline name. Optional.
--task-name
Expand Down
4 changes: 2 additions & 2 deletions lib/WTSI/NPG/HTS/Illumina/LogPublisher.pm
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ sub publish_logs {
q[-a -prune \\)];

# find specific files in viv directories - use -prune to stop -path
# .. matching subdirectories
# .. matching subdirectories. Add -size -10M to skip large .err files
my $find_p4 =
q[find . -type f ] .
q[find . -type f -size -10M ] .
q[-a \\( -path "*/tmp_[0-9]*" -a -prune \\) ] .
q[-a \\( -name "*.err" -o -name "*.log" -o -name "*.json" -o -name "*_Log*out" \\)];

Expand Down
18 changes: 17 additions & 1 deletion lib/WTSI/NPG/HTS/PacBio/Sequel/AnalysisPublisher.pm
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,13 @@ sub publish_sequence_files {
my $tag_id = $self->_get_tag_from_fname($filename);

if ($tag_id) {
@tag_records = $self->find_pacbio_runs
my @tag_id_records = $self->find_pacbio_runs
($self->_metadata->run_name, $self->_metadata->well_name, $tag_id);

@tag_records = (@tag_id_records == 1) ? @tag_id_records :
$self->find_pacbio_runs($self->_metadata->run_name,
$self->_metadata->well_name,
$self->_get_tag_name_from_fname($filename));
} else {
$self->_is_allowed_fname($filename, \@FNAME_PERMITTED) or
$self->logcroak("Unexpected file name for $file");
Expand Down Expand Up @@ -261,6 +266,7 @@ sub _build_metadata{
}

sub _get_tag_from_fname {
# SequenceScape tag id is just the numeric part of the name
my ($self, $file) = @_;
my $tag_id;
if ($file =~ /bc(\d+).*bc(\d+)/smx){
Expand All @@ -270,6 +276,16 @@ sub _get_tag_from_fname {
return $tag_id;
}

sub _get_tag_name_from_fname {
  # Traction identifies a tag by its full tag name. Capture the name that
  # sits between a literal '.' and the '--' separator of the file name.
  # The returned value is undefined when the file name does not match.
  my ($self, $file) = @_;

  # A match in list context yields the captured group, or an empty list
  # on failure, which leaves $tag_name undefined.
  my ($tag_name) = $file =~ m{[.] (\w+\d+\S+) [-] [-]}smx;

  return $tag_name;
}

sub _is_allowed_fname {
my ($self, $file, $fnames) = @_;
my @exists = grep { $file =~ m{[.] $_ [.]}smx } @{ $fnames };
Expand Down
Loading

0 comments on commit cd9388f

Please sign in to comment.