Skip to content

Commit

Permalink
introduce size and dpi parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
oschwengers committed Nov 12, 2024
1 parent 97245fa commit 2e6e428
Showing 1 changed file with 61 additions and 26 deletions.
87 changes: 61 additions & 26 deletions bakta/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ def main():
arg_group_plot.add_argument('--sequences', action='store', default='all', help='Sequences to plot: comma separated number or name (default = all, numbers one-based)')
arg_group_plot.add_argument('--type', action='store', type=str, default=bc.PLOT_FEATURES, choices=[bc.PLOT_FEATURES, bc.PLOT_COG], help=f'Plot type (default = {bc.PLOT_FEATURES})')
arg_group_plot.add_argument('--label', action='store', type=str, default=None, help=f"Plot center label (for line breaks use '|')")
arg_group_plot.add_argument('--size', action='store', type=int, default=6, choices=[4, 8, 16], help='Plot size in inches: 4/8/16 (default = 4)')
arg_group_plot.add_argument('--dpi', action='store', type=int, default=300, choices=[150, 300, 600], help='Plot resolution as dots per inch: 150/300/600 (default = 300)')

arg_group_general = parser.add_argument_group('General')
arg_group_general.add_argument('--help', '-h', action='help', help='Show this help message and exit')
Expand Down Expand Up @@ -133,6 +135,12 @@ def main():
log.info('verbose=%s', cfg.verbose)
plot_type = args.type
log.info('plot-type=%s', plot_type)
plot_label = args.label
log.info('plot-label=%s', plot_label)
plot_size = args.size
log.info('plot-size=%s', plot_size)
plot_dpi = args.dpi
log.info('plot-dpi=%s', plot_dpi)

# check and open configuration file
if args.config is not None:
Expand Down Expand Up @@ -162,6 +170,9 @@ def main():
if(cfg.force): print(f'\tforce: {cfg.force}')
print(f'\ttmp directory: {cfg.tmp_path}')
print(f'\tprefix: {cfg.prefix}')
print(f'\tlabel: {plot_label}')
print(f'\tsize: {plot_size}')
print(f'\tDPI: {plot_dpi}')

if(cfg.debug):
print(f"\nBakta runs in DEBUG mode! Temporary data will not be destroyed at: {cfg.tmp_path}")
Expand Down Expand Up @@ -199,7 +210,7 @@ def main():
print('Draw plots...')
if args.sequences == 'all': # write whole genome plot
print(f'\tdraw circular genome plot (type={plot_type}) containing all sequences...')
write(data, features, output_path, colors, plot_type=plot_type, plot_label=args.label)
write(data, features, output_path, colors, plot_type=plot_type, plot_label=plot_label, plot_size=plot_size, plot_dpi=plot_dpi)
else: # write genome plot containing provided sequences only
plot_sequences = []
sequence_identifiers = []
Expand All @@ -218,14 +229,13 @@ def main():
plot_sequence_ids = [seq['id'] for seq in plot_sequences]
data['features'] = [feat for feat in features if feat['sequence'] in plot_sequence_ids] # reduce feature list in data object
data['sequences'] = [seq for seq in sequences if seq['id'] in plot_sequence_ids] # reduce sequence list in data object
write(data, features, output_path, colors, plot_name_suffix=plot_name_suffix, plot_type=plot_type, plot_label=args.label)
write(data, features, output_path, colors, plot_name_suffix=plot_name_suffix, plot_type=plot_type, plot_label=plot_label, plot_size=plot_size, plot_dpi=plot_dpi)


def write(data, features, output_path, colors=COLORS, plot_name_suffix=None, plot_type=bc.PLOT_FEATURES, plot_label=None):
def write(data, features, output_path, colors=COLORS, plot_name_suffix=None, plot_type=bc.PLOT_FEATURES, plot_label=None, plot_size=300, plot_dpi=300):
sequence_list = insdc.build_biopython_sequence_list(data, features)
for seq in sequence_list: # fix edge features because PyCirclize cannot handle them correctly
seq.features = [feat for feat in seq.features if feat.type != 'gene' and feat.type != 'source']
# seq.features = [feat for feat in seq.features if isinstance(feat.location, FeatureLocation)]# and isinstance(feat.location.start, str) and isinstance(feat.location.end, str)]
for feat in seq.features:
feat_loc = feat.location
if isinstance(feat_loc, CompoundLocation):
Expand All @@ -244,9 +254,9 @@ def write(data, features, output_path, colors=COLORS, plot_name_suffix=None, plo

# select style
if plot_type == bc.PLOT_COG:
plot = build_features_type_cog(data, sequence_list, plot_label, colors)
plot = build_features_type_cog(data, sequence_list, colors, plot_label, plot_size, plot_dpi)
else:
plot = build_features_type_feature(data, sequence_list, plot_label, colors)
plot = build_features_type_feature(data, sequence_list, colors, plot_label, plot_size, plot_dpi)
file_name = cfg.prefix if plot_name_suffix is None else f'{cfg.prefix}_{plot_name_suffix}'
for file_type in ['png', 'svg']:
file_path = output_path.joinpath(f'{file_name}.{file_type}')
Expand Down Expand Up @@ -288,15 +298,21 @@ def build_label(data):
return '\n'.join([lable for lable in label_list if lable is not None])


def build_features_type_feature(data, sequence_list, label, colors):
def build_features_type_feature(data, sequence_list, colors, plot_label, plot_size, plot_dpi):
# Get contig genome seqid & size, features dict
total_sequence_length = sum([len(seq['nt']) for seq in data['sequences']])
seqid2seq = {rec.id:rec.seq for rec in sequence_list}
seqid2size = {rec.id:len(rec.seq) for rec in sequence_list}
seqid2features = {rec.id:rec.features for rec in sequence_list}

if plot_size == 4:
text_size = 6
elif plot_size == 8:
text_size = 16
else:
text_size = 30
circos = Circos(seqid2size, space=2)
circos.text(label, r=7, size=15, linespacing=1.5)
circos.text(plot_label, r=7, size=text_size, linespacing=1.5)
for sector in circos.sectors:
# build tracks
outer_track = sector.add_track((99.5, 100))
Expand All @@ -306,7 +322,7 @@ def build_features_type_feature(data, sequence_list, label, colors):
gc_skew_track = sector.add_track((50, 60))

# plot outer track
build_sequence_backbone_track(sector, outer_track, total_sequence_length, colors)
build_sequence_backbone_track(sector, outer_track, total_sequence_length, colors, plot_size)

# plot feature tracks
for feature in seqid2features[sector.name]:
Expand All @@ -328,30 +344,30 @@ def build_features_type_feature(data, sequence_list, label, colors):
elif feature.type == bc.INSDC_FEATURE_GAP:
track.genomic_features([feature], fc=colors['features'][bc.FEATURE_GAP])
elif feature.type == bc.INSDC_FEATURE_ORIGIN_REPLICATION:
gc_skew_track.xticks([(feature.location.start + feature.location.end)/2], outer=False, label_size=5, labels=['oriC'], label_orientation='vertical') # oriC/V
gc_skew_track.xticks([(feature.location.start + feature.location.end)/2], outer=False, label_size=text_size/2, labels=['oriC'], label_orientation='vertical') # oriC/V
elif feature.type == bc.INSDC_FEATURE_ORIGIN_TRANSFER:
gc_skew_track.xticks([(feature.location.start + feature.location.end)/2], outer=False, label_size=5, labels=['oriT'], label_orientation='vertical') # oriT
gc_skew_track.xticks([(feature.location.start + feature.location.end)/2], outer=False, label_size=text_size/2, labels=['oriT'], label_orientation='vertical') # oriT
else:
track.genomic_features([feature], fc=colors['features']['misc'])

# plot GC content and GC skew
seq = str(seqid2seq[sector.name])
build_gc_content_skew(seq, colors, gc_content_track, gc_skew_track)

fig = circos.plotfig(dpi=600, figsize=(8,8))
build_legend(circos, colors)
fig = circos.plotfig(dpi=plot_dpi, figsize=(plot_size,plot_size))
build_legend(circos, colors, plot_size)
return fig


def build_features_type_cog(data, sequence_list, label, colors):
def build_features_type_cog(data, sequence_list, colors, plot_label, plot_size, plot_dpi):
# Get contig genome seqid & size, features dict
total_sequence_length = sum([len(seq['nt']) for seq in data['sequences']])
seqid2seq = {rec.id:rec.seq for rec in sequence_list}
seqid2size = {rec.id:len(rec.seq) for rec in sequence_list}
seqid2features = {rec.id:rec.features for rec in sequence_list}

circos = Circos(seqid2size, space=2)
circos.text(label, r=5, size=15)
circos.text(plot_label, r=5, size=15)
for sector in circos.sectors:
# build tracks
outer_track = sector.add_track((99.5, 100))
Expand All @@ -362,7 +378,7 @@ def build_features_type_cog(data, sequence_list, label, colors):
gc_skew_track = sector.add_track((45, 55))

# plot outer track
build_sequence_backbone_track(sector, outer_track, total_sequence_length, colors)
build_sequence_backbone_track(sector, outer_track, total_sequence_length, colors, plot_size)

# plot feature tracks
for feature in seqid2features[sector.name]:
Expand Down Expand Up @@ -404,12 +420,12 @@ def build_features_type_cog(data, sequence_list, label, colors):
seq = str(seqid2seq[sector.name])
build_gc_content_skew(seq, colors, gc_content_track, gc_skew_track)

fig = circos.plotfig(dpi=600, figsize=(8,8))
fig = circos.plotfig(dpi=plot_dpi, figsize=(plot_size,plot_size))
build_legend(circos, colors)
return fig


def build_sequence_backbone_track(sector, outer_track, total_sequence_length, colors):
def build_sequence_backbone_track(sector, outer_track, total_sequence_length, colors, plot_size):
outer_track.axis(fc=colors['backbone'])
label_formatter=lambda v: f'{v / 1000:.1f} kbp'
if total_sequence_length >= 1_000_000:
Expand All @@ -425,9 +441,17 @@ def build_sequence_backbone_track(sector, outer_track, total_sequence_length, co
elif total_sequence_length >= 1_000:
major_interval = 1_000
minor_interval = int(major_interval / 10)

if plot_size == 4:
text_size = 4
elif plot_size == 8:
text_size = 8
else:
text_size = 16

if sector.size > minor_interval:
outer_track.xticks_by_interval(major_interval, label_formatter=label_formatter)
outer_track.xticks_by_interval(minor_interval, tick_length=1, show_label=False)
outer_track.xticks_by_interval(major_interval, label_formatter=label_formatter, label_size=text_size)
outer_track.xticks_by_interval(minor_interval, tick_length=1, show_label=False, label_size=text_size)


def build_gc_content_skew(sequence, colors, gc_content_track, gc_skew_track):
Expand All @@ -447,25 +471,36 @@ def build_gc_content_skew(sequence, colors, gc_content_track, gc_skew_track):
gc_skew_track.fill_between(pos_list, negative_gc_skews, 0, vmin=-abs_max_gc_skew, vmax=abs_max_gc_skew, color=colors['gc-skew-negative'])


def build_legend(circos, colors, plot_size=8):
    """Attach the feature / GC legend to the axes of a Circos plot.

    Args:
        circos: Circos object; the legend is added via ``circos.ax.legend``.
        colors: Color mapping. Reads ``colors['features'][<feature type>]``
            for the patch entries and the ``gc-positive``, ``gc-negative``,
            ``gc-skew-positive`` and ``gc-skew-negative`` keys for the
            marker entries.
        plot_size: Plot edge length in inches, used to pick the legend font
            and marker sizes. Defaults to 8, which yields the font size (6)
            and marker size (5) that were hard-coded before this parameter
            existed, so two-argument calls keep working unchanged.
    """
    # (font size, marker size) presets per plot size; any size without a
    # preset falls back to the largest one.
    size_presets = {4: (3, 1), 8: (6, 5)}
    text_size, marker_size = size_presets.get(plot_size, (12, 9))

    # Feature-type patches, in display order.
    feature_entries = (
        (bc.FEATURE_CDS, 'CDS'),
        (bc.FEATURE_T_RNA, 'tRNA'),
        (bc.FEATURE_R_RNA, 'rRNA'),
        (bc.FEATURE_NC_RNA, 'ncRNA'),
        (bc.FEATURE_NC_RNA_REGION, 'ncRNA reg'),
        (bc.FEATURE_CRISPR, 'CRISPR'),
    )
    # GC content / GC skew markers: (color key, legend label, marker symbol).
    gc_entries = (
        ('gc-positive', "+ GC", "^"),
        ('gc-negative', "- GC", "v"),
        ('gc-skew-positive', "+ GC Skew", "^"),
        ('gc-skew-negative', "- GC Skew", "v"),
    )

    handles = [
        Patch(color=colors['features'][feature_type], label=label)
        for feature_type, label in feature_entries
    ]
    handles += [
        Line2D([], [], color=colors[color_key], label=label, marker=marker, ms=marker_size, ls="None")
        for color_key, label, marker in gc_entries
    ]

    _ = circos.ax.legend(
        handles=handles,
        bbox_to_anchor=(0.5, 0.4),
        loc='center',
        ncols=2,
        fontsize=text_size
    )


Expand Down

0 comments on commit 2e6e428

Please sign in to comment.