Skip to content

Latest commit

 

History

History
73 lines (51 loc) · 3.5 KB

README.md

File metadata and controls

73 lines (51 loc) · 3.5 KB

Volcano Plot in Python

Inspired by the R package EnhancedVolcano (kevinblighe/EnhancedVolcano).

image

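Input: a pandas DataFrame with the columns `log2FC`, `pvals_adj` and `nlog10_pval_adj`; the DataFrame index supplies the text used for the dot labels.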

image

import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from adjustText import adjust_text

def volcanoplot(res=None, pval_cutoff=0.95, pval_colour_threshold=0.05, log2FC_colour_threshold=1, pval_label_cutoff=0.05, log2FC_label_cutoff=0.5, dotsize=4, title='Volcano Plot'):
    """Draw a volcano plot (log2 fold change vs. -log10 adjusted p-value).

    Points are drawn on the current matplotlib axes in four colour groups,
    split by ``pval_colour_threshold`` and ``log2FC_colour_threshold``, and
    the points passing both label cutoffs are annotated with their index
    value.

    Args:
        res: pandas DataFrame with the columns ``log2FC``, ``pvals_adj`` and
            ``nlog10_pval_adj``; its index provides the label text. When
            omitted, a module-level variable named ``rank`` is used if one
            exists (this keeps the behaviour of earlier versions, which
            always read that global).
        pval_cutoff: rows with ``pvals_adj`` above this value are dropped
            before plotting, as these swamp the plot.
        pval_colour_threshold: adjusted p-value threshold for colouring dots.
        log2FC_colour_threshold: absolute log2FC threshold for colouring dots.
        pval_label_cutoff: adjusted p-value cutoff for labelling dots.
        log2FC_label_cutoff: absolute log2FC cutoff for labelling dots.
        dotsize: marker size passed to ``plt.plot`` as ``ms``.
        title: plot title.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can continue with
        ``.show()`` / ``.savefig()``.

    Raises:
        ValueError: if ``res`` is not given and no module-level ``rank``
            is defined.
    """
    if res is None:
        # Backward compatibility: older versions ignored the argument and
        # always plotted the global `rank`.
        res = globals().get('rank')
        if res is None:
            raise ValueError(
                "volcanoplot() needs a DataFrame: pass it as `res` "
                "(no module-level `rank` is defined to fall back on)."
            )

    # Exclude all genes above pval_cutoff.
    toplot = res[res['pvals_adj'] <= pval_cutoff]

    # Compute every mask once instead of once per axis.
    abs_fc = toplot['log2FC'].abs()
    not_significant = toplot['pvals_adj'] > pval_colour_threshold
    significant = toplot['pvals_adj'] <= pval_colour_threshold
    small_fc = abs_fc < log2FC_colour_threshold
    large_fc = abs_fc >= log2FC_colour_threshold

    threshold_text = str(log2FC_colour_threshold)
    groups = [
        (not_significant & small_fc, '#808080', 'NS & log2FC < ' + threshold_text),        # grey
        (not_significant & large_fc, '#1a9641', 'NS & log2FC >= ' + threshold_text),       # green
        (significant & small_fc, '#6495ED', 'Sign. & log2FC < ' + threshold_text),         # blue
        (significant & large_fc, '#FF3131', 'Sign. & log2FC >= ' + threshold_text),        # red
    ]
    for mask, colour, legend_label in groups:
        subset = toplot[mask]
        plt.plot(subset['log2FC'], subset['nlog10_pval_adj'], 'o',
                 color=colour, alpha=.6, ms=dotsize, label=legend_label)

    # Axis labels etc.
    plt.xlabel('log2FC')
    plt.ylabel('-log10(p)')
    plt.title(title)
    plt.legend(frameon=True, fontsize=12)

    # Dot labels: annotate points passing both label cutoffs with their index.
    to_label = toplot[(toplot['pvals_adj'] <= pval_label_cutoff)
                      & (abs_fc >= log2FC_label_cutoff)]
    texts = [
        plt.text(x, y, name)
        for x, y, name in zip(to_label['log2FC'],
                              to_label['nlog10_pval_adj'],
                              to_label.index)
    ]

    # Nudge the labels apart so they do not overlap; thin lines connect each
    # label to its point.
    adjust_text(texts, force_text=(0.1, 0.1), arrowprops=dict(arrowstyle="-", lw=1))

    return plt