Skip to content

reproduce ggphylo example using ggtree

Guangchuang Yu edited this page Sep 8, 2015 · 7 revisions

reproduce ggphylo example using ggtree

ggphylo example

ggphylo supports plotting a list of phylo objects.

require(ape)
require(ggphylo)

tree.list = rmtree(3, 20)
ggphylo(tree.list)

And plotting data along the tree:

n <- 40;  x <- rtree(n); n.nodes <- length(nodes(x))
bootstraps <- 100 - rexp(n.nodes, rate=5) * 100
pop.sizes <- pmax(0, rnorm(n.nodes, mean=50000, sd=50000))

draw_ggphylo <- function(x, bootstraps, pop.sizes) {
    for (i in nodes(x)) {
	x <- tree.set.tag(x, i, 'bootstrap', bootstraps[i])
	x <- tree.set.tag(x, i, 'pop.size', pop.sizes[i])
    }
    plot.args <- list(
        x,
        line.color.by='bootstrap',
        line.color.scale=scale_colour_gradient(limits=c(50, 100), low='red', high='black'), 
        node.size.by='pop.size',
        node.size.scale = scale_size_continuous(limits=c(0, 100000), 
            range=c(1, 5)), label.size=2
	)
    do.call(ggphylo, plot.args)
}

draw_ggphylo(x, bootstraps, pop.sizes)

reproduce using ggtree

In ggtree, multiPhylo object is supported.

require(ggplot2)
require(ggtree)

ggtree(tree.list, ladderize=F, aes(color=.id)) + facet_wrap(~.id) + 
    geom_point() + geom_text(subset=.(!isTip), aes(label=node), hjust=-.2, size=3) + 
        geom_tiplab(size=4, hjust=-.2) 

In the ggphylo example, they set attributes based on internal node number. In our opinion, this is not a perfect and flexible way, since most of the users have little idea of how the node numbers are assigned to the internal nodes and taxa.

In ggtree, we created %<+% operator to add related information. It requires the first column to be taxa label. This seems to be more easily used since users already have the taxon labels.

To reproduce this case, we can employ raxml class, which is designed to parse RAxML bootstrapping analysis output, to store the bootstraps and pop.sizes.

draw_ggphylo_ggtree <- function(x, bootstraps, pop.sizes) {
    xx <- new("raxml", phylo=x, bootstrap=data.frame(node=1:getNodeNum(x), 
                                    bootstrap=bootstraps, 
                                    pop.size=pop.sizes)
              )
    
    ggtree(xx, aes(color=bootstrap), ladderize=F, size=1.2) + 
        geom_point(aes(size=pop.size), subset=.(bootstrap>50), color="black") + 
            geom_text(aes(label=node), subset=.(!isTip), size=3, color="grey50", hjust=-.5) + 
                geom_tiplab(color="black", hjust=-.5, size=3) +           
                    scale_color_gradient(limits=c(50, 100), low="red", high="black")  + 
                        scale_size_continuous(limits=c(0, 100000), range=c(1, 5)) +
                            theme_tree2(legend.position="right") 
}

draw_ggphylo_ggtree(x, bootstraps, pop.sizes)

benchmark

require(microbenchmark)

bm <- microbenchmark(
    ggphylo = draw_ggphylo(x, bootstraps, pop.sizes),
    ggtree = draw_ggphylo_ggtree(x, bootstraps, pop.sizes),
    times=100L
    )
print(bm)
## Unit: milliseconds
##    expr        min         lq       mean     median         uq        max
## ggphylo 1230.81977 1266.51639 1283.54395 1284.05280 1294.22355 1467.62436
##  ggtree   30.40235   31.70079   33.48648   32.67046   34.05156   42.88493
## neval
##   100
##   100
qplot(y=time, data=bm, color=expr) + scale_y_log10() + ggtitle("run time comparison")