diff --git a/Project.toml b/Project.toml
index 1c12e9d..ac48a5b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,12 +1,13 @@
 name = "RDatasets"
 uuid = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
-version = "0.7.7"
+version = "0.8.0"
 
 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
+Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 RData = "df47a6cb-8c03-5eed-afd8-b6050d6c41da"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
@@ -16,7 +17,7 @@ CSV = "0.5, 0.6, 0.7, 0.8, 0.9, 0.10"
 CodecZlib = "0.4, 0.5, 0.6, 0.7"
 DataFrames = "0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 1"
 FileIO = "1"
-RData = "0.5, 0.6, 0.7, 0.8"
+RData = "0.5, 0.6, 0.7, 0.8, 1"
 Reexport = "0.2, 1.0"
 julia = "1"
 
diff --git a/README.md b/README.md
index ce0927b..fe345f3 100644
--- a/README.md
+++ b/README.md
@@ -5,15 +5,21 @@
 The RDatasets package provides an easy way for Julia users to experiment with most of the standard data sets that are available in the core of R as well as datasets included with many of R's most popular packages. This package is essentially a simplistic port of the Rdatasets repo created by Vincent Arelbundock, who conveniently gathered data sets from many of the standard R packages in one convenient location on GitHub at https://github.com/vincentarelbundock/Rdatasets
 
 In order to load one of the data sets included in the RDatasets package, you will need to have the `DataFrames` package installed. This package is automatically installed as a dependency of the `RDatasets` package if you install `RDatasets` as follows:
-
-    Pkg.add("RDatasets")
-
+```julia
+Pkg.add("RDatasets")
+```
 After installing the RDatasets package, you can then load data sets using the `dataset()` function, which takes the name of a package and a data set as arguments:
-
-    using RDatasets
-    iris = dataset("datasets", "iris")
-    neuro = dataset("boot", "neuro")
-
+```julia
+using RDatasets
+iris = dataset("datasets", "iris")
+neuro = dataset("boot", "neuro")
+```
+You can also get descriptions of the datasets by calling `RDatasets.description`:
+```julia
+RDatasets.description("datasets", "iris")
+# or
+RDatasets.description(iris) # only use this on DataFrames returned from `dataset`!
+```
 # Data Sets
 
 The `RDatasets.packages()` function returns a table of represented R packages:
@@ -74,6 +80,23 @@ mlmRev|guImmun|Immunization in Guatemala|2159|13
 mlmRev|guPrenat|Prenatal care in Guatemala|2449|15
 mlmRev|star|Student Teacher Achievement Ratio (STAR) project data|26796|18
 
+# How to add datasets from a new package
+
+**Step 1: add the data from the package**
+
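+ 1. In your clone of this repo, create the package's data directory: `mkdir -p data/$PKG`
+ 2. Go to the package's page on CRAN
+ 3. Download the *source package* (the `.tar.gz` archive)
+ 4. Extract one or more of the dataset files (`.rda`, `.RData` or `.csv.gz`) from the archive's `data` directory into the new `data/$PKG` directory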
+
+**Step 2: add the metadata**
+
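+From the root of your clone (the script loads `src/update_doc.r` by relative path and requires R to be installed), run the script: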
+
+     $ scripts/update_doc_one.sh $PKG
+
+Now it's ready for you to submit your pull request.
+
 # Licensing and Intellectual Property
 
 Following Vincent's lead, we have assumed that all of the data sets in this repository can be made available under the GPL-3 license. If you know that one of the datasets released here should not be released publicly or if you know that a data set can only be released under a different license, please contact me so that I can remove the data set from this repository.
diff --git a/doc/datasets.csv b/doc/datasets.csv
index b8081be..8de3ed7 100644
--- a/doc/datasets.csv
+++ b/doc/datasets.csv
@@ -506,6 +506,36 @@
 "datasets","volcano","Topographic Information on Auckland's Maunga Whau Volcano",87,61
 "datasets","warpbreaks","The Number of Breaks in Yarn during Weaving",54,3
 "datasets","women","Average Heights and Weights for American Women",15,2
+"gamair","aral","aral",488,4
+"gamair","aral.bnd","aral.bnd",107,3
+"gamair","bird","bird",25100,7
+"gamair","blowfly","blowfly",180,3
+"gamair","bone","bone",23,4
+"gamair","brain","brain",1567,6
+"gamair","cairo","cairo",3780,7
+"gamair","chicago","chicago",5114,8
+"gamair","chl","chl",13840,7
+"gamair","co2s","co2s",507,4
+"gamair","coast","coast",2091,3
+"gamair","engine","engine",19,3
+"gamair","gas","gas",60,804
+"gamair","harrier","harrier",37,3
+"gamair","hubble","hubble",24,4
+"gamair","ipo","ipo",156,7
+"gamair","mack","mack",634,17
+"gamair","mackp","mackp",1162,9
+"gamair","med","med",1476,25
+"gamair","meh","meh",1476,24
+"gamair","mpg","mpg",205,27
+"gamair","prostate","prostate",654,530
+"gamair","sitka","sitka",1027,6
+"gamair","sole","sole",1575,8
+"gamair","sperm.comp1","sperm.comp1",15,5
+"gamair","sperm.comp2","sperm.comp2",24,11
+"gamair","stomata","stomata",24,4
+"gamair","swer","swer",2196,10
+"gamair","wesdr","wesdr",669,5
+"gamair","wine","wine",47,8
 "gap","PD","A study of Parkinson's disease and APOE, LRRK2, SNCA makers",825,22
 "gap","aldh2","ALDH2 markers and Alcoholism",263,18
 "gap","apoeapoc","APOE/APOC1 markers and Alzheimer's",353,8
@@ -732,33 +762,3 @@
 "vcd","VonBort","Von Bortkiewicz Horse Kicks Data",280,4
 "vcd","WeldonDice","Weldon's Dice Data",11,2
 "vcd","WomenQueue","Women in Queues",11,2
-"gamair","aral.bnd","aral.bnd",107,3
-"gamair","aral","aral",488,4
-"gamair","bird","bird",25100,7
-"gamair","blowfly","blowfly",180,3
-"gamair","bone","bone",23,4
-"gamair","brain","brain",1567,6
-"gamair","cairo","cairo",3780,7
-"gamair","chicago","chicago",5114,8
-"gamair","chl","chl",13840,7
-"gamair","co2s","co2s",507,4
-"gamair","coast","coast",2091,3
-"gamair","engine","engine",19,3
-"gamair","gas","gas",60,804
-"gamair","harrier","harrier",37,3
-"gamair","hubble","hubble",24,4
-"gamair","ipo","ipo",156,7
-"gamair","mack","mack",634,17
-"gamair","mackp","mackp",1162,9
-"gamair","med","med",1476,25
-"gamair","meh","meh",1476,24
-"gamair","mpg","mpg",205,27
-"gamair","prostate","prostate",654,530
-"gamair","sitka","sitka",1027,6
-"gamair","sole","sole",1575,8
-"gamair","sperm.comp1","sperm.comp1",15,5
-"gamair","sperm.comp2","sperm.comp2",24,11
-"gamair","stomata","stomata",24,4
-"gamair","swer","swer",2196,10
-"gamair","wesdr","wesdr",669,5
-"gamair","wine","wine",47,8
diff --git a/scripts/update_doc_all.sh b/scripts/update_doc_all.sh
new file mode 100755
index 0000000..0d8ff1a
--- /dev/null
+++ b/scripts/update_doc_all.sh
@@ -0,0 +1,4 @@
+R --no-save <<END
+source("src/update_doc.r")
+update_docs(".")
+END
diff --git a/scripts/update_doc_one.sh b/scripts/update_doc_one.sh
new file mode 100755
index 0000000..cbebb97
--- /dev/null
+++ b/scripts/update_doc_one.sh
@@ -0,0 +1,4 @@
+R --no-save <<END
+source("src/update_doc.r")
+update_package_doc(".", "$1")
+END
diff --git a/src/RDatasets.jl b/src/RDatasets.jl
index 69d94c2..6b8e133 100644
--- a/src/RDatasets.jl
+++ b/src/RDatasets.jl
@@ -3,6 +3,7 @@ module RDatasets
         @eval Base.Experimental.@optlevel 1
     end
 
+    import Markdown
     using Reexport, RData, CSV, CodecZlib
     @reexport using DataFrames
 
diff --git a/src/dataset.jl b/src/dataset.jl
index 494fe00..1b87ee3 100644
--- a/src/dataset.jl
+++ b/src/dataset.jl
@@ -6,19 +6,151 @@ const Dataset_typedetect_rows = Dict{Tuple{String, String}, Union{Vector,Dict}}(
 
 function dataset(package_name::AbstractString, dataset_name::AbstractString)
     basename = joinpath(@__DIR__, "..", "data", package_name)
-
+    # First, identify possible files
+    rdataname = joinpath(basename, string(dataset_name, ".RData"))
     rdaname = joinpath(basename, string(dataset_name, ".rda"))
-    if isfile(rdaname)
-        return load(rdaname)[dataset_name]
-    end
-
     csvname = joinpath(basename, string(dataset_name, ".csv.gz"))
-    if isfile(csvname)
-        return open(csvname,"r") do io
+    # Then, check to see which exists.  If none exist, error (naming every path that was tried).
+    dataset = if isfile(rdataname)
+        load(rdataname)[dataset_name]
+    elseif isfile(rdaname)
+        load(rdaname)[dataset_name]
+    elseif isfile(csvname)
+        open(csvname,"r") do io
             uncompressed = IOBuffer(read(GzipDecompressorStream(io)))
             DataFrame(CSV.File(uncompressed, delim=',', quotechar='\"', missingstring="NA",
                       types=get(Dataset_typedetect_rows, (package_name, dataset_name), nothing)) )
         end
+    else
+        error("Unable to locate dataset file $rdataname, $rdaname or $csvname")
+    end
+    # Finally, record the origin as metadata (only data frames can carry it; other objects pass through):
+    dataset isa AbstractDataFrame && DataFrames.metadata!(dataset, "RDatasets.jl", (string(package_name), string(dataset_name)))
+    return dataset
+end
+
+
+"""
+    RDatasets.description(package_name::AbstractString, dataset_name::AbstractString)
+    RDatasets.description(df::DataFrame) # only call this on dataframes from RDatasets!
+
+Returns an `RDatasetDescription` object containing the description of the dataset (or the text "No description available." when `doc/<package_name>/<dataset_name>.html` does not exist).
+
+Invoke this function in exactly the same way you would invoke `dataset` to get the dataset itself.
+
+This object prints well in the REPL, and can also be shown as markdown or HTML.
+
+!!! note "Unexported"
+    This function is left deliberately unexported, since the name is pretty common.
+"""
+function description(package_name::AbstractString, dataset_name::AbstractString)
+    doc_html_file = joinpath(@__DIR__, "..", "doc", package_name, "$dataset_name.html")
+    if isfile(doc_html_file)
+        return RDatasetDescription(read(doc_html_file, String))
+    else
+        return RDatasetDescription("No description available.")
+    end
+end
+
+# Convenience method: recover the description from a DataFrame produced by `dataset`.
+# `dataset` stores the `(package_name, dataset_name)` pair under the "RDatasets.jl"
+# metadata key; when that key is absent a warning is logged and the default text returned.
+function description(df::AbstractDataFrame)
+    if !("RDatasets.jl" in DataFrames.metadatakeys(df))
+        @warn "No metadata indicating dataset origin found.  Returning default description."
+        return RDatasetDescription("No description available.")
+    end
+    origin = DataFrames.metadata(df, "RDatasets.jl")
+    package_name, dataset_name = origin
+    return description(package_name, dataset_name)
+end
+
+"""
+    RDatasetDescription(content::String)
+
+A type to hold the content of a dataset description (raw HTML text, or a plain fallback message).
+
+The main purpose of its existence is to provide a way to display the content
+differently in plain-text, markdown and HTML contexts (see its `Base.show` methods).
+
+Invoked through [`RDatasets.description`](@ref).
+"""
+struct RDatasetDescription
+    content::String
+end
+
+function Base.show(io::IO, mime::MIME"text/plain", d::RDatasetDescription)
+    # Convert the stored HTML to markdown text, then hand a parsed Markdown.jl
+    # object to `show` so the REPL can render it the way it does in help-mode.
+    md = Markdown.parse(description_to_markdown(d.content))
+    show(io, mime, md)
+end
+function Base.show(io::IO, mime::MIME"text/markdown", d::RDatasetDescription)
+    s = description_to_markdown(d.content)
+    # Emit the markdown source itself (useful e.g. for documentation tooling).  Base has no
+    # `show(io, ::MIME"text/markdown", ::String)` method, so the text is written with `print`.
+    print(io, s)
+end
+# This returns raw HTML documentation: the stored content is wrapped in `Docs.HTML` and shown as-is, without conversion.
+function Base.show(io::IO, mime::MIME"text/html", d::RDatasetDescription)
+    show(io, mime, Docs.HTML(d.content))
+end
+
+
+"""
+    description_to_markdown(string::String)
+
+Converts an HTML string to markdown.  Written specifically for the HTML descriptions
+in RDatasets.jl, and so a bit opinionated on what to replace, etc.
+
+It replaces all known HTML tags using regex, removes all other HTML tags, and finally
+decodes the basic HTML entities and collapses runs of blank lines.
+
+## Behaviour
+
+Currently, it handles the following HTML tags:
+- `<h1>`, `<h2>`, `<h3>`, `<h4>`, `<h5>`, `<h6>` -> `#`, `##`, `###`, `####`, `#####`, `######`
+- `<title>` -> `#`
+- `<code>` -> `` `code` ``
+- `<pre>` -> "```R\\npre\\n```"
+- `<EM>` -> `*EM*`
+- `<B>` -> `**B**`
+- `&ndash;` -> `-`
+- `&lt;`, `&gt;`, `&quot;`, `&amp;` -> `<`, `>`, `"`, `&` (after tag removal, so decoded text is never stripped)
+
+## TODOs
+- Tables
+- Links
+- Images
+"""
+function description_to_markdown(string)
+    html_header_regex = r"<h(?'hnum'\d)>(?'content'[^<]+)<\/h\g'hnum'>"
+    function regexmatch2md(matched_string)
+        m = match(html_header_regex, matched_string)
+        if isnothing(m.captures[1]) || isnothing(m.captures[2])
+            return matched_string
+        end
+        # `<hN>` becomes N `#` characters, set off by newlines so the heading starts its own block
+        hnum = parse(Int, m[:hnum])
+        content = m[:content]
+        return join(("\n", "#"^hnum, " ", content, "\n\n"))
     end
-    error("Unable to locate dataset file $rdaname or $csvname")
+    title_matcher_regex = r"<title>(?'content'[^<]+)<\/title>"
+    code_matcher_regex = r"<code>(?'content'[^<]+)<\/code>"
+    pre_matcher_regex = r"<pre>(?'content'[^<]+)<\/pre>"
+    emph_matcher_regex = r"<(?i)EM(?-i)>(?'content'[^<]+)<\/(?i)EM(?-i)>"
+    b_matcher_regex = r"<(?i)B(?-i)>(?'content'[^<]+)<\/(?i)B(?-i)>"
+    new_string = replace(
+        string, 
+        html_header_regex => regexmatch2md, 
+        title_matcher_regex => titlestr -> "# " * match(title_matcher_regex, titlestr)[:content],
+        code_matcher_regex => codestr -> "`" * match(code_matcher_regex, codestr)[:content] * "`",
+        pre_matcher_regex => prestr -> "\n```R\n" * match(pre_matcher_regex, prestr)[:content] * "\n```\n",
+        emph_matcher_regex => emphstr -> "*" * match(emph_matcher_regex, emphstr)[:content] * "*",
+        b_matcher_regex => bstr -> "**" * match(b_matcher_regex, bstr)[:content] * "**",
+        "&ndash;" => "-",
+    )
+    # Strip leftover tags *before* decoding entities, so decoded text such as `x <- y > 1` is never mistaken for a tag.
+    nohtml = replace(new_string, Regex("<[^>]*>") => "")
+    return replace(nohtml, Regex("\n\n+") => "\n\n", "&lt;" => "<", "&gt;" => ">", "&quot;" => "\"", "&amp;" => "&")
 end
diff --git a/src/update_doc.r b/src/update_doc.r
index 1ac023a..aecda68 100644
--- a/src/update_doc.r
+++ b/src/update_doc.r
@@ -1,3 +1,129 @@
+# Install `packages` (character vector), falling back to a default CRAN mirror when none is configured.
+install_packages <- function(packages) {
+    r <- getOption("repos")
+    if (is.null(r) || is.na(r["CRAN"]) || r["CRAN"] == "@CRAN@") { # unset, no CRAN entry, or placeholder
+        r <- "https://cran.rstudio.com/"
+    }
+    suppressWarnings({install.packages(packages, repos = r)})
+}
+
+if (!requireNamespace("R2HTML", quietly = TRUE)) install_packages(c("R2HTML")) # skip the download when already installed
+library(R2HTML)
+
+# Write doc/<package>/<dataset>.html from the dataset's Rd help page; a dataset without a help page is skipped with a warning.
+write_doc <- function(package, dataset) {
+    doc <- tryCatch(utils:::.getHelpFile(help(eval(dataset), package = eval(package))), error = function(e) NULL)
+    if (is.null(doc)) { warning("No help page for ", package, "/", dataset, call. = FALSE); return(invisible(NULL)) }
+    dir.create(file.path("doc", package), showWarnings = FALSE, recursive = TRUE) # no error if it already exists
+    tools::Rd2HTML(doc, out = file.path("doc", package, paste0(dataset, ".html")))
+}
+
+# Collect the documentation rows for one package and write its per-dataset HTML help.
+#
+# data_dir: directory with one sub-directory of dataset files per package.
+# package_df, dataset_df: accumulators; one package row and one row per dataset are appended.
+# old_dataset_df: previous dataset table, used as the fallback source of Rows/Columns.
+# mismatched_dims_df: accumulator for datasets whose loaded dims disagree with the old table.
+# package: name of the (installed) R package to process.
+# Returns list(package_df, dataset_df, mismatched_dims_df) -- R copies arguments on
+# modification, so callers must pick the updated frames out of this list.
+do_package_update <- function(data_dir, package_df, old_dataset_df, dataset_df, mismatched_dims_df, package) {
+    suppressWarnings({library(package, character.only = TRUE)})
+
+    # Get package description
+    pdesc <- packageDescription(package)
+    new_row <- as.data.frame(pdesc[c("Package", "Title")], stringsAsFactors = FALSE)
+    package_df <- rbind(package_df, new_row)
+
+    pdat <- data(package = package)$results
+    datasets <- dir(path = file.path(data_dir, package))
+
+    # Trim filenames to dataset names
+    r <- "(.+)\\.(csv\\.gz|rda|RData)$"
+    format_recognized <- grepl(r, datasets)
+    if (!all(format_recognized)) {
+        # paste(), not cat(): cat() prints to stdout and returns NULL, leaving the message empty
+        stop("Unrecognized formats:\n", paste(datasets[!format_recognized], collapse = "\n"))
+    }
+    datanames <- sub(r, "\\1", datasets)
+
+    for (dataname in datanames) {
+        # `list =` takes the name as a string, so no code has to be built and parsed
+        data(list = dataname, package = package)
+        ds <- get(dataname)
+
+        write_doc(package, dataname)
+
+        # Get dataset description
+        title <- unique(pdat[, "Title"][pdat[, "Item"] == dataname])
+        if (length(title) != 1) {
+            stop(package, "/", dataname, " had ", length(title), " descriptions.")
+        }
+
+        # Old dims to fall back on
+        old_row <- subset(old_dataset_df, Dataset == dataname & Package == package)
+        nr <- if (nrow(old_row)) old_row$Rows[[1]] else NA
+        nc <- if (nrow(old_row)) old_row$Columns[[1]] else NA
+
+        # Check against new dims when simple
+        new_nr <- NROW(ds)
+        new_nc <- NCOL(ds)
+        # inherits() stays a single TRUE/FALSE even when class(ds) has several entries
+        if (!inherits(ds, c("table", "ltraj", "list")) &&
+            is.numeric(new_nr) && is.numeric(new_nc)) {
+            expected_cols <- c(nc, nc - 1) # row.names sometimes included
+            if (!is.numeric(nr) || !is.numeric(nc) || is.na(nr) || is.na(nc)) {
+                nr <- new_nr
+                nc <- new_nc
+            } else if (new_nr != nr || !(new_nc %in% expected_cols)) {
+                new_row <- data.frame(Package = package,
+                                      Dataset = dataname,
+                                      Class = paste(class(ds), collapse = "/"),
+                                      OldRows = nr,
+                                      OldColumns = nc,
+                                      NewRows = new_nr,
+                                      NewColumns = new_nc)
+                mismatched_dims_df <- rbind(mismatched_dims_df, new_row)
+            }
+        }
+
+        new_row <- data.frame(Package = package,
+                              Dataset = dataname,
+                              Title = title,
+                              Rows = nr,
+                              Columns = nc,
+                              stringsAsFactors = FALSE)
+        dataset_df <- rbind(dataset_df, new_row)
+    }
+    list(package_df = package_df, dataset_df = dataset_df, mismatched_dims_df = mismatched_dims_df)
+}
+
+# Refresh doc/packages.csv and doc/datasets.csv for a single package.
+#
+# pkg_dir: root of the RDatasets checkout (holds the `data` and `doc` directories).
+# package: name of the R package to (re)document; it is installed first.
+# Returns the data frame of datasets whose dimensions disagree with the old table.
+update_package_doc <- function(pkg_dir, package) {
+    package_fn <- file.path(pkg_dir, "doc", "packages.csv")
+    dataset_fn <- file.path(pkg_dir, "doc", "datasets.csv")
+    existing_packages <- read.csv(package_fn)
+    existing_datasets <- read.csv(dataset_fn)
+    install_packages(c(package))
+    updated <- do_package_update(
+        file.path(pkg_dir, "data"),
+        existing_packages,
+        existing_datasets,
+        existing_datasets,
+        data.frame(),
+        package
+    )
+    package_df <- sort_upper_first(clean(updated$package_df), c("Package"))
+    dataset_df <- sort_upper_first(clean(updated$dataset_df), c("Package", "Dataset"))
+    write(package_df, package_fn)
+    write(dataset_df, dataset_fn)
+    return(updated$mismatched_dims_df)
+}
+
 update_docs <- function(pkg_dir) {
     data_dir <- file.path(pkg_dir, "data")
     doc_dir <- file.path(pkg_dir, "doc")
@@ -16,90 +142,11 @@ update_docs <- function(pkg_dir) {
     # Install any missing packages
     new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
     if (length(new_packages)) {
-        # Pick a decent mirror if none set
-        r <- getOption("repos")
-        if (r["CRAN"] == "@CRAN@") {
-            r <- "http://cran.rstudio.com/"
-        }
-        suppressWarnings({install.packages(new_packages, repos = r)})
+        install_packages(new_packages)
     }
 
     for (package in packages) {
-        suppressWarnings({library(package, character.only = TRUE)})
-
-        # Get package description
-        pdesc <- packageDescription(package)
-        new_row <- as.data.frame(pdesc[c("Package", "Title")],
-                                 stringsAsFactors = FALSE)
-        package_df <- rbind(package_df, new_row)
-
-        pdat <- data(package=package)$results
-
-        datasets <- dir(path = file.path(data_dir, package))
-
-        # Trim filenames to dataset names
-        r <- "(.+)\\.(csv\\.gz|rda)$"
-        format_recognized <- grepl(r, datasets)
-        if (!(all(format_recognized))) {
-            stop("Unrecognized formats:\n",
-                 cat(datasets[!format_recognized], sep = "\n"))
-        }
-        datanames <- sub(r, "\\1", datasets)
-
-        for (dataname in datanames) {
-            eval(parse(text = paste0("data(", dataname, ", package=package)")))
-            ds <- get(dataname)
-
-            # TODO: Write rst and html doc per dataset
-
-            # Get dataset description
-            title <- unique(pdat[, "Title"][pdat[, "Item"] == dataname])
-            if (length(title) != 1) {
-                stop(package, "/", title, " had ", length(title), " descriptions.")
-            }
-
-            # Old dims to fall back on
-            old_row = subset(old_dataset_df,
-                             Dataset == dataname & Package == package)
-            nr <- if (nrow(old_row)) old_row$Rows[[1]] else NA
-            nc <- if (nrow(old_row)) old_row$Columns[[1]] else NA
-
-            # Check against new dims when simple
-            new_nr <- NROW(ds)
-            new_nc <- NCOL(ds)
-            if (!(any(c("table", "ltraj") %in% class(ds))) &&
-                class(ds) != "list" &&
-                is.numeric(new_nr) &&
-                is.numeric(new_nc)) {
-
-                expected_cols <- c(nc, nc - 1) # row.names sometimes included
-                if (!is.numeric(nr) || !is.numeric(nc)) {
-                    nr <- new_nr
-                    nc <- new_nc
-                } else if (new_nr != nr || !(new_nc %in% expected_cols)) {
-
-                    new_row <- data.frame(Package = package,
-                                          Dataset = dataname,
-                                          Class = class(ds),
-                                          OldRows = nr,
-                                          OldColumns = nc,
-                                          NewRows = new_nr,
-                                          NewColumns = new_nc)
-
-                    mismatched_dims_df <- rbind(mismatched_dims_df, new_row)
-
-                }
-            }
-
-            new_row <- data.frame(Package = package,
-                                  Dataset = dataname,
-                                  Title = title,
-                                  Rows = nr,
-                                  Columns = nc,
-                                  stringsAsFactors = FALSE)
-
-            dataset_df <- rbind(dataset_df, new_row)
-        }
+        list2env(do_package_update(data_dir, package_df, old_dataset_df, dataset_df, mismatched_dims_df, package), envir = environment()) # copy the returned package_df/dataset_df/mismatched_dims_df back into this frame; the call alone discards them
     }
 
     stopifnot(nrow(dataset_df) > 0)
@@ -114,7 +161,7 @@ update_docs <- function(pkg_dir) {
 }
 
 write <- function(df, fn) {
-    write.table(df, file = fn, sep = ",", qmethod = "escape", row.names = FALSE)
+    write.table(df, file = fn, sep = ",", qmethod = "double", row.names = FALSE) # embedded quotes are written doubled ("") instead of backslash-escaped
 }
 
 clean <- function(df) {