Skip to content

Commit

Permalink
pb_download_url returns choice of browser or api download urls (#117)
Browse files Browse the repository at this point in the history
Resolves #116.

Adds handling so that `pb_download_url()` can be more useful with private
repos. It is not yet clear how the auth header would be passed in a
cloud-native setup (it may never be), but at least we can now document
ways to skip reading from disk.
  • Loading branch information
tanho63 authored Dec 29, 2023
1 parent 39c665b commit 077a649
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 37 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: piggyback
Version: 0.1.5.9003
Version: 0.1.5.9004
Title: Managing Larger Data on a GitHub Repository
Description: Because larger (> 50 MB) data files cannot easily be committed to git,
a different approach is required to manage data associated with an analysis in a
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ provides the code to create the release in the error body.
before trying API download URLs. This should reduce/eliminate effect of API rate
limits for pb_download. [#109]
* `"latest"` release now aligns with GitHub's "latest" release definition [#113]
* `pb_download_url()` now can return choice of "browser" or "api" download URLs [#116]

# piggyback 0.1.5

Expand Down
26 changes: 9 additions & 17 deletions R/pb_download.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#'
#' @export
#' @examples \donttest{
#' try({ # this try block is to avoid errors on CRAN, not needed for normal use
#' \dontshow{try(\{}
#' ## Download a specific file.
#' ## (if dest is omitted, will write to current directory)
#' dest <- tempdir()
Expand All @@ -29,8 +29,8 @@
#' dest = dest
#' )
#' list.files(dest)
#' })
#' \dontshow{
#' \dontshow{\})}
#' \dontshow{
#' try(unlink(list.files(dest, full.names = TRUE)))
#' }
#' }
Expand Down Expand Up @@ -96,11 +96,9 @@ pb_download <- function(file = NULL,

resp <- lapply(seq_along(df$id), function(i)
gh_download_asset(
download_url = df$browser_download_url[i],
browser_download_url = df$browser_download_url[i],
api_download_url = df$api_download_url[i],
destfile = df$dest[i],
owner = df$owner[1],
repo = df$repo[1],
id = df$id[i],
overwrite = overwrite,
.token = .token,
progress = progress
Expand All @@ -110,11 +108,9 @@ pb_download <- function(file = NULL,

## gh() fails on this, so we do with httr. See https://github.com/r-lib/gh/issues/57
## Consider option to suppress progress bar?
gh_download_asset <- function(download_url,
gh_download_asset <- function(browser_download_url,
destfile,
owner,
repo,
id,
api_download_url,
overwrite = TRUE,
.token = gh::gh_token(),
progress = httr::progress("down")) {
Expand All @@ -140,7 +136,7 @@ gh_download_asset <- function(download_url,
# Attempt download via browser download URL to avoid ratelimiting
resp <- httr::RETRY(
verb = "GET",
url = download_url,
url = browser_download_url,
httr::add_headers(Accept = "application/octet-stream"),
auth_token,
httr::write_disk(destfile, overwrite = overwrite),
Expand All @@ -151,11 +147,7 @@ gh_download_asset <- function(download_url,
if (httr::http_error(resp)){
resp <- httr::RETRY(
verb = "GET",
url = paste0(
"https://",
"api.github.com/repos/", owner, "/",
repo, "/", "releases/assets/", id
),
url = api_download_url,
httr::add_headers(Accept = "application/octet-stream"),
auth_token,
httr::write_disk(destfile, overwrite = overwrite),
Expand Down
62 changes: 53 additions & 9 deletions R/pb_download_url.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,65 @@
#' Get the download url of a given file
#'
#' Returns the URL download for a public file. This can be useful when writing
#' scripts that may want to download the file directly without introducing any
#' dependency on `piggyback` or authentication steps.
#' Returns the URL download for a given file. This can be useful when using
#' functions that are able to accept URLs.
#'
#' @param url_type choice: one of "browser" or "api" - default "browser" is a
#' web-facing URL that is not subject to API ratelimits but does not work for
#' private repositories. "api" URLs work for private repos, but require a GitHub
#' token passed in an Authorization header (see examples)
#' @inheritParams pb_download
#' @return the URL to download a file
#' @export
#' @examples \dontrun{
#' @examples \donttest{
#' \dontshow{try(\{}
#'
#' # returns browser url by default (and all files if none are specified)
#' browser_url <- pb_download_url(
#' repo = "tanho63/piggyback-tests",
#' tag = "v0.0.2"
#' )
#' print(browser_url)
#' utils::read.csv(browser_url[[1]])
#'
#' # can return api url if desired
#' api_url <- pb_download_url(
#' "mtcars.csv",
#' repo = "tanho63/piggyback-tests",
#' tag = "v0.0.2"
#' )
#' print(api_url)
#'
#' pb_download_url("iris.tsv.xz",
#' repo = "cboettig/piggyback-tests",
#' tag = "v0.0.1")
#' # for public repositories, this will still work
#' utils::read.csv(api_url)
#'
#' # for private repos, can use httr or curl to fetch and then pass into read function
#' gh_pat <- Sys.getenv("GITHUB_PAT")
#'
#' if(!identical(gh_pat, "")){
#' resp <- httr::GET(api_url, httr::add_headers(Authorization = paste("Bearer", gh_pat)))
#' utils::read.csv(text = httr::content(resp, as = "text"))
#' }
#'
#' # or use pb_read which bundles some of this for you
#'
#' \dontshow{\})}
#' }
pb_download_url <- function(file = NULL,
repo = guess_repo(),
tag = "latest",
url_type = c("browser","api"),
.token = gh::gh_token()) {
url_type <- rlang::arg_match(url_type, values = c("browser","api"))

df <- pb_info(repo, tag, .token)

if(is.null(file)) return(df$browser_download_url)
if(is.null(file)) {
switch(
url_type,
"browser" = return(df$browser_download_url),
"api" = return(df$api_download_url)
)
}

if(any(!file %in% df$file_name)) {

Expand All @@ -32,5 +72,9 @@ pb_download_url <- function(file = NULL,

if(length(file) == 0) return(cli::cli_abort("No download URLs to return."))

return(df[df$file_name %in% file,"browser_download_url"])
switch(
url_type,
"browser" = return(df$browser_download_url[df$file_name %in% file]),
"api" = return(df$api_download_url[df$file_name %in% file])
)
}
4 changes: 4 additions & 0 deletions R/pb_info.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ get_release_assets <- function(releases, r, .token) {
repo = r[[2]],
upload_url = releases$upload_url[i],
browser_download_url = .extract_chr(a, "browser_download_url"),
api_download_url = glue::glue(
"https://api.github.com/repos/{r[[1]]}/{r[[2]]}/releases/assets/{.extract_int(a, 'id')}"
),
id = .extract_int(a, "id"),
state = .extract_chr(a, "state"),
stringsAsFactors = FALSE
Expand Down Expand Up @@ -143,6 +146,7 @@ pb_info <- function(repo = guess_repo(),
repo = r[[2]],
upload_url = "",
browser_download_url = "",
api_download_url = "",
id = "",
state = "",
stringsAsFactors = FALSE
Expand Down
6 changes: 3 additions & 3 deletions man/pb_download.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 16 additions & 7 deletions man/pb_download_url.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 077a649

Please sign in to comment.