Skip to content

Commit

Permalink
pb_download_url returns choice of browser or api download urls (#117)
Browse files Browse the repository at this point in the history
Resolves #116.

Adds handling so that `pb_download_url()` can be more useful with private
repos. It is not yet clear how the auth header would be passed in a
cloud-native setup (it may never be), but at least we can now document
ways to skip reading from disk.
  • Loading branch information
tanho63 authored Dec 29, 2023
1 parent 39c665b commit 077a649
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 37 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: piggyback
Version: 0.1.5.9003
Version: 0.1.5.9004
Title: Managing Larger Data on a GitHub Repository
Description: Because larger (> 50 MB) data files cannot easily be committed to git,
a different approach is required to manage data associated with an analysis in a
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ provides the code to create the release in the error body.
before trying API download URLs. This should reduce/eliminate effect of API rate
limits for pb_download. [#109]
* `"latest"` release now aligns with GitHub's "latest" release definition [#113]
* `pb_download_url()` now can return choice of "browser" or "api" download URLs [#116]

# piggyback 0.1.5

Expand Down
26 changes: 9 additions & 17 deletions R/pb_download.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#'
#' @export
#' @examples \donttest{
#' try({ # this try block is to avoid errors on CRAN, not needed for normal use
#' \dontshow{try(\{}
#' ## Download a specific file.
#' ## (if dest is omitted, will write to current directory)
#' dest <- tempdir()
Expand All @@ -29,8 +29,8 @@
#' dest = dest
#' )
#' list.files(dest)
#' })
#' \dontshow{
#' \dontshow{\})}
#' \dontshow{
#' try(unlink(list.files(dest, full.names = TRUE)))
#' }
#' }
Expand Down Expand Up @@ -96,11 +96,9 @@ pb_download <- function(file = NULL,

resp <- lapply(seq_along(df$id), function(i)
gh_download_asset(
download_url = df$browser_download_url[i],
browser_download_url = df$browser_download_url[i],
api_download_url = df$api_download_url[i],
destfile = df$dest[i],
owner = df$owner[1],
repo = df$repo[1],
id = df$id[i],
overwrite = overwrite,
.token = .token,
progress = progress
Expand All @@ -110,11 +108,9 @@ pb_download <- function(file = NULL,

## gh() fails on this, so we do with httr. See https://github.com/r-lib/gh/issues/57
## Consider option to suppress progress bar?
gh_download_asset <- function(download_url,
gh_download_asset <- function(browser_download_url,
destfile,
owner,
repo,
id,
api_download_url,
overwrite = TRUE,
.token = gh::gh_token(),
progress = httr::progress("down")) {
Expand All @@ -140,7 +136,7 @@ gh_download_asset <- function(download_url,
# Attempt download via browser download URL to avoid ratelimiting
resp <- httr::RETRY(
verb = "GET",
url = download_url,
url = browser_download_url,
httr::add_headers(Accept = "application/octet-stream"),
auth_token,
httr::write_disk(destfile, overwrite = overwrite),
Expand All @@ -151,11 +147,7 @@ gh_download_asset <- function(download_url,
if (httr::http_error(resp)){
resp <- httr::RETRY(
verb = "GET",
url = paste0(
"https://",
"api.github.com/repos/", owner, "/",
repo, "/", "releases/assets/", id
),
url = api_download_url,
httr::add_headers(Accept = "application/octet-stream"),
auth_token,
httr::write_disk(destfile, overwrite = overwrite),
Expand Down
62 changes: 53 additions & 9 deletions R/pb_download_url.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,65 @@
#' Get the download url of a given file
#'
#' Returns the URL download for a public file. This can be useful when writing
#' scripts that may want to download the file directly without introducing any
#' dependency on `piggyback` or authentication steps.
#' Returns the URL download for a given file. This can be useful when using
#' functions that are able to accept URLs.
#'
#' @param url_type choice: one of "browser" or "api" - default "browser" is a
#' web-facing URL that is not subject to API ratelimits but does not work for
#' private repositories. "api" URLs work for private repos, but require a GitHub
#' token passed in an Authorization header (see examples)
#' @inheritParams pb_download
#' @return the URL to download a file
#' @export
#' @examples \dontrun{
#' @examples \donttest{
#' \dontshow{try(\{}
#'
#' # returns browser url by default (and all files if none are specified)
#' browser_url <- pb_download_url(
#' repo = "tanho63/piggyback-tests",
#' tag = "v0.0.2"
#' )
#' print(browser_url)
#' utils::read.csv(browser_url[[1]])
#'
#' # can return api url if desired
#' api_url <- pb_download_url(
#' "mtcars.csv",
#' repo = "tanho63/piggyback-tests",
#' tag = "v0.0.2"
#' )
#' print(api_url)
#'
#' pb_download_url("iris.tsv.xz",
#' repo = "cboettig/piggyback-tests",
#' tag = "v0.0.1")
#' # for public repositories, this will still work
#' utils::read.csv(api_url)
#'
#' # for private repos, can use httr or curl to fetch and then pass into read function
#' gh_pat <- Sys.getenv("GITHUB_PAT")
#'
#' if(!identical(gh_pat, "")){
#' resp <- httr::GET(api_url, httr::add_headers(Authorization = paste("Bearer", gh_pat)))
#' utils::read.csv(text = httr::content(resp, as = "text"))
#' }
#'
#' # or use pb_read which bundles some of this for you
#'
#' \dontshow{\})}
#' }
pb_download_url <- function(file = NULL,
repo = guess_repo(),
tag = "latest",
url_type = c("browser","api"),
.token = gh::gh_token()) {
url_type <- rlang::arg_match(url_type, values = c("browser","api"))

df <- pb_info(repo, tag, .token)

if(is.null(file)) return(df$browser_download_url)
if(is.null(file)) {
switch(
url_type,
"browser" = return(df$browser_download_url),
"api" = return(df$api_download_url)
)
}

if(any(!file %in% df$file_name)) {

Expand All @@ -32,5 +72,9 @@ pb_download_url <- function(file = NULL,

if(length(file) == 0) return(cli::cli_abort("No download URLs to return."))

return(df[df$file_name %in% file,"browser_download_url"])
switch(
url_type,
"browser" = return(df$browser_download_url[df$file_name %in% file]),
"api" = return(df$api_download_url[df$file_name %in% file])
)
}
4 changes: 4 additions & 0 deletions R/pb_info.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ get_release_assets <- function(releases, r, .token) {
repo = r[[2]],
upload_url = releases$upload_url[i],
browser_download_url = .extract_chr(a, "browser_download_url"),
api_download_url = glue::glue(
"https://api.github.com/repos/{r[[1]]}/{r[[2]]}/releases/assets/{.extract_int(a, 'id')}"
),
id = .extract_int(a, "id"),
state = .extract_chr(a, "state"),
stringsAsFactors = FALSE
Expand Down Expand Up @@ -143,6 +146,7 @@ pb_info <- function(repo = guess_repo(),
repo = r[[2]],
upload_url = "",
browser_download_url = "",
api_download_url = "",
id = "",
state = "",
stringsAsFactors = FALSE
Expand Down
6 changes: 3 additions & 3 deletions man/pb_download.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 16 additions & 7 deletions man/pb_download_url.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 077a649

Please sign in to comment.