diff --git a/.gitignore b/.gitignore index f649fbe..916aced 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,5 @@ .DS_Store data/ glittr_stats_files/ - +_freeze/ /.quarto/ diff --git a/_freeze/index/execute-results/html.json b/_freeze/index/execute-results/html.json deleted file mode 100644 index ef7774c..0000000 --- a/_freeze/index/execute-results/html.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "hash": "d580c88c9f70559d1e94573ba7c90ccb", - "result": { - "markdown": "---\ntitle: \"Glittr stats\"\nformat: \n html:\n code-fold: true\n---\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(httr2)\nlibrary(ggplot2)\nlibrary(dplyr)\nlibrary(ggbreak)\nlibrary(cowplot)\n```\n:::\n\n\nCreate a file named `.env.sh` and add your GitHub PAT (variable named `PAT` ) and google api key (named `GOOGLE_API_KEY`) in there, e.g.:\n\n\n::: {.cell}\n\n```{.bash .cell-code}\n# this is an example, store it as .env.R:\nexport AT=\"ghp_aRSRESCTZII20Lklser3H\"\nexport GOOGLE_API_KEY=\"AjKSLE5SklxuRsxwPP8s0\"\n```\n:::\n\n\nNow source this file to get the keys as objects:\n\n\n::: {.cell}\n\n```{.bash .cell-code}\nsource .env\n```\n:::\n\n\nGet environment variables as R objects:\n\n\n::: {.cell}\n\n```{.r .cell-code}\npat <- Sys.getenv(\"PAT\")\ngoogle_api_key <- Sys.getenv(\"GOOGLE_API_KEY\")\n```\n:::\n\n\n## Set colors\n\nThese are the same colors as on glittr.org\n\n\n::: {.cell}\n\n```{.r .cell-code}\nglittr_cols <- c(\n \"Scripting and languages\" = \"#3a86ff\",\n \"Computational methods and pipelines\" = \"#fb5607\",\n \"Omics analysis\" = \"#ff006e\",\n \"Reproducibility and data management\" = \"#ffbe0b\",\n \"Statistics and machine learning\" = \"#8338ec\",\n \"Others\" = \"#000000\")\n```\n:::\n\n\n## Parse repository data\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# get all repositories content as nested list\nparsed <- request(\"https://glittr.org/api/repositories\") |>\n req_perform() |> resp_body_json()\n\n# extract relevant items as dataframe\nrepo_info_list <- lapply(parsed$data, function(x) data.frame(\n repo = x$name,\n author_name = x$author$name,\n stargazers = x$stargazers,\n recency = x$days_since_last_push,\n url = x$url,\n license = ifelse(is.null(x$license), \"none\", x$license),\n main_tag = x$tags[[1]]$name,\n main_category = x$tags[[1]]$category\n))\n\nrepo_info <- do.call(rbind, repo_info_list)\n\n# create a column with provider (either github or gitlab)\nrepo_info$provider <- ifelse(grepl(\"github\", repo_info$url), \"github\", \"gitlab\")\n\n# create a factor for categories for sorting\nrepo_info$main_category <- factor(repo_info$main_category,\n levels = names(glittr_cols))\n\n# category table to keep order the same in the plots\ncat_table <- table(category = repo_info$main_category)\ncat_table <- sort(cat_table)\n```\n:::\n\n\nNumber of repositories: 568\n\n## Get contributors info\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# take long time to run, so try to use cache results if no repos have been \n# added in the meantime\n\n# check if data/n_contributors.rds exists\nif(file.exists(\"data/n_contributors.rds\")) {\n n_contributors <- readRDS(\"data/n_contributors.rds\")\n}\n\n# get contributors info only from github repos\nrepo_info_gh <- repo_info[repo_info$provider == \"github\", ]\n\n# get contributor info from github api if update is needed\nif(!identical(sort(repo_info_gh$repo), sort(names(n_contributors)))) {\n dir.create(\"data\", showWarnings = FALSE)\n n_contributors <- sapply(repo_info_gh$repo, function(x) {\n \n # get repo contributors\n resp <- request(\"https://api.github.com/repos/\") |>\n req_url_path_append(x) |>\n req_url_path_append(\"contributors\") |>\n req_url_query(per_page = 1) |>\n req_headers(\n Accept = \"application/vnd.github+json\",\n Authorization = paste(\"Bearer\", pat),\n `X-GitHub-Api-Version` = \"2022-11-28\",\n ) |>\n req_perform() \n \n link_url <- resp_link_url(resp, \"last\")\n if(is.null(link_url)) {\n return(1)\n } else {\n npages <- strsplit(link_url, \"&page=\")[[1]][2] |> as.numeric()\n return(npages)\n }\n })\n \n # overwrite rds file\n saveRDS(n_contributors, \"data/n_contributors.rds\")\n}\n\nrepo_info_gh$contributors <- n_contributors[repo_info_gh$repo]\n```\n:::\n\n\n## Get country information\n\n\n::: {.cell}\n\n:::\n\n\n- Number of authors: 263\n- Number of countries: 25\n\n## Parse tag data\n\n\n::: {.cell}\n\n```{.r .cell-code}\nparsed <- request(\"https://glittr.org/api/tags\") |>\n req_perform() |> resp_body_json()\n\ntag_dfs <- list()\nfor(i in seq_along(parsed)) {\n category <- parsed[[i]]$category\n name <- sapply(parsed[[i]]$tags, function(x) x$name)\n repositories <- sapply(parsed[[i]]$tags, function(x) x$repositories)\n tag_dfs[[category]] <- data.frame(name, category, repositories)\n}\n\ntag_df <- do.call(rbind, tag_dfs) |> arrange(repositories)\n```\n:::\n\n\nNumber of tags/topics: 55\n\n## Categories\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncat_count_plot <- table(category = repo_info$main_category) |>\n as.data.frame() |>\n ggplot(aes(x = reorder(category, Freq), y = Freq, fill = category)) +\n geom_bar(stat = \"identity\") +\n scale_fill_manual(values = glittr_cols) +\n coord_flip() +\n theme_classic() +\n ggtitle(\"Categories\") +\n theme(legend.position = \"none\",\n axis.title.y = element_blank()) +\n ylab(\"Number of repositories\")\n\nprint(cat_count_plot)\n```\n\n::: {.cell-output-display}\n![](index_files/figure-html/unnamed-chunk-10-1.png){width=672}\n:::\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\ncategory_count <- table(category = repo_info$main_category) |> as.data.frame()\nknitr::kable(category_count)\n```\n\n::: {.cell-output-display}\n|category | Freq|\n|:-----------------------------------|----:|\n|Scripting and languages | 314|\n|Computational methods and pipelines | 54|\n|Omics analysis | 84|\n|Reproducibility and data management | 51|\n|Statistics and machine learning | 40|\n|Others | 23|\n:::\n:::\n\n\n## Licensing\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlic_freq_data <- table(license = repo_info$license,\n main_category = repo_info$main_category) |>\n as.data.frame()\n\nlic_freq_data$main_category <- factor(lic_freq_data$main_category,\n levels = names(cat_table))\n\nlic_freq_plot <- lic_freq_data |>\n ggplot(aes(x = reorder(license, Freq), y = Freq, fill = main_category)) +\n geom_bar(stat = \"identity\") +\n coord_flip() +\n scale_fill_manual(values = glittr_cols) +\n theme_classic() +\n ggtitle(\"License type\") +\n ylab(\"Number of repositories\") +\n theme(legend.position = \"none\",\n axis.title.y = element_blank())\n\nprint(lic_freq_plot)\n```\n\n::: {.cell-output-display}\n![](index_files/figure-html/unnamed-chunk-12-1.png){width=672}\n:::\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\nrepo_info$license |>\n table() |>\n as.data.frame() |>\n mutate(perc = round(Freq/nrow(repo_info)*100, 1)) |>\n arrange(desc(Freq)) |>\n knitr::kable()\n```\n\n::: {.cell-output-display}\n|Var1 | Freq| perc|\n|:------------|----:|----:|\n|other | 211| 37.3|\n|none | 172| 30.4|\n|mit | 59| 10.4|\n|cc-by-sa-4.0 | 30| 5.3|\n|gpl-3.0 | 26| 4.6|\n|cc-by-4.0 | 23| 4.1|\n|cc0-1.0 | 22| 3.9|\n|apache-2.0 | 10| 1.8|\n|bsd-3-clause | 7| 1.2|\n|agpl-3.0 | 2| 0.4|\n|artistic-2.0 | 2| 0.4|\n|unlicense | 1| 0.2|\n|wtfpl | 1| 0.2|\n:::\n:::\n\n\n## Authors\n\n\n::: {.cell}\n\n```{.r .cell-code}\nauthor_freq <- table(author_name = repo_info$author_name, \n main_category = repo_info$main_category) |>\n as.data.frame()\n\nauthor_freq$main_category <- factor(author_freq$main_category,\n levels = names(cat_table))\n\nrepos_per_author <- table(repo_info$author_name)\n\nlf_authors <- names(repos_per_author)[repos_per_author < 5]\n\nauthor_freq_plot <- author_freq |>\n filter(!author_name %in% lf_authors) |>\n arrange(Freq) |>\n ggplot(aes(x = reorder(author_name, Freq), y = Freq, fill = main_category)) +\n geom_bar(stat = \"identity\") +\n coord_flip() +\n ggtitle(\"Author or organization\") +\n ylab(\"Number of repositories\") +\n scale_fill_manual(values = glittr_cols) +\n annotate(geom = \"text\", x = 2, y = 30,\n label = paste(\"Authors with < 5 repos: \",\n length(lf_authors)),\n color=\"black\") +\n theme_classic() +\n theme(legend.position = \"none\",\n axis.title.y = element_blank())\n\nprint(author_freq_plot)\n```\n\n::: {.cell-output-display}\n![](index_files/figure-html/unnamed-chunk-14-1.png){width=672}\n:::\n:::\n\n\n## Tags\n\n\n::: {.cell}\n\n```{.r .cell-code}\ntag_freq_plot <- tag_df |>\n filter(repositories > 10) |>\n ggplot(aes(x = reorder(name, repositories),\n y = repositories, fill = category)) +\n geom_bar(stat = \"identity\") +\n coord_flip() +\n scale_fill_manual(values = glittr_cols) +\n ggtitle(\"Tags with > 10 repositories\") +\n ylab(\"Number of repositories\") +\n annotate(geom = \"text\", x = 2, y = 150,\n label = paste(\"Total number of tags: \",\n nrow(tag_df)),\n color=\"black\") +\n theme_classic() +\n theme(legend.position = \"none\",\n axis.title.y = element_blank())\n\nprint(tag_freq_plot)\n```\n\n::: {.cell-output-display}\n![](index_files/figure-html/unnamed-chunk-15-1.png){width=672}\n:::\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\ntag_df |>\n filter(repositories > 10) |>\n arrange(desc(repositories)) |>\n knitr::kable(row.names = FALSE)\n```\n\n::: {.cell-output-display}\n|name |category | repositories|\n|:--------------------------|:-----------------------------------|------------:|\n|R |Scripting and languages | 230|\n|Python |Scripting and languages | 81|\n|Transcriptomics |Omics analysis | 74|\n|RNA-seq |Omics analysis | 65|\n|Statistics |Statistics and machine learning | 54|\n|Next generation sequencing |Omics analysis | 48|\n|Data science |Statistics and machine learning | 45|\n|Machine learning |Statistics and machine learning | 41|\n|Genomics |Omics analysis | 37|\n|Unix/Linux |Scripting and languages | 34|\n|Single-cell sequencing |Omics analysis | 34|\n|Data management |Reproducibility and data management | 34|\n|Reproducibility |Reproducibility and data management | 29|\n|FAIR data |Reproducibility and data management | 28|\n|General |Others | 28|\n|Data visualization |Scripting and languages | 27|\n|Variant analysis |Omics analysis | 23|\n|Version control |Scripting and languages | 21|\n|Containerization |Computational methods and pipelines | 18|\n|Workflows |Computational methods and pipelines | 17|\n|Shiny |Scripting and languages | 15|\n|Metagenomics |Omics analysis | 15|\n|Docker |Computational methods and pipelines | 13|\n|Julia |Scripting and languages | 12|\n|Nextflow |Computational methods and pipelines | 12|\n|ChIP-seq |Omics analysis | 11|\n:::\n:::\n\n\n## Contributors boxplot\n\n\n::: {.cell}\n\n```{.r .cell-code}\nrepo_info_gh$main_category <- factor(repo_info_gh$main_category,\n levels = names(cat_table))\n\ncontributors_plot <- repo_info_gh |>\n ggplot(aes(x = main_category, y = contributors, fill = main_category)) +\n geom_violin(scale = \"width\") +\n geom_boxplot(width = 0.1, col = \"darkgrey\") +\n coord_flip() +\n ggtitle(\"Contributors\") +\n ylab(\"Number of contributors\") +\n scale_y_sqrt() +\n scale_fill_manual(values = glittr_cols) +\n theme_bw() +\n theme(legend.position = \"none\",\n axis.title.y = element_blank(),\n plot.margin = margin(t = 5, r = 10, b = 5, l = 10))\n\nprint(contributors_plot)\n```\n\n::: {.cell-output-display}\n![](index_files/figure-html/unnamed-chunk-17-1.png){width=672}\n:::\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\nnna_contr <- repo_info_gh$contributors\nparam1 <- sum(nna_contr > 10)/length(nna_contr)\n# 27.3% have more than 10 contributors\nparam2 <- sum(nna_contr > 1)/length(nna_contr)\n# 78.6% have more than one contributor\n# 115 repos with only one contributor\nparam3 <- sum(nna_contr <= 5)/length(nna_contr)\n```\n:::\n\n\n- More than 10 contributors: 25.6%\n- More than 1 contributor: 78.8%\n- Between 1 and 5 contributors: 59.9%\n\n## Countries\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncountry_freq <- table(country = repo_info$country, \n main_category = repo_info$main_category) |>\n as.data.frame()\n\ncountry_freq$main_category <- factor(country_freq$main_category,\n levels = names(cat_table))\n\ncountry_freq_plot <- country_freq |>\n filter(country != \"undefined\") |>\n ggplot(aes(x = reorder(country, Freq), y = Freq, fill = main_category)) +\n geom_bar(stat = \"identity\") +\n coord_flip() +\n ggtitle(\"Country\") +\n ylab(\"Number of repositories\") +\n scale_fill_manual(values = glittr_cols) +\n annotate(geom = \"text\", x = 2, y = 70,\n label = paste(\"Repos with undefined country: \",\n sum(repo_info$country == \"undefined\")),\n color=\"black\") +\n theme_classic() +\n theme(legend.position = \"none\",\n axis.title.y = element_blank())\n\nprint(country_freq_plot)\n```\n\n::: {.cell-output-display}\n![](index_files/figure-html/unnamed-chunk-19-1.png){width=672}\n:::\n:::\n\n::: {.cell}\n\n```{.r .cell-code}\nrepo_info$country |> table() |> as.data.frame() |> arrange(desc(Freq)) |> knitr::kable()\n```\n\n::: {.cell-output-display}\n|Var1 | Freq|\n|:--------------|----:|\n|undefined | 244|\n|United States | 132|\n|Switzerland | 27|\n|Canada | 26|\n|Sweden | 21|\n|United Kingdom | 19|\n|Australia | 15|\n|Germany | 12|\n|France | 11|\n|Netherlands | 11|\n|Portugal | 11|\n|Belgium | 10|\n|Spain | 8|\n|Denmark | 4|\n|Italy | 3|\n|Bulgaria | 2|\n|Ireland | 2|\n|Argentina | 1|\n|China | 1|\n|Finland | 1|\n|India | 1|\n|Luxembourg | 1|\n|Norway | 1|\n|Poland | 1|\n|Ukraine | 1|\n:::\n:::\n\n\n\n## Summary plot\n\n\n::: {.cell}\n\n```{.r .cell-code}\npdf(\"grid_plot_fig1.pdf\", width = 10, height = 10)\nplot_grid(cat_count_plot, contributors_plot, \n tag_freq_plot, author_freq_plot, \n lic_freq_plot, country_freq_plot,\n ncol = 2, labels = LETTERS[1:6],\n rel_heights = c(2,3,3))\ndev.off()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nquartz_off_screen \n 2 \n```\n:::\n:::\n", - "supporting": [ - "index_files" - ], - "filters": [ - "rmarkdown/pagebreak.lua" - ], - "includes": {}, - "engineDependencies": {}, - "preserve": {}, - "postProcess": true - } -} \ No newline at end of file diff --git a/_freeze/index/figure-html/unnamed-chunk-10-1.png b/_freeze/index/figure-html/unnamed-chunk-10-1.png deleted file mode 100644 index 27db13c..0000000 Binary files a/_freeze/index/figure-html/unnamed-chunk-10-1.png and /dev/null differ diff --git a/_freeze/index/figure-html/unnamed-chunk-12-1.png b/_freeze/index/figure-html/unnamed-chunk-12-1.png deleted file mode 100644 index 2c02a48..0000000 Binary files a/_freeze/index/figure-html/unnamed-chunk-12-1.png and /dev/null differ diff --git a/_freeze/index/figure-html/unnamed-chunk-14-1.png b/_freeze/index/figure-html/unnamed-chunk-14-1.png deleted file mode 100644 index d1aea07..0000000 Binary files a/_freeze/index/figure-html/unnamed-chunk-14-1.png and /dev/null differ diff --git a/_freeze/index/figure-html/unnamed-chunk-15-1.png b/_freeze/index/figure-html/unnamed-chunk-15-1.png deleted file mode 100644 index 30dfe40..0000000 Binary files a/_freeze/index/figure-html/unnamed-chunk-15-1.png and /dev/null differ diff --git a/_freeze/index/figure-html/unnamed-chunk-17-1.png b/_freeze/index/figure-html/unnamed-chunk-17-1.png deleted file mode 100644 index 1178bb4..0000000 Binary files a/_freeze/index/figure-html/unnamed-chunk-17-1.png and /dev/null differ diff --git a/_freeze/index/figure-html/unnamed-chunk-19-1.png b/_freeze/index/figure-html/unnamed-chunk-19-1.png deleted file mode 100644 index e7f3201..0000000 Binary files a/_freeze/index/figure-html/unnamed-chunk-19-1.png and /dev/null differ diff --git a/_freeze/site_libs/clipboard/clipboard.min.js b/_freeze/site_libs/clipboard/clipboard.min.js deleted file mode 100644 index 1103f81..0000000 --- a/_freeze/site_libs/clipboard/clipboard.min.js +++ /dev/null @@ -1,7 +0,0 @@ -/*! - * clipboard.js v2.0.11 - * https://clipboardjs.com/ - * - * Licensed MIT © Zeno Rocha - */ -!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=1