From e231ebb385f7672c3c1899e00732fc998745e985 Mon Sep 17 00:00:00 2001 From: Erwan Date: Fri, 3 May 2024 03:28:31 +0200 Subject: [PATCH] fix: jekyll-offline works again, no issue with URI:Module and the code is simplified --- .gitignore | 1 + README.md | 72 +++++++++++++++++++++++++++----------------- config.yml | 13 ++------ demo.yml | 12 ++------ html_to_offline.rb | 66 ++++++++++++++++++++++++++++++++++++++++ jekyll_offline.rb | 75 ---------------------------------------------- lib_rellinks.rb | 66 ---------------------------------------- main.rb | 44 +++++++++++++++++++++++++++ 8 files changed, 161 insertions(+), 188 deletions(-) create mode 100644 .gitignore create mode 100644 html_to_offline.rb delete mode 100755 jekyll_offline.rb delete mode 100644 lib_rellinks.rb create mode 100755 main.rb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7021c84 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +demo_offline \ No newline at end of file diff --git a/README.md b/README.md index a7ca94d..0054103 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,67 @@ # Jekyll Offline - Turn any Jekyll site into an offline application with relative links -[Jekyll Offline](https://dohliam.github.io/jekyll-offline) creates a copy of a specified Jekyll-based website and rewrites all of the internal links as relative URLs so that the site can be viewed from a local machine without requiring access to the Internet. +### Personal note +This repository is a fork waiting to (maybe) be merged to the first repository [Jekyll Offline](https://dohliam.github.io/jekyll-offline) created. -Usually, Jekyll sites can be viewed either by uploading the generated site files to a remote server or locally using the `jekyll serve` command, which hosts the site temporarily on a local server. However, it is not always possible or practical to do this (for example, on a phone or other mobile device). 
As static sites, Jekyll sites are well-suited to running offline and usually present no special challenges (with the exception of some [known issues](#issues)) other than that all of the resources and links on a typical web page are relative to the root of the server, rather than the user's computer file structure. +It fixes the undefined method `encode' for URI:Module from the other repository (it was deprecated). -So long as the Jekyll-generated site code is available, the Jekyll executable itself is not even required for this script to work. +The code is a bit simplified so it should be easier to maintain, but I could have broken things, feel free to add issues and pull requests if you want to correct problems. + +Also I didn't understand every argument of the original config file, so maybe there are fewer features. + +## Jekyll Offline +Creates a copy of a specified Jekyll-based website and rewrites all internal links as relative URLs so that the site can be viewed from a local machine without the need for Internet access. + +All we need is the `_site` folder generated by Jekyll during a build for this script to work. + +Typically, Jekyll sites can be viewed either by uploading the generated site files to a remote server or locally using the `jekyll serve` command, which temporarily hosts the site on a local server. + +As Jekyll is a static site generator, page references can easily be replaced by local references. This ensures that local pages are always linked offline. + +With Jekyll Offline, we modify all resources and links according to the url of the Jekyll-based website, so external links (http requests not linked to the original website or e-mails) keep working. ## Requirements -The script (`jekyll_offline.rb`) can be used directly and does not need to be installed. There are no prerequisites other than [Ruby](https://www.ruby-lang.org/). +The script (`main.rb`) can be used directly and does not need to be installed. 
There are no prerequisites other than [Ruby](https://www.ruby-lang.org/). If you have the source code for a Jekyll site that has not been generated yet, you will need to [install Jekyll](https://jekyllrb.com/) first and then build the site: +``` cd my_jekyll_site/ jekyll build +``` -You can then point your Jekyll Offline configuration file at the resulting `_site` folder to convert it to a fully functoning offline site (for details, see the [Usage](#usage) section below). +That's it! ## Usage +### Demo Clone or download the repository and enter the following command in a terminal from within the repo main directory: - ./jekyll_offline.rb demo.yml + `./main.rb demo.yml` + +You just created a new folder: `demo_offline` in the same directory. -This will create a new folder named `demo_offline` in the same directory. This folder contains the offline version of the default Jekyll demo site. You can visit it by opening the file `START_HERE.html` within the `demo_offline` folder in a web browser. +This folder has the same structure as your source directory and is an offline version of the default Jekyll demo site. -To create your own new offline site, simply adjust the variables in the `config.yml` file and run the script again: +### - ./jekyll_offline +To create your own new offline site, add your variables in the `config.yml`: + - `source` : the path to the _site + - `target` : you will create your site offline version inside this directory + - `site_url` : url of the original site (it doesn't have to be up, we only remove this url inside the `href` and `src` attributes of html elements) -The configuration file is assumed to be `config.yml` by default, so it does not need to be specified unless you are using a different file. +Now you can run the script: + `./main.rb` -Note that the `config.yml` file should point to the generated `_site` folder of a Jekyll website, and not the unprocessed Jekyll source code. 
+The configuration file is `config.yml` by default; you don't need to specify it unless you use another file. + +**Warning:** the `source` attribute path has to be the generated `_site` folder and not the unprocessed Jekyll source code. +If you don't have it yet, simply use `jekyll build` at the root of your directory. ## Library -The methods in the `lib_rellinks.rb` library may be useful for relativizing links more generally in HTML pages other than Jekyll sites. +The methods in the `html_to_offline.rb` library may be useful for relativizing links more generally in HTML pages other than Jekyll sites. It is extremely useful to have an offline version of a website that can work without an Internet connection or a local server, so it was quite surprising to find that a library to do this did not already exist. @@ -43,22 +69,7 @@ This script has been used to create fully-functional offline versions of the [Gl ## Issues -There are some unexpected challenges with `file://` URIs that make them different from URIs loaded from a server (even one running on localhost). - -* So-called "clean URLs" (where e.g., pages named `index.html` can be accessed by following a link to the parent directory without needing to add `index.html` at the end) are a feature of the webserver, and thus do not work with `file://` URIs. - * For example, if you have a file located at `/blog/index.html` and you link to it using `/blog`, it will work fine on a webserver, but on a local filesystem you will be taken to an index page listing all the files in the directory instead. - * Jekyll Offline handles this by rewriting these links so that they point to the actual file (e.g., `index.html`) instead of the parent directory. 
-* To be truly portable, all local links must be relative to some arbitrary "root" level that represents the top level of the site - * This means that the distance between each link on the site and the root directory must be calculated independently, which is what Jekyll Offline does. - -When crawling an online website, some of these issues can be resolved using a tool like Wget with option `--convert-links` for example. However, Jekyll Offline has been designed for cases where the entire website is already available locally, and simply needs to be adjusted slightly to run offline. - -There are also some known issues remaining to be resolved: - -* Due to [this 7 year-old unresolved bug](https://bugzilla.mozilla.org/show_bug.cgi?id=760436) in Firefox, local fonts will not load on a page unless they are placed in the same directory as the page. This can be quite problematic for sites with multiple pages, however as noted in the linked bug report, these sites should still work fine in other browsers. - * Note that this also means that Font Awesome and similar icon fonts will not work properly in Firefox. One way to work around this is to extract the icons you need and embed them into each page. This is of course impractical for large fonts. - * As suggested in the bug report, it may be possible to resolve this by setting `security.fileuri.strict_origin_policy` to `false` in `about:config`. -* There is currently an issue with converting sites in different locations than the script itself. While this is being resolved, it is recommended to place the `_site` folder and corresponding YAML configuration file in the same folder as (or a subfolder of) the `jekyll_offline.rb` script itself. +A lot of code has changed, so maybe the previous issues don't apply anymore. If you encounter an issue, create an issue and maybe someone will resolve it ! ## Contributing @@ -67,3 +78,8 @@ If you encounter any problems while converting a Jekyll site, please open an iss ## License MIT. 
+ +### Personal note: +I'm a novice so I don't know what is allowed. + +Based on [this article](https://www.gnu.org/philosophy/open-source-misses-the-point.en.html) I would like to license this fork under GPLv3 if it's possible. \ No newline at end of file diff --git a/config.yml b/config.yml index b5ff825..556c32b 100644 --- a/config.yml +++ b/config.yml @@ -1,12 +1,5 @@ # Default configuration for offline jekyll site generation -:absolute_base: "https://address.of.site.com/" # The full URL of the site as it might appear in any absolute links on the website. (these will be converted to relative links) -:relative_base: "/blog" # Optional - use this if the original site is located in a subdirectory -:source_dir: "~/Downloads/spiffy_website/_site/" # This should point to the _site folder of the generated Jekyll website -:out_dir: "~/my_website_offline/" # The desired output directory where you would like to generate the offline version of the site -# :custom_filter: "fn:|fnref:" # Optional: specify a custom string to filter -- URLS containing this string will not be rewritten - -# boilerplate variables -:site_title: "My Website Title" -:site_url: "https://address.of.site.com/" # URL of the original site -:site_logo: "" # Image to use for site logo on intro page +:source: "~/workspace/root/_site" # This should point to the _site folder of the generated Jekyll website +:target: "~/Downloads" # The desired output directory where you would like to generate the offline version of the site +:site_url: "http://localhost:4000" # URL of the original site (http://localhost:4000/) diff --git a/demo.yml b/demo.yml index 594314d..36e0f2e 100644 --- a/demo.yml +++ b/demo.yml @@ -1,11 +1,5 @@ # Demo configuration for offline jekyll site -:absolute_base: "https://address.of.site.com/" -:relative_base: "/demo" -:source_dir: "demo/_site/" -:out_dir: "demo_offline/" - -# boilerplate variables -:site_title: "Demo Site" -:site_url: "https://dohliam.github.io/offline-jekyll/" -:site_logo: 
"" +:source: "demo/_site/" +:target: "" +:site_url: "http://yourdomain.com/" \ No newline at end of file diff --git a/html_to_offline.rb b/html_to_offline.rb new file mode 100644 index 0000000..7c5dccd --- /dev/null +++ b/html_to_offline.rb @@ -0,0 +1,66 @@ +# Converts a html file to a offline html file +# Input : +# - page_path: html file path +# - main_folder_path: (kinda useless, but now we have the page relative path) +# - site_url: remove this from every link +# Output : +# - page_content, hopefully our html file adapted for local use +require 'pathname' + +def convert_to_offline(page_path, main_folder_path, site_url) # Flemme d'améliorer la signature, déso + page_relative_path = page_path.gsub(/^#{main_folder_path}/, "") # Path relative to the working directory + + page_content = File.read(page_path) + + page_content = page_content.gsub(/(href|src)=["'](.*?)["']/) do |link| + href = $1 + address = $2 + + unless is_custom_filter?() || (is_url?(address) && !address.start_with?(site_url)) + # add index.html to folder links that end with "/" + address += "index.html" if address.end_with?("/") + + # add /index.html to folder links that end like "/something" with no extension + address += "/index.html" unless has_extension?(address) + + # remove the site_url from paths + address.sub!(/^#{Regexp.escape(site_url)}/, "") + + # remove the multiple "/" + address.gsub!(/\/+/, '/') + + # create the relative path from the page to the link address + address = construct_relative_path(page_relative_path, "/") + address + + # remove the first "/" + address = address.sub(/^\//, ""); + end + href + "=" + "'#{address}'" + end + page_content +end + +def is_custom_filter?() + @config[:custom_filter] && href.match(/#{@config[:custom_filter]}/) +end + +def is_url?(address) + # Check if the address starts with common web references + return true if address.match?(/\A(http|https|ftp|mailto)/) + false +end + + +def has_extension?(file_path) + !File.extname(file_path).empty? 
+end + +def construct_relative_path(from_address, to_address) + from_path = Pathname.new(from_address) + to_path = Pathname.new(to_address) + + relative_path = to_path.relative_path_from(from_path.dirname).to_s + + # Adjust the relative path to include "../" if necessary + relative_path.empty? ? '.' : relative_path +end \ No newline at end of file diff --git a/jekyll_offline.rb b/jekyll_offline.rb deleted file mode 100755 index c7863d6..0000000 --- a/jekyll_offline.rb +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env ruby - -require 'erb' -require 'fileutils' -require 'uri' -require 'yaml' - -require_relative 'lib_rellinks.rb' - -def copy_src_dir(src, trg) - dir_contents = Dir.glob(src + "*") - dir_contents.each do |d| - basename = File.basename(d) - if basename == ".git" - next - end - FileUtils.cp_r(d, trg) - end -end - -def preprocess_html(html) - html = prune_base(html) -end - -def postprocess_html(html) - html.gsub(/https:\/\/@/, "https://") -end - -def boilerplate(out_dir) - $site_title = @config[:site_title] - $site_url = @config[:site_url] - $site_logo = @config[:site_logo] - content = ERB.new(File.read("template.rhtml")).result - File.open(out_dir + "START_HERE.html", "w") {|f| f << content } -end - -def prune_base(html) - if @config[:relative_base] - html.gsub(/(href|src|data|value)(=["'])#{@config[:relative_base]}/, "\\1\\2") - else - html - end -end - -def clone_local_site(absolute_base, source_dir, out_dir) - FileUtils.rm_rf(out_dir) - FileUtils.mkdir_p(out_dir) - boilerplate(out_dir) - out_dir = out_dir + "site/" - FileUtils.mkdir_p(out_dir) - copy_src_dir(source_dir, out_dir) - - pages = Dir.glob(out_dir + "**/*.html") - - pages.each do |p| - page_content = File.read(p) - basename = File.basename(p) - html = preprocess_html(page_content) - page_output = convert_html(html, p, out_dir, absolute_base) - page_output = postprocess_html(page_output) - File.open(p, "w") {|f| f << page_output } - end -end - -@config = YAML::load(File.read("config.yml")) 
-custom_config = ARGV[0] -if custom_config - @config = YAML::load(File.read(custom_config)) -end - -absolute_base = @config[:absolute_base] -source_dir = @config[:source_dir].gsub(/^~/, Dir.home) -out_dir = @config[:out_dir].gsub(/^~/, Dir.home) - -clone_local_site(absolute_base, source_dir, out_dir) diff --git a/lib_rellinks.rb b/lib_rellinks.rb deleted file mode 100644 index 1adcee8..0000000 --- a/lib_rellinks.rb +++ /dev/null @@ -1,66 +0,0 @@ -# Takes an HTML file or folder of HTML files as input -# and adjusts all absolute links so that they are relative -# e.g., "/images" becomes "../images". -# Requires a (local) base directory to be specified as an -# argument: all links will then be rewritten relative to -# this base directory - -def relativize(href, path, absolute_base, root_dir) - # href = actual href string on page - # path = actual current location / file path of current page - # absolute_base = the base url for the site - - href_url = URI.join(URI.encode(absolute_base), URI.encode(href)) - path_url = URI.join(absolute_base, URI.encode(path)) - relative_url = path_url.route_to(href_url).to_s - url_out = test_index(relative_url, href_url, absolute_base, root_dir) - if href.match(/^#/) - url_out = href - end - url_out -end - -def path_is_dir(href_url, absolute_base, root_dir) - decode_href = URI.decode(href_url.to_s.gsub(/%25/, "%")) - local_target = decode_href.gsub(absolute_base, root_dir + "/") - File.directory?(local_target) -end - -def test_index(relative_url, href_url, absolute_base, root_dir) - if path_is_dir(href_url, absolute_base, root_dir) - relative_url = rewrite_index(relative_url) - else - fixed_url = relative_url.to_s.gsub(/^\//,"") - test_url = URI.join(URI.encode(absolute_base), URI.encode(fixed_url)) - if path_is_dir(test_url, absolute_base, root_dir) - relative_url = rewrite_index(relative_url) - end - end - relative_url -end - -def rewrite_index(relative_url) - relative_url = relative_url.to_s.gsub(/\/+$/, "") + "/index.html" -end - 
-def convert_html(html, source_file, root_dir, absolute_base) - root_path = File.absolute_path(root_dir) - source_file_path = File.absolute_path(source_file) - path = source_file_path.gsub(/^#{root_path}/, "") - - out_html = html.gsub(/(href|src)=["'](.*?)["']/) do |h| - pre = $1 - href = $2 - if @config[:custom_filter] && href.match(/#{@config[:custom_filter]}/) - h - elsif h.match(absolute_base) || !h.match(/https*:\/\//) - href = href.gsub(/^\/\//, "/") - raw_out = pre + '="' + relativize(href, path, absolute_base, root_path) + '"' - URI.decode(raw_out) - else - h - end - end - - out_html -end diff --git a/main.rb b/main.rb new file mode 100755 index 0000000..1e477b0 --- /dev/null +++ b/main.rb @@ -0,0 +1,44 @@ +#!/usr/bin/env ruby + +require 'erb' +require 'fileutils' +require 'yaml' + +require_relative 'html_to_offline.rb' + +def copy_local_site(source, target) + project_name = File.basename(File.dirname(source)).split('\\')[-1] + "_offline" # main folder in target + + main_folder_path = File.join(target, project_name) + + # Clear before copy + FileUtils.rm_rf(main_folder_path) # Remove the main folder if it already exists + FileUtils.mkdir_p(main_folder_path) + + # Copy all contents of the source directory to the main folder path + Dir.glob(File.join(source, '*')).each do |item| + FileUtils.cp_r(item, main_folder_path) + end + main_folder_path +end + +def create_offline_site(main_folder_path, site_url) + pages = Dir.glob(File.join(main_folder_path, "**", "*.html")) + pages.each do |page_path| + page_output = convert_to_offline(page_path, main_folder_path, site_url) + File.open(page_path, "w") {|f| f << page_output } + end +end + +@config = YAML::load(File.read("config.yml")) +custom_config = ARGV[0] +if custom_config + @config = YAML::load(File.read(custom_config)) +end + +source = File.absolute_path(@config[:source].gsub(/^~/, Dir.home)) +target = File.absolute_path(@config[:target].gsub(/^~/, Dir.home)) +site_url = @config[:site_url] + +main_folder_path = 
copy_local_site(source, target) +create_offline_site(main_folder_path, site_url)