Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VisualSelector - Use 'deflate' for storing elements.json, 90% file size reduction #2794

Merged
merged 6 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions changedetectionio/flask_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -1297,12 +1297,23 @@ def static_content(group, filename):

# These files should be in our subdirectory
try:
# set nocache, set content-type
response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), "elements.json"))
response.headers['Content-type'] = 'application/json'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = 0
# set nocache, set content-type,
# `filename` is actually directory UUID of the watch
watch_directory = str(os.path.join(datastore_o.datastore_path, filename))
response = None
if os.path.isfile(os.path.join(watch_directory, "elements.deflate")):
response = make_response(send_from_directory(watch_directory, "elements.deflate"))
response.headers['Content-Type'] = 'application/json'
response.headers['Content-Encoding'] = 'deflate'
else:
logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.')
abort(404)

if response:
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = "0"

return response

except FileNotFoundError:
Expand Down
9 changes: 5 additions & 4 deletions changedetectionio/model/Watch.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,16 +538,17 @@ def save_error_text(self, contents):

def save_xpath_data(self, data, as_error=False):
import json
import zlib

if as_error:
target_path = os.path.join(self.watch_data_dir, "elements-error.json")
target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate")
else:
target_path = os.path.join(self.watch_data_dir, "elements.json")
target_path = os.path.join(str(self.watch_data_dir), "elements.deflate")

self.ensure_data_dir_exists()

with open(target_path, 'w') as f:
f.write(json.dumps(data))
with open(target_path, 'wb') as f:
f.write(zlib.compress(json.dumps(data).encode()))
f.close()

# Save as PNG, PNG is larger but better for doing visual diff in the future
Expand Down
1 change: 1 addition & 0 deletions changedetectionio/static/js/visual-selector.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ $(document).ready(() => {
}).done((data) => {
$fetchingUpdateNoticeElem.html("Rendering..");
selectorData = data;

sortScrapedElementsBySize();
console.log(`Reported browser width from backend: ${data['browser_width']}`);

Expand Down
17 changes: 16 additions & 1 deletion changedetectionio/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=
def visualselector_data_is_ready(self, watch_uuid):
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
screenshot_filename = "{}/last-screenshot.png".format(output_path)
elements_index_filename = "{}/elements.json".format(output_path)
elements_index_filename = "{}/elements.deflate".format(output_path)
if path.isfile(screenshot_filename) and path.isfile(elements_index_filename) :
return True

Expand Down Expand Up @@ -909,3 +909,18 @@ def update_18(self):
if self.data['watching'][uuid].get('in_stock_only'):
del (self.data['watching'][uuid]['in_stock_only'])

# Compress old elements.json to elements.deflate to save disk space; the compression is fast.
def update_19(self):
    """Migrate every watch's ``elements.json`` to a zlib-compressed ``elements.deflate``.

    For each watch UUID in ``self.data['watching']`` this looks for
    ``<datastore_path>/<uuid>/elements.json``. When it exists, its raw bytes
    are compressed with ``zlib.compress`` and written alongside it as
    ``elements.deflate``, after which the original JSON file is deleted.
    Watches without an ``elements.json`` are left untouched.

    Raises:
        OSError: if a file cannot be read, written or removed.
    """
    import zlib

    # Only the UUID keys are needed to build the paths; the watch objects are not used.
    for uuid in self.data['watching']:
        json_path = os.path.join(self.datastore_path, uuid, "elements.json")
        deflate_path = os.path.join(self.datastore_path, uuid, "elements.deflate")

        if not os.path.exists(json_path):
            continue

        logger.debug(f"Compressing {str(json_path)} to {str(deflate_path)}..")
        with open(json_path, "rb") as f_j, open(deflate_path, "wb") as f_d:
            f_d.write(zlib.compress(f_j.read()))

        # Delete the source only once both handles are closed: the compressed
        # copy is then fully flushed, and removing a file that is still open
        # fails on Windows.
        os.unlink(json_path)

14 changes: 10 additions & 4 deletions changedetectionio/tests/visualselector/test_fetch_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,21 @@ def test_visual_selector_content_ready(client, live_server, measure_memory_usage


assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.deflate')), "xpath elements.deflate data should exist"

# Open it and see if it roughly looks correct
with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f:
json.load(f)
with open(os.path.join('test-datastore', uuid, 'elements.deflate'), 'rb') as f:
import zlib
compressed_data = f.read()
decompressed_data = zlib.decompress(compressed_data)
# See if any error was thrown
json_data = json.loads(decompressed_data.decode('utf-8'))

# Attempt to fetch it via the web hook that the browser would use
res = client.get(url_for('static_content', group='visual_selector_data', filename=uuid))
json.loads(res.data)
decompressed_data = zlib.decompress(res.data)
json_data = json.loads(decompressed_data.decode('utf-8'))

assert res.mimetype == 'application/json'
assert res.status_code == 200

Expand Down
Loading