Commit
Remove same checksum skip check - saved a little CPU but added a lot of complexity (#2700)
dgtlmoon authored Oct 11, 2024
1 parent 03151da commit 5bb47e4
Showing 10 changed files with 21 additions and 31 deletions.
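In short: before this commit, items placed on the recheck queue carried a skip_when_checksum_same flag, and the text_json_diff processor used it to abort early when the MD5 of the freshly fetched, pre-filter content matched the previous run. A rough sketch of the removed path, pieced together from the hunks below:

# Sketch of the behaviour this commit removes (reconstructed from the diff below).
# Queue items used to carry an explicit flag:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

# ...and run_changedetection() bailed out when the pre-filter checksum had not changed:
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
if skip_when_checksum_same:
    if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
        raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()

# After this commit a queued item is simply {'uuid': uuid}, processors implement
# run_changedetection(self, watch) with no skip flag, and every fetch goes through
# the full filter/diff path.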
6 changes: 3 additions & 3 deletions changedetectionio/api/api_v1.py
@@ -58,7 +58,7 @@ def get(self, uuid):
abort(404, message='No watch exists with the UUID of {}'.format(uuid))

if request.args.get('recheck'):
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return "OK", 200
if request.args.get('paused', '') == 'paused':
self.datastore.data['watching'].get(uuid).pause()
@@ -246,7 +246,7 @@ def post(self):

new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
if new_uuid:
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
return {'uuid': new_uuid}, 201
else:
return "Invalid or unsupported URL", 400
@@ -303,7 +303,7 @@ def get(self):

if request.args.get('recheck_all'):
for uuid in self.datastore.data['watching'].keys():
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return {'status': "OK"}, 200

return list, 200
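For reference, the recheck handling above can be exercised through the watch API. A hedged usage sketch in Python: the /api/v1/watch/<uuid> route and the x-api-key header are assumptions about the surrounding project, only the ?recheck query parameter is taken from the handler in this diff.

# Hedged usage sketch: trigger a recheck of one watch via the REST API.
# Route and x-api-key header are assumed, not shown in this diff.
import requests

resp = requests.get(
    "http://localhost:5000/api/v1/watch/<watch-uuid>",
    params={"recheck": "true"},
    headers={"x-api-key": "YOUR_API_KEY"},
    timeout=10,
)
print(resp.status_code, resp.text)  # the handler above returns "OK", 200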
@@ -19,7 +19,7 @@ def accept(uuid):
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
datastore.data['watching'][uuid].clear_watch()
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return redirect(url_for("index"))

@login_required
20 changes: 10 additions & 10 deletions changedetectionio/flask_app.py
@@ -795,7 +795,7 @@ def edit_page(uuid):
datastore.needs_write_urgent = True

# Queue the watch for immediate recheck, with a higher priority
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

# Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff':
@@ -976,7 +976,7 @@ def import_page():
importer = import_url_list()
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
for uuid in importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

if len(importer.remaining_data) == 0:
return redirect(url_for('index'))
@@ -989,7 +989,7 @@ def import_page():
d_importer = import_distill_io_json()
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
for uuid in d_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

# XLSX importer
if request.files and request.files.get('xlsx_file'):
@@ -1013,7 +1013,7 @@ def import_page():
w_importer.run(data=file, flash=flash, datastore=datastore)

for uuid in w_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

# Could be some remaining, or we could be on GET
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
@@ -1442,7 +1442,7 @@ def form_clone():
new_uuid = datastore.clone(uuid)
if new_uuid:
if not datastore.data['watching'].get(uuid).get('paused'):
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
flash('Cloned.')

return redirect(url_for('index'))
@@ -1463,7 +1463,7 @@ def form_watch_checknow():

if uuid:
if uuid not in running_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
i = 1

elif tag:
@@ -1474,7 +1474,7 @@ def form_watch_checknow():
continue
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
update_q.put(
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
)
i += 1

@@ -1484,7 +1484,7 @@ def form_watch_checknow():
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
if with_errors and not watch.get('last_error'):
continue
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
i += 1
flash(f"{i} watches queued for rechecking.")
return redirect(url_for('index', tag=tag))
@@ -1542,7 +1542,7 @@ def form_watch_list_checkbox_operations():
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
# Recheck and require a full reprocessing
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
flash("{} watches queued for rechecking".format(len(uuids)))

elif (op == 'clear-errors'):
@@ -1866,7 +1866,7 @@ def ticker_thread_check_time_launch_checks():
f"{now - watch['last_checked']:0.2f}s since last checked")

# Into the queue with you
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))

# Reset for next time
watch.jitter_seconds = 0
2 changes: 1 addition & 1 deletion changedetectionio/processors/__init__.py
@@ -157,7 +157,7 @@ def call_browser(self, preferred_proxy_id=None):
# After init, call run_changedetection() which will do the actual change-detection

@abstractmethod
def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
def run_changedetection(self, watch):
update_obj = {'last_notification_error': False, 'last_error': False}
some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
2 changes: 1 addition & 1 deletion changedetectionio/processors/restock_diff/processor.py
@@ -144,7 +144,7 @@ class perform_site_check(difference_detection_processor):
screenshot = None
xpath_data = None

def run_changedetection(self, watch, skip_when_checksum_same=True):
def run_changedetection(self, watch):
import hashlib

if not watch:
5 changes: 1 addition & 4 deletions changedetectionio/processors/text_json_diff/__init__.py
@@ -11,10 +11,7 @@ def _task(watch, update_handler):

try:
# The slow process (we run 2 of these in parallel)
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
watch=watch,
skip_when_checksum_same=False,
)
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
except FilterNotFoundInResponse as e:
text_after_filter = f"Filter not found in HTML: {str(e)}"
except ReplyWithContentButNoText as e:
5 changes: 1 addition & 4 deletions changedetectionio/processors/text_json_diff/processor.py
@@ -35,7 +35,7 @@ def __init__(self, msg):
# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):

def run_changedetection(self, watch, skip_when_checksum_same=True):
def run_changedetection(self, watch):
changed_detected = False
html_content = ""
screenshot = False # as bytes
@@ -58,9 +58,6 @@ def run_changedetection(self, watch, skip_when_checksum_same=True):
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
if skip_when_checksum_same:
if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()

# Fetching complete, now filters

1 change: 1 addition & 0 deletions changedetectionio/static/styles/scss/styles.scss
@@ -147,6 +147,7 @@ body.spinner-active {
}
}


.tabs ul li a {
// .tab-pane-inner will have the #id that the tab button jumps/anchors to
scroll-margin-top: 200px;
1 change: 1 addition & 0 deletions changedetectionio/static/styles/styles.css
@@ -605,6 +605,7 @@ body.spinner-active #pure-menu-horizontal-spinner {
background-color: var(--color-background-menu-link-hover);
color: var(--color-text-menu-link-hover); }


.tabs ul li a {
scroll-margin-top: 200px; }

8 changes: 1 addition & 7 deletions changedetectionio/update_worker.py
@@ -260,9 +260,6 @@ def run(self):
try:
# Processor is what we are using for detecting the "Change"
processor = watch.get('processor', 'text_json_diff')
# Abort processing when the content was the same as the last fetch
skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')


# Init a new 'difference_detection_processor', first look in processors
processor_module_name = f"changedetectionio.processors.{processor}.processor"
@@ -278,10 +275,7 @@

update_handler.call_browser()

changed_detected, update_obj, contents = update_handler.run_changedetection(
watch=watch,
skip_when_checksum_same=skip_when_same_checksum,
)
changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)

# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.