Skip to content

Commit

Permalink
Fixing proxy checker (#2696)
Browse files Browse the repository at this point in the history
  • Loading branch information
dgtlmoon authored Oct 10, 2024
1 parent 5a768d7 commit c1c8de3
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 29 deletions.
14 changes: 10 additions & 4 deletions changedetectionio/blueprint/check_proxies/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import importlib
from concurrent.futures import ThreadPoolExecutor

from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio.store import ChangeDetectionStore

from functools import wraps
Expand Down Expand Up @@ -30,16 +33,19 @@ def construct_blueprint(datastore: ChangeDetectionStore):
def long_task(uuid, preferred_proxy):
import time
from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
from changedetectionio.processors.text_json_diff import text_json_diff
from changedetectionio.safe_jinja import render as jinja_render

status = {'status': '', 'length': 0, 'text': ''}

contents = ''
now = time.time()
try:
update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
update_handler.call_browser()
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid
)

update_handler.call_browser(preferred_proxy_id=preferred_proxy)
# title, size is len contents not len xfer
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
if e.status_code == 404:
Expand All @@ -48,7 +54,7 @@ def long_task(uuid, preferred_proxy):
status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
else:
status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
except text_json_diff.FilterNotFoundInResponse:
except FilterNotFoundInResponse:
status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"})
except content_fetcher_exceptions.EmptyReply as e:
if e.status_code == 403 or e.status_code == 401:
Expand Down
6 changes: 4 additions & 2 deletions changedetectionio/processors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class difference_detection_processor():
screenshot = None
watch = None
xpath_data = None
preferred_proxy = None

def __init__(self, *args, datastore, watch_uuid, **kwargs):
super().__init__(*args, **kwargs)
Expand All @@ -26,7 +27,8 @@ def __init__(self, *args, datastore, watch_uuid, **kwargs):
# Generic fetcher that should be extended (requests, playwright etc)
self.fetcher = Fetcher()

def call_browser(self):
def call_browser(self, preferred_proxy_id=None):

from requests.structures import CaseInsensitiveDict

# Protect against file:// access
Expand All @@ -42,7 +44,7 @@ def call_browser(self):
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

# Proxy ID "key"
preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

# Pluggable content self.fetcher
if not prefer_fetch_backend or prefer_fetch_backend == 'system':
Expand Down
24 changes: 15 additions & 9 deletions changedetectionio/static/js/recheck-proxy.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
$(function () {
/* add container before each proxy location to show status */

var option_li = $('.fetch-backend-proxy li').filter(function() {
return $("input",this)[0].value.length >0;
});

//var option_li = $('.fetch-backend-proxy li');
var isActive = false;
$(option_li).prepend('<div class="proxy-status"></div>');
$(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');

/**
 * Insert the status, timing and detail containers around every proxy
 * option whose first input carries a non-empty value.
 */
function setup_html_widget() {
    var $proxy_rows = $('.fetch-backend-proxy li').filter(function () {
        // Only keep entries whose first input has a non-empty value
        var first_input = $("input", this)[0];
        return first_input.value.length > 0;
    });

    // Status marker goes in front of the option, timing and details after it
    $proxy_rows.prepend('<div class="proxy-status"></div>');
    $proxy_rows.append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
}

function set_proxy_check_status(proxy_key, state) {
// select input by value name
Expand Down Expand Up @@ -59,8 +59,14 @@ $(function () {
}

$('#check-all-proxies').click(function (e) {

e.preventDefault()
$('body').addClass('proxy-check-active');

if (!$('body').hasClass('proxy-check-active')) {
setup_html_widget();
$('body').addClass('proxy-check-active');
}

$('.proxy-check-details').html('');
$('.proxy-status').html('<span class="spinner"></span>').fadeIn();
$('.proxy-timing').html('');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,19 @@ ul#requests-extra_proxies {

body.proxy-check-active {
#request {
// Padding set by flex layout
/*
.proxy-status {
width: 2em;
}
*/

.proxy-check-details {
font-size: 80%;
color: #555;
display: block;
padding-left: 4em;
padding-left: 2em;
max-width: 500px;
}

.proxy-timing {
Expand Down
29 changes: 16 additions & 13 deletions changedetectionio/static/styles/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,22 @@ ul#requests-extra_proxies {
#request label[for=proxy] {
display: inline-block; }

body.proxy-check-active #request .proxy-status {
width: 2em; }

body.proxy-check-active #request .proxy-check-details {
font-size: 80%;
color: #555;
display: block;
padding-left: 4em; }

body.proxy-check-active #request .proxy-timing {
font-size: 80%;
padding-left: 1rem;
color: var(--color-link); }
body.proxy-check-active #request {
/*
.proxy-status {
width: 2em;
}
*/ }
body.proxy-check-active #request .proxy-check-details {
font-size: 80%;
color: #555;
display: block;
padding-left: 2em;
max-width: 500px; }
body.proxy-check-active #request .proxy-timing {
font-size: 80%;
padding-left: 1rem;
color: var(--color-link); }

#recommended-proxy {
display: grid;
Expand Down
72 changes: 72 additions & 0 deletions changedetectionio/tests/test_preview_endpoints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3

import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks


def _md5_hexdigest_upper(path):
    """Return the upper-cased MD5 hex digest of the file at *path*."""
    import hashlib

    # Context manager so the handle is closed deterministically instead of
    # being left for the garbage collector (avoids ResourceWarning in tests).
    with open(path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest().upper()


def test_fetch_pdf(client, live_server, measure_memory_usage):
    """Check that a watched PDF is previewed as text and that its checksum tracks changes.

    The test imports the PDF test endpoint as a watch, verifies the preview
    contains extracted text plus a 'Document checksum' line, then swaps the
    PDF on disk, rechecks, and verifies the preview and diff pages reflect
    the new and old checksums respectively.
    """
    import shutil

    pdf_path = "test-datastore/endpoint-test.pdf"
    shutil.copy("tests/test.pdf", pdf_path)

    live_server_setup(live_server)
    test_url = url_for('test_pdf_endpoint', _external=True)
    # Add our URL to the import page
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )

    assert b"1 Imported" in res.data

    wait_for_all_checks(client)

    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    # PDF header should not be there (it was converted to text)
    assert b'PDF' not in res.data[:10]
    assert b'hello world' in res.data

    # So we know if the file changes in other ways
    original_md5 = _md5_hexdigest_upper(pdf_path)
    # We should have one
    assert len(original_md5) > 0
    # And it's going to be in the document
    assert b'Document checksum - ' + original_md5.encode('utf-8') in res.data

    # Replace the PDF on disk with a different one and trigger a recheck
    shutil.copy("tests/test2.pdf", pdf_path)
    changed_md5 = _md5_hexdigest_upper(pdf_path)
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches queued for rechecking.' in res.data

    wait_for_all_checks(client)

    # Now something should be ready, indicated by having a 'unviewed' class
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

    # The original checksum should not be in the preview anymore
    # (cdio adds it to the bottom of the text)
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    assert original_md5.encode('utf-8') not in res.data
    assert changed_md5.encode('utf-8') in res.data

    # The diff/history view should show both the old and the new checksum
    res = client.get(
        url_for("diff_history_page", uuid="first"),
        follow_redirects=True
    )

    assert original_md5.encode('utf-8') in res.data
    assert changed_md5.encode('utf-8') in res.data

    assert b'here is a change' in res.data

0 comments on commit c1c8de3

Please sign in to comment.