-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Backend CSS locators generation template
- Loading branch information
Showing
14 changed files
with
432 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,3 +55,5 @@ coverage.xml | |
.env.dev | ||
|
||
docker-compose.override.yaml | ||
|
||
app/analyzed_page/*.html |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,262 @@ | ||
// License: MIT | ||
// Author: Anton Medvedev <[email protected]> | ||
// Source: https://github.com/antonmedv/finder | ||
/** Options of the finder() call in progress: built-in defaults overlaid with the caller's options. */
let config;
/** Node on which candidate selectors are queried (set by finder() via findRootDocument()). */
let rootDocument;

/**
 * Builds a CSS selector that matches exactly `input` inside the configured root.
 *
 * The search is attempted with progressively smaller candidate sets
 * ('all' -> 'two' -> 'one' -> 'none'); each attempt is the fallback of the
 * previous one. A found path is then shortened by optimize() and the variant
 * with the lowest penalty is rendered.
 *
 * @param input   Element to describe.
 * @param options Optional overrides for the defaults listed in the body.
 * @returns The selector string; always 'html' for the <html> element.
 * @throws {Error} If `input` is not an element node or no selector was found.
 */
function finder(input, options) {
  if (input.nodeType !== Node.ELEMENT_NODE) {
    throw new Error(`Can't generate CSS selector for non-element node type.`);
  }
  if (input.tagName.toLowerCase() === 'html') {
    return 'html';
  }

  const defaults = {
    root: document.body,
    // By default every id, class and tag name may be used ...
    idName: (name) => true,
    className: (name) => true,
    tagName: (name) => true,
    // ... and no attribute selector is generated.
    attr: (name, value) => false,
    // Number of collected levels required before uniqueness is tested.
    seedMinLength: 1,
    // Paths are only shortened while they are longer than this.
    optimizedMinLength: 2,
    // Upper bound on the number of candidate paths tested per step.
    threshold: 1000,
    // Upper bound on shortening attempts made by optimize().
    maxNumberOfTries: 10000,
  };
  config = { ...defaults, ...options };
  rootDocument = findRootDocument(config.root, defaults);

  // Each strategy falls back to the next, more restrictive one.
  const searchNone = () => bottomUpSearch(input, 'none');
  const searchOne = () => bottomUpSearch(input, 'one', searchNone);
  const searchTwo = () => bottomUpSearch(input, 'two', searchOne);
  let path = bottomUpSearch(input, 'all', searchTwo);

  if (!path) {
    throw new Error(`Selector was not found.`);
  }
  const optimized = sort(optimize(path, input));
  if (optimized.length > 0) {
    path = optimized[0];
  }
  return selector(path);
}
/**
 * Picks the node that selectors will be queried against.
 *
 * A document node is used as is. The default root is replaced by its owner
 * document. Any other (custom) root is used as is.
 *
 * @param rootNode The configured root.
 * @param defaults The default options; only `defaults.root` is read.
 * @returns The node to run queries on.
 */
function findRootDocument(rootNode, defaults) {
  const isDocument = rootNode.nodeType === Node.DOCUMENT_NODE;
  if (!isDocument && rootNode === defaults.root) {
    return rootNode.ownerDocument;
  }
  return rootNode;
}
/**
 * Walks from `input` up through its ancestors, collecting one list of
 * candidate selector parts per element, until a unique path is found.
 *
 * `limit` controls how many candidates each level contributes:
 *  - 'all':  every candidate, plus :nth-child variants;
 *  - 'two':  the first candidate, plus its :nth-child variant;
 *  - 'one':  the first candidate, replaced by its :nth-child variant when allowed;
 *  - 'none': only the wildcard, with :nth-child when an index is known.
 *
 * @param input    Element the search starts from.
 * @param limit    One of 'all', 'two', 'one', 'none'.
 * @param fallback Optional function tried when this search finds nothing.
 * @returns The unique path, the fallback's result, or null.
 */
function bottomUpSearch(input, limit, fallback) {
  // Keeps the candidates and appends an :nth-child variant for each one that may take it.
  const withNthVariants = (candidates, nth) =>
    candidates.concat(candidates.filter(dispensableNth).map((node) => nthChild(node, nth)));

  const stack = [];
  let path = null;

  for (let current = input, depth = 0; current; current = current.parentElement, depth++) {
    // Most specific kind of selector available wins: id, attributes, classes, tag, wildcard.
    let level =
      maybe(id(current)) ||
      maybe(...attr(current)) ||
      maybe(...classNames(current)) ||
      maybe(tagName(current)) ||
      [any()];
    const nth = index(current);

    switch (limit) {
      case 'all':
        if (nth) {
          level = withNthVariants(level, nth);
        }
        break;
      case 'two':
        level = level.slice(0, 1);
        if (nth) {
          level = withNthVariants(level, nth);
        }
        break;
      case 'one': {
        level = level.slice(0, 1);
        const [node] = level;
        if (nth && dispensableNth(node)) {
          level = [nthChild(node, nth)];
        }
        break;
      }
      case 'none': {
        const wildcard = any();
        level = [nth ? nthChild(wildcard, nth) : wildcard];
        break;
      }
    }

    // Remember how far above `input` each part sits; selector() uses it to pick the combinator.
    for (const node of level) {
      node.level = depth;
    }
    stack.push(level);

    if (stack.length >= config.seedMinLength) {
      path = findUniquePath(stack, fallback);
      if (path) {
        break;
      }
    }
  }

  if (!path) {
    path = findUniquePath(stack, fallback);
  }
  if (!path && fallback) {
    return fallback();
  }
  return path;
}
/**
 * Returns the lowest-penalty combination of the collected levels whose
 * selector matches exactly one node, or null when there is none.
 *
 * When the number of combinations exceeds `config.threshold`, the search is
 * abandoned in favour of `fallback` (or null without one). The count is the
 * product of the level sizes, so it is computed up front: the threshold check
 * then actually prevents building and sorting an exponentially large list of
 * paths instead of running after the list already exists.
 *
 * @param stack    Array of levels, each an array of candidate selector parts.
 * @param fallback Optional function used when there are too many combinations.
 * @returns A unique path, the fallback's result, or null.
 */
function findUniquePath(stack, fallback) {
  // combinations() yields one path per element of the cartesian product of the levels.
  const combinationCount = stack.reduce((count, level) => count * level.length, 1);
  if (combinationCount > config.threshold) {
    return fallback ? fallback() : null;
  }
  for (const candidate of sort(combinations(stack))) {
    if (unique(candidate)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Renders a path as a CSS selector string.
 *
 * `path[0]` is the innermost part; every following part is prepended. Two
 * neighbouring parts are joined with the child combinator (`>`) when their
 * levels differ by exactly one, otherwise with the descendant combinator.
 *
 * @param path Non-empty array of parts with `name` and optional `level`.
 * @returns The selector text.
 */
function selector(path) {
  const [innermost, ...outerParts] = path;
  let query = innermost.name;
  let previous = innermost;
  for (const part of outerParts) {
    const partLevel = part.level || 0;
    const combinator = previous.level === partLevel - 1 ? ' > ' : ' ';
    query = `${part.name}${combinator}${query}`;
    previous = part;
  }
  return query;
}
/**
 * Sums the `penalty` of every part of a path (0 for an empty path).
 *
 * @param path Array of parts carrying a numeric `penalty`.
 * @returns The total penalty.
 */
function penalty(path) {
  let total = 0;
  for (const node of path) {
    total += node.penalty;
  }
  return total;
}
/**
 * Tells whether the selector rendered from `path` matches exactly one node
 * under the current root.
 *
 * @param path Path to render and test.
 * @returns true for exactly one match, false for several.
 * @throws {Error} If the selector matches nothing at all.
 */
function unique(path) {
  const css = selector(path);
  const matchCount = rootDocument.querySelectorAll(css).length;
  if (matchCount === 0) {
    throw new Error(`Can't select any node with this selector: ${css}`);
  }
  return matchCount === 1;
}
/**
 * Builds the `#id` selector part for an element.
 *
 * @param input Element to inspect.
 * @returns A zero-penalty part, or null when the element has no id or
 *          `config.idName` rejects it.
 */
function id(input) {
  const elementId = input.getAttribute('id');
  const accepted = elementId && config.idName(elementId);
  if (!accepted) {
    return null;
  }
  return {
    name: `#${CSS.escape(elementId)}`,
    penalty: 0,
  };
}
/**
 * Builds `[name="value"]` selector parts for the attributes of an element
 * that `config.attr` accepts.
 *
 * @param input Element to inspect.
 * @returns One part (penalty 0.5) per accepted attribute, in attribute order.
 */
function attr(input) {
  const parts = [];
  for (const attribute of Array.from(input.attributes)) {
    if (config.attr(attribute.name, attribute.value)) {
      parts.push({
        name: `[${CSS.escape(attribute.name)}="${CSS.escape(attribute.value)}"]`,
        penalty: 0.5,
      });
    }
  }
  return parts;
}
/**
 * Builds `.class` selector parts for the classes of an element that
 * `config.className` accepts.
 *
 * @param input Element to inspect.
 * @returns One part (penalty 1) per accepted class, in classList order.
 */
function classNames(input) {
  const accepted = Array.from(input.classList).filter(config.className);
  const toPart = (cls) => ({
    name: `.${CSS.escape(cls)}`,
    penalty: 1,
  });
  return accepted.map((cls) => toPart(cls));
}
/**
 * Builds the tag-name selector part for an element.
 *
 * @param input Element to inspect.
 * @returns A part with the lower-cased tag name (penalty 2), or null when
 *          `config.tagName` rejects the name.
 */
function tagName(input) {
  const name = input.tagName.toLowerCase();
  return config.tagName(name) ? { name, penalty: 2 } : null;
}
/**
 * Creates the wildcard (`*`) selector part, the candidate with the highest
 * base penalty. A fresh object is returned on every call.
 */
function any() {
  const wildcard = { name: '*', penalty: 3 };
  return wildcard;
}
/**
 * Computes the 1-based position of `input` among the element children of its
 * parent, i.e. the argument for `:nth-child()`.
 *
 * @param input Node whose position is wanted.
 * @returns The position, or null when there is no parent or the parent has
 *          no children.
 */
function index(input) {
  const parent = input.parentNode;
  if (!parent || !parent.firstChild) {
    return null;
  }
  let position = 0;
  for (let sibling = parent.firstChild; sibling; sibling = sibling.nextSibling) {
    // Only element nodes count; text and comment nodes are skipped.
    if (sibling.nodeType === Node.ELEMENT_NODE) {
      position += 1;
    }
    if (sibling === input) {
      break;
    }
  }
  return position;
}
/**
 * Derives a new selector part that adds `:nth-child(i)` to `node`.
 *
 * @param node Part to extend; it is not modified.
 * @param i    Position used inside `:nth-child()`.
 * @returns A new part whose penalty is one higher than the original's.
 */
function nthChild(node, i) {
  const name = `${node.name}:nth-child(${i})`;
  const penalty = node.penalty + 1;
  return { name, penalty };
}
/**
 * Tells whether a part may be combined with `:nth-child()`: everything except
 * the `html` part and id parts (names starting with `#`).
 */
function dispensableNth(node) {
  const isHtml = node.name === 'html';
  return !(isHtml || node.name.startsWith('#'));
}
/**
 * Collects the arguments that are neither null nor undefined.
 *
 * @param level Candidate parts, some of which may be missing.
 * @returns The present arguments as an array, or null when none is present
 *          (which lets callers chain alternatives with `||`).
 */
function maybe(...level) {
  const present = level.filter(notEmpty);
  return present.length > 0 ? present : null;
}
/**
 * Returns true for every value except null and undefined (so 0, '' and false
 * count as present).
 */
function notEmpty(value) {
  const isMissing = value === null || value === undefined;
  return !isMissing;
}
/**
 * Lazily yields every path that takes exactly one node from each level of
 * `stack`, in level order (the first level varies slowest).
 *
 * @param stack Array of levels, each an array of nodes.
 * @param path  Nodes already chosen from earlier levels (used by the recursion).
 */
function* combinations(stack, path = []) {
  if (stack.length === 0) {
    // Every level has contributed a node: the path is complete.
    yield path;
    return;
  }
  const [candidates, ...remainingLevels] = stack;
  for (const node of candidates) {
    yield* combinations(remainingLevels, path.concat(node));
  }
}
/**
 * Materialises an iterable of paths into a new array ordered by ascending
 * total penalty; the input is left untouched.
 */
function sort(paths) {
  const byPenalty = (a, b) => penalty(a) - penalty(b);
  const ordered = Array.from(paths);
  ordered.sort(byPenalty);
  return ordered;
}
/**
 * Lazily yields shorter variants of `path` that still select exactly `input`.
 *
 * Each inner part (never the first or the last) is dropped in turn; a variant
 * that stays unique and still resolves to `input` is yielded and then
 * shortened further recursively.
 *
 * @param path  Path to shorten.
 * @param input Element the selector must keep pointing at.
 * @param scope Bookkeeping shared by the whole recursion: `counter` counts
 *              attempts, `visited` holds selectors that were already yielded.
 */
function* optimize(path, input, scope = {
  counter: 0,
  visited: new Map(),
}) {
  const worthShortening = path.length > 2 && path.length > config.optimizedMinLength;
  if (!worthShortening) {
    return;
  }
  for (let dropAt = 1; dropAt < path.length - 1; dropAt++) {
    if (scope.counter > config.maxNumberOfTries) {
      return; // Attempt budget exhausted.
    }
    scope.counter += 1;
    const shorterPath = path.filter((_, position) => position !== dropAt);
    const shorterKey = selector(shorterPath);
    if (scope.visited.has(shorterKey)) {
      // An already yielded selector ends this generator altogether.
      return;
    }
    if (!unique(shorterPath) || !same(shorterPath, input)) {
      continue;
    }
    yield shorterPath;
    scope.visited.set(shorterKey, true);
    yield* optimize(shorterPath, input, scope);
  }
}
/**
 * Tells whether the first node matched by the selector rendered from `path`
 * is `input` itself.
 */
function same(path, input) {
  const firstMatch = rootDocument.querySelector(selector(path));
  return firstMatch === input;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/*jshint esversion: 6 */ | ||
|
||
// import {getCssSelector} from "index.js"; | ||
// import {finder} from "finder.js"; | ||
|
||
/**
 * Generates a CSS selector for `element` with two libraries (finder and
 * CssSelectorGenerator) and returns the shorter result; on equal length the
 * finder result is used.
 *
 * @param element Element to build a selector for.
 * @returns The chosen selector, or the text
 *          'CSS selector generation was failed' when neither library
 *          produced a string.
 */
const generateSelectorByElement = (element) => {
  "use strict";

  // Runs one generator and returns either its result or whatever it threw, so
  // that a failure of one library does not prevent trying the other.
  const attempt = (generate) => {
    try {
      return generate();
    } catch (err) {
      return err;
    }
  };

  // Errors from finder have to be caught because of a library issue: when the
  // generated selector contains ".", e.g. #Country_16856319000360.6264611129794591,
  // querySelectorAll fails with "Uncaught DOMException: ... is not a valid selector."
  // The library logic gives no way to prevent such selectors.
  // Reproduced on https://www.docker.com
  const selectorByFinder = attempt(() => {
    const finderForbiddenAttributes = ['jdn-hash', 'href', 'class', 'xmlns', 'xmlns:xlink', 'xlink:href'];
    return finder(element, {
      attr: (name, value) => value && !finderForbiddenAttributes.includes(name),
    });
  });

  // Same kind of failure when the "id" attribute starts with a number, e.g.
  // id="6264611129794591": querySelectorAll rejects '#6264611129794591' as an
  // invalid selector, and the library logic gives no way to prevent it.
  // Reproduced on https://www.otto.de
  const generatorOptions = {
    blacklist: [/jdn-hash/, /href/],
    maxCombinations: 30,
    maxCandidates: 30,
  };
  const selectorByGenerator = attempt(() => CssSelectorGenerator.getCssSelector(element, generatorOptions));

  // Only string results are usable selectors; anything else is a caught error.
  const finderSucceeded = typeof selectorByFinder === 'string';
  const generatorSucceeded = typeof selectorByGenerator === 'string';

  if (!finderSucceeded && !generatorSucceeded) {
    return 'CSS selector generation was failed';
  }
  if (!finderSucceeded) {
    return selectorByGenerator;
  }
  if (!generatorSucceeded) {
    return selectorByFinder;
  }
  return selectorByGenerator.length < selectorByFinder.length ? selectorByGenerator : selectorByFinder;
};
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
__all__ = [ | ||
"task_schedule_css_locator_generation", | ||
] | ||
|
||
from .tasks import task_schedule_css_locator_generation |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from app.celery_app import celery_app | ||
|
||
|
||
# Celery task declared with bind=True, so the task instance arrives as `self`.
# Placeholder only: the body is a bare `...`, so calling it does nothing and
# returns None even though the signature is annotated with `-> str`.
# NOTE(review): presumably meant to schedule CSS locator generation for the
# given element of the given document -- confirm once implemented.
@celery_app.task(bind=True) | ||
def task_schedule_css_locator_generation(self, element_id: int, document_uuid: str) -> str: | ||
... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Placeholder only: the body is a bare `...`, so the function currently returns
# None although it is annotated to return `str`.
# NOTE(review): the name suggests it should return `document` with the CSS
# selector generator scripts injected -- confirm once implemented.
def inject_css_selector_generator_scripts(document: str) -> str: | ||
... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.