Skip to content

Commit

Permalink
Backend CSS locators generation template
Browse files Browse the repository at this point in the history
  • Loading branch information
ivnglkv committed Apr 8, 2024
1 parent a94069e commit 81814e6
Show file tree
Hide file tree
Showing 14 changed files with 432 additions and 43 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,5 @@ coverage.xml
.env.dev

docker-compose.override.yaml

app/analyzed_page/*.html
262 changes: 262 additions & 0 deletions app/analyzed_page/finder.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
// License: MIT
// Author: Anton Medvedev <[email protected]>
// Source: https://github.com/antonmedv/finder
// Module-level state shared by the helpers below.
// `config` is assigned on every finder() call: the built-in defaults merged with the caller's options.
let config;
// `rootDocument` is assigned on every finder() call from findRootDocument();
// unique() and same() run their querySelector/querySelectorAll lookups against it.
let rootDocument;
/**
 * Generates a CSS selector for `input`.
 *
 * Merges `options` over the built-in defaults into the module-level `config`,
 * resolves the module-level `rootDocument`, then runs bottomUpSearch with
 * progressively stricter limits ('all' -> 'two' -> 'one' -> 'none'), each one
 * being the fallback of the previous. The found path is then passed through
 * optimize(); the lowest-penalty optimized path (if any) is rendered.
 *
 * @param {Element} input - Element to build a selector for.
 * @param {object} [options] - Overrides for the defaults declared below.
 * @returns {string} The selector; always 'html' for the <html> element.
 * @throws {Error} If `input` is not an element node or no path was found.
 */
function finder(input, options) {
  if (input.nodeType !== Node.ELEMENT_NODE) {
    throw new Error(`Can't generate CSS selector for non-element node type.`);
  }
  if ('html' === input.tagName.toLowerCase()) {
    return 'html';
  }

  const defaults = {
    root: document.body,
    idName: (name) => true,
    className: (name) => true,
    tagName: (name) => true,
    attr: (name, value) => false,
    seedMinLength: 1,
    optimizedMinLength: 2,
    threshold: 1000,
    maxNumberOfTries: 10000,
  };
  config = { ...defaults, ...options };
  rootDocument = findRootDocument(config.root, defaults);

  // Each search is the fallback of the one before it; the last has no fallback.
  const searchNone = () => bottomUpSearch(input, 'none');
  const searchOne = () => bottomUpSearch(input, 'one', searchNone);
  const searchTwo = () => bottomUpSearch(input, 'two', searchOne);
  const found = bottomUpSearch(input, 'all', searchTwo);

  if (!found) {
    throw new Error(`Selector was not found.`);
  }

  // Prefer the cheapest shortened variant when optimize() produced one.
  const optimized = sort(optimize(found, input));
  const best = optimized.length > 0 ? optimized[0] : found;
  return selector(best);
}
/**
 * Picks the node that selector lookups will be run against.
 *
 * A document node is used as-is. The default root (`defaults.root`) is
 * replaced by its owner document. Any other custom root is used as-is.
 *
 * @param {Node} rootNode - The configured root.
 * @param {{root: Node}} defaults - The default options (only `root` is read).
 * @returns {Node} The node to query.
 */
function findRootDocument(rootNode, defaults) {
  const isDocument = rootNode.nodeType === Node.DOCUMENT_NODE;
  if (!isDocument && rootNode === defaults.root) {
    return rootNode.ownerDocument;
  }
  return rootNode;
}
/**
 * Walks from `input` up through its parentElement chain, collecting one
 * "level" of candidate selector nodes per element, and asks findUniquePath()
 * for a unique path once at least `config.seedMinLength` levels are stacked.
 *
 * `limit` controls how many candidates each level keeps:
 *  - 'all':  every candidate, plus an :nth-child variant of each eligible one;
 *  - 'two':  the first candidate, plus its :nth-child variant when eligible;
 *  - 'one':  the first candidate only, replaced by its :nth-child variant when eligible;
 *  - 'none': the `*` wildcard, with :nth-child when the element has a non-zero index.
 *
 * @param {Element} input - Element to start from.
 * @param {string} limit - One of 'all', 'two', 'one', 'none'.
 * @param {Function} [fallback] - Called when no path is found (also forwarded to findUniquePath).
 * @returns {Array|null|*} The found path, the fallback's result, or null.
 */
function bottomUpSearch(input, limit, fallback) {
  const stack = [];
  let path = null;

  for (let current = input, depth = 0; current; current = current.parentElement, depth++) {
    // First non-empty source wins: id, then attributes, then classes, then tag, then `*`.
    let level = maybe(id(current)) ||
      maybe(...attr(current)) ||
      maybe(...classNames(current)) ||
      maybe(tagName(current)) || [any()];
    const nth = index(current);
    const nthVariants = (nodes) => nodes.filter(dispensableNth).map((node) => nthChild(node, nth));

    switch (limit) {
      case 'all':
        if (nth) {
          level = level.concat(nthVariants(level));
        }
        break;
      case 'two':
        level = level.slice(0, 1);
        if (nth) {
          level = level.concat(nthVariants(level));
        }
        break;
      case 'one': {
        level = level.slice(0, 1);
        const first = level[0];
        if (nth && dispensableNth(first)) {
          level = [nthChild(first, nth)];
        }
        break;
      }
      case 'none': {
        const wildcard = any();
        level = nth ? [nthChild(wildcard, nth)] : [wildcard];
        break;
      }
      default:
        break;
    }

    // Tag every node with its distance from `input`; selector() uses it to pick the combinator.
    level.forEach((node) => {
      node.level = depth;
    });
    stack.push(level);

    if (stack.length >= config.seedMinLength) {
      path = findUniquePath(stack, fallback);
      if (path) {
        break;
      }
    }
  }

  if (!path) {
    path = findUniquePath(stack, fallback);
  }
  if (!path && fallback) {
    return fallback();
  }
  return path;
}
/**
 * Returns the lowest-penalty combination of `stack` nodes whose selector
 * matches exactly one element, or null when none does.
 *
 * If the stack yields more than `config.threshold` combinations, the search
 * is abandoned: `fallback()` is returned when given, otherwise null.
 * Enumeration stops as soon as the threshold is exceeded, so a large stack is
 * never fully materialised and sorted just to be thrown away.
 *
 * @param {Array<Array<object>>} stack - One array of candidate nodes per level.
 * @param {Function} [fallback] - Called when there are too many combinations.
 * @returns {Array<object>|null|*} A unique path, the fallback's result, or null.
 */
function findUniquePath(stack, fallback) {
  const candidates = [];
  for (const candidate of combinations(stack)) {
    candidates.push(candidate);
    // Bail out early instead of generating every combination first.
    if (candidates.length > config.threshold) {
      return fallback ? fallback() : null;
    }
  }
  for (const candidate of sort(candidates)) {
    if (unique(candidate)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Renders a path as a CSS selector string.
 *
 * `path[0]` is the innermost node; each later entry is prepended. The child
 * combinator (` > `) is used when the entry is exactly one level above the
 * previous one, otherwise the descendant combinator (a space).
 *
 * @param {Array<{name: string, level?: number}>} path - Non-empty path.
 * @returns {string} The selector.
 */
function selector(path) {
  const [innermost, ...ancestors] = path;
  let previous = innermost;
  let css = innermost.name;
  for (const ancestor of ancestors) {
    const ancestorLevel = ancestor.level || 0;
    const combinator = previous.level === ancestorLevel - 1 ? ' > ' : ' ';
    css = `${ancestor.name}${combinator}${css}`;
    previous = ancestor;
  }
  return css;
}
/**
 * Sums the `penalty` of every node in a path.
 *
 * @param {Array<{penalty: number}>} path
 * @returns {number} Total penalty (0 for an empty path).
 */
function penalty(path) {
  let total = 0;
  for (const node of path) {
    total = total + node.penalty;
  }
  return total;
}
/**
 * Tells whether the selector for `path` matches exactly one element under
 * the module-level `rootDocument`.
 *
 * @param {Array<object>} path
 * @returns {boolean} true for exactly one match, false for more than one.
 * @throws {Error} If the selector matches nothing.
 */
function unique(path) {
  const css = selector(path);
  const matchCount = rootDocument.querySelectorAll(css).length;
  if (matchCount === 0) {
    throw new Error(`Can't select any node with this selector: ${css}`);
  }
  return matchCount === 1;
}
/**
 * Builds an `#id` selector node for the element.
 *
 * @param {Element} input
 * @returns {{name: string, penalty: number}|null} The node (penalty 0), or
 *   null when the element has no id or `config.idName` rejects it.
 */
function id(input) {
  const elementId = input.getAttribute('id');
  if (!elementId || !config.idName(elementId)) {
    return null;
  }
  return { name: `#${CSS.escape(elementId)}`, penalty: 0 };
}
/**
 * Builds `[name="value"]` selector nodes for every attribute accepted by
 * `config.attr(name, value)`.
 *
 * @param {Element} input
 * @returns {Array<{name: string, penalty: number}>} One node (penalty 0.5)
 *   per accepted attribute, in attribute order; empty when none is accepted.
 */
function attr(input) {
  const nodes = [];
  for (const attribute of Array.from(input.attributes)) {
    if (config.attr(attribute.name, attribute.value)) {
      nodes.push({
        name: `[${CSS.escape(attribute.name)}="${CSS.escape(attribute.value)}"]`,
        penalty: 0.5,
      });
    }
  }
  return nodes;
}
/**
 * Builds `.class` selector nodes for every class accepted by `config.className`.
 *
 * @param {Element} input
 * @returns {Array<{name: string, penalty: number}>} One node (penalty 1) per
 *   accepted class, in classList order.
 */
function classNames(input) {
  const toNode = (cls) => ({
    name: `.${CSS.escape(cls)}`,
    penalty: 1,
  });
  const accepted = Array.from(input.classList).filter(config.className);
  return accepted.map((cls) => toNode(cls));
}
/**
 * Builds a tag-name selector node for the element.
 *
 * @param {Element} input
 * @returns {{name: string, penalty: number}|null} The lower-cased tag name
 *   (penalty 2), or null when `config.tagName` rejects it.
 */
function tagName(input) {
  const name = input.tagName.toLowerCase();
  return config.tagName(name) ? { name, penalty: 2 } : null;
}
/**
 * Builds the universal-selector node used when nothing more specific applies.
 *
 * @returns {{name: string, penalty: number}} A fresh `*` node with penalty 3.
 */
function any() {
  const wildcard = { name: '*', penalty: 3 };
  return wildcard;
}
/**
 * Computes the 1-based position of `input` among its parent's element
 * children (the number used in `:nth-child()`).
 *
 * @param {Node} input
 * @returns {number|null} The count of element siblings up to and including
 *   `input`, or null when there is no parent or the parent has no children.
 */
function index(input) {
  const parent = input.parentNode;
  if (!parent || !parent.firstChild) {
    return null;
  }
  let position = 0;
  for (let sibling = parent.firstChild; sibling; sibling = sibling.nextSibling) {
    // Only element nodes count; text and comment nodes are skipped.
    if (sibling.nodeType === Node.ELEMENT_NODE) {
      position += 1;
    }
    if (sibling === input) {
      break;
    }
  }
  return position;
}
/**
 * Derives a new selector node that appends `:nth-child(i)` to `node`.
 *
 * @param {{name: string, penalty: number}} node - Base node (not modified).
 * @param {number} i - Position to put in `:nth-child()`.
 * @returns {{name: string, penalty: number}} New node, one penalty point more expensive.
 */
function nthChild(node, i) {
  const suffix = `:nth-child(${i})`;
  const derived = {
    name: node.name + suffix,
    penalty: node.penalty + 1,
  };
  return derived;
}
/**
 * Tells whether a node may get an `:nth-child()` variant.
 *
 * @param {{name: string}} node
 * @returns {boolean} false for `html` and for id selectors (`#...`), true otherwise.
 */
function dispensableNth(node) {
  const { name } = node;
  if (name === 'html') {
    return false;
  }
  return !name.startsWith('#');
}
/**
 * Collects the arguments that are neither null nor undefined.
 *
 * @param {...*} level - Candidate values.
 * @returns {Array|null} The kept values, or null when none remain.
 */
function maybe(...level) {
  const present = level.filter(notEmpty);
  return present.length > 0 ? present : null;
}
/**
 * Tells whether a value is something other than null or undefined.
 *
 * @param {*} value
 * @returns {boolean}
 */
function notEmpty(value) {
  const isMissing = value === null || value === undefined;
  return !isMissing;
}
/**
 * Lazily yields every path that takes exactly one node from each level of
 * `stack`, in level order.
 *
 * @param {Array<Array>} stack - Remaining levels.
 * @param {Array} [path=[]] - Nodes already chosen from earlier levels.
 * @yields {Array} One complete path per combination.
 */
function* combinations(stack, path = []) {
  if (!(stack.length > 0)) {
    yield path;
    return;
  }
  const [firstLevel, ...remaining] = stack;
  for (const node of firstLevel) {
    yield* combinations(remaining, path.concat(node));
  }
}
/**
 * Returns a new array of the given paths ordered by ascending total penalty.
 *
 * @param {Iterable<Array>} paths - Any iterable of paths (it is fully consumed).
 * @returns {Array<Array>} Sorted copy; the input is not modified.
 */
function sort(paths) {
  const byPenalty = (left, right) => penalty(left) - penalty(right);
  const ordered = [...paths];
  ordered.sort(byPenalty);
  return ordered;
}
/**
 * Lazily yields shorter variants of `path` that still uniquely select `input`.
 *
 * Each attempt removes one interior node (never the first or the last). A
 * variant is yielded only when it is unique and resolves to `input`; each
 * yielded variant is then optimized recursively. Work is bounded by
 * `config.maxNumberOfTries`, and the generator stops as soon as it reaches a
 * selector it has already yielded.
 *
 * @param {Array<object>} path - Path to shorten.
 * @param {Element} input - Element the selector must keep resolving to.
 * @param {{counter: number, visited: Map}} [scope] - Shared attempt counter
 *   and set of already-yielded selectors (passed along on recursion).
 * @yields {Array<object>} A shorter valid path.
 */
function* optimize(path, input, scope = {
  counter: 0,
  visited: new Map(),
}) {
  const longEnough = path.length > 2 && path.length > config.optimizedMinLength;
  if (!longEnough) {
    return;
  }
  for (let drop = 1; drop < path.length - 1; drop++) {
    if (scope.counter > config.maxNumberOfTries) {
      return; // Attempt budget exhausted.
    }
    scope.counter += 1;
    const shorter = path.filter((_, position) => position !== drop);
    const key = selector(shorter);
    if (scope.visited.has(key)) {
      return;
    }
    if (!unique(shorter) || !same(shorter, input)) {
      continue;
    }
    yield shorter;
    scope.visited.set(key, true);
    yield* optimize(shorter, input, scope);
  }
}
/**
 * Tells whether the first element matched by the path's selector (under the
 * module-level `rootDocument`) is exactly `input`.
 *
 * @param {Array<object>} path
 * @param {Element} input
 * @returns {boolean}
 */
function same(path, input) {
  const css = selector(path);
  const firstMatch = rootDocument.querySelector(css);
  return firstMatch === input;
}
58 changes: 58 additions & 0 deletions app/analyzed_page/generate_css_selector.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*jshint esversion: 6 */

// import {getCssSelector} from "index.js";
// import {finder} from "finder.js";

/**
 * Generates a CSS selector for `element` with two generators (finder and
 * CssSelectorGenerator) and returns the shorter result.
 *
 * Both generators can throw on some pages, so each call is guarded and a
 * thrown value simply disqualifies that generator:
 *  - finder: reported to produce selectors rejected by querySelectorAll, e.g.
 *    '#Country_16856319000360.6264611129794591' (id containing "."),
 *    reproduced on https://www.docker.com;
 *  - CssSelectorGenerator: reported to fail for ids starting with a digit,
 *    e.g. '#6264611129794591', reproduced on https://www.otto.de.
 *
 * @param {Element} element - Element to describe.
 * @returns {string} The chosen selector (finder's on a length tie), or the
 *   literal 'CSS selector generation was failed' when neither produced a string.
 */
const generateSelectorByElement = (element) => {
  "use strict";

  // Attributes finder must never use when building attribute selectors.
  const finderForbiddenAttributes = ['jdn-hash', 'href', 'class', 'xmlns', 'xmlns:xlink', 'xlink:href'];
  const generatorOptions = {
    blacklist: [/jdn-hash/, /href/],
    maxCombinations: 30,
    maxCandidates: 30,
  };

  // Runs a generator and hands back whatever it threw instead of propagating it.
  const attempt = (generate) => {
    try {
      return generate();
    } catch (err) {
      return err;
    }
  };

  const selectorByFinder = attempt(() => finder(element, {
    attr: (name, value) => value && !finderForbiddenAttributes.includes(name),
  }));
  const selectorByGenerator = attempt(() => CssSelectorGenerator.getCssSelector(element, generatorOptions));

  // Only string results count; anything else (including a caught error) is a failure.
  const finderSucceeded = typeof selectorByFinder === 'string';
  const generatorSucceeded = typeof selectorByGenerator === 'string';

  if (!finderSucceeded && !generatorSucceeded) {
    return 'CSS selector generation was failed';
  }
  if (!finderSucceeded) {
    return selectorByGenerator;
  }
  if (!generatorSucceeded) {
    return selectorByFinder;
  }
  return selectorByGenerator.length < selectorByFinder.length ? selectorByGenerator : selectorByFinder;
};
1 change: 1 addition & 0 deletions app/analyzed_page/index.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions app/analyzed_page/index.js.map

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions app/css_locators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Public names exported by this package; the task itself is imported from .tasks below.
__all__ = [
"task_schedule_css_locator_generation",
]

from .tasks import task_schedule_css_locator_generation
6 changes: 6 additions & 0 deletions app/css_locators/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from app.celery_app import celery_app


# Celery task registered with bind=True, so the task instance is passed as `self`.
# Takes an integer element id and a document UUID string and is annotated to return a string.
# The body is still a placeholder (`...`): no generation logic is implemented here.
# NOTE(review): presumably this will schedule CSS locator generation for the given element — confirm.
@celery_app.task(bind=True)
def task_schedule_css_locator_generation(self, element_id: int, document_uuid: str) -> str:
...
2 changes: 2 additions & 0 deletions app/css_locators/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Takes a document as a string and is annotated to return a string.
# The body is still a placeholder (`...`): nothing is injected yet.
# NOTE(review): presumably meant to embed the selector-generator scripts into the document — confirm.
def inject_css_selector_generator_scripts(document: str) -> str:
...
2 changes: 2 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
mui_df_path,
old_df_path,
robula_api,
websocket_api,
)
from app.logger import logger
from app.models import (
Expand All @@ -46,6 +47,7 @@

api = FastAPI()
api.include_router(robula_api.router)
api.include_router(websocket_api.router)
templates = Jinja2Templates(directory="templates")


Expand Down
4 changes: 4 additions & 0 deletions app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ class XPathGenerationModel(TaskIdModel):
config: RobulaSettingsModel


class CSSSelectorGenerationModel(TaskIdModel):
document: str


class LoggingInfoModel(BaseModel):
session_id: int
element_library: str
Expand Down
Loading

0 comments on commit 81814e6

Please sign in to comment.