def str_presenter(dumper, data):
    """Represent *data* as a YAML string scalar.

    A value without a newline is emitted as an ordinary string scalar.
    A value containing a newline is normalised first: trailing whitespace
    is removed from every line, lines that end up empty are dropped, and
    the remaining lines are emitted with the literal block style (``|``).

    Args:
        dumper: Object providing ``represent_scalar(tag, value, style=...)``.
        data: The string to represent.

    Returns:
        Whatever ``dumper.represent_scalar`` returns for the value.
    """
    tag = 'tag:yaml.org,2002:str'

    # Single-line values need no clean-up and no explicit style.
    if '\n' not in data:
        return dumper.represent_scalar(tag, data)

    kept_lines = []
    for raw_line in data.splitlines():
        trimmed = raw_line.rstrip()
        if trimmed:
            kept_lines.append(trimmed)

    return dumper.represent_scalar(tag, "\n".join(kept_lines), style='|')
# Templates are rendered from in-memory strings (env.from_string), so the
# environment is created with the base loader only.
env = jinja2.Environment(loader=jinja2.BaseLoader())

rules = []

# Expand every (source table, rule key, parameter set) combination found in
# the rule configuration into one concrete rule entry.
for source_table, rule_configs in config.items():
    for rule_key, rule_config_list in rule_configs.items():
        try:
            rule_template = templates[rule_key]
        except KeyError:
            # Same exception type as before, but say which entry is wrong.
            raise KeyError(
                f"rule {rule_key!r} configured for {source_table!r} "
                "has no matching template"
            ) from None

        for rule_config in rule_config_list:
            # One context for both the SQL and the description templates so
            # that descriptions can use the same variables as the SQL
            # (global CONFIG, source table, template fields). Later operands
            # win, so per-rule configuration has the highest precedence.
            context = (
                CONFIG
                | {"source": source_table}
                | rule_template
                | rule_config
            )

            rendered = {
                "sql": env.from_string(rule_template["sql"]).render(**context),
                "descriptions": [
                    d | {
                        "description": env.from_string(
                            d["description"]
                        ).render(**context)
                    }
                    for d in rule_template["descriptions"]
                ],
            }

            # Merge order matters: template defaults first, then the rendered
            # fields, then the rule's own configuration on top.
            rules.append(
                {"rule_key": rule_key} | rule_template | rendered | rule_config
            )

# The file is written as UTF-8, so let the dumper emit non-ASCII characters
# as-is instead of escaping them.
(pathlib.Path(__file__).parent / 'quality_rules.yml').write_text(
    yaml.dump({'quality_rules': rules}, allow_unicode=True),
    encoding='utf-8',
)
+ ST_Force2D(s.geom) AS geom, + NULL AS target_id + FROM + some_schema.line_one s + WHERE + ST_IsValid(s.geom) AND ST_IsSimple(s.geom) + AND NOT EXISTS ( + SELECT + FROM some_schema.other_polygon t + WHERE + t.fid <> s.fid + AND s.geom && t.geom + AND ST_IsValid(t.geom) AND ST_IsSimple(t.geom) + AND ST_Within(s.geom, t.geom) + ) + targets: + - name: Some other polygon + table: some_schema.other_polygon + type: topology +- descriptions: + - description: Line geometry length should not be less than 10 meters + lang: en + priority: warning + rule_key: line_longer_than + source_filter: and s.some_attr = 'is_long_line_type' + sql: |- + SELECT + s.fid AS source_id, + ST_Force2D(s.geom) AS geom, + NULL AS target_id + FROM + some_schema.line_one s + WHERE + ST_IsValid(s.geom) AND ST_IsSimple(s.geom) + AND ST_Length(s.geom) < 10 + and s.some_attr = 'is_long_line_type' + threshold: 10 + type: geometry +- descriptions: + - description: Line geometry length should not be less than 1 meters + lang: en + priority: warning + rule_key: line_longer_than + source_filter: and s.some_attr = 'is_short_line_type' + sql: |- + SELECT + s.fid AS source_id, + ST_Force2D(s.geom) AS geom, + NULL AS target_id + FROM + some_schema.line_one s + WHERE + ST_IsValid(s.geom) AND ST_IsSimple(s.geom) + AND ST_Length(s.geom) < 1 + and s.some_attr = 'is_short_line_type' + threshold: 1 + type: geometry diff --git a/examples/rule-config.yml b/examples/rule-config.yml new file mode 100644 index 0000000..8565df2 --- /dev/null +++ b/examples/rule-config.yml @@ -0,0 +1,16 @@ +some_schema.line_one: + is_within: + - targets: + - name: Polygon 1 + table: other_schema.polygon_1 + filter: and t.some_attr is null + - name: Polygon 2 + table: yet_another_schema.polygon_two + - targets: + - name: Some other polygon + table: some_schema.other_polygon + line_longer_than: + - threshold: 10 + source_filter: and s.some_attr = 'is_long_line_type' + - threshold: 1 + source_filter: and s.some_attr = 'is_short_line_type' diff 
--git a/examples/rule-templates.yml b/examples/rule-templates.yml new file mode 100644 index 0000000..2148292 --- /dev/null +++ b/examples/rule-templates.yml @@ -0,0 +1,46 @@ +line_longer_than: + descriptions: + - lang: en + description: Line geometry length should not be less than {{ threshold }} meters + type: geometry + priority: warning + sql: > + SELECT + s.{{ id_column }} AS source_id, + ST_Force2D(s.{{ geometry_column }}) AS geom, + NULL AS target_id + FROM + {{ source }} s + WHERE + ST_IsValid(s.{{ geometry_column }}) AND ST_IsSimple(s.{{ geometry_column }}) + AND ST_Length(s.{{ geometry_column }}) < {{ threshold }} + {{ source_filter | default('') }} + +is_within: + descriptions: + - lang: en + description: The geometry must be within {{ targets | map(attribute='name') | join(',') }} + type: topology + priority: warning + sql: > + SELECT + s.{{ id_column }} AS source_id, + ST_Force2D(s.{{ geometry_column }}) AS geom, + NULL AS target_id + FROM + {{ source }} s + WHERE + ST_IsValid(s.{{ geometry_column }}) AND ST_IsSimple(s.{{ geometry_column }}) + {% for target in targets -%} + AND NOT EXISTS ( + SELECT + FROM {{ target.table }} t + WHERE + t.{{ id_column }} <> s.{{ id_column }} + AND s.{{ geometry_column }} && t.{{ geometry_column }} + AND ST_IsValid(t.{{ geometry_column }}) AND ST_IsSimple(t.{{ geometry_column }}) + AND ST_Within(s.{{ geometry_column }}, t.{{ geometry_column }}) + {{ target.filter | default('') }} + ) + {% endfor -%} + {{ source_filter | default('') }}