Skip to content

Commit

Permalink
docs: add yaml config examples
Browse files Browse the repository at this point in the history
  • Loading branch information
komima committed Jul 2, 2024
1 parent 63d4ccb commit 3edcbca
Show file tree
Hide file tree
Showing 5 changed files with 214 additions and 0 deletions.
6 changes: 6 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Examples for rule configuration

- Define the necessary rules as SQL templates (`rule-templates.yml`)
- Configure the rules for all tables (`rule-config.yml`)
- Generate the rules by running `parse.py` (requires PyYAML and Jinja2, and Python 3.9+ for the dict `|` operator)
- Use the result at runtime (`quality_rules.yml`)
40 changes: 40 additions & 0 deletions examples/parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import yaml
import pathlib
import jinja2

# Global values merged into the Jinja2 context of every rule's SQL template;
# they fill the `{{ id_column }}` and `{{ geometry_column }}` placeholders.
CONFIG = dict(
    id_column="fid",
    geometry_column="geom",
)


def str_presenter(dumper, data):
    """Represent a string scalar, using literal block style for multi-line text.

    Single-line strings are passed to the dumper unchanged. Strings that
    contain a newline are cleaned first (trailing whitespace stripped from
    every line, empty lines dropped) and then emitted with the ``|`` style.

    Args:
        dumper: Object providing ``represent_scalar(tag, value, style=...)``.
        data: The string to represent.

    Returns:
        Whatever ``dumper.represent_scalar`` returns for the string.
    """
    yaml_str_tag = 'tag:yaml.org,2002:str'

    if '\n' not in data:
        return dumper.represent_scalar(yaml_str_tag, data)

    kept_lines = []
    for raw_line in data.splitlines():
        trimmed = raw_line.rstrip()
        if trimmed != '':
            kept_lines.append(trimmed)

    return dumper.represent_scalar(yaml_str_tag, "\n".join(kept_lines), style='|')

# Register the custom presenter so multi-line strings (the rendered SQL) are
# written as literal block scalars in the generated file.
yaml.add_representer(str, str_presenter)

# All input files and the generated output live next to this script.
base_dir = pathlib.Path(__file__).parent

# safe_load is sufficient here: both files only hold plain mappings, lists and
# scalars. `or {}` keeps an empty file from crashing on `.items()` below.
templates = yaml.safe_load((base_dir / 'rule-templates.yml').read_text(encoding='utf-8')) or {}
config = yaml.safe_load((base_dir / 'rule-config.yml').read_text(encoding='utf-8')) or {}

# Templates are rendered from in-memory strings, so no file loader is needed.
env = jinja2.Environment(loader=jinja2.BaseLoader())

rules = []

# rule-config.yml layout: {source table: {rule key: [one config dict per rule]}}.
# NOTE: table names and filters from the config are interpolated verbatim into
# the SQL text, so only use this with trusted configuration files.
for source_table, rule_configs in config.items():
    for rule_key, rule_config_list in rule_configs.items():
        try:
            rule_template = templates[rule_key]
        except KeyError:
            raise KeyError(
                f"rule '{rule_key}' configured for '{source_table}' "
                "has no template in rule-templates.yml"
            ) from None
        for rule_config in rule_config_list:
            # One shared context for SQL and descriptions; later operands win,
            # so the per-rule config overrides template and global values.
            context = CONFIG | {"source": source_table} | rule_template | rule_config
            rules.append(
                {"rule_key": rule_key}
                | rule_template
                | {
                    "sql": env.from_string(rule_template["sql"]).render(**context),
                    "descriptions": [
                        d | {"description": env.from_string(d["description"]).render(**context)}
                        for d in rule_template["descriptions"]
                    ],
                }
                # Merged last so the rule's own settings (threshold, targets,
                # ...) are part of the emitted rule and can override anything.
                | rule_config
            )

# allow_unicode keeps non-ASCII text (e.g. translated descriptions) readable
# instead of being escaped; the file itself is written as UTF-8.
(base_dir / 'quality_rules.yml').write_text(
    yaml.dump({'quality_rules': rules}, allow_unicode=True),
    encoding='utf-8',
)
106 changes: 106 additions & 0 deletions examples/quality_rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
quality_rules:
- descriptions:
- description: The geometry must be within Polygon 1,Polygon 2
lang: en
priority: warning
rule_key: is_within
sql: |-
SELECT
s.fid AS source_id,
ST_Force2D(s.geom) AS geom,
NULL AS target_id
FROM
some_schema.line_one s
WHERE
ST_IsValid(s.geom) AND ST_IsSimple(s.geom)
AND NOT EXISTS (
SELECT
FROM other_schema.polygon_1 t
WHERE
t.fid <> s.fid
AND s.geom && t.geom
AND ST_IsValid(t.geom) AND ST_IsSimple(t.geom)
AND ST_Within(s.geom, t.geom)
and t.some_attr is null
)
AND NOT EXISTS (
SELECT
FROM yet_another_schema.polygon_two t
WHERE
t.fid <> s.fid
AND s.geom && t.geom
AND ST_IsValid(t.geom) AND ST_IsSimple(t.geom)
AND ST_Within(s.geom, t.geom)
)
targets:
- filter: and t.some_attr is null
name: Polygon 1
table: other_schema.polygon_1
- name: Polygon 2
table: yet_another_schema.polygon_two
type: topology
- descriptions:
- description: The geometry must be within Some other polygon
lang: en
priority: warning
rule_key: is_within
sql: |-
SELECT
s.fid AS source_id,
ST_Force2D(s.geom) AS geom,
NULL AS target_id
FROM
some_schema.line_one s
WHERE
ST_IsValid(s.geom) AND ST_IsSimple(s.geom)
AND NOT EXISTS (
SELECT
FROM some_schema.other_polygon t
WHERE
t.fid <> s.fid
AND s.geom && t.geom
AND ST_IsValid(t.geom) AND ST_IsSimple(t.geom)
AND ST_Within(s.geom, t.geom)
)
targets:
- name: Some other polygon
table: some_schema.other_polygon
type: topology
- descriptions:
- description: Line geometry length should not be less than 10 meters
lang: en
priority: warning
rule_key: line_longer_than
source_filter: and s.some_attr = 'is_long_line_type'
sql: |-
SELECT
s.fid AS source_id,
ST_Force2D(s.geom) AS geom,
NULL AS target_id
FROM
some_schema.line_one s
WHERE
ST_IsValid(s.geom) AND ST_IsSimple(s.geom)
AND ST_Length(s.geom) < 10
and s.some_attr = 'is_long_line_type'
threshold: 10
type: geometry
- descriptions:
- description: Line geometry length should not be less than 1 meters
lang: en
priority: warning
rule_key: line_longer_than
source_filter: and s.some_attr = 'is_short_line_type'
sql: |-
SELECT
s.fid AS source_id,
ST_Force2D(s.geom) AS geom,
NULL AS target_id
FROM
some_schema.line_one s
WHERE
ST_IsValid(s.geom) AND ST_IsSimple(s.geom)
AND ST_Length(s.geom) < 1
and s.some_attr = 'is_short_line_type'
threshold: 1
type: geometry
16 changes: 16 additions & 0 deletions examples/rule-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
some_schema.line_one:
is_within:
- targets:
- name: Polygon 1
table: other_schema.polygon_1
filter: and t.some_attr is null
- name: Polygon 2
table: yet_another_schema.polygon_two
- targets:
- name: Some other polygon
table: some_schema.other_polygon
line_longer_than:
- threshold: 10
source_filter: and s.some_attr = 'is_long_line_type'
- threshold: 1
source_filter: and s.some_attr = 'is_short_line_type'
46 changes: 46 additions & 0 deletions examples/rule-templates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
line_longer_than:
descriptions:
- lang: en
description: Line geometry length should not be less than {{ threshold }} meters
type: geometry
priority: warning
sql: >
SELECT
s.{{ id_column }} AS source_id,
ST_Force2D(s.{{ geometry_column }}) AS geom,
NULL AS target_id
FROM
{{ source }} s
WHERE
ST_IsValid(s.{{ geometry_column }}) AND ST_IsSimple(s.{{ geometry_column }})
AND ST_Length(s.{{ geometry_column }}) < {{ threshold }}
{{ source_filter | default('') }}
is_within:
descriptions:
- lang: en
description: The geometry must be within {{ targets | map(attribute='name') | join(',') }}
type: topology
priority: warning
sql: >
SELECT
s.{{ id_column }} AS source_id,
ST_Force2D(s.{{ geometry_column }}) AS geom,
NULL AS target_id
FROM
{{ source }} s
WHERE
ST_IsValid(s.{{ geometry_column }}) AND ST_IsSimple(s.{{ geometry_column }})
{% for target in targets -%}
AND NOT EXISTS (
SELECT
FROM {{ target.table }} t
WHERE
t.{{ id_column }} <> s.{{ id_column }}
AND s.{{ geometry_column }} && t.{{ geometry_column }}
AND ST_IsValid(t.{{ geometry_column }}) AND ST_IsSimple(t.{{ geometry_column }})
AND ST_Within(s.{{ geometry_column }}, t.{{ geometry_column }})
{{ target.filter | default('') }}
)
{% endfor -%}
{{ source_filter | default('') }}

0 comments on commit 3edcbca

Please sign in to comment.