diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 954c938..19ba464 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.0 +current_version = 0.2.0 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+))? diff --git a/CHANGELOG.md b/CHANGELOG.md index f26076d..548c57b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # CHANGELOG +## 0.2.0 + +### Features + +* Add support for matcher generators (syntax is directly bound to lambdas) +* Add support for conditional matcher using lambdas +* Add support for regex matchers +* Add support for range matchers + ## 0.1.0 Initial version of `iguala`: diff --git a/README.md b/README.md index 0eab795..cb68157 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,10 @@ They support: * pattern/matchers compositions, * composite and recursive paths, * yield all combinations found when a pattern matches against an object, -* matching over dictionary the same way as for classes/objects (more or less). +* matching over dictionary the same way as for classes/objects (more or less), +* conditional matchers (you can capture variable and/or the tested object to test a condition), +* matcher generators (you can capture variables and/or the tested object to produce a new matcher), +* regex matchers. More operators/matchers will arrive @@ -95,13 +98,19 @@ Wildcards/variables stores information and checks if the same information appear There is few pattern operators. +* `match(...)` with a type as parameter matches exactly a type, e.g: `match(A) % {}` means, match an instance of `A` (but not subclasses). +* `~` used in front of an object matcher expresses "and all its subclasses", e.g: `~match(object) % {}` means, match an instance of `object` or from its subclasses. * `%` with a dictionnary on its right expresses the properties of an object, e.g: `match(A) % {'name': 'foo'}` means, match an instance of `A` where the `name` equals `foo`. 
* `@` stores a data into a dedicated name, e.g: `match(A) % {'name': 'foo'} @ 'inst'` means, match an instance of `A` where the `name` equals `foo` and store the instance of `A` in `inst` for further usage. -* `is_not(...)` expresses a negation (needs to be imported `from iguala import is_not`), e.g: `match(A) % {'name': is_not('foo')}`, means match an instance of `A` where the `name` is not equal to `foo`. +* `is_not(...)` expresses a negation (needs to be imported `from iguala import is_not`), e.g: `match(A) % {'name': is_not('foo')}`, means match an instance of `A` where the `name` is not equal to `foo`, +* `regex(...)` expresses a regular expression matcher (needs to be imported `from iguala import regex`). The regex to match needs to be passed as a string, e.g: `match(A) % {'name': regex('[A-Z].*')}`, means match an instance of `A` where the `name` matches the regex `[A-Z].*`. + * This matcher supports an additional operator `>>` that is used to store the matching result for further usage. This becomes really handy to get matched groups (especially if named match groups are used), e.g: `match(A) % {'name': regex('[A-Z].*') >> 'match_result'}` will store the "match" object obtained during the regex matching operation under the label `match_result`. This variable will be accessible, like all other variables, in the result produced by `iguala`. + * The same behavior as described above can be achieved without using the `>>` by passing an extra argument to `regex(...)`, e.g: `match(A) % {'name': regex('[A-Z].*', label='match_result')}`. Using the operator or not is a matter of taste, the effect is exactly the same. +* `range(...)`, if you use the `range(...)` constructor (from builtins), a special "range matcher" is created, e.g: `match(A) % {'x': range(0, 5)}` means, match an instance of `A` where `x` is in the range `[0..4]`. 
### Collections patterns -Here is a list of some patterns that can be applied to collections: +Here is a list of some examples of patterns that can be applied to collections: * `[]`, means empty collection * `[3]` means a collection with only one value: `3` @@ -115,10 +124,30 @@ Here is a list of some patterns that can be applied to collections: * `[..., '@x', ..., '@x']` means a collection that have an element that is equal to the last element * `['@x', ..., '@x']` means a collection where the first and the last element are the same * `[..., '@x', '@x', ..]` means a collection that have two times the same element that follow each other -* `[..., '@x', ..., is_not('@x'), ..]` means a collection where two consecutive elements that are not the same (a collection where all elements are different) -* `is_not([..., '@x', ..., is_not('@x'), ...])` means a collection where there is no consecutive elements that are not the same (a collection where all elements are the same) -* ... +* `[..., '@x', ..., is_not('@x'), ...]` means a collection that has two elements that are not the same (a collection where not all elements are the same) +* `is_not([..., '@x', ..., is_not('@x'), ...])` means a collection where there are no two elements that are not the same (a collection where all elements are the same) + +### Lambda based matchers + +Lambdas are used to express patterns over captured variables: + +* `lambda VAR1, VAR2, ...: SOMETHING WITH VARS` is a matcher generator. +Matcher generators use captured variables to generate new matchers that are executed when all necessary variables have been captured, e.g: `match(A) % {'x': '@x', 'y': lambda x: x + 1}` means, match an instance of `A` that has an attribute `x` and an attribute `y` that is equal to `x + 1`. +* `cond(lambda ....)` is a condition matcher (needs to be imported `from iguala import cond`). +Condition matchers use captured variables to execute a function and use its result as the matching result. 
Consequently, the return type of the function must be a boolean, e.g: `match(A) % {'x': '@x', 'y': cond(lambda x, __self__: __self__ == x + 1)}` means, match an instance of `A` that has an attribute `x` and an attribute `y` that is equal to `x + 1`. +* `__self__` is a meta-variable that can be passed as an argument of the matcher generator or conditional matcher. This variable resolves to the object currently matched. + +Matcher generators and conditional matchers also work with sequence matchers, negative matchers, range matchers, regex matchers, etc. +Here are some examples: + +* `[..., '@x', lambda x: x + 1, ...]` means a collection where one element is followed by its successor. +* `[..., '@x', is_not(lambda x: x + 1), ...]` means a collection where one element is not followed by its successor. +* `is_not([..., '@x', is_not(lambda x: x + 1), ...])` means a collection where there is no element that is not followed by its successor (a sorted collection of consecutive values). +* `match(A) % {'x': '@x', 'y': lambda x: range(0, x + 1)}` means match an instance of `A` which has an `x` value and a `y` value contained in the `[0..x]` interval. + +NOTE: Argument names of the function used for the matcher generator or the conditional matcher have to match the names of the variables defined in the pattern. +If other names are used, `iguala` will ignore the matcher, but will generate a warning message stating which variables are missing and their positions in the pattern. 
## Walkthrough - Draw me a pattern on an Object diff --git a/iguala/__init__.py b/iguala/__init__.py index c372dab..5c7d4b3 100644 --- a/iguala/__init__.py +++ b/iguala/__init__.py @@ -1,6 +1,6 @@ from .helpers import is_not, match -from .matchers import as_matcher +from .matchers import as_matcher, cond, regex from .paths import as_path -__ALL__ = ["match", "as_matcher", "as_path", "is_not"] -__version__ = "0.1.0" +__ALL__ = ["match", "as_matcher", "as_path", "is_not", "cond", "regex"] +__version__ = "0.2.0" diff --git a/iguala/matchers.py b/iguala/matchers.py index 088e175..1ecbb9c 100644 --- a/iguala/matchers.py +++ b/iguala/matchers.py @@ -1,3 +1,7 @@ +from collections.abc import MutableMapping +from re import compile +from types import LambdaType + from .helpers import flat from .paths import as_path @@ -13,6 +17,21 @@ def is_match(self): def add_contexts(self, contexts): self.contexts.extend(c for c in contexts if c.is_match) + def analyse_contexts(self): + if any(len(c.delayed_matchers) > 0 for c in self.contexts): + print( + "WARNING: some matcher generators where not executed due to missing variables" + ) + for c in self.contexts: + for bc in c.delayed_matchers: + missing_vars = set(bc.matcher.vars) - set(c.bindings) + code = bc.matcher.fun.__code__ + file = code.co_filename + line = code.co_firstlineno + print( + f' * generator misses variables {missing_vars} to be executed ("{file}", L{line})' + ) + @property def bindings(self): return [c.bindings for c in self.contexts] @@ -21,11 +40,34 @@ def __str__(self): return f"<{self.is_match} - {self.bindings}>" -class Context(object): +class Context(MutableMapping): def __init__(self, truth=True): self.bindings = {} self._is_match = truth self.truth = truth + self.delayed_matchers = [] + + def __getitem__(self, key): + return self.bindings[key] + + def __setitem__(self, key, value): + self.bindings[key] = value + ctx = [] + for gencontext in tuple(self.delayed_matchers): + if gencontext.can_execute(self): + 
ctx.extend(gencontext.execute(self)) + self.delayed_matchers.remove(gencontext) + if any(not c.is_match for c in ctx): + self.is_match = property(lambda self: False) + + def __delitem__(self, key): + del self.bindings[key] + + def __iter__(self): + return iter(self.bindings) + + def __len__(self): + return len(self.bindings) @property def is_match(self): @@ -38,6 +80,7 @@ def is_match(self, value): def copy(self): instance = self.__class__(self.truth) instance.bindings.update(self.bindings) + instance.delayed_matchers.extend(self.delayed_matchers) return instance @@ -55,7 +98,9 @@ def is_list_wildcard(self): def match(self, obj): result = MatcherResult() - result.add_contexts(self.match_context(obj, Context())) + contexts = self.match_context(obj, Context()) + result.add_contexts(contexts) + result.analyse_contexts() return result def __ror__(self, left): @@ -74,7 +119,7 @@ def __init__(self, alias, matcher): self.matcher = matcher def match_context(self, obj, context): - context.bindings[self.alias] = obj + context[self.alias] = obj return self.matcher.match_context(obj, context) @@ -187,6 +232,79 @@ def __init__(self, d): } +class LambdaBasedMatcher(Matcher): + __self__ = "__self__" + + def __init__(self, fun): + self.fun = fun + self.vars = [*fun.__code__.co_varnames[: fun.__code__.co_argcount]] + if self.__self__ in self.vars: + self.has_self = True + self.vars.remove(self.__self__) + else: + self.has_self = False + + def match_context(self, obj, context): + try: + kwargs = {k: context[k] for k in self.vars} + except KeyError: + context.delayed_matchers.append(BoundMatcherGenerator(self, context, obj)) + return [context] + if self.has_self: + kwargs[self.__self__] = obj + return self.execute(obj, context, kwargs) + + +class MatcherGenerator(LambdaBasedMatcher): + def execute(self, obj, context, kwargs): + return as_matcher(self.fun(**kwargs)).match_context(obj, context) + + +class ConditionalMatcher(LambdaBasedMatcher): + def execute(self, obj, context, 
kwargs): + context.is_match = self.fun(**kwargs) + return [context] + + +class BoundMatcherGenerator(object): + def __init__(self, matcher, context, self_object): + self.matcher = matcher + self.context = context + self.self_object = self_object + + def can_execute(self, context): + return all(x in context for x in self.matcher.vars) + + def execute(self, context): + return self.matcher.match_context(self.self_object, context.copy()) + + +class RegexMatcher(Matcher): + def __init__(self, regexp, label=None): + self.regexp = compile(regexp) + self.label = label + + def __rshift__(self, label): + self.label = label + return self + + def match_context(self, obj, context): + result = self.regexp.match(obj) + context.is_match = result is not None + if self.label: + context[self.label] = result + return [context] + + +class RangeMatcher(Matcher): + def __init__(self, range): + self.range = range + + def match_context(self, obj, context): + context.is_match = obj in self.range + return [context] + + class WildcardMatcher(Matcher): def __init__(self, alias): self.alias = alias @@ -199,11 +317,11 @@ def match_context(self, obj, context): if self.is_anonymous: context.is_match = True return [context] - if self.alias in context.bindings: - context.is_match = context.bindings[self.alias] == obj + if self.alias in context: + context.is_match = context[self.alias] == obj return [context] context.is_match = True - context.bindings[self.alias] = obj + context[self.alias] = obj return [context] @@ -413,16 +531,24 @@ def as_matcher(obj): return WildcardMatcher(obj[1:]) if obj.startswith("*"): return ListWildcardMatcher(obj[1:]) - if obj is Ellipsis: - return ListWildcardMatcher("") - if isinstance(obj, type): - return ObjectMatcher(obj, {}) if isinstance(obj, bool): return IdentityMatcher(obj) if obj is None or isinstance(obj, (int, float, str)): return LiteralMatcher(obj) + if obj is Ellipsis: + return ListWildcardMatcher("") if isinstance(obj, list): return SequenceMatcher(obj) if 
isinstance(obj, dict): return DictMatcher(obj) + if isinstance(obj, LambdaType): + return MatcherGenerator(obj) + if isinstance(obj, range): + return RangeMatcher(obj) + if isinstance(obj, type): + return ObjectMatcher(obj, {}) return obj.as_matcher() + + +cond = ConditionalMatcher +regex = RegexMatcher diff --git a/setup.py b/setup.py index 26822b1..0a3d9cf 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setup( name='iguala', - version='0.1.0', + version='0.2.0', description=("Non-linear pattern matching for Python's objects, or rexep-like for objects"), long_description=open('README.md').read(), long_description_content_type='text/markdown',