diff --git a/.github/workflows/qc-sec.yml b/.github/workflows/qc-sec.yml new file mode 100644 index 0000000..4200c84 --- /dev/null +++ b/.github/workflows/qc-sec.yml @@ -0,0 +1,30 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: qc.sec + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + qc-sec: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox + run: python -m pip install --upgrade tox + - name: Run Quality Check for Security + run: tox -e qc.sec diff --git a/.github/workflows/qc-sty.yml b/.github/workflows/qc-sty.yml new file mode 100644 index 0000000..ec41510 --- /dev/null +++ b/.github/workflows/qc-sty.yml @@ -0,0 +1,30 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: qc.sty + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + qc-sty: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox + run: python -m pip install --upgrade tox + - name: Run Quality Check for Style + run: tox -e qc.sty diff --git a/.github/workflows/qc-uni.yml b/.github/workflows/qc-uni.yml new file mode 100644 index 0000000..fbed5d0 --- /dev/null +++ 
b/.github/workflows/qc-uni.yml @@ -0,0 +1,30 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: qc.uni + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + qc-uni: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox + run: python -m pip install --upgrade tox + - name: Run Quality Check for Unittest + run: tox -e qc.uni diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..cfb2b6b --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + }, + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..e685004 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.envFile": "${workspaceFolder}/.env" +} \ No newline at end of file diff --git a/README.md b/README.md old mode 100644 new mode 100755 index bdc3a0a..b5b64b4 --- a/README.md +++ b/README.md @@ -1,2 +1,134 @@ -# gevopy -Genetics for Evolutionary Algorithms in Python +# gevopy +![qc.sec](https://github.com/BorjaEst/gevopy/actions/workflows/qc-sec.yml/badge.svg) +![qc.sty](https://github.com/BorjaEst/gevopy/actions/workflows/qc-sty.yml/badge.svg) +![qc.uni](https://github.com/BorjaEst/gevopy/actions/workflows/qc-uni.yml/badge.svg) + +Awesome Genetics for Evolutionary Algorithms library created by Borja Esteban. + +## Install it from PyPI +```bash +pip install gevopy +``` + +## Usage +This package is designed in order to create your own evolution scripts based on the following concepts: + - **Chromosomes**: Genetic instructions for phenotypes. + - **Genotype**: Genetic design to instantiate phenotypes. + - **Phenotypes**: Genotype instances which perform a task. + - **Fitness**: Provide the methods to evaluate phenotypes. + - **Algorithm**: Evolution procedure for phenotypes. + - **Experiment**: Evolution session with phenotypes. + +Now the following sections will introduce a fast initialization to the package. +Do not hesitate to extend your knowledge by using all the additional provided +examples at the folder [examples](./examples). 
+ +### Genotypes +Define your Genotypes following the `dataclass` principles from `pydantic` by +using the base model `GenotypeModel`. All dataclass attributes are accepted in +addition to a special type `Chromosome` provided in the module `genetics`. +To start, use the already defined chromosome subclasses such as `Haploid` and +`Diploid` depending on the complexity of your genetic model. +```py +from pydantic import Field +from gevopy import genetics, random + +class Genotype(genetics.GenotypeModel): + chromosome_1: genetics.Haploid = Field(default_factory=lambda: random.haploid(12)) + chromosome_2: genetics.Haploid = Field(default_factory=lambda: random.haploid(10)) + simple_attribute: float = 1.0 + +phenotypes = [Genotype() for _ in range(20)] +``` +> Note: Genotype attributes *id*, *experiment*, *created*, *parents*, +*generation*, *score* and *clone* are attributes used by the library. +Overwriting these attributes might lead to unexpected behaviors. + +### Fitness +Create your fitness using the parent class `fitness.FitnessModel` and defining +the method `score`. The fitness to use on the experiment will be an +instance of the defined class. You can use the init arguments `cache` and +`parallel` to optimize how the evaluation flow is executed. + +```py +from gevopy import fitness + +class MyFitness1(fitness.FitnessModel): + def score(self, phenotype): + return phenotype.chromosome_1.count(1) + +fx = MyFitness1(cache=True, parallel=True) +``` +> You can additionally define `setUp` as a method to execute once at the beginning +of each generation before phenotypes are evaluated. + +### Algorithm +The algorithm is the core of your experiment. It defines the rules of the +evolution process. You can create your own algorithm or use the already +existing templates. Algorithms are generally composed of 4 components: + - **Selection**: Callable which provides the first list of candidates. + - **Mating**: Callable which provides the second list of candidates. 
+ - **Crossover**: Callable to generate offspring from candidates. + - **Mutation**: Callable to mutate phenotype's chromosomes. + +Additionally, each algorithm template might contain additional arguments such as +`survival_rate` or `similarity`. Make sure you read and understand each of the +arguments and steps. + +```py +from gevopy.tools import crossover, mutation, selection +from gevopy import algorithms + +my_algorithm=algorithms.Survival( + selection=selection.Tournaments(tournsize=3), + mating=selection.Best(), + crossover=crossover.TwoPoint(indpb=0.8), + mutation=mutation.SinglePoint(indpb=0.5, mutpb=0.2), + survival_rate=0.40, +) +``` +> The modules `tools.crossover`, `tools.mutation` and `tools.selection` contain +templates and utilities to simplify your algorithm definition. + +### Experiment +The experiment is the final expression of your evolutionary algorithm. +It provides the methods to evolve and store phenotypes. Once an experiment +is instantiated, use the method `run` to force the evolution of the population +until a desired state. + +The results of the experiment can be collected from the method output, calling +the `best` method or adding a [Neo4j](https://neo4j.com/) connection as `database` input when +instantiating the experiment to store all phenotypes during the execution. + +```py +import gevopy + +experiment = gevopy.SimpleEvolution( + population=[Genotype() for _ in range(20)], + fitness=MyFitness1(cache=True, parallel=True), +) + +experiment.run( + algorithm=my_algorithm, + max_generations=20, + max_score=12.0, +) + +best_phenotype = experiment.best() +``` +> The method `run` forces the evolution of the experiment which is updated on +each cycle. After the method is completed, you can force the evolution +process again using higher inputs for `max_generations` or `max_score`. 
+ + +## Development +Fork the repository, pick one of the open [issues](https://github.com/BorjaEst/gevopy/issues) +and create a [Pull request](https://github.com/BorjaEst/gevopy/pulls). + + +## FAQ and Notes + +### Why Graph Database? +Storing relationships at the record level makes sense in genotype +relationships as it provides index-free adjacency. +Graph traversal operations such as 'genealogy tree' or certain matches can +be performed with no index lookups leading to much better performance. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8f45a62 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[build-system] +requires = ["setuptools", "setuptools-scm"] +build-backend = "setuptools.build_meta" + +[project] +name = "gevopy" +description = "Genetics for Evolutionary Algorithms in Python." +readme = "README.md" +requires-python = ">=3.10" +license = {text = "GNU General Public License v3 (GPLv3)"} +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: OS Independent", +] +dynamic = ["version", "dependencies"] + +[project.urls] +"Homepage" = "https://github.com/BorjaEst/gevopy/" +"Bug Tracker" = "https://github.com/BorjaEst/gevopy/issues" + +[tool.setuptools.dynamic] +version = {file = "src/gevopy/VERSION"} +dependencies = {file = ["requirements.txt"]} + +[tool.pytest.ini_options] +addopts = ["--import-mode=importlib"] + +[tool.pylint.messages_control] +extension-pkg-whitelist = "pydantic" diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100755 index 0000000..10cec12 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,8 @@ +# These requirements are for development and testing only, not for production. 
class Chromosome(np.ndarray, MutableSequence):
    """A chromosome is a long DNA molecule with part or all of the genetic
    material of an organism. In the case of Evolutionary Algorithms it
    contains the information required to evaluate a phenotype.

    This library bases chromosomes on numpy.ndarray and therefore, when
    creating one, the steps defined at `subclassing ndarray` must be followed.
    Additionally it subclasses from `MutableSequence` to provide additional
    and standard python methods such as `count`.

    In order to support serialization/deserialization the `__new__` method
    must accept only the first input parameter of the function np.array.
    """

    def __new__(cls, data):
        """Standard python method required to subclass np.ndarray.
        :param cls: Chromosome ndarray subclass type
        :param data: Any object exposing the array interface or sequence
        :return: A new generated Chromosome instance (view of a uint8 array)
        """
        return np.array(data, dtype="uint8").view(cls)

    def __array_finalize__(self, obj):
        """Mechanism that numpy provides to allow subclasses to handle
        the various ways that new instances get created.
        :param obj: New instance provided for slicing and `view`
        """
        # Chromosomes add no attributes of their own, so there is nothing
        # to propagate from `obj`, whichever way the instance was created.
        return None

    def __mutate__(self):
        """Performs the chromosome mutation operation.
        :return: Chromosome with mutated values
        :raises NotImplementedError: Always, subclasses must override it
        """
        raise NotImplementedError

    def __eq__(self, other):
        """Standard python method to compare 2 chromosomes as a whole.
        :param other: Chromosome to compare with
        :return: Boolean, True if are equal, otherwise False
        """
        # NOTE(review): only `==` is overridden; `!=` presumably still
        # resolves to numpy's element-wise comparison. Confirm intended.
        return np.array_equal(self, other)

    def __cross__(self, other):
        """Magic method to compare a chromosome position by position.
        :param other: Chromosome to compare with
        :return: Boolean array, True on the positions where both are equal
        """
        return super().__eq__(other)

    @classmethod
    def __get_validators__(cls):
        """Pydantic magic method for custom validation and deserialization.
        :yield: Validation method for construction
        """
        yield cls.validate_type

    @classmethod
    def validate_type(cls, val):
        """Validation method for construction and deserialization.
        :param val: Value to pass to __new__ data
        :return: Deserialized chromosome
        """
        return cls(data=val)


class Haploid(Chromosome):
    """The word haploid describes a condition, a cell, or an organism that
    contains half of the set of homologous chromosomes present in the somatic
    cell. Homologous chromosomes are two chromosomes that pair up by having
    the same gene sequence, loci, chromosomal length, and centromere location.

    Half of the homologous pairs are maternal (coming from the mother) whereas
    the other half, paternal (coming from the father). Thus, in other words,
    a haploid is when a cell, for instance, contains half of the total
    homologous chromosomes, i.e. a single set of chromosomes that are unpaired.
    """

    # Number of possible states of each chromosome position. Kept as a plain
    # class attribute: stacking @classmethod on @property is deprecated since
    # Python 3.11 and stops working in Python 3.13.
    states = 2

    def __invert__(self):
        """Computes and returns the bit-wise inversion of the chromosome.
        :return: Chromosome with inverted values
        """
        # logical_not yields booleans; view them back as uint8 (0 or 1)
        return np.logical_not(self).view("uint8")

    def __mutate__(self):
        """Computes and returns the bit-wise mutation of the chromosome.
        :return: New random Haploid chromosome with the same size
        """
        return ea_random.haploid(self.size)


class Diploid(Chromosome):
    """In genetics and biology, the term diploid refers to the cell containing
    two sets of homologous chromosomes wherein each chromosome in a set is
    obtained from each of the two-parent cells.

    As example, the fusion of two haploid sex cells results in the formation
    of a diploid cell called a zygote.
    """

    # Number of possible states of each chromosome position (2 bits).
    states = 4

    def __invert__(self):
        """Computes and returns the bit-wise inversion of the chromosome.
        :return: Chromosome with inverted values
        """
        # Invert the whole byte, then keep only the 2 bits in use
        return np.bitwise_and(super().__invert__(), 3)

    def __mutate__(self):
        """Computes and returns the bit-wise mutation of the chromosome.
        :return: New random Diploid chromosome with the same size
        """
        return ea_random.diploid(self.size)


class Triploid(Chromosome):
    """Similar to haploid and diploid, cells can contain three sets of
    homologous chromosomes, increasing the amount of combinations for each
    position.

    In biology triploidy is a rare chromosomal abnormality.
    """

    # Number of possible states of each chromosome position (3 bits).
    states = 8

    def __invert__(self):
        """Computes and returns the bit-wise inversion of the chromosome.
        :return: Chromosome with inverted values
        """
        # Invert the whole byte, then keep only the 3 bits in use
        return np.bitwise_and(super().__invert__(), 7)

    def __mutate__(self):
        """Computes and returns the bit-wise mutation of the chromosome.
        :return: New random Triploid chromosome with the same size
        """
        return ea_random.triploid(self.size)
class GenotypeModel(BaseModel):
    """Base pydantic model for genotypes: a container of chromosomes plus
    the bookkeeping attributes declared below. Different organisms might
    declare different numbers of chromosomes.

    The following attribute names are reserved when subclassing:
     - id: A unique identifier for the phenotype
     - experiment: Name of the experiment the phenotype belongs, can be None
     - created: Datetime when the phenotype was instantiated
     - parents: List of phenotype ids used to generate the phenotype
     - generation: Positive integer indicating the evolution generations
     - score: Float indicating the phenotype score (None, when not evaluated)
     - clone: Method to produce a genotype deep copy with different id
    """

    id: uuid.UUID = Field(default_factory=uuid.uuid4)
    experiment: str = None
    created: datetime = Field(default_factory=datetime.utcnow)
    parents: List[uuid.UUID] = []
    generation: PositiveInt = Field(default=1)
    score: float = None

    class Config:
        # Chromosomes are numpy arrays; encode them as plain integer lists
        json_encoders = {Chromosome: lambda x: x.astype("uint8").tolist()}

    def clone(self):
        """Produces a deep copy of the phenotype which gets a fresh id and
        no score.
        :return: Phenotype copy
        """
        duplicate = copy.deepcopy(self)
        duplicate.score = None  # The copy has not been evaluated yet
        duplicate.id = uuid.uuid4()  # The copy must not share the id
        return duplicate

    def __repr__(self):
        """Representation method for phenotype. Shows the class name
        followed by the phenotype id.
        :return: String representing the genotype instance (phenotype)
        """
        return f"{self.__class__.__name__} {self.id}"
# ==== src/gevopy/random.py (chromosome factories) ==========================


def haploid(size):
    """Returns a random standard Haploid chromosome.
    :param size: Integer with chromosome size
    :return: Haploid chromosome with values drawn from 0 to 1
    """
    values = np.random.randint(2, size=size, dtype="uint8")  # 2 ** 1 states
    return genetics.Haploid(values)


def diploid(size):
    """Returns a random standard Diploid chromosome.
    :param size: Integer with chromosome size
    :return: Diploid chromosome with values drawn from 0 to 3
    """
    values = np.random.randint(4, size=size, dtype="uint8")  # 2 ** 2 states
    return genetics.Diploid(values)


def triploid(size):
    """Returns a random standard Triploid chromosome.
    :param size: Integer with chromosome size
    :return: Triploid chromosome with values drawn from 0 to 7
    """
    values = np.random.randint(8, size=size, dtype="uint8")  # 2 ** 3 states
    return genetics.Triploid(values)


# ==== tests/conftest.py ====================================================
"""Module for evolution requirements fixtures"""
# pylint: disable=redefined-outer-name

import random

from pytest import fixture

from tests import genotypes


@fixture(scope="function", autouse=True)
def set_random_seed():
    """Fix the random seeds for repeatable testing.

    Both the standard library generator and the numpy global generator are
    seeded: the chromosome factories of `gevopy.random` draw from
    `np.random`, which `random.seed` alone does not affect.
    """
    import numpy as np  # local import, conftest has no module level numpy

    random.seed(1)
    np.random.seed(1)


@fixture(scope="package", params=["OneHaploid", "OneDiploid"])
def genotype(request):
    """Fixture to return the genotype class (phenotype generator)"""
    return genotypes.__dict__[request.param]


@fixture(scope="package")
def phenotype(genotype):
    """Fixture to return a phenotype instance"""
    # Uses the `genotype` fixture above; the previously requested
    # `phenotype_gen` fixture is not defined in this conftest.
    return genotype()


# ==== tests/genotypes.py ===================================================
"""Module for test genotypes"""
from gevopy import genetics, random


class OneHaploid(genetics.GenotypeModel):
    """Simple and most basic haploid genotype"""
    chromosome: genetics.Haploid = genetics.Field(
        default_factory=lambda: random.haploid(size=12)
    )


class OneDiploid(genetics.GenotypeModel):
    """Simple and most basic diploid genotype"""
    chromosome: genetics.Diploid = genetics.Field(
        default_factory=lambda: random.diploid(size=12)
    )
"""Module to test genotype requirements"""
# pylint: disable=redefined-outer-name

import uuid
from collections.abc import MutableSequence
from datetime import datetime

from pytest import fixture

from gevopy import genetics


# Module fixtures ---------------------------------------------------
@fixture(scope="class")
def clone(phenotype):
    """Provides the clone of the phenotype under test"""
    return phenotype.clone()


# Requirements ------------------------------------------------------
class AttrRequirements:
    """Requirements on the attributes every phenotype must expose"""

    def test_attr_chromosome(self, phenotype):
        """The 'chromosome' attribute exists and is a mutable sequence"""
        assert hasattr(phenotype, "chromosome")
        assert isinstance(phenotype.chromosome, MutableSequence)

    def test_attr_id(self, phenotype):
        """The 'id' attribute exists and is a UUID"""
        assert hasattr(phenotype, "id")
        assert isinstance(phenotype.id, uuid.UUID)

    def test_attr_created(self, phenotype):
        """The 'created' attribute exists and is a datetime"""
        assert hasattr(phenotype, "created")
        assert isinstance(phenotype.created, datetime)

    def test_attr_parents(self, phenotype):
        """The 'parents' attribute exists and is a list"""
        assert hasattr(phenotype, "parents")
        assert isinstance(phenotype.parents, list)

    def test_attr_generation(self, phenotype):
        """The 'generation' attribute exists and is a positive integer"""
        assert hasattr(phenotype, "generation")
        assert isinstance(phenotype.generation, int)
        assert 0 < phenotype.generation

    def test_attr_score(self, phenotype):
        """The 'score' attribute exists and is None before evaluation"""
        assert hasattr(phenotype, "score")
        assert phenotype.score is None

    def test_is_instance_genotype(self, phenotype):
        """The phenotype is an instance of GenotypeModel"""
        assert isinstance(phenotype, genetics.GenotypeModel)


class CloneRequirements:
    """Requirements on the result of the phenotype clone method"""

    def test_clone_id(self, phenotype, clone):
        """The clone gets an 'id' different from the phenotype one"""
        assert phenotype.id != clone.id

    def test_clone_chromosome(self, phenotype, clone):
        """The clone keeps the same 'chromosome' as the phenotype"""
        assert clone.chromosome == phenotype.chromosome

    def test_clone_parents(self, phenotype, clone):
        """The clone keeps the same 'parents' as the phenotype"""
        assert clone.parents == phenotype.parents

    def test_clone_generation(self, phenotype, clone):
        """The clone keeps the same 'generation' as the phenotype"""
        assert clone.generation == phenotype.generation


# Parametrization ---------------------------------------------------
class TestGenotype(AttrRequirements, CloneRequirements):
    """Runs all the requirements against each parametrized genotype"""

    @fixture(scope="class")
    def phenotype(self, genotype):
        """Fixture to return an instance of the genotype under test"""
        return genotype()