From d0cb18e81ca68bbbec29ba71824b6443a397057c Mon Sep 17 00:00:00 2001 From: Marina Evers <45919828+marinaevers@users.noreply.github.com> Date: Wed, 18 Sep 2024 10:04:00 +0200 Subject: [PATCH] Add documentation of distribtion class (#24) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add documentation of distribtion class * Update distribution.rst each sentence new line (semantic line breaks) * semantic line breaks * Extend documentation of distribution * Extend documentation for distribution --------- Co-authored-by: David Hägele --- docs/changelog.rst | 2 + docs/conf.py | 3 ++ docs/distribution.rst | 52 ++++++++++++++++++++++++ docs/examples.rst | 4 +- docs/index.rst | 13 +++++- docs/uadapy.data.rst | 10 +++++ docs/uadapy.dr.rst | 2 - docs/uadapy.rst | 28 +------------ uadapy/distribution.py | 92 ++++++++++++++++++++++++++++++++++++++---- 9 files changed, 167 insertions(+), 39 deletions(-) create mode 100644 docs/distribution.rst create mode 100644 docs/uadapy.data.rst diff --git a/docs/changelog.rst b/docs/changelog.rst index 0aae921..b0707df 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,7 +4,9 @@ Changelog 0.0.2 (in preparation) --- + * Reorganization of imports +* Documentation of distribution class 0.0.1 --- diff --git a/docs/conf.py b/docs/conf.py index 6b078ba..fedd3b9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,6 +7,9 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information import sphinx_rtd_theme +import sys +import os +sys.path.insert(0, os.path.abspath('..')) project = 'UADAPy' copyright = '2024, Ruben Bauer, Marina Evers, David Hägele, Patrick Paetzold' diff --git a/docs/distribution.rst b/docs/distribution.rst new file mode 100644 index 0000000..6c0db48 --- /dev/null +++ b/docs/distribution.rst @@ -0,0 +1,52 @@ +============ +Distribution +============ + +The `Distribution` class serves as a core component for handling probability distributions, both 
parametric and non-parametric. +It allows you to create a distribution from either a statistical model (such as one from `scipy.stats`) or directly from a dataset (samples). +The class also supports handling multivariate distributions and automatically distinguishes between univariate and multivariate cases. +This class abstracts away the complexity of working with different types of distributions while providing a uniform interface for statistical operations. + +Creating a distribution +----------------------- +.. code-block:: python + + def __init__(self, model, name="", n_dims=1) + +:Parameters: + - **model**: A `scipy.stats` distribution object or an array of samples. + - **name** *(str)*: The name of the distribution (optional; default is inferred from the model). + - **n_dims** *(int)*: Dimensionality of the distribution (optional; default is `1`). + +If a set of samples is passed instead of a statistical model, a Kernel Density Estimate (KDE) is used for estimating the probability density function (PDF). +If the distribution is named "Normal", the class assumes the samples are from a normal distribution and fits a multivariate normal model to the data. + +Working with distributions +-------------------------- +**Distribution Properties**: Provides methods for calculating key statistical properties such as: + +- **mean() -> np.ndarray | float**: + + Returns the mean of the distribution. + +- **cov() -> np.ndarray | float**: + + Returns the covariance matrix of the distribution. + +- **skew() -> np.ndarray | float**: + + Returns the skewness of the distribution. + +- **kurt() -> np.ndarray | float**: + + Returns the kurtosis of the distribution. + +**Sampling and PDF Evaluation**: + +- **sample(n: int, seed: int = None) -> np.ndarray**: + + Generates `n` random samples from the distribution. + +- **pdf(x: np.ndarray | float) -> np.ndarray | float**: + + Evaluates the probability density function (PDF) at the given point `x`. 
diff --git a/docs/examples.rst b/docs/examples.rst index 486d893..2bb64d9 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -10,11 +10,11 @@ Uncertainty-aware multidimensional scaling `Load data, reduce the dimensionality with UAMDS, visualize the output `_ Uncertainty-aware principal component analysis ------------------------------------------- +---------------------------------------------- `Load data, reduce the dimensionality with UAPCA, visualize the output `_ Working with own data ------------------------------------------- +--------------------- `Load data, create a distribution, visualize it `_ diff --git a/docs/index.rst b/docs/index.rst index 19a764a..3d1eafd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,8 @@ ================================================== UADAPy - Uncertainty-aware Data Analysis in Python ================================================== -UADAPy is a Python library to support an easy analysis of uncertain data. Here you find the most important information to get started. +UADAPy is a Python library to support an easy analysis of uncertain data. +Here you find the most important information to get started. .. toctree:: :maxdepth: 1 @@ -10,6 +11,16 @@ UADAPy is a Python library to support an easy analysis of uncertain data. Here y installation.rst examples.rst +Classes +======= +In the following, we describe the most important data structures and provide detailed explanations of some concepts. +This section is currently work in progress and will be extended over time. + +.. toctree:: + :maxdepth: 1 + + distribution.rst + Indices and tables ================== * :ref:`genindex` diff --git a/docs/uadapy.data.rst b/docs/uadapy.data.rst new file mode 100644 index 0000000..3b59405 --- /dev/null +++ b/docs/uadapy.data.rst @@ -0,0 +1,10 @@ +uadapy.data package +======================= + +uadapy.data.data module +----------------------------------------- + +.. 
automodule:: uadapy.data.data + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/uadapy.dr.rst b/docs/uadapy.dr.rst index 8cd7b4c..4ab3aab 100644 --- a/docs/uadapy.dr.rst +++ b/docs/uadapy.dr.rst @@ -1,8 +1,6 @@ uadapy.dr package ================= -Submodules ----------- uadapy.dr.uamds module ---------------------- diff --git a/docs/uadapy.rst b/docs/uadapy.rst index e9ecf38..817908f 100644 --- a/docs/uadapy.rst +++ b/docs/uadapy.rst @@ -9,33 +9,7 @@ Subpackages uadapy.dr uadapy.plotting - -Submodules ----------- - -uadapy.data module ------------------- - -.. automodule:: uadapy.data - :members: - :undoc-members: - :show-inheritance: - -uadapy.distribution module --------------------------- - -.. automodule:: uadapy.distribution - :members: - :undoc-members: - :show-inheritance: - -uadapy.test\_distrib module ---------------------------- - -.. automodule:: uadapy.test_distrib - :members: - :undoc-members: - :show-inheritance: + uadapy.data Module contents --------------- diff --git a/uadapy/distribution.py b/uadapy/distribution.py index 1491a9d..60425fc 100644 --- a/uadapy/distribution.py +++ b/uadapy/distribution.py @@ -5,6 +5,18 @@ class Distribution: + """ + The Distribution class provides a consistent interface to a variety of distributions. + + Attributes + ---------- + model : scipy.stats distribution or np.ndarray + The underlying concrete distribution model, a `scipy.stats` distribution object or an array of samples + name : str + Name of the distribution type, e.g. 'Normal' + n_dims : int + Dimensionality of the distribution + """ def __init__(self, model, name="", n_dims=1): """ @@ -12,9 +24,15 @@ def __init__(self, model, name="", n_dims=1): no assumptions about the distribution are made. For the pdf and the sampling, a KDE is used. If the name is "Normal", the samples are treated as samples of a normal distribution. 
- :param model: A scipy.stats distribution or samples - :param name: The name of the distribution - :param n_dims: The dimensionality of the distribution + + Parameters + ---------- + model: + A scipy.stats distribution or samples + name: str, optional + The name of the distribution + n_dims: int, optional + The dimensionality of the distribution (default is 1) """ if name: self.name = name @@ -35,15 +53,43 @@ def __init__(self, model, name="", n_dims=1): if isinstance(self.model, np.ndarray): self.kde = stats.gaussian_kde(self.model.T) - def sample(self, n: int, random_state: int = None) -> np.ndarray: + def sample(self, n: int, seed: int = None) -> np.ndarray: + """ + Creates samples from the distribution. + + Parameters + ---------- + n : int + Number of samples. + seed : int, optional + Seed for the random number generator for reproducibility, default is None. + + Returns + ------- + np.ndarray + Samples of the distribution. + """ if isinstance(self.model, np.ndarray): - return self.kde.resample(n, random_state).T + return self.kde.resample(n, seed).T if hasattr(self.model, 'rvs') and callable(self.model.rvs): - return self.model.rvs(size=n, random_state=random_state) + return self.model.rvs(size=n, random_state=seed) if hasattr(self.model, 'resample') and callable(self.model.resample): - return self.model.resample(size=n, seed=random_state) + return self.model.resample(size=n, seed=seed) def pdf(self, x: np.ndarray | float) -> np.ndarray | float: + """ + Computes the probability density function. + + Parameters + ---------- + x : np.ndarray or float + The position where the pdf should be evaluated. + + Returns + ------- + np.ndarray or float + Probability density at the given position(s). + """ if isinstance(self.model, np.ndarray): return self.kde.pdf(x.T) if not hasattr(self.model, 'pdf'): @@ -52,6 +98,14 @@ def pdf(self, x: np.ndarray | float) -> np.ndarray | float: return self.model.pdf(x) def mean(self) -> np.ndarray | float: + """ + Expected value of the distribution. 
+ + Returns + ------- + np.ndarray or float + Expected value of the distribution. + """ if isinstance(self.model, np.ndarray): return np.mean(self.model, axis=0) if hasattr(self.model, 'mean'): @@ -66,6 +120,14 @@ def mean(self) -> np.ndarray | float: raise AttributeError(f"Mean not implemented yet! {self.model.__class__.__name__}") def cov(self) -> np.ndarray | float: + """ + Covariance of the distribution. + + Returns + ------- + np.ndarray or float + Covariance of the distribution. + """ if isinstance(self.model, np.ndarray): return np.cov(self.model.T) if hasattr(self.model, 'cov'): @@ -86,6 +148,14 @@ def cov(self) -> np.ndarray | float: def skew(self) -> np.ndarray | float: + """ + Skewness of the distribution. + + Returns + ------- + np.ndarray or float + Skewness of the distribution. + """ if isinstance(self.model, np.ndarray): return stats.skew(self.model) if hasattr(self.model, 'stats') and callable(self.model.stats): @@ -94,6 +164,14 @@ def skew(self) -> np.ndarray | float: return 0 def kurt(self) -> np.ndarray | float: + """ + Kurtosis of the distribution. + + Returns + ------- + np.ndarray or float + Kurtosis of the distribution. + """ if isinstance(self.model, np.ndarray): return stats.kurtosis(self.model) if hasattr(self.model, 'stats') and callable(self.model.stats):