update installation files

fcomitani · Aug 21, 2022 · 0f61782 · 0f61782
1 parent 3da29c6
commit 0f61782
Show file tree

Hide file tree

Showing 25 changed files with 64 additions and 39 deletions.
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -0,0 +1,14 @@
+version: 2
+
+conda:
+   environment: environment.yml
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+   configuration: docs/conf.py
+
+# Optionally declare the Python requirements required to build your docs
+python:
+   version: "3.7"
+   install:
+   - requirements: docs/requirements.txt
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2021 fcomitani, bhadbhubbie
+Copyright (c) 2021-2022 fcomitani, bhadbhubbie
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@
 [![Build Status](https://img.shields.io/travis/com/fcomitani/tapir/main?style=flat-square)](https://travis-ci.com/fcomitani/tapir)
 -->
 
-tapir (`tapyra` in Tupi) is a python 3 package for the analysis of gene expression data.
+`tapir` is a python 3 package for the analysis of gene expression data.
 It includes a number of functions for statistical analysis, differential expression
 and gene sets enrichment analysis.
 
@@ -49,7 +49,7 @@ Besides basic scientific and plotting libraries, the current version requires
 ### Installation
 
 <!--- tapir releases can be easily installed through the python standard package manager  
-`pip install tapyra`.
+`pip install tapir`.
 --->
 
 To install the latest (unreleased) version you can download it from this repository by running 
@@ -64,7 +64,7 @@ Given an `input` dataset in pandas-like format (samples X genes), the `build_dge
 the samples as TMM and fit a glmQL model for differential expression
 significance.
 
-    from tapyra.edger import build_dgelist, diff_exp
+    from tapir.edger import build_dgelist, diff_exp
 
     dgelist, tmmlog = build_dgelist(input_table)
     de              = diff_exp(dgelist, groups, filter=True)

diff --git a/docs/conf.py b/docs/conf.py
@@ -12,12 +12,12 @@
 #
 import os
 import sys
-sys.path.insert(0, os.path.abspath('../tapyra'))
+sys.path.insert(0, os.path.abspath('../tapir'))
 
 # -- Project information -----------------------------------------------------
 
 project = 'tapir'
-copyright = '2021, Federico Comitani, Josh Oren Nash'
+copyright = '2021-2022, Federico Comitani, Josh Oren Nash'
 author = 'Federico Comitani, Josh Oren Nash'
 
 # The full version, including alpha/beta/rc tags

diff --git a/docs/de.rst b/docs/de.rst
@@ -18,7 +18,7 @@ equivalent to the flags in :code:`EdgeR.filterByExpr`.
 
 .. code-block:: python
 
-  from tapyra.edger import build_dgelist, diff_exp
+  from tapir.edger import build_dgelist, diff_exp
 
   dgelist, tmmlog = build_dgelist(input_table)
   de              = diff_exp(dgelist, groups, filter=True)

diff --git a/docs/gsets.rst b/docs/gsets.rst
@@ -11,7 +11,7 @@ the :code:`type`.
 
 .. code-block:: python
 
-  from tapyra.gsets import run_gsea
+  from tapir.gsets import run_gsea
 
   gsmat = run_gsea(data, subsel=None, type='ssgsea', tmp_path=r'./tmp_gsea')
   
@@ -39,7 +39,7 @@ a certain percentage relative to the highest connection value observed.
 
 .. code-block:: python
 
-  from tapyra.plotting import plot_genes_network
+  from tapir.plotting import plot_genes_network
 
   plot_genes_network(gset, subsel, exp=None, cutoff=.1, save_file='./net.png')
 

diff --git a/docs/immune.rst b/docs/immune.rst
@@ -8,15 +8,15 @@ an expression matrix :code:`tmm` with samples as rows and genes as columns.
 
 .. code-block:: python
 
-  from tapyra.immune import mcpc_estimate
+  from tapir.immune import mcpc_estimate
 
   estimates = mcpc_estimate(tmm)
 
 
 For details on the deconvolution process, please see the MCPcounter
-publication [Becth2016]_.
+publication [Becht2016]_.
 
 References
 ----------
 
-.. [Becht2016] Becht E., Giraldo N. A., Lacroix L., Buttard B., Elarouci N., Petitprez F., Selves J., Laurent-Puig P., Sautès-Fridman C., Fridman W. H. and de Reyniès A., (2016). “Estimating the population abundance of tissue-infiltrating immune and stromal cell populations using gene expression”, Genome Biol. 20; 17(1) 218.
+.. [Becht2016] Becht E., Giraldo N. A., Lacroix L., Buttard B., Elarouci N., Petitprez F., Selves J., Laurent-Puig P., Sautès-Fridman C., Fridman W. H. and de Reyniès A., (2016). “Estimating the population abundance of tissue-infiltrating immune and stromal cell populations using gene expression”, Genome Biol. 20; 17(1) 218.
diff --git a/docs/index.rst b/docs/index.rst
@@ -5,7 +5,7 @@
 Welcome to tapir's documentation!
 ===================================
 
-Transcriptome Analysis in Python Imported from R (tapir) is a python 3 package for the statistical analysis of expression data.
+Transcriptome Analysis in Python Imported from R (`tapir`) is a python 3 package for the statistical analysis of expression data.
 It includes a number of analysis and plotting utilities, which include differential expression functions imported from EdgeR and
 gene sets enrichment analysis.
 

diff --git a/docs/stats.rst b/docs/stats.rst
@@ -19,7 +19,7 @@ for the multiple testing correction.
 
 .. code-block:: python
 
-  from tapyra.stats import multicompare
+  from tapir.stats import multicompare
 
   stats, dunn = multicompare(groups, membership, data, 
         cutoff=1, multi_method='fdr_tsbh', multi_alpha=0.05)
@@ -37,7 +37,7 @@ Contingency tables can be built and related tests can be run with TAPIR.
 
 .. code-block:: python
 
-  form tapyra.stats import get_contingency, test_contingency
+  from tapir.stats import get_contingency, test_contingency
 
   contab = get_contingency(series, groups, membership)
   stats  = test_contingency(contab, method='auto')
@@ -69,8 +69,8 @@ a one-hot-encoded matrix with groups :code:`membership`.
 
 .. code-block:: python
 
-  from tapyra.stats import st_curves
-  from tapyra.plotting import plot_survival
+  from tapir.stats import st_curves
+  from tapir.plotting import plot_survival
 
   stats, curves = st_curves(st_stats, groups, membership)
   plot_survival(curves, xlab='Years', ylab='OST', save_file='./plot.png')
@@ -100,8 +100,8 @@ by providing the appropriate UMAP object keywords.
 
 .. code-block:: python
 
-  from tapyra.embedding import get_umap
-  from tapyra.plotting import plot_clusters
+  from tapir.embedding import get_umap
+  from tapir.plotting import plot_clusters
 
   proj, mappa = get_umap(data, collinear_thresh=None, var_drop_thresh=.99)
   proj.index  = data.index
@@ -128,7 +128,7 @@ This function allows to plot on one (:code:`genes_up`) or two levels
 
 .. code-block:: python
 
-  from tapyra.plotting import plot_distribution
+  from tapir.plotting import plot_distribution
 
   plot_distribution(data, groups, membership, 
     genes_up, genes_dw,
@@ -142,7 +142,7 @@ with :code:`plot_heatmap`
 
 .. code-block:: python
 
-  from tapyra.plotting import plot_heatmap
+  from tapir.plotting import plot_heatmap
 
   plot_heatmap(data, groups, membership, genes, 
     clab='log$_2$(TPM+1)', 

diff --git a/environment.yml b/environment.yml
@@ -0,0 +1,13 @@
+channels:
+  - conda-forge
+dependencies:
+  - python=3.7.6
+  - numpy>=1.20.2
+  - pandas>=1.1.3
+  - gseapy>=0.9.5
+  - lifelines>=0.21.0
+  - rpy2>=3.4.5
+  - seaborn>=0.11.1
+  - scikit-learn>=0.4.6
+  - statsmodels>=0.11.1
+  - umap-learn>=0.3.9
diff --git a/requirements.txt b/requirements.txt
@@ -1,15 +1,12 @@
-#general
 numpy>=1.20.2
 pandas>=1.1.3
-scikit-learn>=0.22.2.post1
-scikit-network>=0.20.0
-umap-learn>=0.4.5
-psutil>=5.7.3
-anytree>=2.8.0
-
-#plotting
-matplotlib>=3.3.3
-seaborn>=0.11.0
+gseapy>=0.9.5
+lifelines >= 0.21.0
+rpy2 >= 3.4.5
+seaborn >= 0.11.1
+scikit-learn >= 0.4.6
+statsmodels >= 0.11.1
+umap-learn >= 0.3.9
 
 #documentation
 sphinx==3.5.4

diff --git a/setup.py b/setup.py
@@ -3,15 +3,15 @@
 from os import path
 
 here    = path.abspath(path.dirname(__file__))
-version = open("tapyra/_version.py").readlines()[-1].split()[-1].strip("\"'")
+version = open("tapir/_version.py").readlines()[-1].split()[-1].strip("\"'")
 
 with open(path.join(here, 'README.md'), encoding='utf-8') as f:
     long_description = f.read()
 
 
 setup(
 
-    name='tapyra',
+    name='tapir',
 
     version=version,
 
@@ -32,6 +32,7 @@
         'License :: OSI Approved :: MIT License',
         'Programming Language :: Python'
         'Programming Language :: Python :: 3.7'
+        'Programming Language :: Python :: 3.8'
 		],
 
     keywords='clustering recursion dimension-reduction k-NN hiearchical-clustering optimal-clusters differential-evolution',

diff --git a/tapyra/.DS_Store → tapir/.DS_Store b/tapyra/.DS_Store → tapir/.DS_Store
diff --git a/tapyra/__init__.py → tapir/__init__.py b/tapyra/__init__.py → tapir/__init__.py
@@ -4,7 +4,7 @@
 J. O. Nash      @2021
 """
 
-from tapyra._version import __version__
+from tapir._version import __version__
 
 if __name__ == "__main__":
 

diff --git a/tapyra/_version.py → tapir/_version.py b/tapyra/_version.py → tapir/_version.py
diff --git a/tapyra/auxiliary.py → tapir/auxiliary.py b/tapyra/auxiliary.py → tapir/auxiliary.py
diff --git a/tapyra/edger.py → tapir/edger.py b/tapyra/edger.py → tapir/edger.py
@@ -24,7 +24,7 @@ def build_dgelist(tab):
 	""" Builds a DGE list and normalizes counts as TMM. 
 
         Args:
-            tab (pandas dataframe): expression counts matrix with samples as row and 
+            tab (pandas dataframe): expression counts matrix with samples as rows and 
 				genes as columns.
 		Returns:
 			dgelist (edgeR DGElist): a composite dataframe with samples and expression

diff --git a/tapyra/embedding.py → tapir/embedding.py b/tapyra/embedding.py → tapir/embedding.py
diff --git a/tapyra/gsets.py → tapir/gsets.py b/tapyra/gsets.py → tapir/gsets.py
@@ -10,7 +10,7 @@
 import pandas as pd
 import gseapy as gp
 
-from tapyra.auxiliary import invert_dict
+from tapir.auxiliary import invert_dict
 
 ref_path = os.path.join(os.path.dirname(__file__),'ref/')
 

diff --git a/tapyra/immune.py → tapir/immune.py b/tapyra/immune.py → tapir/immune.py
diff --git a/tapyra/plotting.py → tapir/plotting.py b/tapyra/plotting.py → tapir/plotting.py
@@ -13,8 +13,8 @@
 from matplotlib.colors import LinearSegmentedColormap
 import seaborn as sns
 
-from tapyra.gsets import gset_as_dict, connection_matrix_gsets
-from tapyra.embedding import get_umap
+from tapir.gsets import gset_as_dict, connection_matrix_gsets
+from tapir.embedding import get_umap
 
 sns.set_style("darkgrid")
 

diff --git a/tapyra/ref/all_gsets.gmt → tapir/ref/all_gsets.gmt b/tapyra/ref/all_gsets.gmt → tapir/ref/all_gsets.gmt
diff --git a/tapyra/ref/immune_genes.h5 → tapir/ref/immune_genes.h5 b/tapyra/ref/immune_genes.h5 → tapir/ref/immune_genes.h5
diff --git a/tapyra/ref/immune_probes.h5 → tapir/ref/immune_probes.h5 b/tapyra/ref/immune_probes.h5 → tapir/ref/immune_probes.h5
diff --git a/tapyra/stats.py → tapir/stats.py b/tapyra/stats.py → tapir/stats.py
@@ -23,7 +23,7 @@
 from lifelines import KaplanMeierFitter
 from lifelines.statistics import multivariate_logrank_test as mlt
 
-from tapyra.auxiliary import smart_selection
+from tapir.auxiliary import smart_selection
 
 def compare(gene, classes):