paper edits

markur4 · Mar 8, 2024 · 386b1d6 · 386b1d6
1 parent bcc4dcf
commit 386b1d6
Show file tree

Hide file tree

Showing 3 changed files with 45 additions and 33 deletions.
diff --git a/paper.bib b/paper.bib
@@ -5,7 +5,6 @@ @misc{charlierTrevismdStatannotationsV02022
   year = {2022},
   month = oct,
   doi = {10.5281/ZENODO.7213391},
-  url = {https://zenodo.org/record/7213391},
   urldate = {2023-11-16},
   abstract = {Add scipy's Brunner-Munzel test Fix applying statannotations for non-string group labels (Issue \#65) Get Zenodo DOI},
   copyright = {Open Access},
@@ -24,12 +23,20 @@ @article{hunterMatplotlib2DGraphics2007
   pages = {90--95},
   issn = {1558-366X},
   doi = {10.1109/MCSE.2007.55},
-  url = {https://ieeexplore.ieee.org/document/4160265},
   urldate = {2023-11-15},
   abstract = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting,and publication-quality image generation across user interfaces and operating systems},
   file = {/Users/martinkuric/Zotero/storage/W4FJZDNY/§-hunterMatplotlib2DGraphics2007.pdf;/Users/martinkuric/Zotero/storage/GW3HZZHR/4160265.html}
 }
 
+@inproceedings{mckinneyDataStructuresStatistical2010,
+  title = {Data {{Structures}} for {{Statistical Computing}} in {{Python}}},
+  author = {McKinney, Wes},
+  booktitle = {Proceedings of the 9th {{Python}} in {{Science Conference}}},
+  year = {2010},
+  month = jan,
+  pages = {56--61},
+  doi = {10.25080/Majora-92bf1922-00a}
+}
+
 @article{mckinneyPandasFoundationalPython2011,
   title = {Pandas: A {{Foundational Python Library}} for {{Data Analysis}} and {{Statistics}}},
   shorttitle = {Pandas},
@@ -41,6 +48,15 @@ @article{mckinneyPandasFoundationalPython2011
   file = {/Users/martinkuric/Zotero/storage/IH5C5UZ3/§-mckinneyPandasFoundationalPython2011.pdf}
 }
 
+@misc{reback2020pandas,
+  title = {{{pandas-dev/pandas}}: {{Pandas}}},
+  author = {{The pandas development team}},
+  year = {2020},
+  month = feb,
+  doi = {10.5281/zenodo.3509134},
+  howpublished = {Zenodo}
+}
+
 @article{vallatPingouinStatisticsPython2018,
   title = {Pingouin: Statistics in {{Python}}},
   shorttitle = {Pingouin},
@@ -53,7 +69,6 @@ @article{vallatPingouinStatisticsPython2018
   pages = {1026},
   issn = {2475-9066},
   doi = {10.21105/joss.01026},
-  url = {https://joss.theoj.org/papers/10.21105/joss.01026},
   urldate = {2023-05-29},
   abstract = {Vallat, (2018). Pingouin: statistics in Python. Journal of Open Source Software, 3(31), 1026, https://doi.org/10.21105/joss.01026},
   langid = {english},
@@ -72,7 +87,6 @@ @article{waskomSeabornStatisticalData2021
   pages = {3021},
   issn = {2475-9066},
   doi = {10.21105/joss.03021},
-  url = {https://joss.theoj.org/papers/10.21105/joss.03021},
   urldate = {2023-03-26},
   abstract = {Waskom, M. L., (2021). seaborn: statistical data visualization. Journal of Open Source Software, 6(60), 3021, https://doi.org/10.21105/joss.03021},
   langid = {english},
@@ -89,7 +103,6 @@ @article{wickhamTidyData2014a
   pages = {1--23},
   issn = {1548-7660},
   doi = {10.18637/jss.v059.i10},
-  url = {https://doi.org/10.18637/jss.v059.i10},
   urldate = {2023-11-15},
   abstract = {A huge amount of effort is spent cleaning data to get it ready for analysis, but there has been little research on how to make data cleaning as easy and effective as possible. This paper tackles a small, but important, component of data cleaning: data tidying. Tidy datasets are easy to manipulate, model and visualize, and have a specific structure: each variable is a column, each observation is a row, and each type of observational unit is a table. This framework makes it easy to tidy messy datasets because only a small set of tools are needed to deal with a wide range of un-tidy datasets. This structure also makes it easier to develop tidy tools for data analysis, tools that both input and output tidy datasets. The advantages of a consistent data structure and matching tools are demonstrated with a case study free from mundane data manipulation chores.},
   copyright = {Copyright (c) 2013 Hadley  Wickham},

diff --git a/paper.md b/paper.md
@@ -29,50 +29,50 @@ aas-doi: 10.3847/xxxxx <- update this with the DOI from AAS once you know it.
 # aas-journal: Astrophysical Journal <- The name of the AAS journal.
 ---
 
+
 # Summary
 
 `plotastic` addresses the challenges of transitioning from exploratory
 data analysis to hypothesis testing in Python's data science ecosystem.
 Bridging the gap between `seaborn` and `pingouin`, this library offers a
 unified environment for plotting and statistical analysis. It simplifies
-the workflow with a user-friendly syntax and seamless integration with
+the workflow with user-friendly syntax and seamless integration with
 familiar `seaborn` parameters (y, x, hue, row, col). Inspired by
 `seaborn`'s consistency, `plotastic` utilizes a `DataAnalysis` object to
-intelligently pass parameters to `pingouin` statistical functions. The
-library systematically groups the data according to the needs of
-statistical tests and plots, conducts visualisation, analyses and
-supports extensive customization options. In essence, `plotastic`
-establishes a protocol for configuring statical analyses through
-plotting parameters. This approach streamlines the process, translating
-`seaborn` parameters into statistical terms, allowing researchers to
-focus on correct statistical testing and less about specific syntax and
+intelligently pass parameters to `pingouin` statistical functions.
+Hence, statistics and plotting are performed on the same set of
+parameters, so that the strength of `seaborn` in visualizing
+multidimensional data is extended to statistical analysis. In essence,
+`plotastic` translates `seaborn` parameters into statistical terms,
+configures statistical protocols based on intuitive plotting syntax and
+returns a `matplotlib` figure with known customization options and more.
+This approach streamlines data analysis, allowing researchers to focus
+on correct statistical testing and less on specific syntax and
 implementations.
 
 
+
 # Statement of need
 
 Python's data science ecosystem provides powerful tools for both
 visualization and statistical testing. However, the transition from
 exploratory data analysis to hypothesis testing can be cumbersome,
 requiring users to switch between libraries and adapt to different
-syntaxes.
-
-`seaborn` has become a popular choice for plotting in Python, offering
-an intuitive interface. Its statistical functionality focuses on
-descriptive plots and bootstrapped confidence intervals
-
+syntaxes. `seaborn` has become a popular choice for plotting in Python,
+offering an intuitive interface. Its statistical functionality focuses
+on descriptive plots and bootstrapped confidence intervals
 [@waskomSeabornStatisticalData2021]. The library `pingouin` offers an
 extensive set of statistical tests, but it lacks integration with common
 plotting capabilities [@vallatPingouinStatisticsPython2018].
-`statannotations` integrates statistical testing with plot annotations, but
- uses a complex interface and is limited to pairwise comparisons
+ `statannotations` integrates statistical testing with plot annotations,
+ but uses a complex interface and is limited to pairwise comparisons
  [@charlierTrevismdStatannotationsV02022].
 
 `plotastic` addresses this gap by offering a unified environment for
 plotting and statistical analysis. With an emphasis on user-friendly
-syntax and integration with familiar `seaborn` parameters, it simplifies
-the process for users already comfortable `seaborn`. The library ensures
-a smooth workflow, from data import to hypothesis testing and
+syntax and integration of familiar `seaborn` parameters, it simplifies
+the process for users already comfortable with `seaborn`. The library
+ensures a smooth workflow, from data import to hypothesis testing and
 visualization.
 
 # Example
@@ -169,11 +169,10 @@ of statistical analysis and plotting, leveraging the capabilities of
 @charlierTrevismdStatannotationsV02022]. It utilizes long-format
 `pandas` `DataFrames` as its primary input, aligning with the
 conventions of `seaborn` and ensuring compatibility with existing data
-structures [@wickhamTidyData2014a;
-@mckinneyPandasFoundationalPython2011].
+structures [@wickhamTidyData2014a; @reback2020pandas; @mckinneyDataStructuresStatistical2010].
 
-`plotastic` was inspired by `seaborn`'s intuitive and consistent usage
-of the same set of parameters (y, x, hue, row, col) found in each of its
+`plotastic` was inspired by `seaborn`'s use of the same set of intuitive
+and consistent parameters (y, x, hue, row, col) found in each of its
 plotting functions [@waskomSeabornStatisticalData2021]. These parameters
 intuitively delineate the data dimensions plotted, yielding 'facetted'
 subplots, each presenting y against x. This allows for rapid and
@@ -208,24 +207,24 @@ on the x-axis, which is achieved by splitting the data by all grouping
 dimensions (hue, row, col)  (\autoref{tab:sphericity}). For omnibus and
 posthoc analyses, data is grouped by the row and col dimensions in
 parallel to the `matplotlib` axes, before performing one two-factor
-analysis per axes using x and hue as the within/between-factors.
+analysis per axis using x and hue as the within/between-factors
 (\autoref{tab:RMANOVA}).
 
 `DataAnalysis` visualizes data through predefined plotting functions
 designed for drawing multi-layered plots. A notable emphasis within
 `plotastic` is placed on showcasing individual datapoints alongside
 aggregated means or medians. In detail, each plotting function
-initializes `matplotlib` figure and axes using `plt.subplots()` while
+initializes a `matplotlib` figure and axes using `plt.subplots()` while
 returning a `DataAnalysis` object for method chaining. Axes are
 populated by `seaborn` plotting functions (e.g., `sns.boxplot()`),
 leveraging automated aggregation and error bar displays. Keyword
 arguments are passed to these `seaborn` functions, ensuring the same
 degree of customization as in `seaborn`. Users can further customize plots
 by chaining `DataAnalysis` methods or by applying common `matplotlib` code
-to override plotastic settings. Figures are exported using
+to override `plotastic` settings. Figures are exported using
 `plt.savefig()`. 
 
-`plotastic` also focuses on in annotating statistical information within
+`plotastic` also focuses on annotating statistical information within
 plots, seamlessly incorporating p-values from pairwise comparisons using
 `statannotations` [@charlierTrevismdStatannotationsV02022]. This
 integration simplifies the interface and expands options for pair

diff --git a/paper.pdf b/paper.pdf