Fixes several bugs in words_n_fun tests (#26)

* Fixes several bugs in words_n_fun tests
* Forgot to save requirements.txt for spacy version change
France-Travail · Oct 12, 2023 · cc3eb0d
1 parent 681d936
commit cc3eb0d
Show file tree

Hide file tree

Showing 9 changed files with 10 additions and 46 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -24,6 +24,6 @@ coverage==6.4.4
 # Has to be installed last / optionnal to use spacy lemmatizer
 markupsafe==2.0.1  # BUG FIX -> https://github.com/aws/aws-sam-cli/issues/3661
 Cython==0.29.24
-spacy==3.3.1
+spacy==3.3.3
 # The following line downloads a french spacy model. It can be commented if you don't have an internet access to download it, but lemmatizer features won't work.
 https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.3.0/fr_core_news_sm-3.3.0-py3-none-any.whl
diff --git a/setup.py b/setup.py
@@ -60,7 +60,7 @@
         'requests>=2.23,<2.29',
     ],
     extras_require={
-        "lemmatizer": ["spacy==3.3.1", "markupsafe==2.0.1", "Cython==0.29.24", "fr-core-news-sm==3.3.0"]
+        "lemmatizer": ["spacy==3.3.3", "markupsafe==2.0.1", "Cython==0.29.24", "fr-core-news-sm==3.3.0"]
     }
     # pip install words_n_fun || pip install words_n_fun[lemmatizer]
 )
diff --git a/tests/test_1_utils.py b/tests/test_1_utils.py
@@ -94,7 +94,7 @@ def test_data_agnostic(self):
         '''Testing function utils.data_agnostic'''
         # Definition d'une fonction à décorer
         def test_function(docs):
-            if type(docs) != pd.Series: raise TypeError('')
+            if not isinstance(docs, pd.Series): raise TypeError('')
             return docs.apply(lambda x: 'test')
         # Vals à tester
         test_str = "ceci est un test"
@@ -159,7 +159,7 @@ def test_data_agnostic_input(self):
         '''Testing function utils.data_agnostic_input'''
         # Definition d'une fonction à décorer
         def test_function(docs):
-            if type(docs) != pd.Series: raise TypeError('')
+            if not isinstance(docs, pd.Series): raise TypeError('')
             return docs.apply(lambda x: 'test')
         # Vals à tester
         test_str = "ceci est un test"
@@ -503,7 +503,7 @@ def test_regroup_data_series(self):
         '''Testing function utils.regroup_data_series'''
         # Definition d'une fonction à décorer
         def test_function(docs):
-            if type(docs) != pd.Series: raise TypeError('')
+            if not isinstance(docs, pd.Series): raise TypeError('')
             return docs.apply(lambda x: x if x in ['avant', 'milieu', 'après'] else 'test')
         # Vals à tester
         docs_test = pd.Series(['avant'] + ["ceci est un test"] * 5000 + ['milieu'] + ["ceci est un test"] * 5000 + ['après'], name='test')
@@ -534,11 +534,11 @@ def test_regroup_data_df(self):
         '''Testing function utils.regroup_data_df'''
         # Definition d'une fonction à wrapper
         def test_function_1(df):
-            if type(df) != pd.DataFrame: raise TypeError('')
+            if not isinstance(df, pd.DataFrame): raise TypeError('')
             df['test1'] = df['test1'].str.replace('toto', 'titi', regex=False)
             return df
         def test_function_2(df):
-            if type(df) != pd.DataFrame: raise TypeError('')
+            if not isinstance(df, pd.DataFrame): raise TypeError('')
             df['test3'] = df['test2'].str.replace('toto', 'tata', regex=False)
             return df
         # Vals à tester

diff --git a/tests/test_3_api.py b/tests/test_3_api.py
@@ -70,11 +70,11 @@ def test_get_preprocessor(self):
     def test_preprocessor_pipeline_setter(self):
         preprocessor = api.get_preprocessor(pipeline=api.DEFAULT_PIPELINE)
         # test getter
-        self.assertEquals(preprocessor.pipeline, api.DEFAULT_PIPELINE)
+        self.assertEqual(preprocessor.pipeline, api.DEFAULT_PIPELINE)
         # modify pipeline
         alt_pipeline = api.DEFAULT_PIPELINE[:-2]
         preprocessor.pipeline = alt_pipeline
-        self.assertEquals(preprocessor.pipeline, alt_pipeline)
+        self.assertEqual(preprocessor.pipeline, alt_pipeline)
 
     def test_preprocess_pipeline(self):
         '''Testing function api.preprocess_pipeline'''

diff --git a/tests/test_4_vectorizationTokenization.py b/tests/test_4_vectorizationTokenization.py
@@ -23,27 +23,19 @@
 
 # Utils libs
 import os
-import importlib
 import numpy as np
 import pandas as pd
-from words_n_fun import utils
 from words_n_fun.preprocessing import vectorization_tokenization
 
 # Disable logging
 import logging
 logging.disable(logging.CRITICAL)
 
 
+
 class VectorizationTokenizationTests(unittest.TestCase):
     '''Main class to test all functions in vectorization_tokenization.py.'''
 
-    # Mock du decorateur DataAgnostic (on le bypass pour les tests)
-    default_decorator = lambda f: f
-    utils.data_agnostic = default_decorator
-    utils.data_agnostic_input = default_decorator
-    # Reload de la librairie vectorization_tokenization (pour application du decorateur par defaut)
-    importlib.reload(vectorization_tokenization)
-
 
     def setUp(self):
         '''SetUp fonction'''

diff --git a/tests/test_5_synonym_malefemale_replacement.py b/tests/test_5_synonym_malefemale_replacement.py
@@ -23,10 +23,8 @@
 
 # Utils libs
 import os
-import importlib
 import numpy as np
 import pandas as pd
-from words_n_fun import utils
 from words_n_fun.preprocessing import synonym_malefemale_replacement
 
 # Disable logging
@@ -37,13 +35,6 @@
 class SynonymTests(unittest.TestCase):
     '''Main class to test all functions in synonym_malefemale_replacement.py'''
 
-    # Mock du decorateur DataAgnostic (on le bypass pour les tests)
-    default_decorator = lambda f: f
-    utils.data_agnostic = default_decorator
-    utils.data_agnostic_input = default_decorator
-    # Reload de la librairie basic (pour application du decorateur par defaut)
-    importlib.reload(synonym_malefemale_replacement)
-
 
     def setUp(self):
         '''SetUp fonction'''

diff --git a/tests/test_6_lemmatizer.py b/tests/test_6_lemmatizer.py
@@ -27,10 +27,8 @@
     import spacy
 except ModuleNotFoundError:
     raise unittest.SkipTest("Skipping all lemmatizer tests as spacy can't be imported.")
-import importlib
 import numpy as np
 import pandas as pd
-from words_n_fun import utils
 from words_n_fun.preprocessing import lemmatizer
 
 # Disable logging
@@ -41,13 +39,6 @@
 class LemmatizerTests(unittest.TestCase):
     '''Main class to test all functions in lemmatizer.py.'''
 
-    # Mock du decorateur DataAgnostic (on le bypass pour les tests)
-    default_decorator = lambda f: f
-    utils.data_agnostic = default_decorator
-    utils.data_agnostic_input = default_decorator
-    # Reload de la librairie lemmatizer (pour application du decorateur par defaut)
-    importlib.reload(lemmatizer)
-
 
     def setUp(self):
         '''SetUp fonction'''

diff --git a/tests/test_7_stopwords.py b/tests/test_7_stopwords.py
@@ -23,10 +23,8 @@
 
 # Utils libs
 import os
-import importlib
 import numpy as np
 import pandas as pd
-from words_n_fun import utils
 from words_n_fun.preprocessing import stopwords
 
 # Disable logging
@@ -37,13 +35,6 @@
 class StopwordsTests(unittest.TestCase):
     '''Main class to test all functions in stopwords.py.'''
 
-    # Mock du decorateur DataAgnostic (on le bypass pour les tests)
-    default_decorator = lambda f: f
-    utils.data_agnostic = default_decorator
-    utils.data_agnostic_input = default_decorator
-    # Reload de la librairie stopwords (pour application du decorateur par defaut)
-    importlib.reload(stopwords)
-
 
     def setUp(self):
         '''SetUp fonction'''

diff --git a/words_n_fun/preprocessing/vectorization_tokenization.py b/words_n_fun/preprocessing/vectorization_tokenization.py
@@ -30,7 +30,6 @@
 logger = logging.getLogger(__name__)
 
 
-@utils.data_agnostic
 def split_text_into_tokens(docs: pd.Series, nbech: int = 10, seq_size: int = 3, step: int = 1,
                            granularity: str = "word") -> Tuple[pd.Series, pd.Series]:
     '''Split an input text into seq_size tokens (word or char) with at most nbech tokens