Skip to content

Commit

Permalink
Fixes several bugs in words_n_fun tests (#26)
Browse files Browse the repository at this point in the history
* Fixes several bugs in words_n_fun tests

* Forgot to save requirements.txt for spacy version change
  • Loading branch information
gsolard authored Oct 12, 2023
1 parent 681d936 commit cc3eb0d
Show file tree
Hide file tree
Showing 9 changed files with 10 additions and 46 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ coverage==6.4.4
# Has to be installed last / optional, only needed to use the spaCy lemmatizer
markupsafe==2.0.1 # BUG FIX -> https://github.com/aws/aws-sam-cli/issues/3661
Cython==0.29.24
spacy==3.3.1
spacy==3.3.3
# The following line downloads a French spaCy model. It can be commented out if you don't have internet access to download it, but the lemmatizer features won't work.
https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.3.0/fr_core_news_sm-3.3.0-py3-none-any.whl
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
'requests>=2.23,<2.29',
],
extras_require={
"lemmatizer": ["spacy==3.3.1", "markupsafe==2.0.1", "Cython==0.29.24", "fr-core-news-sm==3.3.0"]
"lemmatizer": ["spacy==3.3.3", "markupsafe==2.0.1", "Cython==0.29.24", "fr-core-news-sm==3.3.0"]
}
# pip install words_n_fun || pip install words_n_fun[lemmatizer]
)
10 changes: 5 additions & 5 deletions tests/test_1_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_data_agnostic(self):
'''Testing function utils.data_agnostic'''
# Definition d'une fonction à décorer
def test_function(docs):
if type(docs) != pd.Series: raise TypeError('')
if not isinstance(docs, pd.Series): raise TypeError('')
return docs.apply(lambda x: 'test')
# Vals à tester
test_str = "ceci est un test"
Expand Down Expand Up @@ -159,7 +159,7 @@ def test_data_agnostic_input(self):
'''Testing function utils.data_agnostic_input'''
# Definition d'une fonction à décorer
def test_function(docs):
if type(docs) != pd.Series: raise TypeError('')
if not isinstance(docs, pd.Series): raise TypeError('')
return docs.apply(lambda x: 'test')
# Vals à tester
test_str = "ceci est un test"
Expand Down Expand Up @@ -503,7 +503,7 @@ def test_regroup_data_series(self):
'''Testing function utils.regroup_data_series'''
# Definition d'une fonction à décorer
def test_function(docs):
if type(docs) != pd.Series: raise TypeError('')
if not isinstance(docs, pd.Series): raise TypeError('')
return docs.apply(lambda x: x if x in ['avant', 'milieu', 'après'] else 'test')
# Vals à tester
docs_test = pd.Series(['avant'] + ["ceci est un test"] * 5000 + ['milieu'] + ["ceci est un test"] * 5000 + ['après'], name='test')
Expand Down Expand Up @@ -534,11 +534,11 @@ def test_regroup_data_df(self):
'''Testing function utils.regroup_data_df'''
# Definition d'une fonction à wrapper
def test_function_1(df):
if type(df) != pd.DataFrame: raise TypeError('')
if not isinstance(df, pd.DataFrame): raise TypeError('')
df['test1'] = df['test1'].str.replace('toto', 'titi', regex=False)
return df
def test_function_2(df):
if type(df) != pd.DataFrame: raise TypeError('')
if not isinstance(df, pd.DataFrame): raise TypeError('')
df['test3'] = df['test2'].str.replace('toto', 'tata', regex=False)
return df
# Vals à tester
Expand Down
4 changes: 2 additions & 2 deletions tests/test_3_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ def test_get_preprocessor(self):
def test_preprocessor_pipeline_setter(self):
preprocessor = api.get_preprocessor(pipeline=api.DEFAULT_PIPELINE)
# test getter
self.assertEquals(preprocessor.pipeline, api.DEFAULT_PIPELINE)
self.assertEqual(preprocessor.pipeline, api.DEFAULT_PIPELINE)
# modify pipeline
alt_pipeline = api.DEFAULT_PIPELINE[:-2]
preprocessor.pipeline = alt_pipeline
self.assertEquals(preprocessor.pipeline, alt_pipeline)
self.assertEqual(preprocessor.pipeline, alt_pipeline)

def test_preprocess_pipeline(self):
'''Testing function api.preprocess_pipeline'''
Expand Down
10 changes: 1 addition & 9 deletions tests/test_4_vectorizationTokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,19 @@

# Utils libs
import os
import importlib
import numpy as np
import pandas as pd
from words_n_fun import utils
from words_n_fun.preprocessing import vectorization_tokenization

# Disable logging
import logging
logging.disable(logging.CRITICAL)



class VectorizationTokenizationTests(unittest.TestCase):
'''Main class to test all functions in vectorization_tokenization.py.'''

# Mock du decorateur DataAgnostic (on le bypass pour les tests)
default_decorator = lambda f: f
utils.data_agnostic = default_decorator
utils.data_agnostic_input = default_decorator
# Reload de la librairie vectorization_tokenization (pour application du decorateur par defaut)
importlib.reload(vectorization_tokenization)


def setUp(self):
'''SetUp fonction'''
Expand Down
9 changes: 0 additions & 9 deletions tests/test_5_synonym_malefemale_replacement.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@

# Utils libs
import os
import importlib
import numpy as np
import pandas as pd
from words_n_fun import utils
from words_n_fun.preprocessing import synonym_malefemale_replacement

# Disable logging
Expand All @@ -37,13 +35,6 @@
class SynonymTests(unittest.TestCase):
'''Main class to test all functions in synonym_malefemale_replacement.py'''

# Mock du decorateur DataAgnostic (on le bypass pour les tests)
default_decorator = lambda f: f
utils.data_agnostic = default_decorator
utils.data_agnostic_input = default_decorator
# Reload de la librairie basic (pour application du decorateur par defaut)
importlib.reload(synonym_malefemale_replacement)


def setUp(self):
'''SetUp fonction'''
Expand Down
9 changes: 0 additions & 9 deletions tests/test_6_lemmatizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@
import spacy
except ModuleNotFoundError:
raise unittest.SkipTest("Skipping all lemmatizer tests as spacy can't be imported.")
import importlib
import numpy as np
import pandas as pd
from words_n_fun import utils
from words_n_fun.preprocessing import lemmatizer

# Disable logging
Expand All @@ -41,13 +39,6 @@
class LemmatizerTests(unittest.TestCase):
'''Main class to test all functions in lemmatizer.py.'''

# Mock du decorateur DataAgnostic (on le bypass pour les tests)
default_decorator = lambda f: f
utils.data_agnostic = default_decorator
utils.data_agnostic_input = default_decorator
# Reload de la librairie lemmatizer (pour application du decorateur par defaut)
importlib.reload(lemmatizer)


def setUp(self):
'''SetUp fonction'''
Expand Down
9 changes: 0 additions & 9 deletions tests/test_7_stopwords.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@

# Utils libs
import os
import importlib
import numpy as np
import pandas as pd
from words_n_fun import utils
from words_n_fun.preprocessing import stopwords

# Disable logging
Expand All @@ -37,13 +35,6 @@
class StopwordsTests(unittest.TestCase):
'''Main class to test all functions in stopwords.py.'''

# Mock du decorateur DataAgnostic (on le bypass pour les tests)
default_decorator = lambda f: f
utils.data_agnostic = default_decorator
utils.data_agnostic_input = default_decorator
# Reload de la librairie stopwords (pour application du decorateur par defaut)
importlib.reload(stopwords)


def setUp(self):
'''SetUp fonction'''
Expand Down
1 change: 0 additions & 1 deletion words_n_fun/preprocessing/vectorization_tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
logger = logging.getLogger(__name__)


@utils.data_agnostic
def split_text_into_tokens(docs: pd.Series, nbech: int = 10, seq_size: int = 3, step: int = 1,
granularity: str = "word") -> Tuple[pd.Series, pd.Series]:
'''Split an input text into seq_size tokens (word or char) with at most nbech tokens
Expand Down

0 comments on commit cc3eb0d

Please sign in to comment.