Coverage for tests\utils\test_vectorization.py: 100.00%
27 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: interactive-clustering/tests/utils/test_vectorization.py
5* Description: Unittests for the `utils.vectorization` module.
6* Author: Erwan SCHILD
7* Created: 17/03/2021
8* Licence: CeCILL (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORT PYTHON DEPENDENCIES
13# ==============================================================================
15import pytest
16from scipy.sparse import csr_matrix
18from cognitivefactory.interactive_clustering.utils.vectorization import vectorize
21# ==============================================================================
22# test_vectorize_for_unimplemented_vectorizer
23# ==============================================================================
def test_vectorize_for_unimplemented_vectorizer():
    """
    Check that `utils.vectorization.vectorize` rejects an unknown `vectorizer_type` with a `ValueError`.
    """

    # Sample texts to vectorize, indexed by their identifier.
    texts = {
        "0": "comment signaler une perte de carte de paiement",
        "1": "quelle est la procedure pour chercher une carte de credit avalee",
        "2": "ma carte visa a un plafond de paiment trop bas puis je l augmenter",
    }

    # The error message is expected to name the offending `vectorizer_type` parameter.
    with pytest.raises(ValueError, match="`vectorizer_type`"):
        vectorize(dict_of_texts=texts, vectorizer_type="unimplemented")
41# ==============================================================================
42# test_vectorize_for_tfidf_vectorizer
43# ==============================================================================
def test_vectorize_for_tfidf_vectorizer():
    """
    Check that `utils.vectorization.vectorize` yields one `csr_matrix` per text with the TFIDF vectorizer.
    """

    # Sample texts to vectorize, indexed by their identifier.
    texts = {
        "0": "comment signaler une perte de carte de paiement",
        "1": "quelle est la procedure pour chercher une carte de credit avalee",
        "2": "ma carte visa a un plafond de paiment trop bas puis je l augmenter",
    }

    # Vectorize with the TFIDF vectorizer.
    vectors = vectorize(dict_of_texts=texts, vectorizer_type="tfidf")

    # The result is non-empty and keyed by the same identifiers as the input.
    assert vectors
    assert sorted(vectors.keys()) == ["0", "1", "2"]

    # Each vector is a scipy CSR matrix.
    for text_id in ("0", "1", "2"):
        assert isinstance(vectors[text_id], csr_matrix)
67# ==============================================================================
68# test_vectorize_for_uninstalled_spacy_language_model
69# ==============================================================================
def test_vectorize_for_uninstalled_spacy_language_model():
    """
    Check that `utils.vectorization.vectorize` raises `ValueError` when the requested spacy language model is not installed.
    """

    # Sample texts to vectorize, indexed by their identifier.
    texts = {
        "0": "comment signaler une perte de carte de paiement",
        "1": "quelle est la procedure pour chercher une carte de credit avalee",
        "2": "ma carte visa a un plafond de paiment trop bas puis je l augmenter",
    }

    # Ask for a spacy language model that is not installed: the error message
    # is expected to name the `spacy_language_model` parameter.
    with pytest.raises(ValueError, match="`spacy_language_model`"):
        vectorize(
            dict_of_texts=texts,
            vectorizer_type="spacy",
            spacy_language_model="uninstalled",
        )
88# ==============================================================================
89# test_vectorize_for_installed_spacy_language_model
90# ==============================================================================
def test_vectorize_for_installed_spacy_language_model():
    """
    Test that the `utils.vectorization.vectorize` works for an installed spacy language model.

    The spacy language model is requested explicitly (`en_core_web_md`), so it
    has to be available in the test environment for this test to pass.
    """

    # Check a SPACY vectorizer with an explicitly chosen language model.
    dict_of_vectors = vectorize(
        dict_of_texts={
            "0": "hello how are you",
            "1": "hello how old are you",
            "2": "hello where do you live",
        },
        vectorizer_type="spacy",
        spacy_language_model="en_core_web_md",
    )

    # Assertions: same checks as the other vectorizer tests, so that a wrong
    # set of keys or a wrong vector type is caught here as well.
    assert dict_of_vectors
    assert sorted(dict_of_vectors.keys()) == ["0", "1", "2"]
    assert isinstance(dict_of_vectors["0"], csr_matrix)
    assert isinstance(dict_of_vectors["1"], csr_matrix)
    assert isinstance(dict_of_vectors["2"], csr_matrix)
111# ==============================================================================
112# test_vectorize_for_spacy_vectorizer
113# ==============================================================================
def test_vectorize_for_spacy_vectorizer():
    """
    Check that `utils.vectorization.vectorize` yields one `csr_matrix` per text with the SPACY vectorizer.
    """

    # Sample texts to vectorize, indexed by their identifier.
    texts = {
        "0": "comment signaler une perte de carte de paiement",
        "1": "quelle est la procedure pour chercher une carte de credit avalee",
        "2": "ma carte visa a un plafond de paiment trop bas puis je l augmenter",
    }

    # Vectorize with the SPACY vectorizer, without naming a language model.
    vectors = vectorize(dict_of_texts=texts, vectorizer_type="spacy")

    # The result is non-empty and keyed by the same identifiers as the input.
    assert vectors
    assert sorted(vectors.keys()) == ["0", "1", "2"]

    # Each vector is a scipy CSR matrix.
    for text_id in ("0", "1", "2"):
        assert isinstance(vectors[text_id], csr_matrix)