Coverage for tests\sampling\test_factory_closest_in_different_clusters.py: 100.00%
51 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: interactive-clustering/tests/sampling/test_factory_closest_in_different_clusters.py
5* Description: Unittests for the `sampling.cluster_based` module, `"closest_in_different_clusters"` sampler.
6* Author: Erwan SCHILD
7* Created: 17/03/2021
8* Licence: CeCILL (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORT PYTHON DEPENDENCIES
13# ==============================================================================
15import pytest
16from scipy.sparse import csr_matrix
18from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager
19from cognitivefactory.interactive_clustering.sampling.clusters_based import ClustersBasedConstraintsSampling
22# ==============================================================================
23# test_factory_closest_in_different_clusters_sampler_for_correct_settings
24# ==============================================================================
def test_factory_closest_in_different_clusters_sampler_for_correct_settings():
    """
    Check that a `closest_in_different_clusters` sampler can be instantiated with valid settings.
    """

    # Settings describing the "closest neighbors in different clusters" sampler.
    settings = {
        "clusters_restriction": "different_clusters",
        "distance_restriction": "closest_neighbors",
        "random_seed": 1,
    }
    closest_sampler = ClustersBasedConstraintsSampling(**settings)

    # The instance must be truthy and must expose the seed it was given.
    assert closest_sampler
    assert closest_sampler.random_seed == 1
39# ==============================================================================
40# test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_constraints_manager
41# ==============================================================================
def test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_constraints_manager():
    """
    Check that sampling raises `ValueError` when `constraints_manager` is not valid.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        random_seed=1,
        distance_restriction="closest_neighbors",
        clusters_restriction="different_clusters",
    )

    # `None` is given as manager: the error message is expected to mention `constraints_manager`.
    invalid_arguments = {"constraints_manager": None, "nb_to_select": None}
    with pytest.raises(ValueError, match="`constraints_manager`"):
        closest_sampler.sample(**invalid_arguments)
60# ==============================================================================
61# test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_nb_to_select
62# ==============================================================================
def test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_nb_to_select():
    """
    Check that sampling raises `ValueError` when `nb_to_select` is not valid.

    Two invalid values are exercised in turn: `None` and a negative number.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        random_seed=1,
        distance_restriction="closest_neighbors",
        clusters_restriction="different_clusters",
    )

    data_ids = ("bonjour", "salut", "coucou", "au revoir", "a bientôt")

    # Each invalid value is checked against a freshly built manager.
    for invalid_nb_to_select in (None, -99):
        with pytest.raises(ValueError, match="`nb_to_select`"):
            closest_sampler.sample(
                constraints_manager=BinaryConstraintsManager(list_of_data_IDs=list(data_ids)),
                nb_to_select=invalid_nb_to_select,
            )
104# ==============================================================================
105# test_factory_closest_in_different_clusters_sampler_sample_for_zero_nb_to_select
106# ==============================================================================
def test_factory_closest_in_different_clusters_sampler_sample_for_zero_nb_to_select():
    """
    Check that sampling yields a falsy result when `nb_to_select` is zero.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        random_seed=1,
        distance_restriction="closest_neighbors",
        clusters_restriction="different_clusters",
    )

    # Manager holding five data IDs and no constraint.
    manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Asking for zero pairs must give nothing back.
    selection = closest_sampler.sample(constraints_manager=manager, nb_to_select=0)
    assert not selection
132# ==============================================================================
133# test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_clustering_result
134# ==============================================================================
def test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_clustering_result():
    """
    Check that sampling rejects an invalid `clustering_result`.

    Two cases are exercised: a `clustering_result` given as a string (`ValueError` expected),
    and a `clustering_result` dictionary whose keys are not the managed data IDs (`KeyError` expected).
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        random_seed=1,
        distance_restriction="closest_neighbors",
        clusters_restriction="different_clusters",
    )

    data_ids = ("bonjour", "salut", "coucou", "au revoir", "a bientôt")

    # Case 1: `clustering_result` is a string instead of a dictionary.
    first_manager = BinaryConstraintsManager(list_of_data_IDs=list(data_ids))
    with pytest.raises(ValueError, match="`clustering_result`"):
        closest_sampler.sample(
            constraints_manager=first_manager,
            nb_to_select=3,
            clustering_result="unknown",
        )

    # Case 2: `clustering_result` is a dictionary, but none of its keys is a managed data ID.
    second_manager = BinaryConstraintsManager(list_of_data_IDs=list(data_ids))
    unrelated_clustering = {"first": 1, "second": 2}
    coordinates = {
        "bonjour": [1.0, 0.0],
        "salut": [0.99, 0.0],
        "coucou": [0.8, 0.0],
        "au revoir": [0.0, 1.0],
        "a bientôt": [0.0, 0.9],
    }
    vectors = {data_id: csr_matrix(values) for data_id, values in coordinates.items()}
    with pytest.raises(KeyError, match="'a bientôt'|'au revoir'|'bonjour'|'coucou'|'salut'"):
        closest_sampler.sample(
            constraints_manager=second_manager,
            nb_to_select=3,
            clustering_result=unrelated_clustering,
            vectors=vectors,
        )
188# ==============================================================================
189# test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_vectors
190# ==============================================================================
def test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_vectors():
    """
    Check that sampling rejects invalid `vectors`.

    Two cases are exercised: `vectors` given as a string (`ValueError` expected),
    and a `vectors` dictionary whose keys are not the managed data IDs (`KeyError` expected).
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        random_seed=1,
        distance_restriction="closest_neighbors",
        clusters_restriction="different_clusters",
    )

    data_ids = ("bonjour", "salut", "coucou", "au revoir", "a bientôt")

    # Valid clustering shared by both cases: three data in cluster 0, two in cluster 1.
    clustering = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # Case 1: `vectors` is a string instead of a dictionary.
    first_manager = BinaryConstraintsManager(list_of_data_IDs=list(data_ids))
    with pytest.raises(ValueError, match="`vectors`"):
        closest_sampler.sample(
            constraints_manager=first_manager,
            nb_to_select=3,
            clustering_result=dict(clustering),
            vectors="unknown",
        )

    # Case 2: `vectors` is a dictionary, but none of its keys is a managed data ID.
    second_manager = BinaryConstraintsManager(list_of_data_IDs=list(data_ids))
    unrelated_vectors = {"first": 1, "second": 2}
    with pytest.raises(KeyError, match="'a bientôt'|'au revoir'|'bonjour'|'coucou'|'salut'"):
        closest_sampler.sample(
            constraints_manager=second_manager,
            nb_to_select=3,
            clustering_result=dict(clustering),
            vectors=unrelated_vectors,
        )
251# ==============================================================================
252# test_factory_closest_in_different_clusters_sampler_sample_for_empty_constraints_manager
253# ==============================================================================
def test_factory_closest_in_different_clusters_sampler_sample_for_empty_constraints_manager():
    """
    Check the pairs sampled when the `constraints_manager` holds no constraint.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        random_seed=1,
        distance_restriction="closest_neighbors",
        clusters_restriction="different_clusters",
    )

    # Manager holding five data IDs and no constraint.
    manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Three data in cluster 0, two in cluster 1.
    clustering = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # One sparse row vector per data ID.
    coordinates = {
        "bonjour": [1.0, 0.0],
        "salut": [0.99, 0.0],
        "coucou": [0.8, 0.0],
        "au revoir": [0.0, 0.9],
        "a bientôt": [0.0, 0.8],
    }
    vectors = {data_id: csr_matrix(values) for data_id, values in coordinates.items()}

    sampled_pairs = closest_sampler.sample(
        constraints_manager=manager,
        nb_to_select=3,
        clustering_result=clustering,
        vectors=vectors,
    )

    # Exact pairs, in this exact order, expected from the sampler.
    expected_pairs = [
        ("a bientôt", "coucou"),
        ("au revoir", "coucou"),
        ("a bientôt", "salut"),
    ]
    assert sampled_pairs == expected_pairs
297# ==============================================================================
298# test_factory_closest_in_different_clusters_sampler_sample_for_correct_constraints_manager
299# ==============================================================================
def test_factory_closest_in_different_clusters_sampler_sample_for_correct_constraints_manager():
    """
    Check the pairs sampled when the `constraints_manager` already holds some constraints.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        random_seed=1,
        distance_restriction="closest_neighbors",
        clusters_restriction="different_clusters",
    )

    # Manager holding five data IDs, then annotated with two "MUST_LINK" constraints.
    manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )
    for first_id, second_id in (("bonjour", "salut"), ("au revoir", "a bientôt")):
        manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type="MUST_LINK")

    # Three data in cluster 0, two in cluster 1.
    clustering = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # One sparse row vector per data ID.
    coordinates = {
        "bonjour": [1.0, 0.0],
        "salut": [0.99, 0.0],
        "coucou": [0.8, 0.0],
        "au revoir": [0.0, 0.9],
        "a bientôt": [0.0, 0.8],
    }
    vectors = {data_id: csr_matrix(values) for data_id, values in coordinates.items()}

    sampled_pairs = closest_sampler.sample(
        constraints_manager=manager,
        nb_to_select=3,
        clustering_result=clustering,
        vectors=vectors,
    )

    # Exact pairs, in this exact order, expected from the sampler.
    expected_pairs = [
        ("a bientôt", "coucou"),
        ("au revoir", "coucou"),
        ("a bientôt", "salut"),
    ]
    assert sampled_pairs == expected_pairs
348# ==============================================================================
349# test_factory_closest_in_different_clusters_sampler_sample_for_full_annotated_constraints_manager
350# ==============================================================================
def test_factory_closest_in_different_clusters_sampler_sample_for_full_annotated_constraints_manager():
    """
    Check that sampling yields a falsy result when the `constraints_manager` is fully annotated.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        random_seed=1,
        distance_restriction="closest_neighbors",
        clusters_restriction="different_clusters",
    )

    # Manager holding five data IDs.
    manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Constraints added one after the other, in this order.
    annotations = (
        ("bonjour", "salut", "MUST_LINK"),
        ("bonjour", "coucou", "MUST_LINK"),
        ("bonjour", "au revoir", "CANNOT_LINK"),
        ("au revoir", "a bientôt", "MUST_LINK"),
    )
    for first_id, second_id, link_type in annotations:
        manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=link_type)

    # Three data in cluster 0, two in cluster 1.
    clustering = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # One sparse row vector per data ID.
    coordinates = {
        "bonjour": [1.0, 0.0],
        "salut": [0.99, 0.0],
        "coucou": [0.8, 0.0],
        "au revoir": [0.0, 0.9],
        "a bientôt": [0.0, 0.8],
    }
    vectors = {data_id: csr_matrix(values) for data_id, values in coordinates.items()}

    sampled_pairs = closest_sampler.sample(
        constraints_manager=manager,
        nb_to_select=3,
        clustering_result=clustering,
        vectors=vectors,
    )

    # Nothing is expected to be sampled for this manager.
    assert not sampled_pairs