Coverage for tests\sampling\test_factory_farthest_in_same_cluster.py: 100.00%
51 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: interactive-clustering/tests/sampling/test_factory_farthest_in_same_cluster.py
5* Description: Unittests for the `sampling.clusters_based` module, `"farthest_in_same_cluster"` sampler.
6* Author: Erwan SCHILD
7* Created: 17/03/2021
8* Licence: CeCILL (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORT PYTHON DEPENDENCIES
13# ==============================================================================
15import pytest
16from scipy.sparse import csr_matrix
18from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager
19from cognitivefactory.interactive_clustering.sampling.clusters_based import ClustersBasedConstraintsSampling
22# ==============================================================================
23# test_factory_farhest_in_same_cluster_sampler_for_correct_settings
24# ==============================================================================
def test_factory_farhest_in_same_cluster_sampler_for_correct_settings():
    """
    Test that the `farthest_in_same_cluster` sampler can be instantiated with correct settings.
    """

    # Settings describing the sampler under test.
    sampler_settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }

    # Build the sampler from these settings.
    sampler = ClustersBasedConstraintsSampling(**sampler_settings)

    # The instance must be truthy and expose the requested random seed.
    assert sampler
    assert sampler.random_seed == 1
41# ==============================================================================
42# test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_constraints_manager
43# ==============================================================================
def test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_constraints_manager():
    """
    Test that the `farthest_in_same_cluster` sampler sampling raises `ValueError` for incorrect `constraints_manager`.
    """

    # Build the sampler under test.
    sampler_settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**sampler_settings)

    # Arguments of the call: no constraints manager is provided.
    invalid_arguments = {
        "constraints_manager": None,
        "nb_to_select": None,
    }

    # The error message has to mention the `constraints_manager` parameter.
    with pytest.raises(ValueError, match="`constraints_manager`"):
        sampler.sample(**invalid_arguments)
64# ==============================================================================
65# test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_nb_to_select
66# ==============================================================================
def test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_nb_to_select():
    """
    Test that the `farthest_in_same_cluster` sampler sampling raises `ValueError` for incorrect `nb_to_select`.
    """

    # Build the sampler under test.
    sampler_settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**sampler_settings)

    # Both a missing value (`None`) and a negative value (`-99`) have to be rejected.
    for invalid_nb_to_select in (None, -99):
        # Use a fresh constraints manager for each attempt.
        constraints_manager = BinaryConstraintsManager(
            list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
        )

        # The error message has to mention the `nb_to_select` parameter.
        with pytest.raises(ValueError, match="`nb_to_select`"):
            sampler.sample(
                constraints_manager=constraints_manager,
                nb_to_select=invalid_nb_to_select,
            )
110# ==============================================================================
111# test_factory_farhest_in_same_cluster_sampler_sample_for_zero_nb_to_select
112# ==============================================================================
def test_factory_farhest_in_same_cluster_sampler_sample_for_zero_nb_to_select():
    """
    Test that the `farthest_in_same_cluster` sampler sampling returns nothing for zero `nb_to_select`.
    """

    # Build the sampler under test.
    sampler_settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**sampler_settings)

    # Constraints manager over five data IDs, without any constraint.
    constraints_manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Ask for zero pairs of data IDs.
    selection = sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=0,
    )

    # The selection has to be falsy (nothing selected).
    assert not selection
140# ==============================================================================
141# test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_clustering_result
142# ==============================================================================
def test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_clustering_result():
    """
    Test that the `farthest_in_same_cluster` sampler sampling fails for incorrect `clustering_result`.

    Two cases are checked:
        - a `ValueError` is expected when `clustering_result` is the string `"unknown"`;
        - a `KeyError` is expected when the keys of `clustering_result` are not the managed data IDs.
    """

    # Build the sampler under test.
    sampler_settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**sampler_settings)

    # Data IDs shared by both checks.
    data_ids = ["bonjour", "salut", "coucou", "au revoir", "a bientôt"]

    # Case 1: `clustering_result` is a string.
    first_manager = BinaryConstraintsManager(list_of_data_IDs=data_ids)
    with pytest.raises(ValueError, match="`clustering_result`"):
        sampler.sample(
            constraints_manager=first_manager,
            nb_to_select=3,
            clustering_result="unknown",
        )

    # Case 2: `clustering_result` is a dictionary whose keys are not the managed data IDs.
    second_manager = BinaryConstraintsManager(list_of_data_IDs=data_ids)
    mismatched_clustering = {
        "first": 1,
        "second": 2,
    }
    vectors = {
        "bonjour": csr_matrix([1.0, 0.0]),
        "salut": csr_matrix([0.99, 0.0]),
        "coucou": csr_matrix([0.8, 0.0]),
        "au revoir": csr_matrix([0.0, 1.0]),
        "a bientôt": csr_matrix([0.0, 0.9]),
    }

    # The error has to name one of the managed data IDs.
    with pytest.raises(KeyError, match="'a bientôt'|'au revoir'|'bonjour'|'coucou'|'salut'"):
        sampler.sample(
            constraints_manager=second_manager,
            nb_to_select=3,
            clustering_result=mismatched_clustering,
            vectors=vectors,
        )
198# ==============================================================================
199# test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_vectors
200# ==============================================================================
def test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_vectors():
    """
    Test that the `farthest_in_same_cluster` sampler sampling fails for incorrect `vectors`.

    Two cases are checked:
        - a `ValueError` is expected when `vectors` is the string `"unknown"`;
        - a `KeyError` is expected when the keys of `vectors` are not the managed data IDs.
    """

    # Build the sampler under test.
    sampler_settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**sampler_settings)

    # Data IDs shared by both checks.
    data_ids = ["bonjour", "salut", "coucou", "au revoir", "a bientôt"]

    # Case 1: `vectors` is a string.
    first_manager = BinaryConstraintsManager(list_of_data_IDs=data_ids)
    first_clustering = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }
    with pytest.raises(ValueError, match="`vectors`"):
        sampler.sample(
            constraints_manager=first_manager,
            nb_to_select=3,
            clustering_result=first_clustering,
            vectors="unknown",
        )

    # Case 2: `vectors` is a dictionary whose keys are not the managed data IDs.
    second_manager = BinaryConstraintsManager(list_of_data_IDs=data_ids)
    second_clustering = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }
    mismatched_vectors = {
        "first": 1,
        "second": 2,
    }

    # The error has to name one of the managed data IDs.
    with pytest.raises(KeyError, match="'a bientôt'|'au revoir'|'bonjour'|'coucou'|'salut'"):
        sampler.sample(
            constraints_manager=second_manager,
            nb_to_select=3,
            clustering_result=second_clustering,
            vectors=mismatched_vectors,
        )
263# ==============================================================================
264# test_factory_farhest_in_same_cluster_sampler_sample_for_empty_constraints_manager
265# ==============================================================================
def test_factory_farhest_in_same_cluster_sampler_sample_for_empty_constraints_manager():
    """
    Test that the `farthest_in_same_cluster` sampler sampling works for a `constraints_manager` without constraints.
    """

    # Build the sampler under test.
    sampler_settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**sampler_settings)

    # Constraints manager over five data IDs, without any constraint.
    constraints_manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Clustering: three data IDs in cluster 0, two in cluster 1.
    clustering_result = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # One sparse vector per data ID.
    vectors = {
        "bonjour": csr_matrix([1.0, 0.0]),
        "salut": csr_matrix([0.99, 0.0]),
        "coucou": csr_matrix([0.8, 0.0]),
        "au revoir": csr_matrix([0.0, 0.9]),
        "a bientôt": csr_matrix([0.0, 0.8]),
    }

    # Pairs expected from the sampler, in order.
    expected_selection = [
        ("bonjour", "coucou"),
        ("coucou", "salut"),
        ("a bientôt", "au revoir"),
    ]

    # Sample three pairs and compare them with the expected ones.
    selection = sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=3,
        clustering_result=clustering_result,
        vectors=vectors,
    )
    assert selection == expected_selection
311# ==============================================================================
312# test_factory_farhest_in_same_cluster_sampler_sample_for_correct_constraints_manager
313# ==============================================================================
def test_factory_farhest_in_same_cluster_sampler_sample_for_correct_constraints_manager():
    """
    Test that the `farthest_in_same_cluster` sampler sampling works for a `constraints_manager` with some constraints.
    """

    # Build the sampler under test.
    sampler_settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**sampler_settings)

    # Constraints manager over five data IDs.
    constraints_manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Annotate two pairs of data IDs as `MUST_LINK`.
    for data_id_1, data_id_2 in (("bonjour", "salut"), ("au revoir", "a bientôt")):
        constraints_manager.add_constraint(
            data_ID1=data_id_1,
            data_ID2=data_id_2,
            constraint_type="MUST_LINK",
        )

    # Clustering: three data IDs in cluster 0, two in cluster 1.
    clustering_result = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # One sparse vector per data ID.
    vectors = {
        "bonjour": csr_matrix([1.0, 0.0]),
        "salut": csr_matrix([0.99, 0.0]),
        "coucou": csr_matrix([0.8, 0.0]),
        "au revoir": csr_matrix([0.0, 0.9]),
        "a bientôt": csr_matrix([0.0, 0.8]),
    }

    # Only two pairs are expected although three are requested.
    expected_selection = [
        ("bonjour", "coucou"),
        ("coucou", "salut"),
    ]

    # Sample and compare with the expected pairs.
    selection = sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=3,
        clustering_result=clustering_result,
        vectors=vectors,
    )
    assert selection == expected_selection
363# ==============================================================================
364# test_factory_farhest_in_same_cluster_sampler_sample_for_full_annotated_constraints_manager
365# ==============================================================================
def test_factory_farhest_in_same_cluster_sampler_sample_for_full_annotated_constraints_manager():
    """
    Test that the `farthest_in_same_cluster` sampler sampling returns nothing for a fully annotated `constraints_manager`.
    """

    # Build the sampler under test.
    sampler_settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**sampler_settings)

    # Constraints manager over five data IDs.
    constraints_manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Annotations to add, in order: (first data ID, second data ID, constraint type).
    annotations = (
        ("bonjour", "salut", "MUST_LINK"),
        ("bonjour", "coucou", "MUST_LINK"),
        ("bonjour", "au revoir", "CANNOT_LINK"),
        ("au revoir", "a bientôt", "MUST_LINK"),
    )
    for data_id_1, data_id_2, constraint_type in annotations:
        constraints_manager.add_constraint(
            data_ID1=data_id_1,
            data_ID2=data_id_2,
            constraint_type=constraint_type,
        )

    # Clustering: three data IDs in cluster 0, two in cluster 1.
    clustering_result = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # One sparse vector per data ID.
    vectors = {
        "bonjour": csr_matrix([1.0, 0.0]),
        "salut": csr_matrix([0.99, 0.0]),
        "coucou": csr_matrix([0.8, 0.0]),
        "au revoir": csr_matrix([0.0, 0.9]),
        "a bientôt": csr_matrix([0.0, 0.8]),
    }

    # Sample three pairs: the selection has to be falsy (nothing left to select).
    selection = sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=3,
        clustering_result=clustering_result,
        vectors=vectors,
    )
    assert not selection