Coverage for tests\clustering\test_kmeans.py: 100.00%
134 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: interactive-clustering/tests/clustering/test_kmeans.py
5* Description: Unittests for the `clustering.kmeans` module.
6* Author: Erwan SCHILD
7* Created: 17/03/2021
8* Licence: CeCILL (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORT PYTHON DEPENDENCIES
13# ==============================================================================
15import math
17import numpy as np
18import pytest
19from scipy.sparse import csr_matrix
21from cognitivefactory.interactive_clustering.clustering.kmeans import KMeansConstrainedClustering
22from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager
25# ==============================================================================
26# test_KMeansConstrainedClustering_for_inconsistent_model
27# ==============================================================================
def test_KMeansConstrainedClustering_for_inconsistent_model():
    """
    Check that building a `clustering.kmeans.KMeansConstrainedClustering` with an unsupported `model` value raises a `ValueError`.
    """

    # An arbitrary string is passed as `model`; the error message is expected to mention `model`.
    unsupported_settings = {"model": "as_you_want"}
    with pytest.raises(ValueError, match="`model`"):
        KMeansConstrainedClustering(**unsupported_settings)
40# ==============================================================================
41# test_KMeansConstrainedClustering_for_inconsistent_max_iteration
42# ==============================================================================
def test_KMeansConstrainedClustering_for_inconsistent_max_iteration():
    """
    Check that building a `clustering.kmeans.KMeansConstrainedClustering` with an invalid `max_iteration` value raises a `ValueError`.
    """

    # A negative iteration count is passed; the error message is expected to mention `max_iteration`.
    unsupported_settings = {"max_iteration": -1}
    with pytest.raises(ValueError, match="`max_iteration`"):
        KMeansConstrainedClustering(**unsupported_settings)
55# ==============================================================================
56# test_KMeansConstrainedClustering_for_inconsistent_tolerance
57# ==============================================================================
def test_KMeansConstrainedClustering_for_inconsistent_tolerance():
    """
    Check that building a `clustering.kmeans.KMeansConstrainedClustering` with an invalid `tolerance` value raises a `ValueError`.
    """

    # A negative tolerance is passed; the error message is expected to mention `tolerance`.
    unsupported_settings = {"tolerance": -1}
    with pytest.raises(ValueError, match="`tolerance`"):
        KMeansConstrainedClustering(**unsupported_settings)
70# ==============================================================================
71# test_KMeansConstrainedClustering_for_correct_settings
72# ==============================================================================
def test_KMeansConstrainedClustering_for_correct_settings():
    """
    Check that a `clustering.kmeans.KMeansConstrainedClustering` built with valid settings exposes those settings as attributes.
    """

    # Settings handed to the constructor and expected back on the instance.
    expected_model = "COP"
    expected_max_iteration = 100
    expected_tolerance = 1e-3
    expected_random_seed = 3

    clustering_model = KMeansConstrainedClustering(
        model=expected_model,
        max_iteration=expected_max_iteration,
        tolerance=expected_tolerance,
        random_seed=expected_random_seed,
    )

    # The instance must be truthy and must store every setting.
    assert clustering_model
    assert clustering_model.model == expected_model
    assert clustering_model.max_iteration == expected_max_iteration
    # Float setting: compared with a tolerance rather than strict equality.
    assert math.isclose(clustering_model.tolerance, expected_tolerance)
    assert clustering_model.random_seed == expected_random_seed
92# ==============================================================================
93# test_KMeansConstrainedClustering_cluster_for_inconsistent_constraints_manager
94# ==============================================================================
def test_KMeansConstrainedClustering_cluster_for_inconsistent_constraints_manager():
    """
    Check that `clustering.kmeans.KMeansConstrainedClustering.cluster` raises a `ValueError` for an invalid `constraints_manager` argument.
    """

    # Use a model with default settings.
    clustering_model = KMeansConstrainedClustering()

    # `constraints_manager` is `None`; the error message is expected to mention `constraints_manager`.
    invalid_arguments = {
        "constraints_manager": None,
        "vectors": None,
        "nb_clusters": 2,
    }
    with pytest.raises(ValueError, match="`constraints_manager`"):
        clustering_model.cluster(**invalid_arguments)
112# ==============================================================================
113# test_KMeansConstrainedClustering_cluster_for_inconsistent_vectors
114# ==============================================================================
def test_KMeansConstrainedClustering_cluster_for_inconsistent_vectors():
    """
    Check that `clustering.kmeans.KMeansConstrainedClustering.cluster` raises a `ValueError` for an invalid `vectors` argument.
    """

    # Use a model with default settings and a valid constraints manager.
    clustering_model = KMeansConstrainedClustering()
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])

    # `vectors` is `None`; the error message is expected to mention `vectors`.
    with pytest.raises(ValueError, match="`vectors`"):
        clustering_model.cluster(constraints_manager=constraints_manager, vectors=None, nb_clusters=2)
132# ==============================================================================
133# test_KMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_1
134# ==============================================================================
def test_KMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_1():
    """
    Check that `clustering.kmeans.KMeansConstrainedClustering.cluster` raises a `ValueError` when `nb_clusters` is `None`.
    """

    # Use a model with default settings, a valid constraints manager and one vector per data ID.
    clustering_model = KMeansConstrainedClustering()
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
    # The three vectors deliberately use different containers: 1-D array, 2-D array and sparse matrix.
    vectors = {
        "first": np.array([1, 2, 3]),
        "second": np.array([[4, 5, 6]]),
        "third": csr_matrix([7, 8, 9]),
    }

    # `nb_clusters` is missing (`None`); the error message is expected to mention `nb_clusters`.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        clustering_model.cluster(constraints_manager=constraints_manager, vectors=vectors, nb_clusters=None)
152# ==============================================================================
153# test_KMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_2
154# ==============================================================================
def test_KMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_2():
    """
    Check that `clustering.kmeans.KMeansConstrainedClustering.cluster` raises a `ValueError` when `nb_clusters` is negative.
    """

    # Use a model with default settings, a valid constraints manager and one vector per data ID.
    clustering_model = KMeansConstrainedClustering()
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
    # The three vectors deliberately use different containers: 1-D array, 2-D array and sparse matrix.
    vectors = {
        "first": np.array([1, 2, 3]),
        "second": np.array([[4, 5, 6]]),
        "third": csr_matrix([7, 8, 9]),
    }

    # `nb_clusters` is too small (-1); the error message is expected to mention `nb_clusters`.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        clustering_model.cluster(constraints_manager=constraints_manager, vectors=vectors, nb_clusters=-1)
172# ==============================================================================
173# test_KMeansConstrainedClustering_cluster_with_no_constraints_1
174# ==============================================================================
def test_KMeansConstrainedClustering_cluster_with_no_constraints_1():
    """
    Check the result of `clustering.kmeans.KMeansConstrainedClustering.cluster` for 2 clusters when no constraint is set.
    """

    # One row per data ID; IDs are the row positions rendered as strings ("0" to "8").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.95, 0.02, 0.02, 0.01],
        [0.98, 0.00, 0.02, 0.00],
        [0.99, 0.00, 0.01, 0.00],
        [0.50, 0.22, 0.21, 0.07],
        [0.50, 0.21, 0.22, 0.07],
        [0.01, 0.01, 0.01, 0.97],
        [0.00, 0.01, 0.00, 0.99],
        [0.00, 0.00, 0.00, 1.00],
    ]
    vectors = {str(index): csr_matrix(row) for index, row in enumerate(rows)}
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Fixed seed so that the expected assignment below is reproducible.
    clustering_model = KMeansConstrainedClustering(random_seed=2)

    # Cluster into 2 groups, without any constraint.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=2,
    )

    # Expected cluster ID for data IDs "0" to "8", in that order.
    expected_labels = [0, 0, 0, 0, 0, 0, 1, 1, 1]
    expected_clusters = {str(index): label for index, label in enumerate(expected_labels)}

    # The result must be stored on the model and match the expected assignment.
    assert clustering_model.dict_of_predicted_clusters
    assert dict_of_predicted_clusters == expected_clusters
210# ==============================================================================
211# test_KMeansConstrainedClustering_cluster_with_no_constraints_2
212# ==============================================================================
def test_KMeansConstrainedClustering_cluster_with_no_constraints_2():
    """
    Check the result of `clustering.kmeans.KMeansConstrainedClustering.cluster` for 3 clusters when no constraint is set.
    """

    # One row per data ID; IDs are the row positions rendered as strings ("0" to "9").
    rows = [
        [1.00, 0.00, 0.00],
        [0.95, 0.02, 0.01],
        [0.98, 0.00, 0.00],
        [0.99, 0.00, 0.00],
        [0.01, 0.99, 0.07],
        [0.02, 0.99, 0.07],
        [0.01, 0.99, 0.02],
        [0.01, 0.01, 0.97],
        [0.00, 0.01, 0.99],
        [0.00, 0.00, 1.00],
    ]
    vectors = {str(index): csr_matrix(row) for index, row in enumerate(rows)}
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Fixed seed so that the expected assignment below is reproducible.
    clustering_model = KMeansConstrainedClustering(random_seed=2)

    # Cluster into 3 groups, without any constraint.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=3,
    )

    # Expected cluster ID for data IDs "0" to "9", in that order.
    expected_labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2]
    expected_clusters = {str(index): label for index, label in enumerate(expected_labels)}

    # The result must be stored on the model and match the expected assignment.
    assert clustering_model.dict_of_predicted_clusters
    assert dict_of_predicted_clusters == expected_clusters
259# ==============================================================================
260# test_KMeansConstrainedClustering_cluster_with_some_constraints
261# ==============================================================================
def test_KMeansConstrainedClustering_cluster_with_some_constraints():
    """
    Check the result of `clustering.kmeans.KMeansConstrainedClustering.cluster` for 3 clusters when a few constraints are set.
    """

    # One row per data ID; IDs are the row positions rendered as strings ("0" to "8").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.95, 0.02, 0.02, 0.01],
        [0.98, 0.00, 0.02, 0.00],
        [0.99, 0.00, 0.01, 0.00],
        [0.50, 0.22, 0.21, 0.07],
        [0.50, 0.21, 0.22, 0.07],
        [0.01, 0.01, 0.01, 0.97],
        [0.00, 0.01, 0.00, 0.99],
        [0.00, 0.00, 0.00, 1.00],
    ]
    vectors = {str(index): csr_matrix(row) for index, row in enumerate(rows)}
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=[str(index) for index in range(9)])

    # Constraints are registered in the listed order: four MUST_LINK, then two CANNOT_LINK.
    constraints = [
        ("0", "1", "MUST_LINK"),
        ("0", "7", "MUST_LINK"),
        ("0", "8", "MUST_LINK"),
        ("4", "5", "MUST_LINK"),
        ("0", "4", "CANNOT_LINK"),
        ("2", "4", "CANNOT_LINK"),
    ]
    for first_id, second_id, link_type in constraints:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=link_type)

    # Fixed seed so that the expected assignment below is reproducible.
    clustering_model = KMeansConstrainedClustering(random_seed=2)

    # Cluster into 3 groups with the constraints above.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=3,
    )

    # Expected cluster ID for data IDs "0" to "8", in that order.
    expected_labels = [0, 0, 1, 1, 2, 2, 0, 0, 0]
    expected_clusters = {str(index): label for index, label in enumerate(expected_labels)}

    # The result must be stored on the model and match the expected assignment.
    assert clustering_model.dict_of_predicted_clusters
    assert dict_of_predicted_clusters == expected_clusters
312# ==============================================================================
313# test_KMeansConstrainedClustering_cluster_with_full_constraints
314# ==============================================================================
def test_KMeansConstrainedClustering_cluster_with_full_constraints():
    """
    Check the result of `clustering.kmeans.KMeansConstrainedClustering.cluster` for 4 clusters when constraints tie every data ID to a group.
    """

    # One row per data ID; IDs are the row positions rendered as strings ("0" to "8").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.95, 0.02, 0.02, 0.01],
        [0.98, 0.00, 0.02, 0.00],
        [0.99, 0.00, 0.01, 0.00],
        [0.50, 0.22, 0.21, 0.07],
        [0.50, 0.21, 0.22, 0.07],
        [0.01, 0.01, 0.01, 0.97],
        [0.00, 0.01, 0.00, 0.99],
        [0.00, 0.00, 0.00, 1.00],
    ]
    vectors = {str(index): csr_matrix(row) for index, row in enumerate(rows)}
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=[str(index) for index in range(9)])

    # Constraints are registered in the listed order: MUST_LINK pairs first, then CANNOT_LINK pairs.
    constraints = [
        ("0", "4", "MUST_LINK"),
        ("0", "8", "MUST_LINK"),
        ("1", "5", "MUST_LINK"),
        ("2", "6", "MUST_LINK"),
        ("3", "7", "MUST_LINK"),
        ("0", "1", "CANNOT_LINK"),
        ("0", "2", "CANNOT_LINK"),
        ("0", "3", "CANNOT_LINK"),
        ("1", "2", "CANNOT_LINK"),
        ("1", "3", "CANNOT_LINK"),
        ("2", "3", "CANNOT_LINK"),
    ]
    for first_id, second_id, link_type in constraints:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=link_type)

    # Default settings: no random seed is given for this case.
    clustering_model = KMeansConstrainedClustering()

    # Cluster into 4 groups with the constraints above.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=4,
    )

    # Expected cluster ID for data IDs "0" to "8", in that order.
    expected_labels = [0, 1, 2, 3, 0, 1, 2, 3, 0]
    expected_clusters = {str(index): label for index, label in enumerate(expected_labels)}

    # The result must be stored on the model and match the expected assignment.
    assert clustering_model.dict_of_predicted_clusters
    assert dict_of_predicted_clusters == expected_clusters
368# ==============================================================================
369# test_KMeansConstrainedClustering_cluster_with_no_possible_cluster
370# ==============================================================================
def test_KMeansConstrainedClustering_cluster_with_no_possible_cluster():
    """
    Check the result of `clustering.kmeans.KMeansConstrainedClustering.cluster` when every pair of data IDs is under a `CANNOT_LINK` constraint.

    Five clusters are requested, yet the expected result puts each of the nine data IDs alone in its own cluster.
    """

    # One row per data ID; IDs are the row positions rendered as strings ("0" to "8").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.95, 0.02, 0.02, 0.01],
        [0.98, 0.00, 0.02, 0.00],
        [0.99, 0.00, 0.01, 0.00],
        [0.50, 0.22, 0.21, 0.07],
        [0.50, 0.21, 0.22, 0.07],
        [0.01, 0.01, 0.01, 0.97],
        [0.00, 0.01, 0.00, 0.99],
        [0.00, 0.00, 0.00, 1.00],
    ]
    vectors = {str(index): csr_matrix(row) for index, row in enumerate(rows)}
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=[str(index) for index in range(9)])

    # Forbid every unordered pair, visiting them as ("0", "1"), ("0", "2"), ..., ("7", "8").
    for first in range(9):
        for second in range(first + 1, 9):
            constraints_manager.add_constraint(
                data_ID1=str(first),
                data_ID2=str(second),
                constraint_type="CANNOT_LINK",
            )

    # Fixed seed for this case.
    clustering_model = KMeansConstrainedClustering(random_seed=3)

    # Ask for 5 clusters even though the constraints keep all 9 data IDs apart.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=5,
    )

    # Each data ID is expected in the cluster whose ID equals its own position.
    expected_clusters = {str(index): index for index in range(9)}

    # The result must be stored on the model and match the expected assignment.
    assert clustering_model.dict_of_predicted_clusters
    assert dict_of_predicted_clusters == expected_clusters
451# ==============================================================================
452# test_KMeansConstrainedClustering_cluster_with_max_iteration_ending
453# ==============================================================================
def test_KMeansConstrainedClustering_cluster_with_max_iteration_ending():
    """
    Check that `clustering.kmeans.KMeansConstrainedClustering.cluster` still returns a result when `max_iteration` is set to 1.
    """

    # One row per data ID; IDs are the row positions rendered as strings ("0" to "8").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.95, 0.02, 0.02, 0.01],
        [0.98, 0.00, 0.02, 0.00],
        [0.99, 0.00, 0.01, 0.00],
        [0.50, 0.22, 0.21, 0.07],
        [0.50, 0.21, 0.22, 0.07],
        [0.01, 0.01, 0.01, 0.97],
        [0.00, 0.01, 0.00, 0.99],
        [0.00, 0.00, 0.00, 1.00],
    ]
    vectors = {str(index): csr_matrix(row) for index, row in enumerate(rows)}
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Allow a single iteration only; no random seed is given for this case.
    clustering_model = KMeansConstrainedClustering(max_iteration=1)

    # Cluster into 2 groups, without any constraint.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=2,
    )

    # Only check that a non-empty result is stored and returned; no exact assignment is asserted.
    assert clustering_model.dict_of_predicted_clusters
    assert dict_of_predicted_clusters