Coverage for tests\clustering\test_hierarchical.py: 100.00%
203 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: interactive-clustering/tests/clustering/test_hierarchical.py
5* Description: Unittests for the `clustering.hierarchical` module.
6* Author: Erwan SCHILD
7* Created: 17/03/2021
8* Licence: CeCILL (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORT PYTHON DEPENDENCIES
13# ==============================================================================
15import numpy as np
16import pytest
17from scipy.sparse import csr_matrix
19from cognitivefactory.interactive_clustering.clustering.hierarchical import Cluster, HierarchicalConstrainedClustering
20from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager
23# ==============================================================================
24# test_HierarchicalConstrainedClustering_for_inconsistent_linkage
25# ==============================================================================
def test_HierarchicalConstrainedClustering_for_inconsistent_linkage():
    """
    Check that building a `HierarchicalConstrainedClustering` with an unknown `linkage` raises a `ValueError`.
    """

    # The constructor is called inside the `raises` context: the error must come from initialization
    # and its message must mention `linkage`.
    invalid_settings = {"linkage": "as_you_want"}
    with pytest.raises(ValueError, match="`linkage`"):
        HierarchicalConstrainedClustering(**invalid_settings)
38# ==============================================================================
39# test_HierarchicalConstrainedClustering_for_correct_settings
40# ==============================================================================
def test_HierarchicalConstrainedClustering_for_correct_settings():
    """
    Check that a `HierarchicalConstrainedClustering` built with valid settings keeps them as attributes.
    """

    # Settings handed to the constructor and expected back on the instance.
    expected_linkage, expected_seed = "average", 2
    clustering_model = HierarchicalConstrainedClustering(
        linkage=expected_linkage,
        random_seed=expected_seed,
    )

    # The instance must be truthy and expose the values it was given.
    assert clustering_model
    assert clustering_model.linkage == expected_linkage
    assert clustering_model.random_seed == expected_seed
56# ==============================================================================
57# test_HierarchicalConstrainedClustering_cluster_for_inconsistent_constraints_manager
58# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_for_inconsistent_constraints_manager():
    """
    Check that `HierarchicalConstrainedClustering.cluster` raises a `ValueError` for an invalid `constraints_manager`.
    """

    # Model with default settings.
    clustering_model = HierarchicalConstrainedClustering()

    # `constraints_manager=None` must be refused with an error message naming the parameter.
    with pytest.raises(ValueError, match="`constraints_manager`"):
        clustering_model.cluster(constraints_manager=None, vectors=None, nb_clusters=2)
76# ==============================================================================
77# test_HierarchicalConstrainedClustering_cluster_for_inconsistent_vectors
78# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_for_inconsistent_vectors():
    """
    Check that `HierarchicalConstrainedClustering.cluster` raises a `ValueError` for an invalid `vectors` argument.
    """

    # Model with default settings.
    clustering_model = HierarchicalConstrainedClustering()

    # A valid constraints manager combined with `vectors=None` must be refused,
    # with an error message naming `vectors`.
    with pytest.raises(ValueError, match="`vectors`"):
        manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
        clustering_model.cluster(constraints_manager=manager, vectors=None, nb_clusters=2)
96# ==============================================================================
97# test_HierarchicalConstrainedClustering_cluster_for_inconsistent_nb_clusters_1
98# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_for_inconsistent_nb_clusters_1():
    """
    Check that `HierarchicalConstrainedClustering.cluster` raises a `ValueError` when `nb_clusters` is `None`.
    """

    # Model with default settings.
    clustering_model = HierarchicalConstrainedClustering()

    # Valid manager and vectors (a 1-D array, a 2-D array and a sparse matrix), but no number of clusters:
    # the error message must name `nb_clusters`.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
        mixed_vectors = {
            "first": np.array([1, 2, 3]),
            "second": np.array([[4, 5, 6]]),
            "third": csr_matrix([7, 8, 9]),
        }
        clustering_model.cluster(constraints_manager=manager, vectors=mixed_vectors, nb_clusters=None)
116# ==============================================================================
117# test_HierarchicalConstrainedClustering_cluster_for_inconsistent_nb_clusters_2
118# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_for_inconsistent_nb_clusters_2():
    """
    Check that `HierarchicalConstrainedClustering.cluster` raises a `ValueError` when `nb_clusters` is negative.
    """

    # Model with default settings.
    clustering_model = HierarchicalConstrainedClustering()

    # Valid manager and vectors (a 1-D array, a 2-D array and a sparse matrix), but `nb_clusters=-1`:
    # the error message must name `nb_clusters`.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
        mixed_vectors = {
            "first": np.array([1, 2, 3]),
            "second": np.array([[4, 5, 6]]),
            "third": csr_matrix([7, 8, 9]),
        }
        clustering_model.cluster(constraints_manager=manager, vectors=mixed_vectors, nb_clusters=-1)
136# ==============================================================================
137# test_HierarchicalConstrainedClustering_cluster_with_ward_linkage
138# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_with_ward_linkage():
    """
    Check the partition returned by `HierarchicalConstrainedClustering.cluster` when `linkage` is `"ward"`.
    """

    # Ten sparse row vectors, keyed by their position ("0" to "9").
    rows = [
        [1.00, 0.00, 0.00],
        [0.95, 0.02, 0.01],
        [0.98, 0.00, 0.00],
        [0.99, 0.00, 0.00],
        [0.01, 0.99, 0.07],
        [0.02, 0.99, 0.07],
        [0.01, 0.99, 0.02],
        [0.01, 0.01, 0.97],
        [0.00, 0.01, 0.99],
        [0.00, 0.00, 1.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # One MUST_LINK and one CANNOT_LINK constraint over those data IDs.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))
    for first_id, second_id, kind in [("0", "1", "MUST_LINK"), ("5", "7", "CANNOT_LINK")]:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=kind)

    # Model under test, seeded for reproducibility.
    clustering_model = HierarchicalConstrainedClustering(linkage="ward", random_seed=1)

    # Ask for 3 clusters.
    predicted = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=3,
    )

    # The result must be stored on the model and match the expected partition.
    expected = {"0": 0, "1": 0, "2": 0, "3": 0, "4": 1, "5": 1, "6": 1, "7": 2, "8": 2, "9": 2}
    assert clustering_model.dict_of_predicted_clusters
    assert predicted == expected
189# ==============================================================================
190# test_HierarchicalConstrainedClustering_cluster_with_average_linkage
191# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_with_average_linkage():
    """
    Check the partition returned by `HierarchicalConstrainedClustering.cluster` when `linkage` is `"average"`.
    """

    # Ten sparse row vectors, keyed by their position ("0" to "9").
    rows = [
        [1.00, 0.00, 0.00],
        [0.95, 0.02, 0.01],
        [0.98, 0.00, 0.00],
        [0.99, 0.00, 0.00],
        [0.01, 0.99, 0.07],
        [0.02, 0.99, 0.07],
        [0.01, 0.99, 0.02],
        [0.01, 0.01, 0.97],
        [0.00, 0.01, 0.99],
        [0.00, 0.00, 1.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # One MUST_LINK and one CANNOT_LINK constraint over those data IDs.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))
    for first_id, second_id, kind in [("0", "1", "MUST_LINK"), ("5", "7", "CANNOT_LINK")]:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=kind)

    # Model under test, seeded for reproducibility.
    clustering_model = HierarchicalConstrainedClustering(linkage="average", random_seed=1)

    # Ask for 3 clusters.
    predicted = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=3,
    )

    # The result must be stored on the model and match the expected partition.
    expected = {"0": 0, "1": 0, "2": 0, "3": 0, "4": 1, "5": 1, "6": 1, "7": 2, "8": 2, "9": 2}
    assert clustering_model.dict_of_predicted_clusters
    assert predicted == expected
242# ==============================================================================
243# test_HierarchicalConstrainedClustering_cluster_with_single_linkage
244# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_with_single_linkage():
    """
    Check the partition returned by `HierarchicalConstrainedClustering.cluster` when `linkage` is `"single"`.
    """

    # Ten sparse row vectors, keyed by their position ("0" to "9").
    rows = [
        [1.00, 0.00, 0.00],
        [0.95, 0.02, 0.01],
        [0.98, 0.00, 0.00],
        [0.99, 0.00, 0.00],
        [0.01, 0.99, 0.07],
        [0.02, 0.99, 0.07],
        [0.01, 0.99, 0.02],
        [0.01, 0.01, 0.97],
        [0.00, 0.01, 0.99],
        [0.00, 0.00, 1.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # One MUST_LINK and one CANNOT_LINK constraint over those data IDs.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))
    for first_id, second_id, kind in [("0", "1", "MUST_LINK"), ("5", "7", "CANNOT_LINK")]:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=kind)

    # Model under test, seeded for reproducibility.
    clustering_model = HierarchicalConstrainedClustering(linkage="single", random_seed=1)

    # Ask for 3 clusters.
    predicted = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=3,
    )

    # The result must be stored on the model and match the expected partition.
    expected = {"0": 0, "1": 0, "2": 0, "3": 0, "4": 1, "5": 1, "6": 1, "7": 2, "8": 2, "9": 2}
    assert clustering_model.dict_of_predicted_clusters
    assert predicted == expected
295# ==============================================================================
296# test_HierarchicalConstrainedClustering_cluster_with_complete_linkage
297# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_with_complete_linkage():
    """
    Check the partition returned by `HierarchicalConstrainedClustering.cluster` when `linkage` is `"complete"`.
    """

    # Ten sparse row vectors, keyed by their position ("0" to "9").
    rows = [
        [1.00, 0.00, 0.00],
        [0.95, 0.02, 0.01],
        [0.98, 0.00, 0.00],
        [0.99, 0.00, 0.00],
        [0.01, 0.99, 0.07],
        [0.02, 0.99, 0.07],
        [0.01, 0.99, 0.02],
        [0.01, 0.01, 0.97],
        [0.00, 0.01, 0.99],
        [0.00, 0.00, 1.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # One MUST_LINK and one CANNOT_LINK constraint over those data IDs.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))
    for first_id, second_id, kind in [("0", "1", "MUST_LINK"), ("5", "7", "CANNOT_LINK")]:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=kind)

    # Model under test, seeded for reproducibility.
    clustering_model = HierarchicalConstrainedClustering(linkage="complete", random_seed=1)

    # Ask for 3 clusters.
    predicted = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=3,
    )

    # The result must be stored on the model and match the expected partition.
    expected = {"0": 0, "1": 0, "2": 0, "3": 0, "4": 1, "5": 1, "6": 1, "7": 2, "8": 2, "9": 2}
    assert clustering_model.dict_of_predicted_clusters
    assert predicted == expected
348# ==============================================================================
349# test_HierarchicalConstrainedClustering_cluster_with_no_constraints_1
350# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_with_no_constraints_1():
    """
    Check the 2-cluster partition returned by `HierarchicalConstrainedClustering.cluster` when no constraint is set.
    """

    # Nine sparse row vectors, keyed by their position ("0" to "8").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.95, 0.02, 0.02, 0.01],
        [0.98, 0.00, 0.02, 0.00],
        [0.99, 0.00, 0.01, 0.00],
        [0.50, 0.22, 0.21, 0.07],
        [0.50, 0.21, 0.22, 0.07],
        [0.01, 0.01, 0.01, 0.97],
        [0.00, 0.01, 0.00, 0.99],
        [0.00, 0.00, 0.00, 1.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # Constraints manager over the same IDs, left without any constraint.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Model under test, seeded for reproducibility.
    clustering_model = HierarchicalConstrainedClustering(random_seed=2)

    # Ask for 2 clusters.
    predicted = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=2,
    )

    # The result must be stored on the model and match the expected partition.
    expected = {
        "0": 0, "1": 0, "2": 0, "3": 0, "4": 0, "5": 0,
        "6": 1, "7": 1, "8": 1,
    }
    assert clustering_model.dict_of_predicted_clusters
    assert predicted == expected
386# ==============================================================================
387# test_HierarchicalConstrainedClustering_cluster_with_no_constraints_2
388# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_with_no_constraints_2():
    """
    Check the 3-cluster partition returned by `HierarchicalConstrainedClustering.cluster` when no constraint is set.
    """

    # Ten sparse row vectors, keyed by their position ("0" to "9").
    rows = [
        [1.00, 0.00, 0.00],
        [0.95, 0.02, 0.01],
        [0.98, 0.00, 0.00],
        [0.99, 0.00, 0.00],
        [0.01, 0.99, 0.07],
        [0.02, 0.99, 0.07],
        [0.01, 0.99, 0.02],
        [0.01, 0.01, 0.97],
        [0.00, 0.01, 0.99],
        [0.00, 0.00, 1.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # Constraints manager over the same IDs, left without any constraint.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Model under test, seeded for reproducibility.
    clustering_model = HierarchicalConstrainedClustering(random_seed=2)

    # Ask for 3 clusters.
    predicted = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=3,
    )

    # The result must be stored on the model and match the expected partition.
    expected = {"0": 0, "1": 0, "2": 0, "3": 0, "4": 1, "5": 1, "6": 1, "7": 2, "8": 2, "9": 2}
    assert clustering_model.dict_of_predicted_clusters
    assert predicted == expected
435# ==============================================================================
436# test_HierarchicalConstrainedClustering_cluster_with_some_constraints
437# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_with_some_constraints():
    """
    Check the partition returned by `HierarchicalConstrainedClustering.cluster` when a few constraints are set.
    """

    # Nine sparse row vectors, keyed by their position ("0" to "8").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.95, 0.02, 0.02, 0.01],
        [0.98, 0.00, 0.02, 0.00],
        [0.99, 0.00, 0.01, 0.00],
        [0.50, 0.22, 0.21, 0.07],
        [0.50, 0.21, 0.22, 0.07],
        [0.01, 0.01, 0.01, 0.97],
        [0.00, 0.01, 0.00, 0.99],
        [0.00, 0.00, 0.00, 1.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # Constraints are registered in this exact order.
    constraints = [
        ("0", "1", "MUST_LINK"),
        ("0", "6", "MUST_LINK"),
        ("0", "7", "MUST_LINK"),
        ("0", "8", "MUST_LINK"),
        ("4", "5", "MUST_LINK"),
        ("0", "4", "CANNOT_LINK"),
        ("2", "4", "CANNOT_LINK"),
    ]
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))
    for first_id, second_id, kind in constraints:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=kind)

    # Model under test, seeded for reproducibility.
    clustering_model = HierarchicalConstrainedClustering(random_seed=2)

    # Ask for 3 clusters with the constraints above.
    predicted = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=3,
    )

    # The result must be stored on the model and match the expected partition.
    expected = {"0": 0, "1": 0, "2": 1, "3": 1, "4": 2, "5": 2, "6": 0, "7": 0, "8": 0}
    assert clustering_model.dict_of_predicted_clusters
    assert predicted == expected
489# ==============================================================================
490# test_HierarchicalConstrainedClustering_cluster_with_full_constraints
491# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_with_full_constraints():
    """
    Check the partition returned by `HierarchicalConstrainedClustering.cluster` when every data is constrained.
    """

    # Nine sparse row vectors, keyed by their position ("0" to "8").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.95, 0.02, 0.02, 0.01],
        [0.98, 0.00, 0.02, 0.00],
        [0.99, 0.00, 0.01, 0.00],
        [0.50, 0.22, 0.21, 0.07],
        [0.50, 0.21, 0.22, 0.07],
        [0.01, 0.01, 0.01, 0.97],
        [0.00, 0.01, 0.00, 0.99],
        [0.00, 0.00, 0.00, 1.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # MUST_LINK constraints attach "4" to "8" to one of "0" to "3";
    # CANNOT_LINK constraints separate "0", "1", "2" and "3" from each other.
    # They are registered in this exact order.
    constraints = [
        ("0", "4", "MUST_LINK"),
        ("0", "8", "MUST_LINK"),
        ("1", "5", "MUST_LINK"),
        ("2", "6", "MUST_LINK"),
        ("3", "7", "MUST_LINK"),
        ("0", "1", "CANNOT_LINK"),
        ("0", "2", "CANNOT_LINK"),
        ("0", "3", "CANNOT_LINK"),
        ("1", "2", "CANNOT_LINK"),
        ("1", "3", "CANNOT_LINK"),
        ("2", "3", "CANNOT_LINK"),
    ]
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))
    for first_id, second_id, kind in constraints:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=kind)

    # Model under test, with default settings.
    clustering_model = HierarchicalConstrainedClustering()

    # Ask for 4 clusters.
    predicted = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=4,
    )

    # The result must be stored on the model and follow the constraints.
    expected = {"0": 0, "1": 1, "2": 2, "3": 3, "4": 0, "5": 1, "6": 2, "7": 3, "8": 0}
    assert clustering_model.dict_of_predicted_clusters
    assert predicted == expected
545# ==============================================================================
546# test_HierarchicalConstrainedClustering_compute_predicted_clusters_without_clustering_tree
547# ==============================================================================
def test_HierarchicalConstrainedClustering_compute_predicted_clusters_without_clustering_tree():
    """
    Check that `compute_predicted_clusters` raises a `ValueError` when `cluster` has not been run first.
    """

    # Model that has never been asked to cluster anything.
    clustering_model = HierarchicalConstrainedClustering(linkage="single", random_seed=1)

    # Requesting predicted clusters now must fail with an error message naming `clustering_root`.
    with pytest.raises(ValueError, match="`clustering_root`"):
        clustering_model.compute_predicted_clusters(nb_clusters=2, by="size")
567# ==============================================================================
568# test_HierarchicalConstrainedClustering_compute_predicted_clusters_travelling_by_size
569# ==============================================================================
def test_HierarchicalConstrainedClustering_compute_predicted_clusters_travelling_by_size():
    """
    Check the partition returned by `compute_predicted_clusters` when it is called with `by="size"`.
    """

    # Twelve sparse row vectors, keyed by their zero-padded position ("00" to "11").
    rows = [
        [1.00, 0.00, 0.00],
        [0.99, 0.00, 0.00],
        [0.97, 0.00, 0.00],
        [0.96, 0.00, 0.00],
        [0.94, 0.00, 0.00],
        [0.93, 0.00, 0.00],
        [0.80, 0.80, 0.00],
        [0.80, 0.81, 0.00],
        [0.00, 0.00, 0.70],
        [0.00, 0.00, 0.71],
        [0.00, 0.00, 0.99],
        [0.00, 0.00, 1.00],
    ]
    vectors = {f"{position:02d}": csr_matrix(row) for position, row in enumerate(rows)}

    # Constraints manager over the same IDs, left without any constraint.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Model under test, seeded for reproducibility.
    clustering_model = HierarchicalConstrainedClustering(linkage="single", random_seed=1)

    # Run the clustering once; its return value is not used here.
    clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=2,
    )

    # Ask the already-clustered model for 4 clusters, travelling by `"size"`.
    predicted = clustering_model.compute_predicted_clusters(nb_clusters=4, by="size")

    expected = {
        "00": 0, "01": 0, "02": 0, "03": 0,
        "04": 1, "05": 1,
        "06": 2, "07": 2,
        "08": 3, "09": 3, "10": 3, "11": 3,
    }
    assert predicted == expected
626# ==============================================================================
627# test_HierarchicalConstrainedClustering_compute_predicted_clusters_travelling_by_iteration
628# ==============================================================================
def test_HierarchicalConstrainedClustering_compute_predicted_clusters_travelling_by_iteration():
    """
    Check the partitions returned by `compute_predicted_clusters` when it is called with `by="iteration"`.
    """

    # Twelve sparse row vectors, keyed by their zero-padded position ("00" to "11").
    rows = [
        [1.00, 0.00, 0.00],
        [0.99, 0.00, 0.00],
        [0.97, 0.00, 0.00],
        [0.96, 0.00, 0.00],
        [0.94, 0.00, 0.00],
        [0.93, 0.00, 0.00],
        [0.80, 0.80, 0.00],
        [0.80, 0.81, 0.00],
        [0.00, 0.00, 0.70],
        [0.00, 0.00, 0.71],
        [0.00, 0.00, 0.99],
        [0.00, 0.00, 1.00],
    ]
    vectors = {f"{position:02d}": csr_matrix(row) for position, row in enumerate(rows)}

    # Constraints manager over the same IDs, left without any constraint.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Model under test, seeded for reproducibility.
    clustering_model = HierarchicalConstrainedClustering(linkage="single", random_seed=1)

    # Run the clustering once; its return value is not used here.
    clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=2,
    )

    # First request: 4 clusters, travelling by `"iteration"`.
    predicted_for_4 = clustering_model.compute_predicted_clusters(nb_clusters=4, by="iteration")
    expected_for_4 = {
        "00": 0, "01": 0, "02": 0, "03": 0, "04": 0, "05": 0,
        "06": 1, "07": 1,
        "08": 2, "09": 2,
        "10": 3, "11": 3,
    }
    assert predicted_for_4 == expected_for_4

    # Second request: more clusters (99) than data (12); every data is expected alone in its own cluster,
    # numbered in the order of the data IDs.
    predicted_for_99 = clustering_model.compute_predicted_clusters(nb_clusters=99, by="iteration")
    expected_for_99 = {data_id: position for position, data_id in enumerate(vectors)}
    assert predicted_for_99 == expected_for_99
705# ==============================================================================
706# test_HierarchicalConstrainedClustering_cluster_with_break_loop
707# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_with_break_loop():
    """
    Check `HierarchicalConstrainedClustering.cluster` when every pair of data is under a CANNOT_LINK constraint.

    The expected result keeps each data alone in its own cluster, even though only 5 clusters are requested.
    """

    # Nine sparse row vectors, keyed by their position ("0" to "8").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.95, 0.02, 0.02, 0.01],
        [0.98, 0.00, 0.02, 0.00],
        [0.99, 0.00, 0.01, 0.00],
        [0.50, 0.22, 0.21, 0.07],
        [0.50, 0.21, 0.22, 0.07],
        [0.01, 0.01, 0.01, 0.97],
        [0.00, 0.01, 0.00, 0.99],
        [0.00, 0.00, 0.00, 1.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}
    data_ids = list(vectors)

    # Forbid every pair of distinct data from sharing a cluster.
    # Pairs are added as ("0", "1"), ("0", "2"), ..., ("7", "8"): 36 constraints in total.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=data_ids)
    for position, first_id in enumerate(data_ids):
        for second_id in data_ids[position + 1 :]:
            constraints_manager.add_constraint(
                data_ID1=first_id,
                data_ID2=second_id,
                constraint_type="CANNOT_LINK",
            )

    # Model under test (no seed given).
    clustering_model = HierarchicalConstrainedClustering(linkage="average")

    # Ask for 5 clusters; the outcome is read from the model attribute rather than from the return value.
    clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=5,
    )

    # No merge is expected: one cluster per data, numbered in the order of the data IDs.
    expected = {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8}
    assert clustering_model.dict_of_predicted_clusters
    assert clustering_model.dict_of_predicted_clusters == expected
788# ==============================================================================
789# test_HierarchicalConstrainedClustering_cluster_end_cases_with_too_many_clusters
790# ==============================================================================
def test_HierarchicalConstrainedClustering_cluster_end_cases_with_too_many_clusters():
    """
    Check that `clustering.hierarchical.HierarchicalConstrainedClustering.cluster` copes with a `nb_clusters` larger than the number of data.

    Nine vectors are clustered with `nb_clusters=99` and an empty constraints manager; the test expects every data ID to be alone in its own cluster.
    """

    # Vectors to cluster, keyed by data ID.
    vectors = {
        "0": csr_matrix([1.00, 0.00, 0.00, 0.00]),
        "1": csr_matrix([0.95, 0.02, 0.02, 0.01]),
        "2": csr_matrix([0.98, 0.00, 0.02, 0.00]),
        "3": csr_matrix([0.99, 0.00, 0.01, 0.00]),
        "4": csr_matrix([0.50, 0.22, 0.21, 0.07]),
        "5": csr_matrix([0.50, 0.21, 0.22, 0.07]),
        "6": csr_matrix([0.01, 0.01, 0.01, 0.97]),
        "7": csr_matrix([0.00, 0.01, 0.00, 0.99]),
        "8": csr_matrix([0.00, 0.00, 0.00, 1.00]),
    }

    # Constraints manager over the same data IDs; no constraint is added.
    data_IDs = list(vectors.keys())
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=data_IDs)

    # Model under test, using average linkage.
    clustering_model = HierarchicalConstrainedClustering(linkage="average")

    # Request far more clusters than there are data.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=99,
    )

    # Expected result: "0" -> 0, "1" -> 1, ..., "8" -> 8.
    expected_clusters = {str(cluster_ID): cluster_ID for cluster_ID in range(9)}

    # The result must be stored on the model and returned by the call.
    assert clustering_model.dict_of_predicted_clusters
    assert dict_of_predicted_clusters == expected_clusters
835# ==============================================================================
836# test_Cluster_for_inconsistent_children_and_members
837# ==============================================================================
def test_Cluster_for_inconsistent_children_and_members():
    """
    Test that the `clustering.hierarchical.Cluster` initialization raises a `ValueError` for inconsistent `children` and `members` parameters.

    Two cases are checked: neither `children` nor `members` is set, and both are set at once.
    """

    # Define `vectors`.
    vectors = {
        "0": csr_matrix([1.00, 0.00]),
        "1": csr_matrix([0.99, 0.01]),
        "2": csr_matrix([0.02, 0.98]),
        "3": csr_matrix([0.01, 0.99]),
        "4": csr_matrix([0.00, 1.00]),
    }

    # Message fragment expected in both error cases.
    expected_message = "by `children` setting or by `members` setting"

    # Check `ValueError` for both `children` and `members` unset.
    with pytest.raises(ValueError, match=expected_message):
        Cluster(vectors=vectors, cluster_ID=2, clustering_iteration=1, children=None, members=None)

    # Build the valid children outside the `pytest.raises` block, so that an unexpected
    # error while creating them fails the test instead of being taken for the expected one.
    children = [
        Cluster(
            vectors=vectors,
            cluster_ID=0,
            clustering_iteration=0,
            members=["0", "1"],
        ),
        Cluster(
            vectors=vectors,
            cluster_ID=1,
            clustering_iteration=0,
            members=["2", "3", "4"],
        ),
    ]

    # Check `ValueError` for both `children` and `members` set.
    with pytest.raises(ValueError, match=expected_message):
        Cluster(
            vectors=vectors,
            cluster_ID=2,
            clustering_iteration=1,
            children=children,
            members=["5", "6", "7", "8", "9"],
        )
# ==============================================================================
# test_Cluster_add_new_children
# ==============================================================================
def test_Cluster_add_new_children():
    """
    Check that `clustering.hierarchical.Cluster.add_new_children` extends an existing cluster.

    A parent cluster is created with one child, then a second child is added; members, clustering iteration and cluster size are checked before and after the addition.
    """

    # Vectors referenced by the clusters, keyed by data ID.
    vectors = {
        "0": csr_matrix([1.00, 0.00]),
        "1": csr_matrix([0.99, 0.01]),
        "2": csr_matrix([0.02, 0.98]),
        "3": csr_matrix([0.01, 0.99]),
        "4": csr_matrix([0.00, 1.00]),
    }

    # Parent cluster built from a single child holding "0" and "1".
    first_child = Cluster(
        vectors=vectors,
        cluster_ID=0,
        clustering_iteration=0,
        members=["0", "1"],
    )
    clusters = Cluster(
        vectors=vectors,
        cluster_ID=2,
        clustering_iteration=1,
        children=[first_child],
    )

    # State before the addition.
    assert clusters.members == ["0", "1"]
    assert clusters.clustering_iteration == 1
    assert clusters.get_cluster_size() == 2

    # Add a second child holding "2", "3" and "4" at iteration 2.
    second_child = Cluster(
        vectors=vectors,
        cluster_ID=1,
        clustering_iteration=0,
        members=["2", "3", "4"],
    )
    clusters.add_new_children(
        new_children=[second_child],
        new_clustering_iteration=2,
    )

    # State after the addition.
    assert clusters.members == ["0", "1", "2", "3", "4"]
    assert clusters.clustering_iteration == 2
    assert clusters.get_cluster_size() == 5
933# ==============================================================================
934# test_Cluster_to_dict
935# ==============================================================================
def test_Cluster_to_dict():
    """
    Check that `clustering.hierarchical.Cluster.to_dict` serializes a cluster and its children.

    A parent cluster with two member-defined children is created and its `to_dict()` output is compared to the expected nested dictionary.
    """

    # Vectors referenced by the clusters, keyed by data ID.
    vectors = {
        "0": csr_matrix([1.00, 0.00]),
        "1": csr_matrix([0.99, 0.01]),
        "2": csr_matrix([0.02, 0.98]),
        "3": csr_matrix([0.01, 0.99]),
        "4": csr_matrix([0.00, 1.00]),
    }

    # Two children defined by their members, grouped under one parent cluster.
    left_child = Cluster(
        vectors=vectors,
        cluster_ID=0,
        clustering_iteration=0,
        members=["0", "1"],
    )
    right_child = Cluster(
        vectors=vectors,
        cluster_ID=1,
        clustering_iteration=0,
        members=["2", "3", "4"],
    )
    clusters = Cluster(
        vectors=vectors,
        cluster_ID=2,
        clustering_iteration=1,
        children=[left_child, right_child],
    )

    # Expected dictionary for each child.
    left_child_expected = {
        "cluster_ID": 0,
        "clustering_iteration": 0,
        "children": [],
        "cluster_inverse_depth": 0,
        "members": ["0", "1"],
    }
    right_child_expected = {
        "cluster_ID": 1,
        "clustering_iteration": 0,
        "children": [],
        "cluster_inverse_depth": 0,
        "members": ["2", "3", "4"],
    }

    # Expected dictionary for the parent, embedding both children.
    dict_expected = {
        "cluster_ID": 2,
        "clustering_iteration": 1,
        "children": [left_child_expected, right_child_expected],
        "cluster_inverse_depth": 1,
        "members": ["0", "1", "2", "3", "4"],
    }

    assert clusters.to_dict() == dict_expected