Coverage for tests\clustering\test_mpckmeans.py: 100.00%
63 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: interactive-clustering/tests/clustering/test_mpckmeans.py
5* Description: Unittests for the `clustering.mpckmeans` module.
6* Author: Esther LENOTRE, David NICOLAZO, Marc TRUTT
7* Created: 02/11/2022
8* Licence: CeCILL (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORT PYTHON DEPENDENCIES
13# ==============================================================================
15import math
17import numpy as np
18import pytest
19from scipy.sparse import csr_matrix
21from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager
22from src.cognitivefactory.interactive_clustering.clustering.mpckmeans import MPCKMeansConstrainedClustering
25# ==============================================================================
26# test_MPCKMeansConstrainedClustering_for_inconsistent_model
27# ==============================================================================
def test_MPCKMeansConstrainedClustering_for_inconsistent_model():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` initialization raises a `ValueError` for an inconsistent `model` parameter.
    """

    # Settings carrying a `model` value the constructor is expected to refuse.
    invalid_settings = {"model": "as_you_want"}

    # The error message has to mention the faulty `model` parameter.
    with pytest.raises(ValueError, match="`model`"):
        MPCKMeansConstrainedClustering(**invalid_settings)
40# ==============================================================================
41# test_MPCKMeansConstrainedClustering_for_inconsistent_max_iteration
42# ==============================================================================
def test_MPCKMeansConstrainedClustering_for_inconsistent_max_iteration():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` initialization raises a `ValueError` for an inconsistent `max_iteration` parameter.
    """

    # Settings carrying a negative `max_iteration`, expected to be refused.
    invalid_settings = {"max_iteration": -1}

    # The error message has to mention the faulty `max_iteration` parameter.
    with pytest.raises(ValueError, match="`max_iteration`"):
        MPCKMeansConstrainedClustering(**invalid_settings)
55# ==============================================================================
56# test_MPCKMeansConstrainedClustering_for_inconsistent_w
57# ==============================================================================
def test_MPCKMeansConstrainedClustering_for_inconsistent_w():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` initialization raises a `ValueError` for an inconsistent `w` parameter.
    """

    # Settings carrying a negative `w`, expected to be refused.
    invalid_settings = {"w": -1}

    # The expected error message is matched on the word `weight`, not on `w`.
    with pytest.raises(ValueError, match="`weight`"):
        MPCKMeansConstrainedClustering(**invalid_settings)
70# ==============================================================================
71# test_MPCKMeansConstrainedClustering_for_correct_settings
72# ==============================================================================
def test_MPCKMeansConstrainedClustering_for_correct_settings():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` initialization runs correctly with the correct settings.
    """

    # Build an instance from a complete set of valid settings.
    valid_settings = {
        "model": "MPC",
        "max_iteration": 100,
        "w": 0.5,
        "random_seed": 3,
    }
    model_under_test = MPCKMeansConstrainedClustering(**valid_settings)

    # The instance must be truthy and expose each setting as an attribute.
    assert model_under_test
    assert model_under_test.model == "MPC"
    assert model_under_test.max_iteration == 100
    # `w` is a float, so compare with a tolerance instead of strict equality.
    assert math.isclose(model_under_test.w, 0.5)
    assert model_under_test.random_seed == 3
92# ==============================================================================
93# test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_constraints_manager
94# ==============================================================================
def test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_constraints_manager():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` clustering raises a `ValueError` for an inconsistent `constraints_manager` parameter.
    """

    # Initialize a `MPCKMeansConstrainedClustering` instance with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # Arguments of the call: `constraints_manager` is not a constraints manager.
    call_arguments = {
        "constraints_manager": None,
        "vectors": None,
        "nb_clusters": 2,
    }

    # The error message has to mention the faulty `constraints_manager` parameter.
    with pytest.raises(ValueError, match="`constraints_manager`"):
        clusterer.cluster(**call_arguments)
112# ==============================================================================
113# test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_vectors
114# ==============================================================================
def test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_vectors():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` clustering raises a `ValueError` for an inconsistent `vectors` parameter.
    """

    # Initialize a `MPCKMeansConstrainedClustering` instance with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # A valid constraints manager, so that only `vectors` is at fault.
    manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])

    # Check `ValueError` when `vectors` is not provided.
    with pytest.raises(ValueError, match="`vectors`"):
        clusterer.cluster(
            constraints_manager=manager,
            vectors=None,
            nb_clusters=2,
        )
132# ==============================================================================
133# test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_1
134# ==============================================================================
def test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_1():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` clustering raises a `ValueError` when `nb_clusters` is `None`.
    """

    # Initialize a `MPCKMeansConstrainedClustering` instance with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # Valid manager and vectors, so that only `nb_clusters` is at fault.
    # The vectors deliberately mix a 1-D array, a 2-D array and a sparse matrix.
    manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
    data_vectors = {
        "first": np.array([1, 2, 3]),
        "second": np.array([[4, 5, 6]]),
        "third": csr_matrix([7, 8, 9]),
    }

    # Check `ValueError` for a missing (`None`) `nb_clusters`.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        clusterer.cluster(
            constraints_manager=manager,
            vectors=data_vectors,
            nb_clusters=None,
        )
152# ==============================================================================
153# test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_2
154# ==============================================================================
def test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_2():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` clustering raises a `ValueError` when `nb_clusters` is negative.
    """

    # Initialize a `MPCKMeansConstrainedClustering` instance with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # Valid manager and vectors, so that only `nb_clusters` is at fault.
    # The vectors deliberately mix a 1-D array, a 2-D array and a sparse matrix.
    manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
    data_vectors = {
        "first": np.array([1, 2, 3]),
        "second": np.array([[4, 5, 6]]),
        "third": csr_matrix([7, 8, 9]),
    }

    # Check `ValueError` for a too small (negative) `nb_clusters`.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        clusterer.cluster(
            constraints_manager=manager,
            vectors=data_vectors,
            nb_clusters=-1,
        )
172# ==============================================================================
173# test_MPCKMeansConstrainedClustering_cluster_with_no_constraints_1
174# ==============================================================================
def test_MPCKMeansConstrainedClustering_cluster_with_no_constraints_1():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` clustering works with no `constraints`.
    """

    # Define `vectors`: one sparse row per data ID, keyed "0" to "11".
    # Each row has a single non-zero component on one of the four axes.
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.00, 0.43, 0.00, 0.00],
        [0.00, 0.00, 0.29, 0.00],
        [0.00, 0.00, 0.50, 0.00],
        [0.00, 0.00, 0.00, 0.98],
        [0.00, 0.00, 0.33, 0.00],
        [0.00, 0.00, 0.00, 1.40],
        [0.80, 0.00, 0.00, 0.00],
        [0.00, 0.54, 0.00, 0.00],
        [0.00, 0.00, 0.00, 1.10],
        [1.10, 0.00, 0.00, 0.00],
        [0.00, 0.49, 0.00, 0.00],
    ]
    vectors = {str(index): csr_matrix(row) for index, row in enumerate(rows)}

    # Define a `constraints_manager` over the same data IDs, with no constraint added.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Initialize a `MPCKMeansConstrainedClustering` instance with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # Run clustering with 4 clusters and no constraints.
    predicted_clusters = clusterer.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=4,
    )

    # Expected partition: data IDs sharing the same non-zero axis end up together.
    expected_clusters = {
        "0": 0,
        "1": 1,
        "2": 2,
        "3": 2,
        "4": 3,
        "5": 2,
        "6": 3,
        "7": 0,
        "8": 1,
        "9": 3,
        "10": 0,
        "11": 1,
    }

    # The result must be stored on the instance and returned by the call.
    assert clusterer.dict_of_predicted_clusters
    assert predicted_clusters == expected_clusters
225# ==============================================================================
226# test_MPCKMeansConstrainedClustering_cluster_with_no_constraints_2
227# ==============================================================================
def test_MPCKMeansConstrainedClustering_cluster_with_no_constraints_2():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` clustering works with no `constraints` when one data point lies far from the others.
    """

    # Define `vectors` and `constraints_manager`.
    # Same layout as the first "no constraints" test, except that "0" is moved to 2.00 on the first axis.
    vectors = {
        "0": csr_matrix([2.00, 0.00, 0.00, 0.00]),
        "1": csr_matrix([0.00, 0.43, 0.00, 0.00]),
        "2": csr_matrix([0.00, 0.00, 0.29, 0.00]),
        "3": csr_matrix([0.00, 0.00, 0.50, 0.00]),
        "4": csr_matrix([0.00, 0.00, 0.00, 0.98]),
        "5": csr_matrix([0.00, 0.00, 0.33, 0.00]),
        "6": csr_matrix([0.00, 0.00, 0.00, 1.40]),
        "7": csr_matrix([0.80, 0.00, 0.00, 0.00]),
        "8": csr_matrix([0.00, 0.54, 0.00, 0.00]),
        "9": csr_matrix([0.00, 0.00, 0.00, 1.10]),
        "10": csr_matrix([1.10, 0.00, 0.00, 0.00]),
        "11": csr_matrix([0.00, 0.49, 0.00, 0.00]),
    }

    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors.keys()))

    # Initialize a `MPCKMeansConstrainedClustering` instance.
    # NOTE(review): `eps` and `min_samples` look like leftovers from a density-based
    # clustering test; confirm whether this class uses them or whether they can be dropped.
    clustering_model = MPCKMeansConstrainedClustering(eps=0.5, min_samples=3)

    # Run clustering with 4 clusters and no constraints.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=4,
    )

    assert clustering_model.dict_of_predicted_clusters

    # Expected partition, as asserted below:
    #   - "0" is alone in cluster 0 ;
    #   - "7" and "10" (the other first-axis points) form cluster 2 ;
    #   - the second-axis and third-axis points are merged into cluster 1 ;
    #   - the fourth-axis points form cluster 3.
    assert dict_of_predicted_clusters == {
        "0": 0,
        "1": 1,
        "2": 1,
        "3": 1,
        "4": 3,
        "5": 1,
        "6": 3,
        "7": 2,
        "8": 1,
        "9": 3,
        "10": 2,
        "11": 1,
    }
285# ==============================================================================
286# test_MPCKMeansConstrainedClustering_cluster_with_some_constraints
287# ==============================================================================
def test_MPCKMeansConstrainedClustering_cluster_with_some_constraints():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` clustering works with some `constraints`.
    """

    # Define `vectors`: "0" sits at 2.00 on the first axis, away from "7" and "10".
    vectors = {
        "0": csr_matrix([2.00, 0.00, 0.00, 0.00]),
        "1": csr_matrix([0.00, 0.43, 0.00, 0.00]),
        "2": csr_matrix([0.00, 0.00, 0.29, 0.00]),
        "3": csr_matrix([0.00, 0.00, 0.50, 0.00]),
        "4": csr_matrix([0.00, 0.00, 0.00, 0.98]),
        "5": csr_matrix([0.00, 0.00, 0.33, 0.00]),
        "6": csr_matrix([0.00, 0.00, 0.00, 1.40]),
        "7": csr_matrix([0.80, 0.00, 0.00, 0.00]),
        "8": csr_matrix([0.00, 0.54, 0.00, 0.00]),
        "9": csr_matrix([0.00, 0.00, 0.00, 1.10]),
        "10": csr_matrix([1.10, 0.00, 0.00, 0.00]),
        "11": csr_matrix([0.00, 0.49, 0.00, 0.00]),
    }

    # Define `constraints_manager` and register the constraints in a fixed order:
    # "0" must be linked with "7" and "10", and cannot be linked with "4".
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))
    annotated_constraints = (
        ("0", "7", "MUST_LINK"),
        ("0", "10", "MUST_LINK"),
        ("0", "4", "CANNOT_LINK"),
    )
    for first_id, second_id, link_type in annotated_constraints:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=link_type)

    # Initialize a `MPCKMeansConstrainedClustering` instance with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # Run clustering with 4 clusters and the constraints defined above.
    predicted_clusters = clusterer.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=4,
    )

    # Expected partition: "0" shares cluster 0 with "7" and "10", and "4" is in another cluster.
    expected_clusters = {
        "0": 0,
        "1": 1,
        "2": 2,
        "3": 2,
        "4": 3,
        "5": 2,
        "6": 3,
        "7": 0,
        "8": 1,
        "9": 3,
        "10": 0,
        "11": 1,
    }

    # The result must be stored on the instance and returned by the call.
    assert clusterer.dict_of_predicted_clusters
    assert predicted_clusters == expected_clusters