Coverage for tests\clustering\test_dbscan.py: 100.00%
54 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: interactive-clustering/tests/clustering/test_dbscan.py
5* Description: Unittests for the `clustering.dbscan` module.
6* Author: Marc TRUTT, Esther LENOTRE, David NICOLAZO
7* Created: 31/10/2022
8* Licence: CeCILL (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORT PYTHON DEPENDENCIES
13# ==============================================================================
15import math
17import numpy as np
18import pytest
19from scipy.sparse import csr_matrix
21from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager
22from src.cognitivefactory.interactive_clustering.clustering.dbscan import DBScanConstrainedClustering
25# ==============================================================================
26# test_DBScanConstrainedClustering_for_inconsistent_eps
27# ==============================================================================
def test_DBScanConstrainedClustering_for_inconsistent_eps():
    """
    Check that initializing `clustering.dbscan.DBScanConstrainedClustering` with an inconsistent `eps` raises a `ValueError`.
    """

    # A negative value is given for `eps`; the error message has to mention `eps`.
    invalid_eps = -1
    with pytest.raises(ValueError, match="`eps`"):
        DBScanConstrainedClustering(eps=invalid_eps)
40# ==============================================================================
41# test_DBScanConstrainedClustering_for_inconsistent_min_samples
42# ==============================================================================
def test_DBScanConstrainedClustering_for_inconsistent_min_samples():
    """
    Check that initializing `clustering.dbscan.DBScanConstrainedClustering` with an inconsistent `min_samples` raises a `ValueError`.
    """

    # A negative value is given for `min_samples`; the error message has to mention `min_samples`.
    invalid_min_samples = -1
    with pytest.raises(ValueError, match="`min_samples`"):
        DBScanConstrainedClustering(min_samples=invalid_min_samples)
55# ==============================================================================
56# test_DBScanConstrainedClustering_for_correct_settings
57# ==============================================================================
def test_DBScanConstrainedClustering_for_correct_settings():
    """
    Check that `clustering.dbscan.DBScanConstrainedClustering` initializes correctly with valid settings and stores them.
    """

    # Settings used for the initialization and expected back on the instance.
    expected_eps = 0.5
    expected_min_samples = 3

    model = DBScanConstrainedClustering(eps=expected_eps, min_samples=expected_min_samples)

    # The instance exists and exposes the given settings.
    assert model
    assert math.isclose(model.eps, expected_eps)
    assert model.min_samples == expected_min_samples
73# ==============================================================================
74# test_DBScanConstrainedClustering_cluster_for_inconsistent_constraints_manager
75# ==============================================================================
def test_DBScanConstrainedClustering_cluster_for_inconsistent_constraints_manager():
    """
    Check that `clustering.dbscan.DBScanConstrainedClustering.cluster` raises a `ValueError` for an inconsistent `constraints_manager`.
    """

    # Build a `DBScanConstrainedClustering` instance with its default settings.
    model = DBScanConstrainedClustering()

    # `None` is not a constraints manager; the error message has to mention `constraints_manager`.
    invalid_arguments = {"constraints_manager": None, "vectors": None}
    with pytest.raises(ValueError, match="`constraints_manager`"):
        model.cluster(**invalid_arguments)
92# ==============================================================================
93# test_DBScanConstrainedClustering_cluster_for_inconsistent_vectors
94# ==============================================================================
def test_DBScanConstrainedClustering_cluster_for_inconsistent_vectors():
    """
    Check that `clustering.dbscan.DBScanConstrainedClustering.cluster` raises a `ValueError` for an inconsistent `vectors`.
    """

    # Build a `DBScanConstrainedClustering` instance with its default settings.
    model = DBScanConstrainedClustering()

    # The constraints manager is valid but `vectors` is `None`; the error message has to mention `vectors`.
    with pytest.raises(ValueError, match="`vectors`"):
        manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
        model.cluster(constraints_manager=manager, vectors=None)
111# ==============================================================================
112# test_DBScanConstrainedClustering_cluster_for_inconsistent_nb_clusters
113# ==============================================================================
def test_DBScanConstrainedClustering_cluster_for_inconsistent_nb_clusters():
    """
    Check that `clustering.dbscan.DBScanConstrainedClustering.cluster` raises a `ValueError` for an inconsistent `nb_clusters`.
    """

    # Build a `DBScanConstrainedClustering` instance with its default settings.
    model = DBScanConstrainedClustering()

    # A value is given for `nb_clusters` (here 4, with 3 data); the error message has to mention `nb_clusters`.
    # NOTE(review): the exact rule that rejects `nb_clusters` lives in the implementation - confirm there.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
        # Vectors are deliberately given in three forms: 1-D array, 2-D array and sparse matrix.
        mixed_vectors = {
            "first": np.array([1, 2, 3]),
            "second": np.array([[4, 5, 6]]),
            "third": csr_matrix([7, 8, 9]),
        }
        model.cluster(
            constraints_manager=manager,
            vectors=mixed_vectors,
            nb_clusters=4,
        )
131# ==============================================================================
132# test_DBScanConstrainedClustering_cluster_with_no_constraints_1
133# ==============================================================================
def test_DBScanConstrainedClustering_cluster_with_no_constraints_1():
    """
    Check that `clustering.dbscan.DBScanConstrainedClustering.cluster` gives the expected clusters when no constraint is set.
    """

    # One row per data; the data ID is the row position written as a string ("0" to "11").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.00, 0.43, 0.00, 0.00],
        [0.00, 0.00, 0.29, 0.00],
        [0.00, 0.00, 0.50, 0.00],
        [0.00, 0.00, 0.00, 0.98],
        [0.00, 0.00, 0.33, 0.00],
        [0.00, 0.00, 0.00, 1.40],
        [0.80, 0.00, 0.00, 0.00],
        [0.00, 0.54, 0.00, 0.00],
        [0.00, 0.00, 0.00, 1.10],
        [1.10, 0.00, 0.00, 0.00],
        [0.00, 0.49, 0.00, 0.00],
    ]
    dict_of_vectors = {str(data_index): csr_matrix(row) for data_index, row in enumerate(rows)}

    # Constraints manager over the same data IDs, without any constraint.
    manager = BinaryConstraintsManager(list_of_data_IDs=list(dict_of_vectors.keys()))

    # Build a `DBScanConstrainedClustering` instance.
    model = DBScanConstrainedClustering(eps=0.5, min_samples=3)

    # Run the clustering with no constraints.
    predicted_clusters = model.cluster(
        constraints_manager=manager,
        vectors=dict_of_vectors,
    )

    # The result is stored on the model and every data lands in one of four clusters.
    assert model.dict_of_predicted_clusters
    expected_clusters = {
        # Cluster 0.
        "0": 0,
        "7": 0,
        "10": 0,
        # Cluster 1.
        "1": 1,
        "8": 1,
        "11": 1,
        # Cluster 2.
        "2": 2,
        "3": 2,
        "5": 2,
        # Cluster 3.
        "4": 3,
        "6": 3,
        "9": 3,
    }
    assert predicted_clusters == expected_clusters
183# ==============================================================================
184# test_DBScanConstrainedClustering_cluster_with_no_constraints_2
185# ==============================================================================
def test_DBScanConstrainedClustering_cluster_with_no_constraints_2():
    """
    Test that the `clustering.dbscan.DBScanConstrainedClustering` clustering works with no `constraints` when some data are noise.
    """

    # Define `vectors` and `constraints_manager` (no constraint is added).
    vectors = {
        "0": csr_matrix([2.00, 0.00, 0.00, 0.00]),
        "1": csr_matrix([0.00, 0.43, 0.00, 0.00]),
        "2": csr_matrix([0.00, 0.00, 0.29, 0.00]),
        "3": csr_matrix([0.00, 0.00, 0.50, 0.00]),
        "4": csr_matrix([0.00, 0.00, 0.00, 0.98]),
        "5": csr_matrix([0.00, 0.00, 0.33, 0.00]),
        "6": csr_matrix([0.00, 0.00, 0.00, 1.40]),
        "7": csr_matrix([0.80, 0.00, 0.00, 0.00]),
        "8": csr_matrix([0.00, 0.54, 0.00, 0.00]),
        "9": csr_matrix([0.00, 0.00, 0.00, 1.10]),
        "10": csr_matrix([1.10, 0.00, 0.00, 0.00]),
        "11": csr_matrix([0.00, 0.49, 0.00, 0.00]),
    }

    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors.keys()))

    # Initialize a `DBScanConstrainedClustering` instance.
    clustering_model = DBScanConstrainedClustering(
        eps=0.5,
        min_samples=3,
    )

    # Run clustering with no constraints.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
    )

    assert clustering_model.dict_of_predicted_clusters

    # Here, '0' is too far from other points so it is noise.
    # Furthermore, '7' and '10' are in the same neighbourhood, but with no other point:
    # they are not numerous enough to create a cluster.
    # In the expected result below, each of these noise points carries its own negative label.
    assert dict_of_predicted_clusters == {
        "0": -1,
        "1": 0,
        "2": 1,
        "3": 1,
        "4": 2,
        "5": 1,
        "6": 2,
        "7": -2,
        "8": 0,
        "9": 2,
        "10": -3,
        "11": 0,
    }
245# ==============================================================================
246# test_DBScanConstrainedClustering_cluster_with_some_constraints
247# ==============================================================================
def test_DBScanConstrainedClustering_cluster_with_some_constraints():
    """
    Check that `clustering.dbscan.DBScanConstrainedClustering.cluster` gives the expected clusters when some constraints are set.
    """

    # One row per data; the data ID is the row position written as a string ("0" to "11").
    rows = [
        [2.00, 0.00, 0.00, 0.00],
        [0.00, 0.43, 0.00, 0.00],
        [0.00, 0.00, 0.29, 0.00],
        [0.00, 0.00, 0.50, 0.00],
        [0.00, 0.00, 0.00, 0.98],
        [0.00, 0.00, 0.33, 0.00],
        [0.00, 0.00, 0.00, 1.40],
        [0.80, 0.00, 0.00, 0.00],
        [0.00, 0.54, 0.00, 0.00],
        [0.00, 0.00, 0.00, 1.10],
        [1.10, 0.00, 0.00, 0.00],
        [0.00, 0.49, 0.00, 0.00],
    ]
    dict_of_vectors = {str(data_index): csr_matrix(row) for data_index, row in enumerate(rows)}

    # Constraints manager over the same data IDs: '0' must be linked with '7' and '10', and cannot be linked with '4'.
    manager = BinaryConstraintsManager(list_of_data_IDs=list(dict_of_vectors.keys()))
    constraints_to_add = [
        ("0", "7", "MUST_LINK"),
        ("0", "10", "MUST_LINK"),
        ("0", "4", "CANNOT_LINK"),
    ]
    for first_data_ID, second_data_ID, kind_of_constraint in constraints_to_add:
        manager.add_constraint(
            data_ID1=first_data_ID,
            data_ID2=second_data_ID,
            constraint_type=kind_of_constraint,
        )

    # Build a `DBScanConstrainedClustering` instance.
    model = DBScanConstrainedClustering(eps=0.5, min_samples=3)

    # Run the clustering with the constraints defined above.
    predicted_clusters = model.cluster(
        constraints_manager=manager,
        vectors=dict_of_vectors,
    )

    # The result is stored on the model; '0', '7' and '10' are expected together, apart from '4'.
    assert model.dict_of_predicted_clusters
    expected_clusters = {
        # Cluster 0 (data linked by the `MUST_LINK` constraints).
        "0": 0,
        "7": 0,
        "10": 0,
        # Cluster 1.
        "1": 1,
        "8": 1,
        "11": 1,
        # Cluster 2.
        "2": 2,
        "3": 2,
        "5": 2,
        # Cluster 3.
        "4": 3,
        "6": 3,
        "9": 3,
    }
    assert predicted_clusters == expected_clusters