Coverage for tests\clustering\test_mpckmeans.py: 100.00%

63 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-17 13:31 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: interactive-clustering/tests/clustering/test_mpckmeans.py 

5* Description: Unittests for the `clustering.mpckmeans` module. 

6* Author: Esther LENOTRE, David NICOLAZO, Marc TRUTT 

7* Created: 02/11/2022 

8* Licence: CeCILL (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORT PYTHON DEPENDENCIES 

13# ============================================================================== 

14 

15import math 

16 

17import numpy as np 

18import pytest 

19from scipy.sparse import csr_matrix 

20 

21from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager 

22from src.cognitivefactory.interactive_clustering.clustering.mpckmeans import MPCKMeansConstrainedClustering 

23 

24 

25# ============================================================================== 

26# test_MPCKMeansConstrainedClustering_for_inconsistent_model 

27# ============================================================================== 

def test_MPCKMeansConstrainedClustering_for_inconsistent_model():
    """
    Check that initializing `clustering.mpckmeans.MPCKMeansConstrainedClustering` with an unsupported `model` value raises a `ValueError`.
    """

    # An unknown `model` name must be rejected, and the error must mention `model`.
    invalid_settings = {"model": "as_you_want"}
    with pytest.raises(ValueError, match="`model`"):
        MPCKMeansConstrainedClustering(**invalid_settings)

38 

39 

40# ============================================================================== 

41# test_MPCKMeansConstrainedClustering_for_inconsistent_max_iteration 

42# ============================================================================== 

def test_MPCKMeansConstrainedClustering_for_inconsistent_max_iteration():
    """
    Check that initializing `clustering.mpckmeans.MPCKMeansConstrainedClustering` with an invalid `max_iteration` value raises a `ValueError`.
    """

    # A negative `max_iteration` must be rejected, and the error must mention `max_iteration`.
    invalid_settings = {"max_iteration": -1}
    with pytest.raises(ValueError, match="`max_iteration`"):
        MPCKMeansConstrainedClustering(**invalid_settings)

53 

54 

55# ============================================================================== 

56# test_MPCKMeansConstrainedClustering_for_inconsistent_w 

57# ============================================================================== 

def test_MPCKMeansConstrainedClustering_for_inconsistent_w():
    """
    Check that initializing `clustering.mpckmeans.MPCKMeansConstrainedClustering` with an invalid `w` value raises a `ValueError`.
    """

    # A negative `w` must be rejected; the error message is expected to mention `weight`.
    invalid_settings = {"w": -1}
    with pytest.raises(ValueError, match="`weight`"):
        MPCKMeansConstrainedClustering(**invalid_settings)

68 

69 

70# ============================================================================== 

71# test_MPCKMeansConstrainedClustering_for_correct_settings 

72# ============================================================================== 

def test_MPCKMeansConstrainedClustering_for_correct_settings():
    """
    Check that `clustering.mpckmeans.MPCKMeansConstrainedClustering` can be initialized with valid settings and exposes them as attributes.
    """

    # Valid settings, passed as keyword arguments to the constructor.
    settings = {
        "model": "MPC",
        "max_iteration": 100,
        "w": 0.5,
        "random_seed": 3,
    }
    clusterer = MPCKMeansConstrainedClustering(**settings)

    # The instance must be truthy and must store each setting under the same name.
    assert clusterer
    assert clusterer.model == settings["model"]
    assert clusterer.max_iteration == settings["max_iteration"]
    # `w` is a float, so it is compared with a tolerance.
    assert math.isclose(clusterer.w, settings["w"])
    assert clusterer.random_seed == settings["random_seed"]

90 

91 

92# ============================================================================== 

93# test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_constraints_manager 

94# ============================================================================== 

def test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_constraints_manager():
    """
    Check that `clustering.mpckmeans.MPCKMeansConstrainedClustering.cluster` raises a `ValueError` when `constraints_manager` is not valid.
    """

    # Model with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # `None` is not a valid constraints manager; the error must mention `constraints_manager`.
    invalid_call = {
        "constraints_manager": None,
        "vectors": None,
        "nb_clusters": 2,
    }
    with pytest.raises(ValueError, match="`constraints_manager`"):
        clusterer.cluster(**invalid_call)

110 

111 

112# ============================================================================== 

113# test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_vectors 

114# ============================================================================== 

def test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_vectors():
    """
    Check that `clustering.mpckmeans.MPCKMeansConstrainedClustering.cluster` raises a `ValueError` when `vectors` is not valid.
    """

    # Model with default settings and a valid constraints manager over three data IDs.
    clusterer = MPCKMeansConstrainedClustering()
    manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])

    # `None` is not a valid `vectors` value; the error must mention `vectors`.
    with pytest.raises(ValueError, match="`vectors`"):
        clusterer.cluster(constraints_manager=manager, vectors=None, nb_clusters=2)

130 

131 

132# ============================================================================== 

133# test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_1 

134# ============================================================================== 

def test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_1():
    """
    Check that `clustering.mpckmeans.MPCKMeansConstrainedClustering.cluster` raises a `ValueError` when `nb_clusters` is `None`.
    """

    # Model with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # Valid constraints manager and vectors (1D array, 2D array and sparse matrix are mixed on purpose).
    manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
    data_vectors = {
        "first": np.array([1, 2, 3]),
        "second": np.array([[4, 5, 6]]),
        "third": csr_matrix([7, 8, 9]),
    }

    # A missing `nb_clusters` must be rejected; the error must mention `nb_clusters`.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        clusterer.cluster(constraints_manager=manager, vectors=data_vectors, nb_clusters=None)

150 

151 

152# ============================================================================== 

153# test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_2 

154# ============================================================================== 

def test_MPCKMeansConstrainedClustering_cluster_for_inconsistent_nb_clusters_2():
    """
    Check that `clustering.mpckmeans.MPCKMeansConstrainedClustering.cluster` raises a `ValueError` when `nb_clusters` is negative.
    """

    # Model with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # Valid constraints manager and vectors (1D array, 2D array and sparse matrix are mixed on purpose).
    manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
    data_vectors = {
        "first": np.array([1, 2, 3]),
        "second": np.array([[4, 5, 6]]),
        "third": csr_matrix([7, 8, 9]),
    }

    # A negative `nb_clusters` must be rejected; the error must mention `nb_clusters`.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        clusterer.cluster(constraints_manager=manager, vectors=data_vectors, nb_clusters=-1)

170 

171 

172# ============================================================================== 

173# test_MPCKMeansConstrainedClustering_cluster_with_no_constraints_1 

174# ============================================================================== 

def test_MPCKMeansConstrainedClustering_cluster_with_no_constraints_1():
    """
    Check that `clustering.mpckmeans.MPCKMeansConstrainedClustering` clusters the data as expected when no constraint is annotated.
    """

    # One row per data point; the data IDs are the row positions ("0" to "11").
    coordinates = [
        [1.00, 0.00, 0.00, 0.00],
        [0.00, 0.43, 0.00, 0.00],
        [0.00, 0.00, 0.29, 0.00],
        [0.00, 0.00, 0.50, 0.00],
        [0.00, 0.00, 0.00, 0.98],
        [0.00, 0.00, 0.33, 0.00],
        [0.00, 0.00, 0.00, 1.40],
        [0.80, 0.00, 0.00, 0.00],
        [0.00, 0.54, 0.00, 0.00],
        [0.00, 0.00, 0.00, 1.10],
        [1.10, 0.00, 0.00, 0.00],
        [0.00, 0.49, 0.00, 0.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(coordinates)}

    # Constraints manager over the same data IDs, left without any constraint.
    manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Model with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # Run the clustering with 4 clusters.
    predicted = clusterer.cluster(
        constraints_manager=manager,
        vectors=vectors,
        nb_clusters=4,
    )

    # The result must also be stored on the model.
    assert clusterer.dict_of_predicted_clusters

    # Expected partition: one cluster per non-zero axis of the vectors.
    expected = {
        "0": 0,
        "1": 1,
        "2": 2,
        "3": 2,
        "4": 3,
        "5": 2,
        "6": 3,
        "7": 0,
        "8": 1,
        "9": 3,
        "10": 0,
        "11": 1,
    }
    assert predicted == expected

223 

224 

225# ============================================================================== 

226# test_MPCKMeansConstrainedClustering_cluster_with_no_constraints_2 

227# ============================================================================== 

def test_MPCKMeansConstrainedClustering_cluster_with_no_constraints_2():
    """
    Test that the `clustering.mpckmeans.MPCKMeansConstrainedClustering` clustering works with no `constraints`.
    """

    # Define `vectors` and `constraints_manager` (data "0" has the value 2.00 on the first axis).
    vectors = {
        "0": csr_matrix([2.00, 0.00, 0.00, 0.00]),
        "1": csr_matrix([0.00, 0.43, 0.00, 0.00]),
        "2": csr_matrix([0.00, 0.00, 0.29, 0.00]),
        "3": csr_matrix([0.00, 0.00, 0.50, 0.00]),
        "4": csr_matrix([0.00, 0.00, 0.00, 0.98]),
        "5": csr_matrix([0.00, 0.00, 0.33, 0.00]),
        "6": csr_matrix([0.00, 0.00, 0.00, 1.40]),
        "7": csr_matrix([0.80, 0.00, 0.00, 0.00]),
        "8": csr_matrix([0.00, 0.54, 0.00, 0.00]),
        "9": csr_matrix([0.00, 0.00, 0.00, 1.10]),
        "10": csr_matrix([1.10, 0.00, 0.00, 0.00]),
        "11": csr_matrix([0.00, 0.49, 0.00, 0.00]),
    }

    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors.keys()))

    # Initialize a `MPCKMeansConstrainedClustering` instance.
    # NOTE(review): `eps` and `min_samples` look like density-based clustering settings;
    # confirm that this model actually uses them (they are kept to preserve the test as written).
    clustering_model = MPCKMeansConstrainedClustering(eps=0.5, min_samples=3)

    # Run clustering with 4 clusters and no constraints.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=4,
    )

    assert clustering_model.dict_of_predicted_clusters

    # Expected partition:
    #   - "0" is alone in its own cluster;
    #   - "7" and "10" share a cluster;
    #   - "4", "6" and "9" share a cluster;
    #   - the six remaining data ("1", "2", "3", "5", "8", "11") are grouped together.
    assert dict_of_predicted_clusters == {
        "0": 0,
        "1": 1,
        "2": 1,
        "3": 1,
        "4": 3,
        "5": 1,
        "6": 3,
        "7": 2,
        "8": 1,
        "9": 3,
        "10": 2,
        "11": 1,
    }

283 

284 

285# ============================================================================== 

286# test_MPCKMeansConstrainedClustering_cluster_with_some_constraints 

287# ============================================================================== 

def test_MPCKMeansConstrainedClustering_cluster_with_some_constraints():
    """
    Check that `clustering.mpckmeans.MPCKMeansConstrainedClustering` clusters the data as expected when some constraints are annotated.
    """

    # One row per data point; the data IDs are the row positions ("0" to "11").
    coordinates = [
        [2.00, 0.00, 0.00, 0.00],
        [0.00, 0.43, 0.00, 0.00],
        [0.00, 0.00, 0.29, 0.00],
        [0.00, 0.00, 0.50, 0.00],
        [0.00, 0.00, 0.00, 0.98],
        [0.00, 0.00, 0.33, 0.00],
        [0.00, 0.00, 0.00, 1.40],
        [0.80, 0.00, 0.00, 0.00],
        [0.00, 0.54, 0.00, 0.00],
        [0.00, 0.00, 0.00, 1.10],
        [1.10, 0.00, 0.00, 0.00],
        [0.00, 0.49, 0.00, 0.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(coordinates)}

    # Constraints manager over the same data IDs, with two MUST_LINK and one CANNOT_LINK constraints on "0".
    manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))
    annotated_constraints = (
        ("0", "7", "MUST_LINK"),
        ("0", "10", "MUST_LINK"),
        ("0", "4", "CANNOT_LINK"),
    )
    for first_ID, second_ID, link_type in annotated_constraints:
        manager.add_constraint(data_ID1=first_ID, data_ID2=second_ID, constraint_type=link_type)

    # Model with default settings.
    clusterer = MPCKMeansConstrainedClustering()

    # Run the clustering with 4 clusters and the constraints above.
    predicted = clusterer.cluster(
        constraints_manager=manager,
        vectors=vectors,
        nb_clusters=4,
    )

    # The result must also be stored on the model.
    assert clusterer.dict_of_predicted_clusters

    # Expected partition: "0" is grouped with "7" and "10", and is separated from "4".
    expected = {
        "0": 0,
        "1": 1,
        "2": 2,
        "3": 2,
        "4": 3,
        "5": 2,
        "6": 3,
        "7": 0,
        "8": 1,
        "9": 3,
        "10": 0,
        "11": 1,
    }
    assert predicted == expected