Coverage for tests\clustering\test_hierarchical.py: 100.00%

203 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-17 13:31 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: interactive-clustering/tests/clustering/test_hierarchical.py 

5* Description: Unittests for the `clustering.hierarchical` module. 

6* Author: Erwan SCHILD 

7* Created: 17/03/2021 

8* Licence: CeCILL (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORT PYTHON DEPENDENCIES 

13# ============================================================================== 

14 

15import numpy as np 

16import pytest 

17from scipy.sparse import csr_matrix 

18 

19from cognitivefactory.interactive_clustering.clustering.hierarchical import Cluster, HierarchicalConstrainedClustering 

20from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager 

21 

22 

23# ============================================================================== 

24# test_HierarchicalConstrainedClustering_for_inconsistent_linkage 

25# ============================================================================== 

26def test_HierarchicalConstrainedClustering_for_inconsistent_linkage(): 

27 """ 

28 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` initialization raises a `ValueError` for an inconsistent `linkage` parameter. 

29 """ 

30 

31 # Check `ValueError` for bad string value for `linkage`. 

32 with pytest.raises(ValueError, match="`linkage`"): 

33 HierarchicalConstrainedClustering( 

34 linkage="as_you_want", 

35 ) 

36 

37 

38# ============================================================================== 

39# test_HierarchicalConstrainedClustering_for_correct_settings 

40# ============================================================================== 

41def test_HierarchicalConstrainedClustering_for_correct_settings(): 

42 """ 

43 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` initialization runs correctly with the correct settings. 

44 """ 

45 

46 # Check a correct initialization. 

47 clustering_model = HierarchicalConstrainedClustering( 

48 linkage="average", 

49 random_seed=2, 

50 ) 

51 assert clustering_model 

52 assert clustering_model.linkage == "average" 

53 assert clustering_model.random_seed == 2 

54 

55 

56# ============================================================================== 

57# test_HierarchicalConstrainedClustering_cluster_for_inconsistent_constraints_manager 

58# ============================================================================== 

59def test_HierarchicalConstrainedClustering_cluster_for_inconsistent_constraints_manager(): 

60 """ 

61 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering raises a `ValueError` for an inconsistent `constraints_manager` parameter. 

62 """ 

63 

64 # Initialize a `HierarchicalConstrainedClustering` instance. 

65 clustering_model = HierarchicalConstrainedClustering() 

66 

67 # Check `ValueError` for a missing `constraints_manager`. 

68 with pytest.raises(ValueError, match="`constraints_manager`"): 

69 clustering_model.cluster( 

70 constraints_manager=None, 

71 vectors=None, 

72 nb_clusters=2, 

73 ) 

74 

75 

76# ============================================================================== 

77# test_HierarchicalConstrainedClustering_cluster_for_inconsistent_vectors 

78# ============================================================================== 

79def test_HierarchicalConstrainedClustering_cluster_for_inconsistent_vectors(): 

80 """ 

81 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering raises a `ValueError` for an inconsistent `vectors` parameter. 

82 """ 

83 

84 # Initialize a `HierarchicalConstrainedClustering` instance. 

85 clustering_model = HierarchicalConstrainedClustering() 

86 

87 # Check `ValueError` for missing `vectors`. 

88 with pytest.raises(ValueError, match="`vectors`"): 

89 clustering_model.cluster( 

90 constraints_manager=BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"]), 

91 vectors=None, 

92 nb_clusters=2, 

93 ) 

94 

95 

96# ============================================================================== 

97# test_HierarchicalConstrainedClustering_cluster_for_inconsistent_nb_clusters_1 

98# ============================================================================== 

99def test_HierarchicalConstrainedClustering_cluster_for_inconsistent_nb_clusters_1(): 

100 """ 

101 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering raises a `ValueError` for an inconsistent `nb_clusters` parameter. 

102 """ 

103 

104 # Initialize a `HierarchicalConstrainedClustering` instance. 

105 clustering_model = HierarchicalConstrainedClustering() 

106 

107 # Check `ValueError` for a missing `nb_clusters`. 

108 with pytest.raises(ValueError, match="`nb_clusters`"): 

109 clustering_model.cluster( 

110 constraints_manager=BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"]), 

111 vectors={"first": np.array([1, 2, 3]), "second": np.array([[4, 5, 6]]), "third": csr_matrix([7, 8, 9])}, 

112 nb_clusters=None, 

113 ) 

114 

115 

116# ============================================================================== 

117# test_HierarchicalConstrainedClustering_cluster_for_inconsistent_nb_clusters_2 

118# ============================================================================== 

119def test_HierarchicalConstrainedClustering_cluster_for_inconsistent_nb_clusters_2(): 

120 """ 

121 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering raises a `ValueError` for an inconsistent `nb_clusters` parameter. 

122 """ 

123 

124 # Initialize a `HierarchicalConstrainedClustering` instance. 

125 clustering_model = HierarchicalConstrainedClustering() 

126 

127 # Check `ValueError` for too small `nb_clusters`. 

128 with pytest.raises(ValueError, match="`nb_clusters`"): 

129 clustering_model.cluster( 

130 constraints_manager=BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"]), 

131 vectors={"first": np.array([1, 2, 3]), "second": np.array([[4, 5, 6]]), "third": csr_matrix([7, 8, 9])}, 

132 nb_clusters=-1, 

133 ) 

134 

135 

136# ============================================================================== 

137# test_HierarchicalConstrainedClustering_cluster_with_ward_linkage 

138# ============================================================================== 

139def test_HierarchicalConstrainedClustering_cluster_with_ward_linkage(): 

140 """ 

141 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering works with ward `linkage`. 

142 """ 

143 

144 # Define `vectors` and `constraints_manager` 

145 vectors = { 

146 "0": csr_matrix([1.00, 0.00, 0.00]), 

147 "1": csr_matrix([0.95, 0.02, 0.01]), 

148 "2": csr_matrix([0.98, 0.00, 0.00]), 

149 "3": csr_matrix([0.99, 0.00, 0.00]), 

150 "4": csr_matrix([0.01, 0.99, 0.07]), 

151 "5": csr_matrix([0.02, 0.99, 0.07]), 

152 "6": csr_matrix([0.01, 0.99, 0.02]), 

153 "7": csr_matrix([0.01, 0.01, 0.97]), 

154 "8": csr_matrix([0.00, 0.01, 0.99]), 

155 "9": csr_matrix([0.00, 0.00, 1.00]), 

156 } 

157 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]) 

158 constraints_manager.add_constraint(data_ID1="0", data_ID2="1", constraint_type="MUST_LINK") 

159 constraints_manager.add_constraint(data_ID1="5", data_ID2="7", constraint_type="CANNOT_LINK") 

160 

161 # Initialize a `HierarchicalConstrainedClustering` instance. 

162 clustering_model = HierarchicalConstrainedClustering( 

163 linkage="ward", 

164 random_seed=1, 

165 ) 

166 

167 # Run clustering with 3 clusters and some constraints. 

168 dict_of_predicted_clusters = clustering_model.cluster( 

169 constraints_manager=constraints_manager, 

170 vectors=vectors, 

171 nb_clusters=3, 

172 ) 

173 

174 assert clustering_model.dict_of_predicted_clusters 

175 assert dict_of_predicted_clusters == { 

176 "0": 0, 

177 "1": 0, 

178 "2": 0, 

179 "3": 0, 

180 "4": 1, 

181 "5": 1, 

182 "6": 1, 

183 "7": 2, 

184 "8": 2, 

185 "9": 2, 

186 } 

187 

188 

189# ============================================================================== 

190# test_HierarchicalConstrainedClustering_cluster_with_average_linkage 

191# ============================================================================== 

192def test_HierarchicalConstrainedClustering_cluster_with_average_linkage(): 

193 """ 

194 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering works with average `linkage`. 

195 """ 

196 

197 # Define `vectors` and `constraints_manager` 

198 vectors = { 

199 "0": csr_matrix([1.00, 0.00, 0.00]), 

200 "1": csr_matrix([0.95, 0.02, 0.01]), 

201 "2": csr_matrix([0.98, 0.00, 0.00]), 

202 "3": csr_matrix([0.99, 0.00, 0.00]), 

203 "4": csr_matrix([0.01, 0.99, 0.07]), 

204 "5": csr_matrix([0.02, 0.99, 0.07]), 

205 "6": csr_matrix([0.01, 0.99, 0.02]), 

206 "7": csr_matrix([0.01, 0.01, 0.97]), 

207 "8": csr_matrix([0.00, 0.01, 0.99]), 

208 "9": csr_matrix([0.00, 0.00, 1.00]), 

209 } 

210 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]) 

211 constraints_manager.add_constraint(data_ID1="0", data_ID2="1", constraint_type="MUST_LINK") 

212 constraints_manager.add_constraint(data_ID1="5", data_ID2="7", constraint_type="CANNOT_LINK") 

213 

214 # Initialize a `HierarchicalConstrainedClustering` instance. 

215 clustering_model = HierarchicalConstrainedClustering( 

216 linkage="average", 

217 random_seed=1, 

218 ) 

219 

220 # Run clustering with 3 clusters and some constraints. 

221 dict_of_predicted_clusters = clustering_model.cluster( 

222 constraints_manager=constraints_manager, 

223 vectors=vectors, 

224 nb_clusters=3, 

225 ) 

226 

227 assert clustering_model.dict_of_predicted_clusters 

228 assert dict_of_predicted_clusters == { 

229 "0": 0, 

230 "1": 0, 

231 "2": 0, 

232 "3": 0, 

233 "4": 1, 

234 "5": 1, 

235 "6": 1, 

236 "7": 2, 

237 "8": 2, 

238 "9": 2, 

239 } 

240 

241 

242# ============================================================================== 

243# test_HierarchicalConstrainedClustering_cluster_with_single_linkage 

244# ============================================================================== 

245def test_HierarchicalConstrainedClustering_cluster_with_single_linkage(): 

246 """ 

247 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering works with single `linkage`. 

248 """ 

249 

250 # Define `vectors` and `constraints_manager` 

251 vectors = { 

252 "0": csr_matrix([1.00, 0.00, 0.00]), 

253 "1": csr_matrix([0.95, 0.02, 0.01]), 

254 "2": csr_matrix([0.98, 0.00, 0.00]), 

255 "3": csr_matrix([0.99, 0.00, 0.00]), 

256 "4": csr_matrix([0.01, 0.99, 0.07]), 

257 "5": csr_matrix([0.02, 0.99, 0.07]), 

258 "6": csr_matrix([0.01, 0.99, 0.02]), 

259 "7": csr_matrix([0.01, 0.01, 0.97]), 

260 "8": csr_matrix([0.00, 0.01, 0.99]), 

261 "9": csr_matrix([0.00, 0.00, 1.00]), 

262 } 

263 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]) 

264 constraints_manager.add_constraint(data_ID1="0", data_ID2="1", constraint_type="MUST_LINK") 

265 constraints_manager.add_constraint(data_ID1="5", data_ID2="7", constraint_type="CANNOT_LINK") 

266 

267 # Initialize a `HierarchicalConstrainedClustering` instance. 

268 clustering_model = HierarchicalConstrainedClustering( 

269 linkage="single", 

270 random_seed=1, 

271 ) 

272 

273 # Run clustering with 3 clusters and some constraints. 

274 dict_of_predicted_clusters = clustering_model.cluster( 

275 constraints_manager=constraints_manager, 

276 vectors=vectors, 

277 nb_clusters=3, 

278 ) 

279 

280 assert clustering_model.dict_of_predicted_clusters 

281 assert dict_of_predicted_clusters == { 

282 "0": 0, 

283 "1": 0, 

284 "2": 0, 

285 "3": 0, 

286 "4": 1, 

287 "5": 1, 

288 "6": 1, 

289 "7": 2, 

290 "8": 2, 

291 "9": 2, 

292 } 

293 

294 

295# ============================================================================== 

296# test_HierarchicalConstrainedClustering_cluster_with_complete_linkage 

297# ============================================================================== 

298def test_HierarchicalConstrainedClustering_cluster_with_complete_linkage(): 

299 """ 

300 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering works with complete `linkage`. 

301 """ 

302 

303 # Define `vectors` and `constraints_manager` 

304 vectors = { 

305 "0": csr_matrix([1.00, 0.00, 0.00]), 

306 "1": csr_matrix([0.95, 0.02, 0.01]), 

307 "2": csr_matrix([0.98, 0.00, 0.00]), 

308 "3": csr_matrix([0.99, 0.00, 0.00]), 

309 "4": csr_matrix([0.01, 0.99, 0.07]), 

310 "5": csr_matrix([0.02, 0.99, 0.07]), 

311 "6": csr_matrix([0.01, 0.99, 0.02]), 

312 "7": csr_matrix([0.01, 0.01, 0.97]), 

313 "8": csr_matrix([0.00, 0.01, 0.99]), 

314 "9": csr_matrix([0.00, 0.00, 1.00]), 

315 } 

316 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]) 

317 constraints_manager.add_constraint(data_ID1="0", data_ID2="1", constraint_type="MUST_LINK") 

318 constraints_manager.add_constraint(data_ID1="5", data_ID2="7", constraint_type="CANNOT_LINK") 

319 

320 # Initialize a `HierarchicalConstrainedClustering` instance. 

321 clustering_model = HierarchicalConstrainedClustering( 

322 linkage="complete", 

323 random_seed=1, 

324 ) 

325 

326 # Run clustering with 3 clusters and some constraints. 

327 dict_of_predicted_clusters = clustering_model.cluster( 

328 constraints_manager=constraints_manager, 

329 vectors=vectors, 

330 nb_clusters=3, 

331 ) 

332 

333 assert clustering_model.dict_of_predicted_clusters 

334 assert dict_of_predicted_clusters == { 

335 "0": 0, 

336 "1": 0, 

337 "2": 0, 

338 "3": 0, 

339 "4": 1, 

340 "5": 1, 

341 "6": 1, 

342 "7": 2, 

343 "8": 2, 

344 "9": 2, 

345 } 

346 

347 

348# ============================================================================== 

349# test_HierarchicalConstrainedClustering_cluster_with_no_constraints_1 

350# ============================================================================== 

351def test_HierarchicalConstrainedClustering_cluster_with_no_constraints_1(): 

352 """ 

353 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering works with no `constraints`. 

354 """ 

355 

356 # Define `vectors` and `constraints_manager` 

357 vectors = { 

358 "0": csr_matrix([1.00, 0.00, 0.00, 0.00]), 

359 "1": csr_matrix([0.95, 0.02, 0.02, 0.01]), 

360 "2": csr_matrix([0.98, 0.00, 0.02, 0.00]), 

361 "3": csr_matrix([0.99, 0.00, 0.01, 0.00]), 

362 "4": csr_matrix([0.50, 0.22, 0.21, 0.07]), 

363 "5": csr_matrix([0.50, 0.21, 0.22, 0.07]), 

364 "6": csr_matrix([0.01, 0.01, 0.01, 0.97]), 

365 "7": csr_matrix([0.00, 0.01, 0.00, 0.99]), 

366 "8": csr_matrix([0.00, 0.00, 0.00, 1.00]), 

367 } 

368 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors.keys())) 

369 

370 # Initialize a `HierarchicalConstrainedClustering` instance. 

371 clustering_model = HierarchicalConstrainedClustering( 

372 random_seed=2, 

373 ) 

374 

375 # Run clustering with 2 clusters and no constraints. 

376 dict_of_predicted_clusters = clustering_model.cluster( 

377 constraints_manager=constraints_manager, 

378 vectors=vectors, 

379 nb_clusters=2, 

380 ) 

381 

382 assert clustering_model.dict_of_predicted_clusters 

383 assert dict_of_predicted_clusters == {"0": 0, "1": 0, "2": 0, "3": 0, "4": 0, "5": 0, "6": 1, "7": 1, "8": 1} 

384 

385 

386# ============================================================================== 

387# test_HierarchicalConstrainedClustering_cluster_with_no_constraints_2 

388# ============================================================================== 

389def test_HierarchicalConstrainedClustering_cluster_with_no_constraints_2(): 

390 """ 

391 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering works with no `constraints`. 

392 """ 

393 

394 # Define `vectors` and `constraints_manager` 

395 vectors = { 

396 "0": csr_matrix([1.00, 0.00, 0.00]), 

397 "1": csr_matrix([0.95, 0.02, 0.01]), 

398 "2": csr_matrix([0.98, 0.00, 0.00]), 

399 "3": csr_matrix([0.99, 0.00, 0.00]), 

400 "4": csr_matrix([0.01, 0.99, 0.07]), 

401 "5": csr_matrix([0.02, 0.99, 0.07]), 

402 "6": csr_matrix([0.01, 0.99, 0.02]), 

403 "7": csr_matrix([0.01, 0.01, 0.97]), 

404 "8": csr_matrix([0.00, 0.01, 0.99]), 

405 "9": csr_matrix([0.00, 0.00, 1.00]), 

406 } 

407 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors.keys())) 

408 

409 # Initialize a `HierarchicalConstrainedClustering` instance. 

410 clustering_model = HierarchicalConstrainedClustering( 

411 random_seed=2, 

412 ) 

413 

414 # Run clustering with 3 clusters and no constraints. 

415 dict_of_predicted_clusters = clustering_model.cluster( 

416 constraints_manager=constraints_manager, 

417 vectors=vectors, 

418 nb_clusters=3, 

419 ) 

420 assert clustering_model.dict_of_predicted_clusters 

421 assert dict_of_predicted_clusters == { 

422 "0": 0, 

423 "1": 0, 

424 "2": 0, 

425 "3": 0, 

426 "4": 1, 

427 "5": 1, 

428 "6": 1, 

429 "7": 2, 

430 "8": 2, 

431 "9": 2, 

432 } 

433 

434 

435# ============================================================================== 

436# test_HierarchicalConstrainedClustering_cluster_with_some_constraints 

437# ============================================================================== 

438def test_HierarchicalConstrainedClustering_cluster_with_some_constraints(): 

439 """ 

440 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering works with some `constraints`. 

441 """ 

442 

443 # Define `vectors` and `constraints_manager` 

444 vectors = { 

445 "0": csr_matrix([1.00, 0.00, 0.00, 0.00]), 

446 "1": csr_matrix([0.95, 0.02, 0.02, 0.01]), 

447 "2": csr_matrix([0.98, 0.00, 0.02, 0.00]), 

448 "3": csr_matrix([0.99, 0.00, 0.01, 0.00]), 

449 "4": csr_matrix([0.50, 0.22, 0.21, 0.07]), 

450 "5": csr_matrix([0.50, 0.21, 0.22, 0.07]), 

451 "6": csr_matrix([0.01, 0.01, 0.01, 0.97]), 

452 "7": csr_matrix([0.00, 0.01, 0.00, 0.99]), 

453 "8": csr_matrix([0.00, 0.00, 0.00, 1.00]), 

454 } 

455 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["0", "1", "2", "3", "4", "5", "6", "7", "8"]) 

456 constraints_manager.add_constraint(data_ID1="0", data_ID2="1", constraint_type="MUST_LINK") 

457 constraints_manager.add_constraint(data_ID1="0", data_ID2="6", constraint_type="MUST_LINK") 

458 constraints_manager.add_constraint(data_ID1="0", data_ID2="7", constraint_type="MUST_LINK") 

459 constraints_manager.add_constraint(data_ID1="0", data_ID2="8", constraint_type="MUST_LINK") 

460 constraints_manager.add_constraint(data_ID1="4", data_ID2="5", constraint_type="MUST_LINK") 

461 constraints_manager.add_constraint(data_ID1="0", data_ID2="4", constraint_type="CANNOT_LINK") 

462 constraints_manager.add_constraint(data_ID1="2", data_ID2="4", constraint_type="CANNOT_LINK") 

463 

464 # Initialize a `HierarchicalConstrainedClustering` instance. 

465 clustering_model = HierarchicalConstrainedClustering( 

466 random_seed=2, 

467 ) 

468 

469 # Run clustering with 3 clusters and some constraints. 

470 dict_of_predicted_clusters = clustering_model.cluster( 

471 constraints_manager=constraints_manager, 

472 vectors=vectors, 

473 nb_clusters=3, 

474 ) 

475 assert clustering_model.dict_of_predicted_clusters 

476 assert dict_of_predicted_clusters == { 

477 "0": 0, 

478 "1": 0, 

479 "2": 1, 

480 "3": 1, 

481 "4": 2, 

482 "5": 2, 

483 "6": 0, 

484 "7": 0, 

485 "8": 0, 

486 } 

487 

488 

489# ============================================================================== 

490# test_HierarchicalConstrainedClustering_cluster_with_full_constraints 

491# ============================================================================== 

492def test_HierarchicalConstrainedClustering_cluster_with_full_constraints(): 

493 """ 

494 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering works with full `constraints`. 

495 """ 

496 

497 # Define `vectors` and `constraints_manager` 

498 vectors = { 

499 "0": csr_matrix([1.00, 0.00, 0.00, 0.00]), 

500 "1": csr_matrix([0.95, 0.02, 0.02, 0.01]), 

501 "2": csr_matrix([0.98, 0.00, 0.02, 0.00]), 

502 "3": csr_matrix([0.99, 0.00, 0.01, 0.00]), 

503 "4": csr_matrix([0.50, 0.22, 0.21, 0.07]), 

504 "5": csr_matrix([0.50, 0.21, 0.22, 0.07]), 

505 "6": csr_matrix([0.01, 0.01, 0.01, 0.97]), 

506 "7": csr_matrix([0.00, 0.01, 0.00, 0.99]), 

507 "8": csr_matrix([0.00, 0.00, 0.00, 1.00]), 

508 } 

509 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["0", "1", "2", "3", "4", "5", "6", "7", "8"]) 

510 constraints_manager.add_constraint(data_ID1="0", data_ID2="4", constraint_type="MUST_LINK") 

511 constraints_manager.add_constraint(data_ID1="0", data_ID2="8", constraint_type="MUST_LINK") 

512 constraints_manager.add_constraint(data_ID1="1", data_ID2="5", constraint_type="MUST_LINK") 

513 constraints_manager.add_constraint(data_ID1="2", data_ID2="6", constraint_type="MUST_LINK") 

514 constraints_manager.add_constraint(data_ID1="3", data_ID2="7", constraint_type="MUST_LINK") 

515 constraints_manager.add_constraint(data_ID1="0", data_ID2="1", constraint_type="CANNOT_LINK") 

516 constraints_manager.add_constraint(data_ID1="0", data_ID2="2", constraint_type="CANNOT_LINK") 

517 constraints_manager.add_constraint(data_ID1="0", data_ID2="3", constraint_type="CANNOT_LINK") 

518 constraints_manager.add_constraint(data_ID1="1", data_ID2="2", constraint_type="CANNOT_LINK") 

519 constraints_manager.add_constraint(data_ID1="1", data_ID2="3", constraint_type="CANNOT_LINK") 

520 constraints_manager.add_constraint(data_ID1="2", data_ID2="3", constraint_type="CANNOT_LINK") 

521 

522 # Initialize a `HierarchicalConstrainedClustering` instance. 

523 clustering_model = HierarchicalConstrainedClustering() 

524 

525 # Run clustering with 4 clusters and full constraints. 

526 dict_of_predicted_clusters = clustering_model.cluster( 

527 constraints_manager=constraints_manager, 

528 vectors=vectors, 

529 nb_clusters=4, 

530 ) 

531 assert clustering_model.dict_of_predicted_clusters 

532 assert dict_of_predicted_clusters == { 

533 "0": 0, 

534 "1": 1, 

535 "2": 2, 

536 "3": 3, 

537 "4": 0, 

538 "5": 1, 

539 "6": 2, 

540 "7": 3, 

541 "8": 0, 

542 } 

543 

544 

545# ============================================================================== 

546# test_HierarchicalConstrainedClustering_compute_predicted_clusters_without_clustering_tree 

547# ============================================================================== 

548def test_HierarchicalConstrainedClustering_compute_predicted_clusters_without_clustering_tree(): 

549 """ 

550 Test that the `compute_predicted_clusters` method of the `HierarchicalConstrainedClustering` raises `ValueError` if clustering is not run. 

551 """ 

552 

553 # Initialize a `HierarchicalConstrainedClustering` instance. 

554 clustering_model = HierarchicalConstrainedClustering( 

555 linkage="single", 

556 random_seed=1, 

557 ) 

558 

559 # Run `compute_predicted_clusters` without computing the clustering tree. 

560 with pytest.raises(ValueError, match="`clustering_root`"): 

561 clustering_model.compute_predicted_clusters( 

562 nb_clusters=2, 

563 by="size", 

564 ) 

565 

566 

567# ============================================================================== 

568# test_HierarchicalConstrainedClustering_compute_predicted_clusters_travelling_by_size 

569# ============================================================================== 

570def test_HierarchicalConstrainedClustering_compute_predicted_clusters_travelling_by_size(): 

571 """ 

572 Test that the `compute_predicted_clusters` method of the `HierarchicalConstrainedClustering` clustering works by travelling by `"size"`. 

573 """ 

574 

575 # Define `vectors` and `constraints_manager` 

576 vectors = { 

577 "00": csr_matrix([1.00, 0.00, 0.00]), 

578 "01": csr_matrix([0.99, 0.00, 0.00]), 

579 "02": csr_matrix([0.97, 0.00, 0.00]), 

580 "03": csr_matrix([0.96, 0.00, 0.00]), 

581 "04": csr_matrix([0.94, 0.00, 0.00]), 

582 "05": csr_matrix([0.93, 0.00, 0.00]), 

583 "06": csr_matrix([0.80, 0.80, 0.00]), 

584 "07": csr_matrix([0.80, 0.81, 0.00]), 

585 "08": csr_matrix([0.00, 0.00, 0.70]), 

586 "09": csr_matrix([0.00, 0.00, 0.71]), 

587 "10": csr_matrix([0.00, 0.00, 0.99]), 

588 "11": csr_matrix([0.00, 0.00, 1.00]), 

589 } 

590 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors.keys())) 

591 

592 # Initialize a `HierarchicalConstrainedClustering` instance. 

593 clustering_model = HierarchicalConstrainedClustering( 

594 linkage="single", 

595 random_seed=1, 

596 ) 

597 

598 # Compute the whole clustering tree. 

599 clustering_model.cluster( 

600 constraints_manager=constraints_manager, 

601 vectors=vectors, 

602 nb_clusters=2, 

603 ) 

604 

605 # Run `compute_predicted_clusters` while travelling the clustering tree by `"size"`. 

606 dict_of_predicted_clusters = clustering_model.compute_predicted_clusters( 

607 nb_clusters=4, 

608 by="size", 

609 ) 

610 assert dict_of_predicted_clusters == { 

611 "00": 0, 

612 "01": 0, 

613 "02": 0, 

614 "03": 0, 

615 "04": 1, 

616 "05": 1, 

617 "06": 2, 

618 "07": 2, 

619 "08": 3, 

620 "09": 3, 

621 "10": 3, 

622 "11": 3, 

623 } 

624 

625 

626# ============================================================================== 

627# test_HierarchicalConstrainedClustering_compute_predicted_clusters_travelling_by_iteration 

628# ============================================================================== 

629def test_HierarchicalConstrainedClustering_compute_predicted_clusters_travelling_by_iteration(): 

630 """ 

631 Test that the `compute_predicted_clusters` method of the `HierarchicalConstrainedClustering` clustering works by travelling by `"iteration"`. 

632 """ 

633 

634 # Define `vectors` and `constraints_manager` 

635 vectors = { 

636 "00": csr_matrix([1.00, 0.00, 0.00]), 

637 "01": csr_matrix([0.99, 0.00, 0.00]), 

638 "02": csr_matrix([0.97, 0.00, 0.00]), 

639 "03": csr_matrix([0.96, 0.00, 0.00]), 

640 "04": csr_matrix([0.94, 0.00, 0.00]), 

641 "05": csr_matrix([0.93, 0.00, 0.00]), 

642 "06": csr_matrix([0.80, 0.80, 0.00]), 

643 "07": csr_matrix([0.80, 0.81, 0.00]), 

644 "08": csr_matrix([0.00, 0.00, 0.70]), 

645 "09": csr_matrix([0.00, 0.00, 0.71]), 

646 "10": csr_matrix([0.00, 0.00, 0.99]), 

647 "11": csr_matrix([0.00, 0.00, 1.00]), 

648 } 

649 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors.keys())) 

650 

651 # Initialize a `HierarchicalConstrainedClustering` instance. 

652 clustering_model = HierarchicalConstrainedClustering( 

653 linkage="single", 

654 random_seed=1, 

655 ) 

656 

657 # Compute the whole clustering tree. 

658 clustering_model.cluster( 

659 constraints_manager=constraints_manager, 

660 vectors=vectors, 

661 nb_clusters=2, 

662 ) 

663 

664 # Run `compute_predicted_clusters` while travelling the clustering tree by `"iteration"`. 

665 dict_of_predicted_clusters = clustering_model.compute_predicted_clusters( 

666 nb_clusters=4, 

667 by="iteration", 

668 ) 

669 assert dict_of_predicted_clusters == { 

670 "00": 0, 

671 "01": 0, 

672 "02": 0, 

673 "03": 0, 

674 "04": 0, 

675 "05": 0, 

676 "06": 1, 

677 "07": 1, 

678 "08": 2, 

679 "09": 2, 

680 "10": 3, 

681 "11": 3, 

682 } 

683 

684 # Run `compute_predicted_clusters` while travelling the clustering tree by `"iteration"`, requesting more clusters (99) than there are data (12). 

685 dict_of_predicted_clusters = clustering_model.compute_predicted_clusters( 

686 nb_clusters=99, 

687 by="iteration", 

688 ) 

689 assert dict_of_predicted_clusters == { 

690 "00": 0, 

691 "01": 1, 

692 "02": 2, 

693 "03": 3, 

694 "04": 4, 

695 "05": 5, 

696 "06": 6, 

697 "07": 7, 

698 "08": 8, 

699 "09": 9, 

700 "10": 10, 

701 "11": 11, 

702 } 

703 

704 

705# ============================================================================== 

706# test_HierarchicalConstrainedClustering_cluster_with_break_loop 

707# ============================================================================== 

708def test_HierarchicalConstrainedClustering_cluster_with_break_loop(): 

709 """ 

710 Test that the `clustering.hierarchical.HierarchicalConstrainedClustering` clustering can break clustering loop. 

711 """ 

712 

713 # Define `vectors` and `constraints_manager` 

714 vectors = { 

715 "0": csr_matrix([1.00, 0.00, 0.00, 0.00]), 

716 "1": csr_matrix([0.95, 0.02, 0.02, 0.01]), 

717 "2": csr_matrix([0.98, 0.00, 0.02, 0.00]), 

718 "3": csr_matrix([0.99, 0.00, 0.01, 0.00]), 

719 "4": csr_matrix([0.50, 0.22, 0.21, 0.07]), 

720 "5": csr_matrix([0.50, 0.21, 0.22, 0.07]), 

721 "6": csr_matrix([0.01, 0.01, 0.01, 0.97]), 

722 "7": csr_matrix([0.00, 0.01, 0.00, 0.99]), 

723 "8": csr_matrix([0.00, 0.00, 0.00, 1.00]), 

724 } 

725 constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["0", "1", "2", "3", "4", "5", "6", "7", "8"]) 

726 constraints_manager.add_constraint(data_ID1="0", data_ID2="1", constraint_type="CANNOT_LINK") 

727 constraints_manager.add_constraint(data_ID1="0", data_ID2="2", constraint_type="CANNOT_LINK") 

728 constraints_manager.add_constraint(data_ID1="0", data_ID2="3", constraint_type="CANNOT_LINK") 

729 constraints_manager.add_constraint(data_ID1="0", data_ID2="4", constraint_type="CANNOT_LINK") 

730 constraints_manager.add_constraint(data_ID1="0", data_ID2="5", constraint_type="CANNOT_LINK") 

731 constraints_manager.add_constraint(data_ID1="0", data_ID2="6", constraint_type="CANNOT_LINK") 

732 constraints_manager.add_constraint(data_ID1="0", data_ID2="7", constraint_type="CANNOT_LINK") 

733 constraints_manager.add_constraint(data_ID1="0", data_ID2="8", constraint_type="CANNOT_LINK") 

734 constraints_manager.add_constraint(data_ID1="1", data_ID2="2", constraint_type="CANNOT_LINK") 

735 constraints_manager.add_constraint(data_ID1="1", data_ID2="3", constraint_type="CANNOT_LINK") 

736 constraints_manager.add_constraint(data_ID1="1", data_ID2="4", constraint_type="CANNOT_LINK") 

737 constraints_manager.add_constraint(data_ID1="1", data_ID2="5", constraint_type="CANNOT_LINK") 

738 constraints_manager.add_constraint(data_ID1="1", data_ID2="6", constraint_type="CANNOT_LINK") 

739 constraints_manager.add_constraint(data_ID1="1", data_ID2="7", constraint_type="CANNOT_LINK") 

740 constraints_manager.add_constraint(data_ID1="1", data_ID2="8", constraint_type="CANNOT_LINK") 

741 constraints_manager.add_constraint(data_ID1="2", data_ID2="3", constraint_type="CANNOT_LINK") 

742 constraints_manager.add_constraint(data_ID1="2", data_ID2="4", constraint_type="CANNOT_LINK") 

743 constraints_manager.add_constraint(data_ID1="2", data_ID2="5", constraint_type="CANNOT_LINK") 

744 constraints_manager.add_constraint(data_ID1="2", data_ID2="6", constraint_type="CANNOT_LINK") 

745 constraints_manager.add_constraint(data_ID1="2", data_ID2="7", constraint_type="CANNOT_LINK") 

746 constraints_manager.add_constraint(data_ID1="2", data_ID2="8", constraint_type="CANNOT_LINK") 

747 constraints_manager.add_constraint(data_ID1="3", data_ID2="4", constraint_type="CANNOT_LINK") 

748 constraints_manager.add_constraint(data_ID1="3", data_ID2="5", constraint_type="CANNOT_LINK") 

749 constraints_manager.add_constraint(data_ID1="3", data_ID2="6", constraint_type="CANNOT_LINK") 

750 constraints_manager.add_constraint(data_ID1="3", data_ID2="7", constraint_type="CANNOT_LINK") 

751 constraints_manager.add_constraint(data_ID1="3", data_ID2="8", constraint_type="CANNOT_LINK") 

752 constraints_manager.add_constraint(data_ID1="4", data_ID2="5", constraint_type="CANNOT_LINK") 

753 constraints_manager.add_constraint(data_ID1="4", data_ID2="6", constraint_type="CANNOT_LINK") 

754 constraints_manager.add_constraint(data_ID1="4", data_ID2="7", constraint_type="CANNOT_LINK") 

755 constraints_manager.add_constraint(data_ID1="4", data_ID2="8", constraint_type="CANNOT_LINK") 

756 constraints_manager.add_constraint(data_ID1="5", data_ID2="6", constraint_type="CANNOT_LINK") 

757 constraints_manager.add_constraint(data_ID1="5", data_ID2="7", constraint_type="CANNOT_LINK") 

758 constraints_manager.add_constraint(data_ID1="5", data_ID2="8", constraint_type="CANNOT_LINK") 

759 constraints_manager.add_constraint(data_ID1="6", data_ID2="7", constraint_type="CANNOT_LINK") 

760 constraints_manager.add_constraint(data_ID1="6", data_ID2="8", constraint_type="CANNOT_LINK") 

761 constraints_manager.add_constraint(data_ID1="7", data_ID2="8", constraint_type="CANNOT_LINK") 

762 

763 # Initialize a `HierarchicalConstrainedClustering` instance. 

764 clustering_model = HierarchicalConstrainedClustering( 

765 linkage="average", 

766 ) 

767 

768 # Run clustering. 

769 clustering_model.cluster( 

770 constraints_manager=constraints_manager, 

771 vectors=vectors, 

772 nb_clusters=5, 

773 ) 

774 assert clustering_model.dict_of_predicted_clusters 

775 assert clustering_model.dict_of_predicted_clusters == { 

776 "0": 0, 

777 "1": 1, 

778 "2": 2, 

779 "3": 3, 

780 "4": 4, 

781 "5": 5, 

782 "6": 6, 

783 "7": 7, 

784 "8": 8, 

785 } 

786 

787 

788# ============================================================================== 

789# test_HierarchicalConstrainedClustering_cluster_end_cases_with_too_many_clusters 

790# ============================================================================== 

def test_HierarchicalConstrainedClustering_cluster_end_cases_with_too_many_clusters():
    """
    Check that `clustering.hierarchical.HierarchicalConstrainedClustering` still returns a clustering when `nb_clusters` is larger than the number of data.
    """

    # Nine data (IDs "0" to "8") described by sparse vectors; no constraint is annotated.
    vectors = {
        "0": csr_matrix([1.00, 0.00, 0.00, 0.00]),
        "1": csr_matrix([0.95, 0.02, 0.02, 0.01]),
        "2": csr_matrix([0.98, 0.00, 0.02, 0.00]),
        "3": csr_matrix([0.99, 0.00, 0.01, 0.00]),
        "4": csr_matrix([0.50, 0.22, 0.21, 0.07]),
        "5": csr_matrix([0.50, 0.21, 0.22, 0.07]),
        "6": csr_matrix([0.01, 0.01, 0.01, 0.97]),
        "7": csr_matrix([0.00, 0.01, 0.00, 0.99]),
        "8": csr_matrix([0.00, 0.00, 0.00, 1.00]),
    }
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Build the model with the "average" linkage.
    model = HierarchicalConstrainedClustering(linkage="average")

    # Ask for 99 clusters although only 9 data are available.
    predicted = model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
        nb_clusters=99,
    )

    # Each data is expected alone in its own cluster: "0" -> 0, ..., "8" -> 8.
    expected = {str(index): index for index in range(9)}

    # The result must be stored on the model and returned by `cluster`.
    assert model.dict_of_predicted_clusters
    assert predicted == expected

833 

834 

835# ============================================================================== 

836# test_Cluster_for_inconsistent_children_and_members 

837# ============================================================================== 

def test_Cluster_for_inconsistent_children_and_members():
    """
    Check that `clustering.hierarchical.Cluster` initialization raises a `ValueError` when `children` and `members` are both unset or both set.
    """

    # Five data described by 2-dimensional sparse vectors.
    vectors = {
        "0": csr_matrix([1.00, 0.00]),
        "1": csr_matrix([0.99, 0.01]),
        "2": csr_matrix([0.02, 0.98]),
        "3": csr_matrix([0.01, 0.99]),
        "4": csr_matrix([0.00, 1.00]),
    }

    # Both failure modes are expected to report the same message fragment.
    expected_message = "by `children` setting or by `members` setting"

    # Case 1: neither `children` nor `members` is given.
    with pytest.raises(ValueError, match=expected_message):
        Cluster(vectors=vectors, cluster_ID=2, clustering_iteration=1, children=None, members=None)

    # Case 2: `children` and `members` are given together.
    # The children are built inside the `raises` block, exactly as the parent is.
    with pytest.raises(ValueError, match=expected_message):
        first_child = Cluster(
            vectors=vectors,
            cluster_ID=0,
            clustering_iteration=0,
            members=["0", "1"],
        )
        second_child = Cluster(
            vectors=vectors,
            cluster_ID=1,
            clustering_iteration=0,
            members=["2", "3", "4"],
        )
        Cluster(
            vectors=vectors,
            cluster_ID=2,
            clustering_iteration=1,
            children=[first_child, second_child],
            members=["5", "6", "7", "8", "9"],
        )

878 

879 

880# ============================================================================== 

881# test_Cluster_add_new_children 

882# ============================================================================== 

def test_Cluster_add_new_children():
    """
    Check that `clustering.hierarchical.Cluster.add_new_children` updates the members, the clustering iteration and the size of a `Cluster`.
    """

    # Five data described by 2-dimensional sparse vectors.
    vectors = {
        "0": csr_matrix([1.00, 0.00]),
        "1": csr_matrix([0.99, 0.01]),
        "2": csr_matrix([0.02, 0.98]),
        "3": csr_matrix([0.01, 0.99]),
        "4": csr_matrix([0.00, 1.00]),
    }

    # Start with a parent cluster that has a single child holding "0" and "1".
    initial_child = Cluster(
        vectors=vectors,
        cluster_ID=0,
        clustering_iteration=0,
        members=["0", "1"],
    )
    clusters = Cluster(
        vectors=vectors,
        cluster_ID=2,
        clustering_iteration=1,
        children=[initial_child],
    )

    # State before the update.
    assert clusters.members == ["0", "1"]
    assert clusters.clustering_iteration == 1
    assert clusters.get_cluster_size() == 2

    # Attach a second child holding "2", "3" and "4", at iteration 2.
    extra_child = Cluster(
        vectors=vectors,
        cluster_ID=1,
        clustering_iteration=0,
        members=["2", "3", "4"],
    )
    clusters.add_new_children(new_children=[extra_child], new_clustering_iteration=2)

    # State after the update.
    assert clusters.members == ["0", "1", "2", "3", "4"]
    assert clusters.clustering_iteration == 2
    assert clusters.get_cluster_size() == 5

931 

932 

933# ============================================================================== 

934# test_Cluster_to_dict 

935# ============================================================================== 

def test_Cluster_to_dict():
    """
    Check that `clustering.hierarchical.Cluster.to_dict` exports a `Cluster` and its children as nested dictionaries.
    """

    # Five data described by 2-dimensional sparse vectors.
    vectors = {
        "0": csr_matrix([1.00, 0.00]),
        "1": csr_matrix([0.99, 0.01]),
        "2": csr_matrix([0.02, 0.98]),
        "3": csr_matrix([0.01, 0.99]),
        "4": csr_matrix([0.00, 1.00]),
    }

    # Two leaf clusters defined by their members...
    left_leaf = Cluster(
        vectors=vectors,
        cluster_ID=0,
        clustering_iteration=0,
        members=["0", "1"],
    )
    right_leaf = Cluster(
        vectors=vectors,
        cluster_ID=1,
        clustering_iteration=0,
        members=["2", "3", "4"],
    )

    # ... grouped under one parent cluster defined by its children.
    clusters = Cluster(
        vectors=vectors,
        cluster_ID=2,
        clustering_iteration=1,
        children=[left_leaf, right_leaf],
    )

    # Expected export of each leaf.
    expected_left = {
        "cluster_ID": 0,
        "clustering_iteration": 0,
        "children": [],
        "cluster_inverse_depth": 0,
        "members": ["0", "1"],
    }
    expected_right = {
        "cluster_ID": 1,
        "clustering_iteration": 0,
        "children": [],
        "cluster_inverse_depth": 0,
        "members": ["2", "3", "4"],
    }

    # Expected export of the parent, embedding both leaves.
    dict_expected = {
        "cluster_ID": 2,
        "clustering_iteration": 1,
        "children": [expected_left, expected_right],
        "cluster_inverse_depth": 1,
        "members": ["0", "1", "2", "3", "4"],
    }

    assert clusters.to_dict() == dict_expected