Coverage for tests\sampling\test_factory_closest_in_different_clusters.py: 100.00%

51 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-17 13:31 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: interactive-clustering/tests/sampling/test_factory_closest_in_different_clusters.py 

5* Description: Unittests for the `sampling.clusters_based` module, `"closest_in_different_clusters"` sampler. 

6* Author: Erwan SCHILD 

7* Created: 17/03/2021 

8* Licence: CeCILL (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORT PYTHON DEPENDENCIES 

13# ============================================================================== 

14 

15import pytest 

16from scipy.sparse import csr_matrix 

17 

18from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager 

19from cognitivefactory.interactive_clustering.sampling.clusters_based import ClustersBasedConstraintsSampling 

20 

21 

22# ============================================================================== 

23# test_factory_closest_in_different_clusters_sampler_for_correct_settings 

24# ============================================================================== 

def test_factory_closest_in_different_clusters_sampler_for_correct_settings():
    """
    Check that a `closest_in_different_clusters` sampler can be built from valid settings.
    """

    # Settings of the `closest_in_different_clusters` sampler.
    settings = {
        "clusters_restriction": "different_clusters",
        "distance_restriction": "closest_neighbors",
        "random_seed": 1,
    }
    closest_sampler = ClustersBasedConstraintsSampling(**settings)

    # The instance must be truthy and must expose the seed it was given.
    assert closest_sampler
    assert closest_sampler.random_seed == 1

37 

38 

39# ============================================================================== 

40# test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_constraints_manager 

41# ============================================================================== 

def test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_constraints_manager():
    """
    Check that sampling with an invalid `constraints_manager` raises a `ValueError`.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        clusters_restriction="different_clusters",
        distance_restriction="closest_neighbors",
        random_seed=1,
    )

    # `None` is passed instead of a manager: the error message has to name the parameter.
    expected_error = pytest.raises(ValueError, match="`constraints_manager`")
    with expected_error:
        closest_sampler.sample(constraints_manager=None, nb_to_select=None)

58 

59 

60# ============================================================================== 

61# test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_nb_to_select 

62# ============================================================================== 

def test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_nb_to_select():
    """
    Check that sampling with an invalid `nb_to_select` raises a `ValueError`.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        clusters_restriction="different_clusters",
        distance_restriction="closest_neighbors",
        random_seed=1,
    )

    # Data IDs shared by both checks (copied into a new list for each manager).
    data_ids = ("bonjour", "salut", "coucou", "au revoir", "a bientôt")

    # A missing value, then a negative value: each one must be rejected.
    for invalid_nb_to_select in (None, -99):
        with pytest.raises(ValueError, match="`nb_to_select`"):
            closest_sampler.sample(
                constraints_manager=BinaryConstraintsManager(list_of_data_IDs=list(data_ids)),
                nb_to_select=invalid_nb_to_select,
            )

102 

103 

104# ============================================================================== 

105# test_factory_closest_in_different_clusters_sampler_sample_for_zero_nb_to_select 

106# ============================================================================== 

def test_factory_closest_in_different_clusters_sampler_sample_for_zero_nb_to_select():
    """
    Check that sampling with `nb_to_select` set to zero returns an empty (falsy) selection.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        clusters_restriction="different_clusters",
        distance_restriction="closest_neighbors",
        random_seed=1,
    )

    # Manager over five data IDs, without any constraint.
    manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Asking for zero pairs must yield nothing.
    selection = closest_sampler.sample(constraints_manager=manager, nb_to_select=0)
    assert not selection

130 

131 

132# ============================================================================== 

133# test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_clustering_result 

134# ============================================================================== 

def test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_clustering_result():
    """
    Check that sampling with an invalid `clustering_result` raises a `ValueError` or a `KeyError`.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        clusters_restriction="different_clusters",
        distance_restriction="closest_neighbors",
        random_seed=1,
    )

    # Data IDs shared by both checks (copied into a new list for each manager).
    data_ids = ("bonjour", "salut", "coucou", "au revoir", "a bientôt")

    # Case 1: `clustering_result` is a string instead of a mapping.
    with pytest.raises(ValueError, match="`clustering_result`"):
        manager = BinaryConstraintsManager(list_of_data_IDs=list(data_ids))
        closest_sampler.sample(
            constraints_manager=manager,
            nb_to_select=3,
            clustering_result="unknown",
        )

    # Case 2: `clustering_result` is a mapping whose keys are not the managed data IDs.
    unrelated_clustering = {"first": 1, "second": 2}
    coordinates = (
        ("bonjour", [1.0, 0.0]),
        ("salut", [0.99, 0.0]),
        ("coucou", [0.8, 0.0]),
        ("au revoir", [0.0, 1.0]),
        ("a bientôt", [0.0, 0.9]),
    )
    vectors = {data_id: csr_matrix(coords) for data_id, coords in coordinates}

    # The raised `KeyError` must mention one of the managed data IDs.
    with pytest.raises(KeyError, match="'a bientôt'|'au revoir'|'bonjour'|'coucou'|'salut'"):
        manager = BinaryConstraintsManager(list_of_data_IDs=list(data_ids))
        closest_sampler.sample(
            constraints_manager=manager,
            nb_to_select=3,
            clustering_result=unrelated_clustering,
            vectors=vectors,
        )

186 

187 

188# ============================================================================== 

189# test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_vectors 

190# ============================================================================== 

def test_factory_closest_in_different_clusters_sampler_sample_for_incorrect_vectors():
    """
    Check that sampling with invalid `vectors` raises a `ValueError` or a `KeyError`.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        clusters_restriction="different_clusters",
        distance_restriction="closest_neighbors",
        random_seed=1,
    )

    # Data IDs and cluster labels shared by both checks.
    data_ids = ("bonjour", "salut", "coucou", "au revoir", "a bientôt")
    cluster_labels = (0, 0, 0, 1, 1)

    # Case 1: `vectors` is a string instead of a mapping.
    with pytest.raises(ValueError, match="`vectors`"):
        manager = BinaryConstraintsManager(list_of_data_IDs=list(data_ids))
        closest_sampler.sample(
            constraints_manager=manager,
            nb_to_select=3,
            clustering_result=dict(zip(data_ids, cluster_labels)),
            vectors="unknown",
        )

    # Case 2: `vectors` is a mapping whose keys are not the managed data IDs.
    # The raised `KeyError` must mention one of the managed data IDs.
    with pytest.raises(KeyError, match="'a bientôt'|'au revoir'|'bonjour'|'coucou'|'salut'"):
        manager = BinaryConstraintsManager(list_of_data_IDs=list(data_ids))
        closest_sampler.sample(
            constraints_manager=manager,
            nb_to_select=3,
            clustering_result=dict(zip(data_ids, cluster_labels)),
            vectors={"first": 1, "second": 2},
        )

249 

250 

251# ============================================================================== 

252# test_factory_closest_in_different_clusters_sampler_sample_for_empty_constraints_manager 

253# ============================================================================== 

def test_factory_closest_in_different_clusters_sampler_sample_for_empty_constraints_manager():
    """
    Check the pairs sampled when the `constraints_manager` holds no constraint.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        clusters_restriction="different_clusters",
        distance_restriction="closest_neighbors",
        random_seed=1,
    )

    # Manager over five data IDs, without any constraint.
    data_ids = ["bonjour", "salut", "coucou", "au revoir", "a bientôt"]
    manager = BinaryConstraintsManager(list_of_data_IDs=data_ids)

    # Two clusters: the first three data IDs in cluster 0, the last two in cluster 1.
    clustering_result = dict(zip(data_ids, (0, 0, 0, 1, 1)))

    # Cluster 0 vectors lie on the first axis, cluster 1 vectors on the second axis.
    coordinates = (
        ("bonjour", [1.0, 0.0]),
        ("salut", [0.99, 0.0]),
        ("coucou", [0.8, 0.0]),
        ("au revoir", [0.0, 0.9]),
        ("a bientôt", [0.0, 0.8]),
    )
    vectors = {data_id: csr_matrix(coords) for data_id, coords in coordinates}

    # Sample three pairs and compare them, in order, with the expected selection.
    selection = closest_sampler.sample(
        constraints_manager=manager,
        nb_to_select=3,
        clustering_result=clustering_result,
        vectors=vectors,
    )
    expected_selection = [
        ("a bientôt", "coucou"),
        ("au revoir", "coucou"),
        ("a bientôt", "salut"),
    ]
    assert selection == expected_selection

295 

296 

297# ============================================================================== 

298# test_factory_closest_in_different_clusters_sampler_sample_for_correct_constraints_manager 

299# ============================================================================== 

def test_factory_closest_in_different_clusters_sampler_sample_for_correct_constraints_manager():
    """
    Check the pairs sampled when the `constraints_manager` already holds some constraints.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        clusters_restriction="different_clusters",
        distance_restriction="closest_neighbors",
        random_seed=1,
    )

    # Manager over five data IDs, with one `MUST_LINK` constraint inside each cluster.
    data_ids = ["bonjour", "salut", "coucou", "au revoir", "a bientôt"]
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=data_ids)
    for first_id, second_id in (("bonjour", "salut"), ("au revoir", "a bientôt")):
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type="MUST_LINK")

    # Two clusters: the first three data IDs in cluster 0, the last two in cluster 1.
    clustering_result = dict(zip(data_ids, (0, 0, 0, 1, 1)))

    # Cluster 0 vectors lie on the first axis, cluster 1 vectors on the second axis.
    coordinates = (
        ("bonjour", [1.0, 0.0]),
        ("salut", [0.99, 0.0]),
        ("coucou", [0.8, 0.0]),
        ("au revoir", [0.0, 0.9]),
        ("a bientôt", [0.0, 0.8]),
    )
    vectors = {data_id: csr_matrix(coords) for data_id, coords in coordinates}

    # Sample three pairs and compare them, in order, with the expected selection.
    selection = closest_sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=3,
        clustering_result=clustering_result,
        vectors=vectors,
    )
    expected_selection = [
        ("a bientôt", "coucou"),
        ("au revoir", "coucou"),
        ("a bientôt", "salut"),
    ]
    assert selection == expected_selection

346 

347 

348# ============================================================================== 

349# test_factory_closest_in_different_clusters_sampler_sample_for_full_annotated_constraints_manager 

350# ============================================================================== 

def test_factory_closest_in_different_clusters_sampler_sample_for_full_annotated_constraints_manager():
    """
    Check that nothing is sampled when the `constraints_manager` is fully annotated.
    """

    # Build the sampler under test.
    closest_sampler = ClustersBasedConstraintsSampling(
        clusters_restriction="different_clusters",
        distance_restriction="closest_neighbors",
        random_seed=1,
    )

    # Manager over five data IDs, then the constraints are added in the listed order.
    data_ids = ["bonjour", "salut", "coucou", "au revoir", "a bientôt"]
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=data_ids)
    annotations = (
        ("bonjour", "salut", "MUST_LINK"),
        ("bonjour", "coucou", "MUST_LINK"),
        ("bonjour", "au revoir", "CANNOT_LINK"),
        ("au revoir", "a bientôt", "MUST_LINK"),
    )
    for first_id, second_id, link_type in annotations:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=link_type)

    # Two clusters: the first three data IDs in cluster 0, the last two in cluster 1.
    clustering_result = dict(zip(data_ids, (0, 0, 0, 1, 1)))

    # Cluster 0 vectors lie on the first axis, cluster 1 vectors on the second axis.
    coordinates = (
        ("bonjour", [1.0, 0.0]),
        ("salut", [0.99, 0.0]),
        ("coucou", [0.8, 0.0]),
        ("au revoir", [0.0, 0.9]),
        ("a bientôt", [0.0, 0.8]),
    )
    vectors = {data_id: csr_matrix(coords) for data_id, coords in coordinates}

    # The selection is expected to be empty (falsy) for this manager.
    selection = closest_sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=3,
        clustering_result=clustering_result,
        vectors=vectors,
    )
    assert not selection