Coverage for tests\sampling\test_factory_farthest_in_same_cluster.py: 100.00%

51 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-17 13:31 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: interactive-clustering/tests/sampling/test_factory_farthest_in_same_cluster.py 

5* Description: Unittests for the `sampling.clusters_based` module, `"farthest_in_same_cluster"` sampler. 

6* Author: Erwan SCHILD 

7* Created: 17/03/2021 

8* Licence: CeCILL (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORT PYTHON DEPENDENCIES 

13# ============================================================================== 

14 

15import pytest 

16from scipy.sparse import csr_matrix 

17 

18from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager 

19from cognitivefactory.interactive_clustering.sampling.clusters_based import ClustersBasedConstraintsSampling 

20 

21 

22# ============================================================================== 

23# test_factory_farhest_in_same_cluster_sampler_for_correct_settings 

24# ============================================================================== 

def test_factory_farhest_in_same_cluster_sampler_for_correct_settings():
    """
    Check that the `farthest_in_same_cluster` sampler can be instantiated with correct settings.

    The sampler is built with the `"same_cluster"` / `"farthest_neighbors"` restrictions and a fixed
    random seed; the resulting instance must be truthy and expose the seed it was given.
    """

    # Settings describing the sampler under test.
    settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # The instance exists and has kept the requested seed.
    assert sampler
    assert sampler.random_seed == 1

39 

40 

41# ============================================================================== 

42# test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_constraints_manager 

43# ============================================================================== 

def test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_constraints_manager():
    """
    Check that sampling raises `ValueError` for an incorrect `constraints_manager`.

    The incorrect value used here is `None`; the error message is expected to mention
    `constraints_manager`.
    """

    # Build the sampler under test.
    settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # Sampling without a constraints manager must be rejected.
    invalid_arguments = {"constraints_manager": None, "nb_to_select": None}
    with pytest.raises(ValueError, match="`constraints_manager`"):
        sampler.sample(**invalid_arguments)

62 

63 

64# ============================================================================== 

65# test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_nb_to_select 

66# ============================================================================== 

def test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_nb_to_select():
    """
    Check that sampling raises `ValueError` for an incorrect `nb_to_select`.

    Two incorrect values are exercised, `None` and `-99`; in both cases the error message is
    expected to mention `nb_to_select`.
    """

    # Build the sampler under test.
    settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # Each invalid value is checked against a freshly built constraints manager.
    for invalid_nb_to_select in (None, -99):
        manager = BinaryConstraintsManager(
            list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
        )
        with pytest.raises(ValueError, match="`nb_to_select`"):
            sampler.sample(
                constraints_manager=manager,
                nb_to_select=invalid_nb_to_select,
            )

108 

109 

110# ============================================================================== 

111# test_factory_farhest_in_same_cluster_sampler_sample_for_zero_nb_to_select 

112# ============================================================================== 

def test_factory_farhest_in_same_cluster_sampler_sample_for_zero_nb_to_select():
    """
    Check that sampling with `nb_to_select=0` gives back a falsy (empty) selection.
    """

    # Build the sampler under test.
    settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # Constraints manager over five data IDs, without any constraint.
    manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Asking for zero pairs must select nothing.
    selection = sampler.sample(constraints_manager=manager, nb_to_select=0)
    assert not selection

138 

139 

140# ============================================================================== 

141# test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_clustering_result 

142# ============================================================================== 

def test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_clustering_result():
    """
    Check that sampling rejects an incorrect `clustering_result`.

    Two cases are exercised:
        - a string instead of a dictionary: a `ValueError` mentioning `clustering_result` is expected;
        - a dictionary whose keys are not the managed data IDs: a `KeyError` naming one of the
          data IDs is expected.
    """

    def new_manager():
        # A fresh constraints manager over the five data IDs, without any constraint.
        return BinaryConstraintsManager(
            list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
        )

    # Build the sampler under test.
    settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # Case 1: `clustering_result` is a string.
    manager = new_manager()
    with pytest.raises(ValueError, match="`clustering_result`"):
        sampler.sample(
            constraints_manager=manager,
            nb_to_select=3,
            clustering_result="unknown",
        )

    # Case 2: `clustering_result` is a dictionary that knows none of the data IDs.
    manager = new_manager()
    unrelated_clustering = {"first": 1, "second": 2}
    coordinates = {
        "bonjour": [1.0, 0.0],
        "salut": [0.99, 0.0],
        "coucou": [0.8, 0.0],
        "au revoir": [0.0, 1.0],
        "a bientôt": [0.0, 0.9],
    }
    vectors = {data_id: csr_matrix(values) for data_id, values in coordinates.items()}
    with pytest.raises(KeyError, match="'a bientôt'|'au revoir'|'bonjour'|'coucou'|'salut'"):
        sampler.sample(
            constraints_manager=manager,
            nb_to_select=3,
            clustering_result=unrelated_clustering,
            vectors=vectors,
        )

196 

197 

198# ============================================================================== 

199# test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_vectors 

200# ============================================================================== 

def test_factory_farhest_in_same_cluster_sampler_sample_for_incorrect_vectors():
    """
    Check that sampling rejects incorrect `vectors`.

    Two cases are exercised:
        - a string instead of a dictionary: a `ValueError` mentioning `vectors` is expected;
        - a dictionary whose keys are not the managed data IDs: a `KeyError` naming one of the
          data IDs is expected.
    """

    def new_manager():
        # A fresh constraints manager over the five data IDs, without any constraint.
        return BinaryConstraintsManager(
            list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
        )

    def new_clustering():
        # Clustering used by both cases: three greetings in cluster 0, two farewells in cluster 1.
        return {
            "bonjour": 0,
            "salut": 0,
            "coucou": 0,
            "au revoir": 1,
            "a bientôt": 1,
        }

    # Build the sampler under test.
    settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # Case 1: `vectors` is a string.
    manager = new_manager()
    clustering = new_clustering()
    with pytest.raises(ValueError, match="`vectors`"):
        sampler.sample(
            constraints_manager=manager,
            nb_to_select=3,
            clustering_result=clustering,
            vectors="unknown",
        )

    # Case 2: `vectors` is a dictionary that knows none of the data IDs.
    manager = new_manager()
    clustering = new_clustering()
    unrelated_vectors = {"first": 1, "second": 2}
    with pytest.raises(KeyError, match="'a bientôt'|'au revoir'|'bonjour'|'coucou'|'salut'"):
        sampler.sample(
            constraints_manager=manager,
            nb_to_select=3,
            clustering_result=clustering,
            vectors=unrelated_vectors,
        )

261 

262 

263# ============================================================================== 

264# test_factory_farhest_in_same_cluster_sampler_sample_for_empty_constraints_manager 

265# ============================================================================== 

def test_factory_farhest_in_same_cluster_sampler_sample_for_empty_constraints_manager():
    """
    Check the selection made when the `constraints_manager` holds no constraint.

    With three data IDs in cluster 0, two in cluster 1 and `nb_to_select=3`, the expected selection
    is the three pairs listed at the end of the test, in that order.
    """

    # Build the sampler under test.
    settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # Constraints manager over five data IDs, without any constraint.
    manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Cluster assignment and sparse vector of each data ID.
    clustering = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }
    coordinates = {
        "bonjour": [1.0, 0.0],
        "salut": [0.99, 0.0],
        "coucou": [0.8, 0.0],
        "au revoir": [0.0, 0.9],
        "a bientôt": [0.0, 0.8],
    }
    vectors = {data_id: csr_matrix(values) for data_id, values in coordinates.items()}

    selection = sampler.sample(
        constraints_manager=manager,
        nb_to_select=3,
        clustering_result=clustering,
        vectors=vectors,
    )

    expected_selection = [
        ("bonjour", "coucou"),
        ("coucou", "salut"),
        ("a bientôt", "au revoir"),
    ]
    assert selection == expected_selection

309 

310 

311# ============================================================================== 

312# test_factory_farhest_in_same_cluster_sampler_sample_for_correct_constraints_manager 

313# ============================================================================== 

def test_factory_farhest_in_same_cluster_sampler_sample_for_correct_constraints_manager():
    """
    Check the selection made when the `constraints_manager` already holds some constraints.

    Two `MUST_LINK` constraints are registered beforehand ("bonjour"/"salut" and
    "au revoir"/"a bientôt"). With `nb_to_select=3`, the expected selection is the two pairs
    listed at the end of the test, in that order.
    """

    # Build the sampler under test.
    settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # Constraints manager over five data IDs, with two pairs already annotated.
    constraints_manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"]
    )
    known_constraints = [
        ("bonjour", "salut", "MUST_LINK"),
        ("au revoir", "a bientôt", "MUST_LINK"),
    ]
    for first_id, second_id, link in known_constraints:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=link)

    # Cluster assignment and sparse vector of each data ID.
    clustering = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }
    coordinates = {
        "bonjour": [1.0, 0.0],
        "salut": [0.99, 0.0],
        "coucou": [0.8, 0.0],
        "au revoir": [0.0, 0.9],
        "a bientôt": [0.0, 0.8],
    }
    vectors = {data_id: csr_matrix(values) for data_id, values in coordinates.items()}

    selection = sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=3,
        clustering_result=clustering,
        vectors=vectors,
    )

    expected_selection = [
        ("bonjour", "coucou"),
        ("coucou", "salut"),
    ]
    assert selection == expected_selection

361 

362 

363# ============================================================================== 

364# test_factory_farhest_in_same_cluster_sampler_sample_for_full_annotated_constraints_manager 

365# ============================================================================== 

def test_factory_farhest_in_same_cluster_sampler_sample_for_full_annotated_constraints_manager():
    """
    Check that sampling gives back a falsy (empty) selection for a fully annotated `constraints_manager`.

    Three `MUST_LINK` constraints and one `CANNOT_LINK` constraint are registered beforehand;
    with `nb_to_select=3`, nothing is expected to be selected.
    """

    # Build the sampler under test.
    settings = {
        "clusters_restriction": "same_cluster",
        "distance_restriction": "farthest_neighbors",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # Constraints manager over five data IDs, with the annotations registered below.
    constraints_manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"]
    )
    known_constraints = [
        ("bonjour", "salut", "MUST_LINK"),
        ("bonjour", "coucou", "MUST_LINK"),
        ("bonjour", "au revoir", "CANNOT_LINK"),
        ("au revoir", "a bientôt", "MUST_LINK"),
    ]
    for first_id, second_id, link in known_constraints:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=link)

    # Cluster assignment and sparse vector of each data ID.
    clustering = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }
    coordinates = {
        "bonjour": [1.0, 0.0],
        "salut": [0.99, 0.0],
        "coucou": [0.8, 0.0],
        "au revoir": [0.0, 0.9],
        "a bientôt": [0.0, 0.8],
    }
    vectors = {data_id: csr_matrix(values) for data_id, values in coordinates.items()}

    selection = sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=3,
        clustering_result=clustering,
        vectors=vectors,
    )
    assert not selection