Coverage for tests\sampling\test_factory_random_in_same_cluster.py: 100.00%

44 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-17 13:31 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: interactive-clustering/tests/sampling/test_factory_random_in_same_cluster.py 

5* Description: Unittests for the `sampling.clusters_based` module, `"random_in_same_cluster"` sampler. 

6* Author: Erwan SCHILD 

7* Created: 17/03/2021 

8* Licence: CeCILL (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORT PYTHON DEPENDENCIES 

13# ============================================================================== 

14 

15import pytest 

16 

17from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager 

18from cognitivefactory.interactive_clustering.sampling.clusters_based import ClustersBasedConstraintsSampling 

19 

20 

21# ============================================================================== 

22# test_factory_random_in_same_cluster_sampler_for_correct_settings 

23# ============================================================================== 

def test_factory_random_in_same_cluster_sampler_for_correct_settings():
    """
    Check that a sampler restricted to `"same_cluster"` can be built with valid settings.
    """

    # Gather the settings first, then build the sampler from them.
    settings = {
        "clusters_restriction": "same_cluster",
        "random_seed": 1,
    }
    sampler = ClustersBasedConstraintsSampling(**settings)

    # The instance must be truthy and must expose the seed it was given.
    assert sampler
    assert sampler.random_seed == 1

37 

38 

39# ============================================================================== 

40# test_factory_random_in_same_cluster_sampler_sample_for_incorrect_constraints_manager 

41# ============================================================================== 

def test_factory_random_in_same_cluster_sampler_sample_for_incorrect_constraints_manager():
    """
    Check that sampling raises `ValueError` when `constraints_manager` is not valid.
    """

    # Build a sampler restricted to pairs of data from the same cluster.
    sampler = ClustersBasedConstraintsSampling(clusters_restriction="same_cluster", random_seed=1)

    # `None` is passed as `constraints_manager`: the raised message has to mention that argument.
    with pytest.raises(ValueError, match="`constraints_manager`"):
        sampler.sample(constraints_manager=None, nb_to_select=None)

59 

60 

61# ============================================================================== 

62# test_factory_random_in_same_cluster_sampler_sample_for_incorrect_nb_to_select 

63# ============================================================================== 

def test_factory_random_in_same_cluster_sampler_sample_for_incorrect_nb_to_select():
    """
    Check that sampling raises `ValueError` when `nb_to_select` is not valid.
    """

    # Build a sampler restricted to pairs of data from the same cluster.
    sampler = ClustersBasedConstraintsSampling(clusters_restriction="same_cluster", random_seed=1)

    # Data IDs shared by both checks; a fresh list is handed to each constraints manager.
    data_ids = ("bonjour", "salut", "coucou", "au revoir", "a bientôt")

    # A missing value, then a negative value: each one has to be rejected.
    for invalid_nb_to_select in (None, -99):
        with pytest.raises(ValueError, match="`nb_to_select`"):
            sampler.sample(
                constraints_manager=BinaryConstraintsManager(list_of_data_IDs=list(data_ids)),
                nb_to_select=invalid_nb_to_select,
            )

104 

105 

106# ============================================================================== 

107# test_factory_random_in_same_cluster_sampler_sample_for_zero_nb_to_select 

108# ============================================================================== 

def test_factory_random_in_same_cluster_sampler_sample_for_zero_nb_to_select():
    """
    Check that sampling returns an empty (falsy) result when `nb_to_select` is zero.
    """

    # Build a sampler restricted to pairs of data from the same cluster.
    sampler = ClustersBasedConstraintsSampling(clusters_restriction="same_cluster", random_seed=1)

    # Constraints manager without any annotated constraint.
    manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Ask for zero pairs: nothing should come back.
    selection = sampler.sample(constraints_manager=manager, nb_to_select=0)
    assert not selection

133 

134 

135# ============================================================================== 

136# test_factory_random_in_same_cluster_sampler_sample_for_incorrect_clustering_result 

137# ============================================================================== 

def test_factory_random_in_same_cluster_sampler_sample_for_incorrect_clustering_result():
    """
    Check that sampling raises `ValueError` or `KeyError` when `clustering_result` is not valid.
    """

    # Build a sampler restricted to pairs of data from the same cluster.
    sampler = ClustersBasedConstraintsSampling(clusters_restriction="same_cluster", random_seed=1)

    # Data IDs shared by both checks; a fresh list is handed to each constraints manager.
    data_ids = ("bonjour", "salut", "coucou", "au revoir", "a bientôt")

    # Case 1: `clustering_result` is a string instead of a mapping.
    with pytest.raises(ValueError, match="`clustering_result`"):
        sampler.sample(
            constraints_manager=BinaryConstraintsManager(list_of_data_IDs=list(data_ids)),
            nb_to_select=3,
            clustering_result="unknown",
        )

    # Case 2: `clustering_result` is a mapping, but none of its keys is a managed data ID.
    with pytest.raises(KeyError, match="'a bientôt'|'au revoir'|'bonjour'|'coucou'|'salut'"):
        sampler.sample(
            constraints_manager=BinaryConstraintsManager(list_of_data_IDs=list(data_ids)),
            nb_to_select=3,
            clustering_result={"first": 1, "second": 2},
        )

183 

184 

185# ============================================================================== 

186# test_factory_random_in_same_cluster_sampler_sample_for_empty_constraints_manager 

187# ============================================================================== 

def test_factory_random_in_same_cluster_sampler_sample_for_empty_constraints_manager():
    """
    Check the sampling output when the `constraints_manager` holds no annotated constraint.
    """

    # Build a sampler restricted to pairs of data from the same cluster.
    sampler = ClustersBasedConstraintsSampling(clusters_restriction="same_cluster", random_seed=1)

    # Constraints manager without any annotated constraint.
    manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )

    # Two clusters: {"bonjour", "salut", "coucou"} and {"au revoir", "a bientôt"}.
    clusters = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # Selection expected with `random_seed=1`.
    expected_selection = [("bonjour", "coucou"), ("coucou", "salut"), ("bonjour", "salut")]

    selection = sampler.sample(
        constraints_manager=manager,
        nb_to_select=3,
        clustering_result=clusters,
    )
    assert selection == expected_selection

219 

220 

221# ============================================================================== 

222# test_factory_random_in_same_cluster_sampler_sample_for_correct_constraints_manager 

223# ============================================================================== 

def test_factory_random_in_same_cluster_sampler_sample_for_correct_constraints_manager():
    """
    Check the sampling output when the `constraints_manager` already holds some constraints.
    """

    # Build a sampler restricted to pairs of data from the same cluster.
    sampler = ClustersBasedConstraintsSampling(clusters_restriction="same_cluster", random_seed=1)

    # Constraints manager with two annotated `"MUST_LINK"` constraints.
    constraints_manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )
    for first_id, second_id in (("bonjour", "salut"), ("au revoir", "a bientôt")):
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type="MUST_LINK")

    # Two clusters: {"bonjour", "salut", "coucou"} and {"au revoir", "a bientôt"}.
    clusters = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # Three pairs are requested, but only two are expected: the annotated pairs are not part of the selection.
    expected_selection = [("bonjour", "coucou"), ("coucou", "salut")]

    selection = sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=3,
        clustering_result=clusters,
    )
    assert selection == expected_selection

260 

261 

262# ============================================================================== 

263# test_factory_random_in_same_cluster_sampler_sample_for_full_annotated_constraints_manager 

264# ============================================================================== 

def test_factory_random_in_same_cluster_sampler_sample_for_full_annotated_constraints_manager():
    """
    Check that sampling returns an empty (falsy) result for a fully annotated `constraints_manager`.
    """

    # Build a sampler restricted to pairs of data from the same cluster.
    sampler = ClustersBasedConstraintsSampling(clusters_restriction="same_cluster", random_seed=1)

    # Constraints manager annotated with the four constraints below, added in this order.
    constraints_manager = BinaryConstraintsManager(
        list_of_data_IDs=["bonjour", "salut", "coucou", "au revoir", "a bientôt"],
    )
    annotations = (
        ("bonjour", "salut", "MUST_LINK"),
        ("bonjour", "coucou", "MUST_LINK"),
        ("bonjour", "au revoir", "CANNOT_LINK"),
        ("au revoir", "a bientôt", "MUST_LINK"),
    )
    for first_id, second_id, link_type in annotations:
        constraints_manager.add_constraint(data_ID1=first_id, data_ID2=second_id, constraint_type=link_type)

    # Two clusters: {"bonjour", "salut", "coucou"} and {"au revoir", "a bientôt"}.
    clusters = {
        "bonjour": 0,
        "salut": 0,
        "coucou": 0,
        "au revoir": 1,
        "a bientôt": 1,
    }

    # Three pairs are requested, but nothing is expected to be selected.
    selection = sampler.sample(
        constraints_manager=constraints_manager,
        nb_to_select=3,
        clustering_result=clusters,
    )
    assert not selection