Coverage for tests\clustering\test_dbscan.py: 100.00%

54 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-17 13:31 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: interactive-clustering/tests/clustering/test_dbscan.py 

5* Description: Unittests for the `clustering.dbscan` module. 

6* Author: Marc TRUTT, Esther LENOTRE, David NICOLAZO 

7* Created: 31/10/2022 

8* Licence: CeCILL (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORT PYTHON DEPENDENCIES 

13# ============================================================================== 

14 

15import math 

16 

17import numpy as np 

18import pytest 

19from scipy.sparse import csr_matrix 

20 

21from cognitivefactory.interactive_clustering.constraints.binary import BinaryConstraintsManager 

22from src.cognitivefactory.interactive_clustering.clustering.dbscan import DBScanConstrainedClustering 

23 

24 

25# ============================================================================== 

26# test_DBScanConstrainedClustering_for_inconsistent_eps 

27# ============================================================================== 

def test_DBScanConstrainedClustering_for_inconsistent_eps():
    """
    Check that the `clustering.dbscan.DBScanConstrainedClustering` initialization raises a `ValueError` for an inconsistent `eps` parameter.
    """

    # A negative `eps` has to be rejected, with an error message that mentions `eps`.
    invalid_settings = {"eps": -1}
    with pytest.raises(ValueError, match="`eps`"):
        DBScanConstrainedClustering(**invalid_settings)

38 

39 

40# ============================================================================== 

41# test_DBScanConstrainedClustering_for_inconsistent_min_samples 

42# ============================================================================== 

def test_DBScanConstrainedClustering_for_inconsistent_min_samples():
    """
    Check that the `clustering.dbscan.DBScanConstrainedClustering` initialization raises a `ValueError` for an inconsistent `min_samples` parameter.
    """

    # A negative `min_samples` has to be rejected, with an error message that mentions `min_samples`.
    invalid_settings = {"min_samples": -1}
    with pytest.raises(ValueError, match="`min_samples`"):
        DBScanConstrainedClustering(**invalid_settings)

53 

54 

55# ============================================================================== 

56# test_DBScanConstrainedClustering_for_correct_settings 

57# ============================================================================== 

def test_DBScanConstrainedClustering_for_correct_settings():
    """
    Check that the `clustering.dbscan.DBScanConstrainedClustering` initialization succeeds with consistent settings and keeps them.
    """

    # Settings used for the initialization and checked afterwards.
    expected_eps = 0.5
    expected_min_samples = 3

    # Build the model with these settings.
    clustering_model = DBScanConstrainedClustering(eps=expected_eps, min_samples=expected_min_samples)

    # The instance must be truthy and expose the settings it was given.
    assert clustering_model
    assert math.isclose(clustering_model.eps, expected_eps)
    assert clustering_model.min_samples == expected_min_samples

71 

72 

73# ============================================================================== 

74# test_DBScanConstrainedClustering_cluster_for_inconsistent_constraints_manager 

75# ============================================================================== 

def test_DBScanConstrainedClustering_cluster_for_inconsistent_constraints_manager():
    """
    Check that the `clustering.dbscan.DBScanConstrainedClustering` clustering raises a `ValueError` for an inconsistent `constraints_manager` parameter.
    """

    # Build a `DBScanConstrainedClustering` instance without explicit settings.
    clustering_model = DBScanConstrainedClustering()

    # `None` is not an acceptable `constraints_manager`: the error message has to mention it.
    invalid_arguments = {"constraints_manager": None, "vectors": None}
    with pytest.raises(ValueError, match="`constraints_manager`"):
        clustering_model.cluster(**invalid_arguments)

90 

91 

92# ============================================================================== 

93# test_DBScanConstrainedClustering_cluster_for_inconsistent_vectors 

94# ============================================================================== 

def test_DBScanConstrainedClustering_cluster_for_inconsistent_vectors():
    """
    Check that the `clustering.dbscan.DBScanConstrainedClustering` clustering raises a `ValueError` for an inconsistent `vectors` parameter.
    """

    # Build a `DBScanConstrainedClustering` instance without explicit settings.
    clustering_model = DBScanConstrainedClustering()

    # A valid constraints manager, so that only `vectors` is inconsistent.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])

    # `None` is not an acceptable `vectors` value: the error message has to mention it.
    with pytest.raises(ValueError, match="`vectors`"):
        clustering_model.cluster(constraints_manager=constraints_manager, vectors=None)

109 

110 

111# ============================================================================== 

112# test_DBScanConstrainedClustering_cluster_for_inconsistent_nb_clusters 

113# ============================================================================== 

def test_DBScanConstrainedClustering_cluster_for_inconsistent_nb_clusters():
    """
    Check that the `clustering.dbscan.DBScanConstrainedClustering` clustering raises a `ValueError` for an inconsistent `nb_clusters` parameter.
    """

    # Build a `DBScanConstrainedClustering` instance without explicit settings.
    clustering_model = DBScanConstrainedClustering()

    # Valid `constraints_manager` and `vectors` (1D array, 2D array and sparse matrix), so that only `nb_clusters` is at fault.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=["first", "second", "third"])
    vectors = {
        "first": np.array([1, 2, 3]),
        "second": np.array([[4, 5, 6]]),
        "third": csr_matrix([7, 8, 9]),
    }

    # Requesting `nb_clusters=4` has to be rejected, with an error message that mentions `nb_clusters`.
    with pytest.raises(ValueError, match="`nb_clusters`"):
        clustering_model.cluster(
            constraints_manager=constraints_manager,
            vectors=vectors,
            nb_clusters=4,
        )

129 

130 

131# ============================================================================== 

132# test_DBScanConstrainedClustering_cluster_with_no_constraints_1 

133# ============================================================================== 

def test_DBScanConstrainedClustering_cluster_with_no_constraints_1():
    """
    Check that the `clustering.dbscan.DBScanConstrainedClustering` clustering works when no constraint is defined.
    """

    # Define `vectors`: each data ID is the position of its row, as a string ("0" to "11").
    rows = [
        [1.00, 0.00, 0.00, 0.00],
        [0.00, 0.43, 0.00, 0.00],
        [0.00, 0.00, 0.29, 0.00],
        [0.00, 0.00, 0.50, 0.00],
        [0.00, 0.00, 0.00, 0.98],
        [0.00, 0.00, 0.33, 0.00],
        [0.00, 0.00, 0.00, 1.40],
        [0.80, 0.00, 0.00, 0.00],
        [0.00, 0.54, 0.00, 0.00],
        [0.00, 0.00, 0.00, 1.10],
        [1.10, 0.00, 0.00, 0.00],
        [0.00, 0.49, 0.00, 0.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # Define a `constraints_manager` over these data IDs, without any constraint.
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))

    # Initialize a `DBScanConstrainedClustering` instance.
    clustering_model = DBScanConstrainedClustering(eps=0.5, min_samples=3)

    # Run the clustering with no constraints.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
    )

    # Expected result: four clusters of three data IDs each.
    expected_clusters = {
        **dict.fromkeys(["0", "7", "10"], 0),
        **dict.fromkeys(["1", "8", "11"], 1),
        **dict.fromkeys(["2", "3", "5"], 2),
        **dict.fromkeys(["4", "6", "9"], 3),
    }

    # The result has to be stored on the model and returned to the caller.
    assert clustering_model.dict_of_predicted_clusters
    assert dict_of_predicted_clusters == expected_clusters

181 

182 

183# ============================================================================== 

184# test_DBScanConstrainedClustering_cluster_with_no_constraints_2 

185# ============================================================================== 

def test_DBScanConstrainedClustering_cluster_with_no_constraints_2():
    """
    Check that the `clustering.dbscan.DBScanConstrainedClustering` clustering works when no constraint is defined and some data are noise.
    """

    # Define `vectors` and `constraints_manager` (no constraint is added).
    vectors = {
        "0": csr_matrix([2.00, 0.00, 0.00, 0.00]),
        "1": csr_matrix([0.00, 0.43, 0.00, 0.00]),
        "2": csr_matrix([0.00, 0.00, 0.29, 0.00]),
        "3": csr_matrix([0.00, 0.00, 0.50, 0.00]),
        "4": csr_matrix([0.00, 0.00, 0.00, 0.98]),
        "5": csr_matrix([0.00, 0.00, 0.33, 0.00]),
        "6": csr_matrix([0.00, 0.00, 0.00, 1.40]),
        "7": csr_matrix([0.80, 0.00, 0.00, 0.00]),
        "8": csr_matrix([0.00, 0.54, 0.00, 0.00]),
        "9": csr_matrix([0.00, 0.00, 0.00, 1.10]),
        "10": csr_matrix([1.10, 0.00, 0.00, 0.00]),
        "11": csr_matrix([0.00, 0.49, 0.00, 0.00]),
    }

    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors.keys()))

    # Initialize a `DBScanConstrainedClustering` instance.
    clustering_model = DBScanConstrainedClustering(
        eps=0.5,
        min_samples=3,
    )

    # Run the clustering with no constraints.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
    )

    # The result has to be stored on the model.
    assert clustering_model.dict_of_predicted_clusters

    # Here, '0' is too far from other points so it is noise.
    # Furthermore, '7' and '10' are in the same neighbourhood, but no other point is:
    # they are not numerous enough to create a cluster.
    # Each noise point is expected with its own negative cluster ID.
    assert dict_of_predicted_clusters == {
        "0": -1,
        "1": 0,
        "2": 1,
        "3": 1,
        "4": 2,
        "5": 1,
        "6": 2,
        "7": -2,
        "8": 0,
        "9": 2,
        "10": -3,
        "11": 0,
    }

243 

244 

245# ============================================================================== 

246# test_DBScanConstrainedClustering_cluster_with_some_constraints 

247# ============================================================================== 

def test_DBScanConstrainedClustering_cluster_with_some_constraints():
    """
    Check that the `clustering.dbscan.DBScanConstrainedClustering` clustering works with some constraints.
    """

    # Define `vectors`: each data ID is the position of its row, as a string ("0" to "11").
    rows = [
        [2.00, 0.00, 0.00, 0.00],
        [0.00, 0.43, 0.00, 0.00],
        [0.00, 0.00, 0.29, 0.00],
        [0.00, 0.00, 0.50, 0.00],
        [0.00, 0.00, 0.00, 0.98],
        [0.00, 0.00, 0.33, 0.00],
        [0.00, 0.00, 0.00, 1.40],
        [0.80, 0.00, 0.00, 0.00],
        [0.00, 0.54, 0.00, 0.00],
        [0.00, 0.00, 0.00, 1.10],
        [1.10, 0.00, 0.00, 0.00],
        [0.00, 0.49, 0.00, 0.00],
    ]
    vectors = {str(position): csr_matrix(row) for position, row in enumerate(rows)}

    # Define the `constraints_manager`: "0" must be linked to "7" and "10", and cannot be linked to "4".
    constraints_manager = BinaryConstraintsManager(list_of_data_IDs=list(vectors))
    constraints_to_add = [
        ("0", "7", "MUST_LINK"),
        ("0", "10", "MUST_LINK"),
        ("0", "4", "CANNOT_LINK"),
    ]
    for first_data_ID, second_data_ID, constraint_type in constraints_to_add:
        constraints_manager.add_constraint(
            data_ID1=first_data_ID,
            data_ID2=second_data_ID,
            constraint_type=constraint_type,
        )

    # Initialize a `DBScanConstrainedClustering` instance.
    clustering_model = DBScanConstrainedClustering(eps=0.5, min_samples=3)

    # Run the clustering with the constraints defined above.
    dict_of_predicted_clusters = clustering_model.cluster(
        constraints_manager=constraints_manager,
        vectors=vectors,
    )

    # Expected result: "0", "7" and "10" share a cluster, and three other clusters of three data IDs are found.
    expected_clusters = {
        **dict.fromkeys(["0", "7", "10"], 0),
        **dict.fromkeys(["1", "8", "11"], 1),
        **dict.fromkeys(["2", "3", "5"], 2),
        **dict.fromkeys(["4", "6", "9"], 3),
    }

    # The result has to be stored on the model and returned to the caller.
    assert clustering_model.dict_of_predicted_clusters
    assert dict_of_predicted_clusters == expected_clusters