Coverage for src\cognitivefactory\interactive_clustering\constraints\abstract.py: 100.00%

28 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-17 13:31 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: cognitivefactory.interactive_clustering.constraints.abstract 

5* Description: The abstract class used to define constraints managing algorithms. 

6* Author: Erwan SCHILD 

7* Created: 17/03/2021 

8* Licence: CeCILL-C License v1.0 (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORTS : 

13# ============================================================================== 

14 

15from abc import ABC, abstractmethod 

16from typing import List, Optional, Tuple 

17 

18 

19# ============================================================================== 

20# ABSTRACT CONSTRAINTS MANAGING 

21# ============================================================================== 

22class AbstractConstraintsManager(ABC): 

23 """ 

24 Abstract class that is used to define a constraints manager. 

25 The main inherited methods are about data IDs management, constraints management and constraints exploration. 

26 

27 References: 

28 - Constraints in clustering: `Wagstaff, K. and C. Cardie (2000). Clustering with Instance-level Constraints. Proceedings of the Seventeenth International Conference on Machine Learning, 1103–1110.` 

29 """ 

30 

31 # ============================================================================== 

32 # ABSTRACT METHOD - DATA_ID MANAGEMENT 

33 # ============================================================================== 

34 @abstractmethod 

35 def add_data_ID( 

36 self, 

37 data_ID: str, 

38 ) -> bool: 

39 """ 

40 (ABSTRACT METHOD) 

41 An abstract method that represents the main method used to add a new data ID to manage. 

42 

43 Args: 

44 data_ID (str): The data ID to manage. 

45 

46 Raises: 

47 ValueError: if `data_ID` is already managed. 

48 

49 Returns: 

50 bool: `True` if the addition is done. 

51 """ 

52 

53 @abstractmethod 

54 def delete_data_ID( 

55 self, 

56 data_ID: str, 

57 ) -> bool: 

58 """ 

59 (ABSTRACT METHOD) 

60 An abstract method that represents the main method used to delete a data ID to no longer manage. 

61 

62 Args: 

63 data_ID (str): The data ID to no longer manage. 

64 

65 Raises: 

66 ValueError: if `data_ID` is not managed. 

67 

68 Returns: 

69 bool: `True` if the deletion is done. 

70 """ 

71 

72 @abstractmethod 

73 def get_list_of_managed_data_IDs( 

74 self, 

75 ) -> List[str]: 

76 """ 

77 (ABSTRACT METHOD) 

78 An abstract method that represents the main method used to get the list of data IDs that are managed. 

79 

80 Returns: 

81 List[str]: The list of data IDs that are managed. 

82 """ 

83 

84 # ============================================================================== 

85 # ABSTRACT METHOD - CONSTRAINTS MANAGEMENT 

86 # ============================================================================== 

87 @abstractmethod 

88 def add_constraint( 

89 self, 

90 data_ID1: str, 

91 data_ID2: str, 

92 constraint_type: str, 

93 constraint_value: float = 1.0, 

94 ) -> bool: 

95 """ 

96 (ABSTRACT METHOD) 

97 An abstract method that represents the main method used to add a constraint between two data IDs. 

98 

99 Args: 

100 data_ID1 (str): The first data ID that is concerned for this constraint addition. 

101 data_ID2 (str): The second data ID that is concerned for this constraint addition. 

102 constraint_type (str): The type of the constraint to add. The type has to be `"MUST_LINK"` or `"CANNOT_LINK"`. 

103 constraint_value (float, optional): The value of the constraint to add. The value has to be in the range `[0.0, 1.0]`. Defaults to 1.0. 

104 

105 Raises: 

106 ValueError: if `data_ID1`, `data_ID2` or `constraint_type` are not managed, or if a conflict is detected with constraints inference. 

107 

108 Returns: 

109 bool: `True` if the addition is done, `False` if the constraint can't be added. 

110 """ 

111 

112 @abstractmethod 

113 def delete_constraint( 

114 self, 

115 data_ID1: str, 

116 data_ID2: str, 

117 ) -> bool: 

118 """ 

119 (ABSTRACT METHOD) 

120 An abstract method that represents the main method used to delete the constraint between two data IDs. 

121 

122 Args: 

123 data_ID1 (str): The first data ID that is concerned for this constraint deletion. 

124 data_ID2 (str): The second data ID that is concerned for this constraint deletion. 

125 

126 Raises: 

127 ValueError: if `data_ID1` or `data_ID2` are not managed. 

128 

129 Returns: 

130 bool: `True` if the deletion is done. 

131 """ 

132 

133 @abstractmethod 

134 def get_added_constraint( 

135 self, 

136 data_ID1: str, 

137 data_ID2: str, 

138 ) -> Optional[Tuple[str, float]]: 

139 """ 

140 (ABSTRACT METHOD) 

141 An abstract method that represents the main method used to get the constraint added between the two data IDs. 

142 Do not take into account the constraints transitivity, just look at constraints that are explicitly added. 

143 

144 Args: 

145 data_ID1 (str): The first data ID that is concerned for this constraint. 

146 data_ID2 (str): The second data ID that is concerned for this constraint. 

147 

148 Raises: 

149 ValueError: if `data_ID1` or `data_ID2` are not managed. 

150 

151 Returns: 

152 Optional[Tuple[str, float]]: `None` if no constraint, `(constraint_type, constraint_value)` otherwise. 

153 """ 

154 

155 # ============================================================================== 

156 # ABSTRACT METHOD - CONSTRAINTS EXPLORATION 

157 # ============================================================================== 

158 @abstractmethod 

159 def get_inferred_constraint( 

160 self, 

161 data_ID1: str, 

162 data_ID2: str, 

163 threshold: float = 1.0, 

164 ) -> Optional[str]: 

165 """ 

166 (ABSTRACT METHOD) 

167 An abstract method that represents the main method used to get the constraint inferred by transitivity between the two data IDs. 

168 The transitivity is taken into account, and the `threshold` parameter is used to evaluate the impact of constraints transitivity. 

169 

170 Args: 

171 data_ID1 (str): The first data ID that is concerned for this constraint. 

172 data_ID2 (str): The second data ID that is concerned for this constraint. 

173 threshold (float, optional): The threshold used to evaluate the impact of constraints transitivity link. Defaults to `1.0`. 

174 

175 Raises: 

176 ValueError: if `data_ID1`, `data_ID2` or `threshold` are not managed. 

177 

178 Returns: 

179 Optional[str]: The type of the inferred constraint. The type can be `None`, `"MUST_LINK"` or `"CANNOT_LINK"`. 

180 """ 

181 

182 @abstractmethod 

183 def get_connected_components( 

184 self, 

185 threshold: float = 1.0, 

186 ) -> List[List[str]]: 

187 """ 

188 (ABSTRACT METHOD) 

189 An abstract method that represents the main method used to get the possible lists of data IDs that are connected by `"MUST_LINK"` constraints. 

190 Each list forms a component of the constraints transitivity graph, and together these lists form a partition of the managed data IDs. 

191 The transitivity is taken into account, and the `threshold` parameter is used to evaluate the impact of constraints transitivity. 

192 

193 Args: 

194 threshold (float, optional): The threshold used to evaluate the impact of constraints transitivity link. Defaults to `1.0`. 

195 

196 Raises: 

197 ValueError: if `threshold` is not managed. 

198 

199 Returns: 

200 List[List[str]]: The list of lists of data IDs that represent a component of the constraints transitivity graph. 

201 """ 

202 

203 @abstractmethod 

204 def check_completude_of_constraints( 

205 self, 

206 threshold: float = 1.0, 

207 ) -> bool: 

208 """ 

209 (ABSTRACT METHOD) 

210 An abstract method that represents the main method used to check if all possible constraints are known (not necessarily annotated because of the transitivity). 

211 The transitivity is taken into account, and the `threshold` parameter is used to evaluate the impact of constraints transitivity. 

212 

213 Args: 

214 threshold (float, optional): The threshold used to evaluate the impact of constraints transitivity link. Defaults to `1.0`. 

215 

216 Raises: 

217 ValueError: if `threshold` is not managed. 

218 

219 Returns: 

220 bool: Return `True` if all constraints are known, `False` otherwise. 

221 """ 

222 

223 @abstractmethod 

224 def get_min_and_max_number_of_clusters( 

225 self, 

226 threshold: float = 1.0, 

227 ) -> Tuple[int, int]: 

228 """ 

229 (ABSTRACT METHOD) 

230 An abstract method that represents the main method used to determine, for a clustering model that would not violate any constraints, the range of the possible clusters number. 

231 The transitivity is taken into account, and the `threshold` parameter is used to evaluate the impact of constraints transitivity. 

232 

233 Args: 

234 threshold (float, optional): The threshold used to evaluate the impact of constraints transitivity link. Defaults to `1.0`. 

235 

236 Raises: 

237 ValueError: if `threshold` is not managed. 

238 

239 Returns: 

240 Tuple[int,int]: The minimum and the maximum possible clusters numbers (for a clustering model that would not violate any constraints). 

241 """ 

242 

243 # ============================================================================== 

244 # ABSTRACT METHOD - CONSTRAINTS CONFLICT 

245 # ============================================================================== 

246 

247 @abstractmethod 

248 def get_list_of_involved_data_IDs_in_a_constraint_conflict( 

249 self, 

250 data_ID1: str, 

251 data_ID2: str, 

252 constraint_type: str, 

253 ) -> Optional[List[str]]: 

254 """ 

255 (ABSTRACT METHOD) 

256 An abstract method that represents the main method used to get all data IDs involved in a constraints conflict. 

257 

258 Args: 

259 data_ID1 (str): The first data ID involved in the constraint conflict. 

260 data_ID2 (str): The second data ID involved in the constraint conflict. 

261 constraint_type (str): The constraint that creates a conflict. The constraint can be `"MUST_LINK"` or `"CANNOT_LINK"`. 

262 

263 Raises: 

264 ValueError: if `data_ID1`, `data_ID2` or `constraint_type` are not managed. 

265 

266 Returns: 

267 Optional[List[str]]: The list of data IDs that are involved in the conflict. It matches data IDs from connected components of `data_ID1` and `data_ID2`. 

268 """ 

269 

270 # ============================================================================== 

271 # ABSTRACT METHOD - SERIALIZATION 

272 # ============================================================================== 

273 @abstractmethod 

274 def to_json( 

275 self, 

276 filepath: str, 

277 ) -> bool: 

278 """ 

279 (ABSTRACT METHOD) 

280 An abstract method that represents the main method used to serialize the constraints manager object into a JSON file. 

281 

282 Args: 

283 filepath (str): The path where to serialize the constraints manager object. 

284 

285 Returns: 

286 bool: `True` if the serialization is done. 

287 """