Coverage for src\cognitivefactory\interactive_clustering\constraints\abstract.py: 100.00%
28 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: cognitivefactory.interactive_clustering.constraints.abstract
5* Description: The abstract class used to define constraints managing algorithms.
6* Author: Erwan SCHILD
7* Created: 17/03/2021
8* Licence: CeCILL-C License v1.0 (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORTS :
13# ==============================================================================
15from abc import ABC, abstractmethod
16from typing import List, Optional, Tuple
19# ==============================================================================
20# ABSTRACT CONSTRAINTS MANAGING
21# ==============================================================================
class AbstractConstraintsManager(ABC):
    """
    Abstract class used to define a constraints manager.
    The main inherited methods are about data IDs management, constraints management and constraints exploration.

    References:
        - Constraints in clustering: `Wagstaff, K. and C. Cardie (2000). Clustering with Instance-level Constraints. Proceedings of the Seventeenth International Conference on Machine Learning, 1103–1110.`
    """

    # ==============================================================================
    # ABSTRACT METHOD - DATA_ID MANAGEMENT
    # ==============================================================================
    @abstractmethod
    def add_data_ID(
        self,
        data_ID: str,
    ) -> bool:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to add a new data ID to manage.

        Args:
            data_ID (str): The data ID to manage.

        Raises:
            ValueError: if `data_ID` is already managed.

        Returns:
            bool: `True` if the addition is done.
        """

    @abstractmethod
    def delete_data_ID(
        self,
        data_ID: str,
    ) -> bool:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to delete a data ID to no longer manage.

        Args:
            data_ID (str): The data ID to no longer manage.

        Raises:
            ValueError: if `data_ID` is not managed.

        Returns:
            bool: `True` if the deletion is done.
        """

    @abstractmethod
    def get_list_of_managed_data_IDs(
        self,
    ) -> List[str]:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to get the list of data IDs that are managed.

        Returns:
            List[str]: The list of data IDs that are managed.
        """

    # ==============================================================================
    # ABSTRACT METHOD - CONSTRAINTS MANAGEMENT
    # ==============================================================================
    @abstractmethod
    def add_constraint(
        self,
        data_ID1: str,
        data_ID2: str,
        constraint_type: str,
        constraint_value: float = 1.0,
    ) -> bool:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to add a constraint between two data IDs.

        Args:
            data_ID1 (str): The first data ID that is concerned for this constraint addition.
            data_ID2 (str): The second data ID that is concerned for this constraint addition.
            constraint_type (str): The type of the constraint to add. The type has to be `"MUST_LINK"` or `"CANNOT_LINK"`.
            constraint_value (float, optional): The value of the constraint to add. The value has to be in range `[0.0, 1.0]`. Defaults to 1.0.

        Raises:
            ValueError: if `data_ID1`, `data_ID2`, `constraint_type` are not managed, or if a conflict is detected with constraints inference.

        Returns:
            bool: `True` if the addition is done, `False` if the constraint can't be added.
        """

    @abstractmethod
    def delete_constraint(
        self,
        data_ID1: str,
        data_ID2: str,
    ) -> bool:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to delete the constraint between two data IDs.

        Args:
            data_ID1 (str): The first data ID that is concerned for this constraint deletion.
            data_ID2 (str): The second data ID that is concerned for this constraint deletion.

        Raises:
            ValueError: if `data_ID1` or `data_ID2` are not managed.

        Returns:
            bool: `True` if the deletion is done.
        """

    @abstractmethod
    def get_added_constraint(
        self,
        data_ID1: str,
        data_ID2: str,
    ) -> Optional[Tuple[str, float]]:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to get the constraint added between the two data IDs.
        Do not take into account the constraints transitivity, just look at constraints that are explicitly added.

        Args:
            data_ID1 (str): The first data ID that is concerned for this constraint.
            data_ID2 (str): The second data ID that is concerned for this constraint.

        Raises:
            ValueError: if `data_ID1` or `data_ID2` are not managed.

        Returns:
            Optional[Tuple[str, float]]: `None` if no constraint, `(constraint_type, constraint_value)` otherwise.
        """

    # ==============================================================================
    # ABSTRACT METHOD - CONSTRAINTS EXPLORATION
    # ==============================================================================
    @abstractmethod
    def get_inferred_constraint(
        self,
        data_ID1: str,
        data_ID2: str,
        threshold: float = 1.0,
    ) -> Optional[str]:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to get the constraint inferred by transitivity between the two data IDs.
        The transitivity is taken into account, and the `threshold` parameter is used to evaluate the impact of constraints transitivity.

        Args:
            data_ID1 (str): The first data ID that is concerned for this constraint.
            data_ID2 (str): The second data ID that is concerned for this constraint.
            threshold (float, optional): The threshold used to evaluate the impact of constraints transitivity link. Defaults to `1.0`.

        Raises:
            ValueError: if `data_ID1`, `data_ID2` or `threshold` are not managed.

        Returns:
            Optional[str]: The type of the inferred constraint. The type can be `None`, `"MUST_LINK"` or `"CANNOT_LINK"`.
        """

    @abstractmethod
    def get_connected_components(
        self,
        threshold: float = 1.0,
    ) -> List[List[str]]:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to get the possible lists of data IDs that are connected by `"MUST_LINK"` constraints.
        Each list forms a component of the constraints transitivity graph, and it forms a partition of the managed data IDs.
        The transitivity is taken into account, and the `threshold` parameter is used to evaluate the impact of constraints transitivity.

        Args:
            threshold (float, optional): The threshold used to evaluate the impact of constraints transitivity link. Defaults to `1.0`.

        Raises:
            ValueError: if `threshold` is not managed.

        Returns:
            List[List[str]]: The list of lists of data IDs that represent a component of the constraints transitivity graph.
        """

    @abstractmethod
    def check_completude_of_constraints(
        self,
        threshold: float = 1.0,
    ) -> bool:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to check if all possible constraints are known (not necessarily annotated because of the transitivity).
        The transitivity is taken into account, and the `threshold` parameter is used to evaluate the impact of constraints transitivity.

        Args:
            threshold (float, optional): The threshold used to evaluate the impact of constraints transitivity link. Defaults to `1.0`.

        Raises:
            ValueError: if `threshold` is not managed.

        Returns:
            bool: Return `True` if all constraints are known, `False` otherwise.
        """

    @abstractmethod
    def get_min_and_max_number_of_clusters(
        self,
        threshold: float = 1.0,
    ) -> Tuple[int, int]:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to determine, for a clustering model that would not violate any constraints, the range of the possible clusters number.
        The transitivity is taken into account, and the `threshold` parameter is used to evaluate the impact of constraints transitivity.

        Args:
            threshold (float, optional): The threshold used to evaluate the impact of constraints transitivity link. Defaults to `1.0`.

        Raises:
            ValueError: if `threshold` is not managed.

        Returns:
            Tuple[int,int]: The minimum and the maximum possible clusters numbers (for a clustering model that would not violate any constraints).
        """

    # ==============================================================================
    # ABSTRACT METHOD - CONSTRAINTS CONFLICT
    # ==============================================================================

    @abstractmethod
    def get_list_of_involved_data_IDs_in_a_constraint_conflict(
        self,
        data_ID1: str,
        data_ID2: str,
        constraint_type: str,
    ) -> Optional[List[str]]:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to get all data IDs involved in a constraints conflict.

        Args:
            data_ID1 (str): The first data ID involved in the constraint conflict.
            data_ID2 (str): The second data ID involved in the constraint conflict.
            constraint_type (str): The constraint that creates a conflict. The constraints can be `"MUST_LINK"` or `"CANNOT_LINK"`.

        Raises:
            ValueError: if `data_ID1`, `data_ID2`, `constraint_type` are not managed.

        Returns:
            Optional[List[str]]: The list of data IDs that are involved in the conflict. It matches data IDs from connected components of `data_ID1` and `data_ID2`.
        """

    # ==============================================================================
    # ABSTRACT METHOD - SERIALIZATION
    # ==============================================================================
    @abstractmethod
    def to_json(
        self,
        filepath: str,
    ) -> bool:
        """
        (ABSTRACT METHOD)
        An abstract method that represents the main method used to serialize the constraints manager object into a JSON file.

        Args:
            filepath (str): The path where to serialize the constraints manager object.

        Returns:
            bool: `True` if the serialization is done.
        """