Coverage for src\cognitivefactory\interactive_clustering\clustering\abstract.py: 100.00%
18 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: cognitivefactory.interactive_clustering.clustering.abstract
5* Description: The abstract class used to define constrained clustering algorithms.
6* Author: Erwan SCHILD
7* Created: 17/03/2021
8* Licence: CeCILL-C License v1.0 (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORT PYTHON DEPENDENCIES
13# ==============================================================================
15from abc import ABC, abstractmethod
16from typing import Dict, Optional
18from scipy.sparse import csr_matrix
20from cognitivefactory.interactive_clustering.constraints.abstract import AbstractConstraintsManager
23# ==============================================================================
24# ABSTRACT CONSTRAINED CLUSTERING
25# ==============================================================================
class AbstractConstrainedClustering(ABC):
    """
    Base class describing the interface of a constrained clustering algorithm.
    Concrete algorithms inherit from it and must implement `cluster`.

    References:
        - Survey on Constrained Clustering : `Lampert, T., T.-B.-H. Dao, B. Lafabregue, N. Serrette, G. Forestier, B. Cremilleux, C. Vrain, et P. Gancarski (2018). Constrained distance based clustering for time-series : a comparative and experimental study. Data Mining and Knowledge Discovery 32(6), 1663–1707.`
    """

    # ==============================================================================
    # ABSTRACT METHOD - CLUSTER
    # ==============================================================================
    @abstractmethod
    def cluster(
        self,
        constraints_manager: AbstractConstraintsManager,
        vectors: Dict[str, csr_matrix],
        nb_clusters: Optional[int],
        verbose: bool = False,
        **kargs,
    ) -> Dict[str, int]:
        """
        (ABSTRACT METHOD)
        Main entry point of a constrained clustering algorithm: partition the data into clusters.

        Args:
            constraints_manager (AbstractConstraintsManager): The constraints manager over data IDs, holding the conditions the clustering has to respect during computation.
            vectors (Dict[str, csr_matrix]): The vector representation of the data. Each key is a data ID and each value is the vector of that data. These keys have to refer to the data IDs managed by the `constraints_manager`.
            nb_clusters (Optional[int]): The number of clusters to compute. Can be `None` if this parameter is estimated or if the algorithm doesn't need it.
            verbose (bool, optional): Enable verbose output. Defaults to `False`.
            **kargs (dict): Other parameters that can be used in the clustering.

        Raises:
            ValueError: if `vectors` and `constraints_manager` are incompatible, or if some parameters are incorrectly set.

        Returns:
            Dict[str,int]: A dictionary that gives the predicted cluster of each data ID.
        """
66# ==============================================================================
67# RENAME CLUSTERS BY ORDER
68# ==============================================================================
def rename_clusters_by_order(
    clusters: Dict[str, int],
) -> Dict[str, int]:
    """
    Renumber cluster IDs following the sorted order of data IDs.

    Data IDs are visited in sorted order: the cluster of the first data ID is renamed `0`, the next cluster not seen before is renamed `1`, and so on.

    Args:
        clusters (Dict[str, int]): The cluster ID assigned to each data ID.

    Returns:
        Dict[str, int]: The same assignment with renumbered cluster IDs, with keys inserted in sorted order of data IDs.
    """

    # Visit data IDs in sorted order so that the numbering does not depend on insertion order.
    ordered_data_ids = sorted(clusters)

    # Give each former cluster ID the rank of its first appearance.
    # `len(renumbering)` is the next unused ID; `setdefault` keeps the existing one if already seen.
    renumbering: Dict[int, int] = {}
    for data_id in ordered_data_ids:
        renumbering.setdefault(clusters[data_id], len(renumbering))

    # Rebuild the assignment with the new cluster IDs.
    relabelled: Dict[str, int] = {}
    for data_id in ordered_data_ids:
        relabelled[data_id] = renumbering[clusters[data_id]]

    return relabelled