Coverage for src\cognitivefactory\interactive_clustering\clustering\abstract.py: 100.00%

18 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-17 13:31 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: cognitivefactory.interactive_clustering.clustering.abstract 

5* Description: The abstract class used to define constrained clustering algorithms. 

6* Author: Erwan SCHILD 

7* Created: 17/03/2021 

8* Licence: CeCILL-C License v1.0 (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORT PYTHON DEPENDENCIES 

13# ============================================================================== 

14 

15from abc import ABC, abstractmethod 

16from typing import Dict, Optional 

17 

18from scipy.sparse import csr_matrix 

19 

20from cognitivefactory.interactive_clustering.constraints.abstract import AbstractConstraintsManager 

21 

22 

23# ============================================================================== 

24# ABSTRACT CONSTRAINED CLUSTERING 

25# ============================================================================== 

class AbstractConstrainedClustering(ABC):
    """
    Base interface shared by every constrained clustering algorithm.
    Concrete subclasses must provide the `cluster` method.

    References:
        - Survey on Constrained Clustering : `Lampert, T., T.-B.-H. Dao, B. Lafabregue, N. Serrette, G. Forestier, B. Cremilleux, C. Vrain, et P. Gancarski (2018). Constrained distance based clustering for time-series : a comparative and experimental study. Data Mining and Knowledge Discovery 32(6), 1663–1707.`
    """

    # ==============================================================================
    # ABSTRACT METHOD - CLUSTER
    # ==============================================================================
    @abstractmethod
    def cluster(
        self,
        constraints_manager: AbstractConstraintsManager,
        vectors: Dict[str, csr_matrix],
        nb_clusters: Optional[int],
        verbose: bool = False,
        **kargs,
    ) -> Dict[str, int]:
        """
        (ABSTRACT METHOD)
        Entry point that subclasses implement to cluster data under constraints.

        Args:
            constraints_manager (AbstractConstraintsManager): The constraints manager over data IDs; the clustering has to respect the conditions it holds during computation.
            vectors (Dict[str, csr_matrix]): The vector representation of the data. Each key is a data ID and each value is the vector of that data. These keys have to refer to the data IDs managed by the `constraints_manager`.
            nb_clusters (Optional[int]): The number of clusters to compute. Can be `None` if this parameter is estimated or if the algorithm doesn't need it.
            verbose (bool, optional): Enable verbose output. Defaults to `False`.
            **kargs (dict): Other parameters that can be used in the clustering.

        Raises:
            ValueError: if `vectors` and `constraints_manager` are incompatible, or if some parameters are incorrectly set.

        Returns:
            Dict[str,int]: A dictionary that contains the predicted cluster for each data ID.
        """

64 

65 

66# ============================================================================== 

67# RENAME CLUSTERS BY ORDER 

68# ============================================================================== 

def rename_clusters_by_order(
    clusters: Dict[str, int],
) -> Dict[str, int]:
    """
    Rename cluster IDs so that they are ordered by data IDs.

    Data IDs are visited in sorted order. The first time a cluster ID is encountered, it is given the next
    unused integer, starting at `0`. As a result, the cluster of the smallest data ID is always renamed `0`.
    The input dictionary is not modified.

    Args:
        clusters (Dict[str, int]): The dictionary of clusters, mapping each data ID to its cluster ID.

    Returns:
        Dict[str, int]: A new dictionary of clusters, with keys inserted in sorted data ID order and cluster IDs renamed.
    """

    # Get `list_of_data_IDs`, sorted so that the renaming does not depend on the insertion order of `clusters`.
    list_of_data_IDs = sorted(clusters)

    # Define a map to be able to rename cluster IDs.
    mapping_of_old_ID_to_new_ID: Dict[int, int] = {}
    for data_ID in list_of_data_IDs:
        # `setdefault` only inserts on the first encounter of a cluster ID;
        # the current size of the mapping is then the next unused new ID.
        mapping_of_old_ID_to_new_ID.setdefault(clusters[data_ID], len(mapping_of_old_ID_to_new_ID))

    # Rename cluster IDs and return the new ordered clusters.
    return {data_ID: mapping_of_old_ID_to_new_ID[clusters[data_ID]] for data_ID in list_of_data_IDs}