Coverage for src\cognitivefactory\interactive_clustering\utils\frequency.py: 100.00%

9 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-17 13:31 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: cognitivefactory.interactive_clustering.utils.frequency 

5* Description: Utilities methods for frequency analysis. 

6* Author: Erwan SCHILD 

7* Created: 17/03/2021 

8* Licence: CeCILL (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORT PYTHON DEPENDENCIES 

13# ============================================================================== 

14 

15from typing import Dict, List 

16 

17 

18# ============================================================================== 

19# COMPUTE FREQUENCY OF CLUSTERS : 

20# ============================================================================== 

def compute_clusters_frequency(clustering_result: Dict[str, int]) -> Dict[int, float]:
    """
    Get the frequency of each cluster present in a clustering result.

    The frequency of a cluster is the number of data IDs assigned to it divided
    by the total number of data IDs, so the returned values sum to 1 (up to
    floating-point rounding) whenever the input is not empty.

    Args:
        clustering_result (Dict[str,int]): The dictionary that contains the predicted cluster for each data ID.

    Returns:
        Dict[int,float] : Frequency of each predicted cluster, with keys inserted in ascending cluster ID order. Empty if `clustering_result` is empty.
    """

    # Default case : No data, so no cluster (also avoids a division by zero below).
    if not clustering_result:
        return {}

    # Get the total number of data IDs.
    nb_of_data_IDs = len(clustering_result)

    # Count the size of each cluster in a single pass over the assignments,
    # instead of rescanning the whole clustering once per cluster ID.
    size_of_clusters: Dict[int, int] = {}
    for cluster_ID in clustering_result.values():
        size_of_clusters[cluster_ID] = size_of_clusters.get(cluster_ID, 0) + 1

    # Compute frequency of clusters, keeping cluster IDs in ascending order.
    frequence_of_clusters: Dict[int, float] = {
        cluster_ID: size_of_clusters[cluster_ID] / nb_of_data_IDs for cluster_ID in sorted(size_of_clusters)
    }

    # Return the frequence of clusters.
    return frequence_of_clusters