Coverage for src\cognitivefactory\interactive_clustering\utils\frequency.py: 100.00%
9 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-17 13:31 +0100
1# -*- coding: utf-8 -*-
3"""
4* Name: cognitivefactory.interactive_clustering.utils.frequency
5* Description: Utilities methods for frequency analysis.
6* Author: Erwan SCHILD
7* Created: 17/03/2021
8* Licence: CeCILL (https://cecill.info/licences.fr.html)
9"""
11# ==============================================================================
12# IMPORT PYTHON DEPENDENCIES
13# ==============================================================================
15from typing import Dict, List
18# ==============================================================================
19# COMPUTE FREQUENCY OF CLUSTERS :
20# ==============================================================================
def compute_clusters_frequency(clustering_result: Dict[str, int]) -> Dict[int, float]:
    """
    Get the frequency of each cluster present in a clustering result.

    The frequency of a cluster is the number of data IDs assigned to it divided by
    the total number of data IDs, so the returned values sum to 1 (up to float rounding).

    Args:
        clustering_result (Dict[str,int]): The dictionary that contains the predicted cluster for each data ID.

    Returns:
        Dict[int,float] : Frequency of each predicted cluster, keyed by cluster ID in ascending order.
            An empty dictionary is returned when `clustering_result` is empty.
    """
    # Imported locally so this function does not depend on a change to the module-level imports.
    from collections import Counter

    # Default case : No data, so no cluster (also avoids a division by zero below).
    if not clustering_result:
        return {}

    # Get the total number of data IDs.
    nb_of_data_IDs = len(clustering_result)

    # Count the number of data IDs per cluster in a single pass over the result.
    size_of_clusters = Counter(clustering_result.values())

    # Compute frequency of clusters, iterating over sorted cluster IDs to keep a deterministic key order.
    frequence_of_clusters: Dict[int, float] = {
        cluster_ID: size_of_clusters[cluster_ID] / nb_of_data_IDs for cluster_ID in sorted(size_of_clusters)
    }

    # Return the frequence of clusters.
    return frequence_of_clusters