Coverage for src\cognitivefactory\interactive_clustering\sampling\abstract.py: 100.00%

1# -*- coding: utf-8 -*-

3"""

4* Name: cognitivefactory.interactive_clustering.sampling.abstract

5* Description: The abstract class used to define constraints sampling algorithms.

6* Author: Erwan SCHILD

7* Created: 17/03/2021

8* Licence: CeCILL (https://cecill.info/licences.fr.html)

9"""

11# ==============================================================================

12# IMPORT PYTHON DEPENDENCIES

13# ==============================================================================

15from abc import ABC, abstractmethod

16from typing import Dict, List, Optional, Tuple

18from scipy.sparse import csr_matrix

20from cognitivefactory.interactive_clustering.constraints.abstract import AbstractConstraintsManager

23# ==============================================================================

24# ABSTRACT CONSTRAINTS SAMPLING

25# ==============================================================================

class AbstractConstraintsSampling(ABC):
    """
    Common interface for constraints sampling algorithms.

    Concrete sampling strategies inherit from this class; the main method they have to provide is `sample`.
    """

    # ==============================================================================
    # ABSTRACT METHOD - SAMPLE
    # ==============================================================================
    @abstractmethod
    def sample(
        self,
        constraints_manager: AbstractConstraintsManager,
        nb_to_select: int,
        clustering_result: Optional[Dict[str, int]] = None,
        vectors: Optional[Dict[str, csr_matrix]] = None,
        **kargs,
    ) -> List[Tuple[str, str]]:
        """
        (ABSTRACT METHOD)
        Sample pairs of data IDs to submit for constraints annotation.

        Args:
            constraints_manager (AbstractConstraintsManager): The constraints manager over the data IDs.
            nb_to_select (int): The number of pairs of data IDs to select.
            clustering_result (Optional[Dict[str,int]], optional): The predicted cluster of each data ID, as a dictionary whose keys are the data IDs. If `None`, no clustering result is used during the sampling. Defaults to `None`.
            vectors (Optional[Dict[str, csr_matrix]], optional): The vector representation of the data, as a dictionary whose keys are the data IDs and whose values are the vector of each data. These keys have to refer to the list of data IDs managed by the `constraints_manager`. If `None`, no vectors are used during the sampling. Defaults to `None`.
            **kargs (dict): Other parameters that can be used in the sampling.

        Raises:
            ValueError: If some parameters are incorrectly set or incompatible.

        Returns:
            List[Tuple[str,str]]: The list of selected pairs of data IDs.
        """