Coverage for src\cognitivefactory\interactive_clustering_gui\models\settings.py: 100.00%

204 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-03-22 23:23 +0100

1# -*- coding: utf-8 -*- 

2 

3""" 

4* Name: cognitivefactory.interactive_clustering_gui.models.settings 

5* Description: Definition of algorithm settings models required for application runs. 

6* Author: Erwan Schild 

7* Created: 16/12/2021 

8* Licence: CeCILL-C License v1.0 (https://cecill.info/licences.fr.html) 

9""" 

10 

11# ============================================================================== 

12# IMPORT PYTHON DEPENDENCIES 

13# ============================================================================== 

14 

15import enum 

16from typing import Any, Dict, Optional, Union 

17 

18from pydantic import BaseModel, root_validator, validator 

19 

20# ============================================================================== 

21# ENUMERATION OF IC GUI MODELIZATIONS 

22# ============================================================================== 

23 

24 

class ICGUISettings(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """The enumeration of available Settings for Interactive Clustering GUI."""

    PREPROCESSING: str = "preprocessing"
    VECTORIZATION: str = "vectorization"
    SAMPLING: str = "sampling"
    CLUSTERING: str = "clustering"

    @classmethod
    def contains(cls, value: Any) -> bool:
        """Test if value is in this enumeration.

        Args:
            value (Any): A value. It may be of any type, including unhashable ones.

        Returns:
            bool: `True` if the value is in the enumeration, `False` otherwise.
        """
        try:
            return value in cls._value2member_map_
        except TypeError:
            # The lookup hashes `value`: unhashable objects (lists, dicts, ...) raise `TypeError`
            # and can never be a member value, so they are reported as "not contained".
            return False

44 

45 

46# ============================================================================== 

47# BASE MODEL FOR PREPROCESSING SETTINGS 

48# ============================================================================== 

49 

50 

class PreprocessingSpacyLanguageModel(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the spacy language models accepted by the preprocessing settings."""

    # Only one language model is declared in this enumeration.
    FR_CORE_NEWS_MD: str = "fr_core_news_md"

55 

56 

class PreprocessingSettingsModel(BaseModel):
    """Body model holding the preprocessing settings."""

    # Parameters.
    apply_stopwords_deletion: bool
    apply_parsing_filter: bool
    apply_lemmatization: bool
    spacy_language_model: PreprocessingSpacyLanguageModel

    # Export method.
    def to_dict(self) -> Dict[str, Any]:
        """Export the settings as a plain dictionary.

        Returns:
            Dict[str, Any]: A dictionary that maps each parameter name to its value. The spacy language model is
                exported as its string value rather than as an enumeration member.
        """
        return dict(
            apply_stopwords_deletion=self.apply_stopwords_deletion,
            apply_parsing_filter=self.apply_parsing_filter,
            apply_lemmatization=self.apply_lemmatization,
            spacy_language_model=self.spacy_language_model.value,
        )

    # Config for schema.
    class Config:  # noqa: WPS431 (nested class)
        """Schema configuration: provides an example body for preprocessing settings."""

        schema_extra = {
            "example": dict(
                apply_stopwords_deletion=False,
                apply_parsing_filter=False,
                apply_lemmatization=False,
                spacy_language_model=PreprocessingSpacyLanguageModel.FR_CORE_NEWS_MD,
            ),
        }

92 

93 

def default_PreprocessingSettingsModel() -> PreprocessingSettingsModel:
    """Build the preprocessing settings used by default.

    Returns:
        PreprocessingSettingsModel: A new instance with every preprocessing step disabled and the
            `fr_core_news_md` spacy language model selected.
    """
    default_values = {
        "apply_stopwords_deletion": False,
        "apply_parsing_filter": False,
        "apply_lemmatization": False,
        "spacy_language_model": PreprocessingSpacyLanguageModel.FR_CORE_NEWS_MD,
    }
    return PreprocessingSettingsModel(**default_values)

106 

107 

108# ============================================================================== 

109# BASE MODEL FOR VECTORIZATION SETTINGS 

110# ============================================================================== 

111 

112 

class VectorizerType(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the vectorizer types accepted by the vectorization settings."""

    TFIDF: str = "tfidf"
    SPACY: str = "spacy"

118 

119 

class VectorizationSpacyLanguageModel(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the spacy language models accepted by the vectorization settings."""

    # Only one language model is declared in this enumeration.
    FR_CORE_NEWS_MD: str = "fr_core_news_md"

124 

125 

class VectorizationSettingsModel(BaseModel):
    """Body model holding the vectorization settings.

    The `spacy_language_model` parameter must be set when `vectorizer_type` is `spacy` and must be left unset
    when `vectorizer_type` is `tfidf`.
    """

    # Parameters.
    vectorizer_type: VectorizerType
    spacy_language_model: Optional[VectorizationSpacyLanguageModel]
    random_seed: int

    @validator("random_seed")
    @classmethod
    def validate_random_seed(cls, value: int) -> int:
        """Check that the random seed is not negative.

        Args:
            value (int): The value of random_seed setting.

        Raises:
            ValueError: if `random_seed` is lower than 0.

        Returns:
            int: The unchanged value of random_seed setting.
        """
        if value >= 0:
            return value
        raise ValueError("`random_seed` must be greater than or equal to 0.")

    @root_validator
    @classmethod
    def validate_vectorization_settings(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Check that `vectorizer_type` and `spacy_language_model` are compatible.

        Args:
            values (Dict[str, Any]): The values of vectorization settings.

        Raises:
            ValueError: if `vectorizer_type` is missing, or if `vectorizer_type` and `spacy_language_model`
                are incompatible.

        Returns:
            Dict[str, Any]: The validated values of vectorization settings.
        """
        # Every other check depends on the vectorizer type, so it has to be available.
        if "vectorizer_type" not in values:
            raise ValueError("The parameter `vectorizer_type` is required.")

        vectorizer = values["vectorizer_type"]
        # A missing key and an explicit `None` are handled the same way.
        language_model = values.get("spacy_language_model")

        if vectorizer == VectorizerType.TFIDF:
            # The tfidf vectorizer accepts no language model.
            if language_model is not None:
                raise ValueError("No spacy language model is required when vectorizer is `tfidf`.")
            # Make sure the key exists in the validated values.
            values["spacy_language_model"] = None
        elif vectorizer == VectorizerType.SPACY and language_model is None:
            # The spacy vectorizer needs a language model.
            raise ValueError("A spacy language model is required when vectorizer is `spacy`.")

        return values

    # Export method.
    def to_dict(self) -> Dict[str, Any]:
        """Export the settings as a plain dictionary.

        Returns:
            Dict[str, Any]: A dictionary that maps each parameter name to its value. Enumeration members are
                exported as their string values; an unset spacy language model is exported as `None`.
        """
        language_model = self.spacy_language_model
        return dict(
            vectorizer_type=self.vectorizer_type.value,
            spacy_language_model=None if language_model is None else language_model.value,
            random_seed=self.random_seed,
        )

    # Config for schema.
    class Config:  # noqa: WPS431 (nested class)
        """Schema configuration: provides an example body for vectorization settings."""

        schema_extra = {
            "example": {
                # Alternatives are shown separated by "|".
                "vectorizer_type": "|".join((VectorizerType.TFIDF.value, VectorizerType.SPACY.value)),
                "random_seed": 42,
                "!!!SPECIFIC: 'vectorizer_type'=='spacy'": {
                    "spacy_language_model": VectorizationSpacyLanguageModel.FR_CORE_NEWS_MD,
                },
            }
        }

213 

214 

def default_VectorizationSettingsModel() -> VectorizationSettingsModel:
    """Build the vectorization settings used by default.

    Returns:
        VectorizationSettingsModel: A new instance using the `tfidf` vectorizer, no spacy language model and a
            random seed of 42.
    """
    default_values = {
        "vectorizer_type": VectorizerType.TFIDF,
        "spacy_language_model": None,
        "random_seed": 42,
    }
    return VectorizationSettingsModel(**default_values)

226 

227 

228# ============================================================================== 

229# BASE MODEL FOR SAMPLING SETTINGS 

230# ============================================================================== 

231 

232 

class SamplingAlgorithm(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the sampling algorithms accepted by the sampling settings."""

    # Predefined algorithms.
    RANDOM: str = "random"
    RANDOM_IN_SAME_CLUSTER: str = "random_in_same_cluster"
    FARTHEST_IN_SAME_CLUSTER: str = "farthest_in_same_cluster"
    CLOSEST_IN_DIFFERENT_CLUSTERS: str = "closest_in_different_clusters"
    # Algorithm configured through custom initialization parameters.
    CUSTOM: str = "custom"

241 

242 

class ClusterRestriction(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the cluster restrictions accepted by the custom sampling algorithm."""

    SAME_CLUSTER: str = "same_cluster"
    DIFFERENT_CLUSTERS: str = "different_clusters"

248 

249 

class DistanceRestriction(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the distance restrictions accepted by the custom sampling algorithm."""

    CLOSEST_NEIGHBORS: str = "closest_neighbors"
    FARTHEST_NEIGHBORS: str = "farthest_neighbors"

255 

256 

class CustomSamplingInitSettingsModel(BaseModel):
    """Body submodel holding the initialization settings of the custom sampling algorithm."""

    # Parameters.
    clusters_restriction: ClusterRestriction
    distance_restriction: DistanceRestriction
    without_inferred_constraints: bool

    # Export method.
    def to_dict(self) -> Dict[str, Any]:
        """Export the settings as a plain dictionary.

        Returns:
            Dict[str, Any]: A dictionary that maps each parameter name to its value. Enumeration members are
                exported as their string values.
        """
        return dict(
            clusters_restriction=self.clusters_restriction.value,
            distance_restriction=self.distance_restriction.value,
            without_inferred_constraints=self.without_inferred_constraints,
        )

277 

278 

class SamplingSettingsModel(BaseModel):
    """Body model holding the sampling settings.

    The `init_kargs` parameter must be set when `algorithm` is `custom` and must be left unset for every
    other algorithm.
    """

    # Parameters.
    algorithm: SamplingAlgorithm
    random_seed: int
    nb_to_select: int
    init_kargs: Optional[CustomSamplingInitSettingsModel]

    @validator("random_seed")
    @classmethod
    def validate_random_seed(cls, value: int) -> int:
        """Check that the random seed is not negative.

        Args:
            value (int): The value of random_seed setting.

        Raises:
            ValueError: if `random_seed` is lower than 0.

        Returns:
            int: The unchanged value of random_seed setting.
        """
        if value >= 0:
            return value
        raise ValueError("`random_seed` must be greater than or equal to 0.")

    @validator("nb_to_select")
    @classmethod
    def validate_nb_to_select(cls, value: int) -> int:
        """Check that at least one element has to be selected.

        Args:
            value (int): The value of nb_to_select setting.

        Raises:
            ValueError: if `nb_to_select` is lower than 1.

        Returns:
            int: The unchanged value of nb_to_select setting.
        """
        if value >= 1:
            return value
        raise ValueError("`nb_to_select` must be greater than or equal to 1.")

    @root_validator
    @classmethod
    def validate_sampling_settings(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Check that `algorithm` and `init_kargs` are compatible.

        Args:
            values (Dict[str, Any]): The values of sampling settings.

        Raises:
            ValueError: if `algorithm` is missing, or if `algorithm` and `init_kargs` are incompatible.

        Returns:
            Dict[str, Any]: The validated values of sampling settings.
        """
        # Every other check depends on the algorithm, so it has to be available.
        if "algorithm" not in values:
            raise ValueError("The parameter `algorithm` is required.")

        # A missing key and an explicit `None` are handled the same way.
        has_init_kargs = values.get("init_kargs") is not None

        # The custom algorithm needs its initialization parameters.
        if values["algorithm"] == SamplingAlgorithm.CUSTOM:
            if not has_init_kargs:
                raise ValueError(
                    "A dictionary of initialization (`init_kargs`) is required when algorithm is `custom`."
                )
            return values

        # Predefined algorithms accept no initialization parameters.
        if has_init_kargs:
            raise ValueError(
                "No dictionary of initialization (`init_kargs`) is required when algorithm is different from `custom`."
            )
        # Make sure the key exists in the validated values.
        values["init_kargs"] = None
        return values

    # Export method.
    def to_dict(self) -> Dict[str, Any]:
        """Export the settings as a plain dictionary.

        Returns:
            Dict[str, Any]: A dictionary that maps each parameter name to its value. The algorithm is exported
                as its string value; `init_kargs` is exported as a dictionary, or as `None` when unset.
        """
        init_kargs = self.init_kargs
        return dict(
            algorithm=self.algorithm.value,
            random_seed=self.random_seed,
            nb_to_select=self.nb_to_select,
            init_kargs=None if init_kargs is None else init_kargs.to_dict(),
        )

    # Config for schema.
    class Config:  # noqa: WPS431 (nested class)
        """Schema configuration: provides an example body for sampling settings."""

        schema_extra = {
            "example": {
                # Alternatives are shown separated by "|".
                "algorithm": "|".join(
                    (
                        SamplingAlgorithm.RANDOM.value,
                        SamplingAlgorithm.RANDOM_IN_SAME_CLUSTER.value,
                        SamplingAlgorithm.CLOSEST_IN_DIFFERENT_CLUSTERS.value,
                        SamplingAlgorithm.FARTHEST_IN_SAME_CLUSTER.value,
                        SamplingAlgorithm.CUSTOM.value,
                    )
                ),
                "random_seed": 42,
                "nb_to_select": 25,
                "!!!SPECIFIC: 'algorithm'=='custom'": {
                    "init_kargs": {
                        "clusters_restriction": "|".join(
                            (ClusterRestriction.SAME_CLUSTER.value, ClusterRestriction.DIFFERENT_CLUSTERS.value)
                        ),
                        "distance_restriction": "|".join(
                            (DistanceRestriction.CLOSEST_NEIGHBORS.value, DistanceRestriction.FARTHEST_NEIGHBORS.value)
                        ),
                        "without_inferred_constraints": True,
                    },
                },
            }
        }

407 

408 

def default_SamplingSettingsModel() -> SamplingSettingsModel:
    """Build the sampling settings used by default.

    Returns:
        SamplingSettingsModel: A new instance using the `closest_in_different_clusters` algorithm, a random
            seed of 42, 25 elements to select and no initialization parameters.
    """
    default_values = {
        "algorithm": SamplingAlgorithm.CLOSEST_IN_DIFFERENT_CLUSTERS,
        "random_seed": 42,
        "nb_to_select": 25,
        "init_kargs": None,
    }
    return SamplingSettingsModel(**default_values)

421 

422 

423# ============================================================================== 

424# BASE MODEL FOR CLUSTERING SETTINGS 

425# ============================================================================== 

426 

427 

class ClusteringAlgorithmEnum(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the clustering algorithms accepted by the clustering settings."""

    KMEANS: str = "kmeans"
    HIERARCHICAL: str = "hierarchical"
    SPECTRAL: str = "spectral"

434 

435 

class KmeansModelEnum(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the kmeans models accepted by the kmeans instantiation settings."""

    # Only one kmeans model is declared in this enumeration.
    COP: str = "COP"

440 

441 

class KmeansInitSettingsModel(BaseModel):
    """The body submodel for kmeans instantiation settings."""

    # Parameters.
    model: KmeansModelEnum
    max_iteration: int
    tolerance: float

    @validator("max_iteration")
    @classmethod
    def validate_max_iteration(cls, value: int) -> int:
        """The validation of max_iteration settings.

        Args:
            value (int): The value of max_iteration setting.

        Raises:
            ValueError: if `max_iteration` is lower than 1.

        Returns:
            int: The value of max_iteration setting.
        """
        if value < 1:
            raise ValueError("`max_iteration` must be greater than or equal to 1.")
        return value

    @validator("tolerance")
    @classmethod
    def validate_tolerance(cls, value: float) -> float:
        """The validation of tolerance settings.

        Args:
            value (float): The value of tolerance setting.

        Raises:
            ValueError: if `tolerance` is lower than 0.0.

        Returns:
            float: The value of tolerance setting.
        """
        # Only strictly negative values are rejected: 0.0 is accepted, and the message says so.
        if value < 0:
            raise ValueError("The `tolerance` must be greater than or equal to 0.0.")
        return value

    # Export method.
    def to_dict(self) -> Dict[str, Any]:
        """Export the model as a dictionary.

        Returns:
            Dict[str, Any]: A dictionary that contains parameters and their values. The kmeans model is exported
                as its string value.
        """
        return {
            "model": self.model.value,
            "max_iteration": self.max_iteration,
            "tolerance": self.tolerance,
        }

498 

499 

def default_KmeansInitSettingsModel() -> KmeansInitSettingsModel:
    """Build the kmeans instantiation settings used by default.

    Returns:
        KmeansInitSettingsModel: A new instance using the `COP` model, at most 150 iterations and a tolerance
            of 0.0001.
    """
    default_values = {
        "model": KmeansModelEnum.COP,
        "max_iteration": 150,
        "tolerance": 0.0001,
    }
    return KmeansInitSettingsModel(**default_values)

511 

512 

class HierarchicalLinkageEnum(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the linkages accepted by the hierarchical instantiation settings."""

    AVERAGE: str = "average"
    COMPLETE: str = "complete"
    SINGLE: str = "single"
    WARD: str = "ward"

520 

521 

class HierarchicalInitSettingsModel(BaseModel):
    """Body submodel holding the hierarchical instantiation settings."""

    # Parameters.
    linkage: HierarchicalLinkageEnum

    # Export method.
    def to_dict(self) -> Dict[str, Any]:
        """Export the settings as a plain dictionary.

        Returns:
            Dict[str, Any]: A dictionary that maps the `linkage` parameter to its string value.
        """
        return dict(linkage=self.linkage.value)

538 

539 

540# NEVER USE: KMeans is used as default clustering algorithm. 

541#### def default_HierarchicalInitSettingsModel() -> HierarchicalInitSettingsModel: 

542#### """Create a HierarchicalInitSettingsModel instance with default values. 

543#### 

544#### Returns: 

545#### HierarchicalInitSettingsModel: A HierarchicalInitSettingsModel instance with default values. 

546#### """ 

547#### return HierarchicalInitSettingsModel( 

548#### linkage=HierarchicalLinkageEnum.WARD, 

549#### ) 

550 

551 

class SpectralModelEnum(str, enum.Enum):  # noqa: WPS600 (subclassing str)
    """Names of the spectral models accepted by the spectral instantiation settings."""

    # Only one spectral model is declared in this enumeration.
    SPEC: str = "SPEC"

556 

557 

class SpectralInitSettingsModel(BaseModel):
    """Body submodel holding the spectral instantiation settings."""

    # Parameters (both have a default value).
    model: SpectralModelEnum = SpectralModelEnum.SPEC
    nb_components: Optional[int] = None

    @validator("nb_components")
    @classmethod
    def validate_nb_components(cls, value: Optional[int]) -> Optional[int]:
        """Check that the number of components is either unset or at least 2.

        Args:
            value (Optional[int]): The value of nb_components setting.

        Raises:
            ValueError: if `nb_components` is set and lower than 2.

        Returns:
            Optional[int]: The unchanged value of nb_components setting.
        """
        if value is None or value >= 2:
            return value
        raise ValueError("`nb_components` must be `None` or greater than or equal to 2.")

    # Export method.
    def to_dict(self) -> Dict[str, Any]:
        """Export the settings as a plain dictionary.

        Returns:
            Dict[str, Any]: A dictionary that maps each parameter name to its value. The spectral model is
                exported as its string value.
        """
        return dict(
            model=self.model.value,
            nb_components=self.nb_components,
        )

594 

595 

596# NEVER USE: KMeans is used as default clustering algorithm. 

597#### def default_SpectralInitSettingsModel() -> SpectralInitSettingsModel: 

598#### """Create a SpectralInitSettingsModel instance with default values. 

599#### 

600#### Returns: 

601#### SpectralInitSettingsModel: A SpectralInitSettingsModel instance with default values. 

602#### """ 

603#### return SpectralInitSettingsModel( 

604#### model=SpectralModelEnum.SPEC, 

605#### nb_components=None, 

606#### ) 

607 

608 

class ClusteringSettingsModel(BaseModel):
    """Body model holding the clustering settings.

    The `init_kargs` parameter must be set and must be the instantiation submodel matching `algorithm`.
    """

    # Parameters.
    algorithm: ClusteringAlgorithmEnum
    random_seed: int
    nb_clusters: int
    init_kargs: Union[None, KmeansInitSettingsModel, HierarchicalInitSettingsModel, SpectralInitSettingsModel]

    @validator("random_seed")
    @classmethod
    def validate_random_seed(cls, value: int) -> int:
        """Check that the random seed is not negative.

        Args:
            value (int): The value of random_seed setting.

        Raises:
            ValueError: if `random_seed` is lower than 0.

        Returns:
            int: The unchanged value of random_seed setting.
        """
        if value >= 0:
            return value
        raise ValueError("`random_seed` must be greater than or equal to 0.")

    @validator("nb_clusters")
    @classmethod
    def validate_nb_clusters(cls, value: int) -> int:
        """Check that at least two clusters are requested.

        Args:
            value (int): The value of nb_clusters setting.

        Raises:
            ValueError: if `nb_clusters` is lower than 2.

        Returns:
            int: The unchanged value of nb_clusters setting.
        """
        if value >= 2:
            return value
        raise ValueError("`nb_clusters` must be greater than or equal to 2.")

    @root_validator
    @classmethod
    def validate_clustering_settings(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Check that `algorithm` and `init_kargs` are compatible.

        Args:
            values (Dict[str, Any]): The values of clustering settings.

        Raises:
            ValueError: if `algorithm` is missing, or if `algorithm` and `init_kargs` are incompatible.

        Returns:
            Dict[str, Any]: The validated values of clustering settings.
        """
        # Every other check depends on the algorithm, so it has to be available.
        if "algorithm" not in values:
            raise ValueError("The parameter `algorithm` is required.")

        # Instantiation submodel expected for each clustering algorithm.
        # Kept local to the method so that it is not declared as an attribute of the model class.
        expected_models = {
            ClusteringAlgorithmEnum.KMEANS: KmeansInitSettingsModel,
            ClusteringAlgorithmEnum.HIERARCHICAL: HierarchicalInitSettingsModel,
            ClusteringAlgorithmEnum.SPECTRAL: SpectralInitSettingsModel,
        }

        algorithm = values["algorithm"]
        # A missing key and an explicit `None` are handled the same way.
        init_kargs = values.get("init_kargs")

        for candidate, expected_model in expected_models.items():
            if algorithm == candidate:
                # Case of no init parameters.
                if init_kargs is None:
                    raise ValueError(
                        "A dictionary of initialization (`init_kargs`) is required when algorithm is "
                        f"`{candidate.value}`."
                    )
                # Case of wrong type init parameters.
                if not isinstance(init_kargs, expected_model):
                    raise ValueError(
                        "The dictionary of initialization (`init_kargs`) is incompatible with algorithm "
                        f"`{candidate.value}`."
                    )

        # Return validated values of clustering settings.
        return values

    # Export method.
    def to_dict(self) -> Dict[str, Any]:
        """Export the settings as a plain dictionary.

        Returns:
            Dict[str, Any]: A dictionary that maps each parameter name to its value. The algorithm is exported
                as its string value; `init_kargs` is exported as a dictionary, which is empty when unset.
        """
        init_kargs = self.init_kargs
        return dict(
            algorithm=self.algorithm.value,
            random_seed=self.random_seed,
            nb_clusters=self.nb_clusters,
            init_kargs={} if init_kargs is None else init_kargs.to_dict(),
        )

    # Config for schema.
    class Config:  # noqa: WPS431 (nested class)
        """Schema configuration: provides an example body for clustering settings."""

        schema_extra = {
            "example": {
                # Alternatives are shown separated by "|".
                "algorithm": "|".join(
                    (
                        ClusteringAlgorithmEnum.KMEANS.value,
                        ClusteringAlgorithmEnum.HIERARCHICAL.value,
                        ClusteringAlgorithmEnum.SPECTRAL.value,
                    )
                ),
                "random_seed": 42,
                "nb_clusters": 2,
                "init_kargs": {
                    "!!!SPECIFIC: 'algorithm'=='kmeans'": {
                        "model": KmeansModelEnum.COP,
                        "max_iteration": 150,
                        "tolerance": 0.0001,
                    },
                    "!!!SPECIFIC: 'algorithm'=='hierarchical'": {
                        "linkage": "|".join(
                            (
                                HierarchicalLinkageEnum.WARD.value,
                                HierarchicalLinkageEnum.AVERAGE.value,
                                HierarchicalLinkageEnum.COMPLETE.value,
                                HierarchicalLinkageEnum.SINGLE.value,
                            )
                        ),
                    },
                    "!!!SPECIFIC: 'algorithm'=='spectral'": {
                        "model": SpectralModelEnum.SPEC,
                        "nb_components": None,
                    },
                },
            }
        }

768 

769 

def default_ClusteringSettingsModel() -> ClusteringSettingsModel:
    """Build the clustering settings used by default.

    Returns:
        ClusteringSettingsModel: A new instance using the `kmeans` algorithm, a random seed of 42, 2 clusters
            and the default kmeans instantiation settings.
    """
    default_values = {
        "algorithm": ClusteringAlgorithmEnum.KMEANS,
        "random_seed": 42,
        "nb_clusters": 2,
        "init_kargs": default_KmeansInitSettingsModel(),
    }
    return ClusteringSettingsModel(**default_values)