X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?p=clueless-kmeans.git;a=blobdiff_plain;f=clusterer.h;h=6fa538273c22c039831ed295ce5079cb9b72c323;hp=f73d6c78f557f1d8c565f4f7d37deddd2b57b0b7;hb=HEAD;hpb=ca6d045155d4c948063f49b6de8c35c0e3246e7a diff --git a/clusterer.h b/clusterer.h index f73d6c7..6fa5382 100644 --- a/clusterer.h +++ b/clusterer.h @@ -1,17 +1,17 @@ /* - * clueless-kmean is a variant of k-mean which enforces balanced + * clueless-kmeans is a variant of k-means which enforces balanced * distribution of classes in every cluster * * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/ * Written by Francois Fleuret * - * This file is part of clueless-kmean. + * This file is part of clueless-kmeans. * - * clueless-kmean is free software: you can redistribute it and/or + * clueless-kmeans is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * version 3 as published by the Free Software Foundation. * - * clueless-kmean is distributed in the hope that it will be useful, + * clueless-kmeans is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
@@ -31,29 +31,38 @@ class Clusterer { public: enum { + // Standard k-means STANDARD_ASSOCIATION, + // Same, implemented as an LP problem for sanity check STANDARD_LP_ASSOCIATION, - UNINFORMATIVE_LP_ASSOCIATION + // Criterion forcing to have the same distribution of classes in + // all clusters + UNINFORMATIVE_LP_ASSOCIATION, + // Criterion forcing to have the same number of samples of each + // class in all clusters + UNINFORMATIVE_LP_ASSOCIATION_ABSOLUTE }; const static int max_nb_iterations = 10; const static scalar_t min_iteration_improvement = 0.999; + const static scalar_t min_cluster_variance = 0.01f; int _nb_clusters; int _dim; + scalar_t **_cluster_means, **_cluster_var; scalar_t distance_to_centroid(scalar_t *x, int k); void initialize_clusters(int nb_points, scalar_t **points); - // Standard hard k-mean association + // Standard hard k-means association scalar_t baseline_cluster_association(int nb_points, scalar_t **points, int nb_classes, int *labels, scalar_t **gamma); - // Standard k-mean association implemented as an LP optimization + // Standard k-means association implemented as an LP optimization scalar_t baseline_lp_cluster_association(int nb_points, scalar_t **points, int nb_classes, int *labels, @@ -64,7 +73,8 @@ public: scalar_t uninformative_lp_cluster_association(int nb_points, scalar_t **points, int nb_classes, int *labels, - scalar_t **gamma); + scalar_t **gamma, + int absolute_proportion); void update_clusters(int nb_points, scalar_t **points, scalar_t **gamma);