X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?p=clueless-kmeans.git;a=blobdiff_plain;f=clusterer.h;h=6fa538273c22c039831ed295ce5079cb9b72c323;hp=ad0c58f910bb22bdb449e8b63d4d5bf4549834c5;hb=HEAD;hpb=04d2b44ba34a811e1fab0b90d38ebd06cd918c52 diff --git a/clusterer.h b/clusterer.h index ad0c58f..6fa5382 100644 --- a/clusterer.h +++ b/clusterer.h @@ -1,17 +1,17 @@ /* - * clueless-kmean is a variant of k-mean which enforces balanced + * clueless-kmeans is a variant of k-means which enforces balanced * distribution of classes in every cluster * * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/ * Written by Francois Fleuret * - * This file is part of clueless-kmean. + * This file is part of clueless-kmeans. * - * clueless-kmean is free software: you can redistribute it and/or + * clueless-kmeans is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * version 3 as published by the Free Software Foundation. * - * clueless-kmean is distributed in the hope that it will be useful, + * clueless-kmeans is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
@@ -31,9 +31,16 @@ class Clusterer { public: enum { + // Standard k-means STANDARD_ASSOCIATION, + // Same, implemented as an LP problem for sanity check STANDARD_LP_ASSOCIATION, - UNINFORMATIVE_LP_ASSOCIATION + // Criterion forcing to have the same distribution of classes in + // all clusters + UNINFORMATIVE_LP_ASSOCIATION, + // Criterion forcing to have the same number of samples of each + // class in all clusters + UNINFORMATIVE_LP_ASSOCIATION_ABSOLUTE }; const static int max_nb_iterations = 10; @@ -42,19 +49,20 @@ public: int _nb_clusters; int _dim; + scalar_t **_cluster_means, **_cluster_var; scalar_t distance_to_centroid(scalar_t *x, int k); void initialize_clusters(int nb_points, scalar_t **points); - // Standard hard k-mean association + // Standard hard k-means association scalar_t baseline_cluster_association(int nb_points, scalar_t **points, int nb_classes, int *labels, scalar_t **gamma); - // Standard k-mean association implemented as an LP optimization + // Standard k-means association implemented as an LP optimization scalar_t baseline_lp_cluster_association(int nb_points, scalar_t **points, int nb_classes, int *labels, @@ -65,7 +73,8 @@ public: scalar_t uninformative_lp_cluster_association(int nb_points, scalar_t **points, int nb_classes, int *labels, - scalar_t **gamma); + scalar_t **gamma, + int absolute_proportion); void update_clusters(int nb_points, scalar_t **points, scalar_t **gamma);