///////////////////////////////////////////////////////////////////////////
// This program is free software: you can redistribute it and/or modify  //
// it under the terms of the version 3 of the GNU General Public License //
// as published by the Free Software Foundation.                         //
//                                                                       //
// This program is distributed in the hope that it will be useful, but   //
// WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU      //
// General Public License for more details.                              //
//                                                                       //
// You should have received a copy of the GNU General Public License     //
// along with this program. If not, see <http://www.gnu.org/licenses/>.  //
//                                                                       //
// Written by Francois Fleuret                                           //
// (C) Idiap Research Institute                                          //
//                                                                       //
// Contact <francois.fleuret@idiap.ch> for comments & bug reports        //
///////////////////////////////////////////////////////////////////////////

#include "tools.h"
#include "loss_machine.h"

LossMachine::LossMachine(int loss_type) {
  _loss_type = loss_type;
}

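// The loss type constants (LOSS_EXPONENTIAL, LOSS_HINGE and
// LOSS_LOGISTIC) are expected to come from loss_machine.h. The method
// below fills derivatives[n] with the derivative of the per-sample
// loss with respect to the response r_n; samples with label 0 are
// treated as unlabeled and contribute nothing. For a label y in
// {-1, +1}:
//
//   exponential: L(r) = exp(-y r)           dL/dr = -y exp(-y r)
//   hinge:       L(r) = max(0, 1 - y r)     dL/dr = -y if y r < 1, else 0
//   logistic:    L(r) = log(1 + exp(-y r))  dL/dr = -y / (1 + exp(y r))
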
void LossMachine::get_loss_derivatives(SampleSet *samples,
                                       scalar_t *responses,
                                       scalar_t *derivatives) {

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        derivatives[n] =
          - samples->label(n) * exp( - samples->label(n) * responses[n]);
      }
    }
    break;

  case LOSS_HINGE:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        // Sub-gradient of max(0, 1 - y r): -y inside the margin, 0 elsewhere
        if(samples->label(n) != 0 && samples->label(n) * responses[n] < 1)
          derivatives[n] = - samples->label(n);
        else
          derivatives[n] = 0;
      }
    }
    break;

  case LOSS_LOGISTIC:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) == 0)
          derivatives[n] = 0.0;
        else
          // d/dr log(1 + exp(-y r)) = -y / (1 + exp(y r))
          derivatives[n] =
            - samples->label(n) / (1 + exp(samples->label(n) * responses[n]));
      }
    }
    break;

  default:
    cerr << "Unknown loss type in LossMachine::get_loss_derivatives."
         << endl;
    exit(1);
  }

}

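// The total loss sums the per-sample losses whose derivatives are
// computed above. The logistic case is evaluated in a numerically
// stable way: with u = -y r, log(1 + exp(u)) is replaced by u when
// u > 20 (where exp(u) dominates the 1) and by 0 when u < -20 (where
// the sample is confidently correct and its contribution is
// negligible).
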
scalar_t LossMachine::loss(SampleSet *samples, scalar_t *responses) {
  scalar_t l = 0;

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        l += exp( - samples->label(n) * responses[n]);
        ASSERT(!isinf(l));
      }
    }
    break;

  case LOSS_HINGE:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0) {
          if(samples->label(n) * responses[n] < 1)
            l += (1 - samples->label(n) * responses[n]);
        }
      }
    }
    break;

  case LOSS_LOGISTIC:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0) {
          scalar_t u = - samples->label(n) * responses[n];
          if(u > 20) {
            l += u;
          } else if(u > -20) {
            l += log(1 + exp(u));
          }
        }
      }
    }
    break;

  default:
    cerr << "Unknown loss type in LossMachine::loss." << endl;
    exit(1);
  }

  return l;
}

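// For the exponential loss, the weight a minimizing
//
//   f(a) = sum_n w_n exp(-y_n a h_n),   w_n = exp(-y_n r_n)
//
// over weak-learner responses h_n in {-1, +1} has the closed form
// a* = (1/2) log(num / den), obtained by solving f'(a) = 0, where num
// sums the current weights of the samples the weak learner gets right
// (y_n h_n > 0) and den those it gets wrong. This is the standard
// AdaBoost update; it is exact only for +/-1 responses, and the code
// below accordingly uses only the sign of each response. The other
// two losses have no closed form and fall back on a crude line
// search.
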
scalar_t LossMachine::optimal_weight(SampleSet *sample_set,
                                     scalar_t *weak_learner_responses,
                                     scalar_t *current_responses) {

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      scalar_t num = 0, den = 0, z;

      for(int n = 0; n < sample_set->nb_samples(); n++) {
        z = sample_set->label(n) * weak_learner_responses[n];
        if(z > 0) {
          num += exp( - sample_set->label(n) * current_responses[n]);
        } else if(z < 0) {
          den += exp( - sample_set->label(n) * current_responses[n]);
        }
      }

      // Assumes the weak learner is neither perfect nor completely
      // wrong, so that num and den are both strictly positive
      return 0.5 * log(num / den);
    }
    break;

  case LOSS_HINGE:
  case LOSS_LOGISTIC:
    {
      // Crude line search: step along the weight axis by du, and
      // reverse and shrink the step whenever the loss increases,
      // until the step becomes negligible
      scalar_t u = 0, du = -0.1;
      scalar_t *responses = new scalar_t[sample_set->nb_samples()];

      scalar_t l = 0, prev_l = -1;

      const scalar_t minimum_delta_for_optimization = 1e-5;

      int n = 0;
      while(n < 100 && fabs(du) > minimum_delta_for_optimization) {
        n++;
        u += du;
        for(int s = 0; s < sample_set->nb_samples(); s++) {
          responses[s] = current_responses[s] + u * weak_learner_responses[s];
        }
        l = loss(sample_set, responses);
        if(l > prev_l) du = du * -0.25;
        prev_l = l;
      }

      (*global.log_stream) << "END l = " << l << " du = " << du << endl;

      delete[] responses;

      return u;
    }

  default:
    cerr << "Unknown loss type in LossMachine::optimal_weight." << endl;
    exit(1);
  }

}

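// Weighted subsampling for the exponential loss. Samples are drawn
// with probability proportional to their boosting weight
// exp(-y_n r_n), and each kept sample receives a synthetic response
// chosen so that its weight in the subsampled set reproduces its
// share of the original total weight: a sample drawn once out of m
// draws from total weight W should satisfy exp(-y r) = W/m, hence
// r = -y log(W/m), with an extra log(c) term when the c occurrences
// of a duplicated sample are collapsed into a single one.
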
void LossMachine::subsample(int nb, scalar_t *labels, scalar_t *responses,
                            int nb_to_sample, int *sample_nb_occurences, scalar_t *sample_responses,
                            int allow_duplicates) {

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      scalar_t *weights = new scalar_t[nb];

      for(int n = 0; n < nb; n++) {
        if(labels[n] == 0) {
          weights[n] = 0;
        } else {
          weights[n] = exp( - labels[n] * responses[n]);
        }
        sample_nb_occurences[n] = 0;
        sample_responses[n] = 0.0;
      }

      scalar_t total_weight;
      int nb_sampled = 0, sum_sample_nb_occurences = 0;

      int *sampled_indexes = new int[nb_to_sample];

      (*global.log_stream) << "Sampling " << nb_to_sample << " samples." << endl;

      // When duplicates are not allowed, this loop terminates only if
      // at least nb_to_sample samples have a non-zero weight
      do {
        total_weight = robust_sampling(nb,
                                       weights,
                                       nb_to_sample,
                                       sampled_indexes);

        for(int k = 0; nb_sampled < nb_to_sample && k < nb_to_sample; k++) {
          int i = sampled_indexes[k];
          if(allow_duplicates || sample_nb_occurences[i] == 0) nb_sampled++;
          sample_nb_occurences[i]++;
          sum_sample_nb_occurences++;
        }
      } while(nb_sampled < nb_to_sample);

      (*global.log_stream) << "Done." << endl;

      delete[] sampled_indexes;

      scalar_t unit_weight = log(total_weight / scalar_t(sum_sample_nb_occurences));

      for(int n = 0; n < nb; n++) {
        if(sample_nb_occurences[n] > 0) {
          if(allow_duplicates) {
            sample_responses[n] = - labels[n] * unit_weight;
          } else {
            sample_responses[n] = - labels[n] * (unit_weight + log(scalar_t(sample_nb_occurences[n])));
            sample_nb_occurences[n] = 1;
          }
        }
      }

      delete[] weights;

    }
    break;

  default:
    cerr << "Unknown loss type in LossMachine::subsample." << endl;
    exit(1);
  }

}
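
// A minimal usage sketch, kept out of compilation with #if 0: it
// assumes a boosting loop in which a weak learner has already been
// trained and evaluated on every sample. The function name
// example_boosting_step and the variables weak_responses and
// current_responses are hypothetical and not defined in this file.
#if 0
void example_boosting_step(SampleSet *sample_set,
                           scalar_t *weak_responses,
                           scalar_t *current_responses) {
  LossMachine loss_machine(LOSS_EXPONENTIAL);

  // Derivatives of the loss with respect to the responses, used as
  // signed sample weights when training the next weak learner
  scalar_t *derivatives = new scalar_t[sample_set->nb_samples()];
  loss_machine.get_loss_derivatives(sample_set, current_responses, derivatives);

  // Weight the weak learner optimally and accumulate its responses
  // into the strong classifier's responses
  scalar_t w = loss_machine.optimal_weight(sample_set,
                                           weak_responses,
                                           current_responses);
  for(int n = 0; n < sample_set->nb_samples(); n++)
    current_responses[n] += w * weak_responses[n];

  (*global.log_stream) << "loss = "
                       << loss_machine.loss(sample_set, current_responses)
                       << endl;

  delete[] derivatives;
}
#endif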