// Copyright (C) 2008 Davis E. King (davis@dlib.net)
// License: Boost Software License. See LICENSE.txt for the full license.
#ifndef DLIB_KERNEL_FEATURE_RANKINg_H_
#define DLIB_KERNEL_FEATURE_RANKINg_H_
#include <vector>
#include <limits>
#include <iostream>

#include "feature_ranking_abstract.h"
#include "kcentroid.h"
#include "../optimization.h"
#include "../statistics.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename kernel_type,
typename sample_matrix_type,
typename label_matrix_type
>
matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl (
const kcentroid<kernel_type>& kc,
const sample_matrix_type& samples,
const label_matrix_type& labels
)
{
        /*
            This function ranks features by recursive feature elimination: starting
            with all the features, it repeatedly removes the single feature whose
            removal reduces the separation between the two class centroids the least.
        */
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::mem_manager_type mm;
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true,
"\tmatrix rank_features()"
<< "\n\t you have given invalid arguments to this function"
);
matrix<scalar_type,0,2,mm> results(samples(0).nr(), 2);
matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr());
set_all_elements(mask,1);
// figure out what the separation is between the two centroids when all the features are
// present.
scalar_type first_separation;
{
kcentroid<kernel_type> c1(kc);
kcentroid<kernel_type> c2(kc);
// find the centers of each class
for (long s = 0; s < samples.size(); ++s)
{
if (labels(s) < 0)
{
c1.train(samples(s));
}
else
{
c2.train(samples(s));
}
}
first_separation = c1(c2);
}
for (long i = results.nr()-1; i >= 0; --i)
{
long worst_feature_idx = 0;
scalar_type worst_feature_score = -std::numeric_limits<scalar_type>::infinity();
// figure out which feature to remove next
for (long j = 0; j < mask.size(); ++j)
{
// skip features we have already removed
if (mask(j) == 0)
continue;
kcentroid<kernel_type> c1(kc);
kcentroid<kernel_type> c2(kc);
// temporarily remove this feature from the working set of features
mask(j) = 0;
// find the centers of each class
for (long s = 0; s < samples.size(); ++s)
{
if (labels(s) < 0)
{
c1.train(pointwise_multiply(samples(s),mask));
}
else
{
c2.train(pointwise_multiply(samples(s),mask));
}
}
// find the distance between the two centroids and use that
// as the score
                const scalar_type score = c1(c2);
if (score > worst_feature_score)
{
worst_feature_score = score;
worst_feature_idx = j;
}
// add this feature back to the working set of features
mask(j) = 1;
}
            // now that we know which feature is the next worst, permanently remove it
            // from the working set and record it
mask(worst_feature_idx) = 0;
results(i,0) = worst_feature_idx;
results(i,1) = worst_feature_score;
}
        // Normalize the scores and then shift them down one row.  The score stored
        // at row i was measured with that row's feature removed, i.e. it is the
        // separation obtained using only the top i ranked features.  So after the
        // shift, results(r,1) is the separation obtained when using the top r+1
        // ranked features, and the last row gets the separation observed with all
        // the features present.
        const scalar_type max_separation = std::max(max(colm(results,1)), first_separation);
        set_colm(results,1) = colm(results,1)/max_separation;
        for (long r = 0; r < results.nr()-1; ++r)
        {
            results(r,1) = results(r+1,1);
        }
        results(results.nr()-1,1) = first_separation/max_separation;
return results;
}
// ----------------------------------------------------------------------------------------
template <
typename kernel_type,
typename sample_matrix_type,
typename label_matrix_type
>
matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features (
const kcentroid<kernel_type>& kc,
const sample_matrix_type& samples,
const label_matrix_type& labels
)
{
return rank_features_impl(kc, mat(samples), mat(labels));
}
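    /*
        Example usage (a sketch rather than normative documentation; it assumes
        column-vector samples and dlib's radial_basis_kernel):

            typedef matrix<double,4,1> sample_type;
            typedef radial_basis_kernel<sample_type> kernel_type;

            std::vector<sample_type> samples;
            std::vector<double> labels;
            // ... fill samples/labels with a binary (+1/-1) classification problem ...

            kcentroid<kernel_type> kc(kernel_type(0.1), 0.001, 40);
            matrix<double> ranked = rank_features(kc, samples, labels);
            // ranked(i,0) is the index of the i-th best feature and ranked(i,1) is
            // the normalized centroid separation obtained when the top i+1 ranked
            // features are used together.
    */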
// ----------------------------------------------------------------------------------------
template <
typename kernel_type,
typename sample_matrix_type,
typename label_matrix_type
>
matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl (
const kcentroid<kernel_type>& kc,
const sample_matrix_type& samples,
const label_matrix_type& labels,
const long num_features
)
{
        /*
            This function ranks features by recursive feature addition: starting with
            no features, it repeatedly adds the single feature that increases the
            separation between the two class centroids the most.
        */
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::mem_manager_type mm;
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true,
"\tmatrix rank_features()"
<< "\n\t you have given invalid arguments to this function"
);
DLIB_ASSERT(0 < num_features && num_features <= samples(0).nr(),
"\tmatrix rank_features()"
<< "\n\t you have given invalid arguments to this function"
<< "\n\t num_features: " << num_features
<< "\n\t samples(0).nr(): " << samples(0).nr()
);
matrix<scalar_type,0,2,mm> results(num_features, 2);
matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr());
set_all_elements(mask,0);
for (long i = 0; i < results.nr(); ++i)
{
long best_feature_idx = 0;
scalar_type best_feature_score = -std::numeric_limits<scalar_type>::infinity();
// figure out which feature to add next
for (long j = 0; j < mask.size(); ++j)
{
// skip features we have already added
if (mask(j) == 1)
continue;
kcentroid<kernel_type> c1(kc);
kcentroid<kernel_type> c2(kc);
// temporarily add this feature to the working set of features
mask(j) = 1;
// find the centers of each class
for (long s = 0; s < samples.size(); ++s)
{
if (labels(s) < 0)
{
c1.train(pointwise_multiply(samples(s),mask));
}
else
{
c2.train(pointwise_multiply(samples(s),mask));
}
}
// find the distance between the two centroids and use that
// as the score
                const scalar_type score = c1(c2);
if (score > best_feature_score)
{
best_feature_score = score;
best_feature_idx = j;
}
// take this feature back out of the working set of features
mask(j) = 0;
}
            // now that we know which feature is the next best, permanently add it
            // to the working set and record it
mask(best_feature_idx) = 1;
results(i,0) = best_feature_idx;
results(i,1) = best_feature_score;
}
        // now normalize the scores so that the largest separation found becomes 1
set_colm(results,1) = colm(results,1)/max(colm(results,1));
return results;
}
// ----------------------------------------------------------------------------------------
template <
typename kernel_type,
typename sample_matrix_type,
typename label_matrix_type
>
matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features (
const kcentroid<kernel_type>& kc,
const sample_matrix_type& samples,
const label_matrix_type& labels,
const long num_features
)
{
if (mat(samples).nr() > 0 && num_features == mat(samples)(0).nr())
{
            // if we are going to rank all of them then we might as well use the recursive feature elimination version
return rank_features_impl(kc, mat(samples), mat(labels));
}
else
{
return rank_features_impl(kc, mat(samples), mat(labels), num_features);
}
}
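    /*
        Continuing the sketch above, asking for only the two best features makes this
        overload use the recursive feature addition version instead:

            matrix<double> top2 = rank_features(kc, samples, labels, 2);
    */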
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace rank_features_helpers
{
template <
typename K,
typename sample_matrix_type,
typename label_matrix_type
>
typename K::scalar_type centroid_gap (
const kcentroid<K>& kc,
const sample_matrix_type& samples,
const label_matrix_type& labels
)
{
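            // In kernel feature space this is simply the distance between the means
            // of the two classes, where each mean is the (approximate) centroid
            // maintained by the corresponding kcentroid object.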
kcentroid<K> kc1(kc);
kcentroid<K> kc2(kc);
// toss all the samples into our kcentroids
for (long i = 0; i < samples.size(); ++i)
{
if (labels(i) > 0)
kc1.train(samples(i));
else
kc2.train(samples(i));
}
            // now return the separation between these two class centroids
return kc1(kc2);
}
template <
typename sample_matrix_type,
typename label_matrix_type
>
class test
{
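            /*
                This object is the objective function maximized by
                find_max_single_variable() below.  Given log(gamma), it builds a
                kcentroid using a radial_basis_kernel with that gamma and returns
                the resulting gap between the two class centroids.
            */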
typedef typename sample_matrix_type::type sample_type;
typedef typename sample_type::type scalar_type;
typedef typename sample_type::mem_manager_type mem_manager_type;
public:
test (
const sample_matrix_type& samples_,
const label_matrix_type& labels_,
unsigned long num_sv_,
bool verbose_
) : samples(samples_), labels(labels_), num_sv(num_sv_), verbose(verbose_)
{
}
double operator() (
double gamma
) const
{
using namespace std;
                // The optimization runs over log(gamma) since useful gamma values span
                // many orders of magnitude, so don't forget to convert back to normal
                // space before using it.
                gamma = std::exp(gamma);
typedef radial_basis_kernel<sample_type> kernel_type;
// Make a kcentroid and find out what the gap is at the current gamma. Try to pick a reasonable
// tolerance.
const double tolerance = std::min(gamma*0.01, 0.01);
const kernel_type kern(gamma);
kcentroid<kernel_type> kc(kern, tolerance, num_sv);
                const scalar_type gap = centroid_gap(kc, samples, labels);
                if (verbose)
                {
                    cout << "\rChecking goodness of gamma = " << gamma << ". Goodness = "
                         << gap << " " << flush;
                }
                return gap;
}
const sample_matrix_type& samples;
const label_matrix_type& labels;
unsigned long num_sv;
bool verbose;
};
template <
typename sample_matrix_type,
typename label_matrix_type
>
double find_gamma_with_big_centroid_gap_impl (
const sample_matrix_type& samples,
const label_matrix_type& labels,
double initial_gamma,
unsigned long num_sv,
bool verbose
)
{
using namespace std;
if (verbose)
{
cout << endl;
}
test<sample_matrix_type, label_matrix_type> funct(samples, labels, num_sv, verbose);
double best_gamma = std::log(initial_gamma);
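            // Search log(gamma) over the range [-15,15] (i.e. gamma from roughly 3e-7
            // to 3e6).  find_max_single_variable() stops once the bracketing interval
            // shrinks below 1e-3 or after at most 100 calls to funct, and it updates
            // best_gamma in place to the maximizer it found.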
double goodness = find_max_single_variable(funct, best_gamma, -15, 15, 1e-3, 100);
if (verbose)
{
cout << "\rBest gamma = " << std::exp(best_gamma) << ". Goodness = "
<< goodness << " " << endl;
}
return std::exp(best_gamma);
}
}
// ----------------------------------------------------------------------------------------
template <
typename sample_matrix_type,
typename label_matrix_type
>
double find_gamma_with_big_centroid_gap (
const sample_matrix_type& samples,
const label_matrix_type& labels,
double initial_gamma = 0.1,
unsigned long num_sv = 40
)
{
DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels),
"\t double find_gamma_with_big_centroid_gap()"
<< "\n\t initial_gamma: " << initial_gamma
<< "\n\t num_sv: " << num_sv
<< "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels)
);
return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples),
mat(labels),
initial_gamma,
num_sv,
false);
}
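    /*
        A sketch of typical usage (assuming column-vector samples and +1/-1 labels):

            const double gamma = find_gamma_with_big_centroid_gap(samples, labels);
            radial_basis_kernel<sample_type> kern(gamma);
            // kern should now be a reasonable kernel to start training with.
    */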
// ----------------------------------------------------------------------------------------
template <
typename sample_matrix_type,
typename label_matrix_type
>
double verbose_find_gamma_with_big_centroid_gap (
const sample_matrix_type& samples,
const label_matrix_type& labels,
double initial_gamma = 0.1,
unsigned long num_sv = 40
)
{
DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels),
"\t double verbose_find_gamma_with_big_centroid_gap()"
<< "\n\t initial_gamma: " << initial_gamma
<< "\n\t num_sv: " << num_sv
<< "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels)
);
return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples),
mat(labels),
initial_gamma,
num_sv,
true);
}
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
double compute_mean_squared_distance (
const vector_type& samples
)
{
running_stats<double> rs;
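        // accumulate the squared distance between every distinct pair of samples.
        // Note that this looks at all O(N^2) pairs, so consider subsampling large
        // datasets before calling this function.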
for (unsigned long i = 0; i < samples.size(); ++i)
{
for (unsigned long j = i+1; j < samples.size(); ++j)
{
rs.add(length_squared(samples[i] - samples[j]));
}
}
return rs.mean();
}
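    /*
        A common idiom (used in several of the dlib example programs) is to derive
        an initial RBF gamma from a random subsample of the data:

            const double gamma = 1.0/compute_mean_squared_distance(randomly_subsample(samples, 2000));

        randomly_subsample() is part of dlib's statistics tools (see
        "../statistics.h", which this header already includes).
    */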
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_KERNEL_FEATURE_RANKINg_H_