// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_iMAGE_Hh_
#define DLIB_SCAN_iMAGE_Hh_
#include <algorithm>
#include <utility>
#include <vector>
#include "scan_image_abstract.h"
#include "../matrix.h"
#include "../algs.h"
#include "../rand.h"
#include "../array2d.h"
#include "../image_transforms/spatial_filtering.h"
#include "../image_transforms/thresholding.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
    inline rectangle bounding_box_of_rects (
        const std::vector<std::pair<unsigned int, rectangle> >& rects,
        const point& position
    )
    /*!
        ensures
            - returns the smallest rectangle that contains every rectangle in
              rects after each one has been shifted by position.  That is, the
              result contains translate_rect(rects[i].second,position) for all
              valid i.
            - the index stored in rects[i].first is not used here.
            - if rects is empty then a default constructed rectangle is returned.
    !*/
    {
        rectangle bounds;
        // Grow the bounding box one translated rectangle at a time.
        for (const auto& entry : rects)
            bounds += translate_rect(entry.second, position);
        return bounds;
    }
}
// ----------------------------------------------------------------------------------------
template <
    typename image_array_type
    >
bool all_images_same_size (
    const image_array_type& images
)
/*!
    ensures
        - returns true if images is empty or if every image in it has the same
          number of rows and columns as images[0], and false otherwise.
!*/
{
    // An empty collection trivially satisfies the condition.
    if (images.size() == 0)
        return true;

    // Walk over every image and bail out at the first one whose dimensions
    // differ from those of the first image.
    unsigned long idx = 0;
    while (idx < images.size())
    {
        const bool same_dims = num_rows(images[0]) == num_rows(images[idx]) &&
                               num_columns(images[0]) == num_columns(images[idx]);
        if (!same_dims)
            return false;
        ++idx;
    }
    return true;
}
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
double sum_of_rects_in_images (
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const point& position
)
{
DLIB_ASSERT(all_images_same_size(images),
"\t double sum_of_rects_in_images()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
);
#ifdef ENABLE_ASSERTS
for (unsigned long i = 0; i < rects.size(); ++i)
{
DLIB_ASSERT(rects[i].first < images.size(),
"\t double sum_of_rects_in_images()"
<< "\n\t rects["<<i<<"].first must refer to a valid image."
<< "\n\t rects["<<i<<"].first: " << rects[i].first
<< "\n\t images.size(): " << images.size()
);
}
#endif
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
typedef typename promote<pixel_type>::type ptype;
ptype temp = 0;
for (unsigned long i = 0; i < rects.size(); ++i)
{
const typename image_array_type::type& img = images[rects[i].first];
const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,position));
temp += sum(matrix_cast<ptype>(subm(mat(img), rect)));
}
return static_cast<double>(temp);
}
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
double sum_of_rects_in_images_movable_parts (
const image_array_type& images,
const rectangle& window,
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
const point& position
)
{
DLIB_ASSERT(all_images_same_size(images) && center(window) == point(0,0),
"\t double sum_of_rects_in_images_movable_parts()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
<< "\n\t center(window): " << center(window)
);
#ifdef ENABLE_ASSERTS
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
{
DLIB_ASSERT(fixed_rects[i].first < images.size(),
"\t double sum_of_rects_in_images_movable_parts()"
<< "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
<< "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first
<< "\n\t images.size(): " << images.size()
);
}
for (unsigned long i = 0; i < movable_rects.size(); ++i)
{
DLIB_ASSERT(movable_rects[i].first < images.size(),
"\t double sum_of_rects_in_images_movable_parts()"
<< "\n\t movable_rects["<<i<<"].first must refer to a valid image."
<< "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first
<< "\n\t images.size(): " << images.size()
);
DLIB_ASSERT(center(movable_rects[i].second) == point(0,0),
"\t double sum_of_rects_in_images_movable_parts()"
<< "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second
);
}
#endif
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
typedef typename promote<pixel_type>::type ptype;
ptype temp = 0;
// compute TOTAL_FIXED part
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
{
const typename image_array_type::type& img = images[fixed_rects[i].first];
const rectangle rect = get_rect(img).intersect(translate_rect(fixed_rects[i].second,position));
temp += sum(matrix_cast<ptype>(subm(mat(img), rect)));
}
if (images.size() > 0)
{
// compute TOTAL_MOVABLE part
array2d<ptype> tempimg(images[0].nr(), images[0].nc());
for (unsigned long i = 0; i < movable_rects.size(); ++i)
{
const typename image_array_type::type& img = images[movable_rects[i].first];
sum_filter_assign(img, tempimg, movable_rects[i].second);
const rectangle rect = get_rect(tempimg).intersect(translate_rect(window,position));
if (rect.is_empty() == false)
temp += std::max(0,max(matrix_cast<ptype>(subm(mat(tempimg), rect))));
}
}
return static_cast<double>(temp);
}
// ----------------------------------------------------------------------------------------
template <
typename image_type
>
void find_points_above_thresh (
std::vector<std::pair<double, point> >& dets,
const image_type& img_,
const double thresh,
const unsigned long max_dets
)
{
const_image_view<image_type> img(img_);
typedef typename image_traits<image_type>::pixel_type ptype;
dets.clear();
if (max_dets == 0)
return;
unsigned long count = 0;
dlib::rand rnd;
for (long r = 0; r < img.nr(); ++r)
{
for (long c = 0; c < img.nc(); ++c)
{
const ptype val = img[r][c];
if (val >= thresh)
{
++count;
if (dets.size() < max_dets)
{
dets.push_back(std::make_pair(val, point(c,r)));
}
else
{
// The idea here is to cause us to randomly sample possible detection
// locations throughout the image rather than just stopping the detection
// procedure once we hit the max_dets limit. So this method will result
// in a random subsample of all the detections >= thresh being in dets
// at the end of scan_image().
const unsigned long random_index = rnd.get_random_32bit_number()%count;
if (random_index < dets.size())
{
dets[random_index] = std::make_pair(val, point(c,r));
}
}
}
}
}
}
// ----------------------------------------------------------------------------------------
template <
typename image_type
>
std::vector<point> find_peaks (
const image_type& img_,
const double non_max_suppression_radius,
const typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type& thresh
)
{
DLIB_CASSERT(non_max_suppression_radius >= 0);
const_image_view<image_type> img(img_);
using basic_pixel_type = typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type;
std::vector<std::pair<basic_pixel_type,point>> peaks;
for (long r = 1; r+1 < img.nr(); ++r)
{
for (long c = 1; c+1 < img.nc(); ++c)
{
auto val = img[r][c];
if (val < thresh)
continue;
if (
val <= img[r-1][c] ||
val <= img[r+1][c] ||
val <= img[r][c+1] ||
val <= img[r][c-1] ||
val <= img[r-1][c-1] ||
val <= img[r+1][c+1] ||
val <= img[r-1][c+1] ||
val <= img[r+1][c-1]
)
{
continue;
}
peaks.emplace_back(val,point(c,r));
}
}
// now do non-max suppression of the peaks according to the supplied radius.
using pt = std::pair<basic_pixel_type,point>;
// First sort the peaks so the strongest peaks come first. We will greedily accept
// them and then do the normal peak sorting/non-max suppression thing.
std::sort(peaks.rbegin(), peaks.rend(), [](const pt& a, const pt&b ){ return a.first < b.first; });
std::vector<point> final_peaks;
const double radius_sqr = non_max_suppression_radius*non_max_suppression_radius;
// If there are a lot of peaks then we will make a mask image and use that to do
// the non-max suppression since this is fast when peaks.size() is large. Otherwise we
// will do the simpler thing in the else block that doesn't require us to allocate a
// temporary mask image.
if (peaks.size() > 500 && radius_sqr != 0)
{
// hit will record which areas of the image have already been accounted for by some
// peak. So it is our mask image.
matrix<unsigned char> hit(img.nr(), img.nc());
// initially nothing has been hit.
hit = 0;
const unsigned long win_size = std::round(2*non_max_suppression_radius);
const rectangle area = get_rect(img);
for (auto& pp : peaks)
{
auto& p = pp.second;
if (!hit(p.y(),p.x()))
{
final_peaks.emplace_back(p);
// mask out a circle around this new peak
rectangle win = centered_rect(p, win_size, win_size).intersect(area);
for (long r = win.top(); r <= win.bottom(); ++r)
{
for (long c = win.left(); c <= win.right(); ++c)
{
if (length_squared(point(c,r)-p) <= radius_sqr)
hit(r,c) = 1;
}
}
}
}
}
else
{
// if peaks.size() is relatively small then this is a faster way to do the non-max
// suppression.
for (auto& p : peaks)
{
bool hits_any_existing_peak = false;
// If the user set the radius to 0 then just copy the peaks to the output without
// checking anything.
if (radius_sqr != 0)
{
for (auto& v : final_peaks)
{
if (length_squared(p.second-v) <= radius_sqr)
{
hits_any_existing_peak = true;
break;
}
}
}
if (!hits_any_existing_peak)
{
final_peaks.emplace_back(p.second);
}
}
}
return final_peaks;
}
template <
    typename image_type
    >
std::vector<point> find_peaks (
    const image_type& img
)
/*!
    ensures
        - Convenience overload.  Equivalent to calling the three argument
          find_peaks() with a non-max suppression radius of 0 and with
          partition_pixels(img) as the threshold.
!*/
{
    // A radius of 0 means no non-max suppression is requested.
    const double no_suppression_radius = 0;
    return find_peaks(img, no_suppression_radius, partition_pixels(img));
}
template <
    typename image_type
    >
std::vector<point> find_peaks (
    const image_type& img,
    const double non_max_suppression_radius
)
/*!
    ensures
        - Convenience overload.  Equivalent to calling the three argument
          find_peaks() with the given radius and with partition_pixels(img)
          as the threshold.
!*/
{
    return find_peaks(
        img,
        non_max_suppression_radius,
        partition_pixels(img)
    );
}
// ----------------------------------------------------------------------------------------
template <
typename image_array_type
>
void scan_image (
std::vector<std::pair<double, point> >& dets,
const image_array_type& images,
const std::vector<std::pair<unsigned int, rectangle> >& rects,
const double thresh,
const unsigned long max_dets
)
{
DLIB_ASSERT(images.size() > 0 && rects.size() > 0 && all_images_same_size(images),
"\t void scan_image()"
<< "\n\t Invalid arguments given to this function."
<< "\n\t images.size(): " << images.size()
<< "\n\t rects.size(): " << rects.size()
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
);
#ifdef ENABLE_ASSERTS
for (unsigned long i = 0; i < rects.size(); ++i)
{
DLIB_ASSERT(rects[i].first < images.size(),
"\t void scan_image()"
<< "\n\t rects["<<i<<"].first must refer to a valid image."
<< "\n\t rects["<<i<<"].first: " << rects[i].first
<< "\n\t images.size(): " << images.size()
);
}
#endif
typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
typedef typename promote<pixel_type>::type ptype;
array2d<ptype> accum(images[0].nr(), images[0].nc());
assign_all_pixels(accum, 0);
for (unsigned long i = 0; i < rects.size(); ++i)
sum_filter(images[rects[i].first], accum, rects[i].second);
find_points_above_thresh(dets, accum, thresh, max_dets);
}
// ----------------------------------------------------------------------------------------
template <
    typename image_array_type
    >
void scan_image_movable_parts (
    std::vector<std::pair<double, point> >& dets,
    const image_array_type& images,
    const rectangle& window,
    const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
    const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
    const double thresh,
    const unsigned long max_dets
)
/*!
    requires (verified with DLIB_ASSERT)
        - images.size() > 0
        - all_images_same_size(images) == true
        - center(window) == point(0,0)
        - window.area() > 0
        - for all valid i: fixed_rects[i].first < images.size()
        - for all valid i: movable_rects[i].first < images.size()
        - for all valid i: center(movable_rects[i].second) == point(0,0)
        - for all valid i: movable_rects[i].second.area() > 0
    ensures
        - Builds a score image the size of images[0]:
            - every fixed rect contributes its sum_filter() response.
            - every movable rect contributes the result of running max_filter()
              (window.width() by window.height(), thresh argument 0) over its
              sum_filter_assign() response.
        - #dets is always overwritten: it is filled by find_points_above_thresh()
          with the locations of the score image that are >= thresh, limited to
          max_dets entries.  This includes the case where both fixed_rects and
          movable_rects are empty, in which case every score is 0.
!*/
{
    DLIB_ASSERT(images.size() > 0 && all_images_same_size(images) &&
                center(window) == point(0,0) && window.area() > 0,
        "\t void scan_image_movable_parts()"
        << "\n\t Invalid arguments given to this function."
        << "\n\t all_images_same_size(images): " << all_images_same_size(images)
        << "\n\t center(window): " << center(window)
        << "\n\t window.area(): " << window.area()
        << "\n\t images.size(): " << images.size()
    );
#ifdef ENABLE_ASSERTS
    for (unsigned long i = 0; i < fixed_rects.size(); ++i)
    {
        DLIB_ASSERT(fixed_rects[i].first < images.size(),
            "\t void scan_image_movable_parts()"
            << "\n\t Invalid arguments given to this function."
            << "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
            << "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first
            << "\n\t images.size(): " << images.size()
        );
    }
    for (unsigned long i = 0; i < movable_rects.size(); ++i)
    {
        DLIB_ASSERT(movable_rects[i].first < images.size(),
            "\t void scan_image_movable_parts()"
            << "\n\t Invalid arguments given to this function."
            << "\n\t movable_rects["<<i<<"].first must refer to a valid image."
            << "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first
            << "\n\t images.size(): " << images.size()
        );
        DLIB_ASSERT(center(movable_rects[i].second) == point(0,0) &&
                    movable_rects[i].second.area() > 0,
            "\t void scan_image_movable_parts()"
            << "\n\t Invalid arguments given to this function."
            << "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second
            << "\n\t movable_rects["<<i<<"].second.area(): " << movable_rects[i].second.area()
        );
    }
#endif

    // NOTE: there is deliberately no early return when both rect lists are
    // empty.  Returning early would leave whatever the caller had in dets
    // untouched (possibly more than max_dets stale entries) and would skip the
    // zero valued detections that must be reported when thresh <= 0.  Falling
    // through produces an all zero score image and lets
    // find_points_above_thresh() reset and fill dets like every other path.

    typedef typename image_traits<typename image_array_type::type>::pixel_type pixel_type;
    typedef typename promote<pixel_type>::type ptype;

    array2d<ptype> accum(images[0].nr(), images[0].nc());
    assign_all_pixels(accum, 0);

    // Contribution of the parts that sit at a fixed offset from the window.
    for (unsigned long i = 0; i < fixed_rects.size(); ++i)
        sum_filter(images[fixed_rects[i].first], accum, fixed_rects[i].second);

    // Contribution of the movable parts.  The scratch image is only needed
    // when there is at least one of them.
    if (movable_rects.size() != 0)
    {
        array2d<ptype> temp(accum.nr(), accum.nc());
        for (unsigned long i = 0; i < movable_rects.size(); ++i)
        {
            const rectangle rect = movable_rects[i].second;
            sum_filter_assign(images[movable_rects[i].first], temp, rect);
            // NOTE(review): max_filter() is expected to add its result into
            // accum rather than overwrite it -- confirm against its documentation.
            max_filter(temp, accum, window.width(), window.height(), 0);
        }
    }

    find_points_above_thresh(dets, accum, thresh, max_dets);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_iMAGE_Hh_