/usr/lib/python3/dist-packages/caffe/detector.py is in python3-caffe-cpu 1.0.0~rc4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | #!/usr/bin/env python
"""
Do windowed detection by classifying a number of images/crops at once,
optionally using the selective search window proposal method.
This implementation follows ideas in
Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik.
Rich feature hierarchies for accurate object detection and semantic
segmentation.
http://arxiv.org/abs/1311.2524
The selective_search_ijcv_with_python code required for the selective search
proposal mode is available at
https://github.com/sergeyk/selective_search_ijcv_with_python
"""
import numpy as np
import os
import caffe
class Detector(caffe.Net):
"""
Detector extends Net for windowed detection by a list of crops or
selective search proposals.
Parameters
----------
mean, input_scale, raw_scale, channel_swap : params for preprocessing
options.
context_pad : amount of surrounding context to take s.t. a `context_pad`
sized border of pixels in the network input image is context, as in
R-CNN feature extraction.
"""
def __init__(self, model_file, pretrained_file, mean=None,
input_scale=None, raw_scale=None, channel_swap=None,
context_pad=None):
caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)
# configure pre-processing
in_ = self.inputs[0]
self.transformer = caffe.io.Transformer(
{in_: self.blobs[in_].data.shape})
self.transformer.set_transpose(in_, (2, 0, 1))
if mean is not None:
self.transformer.set_mean(in_, mean)
if input_scale is not None:
self.transformer.set_input_scale(in_, input_scale)
if raw_scale is not None:
self.transformer.set_raw_scale(in_, raw_scale)
if channel_swap is not None:
self.transformer.set_channel_swap(in_, channel_swap)
self.configure_crop(context_pad)
def detect_windows(self, images_windows):
"""
Do windowed detection over given images and windows. Windows are
extracted then warped to the input dimensions of the net.
Parameters
----------
images_windows: (image filename, window list) iterable.
context_crop: size of context border to crop in pixels.
Returns
-------
detections: list of {filename: image filename, window: crop coordinates,
predictions: prediction vector} dicts.
"""
# Extract windows.
window_inputs = []
for image_fname, windows in images_windows:
image = caffe.io.load_image(image_fname).astype(np.float32)
for window in windows:
window_inputs.append(self.crop(image, window))
# Run through the net (warping windows to input dimensions).
in_ = self.inputs[0]
caffe_in = np.zeros((len(window_inputs), window_inputs[0].shape[2])
+ self.blobs[in_].data.shape[2:],
dtype=np.float32)
for ix, window_in in enumerate(window_inputs):
caffe_in[ix] = self.transformer.preprocess(in_, window_in)
out = self.forward_all(**{in_: caffe_in})
predictions = out[self.outputs[0]]
# Package predictions with images and windows.
detections = []
ix = 0
for image_fname, windows in images_windows:
for window in windows:
detections.append({
'window': window,
'prediction': predictions[ix],
'filename': image_fname
})
ix += 1
return detections
def detect_selective_search(self, image_fnames):
"""
Do windowed detection over Selective Search proposals by extracting
the crop and warping to the input dimensions of the net.
Parameters
----------
image_fnames: list
Returns
-------
detections: list of {filename: image filename, window: crop coordinates,
predictions: prediction vector} dicts.
"""
import selective_search_ijcv_with_python as selective_search
# Make absolute paths so MATLAB can find the files.
image_fnames = [os.path.abspath(f) for f in image_fnames]
windows_list = selective_search.get_windows(
image_fnames,
cmd='selective_search_rcnn'
)
# Run windowed detection on the selective search list.
return self.detect_windows(zip(image_fnames, windows_list))
def crop(self, im, window):
"""
Crop a window from the image for detection. Include surrounding context
according to the `context_pad` configuration.
Parameters
----------
im: H x W x K image ndarray to crop.
window: bounding box coordinates as ymin, xmin, ymax, xmax.
Returns
-------
crop: cropped window.
"""
# Crop window from the image.
crop = im[window[0]:window[2], window[1]:window[3]]
if self.context_pad:
box = window.copy()
crop_size = self.blobs[self.inputs[0]].width # assumes square
scale = crop_size / (1. * crop_size - self.context_pad * 2)
# Crop a box + surrounding context.
half_h = (box[2] - box[0] + 1) / 2.
half_w = (box[3] - box[1] + 1) / 2.
center = (box[0] + half_h, box[1] + half_w)
scaled_dims = scale * np.array((-half_h, -half_w, half_h, half_w))
box = np.round(np.tile(center, 2) + scaled_dims)
full_h = box[2] - box[0] + 1
full_w = box[3] - box[1] + 1
scale_h = crop_size / full_h
scale_w = crop_size / full_w
pad_y = round(max(0, -box[0]) * scale_h) # amount out-of-bounds
pad_x = round(max(0, -box[1]) * scale_w)
# Clip box to image dimensions.
im_h, im_w = im.shape[:2]
box = np.clip(box, 0., [im_h, im_w, im_h, im_w])
clip_h = box[2] - box[0] + 1
clip_w = box[3] - box[1] + 1
assert(clip_h > 0 and clip_w > 0)
crop_h = round(clip_h * scale_h)
crop_w = round(clip_w * scale_w)
if pad_y + crop_h > crop_size:
crop_h = crop_size - pad_y
if pad_x + crop_w > crop_size:
crop_w = crop_size - pad_x
# collect with context padding and place in input
# with mean padding
context_crop = im[box[0]:box[2], box[1]:box[3]]
context_crop = caffe.io.resize_image(context_crop, (crop_h, crop_w))
crop = np.ones(self.crop_dims, dtype=np.float32) * self.crop_mean
crop[pad_y:(pad_y + crop_h), pad_x:(pad_x + crop_w)] = context_crop
return crop
def configure_crop(self, context_pad):
"""
Configure crop dimensions and amount of context for cropping.
If context is included, make the special input mean for context padding.
Parameters
----------
context_pad : amount of context for cropping.
"""
# crop dimensions
in_ = self.inputs[0]
tpose = self.transformer.transpose[in_]
inv_tpose = [tpose[t] for t in tpose]
self.crop_dims = np.array(self.blobs[in_].data.shape[1:])[inv_tpose]
#.transpose(inv_tpose)
# context padding
self.context_pad = context_pad
if self.context_pad:
in_ = self.inputs[0]
transpose = self.transformer.transpose.get(in_)
channel_order = self.transformer.channel_swap.get(in_)
raw_scale = self.transformer.raw_scale.get(in_)
# Padding context crops needs the mean in unprocessed input space.
mean = self.transformer.mean.get(in_)
if mean is not None:
inv_transpose = [transpose[t] for t in transpose]
crop_mean = mean.copy().transpose(inv_transpose)
if channel_order is not None:
channel_order_inverse = [channel_order.index(i)
for i in range(crop_mean.shape[2])]
crop_mean = crop_mean[:, :, channel_order_inverse]
if raw_scale is not None:
crop_mean /= raw_scale
self.crop_mean = crop_mean
else:
self.crop_mean = np.zeros(self.crop_dims, dtype=np.float32)
|