preprocess.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import cv2
  15. import numpy as np
  16. from PIL import Image
  17. def decode_image(im_file, im_info):
  18. """read rgb image
  19. Args:
  20. im_file (str|np.ndarray): input can be image path or np.ndarray
  21. im_info (dict): info of image
  22. Returns:
  23. im (np.ndarray): processed image (np.ndarray)
  24. im_info (dict): info of processed image
  25. """
  26. if isinstance(im_file, str):
  27. with open(im_file, 'rb') as f:
  28. im_read = f.read()
  29. data = np.frombuffer(im_read, dtype='uint8')
  30. im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
  31. im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  32. else:
  33. im = im_file
  34. im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
  35. im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
  36. return im, im_info
  37. class Resize(object):
  38. """resize image by target_size and max_size
  39. Args:
  40. target_size (int): the target size of image
  41. keep_ratio (bool): whether keep_ratio or not, default true
  42. interp (int): method of resize
  43. """
  44. def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
  45. if isinstance(target_size, int):
  46. target_size = [target_size, target_size]
  47. self.target_size = target_size
  48. self.keep_ratio = keep_ratio
  49. self.interp = interp
  50. def __call__(self, im, im_info):
  51. """
  52. Args:
  53. im (np.ndarray): image (np.ndarray)
  54. im_info (dict): info of image
  55. Returns:
  56. im (np.ndarray): processed image (np.ndarray)
  57. im_info (dict): info of processed image
  58. """
  59. assert len(self.target_size) == 2
  60. assert self.target_size[0] > 0 and self.target_size[1] > 0
  61. im_channel = im.shape[2]
  62. im_scale_y, im_scale_x = self.generate_scale(im)
  63. im = cv2.resize(
  64. im,
  65. None,
  66. None,
  67. fx=im_scale_x,
  68. fy=im_scale_y,
  69. interpolation=self.interp)
  70. im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
  71. im_info['scale_factor'] = np.array(
  72. [im_scale_y, im_scale_x]).astype('float32')
  73. return im, im_info
  74. def generate_scale(self, im):
  75. """
  76. Args:
  77. im (np.ndarray): image (np.ndarray)
  78. Returns:
  79. im_scale_x: the resize ratio of X
  80. im_scale_y: the resize ratio of Y
  81. """
  82. origin_shape = im.shape[:2]
  83. im_c = im.shape[2]
  84. if self.keep_ratio:
  85. im_size_min = np.min(origin_shape)
  86. im_size_max = np.max(origin_shape)
  87. target_size_min = np.min(self.target_size)
  88. target_size_max = np.max(self.target_size)
  89. im_scale = float(target_size_min) / float(im_size_min)
  90. if np.round(im_scale * im_size_max) > target_size_max:
  91. im_scale = float(target_size_max) / float(im_size_max)
  92. im_scale_x = im_scale
  93. im_scale_y = im_scale
  94. else:
  95. resize_h, resize_w = self.target_size
  96. im_scale_y = resize_h / float(origin_shape[0])
  97. im_scale_x = resize_w / float(origin_shape[1])
  98. return im_scale_y, im_scale_x
  99. class NormalizeImage(object):
  100. """normalize image
  101. Args:
  102. mean (list): im - mean
  103. std (list): im / std
  104. is_scale (bool): whether need im / 255
  105. norm_type (str): type in ['mean_std', 'none']
  106. """
  107. def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
  108. self.mean = mean
  109. self.std = std
  110. self.is_scale = is_scale
  111. self.norm_type = norm_type
  112. def __call__(self, im, im_info):
  113. """
  114. Args:
  115. im (np.ndarray): image (np.ndarray)
  116. im_info (dict): info of image
  117. Returns:
  118. im (np.ndarray): processed image (np.ndarray)
  119. im_info (dict): info of processed image
  120. """
  121. im = im.astype(np.float32, copy=False)
  122. if self.is_scale:
  123. scale = 1.0 / 255.0
  124. im *= scale
  125. if self.norm_type == 'mean_std':
  126. mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
  127. std = np.array(self.std)[np.newaxis, np.newaxis, :]
  128. im -= mean
  129. im /= std
  130. return im, im_info
  131. class Permute(object):
  132. """permute image
  133. Args:
  134. to_bgr (bool): whether convert RGB to BGR
  135. channel_first (bool): whether convert HWC to CHW
  136. """
  137. def __init__(self, ):
  138. super(Permute, self).__init__()
  139. def __call__(self, im, im_info):
  140. """
  141. Args:
  142. im (np.ndarray): image (np.ndarray)
  143. im_info (dict): info of image
  144. Returns:
  145. im (np.ndarray): processed image (np.ndarray)
  146. im_info (dict): info of processed image
  147. """
  148. im = im.transpose((2, 0, 1)).copy()
  149. return im, im_info
  150. class PadStride(object):
  151. """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
  152. Args:
  153. stride (bool): model with FPN need image shape % stride == 0
  154. """
  155. def __init__(self, stride=0):
  156. self.coarsest_stride = stride
  157. def __call__(self, im, im_info):
  158. """
  159. Args:
  160. im (np.ndarray): image (np.ndarray)
  161. im_info (dict): info of image
  162. Returns:
  163. im (np.ndarray): processed image (np.ndarray)
  164. im_info (dict): info of processed image
  165. """
  166. coarsest_stride = self.coarsest_stride
  167. if coarsest_stride <= 0:
  168. return im, im_info
  169. im_c, im_h, im_w = im.shape
  170. pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
  171. pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
  172. padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
  173. padding_im[:, :im_h, :im_w] = im
  174. return padding_im, im_info
  175. class LetterBoxResize(object):
  176. def __init__(self, target_size):
  177. """
  178. Resize image to target size, convert normalized xywh to pixel xyxy
  179. format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
  180. Args:
  181. target_size (int|list): image target size.
  182. """
  183. super(LetterBoxResize, self).__init__()
  184. if isinstance(target_size, int):
  185. target_size = [target_size, target_size]
  186. self.target_size = target_size
  187. def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
  188. # letterbox: resize a rectangular image to a padded rectangular
  189. shape = img.shape[:2] # [height, width]
  190. ratio_h = float(height) / shape[0]
  191. ratio_w = float(width) / shape[1]
  192. ratio = min(ratio_h, ratio_w)
  193. new_shape = (round(shape[1] * ratio),
  194. round(shape[0] * ratio)) # [width, height]
  195. padw = (width - new_shape[0]) / 2
  196. padh = (height - new_shape[1]) / 2
  197. top, bottom = round(padh - 0.1), round(padh + 0.1)
  198. left, right = round(padw - 0.1), round(padw + 0.1)
  199. img = cv2.resize(
  200. img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
  201. img = cv2.copyMakeBorder(
  202. img, top, bottom, left, right, cv2.BORDER_CONSTANT,
  203. value=color) # padded rectangular
  204. return img, ratio, padw, padh
  205. def __call__(self, im, im_info):
  206. """
  207. Args:
  208. im (np.ndarray): image (np.ndarray)
  209. im_info (dict): info of image
  210. Returns:
  211. im (np.ndarray): processed image (np.ndarray)
  212. im_info (dict): info of processed image
  213. """
  214. assert len(self.target_size) == 2
  215. assert self.target_size[0] > 0 and self.target_size[1] > 0
  216. height, width = self.target_size
  217. h, w = im.shape[:2]
  218. im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
  219. new_shape = [round(h * ratio), round(w * ratio)]
  220. im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
  221. im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
  222. return im, im_info
  223. class Pad(object):
  224. def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
  225. """
  226. Pad image to a specified size.
  227. Args:
  228. size (list[int]): image target size
  229. fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
  230. """
  231. super(Pad, self).__init__()
  232. if isinstance(size, int):
  233. size = [size, size]
  234. self.size = size
  235. self.fill_value = fill_value
  236. def __call__(self, im, im_info):
  237. im_h, im_w = im.shape[:2]
  238. h, w = self.size
  239. if h == im_h and w == im_w:
  240. im = im.astype(np.float32)
  241. return im, im_info
  242. canvas = np.ones((h, w, 3), dtype=np.float32)
  243. canvas *= np.array(self.fill_value, dtype=np.float32)
  244. canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
  245. im = canvas
  246. return im, im_info
  247. def preprocess(im, preprocess_ops):
  248. # process image by preprocess_ops
  249. im_info = {
  250. 'scale_factor': np.array(
  251. [1., 1.], dtype=np.float32),
  252. 'im_shape': None,
  253. }
  254. im, im_info = decode_image(im, im_info)
  255. for operator in preprocess_ops:
  256. im, im_info = operator(im, im_info)
  257. return im, im_info