# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import glob
import math

import yaml
import numpy as np
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor

# add the deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, '..'))
sys.path.insert(0, parent_path)

from preprocess import preprocess, Resize, NormalizeImage, Permute, Pad, decode_image
from utils import Timer

# Global set of supported model architectures
SUPPORT_MODELS = {
    'YOLO', 'PPYOLOE', 'YOLOX', 'YOLOF', 'YOLOv5', 'RTMDet', 'YOLOv6',
    'YOLOv7', 'YOLOv8', 'DETR'
}


class Detector(object):
    """
    Args:
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        device (str): device to run on, one of CPU/GPU/XPU, default is CPU
        run_mode (str): inference mode (paddle/trt_fp32/trt_fp16)
        batch_size (int): batch size for inference
        trt_min_shape (int): min shape for dynamic shape in TensorRT
        trt_max_shape (int): max shape for dynamic shape in TensorRT
        trt_opt_shape (int): opt shape for dynamic shape in TensorRT
        trt_calib_mode (bool): if the model was produced by TensorRT offline
            quantization calibration, trt_calib_mode should be set to True
        cpu_threads (int): number of CPU threads
        enable_mkldnn (bool): whether to enable MKLDNN
        enable_mkldnn_bfloat16 (bool): whether to enable MKLDNN bfloat16
        output_dir (str): directory for saving output
        threshold (float): score threshold for visualization
        delete_shuffle_pass (bool): whether to remove shuffle_channel_detect_pass
            in TensorRT. Used by action models.
    """
    def __init__(self,
                 model_dir,
                 device='CPU',
                 run_mode='paddle',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False,
                 enable_mkldnn_bfloat16=False,
                 output_dir='output',
                 threshold=0.5,
                 delete_shuffle_pass=False):
        self.pred_config = self.set_config(model_dir)
        self.predictor, self.config = load_predictor(
            model_dir,
            self.pred_config.arch,
            run_mode=run_mode,
            batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            device=device,
            use_dynamic_shape=self.pred_config.use_dynamic_shape,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn,
            enable_mkldnn_bfloat16=enable_mkldnn_bfloat16,
            delete_shuffle_pass=delete_shuffle_pass)
        self.det_times = Timer()
        self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
        self.batch_size = batch_size
        self.output_dir = output_dir
        self.threshold = threshold

    def set_config(self, model_dir):
        return PredictConfig(model_dir)

    def preprocess(self, image_list):
        preprocess_ops = []
        for op_info in self.pred_config.preprocess_infos:
            new_op_info = op_info.copy()
            op_type = new_op_info.pop('type')
            preprocess_ops.append(eval(op_type)(**new_op_info))

        input_im_lst = []
        input_im_info_lst = []
        for im_path in image_list:
            im, im_info = preprocess(im_path, preprocess_ops)
            input_im_lst.append(im)
            input_im_info_lst.append(im_info)
        inputs = create_inputs(input_im_lst, input_im_info_lst)
        input_names = self.predictor.get_input_names()
        for name in input_names:
            input_tensor = self.predictor.get_input_handle(name)
            # some exported models name the image input 'x'
            if name == 'x':
                input_tensor.copy_from_cpu(inputs['image'])
            else:
                input_tensor.copy_from_cpu(inputs[name])
        return inputs

    def postprocess(self, inputs, result):
        # postprocess output of predictor
        np_boxes_num = result['boxes_num']
        assert isinstance(np_boxes_num, np.ndarray), \
            '`np_boxes_num` should be a `numpy.ndarray`'
        result = {k: v for k, v in result.items() if v is not None}
        return result

    def filter_box(self, result, threshold):
        # keep only the boxes whose score exceeds `threshold`,
        # image by image within the batch
        np_boxes_num = result['boxes_num']
        boxes = result['boxes']
        start_idx = 0
        filter_boxes = []
        filter_num = []
        for i in range(len(np_boxes_num)):
            boxes_num = np_boxes_num[i]
            boxes_i = boxes[start_idx:start_idx + boxes_num, :]
            idx = boxes_i[:, 1] > threshold
            filter_boxes_i = boxes_i[idx, :]
            filter_boxes.append(filter_boxes_i)
            filter_num.append(filter_boxes_i.shape[0])
            start_idx += boxes_num
        boxes = np.concatenate(filter_boxes)
        filter_num = np.array(filter_num)
        filter_res = {'boxes': boxes, 'boxes_num': filter_num}
        return filter_res

    def predict(self, repeats=1, run_benchmark=False):
        '''
        Args:
            repeats (int): number of repeats for prediction
        Returns:
            result (dict): 'boxes': np.ndarray of shape [N, 6], N: number of boxes,
                each row is [class, score, x_min, y_min, x_max, y_max];
                MaskRCNN results additionally include 'masks': np.ndarray of
                shape [N, im_h, im_w]
        '''
        # model prediction
        np_boxes_num, np_boxes, np_masks = np.array([0]), None, None
        if run_benchmark:
            for i in range(repeats):
                self.predictor.run()
                paddle.device.cuda.synchronize()
            result = dict(
                boxes=np_boxes, masks=np_masks, boxes_num=np_boxes_num)
            return result

        for i in range(repeats):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            boxes_tensor = self.predictor.get_output_handle(output_names[0])
            np_boxes = boxes_tensor.copy_to_cpu()
            if len(output_names) == 1:
                # some exported models cannot return the tensor 'bbox_num'
                np_boxes_num = np.array([len(np_boxes)])
            else:
                boxes_num = self.predictor.get_output_handle(output_names[1])
                np_boxes_num = boxes_num.copy_to_cpu()
            if self.pred_config.mask:
                masks_tensor = self.predictor.get_output_handle(output_names[2])
                np_masks = masks_tensor.copy_to_cpu()
        result = dict(boxes=np_boxes, masks=np_masks, boxes_num=np_boxes_num)
        return result

    def merge_batch_result(self, batch_result):
        if len(batch_result) == 1:
            return batch_result[0]
        res_key = batch_result[0].keys()
        results = {k: [] for k in res_key}
        for res in batch_result:
            for k, v in res.items():
                results[k].append(v)
        for k, v in results.items():
            if k not in ['masks', 'segm']:
                results[k] = np.concatenate(v)
        return results

    def get_timer(self):
        return self.det_times

    def predict_image(self,
                      image_list,
                      run_benchmark=False,
                      repeats=1,
                      visual=True,
                      save_results=False):
        batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size)
        results = []
        for i in range(batch_loop_cnt):
            start_index = i * self.batch_size
            end_index = min((i + 1) * self.batch_size, len(image_list))
            batch_image_list = image_list[start_index:end_index]
            if run_benchmark:
                # preprocess
                inputs = self.preprocess(batch_image_list)  # warmup
                self.det_times.preprocess_time_s.start()
                inputs = self.preprocess(batch_image_list)
                self.det_times.preprocess_time_s.end()

                # model prediction
                result = self.predict(repeats=50, run_benchmark=True)  # warmup
                self.det_times.inference_time_s.start()
                result = self.predict(repeats=repeats, run_benchmark=True)
                self.det_times.inference_time_s.end(repeats=repeats)

                # postprocess
                result_warmup = self.postprocess(inputs, result)  # warmup
                self.det_times.postprocess_time_s.start()
                result = self.postprocess(inputs, result)
                self.det_times.postprocess_time_s.end()
                self.det_times.img_num += len(batch_image_list)
            else:
                # preprocess
                self.det_times.preprocess_time_s.start()
                inputs = self.preprocess(batch_image_list)
                self.det_times.preprocess_time_s.end()

                # model prediction
                self.det_times.inference_time_s.start()
                result = self.predict()
                self.det_times.inference_time_s.end()

                # postprocess
                self.det_times.postprocess_time_s.start()
                result = self.postprocess(inputs, result)
                self.det_times.postprocess_time_s.end()
                self.det_times.img_num += len(batch_image_list)

            results.append(result)
            print('Test iter {}'.format(i))
        results = self.merge_batch_result(results)
        return results
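

# A minimal usage sketch for Detector (illustrative only; the model directory
# below is a hypothetical path to a model exported by PaddleDetection):
#
#     detector = Detector('output_inference/ppyoloe_crn_l_300e_coco',
#                         device='GPU', run_mode='paddle', batch_size=1)
#     results = detector.predict_image(['demo.jpg'], visual=False)
#     # results['boxes'] has shape [N, 6]: [class, score, x1, y1, x2, y2]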


def create_inputs(imgs, im_info):
    """generate inputs for different model types
    Args:
        imgs (list(numpy)): list of images (np.ndarray)
        im_info (list(dict)): list of image info dicts
    Returns:
        inputs (dict): input of model
    """
    inputs = {}
    im_shape = []
    scale_factor = []
    if len(imgs) == 1:
        inputs['image'] = np.array((imgs[0], )).astype('float32')
        inputs['im_shape'] = np.array(
            (im_info[0]['im_shape'], )).astype('float32')
        inputs['scale_factor'] = np.array(
            (im_info[0]['scale_factor'], )).astype('float32')
        return inputs

    for e in im_info:
        im_shape.append(np.array((e['im_shape'], )).astype('float32'))
        scale_factor.append(np.array((e['scale_factor'], )).astype('float32'))
    inputs['im_shape'] = np.concatenate(im_shape, axis=0)
    inputs['scale_factor'] = np.concatenate(scale_factor, axis=0)

    # zero-pad every image in the batch to the max height/width so they stack
    imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs]
    max_shape_h = max([e[0] for e in imgs_shape])
    max_shape_w = max([e[1] for e in imgs_shape])
    padding_imgs = []
    for img in imgs:
        im_c, im_h, im_w = img.shape[:]
        padding_im = np.zeros(
            (im_c, max_shape_h, max_shape_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = img
        padding_imgs.append(padding_im)
    inputs['image'] = np.stack(padding_imgs, axis=0)
    return inputs
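

# For example (shapes follow from the padding logic above): two CHW images of
# shapes (3, 320, 640) and (3, 640, 480) are each zero-padded to (3, 640, 640),
# so inputs['image'] ends up with shape (2, 3, 640, 640).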


class PredictConfig():
    """set config of preprocess, postprocess and visualize
    Args:
        model_dir (str): root path of infer_cfg.yml
    """

    def __init__(self, model_dir):
        # parse the YAML config exported alongside the model
        deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
        with open(deploy_file) as f:
            yml_conf = yaml.safe_load(f)
        self.check_model(yml_conf)
        self.arch = yml_conf['arch']
        self.preprocess_infos = yml_conf['Preprocess']
        self.min_subgraph_size = yml_conf['min_subgraph_size']
        self.labels = yml_conf['label_list']
        self.mask = False
        self.use_dynamic_shape = yml_conf['use_dynamic_shape']
        if 'mask' in yml_conf:
            self.mask = yml_conf['mask']
        self.tracker = None
        if 'tracker' in yml_conf:
            self.tracker = yml_conf['tracker']
        if 'NMS' in yml_conf:
            self.nms = yml_conf['NMS']
        if 'fpn_stride' in yml_conf:
            self.fpn_stride = yml_conf['fpn_stride']
        if self.arch == 'RCNN' and yml_conf.get('export_onnx', False):
            print(
                'The RCNN model is exported for ONNX and only supports batch_size = 1'
            )
        self.print_config()

    def check_model(self, yml_conf):
        """
        Raises:
            ValueError: loaded model is not a supported model type
        """
        for support_model in SUPPORT_MODELS:
            if support_model in yml_conf['arch']:
                return True
        raise ValueError("Unsupported arch: {}, expect one of {}".format(
            yml_conf['arch'], SUPPORT_MODELS))

    def print_config(self):
        print('----------- Model Configuration -----------')
        print('%s: %s' % ('Model Arch', self.arch))
        print('%s: ' % ('Transform Order'))
        for op_info in self.preprocess_infos:
            print('--%s: %s' % ('transform op', op_info['type']))
        print('--------------------------------------------')
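

# A sketch of the infer_cfg.yml keys this class reads (values are illustrative,
# not from a real export):
#
#     arch: YOLO
#     min_subgraph_size: 3
#     use_dynamic_shape: false
#     label_list: ['person', 'car']
#     Preprocess:
#     - type: Resize
#       target_size: [640, 640]
#       keep_ratio: false
#     - type: NormalizeImage
#       mean: [0.485, 0.456, 0.406]
#       std: [0.229, 0.224, 0.225]
#       is_scale: true
#     - type: Permute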


def load_predictor(model_dir,
                   arch,
                   run_mode='paddle',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False,
                   enable_mkldnn_bfloat16=False,
                   delete_shuffle_pass=False):
    """set AnalysisConfig, generate AnalysisPredictor
    Args:
        model_dir (str): root path of __model__ and __params__
        device (str): device to run on, one of CPU/GPU/XPU, default is CPU
        run_mode (str): inference mode (paddle/trt_fp32/trt_fp16/trt_int8)
        use_dynamic_shape (bool): whether to use dynamic shape
        trt_min_shape (int): min shape for dynamic shape in TensorRT
        trt_max_shape (int): max shape for dynamic shape in TensorRT
        trt_opt_shape (int): opt shape for dynamic shape in TensorRT
        trt_calib_mode (bool): if the model was produced by TensorRT offline
            quantization calibration, trt_calib_mode should be set to True
        delete_shuffle_pass (bool): whether to remove shuffle_channel_detect_pass
            in TensorRT. Used by action models.
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predicting with TensorRT requires device == 'GPU'.
    """
    if device != 'GPU' and run_mode != 'paddle':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device == 'GPU', but device == {}"
            .format(run_mode, device))
    infer_model = os.path.join(model_dir, 'model.pdmodel')
    infer_params = os.path.join(model_dir, 'model.pdiparams')
    if not os.path.exists(infer_model):
        infer_model = os.path.join(model_dir, 'inference.pdmodel')
        infer_params = os.path.join(model_dir, 'inference.pdiparams')
        if not os.path.exists(infer_model):
            raise ValueError(
                "Cannot find any inference model in dir: {}".format(model_dir))
    config = Config(infer_model, infer_params)
    if device == 'GPU':
        # initial GPU memory (MB), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse ops
        config.switch_ir_optim(True)
    elif device == 'XPU':
        if config.lite_engine_enabled():
            config.enable_lite_engine()
        config.enable_xpu(10 * 1024 * 1024)
    elif device == 'NPU':
        if config.lite_engine_enabled():
            config.enable_lite_engine()
        config.enable_custom_device('npu')
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leaks
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
                if enable_mkldnn_bfloat16:
                    config.enable_mkldnn_bfloat16()
            except Exception:
                print(
                    "The current environment does not support `mkldnn`, so MKLDNN is disabled."
                )

    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(
            workspace_size=(1 << 25) * batch_size,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
            use_calib_mode=trt_calib_mode)
        if use_dynamic_shape:
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape],
                'scale_factor': [batch_size, 2]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape],
                'scale_factor': [batch_size, 2]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape],
                'scale_factor': [batch_size, 2]
            }
            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

    # disable print log when predicting
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed/fetch ops, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    if delete_shuffle_pass:
        config.delete_pass("shuffle_channel_detect_pass")
    predictor = create_predictor(config)
    return predictor, config
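

# A minimal sketch of calling load_predictor directly with TensorRT FP16
# (the model directory is a hypothetical exported-model path):
#
#     predictor, config = load_predictor(
#         'output_inference/ppyoloe_crn_l_300e_coco', arch='PPYOLOE',
#         run_mode='trt_fp16', device='GPU', use_dynamic_shape=True,
#         trt_min_shape=320, trt_max_shape=1280, trt_opt_shape=640)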


def get_test_images(infer_dir, infer_img):
    """
    Get image path list in TEST mode
    """
    assert infer_img is not None or infer_dir is not None, \
        "--image_file or --image_dir should be set"
    assert infer_img is None or os.path.isfile(infer_img), \
        "{} is not a file".format(infer_img)
    assert infer_dir is None or os.path.isdir(infer_dir), \
        "{} is not a directory".format(infer_dir)

    # infer_img has a higher priority
    if infer_img and os.path.isfile(infer_img):
        return [infer_img]

    images = set()
    infer_dir = os.path.abspath(infer_dir)
    assert os.path.isdir(infer_dir), \
        "infer_dir {} is not a directory".format(infer_dir)
    exts = ['jpg', 'jpeg', 'png', 'bmp']
    exts += [ext.upper() for ext in exts]
    for ext in exts:
        images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
    images = list(images)

    assert len(images) > 0, "no image found in {}".format(infer_dir)
    print("Found {} inference images in total.".format(len(images)))
    return images
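

# e.g. get_test_images(None, 'demo.jpg') returns ['demo.jpg'], while
# get_test_images('demo_dir', None) globs jpg/jpeg/png/bmp files in demo_dir.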


def print_arguments(args):
    print('----------- Running Arguments -----------')
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------')
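

# A minimal end-to-end sketch (illustrative; the paths are hypothetical and the
# argparse front end of the full PaddleDetection deploy script is omitted):
if __name__ == '__main__':
    detector = Detector(
        'output_inference/ppyoloe_crn_l_300e_coco',  # hypothetical export dir
        device='GPU',
        run_mode='paddle',
        batch_size=1)
    image_list = get_test_images('demo_images', None)  # hypothetical image dir
    results = detector.predict_image(image_list, visual=False)
    print('total boxes:', results['boxes'].shape[0])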