123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- import itertools
- from typing import List
- from core.layout import LayoutBox
def merge_boxes_list(
    boxes_list: List[List[LayoutBox]],
    img_w: int,
    img_h: int,
    method: str = "nms",
    iou_threshold: float = 0.5,
) -> List[LayoutBox]:
    """Merge several groups of detection boxes into a single list.

    The merging itself is delegated to the Weighted-Boxes-Fusion library
    (``ensemble_boxes``). It can be used to combine the predictions of
    several models, or several prediction passes of a single model.

    See: https://github.com/ZFTurbo/Weighted-Boxes-Fusion

    Args:
        boxes_list (List[List[LayoutBox]]):
            Groups of detection boxes, one inner list per prediction source.
        img_w (int):
            Image width, used to normalize box coordinates.
        img_h (int):
            Image height, used to normalize box coordinates.
        method (str, optional):
            Name of the merge method, one of
            ["nms", "soft_nms", "nmw", "wbf"]. Defaults to "nms".
        iou_threshold (float, optional):
            IoU threshold used to match boxes. Defaults to 0.5.

    Returns:
        List[LayoutBox]: The merged detection boxes. Empty when no input
        group contains any box.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    supported_methods = ("nms", "soft_nms", "nmw", "wbf")
    # Validate explicitly: an `assert` would disappear under `python -O`.
    if method not in supported_methods:
        raise ValueError(
            f"Unsupported merge method {method!r}; "
            f"expected one of {list(supported_methods)}"
        )

    # Nothing to merge: skip the third-party call entirely.
    if not any(len(boxes) for boxes in boxes_list):
        return []

    # Imported lazily so the dependency is only needed when merging is used.
    from ensemble_boxes import (
        nms,
        soft_nms,
        non_maximum_weighted,
        weighted_boxes_fusion,
    )

    merge_funcs = {
        "nms": nms,
        "soft_nms": soft_nms,
        "nmw": non_maximum_weighted,
        "wbf": weighted_boxes_fusion,
    }
    merge_func = merge_funcs[method]

    def normalize(ltrb):
        """Scale absolute (left, top, right, bottom) by the image size."""
        l, t, r, b = ltrb
        return [l / img_w, t / img_h, r / img_w, b / img_h]

    def denormalize(nltrb):
        """Scale normalized (left, top, right, bottom) back to pixels."""
        nl, nt, nr, nb = nltrb
        return [nl * img_w, nt * img_h, nr * img_w, nb * img_h]

    # The merge functions take three parallel per-source lists:
    # normalized boxes, scores and labels.
    nltrbs_list = [[normalize(b.ltrb) for b in boxes] for boxes in boxes_list]
    scores_list = [[b.conf for b in boxes] for boxes in boxes_list]
    labels_list = [[b.clazz for b in boxes] for boxes in boxes_list]

    nltrbs, scores, labels = merge_func(
        nltrbs_list, scores_list, labels_list, iou_thr=iou_threshold
    )

    return [
        LayoutBox(
            clazz=int(label),
            bbox=denormalize(nltrb),
            conf=float(score),
        )
        for nltrb, score, label in zip(nltrbs, scores, labels)
    ]
def clip_boxes_to_image_bound(
    boxes: List[LayoutBox], img_w: int, img_h: int
) -> List[LayoutBox]:
    """Clip detection boxes so that they do not exceed the image bounds.

    Each coordinate is truncated to ``int`` and then clamped: horizontal
    coordinates to ``[0, img_w]`` and vertical ones to ``[0, img_h]``.
    A box lying completely outside the image therefore collapses onto the
    nearest image edge instead of keeping out-of-bounds coordinates.

    Args:
        boxes (List[LayoutBox]):
            Boxes to clip. Their ``bbox`` attribute is updated in place.
        img_w (int):
            Image width.
        img_h (int):
            Image height.

    Returns:
        List[LayoutBox]: The same list object that was passed in.
    """

    def clamp(value, upper: int) -> int:
        # Bound on both sides so no coordinate can end up outside the image.
        return min(max(0, int(value)), upper)

    for box in boxes:
        l, t, r, b = box.bbox
        box.bbox = [
            clamp(l, img_w),
            clamp(t, img_h),
            clamp(r, img_w),
            clamp(b, img_h),
        ]
    return boxes
def filter_boxes_by_conf(
    boxes: List[LayoutBox],
    conf_threshold: float,
) -> List[LayoutBox]:
    """Keep only the boxes whose confidence reaches the threshold.

    Args:
        boxes (List[LayoutBox]):
            Candidate detection boxes.
        conf_threshold (float):
            Minimum confidence (inclusive) a box needs to be kept.

    Returns:
        List[LayoutBox]: A new list with the surviving boxes, in their
        original order.
    """
    return [box for box in boxes if box.conf >= conf_threshold]
def filter_boxes_by_overlaps(
    boxes: List[LayoutBox],
    overlaps_iou_threshold: float,
    overlaps_max_count: int,
) -> List[LayoutBox]:
    """Filter detection boxes by confidence and IoU.

    Boxes are visited from highest to lowest confidence and grouped: a box
    joins the first group that already holds a member whose IoU with it is
    at least ``overlaps_iou_threshold``. A group accepts further boxes only
    while it has fewer than ``overlaps_max_count`` members, so each group
    keeps its most confident boxes.

    Args:
        boxes (List[LayoutBox]):
            Candidate detection boxes.
        overlaps_iou_threshold (float):
            IoU (inclusive) from which two boxes count as overlapping.
        overlaps_max_count (int):
            Size limit applied when adding a box to an existing group.

    Returns:
        List[LayoutBox]: The kept boxes, group by group in creation order.
    """
    # Highest confidence first, so each group fills up with its best boxes.
    ranked = sorted(boxes, key=lambda e: e.conf, reverse=True)

    # Each group collects boxes that overlap strongly with one another.
    groups: List[List[LayoutBox]] = []

    for candidate in ranked:
        # Find the first group with a member that overlaps the candidate.
        home = None
        for group in groups:
            if any(
                candidate.iou(member) >= overlaps_iou_threshold
                for member in group
            ):
                home = group
                break

        if home is None:
            # No overlapping group yet: the candidate starts a new one.
            groups.append([candidate])
        elif len(home) < overlaps_max_count:
            # The group still has room for this box.
            home.append(candidate)

    # Flatten the groups into a single list.
    kept: List[LayoutBox] = []
    for group in groups:
        kept.extend(group)
    return kept
|