# chenguilong / yl-ocr-layout
import random
from fastapi import FastAPI, Request, Form, File, UploadFile
from fastapi.templating import Jinja2Templates
from typing import Dict, List, Optional
from sx_utils import web_try
import cv2
import numpy as np

import base64
from core.predictor import predict_img
from core.layout import LayoutBox
from sx_utils import format_print

# ASGI application object; the route decorators further down register their
# handlers on it.
app = FastAPI()
# Jinja2 template loader rooted at the local 'templates' directory.
templates = Jinja2Templates(directory='templates')
# Project helper from sx_utils, invoked once at import time.
# NOTE(review): presumably sets up console/log output formatting — confirm.
format_print()

# 100 random 3-channel colour tuples (each channel 0..255) for bbox plotting;
# the detection routes pick one by class index.
colors = [
    tuple(random.randint(0, 255) for _ in range(3))
    for _ in range(100)
]

# Model identifiers handed to the page templates.
model_selection_options = ['ocr-layout', 'ocr-layout-paddle']

# Class names; boxes_list_to_json indexes this list with each box's class id,
# so the order matters.
clazz_names = [
    "code",
    "logo_hb", "logo_qz", "logo_rain", "logo_stack",
    "logo_sun", "logo_up", "logo_ys",
    "style", "table", "text", "title",
]

##############################################
# -------------GET Request Routes--------------
##############################################
@app.get("/")
def home(request: Request):
    ''' Render the home page form from the 'home.html' jinja2 template,
    exposing the available model names to it.
    '''
    context = {
        "request": request,
        "model_selection_options": model_selection_options,
    }
    return templates.TemplateResponse('home.html', context)


@app.get("/drag_and_drop_detect")
def drag_and_drop_detect(request: Request):
    ''' Serve the drag-and-drop detect page ('drag_and_drop_detect.html').

    The page is described as a drag and drop upload interface that renders
    the image + bboxes + labels on an HTML canvas; that logic lives in the
    template, this handler only supplies the request and the model names.
    '''
    context = {
        "request": request,
        "model_selection_options": model_selection_options,
    }
    return templates.TemplateResponse('drag_and_drop_detect.html', context)


##############################################
# ------------POST Request Routes--------------
##############################################
@app.post("/")
def detect_via_web_form(request: Request,
                        file_list: List[UploadFile] = File(...),
                        model_name: str = Form(...),
                        img_size: int = Form(1824),
                        multi_scale: bool = Form(False),
                        ):
    '''
    Run detection on uploaded image files and render the results page.
    Intended for human (non-api) users.

    Args:
        request: incoming request, forwarded to the template context.
        file_list: one or more uploaded image files.
        model_name: model identifier passed through to predict_img.
        img_size: image size passed through to predict_img (Default 1824).
        multi_scale: flag passed through to predict_img (Default False).

    Returns: HTML template render ('show_results.html') showing bbox data and
        the base64 encoded images with the boxes drawn on them.
    '''

    # np.frombuffer views the uploaded bytes as uint8 without copying; the
    # binary mode of np.fromstring used here before is deprecated in NumPy.
    # NOTE: cv2.imdecode yields None for data it cannot decode, in which case
    # the colour conversion below raises.
    img_batch = [cv2.imdecode(np.frombuffer(file.file.read(), np.uint8), cv2.IMREAD_COLOR)
                 for file in file_list]

    # create a copy that corrects for cv2.imdecode generating BGR images instead of RGB
    # using cvtColor instead of [...,::-1] to keep array contiguous in RAM
    img_batch_rgb = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in img_batch]

    # one prediction result per uploaded image
    results = [predict_img(img, model_name, img_size, multi_scale) for img in img_batch_rgb]

    json_results = boxes_list_to_json(results, clazz_names)

    img_str_list = []
    # plot bboxes on the (BGR) images in place, then base64 encode each one
    for img, bbox_list in zip(img_batch, json_results):
        for bbox in bbox_list:
            label = f'{bbox["class_name"]} {bbox["confidence"]:.2f}'
            plot_one_box(bbox['bbox'], img, label=label,
                         color=colors[int(bbox['class'])], line_thickness=3)

        img_str_list.append(base64EncodeImage(img))

    # backslash-escape both quote characters in the str() representation
    encoded_json_results = str(json_results).replace("'", r"\'").replace('"', r'\"')

    return templates.TemplateResponse('show_results.html', {
        'request': request,
        'bbox_image_data_zipped': zip(img_str_list, json_results),  # unzipped in jinja2 template
        'bbox_data_str': encoded_json_results,
    })


@app.post("/detect")
@web_try()
def detect_via_api(request: Request,
                   file_list: List[UploadFile] = File(...),
                   model_name: str = Form(...),
                   img_size: int = Form(1920),
                   multi_scale: bool = Form(False),
                   download_image: Optional[bool] = Form(False)):
    '''
    Run detection on uploaded image files and return the results as a string.
    Intended for API usage.

    Args:
        request: incoming request (not used by the body).
        file_list: one or more uploaded image files.
        model_name: model identifier passed through to predict_img.
        img_size: image size passed through to predict_img (Default 1920).
        multi_scale: flag passed through to predict_img (Default False).
        download_image: when true, each image's result list gets an extra
            {'image_base64': ...} entry holding the image with bboxes drawn.

    Returns: the per-image bbox results rendered with str() and with single
        quotes replaced by double quotes.
    '''

    # np.frombuffer views the uploaded bytes as uint8 without copying; the
    # binary mode of np.fromstring used here before is deprecated in NumPy.
    # NOTE: cv2.imdecode yields None for data it cannot decode, in which case
    # the colour conversion below raises.
    img_batch = [cv2.imdecode(np.frombuffer(file.file.read(), np.uint8), cv2.IMREAD_COLOR)
                 for file in file_list]

    # create a copy that corrects for cv2.imdecode generating BGR images instead of RGB,
    # using cvtColor instead of [...,::-1] to keep array contiguous in RAM
    # convert the image colour format
    img_batch_rgb = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in img_batch]
    # run the selected model on each image
    results = [predict_img(img, model_name, img_size, multi_scale) for img in img_batch_rgb]
    # convert the prediction results to plain dicts
    json_results = boxes_list_to_json(results, clazz_names)

    # if the image was requested, draw the boxes on it and attach it to the results
    if download_image:
        for idx, (img, bbox_list) in enumerate(zip(img_batch, json_results)):
            for bbox in bbox_list:
                label = f'{bbox["class_name"]} {bbox["confidence"]:.2f}'
                plot_one_box(bbox['bbox'], img, label=label,
                             color=colors[int(bbox['class'])], line_thickness=3)

            payload = {'image_base64': base64EncodeImage(img)}
            json_results[idx].append(payload)

    # NOTE(review): this is str() with the quotes swapped, not json.dumps;
    # values such as True/None or strings containing an apostrophe would not
    # come out as valid JSON. Kept as-is so the response format is unchanged.
    encoded_json_results = str(json_results).replace("'", r'"')
    return encoded_json_results


##############################################
# --------------Helper Functions---------------
##############################################

def results_to_json(results, model):
    ''' Turn yolo model output into a list (per image) of lists (per
    detection) of dicts with class, class_name, bbox and confidence.'''
    per_image_json = []
    for detections in results.xyxy:
        image_entries = []
        for det in detections:
            class_idx = int(det[5])
            entry = {
                "class": class_idx,
                "class_name": model.model.names[class_idx],
                # bbox coordinates are truncated from float to int
                "bbox": [int(coord) for coord in det[:4].tolist()],
                "confidence": float(det[4]),
            }
            image_entries.append(entry)
        per_image_json.append(image_entries)
    return per_image_json

# Draw one box (and optional label) on an image
def plot_one_box(x, im, color=(128, 128, 128), label=None, line_thickness=3):
    # Adapted from: https://github.com/ultralytics/yolov5/blob/cd540d8625bba8a05329ede3522046ee53eb349d/utils/plots.py
    # Plots one bounding box on image 'im' (in place) using OpenCV.
    # x holds the corner coordinates as (x1, y1, x2, y2).
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'

    # line/font thickness: explicit value if given, otherwise derived from the image size
    if line_thickness:
        thickness = line_thickness
    else:
        thickness = round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1

    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(im, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)

    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_size = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # filled background for the label, sitting just above the box's top-left corner
    label_corner = (top_left[0] + text_size[0], top_left[1] - text_size[1] - 3)
    cv2.rectangle(im, top_left, label_corner, color, -1, cv2.LINE_AA)
    text_origin = (top_left[0], top_left[1] - 2)
    cv2.putText(im, label, text_origin, 0, font_scale, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)


def base64EncodeImage(img):
    ''' Encode an image as jpg and return those bytes as a base64 text string'''
    _, jpg_buffer = cv2.imencode('.jpg', img)
    jpg_bytes = jpg_buffer.tobytes()
    return base64.b64encode(jpg_bytes).decode('utf-8')


def boxes_list_to_json(boxes_list: List[List[LayoutBox]], clazz_names: List[str]) -> List[List[Dict]]:
    ''' Convert per-image lists of LayoutBox into per-image lists of dicts.

    Each box first gets its clazz_name set from clazz_names (indexed by the
    box's clazz); the boxes are mutated in place. The dicts are then produced
    by each box's to_service_dict().
    '''
    # pass 1: name every box before any of them is serialised
    for box in (b for group in boxes_list for b in group):
        box.clazz_name = clazz_names[box.clazz]

    # pass 2: serialise, keeping the per-image grouping
    json_groups = []
    for group in boxes_list:
        json_groups.append([box.to_service_dict() for box in group])
    return json_groups
    

@app.get("/ping", description="健康检查")
def ping():
    ''' Health check: prints a marker to stdout and answers with "pong!".'''
    print("->ping")
    reply = "pong!"
    return reply