Zhang Li 2 years ago
commit
b6077d2e00

+ 2 - 0
.gitattributes

@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text=auto

+ 6 - 0
.gitignore

@@ -0,0 +1,6 @@
+/.vscode/
+*.pt
+/.idea/
+
+__pycache__/*
+*/__pycache__/*

+ 120 - 0
Dockerfile

@@ -0,0 +1,120 @@
+FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 AS builder
+
+RUN sed -i 's#archive.ubuntu.com#mirrors.aliyun.com#g' /etc/apt/sources.list  \
+    && sed -i 's#security.ubuntu.com#mirrors.aliyun.com#g' /etc/apt/sources.list
+
+ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN:zh LC_ALL=zh_CN.UTF-8 DEBIAN_FRONTEND=noninteractive
+
+RUN rm -rf /etc/apt/sources.list.d/ && apt-get update
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    supervisor \
+    iputils-ping \
+    wget \
+    zsh \
+    build-essential \
+    cmake \
+    git \
+    curl \
+    vim \
+    ca-certificates \
+    libjpeg-dev \
+    zip \
+    unzip \
+    libpng-dev \
+    openssh-server \
+    autojump \
+    language-pack-zh-hans \
+    ttf-wqy-zenhei \
+    libgl1-mesa-glx  \
+    libglib2.0-0 \
+    locales &&\
+    rm -rf /var/lib/apt/lists/*
+
+
+RUN locale-gen zh_CN.UTF-8
+RUN dpkg-reconfigure locales
+
+
+CMD ["supervisord", "-n"]
+
+FROM builder AS builder1
+
+ENV PYTHON_VERSION 3
+RUN chsh -s `which zsh`
+RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda${PYTHON_VERSION}-latest-Linux-x86_64.sh && \
+    chmod +x ~/miniconda.sh && \
+    ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh
+
+RUN ln /opt/conda/bin/conda /usr/local/bin/conda
+RUN conda init zsh
+RUN conda install mamba -n base -c conda-forge
+RUN ln /opt/conda/bin/mamba /usr/local/bin/mamba && mamba init zsh
+
+
+
+FROM builder1 AS builder2
+
+RUN apt-get update && apt-get install -y --no-install-recommends openssh-server && rm -rf /var/lib/apt/lists/*
+RUN mkdir /var/run/sshd
+RUN echo 'root:root' | chpasswd
+RUN sed -i 's/.*PermitRootLogin .*/PermitRootLogin yes/' /etc/ssh/sshd_config
+# SSH login fix. Otherwise user is kicked off after login
+RUN sed -i 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' /etc/pam.d/sshd
+
+RUN echo "\
+[program:sshd] \n\
+command=/usr/sbin/sshd -D\n\
+autorestart=True\n\
+autostart=True\n\
+redirect_stderr = true\n\
+" > /etc/supervisor/conf.d/sshd.conf
+
+EXPOSE 22
+
+FROM builder2 AS builder3
+
+WORKDIR /workspace
+COPY environment.yml /environment.yml
+RUN sed -i 's#- paddlepaddle#- paddlepaddle-gpu==2.3.0.post110#g' /environment.yml && cat /environment.yml
+RUN mamba update -n base -c defaults conda -y && mamba env create -f /environment.yml && rm -rf /root/.cache
+
+# RUN /opt/conda/envs/py38/bin/python -m ipykernel install --name py38 --display-name "py38"
+# RUN echo "c.MultiKernelManager.default_kernel_name = 'py38'">>/root/.jupyter/jupyter_notebook_config.py
+RUN echo "\
+[program:be]\n\
+directory=/workspace\n\
+command=/opt/conda/envs/py38/bin/gunicorn server:app --workers 1 --worker-class=uvicorn.workers.UvicornWorker  --bind 0.0.0.0:8080  --log-level=info --reload \n\
+autorestart=true\n\
+startretries=0\n\
+redirect_stderr=true\n\
+stdout_logfile=/var/log/be.log\n\
+stdout_logfile_maxbytes=50MB\n\
+environment=CUDA_VISIBLE_DEVICES=0\n\
+" > /etc/supervisor/conf.d/be.conf
+
+RUN git clone https://gitee.com/monkeycc/yolov5.git
+
+COPY . /workspace
+EXPOSE 8080
+
+
+
+# RUN mamba install -y jupyterlab -n base && mamba init zsh
+# RUN /opt/conda/bin/jupyter notebook --generate-config && \
+#     echo "c.NotebookApp.password='argon2:\$argon2id\$v=19\$m=10240,t=10,p=8\$+zIUCF9Uk2FiCHlV8njX5A\$I5Mm/64DORArcXYTXWRVng'">>/root/.jupyter/jupyter_notebook_config.py
+
+
+# RUN mkdir -p /data && echo "\
+# [program:jupyter]\n\
+# directory=/data\n\
+# command=/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser \n\
+# autorestart=true\n\
+# startretries=0\n\
+# redirect_stderr=true\n\
+# stdout_logfile=/dev/stdout\n\
+# stdout_logfile_maxbytes=0\n\
+# " > /etc/supervisor/conf.d/jupyter.conf
+
+# EXPOSE 8888

+ 13 - 0
Makefile

@@ -0,0 +1,13 @@
+NAME=layout
+VERSION=latest
+BUILD_TIME      := $(shell date "+%F %T")
+COMMIT_SHA1     := $(shell git rev-parse HEAD)
+AUTHOR          := $(shell git show -s --format='%an')
+
+
+.PHONY: all gpu
+all: gpu
+gpu:
+	@docker build -t registry.cn-hangzhou.aliyuncs.com/sxtest/$(NAME):gpu --build-arg VERSION=gpu .
+	@docker push registry.cn-hangzhou.aliyuncs.com/sxtest/$(NAME):gpu
+

+ 27 - 0
README.md

@@ -0,0 +1,27 @@
+# layout-server
+
+A layout analysis service.
+
+
+## Environment
+
+- python >= 3.8
+- pytorch
+- opencv-python (cv2)
+
+```shell
+conda env create -f environment.yml
+```
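+
+The service can also run in Docker. The commands below are one possible workflow, assuming a GPU host with the NVIDIA container runtime; the tag `layout:v2` is only an example chosen to match `docker-compose.yml`:
+
+```shell
+# Build the image from the Dockerfile and tag it as expected by docker-compose.yml
+docker build -t layout:v2 .
+# Start the container; host port 18080 is mapped to 8080 inside the container
+docker compose up -d
+```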
+
+## Demo
+
+```shell
+python main.py
+```
+
+## Server
+
+```shell
+# port 8080
+python server.py --port 8080
+```
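+
+Once the server is running, the `/detect` endpoint accepts multipart form uploads. A minimal request might look like the sketch below, assuming the server listens on port 8080 and uses the `ocr-layout` model registered in `server.py` (see `client.py` for a fuller example):
+
+```shell
+curl -X POST http://localhost:8080/detect \
+  -F 'file_list=@images/zidane.jpg' \
+  -F 'model_name=ocr-layout' \
+  -F 'img_size=1824'
+```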

+ 56 - 0
client.py

@@ -0,0 +1,56 @@
+''' Example client sending a POST request to the server (localhost:8000/detect) and printing the YOLO results.
+
+The send_request() function has a couple of options demonstrating the ways you can interact
+with the /detect endpoint.
+'''
+
+import requests as r
+import json
+from pprint import pprint
+
+import base64
+from io import BytesIO
+from PIL import Image
+
+def send_request(file_list = ['./images/zidane.jpg'], 
+                    model_name = 'yolov5s',
+                    img_size = 640,
+                    download_image = False):
+
+    #upload multiple files as list of tuples
+    files = [('file_list', open(file,"rb")) for file in file_list]
+
+    #pass the other form data here
+    other_form_data = {'model_name': model_name,
+                    'img_size': img_size,
+                    'download_image': download_image}
+
+    res = r.post("http://localhost:8000/detect",
+                    data = other_form_data, 
+                    files = files)
+
+    if download_image:
+        json_data = res.json()
+
+        for img_data in json_data:
+            for bbox_data in img_data:
+                #parse json to detect if the dict contains image data (base64) or bbox data
+                if 'image_base64' in bbox_data.keys():
+                    #decode and show base64 encoded image
+                    img = Image.open(BytesIO(base64.b64decode(str(bbox_data['image_base64']))))
+                    img.show()
+                else:
+                    #otherwise print json bbox data
+                    pprint(bbox_data)
+
+    else:
+        #if no images were downloaded, just display json response
+        pprint(json.loads(res.text))
+
+
+if __name__ == '__main__':
+    #example uploading image batch
+    #send_request(file_list=['./images/bus.jpg','./images/zidane.jpg'])
+
+    #example uploading image and receiving bbox json + image with bboxes drawn
+    send_request(file_list=['./images/bus.jpg'], download_image = True)

+ 20 - 0
docker-compose.yml

@@ -0,0 +1,20 @@
+version: '3.8'
+services:
+  layoutv2:
+    hostname: layoutv2
+    container_name: layoutv2
+    restart: always
+    image: layout:v2
+    privileged: true
+    ipc: host
+    tty: true
+    working_dir: /workspace
+    ports:
+      - '18080:8080'
+    volumes:
+      - ./:/workspace
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: [gpu]

+ 40 - 0
environment.yml

@@ -0,0 +1,40 @@
+name: py38
+channels:
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ # Anaconda Tsinghua mirror
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
+  - bioconda
+  - fastai
+  - defaults
+  - r
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
+  - conda-forge
+dependencies:
+  - python=3.8
+  - ipython
+  - ipykernel
+  - pip
+  - pytorch
+  - torchvision
+  - torchaudio
+  - cudatoolkit=11.0
+  - fastai
+  - nbdev
+  - pip:
+      - cmake
+      - opencv-python
+      - cython
+      - fastapi
+      - uvicorn
+      - jinja2
+      - aiofiles
+      - python-multipart
+      - requests
+      - PyYAML>=5.3.1
+      - pandas>=1.1.4
+      - seaborn>=0.11.0
+      - tqdm>=4.41.0
+      - tensorboard>=2.4.1
+      - gunicorn
+      - -i https://pypi.douban.com/simple
+prefix: /opt/conda/envs/py38

BIN
images/bus.jpg


BIN
images/city_street.jpg


BIN
images/zidane.jpg


+ 17 - 0
minimal_client_server_example/client_minimal.py

@@ -0,0 +1,17 @@
+''' Example client sending a POST request to the server and printing the YOLO results.
+'''
+
+import requests as r
+import json
+from pprint import pprint
+
+def send_request(image = '../images/zidane.jpg', model_name = 'yolov5s'):
+    res = r.post("http://localhost:8000", 
+                    data={'model_name': model_name}, 
+                    files = {'file': open(image , "rb")} #pass the files here
+                    )
+
+    pprint(json.loads(res.text))
+
+if __name__ == '__main__':
+    send_request()

+ 94 - 0
minimal_client_server_example/server_minimal.py

@@ -0,0 +1,94 @@
+'''
+This file is a barebones FastAPI example that:
+  1. Accepts a GET request and renders an HTML form at localhost:8000 allowing the user to
+     upload an image, select a YOLO model, and submit that data via POST
+  2. Accepts a POST request, runs the YOLO model on the input image, and returns JSON output
+
+Works with client_minimal.py
+
+This script does not require any of the HTML templates in /templates or other code in this repo,
+and does not involve Bootstrap, JavaScript, jQuery, etc.
+'''
+
+from fastapi import FastAPI, Request, Form, File, UploadFile
+from fastapi.responses import HTMLResponse
+
+from PIL import Image
+from io import BytesIO
+
+import torch
+
+app = FastAPI()
+
+@app.get("/")
+async def home(request: Request):
+  ''' Returns barebones HTML form allowing the user to select a file and model '''
+
+  html_content = '''
+<form method="post" enctype="multipart/form-data">
+  <div>
+    <label>Upload Image</label>
+    <input name="file" type="file" multiple>
+    <div>
+      <label>Select YOLO Model</label>
+      <select name="model_name">
+        <option>yolov5s</option>
+        <option>yolov5m</option>
+        <option>yolov5l</option>
+        <option>yolov5x</option>
+      </select>
+    </div>
+  </div>
+  <button type="submit">Submit</button>
+</form>
+'''
+
+  return HTMLResponse(content=html_content, status_code=200)
+
+
+@app.post("/")
+async def process_home_form(file: UploadFile = File(...), 
+              model_name: str = Form(...)):
+  
+    '''
+    Requires an image file upload, model name (ex. yolov5s).
+    Returns: JSON response with a list of lists of dicts.
+      Each dict contains class, class_name, confidence, bbox
+
+    Note: because this endpoint is declared async, the synchronous YOLO inference
+    call blocks the event loop while it runs.
+    '''
+
+    model = torch.hub.load('ultralytics/yolov5', model_name, pretrained=True, force_reload = False)
+
+    #This is how you decode + process image with PIL
+    results = model(Image.open(BytesIO(await file.read())))
+
+    #This is how you decode + process image with OpenCV + numpy
+    #results = model(cv2.cvtColor(cv2.imdecode(np.fromstring(await file.read(), np.uint8), cv2.IMREAD_COLOR), cv2.COLOR_RGB2BGR))
+
+    json_results = results_to_json(results,model)
+    return json_results
+
+
+def results_to_json(results, model):
+    ''' Helper function for process_home_form()'''
+    return [
+        [
+          {
+          "class": int(pred[5]),
+          "class_name": model.model.names[int(pred[5])],
+          "bbox": [int(x) for x in pred[:4].tolist()], #convert bbox results to int from float
+          "confidence": float(pred[4]),
+          }
+        for pred in result
+        ]
+      for result in results.xyxy
+      ]
+
+
+if __name__ == '__main__':
+    import uvicorn
+    
+    app_str = 'server_minimal:app'
+    uvicorn.run(app_str, host='localhost', port=8000, reload=True, workers=1)

+ 39 - 0
requirements.txt

@@ -0,0 +1,39 @@
+# pip install -r requirements.txt
+
+# FastAPI --------------------------------------
+fastapi
+uvicorn
+jinja2
+aiofiles
+python-multipart
+requests
+
+# Other stuff from https://github.com/ultralytics/yolov5/blob/master/requirements.txt
+# base ----------------------------------------
+Cython
+matplotlib>=3.2.2
+numpy>=1.18.5
+opencv-python>=4.1.2
+Pillow
+PyYAML>=5.3.1
+scipy>=1.4.1
+tensorboard>=2.2
+torch>=1.7.0
+tqdm>=4.41.0
+
+# logging -------------------------------------
+# wandb
+
+# plotting ------------------------------------
+seaborn>=0.11.0
+pandas
+
+# export --------------------------------------
+# coremltools>=4.1
+# onnx>=1.8.1
+# scikit-learn==0.19.2  # for coreml quantization
+
+# extras --------------------------------------
+thop  # FLOPS computation
+pycocotools>=2.0  # COCO mAP
+torchvision>=0.8.1

+ 204 - 0
server.py

@@ -0,0 +1,204 @@
+from fastapi import FastAPI, Request, Form, File, UploadFile
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel
+from typing import List, Optional
+
+import cv2
+import numpy as np
+
+import torch
+import base64
+import random
+
+YOLO_DIR = '/workspace/yolov5'
+# WEIGHTS = '/data/yolov5/runs/train/yolov5x_layout_reuslt37/weights/best.pt'
+WEIGHTS = '/workspace/best.pt'
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+
+app = FastAPI()
+templates = Jinja2Templates(directory = 'templates')
+
+model_selection_options = ['ocr-layout']
+model_dict = {model_name: None for model_name in model_selection_options} #set up model cache
+
+colors = [tuple([random.randint(0, 255) for _ in range(3)]) for _ in range(100)] #for bbox plotting
+
+##############################################
+#-------------GET Request Routes--------------
+##############################################
+@app.get("/")
+def home(request: Request):
+    ''' Returns html jinja2 template render for home page form
+    '''
+
+    return templates.TemplateResponse('home.html', {
+            "request": request,
+            "model_selection_options": model_selection_options,
+        })
+
+@app.get("/drag_and_drop_detect")
+def drag_and_drop_detect(request: Request):
+    ''' drag_and_drop_detect detect page. Uses a drag and drop
+    file interface to upload files to the server, then renders
+    the image + bboxes + labels on HTML canvas.
+    '''
+
+    return templates.TemplateResponse('drag_and_drop_detect.html',
+            {"request": request,
+            "model_selection_options": model_selection_options,
+        })
+
+
+##############################################
+#------------POST Request Routes--------------
+##############################################
+@app.post("/")
+def detect_via_web_form(request: Request,
+                        file_list: List[UploadFile] = File(...),
+                        model_name: str = Form(...),
+                        img_size: int = Form(1824)):
+
+    '''
+    Requires an image file upload and a model name (ex. ocr-layout). Optional image size parameter (default 1824).
+    Intended for human (non-api) users.
+    Returns: HTML template render showing bbox data and base64 encoded image
+    '''
+
+    #assume input validated properly if we got here
+    if model_dict[model_name] is None:
+        model_dict[model_name] = torch.hub.load(YOLO_DIR, 'custom', path=WEIGHTS, source='local').to(device)
+
+    img_batch = [cv2.imdecode(np.frombuffer(file.file.read(), np.uint8), cv2.IMREAD_COLOR)
+                    for file in file_list]
+
+    #create a copy that corrects for cv2.imdecode generating BGR images instead of RGB
+    #using cvtColor instead of [...,::-1] to keep array contiguous in RAM
+    img_batch_rgb = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in img_batch]
+
+    results = model_dict[model_name](img_batch_rgb, size = img_size)
+
+    json_results = results_to_json(results,model_dict[model_name])
+
+    img_str_list = []
+    #plot bboxes on the image
+    for img, bbox_list in zip(img_batch, json_results):
+        for bbox in bbox_list:
+            label = f'{bbox["class_name"]} {bbox["confidence"]:.2f}'
+            plot_one_box(bbox['bbox'], img, label=label,
+                    color=colors[int(bbox['class'])], line_thickness=3)
+
+        img_str_list.append(base64EncodeImage(img))
+
+    #escape the apostrophes in the json string representation
+    encoded_json_results = str(json_results).replace("'",r"\'").replace('"',r'\"')
+
+    return templates.TemplateResponse('show_results.html', {
+            'request': request,
+            'bbox_image_data_zipped': zip(img_str_list,json_results), #unzipped in jinja2 template
+            'bbox_data_str': encoded_json_results,
+        })
+
+
+@app.post("/detect")
+def detect_via_api(request: Request,
+                file_list: List[UploadFile] = File(...),
+                model_name: str = Form(...),
+                img_size: Optional[int] = Form(1824),
+                download_image: Optional[bool] = Form(False)):
+
+    '''
+    Requires an image file upload and a model name (ex. ocr-layout).
+    Optional image size parameter (Default 1824)
+    Optional download_image parameter that includes base64 encoded image(s) with bbox's drawn in the json response
+
+    Returns: JSON results of running YOLOv5 on the uploaded image. If download_image parameter is True, images with
+            bboxes drawn are base64 encoded and returned inside the json response.
+
+    Intended for API usage.
+    '''
+
+    if model_dict[model_name] is None:
+        model_dict[model_name] = torch.hub.load(YOLO_DIR, 'custom', path=WEIGHTS, source='local').to(device)
+
+    img_batch = [cv2.imdecode(np.frombuffer(file.file.read(), np.uint8), cv2.IMREAD_COLOR)
+                for file in file_list]
+
+    #create a copy that corrects for cv2.imdecode generating BGR images instead of RGB,
+    #using cvtColor instead of [...,::-1] to keep array contiguous in RAM
+    img_batch_rgb = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in img_batch]
+
+    results = model_dict[model_name](img_batch_rgb, size = img_size)
+    json_results = results_to_json(results,model_dict[model_name])
+
+    if download_image:
+        for idx, (img, bbox_list) in enumerate(zip(img_batch, json_results)):
+            for bbox in bbox_list:
+                label = f'{bbox["class_name"]} {bbox["confidence"]:.2f}'
+                plot_one_box(bbox['bbox'], img, label=label,
+                        color=colors[int(bbox['class'])], line_thickness=3)
+
+            payload = {'image_base64':base64EncodeImage(img)}
+            json_results[idx].append(payload)
+
+    encoded_json_results = str(json_results).replace("'",r'"')
+    return encoded_json_results
+
+##############################################
+#--------------Helper Functions---------------
+##############################################
+
+def results_to_json(results, model):
+    ''' Converts yolo model output to json (list of list of dicts)'''
+    return [
+                [
+                    {
+                    "class": int(pred[5]),
+                    "class_name": model.model.names[int(pred[5])],
+                    "bbox": [int(x) for x in pred[:4].tolist()], #convert bbox results to int from float
+                    "confidence": float(pred[4]),
+                    }
+                for pred in result
+                ]
+            for result in results.xyxy
+            ]
+
+
+def plot_one_box(x, im, color=(128, 128, 128), label=None, line_thickness=3):
+    # Directly copied from: https://github.com/ultralytics/yolov5/blob/cd540d8625bba8a05329ede3522046ee53eb349d/utils/plots.py
+    # Plots one bounding box on image 'im' using OpenCV
+    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
+    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
+    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
+    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
+    if label:
+        tf = max(tl - 1, 1)  # font thickness
+        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+        cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled
+        cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
+
+
+def base64EncodeImage(img):
+    ''' Takes an input image and returns a base64 encoded string representation of that image (jpg format)'''
+    _, im_arr = cv2.imencode('.jpg', img)
+    im_b64 = base64.b64encode(im_arr.tobytes()).decode('utf-8')
+
+    return im_b64
+
+if __name__ == '__main__':
+    import uvicorn
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--host', default = 'localhost')
+    parser.add_argument('--port', default = 8000)
+    parser.add_argument('--precache-models', action='store_true',
+            help='Pre-cache all models in memory upon initialization, otherwise dynamically caches models')
+    opt = parser.parse_args()
+
+    # if opt.precache_models:
+    #     model_dict = {model_name: torch.hub.load('ultralytics/yolov5', model_name, pretrained=True)
+    #                     for model_name in model_selection_options}
+
+    app_str = 'server:app' #make the app string equal to whatever the name of this file is
+    uvicorn.run(app_str, host= opt.host, port=int(opt.port), reload=True)

+ 359 - 0
templates/drag_and_drop_detect.html

@@ -0,0 +1,359 @@
+<!-- This is a Jinja2 Template that extends layout.html
+Implements a drag & drop file upload interface for YOLOv5 detection.
+
+Once a user image is uploaded, this code AJAX requests the image, model name
+and image size to the FastAPI server's /detect endpoint. The server sends
+the list of bounding boxes back, and the image + bounding boxes + labels
+are rendered inside the canvas element.
+
+Bounding box labels are shifted above their bounding boxes so that they do not overlap
+with each other in complicated scenes (such as a busy city street).
+
+TODO:
+  - Improve efficiency of the algorithm for making box labels not overlap in crowded scenes
+-->
+
+{% extends "layout.html" %} {% block title %}
+<title>YOLOv5 Drag & Drop Demo</title> {% endblock %} {% block header %}
+<!-- Include JQuery for this page -->
+<script
+  src="https://code.jquery.com/jquery-3.6.0.min.js"
+  integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4="
+  crossorigin="anonymous"
+></script>
+{% endblock %} {% block content %}
+<div style="overflow-x: hidden">
+  <div class="row">
+    <div class="col-auto m-2">
+      <label for="model_name" class="form-label"
+        ><b>Select YOLOv5 Model</b></label
+      >
+      <select class="form-select" id="model_name" name="model_name">
+        {% for selection in model_selection_options %}
+        <option value="{{ selection }}">{{ selection }}</option>
+        {% endfor %}
+      </select>
+      <label for="img_size" class="form-label"
+        ><b>Model Inference Size</b></label
+      >
+      <input
+        type="text"
+        class="form-control"
+        id="img_size"
+        name="img_size"
+        value="1824"
+      />
+    </div>
+
+    <div class="col">
+      <!-- Drag and drop images in this div -->
+      <div
+        class="m-2"
+        id="drop-region"
+        style="border: 3px dashed limegreen; height: 150px"
+      >
+        <div class="container-fluid">
+          <div class="d-flex justify-content-center align-items-center h-100">
+            <b>Drag & Drop Images Or Click To Upload</b>
+          </div>
+        </div>
+      </div>
+    </div>
+  </div>
+
+  <!-- Our image + bboxes will be drawn here! Downscale image to fit page width-->
+  <canvas id="canvas" style="max-width: 99%; height: auto"></canvas>
+</div>
+
+<script type="text/javascript">
+  /*This script block handles canvas updates after getting bboxes from the server. */
+
+  const canvas = document.getElementById('canvas')
+  const ctx = canvas.getContext('2d')
+  var image = new Image(60, 45) // Using optional size for image
+  image.onload = drawImageWithBBoxes // Draw when image has loaded
+
+  var colormap = {
+    car: [255, 216, 0], //yellow
+    person: [255, 0, 0], //red
+    truck: [255, 0, 255], //purple
+  }
+
+  var data //list of list of dictionaries obtained from server's /detect endpoint
+
+  function drawImageWithBBoxes() {
+    // Use the intrinsic size of image in CSS pixels for the canvas element
+    canvas.width = this.naturalWidth
+    canvas.height = this.naturalHeight
+
+    //draw the image
+    ctx.drawImage(this, 0, 0)
+
+    //draw some bboxes!
+    ctx.lineWidth = 2
+    ctx.font = '24px arial'
+    ctx.strokeStyle = 'yellow'
+    textboxlocations = []
+    FILL_ALPHA = 1
+
+    const randomBetween = (min, max) =>
+      min + Math.floor(Math.random() * (max - min + 1))
+    for (const item of data) {
+      if (item['class_name'] in colormap) {
+        rgb = colormap[item['class_name']]
+      } else {
+        //random color for this class
+        rgb = [
+          randomBetween(0, 255),
+          randomBetween(0, 255),
+          randomBetween(0, 255),
+        ]
+        colormap[item['class_name']] = rgb
+      }
+      ctx.fillStyle = `rgba(${rgb[0]},${rgb[1]},${rgb[2]},${FILL_ALPHA})`
+      ctx.strokeStyle = `rgba(${rgb[0]},${rgb[1]},${rgb[2]},${FILL_ALPHA})`
+
+      // draw bbox
+      ctx.strokeRect(
+        item['bbox'][0],
+        item['bbox'][1],
+        item['bbox'][2] - item['bbox'][0],
+        item['bbox'][3] - item['bbox'][1]
+      )
+
+      let label = `${item['class_name']} ${do_rounding(item['confidence'])}`
+      let textMeasures = ctx.measureText(label)
+      let textHeight =
+        textMeasures.actualBoundingBoxAscent +
+        textMeasures.actualBoundingBoxDescent
+      let padding = 2
+
+      let x = item['bbox'][0]
+      let y = item['bbox'][1] - textHeight - 2 * padding
+      let w = textMeasures.width + 2 * padding
+      let h = textHeight + 2 * padding
+
+      //check if new textbox would overlap with previous textboxes drawn
+      while (
+        textboxlocations.some(box => IOU(box, [x, y, x + w, y + h]) > 0.01)
+      ) {
+        //if so, move the textbox up by h + 3 pixels
+        y -= h + 3
+      }
+      //don't let the textbox go beyond top of the image
+      if (y <= 0) {
+        y = 0
+      }
+
+      textboxlocations.push([x - 1, y - 1, x + w + 1, y + h + 1])
+      // draw text background box
+      ctx.fillRect(x, y, w, h)
+
+      //draw text
+      if (rgb[0] + rgb[1] + rgb[2] > (255 * 3) / 2) {
+        ctx.fillStyle = 'black'
+      } else {
+        ctx.fillStyle = 'white'
+      }
+      ctx.fillText(label, x + padding, y + h - padding)
+
+      //draw line between text and bbox top left corner
+
+      ctx.beginPath()
+      ctx.moveTo(x, y)
+      ctx.lineTo(x, item['bbox'][1])
+      ctx.stroke()
+    }
+  }
+
+  function IOU(boxA, boxB, isPixel = 0) {
+    /*This function computes the IOU of 2 boxes.
+    This is used solely to make sure bbox labels don't overlap vertically */
+
+    // determine the (x, y)-coordinates of the intersection rectangle
+    xA = Math.max(boxA[0], boxB[0])
+    yA = Math.max(boxA[1], boxB[1])
+    xB = Math.min(boxA[2], boxB[2])
+    yB = Math.min(boxA[3], boxB[3])
+
+    if (xA >= xB || yA >= yB) {
+      return 0
+    }
+
+    //compute the area of intersection rectangle
+    interArea = (xB - xA + isPixel) * (yB - yA + isPixel)
+
+    //compute the area of both rectangles
+    boxAArea = (boxA[2] - boxA[0] + isPixel) * (boxA[3] - boxA[1] + isPixel)
+    boxBArea = (boxB[2] - boxB[0] + isPixel) * (boxB[3] - boxB[1] + isPixel)
+
+    // compute the intersection over union by taking the intersection
+    // area and dividing it by the sum of areas - the intersection area
+    iou = interArea / (boxAArea + boxBArea - interArea)
+    return iou
+  }
+
+  function do_rounding(num, places = 2) {
+    return Math.round((num + Number.EPSILON) * 10 ** places) / 10 ** places
+  }
+</script>
+
+<script type="text/javascript">
+  /*This script block handles the drag and drop + AJAX request to server */
+
+  // where files are dropped + file selector is opened
+  var dropRegion = document.getElementById('drop-region')
+
+  // open file selector when clicked on the drop region
+  var fakeInput = document.createElement('input')
+  fakeInput.type = 'file'
+  fakeInput.accept = 'image/*'
+  fakeInput.multiple = false //dont allow multiple file upload
+  dropRegion.addEventListener('click', function () {
+    fakeInput.click()
+  })
+
+  function validateImage(image) {
+    // check the type
+    var validTypes = ['image/jpeg', 'image/png', 'image/gif']
+    if (validTypes.indexOf(image.type) === -1) {
+      alert('Invalid File Type')
+      return false
+    }
+
+    // check the size
+    var maxSizeInBytes = 10e6 // 10MB
+    if (image.size > maxSizeInBytes) {
+      alert('File too large')
+      return false
+    }
+
+    return true
+  }
+
+  function handleFiles(files) {
+    for (var i = 0, len = files.length; i < len; i++) {
+      if (validateImage(files[i])) previewAnduploadImage(files[i])
+    }
+  }
+
+  fakeInput.addEventListener('change', function () {
+    var files = fakeInput.files
+    handleFiles(files)
+  })
+  function preventDefault(e) {
+    e.preventDefault()
+    e.stopPropagation()
+  }
+
+  dropRegion.addEventListener('dragenter', preventDefault, false)
+  dropRegion.addEventListener('dragleave', preventDefault, false)
+  dropRegion.addEventListener('dragover', preventDefault, false)
+  dropRegion.addEventListener('drop', preventDefault, false)
+
+  dropRegion.addEventListener('drop', handleDrop, false)
+
+  function handleDrop(e) {
+    var dt = e.dataTransfer,
+      files = dt.files
+
+    if (files.length) {
+      handleFiles(files)
+    } else {
+      // check for img
+      var html = dt.getData('text/html'),
+        match = html && /\bsrc="?([^"\s]+)"?\s*/.exec(html),
+        url = match && match[1]
+
+      if (url) {
+        uploadImageFromURL(url)
+        return
+      }
+    }
+
+    function uploadImageFromURL(url) {
+      var img = new Image()
+      var c = document.createElement('canvas')
+      var ctx = c.getContext('2d')
+
+      img.onload = function () {
+        c.width = this.naturalWidth // update canvas size to match image
+        c.height = this.naturalHeight
+        ctx.drawImage(this, 0, 0) // draw in image
+        c.toBlob(function (blob) {
+          // get content as PNG blob
+
+          // call our main function
+          handleFiles([blob])
+        }, 'image/png')
+      }
+      img.onerror = function () {
+        alert('Error in uploading')
+      }
+      img.crossOrigin = '' // if from different origin
+      img.src = url
+    }
+  }
+
+  function previewAnduploadImage(img) {
+    /* This function reads the user's image, AJAX requests it to the server,
+      JSON parses the result and draws the image onto the canvas.
+
+      The bboxes are drawn in drawImageWithBBoxes() which get run after
+      image is drawn on the canvas.     */
+
+    // read the image...
+    var reader = new FileReader()
+    reader.onload = function (e) {
+      image.src = e.target.result
+    }
+
+    // create FormData
+    var formData = new FormData()
+    formData.append('file_list', img)
+    formData.append('model_name', $('#model_name').val())
+    formData.append('img_size', $('#img_size').val())
+
+    $.ajax({
+      url: '/detect',
+      data: formData,
+      processData: false,
+      contentType: false,
+      type: 'POST',
+      success: function (json_result_data) {
+        json_result_data = json_result_data.replaceAll("'", '"')
+        data = JSON.parse(json_result_data)[0] //read json result of YOLO
+
+        //read the image, triggers image to load on canvas and bbox's to be drawn
+        reader.readAsDataURL(img)
+      },
+      error: function (xhr, ajaxOptions, thrownError) {
+        alert(
+          'Error code ' +
+            xhr.status +
+            ': ' +
+            thrownError +
+            '\nMessage: ' +
+            JSON.parse(xhr.responseText)['message']
+        )
+      },
+    })
+  }
+</script>
+{% endblock %}

+ 51 - 0
templates/home.html

@@ -0,0 +1,51 @@
+<!-- This is a Jinja2 Template that extends layout.html
+Implements a simple Bootstrap 5 form submission interface for YOLOv5 detection.
+-->
+
+{% extends "layout.html" %} {% block title %}
+<title>YOLOv5 Demo</title>
+{% endblock %} {% block content %}
+<div class="m-2">
+  <h5>Upload image(s) and select a YOLOv5 model</h5>
+
+  <hr />
+
+  <form method="post" action="" enctype="multipart/form-data">
+    <div class="my-2">
+      <label for="file_list" class="form-label">Upload Images</label>
+      <input
+        class="form-control w-auto"
+        type="file"
+        name="file_list"
+        id="file_list"
+        multiple
+      />
+    </div>
+    <div class="my-2">
+      <div class="form-group">
+        <label>Select YOLO Model</label>
+      </div>
+      <select class="form-select w-auto" name="model_name">
+        {% for selection in model_selection_options %}
+        <option value="{{ selection }}">{{ selection }}</option>
+        {% endfor %}
+      </select>
+    </div>
+    <div class="my-2">
+      <label for="img_size_input" class="form-label"
+        >Model Inference Size</label
+      >
+      <input
+        type="text"
+        class="form-control w-auto"
+        id="img_size_input"
+        name="img_size"
+        value="1824"
+      />
+    </div>
+
+    <button class="btn btn-primary" type="submit">Submit</button>
+  </form>
+</div>
+
+{% endblock %}

+ 41 - 0
templates/layout.html

@@ -0,0 +1,41 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <!-- Required meta tags -->
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+
+    {% block title %}{% endblock %}
+    {% block header %}{% endblock %}
+
+    <!-- Bootstrap CSS and JS bundle -->
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js" integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM" crossorigin="anonymous"></script>
+
+</head>
+<body>
+  <!-- Navbar -->
+  <nav class="navbar navbar-expand navbar-dark bg-dark">
+    <a class="navbar-brand" href="#">YOLOv5-FastAPI-Demo</a>
+    <div class="collapse navbar-collapse" id="navbarNav">
+      <ul class="navbar-nav">
+        <li class="nav-item"> 
+          <a class="nav-link" href="/">Home</a>
+        </li>
+        <li class="nav-item"> 
+          <a class="nav-link" href="/drag_and_drop_detect">Drag & Drop Detection</a>
+        </li>
+        <li class="nav-item">
+          <a class="nav-link" href="/docs">Docs</a>
+        </li>
+      </ul>
+    </div>
+  </nav>
+
+  <!--Other template content goes here-->
+  <main role="main" class="container"></main>
+    {% block content %}{% endblock %}
+  </main>
+
+</body>
+</html>

+ 56 - 0
templates/show_results.html

@@ -0,0 +1,56 @@
+{% extends "layout.html" %}
+
+{% block content %}
+
+<!--Create a button that calls the download() function defined in the script below; use a dummy iframe
+to stop the form from redirecting -->
+<iframe name="dummyframe" id="dummyframe" style="display: none;"></iframe>
+<form onsubmit="download('results.json', '{{ bbox_data_str }}')" target="dummyframe">
+  <div class="d-flex">
+    <div class="d-inline-block ml-1 mr-3 py-1"><h2>Results</h2></div>
+    <div class="d-inline-block mx-3 py-1"><input class="btn btn-primary" type="submit" value="Download Results"></div>
+  </div>
+</form>
+
+{% for img_base64, bbox_list in bbox_image_data_zipped %}
+<table class="table">
+<thead>
+  <tr>
+    <th>Class</th>
+    <th>Bounding Box [x1, y1, x2, y2]</th>
+    <th>Confidence</th>
+  </tr>
+</thead>
+  {% for bbox in bbox_list %}
+    <tr>
+      <td>{{ bbox['class_name'] }}</td>
+      <td>{{ bbox['bbox'] }}</td>
+      <td>{{ bbox['confidence'] }}</td>
+    </tr>
+  {% endfor %}
+</table>
+
+<!--Display base64 encoded image, scale image so it fits browser window horizontally -->
+<img src="data:image/jpeg;charset=utf-8;base64,{{ img_base64 }}" id="result_image" style="max-width: 100%;height: auto;width: auto\9;" />
+
+<hr/>
+{% endfor %}
+
+<script>
+  // from https://stackoverflow.com/questions/3665115/how-to-create-a-file-in-memory-for-user-to-download-but-not-through-server/18197341#18197341
+  function download(filename, text) {
+    var element = document.createElement('a');
+    element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text));
+    element.setAttribute('download', filename);
+
+    element.style.display = 'none';
+    document.body.appendChild(element);
+
+    element.click();
+
+    document.body.removeChild(element);
+  }
+</script>
+
+
+{% endblock %}