Commit e583b27c authored by treasearcher

Initial commit

<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="15">
<item index="0" class="java.lang.String" itemvalue="tqdm" />
<item index="1" class="java.lang.String" itemvalue="easydict" />
<item index="2" class="java.lang.String" itemvalue="scikit_image" />
<item index="3" class="java.lang.String" itemvalue="matplotlib" />
<item index="4" class="java.lang.String" itemvalue="tensorboardX" />
<item index="5" class="java.lang.String" itemvalue="torch" />
<item index="6" class="java.lang.String" itemvalue="numpy" />
<item index="7" class="java.lang.String" itemvalue="pycocotools" />
<item index="8" class="java.lang.String" itemvalue="skimage" />
<item index="9" class="java.lang.String" itemvalue="Pillow" />
<item index="10" class="java.lang.String" itemvalue="opencv_python" />
<item index="11" class="java.lang.String" itemvalue="opencv-python" />
<item index="12" class="java.lang.String" itemvalue="h5py" />
<item index="13" class="java.lang.String" itemvalue="pycrayon" />
<item index="14" class="java.lang.String" itemvalue="Cython" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/live_xv_cam_car_graph.iml" filepath="$PROJECT_DIR$/.idea/live_xv_cam_car_graph.iml" />
</modules>
</component>
</project>
import cv2
import json
from socket import *
from tool.utils import load_class_names, plot_boxes_cv2
import queue, _thread, threading, time
import numpy as np
import subprocess
ip_add = '192.168.1.110'
# ip_add = '127.0.0.1'
# pipe raw frames from stdin into ffmpeg and publish them as an RTMP stream
command = ['ffmpeg',
'-re',
'-i', '-',
'-r', '15',
'-c:v', 'libx264',
# '-preset','superfast',
# '-maxrate', '3000k',
# '-bufsize', '6000k',
# '-pix_fmt', 'yuv420p',
# '-g', '50',
# '-s', '640x360',
# '-c:a', 'aac',
# '-b:a', '160k',
# '-ac', '2',
# '-ar', '44100',
# '-b:a', '160k',
'-f', 'flv',
'rtmp://localhost/live/AI']
process = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
# print("subprocess established!")
# ip_add = ''
def send_from(arr, dest):
    """Send the raw bytes of a numpy array over a socket, looping until every byte is sent."""
    view = memoryview(arr).cast('B')
    while len(view):
        nsent = dest.send(view)
        view = view[nsent:]


def recv_into(arr, source):
    """Fill a pre-allocated numpy array with exactly its own number of bytes read from a socket."""
    view = memoryview(arr).cast('B')
    while len(view):
        nrecv = source.recv_into(view)
        view = view[nrecv:]
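# A minimal usage sketch of the fixed-size framing these helpers assume (both
# peers must agree on dtype and shape in advance so the receiver can allocate
# an identically shaped buffer; the 480x640x3 uint8 shape here is illustrative):
#   sender:   send_from(np.zeros((480, 640, 3), dtype=np.uint8), sock)
#   receiver: buf = np.zeros((480, 640, 3), dtype=np.uint8); recv_into(buf, sock)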
# Socket topology: listen on port 25000 and send frames out on connection c;
# connect to ip_add:25002 and receive detection boxes back on c_2.
s = socket(AF_INET, SOCK_STREAM)
s.bind(('', 25000))
s.listen(1)
c, a = s.accept()
print('frame connection accepted')
c_2 = socket(AF_INET, SOCK_STREAM)
c_2.connect((ip_add, 25002))
print('box connection established')
qsize = 10
boxQue = queue.Queue(qsize)
img_sent = queue.Queue(qsize * 10)
lock = threading.Lock()
# time.sleep(10)
fps = 0
fps_dis = 0
def recv_box():
    # Receive detection results over c_2: first an int64 box count, then a
    # (1, N, 7) float32 array; a count of 0 means the frame had no detections.
    lth = np.zeros(shape=(1,), dtype=np.int64)
while 1:
if boxQue.full():
# print('box is full')
time.sleep(0.1)
else:
recv_into(lth, c_2)
if lth[0] == 0:
lock.acquire()
boxQue.put([0])
lock.release()
continue
arr = np.zeros(shape=(1, lth[0], 7), dtype=np.float32)
recv_into(arr, c_2)
box = arr.tolist()
for i in range(lth[0]):
box[0][i][-1] = np.int64(box[0][i][-1])
lock.acquire()
boxQue.put(box)
lock.release()
# sum_flag = np.zeros(shape=(1,), dtype=np.int32)
# def recv_flag():
# global sum_flag
# recv_into(sum_flag, c)
# print('done')
# def cam_send():
# cap = cv2.VideoCapture(0)
# flag = cap.isOpened()
# print(flag)
# # _thread.start_new_thread(recv_flag, ())
# cnt_arr = np.zeros(shape=(1,), dtype=np.int32)
# while 1:
# _, img = cap.read()
# send_from(img, c)
# send_from(np.array([np.sum(img)]), c)
# recv_into(cnt_arr, c)
# if cnt_arr[0]>=5:
# break
# while 1:
# while img_sent.full():
# # print('sent is full')
# time.sleep(0.1)
# _, img = cap.read()
# # print(img)
# send_from(img, c)
# lock.acquire()
# img_sent.put(img)
# lock.release()
# # print(np.sum(img))
# if cv2.waitKey(1) & 0xFF == ord('q'):
# break
# cap.release()
cap = cv2.VideoCapture('rtmp://localhost/live/VR')
flag = cap.isOpened()
print("cap is opened: ", flag)
# while(True):
# # get a frame
# ret, frame = cap.read()
# cv2.imshow('frame', frame)
# if cv2.waitKey(1) == ord('q'):
# break
img_read = None  # latest frame from cam_read; None until a frame is available
lock_img = threading.Lock()
def cam_read():
global cap, img_read
print("no connection in the stream, reconnecting")
time.sleep(1)
cnt_arr = np.zeros(shape=(1,), dtype=np.int32)
while True:
if not cap.isOpened():
# _, frame = None
print('error while subprocess not running')
cam_send()
break
while True:
            ret, img = cap.read()
            if not ret or img is None:
                # stream hiccup: skip this iteration instead of crashing in resize
                continue
            img = cv2.resize(img, (640, 480))
lock_img.acquire()
img_read = img
lock_img.release()
def cam_send():
# while True:
# _, img = cap.read()
# # print(type(img), img.shape[:])
# # cv2.imshow("img", img)
# # time.sleep(0.1)
# send_from(img, c)
# send_from(np.array([np.sum(img)]), c)
# recv_into(cnt_arr, c)
# print("cnt_arr<5,loop!")
# if cnt_arr[0] >= 5:
# break
global img_read
while True:
        while img_sent.full() or img_read is None:
# print('sent is full')
time.sleep(0.1)
# _, img = cap.read()
lock_img.acquire()
img = img_read.copy()
        img_read = None
lock_img.release()
# img = cv2.resize(img, (640, 480))
# cv2.imshow("img", img) #########
send_from(img, c)
lock.acquire()
img_sent.put(img)
lock.release()
if cv2.waitKey(1) == 27:
break
else:
print('key is wrong')
# boxes, confs, clss = trt_yolo.detect(frame, 0.3)
# frame = vis.draw_bboxes(frame, boxes, confs, clss)
# cv2.putText(frame, "FPS: " + str(round(fps, 2)), (10, 50), font, 3, (255, 50, 0), 3)
# toc = time.time()
# curr_fps = 1.0 / (toc - tic)
# fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
# tic = toc
# computing_time = (1 / fps) * 1000
# cv2.putText(frame, str(round(computing_time, 2)) + " ms", (10, 90), font, 3, (255, 50, 0), 3)
# ret_toRTSP, frame_toRTSP = cv2.imencode('.png', frame)
# process.stdin.write(frame_toRTSP.tobytes())
# # process.stdin.write(frame.tostring())
# # cv2.imshow("cap",frame)
def fps_update():
global fps, fps_dis
while 1:
time.sleep(10)
print(fps)
fps_dis = fps / 10
fps = 0
_thread.start_new_thread(recv_box, ())
_thread.start_new_thread(cam_read, ())
_thread.start_new_thread(cam_send, ())
_thread.start_new_thread(fps_update, ())
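# Pipeline overview: cam_read pulls frames from the RTMP input stream, cam_send
# forwards each frame over the socket and keeps a copy in img_sent, recv_box
# collects the matching detections into boxQue, fps_update averages the display
# rate every 10 s, and the main loop below pairs each sent frame with its boxes,
# draws them, and shows the annotated result.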
# def get_box():
# while boxQue.empty():
# time.sleep(0.1)
# lock.acquire()
# box = boxQue.get()
# lock.release()
# return box
namesfile = 'data/coco.names'
class_names = load_class_names(namesfile)
while (1):
# get a frame
while img_sent.empty() or boxQue.empty():
# print('sent or box are empty')
time.sleep(0.1)
# print("empty end")
lock.acquire()
img = img_sent.get()
boxes = boxQue.get()
lock.release()
# print(np.sum(img))
# start = time.time()
if boxes[0] == 0:
# print("img boxes is empty!")
pass
else:
img = plot_boxes_cv2(img, boxes[0], 'predictions.jpg', class_names)
img = cv2.putText(img, 'FPS: {}'.format(fps_dis), (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 2)
    # print(img)
cv2.imshow('fps:', img)
fps += 1
if cv2.waitKey(1) & 0xFF == ord('q'):
break
##########################################################################################################
# ret_toRTSP, frame_toRTSP = cv2.imencode('.png', img)
# print(frame_toRTSP)
# process.stdin.write(frame_toRTSP.tobytes())
##########################################################################################################
# end = time.time()
# print('time: ', end - start)
# cv2.imshow('fps:', img)
# fps += 1
# if cv2.waitKey(1) & 0xFF == ord('q'):
# break
cv2.destroyAllWindows()
c.close()
s.close()
import cv2
cap = cv2.VideoCapture(0)
flag = cap.isOpened()
print(flag)
while (1):
# get a frame
_, img = cap.read()
cv2.imshow('', img)
cv2.waitKey(1)
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
import cv2
cap = cv2.VideoCapture(0)
while cap.isOpened():
_, img = cap.read()
cv2.imshow('', img)
cv2.waitKey(1)
# -*- coding: utf-8 -*-
'''
@Time : 2020/04/26 15:48
@Author : Tianxiaomo
@File : camera.py
@Notice :
@Modification :
@Author :
@Time :
@Detail :
'''
from __future__ import division
import cv2
from tool.darknet2pytorch import Darknet
import argparse
from tool.utils import *
from tool.torch_utils import *
def arg_parse():
"""
    Parse arguments to the detect module
"""
    parser = argparse.ArgumentParser(description='YOLO v4 Cam Demo')
parser.add_argument("--confidence", dest="confidence", help="Object Confidence to filter predictions", default=0.25)
parser.add_argument("--nms_thresh", dest="nms_thresh", help="NMS Threshhold", default=0.4)
parser.add_argument("--reso", dest='reso', help=
"Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
default="160", type=str)
return parser.parse_args()
if __name__ == '__main__':
cfgfile = "cfg/yolov4.cfg"
weightsfile = "weight/yolov4.weights"
args = arg_parse()
confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
CUDA = torch.cuda.is_available()
num_classes = 80
bbox_attrs = 5 + num_classes
class_names = load_class_names("data/coco.names")
model = Darknet(cfgfile)
model.load_weights(weightsfile)
if CUDA:
model.cuda()
model.eval()
cap = cv2.VideoCapture(0)
assert cap.isOpened(), 'Cannot capture source'
frames = 0
start = time.time()
while cap.isOpened():
ret, frame = cap.read()
if ret:
sized = cv2.resize(frame, (model.width, model.height))
sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
boxes = do_detect(model, sized, 0.5, 0.4, CUDA)
orig_im = plot_boxes_cv2(frame, boxes, class_names=class_names)
cv2.imshow("frame", orig_im)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
frames += 1
print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
else:
break
# -*- coding: utf-8 -*-
'''
@Time : 2020/05/08 11:45
@Author : Tianxiaomo
@File : coco_annotatin.py
@Notice :
@Modification :
@Author :
@Time :
@Detail :
'''
import json
from collections import defaultdict
from tqdm import tqdm
import os
"""hyper parameters"""
json_file_path = 'E:/Dataset/mscoco2017/annotations/instances_train2017.json'
images_dir_path = 'mscoco2017/train2017/'
output_path = '../data/val.txt'
"""load json file"""
name_box_id = defaultdict(list)
id_name = dict()
with open(json_file_path, encoding='utf-8') as f:
data = json.load(f)
"""generate labels"""
images = data['images']
annotations = data['annotations']
for ant in tqdm(annotations):
id = ant['image_id']
# name = os.path.join(images_dir_path, images[id]['file_name'])
name = os.path.join(images_dir_path, '{:012d}.jpg'.format(id))
cat = ant['category_id']
    # remap COCO's sparse 1-90 category ids onto contiguous 0-79 class indices
    if cat >= 1 and cat <= 11:
cat = cat - 1
elif cat >= 13 and cat <= 25:
cat = cat - 2
elif cat >= 27 and cat <= 28:
cat = cat - 3
elif cat >= 31 and cat <= 44:
cat = cat - 5
elif cat >= 46 and cat <= 65:
cat = cat - 6
elif cat == 67:
cat = cat - 7
elif cat == 70:
cat = cat - 9
elif cat >= 72 and cat <= 82:
cat = cat - 10
elif cat >= 84 and cat <= 90:
cat = cat - 11
name_box_id[name].append([ant['bbox'], cat])
"""write to txt"""
with open(output_path, 'w') as f:
for key in tqdm(name_box_id.keys()):
f.write(key)
box_infos = name_box_id[key]
for info in box_infos:
x_min = int(info[0][0])
y_min = int(info[0][1])
x_max = x_min + int(info[0][2])
y_max = y_min + int(info[0][3])
box_info = " %d,%d,%d,%d,%d" % (
x_min, y_min, x_max, y_max, int(info[1]))
f.write(box_info)
f.write('\n')
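# Each output line is "<image path> xmin,ymin,xmax,ymax,class_id ..." with one
# comma-separated group per object, e.g. (illustrative values, not real annotations):
#   mscoco2017/train2017/000000000009.jpg 1,187,613,473,45 36,45,402,377,49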
import sys
import torch
from tool.darknet2pytorch import Darknet
def transform_to_onnx(cfgfile, weightfile, batch_size=1, dynamic=False):
model = Darknet(cfgfile)
model.print_network()
model.load_weights(weightfile)
print('Loading weights from %s... Done!' % (weightfile))
    if batch_size <= 0:
        dynamic = True
input_names = ["input"]
output_names = ['boxes', 'confs']
if dynamic:
x = torch.randn((1, 3, model.height, model.width), requires_grad=True)
onnx_file_name = "yolov4_-1_3_{}_{}_dynamic.onnx".format(model.height, model.width)
dynamic_axes = {"input": {0: "batch_size"}, "boxes": {0: "batch_size"}, "confs": {0: "batch_size"}}
# Export the model
print('Export the onnx model ...')
torch.onnx.export(model,
x,
onnx_file_name,
export_params=True,
opset_version=11,
do_constant_folding=True,
input_names=input_names, output_names=output_names,
dynamic_axes=dynamic_axes)
print('Onnx model exporting done')
return onnx_file_name
else:
x = torch.randn((batch_size, 3, model.height, model.width), requires_grad=True)
onnx_file_name = "yolov4_{}_3_{}_{}_static.onnx".format(batch_size, model.height, model.width)
torch.onnx.export(model,
x,
onnx_file_name,
export_params=True,
opset_version=11,
do_constant_folding=True,
input_names=input_names, output_names=output_names,
dynamic_axes=None)
print('Onnx model exporting done')
return onnx_file_name
if __name__ == '__main__':
if len(sys.argv) == 3:
cfgfile = sys.argv[1]
weightfile = sys.argv[2]
transform_to_onnx(cfgfile, weightfile)
elif len(sys.argv) == 4:
cfgfile = sys.argv[1]
weightfile = sys.argv[2]
batch_size = int(sys.argv[3])
transform_to_onnx(cfgfile, weightfile, batch_size)
elif len(sys.argv) == 5:
cfgfile = sys.argv[1]
weightfile = sys.argv[2]
batch_size = int(sys.argv[3])
dynamic = True if sys.argv[4] == 'True' else False
transform_to_onnx(cfgfile, weightfile, batch_size, dynamic)
else:
        print('Please execute this script this way:\n')
        print('  python darknet2onnx.py <cfgFile> <weightFile>')
        print('or')
        print('  python darknet2onnx.py <cfgFile> <weightFile> <batchSize>')
        print('or')
        print('  python darknet2onnx.py <cfgFile> <weightFile> <batchSize> <dynamic(True/False)>')
import sys
import onnx
from onnx_tf.backend import prepare
# tensorflow >= 2.0 is required
# 1: Thanks: https://github.com/onnx/onnx-tensorflow
# 2: Run: git clone https://github.com/onnx/onnx-tensorflow.git && cd onnx-tensorflow
#    then run: pip install -e .
# Note:
# Errors occurred when using "pip install onnx-tf" (at least for me),
# so installing from source is recommended.
def transform_to_tensorflow(onnx_input_path, pb_output_path):
onnx_model = onnx.load(onnx_input_path) # load onnx model
tf_exp = prepare(onnx_model) # prepare tf representation
tf_exp.export_graph(pb_output_path) # export the model
if __name__ == '__main__':
if len(sys.argv) == 1:
sys.argv.append('../weight/yolov4_1_3_608_608.onnx') # use:darknet2onnx.py
sys.argv.append('../weight/yolov4.pb') # use:onnx2tensorflow.py
if len(sys.argv) == 3:
onnxfile = sys.argv[1]
tfpb_outfile = sys.argv[2]
transform_to_tensorflow(onnxfile, tfpb_outfile)
else:
print('Please execute this script this way:\n')
print(' python onnx2tensorflow.py <onnxfile> <tfpboutfile>')
import torch.nn as nn
import torch.nn.functional as F
from tool.torch_utils import *
def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale,
sil_thresh, seen):
nB = target.size(0)
nA = num_anchors
nC = num_classes
    anchor_step = len(anchors) // num_anchors  # integer division: anchor_step is used as an index
conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale
coord_mask = torch.zeros(nB, nA, nH, nW)
cls_mask = torch.zeros(nB, nA, nH, nW)
tx = torch.zeros(nB, nA, nH, nW)
ty = torch.zeros(nB, nA, nH, nW)
tw = torch.zeros(nB, nA, nH, nW)
th = torch.zeros(nB, nA, nH, nW)
tconf = torch.zeros(nB, nA, nH, nW)
tcls = torch.zeros(nB, nA, nH, nW)
nAnchors = nA * nH * nW
nPixels = nH * nW
for b in range(nB):
cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t()
cur_ious = torch.zeros(nAnchors)
for t in range(50):
if target[b][t * 5 + 1] == 0:
break
gx = target[b][t * 5 + 1] * nW
gy = target[b][t * 5 + 2] * nH
gw = target[b][t * 5 + 3] * nW
gh = target[b][t * 5 + 4] * nH
cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t()
cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
conf_mask[b][cur_ious > sil_thresh] = 0
if seen < 12800:
if anchor_step == 4:
tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1, nA, 1,
1).repeat(
nB, 1, nH, nW)
ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view(
1, nA, 1, 1).repeat(nB, 1, nH, nW)
else:
tx.fill_(0.5)
ty.fill_(0.5)
tw.zero_()
th.zero_()
coord_mask.fill_(1)
nGT = 0
nCorrect = 0
for b in range(nB):
for t in range(50):
if target[b][t * 5 + 1] == 0:
break
nGT = nGT + 1
best_iou = 0.0
best_n = -1
min_dist = 10000
gx = target[b][t * 5 + 1] * nW
gy = target[b][t * 5 + 2] * nH
gi = int(gx)
gj = int(gy)
gw = target[b][t * 5 + 3] * nW
gh = target[b][t * 5 + 4] * nH
gt_box = [0, 0, gw, gh]
for n in range(nA):
aw = anchors[anchor_step * n]
ah = anchors[anchor_step * n + 1]
anchor_box = [0, 0, aw, ah]
iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
if anchor_step == 4:
ax = anchors[anchor_step * n + 2]
ay = anchors[anchor_step * n + 3]
dist = pow(((gi + ax) - gx), 2) + pow(((gj + ay) - gy), 2)
if iou > best_iou:
best_iou = iou
best_n = n
elif anchor_step == 4 and iou == best_iou and dist < min_dist:
best_iou = iou
best_n = n
min_dist = dist
gt_box = [gx, gy, gw, gh]
pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi]
coord_mask[b][best_n][gj][gi] = 1
cls_mask[b][best_n][gj][gi] = 1
conf_mask[b][best_n][gj][gi] = object_scale
tx[b][best_n][gj][gi] = target[b][t * 5 + 1] * nW - gi
ty[b][best_n][gj][gi] = target[b][t * 5 + 2] * nH - gj
tw[b][best_n][gj][gi] = math.log(gw / anchors[anchor_step * best_n])
th[b][best_n][gj][gi] = math.log(gh / anchors[anchor_step * best_n + 1])
iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou
tconf[b][best_n][gj][gi] = iou
tcls[b][best_n][gj][gi] = target[b][t * 5]
if iou > 0.5:
nCorrect = nCorrect + 1
return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls
class RegionLoss(nn.Module):
def __init__(self, num_classes=0, anchors=[], num_anchors=1):
super(RegionLoss, self).__init__()
self.num_classes = num_classes
self.anchors = anchors
self.num_anchors = num_anchors
        self.anchor_step = len(anchors) // num_anchors
self.coord_scale = 1
self.noobject_scale = 1
self.object_scale = 5
self.class_scale = 1
self.thresh = 0.6
self.seen = 0
def forward(self, output, target):
# output : BxAs*(4+1+num_classes)*H*W
t0 = time.time()
nB = output.data.size(0)
nA = self.num_anchors
nC = self.num_classes
nH = output.data.size(2)
nW = output.data.size(3)
output = output.view(nB, nA, (5 + nC), nH, nW)
x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)
t1 = time.time()
pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
pred_boxes[0] = x.data + grid_x
pred_boxes[1] = y.data + grid_y
pred_boxes[2] = torch.exp(w.data) * anchor_w
pred_boxes[3] = torch.exp(h.data) * anchor_h
pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
t2 = time.time()
nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
target.data,
self.anchors, nA,
nC, \
nH, nW,
self.noobject_scale,
self.object_scale,
self.thresh,
self.seen)
cls_mask = (cls_mask == 1)
        nProposals = int((conf > 0.25).sum().item())
tx = Variable(tx.cuda())
ty = Variable(ty.cuda())
tw = Variable(tw.cuda())
th = Variable(th.cuda())
tconf = Variable(tconf.cuda())
tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
coord_mask = Variable(coord_mask.cuda())
conf_mask = Variable(conf_mask.cuda().sqrt())
cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
cls = cls[cls_mask].view(-1, nC)
t3 = time.time()
loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0
loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0
loss_w = self.coord_scale * nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0
loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0
loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0
loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
t4 = time.time()
if False:
print('-----------------------------------')
print(' activation : %f' % (t1 - t0))
print(' create pred_boxes : %f' % (t2 - t1))
print(' build targets : %f' % (t3 - t2))
print(' create loss : %f' % (t4 - t3))
print(' total : %f' % (t4 - t0))
print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0],
loss_conf.data[0], loss_cls.data[0], loss.data[0]))
return loss
import sys
import os
import time
import math
import torch
import numpy as np
from torch.autograd import Variable
import itertools
import struct # get_image_size
import imghdr # get_image_size
from tool import utils
def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
if x1y1x2y2:
mx = torch.min(boxes1[0], boxes2[0])
Mx = torch.max(boxes1[2], boxes2[2])
my = torch.min(boxes1[1], boxes2[1])
My = torch.max(boxes1[3], boxes2[3])
w1 = boxes1[2] - boxes1[0]
h1 = boxes1[3] - boxes1[1]
w2 = boxes2[2] - boxes2[0]
h2 = boxes2[3] - boxes2[1]
else:
mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0)
Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0)
my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0)
My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0)
w1 = boxes1[2]
h1 = boxes1[3]
w2 = boxes2[2]
h2 = boxes2[3]
uw = Mx - mx
uh = My - my
cw = w1 + w2 - uw
ch = h1 + h2 - uh
mask = ((cw <= 0) + (ch <= 0) > 0)
area1 = w1 * h1
area2 = w2 * h2
carea = cw * ch
carea[mask] = 0
uarea = area1 + area2 - carea
return carea / uarea
def get_region_boxes(boxes_and_confs):
# print('Getting boxes from boxes and confs ...')
boxes_list = []
confs_list = []
for item in boxes_and_confs:
boxes_list.append(item[0])
confs_list.append(item[1])
# boxes: [batch, num1 + num2 + num3, 1, 4]
# confs: [batch, num1 + num2 + num3, num_classes]
boxes = torch.cat(boxes_list, dim=1)
confs = torch.cat(confs_list, dim=1)
return [boxes, confs]
def convert2cpu(gpu_matrix):
return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)
def convert2cpu_long(gpu_matrix):
return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix)
def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1):
model.eval()
if type(img) == np.ndarray and len(img.shape) == 3: # cv2 image
img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
elif type(img) == np.ndarray and len(img.shape) == 4:
img = torch.from_numpy(img.transpose(0, 3, 1, 2)).float().div(255.0)
else:
print("unknow image type")
exit(-1)
if use_cuda:
img = img.cuda()
img = torch.autograd.Variable(img)
output = model(img)
return utils.post_processing(img, conf_thresh, nms_thresh, output)
# Object detection reference training scripts
This folder contains reference training scripts for object detection.
They serve as a log of how to train specific models, to provide baseline
training and evaluation scripts to quickly bootstrap research.
To execute the example commands below you must install the following:
```
cython
pycocotools
matplotlib
```
You must modify the following flags:
`--data-path=/path/to/coco/dataset`
`--nproc_per_node=<number_of_gpus_available>`
Unless otherwise noted, all models have been trained on 8x V100 GPUs.
### Faster R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26 \
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Mask R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model maskrcnn_resnet50_fpn --epochs 26 \
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Keypoint R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46 \
    --lr-steps 36 43 --aspect-ratio-group-factor 3
```
import copy
import os
from PIL import Image
import torch
import torch.utils.data
import torchvision
from pycocotools import mask as coco_mask
from pycocotools.coco import COCO
from . import transforms as T
class FilterAndRemapCocoCategories(object):
def __init__(self, categories, remap=True):
self.categories = categories
self.remap = remap
def __call__(self, image, target):
anno = target["annotations"]
anno = [obj for obj in anno if obj["category_id"] in self.categories]
if not self.remap:
target["annotations"] = anno
return image, target
anno = copy.deepcopy(anno)
for obj in anno:
obj["category_id"] = self.categories.index(obj["category_id"])
target["annotations"] = anno
return image, target
def convert_coco_poly_to_mask(segmentations, height, width):
masks = []
for polygons in segmentations:
rles = coco_mask.frPyObjects(polygons, height, width)
mask = coco_mask.decode(rles)
if len(mask.shape) < 3:
mask = mask[..., None]
mask = torch.as_tensor(mask, dtype=torch.uint8)
mask = mask.any(dim=2)
masks.append(mask)
if masks:
masks = torch.stack(masks, dim=0)
else:
masks = torch.zeros((0, height, width), dtype=torch.uint8)
return masks
class ConvertCocoPolysToMask(object):
def __call__(self, image, target):
w, h = image.size
image_id = target["image_id"]
image_id = torch.tensor([image_id])
anno = target["annotations"]
anno = [obj for obj in anno if obj['iscrowd'] == 0]
boxes = [obj["bbox"] for obj in anno]
# guard against no boxes via resizing
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        # convert COCO [x, y, w, h] boxes to [xmin, ymin, xmax, ymax]
        boxes[:, 2:] += boxes[:, :2]
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)
classes = [obj["category_id"] for obj in anno]
classes = torch.tensor(classes, dtype=torch.int64)
segmentations = [obj["segmentation"] for obj in anno]
masks = convert_coco_poly_to_mask(segmentations, h, w)
keypoints = None
if anno and "keypoints" in anno[0]:
keypoints = [obj["keypoints"] for obj in anno]
keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
num_keypoints = keypoints.shape[0]
if num_keypoints:
keypoints = keypoints.view(num_keypoints, -1, 3)
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
boxes = boxes[keep]
classes = classes[keep]
masks = masks[keep]
if keypoints is not None:
keypoints = keypoints[keep]
target = {}
target["boxes"] = boxes
target["labels"] = classes
target["masks"] = masks
target["image_id"] = image_id
if keypoints is not None:
target["keypoints"] = keypoints
# for conversion to coco api
area = torch.tensor([obj["area"] for obj in anno])
iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
target["area"] = area
target["iscrowd"] = iscrowd
return image, target
def _coco_remove_images_without_annotations(dataset, cat_list=None):
def _has_only_empty_bbox(anno):
return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
def _count_visible_keypoints(anno):
return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
min_keypoints_per_image = 10
def _has_valid_annotation(anno):
# if it's empty, there is no annotation
if len(anno) == 0:
return False
# if all boxes have close to zero area, there is no annotation
if _has_only_empty_bbox(anno):
return False
        # the keypoints task has slightly different criteria for deciding
        # whether an annotation is valid
if "keypoints" not in anno[0]:
return True
# for keypoint detection tasks, only consider valid images those
# containing at least min_keypoints_per_image
if _count_visible_keypoints(anno) >= min_keypoints_per_image:
return True
return False
assert isinstance(dataset, torchvision.datasets.CocoDetection)
ids = []
for ds_idx, img_id in enumerate(dataset.ids):
ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
anno = dataset.coco.loadAnns(ann_ids)
if cat_list:
anno = [obj for obj in anno if obj["category_id"] in cat_list]
if _has_valid_annotation(anno):
ids.append(ds_idx)
dataset = torch.utils.data.Subset(dataset, ids)
return dataset
def convert_to_coco_api(ds, bbox_fmt='voc'):
"""
"""
print("in function convert_to_coco_api...")
coco_ds = COCO()
# annotation IDs need to start at 1, not 0, see torchvision issue #1530
ann_id = 1
dataset = {'images': [], 'categories': [], 'annotations': []}
categories = set()
for img_idx in range(len(ds)):
# find better way to get target
# targets = ds.get_annotations(img_idx)
img, targets = ds[img_idx]
image_id = targets["image_id"].item()
img_dict = {}
img_dict['id'] = image_id
img_dict['height'] = img.shape[-2]
img_dict['width'] = img.shape[-1]
dataset['images'].append(img_dict)
bboxes = targets["boxes"]
# to coco format: xmin, ymin, w, h
if bbox_fmt.lower() == "voc": # xmin, ymin, xmax, ymax
bboxes[:, 2:] -= bboxes[:, :2]
elif bbox_fmt.lower() == "yolo": # xcen, ycen, w, h
bboxes[:, :2] = bboxes[:, :2] - bboxes[:, 2:]/2
elif bbox_fmt.lower() == "coco":
pass
else:
raise ValueError(f"bounding box format {bbox_fmt} not supported!")
bboxes = bboxes.tolist()
labels = targets['labels'].tolist()
areas = targets['area'].tolist()
iscrowd = targets['iscrowd'].tolist()
if 'masks' in targets:
masks = targets['masks']
# make masks Fortran contiguous for coco_mask
masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
if 'keypoints' in targets:
keypoints = targets['keypoints']
keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
num_objs = len(bboxes)
for i in range(num_objs):
ann = {}
ann['image_id'] = image_id
ann['bbox'] = bboxes[i]
ann['category_id'] = labels[i]
categories.add(labels[i])
ann['area'] = areas[i]
ann['iscrowd'] = iscrowd[i]
ann['id'] = ann_id
if 'masks' in targets:
ann["segmentation"] = coco_mask.encode(masks[i].numpy())
if 'keypoints' in targets:
ann['keypoints'] = keypoints[i]
ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3])
dataset['annotations'].append(ann)
ann_id += 1
dataset['categories'] = [{'id': i} for i in sorted(categories)]
coco_ds.dataset = dataset
coco_ds.createIndex()
return coco_ds
def get_coco_api_from_dataset(dataset):
for _ in range(10):
if isinstance(dataset, torchvision.datasets.CocoDetection):
break
if isinstance(dataset, torch.utils.data.Subset):
dataset = dataset.dataset
if isinstance(dataset, torchvision.datasets.CocoDetection):
return dataset.coco
return convert_to_coco_api(dataset)
class CocoDetection(torchvision.datasets.CocoDetection):
def __init__(self, img_folder, ann_file, transforms):
super(CocoDetection, self).__init__(img_folder, ann_file)
self._transforms = transforms
def __getitem__(self, idx):
img, target = super(CocoDetection, self).__getitem__(idx)
image_id = self.ids[idx]
target = dict(image_id=image_id, annotations=target)
if self._transforms is not None:
img, target = self._transforms(img, target)
return img, target
def get_coco(root, image_set, transforms, mode='instances'):
anno_file_template = "{}_{}2017.json"
PATHS = {
"train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
"val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
# "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
}
t = [ConvertCocoPolysToMask()]
if transforms is not None:
t.append(transforms)
transforms = T.Compose(t)
img_folder, ann_file = PATHS[image_set]
img_folder = os.path.join(root, img_folder)
ann_file = os.path.join(root, ann_file)
dataset = CocoDetection(img_folder, ann_file, transforms=transforms)
if image_set == "train":
dataset = _coco_remove_images_without_annotations(dataset)
# dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])
return dataset
def get_coco_kp(root, image_set, transforms):
return get_coco(root, image_set, transforms, mode="person_keypoints")
import math
import sys
import time
import torch
import torchvision.models.detection.mask_rcnn
from .coco_utils import get_coco_api_from_dataset
from .coco_eval import CocoEvaluator
from . import utils
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
header = 'Epoch: [{}]'.format(epoch)
lr_scheduler = None
if epoch == 0:
warmup_factor = 1. / 1000
warmup_iters = min(1000, len(data_loader) - 1)
lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
for images, targets in metric_logger.log_every(data_loader, print_freq, header):
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print("Loss is {}, stopping training".format(loss_value))
print(loss_dict_reduced)
sys.exit(1)
optimizer.zero_grad()
losses.backward()
optimizer.step()
if lr_scheduler is not None:
lr_scheduler.step()
metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
metric_logger.update(lr=optimizer.param_groups[0]["lr"])
return metric_logger
def _get_iou_types(model):
model_without_ddp = model
if isinstance(model, torch.nn.parallel.DistributedDataParallel):
model_without_ddp = model.module
iou_types = ["bbox"]
if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
iou_types.append("segm")
if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
iou_types.append("keypoints")
return iou_types
@torch.no_grad()
def evaluate(model, data_loader, device):
n_threads = torch.get_num_threads()
# FIXME remove this and make paste_masks_in_image run on the GPU
torch.set_num_threads(1)
cpu_device = torch.device("cpu")
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
header = 'Test:'
coco = get_coco_api_from_dataset(data_loader.dataset)
iou_types = _get_iou_types(model)
coco_evaluator = CocoEvaluator(coco, iou_types)
for images, targets in metric_logger.log_every(data_loader, 100, header):
images = list(img.to(device) for img in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
torch.cuda.synchronize()
model_time = time.time()
outputs = model(images)
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
model_time = time.time() - model_time
res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
evaluator_time = time.time()
coco_evaluator.update(res)
evaluator_time = time.time() - evaluator_time
metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)
coco_evaluator.synchronize_between_processes()
# accumulate predictions from all images
coco_evaluator.accumulate()
coco_evaluator.summarize()
torch.set_num_threads(n_threads)
return coco_evaluator
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np
import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision
from PIL import Image
def _repeat_to_at_least(iterable, n):
repeat_times = math.ceil(n / len(iterable))
repeated = chain.from_iterable(repeat(iterable, repeat_times))
return list(repeated)
class GroupedBatchSampler(BatchSampler):
"""
Wraps another sampler to yield a mini-batch of indices.
    It enforces that each batch only contains elements from the same group.
    It also tries to provide mini-batches that follow an ordering as close as
    possible to the ordering from the original sampler.
Arguments:
sampler (Sampler): Base sampler.
group_ids (list[int]): If the sampler produces indices in range [0, N),
`group_ids` must be a list of `N` ints which contains the group id of each sample.
The group ids must be a continuous set of integers starting from
0, i.e. they must be in the range [0, num_groups).
batch_size (int): Size of mini-batch.
"""
def __init__(self, sampler, group_ids, batch_size):
if not isinstance(sampler, Sampler):
raise ValueError(
"sampler should be an instance of "
"torch.utils.data.Sampler, but got sampler={}".format(sampler)
)
self.sampler = sampler
self.group_ids = group_ids
self.batch_size = batch_size
def __iter__(self):
buffer_per_group = defaultdict(list)
samples_per_group = defaultdict(list)
num_batches = 0
for idx in self.sampler:
group_id = self.group_ids[idx]
buffer_per_group[group_id].append(idx)
samples_per_group[group_id].append(idx)
if len(buffer_per_group[group_id]) == self.batch_size:
yield buffer_per_group[group_id]
num_batches += 1
del buffer_per_group[group_id]
assert len(buffer_per_group[group_id]) < self.batch_size
# now we have run out of elements that satisfy
# the group criteria, let's return the remaining
# elements so that the size of the sampler is
# deterministic
expected_num_batches = len(self)
num_remaining = expected_num_batches - num_batches
if num_remaining > 0:
# for the remaining batches, take first the buffers with largest number
# of elements
for group_id, _ in sorted(buffer_per_group.items(),
key=lambda x: len(x[1]), reverse=True):
remaining = self.batch_size - len(buffer_per_group[group_id])
samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)
buffer_per_group[group_id].extend(samples_from_group_id[:remaining])
assert len(buffer_per_group[group_id]) == self.batch_size
yield buffer_per_group[group_id]
num_remaining -= 1
if num_remaining == 0:
break
assert num_remaining == 0
def __len__(self):
return len(self.sampler) // self.batch_size
def _compute_aspect_ratios_slow(dataset, indices=None):
print("Your dataset doesn't support the fast path for "
"computing the aspect ratios, so will iterate over "
"the full dataset and load every image instead. "
"This might take some time...")
if indices is None:
indices = range(len(dataset))
class SubsetSampler(Sampler):
def __init__(self, indices):
self.indices = indices
def __iter__(self):
return iter(self.indices)
def __len__(self):
return len(self.indices)
sampler = SubsetSampler(indices)
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=1, sampler=sampler,
num_workers=14, # you might want to increase it for faster processing
collate_fn=lambda x: x[0])
aspect_ratios = []
with tqdm(total=len(dataset)) as pbar:
for _i, (img, _) in enumerate(data_loader):
pbar.update(1)
height, width = img.shape[-2:]
aspect_ratio = float(width) / float(height)
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
aspect_ratios = []
for i in indices:
height, width = dataset.get_height_and_width(i)
aspect_ratio = float(width) / float(height)
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
aspect_ratios = []
for i in indices:
img_info = dataset.coco.imgs[dataset.ids[i]]
aspect_ratio = float(img_info["width"]) / float(img_info["height"])
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
aspect_ratios = []
for i in indices:
# this doesn't load the data into memory, because PIL loads it lazily
width, height = Image.open(dataset.images[i]).size
aspect_ratio = float(width) / float(height)
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
ds_indices = [dataset.indices[i] for i in indices]
return compute_aspect_ratios(dataset.dataset, ds_indices)
def compute_aspect_ratios(dataset, indices=None):
if hasattr(dataset, "get_height_and_width"):
return _compute_aspect_ratios_custom_dataset(dataset, indices)
if isinstance(dataset, torchvision.datasets.CocoDetection):
return _compute_aspect_ratios_coco_dataset(dataset, indices)
if isinstance(dataset, torchvision.datasets.VOCDetection):
return _compute_aspect_ratios_voc_dataset(dataset, indices)
if isinstance(dataset, torch.utils.data.Subset):
return _compute_aspect_ratios_subset_dataset(dataset, indices)
# slow path
return _compute_aspect_ratios_slow(dataset, indices)
def _quantize(x, bins):
bins = copy.deepcopy(bins)
bins = sorted(bins)
quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
return quantized
def create_aspect_ratio_groups(dataset, k=0):
aspect_ratios = compute_aspect_ratios(dataset)
bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]
groups = _quantize(aspect_ratios, bins)
# count number of elements per group
counts = np.unique(groups, return_counts=True)[1]
fbins = [0] + bins + [np.inf]
print("Using {} as bins for aspect ratio quantization".format(fbins))
print("Count of instances per bin: {}".format(counts))
return groups
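# Illustrative example (assuming k=1): bins = 2 ** linspace(-1, 1, 3) = [0.5, 1.0, 2.0],
# so an image with aspect ratio 0.8 lands in group bisect_right(bins, 0.8) = 1, a 1.5
# image lands in group 2, and the printed fbins are [0, 0.5, 1.0, 2.0, inf].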
r"""PyTorch Detection Training.
To run in a multi-gpu environment, use the distributed launcher::
python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
train.py ... --world-size $NGPU
The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
--lr 0.02 --batch-size 2 --world-size 8
If you use a different number of GPUs, the learning rate should be scaled to 0.02/8*$NGPU (e.g. 0.01 for 4 GPUs).
On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
--epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3
Also, if you train Keypoint R-CNN, the default hyperparameters are
--epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
Because the number of images is smaller in the person keypoint subset of COCO,
the number of epochs should be adapted so that we have the same number of iterations.
"""
import datetime
import os
import time
import torch
import torch.utils.data
from torch import nn
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn
from .coco_utils import get_coco, get_coco_kp
from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from .engine import train_one_epoch, evaluate
from . import utils
from . import transforms as T
def get_dataset(name, image_set, transform, data_path):
paths = {
"coco": (data_path, get_coco, 91),
"coco_kp": (data_path, get_coco_kp, 2)
}
p, ds_fn, num_classes = paths[name]
ds = ds_fn(p, image_set=image_set, transforms=transform)
return ds, num_classes
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
def main(args):
utils.init_distributed_mode(args)
print(args)
device = torch.device(args.device)
# Data loading code
print("Loading data")
dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True), args.data_path)
dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False), args.data_path)
print("Creating data loaders")
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
else:
train_sampler = torch.utils.data.RandomSampler(dataset)
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
if args.aspect_ratio_group_factor >= 0:
group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
else:
train_batch_sampler = torch.utils.data.BatchSampler(
train_sampler, args.batch_size, drop_last=True)
data_loader = torch.utils.data.DataLoader(
dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1,
sampler=test_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
print("Creating model")
model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes,
pretrained=args.pretrained)
model.to(device)
model_without_ddp = model
if args.distributed:
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
if args.resume:
checkpoint = torch.load(args.resume, map_location='cpu')
model_without_ddp.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
args.start_epoch = checkpoint['epoch'] + 1
if args.test_only:
evaluate(model, data_loader_test, device=device)
return
print("Start training")
start_time = time.time()
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
lr_scheduler.step()
if args.output_dir:
utils.save_on_master({
'model': model_without_ddp.state_dict(),
'optimizer': optimizer.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(),
'args': args,
'epoch': epoch},
os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
# evaluate after every epoch
evaluate(model, data_loader_test, device=device)
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
description=__doc__)
parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
parser.add_argument('--dataset', default='coco', help='dataset')
parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
parser.add_argument('--device', default='cuda', help='device')
parser.add_argument('-b', '--batch-size', default=2, type=int,
help='images per gpu, the total batch size is $NGPU x batch_size')
parser.add_argument('--epochs', default=26, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--lr', default=0.02, type=float,
help='initial learning rate, 0.02 is the default value for training '
'on 8 gpus and 2 images_per_gpu')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int, help='decrease lr every step-size epochs')
parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
parser.add_argument('--output-dir', default='.', help='path where to save')
parser.add_argument('--resume', default='', help='resume from checkpoint')
parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument(
"--pretrained",
dest="pretrained",
help="Use pre-trained models from the modelzoo",
action="store_true",
)
# distributed training parameters
parser.add_argument('--world-size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
args = parser.parse_args()
if args.output_dir:
utils.mkdir(args.output_dir)
main(args)
import random
import torch
from torchvision.transforms import functional as F
def _flip_coco_person_keypoints(kps, width):
flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
flipped_data = kps[:, flip_inds]
flipped_data[..., 0] = width - flipped_data[..., 0]
# Maintain COCO convention that if visibility == 0, then x, y = 0
inds = flipped_data[..., 2] == 0
flipped_data[inds] = 0
return flipped_data
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, target):
for t in self.transforms:
image, target = t(image, target)
return image, target
class RandomHorizontalFlip(object):
def __init__(self, prob):
self.prob = prob
def __call__(self, image, target):
if random.random() < self.prob:
height, width = image.shape[-2:]
image = image.flip(-1)
bbox = target["boxes"]
bbox[:, [0, 2]] = width - bbox[:, [2, 0]]
target["boxes"] = bbox
if "masks" in target:
target["masks"] = target["masks"].flip(-1)
if "keypoints" in target:
keypoints = target["keypoints"]
keypoints = _flip_coco_person_keypoints(keypoints, width)
target["keypoints"] = keypoints
return image, target
class ToTensor(object):
def __call__(self, image, target):
image = F.to_tensor(image)
return image, target
from collections import defaultdict, deque
import datetime
import pickle
import time
import torch
import torch.distributed as dist
import errno
import os
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
def __init__(self, window_size=20, fmt=None):
if fmt is None:
fmt = "{median:.4f} ({global_avg:.4f})"
self.deque = deque(maxlen=window_size)
self.total = 0.0
self.count = 0
self.fmt = fmt
def update(self, value, n=1):
self.deque.append(value)
self.count += n
self.total += value * n
def synchronize_between_processes(self):
"""
Warning: does not synchronize the deque!
"""
if not is_dist_avail_and_initialized():
return
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
dist.barrier()
dist.all_reduce(t)
t = t.tolist()
self.count = int(t[0])
self.total = t[1]
@property
def median(self):
d = torch.tensor(list(self.deque))
return d.median().item()
@property
def avg(self):
d = torch.tensor(list(self.deque), dtype=torch.float32)
return d.mean().item()
@property
def global_avg(self):
return self.total / self.count
@property
def max(self):
return max(self.deque)
@property
def value(self):
return self.deque[-1]
def __str__(self):
return self.fmt.format(
median=self.median,
avg=self.avg,
global_avg=self.global_avg,
max=self.max,
value=self.value)
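# A minimal standalone sketch of the windowed vs. global statistics (assuming a
# single process, so synchronize_between_processes is not needed):
#   v = SmoothedValue(window_size=2)
#   v.update(1.0); v.update(3.0); v.update(5.0)
#   v.value      -> 5.0  (most recent value)
#   v.avg        -> 4.0  (mean over the 2-value window: 3.0 and 5.0)
#   v.global_avg -> 3.0  (mean over all 3 updates)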
def all_gather(data):
"""
Run all_gather on arbitrary picklable data (not necessarily tensors)
Args:
data: any picklable object
Returns:
list[data]: list of data gathered from each rank
"""
world_size = get_world_size()
if world_size == 1:
return [data]
# serialized to a Tensor
buffer = pickle.dumps(data)
storage = torch.ByteStorage.from_buffer(buffer)
tensor = torch.ByteTensor(storage).to("cuda")
# obtain Tensor size of each rank
local_size = torch.tensor([tensor.numel()], device="cuda")
size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
dist.all_gather(size_list, local_size)
size_list = [int(size.item()) for size in size_list]
max_size = max(size_list)
# receiving Tensor from all ranks
# we pad the tensor because torch all_gather does not support
# gathering tensors of different shapes
tensor_list = []
for _ in size_list:
tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
if local_size != max_size:
padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
tensor = torch.cat((tensor, padding), dim=0)
dist.all_gather(tensor_list, tensor)
data_list = []
for size, tensor in zip(size_list, tensor_list):
buffer = tensor.cpu().numpy().tobytes()[:size]
data_list.append(pickle.loads(buffer))
return data_list
def reduce_dict(input_dict, average=True):
"""
Args:
input_dict (dict): all the values will be reduced
average (bool): whether to do average or sum
Reduce the values in the dictionary from all processes so that all processes
have the averaged results. Returns a dict with the same fields as
input_dict, after reduction.
"""
world_size = get_world_size()
if world_size < 2:
return input_dict
with torch.no_grad():
names = []
values = []
# sort the keys so that they are consistent across processes
for k in sorted(input_dict.keys()):
names.append(k)
values.append(input_dict[k])
values = torch.stack(values, dim=0)
dist.all_reduce(values)
if average:
values /= world_size
reduced_dict = {k: v for k, v in zip(names, values)}
return reduced_dict
class MetricLogger(object):
def __init__(self, delimiter="\t"):
self.meters = defaultdict(SmoothedValue)
self.delimiter = delimiter
def update(self, **kwargs):
for k, v in kwargs.items():
if isinstance(v, torch.Tensor):
v = v.item()
assert isinstance(v, (float, int))
self.meters[k].update(v)
def __getattr__(self, attr):
if attr in self.meters:
return self.meters[attr]
if attr in self.__dict__:
return self.__dict__[attr]
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, attr))
def __str__(self):
loss_str = []
for name, meter in self.meters.items():
loss_str.append(
"{}: {}".format(name, str(meter))
)
return self.delimiter.join(loss_str)
def synchronize_between_processes(self):
for meter in self.meters.values():
meter.synchronize_between_processes()
def add_meter(self, name, meter):
self.meters[name] = meter
def log_every(self, iterable, print_freq, header=None):
i = 0
if not header:
header = ''
start_time = time.time()
end = time.time()
iter_time = SmoothedValue(fmt='{avg:.4f}')
data_time = SmoothedValue(fmt='{avg:.4f}')
space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
if torch.cuda.is_available():
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}',
'max mem: {memory:.0f}'
])
else:
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}'
])
MB = 1024.0 * 1024.0
for obj in iterable:
data_time.update(time.time() - end)
yield obj
iter_time.update(time.time() - end)
if i % print_freq == 0 or i == len(iterable) - 1:
eta_seconds = iter_time.global_avg * (len(iterable) - i)
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
if torch.cuda.is_available():
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time),
memory=torch.cuda.max_memory_allocated() / MB))
else:
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time)))
i += 1
end = time.time()
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('{} Total time: {} ({:.4f} s / it)'.format(
header, total_time_str, total_time / len(iterable)))
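# --- Hedged usage sketch (illustration only, not part of the original file) ---
# MetricLogger.log_every wraps an iterable (typically a DataLoader), yields its
# items unchanged and prints ETA, iteration/data timing and every meter updated
# via update(); the loader and loss below are placeholders.
def _example_train_logging(data_loader):
    metric_logger = MetricLogger(delimiter="  ")
    header = "Epoch: [0]"
    for images, targets in metric_logger.log_every(data_loader, 10, header):
        loss = 0.0  # placeholder for the real forward/backward step
        metric_logger.update(loss=loss)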
def collate_fn(batch):
return tuple(zip(*batch))
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
def f(x):
if x >= warmup_iters:
return 1
alpha = float(x) / warmup_iters
return warmup_factor * (1 - alpha) + alpha
return torch.optim.lr_scheduler.LambdaLR(optimizer, f)
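# --- Hedged usage sketch (illustration only, not part of the original file) ---
# During the first warmup_iters steps the returned LambdaLR scales the base
# learning rate from warmup_factor * lr up to lr; afterwards the factor is 1.
# The optimizer hyper-parameters below are only examples.
def _example_warmup(model_params):
    optimizer = torch.optim.SGD(model_params, lr=0.01, momentum=0.9)
    lr_scheduler = warmup_lr_scheduler(optimizer, warmup_iters=1000, warmup_factor=1.0 / 1000)
    # call lr_scheduler.step() once per iteration during the warmup phase
    return optimizer, lr_scheduler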
def mkdir(path):
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def setup_for_distributed(is_master):
"""
This function disables printing when not in the master process
"""
import builtins as __builtin__
builtin_print = __builtin__.print
def print(*args, **kwargs):
force = kwargs.pop('force', False)
if is_master or force:
builtin_print(*args, **kwargs)
__builtin__.print = print
def is_dist_avail_and_initialized():
if not dist.is_available():
return False
if not dist.is_initialized():
return False
return True
def get_world_size():
if not is_dist_avail_and_initialized():
return 1
return dist.get_world_size()
def get_rank():
if not is_dist_avail_and_initialized():
return 0
return dist.get_rank()
def is_main_process():
return get_rank() == 0
def save_on_master(*args, **kwargs):
if is_main_process():
torch.save(*args, **kwargs)
def init_distributed_mode(args):
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
args.rank = int(os.environ["RANK"])
args.world_size = int(os.environ['WORLD_SIZE'])
args.gpu = int(os.environ['LOCAL_RANK'])
elif 'SLURM_PROCID' in os.environ:
args.rank = int(os.environ['SLURM_PROCID'])
args.gpu = args.rank % torch.cuda.device_count()
else:
print('Not using distributed mode')
args.distributed = False
return
args.distributed = True
torch.cuda.set_device(args.gpu)
args.dist_backend = 'nccl'
print('| distributed init (rank {}): {}'.format(
args.rank, args.dist_url), flush=True)
torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
torch.distributed.barrier()
setup_for_distributed(args.rank == 0)
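# --- Hedged usage sketch (illustration only, not part of the original file) ---
# init_distributed_mode reads RANK/WORLD_SIZE/LOCAL_RANK (or SLURM_PROCID) from
# the environment, so a launcher such as torchrun / torch.distributed.launch is
# expected to set them; the argparse flag below is an assumption.
def _example_parse_and_init():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--dist-url', default='env://')
    args = parser.parse_args([])
    init_distributed_mode(args)  # falls back to non-distributed mode if no env vars are set
    return args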
import sys
import os
import time
import math
import numpy as np
import itertools
import struct # get_image_size
import imghdr # get_image_size
def sigmoid(x):
return 1.0 / (np.exp(-x) + 1.)
def softmax(x):
x = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1))
x = x / np.expand_dims(x.sum(axis=1), axis=1)
return x
def bbox_iou(box1, box2, x1y1x2y2=True):
# print('iou box1:', box1)
# print('iou box2:', box2)
if x1y1x2y2:
mx = min(box1[0], box2[0])
Mx = max(box1[2], box2[2])
my = min(box1[1], box2[1])
My = max(box1[3], box2[3])
w1 = box1[2] - box1[0]
h1 = box1[3] - box1[1]
w2 = box2[2] - box2[0]
h2 = box2[3] - box2[1]
else:
w1 = box1[2]
h1 = box1[3]
w2 = box2[2]
h2 = box2[3]
mx = min(box1[0], box2[0])
Mx = max(box1[0] + w1, box2[0] + w2)
my = min(box1[1], box2[1])
My = max(box1[1] + h1, box2[1] + h2)
uw = Mx - mx
uh = My - my
cw = w1 + w2 - uw
ch = h1 + h2 - uh
carea = 0
if cw <= 0 or ch <= 0:
return 0.0
area1 = w1 * h1
area2 = w2 * h2
carea = cw * ch
uarea = area1 + area2 - carea
return carea / uarea
def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
# print(boxes.shape)
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1) * (y2 - y1)
order = confs.argsort()[::-1]
keep = []
while order.size > 0:
idx_self = order[0]
idx_other = order[1:]
keep.append(idx_self)
xx1 = np.maximum(x1[idx_self], x1[idx_other])
yy1 = np.maximum(y1[idx_self], y1[idx_other])
xx2 = np.minimum(x2[idx_self], x2[idx_other])
yy2 = np.minimum(y2[idx_self], y2[idx_other])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
inter = w * h
if min_mode:
over = inter / np.minimum(areas[order[0]], areas[order[1:]])
else:
over = inter / (areas[order[0]] + areas[order[1:]] - inter)
inds = np.where(over <= nms_thresh)[0]
order = order[inds + 1]
return np.array(keep)
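# --- Hedged worked example (illustration only, not part of the original file) ---
# Two heavily overlapping unit boxes and one separate box: with nms_thresh=0.5
# the lower-confidence overlapping box is suppressed, so the indices of the two
# surviving boxes are returned in descending confidence order.
def _example_nms():
    boxes = np.array([[0.00, 0.00, 1.0, 1.0],
                      [0.05, 0.05, 1.0, 1.0],
                      [2.00, 2.00, 3.0, 3.0]])
    confs = np.array([0.9, 0.8, 0.7])
    return nms_cpu(boxes, confs, nms_thresh=0.5)  # expected: array([0, 2])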
def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
import cv2
img = np.copy(img)
colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)
def get_color(c, x, max_val):
ratio = float(x) / max_val * 5
i = int(math.floor(ratio))
j = int(math.ceil(ratio))
ratio = ratio - i
r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]
return int(r * 255)
width = img.shape[1]
height = img.shape[0]
for i in range(len(boxes)):
box = boxes[i]
x1 = int(box[0] * width)
y1 = int(box[1] * height)
x2 = int(box[2] * width)
y2 = int(box[3] * height)
if color:
rgb = color
else:
rgb = (255, 0, 0)
if len(box) >= 7 and class_names:
cls_conf = box[5]
cls_id = box[6]
# print('%s: %f' % (class_names[cls_id], cls_conf))
classes = len(class_names)
offset = cls_id * 123457 % classes
red = get_color(2, offset, classes)
green = get_color(1, offset, classes)
blue = get_color(0, offset, classes)
if color is None:
rgb = (red, green, blue)
img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1)
img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1)
# if savename:
# print("save plot results to %s" % savename)
# cv2.imwrite(savename, img)
return img
def read_truths(lab_path):
if not os.path.exists(lab_path):
return np.array([])
if os.path.getsize(lab_path):
truths = np.loadtxt(lab_path)
truths = truths.reshape(truths.size // 5, 5)  # to avoid single truth problem
return truths
else:
return np.array([])
def load_class_names(namesfile):
class_names = []
with open(namesfile, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.rstrip()
class_names.append(line)
return class_names
def post_processing(img, conf_thresh, nms_thresh, output):
# anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
# num_anchors = 9
# anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
# strides = [8, 16, 32]
# anchor_step = len(anchors) // num_anchors
# [batch, num, 1, 4]
box_array = output[0]
# [batch, num, num_classes]
confs = output[1]
t1 = time.time()
if type(box_array).__name__ != 'ndarray':
box_array = box_array.cpu().detach().numpy()
confs = confs.cpu().detach().numpy()
num_classes = confs.shape[2]
# [batch, num, 4]
box_array = box_array[:, :, 0]
# [batch, num, num_classes] --> [batch, num]
max_conf = np.max(confs, axis=2)
max_id = np.argmax(confs, axis=2)
t2 = time.time()
bboxes_batch = []
for i in range(box_array.shape[0]):
argwhere = max_conf[i] > conf_thresh
l_box_array = box_array[i, argwhere, :]
l_max_conf = max_conf[i, argwhere]
l_max_id = max_id[i, argwhere]
bboxes = []
# nms for each class
for j in range(num_classes):
cls_argwhere = l_max_id == j
ll_box_array = l_box_array[cls_argwhere, :]
ll_max_conf = l_max_conf[cls_argwhere]
ll_max_id = l_max_id[cls_argwhere]
keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)
if (keep.size > 0):
ll_box_array = ll_box_array[keep, :]
ll_max_conf = ll_max_conf[keep]
ll_max_id = ll_max_id[keep]
for k in range(ll_box_array.shape[0]):
bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]])
bboxes_batch.append(bboxes)
t3 = time.time()
print('-----------------------------------')
print(' max and argmax : %f' % (t2 - t1))
print(' nms : %f' % (t3 - t2))
print('Post processing total : %f' % (t3 - t1))
print('-----------------------------------')
return bboxes_batch
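# --- Hedged usage sketch (illustration only, not part of the original file) ---
# post_processing expects output = [boxes, confs] with boxes of shape
# [batch, num, 1, 4] holding normalized (x1, y1, x2, y2) and confs of shape
# [batch, num, num_classes]; the thresholds and names file below are assumptions.
def _example_detect_and_draw(img_bgr, model_output, namesfile='data/coco.names'):
    class_names = load_class_names(namesfile)
    boxes_batch = post_processing(img_bgr, 0.4, 0.6, model_output)
    # each box is [x1, y1, x2, y2, conf, cls_conf, cls_id] in normalized coordinates
    return plot_boxes_cv2(img_bgr, boxes_batch[0], class_names=class_names)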
# -*- coding: utf-8 -*-
'''
'''
import torch
import os, sys
from torch.nn import functional as F
import numpy as np
from packaging import version
__all__ = [
"bboxes_iou",
"bboxes_giou",
"bboxes_diou",
"bboxes_ciou",
]
if version.parse(torch.__version__) >= version.parse('1.5.0'):
def _true_divide(dividend, divisor):
return torch.true_divide(dividend, divisor)
else:
def _true_divide(dividend, divisor):
return dividend / divisor
def bboxes_iou(bboxes_a, bboxes_b, fmt='voc', iou_type='iou'):
"""Calculate the Intersection of Unions (IoUs) between bounding boxes.
IoU is calculated as a ratio of area of the intersection
and area of the union.
Args:
bboxes_a (torch.Tensor): A tensor whose shape is :math:`(N, 4)`.
:math:`N` is the number of bounding boxes.
The dtype should be :obj:`torch.float32`.
bboxes_b (torch.Tensor): A tensor similar to :obj:`bboxes_a`,
whose shape is :math:`(K, 4)`.
The dtype should be :obj:`torch.float32`.
Returns:
torch.Tensor:
A tensor whose shape is :math:`(N, K)`. \
An element at index :math:`(n, k)` contains the IoU between \
the :math:`n`-th bounding box in :obj:`bboxes_a` and the :math:`k`-th bounding \
box in :obj:`bboxes_b`.
from: https://github.com/chainer/chainercv
"""
if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
raise IndexError
N, K = bboxes_a.shape[0], bboxes_b.shape[0]
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
# top left
tl_intersect = torch.max(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
) # of shape `(N,K,2)`
# bottom right
br_intersect = torch.min(
bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, 2:]
)
bb_a = bboxes_a[:, 2:] - bboxes_a[:, :2]
bb_b = bboxes_b[:, 2:] - bboxes_b[:, :2]
# bb_* can also be seen as vectors representing (box_width, box_height)
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
# top left
tl_intersect = torch.max(
bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] - bboxes_b[:, 2:] / 2
)
# bottom right
br_intersect = torch.min(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] + bboxes_b[:, 2:] / 2
)
bb_a = bboxes_a[:, 2:]
bb_b = bboxes_b[:, 2:]
elif fmt.lower() == 'coco': # xmin, ymin, w, h
# top left
tl_intersect = torch.max(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
)
# bottom right
br_intersect = torch.min(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, :2] + bboxes_b[:, 2:]
)
bb_a = bboxes_a[:, 2:]
bb_b = bboxes_b[:, 2:]
area_a = torch.prod(bb_a, 1)
area_b = torch.prod(bb_b, 1)
# torch.prod(input, dim, keepdim=False, dtype=None) → Tensor
# Returns the product of each row of the input tensor in the given dimension dim
# if tl, br do not form a nondegenerate rectangle, then the corresponding element in the `prod` would be 0
en = (tl_intersect < br_intersect).type(tl_intersect.type()).prod(dim=2) # shape `(N,K,2)` ---> shape `(N,K)`
area_intersect = torch.prod(br_intersect - tl_intersect, 2) * en # * ((tl < br).all())
area_union = (area_a[:, np.newaxis] + area_b - area_intersect)
iou = _true_divide(area_intersect, area_union)
if iou_type.lower() == 'iou':
return iou
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
# top left
tl_union = torch.min(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
) # of shape `(N,K,2)`
# bottom right
br_union = torch.max(
bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, 2:]
)
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
# top left
tl_union = torch.min(
bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] - bboxes_b[:, 2:] / 2
)
# bottom right
br_union = torch.max(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] + bboxes_b[:, 2:] / 2
)
elif fmt.lower() == 'coco': # xmin, ymin, w, h
# top left
tl_union = torch.min(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
)
# bottom right
br_union = torch.max(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, :2] + bboxes_b[:, 2:]
)
# c for covering, of shape `(N,K,2)`
# the last dim is box width, box height
bboxes_c = br_union - tl_union
area_covering = torch.prod(bboxes_c, 2) # shape `(N,K)`
giou = iou - _true_divide(area_covering - area_union, area_covering)
if iou_type.lower() == 'giou':
return giou
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
centre_a = (bboxes_a[..., 2 :] + bboxes_a[..., : 2]) / 2
centre_b = (bboxes_b[..., 2 :] + bboxes_b[..., : 2]) / 2
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
centre_a = bboxes_a[..., : 2]
centre_b = bboxes_b[..., : 2]
elif fmt.lower() == 'coco': # xmin, ymin, w, h
centre_a = bboxes_a[..., :2] + bboxes_a[..., 2:] / 2
centre_b = bboxes_b[..., :2] + bboxes_b[..., 2:] / 2
centre_dist = torch.norm(centre_a[:, np.newaxis] - centre_b, p='fro', dim=2)
diag_len = torch.norm(bboxes_c, p='fro', dim=2)
diou = iou - _true_divide(centre_dist.pow(2), diag_len.pow(2))
if iou_type.lower() == 'diou':
return diou
""" the legacy custom cosine similarity:
# bb_a of shape `(N,2)`, bb_b of shape `(K,2)`
v = torch.einsum('nm,km->nk', bb_a, bb_b)
v = _true_divide(v, (torch.norm(bb_a, p='fro', dim=1)[:,np.newaxis] * torch.norm(bb_b, p='fro', dim=1)))
# avoid nan for torch.acos near \pm 1
# https://github.com/pytorch/pytorch/issues/8069
eps = 1e-7
v = torch.clamp(v, -1+eps, 1-eps)
"""
v = F.cosine_similarity(bb_a[:,np.newaxis,:], bb_b, dim=-1)
v = (_true_divide(2*torch.acos(v), np.pi)).pow(2)
with torch.no_grad():
alpha = (_true_divide(v, 1-iou+v)) * ((iou>=0.5).type(iou.type()))
ciou = diou - alpha * v
if iou_type.lower() == 'ciou':
return ciou
def bboxes_giou(bboxes_a, bboxes_b, fmt='voc'):
return bboxes_iou(bboxes_a, bboxes_b, fmt, 'giou')
def bboxes_diou(bboxes_a, bboxes_b, fmt='voc'):
return bboxes_iou(bboxes_a, bboxes_b, fmt, 'diou')
def bboxes_ciou(bboxes_a, bboxes_b, fmt='voc'):
return bboxes_iou(bboxes_a, bboxes_b, fmt, 'ciou')
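# --- Hedged worked example (illustration only, not part of the original file) ---
# A unit box and a copy shifted by (0.5, 0.5) in VOC (xmin, ymin, xmax, ymax)
# format: the intersection area is 0.25 and the union 1.75, so the IoU is
# 1/7 ~= 0.1429; GIoU/DIoU/CIoU subtract their respective penalties from it.
def _example_bboxes_iou():
    a = torch.tensor([[0.0, 0.0, 1.0, 1.0]])
    b = torch.tensor([[0.5, 0.5, 1.5, 1.5]])
    return bboxes_iou(a, b, fmt='voc', iou_type='iou')  # tensor([[0.1429]])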
ttest
*.weights
*.pth
*.onnx
*.engine
*.pyc
*.infer
*.npy
z_demo_*
__pycache__
.idea
.vscode
runs
log
*.jpg
*.json
data/outcome
# This should be run on JetPack 4.4 / JetPack 4.4 GA with DeepStream 5.0 / DeepStream 5.0 GA.
1. Compile the custom plugin for Yolo
2. Convert the ONNX file to a TensorRT engine with trtexec / TensorRT (see the sketch below)
3. Change the model-engine-file in config_infer_primary_yoloV4.txt
4. In deepstream_app_config_yoloV4.txt, change
a) source0 : uri=file:<your file>
b) primary-gie : model-engine-file=<your_onnx_engine>
# Note that for multi-batch inference the overhead is large, because NMS is not used.
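A minimal sketch of step 2, assuming trtexec from the TensorRT installation is on the PATH and that the ONNX model was exported beforehand (file names are placeholders and the exact flags may differ between TensorRT releases):
import subprocess
# build a serialized FP16 engine from the exported ONNX model (names are placeholders)
subprocess.run([
    'trtexec',
    '--onnx=yolov4.onnx',
    '--saveEngine=yolov4_fp16.engine',
    '--fp16',
], check=True)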
################################################################################
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
# Following properties are mandatory when engine files are not specified:
# int8-calib-file(Only in INT8), model-file-format
# Caffemodel mandatory properties: model-file, proto-file, output-blob-names
# UFF: uff-file, input-dims, uff-input-blob-name, output-blob-names
# ONNX: onnx-file
#
# Mandatory properties for detectors:
# num-detected-classes
#
# Optional properties for detectors:
# cluster-mode(Default=Group Rectangles), interval(Primary mode only, Default=0)
# custom-lib-path
# parse-bbox-func-name
#
# Mandatory properties for classifiers:
# classifier-threshold, is-classifier
#
# Optional properties for classifiers:
# classifier-async-mode(Secondary mode only, Default=false)
#
# Optional properties in secondary mode:
# operate-on-gie-id(Default=0), operate-on-class-ids(Defaults to all classes),
# input-object-min-width, input-object-min-height, input-object-max-width,
# input-object-max-height
#
# Following properties are always recommended:
# batch-size(Default=1)
#
# Other optional properties:
# net-scale-factor(Default=1), network-mode(Default=0 i.e FP32),
# model-color-format(Default=0 i.e. RGB) model-engine-file, labelfile-path,
# mean-file, gie-unique-id(Default=0), offsets, process-mode (Default=1 i.e. primary),
# custom-lib-path, network-mode(Default=0 i.e FP32)
#
# The values in the config file are overridden by values set through GObject
# properties.
[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
#0=RGB, 1=BGR
model-color-format=0
model-engine-file=<onnx_engine_file>
labelfile-path=labels.txt
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=80
gie-unique-id=1
network-type=0
is-classifier=0
## 0=Group Rectangles, 1=DBSCAN, 2=NMS, 3= DBSCAN+NMS Hybrid, 4 = None(No clustering)
cluster-mode=2
maintain-aspect-ratio=1
parse-bbox-func-name=NvDsInferParseCustomYoloV4
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
engine-create-func-name=NvDsInferYoloCudaEngineGet
#scaling-filter=0
#scaling-compute-hw=0
#output-blob-names=2012
[class-attrs-all]
nms-iou-threshold=0.2
pre-cluster-threshold=0.4
################################################################################
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
[application]
enable-perf-measurement=1
perf-measurement-interval-sec=5
#gie-kitti-output-dir=streamscl
[tiled-display]
enable=0
rows=1
columns=1
width=1280
height=720
gpu-id=0
#(0): nvbuf-mem-default - Default memory allocated, specific to particular platform
#(1): nvbuf-mem-cuda-pinned - Allocate Pinned/Host cuda memory, applicable for Tesla
#(2): nvbuf-mem-cuda-device - Allocate Device cuda memory, applicable for Tesla
#(3): nvbuf-mem-cuda-unified - Allocate Unified cuda memory, applicable for Tesla
#(4): nvbuf-mem-surface-array - Allocate Surface Array memory, applicable for Jetson
nvbuf-memory-type=0
[source0]
enable=1
#Type - 1=CameraV4L2 2=URI 3=MultiURI
type=3
uri=file:<Your_file_source>
num-sources=1
gpu-id=0
# (0): memtype_device - Memory type Device
# (1): memtype_pinned - Memory type Host Pinned
# (2): memtype_unified - Memory type Unified
cudadec-memtype=0
[sink0]
enable=1
#Type - 1=FakeSink 2=EglSink 3=File
type=2
sync=1
source-id=0
gpu-id=0
[osd]
enable=1
gpu-id=0
border-width=1
text-size=12
text-color=1;1;1;1;
text-bg-color=0.3;0.3;0.3;1
font=Serif
show-clock=0
clock-x-offset=800
clock-y-offset=820
clock-text-size=12
clock-color=1;0;0;0
nvbuf-memory-type=0
[streammux]
gpu-id=0
##Boolean property to inform muxer that sources are live
live-source=0
batch-size=1
##time out in usec, to wait after the first buffer is available
##to push the batch even if the complete batch is not formed
batched-push-timeout=40000
## Set muxer output width and height
width=1280
height=720
##Enable to maintain aspect ratio wrt source, and allow black borders, works
##along with width, height properties
enable-padding=0
nvbuf-memory-type=0
# config-file property is mandatory for any gie section.
# Other properties are optional and if set will override the properties set in
# the infer config file.
[primary-gie]
enable=1
gpu-id=0
model-engine-file=<onnx_engine_file>
labelfile-path=labels.txt
#batch-size=1
#Required by the app for OSD, not a plugin property
bbox-border-color0=1;0;0;1
bbox-border-color1=0;1;1;1
bbox-border-color2=0;0;1;1
bbox-border-color3=0;1;0;1
interval=0
gie-unique-id=1
nvbuf-memory-type=0
config-file=config_infer_primary_yoloV4.txt
[sink1]
enable=1
type=3
#1=mp4 2=mkv
container=1
#1=h264 2=h265 3=mpeg4
codec=1
#encoder type 0=Hardware 1=Software
enc-type=0
sync=0
bitrate=4000000
#H264 Profile - 0=Baseline 2=Main 4=High
#H265 Profile - 0=Main 1=Main10
profile=0
output-file=fp16_clip1_cam1.mp4
source-id=0
[tracker]
enable=1
# For the case of NvDCF tracker, tracker-width and tracker-height must be a multiple of 32, respectively
tracker-width=608
tracker-height=608
#ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_iou.so
#ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_nvdcf.so
ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_klt.so
#ll-config-file required for IOU only
#ll-config-file=iou_config.txt
gpu-id=0
[tests]
file-loop=0
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
CUDA_VER?=
ifeq ($(CUDA_VER),)
$(error "CUDA_VER is not set")
endif
CC:= g++
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc
CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include
LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group
INCS:= $(wildcard *.h)
SRCFILES:= nvdsinfer_yolo_engine.cpp \
nvdsparsebbox_Yolo.cpp \
yoloPlugins.cpp \
trt_utils.cpp \
yolo.cpp \
kernels.cu
TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so
TARGET_OBJS:= $(SRCFILES:.cpp=.o)
TARGET_OBJS:= $(TARGET_OBJS:.cu=.o)
all: $(TARGET_LIB)
%.o: %.cpp $(INCS) Makefile
$(CC) -c -o $@ $(CFLAGS) $<
%.o: %.cu $(INCS) Makefile
$(NVCC) -c -o $@ --compiler-options '-fPIC' $<
$(TARGET_LIB) : $(TARGET_OBJS)
$(CC) -o $@ $(TARGET_OBJS) $(LFLAGS)
clean:
rm -rf $(TARGET_LIB)
/*
* Copyright (c) 2018-2019 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*
*/
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
__global__ void gpuYoloLayerV3(const float* input, float* output, const uint gridSize, const uint numOutputClasses,
const uint numBBoxes)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
uint z_id = blockIdx.z * blockDim.z + threadIdx.z;
if ((x_id >= gridSize) || (y_id >= gridSize) || (z_id >= numBBoxes))
{
return;
}
const int numGridCells = gridSize * gridSize;
const int bbindex = y_id * gridSize + x_id;
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
for (uint i = 0; i < numOutputClasses; ++i)
{
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]);
}
}
cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
const uint& numOutputClasses, const uint& numBBoxes,
uint64_t outputSize, cudaStream_t stream);
cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
const uint& numOutputClasses, const uint& numBBoxes,
uint64_t outputSize, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
dim3 number_of_blocks((gridSize / threads_per_block.x) + 1,
(gridSize / threads_per_block.y) + 1,
(numBBoxes / threads_per_block.z) + 1);
for (unsigned int batch = 0; batch < batchSize; ++batch)
{
gpuYoloLayerV3<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float*>(input) + (batch * outputSize),
reinterpret_cast<float*>(output) + (batch * outputSize), gridSize, numOutputClasses,
numBBoxes);
}
return cudaGetLastError();
}
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvdsinfer_custom_impl.h"
#include "nvdsinfer_context.h"
#include "yoloPlugins.h"
#include "yolo.h"
#include <algorithm>
#define USE_CUDA_ENGINE_GET_API 1
static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams)
{
std::string yoloCfg = initParams->customNetworkConfigFilePath;
std::string yoloType;
std::transform (yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) {
return std::tolower (c);});
if (yoloCfg.find("yolov2") != std::string::npos) {
if (yoloCfg.find("yolov2-tiny") != std::string::npos)
yoloType = "yolov2-tiny";
else
yoloType = "yolov2";
} else if (yoloCfg.find("yolov3") != std::string::npos) {
if (yoloCfg.find("yolov3-tiny") != std::string::npos)
yoloType = "yolov3-tiny";
else
yoloType = "yolov3";
} else {
std::cerr << "Yolo type is not defined from config file name:"
<< yoloCfg << std::endl;
return false;
}
networkInfo.networkType = yoloType;
networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
networkInfo.wtsFilePath = initParams->modelFilePath;
networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
networkInfo.inputBlobName = "data";
if (networkInfo.configFilePath.empty() ||
networkInfo.wtsFilePath.empty()) {
std::cerr << "Yolo config file or weights file is NOT specified."
<< std::endl;
return false;
}
if (!fileExists(networkInfo.configFilePath) ||
!fileExists(networkInfo.wtsFilePath)) {
std::cerr << "Yolo config file or weights file is NOT exist."
<< std::endl;
return false;
}
return true;
}
#if !USE_CUDA_ENGINE_GET_API
IModelParser* NvDsInferCreateModelParser(
const NvDsInferContextInitParams* initParams) {
NetworkInfo networkInfo;
if (!getYoloNetworkInfo(networkInfo, initParams)) {
return nullptr;
}
return new Yolo(networkInfo);
}
#else
extern "C"
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
const NvDsInferContextInitParams * const initParams,
nvinfer1::DataType dataType,
nvinfer1::ICudaEngine *& cudaEngine);
extern "C"
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
const NvDsInferContextInitParams * const initParams,
nvinfer1::DataType dataType,
nvinfer1::ICudaEngine *& cudaEngine)
{
NetworkInfo networkInfo;
if (!getYoloNetworkInfo(networkInfo, initParams)) {
return false;
}
Yolo yolo(networkInfo);
cudaEngine = yolo.createEngine (builder);
if (cudaEngine == nullptr)
{
std::cerr << "Failed to build cuda engine on "
<< networkInfo.configFilePath << std::endl;
return false;
}
return true;
}
#endif
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
numpy==1.18.2
torch==1.4.0
tensorboardX==2.0
scikit_image==0.16.2
matplotlib==2.2.3
tqdm==4.43.0
easydict==1.9
Pillow==7.1.2
skimage
opencv_python
pycocotools