Commit e583b27c authored by treasearcher

Initial commit

<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="15">
<item index="0" class="java.lang.String" itemvalue="tqdm" />
<item index="1" class="java.lang.String" itemvalue="easydict" />
<item index="2" class="java.lang.String" itemvalue="scikit_image" />
<item index="3" class="java.lang.String" itemvalue="matplotlib" />
<item index="4" class="java.lang.String" itemvalue="tensorboardX" />
<item index="5" class="java.lang.String" itemvalue="torch" />
<item index="6" class="java.lang.String" itemvalue="numpy" />
<item index="7" class="java.lang.String" itemvalue="pycocotools" />
<item index="8" class="java.lang.String" itemvalue="skimage" />
<item index="9" class="java.lang.String" itemvalue="Pillow" />
<item index="10" class="java.lang.String" itemvalue="opencv_python" />
<item index="11" class="java.lang.String" itemvalue="opencv-python" />
<item index="12" class="java.lang.String" itemvalue="h5py" />
<item index="13" class="java.lang.String" itemvalue="pycrayon" />
<item index="14" class="java.lang.String" itemvalue="Cython" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/live_xv_cam_car_graph.iml" filepath="$PROJECT_DIR$/.idea/live_xv_cam_car_graph.iml" />
</modules>
</component>
</project>
import cv2
import json
from socket import *
from tool.utils import load_class_names, plot_boxes_cv2
import queue, _thread, threading, time
import numpy as np
import subprocess
ip_add = '192.168.1.110'
# ip_add = '127.0.0.1'
# pipe raw frames from stdin into ffmpeg and publish them as an RTMP stream
command = ['ffmpeg',
'-re',
'-i', '-',
'-r', '15',
'-c:v', 'libx264',
# '-preset','superfast',
# '-maxrate', '3000k',
# '-bufsize', '6000k',
# '-pix_fmt', 'yuv420p',
# '-g', '50',
# '-s', '640x360',
# '-c:a', 'aac',
# '-b:a', '160k',
# '-ac', '2',
# '-ar', '44100',
# '-b:a', '160k',
'-f', 'flv',
'rtmp://localhost/live/AI']
process = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
# print("subprocess established!")
# ip_add = ''
def send_from(arr, dest):
    """Send the raw bytes of a numpy array over a socket, looping until every byte is sent."""
    view = memoryview(arr).cast('B')
    while len(view):
        nsent = dest.send(view)
        view = view[nsent:]


def recv_into(arr, source):
    """Fill a pre-allocated numpy array with exactly its own number of bytes read from a socket."""
    view = memoryview(arr).cast('B')
    while len(view):
        nrecv = source.recv_into(view)
        view = view[nrecv:]
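# A minimal usage sketch of the fixed-size framing these helpers assume (both
# peers must agree on dtype and shape in advance so the receiver can allocate
# an identically shaped buffer; the 480x640x3 uint8 shape here is illustrative):
#   sender:   send_from(np.zeros((480, 640, 3), dtype=np.uint8), sock)
#   receiver: buf = np.zeros((480, 640, 3), dtype=np.uint8); recv_into(buf, sock)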
# Socket topology: listen on port 25000 and send frames out on connection c;
# connect to ip_add:25002 and receive detection boxes back on c_2.
s = socket(AF_INET, SOCK_STREAM)
s.bind(('', 25000))
s.listen(1)
c, a = s.accept()
print('frame connection accepted')
c_2 = socket(AF_INET, SOCK_STREAM)
c_2.connect((ip_add, 25002))
print('box connection established')
qsize = 10
boxQue = queue.Queue(qsize)
img_sent = queue.Queue(qsize * 10)
lock = threading.Lock()
# time.sleep(10)
fps = 0
fps_dis = 0
def recv_box():
    # Receive detection results over c_2: first an int64 box count, then a
    # (1, N, 7) float32 array; a count of 0 means the frame had no detections.
    lth = np.zeros(shape=(1,), dtype=np.int64)
while 1:
if boxQue.full():
# print('box is full')
time.sleep(0.1)
else:
recv_into(lth, c_2)
if lth[0] == 0:
lock.acquire()
boxQue.put([0])
lock.release()
continue
arr = np.zeros(shape=(1, lth[0], 7), dtype=np.float32)
recv_into(arr, c_2)
box = arr.tolist()
for i in range(lth[0]):
box[0][i][-1] = np.int64(box[0][i][-1])
lock.acquire()
boxQue.put(box)
lock.release()
# sum_flag = np.zeros(shape=(1,), dtype=np.int32)
# def recv_flag():
# global sum_flag
# recv_into(sum_flag, c)
# print('done')
# def cam_send():
# cap = cv2.VideoCapture(0)
# flag = cap.isOpened()
# print(flag)
# # _thread.start_new_thread(recv_flag, ())
# cnt_arr = np.zeros(shape=(1,), dtype=np.int32)
# while 1:
# _, img = cap.read()
# send_from(img, c)
# send_from(np.array([np.sum(img)]), c)
# recv_into(cnt_arr, c)
# if cnt_arr[0]>=5:
# break
# while 1:
# while img_sent.full():
# # print('sent is full')
# time.sleep(0.1)
# _, img = cap.read()
# # print(img)
# send_from(img, c)
# lock.acquire()
# img_sent.put(img)
# lock.release()
# # print(np.sum(img))
# if cv2.waitKey(1) & 0xFF == ord('q'):
# break
# cap.release()
cap = cv2.VideoCapture('rtmp://localhost/live/VR')
flag = cap.isOpened()
print("cap is opened: ", flag)
# while(True):
# # get a frame
# ret, frame = cap.read()
# cv2.imshow('frame', frame)
# if cv2.waitKey(1) == ord('q'):
# break
img_read = None  # latest frame from cam_read; None until a frame is available
lock_img = threading.Lock()
def cam_read():
global cap, img_read
print("no connection in the stream, reconnecting")
time.sleep(1)
cnt_arr = np.zeros(shape=(1,), dtype=np.int32)
while True:
if not cap.isOpened():
# _, frame = None
print('error while subprocess not running')
cam_send()
break
while True:
            ret, img = cap.read()
            if not ret or img is None:
                # stream hiccup: skip this iteration instead of crashing in resize
                continue
            img = cv2.resize(img, (640, 480))
lock_img.acquire()
img_read = img
lock_img.release()
def cam_send():
# while True:
# _, img = cap.read()
# # print(type(img), img.shape[:])
# # cv2.imshow("img", img)
# # time.sleep(0.1)
# send_from(img, c)
# send_from(np.array([np.sum(img)]), c)
# recv_into(cnt_arr, c)
# print("cnt_arr<5,loop!")
# if cnt_arr[0] >= 5:
# break
global img_read
while True:
        while img_sent.full() or img_read is None:
# print('sent is full')
time.sleep(0.1)
# _, img = cap.read()
lock_img.acquire()
img = img_read.copy()
        img_read = None
lock_img.release()
# img = cv2.resize(img, (640, 480))
# cv2.imshow("img", img) #########
send_from(img, c)
lock.acquire()
img_sent.put(img)
lock.release()
if cv2.waitKey(1) == 27:
break
else:
print('key is wrong')
# boxes, confs, clss = trt_yolo.detect(frame, 0.3)
# frame = vis.draw_bboxes(frame, boxes, confs, clss)
# cv2.putText(frame, "FPS: " + str(round(fps, 2)), (10, 50), font, 3, (255, 50, 0), 3)
# toc = time.time()
# curr_fps = 1.0 / (toc - tic)
# fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
# tic = toc
# computing_time = (1 / fps) * 1000
# cv2.putText(frame, str(round(computing_time, 2)) + " ms", (10, 90), font, 3, (255, 50, 0), 3)
# ret_toRTSP, frame_toRTSP = cv2.imencode('.png', frame)
# process.stdin.write(frame_toRTSP.tobytes())
# # process.stdin.write(frame.tostring())
# # cv2.imshow("cap",frame)
def fps_update():
global fps, fps_dis
while 1:
time.sleep(10)
print(fps)
fps_dis = fps / 10
fps = 0
_thread.start_new_thread(recv_box, ())
_thread.start_new_thread(cam_read, ())
_thread.start_new_thread(cam_send, ())
_thread.start_new_thread(fps_update, ())
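# Pipeline overview: cam_read pulls frames from the RTMP input stream, cam_send
# forwards each frame over the socket and keeps a copy in img_sent, recv_box
# collects the matching detections into boxQue, fps_update averages the display
# rate every 10 s, and the main loop below pairs each sent frame with its boxes,
# draws them, and shows the annotated result.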
# def get_box():
# while boxQue.empty():
# time.sleep(0.1)
# lock.acquire()
# box = boxQue.get()
# lock.release()
# return box
namesfile = 'data/coco.names'
class_names = load_class_names(namesfile)
while (1):
# get a frame
while img_sent.empty() or boxQue.empty():
# print('sent or box are empty')
time.sleep(0.1)
# print("empty end")
lock.acquire()
img = img_sent.get()
boxes = boxQue.get()
lock.release()
# print(np.sum(img))
# start = time.time()
if boxes[0] == 0:
# print("img boxes is empty!")
pass
else:
img = plot_boxes_cv2(img, boxes[0], 'predictions.jpg', class_names)
img = cv2.putText(img, 'FPS: {}'.format(fps_dis), (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 2)
    # print(img)
cv2.imshow('fps:', img)
fps += 1
if cv2.waitKey(1) & 0xFF == ord('q'):
break
##########################################################################################################
# ret_toRTSP, frame_toRTSP = cv2.imencode('.png', img)
# print(frame_toRTSP)
# process.stdin.write(frame_toRTSP.tobytes())
##########################################################################################################
# end = time.time()
# print('time: ', end - start)
# cv2.imshow('fps:', img)
# fps += 1
# if cv2.waitKey(1) & 0xFF == ord('q'):
# break
cv2.destroyAllWindows()
c.close()
s.close()
import cv2
cap = cv2.VideoCapture(0)
flag = cap.isOpened()
print(flag)
while (1):
# get a frame
_, img = cap.read()
cv2.imshow('', img)
cv2.waitKey(1)
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
import cv2
cap = cv2.VideoCapture(0)
while cap.isOpened():
_, img = cap.read()
cv2.imshow('', img)
cv2.waitKey(1)
# -*- coding: utf-8 -*-
'''
@Time : 2020/04/26 15:48
@Author : Tianxiaomo
@File : camera.py
@Notice :
@Modification :
@Author :
@Time :
@Detail :
'''
from __future__ import division
import cv2
from tool.darknet2pytorch import Darknet
import argparse
from tool.utils import *
from tool.torch_utils import *
def arg_parse():
"""
    Parse arguments to the detect module
"""
    parser = argparse.ArgumentParser(description='YOLO v4 Cam Demo')
parser.add_argument("--confidence", dest="confidence", help="Object Confidence to filter predictions", default=0.25)
parser.add_argument("--nms_thresh", dest="nms_thresh", help="NMS Threshhold", default=0.4)
parser.add_argument("--reso", dest='reso', help=
"Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
default="160", type=str)
return parser.parse_args()
if __name__ == '__main__':
cfgfile = "cfg/yolov4.cfg"
weightsfile = "weight/yolov4.weights"
args = arg_parse()
confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
CUDA = torch.cuda.is_available()
num_classes = 80
bbox_attrs = 5 + num_classes
class_names = load_class_names("data/coco.names")
model = Darknet(cfgfile)
model.load_weights(weightsfile)
if CUDA:
model.cuda()
model.eval()
cap = cv2.VideoCapture(0)
assert cap.isOpened(), 'Cannot capture source'
frames = 0
start = time.time()
while cap.isOpened():
ret, frame = cap.read()
if ret:
sized = cv2.resize(frame, (model.width, model.height))
sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
boxes = do_detect(model, sized, 0.5, 0.4, CUDA)
orig_im = plot_boxes_cv2(frame, boxes, class_names=class_names)
cv2.imshow("frame", orig_im)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
frames += 1
print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
else:
break
# -*- coding: utf-8 -*-
'''
@Time : 2020/05/08 11:45
@Author : Tianxiaomo
@File : coco_annotatin.py
@Notice :
@Modification :
@Author :
@Time :
@Detail :
'''
import json
from collections import defaultdict
from tqdm import tqdm
import os
"""hyper parameters"""
json_file_path = 'E:/Dataset/mscoco2017/annotations/instances_train2017.json'
images_dir_path = 'mscoco2017/train2017/'
output_path = '../data/val.txt'
"""load json file"""
name_box_id = defaultdict(list)
id_name = dict()
with open(json_file_path, encoding='utf-8') as f:
data = json.load(f)
"""generate labels"""
images = data['images']
annotations = data['annotations']
for ant in tqdm(annotations):
id = ant['image_id']
# name = os.path.join(images_dir_path, images[id]['file_name'])
name = os.path.join(images_dir_path, '{:012d}.jpg'.format(id))
cat = ant['category_id']
    # remap COCO's sparse 1-90 category ids onto contiguous 0-79 class indices
    if cat >= 1 and cat <= 11:
cat = cat - 1
elif cat >= 13 and cat <= 25:
cat = cat - 2
elif cat >= 27 and cat <= 28:
cat = cat - 3
elif cat >= 31 and cat <= 44:
cat = cat - 5
elif cat >= 46 and cat <= 65:
cat = cat - 6
elif cat == 67:
cat = cat - 7
elif cat == 70:
cat = cat - 9
elif cat >= 72 and cat <= 82:
cat = cat - 10
elif cat >= 84 and cat <= 90:
cat = cat - 11
name_box_id[name].append([ant['bbox'], cat])
"""write to txt"""
with open(output_path, 'w') as f:
for key in tqdm(name_box_id.keys()):
f.write(key)
box_infos = name_box_id[key]
for info in box_infos:
x_min = int(info[0][0])
y_min = int(info[0][1])
x_max = x_min + int(info[0][2])
y_max = y_min + int(info[0][3])
box_info = " %d,%d,%d,%d,%d" % (
x_min, y_min, x_max, y_max, int(info[1]))
f.write(box_info)
f.write('\n')
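# Each output line is "<image path> xmin,ymin,xmax,ymax,class_id ..." with one
# comma-separated group per object, e.g. (illustrative values, not real annotations):
#   mscoco2017/train2017/000000000009.jpg 1,187,613,473,45 36,45,402,377,49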
import sys
import torch
from tool.darknet2pytorch import Darknet
def transform_to_onnx(cfgfile, weightfile, batch_size=1, dynamic=False):
model = Darknet(cfgfile)
model.print_network()
model.load_weights(weightfile)
print('Loading weights from %s... Done!' % (weightfile))
    if batch_size <= 0:
        dynamic = True
input_names = ["input"]
output_names = ['boxes', 'confs']
if dynamic:
x = torch.randn((1, 3, model.height, model.width), requires_grad=True)
onnx_file_name = "yolov4_-1_3_{}_{}_dynamic.onnx".format(model.height, model.width)
dynamic_axes = {"input": {0: "batch_size"}, "boxes": {0: "batch_size"}, "confs": {0: "batch_size"}}
# Export the model
print('Export the onnx model ...')
torch.onnx.export(model,
x,
onnx_file_name,
export_params=True,
opset_version=11,
do_constant_folding=True,
input_names=input_names, output_names=output_names,
dynamic_axes=dynamic_axes)
print('Onnx model exporting done')
return onnx_file_name
else:
x = torch.randn((batch_size, 3, model.height, model.width), requires_grad=True)
onnx_file_name = "yolov4_{}_3_{}_{}_static.onnx".format(batch_size, model.height, model.width)
torch.onnx.export(model,
x,
onnx_file_name,
export_params=True,
opset_version=11,
do_constant_folding=True,
input_names=input_names, output_names=output_names,
dynamic_axes=None)
print('Onnx model exporting done')
return onnx_file_name
if __name__ == '__main__':
if len(sys.argv) == 3:
cfgfile = sys.argv[1]
weightfile = sys.argv[2]
transform_to_onnx(cfgfile, weightfile)
elif len(sys.argv) == 4:
cfgfile = sys.argv[1]
weightfile = sys.argv[2]
batch_size = int(sys.argv[3])
transform_to_onnx(cfgfile, weightfile, batch_size)
elif len(sys.argv) == 5:
cfgfile = sys.argv[1]
weightfile = sys.argv[2]
batch_size = int(sys.argv[3])
dynamic = True if sys.argv[4] == 'True' else False
transform_to_onnx(cfgfile, weightfile, batch_size, dynamic)
else:
        print('Please execute this script this way:\n')
        print('  python darknet2onnx.py <cfgFile> <weightFile>')
        print('or')
        print('  python darknet2onnx.py <cfgFile> <weightFile> <batchSize>')
        print('or')
        print('  python darknet2onnx.py <cfgFile> <weightFile> <batchSize> <dynamic(True/False)>')
import sys
import onnx
from onnx_tf.backend import prepare
# tensorflow >= 2.0 is required
# 1: Thanks: https://github.com/onnx/onnx-tensorflow
# 2: Run: git clone https://github.com/onnx/onnx-tensorflow.git && cd onnx-tensorflow
#    then run: pip install -e .
# Note:
# Errors occurred when using "pip install onnx-tf" (at least for me),
# so installing from source is recommended.
def transform_to_tensorflow(onnx_input_path, pb_output_path):
onnx_model = onnx.load(onnx_input_path) # load onnx model
tf_exp = prepare(onnx_model) # prepare tf representation
tf_exp.export_graph(pb_output_path) # export the model
if __name__ == '__main__':
if len(sys.argv) == 1:
sys.argv.append('../weight/yolov4_1_3_608_608.onnx') # use:darknet2onnx.py
sys.argv.append('../weight/yolov4.pb') # use:onnx2tensorflow.py
if len(sys.argv) == 3:
onnxfile = sys.argv[1]
tfpb_outfile = sys.argv[2]
transform_to_tensorflow(onnxfile, tfpb_outfile)
else:
print('Please execute this script this way:\n')
print(' python onnx2tensorflow.py <onnxfile> <tfpboutfile>')
import torch.nn as nn
import torch.nn.functional as F
from tool.torch_utils import *
def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale,
sil_thresh, seen):
nB = target.size(0)
nA = num_anchors
nC = num_classes
    anchor_step = len(anchors) // num_anchors  # integer division: anchor_step is used as an index
conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale
coord_mask = torch.zeros(nB, nA, nH, nW)
cls_mask = torch.zeros(nB, nA, nH, nW)
tx = torch.zeros(nB, nA, nH, nW)
ty = torch.zeros(nB, nA, nH, nW)
tw = torch.zeros(nB, nA, nH, nW)
th = torch.zeros(nB, nA, nH, nW)
tconf = torch.zeros(nB, nA, nH, nW)
tcls = torch.zeros(nB, nA, nH, nW)
nAnchors = nA * nH * nW
nPixels = nH * nW
for b in range(nB):
cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t()
cur_ious = torch.zeros(nAnchors)
for t in range(50):
if target[b][t * 5 + 1] == 0:
break
gx = target[b][t * 5 + 1] * nW
gy = target[b][t * 5 + 2] * nH
gw = target[b][t * 5 + 3] * nW
gh = target[b][t * 5 + 4] * nH
cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t()
cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
conf_mask[b][cur_ious > sil_thresh] = 0
if seen < 12800:
if anchor_step == 4:
tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1, nA, 1,
1).repeat(
nB, 1, nH, nW)
ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view(
1, nA, 1, 1).repeat(nB, 1, nH, nW)
else:
tx.fill_(0.5)
ty.fill_(0.5)
tw.zero_()
th.zero_()
coord_mask.fill_(1)
nGT = 0
nCorrect = 0
for b in range(nB):
for t in range(50):
if target[b][t * 5 + 1] == 0:
break
nGT = nGT + 1
best_iou = 0.0
best_n = -1
min_dist = 10000
gx = target[b][t * 5 + 1] * nW
gy = target[b][t * 5 + 2] * nH
gi = int(gx)
gj = int(gy)
gw = target[b][t * 5 + 3] * nW
gh = target[b][t * 5 + 4] * nH
gt_box = [0, 0, gw, gh]
for n in range(nA):
aw = anchors[anchor_step * n]
ah = anchors[anchor_step * n + 1]
anchor_box = [0, 0, aw, ah]
iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
if anchor_step == 4:
ax = anchors[anchor_step * n + 2]
ay = anchors[anchor_step * n + 3]
dist = pow(((gi + ax) - gx), 2) + pow(((gj + ay) - gy), 2)
if iou > best_iou:
best_iou = iou
best_n = n
elif anchor_step == 4 and iou == best_iou and dist < min_dist:
best_iou = iou
best_n = n
min_dist = dist
gt_box = [gx, gy, gw, gh]
pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi]
coord_mask[b][best_n][gj][gi] = 1
cls_mask[b][best_n][gj][gi] = 1
conf_mask[b][best_n][gj][gi] = object_scale
tx[b][best_n][gj][gi] = target[b][t * 5 + 1] * nW - gi
ty[b][best_n][gj][gi] = target[b][t * 5 + 2] * nH - gj
tw[b][best_n][gj][gi] = math.log(gw / anchors[anchor_step * best_n])
th[b][best_n][gj][gi] = math.log(gh / anchors[anchor_step * best_n + 1])
iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou
tconf[b][best_n][gj][gi] = iou
tcls[b][best_n][gj][gi] = target[b][t * 5]
if iou > 0.5:
nCorrect = nCorrect + 1
return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls
class RegionLoss(nn.Module):
def __init__(self, num_classes=0, anchors=[], num_anchors=1):
super(RegionLoss, self).__init__()
self.num_classes = num_classes
self.anchors = anchors
self.num_anchors = num_anchors
        self.anchor_step = len(anchors) // num_anchors
self.coord_scale = 1
self.noobject_scale = 1
self.object_scale = 5
self.class_scale = 1
self.thresh = 0.6
self.seen = 0
def forward(self, output, target):
# output : BxAs*(4+1+num_classes)*H*W
t0 = time.time()
nB = output.data.size(0)
nA = self.num_anchors
nC = self.num_classes
nH = output.data.size(2)
nW = output.data.size(3)
output = output.view(nB, nA, (5 + nC), nH, nW)
x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)
t1 = time.time()
pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
pred_boxes[0] = x.data + grid_x
pred_boxes[1] = y.data + grid_y
pred_boxes[2] = torch.exp(w.data) * anchor_w
pred_boxes[3] = torch.exp(h.data) * anchor_h
pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
t2 = time.time()
nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
target.data,
self.anchors, nA,
nC, \
nH, nW,
self.noobject_scale,
self.object_scale,
self.thresh,
self.seen)
cls_mask = (cls_mask == 1)
        nProposals = int((conf > 0.25).sum().item())
tx = Variable(tx.cuda())
ty = Variable(ty.cuda())
tw = Variable(tw.cuda())
th = Variable(th.cuda())
tconf = Variable(tconf.cuda())
tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
coord_mask = Variable(coord_mask.cuda())
conf_mask = Variable(conf_mask.cuda().sqrt())
cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
cls = cls[cls_mask].view(-1, nC)
t3 = time.time()
loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0
loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0
loss_w = self.coord_scale * nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0
loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0
loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0
loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
t4 = time.time()
if False:
print('-----------------------------------')
print(' activation : %f' % (t1 - t0))
print(' create pred_boxes : %f' % (t2 - t1))
print(' build targets : %f' % (t3 - t2))
print(' create loss : %f' % (t4 - t3))
print(' total : %f' % (t4 - t0))
print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0],
loss_conf.data[0], loss_cls.data[0], loss.data[0]))
return loss
import sys
import os
import time
import math
import torch
import numpy as np
from torch.autograd import Variable
import itertools
import struct # get_image_size
import imghdr # get_image_size
from tool import utils
def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
if x1y1x2y2:
mx = torch.min(boxes1[0], boxes2[0])
Mx = torch.max(boxes1[2], boxes2[2])
my = torch.min(boxes1[1], boxes2[1])
My = torch.max(boxes1[3], boxes2[3])
w1 = boxes1[2] - boxes1[0]
h1 = boxes1[3] - boxes1[1]
w2 = boxes2[2] - boxes2[0]
h2 = boxes2[3] - boxes2[1]
else:
mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0)
Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0)
my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0)
My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0)
w1 = boxes1[2]
h1 = boxes1[3]
w2 = boxes2[2]
h2 = boxes2[3]
uw = Mx - mx
uh = My - my
cw = w1 + w2 - uw
ch = h1 + h2 - uh
mask = ((cw <= 0) + (ch <= 0) > 0)
area1 = w1 * h1
area2 = w2 * h2
carea = cw * ch
carea[mask] = 0
uarea = area1 + area2 - carea
return carea / uarea
def get_region_boxes(boxes_and_confs):
# print('Getting boxes from boxes and confs ...')
boxes_list = []
confs_list = []
for item in boxes_and_confs:
boxes_list.append(item[0])
confs_list.append(item[1])
# boxes: [batch, num1 + num2 + num3, 1, 4]
# confs: [batch, num1 + num2 + num3, num_classes]
boxes = torch.cat(boxes_list, dim=1)
confs = torch.cat(confs_list, dim=1)
return [boxes, confs]
def convert2cpu(gpu_matrix):
return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)
def convert2cpu_long(gpu_matrix):
return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix)
def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1):
model.eval()
if type(img) == np.ndarray and len(img.shape) == 3: # cv2 image
img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
elif type(img) == np.ndarray and len(img.shape) == 4:
img = torch.from_numpy(img.transpose(0, 3, 1, 2)).float().div(255.0)
else:
print("unknow image type")
exit(-1)
if use_cuda:
img = img.cuda()
img = torch.autograd.Variable(img)
output = model(img)
return utils.post_processing(img, conf_thresh, nms_thresh, output)
# Object detection reference training scripts
This folder contains reference training scripts for object detection.
They serve as a log of how to train specific models, to provide baseline
training and evaluation scripts to quickly bootstrap research.
To execute the example commands below you must install the following:
```
cython
pycocotools
matplotlib
```
You must modify the following flags:
`--data-path=/path/to/coco/dataset`
`--nproc_per_node=<number_of_gpus_available>`
Unless otherwise noted, all models have been trained on 8x V100 GPUs.
### Faster R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26 \
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Mask R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco --model maskrcnn_resnet50_fpn --epochs 26 \
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Keypoint R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py \
    --dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46 \
    --lr-steps 36 43 --aspect-ratio-group-factor 3
```
import copy
import os
from PIL import Image
import torch
import torch.utils.data
import torchvision
from pycocotools import mask as coco_mask
from pycocotools.coco import COCO
from . import transforms as T
class FilterAndRemapCocoCategories(object):
def __init__(self, categories, remap=True):
self.categories = categories
self.remap = remap
def __call__(self, image, target):
anno = target["annotations"]
anno = [obj for obj in anno if obj["category_id"] in self.categories]
if not self.remap:
target["annotations"] = anno
return image, target
anno = copy.deepcopy(anno)
for obj in anno:
obj["category_id"] = self.categories.index(obj["category_id"])
target["annotations"] = anno
return image, target
def convert_coco_poly_to_mask(segmentations, height, width):
masks = []
for polygons in segmentations:
rles = coco_mask.frPyObjects(polygons, height, width)
mask = coco_mask.decode(rles)
if len(mask.shape) < 3:
mask = mask[..., None]
mask = torch.as_tensor(mask, dtype=torch.uint8)
mask = mask.any(dim=2)
masks.append(mask)
if masks:
masks = torch.stack(masks, dim=0)
else:
masks = torch.zeros((0, height, width), dtype=torch.uint8)
return masks
class ConvertCocoPolysToMask(object):
def __call__(self, image, target):
w, h = image.size
image_id = target["image_id"]
image_id = torch.tensor([image_id])
anno = target["annotations"]
anno = [obj for obj in anno if obj['iscrowd'] == 0]
boxes = [obj["bbox"] for obj in anno]
# guard against no boxes via resizing
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        # convert COCO [x, y, w, h] boxes to [xmin, ymin, xmax, ymax]
        boxes[:, 2:] += boxes[:, :2]
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)
classes = [obj["category_id"] for obj in anno]
classes = torch.tensor(classes, dtype=torch.int64)
segmentations = [obj["segmentation"] for obj in anno]
masks = convert_coco_poly_to_mask(segmentations, h, w)
keypoints = None
if anno and "keypoints" in anno[0]:
keypoints = [obj["keypoints"] for obj in anno]
keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
num_keypoints = keypoints.shape[0]
if num_keypoints:
keypoints = keypoints.view(num_keypoints, -1, 3)
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
boxes = boxes[keep]
classes = classes[keep]
masks = masks[keep]
if keypoints is not None:
keypoints = keypoints[keep]
target = {}
target["boxes"] = boxes
target["labels"] = classes
target["masks"] = masks
target["image_id"] = image_id
if keypoints is not None:
target["keypoints"] = keypoints
# for conversion to coco api
area = torch.tensor([obj["area"] for obj in anno])
iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
target["area"] = area
target["iscrowd"] = iscrowd
return image, target
def _coco_remove_images_without_annotations(dataset, cat_list=None):
def _has_only_empty_bbox(anno):
return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
def _count_visible_keypoints(anno):
return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
min_keypoints_per_image = 10
def _has_valid_annotation(anno):
# if it's empty, there is no annotation
if len(anno) == 0:
return False
# if all boxes have close to zero area, there is no annotation
if _has_only_empty_bbox(anno):
return False
        # the keypoints task has slightly different criteria for deciding
        # whether an annotation is valid
if "keypoints" not in anno[0]:
return True
# for keypoint detection tasks, only consider valid images those
# containing at least min_keypoints_per_image
if _count_visible_keypoints(anno) >= min_keypoints_per_image:
return True
return False
assert isinstance(dataset, torchvision.datasets.CocoDetection)
ids = []
for ds_idx, img_id in enumerate(dataset.ids):
ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
anno = dataset.coco.loadAnns(ann_ids)
if cat_list:
anno = [obj for obj in anno if obj["category_id"] in cat_list]
if _has_valid_annotation(anno):
ids.append(ds_idx)
dataset = torch.utils.data.Subset(dataset, ids)
return dataset
def convert_to_coco_api(ds, bbox_fmt='voc'):
"""
"""
print("in function convert_to_coco_api...")
coco_ds = COCO()
# annotation IDs need to start at 1, not 0, see torchvision issue #1530
ann_id = 1
dataset = {'images': [], 'categories': [], 'annotations': []}
categories = set()
for img_idx in range(len(ds)):
# find better way to get target
# targets = ds.get_annotations(img_idx)
img, targets = ds[img_idx]
image_id = targets["image_id"].item()
img_dict = {}
img_dict['id'] = image_id
img_dict['height'] = img.shape[-2]
img_dict['width'] = img.shape[-1]
dataset['images'].append(img_dict)
bboxes = targets["boxes"]
# to coco format: xmin, ymin, w, h
if bbox_fmt.lower() == "voc": # xmin, ymin, xmax, ymax
bboxes[:, 2:] -= bboxes[:, :2]
elif bbox_fmt.lower() == "yolo": # xcen, ycen, w, h
bboxes[:, :2] = bboxes[:, :2] - bboxes[:, 2:]/2
elif bbox_fmt.lower() == "coco":
pass
else:
raise ValueError(f"bounding box format {bbox_fmt} not supported!")
bboxes = bboxes.tolist()
labels = targets['labels'].tolist()
areas = targets['area'].tolist()
iscrowd = targets['iscrowd'].tolist()
if 'masks' in targets:
masks = targets['masks']
# make masks Fortran contiguous for coco_mask
masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
if 'keypoints' in targets:
keypoints = targets['keypoints']
keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
num_objs = len(bboxes)
for i in range(num_objs):
ann = {}
ann['image_id'] = image_id
ann['bbox'] = bboxes[i]
ann['category_id'] = labels[i]
categories.add(labels[i])
ann['area'] = areas[i]
ann['iscrowd'] = iscrowd[i]
ann['id'] = ann_id
if 'masks' in targets:
ann["segmentation"] = coco_mask.encode(masks[i].numpy())
if 'keypoints' in targets:
ann['keypoints'] = keypoints[i]
ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3])
dataset['annotations'].append(ann)
ann_id += 1
dataset['categories'] = [{'id': i} for i in sorted(categories)]
coco_ds.dataset = dataset
coco_ds.createIndex()
return coco_ds
def get_coco_api_from_dataset(dataset):
for _ in range(10):
if isinstance(dataset, torchvision.datasets.CocoDetection):
break
if isinstance(dataset, torch.utils.data.Subset):
dataset = dataset.dataset
if isinstance(dataset, torchvision.datasets.CocoDetection):
return dataset.coco
return convert_to_coco_api(dataset)
class CocoDetection(torchvision.datasets.CocoDetection):
def __init__(self, img_folder, ann_file, transforms):
super(CocoDetection, self).__init__(img_folder, ann_file)
self._transforms = transforms
def __getitem__(self, idx):
img, target = super(CocoDetection, self).__getitem__(idx)
image_id = self.ids[idx]
target = dict(image_id=image_id, annotations=target)
if self._transforms is not None:
img, target = self._transforms(img, target)
return img, target
def get_coco(root, image_set, transforms, mode='instances'):
anno_file_template = "{}_{}2017.json"
PATHS = {
"train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
"val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
# "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
}
t = [ConvertCocoPolysToMask()]
if transforms is not None:
t.append(transforms)
transforms = T.Compose(t)
img_folder, ann_file = PATHS[image_set]
img_folder = os.path.join(root, img_folder)
ann_file = os.path.join(root, ann_file)
dataset = CocoDetection(img_folder, ann_file, transforms=transforms)
if image_set == "train":
dataset = _coco_remove_images_without_annotations(dataset)
# dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])
return dataset
def get_coco_kp(root, image_set, transforms):
return get_coco(root, image_set, transforms, mode="person_keypoints")
import math
import sys
import time
import torch
import torchvision.models.detection.mask_rcnn
from .coco_utils import get_coco_api_from_dataset
from .coco_eval import CocoEvaluator
from . import utils
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
header = 'Epoch: [{}]'.format(epoch)
lr_scheduler = None
if epoch == 0:
warmup_factor = 1. / 1000
warmup_iters = min(1000, len(data_loader) - 1)
lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
for images, targets in metric_logger.log_every(data_loader, print_freq, header):
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print("Loss is {}, stopping training".format(loss_value))
print(loss_dict_reduced)
sys.exit(1)
optimizer.zero_grad()
losses.backward()
optimizer.step()
if lr_scheduler is not None:
lr_scheduler.step()
metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
metric_logger.update(lr=optimizer.param_groups[0]["lr"])
return metric_logger
def _get_iou_types(model):
model_without_ddp = model
if isinstance(model, torch.nn.parallel.DistributedDataParallel):
model_without_ddp = model.module
iou_types = ["bbox"]
if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
iou_types.append("segm")
if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
iou_types.append("keypoints")
return iou_types
@torch.no_grad()
def evaluate(model, data_loader, device):
n_threads = torch.get_num_threads()
# FIXME remove this and make paste_masks_in_image run on the GPU
torch.set_num_threads(1)
cpu_device = torch.device("cpu")
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
header = 'Test:'
coco = get_coco_api_from_dataset(data_loader.dataset)
iou_types = _get_iou_types(model)
coco_evaluator = CocoEvaluator(coco, iou_types)
for images, targets in metric_logger.log_every(data_loader, 100, header):
images = list(img.to(device) for img in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
torch.cuda.synchronize()
model_time = time.time()
outputs = model(images)
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
model_time = time.time() - model_time
res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
evaluator_time = time.time()
coco_evaluator.update(res)
evaluator_time = time.time() - evaluator_time
metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)
coco_evaluator.synchronize_between_processes()
# accumulate predictions from all images
coco_evaluator.accumulate()
coco_evaluator.summarize()
torch.set_num_threads(n_threads)
return coco_evaluator
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np
import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision
from PIL import Image
def _repeat_to_at_least(iterable, n):
repeat_times = math.ceil(n / len(iterable))
repeated = chain.from_iterable(repeat(iterable, repeat_times))
return list(repeated)
class GroupedBatchSampler(BatchSampler):
"""
Wraps another sampler to yield a mini-batch of indices.
    It enforces that each batch only contains elements from the same group.
    It also tries to provide mini-batches that follow an ordering as close as
    possible to the ordering from the original sampler.
Arguments:
sampler (Sampler): Base sampler.
group_ids (list[int]): If the sampler produces indices in range [0, N),
`group_ids` must be a list of `N` ints which contains the group id of each sample.
The group ids must be a continuous set of integers starting from
0, i.e. they must be in the range [0, num_groups).
batch_size (int): Size of mini-batch.
"""
def __init__(self, sampler, group_ids, batch_size):
if not isinstance(sampler, Sampler):
raise ValueError(
"sampler should be an instance of "
"torch.utils.data.Sampler, but got sampler={}".format(sampler)
)
self.sampler = sampler
self.group_ids = group_ids
self.batch_size = batch_size
def __iter__(self):
buffer_per_group = defaultdict(list)
samples_per_group = defaultdict(list)
num_batches = 0
for idx in self.sampler:
group_id = self.group_ids[idx]
buffer_per_group[group_id].append(idx)
samples_per_group[group_id].append(idx)
if len(buffer_per_group[group_id]) == self.batch_size:
yield buffer_per_group[group_id]
num_batches += 1
del buffer_per_group[group_id]
assert len(buffer_per_group[group_id]) < self.batch_size
# now we have run out of elements that satisfy
# the group criteria, let's return the remaining
# elements so that the size of the sampler is
# deterministic
expected_num_batches = len(self)
num_remaining = expected_num_batches - num_batches
if num_remaining > 0:
# for the remaining batches, take first the buffers with largest number
# of elements
for group_id, _ in sorted(buffer_per_group.items(),
key=lambda x: len(x[1]), reverse=True):
remaining = self.batch_size - len(buffer_per_group[group_id])
samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)
buffer_per_group[group_id].extend(samples_from_group_id[:remaining])
assert len(buffer_per_group[group_id]) == self.batch_size
yield buffer_per_group[group_id]
num_remaining -= 1
if num_remaining == 0:
break
assert num_remaining == 0
def __len__(self):
return len(self.sampler) // self.batch_size
def _compute_aspect_ratios_slow(dataset, indices=None):
print("Your dataset doesn't support the fast path for "
"computing the aspect ratios, so will iterate over "
"the full dataset and load every image instead. "
"This might take some time...")
if indices is None:
indices = range(len(dataset))
class SubsetSampler(Sampler):
def __init__(self, indices):
self.indices = indices
def __iter__(self):
return iter(self.indices)
def __len__(self):
return len(self.indices)
sampler = SubsetSampler(indices)
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=1, sampler=sampler,
num_workers=14, # you might want to increase it for faster processing
collate_fn=lambda x: x[0])
aspect_ratios = []
with tqdm(total=len(dataset)) as pbar:
for _i, (img, _) in enumerate(data_loader):
pbar.update(1)
height, width = img.shape[-2:]
aspect_ratio = float(width) / float(height)
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
aspect_ratios = []
for i in indices:
height, width = dataset.get_height_and_width(i)
aspect_ratio = float(width) / float(height)
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
aspect_ratios = []
for i in indices:
img_info = dataset.coco.imgs[dataset.ids[i]]
aspect_ratio = float(img_info["width"]) / float(img_info["height"])
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
aspect_ratios = []
for i in indices:
# this doesn't load the data into memory, because PIL loads it lazily
width, height = Image.open(dataset.images[i]).size
aspect_ratio = float(width) / float(height)
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
ds_indices = [dataset.indices[i] for i in indices]
return compute_aspect_ratios(dataset.dataset, ds_indices)
def compute_aspect_ratios(dataset, indices=None):
if hasattr(dataset, "get_height_and_width"):
return _compute_aspect_ratios_custom_dataset(dataset, indices)
if isinstance(dataset, torchvision.datasets.CocoDetection):
return _compute_aspect_ratios_coco_dataset(dataset, indices)
if isinstance(dataset, torchvision.datasets.VOCDetection):
return _compute_aspect_ratios_voc_dataset(dataset, indices)
if isinstance(dataset, torch.utils.data.Subset):
return _compute_aspect_ratios_subset_dataset(dataset, indices)
# slow path
return _compute_aspect_ratios_slow(dataset, indices)
def _quantize(x, bins):
bins = copy.deepcopy(bins)
bins = sorted(bins)
quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
return quantized
def create_aspect_ratio_groups(dataset, k=0):
aspect_ratios = compute_aspect_ratios(dataset)
bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]
groups = _quantize(aspect_ratios, bins)
# count number of elements per group
counts = np.unique(groups, return_counts=True)[1]
fbins = [0] + bins + [np.inf]
print("Using {} as bins for aspect ratio quantization".format(fbins))
print("Count of instances per bin: {}".format(counts))
return groups
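# Illustrative example (assuming k=1): bins = 2 ** linspace(-1, 1, 3) = [0.5, 1.0, 2.0],
# so an image with aspect ratio 0.8 lands in group bisect_right(bins, 0.8) = 1, a 1.5
# image lands in group 2, and the printed fbins are [0, 0.5, 1.0, 2.0, inf].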
r"""PyTorch Detection Training.
To run in a multi-gpu environment, use the distributed launcher::
python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
train.py ... --world-size $NGPU
The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
--lr 0.02 --batch-size 2 --world-size 8
If you use a different number of GPUs, the learning rate should be scaled to 0.02/8*$NGPU (e.g. 0.01 for 4 GPUs).
On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
--epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3
Also, if you train Keypoint R-CNN, the default hyperparameters are
--epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
Because the number of images is smaller in the person keypoint subset of COCO,
the number of epochs should be adapted so that we have the same number of iterations.
"""
import datetime
import os
import time
import torch
import torch.utils.data
from torch import nn
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn
from .coco_utils import get_coco, get_coco_kp
from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from .engine import train_one_epoch, evaluate
from . import utils
from . import transforms as T
def get_dataset(name, image_set, transform, data_path):
paths = {
"coco": (data_path, get_coco, 91),
"coco_kp": (data_path, get_coco_kp, 2)
}
p, ds_fn, num_classes = paths[name]
ds = ds_fn(p, image_set=image_set, transforms=transform)
return ds, num_classes
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
def main(args):
utils.init_distributed_mode(args)
print(args)
device = torch.device(args.device)
# Data loading code
print("Loading data")
dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True), args.data_path)
dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False), args.data_path)
print("Creating data loaders")
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
else:
train_sampler = torch.utils.data.RandomSampler(dataset)
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
if args.aspect_ratio_group_factor >= 0:
group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
else:
train_batch_sampler = torch.utils.data.BatchSampler(
train_sampler, args.batch_size, drop_last=True)
data_loader = torch.utils.data.DataLoader(
dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1,
sampler=test_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
print("Creating model")
model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes,
pretrained=args.pretrained)
model.to(device)
model_without_ddp = model
if args.distributed:
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
if args.resume:
checkpoint = torch.load(args.resume, map_location='cpu')
model_without_ddp.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
args.start_epoch = checkpoint['epoch'] + 1
if args.test_only:
evaluate(model, data_loader_test, device=device)
return
print("Start training")
start_time = time.time()
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
lr_scheduler.step()
if args.output_dir:
utils.save_on_master({
'model': model_without_ddp.state_dict(),
'optimizer': optimizer.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(),
'args': args,
'epoch': epoch},
os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
# evaluate after every epoch
evaluate(model, data_loader_test, device=device)
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
description=__doc__)
parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
parser.add_argument('--dataset', default='coco', help='dataset')
parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
parser.add_argument('--device', default='cuda', help='device')
parser.add_argument('-b', '--batch-size', default=2, type=int,
help='images per gpu, the total batch size is $NGPU x batch_size')
parser.add_argument('--epochs', default=26, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--lr', default=0.02, type=float,
help='initial learning rate, 0.02 is the default value for training '
'on 8 gpus and 2 images_per_gpu')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int, help='decrease lr every step-size epochs')
parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
parser.add_argument('--output-dir', default='.', help='path where to save')
parser.add_argument('--resume', default='', help='resume from checkpoint')
parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument(
"--pretrained",
dest="pretrained",
help="Use pre-trained models from the modelzoo",
action="store_true",
)
# distributed training parameters
parser.add_argument('--world-size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
args = parser.parse_args()
if args.output_dir:
utils.mkdir(args.output_dir)
main(args)
import random
import torch
from torchvision.transforms import functional as F
def _flip_coco_person_keypoints(kps, width):
flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
flipped_data = kps[:, flip_inds]
flipped_data[..., 0] = width - flipped_data[..., 0]
# Maintain COCO convention that if visibility == 0, then x, y = 0
inds = flipped_data[..., 2] == 0
flipped_data[inds] = 0
return flipped_data
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, target):
for t in self.transforms:
image, target = t(image, target)
return image, target
class RandomHorizontalFlip(object):
def __init__(self, prob):
self.prob = prob
def __call__(self, image, target):
if random.random() < self.prob:
height, width = image.shape[-2:]
image = image.flip(-1)
bbox = target["boxes"]
bbox[:, [0, 2]] = width - bbox[:, [2, 0]]
target["boxes"] = bbox
if "masks" in target:
target["masks"] = target["masks"].flip(-1)
if "keypoints" in target:
keypoints = target["keypoints"]
keypoints = _flip_coco_person_keypoints(keypoints, width)
target["keypoints"] = keypoints
return image, target
class ToTensor(object):
def __call__(self, image, target):
image = F.to_tensor(image)
return image, target
from collections import defaultdict, deque
import datetime
import pickle
import time
import torch
import torch.distributed as dist
import errno
import os
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
def __init__(self, window_size=20, fmt=None):
if fmt is None:
fmt = "{median:.4f} ({global_avg:.4f})"
self.deque = deque(maxlen=window_size)
self.total = 0.0
self.count = 0
self.fmt = fmt
def update(self, value, n=1):
self.deque.append(value)
self.count += n
self.total += value * n
def synchronize_between_processes(self):
"""
Warning: does not synchronize the deque!
"""
if not is_dist_avail_and_initialized():
return
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
dist.barrier()
dist.all_reduce(t)
t = t.tolist()
self.count = int(t[0])
self.total = t[1]
@property
def median(self):
d = torch.tensor(list(self.deque))
return d.median().item()
@property
def avg(self):
d = torch.tensor(list(self.deque), dtype=torch.float32)
return d.mean().item()
@property
def global_avg(self):
return self.total / self.count
@property
def max(self):
return max(self.deque)
@property
def value(self):
return self.deque[-1]
def __str__(self):
return self.fmt.format(
median=self.median,
avg=self.avg,
global_avg=self.global_avg,
max=self.max,
value=self.value)
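# A minimal standalone sketch of the windowed vs. global statistics (assuming a
# single process, so synchronize_between_processes is not needed):
#   v = SmoothedValue(window_size=2)
#   v.update(1.0); v.update(3.0); v.update(5.0)
#   v.value      -> 5.0  (most recent value)
#   v.avg        -> 4.0  (mean over the 2-value window: 3.0 and 5.0)
#   v.global_avg -> 3.0  (mean over all 3 updates)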
def all_gather(data):
"""
Run all_gather on arbitrary picklable data (not necessarily tensors)
Args:
data: any picklable object
Returns:
list[data]: list of data gathered from each rank
"""
world_size = get_world_size()
if world_size == 1:
return [data]
# serialized to a Tensor
buffer = pickle.dumps(data)
storage = torch.ByteStorage.from_buffer(buffer)
tensor = torch.ByteTensor(storage).to("cuda")
# obtain Tensor size of each rank
local_size = torch.tensor([tensor.numel()], device="cuda")
size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
dist.all_gather(size_list, local_size)
size_list = [int(size.item()) for size in size_list]
max_size = max(size_list)
# receiving Tensor from all ranks
# we pad the tensor because torch all_gather does not support
# gathering tensors of different shapes
tensor_list = []
for _ in size_list:
tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
if local_size != max_size:
padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
tensor = torch.cat((tensor, padding), dim=0)
dist.all_gather(tensor_list, tensor)
data_list = []
for size, tensor in zip(size_list, tensor_list):
buffer = tensor.cpu().numpy().tobytes()[:size]
data_list.append(pickle.loads(buffer))
return data_list
def reduce_dict(input_dict, average=True):
"""
Args:
input_dict (dict): all the values will be reduced
average (bool): whether to do average or sum
Reduce the values in the dictionary from all processes so that all processes
have the averaged results. Returns a dict with the same fields as
input_dict, after reduction.
"""
world_size = get_world_size()
if world_size < 2:
return input_dict
with torch.no_grad():
names = []
values = []
# sort the keys so that they are consistent across processes
for k in sorted(input_dict.keys()):
names.append(k)
values.append(input_dict[k])
values = torch.stack(values, dim=0)
dist.all_reduce(values)
if average:
values /= world_size
reduced_dict = {k: v for k, v in zip(names, values)}
return reduced_dict
class MetricLogger(object):
def __init__(self, delimiter="\t"):
self.meters = defaultdict(SmoothedValue)
self.delimiter = delimiter
def update(self, **kwargs):
for k, v in kwargs.items():
if isinstance(v, torch.Tensor):
v = v.item()
assert isinstance(v, (float, int))
self.meters[k].update(v)
def __getattr__(self, attr):
if attr in self.meters:
return self.meters[attr]
if attr in self.__dict__:
return self.__dict__[attr]
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, attr))
def __str__(self):
loss_str = []
for name, meter in self.meters.items():
loss_str.append(
"{}: {}".format(name, str(meter))
)
return self.delimiter.join(loss_str)
def synchronize_between_processes(self):
for meter in self.meters.values():
meter.synchronize_between_processes()
def add_meter(self, name, meter):
self.meters[name] = meter
def log_every(self, iterable, print_freq, header=None):
i = 0
if not header:
header = ''
start_time = time.time()
end = time.time()
iter_time = SmoothedValue(fmt='{avg:.4f}')
data_time = SmoothedValue(fmt='{avg:.4f}')
space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
if torch.cuda.is_available():
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}',
'max mem: {memory:.0f}'
])
else:
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}'
])
MB = 1024.0 * 1024.0
for obj in iterable:
data_time.update(time.time() - end)
yield obj
iter_time.update(time.time() - end)
if i % print_freq == 0 or i == len(iterable) - 1:
eta_seconds = iter_time.global_avg * (len(iterable) - i)
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
if torch.cuda.is_available():
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time),
memory=torch.cuda.max_memory_allocated() / MB))
else:
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time)))
i += 1
end = time.time()
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('{} Total time: {} ({:.4f} s / it)'.format(
header, total_time_str, total_time / len(iterable)))
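# --- Hedged usage sketch (illustration only, not part of the original file) ---
# MetricLogger.log_every wraps an iterable (typically a DataLoader), yields its
# items unchanged and prints ETA, iteration/data timing and every meter updated
# via update(); the loader and loss below are placeholders.
def _example_train_logging(data_loader):
    metric_logger = MetricLogger(delimiter="  ")
    header = "Epoch: [0]"
    for images, targets in metric_logger.log_every(data_loader, 10, header):
        loss = 0.0  # placeholder for the real forward/backward step
        metric_logger.update(loss=loss)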
def collate_fn(batch):
return tuple(zip(*batch))
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
def f(x):
if x >= warmup_iters:
return 1
alpha = float(x) / warmup_iters
return warmup_factor * (1 - alpha) + alpha
return torch.optim.lr_scheduler.LambdaLR(optimizer, f)
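# --- Hedged usage sketch (illustration only, not part of the original file) ---
# During the first warmup_iters steps the returned LambdaLR scales the base
# learning rate from warmup_factor * lr up to lr; afterwards the factor is 1.
# The optimizer hyper-parameters below are only examples.
def _example_warmup(model_params):
    optimizer = torch.optim.SGD(model_params, lr=0.01, momentum=0.9)
    lr_scheduler = warmup_lr_scheduler(optimizer, warmup_iters=1000, warmup_factor=1.0 / 1000)
    # call lr_scheduler.step() once per iteration during the warmup phase
    return optimizer, lr_scheduler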
def mkdir(path):
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def setup_for_distributed(is_master):
"""
This function disables printing when not in the master process
"""
import builtins as __builtin__
builtin_print = __builtin__.print
def print(*args, **kwargs):
force = kwargs.pop('force', False)
if is_master or force:
builtin_print(*args, **kwargs)
__builtin__.print = print
def is_dist_avail_and_initialized():
if not dist.is_available():
return False
if not dist.is_initialized():
return False
return True
def get_world_size():
if not is_dist_avail_and_initialized():
return 1
return dist.get_world_size()
def get_rank():
if not is_dist_avail_and_initialized():
return 0
return dist.get_rank()
def is_main_process():
return get_rank() == 0
def save_on_master(*args, **kwargs):
if is_main_process():
torch.save(*args, **kwargs)
def init_distributed_mode(args):
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
args.rank = int(os.environ["RANK"])
args.world_size = int(os.environ['WORLD_SIZE'])
args.gpu = int(os.environ['LOCAL_RANK'])
elif 'SLURM_PROCID' in os.environ:
args.rank = int(os.environ['SLURM_PROCID'])
args.gpu = args.rank % torch.cuda.device_count()
else:
print('Not using distributed mode')
args.distributed = False
return
args.distributed = True
torch.cuda.set_device(args.gpu)
args.dist_backend = 'nccl'
print('| distributed init (rank {}): {}'.format(
args.rank, args.dist_url), flush=True)
torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
torch.distributed.barrier()
setup_for_distributed(args.rank == 0)
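# --- Hedged usage sketch (illustration only, not part of the original file) ---
# init_distributed_mode reads RANK/WORLD_SIZE/LOCAL_RANK (or SLURM_PROCID) from
# the environment, so a launcher such as torchrun / torch.distributed.launch is
# expected to set them; the argparse flag below is an assumption.
def _example_parse_and_init():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--dist-url', default='env://')
    args = parser.parse_args([])
    init_distributed_mode(args)  # falls back to non-distributed mode if no env vars are set
    return args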
import sys
import os
import time
import math
import numpy as np
import itertools
import struct # get_image_size
import imghdr # get_image_size
def sigmoid(x):
return 1.0 / (np.exp(-x) + 1.)
def softmax(x):
x = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1))
x = x / np.expand_dims(x.sum(axis=1), axis=1)
return x
def bbox_iou(box1, box2, x1y1x2y2=True):
# print('iou box1:', box1)
# print('iou box2:', box2)
if x1y1x2y2:
mx = min(box1[0], box2[0])
Mx = max(box1[2], box2[2])
my = min(box1[1], box2[1])
My = max(box1[3], box2[3])
w1 = box1[2] - box1[0]
h1 = box1[3] - box1[1]
w2 = box2[2] - box2[0]
h2 = box2[3] - box2[1]
else:
w1 = box1[2]
h1 = box1[3]
w2 = box2[2]
h2 = box2[3]
mx = min(box1[0], box2[0])
Mx = max(box1[0] + w1, box2[0] + w2)
my = min(box1[1], box2[1])
My = max(box1[1] + h1, box2[1] + h2)
uw = Mx - mx
uh = My - my
cw = w1 + w2 - uw
ch = h1 + h2 - uh
carea = 0
if cw <= 0 or ch <= 0:
return 0.0
area1 = w1 * h1
area2 = w2 * h2
carea = cw * ch
uarea = area1 + area2 - carea
return carea / uarea
def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
# print(boxes.shape)
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1) * (y2 - y1)
order = confs.argsort()[::-1]
keep = []
while order.size > 0:
idx_self = order[0]
idx_other = order[1:]
keep.append(idx_self)
xx1 = np.maximum(x1[idx_self], x1[idx_other])
yy1 = np.maximum(y1[idx_self], y1[idx_other])
xx2 = np.minimum(x2[idx_self], x2[idx_other])
yy2 = np.minimum(y2[idx_self], y2[idx_other])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
inter = w * h
if min_mode:
over = inter / np.minimum(areas[order[0]], areas[order[1:]])
else:
over = inter / (areas[order[0]] + areas[order[1:]] - inter)
inds = np.where(over <= nms_thresh)[0]
order = order[inds + 1]
return np.array(keep)
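# --- Hedged worked example (illustration only, not part of the original file) ---
# Two heavily overlapping unit boxes and one separate box: with nms_thresh=0.5
# the lower-confidence overlapping box is suppressed, so the indices of the two
# surviving boxes are returned in descending confidence order.
def _example_nms():
    boxes = np.array([[0.00, 0.00, 1.0, 1.0],
                      [0.05, 0.05, 1.0, 1.0],
                      [2.00, 2.00, 3.0, 3.0]])
    confs = np.array([0.9, 0.8, 0.7])
    return nms_cpu(boxes, confs, nms_thresh=0.5)  # expected: array([0, 2])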
def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
import cv2
img = np.copy(img)
colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)
def get_color(c, x, max_val):
ratio = float(x) / max_val * 5
i = int(math.floor(ratio))
j = int(math.ceil(ratio))
ratio = ratio - i
r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]
return int(r * 255)
width = img.shape[1]
height = img.shape[0]
for i in range(len(boxes)):
box = boxes[i]
x1 = int(box[0] * width)
y1 = int(box[1] * height)
x2 = int(box[2] * width)
y2 = int(box[3] * height)
if color:
rgb = color
else:
rgb = (255, 0, 0)
if len(box) >= 7 and class_names:
cls_conf = box[5]
cls_id = box[6]
# print('%s: %f' % (class_names[cls_id], cls_conf))
classes = len(class_names)
offset = cls_id * 123457 % classes
red = get_color(2, offset, classes)
green = get_color(1, offset, classes)
blue = get_color(0, offset, classes)
if color is None:
rgb = (red, green, blue)
img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1)
img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1)
# if savename:
# print("save plot results to %s" % savename)
# cv2.imwrite(savename, img)
return img
def read_truths(lab_path):
if not os.path.exists(lab_path):
return np.array([])
if os.path.getsize(lab_path):
truths = np.loadtxt(lab_path)
truths = truths.reshape(truths.size // 5, 5)  # to avoid single truth problem
return truths
else:
return np.array([])
def load_class_names(namesfile):
class_names = []
with open(namesfile, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.rstrip()
class_names.append(line)
return class_names
def post_processing(img, conf_thresh, nms_thresh, output):
# anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
# num_anchors = 9
# anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
# strides = [8, 16, 32]
# anchor_step = len(anchors) // num_anchors
# [batch, num, 1, 4]
box_array = output[0]
# [batch, num, num_classes]
confs = output[1]
t1 = time.time()
if type(box_array).__name__ != 'ndarray':
box_array = box_array.cpu().detach().numpy()
confs = confs.cpu().detach().numpy()
num_classes = confs.shape[2]
# [batch, num, 4]
box_array = box_array[:, :, 0]
# [batch, num, num_classes] --> [batch, num]
max_conf = np.max(confs, axis=2)
max_id = np.argmax(confs, axis=2)
t2 = time.time()
bboxes_batch = []
for i in range(box_array.shape[0]):
argwhere = max_conf[i] > conf_thresh
l_box_array = box_array[i, argwhere, :]
l_max_conf = max_conf[i, argwhere]
l_max_id = max_id[i, argwhere]
bboxes = []
# nms for each class
for j in range(num_classes):
cls_argwhere = l_max_id == j
ll_box_array = l_box_array[cls_argwhere, :]
ll_max_conf = l_max_conf[cls_argwhere]
ll_max_id = l_max_id[cls_argwhere]
keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)
if (keep.size > 0):
ll_box_array = ll_box_array[keep, :]
ll_max_conf = ll_max_conf[keep]
ll_max_id = ll_max_id[keep]
for k in range(ll_box_array.shape[0]):
bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]])
bboxes_batch.append(bboxes)
t3 = time.time()
print('-----------------------------------')
print(' max and argmax : %f' % (t2 - t1))
print(' nms : %f' % (t3 - t2))
print('Post processing total : %f' % (t3 - t1))
print('-----------------------------------')
return bboxes_batch
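# --- Hedged usage sketch (illustration only, not part of the original file) ---
# post_processing expects output = [boxes, confs] with boxes of shape
# [batch, num, 1, 4] holding normalized (x1, y1, x2, y2) and confs of shape
# [batch, num, num_classes]; the thresholds and names file below are assumptions.
def _example_detect_and_draw(img_bgr, model_output, namesfile='data/coco.names'):
    class_names = load_class_names(namesfile)
    boxes_batch = post_processing(img_bgr, 0.4, 0.6, model_output)
    # each box is [x1, y1, x2, y2, conf, cls_conf, cls_id] in normalized coordinates
    return plot_boxes_cv2(img_bgr, boxes_batch[0], class_names=class_names)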
# -*- coding: utf-8 -*-
'''
'''
import torch
import os, sys
from torch.nn import functional as F
import numpy as np
from packaging import version
__all__ = [
"bboxes_iou",
"bboxes_giou",
"bboxes_diou",
"bboxes_ciou",
]
if version.parse(torch.__version__) >= version.parse('1.5.0'):
def _true_divide(dividend, divisor):
return torch.true_divide(dividend, divisor)
else:
def _true_divide(dividend, divisor):
return dividend / divisor
def bboxes_iou(bboxes_a, bboxes_b, fmt='voc', iou_type='iou'):
"""Calculate the Intersection of Unions (IoUs) between bounding boxes.
IoU is calculated as a ratio of area of the intersection
and area of the union.
Args:
bboxes_a (torch.Tensor): A tensor whose shape is :math:`(N, 4)`.
:math:`N` is the number of bounding boxes.
The dtype should be :obj:`torch.float32`.
bboxes_b (torch.Tensor): A tensor similar to :obj:`bboxes_a`,
whose shape is :math:`(K, 4)`.
The dtype should be :obj:`torch.float32`.
Returns:
torch.Tensor:
A tensor whose shape is :math:`(N, K)`. \
An element at index :math:`(n, k)` contains the IoU between \
the :math:`n`-th bounding box in :obj:`bboxes_a` and the :math:`k`-th bounding \
box in :obj:`bboxes_b`.
from: https://github.com/chainer/chainercv
"""
if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
raise IndexError
N, K = bboxes_a.shape[0], bboxes_b.shape[0]
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
# top left
tl_intersect = torch.max(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
) # of shape `(N,K,2)`
# bottom right
br_intersect = torch.min(
bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, 2:]
)
bb_a = bboxes_a[:, 2:] - bboxes_a[:, :2]
bb_b = bboxes_b[:, 2:] - bboxes_b[:, :2]
# bb_* can also be seen as vectors representing (box_width, box_height)
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
# top left
tl_intersect = torch.max(
bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] - bboxes_b[:, 2:] / 2
)
# bottom right
br_intersect = torch.min(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] + bboxes_b[:, 2:] / 2
)
bb_a = bboxes_a[:, 2:]
bb_b = bboxes_b[:, 2:]
elif fmt.lower() == 'coco': # xmin, ymin, w, h
# top left
tl_intersect = torch.max(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
)
# bottom right
br_intersect = torch.min(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, :2] + bboxes_b[:, 2:]
)
bb_a = bboxes_a[:, 2:]
bb_b = bboxes_b[:, 2:]
area_a = torch.prod(bb_a, 1)
area_b = torch.prod(bb_b, 1)
# torch.prod(input, dim, keepdim=False, dtype=None) → Tensor
# Returns the product of each row of the input tensor in the given dimension dim
# if tl, br do not form a nondegenerate rectangle, then the corresponding element in the `prod` would be 0
en = (tl_intersect < br_intersect).type(tl_intersect.type()).prod(dim=2) # shape `(N,K,2)` ---> shape `(N,K)`
area_intersect = torch.prod(br_intersect - tl_intersect, 2) * en # * ((tl < br).all())
area_union = (area_a[:, np.newaxis] + area_b - area_intersect)
iou = _true_divide(area_intersect, area_union)
if iou_type.lower() == 'iou':
return iou
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
# top left
tl_union = torch.min(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
) # of shape `(N,K,2)`
# bottom right
br_union = torch.max(
bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, 2:]
)
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
# top left
tl_union = torch.min(
bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] - bboxes_b[:, 2:] / 2
)
# bottom right
br_union = torch.max(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] + bboxes_b[:, 2:] / 2
)
elif fmt.lower() == 'coco': # xmin, ymin, w, h
# top left
tl_union = torch.min(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
)
# bottom right
br_union = torch.max(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, :2] + bboxes_b[:, 2:]
)
# c for covering, of shape `(N,K,2)`
# the last dim is box width, box height
bboxes_c = br_union - tl_union
area_covering = torch.prod(bboxes_c, 2) # shape `(N,K)`
giou = iou - _true_divide(area_covering - area_union, area_covering)
if iou_type.lower() == 'giou':
return giou
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
centre_a = (bboxes_a[..., 2 :] + bboxes_a[..., : 2]) / 2
centre_b = (bboxes_b[..., 2 :] + bboxes_b[..., : 2]) / 2
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
centre_a = bboxes_a[..., : 2]
centre_b = bboxes_b[..., : 2]
elif fmt.lower() == 'coco': # xmin, ymin, w, h
centre_a = bboxes_a[..., :2] + bboxes_a[..., 2:] / 2
centre_b = bboxes_b[..., :2] + bboxes_b[..., 2:] / 2
centre_dist = torch.norm(centre_a[:, np.newaxis] - centre_b, p='fro', dim=2)
diag_len = torch.norm(bboxes_c, p='fro', dim=2)
diou = iou - _true_divide(centre_dist.pow(2), diag_len.pow(2))
if iou_type.lower() == 'diou':
return diou
""" the legacy custom cosine similarity:
# bb_a of shape `(N,2)`, bb_b of shape `(K,2)`
v = torch.einsum('nm,km->nk', bb_a, bb_b)
v = _true_divide(v, (torch.norm(bb_a, p='fro', dim=1)[:,np.newaxis] * torch.norm(bb_b, p='fro', dim=1)))
# avoid nan for torch.acos near \pm 1
# https://github.com/pytorch/pytorch/issues/8069
eps = 1e-7
v = torch.clamp(v, -1+eps, 1-eps)
"""
v = F.cosine_similarity(bb_a[:,np.newaxis,:], bb_b, dim=-1)
v = (_true_divide(2*torch.acos(v), np.pi)).pow(2)
with torch.no_grad():
alpha = (_true_divide(v, 1-iou+v)) * ((iou>=0.5).type(iou.type()))
ciou = diou - alpha * v
if iou_type.lower() == 'ciou':
return ciou
def bboxes_giou(bboxes_a, bboxes_b, fmt='voc'):
return bboxes_iou(bboxes_a, bboxes_b, fmt, 'giou')
def bboxes_diou(bboxes_a, bboxes_b, fmt='voc'):
return bboxes_iou(bboxes_a, bboxes_b, fmt, 'diou')
def bboxes_ciou(bboxes_a, bboxes_b, fmt='voc'):
return bboxes_iou(bboxes_a, bboxes_b, fmt, 'ciou')
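# --- Hedged worked example (illustration only, not part of the original file) ---
# A unit box and a copy shifted by (0.5, 0.5) in VOC (xmin, ymin, xmax, ymax)
# format: the intersection area is 0.25 and the union 1.75, so the IoU is
# 1/7 ~= 0.1429; GIoU/DIoU/CIoU subtract their respective penalties from it.
def _example_bboxes_iou():
    a = torch.tensor([[0.0, 0.0, 1.0, 1.0]])
    b = torch.tensor([[0.5, 0.5, 1.5, 1.5]])
    return bboxes_iou(a, b, fmt='voc', iou_type='iou')  # tensor([[0.1429]])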
ttest
*.weights
*.pth
*.onnx
*.engine
*.pyc
*.infer
*.npy
z_demo_*
__pycache__
.idea
.vscode
runs
log
*.jpg
*.json
data/outcome
# This should be run on JetPack 4.4 / JetPack 4.4 GA with DeepStream 5.0 / DeepStream 5.0 GA.
1. Compile the custom plugin for Yolo
2. Convert the ONNX file to a TensorRT engine with trtexec / TensorRT (see the sketch below)
3. Change the model-engine-file in config_infer_primary_yoloV4.txt
4. In deepstream_app_config_yoloV4.txt, change
a) source0 : uri=file:<your file>
b) primary-gie : model-engine-file=<your_onnx_engine>
# Note that for multi-batch inference the overhead is large, because NMS is not used.
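A minimal sketch of step 2, assuming trtexec from the TensorRT installation is on the PATH and that the ONNX model was exported beforehand (file names are placeholders and the exact flags may differ between TensorRT releases):
import subprocess
# build a serialized FP16 engine from the exported ONNX model (names are placeholders)
subprocess.run([
    'trtexec',
    '--onnx=yolov4.onnx',
    '--saveEngine=yolov4_fp16.engine',
    '--fp16',
], check=True)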
################################################################################
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
# Following properties are mandatory when engine files are not specified:
# int8-calib-file(Only in INT8), model-file-format
# Caffemodel mandatory properties: model-file, proto-file, output-blob-names
# UFF: uff-file, input-dims, uff-input-blob-name, output-blob-names
# ONNX: onnx-file
#
# Mandatory properties for detectors:
# num-detected-classes
#
# Optional properties for detectors:
# cluster-mode(Default=Group Rectangles), interval(Primary mode only, Default=0)
# custom-lib-path
# parse-bbox-func-name
#
# Mandatory properties for classifiers:
# classifier-threshold, is-classifier
#
# Optional properties for classifiers:
# classifier-async-mode(Secondary mode only, Default=false)
#
# Optional properties in secondary mode:
# operate-on-gie-id(Default=0), operate-on-class-ids(Defaults to all classes),
# input-object-min-width, input-object-min-height, input-object-max-width,
# input-object-max-height
#
# Following properties are always recommended:
# batch-size(Default=1)
#
# Other optional properties:
# net-scale-factor(Default=1), network-mode(Default=0 i.e FP32),
# model-color-format(Default=0 i.e. RGB) model-engine-file, labelfile-path,
# mean-file, gie-unique-id(Default=0), offsets, process-mode (Default=1 i.e. primary),
# custom-lib-path, network-mode(Default=0 i.e FP32)
#
# The values in the config file are overridden by values set through GObject
# properties.
[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
#0=RGB, 1=BGR
model-color-format=0
model-engine-file=<onnx_engine_file>
labelfile-path=labels.txt
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=80
gie-unique-id=1
network-type=0
is-classifier=0
## 0=Group Rectangles, 1=DBSCAN, 2=NMS, 3= DBSCAN+NMS Hybrid, 4 = None(No clustering)
cluster-mode=2
maintain-aspect-ratio=1
parse-bbox-func-name=NvDsInferParseCustomYoloV4
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
engine-create-func-name=NvDsInferYoloCudaEngineGet
#scaling-filter=0
#scaling-compute-hw=0
#output-blob-names=2012
[class-attrs-all]
nms-iou-threshold=0.2
pre-cluster-threshold=0.4
################################################################################
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
[application]
enable-perf-measurement=1
perf-measurement-interval-sec=5
#gie-kitti-output-dir=streamscl
[tiled-display]
enable=0
rows=1
columns=1
width=1280
height=720
gpu-id=0
#(0): nvbuf-mem-default - Default memory allocated, specific to particular platform
#(1): nvbuf-mem-cuda-pinned - Allocate Pinned/Host cuda memory, applicable for Tesla
#(2): nvbuf-mem-cuda-device - Allocate Device cuda memory, applicable for Tesla
#(3): nvbuf-mem-cuda-unified - Allocate Unified cuda memory, applicable for Tesla
#(4): nvbuf-mem-surface-array - Allocate Surface Array memory, applicable for Jetson
nvbuf-memory-type=0
[source0]
enable=1
#Type - 1=CameraV4L2 2=URI 3=MultiURI
type=3
uri=file:<Your_file_source>
num-sources=1
gpu-id=0
# (0): memtype_device - Memory type Device
# (1): memtype_pinned - Memory type Host Pinned
# (2): memtype_unified - Memory type Unified
cudadec-memtype=0
[sink0]
enable=1
#Type - 1=FakeSink 2=EglSink 3=File
type=2
sync=1
source-id=0
gpu-id=0
[osd]
enable=1
gpu-id=0
border-width=1
text-size=12
text-color=1;1;1;1;
text-bg-color=0.3;0.3;0.3;1
font=Serif
show-clock=0
clock-x-offset=800
clock-y-offset=820
clock-text-size=12
clock-color=1;0;0;0
nvbuf-memory-type=0
[streammux]
gpu-id=0
##Boolean property to inform muxer that sources are live
live-source=0
batch-size=1
##time out in usec, to wait after the first buffer is available
##to push the batch even if the complete batch is not formed
batched-push-timeout=40000
## Set muxer output width and height
width=1280
height=720
##Enable to maintain aspect ratio wrt source, and allow black borders, works
##along with width, height properties
enable-padding=0
nvbuf-memory-type=0
# config-file property is mandatory for any gie section.
# Other properties are optional and if set will override the properties set in
# the infer config file.
[primary-gie]
enable=1
gpu-id=0
model-engine-file=<onnx_engine_file>
labelfile-path=labels.txt
#batch-size=1
#Required by the app for OSD, not a plugin property
bbox-border-color0=1;0;0;1
bbox-border-color1=0;1;1;1
bbox-border-color2=0;0;1;1
bbox-border-color3=0;1;0;1
interval=0
gie-unique-id=1
nvbuf-memory-type=0
config-file=config_infer_primary_yoloV4.txt
[sink1]
enable=1
type=3
#1=mp4 2=mkv
container=1
#1=h264 2=h265 3=mpeg4
codec=1
#encoder type 0=Hardware 1=Software
enc-type=0
sync=0
bitrate=4000000
#H264 Profile - 0=Baseline 2=Main 4=High
#H265 Profile - 0=Main 1=Main10
profile=0
output-file=fp16_clip1_cam1.mp4
source-id=0
[tracker]
enable=1
# For the case of NvDCF tracker, tracker-width and tracker-height must be a multiple of 32, respectively
tracker-width=608
tracker-height=608
#ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_iou.so
#ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_nvdcf.so
ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_klt.so
#ll-config-file required for IOU only
#ll-config-file=iou_config.txt
gpu-id=0
[tests]
file-loop=0
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
CUDA_VER?=
ifeq ($(CUDA_VER),)
$(error "CUDA_VER is not set")
endif
CC:= g++
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc
CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include
LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group
INCS:= $(wildcard *.h)
SRCFILES:= nvdsinfer_yolo_engine.cpp \
nvdsparsebbox_Yolo.cpp \
yoloPlugins.cpp \
trt_utils.cpp \
yolo.cpp \
kernels.cu
TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so
TARGET_OBJS:= $(SRCFILES:.cpp=.o)
TARGET_OBJS:= $(TARGET_OBJS:.cu=.o)
all: $(TARGET_LIB)
%.o: %.cpp $(INCS) Makefile
$(CC) -c -o $@ $(CFLAGS) $<
%.o: %.cu $(INCS) Makefile
$(NVCC) -c -o $@ --compiler-options '-fPIC' $<
$(TARGET_LIB) : $(TARGET_OBJS)
$(CC) -o $@ $(TARGET_OBJS) $(LFLAGS)
clean:
rm -rf $(TARGET_LIB)
/*
* Copyright (c) 2018-2019 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*
*/
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
__global__ void gpuYoloLayerV3(const float* input, float* output, const uint gridSize, const uint numOutputClasses,
const uint numBBoxes)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
uint z_id = blockIdx.z * blockDim.z + threadIdx.z;
if ((x_id >= gridSize) || (y_id >= gridSize) || (z_id >= numBBoxes))
{
return;
}
const int numGridCells = gridSize * gridSize;
const int bbindex = y_id * gridSize + x_id;
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
for (uint i = 0; i < numOutputClasses; ++i)
{
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]);
}
}
cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
const uint& numOutputClasses, const uint& numBBoxes,
uint64_t outputSize, cudaStream_t stream);
cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
const uint& numOutputClasses, const uint& numBBoxes,
uint64_t outputSize, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
dim3 number_of_blocks((gridSize / threads_per_block.x) + 1,
(gridSize / threads_per_block.y) + 1,
(numBBoxes / threads_per_block.z) + 1);
for (unsigned int batch = 0; batch < batchSize; ++batch)
{
gpuYoloLayerV3<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float*>(input) + (batch * outputSize),
reinterpret_cast<float*>(output) + (batch * outputSize), gridSize, numOutputClasses,
numBBoxes);
}
return cudaGetLastError();
}
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvdsinfer_custom_impl.h"
#include "nvdsinfer_context.h"
#include "yoloPlugins.h"
#include "yolo.h"
#include <algorithm>
#define USE_CUDA_ENGINE_GET_API 1
static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams)
{
std::string yoloCfg = initParams->customNetworkConfigFilePath;
std::string yoloType;
std::transform (yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) {
return std::tolower (c);});
if (yoloCfg.find("yolov2") != std::string::npos) {
if (yoloCfg.find("yolov2-tiny") != std::string::npos)
yoloType = "yolov2-tiny";
else
yoloType = "yolov2";
} else if (yoloCfg.find("yolov3") != std::string::npos) {
if (yoloCfg.find("yolov3-tiny") != std::string::npos)
yoloType = "yolov3-tiny";
else
yoloType = "yolov3";
} else {
std::cerr << "Yolo type is not defined from config file name:"
<< yoloCfg << std::endl;
return false;
}
networkInfo.networkType = yoloType;
networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
networkInfo.wtsFilePath = initParams->modelFilePath;
networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
networkInfo.inputBlobName = "data";
if (networkInfo.configFilePath.empty() ||
networkInfo.wtsFilePath.empty()) {
std::cerr << "Yolo config file or weights file is NOT specified."
<< std::endl;
return false;
}
if (!fileExists(networkInfo.configFilePath) ||
!fileExists(networkInfo.wtsFilePath)) {
std::cerr << "Yolo config file or weights file is NOT exist."
<< std::endl;
return false;
}
return true;
}
#if !USE_CUDA_ENGINE_GET_API
IModelParser* NvDsInferCreateModelParser(
const NvDsInferContextInitParams* initParams) {
NetworkInfo networkInfo;
if (!getYoloNetworkInfo(networkInfo, initParams)) {
return nullptr;
}
return new Yolo(networkInfo);
}
#else
extern "C"
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
const NvDsInferContextInitParams * const initParams,
nvinfer1::DataType dataType,
nvinfer1::ICudaEngine *& cudaEngine);
extern "C"
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
const NvDsInferContextInitParams * const initParams,
nvinfer1::DataType dataType,
nvinfer1::ICudaEngine *& cudaEngine)
{
NetworkInfo networkInfo;
if (!getYoloNetworkInfo(networkInfo, initParams)) {
return false;
}
Yolo yolo(networkInfo);
cudaEngine = yolo.createEngine (builder);
if (cudaEngine == nullptr)
{
std::cerr << "Failed to build cuda engine on "
<< networkInfo.configFilePath << std::endl;
return false;
}
return true;
}
#endif
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
numpy==1.18.2
torch==1.4.0
tensorboardX==2.0
scikit_image==0.16.2
matplotlib==2.2.3
tqdm==4.43.0
easydict==1.9
Pillow==7.1.2
skimage
opencv_python
pycocotools