Commit 153bbbdf authored by indigo

feat(init project): init project

parent 6ef4c73c
*.weights
*.pth
*.onnx
*.engine
*.pyc
*.infer
*.npy
z_demo_*
__pycache__
.idea
.vscode
runs
log
*.jpg
*.json
data/outcome
# This should be run on JetPack 4.4 / JetPack 4.4 GA with DeepStream 5.0 / DeepStream 5.0 GA.
1. Compile the custom Yolo plugin (nvdsinfer_custom_impl_Yolo).
2. Convert the ONNX file to a TensorRT engine with trtexec / TensorRT (a sample command is shown below).
3. Change model-engine-file in config_infer_primary_yoloV4.txt to the generated engine.
4. In deepstream_app_config_yoloV4.txt, change
a) source0 : uri=file:<your file> to point at your input file.
b) primary-gie : model-engine-file=<your_onnx_engine>
# Note that for multi-batch inference the overhead is large because NMS is not used.
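# Example for step 2: a typical trtexec invocation on JetPack 4.4 (TensorRT 7) might look like the
# following; the ONNX and engine file names are placeholders, and the precision/workspace flags
# should be adjusted to your platform. On Jetson, trtexec ships under /usr/src/tensorrt/bin.
#   trtexec --onnx=<your_yolov4_onnx> --explicitBatch --saveEngine=<your_onnx_engine> --fp16 --workspace=2048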
################################################################################
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
# Following properties are mandatory when engine files are not specified:
# int8-calib-file(Only in INT8), model-file-format
# Caffemodel mandatory properties: model-file, proto-file, output-blob-names
# UFF: uff-file, input-dims, uff-input-blob-name, output-blob-names
# ONNX: onnx-file
#
# Mandatory properties for detectors:
# num-detected-classes
#
# Optional properties for detectors:
# cluster-mode(Default=Group Rectangles), interval(Primary mode only, Default=0)
# custom-lib-path
# parse-bbox-func-name
#
# Mandatory properties for classifiers:
# classifier-threshold, is-classifier
#
# Optional properties for classifiers:
# classifier-async-mode(Secondary mode only, Default=false)
#
# Optional properties in secondary mode:
# operate-on-gie-id(Default=0), operate-on-class-ids(Defaults to all classes),
# input-object-min-width, input-object-min-height, input-object-max-width,
# input-object-max-height
#
# Following properties are always recommended:
# batch-size(Default=1)
#
# Other optional properties:
# net-scale-factor(Default=1), network-mode(Default=0 i.e FP32),
# model-color-format(Default=0 i.e. RGB) model-engine-file, labelfile-path,
# mean-file, gie-unique-id(Default=0), offsets, process-mode (Default=1 i.e. primary),
# custom-lib-path, network-mode(Default=0 i.e FP32)
#
# The values in the config file are overridden by values set through GObject
# properties.
[property]
gpu-id=0
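# 1/255: scales 8-bit pixel values into the 0..1 range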
net-scale-factor=0.0039215697906911373
#0=RGB, 1=BGR
model-color-format=0
model-engine-file=<onnx_engine_file>
labelfile-path=labels.txt
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=80
gie-unique-id=1
network-type=0
is-classifier=0
## 0=Group Rectangles, 1=DBSCAN, 2=NMS, 3= DBSCAN+NMS Hybrid, 4 = None(No clustering)
cluster-mode=2
maintain-aspect-ratio=1
parse-bbox-func-name=NvDsInferParseCustomYoloV4
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
engine-create-func-name=NvDsInferYoloCudaEngineGet
#scaling-filter=0
#scaling-compute-hw=0
#output-blob-names=2012
[class-attrs-all]
nms-iou-threshold=0.2
pre-cluster-threshold=0.4
################################################################################
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
[application]
enable-perf-measurement=1
perf-measurement-interval-sec=5
#gie-kitti-output-dir=streamscl
[tiled-display]
enable=0
rows=1
columns=1
width=1280
height=720
gpu-id=0
#(0): nvbuf-mem-default - Default memory allocated, specific to particular platform
#(1): nvbuf-mem-cuda-pinned - Allocate Pinned/Host cuda memory, applicable for Tesla
#(2): nvbuf-mem-cuda-device - Allocate Device cuda memory, applicable for Tesla
#(3): nvbuf-mem-cuda-unified - Allocate Unified cuda memory, applicable for Tesla
#(4): nvbuf-mem-surface-array - Allocate Surface Array memory, applicable for Jetson
nvbuf-memory-type=0
[source0]
enable=1
#Type - 1=CameraV4L2 2=URI 3=MultiURI
type=3
uri=file:<Your_file_source>
num-sources=1
gpu-id=0
# (0): memtype_device - Memory type Device
# (1): memtype_pinned - Memory type Host Pinned
# (2): memtype_unified - Memory type Unified
cudadec-memtype=0
[sink0]
enable=1
#Type - 1=FakeSink 2=EglSink 3=File
type=2
sync=1
source-id=0
gpu-id=0
[osd]
enable=1
gpu-id=0
border-width=1
text-size=12
text-color=1;1;1;1;
text-bg-color=0.3;0.3;0.3;1
font=Serif
show-clock=0
clock-x-offset=800
clock-y-offset=820
clock-text-size=12
clock-color=1;0;0;0
nvbuf-memory-type=0
[streammux]
gpu-id=0
##Boolean property to inform muxer that sources are live
live-source=0
batch-size=1
##time out in usec, to wait after the first buffer is available
##to push the batch even if the complete batch is not formed
batched-push-timeout=40000
## Set muxer output width and height
width=1280
height=720
##Enable to maintain aspect ratio wrt source, and allow black borders, works
##along with width, height properties
enable-padding=0
nvbuf-memory-type=0
# config-file property is mandatory for any gie section.
# Other properties are optional and if set will override the properties set in
# the infer config file.
[primary-gie]
enable=1
gpu-id=0
model-engine-file=<onnx_engine_file>
labelfile-path=labels.txt
#batch-size=1
#Required by the app for OSD, not a plugin property
bbox-border-color0=1;0;0;1
bbox-border-color1=0;1;1;1
bbox-border-color2=0;0;1;1
bbox-border-color3=0;1;0;1
interval=0
gie-unique-id=1
nvbuf-memory-type=0
config-file=config_infer_primary_yoloV4.txt
[sink1]
enable=1
type=3
#1=mp4 2=mkv
container=1
#1=h264 2=h265 3=mpeg4
codec=1
#encoder type 0=Hardware 1=Software
enc-type=0
sync=0
bitrate=4000000
#H264 Profile - 0=Baseline 2=Main 4=High
#H265 Profile - 0=Main 1=Main10
profile=0
output-file=fp16_clip1_cam1.mp4
source-id=0
[tracker]
enable=1
# For the NvDCF tracker, tracker-width and tracker-height must each be a multiple of 32
tracker-width=608
tracker-height=608
#ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_iou.so
#ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_nvdcf.so
ll-lib-file=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_mot_klt.so
#ll-config-file required for IOU only
#ll-config-file=iou_config.txt
gpu-id=0
[tests]
file-loop=0
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
CUDA_VER?=
ifeq ($(CUDA_VER),)
$(error "CUDA_VER is not set")
endif
CC:= g++
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc
CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include
LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group
INCS:= $(wildcard *.h)
SRCFILES:= nvdsinfer_yolo_engine.cpp \
nvdsparsebbox_Yolo.cpp \
yoloPlugins.cpp \
trt_utils.cpp \
yolo.cpp \
kernels.cu
TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so
TARGET_OBJS:= $(SRCFILES:.cpp=.o)
TARGET_OBJS:= $(TARGET_OBJS:.cu=.o)
all: $(TARGET_LIB)
%.o: %.cpp $(INCS) Makefile
$(CC) -c -o $@ $(CFLAGS) $<
%.o: %.cu $(INCS) Makefile
$(NVCC) -c -o $@ --compiler-options '-fPIC' $<
$(TARGET_LIB) : $(TARGET_OBJS)
$(CC) -o $@ $(TARGET_OBJS) $(LFLAGS)
clean:
rm -rf $(TARGET_LIB)
/*
* Copyright (c) 2018-2019 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*
*/
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
__global__ void gpuYoloLayerV3(const float* input, float* output, const uint gridSize, const uint numOutputClasses,
const uint numBBoxes)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
uint z_id = blockIdx.z * blockDim.z + threadIdx.z;
if ((x_id >= gridSize) || (y_id >= gridSize) || (z_id >= numBBoxes))
{
return;
}
const int numGridCells = gridSize * gridSize;
const int bbindex = y_id * gridSize + x_id;
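// Each box occupies (5 + numOutputClasses) channels per grid cell, laid out as
// [x, y, w, h, objectness, class scores...]; x, y, objectness and the class scores
// are passed through a sigmoid while w and h are exponentiated.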
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
for (uint i = 0; i < numOutputClasses; ++i)
{
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]);
}
}
cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
const uint& numOutputClasses, const uint& numBBoxes,
uint64_t outputSize, cudaStream_t stream);
cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
const uint& numOutputClasses, const uint& numBBoxes,
uint64_t outputSize, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
dim3 number_of_blocks((gridSize / threads_per_block.x) + 1,
(gridSize / threads_per_block.y) + 1,
(numBBoxes / threads_per_block.z) + 1);
for (unsigned int batch = 0; batch < batchSize; ++batch)
{
gpuYoloLayerV3<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float*>(input) + (batch * outputSize),
reinterpret_cast<float*>(output) + (batch * outputSize), gridSize, numOutputClasses,
numBBoxes);
}
return cudaGetLastError();
}
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvdsinfer_custom_impl.h"
#include "nvdsinfer_context.h"
#include "yoloPlugins.h"
#include "yolo.h"
#include <algorithm>
#define USE_CUDA_ENGINE_GET_API 1
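// When USE_CUDA_ENGINE_GET_API is set, the engine is built through NvDsInferYoloCudaEngineGet
// (referenced by engine-create-func-name in the infer config); otherwise a model parser is
// created via NvDsInferCreateModelParser.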
static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams)
{
std::string yoloCfg = initParams->customNetworkConfigFilePath;
std::string yoloType;
std::transform (yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) {
return std::tolower (c);});
if (yoloCfg.find("yolov2") != std::string::npos) {
if (yoloCfg.find("yolov2-tiny") != std::string::npos)
yoloType = "yolov2-tiny";
else
yoloType = "yolov2";
} else if (yoloCfg.find("yolov3") != std::string::npos) {
if (yoloCfg.find("yolov3-tiny") != std::string::npos)
yoloType = "yolov3-tiny";
else
yoloType = "yolov3";
} else {
std::cerr << "Yolo type is not defined from config file name:"
<< yoloCfg << std::endl;
return false;
}
networkInfo.networkType = yoloType;
networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
networkInfo.wtsFilePath = initParams->modelFilePath;
networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
networkInfo.inputBlobName = "data";
if (networkInfo.configFilePath.empty() ||
networkInfo.wtsFilePath.empty()) {
std::cerr << "Yolo config file or weights file is NOT specified."
<< std::endl;
return false;
}
if (!fileExists(networkInfo.configFilePath) ||
!fileExists(networkInfo.wtsFilePath)) {
std::cerr << "Yolo config file or weights file is NOT exist."
<< std::endl;
return false;
}
return true;
}
#if !USE_CUDA_ENGINE_GET_API
IModelParser* NvDsInferCreateModelParser(
const NvDsInferContextInitParams* initParams) {
NetworkInfo networkInfo;
if (!getYoloNetworkInfo(networkInfo, initParams)) {
return nullptr;
}
return new Yolo(networkInfo);
}
#else
extern "C"
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
const NvDsInferContextInitParams * const initParams,
nvinfer1::DataType dataType,
nvinfer1::ICudaEngine *& cudaEngine);
extern "C"
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
const NvDsInferContextInitParams * const initParams,
nvinfer1::DataType dataType,
nvinfer1::ICudaEngine *& cudaEngine)
{
NetworkInfo networkInfo;
if (!getYoloNetworkInfo(networkInfo, initParams)) {
return false;
}
Yolo yolo(networkInfo);
cudaEngine = yolo.createEngine (builder);
if (cudaEngine == nullptr)
{
std::cerr << "Failed to build cuda engine on "
<< networkInfo.configFilePath << std::endl;
return false;
}
return true;
}
#endif
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <unordered_map>
#include "nvdsinfer_custom_impl.h"
#include "trt_utils.h"
static const int NUM_CLASSES_YOLO = 80;
extern "C" bool NvDsInferParseCustomYoloV3(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList);
extern "C" bool NvDsInferParseCustomYoloV3Tiny(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList);
extern "C" bool NvDsInferParseCustomYoloV2(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList);
extern "C" bool NvDsInferParseCustomYoloV2Tiny(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList);
extern "C" bool NvDsInferParseCustomYoloTLT(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList);
extern "C" bool NvDsInferParseCustomYoloV4(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList);
/* This is a sample bounding box parsing function for the sample YoloV3 detector model */
static NvDsInferParseObjectInfo convertBBox(const float& bx, const float& by, const float& bw,
const float& bh, const int& stride, const uint& netW,
const uint& netH)
{
NvDsInferParseObjectInfo b;
// Restore coordinates to network input resolution
float xCenter = bx * stride;
float yCenter = by * stride;
float x0 = xCenter - bw / 2;
float y0 = yCenter - bh / 2;
float x1 = x0 + bw;
float y1 = y0 + bh;
x0 = clamp(x0, 0, netW);
y0 = clamp(y0, 0, netH);
x1 = clamp(x1, 0, netW);
y1 = clamp(y1, 0, netH);
b.left = x0;
b.width = clamp(x1 - x0, 0, netW);
b.top = y0;
b.height = clamp(y1 - y0, 0, netH);
return b;
}
static void addBBoxProposal(const float bx, const float by, const float bw, const float bh,
const uint stride, const uint& netW, const uint& netH, const int maxIndex,
const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
{
NvDsInferParseObjectInfo bbi = convertBBox(bx, by, bw, bh, stride, netW, netH);
if (bbi.width < 1 || bbi.height < 1) return;
bbi.detectionConfidence = maxProb;
bbi.classId = maxIndex;
binfo.push_back(bbi);
}
static std::vector<NvDsInferParseObjectInfo>
decodeYoloV2Tensor(
const float* detections, const std::vector<float> &anchors,
const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes,
const uint numOutputClasses, const uint& netW,
const uint& netH)
{
std::vector<NvDsInferParseObjectInfo> binfo;
for (uint y = 0; y < gridSizeH; ++y) {
for (uint x = 0; x < gridSizeW; ++x) {
for (uint b = 0; b < numBBoxes; ++b)
{
const float pw = anchors[b * 2];
const float ph = anchors[b * 2 + 1];
const int numGridCells = gridSizeH * gridSizeW;
const int bbindex = y * gridSizeW + x;
const float bx
= x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)];
const float by
= y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)];
const float bw
= pw * exp (detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)]);
const float bh
= ph * exp (detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)]);
const float objectness
= detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)];
float maxProb = 0.0f;
int maxIndex = -1;
for (uint i = 0; i < numOutputClasses; ++i)
{
float prob
= (detections[bbindex
+ numGridCells * (b * (5 + numOutputClasses) + (5 + i))]);
if (prob > maxProb)
{
maxProb = prob;
maxIndex = i;
}
}
maxProb = objectness * maxProb;
addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo);
}
}
}
return binfo;
}
static std::vector<NvDsInferParseObjectInfo>
decodeYoloV3Tensor(
const float* detections, const std::vector<int> &mask, const std::vector<float> &anchors,
const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes,
const uint numOutputClasses, const uint& netW,
const uint& netH)
{
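// Note: unlike decodeYoloV2Tensor above, no sigmoid/exp is applied here; the YoloLayerV3
// CUDA plugin (kernels.cu) has already activated x, y, objectness and the class scores
// and exponentiated w and h before this tensor reaches the parser.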
std::vector<NvDsInferParseObjectInfo> binfo;
for (uint y = 0; y < gridSizeH; ++y) {
for (uint x = 0; x < gridSizeW; ++x) {
for (uint b = 0; b < numBBoxes; ++b)
{
const float pw = anchors[mask[b] * 2];
const float ph = anchors[mask[b] * 2 + 1];
const int numGridCells = gridSizeH * gridSizeW;
const int bbindex = y * gridSizeW + x;
const float bx
= x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)];
const float by
= y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)];
const float bw
= pw * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)];
const float bh
= ph * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)];
const float objectness
= detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)];
float maxProb = 0.0f;
int maxIndex = -1;
for (uint i = 0; i < numOutputClasses; ++i)
{
float prob
= (detections[bbindex
+ numGridCells * (b * (5 + numOutputClasses) + (5 + i))]);
if (prob > maxProb)
{
maxProb = prob;
maxIndex = i;
}
}
maxProb = objectness * maxProb;
addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo);
}
}
}
return binfo;
}
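/* Sort the output layers by grid height (inferDims.d[1]) so that each feature map is
 * matched with the corresponding per-scale anchor mask below. */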
static inline std::vector<const NvDsInferLayerInfo*>
SortLayers(const std::vector<NvDsInferLayerInfo> & outputLayersInfo)
{
std::vector<const NvDsInferLayerInfo*> outLayers;
for (auto const &layer : outputLayersInfo) {
outLayers.push_back (&layer);
}
std::sort(outLayers.begin(), outLayers.end(),
[](const NvDsInferLayerInfo* a, const NvDsInferLayerInfo* b) {
return a->inferDims.d[1] < b->inferDims.d[1];
});
return outLayers;
}
static bool NvDsInferParseYoloV3(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList,
const std::vector<float> &anchors,
const std::vector<std::vector<int>> &masks)
{
const uint kNUM_BBOXES = 3;
const std::vector<const NvDsInferLayerInfo*> sortedLayers =
SortLayers (outputLayersInfo);
if (sortedLayers.size() != masks.size()) {
std::cerr << "ERROR: yoloV3 output layer.size: " << sortedLayers.size()
<< " does not match mask.size: " << masks.size() << std::endl;
return false;
}
if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured)
{
std::cerr << "WARNING: Num classes mismatch. Configured:"
<< detectionParams.numClassesConfigured
<< ", detected by network: " << NUM_CLASSES_YOLO << std::endl;
}
std::vector<NvDsInferParseObjectInfo> objects;
for (uint idx = 0; idx < masks.size(); ++idx) {
const NvDsInferLayerInfo &layer = *sortedLayers[idx]; // 255 x Grid x Grid
assert(layer.inferDims.numDims == 3);
const uint gridSizeH = layer.inferDims.d[1];
const uint gridSizeW = layer.inferDims.d[2];
const uint stride = DIVUP(networkInfo.width, gridSizeW);
assert(stride == DIVUP(networkInfo.height, gridSizeH));
std::vector<NvDsInferParseObjectInfo> outObjs =
decodeYoloV3Tensor((const float*)(layer.buffer), masks[idx], anchors, gridSizeW, gridSizeH, stride, kNUM_BBOXES,
NUM_CLASSES_YOLO, networkInfo.width, networkInfo.height);
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
}
objectList = objects;
return true;
}
static NvDsInferParseObjectInfo convertBBoxYoloV4(const float& bx1, const float& by1, const float& bx2,
const float& by2, const uint& netW, const uint& netH)
{
NvDsInferParseObjectInfo b;
// Restore coordinates to network input resolution
float x1 = bx1 * netW;
float y1 = by1 * netH;
float x2 = bx2 * netW;
float y2 = by2 * netH;
x1 = clamp(x1, 0, netW);
y1 = clamp(y1, 0, netH);
x2 = clamp(x2, 0, netW);
y2 = clamp(y2, 0, netH);
b.left = x1;
b.width = clamp(x2 - x1, 0, netW);
b.top = y1;
b.height = clamp(y2 - y1, 0, netH);
return b;
}
static void addBBoxProposalYoloV4(const float bx, const float by, const float bw, const float bh,
const uint& netW, const uint& netH, const int maxIndex,
const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
{
NvDsInferParseObjectInfo bbi = convertBBoxYoloV4(bx, by, bw, bh, netW, netH);
if (bbi.width < 1 || bbi.height < 1) return;
bbi.detectionConfidence = maxProb;
bbi.classId = maxIndex;
binfo.push_back(bbi);
}
static std::vector<NvDsInferParseObjectInfo>
decodeYoloV4Tensor(
const float* boxes, const float* scores,
const uint num_bboxes, NvDsInferParseDetectionParams const& detectionParams,
const uint& netW, const uint& netH)
{
std::vector<NvDsInferParseObjectInfo> binfo;
uint bbox_location = 0;
uint score_location = 0;
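// boxes packs [x1, y1, x2, y2] per detection (normalized coordinates, rescaled to the
// network input size in convertBBoxYoloV4); scores packs one probability per configured class.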
for (uint b = 0; b < num_bboxes; ++b)
{
float bx1 = boxes[bbox_location];
float by1 = boxes[bbox_location + 1];
float bx2 = boxes[bbox_location + 2];
float by2 = boxes[bbox_location + 3];
float maxProb = 0.0f;
int maxIndex = -1;
for (uint c = 0; c < detectionParams.numClassesConfigured; ++c)
{
float prob = scores[score_location + c];
if (prob > maxProb)
{
maxProb = prob;
maxIndex = c;
}
}
if (maxProb > detectionParams.perClassPreclusterThreshold[maxIndex])
{
addBBoxProposalYoloV4(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
}
bbox_location += 4;
score_location += detectionParams.numClassesConfigured;
}
return binfo;
}
/* C-linkage to prevent name-mangling */
static bool NvDsInferParseYoloV4(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured)
{
std::cerr << "WARNING: Num classes mismatch. Configured:"
<< detectionParams.numClassesConfigured
<< ", detected by network: " << NUM_CLASSES_YOLO << std::endl;
}
std::vector<NvDsInferParseObjectInfo> objects;
const NvDsInferLayerInfo &boxes = outputLayersInfo[0]; // num_boxes x 4
const NvDsInferLayerInfo &scores = outputLayersInfo[1]; // num_boxes x num_classes
const NvDsInferLayerInfo &subbox = outputLayersInfo[2];
//* printf("%d\n", subbox.inferDims.numDims);
// 3 dimensional: [num_boxes, 1, 4]
assert(boxes.inferDims.numDims == 3);
// 2 dimensional: [num_boxes, num_classes]
assert(scores.inferDims.numDims == 2);
// The second dimension should be num_classes
assert(detectionParams.numClassesConfigured == scores.inferDims.d[1]);
uint num_bboxes = boxes.inferDims.d[0];
// std::cout << "Network Info: " << networkInfo.height << " " << networkInfo.width << std::endl;
std::vector<NvDsInferParseObjectInfo> outObjs =
decodeYoloV4Tensor(
(const float*)(boxes.buffer), (const float*)(scores.buffer), num_bboxes, detectionParams,
networkInfo.width, networkInfo.height);
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
objectList = objects;
return true;
}
extern "C" bool NvDsInferParseCustomYoloV4(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
return NvDsInferParseYoloV4 (
outputLayersInfo, networkInfo, detectionParams, objectList);
}
extern "C" bool NvDsInferParseCustomYoloV3(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
static const std::vector<float> kANCHORS = {
10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0,
45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0};
static const std::vector<std::vector<int>> kMASKS = {
{6, 7, 8},
{3, 4, 5},
{0, 1, 2}};
return NvDsInferParseYoloV3 (
outputLayersInfo, networkInfo, detectionParams, objectList,
kANCHORS, kMASKS);
}
extern "C" bool NvDsInferParseCustomYoloV3Tiny(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
static const std::vector<float> kANCHORS = {
10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319};
static const std::vector<std::vector<int>> kMASKS = {
{3, 4, 5},
//{0, 1, 2}}; // as per output result, select {1,2,3}
{1, 2, 3}};
return NvDsInferParseYoloV3 (
outputLayersInfo, networkInfo, detectionParams, objectList,
kANCHORS, kMASKS);
}
static bool NvDsInferParseYoloV2(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
// copy anchor data from yolov2.cfg file
std::vector<float> anchors = {0.57273, 0.677385, 1.87446, 2.06253, 3.33843,
5.47434, 7.88282, 3.52778, 9.77052, 9.16828};
const uint kNUM_BBOXES = 5;
if (outputLayersInfo.empty()) {
std::cerr << "Could not find output layer in bbox parsing" << std::endl;;
return false;
}
const NvDsInferLayerInfo &layer = outputLayersInfo[0];
if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured)
{
std::cerr << "WARNING: Num classes mismatch. Configured:"
<< detectionParams.numClassesConfigured
<< ", detected by network: " << NUM_CLASSES_YOLO << std::endl;
}
assert(layer.inferDims.numDims == 3);
const uint gridSizeH = layer.inferDims.d[1];
const uint gridSizeW = layer.inferDims.d[2];
const uint stride = DIVUP(networkInfo.width, gridSizeW);
assert(stride == DIVUP(networkInfo.height, gridSizeH));
for (auto& anchor : anchors) {
anchor *= stride;
}
std::vector<NvDsInferParseObjectInfo> objects =
decodeYoloV2Tensor((const float*)(layer.buffer), anchors, gridSizeW, gridSizeH, stride, kNUM_BBOXES,
NUM_CLASSES_YOLO, networkInfo.width, networkInfo.height);
objectList = objects;
return true;
}
extern "C" bool NvDsInferParseCustomYoloV2(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
return NvDsInferParseYoloV2 (
outputLayersInfo, networkInfo, detectionParams, objectList);
}
extern "C" bool NvDsInferParseCustomYoloV2Tiny(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
return NvDsInferParseYoloV2 (
outputLayersInfo, networkInfo, detectionParams, objectList);
}
extern "C" bool NvDsInferParseCustomYoloTLT(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
if(outputLayersInfo.size() != 4)
{
std::cerr << "Mismatch in the number of output buffers."
<< "Expected 4 output buffers, detected in the network :"
<< outputLayersInfo.size() << std::endl;
return false;
}
const int topK = 200;
const int* keepCount = static_cast <const int*>(outputLayersInfo.at(0).buffer);
const float* boxes = static_cast <const float*>(outputLayersInfo.at(1).buffer);
const float* scores = static_cast <const float*>(outputLayersInfo.at(2).buffer);
const float* cls = static_cast <const float*>(outputLayersInfo.at(3).buffer);
for (int i = 0; (i < keepCount[0]) && (objectList.size() <= topK); ++i)
{
const float* loc = &boxes[0] + (i * 4);
const float* conf = &scores[0] + i;
const float* cls_id = &cls[0] + i;
if(conf[0] > 1.001)
continue;
if((loc[0] < 0) || (loc[1] < 0) || (loc[2] < 0) || (loc[3] < 0))
continue;
if((loc[0] > networkInfo.width) || (loc[2] > networkInfo.width) || (loc[1] > networkInfo.height) || (loc[3] > networkInfo.height))
continue;
if((loc[2] < loc[0]) || (loc[3] < loc[1]))
continue;
if(((loc[3] - loc[1]) > networkInfo.height) || ((loc[2]-loc[0]) > networkInfo.width))
continue;
NvDsInferParseObjectInfo curObj{static_cast<unsigned int>(cls_id[0]),
loc[0],loc[1],(loc[2]-loc[0]),
(loc[3]-loc[1]), conf[0]};
objectList.push_back(curObj);
}
return true;
}
/* Check that the custom function has been defined correctly */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV4);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV3);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV3Tiny);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV2);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV2Tiny);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloTLT);
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "trt_utils.h"
#include <experimental/filesystem>
#include <fstream>
#include <iomanip>
#include <functional>
#include <algorithm>
#include <math.h>
#include "NvInferPlugin.h"
static void leftTrim(std::string& s)
{
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
}
static void rightTrim(std::string& s)
{
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end());
}
std::string trim(std::string s)
{
leftTrim(s);
rightTrim(s);
return s;
}
float clamp(const float val, const float minVal, const float maxVal)
{
assert(minVal <= maxVal);
return std::min(maxVal, std::max(minVal, val));
}
bool fileExists(const std::string fileName, bool verbose)
{
if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName)))
{
if (verbose) std::cout << "File does not exist : " << fileName << std::endl;
return false;
}
return true;
}
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
{
assert(fileExists(weightsFilePath));
std::cout << "Loading pre-trained weights..." << std::endl;
std::ifstream file(weightsFilePath, std::ios_base::binary);
assert(file.good());
std::string line;
if (networkType == "yolov2")
{
// Skip the 4 int32 header values (16 bytes) at the start of the stream
file.ignore(4 * 4);
}
else if ((networkType == "yolov3") || (networkType == "yolov3-tiny")
|| (networkType == "yolov2-tiny"))
{
// Skip the 5 int32 header values (20 bytes) at the start of the stream
file.ignore(4 * 5);
}
else
{
std::cout << "Invalid network type" << std::endl;
assert(0);
}
std::vector<float> weights;
char floatWeight[4];
while (!file.eof())
{
file.read(floatWeight, 4);
assert(file.gcount() == 4);
weights.push_back(*reinterpret_cast<float*>(floatWeight));
if (file.peek() == std::istream::traits_type::eof()) break;
}
std::cout << "Loading weights of " << networkType << " complete!"
<< std::endl;
std::cout << "Total Number of weights read : " << weights.size() << std::endl;
return weights;
}
std::string dimsToString(const nvinfer1::Dims d)
{
std::stringstream s;
assert(d.nbDims >= 1);
for (int i = 0; i < d.nbDims - 1; ++i)
{
s << std::setw(4) << d.d[i] << " x";
}
s << std::setw(4) << d.d[d.nbDims - 1];
return s.str();
}
void displayDimType(const nvinfer1::Dims d)
{
std::cout << "(" << d.nbDims << ") ";
for (int i = 0; i < d.nbDims; ++i)
{
switch (d.type[i])
{
case nvinfer1::DimensionType::kSPATIAL: std::cout << "kSPATIAL "; break;
case nvinfer1::DimensionType::kCHANNEL: std::cout << "kCHANNEL "; break;
case nvinfer1::DimensionType::kINDEX: std::cout << "kINDEX "; break;
case nvinfer1::DimensionType::kSEQUENCE: std::cout << "kSEQUENCE "; break;
}
}
std::cout << std::endl;
}
int getNumChannels(nvinfer1::ITensor* t)
{
nvinfer1::Dims d = t->getDimensions();
assert(d.nbDims == 3);
return d.d[0];
}
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims)
{
assert(inputDims.nbDims == 3);
return inputDims.d[0] * inputDims.d[1] * inputDims.d[2];
}
nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
{
assert(block.at("type") == "maxpool");
assert(block.find("size") != block.end());
assert(block.find("stride") != block.end());
int size = std::stoi(block.at("size"));
int stride = std::stoi(block.at("stride"));
nvinfer1::IPoolingLayer* pool
= network->addPooling(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size});
assert(pool);
std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
pool->setStride(nvinfer1::DimsHW{stride, stride});
pool->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
pool->setName(maxpoolLayerName.c_str());
return pool;
}
nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
int& inputChannels, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network)
{
assert(block.at("type") == "convolutional");
assert(block.find("batch_normalize") == block.end());
assert(block.at("activation") == "linear");
assert(block.find("filters") != block.end());
assert(block.find("pad") != block.end());
assert(block.find("size") != block.end());
assert(block.find("stride") != block.end());
int filters = std::stoi(block.at("filters"));
int padding = std::stoi(block.at("pad"));
int kernelSize = std::stoi(block.at("size"));
int stride = std::stoi(block.at("stride"));
int pad;
if (padding)
pad = (kernelSize - 1) / 2;
else
pad = 0;
// load the convolution layer bias
nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, filters};
float* val = new float[filters];
for (int i = 0; i < filters; ++i)
{
val[i] = weights[weightPtr];
weightPtr++;
}
convBias.values = val;
trtWeights.push_back(convBias);
// load the convolutional layer weights
int size = filters * inputChannels * kernelSize * kernelSize;
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
val = new float[size];
for (int i = 0; i < size; ++i)
{
val[i] = weights[weightPtr];
weightPtr++;
}
convWt.values = val;
trtWeights.push_back(convWt);
nvinfer1::IConvolutionLayer* conv = network->addConvolution(
*input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
assert(conv != nullptr);
std::string convLayerName = "conv_" + std::to_string(layerIdx);
conv->setName(convLayerName.c_str());
conv->setStride(nvinfer1::DimsHW{stride, stride});
conv->setPadding(nvinfer1::DimsHW{pad, pad});
return conv;
}
nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
int& inputChannels, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network)
{
assert(block.at("type") == "convolutional");
assert(block.find("batch_normalize") != block.end());
assert(block.at("batch_normalize") == "1");
assert(block.at("activation") == "leaky");
assert(block.find("filters") != block.end());
assert(block.find("pad") != block.end());
assert(block.find("size") != block.end());
assert(block.find("stride") != block.end());
bool batchNormalize, bias;
if (block.find("batch_normalize") != block.end())
{
batchNormalize = (block.at("batch_normalize") == "1");
bias = false;
}
else
{
batchNormalize = false;
bias = true;
}
// all conv_bn_leaky layers assume bias is false
assert(batchNormalize == true && bias == false);
UNUSED(batchNormalize);
UNUSED(bias);
int filters = std::stoi(block.at("filters"));
int padding = std::stoi(block.at("pad"));
int kernelSize = std::stoi(block.at("size"));
int stride = std::stoi(block.at("stride"));
int pad;
if (padding)
pad = (kernelSize - 1) / 2;
else
pad = 0;
/***** CONVOLUTION LAYER *****/
/*****************************/
// batch norm weights are before the conv layer
// load BN biases (bn_biases)
std::vector<float> bnBiases;
for (int i = 0; i < filters; ++i)
{
bnBiases.push_back(weights[weightPtr]);
weightPtr++;
}
// load BN weights
std::vector<float> bnWeights;
for (int i = 0; i < filters; ++i)
{
bnWeights.push_back(weights[weightPtr]);
weightPtr++;
}
// load BN running_mean
std::vector<float> bnRunningMean;
for (int i = 0; i < filters; ++i)
{
bnRunningMean.push_back(weights[weightPtr]);
weightPtr++;
}
// load BN running_var
std::vector<float> bnRunningVar;
for (int i = 0; i < filters; ++i)
{
// 1e-05 for numerical stability
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
weightPtr++;
}
// load Conv layer weights (GKCRS)
int size = filters * inputChannels * kernelSize * kernelSize;
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
float* val = new float[size];
for (int i = 0; i < size; ++i)
{
val[i] = weights[weightPtr];
weightPtr++;
}
convWt.values = val;
trtWeights.push_back(convWt);
nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0};
trtWeights.push_back(convBias);
nvinfer1::IConvolutionLayer* conv = network->addConvolution(
*input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
assert(conv != nullptr);
std::string convLayerName = "conv_" + std::to_string(layerIdx);
conv->setName(convLayerName.c_str());
conv->setStride(nvinfer1::DimsHW{stride, stride});
conv->setPadding(nvinfer1::DimsHW{pad, pad});
/***** BATCHNORM LAYER *****/
/***************************/
size = filters;
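// Fold batch norm into an affine scale layer: y = scale * x + shift, with
// scale = gamma / sqrt(var + eps) and shift = beta - mean * gamma / sqrt(var + eps)
// (bnRunningVar above already stores sqrt(var + eps)).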
// create the weights
nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
float* shiftWt = new float[size];
for (int i = 0; i < size; ++i)
{
shiftWt[i]
= bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
}
shift.values = shiftWt;
float* scaleWt = new float[size];
for (int i = 0; i < size; ++i)
{
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
}
scale.values = scaleWt;
float* powerWt = new float[size];
for (int i = 0; i < size; ++i)
{
powerWt[i] = 1.0;
}
power.values = powerWt;
trtWeights.push_back(shift);
trtWeights.push_back(scale);
trtWeights.push_back(power);
// Add the batch norm layers
nvinfer1::IScaleLayer* bn = network->addScale(
*conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
assert(bn != nullptr);
std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
bn->setName(bnLayerName.c_str());
/***** ACTIVATION LAYER *****/
/****************************/
nvinfer1::ITensor* bnOutput = bn->getOutput(0);
nvinfer1::IActivationLayer* leaky = network->addActivation(
*bnOutput, nvinfer1::ActivationType::kLEAKY_RELU);
assert(leaky != nullptr);
leaky->setAlpha(0.1);
std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
leaky->setName(leakyLayerName.c_str());
return leaky;
}
nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels,
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
{
assert(block.at("type") == "upsample");
nvinfer1::Dims inpDims = input->getDimensions();
assert(inpDims.nbDims == 3);
assert(inpDims.d[1] == inpDims.d[2]);
int h = inpDims.d[1];
int w = inpDims.d[2];
int stride = std::stoi(block.at("stride"));
// add pre multiply matrix as a constant
nvinfer1::Dims preDims{3,
{1, stride * h, w},
{nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL,
nvinfer1::DimensionType::kSPATIAL}};
int size = stride * h * w;
nvinfer1::Weights preMul{nvinfer1::DataType::kFLOAT, nullptr, size};
float* preWt = new float[size];
/* (2*h * w)
[ [1, 0, ..., 0],
[1, 0, ..., 0],
[0, 1, ..., 0],
[0, 1, ..., 0],
...,
...,
[0, 0, ..., 1],
[0, 0, ..., 1] ]
*/
for (int i = 0, idx = 0; i < h; ++i)
{
for (int s = 0; s < stride; ++s)
{
for (int j = 0; j < w; ++j, ++idx)
{
preWt[idx] = (i == j) ? 1.0 : 0.0;
}
}
}
preMul.values = preWt;
trtWeights.push_back(preMul);
nvinfer1::IConstantLayer* preM = network->addConstant(preDims, preMul);
assert(preM != nullptr);
std::string preLayerName = "preMul_" + std::to_string(layerIdx);
preM->setName(preLayerName.c_str());
// add post multiply matrix as a constant
nvinfer1::Dims postDims{3,
{1, h, stride * w},
{nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL,
nvinfer1::DimensionType::kSPATIAL}};
size = stride * h * w;
nvinfer1::Weights postMul{nvinfer1::DataType::kFLOAT, nullptr, size};
float* postWt = new float[size];
/* (h * 2*w)
[ [1, 1, 0, 0, ..., 0, 0],
[0, 0, 1, 1, ..., 0, 0],
...,
...,
[0, 0, 0, 0, ..., 1, 1] ]
*/
for (int i = 0, idx = 0; i < h; ++i)
{
for (int j = 0; j < stride * w; ++j, ++idx)
{
postWt[idx] = (j / stride == i) ? 1.0 : 0.0;
}
}
postMul.values = postWt;
trtWeights.push_back(postMul);
nvinfer1::IConstantLayer* post_m = network->addConstant(postDims, postMul);
assert(post_m != nullptr);
std::string postLayerName = "postMul_" + std::to_string(layerIdx);
post_m->setName(postLayerName.c_str());
// add matrix multiply layers for upsampling
nvinfer1::IMatrixMultiplyLayer* mm1
= network->addMatrixMultiply(*preM->getOutput(0), nvinfer1::MatrixOperation::kNONE, *input,
nvinfer1::MatrixOperation::kNONE);
assert(mm1 != nullptr);
std::string mm1LayerName = "mm1_" + std::to_string(layerIdx);
mm1->setName(mm1LayerName.c_str());
nvinfer1::IMatrixMultiplyLayer* mm2
= network->addMatrixMultiply(*mm1->getOutput(0), nvinfer1::MatrixOperation::kNONE,
*post_m->getOutput(0), nvinfer1::MatrixOperation::kNONE);
assert(mm2 != nullptr);
std::string mm2LayerName = "mm2_" + std::to_string(layerIdx);
mm2->setName(mm2LayerName.c_str());
return mm2;
}
void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
std::string layerOutput, std::string weightPtr)
{
std::cout << std::setw(6) << std::left << layerIndex << std::setw(15) << std::left << layerName;
std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left
<< layerOutput;
std::cout << std::setw(6) << std::left << weightPtr << std::endl;
}
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __TRT_UTILS_H__
#define __TRT_UTILS_H__
#include <set>
#include <map>
#include <string>
#include <vector>
#include <cassert>
#include <iostream>
#include <fstream>
#include "NvInfer.h"
#define UNUSED(expr) (void)(expr)
#define DIVUP(n, d) (((n) + (d) - 1) / (d))
std::string trim(std::string s);
float clamp(const float val, const float minVal, const float maxVal);
bool fileExists(const std::string fileName, bool verbose = true);
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType);
std::string dimsToString(const nvinfer1::Dims d);
void displayDimType(const nvinfer1::Dims d);
int getNumChannels(nvinfer1::ITensor* t);
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims);
// Helper functions to create yolo engine
nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network);
nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
int& inputChannels, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network);
nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
int& inputChannels, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network);
nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels,
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network);
void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
std::string layerOutput, std::string weightPtr);
#endif
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "yolo.h"
#include "yoloPlugins.h"
#include <fstream>
#include <iomanip>
#include <iterator>
Yolo::Yolo(const NetworkInfo& networkInfo)
: m_NetworkType(networkInfo.networkType), // yolov3
m_ConfigFilePath(networkInfo.configFilePath), // yolov3.cfg
m_WtsFilePath(networkInfo.wtsFilePath), // yolov3.weights
m_DeviceType(networkInfo.deviceType), // kDLA, kGPU
m_InputBlobName(networkInfo.inputBlobName), // data
m_InputH(0),
m_InputW(0),
m_InputC(0),
m_InputSize(0)
{}
Yolo::~Yolo()
{
destroyNetworkUtils();
}
nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder)
{
assert (builder);
std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
std::vector<nvinfer1::Weights> trtWeights;
nvinfer1::INetworkDefinition *network = builder->createNetwork();
if (parseModel(*network) != NVDSINFER_SUCCESS) {
network->destroy();
return nullptr;
}
// Build the engine
std::cout << "Building the TensorRT Engine..." << std::endl;
nvinfer1::ICudaEngine * engine = builder->buildCudaEngine(*network);
if (engine) {
std::cout << "Building complete!" << std::endl;
} else {
std::cerr << "Building engine failed!" << std::endl;
}
// destroy
network->destroy();
return engine;
}
NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) {
destroyNetworkUtils();
m_ConfigBlocks = parseConfigFile(m_ConfigFilePath);
parseConfigBlocks();
std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
// build yolo network
std::cout << "Building Yolo network..." << std::endl;
NvDsInferStatus status = buildYoloNetwork(weights, network);
if (status == NVDSINFER_SUCCESS) {
std::cout << "Building yolo network complete!" << std::endl;
} else {
std::cerr << "Building yolo network failed!" << std::endl;
}
return status;
}
NvDsInferStatus Yolo::buildYoloNetwork(
std::vector<float>& weights, nvinfer1::INetworkDefinition& network) {
int weightPtr = 0;
int channels = m_InputC;
nvinfer1::ITensor* data =
network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
nvinfer1::DimsCHW{static_cast<int>(m_InputC),
static_cast<int>(m_InputH), static_cast<int>(m_InputW)});
assert(data != nullptr && data->getDimensions().nbDims > 0);
nvinfer1::ITensor* previous = data;
std::vector<nvinfer1::ITensor*> tensorOutputs;
uint outputTensorCount = 0;
// build the network using the network API
for (uint i = 0; i < m_ConfigBlocks.size(); ++i) {
// check if num. of channels is correct
assert(getNumChannels(previous) == channels);
std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")";
if (m_ConfigBlocks.at(i).at("type") == "net") {
printLayerInfo("", "layer", " inp_size", " out_size", "weightPtr");
} else if (m_ConfigBlocks.at(i).at("type") == "convolutional") {
std::string inputVol = dimsToString(previous->getDimensions());
nvinfer1::ILayer* out;
std::string layerType;
// check if batch_norm enabled
if (m_ConfigBlocks.at(i).find("batch_normalize") !=
m_ConfigBlocks.at(i).end()) {
out = netAddConvBNLeaky(i, m_ConfigBlocks.at(i), weights,
m_TrtWeights, weightPtr, channels, previous, &network);
layerType = "conv-bn-leaky";
}
else
{
out = netAddConvLinear(i, m_ConfigBlocks.at(i), weights,
m_TrtWeights, weightPtr, channels, previous, &network);
layerType = "conv-linear";
}
previous = out->getOutput(0);
assert(previous != nullptr);
channels = getNumChannels(previous);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(out->getOutput(0));
printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
} else if (m_ConfigBlocks.at(i).at("type") == "shortcut") {
assert(m_ConfigBlocks.at(i).at("activation") == "linear");
assert(m_ConfigBlocks.at(i).find("from") !=
m_ConfigBlocks.at(i).end());
int from = stoi(m_ConfigBlocks.at(i).at("from"));
std::string inputVol = dimsToString(previous->getDimensions());
// check if indexes are correct
assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
assert(i + from - 1 < i - 2);
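// Index mapping: m_ConfigBlocks[0] is the [net] block and pushes no tensor, so the
// tensor produced by config block i lives at tensorOutputs[i - 1]. Hence
// tensorOutputs[i - 2] is the immediately preceding layer and tensorOutputs[i + from - 1]
// is the layer referenced by the (negative) 'from' offset.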
nvinfer1::IElementWiseLayer* ew = network.addElementWise(
*tensorOutputs[i - 2], *tensorOutputs[i + from - 1],
nvinfer1::ElementWiseOperation::kSUM);
assert(ew != nullptr);
std::string ewLayerName = "shortcut_" + std::to_string(i);
ew->setName(ewLayerName.c_str());
previous = ew->getOutput(0);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(ew->getOutput(0));
printLayerInfo(layerIndex, "skip", inputVol, outputVol, " -");
} else if (m_ConfigBlocks.at(i).at("type") == "yolo") {
nvinfer1::Dims prevTensorDims = previous->getDimensions();
assert(prevTensorDims.d[1] == prevTensorDims.d[2]);
TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount);
curYoloTensor.gridSize = prevTensorDims.d[1];
curYoloTensor.stride = m_InputW / curYoloTensor.gridSize;
m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSize
* curYoloTensor.gridSize
* (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses));
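// Illustrative size: for a 13x13 grid with 3 boxes per cell and 80 classes,
// the blob holds 13 * 13 * 3 * (5 + 80) = 43,095 floats per batch item.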
std::string layerName = "yolo_" + std::to_string(i);
curYoloTensor.blobName = layerName;
nvinfer1::IPluginV2* yoloPlugin
= new YoloLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes,
m_OutputTensors.at(outputTensorCount).numClasses,
m_OutputTensors.at(outputTensorCount).gridSize);
assert(yoloPlugin != nullptr);
nvinfer1::IPluginV2Layer* yolo =
network.addPluginV2(&previous, 1, *yoloPlugin);
assert(yolo != nullptr);
yolo->setName(layerName.c_str());
std::string inputVol = dimsToString(previous->getDimensions());
previous = yolo->getOutput(0);
assert(previous != nullptr);
previous->setName(layerName.c_str());
std::string outputVol = dimsToString(previous->getDimensions());
network.markOutput(*previous);
channels = getNumChannels(previous);
tensorOutputs.push_back(yolo->getOutput(0));
printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr));
++outputTensorCount;
} else if (m_ConfigBlocks.at(i).at("type") == "region") {
nvinfer1::Dims prevTensorDims = previous->getDimensions();
assert(prevTensorDims.d[1] == prevTensorDims.d[2]);
TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount);
curRegionTensor.gridSize = prevTensorDims.d[1];
curRegionTensor.stride = m_InputW / curRegionTensor.gridSize;
m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSize
* curRegionTensor.gridSize
* (curRegionTensor.numBBoxes * (5 + curRegionTensor.numClasses));
std::string layerName = "region_" + std::to_string(i);
curRegionTensor.blobName = layerName;
nvinfer1::plugin::RegionParameters RegionParameters{
static_cast<int>(curRegionTensor.numBBoxes), 4,
static_cast<int>(curRegionTensor.numClasses), nullptr};
std::string inputVol = dimsToString(previous->getDimensions());
nvinfer1::IPluginV2* regionPlugin
= createRegionPlugin(RegionParameters);
assert(regionPlugin != nullptr);
nvinfer1::IPluginV2Layer* region =
network.addPluginV2(&previous, 1, *regionPlugin);
assert(region != nullptr);
region->setName(layerName.c_str());
previous = region->getOutput(0);
assert(previous != nullptr);
previous->setName(layerName.c_str());
std::string outputVol = dimsToString(previous->getDimensions());
network.markOutput(*previous);
channels = getNumChannels(previous);
tensorOutputs.push_back(region->getOutput(0));
printLayerInfo(layerIndex, "region", inputVol, outputVol, std::to_string(weightPtr));
std::cout << "Anchors are being converted to network input resolution i.e. Anchors x "
<< curRegionTensor.stride << " (stride)" << std::endl;
for (auto& anchor : curRegionTensor.anchors) anchor *= curRegionTensor.stride;
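// Example: with a stride of 32, an anchor value of 1.08 (grid-cell units in the cfg)
// becomes 1.08 * 32 = 34.56 pixels at the network input resolution.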
++outputTensorCount;
} else if (m_ConfigBlocks.at(i).at("type") == "reorg") {
std::string inputVol = dimsToString(previous->getDimensions());
nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2);
assert(reorgPlugin != nullptr);
nvinfer1::IPluginV2Layer* reorg =
network.addPluginV2(&previous, 1, *reorgPlugin);
assert(reorg != nullptr);
std::string layerName = "reorg_" + std::to_string(i);
reorg->setName(layerName.c_str());
previous = reorg->getOutput(0);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
channels = getNumChannels(previous);
tensorOutputs.push_back(reorg->getOutput(0));
printLayerInfo(layerIndex, "reorg", inputVol, outputVol, std::to_string(weightPtr));
}
// route layers (single or concat)
else if (m_ConfigBlocks.at(i).at("type") == "route") {
std::string strLayers = m_ConfigBlocks.at(i).at("layers");
std::vector<int> idxLayers;
size_t lastPos = 0, pos = 0;
while ((pos = strLayers.find(',', lastPos)) != std::string::npos) {
int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos)));
idxLayers.push_back (vL);
lastPos = pos + 1;
}
if (lastPos < strLayers.length()) {
std::string lastV = trim(strLayers.substr(lastPos));
if (!lastV.empty()) {
idxLayers.push_back (std::stoi(lastV));
}
}
assert (!idxLayers.empty());
std::vector<nvinfer1::ITensor*> concatInputs;
for (int idxLayer : idxLayers) {
if (idxLayer < 0) {
idxLayer = tensorOutputs.size() + idxLayer;
}
assert (idxLayer >= 0 && idxLayer < (int)tensorOutputs.size());
concatInputs.push_back (tensorOutputs[idxLayer]);
}
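// Example: "layers = -1, 61" (as in the yolov3 config below) resolves to the
// previous layer's output plus the output of darknet layer 61, which are then
// concatenated along the channel axis.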
nvinfer1::IConcatenationLayer* concat =
network.addConcatenation(concatInputs.data(), concatInputs.size());
assert(concat != nullptr);
std::string concatLayerName = "route_" + std::to_string(i - 1);
concat->setName(concatLayerName.c_str());
// concatenate along the channel dimension
concat->setAxis(0);
previous = concat->getOutput(0);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
// set the output volume depth
channels = getNumChannels(previous);
tensorOutputs.push_back(concat->getOutput(0));
printLayerInfo(layerIndex, "route", " -", outputVol,
std::to_string(weightPtr));
} else if (m_ConfigBlocks.at(i).at("type") == "upsample") {
std::string inputVol = dimsToString(previous->getDimensions());
nvinfer1::ILayer* out = netAddUpsample(i - 1, m_ConfigBlocks[i],
weights, m_TrtWeights, channels, previous, &network);
previous = out->getOutput(0);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(out->getOutput(0));
printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -");
} else if (m_ConfigBlocks.at(i).at("type") == "maxpool") {
std::string inputVol = dimsToString(previous->getDimensions());
nvinfer1::ILayer* out =
netAddMaxpool(i, m_ConfigBlocks.at(i), previous, &network);
previous = out->getOutput(0);
assert(previous != nullptr);
std::string outputVol = dimsToString(previous->getDimensions());
tensorOutputs.push_back(out->getOutput(0));
printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr));
}
else
{
std::cout << "Unsupported layer type --> \""
<< m_ConfigBlocks.at(i).at("type") << "\"" << std::endl;
assert(0);
}
}
if ((int)weights.size() != weightPtr)
{
std::cout << "Number of unused weights left : " << weights.size() - weightPtr << std::endl;
assert(0);
}
std::cout << "Output yolo blob names :" << std::endl;
for (auto& tensor : m_OutputTensors) {
std::cout << tensor.blobName << std::endl;
}
int nbLayers = network.getNbLayers();
std::cout << "Total number of yolo layers: " << nbLayers << std::endl;
return NVDSINFER_SUCCESS;
}
std::vector<std::map<std::string, std::string>>
Yolo::parseConfigFile (const std::string cfgFilePath)
{
assert(fileExists(cfgFilePath));
std::ifstream file(cfgFilePath);
assert(file.good());
std::string line;
std::vector<std::map<std::string, std::string>> blocks;
std::map<std::string, std::string> block;
while (getline(file, line))
{
if (line.size() == 0) continue;
if (line.front() == '#') continue;
line = trim(line);
if (line.front() == '[')
{
if (block.size() > 0)
{
blocks.push_back(block);
block.clear();
}
std::string key = "type";
std::string value = trim(line.substr(1, line.size() - 2));
block.insert(std::pair<std::string, std::string>(key, value));
}
else
{
int cpos = line.find('=');
std::string key = trim(line.substr(0, cpos));
std::string value = trim(line.substr(cpos + 1));
block.insert(std::pair<std::string, std::string>(key, value));
}
}
blocks.push_back(block);
return blocks;
}
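// Example: a cfg section such as
//   [convolutional]
//   batch_normalize=1
//   filters=32
//   size=3
// is returned as one block:
//   { {"type", "convolutional"}, {"batch_normalize", "1"}, {"filters", "32"}, {"size", "3"} }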
void Yolo::parseConfigBlocks()
{
for (auto block : m_ConfigBlocks) {
if (block.at("type") == "net")
{
assert((block.find("height") != block.end())
&& "Missing 'height' param in network cfg");
assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg");
assert((block.find("channels") != block.end())
&& "Missing 'channels' param in network cfg");
m_InputH = std::stoul(block.at("height"));
m_InputW = std::stoul(block.at("width"));
m_InputC = std::stoul(block.at("channels"));
assert(m_InputW == m_InputH);
m_InputSize = m_InputC * m_InputH * m_InputW;
}
else if ((block.at("type") == "region") || (block.at("type") == "yolo"))
{
assert((block.find("num") != block.end())
&& std::string("Missing 'num' param in " + block.at("type") + " layer").c_str());
assert((block.find("classes") != block.end())
&& std::string("Missing 'classes' param in " + block.at("type") + " layer")
.c_str());
assert((block.find("anchors") != block.end())
&& std::string("Missing 'anchors' param in " + block.at("type") + " layer")
.c_str());
TensorInfo outputTensor;
std::string anchorString = block.at("anchors");
while (!anchorString.empty())
{
int npos = anchorString.find_first_of(',');
if (npos != -1)
{
float anchor = std::stof(trim(anchorString.substr(0, npos)));
outputTensor.anchors.push_back(anchor);
anchorString.erase(0, npos + 1);
}
else
{
float anchor = std::stof(trim(anchorString));
outputTensor.anchors.push_back(anchor);
break;
}
}
if ((m_NetworkType == "yolov3") || (m_NetworkType == "yolov3-tiny"))
{
assert((block.find("mask") != block.end())
&& std::string("Missing 'mask' param in " + block.at("type") + " layer")
.c_str());
std::string maskString = block.at("mask");
while (!maskString.empty())
{
int npos = maskString.find_first_of(',');
if (npos != -1)
{
uint mask = std::stoul(trim(maskString.substr(0, npos)));
outputTensor.masks.push_back(mask);
maskString.erase(0, npos + 1);
}
else
{
uint mask = std::stoul(trim(maskString));
outputTensor.masks.push_back(mask);
break;
}
}
}
outputTensor.numBBoxes = outputTensor.masks.size() > 0
? outputTensor.masks.size()
: std::stoul(trim(block.at("num")));
outputTensor.numClasses = std::stoul(block.at("classes"));
m_OutputTensors.push_back(outputTensor);
}
}
}
void Yolo::destroyNetworkUtils() {
// deallocate the weights
for (uint i = 0; i < m_TrtWeights.size(); ++i) {
if (m_TrtWeights[i].count > 0)
free(const_cast<void*>(m_TrtWeights[i].values));
}
m_TrtWeights.clear();
}
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _YOLO_H_
#define _YOLO_H_
#include <stdint.h>
#include <string>
#include <vector>
#include <memory>
#include "NvInfer.h"
#include "trt_utils.h"
#include "nvdsinfer_custom_impl.h"
/**
* Holds all the file paths required to build a network.
*/
struct NetworkInfo
{
std::string networkType;
std::string configFilePath;
std::string wtsFilePath;
std::string deviceType;
std::string inputBlobName;
};
/**
* Holds information about an output tensor of the yolo network.
*/
struct TensorInfo
{
std::string blobName;
uint stride{0};
uint gridSize{0};
uint numClasses{0};
uint numBBoxes{0};
uint64_t volume{0};
std::vector<uint> masks;
std::vector<float> anchors;
int bindingIndex{-1};
float* hostBuffer{nullptr};
};
class Yolo : public IModelParser {
public:
Yolo(const NetworkInfo& networkInfo);
~Yolo() override;
bool hasFullDimsSupported() const override { return false; }
const char* getModelName() const override {
return m_ConfigFilePath.empty() ? m_NetworkType.c_str()
: m_ConfigFilePath.c_str();
}
NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override;
nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder);
protected:
const std::string m_NetworkType;
const std::string m_ConfigFilePath;
const std::string m_WtsFilePath;
const std::string m_DeviceType;
const std::string m_InputBlobName;
std::vector<TensorInfo> m_OutputTensors;
std::vector<std::map<std::string, std::string>> m_ConfigBlocks;
uint m_InputH;
uint m_InputW;
uint m_InputC;
uint64_t m_InputSize;
// TRT specific members
std::vector<nvinfer1::Weights> m_TrtWeights;
private:
NvDsInferStatus buildYoloNetwork(
std::vector<float>& weights, nvinfer1::INetworkDefinition& network);
std::vector<std::map<std::string, std::string>> parseConfigFile(
const std::string cfgFilePath);
void parseConfigBlocks();
void destroyNetworkUtils();
};
#endif // _YOLO_H_
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "yoloPlugins.h"
#include "NvInferPlugin.h"
#include <cassert>
#include <iostream>
#include <memory>
namespace {
template <typename T>
void write(char*& buffer, const T& val)
{
*reinterpret_cast<T*>(buffer) = val;
buffer += sizeof(T);
}
template <typename T>
void read(const char*& buffer, T& val)
{
val = *reinterpret_cast<const T*>(buffer);
buffer += sizeof(T);
}
} //namespace
// Forward declaration of cuda kernels
cudaError_t cudaYoloLayerV3 (
const void* input, void* output, const uint& batchSize,
const uint& gridSize, const uint& numOutputClasses,
const uint& numBBoxes, uint64_t outputSize, cudaStream_t stream);
YoloLayerV3::YoloLayerV3 (const void* data, size_t length)
{
const char *d = static_cast<const char*>(data);
read(d, m_NumBoxes);
read(d, m_NumClasses);
read(d, m_GridSize);
read(d, m_OutputSize);
}
YoloLayerV3::YoloLayerV3 (
const uint& numBoxes, const uint& numClasses, const uint& gridSize) :
m_NumBoxes(numBoxes),
m_NumClasses(numClasses),
m_GridSize(gridSize)
{
assert(m_NumBoxes > 0);
assert(m_NumClasses > 0);
assert(m_GridSize > 0);
m_OutputSize = m_GridSize * m_GridSize * (m_NumBoxes * (4 + 1 + m_NumClasses));
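// e.g. a 19x19 grid with 3 boxes and 80 classes gives 19 * 19 * 3 * 85 = 92,055 floats per batch item.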
}
nvinfer1::Dims
YoloLayerV3::getOutputDimensions(
int index, const nvinfer1::Dims* inputs, int nbInputDims)
{
assert(index == 0);
assert(nbInputDims == 1);
return inputs[0];
}
bool YoloLayerV3::supportsFormat (
nvinfer1::DataType type, nvinfer1::PluginFormat format) const {
return (type == nvinfer1::DataType::kFLOAT &&
format == nvinfer1::PluginFormat::kNCHW);
}
void
YoloLayerV3::configureWithFormat (
const nvinfer1::Dims* inputDims, int nbInputs,
const nvinfer1::Dims* outputDims, int nbOutputs,
nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize)
{
assert(nbInputs == 1);
assert (format == nvinfer1::PluginFormat::kNCHW);
assert(inputDims != nullptr);
}
int YoloLayerV3::enqueue(
int batchSize, const void* const* inputs, void** outputs, void* workspace,
cudaStream_t stream)
{
CHECK(cudaYoloLayerV3(
inputs[0], outputs[0], batchSize, m_GridSize, m_NumClasses, m_NumBoxes,
m_OutputSize, stream));
return 0;
}
size_t YoloLayerV3::getSerializationSize() const
{
return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(m_GridSize) + sizeof(m_OutputSize);
}
void YoloLayerV3::serialize(void* buffer) const
{
char *d = static_cast<char*>(buffer);
write(d, m_NumBoxes);
write(d, m_NumClasses);
write(d, m_GridSize);
write(d, m_OutputSize);
}
nvinfer1::IPluginV2* YoloLayerV3::clone() const
{
return new YoloLayerV3 (m_NumBoxes, m_NumClasses, m_GridSize);
}
REGISTER_TENSORRT_PLUGIN(YoloLayerV3PluginCreator);
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __YOLO_PLUGINS__
#define __YOLO_PLUGINS__
#include <cassert>
#include <cstring>
#include <cuda_runtime_api.h>
#include <iostream>
#include <memory>
#include "NvInferPlugin.h"
#define CHECK(status) \
{ \
if (status != 0) \
{ \
std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \
<< " at line " << __LINE__ << std::endl; \
abort(); \
} \
}
namespace
{
const char* YOLOV3LAYER_PLUGIN_VERSION {"1"};
const char* YOLOV3LAYER_PLUGIN_NAME {"YoloLayerV3_TRT"};
} // namespace
class YoloLayerV3 : public nvinfer1::IPluginV2
{
public:
YoloLayerV3 (const void* data, size_t length);
YoloLayerV3 (const uint& numBoxes, const uint& numClasses, const uint& gridSize);
const char* getPluginType () const override { return YOLOV3LAYER_PLUGIN_NAME; }
const char* getPluginVersion () const override { return YOLOV3LAYER_PLUGIN_VERSION; }
int getNbOutputs () const override { return 1; }
nvinfer1::Dims getOutputDimensions (
int index, const nvinfer1::Dims* inputs,
int nbInputDims) override;
bool supportsFormat (
nvinfer1::DataType type, nvinfer1::PluginFormat format) const override;
void configureWithFormat (
const nvinfer1::Dims* inputDims, int nbInputs,
const nvinfer1::Dims* outputDims, int nbOutputs,
nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) override;
int initialize () override { return 0; }
void terminate () override {}
size_t getWorkspaceSize (int maxBatchSize) const override { return 0; }
int enqueue (
int batchSize, const void* const* inputs, void** outputs,
void* workspace, cudaStream_t stream) override;
size_t getSerializationSize() const override;
void serialize (void* buffer) const override;
void destroy () override { delete this; }
nvinfer1::IPluginV2* clone() const override;
void setPluginNamespace (const char* pluginNamespace) override {
m_Namespace = pluginNamespace;
}
virtual const char* getPluginNamespace () const override {
return m_Namespace.c_str();
}
private:
uint m_NumBoxes {0};
uint m_NumClasses {0};
uint m_GridSize {0};
uint64_t m_OutputSize {0};
std::string m_Namespace {""};
};
class YoloLayerV3PluginCreator : public nvinfer1::IPluginCreator
{
public:
YoloLayerV3PluginCreator () {}
~YoloLayerV3PluginCreator () {}
const char* getPluginName () const override { return YOLOV3LAYER_PLUGIN_NAME; }
const char* getPluginVersion () const override { return YOLOV3LAYER_PLUGIN_VERSION; }
const nvinfer1::PluginFieldCollection* getFieldNames() override {
std::cerr << "YoloLayerV3PluginCreator::getFieldNames is not implemented" << std::endl;
return nullptr;
}
nvinfer1::IPluginV2* createPlugin (
const char* name, const nvinfer1::PluginFieldCollection* fc) override
{
std::cerr << "YoloLayerV3PluginCreator::createPlugin is not implemented.\n";
return nullptr;
}
nvinfer1::IPluginV2* deserializePlugin (
const char* name, const void* serialData, size_t serialLength) override
{
std::cout << "Deserialize yoloLayerV3 plugin: " << name << std::endl;
return new YoloLayerV3(serialData, serialLength);
}
void setPluginNamespace(const char* libNamespace) override {
m_Namespace = libNamespace;
}
const char* getPluginNamespace() const override {
return m_Namespace.c_str();
}
private:
std::string m_Namespace {""};
};
#endif // __YOLO_PLUGINS__
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
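# Camera-side script: grabs webcam frames with OpenCV, streams the raw frames over
# one TCP socket, receives detection boxes back over a second socket, and displays
# the annotated frames together with a rough FPS readout (see the threads below).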
import _thread
import queue
import threading
import time
from socket import *
import cv2
import numpy as np
from tool.utils import load_class_names, plot_boxes_cv2
ip_add = '127.0.0.1'
server_port = 25000
connect_port = 25003
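# send_from / recv_into move raw numpy buffers over a socket, looping until every
# byte has been transferred (memoryview slicing handles partial sends/receives).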
def send_from(arr, dest):
view = memoryview(arr).cast('B')
while len(view):
nsent = dest.send(view)
view = view[nsent:]
def recv_into(arr, source):
view = memoryview(arr).cast('B')
while len(view):
nrecv = source.recv_into(view)
view = view[nrecv:]
c_2 = socket(AF_INET, SOCK_STREAM)
c_2.connect((ip_add, connect_port))
s = socket(AF_INET, SOCK_STREAM)
s.bind(('', server_port))
s.listen(3)
qsize = 1
boxQue = queue.Queue(qsize)
img_sent = queue.Queue(qsize * 20)
lock = threading.Lock()
# time.sleep(10)
fps = 0
fps_dis = 0
def recv_box():
lth = np.zeros(shape=(1,), dtype=np.int32)
while 1:
if boxQue.full():
# print('box is full')
time.sleep(0.1)
else:
recv_into(lth, c_2)
if lth[0] == 0:
lock.acquire()
boxQue.put([0])
lock.release()
continue
arr = np.zeros(shape=(1, lth[0], 7), dtype=np.float32)
recv_into(arr, c_2)
box = arr.tolist()
for i in range(lth[0]):
box[0][i][-1] = np.int64(box[0][i][-1])
lock.acquire()
boxQue.put(box)
lock.release()
# sum_flag = np.zeros(shape=(1,), dtype=np.int32)
# def recv_flag():
# global sum_flag
# recv_into(sum_flag, c)
# print('done')
def cam_send():
c, a = s.accept()
cap = cv2.VideoCapture(0)
flag = cap.isOpened()
print(flag)
# _thread.start_new_thread(recv_flag, ())
cnt_arr = np.zeros(shape=(1,), dtype=np.int32)
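    # Warm-up/sync loop: keep sending frames (plus the frame's pixel sum) until the
    # remote side has acknowledged at least 5 of them via cnt_arr.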
while 1:
_, img = cap.read()
send_from(img, c)
send_from(np.array([np.sum(img)]), c)
recv_into(cnt_arr, c)
if cnt_arr[0] >= 5:
break
while 1:
while img_sent.full():
# print('sent is full')
time.sleep(0.1)
_, img = cap.read()
# print(img)
send_from(img, c)
lock.acquire()
img_sent.put(img)
lock.release()
# print(np.sum(img))
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
def fps_update():
global fps, fps_dis
while 1:
time.sleep(10)
print(fps)
fps_dis = fps / 10
fps = 0
_thread.start_new_thread(recv_box, ())
_thread.start_new_thread(cam_send, ())
_thread.start_new_thread(fps_update, ())
# def get_box():
# while boxQue.empty():
# time.sleep(0.1)
# lock.acquire()
# box = boxQue.get()
# lock.release()
# return box
namesfile = 'data/coco.names'
class_names = load_class_names(namesfile)
while (1):
# get a frame
while img_sent.empty() or boxQue.empty():
# print('sent or box are empty')
time.sleep(0.1)
lock.acquire()
img = img_sent.get()
boxes = boxQue.get()
lock.release()
# print(np.sum(img))
# start = time.time()
if boxes[0] == 0:
pass
else:
img = plot_boxes_cv2(img, boxes[0], 'predictions.jpg', class_names)
img = cv2.putText(img, 'FPS: {}'.format(fps_dis), (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 2)
# end = time.time()
# print('time: ', end - start)
cv2.imshow('fps:', img)
fps += 1
# send_from(frame, c_3)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cv2.destroyAllWindows()
#c.close()
s.close()
[net]
# Testing
batch=1
subdivisions=1
# Training
# batch=64
# subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[maxpool]
size=2
stride=2
# 10
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 11
[maxpool]
size=2
stride=1
# 12
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
# 13
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
# 14
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 15
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
# 16
[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=80
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
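# With num=6 anchor pairs and mask=3,4,5, this head predicts 3 boxes per cell using
# the three largest anchors (81,82  135,169  344,319).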
# 17
[route]
layers = -4
# 18
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
# 19
[upsample]
stride=2
# 20
[route]
layers = -1, 8
# 21
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 22
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
# 23
[yolo]
mask = 1,2,3
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=80
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[net]
# Testing
batch=1
subdivisions=1
# Training
# batch=64
# subdivisions=16
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=16
width=608
height=608
channels=3
momentum=0.949
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500500
policy=steps
steps=400000,450000
scales=.1,.1
#cutmix=1
mosaic=1
#:104x104 54:52x52 85:26x26 104:13x13 for 416
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-7
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-10
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-28
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-28
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-16
[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=mish
##########################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
### SPP ###
[maxpool]
stride=1
size=5
[route]
layers=-2
[maxpool]
stride=1
size=9
[route]
layers=-4
[maxpool]
stride=1
size=13
[route]
layers=-1,-3,-5,-6
### End SPP ###
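# The SPP block above pools the same 512-channel map with 5x5, 9x9 and 13x13 max-pool
# windows (stride 1), and the route concatenates those three outputs with the pre-SPP
# convolution, giving a 2048-channel feature map.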
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = 85
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[route]
layers = -1, -3
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = 54
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[route]
layers = -1, -3
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
##########################
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
scale_x_y = 1.2
iou_thresh=0.213
cls_normalizer=1.0
iou_normalizer=0.07
iou_loss=ciou
nms_kind=greedynms
beta_nms=0.6
[route]
layers = -4
[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=256
activation=leaky
[route]
layers = -1, -16
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
scale_x_y = 1.1
iou_thresh=0.213
cls_normalizer=1.0
iou_normalizer=0.07
iou_loss=ciou
nms_kind=greedynms
beta_nms=0.6
[route]
layers = -4
[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=512
activation=leaky
[route]
layers = -1, -37
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
scale_x_y = 1.05
iou_thresh=0.213
cls_normalizer=1.0
iou_normalizer=0.07
iou_loss=ciou
nms_kind=greedynms
beta_nms=0.6
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=1
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.00261
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[route]
layers=-1
groups=2
group_id=1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[route]
layers = -1,-2
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[route]
layers = -6,-1
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[route]
layers=-1
groups=2
group_id=1
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[route]
layers = -1,-2
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[route]
layers = -6,-1
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[route]
layers=-1
groups=2
group_id=1
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[route]
layers = -1,-2
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[route]
layers = -6,-1
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
##################################
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=80
num=6
jitter=.3
scale_x_y = 1.05
cls_normalizer=1.0
iou_normalizer=0.07
iou_loss=ciou
ignore_thresh = .7
truth_thresh = 1
random=0
resize=1.5
nms_kind=greedynms
beta_nms=0.6
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 23
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 1,2,3
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=80
num=6
jitter=.3
scale_x_y = 1.05
cls_normalizer=1.0
iou_normalizer=0.07
iou_loss=ciou
ignore_thresh = .7
truth_thresh = 1
random=0
resize=1.5
nms_kind=greedynms
beta_nms=0.6
[net]
batch=64
subdivisions=8
# Training
#width=512
#height=512
width=608
height=608
channels=3
momentum=0.949
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.0013
burn_in=1000
max_batches = 500500
policy=steps
steps=400000,450000
scales=.1,.1
#cutmix=1
mosaic=1
#:104x104 54:52x52 85:26x26 104:13x13 for 416
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-7
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-10
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-28
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-28
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-16
[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=mish
##########################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
### SPP ###
[maxpool]
stride=1
size=5
[route]
layers=-2
[maxpool]
stride=1
size=9
[route]
layers=-4
[maxpool]
stride=1
size=13
[route]
layers=-1,-3,-5,-6
### End SPP ###
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = 85
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[route]
layers = -1, -3
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = 54
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[route]
layers = -1, -3
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
##########################
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
scale_x_y = 1.2
iou_thresh=0.213
cls_normalizer=1.0
iou_normalizer=0.07
iou_loss=ciou
nms_kind=greedynms
beta_nms=0.6
max_delta=5
[route]
layers = -4
[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=256
activation=leaky
[route]
layers = -1, -16
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
scale_x_y = 1.1
iou_thresh=0.213
cls_normalizer=1.0
iou_normalizer=0.07
iou_loss=ciou
nms_kind=greedynms
beta_nms=0.6
max_delta=5
[route]
layers = -4
[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=512
activation=leaky
[route]
layers = -1, -37
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
scale_x_y = 1.05
iou_thresh=0.213
cls_normalizer=1.0
iou_normalizer=0.07
iou_loss=ciou
nms_kind=greedynms
beta_nms=0.6
max_delta=5
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
FROM ubuntu:18.04
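# Assumed usage (image name taken from the edge1 Deployment manifest in this repo):
#   docker build -t k8s-master:5000/edge/edge1:v1 . && docker push k8s-master:5000/edge/edge1:v1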
RUN apt-get -yqq update
RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
RUN apt-get clean
RUN apt-get -yqq update
RUN apt-get install -yqq openssh-client openssh-server
RUN echo 'root:PASSWORD' | chpasswd
RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN service ssh restart
RUN apt-get install -y software-properties-common
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get install -y python3.9
RUN apt-get autoremove -y python3
RUN ln -s /usr/bin/python3.9 /usr/bin/python
RUN ln -s /usr/bin/python3.9 /usr/bin/python3
RUN apt-get install -y python3.9-distutils
RUN apt-get install -y wget
RUN wget https://bootstrap.pypa.io/get-pip.py
RUN python get-pip.py
RUN pip3 -V
RUN ln -s /usr/local/bin/pip3 /usr/bin/pip3
RUN apt-get -yqq install libssl-dev libffi-dev gcc python3.9-dev libgl1-mesa-glx libsm6 libxext6 libglib2.0-0
RUN apt-get -yqq update
RUN pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple/
ADD requirements.txt /edge1/requirements.txt
#RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --no-cache-dir -r requirements.txt
RUN pip3 install -r /edge1/requirements.txt
WORKDIR /edge1
ADD . /edge1
CMD ["python", "models.py"]
import logging
import threading
import queue
from socket import *
import numpy as np
import _thread
import time
qsize = 1
ip_add = '127.0.0.1'
server_port = 25001
connect_port = 25000
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
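# send_from / recv_into stream a fixed-size numpy buffer over a TCP socket, looping until
# every byte of the underlying memoryview has been sent / received.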
def send_from(arr, dest):
view = memoryview(arr).cast('B')
while len(view):
nsent = dest.send(view)
view = view[nsent:]
def recv_into(arr, source):
view = memoryview(arr).cast('B')
while len(view):
nrecv = source.recv_into(view)
view = view[nrecv:]
def doConnect(host, port):
sock = socket(AF_INET, SOCK_STREAM)
sock.settimeout(20)
flag = True
while flag:
try:
if flag:
log.info("try connect %s : %d", host, port)
sock.connect((host, port))
flag = False
log.info("try connect %s : %d SUCCESS", host, port)
except Exception as e:
log.error("Address-related error connecting to server: %s" % e)
time.sleep(3)
return sock
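# trans_thread (edge1): accepts the downstream node on server_port and connects upstream to
# the frame source on connect_port; incoming 480x640x3 frames are queued in imgQue, and the
# feature maps handed to put_d2() are streamed to the downstream node.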
class trans_thread(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
self.lock = threading.Lock()
self.imgQue = queue.Queue(qsize)
self.d2Que = queue.Queue(qsize)
self.server = socket(AF_INET, SOCK_STREAM)
self.server.bind(('', server_port))
self.server.listen(3)
log.info("bind %d", server_port)
self.a_client, addr = self.server.accept()
self.client = doConnect(ip_add, connect_port)
log.info('edge 1 init successfully')
def put_d2(self, d2):
while self.d2Que.full():
# print('d2 is full')
time.sleep(0.1)
d2 = d2.detach().numpy()
# print(len(d2))
# print(d2.dtype)
self.lock.acquire()
self.d2Que.put(d2)
self.lock.release()
def get_img(self):
while self.imgQue.empty():
# print('img is empty')
time.sleep(0.1)
self.lock.acquire()
img = self.imgQue.get()
self.lock.release()
# print('2: ', np.sum(img))
return img
def recv(self):
try:
arr = np.zeros(shape=(480, 640, 3), dtype=np.uint8)
img_sum = np.zeros(shape=(1,), dtype=np.int32)
cnt = 0
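# Warm-up handshake: receive a frame plus its pixel sum, echo back the count of consecutive
# checksum matches, and switch to normal streaming after five matches in a row.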
while 1:
recv_into(arr, self.client)
recv_into(img_sum, self.client)
if img_sum[0] == np.sum(arr):
cnt += 1
else:
cnt = 0
send_from(np.array([cnt]), self.client)
if cnt >= 5:
break
while 1:
if not self.imgQue.full():
recv_into(arr, self.client)
self.lock.acquire()
self.imgQue.put(arr.copy())  # copy: recv_into reuses arr for the next frame
self.lock.release()
# print('1: ', np.sum(arr))
else:
# print('img is full')
time.sleep(0.1)
except Exception as e:
log.error("connecting error: %s" % e)
self.client = doConnect(ip_add, connect_port)
def send(self):
while 1:
if not self.d2Que.empty():
self.lock.acquire()
d2 = self.d2Que.get()
self.lock.release()
send_from(d2, self.a_client)
else:
# print('d2 is empty')
time.sleep(0.1)
def run(self):
_thread.start_new_thread(self.recv, ())
_thread.start_new_thread(self.send, ())
# def print_time(threadName, delay, counter):
# while counter:
# if exitFlag:
# threadName.exit()
# time.sleep(delay)
# print ("%s: %s" % (threadName, time.ctime(time.time())))
# counter -= 1
apiVersion: apps/v1
kind: Deployment
metadata:
name: edge1
spec:
replicas: 1
selector:
matchLabels:
app: edge1
template:
metadata:
labels:
app: edge1
spec:
hostNetwork: true
nodeSelector:
kubernetes.io/hostname: node1
containers:
- name: edge1
image: k8s-master:5000/edge/edge1:v1
imagePullPolicy: Always
ports:
- containerPort: 25001
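# Assumed usage: kubectl apply -f <this manifest>; with hostNetwork the container listens on
# port 25001 of node1 directly.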
#---
#apiVersion: v1
#kind: Service
#metadata:
# name: edge1
#spec:
# type: NodePort
# selector:
# app: edge1
# ports:
# - name: tcp
# port: 32001
# targetPort: 25001
# nodePort: 32001
import logging
import sys
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from tool.torch_utils import *
from tool.yolo_layer import YoloLayer
class Mish(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, x):
x = x * (torch.tanh(torch.nn.functional.softplus(x)))
return x
class Upsample(nn.Module):
def __init__(self):
super(Upsample, self).__init__()
def forward(self, x, target_size, inference=False):
assert (x.data.dim() == 4)
# _, _, tH, tW = target_size
if inference:
# B = x.data.size(0)
# C = x.data.size(1)
# H = x.data.size(2)
# W = x.data.size(3)
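# Inference path: integer-factor nearest-neighbour upsampling via view/expand; equivalent to
# the F.interpolate branch below when the target size is an exact multiple of the input size.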
return x.view(x.size(0), x.size(1), x.size(2), 1, x.size(3), 1). \
expand(x.size(0), x.size(1), x.size(2), target_size[2] // x.size(2), x.size(3),
target_size[3] // x.size(3)). \
contiguous().view(x.size(0), x.size(1), target_size[2], target_size[3])
else:
return F.interpolate(x, size=(target_size[2], target_size[3]), mode='nearest')
class Conv_Bn_Activation(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride, activation, bn=True, bias=False):
super().__init__()
pad = (kernel_size - 1) // 2
self.conv = nn.ModuleList()
if bias:
self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad))
else:
self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad, bias=False))
if bn:
self.conv.append(nn.BatchNorm2d(out_channels))
if activation == "mish":
self.conv.append(Mish())
elif activation == "relu":
self.conv.append(nn.ReLU(inplace=True))
elif activation == "leaky":
self.conv.append(nn.LeakyReLU(0.1, inplace=True))
elif activation == "linear":
pass
else:
print("activate error !!! {} {} {}".format(sys._getframe().f_code.co_filename,
sys._getframe().f_code.co_name, sys._getframe().f_lineno))
def forward(self, x):
for l in self.conv:
x = l(x)
return x
class ResBlock(nn.Module):
"""
Sequential residual blocks each of which consists of \
two convolution layers.
Args:
ch (int): number of input and output channels.
nblocks (int): number of residual blocks.
shortcut (bool): if True, residual tensor addition is enabled.
"""
def __init__(self, ch, nblocks=1, shortcut=True):
super().__init__()
self.shortcut = shortcut
self.module_list = nn.ModuleList()
for i in range(nblocks):
resblock_one = nn.ModuleList()
resblock_one.append(Conv_Bn_Activation(ch, ch, 1, 1, 'mish'))
resblock_one.append(Conv_Bn_Activation(ch, ch, 3, 1, 'mish'))
self.module_list.append(resblock_one)
def forward(self, x):
for module in self.module_list:
h = x
for res in module:
h = res(h)
x = x + h if self.shortcut else h
return x
class DownSample1(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(3, 32, 3, 1, 'mish')
self.conv2 = Conv_Bn_Activation(32, 64, 3, 2, 'mish')
self.conv3 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
# [route]
# layers = -2
self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(64, 32, 1, 1, 'mish')
self.conv6 = Conv_Bn_Activation(32, 64, 3, 1, 'mish')
# [shortcut]
# from=-3
# activation = linear
self.conv7 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
# [route]
# layers = -1, -7
self.conv8 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x2)
# route -2
x4 = self.conv4(x2)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
# shortcut -3
x6 = x6 + x4
x7 = self.conv7(x6)
# [route]
# layers = -1, -7
x7 = torch.cat([x7, x3], dim=1)
x8 = self.conv8(x7)
return x8
class DownSample2(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(64, 128, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
# r -2
self.conv3 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
self.resblock = ResBlock(ch=64, nblocks=2)
# s -3
self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
# r -1 -10
self.conv5 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class DownSample3(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(128, 256, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')
self.conv3 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')
self.resblock = ResBlock(ch=128, nblocks=8)
self.conv4 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class DownSample4(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(256, 512, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')
self.conv3 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')
self.resblock = ResBlock(ch=256, nblocks=8)
self.conv4 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class DownSample5(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(512, 1024, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')
self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')
self.resblock = ResBlock(ch=512, nblocks=4)
self.conv4 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(1024, 1024, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class Neck(nn.Module):
def __init__(self, inference=False):
super().__init__()
self.inference = inference
self.conv1 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv2 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
# SPP
self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=5 // 2)
self.maxpool2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=9 // 2)
self.maxpool3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=13 // 2)
# R -1 -3 -5 -6
# SPP
self.conv4 = Conv_Bn_Activation(2048, 512, 1, 1, 'leaky')
self.conv5 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv6 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv7 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
# UP
self.upsample1 = Upsample()
# R 85
self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
# R -1 -3
self.conv9 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv10 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv11 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv12 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv13 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv14 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
# UP
self.upsample2 = Upsample()
# R 54
self.conv15 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
# R -1 -3
self.conv16 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
self.conv17 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
self.conv18 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
self.conv19 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
self.conv20 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
def forward(self, input, downsample4, downsample3, inference=False):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x2)
# SPP
m1 = self.maxpool1(x3)
m2 = self.maxpool2(x3)
m3 = self.maxpool3(x3)
spp = torch.cat([m3, m2, m1, x3], dim=1)
# SPP end
x4 = self.conv4(spp)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
x7 = self.conv7(x6)
# UP
up = self.upsample1(x7, downsample4.size(), self.inference)
# R 85
x8 = self.conv8(downsample4)
# R -1 -3
x8 = torch.cat([x8, up], dim=1)
x9 = self.conv9(x8)
x10 = self.conv10(x9)
x11 = self.conv11(x10)
x12 = self.conv12(x11)
x13 = self.conv13(x12)
x14 = self.conv14(x13)
# UP
up = self.upsample2(x14, downsample3.size(), self.inference)
# R 54
x15 = self.conv15(downsample3)
# R -1 -3
x15 = torch.cat([x15, up], dim=1)
x16 = self.conv16(x15)
x17 = self.conv17(x16)
x18 = self.conv18(x17)
x19 = self.conv19(x18)
x20 = self.conv20(x19)
return x20, x13, x6
class Yolov4Head(nn.Module):
def __init__(self, output_ch, n_classes, inference=False):
super().__init__()
self.inference = inference
self.conv1 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
self.conv2 = Conv_Bn_Activation(256, output_ch, 1, 1, 'linear', bn=False, bias=True)
self.yolo1 = YoloLayer(
anchor_mask=[0, 1, 2], num_classes=n_classes,
anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
num_anchors=9, stride=8)
# R -4
self.conv3 = Conv_Bn_Activation(128, 256, 3, 2, 'leaky')
# R -1 -16
self.conv4 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv5 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv6 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv7 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv9 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv10 = Conv_Bn_Activation(512, output_ch, 1, 1, 'linear', bn=False, bias=True)
self.yolo2 = YoloLayer(
anchor_mask=[3, 4, 5], num_classes=n_classes,
anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
num_anchors=9, stride=16)
# R -4
self.conv11 = Conv_Bn_Activation(256, 512, 3, 2, 'leaky')
# R -1 -37
self.conv12 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv13 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv14 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv15 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv16 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv17 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv18 = Conv_Bn_Activation(1024, output_ch, 1, 1, 'linear', bn=False, bias=True)
self.yolo3 = YoloLayer(
anchor_mask=[6, 7, 8], num_classes=n_classes,
anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
num_anchors=9, stride=32)
def forward(self, input1, input2, input3):
x1 = self.conv1(input1)
x2 = self.conv2(x1)
x3 = self.conv3(input1)
# R -1 -16
x3 = torch.cat([x3, input2], dim=1)
x4 = self.conv4(x3)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
x7 = self.conv7(x6)
x8 = self.conv8(x7)
x9 = self.conv9(x8)
x10 = self.conv10(x9)
# R -4
x11 = self.conv11(x8)
# R -1 -37
x11 = torch.cat([x11, input3], dim=1)
x12 = self.conv12(x11)
x13 = self.conv13(x12)
x14 = self.conv14(x13)
x15 = self.conv15(x14)
x16 = self.conv16(x15)
x17 = self.conv17(x16)
x18 = self.conv18(x17)
if self.inference:
y1 = self.yolo1(x2)
y2 = self.yolo2(x10)
y3 = self.yolo3(x18)
return get_region_boxes([y1, y2, y3])
else:
return [x2, x10, x18]
class Yolov4(nn.Module):
def __init__(self, yolov4conv137weight=None, n_classes=80, inference=False):
super().__init__()
output_ch = (4 + 1 + n_classes) * 3
# backbone
self.down1 = DownSample1()
self.down2 = DownSample2()
self.down3 = DownSample3()
self.down4 = DownSample4()
self.down5 = DownSample5()
# neck
self.neek = Neck(inference)
# yolov4conv137
if yolov4conv137weight:
_model = nn.Sequential(self.down1, self.down2, self.down3, self.down4, self.down5, self.neek)
pretrained_dict = torch.load(yolov4conv137weight)
model_dict = _model.state_dict()
# 1. remap the pretrained weights onto this model's parameter names by position
pretrained_dict = {k1: v for (k, v), k1 in zip(pretrained_dict.items(), model_dict)}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict)
_model.load_state_dict(model_dict)
# head
self.head = Yolov4Head(output_ch, n_classes, inference)
def forward(self, input):
d1 = self.down1(input)
d2 = self.down2(d1)
d3 = self.down3(d2)
d4 = self.down4(d3)
d5 = self.down5(d4)
x20, x13, x6 = self.neek(d5, d4, d3)
output = self.head(x20, x13, x6)
return output
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
namesfile = 'data/coco.names'
n_classes = 80
weightfile = './yolov4.pth'
height = 608
width = 608
model = Yolov4(yolov4conv137weight=None, n_classes=n_classes, inference=True)
pretrained_dict = torch.load(weightfile, map_location=torch.device('cpu'))
model.load_state_dict(pretrained_dict)
model.eval()
# time.sleep(10)
from config_edge import trans_thread
trans = trans_thread()
trans.start()
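# Split-inference stage on edge1: for every frame pulled from trans, resize to the network
# input, convert BGR HWC uint8 to a normalized RGB NCHW float tensor, run down1 and down2
# locally, then ship the d2 feature map to the next node via trans.put_d2().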
while 1:
img = trans.get_img()
# print(np.sum(img))
img = cv2.resize(img, (width, height))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
if type(img) == np.ndarray and len(img.shape) == 3: # cv2 image
img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
elif type(img) == np.ndarray and len(img.shape) == 4:
img = torch.from_numpy(img.transpose(0, 3, 1, 2)).float().div(255.0)
else:
print("unknow image type")
exit(-1)
img = torch.autograd.Variable(img)
d1 = model.down1(img)
d2 = model.down2(d1)
log.info(d2.shape[0])
log.info(d2.shape[1])
log.info(d2.shape[2])
log.info(d2.shape[3])
trans.put_d2(d2)
FROM ubuntu:18.04
RUN apt-get -yqq update
RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
RUN apt-get clean
RUN apt-get -yqq update
RUN apt-get install -yqq openssh-client openssh-server
RUN echo 'root:PASSWORD' | chpasswd
RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN service ssh restart
RUN apt-get install -y software-properties-common
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get install -y python3.9
RUN apt-get autoremove -y python3
RUN ln -s /usr/bin/python3.9 /usr/bin/python
RUN ln -s /usr/bin/python3.9 /usr/bin/python3
RUN apt-get install -y python3.9-distutils
RUN apt-get install -y wget
RUN wget https://bootstrap.pypa.io/get-pip.py
RUN python get-pip.py
RUN pip3 -V
RUN ln -s /usr/local/bin/pip3 /usr/bin/pip3
RUN apt-get -yqq install libssl-dev libffi-dev gcc python3.9-dev libgl1-mesa-glx libsm6 libxext6 libglib2.0-0
RUN apt-get -yqq update
RUN pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple/
ADD requirements.txt /edge2/requirements.txt
#RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --no-cache-dir -r requirements.txt
RUN pip3 install -r /edge2/requirements.txt
WORKDIR /edge2
ADD . /edge2
CMD ["python", "models.py"]
import logging
import threading
import queue
from socket import *
import numpy as np
import _thread
import time
qsize = 1
ip_add = '127.0.0.1'
server_port = 25002
connect_port = 25001
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
def send_from(arr, dest):
view = memoryview(arr).cast('B')
while len(view):
nsent = dest.send(view)
view = view[nsent:]
def recv_into(arr, source):
view = memoryview(arr).cast('B')
while len(view):
nrecv = source.recv_into(view)
view = view[nrecv:]
def doConnect(host, port):
sock = socket(AF_INET, SOCK_STREAM)
sock.settimeout(20)
flag = True
while flag:
try:
if flag:
log.info("try connect %s : %d", host, port)
sock.connect((host, port))
flag = False
except Exception as e:
log.error("Address-related error connecting to server: %s" % e)
time.sleep(3)
return sock
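# trans_thread (edge2): accepts the downstream node on server_port and connects upstream to
# edge1 on connect_port; incoming d2 feature maps (1x128x152x152 float32) are queued in d2Que,
# and the tensors handed to put_d4() are streamed to the downstream node.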
class trans_thread(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
self.lock = threading.Lock()
self.d2Que = queue.Queue(qsize)
self.d4Que = queue.Queue(qsize)
self.server = socket(AF_INET, SOCK_STREAM)
self.server.bind(('', server_port))
self.server.listen(3)
log.info("bind %d", server_port)
self.a_client, _ = self.server.accept()
self.client = doConnect(ip_add, connect_port)
log.info('edge 2 init successfully')
def get_d2(self):
while self.d2Que.empty():
print('d2 is empty')
time.sleep(0.1)
self.lock.acquire()
d2 = self.d2Que.get()
self.lock.release()
return d2
def recv(self):
try:
arr = np.zeros(shape=(1, 128, 152, 152), dtype=np.float32)
while 1:
if not self.d2Que.full():
recv_into(arr, self.client)
self.lock.acquire()
self.d2Que.put(arr.copy())  # copy: recv_into reuses arr for the next tensor
self.lock.release()
else:
# print('d2 is full')
time.sleep(0.1)
except Exception as e:
log.error("connecting error: %s" % e)
self.client = doConnect(ip_add, connect_port)
def send(self):
while 1:
if not self.d4Que.empty():
self.lock.acquire()
d4 = self.d4Que.get()
self.lock.release()
send_from(d4, self.a_client)
else:
time.sleep(0.1)
def put_d4(self, d4):
while self.d4Que.full():
# print('d4 is full')
time.sleep(0.1)
d4 = d4.detach().numpy()
self.lock.acquire()
self.d4Que.put(d4)
self.lock.release()
def run(self):
_thread.start_new_thread(self.recv, ())
_thread.start_new_thread(self.send, ())
# def print_time(threadName, delay, counter):
# while counter:
# if exitFlag:
# threadName.exit()
# time.sleep(delay)
# print ("%s: %s" % (threadName, time.ctime(time.time())))
# counter -= 1
apiVersion: apps/v1
kind: Deployment
metadata:
name: edge2
spec:
replicas: 1
selector:
matchLabels:
app: edge2
template:
metadata:
labels:
app: edge2
spec:
hostNetwork: true
nodeSelector:
kubernetes.io/hostname: node2
containers:
- name: edge2
image: k8s-master:5000/edge/edge2:v1
imagePullPolicy: Always
ports:
- containerPort: 25002
#---
#apiVersion: v1
#kind: Service
#metadata:
# name: edge2
#spec:
# type: NodePort
# selector:
# app: edge2
# ports:
# - name: tcp
# port: 32002
# targetPort: 25002
# nodePort: 32002
import sys
import torch
import torch.nn.functional as F
from torch import nn
from tool.torch_utils import *
from tool.yolo_layer import YoloLayer
class Mish(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, x):
x = x * (torch.tanh(torch.nn.functional.softplus(x)))
return x
class Upsample(nn.Module):
def __init__(self):
super(Upsample, self).__init__()
def forward(self, x, target_size, inference=False):
assert (x.data.dim() == 4)
# _, _, tH, tW = target_size
if inference:
#B = x.data.size(0)
#C = x.data.size(1)
#H = x.data.size(2)
#W = x.data.size(3)
return x.view(x.size(0), x.size(1), x.size(2), 1, x.size(3), 1).\
expand(x.size(0), x.size(1), x.size(2), target_size[2] // x.size(2), x.size(3), target_size[3] // x.size(3)).\
contiguous().view(x.size(0), x.size(1), target_size[2], target_size[3])
else:
return F.interpolate(x, size=(target_size[2], target_size[3]), mode='nearest')
class Conv_Bn_Activation(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride, activation, bn=True, bias=False):
super().__init__()
pad = (kernel_size - 1) // 2
self.conv = nn.ModuleList()
if bias:
self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad))
else:
self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad, bias=False))
if bn:
self.conv.append(nn.BatchNorm2d(out_channels))
if activation == "mish":
self.conv.append(Mish())
elif activation == "relu":
self.conv.append(nn.ReLU(inplace=True))
elif activation == "leaky":
self.conv.append(nn.LeakyReLU(0.1, inplace=True))
elif activation == "linear":
pass
else:
print("activate error !!! {} {} {}".format(sys._getframe().f_code.co_filename,
sys._getframe().f_code.co_name, sys._getframe().f_lineno))
def forward(self, x):
for l in self.conv:
x = l(x)
return x
class ResBlock(nn.Module):
"""
Sequential residual blocks each of which consists of \
two convolution layers.
Args:
ch (int): number of input and output channels.
nblocks (int): number of residual blocks.
shortcut (bool): if True, residual tensor addition is enabled.
"""
def __init__(self, ch, nblocks=1, shortcut=True):
super().__init__()
self.shortcut = shortcut
self.module_list = nn.ModuleList()
for i in range(nblocks):
resblock_one = nn.ModuleList()
resblock_one.append(Conv_Bn_Activation(ch, ch, 1, 1, 'mish'))
resblock_one.append(Conv_Bn_Activation(ch, ch, 3, 1, 'mish'))
self.module_list.append(resblock_one)
def forward(self, x):
for module in self.module_list:
h = x
for res in module:
h = res(h)
x = x + h if self.shortcut else h
return x
class DownSample1(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(3, 32, 3, 1, 'mish')
self.conv2 = Conv_Bn_Activation(32, 64, 3, 2, 'mish')
self.conv3 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
# [route]
# layers = -2
self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(64, 32, 1, 1, 'mish')
self.conv6 = Conv_Bn_Activation(32, 64, 3, 1, 'mish')
# [shortcut]
# from=-3
# activation = linear
self.conv7 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
# [route]
# layers = -1, -7
self.conv8 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x2)
# route -2
x4 = self.conv4(x2)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
# shortcut -3
x6 = x6 + x4
x7 = self.conv7(x6)
# [route]
# layers = -1, -7
x7 = torch.cat([x7, x3], dim=1)
x8 = self.conv8(x7)
return x8
class DownSample2(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(64, 128, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
# r -2
self.conv3 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
self.resblock = ResBlock(ch=64, nblocks=2)
# s -3
self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
# r -1 -10
self.conv5 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class DownSample3(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(128, 256, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')
self.conv3 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')
self.resblock = ResBlock(ch=128, nblocks=8)
self.conv4 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class DownSample4(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(256, 512, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')
self.conv3 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')
self.resblock = ResBlock(ch=256, nblocks=8)
self.conv4 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class DownSample5(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(512, 1024, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')
self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')
self.resblock = ResBlock(ch=512, nblocks=4)
self.conv4 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(1024, 1024, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class Neck(nn.Module):
def __init__(self, inference=False):
super().__init__()
self.inference = inference
self.conv1 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv2 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
# SPP
self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=5 // 2)
self.maxpool2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=9 // 2)
self.maxpool3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=13 // 2)
# R -1 -3 -5 -6
# SPP
self.conv4 = Conv_Bn_Activation(2048, 512, 1, 1, 'leaky')
self.conv5 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv6 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv7 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
# UP
self.upsample1 = Upsample()
# R 85
self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
# R -1 -3
self.conv9 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv10 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv11 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv12 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv13 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv14 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
# UP
self.upsample2 = Upsample()
# R 54
self.conv15 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
# R -1 -3
self.conv16 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
self.conv17 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
self.conv18 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
self.conv19 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
self.conv20 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
def forward(self, input, downsample4, downsample3, inference=False):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x2)
# SPP
m1 = self.maxpool1(x3)
m2 = self.maxpool2(x3)
m3 = self.maxpool3(x3)
spp = torch.cat([m3, m2, m1, x3], dim=1)
# SPP end
x4 = self.conv4(spp)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
x7 = self.conv7(x6)
# UP
up = self.upsample1(x7, downsample4.size(), self.inference)
# R 85
x8 = self.conv8(downsample4)
# R -1 -3
x8 = torch.cat([x8, up], dim=1)
x9 = self.conv9(x8)
x10 = self.conv10(x9)
x11 = self.conv11(x10)
x12 = self.conv12(x11)
x13 = self.conv13(x12)
x14 = self.conv14(x13)
# UP
up = self.upsample2(x14, downsample3.size(), self.inference)
# R 54
x15 = self.conv15(downsample3)
# R -1 -3
x15 = torch.cat([x15, up], dim=1)
x16 = self.conv16(x15)
x17 = self.conv17(x16)
x18 = self.conv18(x17)
x19 = self.conv19(x18)
x20 = self.conv20(x19)
return x20, x13, x6
class Yolov4Head(nn.Module):
def __init__(self, output_ch, n_classes, inference=False):
super().__init__()
self.inference = inference
self.conv1 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
self.conv2 = Conv_Bn_Activation(256, output_ch, 1, 1, 'linear', bn=False, bias=True)
self.yolo1 = YoloLayer(
anchor_mask=[0, 1, 2], num_classes=n_classes,
anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
num_anchors=9, stride=8)
# R -4
self.conv3 = Conv_Bn_Activation(128, 256, 3, 2, 'leaky')
# R -1 -16
self.conv4 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv5 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv6 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv7 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv9 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv10 = Conv_Bn_Activation(512, output_ch, 1, 1, 'linear', bn=False, bias=True)
self.yolo2 = YoloLayer(
anchor_mask=[3, 4, 5], num_classes=n_classes,
anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
num_anchors=9, stride=16)
# R -4
self.conv11 = Conv_Bn_Activation(256, 512, 3, 2, 'leaky')
# R -1 -37
self.conv12 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv13 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv14 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv15 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv16 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv17 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv18 = Conv_Bn_Activation(1024, output_ch, 1, 1, 'linear', bn=False, bias=True)
self.yolo3 = YoloLayer(
anchor_mask=[6, 7, 8], num_classes=n_classes,
anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
num_anchors=9, stride=32)
def forward(self, input1, input2, input3):
x1 = self.conv1(input1)
x2 = self.conv2(x1)
x3 = self.conv3(input1)
# R -1 -16
x3 = torch.cat([x3, input2], dim=1)
x4 = self.conv4(x3)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
x7 = self.conv7(x6)
x8 = self.conv8(x7)
x9 = self.conv9(x8)
x10 = self.conv10(x9)
# R -4
x11 = self.conv11(x8)
# R -1 -37
x11 = torch.cat([x11, input3], dim=1)
x12 = self.conv12(x11)
x13 = self.conv13(x12)
x14 = self.conv14(x13)
x15 = self.conv15(x14)
x16 = self.conv16(x15)
x17 = self.conv17(x16)
x18 = self.conv18(x17)
if self.inference:
y1 = self.yolo1(x2)
y2 = self.yolo2(x10)
y3 = self.yolo3(x18)
return get_region_boxes([y1, y2, y3])
else:
return [x2, x10, x18]
class Yolov4(nn.Module):
def __init__(self, yolov4conv137weight=None, n_classes=80, inference=False):
super().__init__()
output_ch = (4 + 1 + n_classes) * 3
# backbone
self.down1 = DownSample1()
self.down2 = DownSample2()
self.down3 = DownSample3()
self.down4 = DownSample4()
self.down5 = DownSample5()
# neck
self.neek = Neck(inference)
# yolov4conv137
if yolov4conv137weight:
_model = nn.Sequential(self.down1, self.down2, self.down3, self.down4, self.down5, self.neek)
pretrained_dict = torch.load(yolov4conv137weight)
model_dict = _model.state_dict()
# 1. remap the pretrained weights onto this model's parameter names by position
pretrained_dict = {k1: v for (k, v), k1 in zip(pretrained_dict.items(), model_dict)}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict)
_model.load_state_dict(model_dict)
# head
self.head = Yolov4Head(output_ch, n_classes, inference)
def forward(self, input):
# Unused on this node: edge2 only runs self.down3 directly (see the loop at the bottom of this file).
pass
# d1 = self.down1(input)
# d2 = self.down2(d1)
#d3 = self.down3(input)
#d4 = self.down4(d3)
# d5 = self.down5(d4)
#
# x20, x13, x6 = self.neek(d5, d4, d3)
#
# output = self.head(x20, x13, x6)
# return output
namesfile = 'data/coco.names'
n_classes = 80
weightfile = './yolov4.pth'
height = 608
width = 608
model = Yolov4(yolov4conv137weight=None, n_classes=n_classes, inference=True)
pretrained_dict = torch.load(weightfile, map_location=torch.device('cpu'))
model.load_state_dict(pretrained_dict)
model.eval()
# time.sleep(10)
from config_edge import trans_thread
trans = trans_thread()
trans.start()
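# Split-inference stage on edge2: pull the d2 feature map from the upstream node, run down3
# locally, and forward the result downstream via trans.put_d4().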
while 1:
d2 = trans.get_d2()
d2 = torch.from_numpy(d2)
print(d2.shape[0])
print(d2.shape[1])
print(d2.shape[2])
print(d2.shape[3])
d3 = model.down3(d2)
trans.put_d4(d3)
FROM ubuntu:18.04
RUN apt-get -yqq update
RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
RUN apt-get clean
RUN apt-get -yqq update
RUN apt-get install -yqq openssh-client openssh-server
RUN echo 'root:PASSWORD' | chpasswd
RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN service ssh restart
RUN apt-get install -y software-properties-common
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get install -y python3.9
RUN apt-get autoremove -y python3
RUN ln -s /usr/bin/python3.9 /usr/bin/python
RUN ln -s /usr/bin/python3.9 /usr/bin/python3
RUN apt-get install -y python3.9-distutils
RUN apt-get install -y wget
RUN wget https://bootstrap.pypa.io/get-pip.py
RUN python get-pip.py
RUN pip3 -V
RUN ln -s /usr/local/bin/pip3 /usr/bin/pip3
RUN apt-get -yqq install libssl-dev libffi-dev gcc python3.9-dev libgl1-mesa-glx libsm6 libxext6 libglib2.0-0
RUN apt-get -yqq update
RUN pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple/
ADD requirements.txt /edge3/requirements.txt
#RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --no-cache-dir -r requirements.txt
RUN pip3 install -r /edge3/requirements.txt
WORKDIR /edge3
ADD . /edge3
CMD ["python", "models.py"]
import logging
import threading
import queue
from socket import *
import numpy as np
import _thread
import time
qsize = 1
ip_add = '127.0.0.1'
server_port = 25003
connect_port = 25002
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
def send_from(arr, dest):
view = memoryview(arr).cast('B')
while len(view):
nsent = dest.send(view)
view = view[nsent:]
def recv_into(arr, source):
view = memoryview(arr).cast('B')
while len(view):
nrecv = source.recv_into(view)
view = view[nrecv:]
def doConnect(host, port):
sock = socket(AF_INET, SOCK_STREAM)
sock.settimeout(20)
flag = True
while flag:
try:
if flag:
log.info("try connect %s : %d", host, port)
sock.connect((host, port))
flag = False
except Exception as e:
log.error("Address-related error connecting to server: %s" % e)
time.sleep(3)
return sock
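# trans_thread (edge3): accepts the detection consumer on server_port and connects upstream to
# edge2 on connect_port; incoming feature maps (1x256x76x76 float32) are queued in d4Que, and
# put_box() replies with the box count followed by the boxes themselves (a single zero byte
# when no boxes were found).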
class trans_thread(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
self.lock = threading.Lock()
self.d4Que = queue.Queue(qsize)
self.boxQue = queue.Queue(qsize)
self.server = socket(AF_INET, SOCK_STREAM)
self.server.bind(('', server_port))
self.server.listen(3)
log.info("bind %d", server_port)
self.a_client, _ = self.server.accept()
self.a_client.settimeout(5)
self.client = doConnect(ip_add, connect_port)
log.info('edge 3 init successfully')
def put_box(self, box):
while self.boxQue.full():
# print('box is full')
time.sleep(0.1)
self.lock.acquire()
self.boxQue.put((len(box[0]), np.array(box, dtype=np.float32)))
self.lock.release()
def get_d4(self):
while self.d4Que.empty():
print('d4 is empty')
time.sleep(0.1)
self.lock.acquire()
d4 = self.d4Que.get()
self.lock.release()
return d4
def recv(self):
arr = np.zeros(shape=(1, 256, 76, 76), dtype=np.float32)
while 1:
if not self.d4Que.full():
recv_into(arr, self.client)
self.lock.acquire()
self.d4Que.put(arr.copy())  # copy: recv_into reuses arr for the next tensor
self.lock.release()
else:
# print('d4 is full')
time.sleep(0.1)
def send(self):
try:
while 1:
if not self.boxQue.empty():
self.lock.acquire()
lth, box = self.boxQue.get()
self.lock.release()
if lth == 0:
send_from(np.zeros(1, dtype=np.uint8), self.a_client)
else:
lth = np.array([lth])
send_from(lth, self.a_client)
send_from(box, self.a_client)
else:
# print('box is empty')
time.sleep(0.1)
except Exception as e:
log.error("connecting error: %s" % e)
self.a_client, _ = self.server.accept()
def run(self):
_thread.start_new_thread(self.recv, ())
_thread.start_new_thread(self.send, ())
# def print_time(threadName, delay, counter):
# while counter:
# if exitFlag:
# threadName.exit()
# time.sleep(delay)
# print ("%s: %s" % (threadName, time.ctime(time.time())))
# counter -= 1
apiVersion: apps/v1
kind: Deployment
metadata:
name: edge3
spec:
replicas: 1
selector:
matchLabels:
app: edge3
template:
metadata:
labels:
app: edge3
spec:
hostNetwork: true
nodeSelector:
kubernetes.io/hostname: node3
containers:
- name: edge3
image: k8s-master:5000/edge/edge3:v1
imagePullPolicy: Always
ports:
- containerPort: 25003
#---
#apiVersion: v1
#kind: Service
#metadata:
# name: edge3
#spec:
# type: NodePort
# selector:
# app: edge3
# ports:
# - name: tcp
# port: 32003
# targetPort: 25003
# nodePort: 32003
import logging
import sys
import torch
import torch.nn.functional as F
from torch import nn
from tool.torch_utils import *
from tool.torch_utils import do_detect
from tool.utils import load_class_names
from tool.yolo_layer import YoloLayer
class Mish(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, x):
x = x * (torch.tanh(torch.nn.functional.softplus(x)))
return x
class Upsample(nn.Module):
def __init__(self):
super(Upsample, self).__init__()
def forward(self, x, target_size, inference=False):
assert (x.data.dim() == 4)
# _, _, tH, tW = target_size
if inference:
# B = x.data.size(0)
# C = x.data.size(1)
# H = x.data.size(2)
# W = x.data.size(3)
return x.view(x.size(0), x.size(1), x.size(2), 1, x.size(3), 1). \
expand(x.size(0), x.size(1), x.size(2), target_size[2] // x.size(2), x.size(3),
target_size[3] // x.size(3)). \
contiguous().view(x.size(0), x.size(1), target_size[2], target_size[3])
else:
return F.interpolate(x, size=(target_size[2], target_size[3]), mode='nearest')
class Conv_Bn_Activation(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride, activation, bn=True, bias=False):
super().__init__()
pad = (kernel_size - 1) // 2
self.conv = nn.ModuleList()
if bias:
self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad))
else:
self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad, bias=False))
if bn:
self.conv.append(nn.BatchNorm2d(out_channels))
if activation == "mish":
self.conv.append(Mish())
elif activation == "relu":
self.conv.append(nn.ReLU(inplace=True))
elif activation == "leaky":
self.conv.append(nn.LeakyReLU(0.1, inplace=True))
elif activation == "linear":
pass
else:
print("activate error !!! {} {} {}".format(sys._getframe().f_code.co_filename,
sys._getframe().f_code.co_name, sys._getframe().f_lineno))
def forward(self, x):
for l in self.conv:
x = l(x)
return x
class ResBlock(nn.Module):
"""
Sequential residual blocks each of which consists of \
two convolution layers.
Args:
ch (int): number of input and output channels.
nblocks (int): number of residual blocks.
shortcut (bool): if True, residual tensor addition is enabled.
"""
def __init__(self, ch, nblocks=1, shortcut=True):
super().__init__()
self.shortcut = shortcut
self.module_list = nn.ModuleList()
for i in range(nblocks):
resblock_one = nn.ModuleList()
resblock_one.append(Conv_Bn_Activation(ch, ch, 1, 1, 'mish'))
resblock_one.append(Conv_Bn_Activation(ch, ch, 3, 1, 'mish'))
self.module_list.append(resblock_one)
def forward(self, x):
for module in self.module_list:
h = x
for res in module:
h = res(h)
x = x + h if self.shortcut else h
return x
class DownSample1(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(3, 32, 3, 1, 'mish')
self.conv2 = Conv_Bn_Activation(32, 64, 3, 2, 'mish')
self.conv3 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
# [route]
# layers = -2
self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(64, 32, 1, 1, 'mish')
self.conv6 = Conv_Bn_Activation(32, 64, 3, 1, 'mish')
# [shortcut]
# from=-3
# activation = linear
self.conv7 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
# [route]
# layers = -1, -7
self.conv8 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x2)
# route -2
x4 = self.conv4(x2)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
# shortcut -3
x6 = x6 + x4
x7 = self.conv7(x6)
# [route]
# layers = -1, -7
x7 = torch.cat([x7, x3], dim=1)
x8 = self.conv8(x7)
return x8
class DownSample2(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(64, 128, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
# r -2
self.conv3 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
self.resblock = ResBlock(ch=64, nblocks=2)
# s -3
self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
# r -1 -10
self.conv5 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class DownSample3(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(128, 256, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')
self.conv3 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')
self.resblock = ResBlock(ch=128, nblocks=8)
self.conv4 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class DownSample4(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(256, 512, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')
self.conv3 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')
self.resblock = ResBlock(ch=256, nblocks=8)
self.conv4 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class DownSample5(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = Conv_Bn_Activation(512, 1024, 3, 2, 'mish')
self.conv2 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')
self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')
self.resblock = ResBlock(ch=512, nblocks=4)
self.conv4 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')
self.conv5 = Conv_Bn_Activation(1024, 1024, 1, 1, 'mish')
def forward(self, input):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x1)
r = self.resblock(x3)
x4 = self.conv4(r)
x4 = torch.cat([x4, x2], dim=1)
x5 = self.conv5(x4)
return x5
class Neck(nn.Module):
def __init__(self, inference=False):
super().__init__()
self.inference = inference
self.conv1 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv2 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
# SPP
self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=5 // 2)
self.maxpool2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=9 // 2)
self.maxpool3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=13 // 2)
# R -1 -3 -5 -6
# SPP
self.conv4 = Conv_Bn_Activation(2048, 512, 1, 1, 'leaky')
self.conv5 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv6 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv7 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
# UP
self.upsample1 = Upsample()
# R 85
self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
# R -1 -3
self.conv9 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv10 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv11 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv12 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv13 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv14 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
# UP
self.upsample2 = Upsample()
# R 54
self.conv15 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
# R -1 -3
self.conv16 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
self.conv17 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
self.conv18 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
self.conv19 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
self.conv20 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
def forward(self, input, downsample4, downsample3, inference=False):
x1 = self.conv1(input)
x2 = self.conv2(x1)
x3 = self.conv3(x2)
# SPP
m1 = self.maxpool1(x3)
m2 = self.maxpool2(x3)
m3 = self.maxpool3(x3)
spp = torch.cat([m3, m2, m1, x3], dim=1)
# SPP end
x4 = self.conv4(spp)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
x7 = self.conv7(x6)
# UP
up = self.upsample1(x7, downsample4.size(), self.inference)
# R 85
x8 = self.conv8(downsample4)
# R -1 -3
x8 = torch.cat([x8, up], dim=1)
x9 = self.conv9(x8)
x10 = self.conv10(x9)
x11 = self.conv11(x10)
x12 = self.conv12(x11)
x13 = self.conv13(x12)
x14 = self.conv14(x13)
# UP
up = self.upsample2(x14, downsample3.size(), self.inference)
# R 54
x15 = self.conv15(downsample3)
# R -1 -3
x15 = torch.cat([x15, up], dim=1)
x16 = self.conv16(x15)
x17 = self.conv17(x16)
x18 = self.conv18(x17)
x19 = self.conv19(x18)
x20 = self.conv20(x19)
return x20, x13, x6
class Yolov4Head(nn.Module):
def __init__(self, output_ch, n_classes, inference=False):
super().__init__()
self.inference = inference
self.conv1 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
self.conv2 = Conv_Bn_Activation(256, output_ch, 1, 1, 'linear', bn=False, bias=True)
self.yolo1 = YoloLayer(
anchor_mask=[0, 1, 2], num_classes=n_classes,
anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
num_anchors=9, stride=8)
# R -4
self.conv3 = Conv_Bn_Activation(128, 256, 3, 2, 'leaky')
# R -1 -16
self.conv4 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv5 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv6 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv7 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
self.conv9 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
self.conv10 = Conv_Bn_Activation(512, output_ch, 1, 1, 'linear', bn=False, bias=True)
self.yolo2 = YoloLayer(
anchor_mask=[3, 4, 5], num_classes=n_classes,
anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
num_anchors=9, stride=16)
# R -4
self.conv11 = Conv_Bn_Activation(256, 512, 3, 2, 'leaky')
# R -1 -37
self.conv12 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv13 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv14 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv15 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv16 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
self.conv17 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
self.conv18 = Conv_Bn_Activation(1024, output_ch, 1, 1, 'linear', bn=False, bias=True)
self.yolo3 = YoloLayer(
anchor_mask=[6, 7, 8], num_classes=n_classes,
anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
num_anchors=9, stride=32)
def forward(self, input1, input2, input3):
x1 = self.conv1(input1)
x2 = self.conv2(x1)
x3 = self.conv3(input1)
# R -1 -16
x3 = torch.cat([x3, input2], dim=1)
x4 = self.conv4(x3)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
x7 = self.conv7(x6)
x8 = self.conv8(x7)
x9 = self.conv9(x8)
x10 = self.conv10(x9)
# R -4
x11 = self.conv11(x8)
# R -1 -37
x11 = torch.cat([x11, input3], dim=1)
x12 = self.conv12(x11)
x13 = self.conv13(x12)
x14 = self.conv14(x13)
x15 = self.conv15(x14)
x16 = self.conv16(x15)
x17 = self.conv17(x16)
x18 = self.conv18(x17)
if self.inference:
y1 = self.yolo1(x2)
y2 = self.yolo2(x10)
y3 = self.yolo3(x18)
return get_region_boxes([y1, y2, y3])
else:
return [x2, x10, x18]
class Yolov4(nn.Module):
def __init__(self, yolov4conv137weight=None, n_classes=80, inference=False):
super().__init__()
output_ch = (4 + 1 + n_classes) * 3
# backbone
self.down1 = DownSample1()
self.down2 = DownSample2()
self.down3 = DownSample3()
self.down4 = DownSample4()
self.down5 = DownSample5()
# neck
self.neek = Neck(inference)
# yolov4conv137
if yolov4conv137weight:
_model = nn.Sequential(self.down1, self.down2, self.down3, self.down4, self.down5, self.neek)
pretrained_dict = torch.load(yolov4conv137weight)
model_dict = _model.state_dict()
# 1. remap the pretrained keys onto this model's keys by position
pretrained_dict = {k1: v for (k, v), k1 in zip(pretrained_dict.items(), model_dict)}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict)
_model.load_state_dict(model_dict)
# head
self.head = Yolov4Head(output_ch, n_classes, inference)
def forward(self, d3):
d4 = self.down4(d3)
d5 = self.down5(d4)
x20, x13, x6 = self.neek(d5, d4, d3)
output = self.head(x20, x13, x6)
return output
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
namesfile = 'data/coco.names'
n_classes = 80
weightfile = './yolov4.pth'
height = 608
width = 608
model = Yolov4(yolov4conv137weight=None, n_classes=n_classes, inference=True)
pretrained_dict = torch.load(weightfile, map_location=torch.device('cpu'))
model.load_state_dict(pretrained_dict)
model.eval()
# time.sleep(10)
from config_edge import trans_thread
trans = trans_thread()
trans.start()
model.eval()
class_names = load_class_names(namesfile)
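# Main loop: pull an intermediate feature map from the transfer thread, run the
# remaining YOLOv4 stages on it, and hand the resulting boxes back to the
# transfer thread so they can be sent downstream.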
while 1:
d3 = trans.get_d4()
d3 = torch.from_numpy(d3)
log.info(d3.shape[0])
log.info(d3.shape[1])
log.info(d3.shape[2])
log.info(d3.shape[3])
boxes = do_detect(model, d3, 0.4, 0.6, False)
# print(boxes)
trans.put_box(boxes)
numpy==1.20.1
torch==1.8.0
tensorboardX==2.0
matplotlib==3.3.4
tqdm==4.43.0
easydict==1.9
Pillow==8.1.2
scikit-image
opencv_python
pycocotools
kubernetes
# -*- coding: utf-8 -*-
'''
@Time : 2020/04/26 15:48
@Author : Tianxiaomo
@File : camera.py
@Notice :
@Modification :
@Author :
@Time :
@Detail :
'''
from __future__ import division
import cv2
from tool.darknet2pytorch import Darknet
import argparse
from tool.utils import *
from tool.torch_utils import *
def arg_parse():
"""
Parse arguments for the detect module
"""
parser = argparse.ArgumentParser(description='YOLO v4 Cam Demo')
parser.add_argument("--confidence", dest="confidence", help="Object Confidence to filter predictions", default=0.25)
parser.add_argument("--nms_thresh", dest="nms_thresh", help="NMS Threshhold", default=0.4)
parser.add_argument("--reso", dest='reso', help=
"Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
default="160", type=str)
return parser.parse_args()
if __name__ == '__main__':
cfgfile = "cfg/yolov4.cfg"
weightsfile = "weight/yolov4.weights"
args = arg_parse()
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
CUDA = torch.cuda.is_available()
num_classes = 80
bbox_attrs = 5 + num_classes
class_names = load_class_names("data/coco.names")
model = Darknet(cfgfile)
model.load_weights(weightsfile)
if CUDA:
model.cuda()
model.eval()
cap = cv2.VideoCapture(0)
assert cap.isOpened(), 'Cannot capture source'
frames = 0
start = time.time()
while cap.isOpened():
ret, frame = cap.read()
if ret:
sized = cv2.resize(frame, (model.width, model.height))
sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
boxes = do_detect(model, sized, 0.5, 0.4, CUDA)
orig_im = plot_boxes_cv2(frame, boxes, class_names=class_names)
cv2.imshow("frame", orig_im)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
frames += 1
print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
else:
break
# -*- coding: utf-8 -*-
'''
@Time : 2020/05/08 11:45
@Author : Tianxiaomo
@File : coco_annotatin.py
@Notice :
@Modification :
@Author :
@Time :
@Detail :
'''
import json
from collections import defaultdict
from tqdm import tqdm
import os
"""hyper parameters"""
json_file_path = 'E:/Dataset/mscoco2017/annotations/instances_train2017.json'
images_dir_path = 'mscoco2017/train2017/'
output_path = '../data/val.txt'
"""load json file"""
name_box_id = defaultdict(list)
id_name = dict()
with open(json_file_path, encoding='utf-8') as f:
data = json.load(f)
"""generate labels"""
images = data['images']
annotations = data['annotations']
for ant in tqdm(annotations):
id = ant['image_id']
# name = os.path.join(images_dir_path, images[id]['file_name'])
name = os.path.join(images_dir_path, '{:012d}.jpg'.format(id))
cat = ant['category_id']
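# COCO category ids are not contiguous (e.g. 12, 26, 29 and 30 are unused);
# the chain below remaps the 80 used ids onto 0..79.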
if cat >= 1 and cat <= 11:
cat = cat - 1
elif cat >= 13 and cat <= 25:
cat = cat - 2
elif cat >= 27 and cat <= 28:
cat = cat - 3
elif cat >= 31 and cat <= 44:
cat = cat - 5
elif cat >= 46 and cat <= 65:
cat = cat - 6
elif cat == 67:
cat = cat - 7
elif cat == 70:
cat = cat - 9
elif cat >= 72 and cat <= 82:
cat = cat - 10
elif cat >= 84 and cat <= 90:
cat = cat - 11
name_box_id[name].append([ant['bbox'], cat])
"""write to txt"""
with open(output_path, 'w') as f:
for key in tqdm(name_box_id.keys()):
f.write(key)
box_infos = name_box_id[key]
for info in box_infos:
x_min = int(info[0][0])
y_min = int(info[0][1])
x_max = x_min + int(info[0][2])
y_max = y_min + int(info[0][3])
box_info = " %d,%d,%d,%d,%d" % (
x_min, y_min, x_max, y_max, int(info[1]))
f.write(box_info)
f.write('\n')
import sys
import torch
from tool.torch_utils import convert2cpu
def parse_cfg(cfgfile):
blocks = []
fp = open(cfgfile, 'r')
block = None
line = fp.readline()
while line != '':
line = line.rstrip()
if line == '' or line[0] == '#':
line = fp.readline()
continue
elif line[0] == '[':
if block:
blocks.append(block)
block = dict()
block['type'] = line.lstrip('[').rstrip(']')
# set default value
if block['type'] == 'convolutional':
block['batch_normalize'] = 0
else:
key, value = line.split('=')
key = key.strip()
if key == 'type':
key = '_type'
value = value.strip()
block[key] = value
line = fp.readline()
if block:
blocks.append(block)
fp.close()
return blocks
def print_cfg(blocks):
print('layer filters size input output');
prev_width = 416
prev_height = 416
prev_filters = 3
out_filters = []
out_widths = []
out_heights = []
ind = -2
for block in blocks:
ind = ind + 1
if block['type'] == 'net':
prev_width = int(block['width'])
prev_height = int(block['height'])
continue
elif block['type'] == 'convolutional':
filters = int(block['filters'])
kernel_size = int(block['size'])
stride = int(block['stride'])
is_pad = int(block['pad'])
pad = (kernel_size - 1) // 2 if is_pad else 0
width = (prev_width + 2 * pad - kernel_size) // stride + 1
height = (prev_height + 2 * pad - kernel_size) // stride + 1
print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width,
height, filters))
prev_width = width
prev_height = height
prev_filters = filters
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'maxpool':
pool_size = int(block['size'])
stride = int(block['stride'])
width = prev_width // stride
height = prev_height // stride
print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height,
prev_filters))
prev_width = width
prev_height = height
# max pooling does not change the channel count
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'avgpool':
width = 1
height = 1
print('%5d %-6s %3d x %3d x%4d -> %3d' % (
ind, 'avg', prev_width, prev_height, prev_filters, prev_filters))
prev_width = width
prev_height = height
# global average pooling does not change the channel count
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'softmax':
print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters))
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'cost':
print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters))
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'reorg':
stride = int(block['stride'])
filters = stride * stride * prev_filters
width = prev_width // stride
height = prev_height // stride
print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters))
prev_width = width
prev_height = height
prev_filters = filters
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'upsample':
stride = int(block['stride'])
filters = prev_filters
width = prev_width * stride
height = prev_height * stride
print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters))
prev_width = width
prev_height = height
prev_filters = filters
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'route':
layers = block['layers'].split(',')
layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
if len(layers) == 1:
print('%5d %-6s %d' % (ind, 'route', layers[0]))
prev_width = out_widths[layers[0]]
prev_height = out_heights[layers[0]]
prev_filters = out_filters[layers[0]]
elif len(layers) == 2:
print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1]))
prev_width = out_widths[layers[0]]
prev_height = out_heights[layers[0]]
assert (prev_width == out_widths[layers[1]])
assert (prev_height == out_heights[layers[1]])
prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
elif len(layers) == 4:
print('%5d %-6s %d %d %d %d' % (ind, 'route', layers[0], layers[1], layers[2], layers[3]))
prev_width = out_widths[layers[0]]
prev_height = out_heights[layers[0]]
assert (prev_width == out_widths[layers[1]] == out_widths[layers[2]] == out_widths[layers[3]])
assert (prev_height == out_heights[layers[1]] == out_heights[layers[2]] == out_heights[layers[3]])
prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + out_filters[
layers[3]]
else:
print("route error !!! {} {} {}".format(sys._getframe().f_code.co_filename,
sys._getframe().f_code.co_name, sys._getframe().f_lineno))
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] in ['region', 'yolo']:
print('%5d %-6s' % (ind, 'detection'))
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'shortcut':
from_id = int(block['from'])
from_id = from_id if from_id > 0 else from_id + ind
print('%5d %-6s %d' % (ind, 'shortcut', from_id))
prev_width = out_widths[from_id]
prev_height = out_heights[from_id]
prev_filters = out_filters[from_id]
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'connected':
filters = int(block['output'])
print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters))
prev_filters = filters
out_widths.append(1)
out_heights.append(1)
out_filters.append(prev_filters)
else:
print('unknown type %s' % (block['type']))
def load_conv(buf, start, conv_model):
num_w = conv_model.weight.numel()
num_b = conv_model.bias.numel()
conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]));
start = start + num_b
conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape));
start = start + num_w
return start
def save_conv(fp, conv_model):
if conv_model.bias.is_cuda:
convert2cpu(conv_model.bias.data).numpy().tofile(fp)
convert2cpu(conv_model.weight.data).numpy().tofile(fp)
else:
conv_model.bias.data.numpy().tofile(fp)
conv_model.weight.data.numpy().tofile(fp)
def load_conv_bn(buf, start, conv_model, bn_model):
num_w = conv_model.weight.numel()
num_b = bn_model.bias.numel()
bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]));
start = start + num_b
bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b]));
start = start + num_b
bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b]));
start = start + num_b
bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b]));
start = start + num_b
conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape));
start = start + num_w
return start
def save_conv_bn(fp, conv_model, bn_model):
if bn_model.bias.is_cuda:
convert2cpu(bn_model.bias.data).numpy().tofile(fp)
convert2cpu(bn_model.weight.data).numpy().tofile(fp)
convert2cpu(bn_model.running_mean).numpy().tofile(fp)
convert2cpu(bn_model.running_var).numpy().tofile(fp)
convert2cpu(conv_model.weight.data).numpy().tofile(fp)
else:
bn_model.bias.data.numpy().tofile(fp)
bn_model.weight.data.numpy().tofile(fp)
bn_model.running_mean.numpy().tofile(fp)
bn_model.running_var.numpy().tofile(fp)
conv_model.weight.data.numpy().tofile(fp)
def load_fc(buf, start, fc_model):
num_w = fc_model.weight.numel()
num_b = fc_model.bias.numel()
fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]));
start = start + num_b
fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]));
start = start + num_w
return start
def save_fc(fp, fc_model):
fc_model.bias.data.numpy().tofile(fp)
fc_model.weight.data.numpy().tofile(fp)
if __name__ == '__main__':
import sys
blocks = parse_cfg('cfg/yolo.cfg')
if len(sys.argv) == 2:
blocks = parse_cfg(sys.argv[1])
print_cfg(blocks)
import sys
import torch
from tool.darknet2pytorch import Darknet
def transform_to_onnx(cfgfile, weightfile, batch_size=1, dynamic=False):
model = Darknet(cfgfile)
model.print_network()
model.load_weights(weightfile)
print('Loading weights from %s... Done!' % (weightfile))
if batch_size <= 0:
dynamic = True
input_names = ["input"]
output_names = ['boxes', 'confs']
if dynamic:
x = torch.randn((1, 3, model.height, model.width), requires_grad=True)
onnx_file_name = "yolov4_-1_3_{}_{}_dynamic.onnx".format(model.height, model.width)
dynamic_axes = {"input": {0: "batch_size"}, "boxes": {0: "batch_size"}, "confs": {0: "batch_size"}}
# Export the model
print('Export the onnx model ...')
torch.onnx.export(model,
x,
onnx_file_name,
export_params=True,
opset_version=11,
do_constant_folding=True,
input_names=input_names, output_names=output_names,
dynamic_axes=dynamic_axes)
print('Onnx model exporting done')
return onnx_file_name
else:
x = torch.randn((batch_size, 3, model.height, model.width), requires_grad=True)
onnx_file_name = "yolov4_{}_3_{}_{}_static.onnx".format(batch_size, model.height, model.width)
torch.onnx.export(model,
x,
onnx_file_name,
export_params=True,
opset_version=11,
do_constant_folding=True,
input_names=input_names, output_names=output_names,
dynamic_axes=None)
print('Onnx model exporting done')
return onnx_file_name
if __name__ == '__main__':
if len(sys.argv) == 3:
cfgfile = sys.argv[1]
weightfile = sys.argv[2]
transform_to_onnx(cfgfile, weightfile)
elif len(sys.argv) == 4:
cfgfile = sys.argv[1]
weightfile = sys.argv[2]
batch_size = int(sys.argv[3])
transform_to_onnx(cfgfile, weightfile, batch_size)
elif len(sys.argv) == 5:
cfgfile = sys.argv[1]
weightfile = sys.argv[2]
batch_size = int(sys.argv[3])
dynamic = True if sys.argv[4] == 'True' else False
transform_to_onnx(cfgfile, weightfile, batch_size, dynamic)
else:
print('Please execute this script this way:\n')
print(' python darknet2onnx.py <cfgFile> <weightFile>')
print('or')
print(' python darknet2onnx.py <cfgFile> <weightFile> <batchSize> [dynamic]')
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tool.region_loss import RegionLoss
from tool.yolo_layer import YoloLayer
from tool.config import *
from tool.torch_utils import *
class Mish(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, x):
x = x * (torch.tanh(torch.nn.functional.softplus(x)))
return x
class MaxPoolDark(nn.Module):
def __init__(self, size=2, stride=1):
super(MaxPoolDark, self).__init__()
self.size = size
self.stride = stride
def forward(self, x):
'''
darknet output_size = (input_size + p - k) / s +1
p : padding = k - 1
k : size
s : stride
torch output_size = (input_size + 2*p -k) / s +1
p : padding = k//2
'''
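# Worked example: size=2, stride=1 on a 13x13 input. Darknet expects a 13x13
# output, but symmetric padding of k//2 = 1 would give 14x14, so the code pads
# asymmetrically (0 on one side, 1 on the other) to reproduce darknet's shape.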
p = self.size // 2
if ((x.shape[2] - 1) // self.stride) != ((x.shape[2] + 2 * p - self.size) // self.stride):
padding1 = (self.size - 1) // 2
padding2 = padding1 + 1
else:
padding1 = (self.size - 1) // 2
padding2 = padding1
if ((x.shape[3] - 1) // self.stride) != ((x.shape[3] + 2 * p - self.size) // self.stride):
padding3 = (self.size - 1) // 2
padding4 = padding3 + 1
else:
padding3 = (self.size - 1) // 2
padding4 = padding3
x = F.max_pool2d(F.pad(x, (padding3, padding4, padding1, padding2), mode='replicate'),
self.size, stride=self.stride)
return x
class Upsample_expand(nn.Module):
def __init__(self, stride=2):
super(Upsample_expand, self).__init__()
self.stride = stride
def forward(self, x):
assert (x.data.dim() == 4)
x = x.view(x.size(0), x.size(1), x.size(2), 1, x.size(3), 1).\
expand(x.size(0), x.size(1), x.size(2), self.stride, x.size(3), self.stride).contiguous().\
view(x.size(0), x.size(1), x.size(2) * self.stride, x.size(3) * self.stride)
return x
class Upsample_interpolate(nn.Module):
def __init__(self, stride):
super(Upsample_interpolate, self).__init__()
self.stride = stride
def forward(self, x):
assert (x.data.dim() == 4)
out = F.interpolate(x, size=(x.size(2) * self.stride, x.size(3) * self.stride), mode='nearest')
return out
class Reorg(nn.Module):
def __init__(self, stride=2):
super(Reorg, self).__init__()
self.stride = stride
def forward(self, x):
stride = self.stride
assert (x.data.dim() == 4)
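# Space-to-depth: every stride x stride block of pixels is moved into the
# channel dimension, so H and W shrink by stride and C grows by stride**2.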
B = x.data.size(0)
C = x.data.size(1)
H = x.data.size(2)
W = x.data.size(3)
assert (H % stride == 0)
assert (W % stride == 0)
ws = stride
hs = stride
x = x.view(B, C, H // hs, hs, W // ws, ws).transpose(3, 4).contiguous()
x = x.view(B, C, H // hs * W // ws, hs * ws).transpose(2, 3).contiguous()
x = x.view(B, C, hs * ws, H // hs, W // ws).transpose(1, 2).contiguous()
x = x.view(B, hs * ws * C, H // hs, W // ws)
return x
class GlobalAvgPool2d(nn.Module):
def __init__(self):
super(GlobalAvgPool2d, self).__init__()
def forward(self, x):
N = x.data.size(0)
C = x.data.size(1)
H = x.data.size(2)
W = x.data.size(3)
x = F.avg_pool2d(x, (H, W))
x = x.view(N, C)
return x
# for route and shortcut
class EmptyModule(nn.Module):
def __init__(self):
super(EmptyModule, self).__init__()
def forward(self, x):
return x
# support route shortcut and reorg
class Darknet(nn.Module):
def __init__(self, cfgfile, inference=False):
super(Darknet, self).__init__()
self.inference = inference
self.training = not self.inference
self.blocks = parse_cfg(cfgfile)
self.width = int(self.blocks[0]['width'])
self.height = int(self.blocks[0]['height'])
self.models = self.create_network(self.blocks) # merge conv, bn,leaky
self.loss = self.models[len(self.models) - 1]
if self.blocks[(len(self.blocks) - 1)]['type'] == 'region':
self.anchors = self.loss.anchors
self.num_anchors = self.loss.num_anchors
self.anchor_step = self.loss.anchor_step
self.num_classes = self.loss.num_classes
self.header = torch.IntTensor([0, 0, 0, 0])
self.seen = 0
def forward(self, x):
ind = -2
self.loss = None
outputs = dict()
out_boxes = []
for block in self.blocks:
ind = ind + 1
# if ind > 0:
# return x
if block['type'] == 'net':
continue
elif block['type'] in ['convolutional', 'maxpool', 'reorg', 'upsample', 'avgpool', 'softmax', 'connected']:
x = self.models[ind](x)
outputs[ind] = x
elif block['type'] == 'route':
layers = block['layers'].split(',')
layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
if len(layers) == 1:
if 'groups' not in block.keys() or int(block['groups']) == 1:
x = outputs[layers[0]]
outputs[ind] = x
else:
groups = int(block['groups'])
group_id = int(block['group_id'])
_, b, _, _ = outputs[layers[0]].shape
x = outputs[layers[0]][:, b // groups * group_id:b // groups * (group_id + 1)]
outputs[ind] = x
elif len(layers) == 2:
x1 = outputs[layers[0]]
x2 = outputs[layers[1]]
x = torch.cat((x1, x2), 1)
outputs[ind] = x
elif len(layers) == 4:
x1 = outputs[layers[0]]
x2 = outputs[layers[1]]
x3 = outputs[layers[2]]
x4 = outputs[layers[3]]
x = torch.cat((x1, x2, x3, x4), 1)
outputs[ind] = x
else:
print("rounte number > 2 ,is {}".format(len(layers)))
elif block['type'] == 'shortcut':
from_layer = int(block['from'])
activation = block['activation']
from_layer = from_layer if from_layer > 0 else from_layer + ind
x1 = outputs[from_layer]
x2 = outputs[ind - 1]
x = x1 + x2
if activation == 'leaky':
x = F.leaky_relu(x, 0.1, inplace=True)
elif activation == 'relu':
x = F.relu(x, inplace=True)
outputs[ind] = x
elif block['type'] == 'region':
continue
if self.loss:
self.loss = self.loss + self.models[ind](x)
else:
self.loss = self.models[ind](x)
outputs[ind] = None
elif block['type'] == 'yolo':
# if self.training:
# pass
# else:
# boxes = self.models[ind](x)
# out_boxes.append(boxes)
boxes = self.models[ind](x)
out_boxes.append(boxes)
elif block['type'] == 'cost':
continue
else:
print('unknown type %s' % (block['type']))
if self.training:
return out_boxes
else:
return get_region_boxes(out_boxes)
def print_network(self):
print_cfg(self.blocks)
def create_network(self, blocks):
models = nn.ModuleList()
prev_filters = 3
out_filters = []
prev_stride = 1
out_strides = []
conv_id = 0
for block in blocks:
if block['type'] == 'net':
prev_filters = int(block['channels'])
continue
elif block['type'] == 'convolutional':
conv_id = conv_id + 1
batch_normalize = int(block['batch_normalize'])
filters = int(block['filters'])
kernel_size = int(block['size'])
stride = int(block['stride'])
is_pad = int(block['pad'])
pad = (kernel_size - 1) // 2 if is_pad else 0
activation = block['activation']
model = nn.Sequential()
if batch_normalize:
model.add_module('conv{0}'.format(conv_id),
nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters))
# model.add_module('bn{0}'.format(conv_id), BN2d(filters))
else:
model.add_module('conv{0}'.format(conv_id),
nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
if activation == 'leaky':
model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
elif activation == 'relu':
model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))
elif activation == 'mish':
model.add_module('mish{0}'.format(conv_id), Mish())
else:
print("convalution havn't activate {}".format(activation))
prev_filters = filters
out_filters.append(prev_filters)
prev_stride = stride * prev_stride
out_strides.append(prev_stride)
models.append(model)
elif block['type'] == 'maxpool':
pool_size = int(block['size'])
stride = int(block['stride'])
if stride == 1 and pool_size % 2:
# MaxPoolDark could be used instead; this form is more convenient for ONNX conversion.
# Example: [maxpool] size=3 stride=1
model = nn.MaxPool2d(kernel_size=pool_size, stride=stride, padding=pool_size // 2)
elif stride == pool_size:
# MaxPoolDark could be used instead; this form is more convenient for ONNX conversion.
# Example: [maxpool] size=2 stride=2
model = nn.MaxPool2d(kernel_size=pool_size, stride=stride, padding=0)
else:
model = MaxPoolDark(pool_size, stride)
out_filters.append(prev_filters)
prev_stride = stride * prev_stride
out_strides.append(prev_stride)
models.append(model)
elif block['type'] == 'avgpool':
model = GlobalAvgPool2d()
out_filters.append(prev_filters)
models.append(model)
elif block['type'] == 'softmax':
model = nn.Softmax()
out_strides.append(prev_stride)
out_filters.append(prev_filters)
models.append(model)
elif block['type'] == 'cost':
if block['_type'] == 'sse':
model = nn.MSELoss(reduction='mean')
elif block['_type'] == 'L1':
model = nn.L1Loss(reduction='mean')
elif block['_type'] == 'smooth':
model = nn.SmoothL1Loss(reduction='mean')
out_filters.append(1)
out_strides.append(prev_stride)
models.append(model)
elif block['type'] == 'reorg':
stride = int(block['stride'])
prev_filters = stride * stride * prev_filters
out_filters.append(prev_filters)
prev_stride = prev_stride * stride
out_strides.append(prev_stride)
models.append(Reorg(stride))
elif block['type'] == 'upsample':
stride = int(block['stride'])
out_filters.append(prev_filters)
prev_stride = prev_stride // stride
out_strides.append(prev_stride)
models.append(Upsample_expand(stride))
# models.append(Upsample_interpolate(stride))
elif block['type'] == 'route':
layers = block['layers'].split(',')
ind = len(models)
layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
if len(layers) == 1:
if 'groups' not in block.keys() or int(block['groups']) == 1:
prev_filters = out_filters[layers[0]]
prev_stride = out_strides[layers[0]]
else:
prev_filters = out_filters[layers[0]] // int(block['groups'])
prev_stride = out_strides[layers[0]] // int(block['groups'])
elif len(layers) == 2:
assert (layers[0] == ind - 1 or layers[1] == ind - 1)
prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
prev_stride = out_strides[layers[0]]
elif len(layers) == 4:
assert (layers[0] == ind - 1)
prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + \
out_filters[layers[3]]
prev_stride = out_strides[layers[0]]
else:
print("route error!!!")
out_filters.append(prev_filters)
out_strides.append(prev_stride)
models.append(EmptyModule())
elif block['type'] == 'shortcut':
ind = len(models)
prev_filters = out_filters[ind - 1]
out_filters.append(prev_filters)
prev_stride = out_strides[ind - 1]
out_strides.append(prev_stride)
models.append(EmptyModule())
elif block['type'] == 'connected':
filters = int(block['output'])
if block['activation'] == 'linear':
model = nn.Linear(prev_filters, filters)
elif block['activation'] == 'leaky':
model = nn.Sequential(
nn.Linear(prev_filters, filters),
nn.LeakyReLU(0.1, inplace=True))
elif block['activation'] == 'relu':
model = nn.Sequential(
nn.Linear(prev_filters, filters),
nn.ReLU(inplace=True))
prev_filters = filters
out_filters.append(prev_filters)
out_strides.append(prev_stride)
models.append(model)
elif block['type'] == 'region':
loss = RegionLoss()
anchors = block['anchors'].split(',')
loss.anchors = [float(i) for i in anchors]
loss.num_classes = int(block['classes'])
loss.num_anchors = int(block['num'])
loss.anchor_step = len(loss.anchors) // loss.num_anchors
loss.object_scale = float(block['object_scale'])
loss.noobject_scale = float(block['noobject_scale'])
loss.class_scale = float(block['class_scale'])
loss.coord_scale = float(block['coord_scale'])
out_filters.append(prev_filters)
out_strides.append(prev_stride)
models.append(loss)
elif block['type'] == 'yolo':
yolo_layer = YoloLayer()
anchors = block['anchors'].split(',')
anchor_mask = block['mask'].split(',')
yolo_layer.anchor_mask = [int(i) for i in anchor_mask]
yolo_layer.anchors = [float(i) for i in anchors]
yolo_layer.num_classes = int(block['classes'])
self.num_classes = yolo_layer.num_classes
yolo_layer.num_anchors = int(block['num'])
yolo_layer.anchor_step = len(yolo_layer.anchors) // yolo_layer.num_anchors
yolo_layer.stride = prev_stride
yolo_layer.scale_x_y = float(block['scale_x_y'])
# yolo_layer.object_scale = float(block['object_scale'])
# yolo_layer.noobject_scale = float(block['noobject_scale'])
# yolo_layer.class_scale = float(block['class_scale'])
# yolo_layer.coord_scale = float(block['coord_scale'])
out_filters.append(prev_filters)
out_strides.append(prev_stride)
models.append(yolo_layer)
else:
print('unknown type %s' % (block['type']))
return models
def load_weights(self, weightfile):
fp = open(weightfile, 'rb')
header = np.fromfile(fp, count=5, dtype=np.int32)
self.header = torch.from_numpy(header)
self.seen = self.header[3]
buf = np.fromfile(fp, dtype=np.float32)
fp.close()
start = 0
ind = -2
for block in self.blocks:
if start >= buf.size:
break
ind = ind + 1
if block['type'] == 'net':
continue
elif block['type'] == 'convolutional':
model = self.models[ind]
batch_normalize = int(block['batch_normalize'])
if batch_normalize:
start = load_conv_bn(buf, start, model[0], model[1])
else:
start = load_conv(buf, start, model[0])
elif block['type'] == 'connected':
model = self.models[ind]
if block['activation'] != 'linear':
start = load_fc(buf, start, model[0])
else:
start = load_fc(buf, start, model)
elif block['type'] == 'maxpool':
pass
elif block['type'] == 'reorg':
pass
elif block['type'] == 'upsample':
pass
elif block['type'] == 'route':
pass
elif block['type'] == 'shortcut':
pass
elif block['type'] == 'region':
pass
elif block['type'] == 'yolo':
pass
elif block['type'] == 'avgpool':
pass
elif block['type'] == 'softmax':
pass
elif block['type'] == 'cost':
pass
else:
print('unknown type %s' % (block['type']))
# def save_weights(self, outfile, cutoff=0):
# if cutoff <= 0:
# cutoff = len(self.blocks) - 1
#
# fp = open(outfile, 'wb')
# self.header[3] = self.seen
# header = self.header
# header.numpy().tofile(fp)
#
# ind = -1
# for blockId in range(1, cutoff + 1):
# ind = ind + 1
# block = self.blocks[blockId]
# if block['type'] == 'convolutional':
# model = self.models[ind]
# batch_normalize = int(block['batch_normalize'])
# if batch_normalize:
# save_conv_bn(fp, model[0], model[1])
# else:
# save_conv(fp, model[0])
# elif block['type'] == 'connected':
# model = self.models[ind]
# if block['activation'] != 'linear':
# save_fc(fc, model)
# else:
# save_fc(fc, model[0])
# elif block['type'] == 'maxpool':
# pass
# elif block['type'] == 'reorg':
# pass
# elif block['type'] == 'upsample':
# pass
# elif block['type'] == 'route':
# pass
# elif block['type'] == 'shortcut':
# pass
# elif block['type'] == 'region':
# pass
# elif block['type'] == 'yolo':
# pass
# elif block['type'] == 'avgpool':
# pass
# elif block['type'] == 'softmax':
# pass
# elif block['type'] == 'cost':
# pass
# else:
# print('unknown type %s' % (block['type']))
# fp.close()
import sys
import onnx
from onnx_tf.backend import prepare
# tensorflow >=2.0
# 1: Thanks to https://github.com/onnx/onnx-tensorflow
# 2: Run git clone https://github.com/onnx/onnx-tensorflow.git && cd onnx-tensorflow
# Run pip install -e .
# Note:
# Installing with "pip install onnx-tf" caused errors (at least for me);
# installing from source as above is recommended.
def transform_to_tensorflow(onnx_input_path, pb_output_path):
onnx_model = onnx.load(onnx_input_path) # load onnx model
tf_exp = prepare(onnx_model) # prepare tf representation
tf_exp.export_graph(pb_output_path) # export the model
if __name__ == '__main__':
if len(sys.argv) == 1:
sys.argv.append('../weight/yolov4_1_3_608_608.onnx') # use:darknet2onnx.py
sys.argv.append('../weight/yolov4.pb') # use:onnx2tensorflow.py
if len(sys.argv) == 3:
onnxfile = sys.argv[1]
tfpb_outfile = sys.argv[2]
transform_to_tensorflow(onnxfile, tfpb_outfile)
else:
print('Please execute this script this way:\n')
print(' python onnx2tensorflow.py <onnxfile> <tfpboutfile>')
import torch.nn as nn
import torch.nn.functional as F
from tool.torch_utils import *
def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale,
sil_thresh, seen):
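# Builds the regression/confidence/class targets for one region head: anchors
# whose IoU with any ground truth exceeds sil_thresh are excluded from the
# no-object confidence penalty, and the best-matching anchor per ground truth
# receives coordinate, confidence and class targets.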
nB = target.size(0)
nA = num_anchors
nC = num_classes
anchor_step = len(anchors) // num_anchors
conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale
coord_mask = torch.zeros(nB, nA, nH, nW)
cls_mask = torch.zeros(nB, nA, nH, nW)
tx = torch.zeros(nB, nA, nH, nW)
ty = torch.zeros(nB, nA, nH, nW)
tw = torch.zeros(nB, nA, nH, nW)
th = torch.zeros(nB, nA, nH, nW)
tconf = torch.zeros(nB, nA, nH, nW)
tcls = torch.zeros(nB, nA, nH, nW)
nAnchors = nA * nH * nW
nPixels = nH * nW
for b in range(nB):
cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t()
cur_ious = torch.zeros(nAnchors)
for t in range(50):
if target[b][t * 5 + 1] == 0:
break
gx = target[b][t * 5 + 1] * nW
gy = target[b][t * 5 + 2] * nH
gw = target[b][t * 5 + 3] * nW
gh = target[b][t * 5 + 4] * nH
cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t()
cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
conf_mask[b][cur_ious > sil_thresh] = 0
if seen < 12800:
if anchor_step == 4:
tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1, nA, 1,
1).repeat(
nB, 1, nH, nW)
ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view(
1, nA, 1, 1).repeat(nB, 1, nH, nW)
else:
tx.fill_(0.5)
ty.fill_(0.5)
tw.zero_()
th.zero_()
coord_mask.fill_(1)
nGT = 0
nCorrect = 0
for b in range(nB):
for t in range(50):
if target[b][t * 5 + 1] == 0:
break
nGT = nGT + 1
best_iou = 0.0
best_n = -1
min_dist = 10000
gx = target[b][t * 5 + 1] * nW
gy = target[b][t * 5 + 2] * nH
gi = int(gx)
gj = int(gy)
gw = target[b][t * 5 + 3] * nW
gh = target[b][t * 5 + 4] * nH
gt_box = [0, 0, gw, gh]
for n in range(nA):
aw = anchors[anchor_step * n]
ah = anchors[anchor_step * n + 1]
anchor_box = [0, 0, aw, ah]
iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
if anchor_step == 4:
ax = anchors[anchor_step * n + 2]
ay = anchors[anchor_step * n + 3]
dist = pow(((gi + ax) - gx), 2) + pow(((gj + ay) - gy), 2)
if iou > best_iou:
best_iou = iou
best_n = n
elif anchor_step == 4 and iou == best_iou and dist < min_dist:
best_iou = iou
best_n = n
min_dist = dist
gt_box = [gx, gy, gw, gh]
pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi]
coord_mask[b][best_n][gj][gi] = 1
cls_mask[b][best_n][gj][gi] = 1
conf_mask[b][best_n][gj][gi] = object_scale
tx[b][best_n][gj][gi] = target[b][t * 5 + 1] * nW - gi
ty[b][best_n][gj][gi] = target[b][t * 5 + 2] * nH - gj
tw[b][best_n][gj][gi] = math.log(gw / anchors[anchor_step * best_n])
th[b][best_n][gj][gi] = math.log(gh / anchors[anchor_step * best_n + 1])
iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou
tconf[b][best_n][gj][gi] = iou
tcls[b][best_n][gj][gi] = target[b][t * 5]
if iou > 0.5:
nCorrect = nCorrect + 1
return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls
class RegionLoss(nn.Module):
def __init__(self, num_classes=0, anchors=[], num_anchors=1):
super(RegionLoss, self).__init__()
self.num_classes = num_classes
self.anchors = anchors
self.num_anchors = num_anchors
self.anchor_step = len(anchors) // num_anchors
self.coord_scale = 1
self.noobject_scale = 1
self.object_scale = 5
self.class_scale = 1
self.thresh = 0.6
self.seen = 0
def forward(self, output, target):
# output : BxAs*(4+1+num_classes)*H*W
t0 = time.time()
nB = output.data.size(0)
nA = self.num_anchors
nC = self.num_classes
nH = output.data.size(2)
nW = output.data.size(3)
output = output.view(nB, nA, (5 + nC), nH, nW)
x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)
t1 = time.time()
pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
pred_boxes[0] = x.data + grid_x
pred_boxes[1] = y.data + grid_y
pred_boxes[2] = torch.exp(w.data) * anchor_w
pred_boxes[3] = torch.exp(h.data) * anchor_h
pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
t2 = time.time()
nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
target.data,
self.anchors, nA,
nC, \
nH, nW,
self.noobject_scale,
self.object_scale,
self.thresh,
self.seen)
cls_mask = (cls_mask == 1)
nProposals = int((conf > 0.25).sum().item())
tx = Variable(tx.cuda())
ty = Variable(ty.cuda())
tw = Variable(tw.cuda())
th = Variable(th.cuda())
tconf = Variable(tconf.cuda())
tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
coord_mask = Variable(coord_mask.cuda())
conf_mask = Variable(conf_mask.cuda().sqrt())
cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
cls = cls[cls_mask].view(-1, nC)
t3 = time.time()
loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0
loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0
loss_w = self.coord_scale * nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0
loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0
loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0
loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
t4 = time.time()
if False:
print('-----------------------------------')
print(' activation : %f' % (t1 - t0))
print(' create pred_boxes : %f' % (t2 - t1))
print(' build targets : %f' % (t3 - t2))
print(' create loss : %f' % (t4 - t3))
print(' total : %f' % (t4 - t0))
print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0],
loss_conf.data[0], loss_cls.data[0], loss.data[0]))
return loss
import sys
import os
import time
import math
import torch
import numpy as np
from torch.autograd import Variable
import itertools
import struct # get_image_size
import imghdr # get_image_size
from tool import utils
def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
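# Vectorised IoU between corresponding columns of two 4xN box tensors; boxes
# are corner (x1,y1,x2,y2) or centre (cx,cy,w,h) encoded depending on x1y1x2y2.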
if x1y1x2y2:
mx = torch.min(boxes1[0], boxes2[0])
Mx = torch.max(boxes1[2], boxes2[2])
my = torch.min(boxes1[1], boxes2[1])
My = torch.max(boxes1[3], boxes2[3])
w1 = boxes1[2] - boxes1[0]
h1 = boxes1[3] - boxes1[1]
w2 = boxes2[2] - boxes2[0]
h2 = boxes2[3] - boxes2[1]
else:
mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0)
Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0)
my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0)
My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0)
w1 = boxes1[2]
h1 = boxes1[3]
w2 = boxes2[2]
h2 = boxes2[3]
uw = Mx - mx
uh = My - my
cw = w1 + w2 - uw
ch = h1 + h2 - uh
mask = ((cw <= 0) + (ch <= 0) > 0)
area1 = w1 * h1
area2 = w2 * h2
carea = cw * ch
carea[mask] = 0
uarea = area1 + area2 - carea
return carea / uarea
def get_region_boxes(boxes_and_confs):
# print('Getting boxes from boxes and confs ...')
boxes_list = []
confs_list = []
for item in boxes_and_confs:
boxes_list.append(item[0])
confs_list.append(item[1])
# boxes: [batch, num1 + num2 + num3, 1, 4]
# confs: [batch, num1 + num2 + num3, num_classes]
boxes = torch.cat(boxes_list, dim=1)
confs = torch.cat(confs_list, dim=1)
return [boxes, confs]
def convert2cpu(gpu_matrix):
return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)
def convert2cpu_long(gpu_matrix):
return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix)
def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1):
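# Runs a forward pass on a single cv2 image (HxWx3) or a pre-batched NxHxWx3
# array, normalising to [0,1] and moving channels first, then defers
# thresholding and NMS to utils.post_processing.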
model.eval()
if type(img) == np.ndarray and len(img.shape) == 3: # cv2 image
img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
elif type(img) == np.ndarray and len(img.shape) == 4:
img = torch.from_numpy(img.transpose(0, 3, 1, 2)).float().div(255.0)
else:
print("unknow image type")
exit(-1)
if use_cuda:
img = img.cuda()
img = torch.autograd.Variable(img)
output = model(img)
return utils.post_processing(img, conf_thresh, nms_thresh, output)
# Object detection reference training scripts
This folder contains reference training scripts for object detection.
They serve as a log of how to train specific models, to provide baseline
training and evaluation scripts to quickly bootstrap research.
To execute the example commands below you must install the following:
```
cython
pycocotools
matplotlib
```
You must modify the following flags:
`--data-path=/path/to/coco/dataset`
`--nproc_per_node=<number_of_gpus_available>`
Unless otherwise noted, all models have been trained on 8x V100 GPUs.
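For example (the data path below is a placeholder, not a value prescribed by the reference scripts), a Faster R-CNN run with both flags filled in might look like:
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
    --data-path=/path/to/coco/dataset\
    --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\
    --lr-steps 16 22 --aspect-ratio-group-factor 3
```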
### Faster R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Mask R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco --model maskrcnn_resnet50_fpn --epochs 26\
--lr-steps 16 22 --aspect-ratio-group-factor 3
```
### Keypoint R-CNN
```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
--dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46\
--lr-steps 36 43 --aspect-ratio-group-factor 3
```
import json
import tempfile
import numpy as np
import copy
import time
import torch
import torch._six
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util
from collections import defaultdict
from . import utils
class CocoEvaluator(object):
def __init__(self, coco_gt, iou_types, bbox_fmt='coco'):
assert isinstance(iou_types, (list, tuple))
coco_gt = copy.deepcopy(coco_gt)
self.coco_gt = coco_gt
self.bbox_fmt = bbox_fmt.lower()
assert self.bbox_fmt in ['voc', 'coco', 'yolo']
self.iou_types = iou_types
self.coco_eval = {}
for iou_type in iou_types:
self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
self.img_ids = []
self.eval_imgs = {k: [] for k in iou_types}
def update(self, predictions):
img_ids = list(np.unique(list(predictions.keys())))
self.img_ids.extend(img_ids)
for iou_type in self.iou_types:
results = self.prepare(predictions, iou_type)
coco_dt = loadRes(self.coco_gt, results) if results else COCO()
coco_eval = self.coco_eval[iou_type]
coco_eval.cocoDt = coco_dt
coco_eval.params.imgIds = list(img_ids)
img_ids, eval_imgs = evaluate(coco_eval)
self.eval_imgs[iou_type].append(eval_imgs)
def synchronize_between_processes(self):
for iou_type in self.iou_types:
self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
def accumulate(self):
for coco_eval in self.coco_eval.values():
coco_eval.accumulate()
def summarize(self):
for iou_type, coco_eval in self.coco_eval.items():
print("IoU metric: {}".format(iou_type))
coco_eval.summarize()
def prepare(self, predictions, iou_type):
if iou_type == "bbox":
return self.prepare_for_coco_detection(predictions)
elif iou_type == "segm":
return self.prepare_for_coco_segmentation(predictions)
elif iou_type == "keypoints":
return self.prepare_for_coco_keypoint(predictions)
else:
raise ValueError("Unknown iou type {}".format(iou_type))
def prepare_for_coco_detection(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
if self.bbox_fmt == 'coco':
boxes = prediction["boxes"].tolist()
else:
boxes = prediction["boxes"]
boxes = convert_to_xywh(boxes, fmt=self.bbox_fmt).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"bbox": box,
"score": scores[k],
}
for k, box in enumerate(boxes)
]
)
return coco_results
def prepare_for_coco_segmentation(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
scores = prediction["scores"]
labels = prediction["labels"]
masks = prediction["masks"]
masks = masks > 0.5
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
rles = [
mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
for mask in masks
]
for rle in rles:
rle["counts"] = rle["counts"].decode("utf-8")
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"segmentation": rle,
"score": scores[k],
}
for k, rle in enumerate(rles)
]
)
return coco_results
def prepare_for_coco_keypoint(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
# boxes = prediction["boxes"]
# boxes = convert_to_xywh(boxes).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
keypoints = prediction["keypoints"]
keypoints = keypoints.flatten(start_dim=1).tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
'keypoints': keypoint,
"score": scores[k],
}
for k, keypoint in enumerate(keypoints)
]
)
return coco_results
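# Typical usage sketch (assuming a COCO ground-truth object `coco_gt` and a
# dict mapping image_id -> prediction dict with "boxes", "scores", "labels"):
#   evaluator = CocoEvaluator(coco_gt, iou_types=["bbox"])
#   evaluator.update(predictions)
#   evaluator.synchronize_between_processes()
#   evaluator.accumulate()
#   evaluator.summarize()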
def convert_to_xywh(boxes, fmt='voc'):
if fmt.lower() == 'voc':
xmin, ymin, xmax, ymax = boxes.unbind(1)
return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
elif fmt.lower() == 'yolo':
xcen, ycen, w, h = boxes.unbind(1)
return torch.stack((xcen-w/2, ycen-h/2, w, h), dim=1)
def merge(img_ids, eval_imgs):
all_img_ids = utils.all_gather(img_ids)
all_eval_imgs = utils.all_gather(eval_imgs)
merged_img_ids = []
for p in all_img_ids:
merged_img_ids.extend(p)
merged_eval_imgs = []
for p in all_eval_imgs:
merged_eval_imgs.append(p)
merged_img_ids = np.array(merged_img_ids)
merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
# keep only unique (and in sorted order) images
merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
merged_eval_imgs = merged_eval_imgs[..., idx]
return merged_img_ids, merged_eval_imgs
def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
img_ids, eval_imgs = merge(img_ids, eval_imgs)
img_ids = list(img_ids)
eval_imgs = list(eval_imgs.flatten())
coco_eval.evalImgs = eval_imgs
coco_eval.params.imgIds = img_ids
coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################
# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions
def createIndex(self):
# create index
# print('creating index...')
anns, cats, imgs = {}, {}, {}
imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
if 'annotations' in self.dataset:
for ann in self.dataset['annotations']:
imgToAnns[ann['image_id']].append(ann)
anns[ann['id']] = ann
if 'images' in self.dataset:
for img in self.dataset['images']:
imgs[img['id']] = img
if 'categories' in self.dataset:
for cat in self.dataset['categories']:
cats[cat['id']] = cat
if 'annotations' in self.dataset and 'categories' in self.dataset:
for ann in self.dataset['annotations']:
catToImgs[ann['category_id']].append(ann['image_id'])
# print('index created!')
# create class members
self.anns = anns
self.imgToAnns = imgToAnns
self.catToImgs = catToImgs
self.imgs = imgs
self.cats = cats
maskUtils = mask_util
def loadRes(self, resFile):
"""
Load result file and return a result api object.
:param resFile (str) : file name of result file
:return: res (obj) : result api object
"""
res = COCO()
res.dataset['images'] = [img for img in self.dataset['images']]
# print('Loading and preparing results...')
# tic = time.time()
if isinstance(resFile, torch._six.string_classes):
anns = json.load(open(resFile))
elif type(resFile) == np.ndarray:
anns = self.loadNumpyAnnotations(resFile)
else:
anns = resFile
assert type(anns) == list, 'results is not an array of objects'
annsImgIds = [ann['image_id'] for ann in anns]
assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
'Results do not correspond to current coco set'
if 'caption' in anns[0]:
imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
for id, ann in enumerate(anns):
ann['id'] = id + 1
elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
ann['bbox'] = ann['bbox'][0]
bb = ann['bbox']
x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
if 'segmentation' not in ann:
ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
ann['area'] = bb[2] * bb[3]
ann['id'] = id + 1
ann['iscrowd'] = 0
elif 'segmentation' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
# now only support compressed RLE format as segmentation results
ann['area'] = maskUtils.area(ann['segmentation'])
if 'bbox' not in ann:
ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
ann['id'] = id + 1
ann['iscrowd'] = 0
elif 'keypoints' in anns[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for id, ann in enumerate(anns):
s = ann['keypoints']
x = s[0::3]
y = s[1::3]
x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
ann['area'] = (x2 - x1) * (y2 - y1)
ann['id'] = id + 1
ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]
# print('DONE (t={:0.2f}s)'.format(time.time()- tic))
res.dataset['annotations'] = anns
createIndex(res)
return res
def evaluate(self):
'''
Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
:return: None
'''
# tic = time.time()
# print('Running per image evaluation...')
p = self.params
# add backward compatibility if useSegm is specified in params
if p.useSegm is not None:
p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
# print('Evaluate annotation type *{}*'.format(p.iouType))
p.imgIds = list(np.unique(p.imgIds))
if p.useCats:
p.catIds = list(np.unique(p.catIds))
p.maxDets = sorted(p.maxDets)
self.params = p
self._prepare()
# loop through images, area range, max detection number
catIds = p.catIds if p.useCats else [-1]
if p.iouType == 'segm' or p.iouType == 'bbox':
computeIoU = self.computeIoU
elif p.iouType == 'keypoints':
computeIoU = self.computeOks
self.ious = {
(imgId, catId): computeIoU(imgId, catId)
for imgId in p.imgIds
for catId in catIds}
evaluateImg = self.evaluateImg
maxDet = p.maxDets[-1]
evalImgs = [
evaluateImg(imgId, catId, areaRng, maxDet)
for catId in catIds
for areaRng in p.areaRng
for imgId in p.imgIds
]
# this is NOT in the pycocotools code, but could be done outside
evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
self._paramsEval = copy.deepcopy(self.params)
# toc = time.time()
# print('DONE (t={:0.2f}s).'.format(toc-tic))
return p.imgIds, evalImgs
#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################
import copy
import os
from PIL import Image
import torch
import torch.utils.data
import torchvision
from pycocotools import mask as coco_mask
from pycocotools.coco import COCO
from . import transforms as T
class FilterAndRemapCocoCategories(object):
def __init__(self, categories, remap=True):
self.categories = categories
self.remap = remap
def __call__(self, image, target):
anno = target["annotations"]
anno = [obj for obj in anno if obj["category_id"] in self.categories]
if not self.remap:
target["annotations"] = anno
return image, target
anno = copy.deepcopy(anno)
for obj in anno:
obj["category_id"] = self.categories.index(obj["category_id"])
target["annotations"] = anno
return image, target
def convert_coco_poly_to_mask(segmentations, height, width):
masks = []
for polygons in segmentations:
rles = coco_mask.frPyObjects(polygons, height, width)
mask = coco_mask.decode(rles)
if len(mask.shape) < 3:
mask = mask[..., None]
mask = torch.as_tensor(mask, dtype=torch.uint8)
mask = mask.any(dim=2)
masks.append(mask)
if masks:
masks = torch.stack(masks, dim=0)
else:
masks = torch.zeros((0, height, width), dtype=torch.uint8)
return masks
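# Illustrative sketch (added, not from the original file): decode a single COCO-style
# polygon with the helper above; the square polygon and the 8x8 canvas are made up.
def _demo_convert_coco_poly_to_mask():
    # one object whose segmentation is a single polygon (a square from (1, 1) to (5, 5))
    segmentations = [[[1.0, 1.0, 5.0, 1.0, 5.0, 5.0, 1.0, 5.0]]]
    masks = convert_coco_poly_to_mask(segmentations, height=8, width=8)
    # masks is a uint8 tensor of shape (1, 8, 8) with ones inside the rasterized square
    return masks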
class ConvertCocoPolysToMask(object):
def __call__(self, image, target):
w, h = image.size
image_id = target["image_id"]
image_id = torch.tensor([image_id])
anno = target["annotations"]
anno = [obj for obj in anno if obj['iscrowd'] == 0]
boxes = [obj["bbox"] for obj in anno]
# guard against no boxes via resizing
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
boxes[:, 2:] += boxes[:, :2]
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)
classes = [obj["category_id"] for obj in anno]
classes = torch.tensor(classes, dtype=torch.int64)
segmentations = [obj["segmentation"] for obj in anno]
masks = convert_coco_poly_to_mask(segmentations, h, w)
keypoints = None
if anno and "keypoints" in anno[0]:
keypoints = [obj["keypoints"] for obj in anno]
keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
num_keypoints = keypoints.shape[0]
if num_keypoints:
keypoints = keypoints.view(num_keypoints, -1, 3)
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
boxes = boxes[keep]
classes = classes[keep]
masks = masks[keep]
if keypoints is not None:
keypoints = keypoints[keep]
target = {}
target["boxes"] = boxes
target["labels"] = classes
target["masks"] = masks
target["image_id"] = image_id
if keypoints is not None:
target["keypoints"] = keypoints
# for conversion to coco api
area = torch.tensor([obj["area"] for obj in anno])
iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
target["area"] = area
target["iscrowd"] = iscrowd
return image, target
def _coco_remove_images_without_annotations(dataset, cat_list=None):
def _has_only_empty_bbox(anno):
return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
def _count_visible_keypoints(anno):
return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
min_keypoints_per_image = 10
def _has_valid_annotation(anno):
# if it's empty, there is no annotation
if len(anno) == 0:
return False
# if all boxes have close to zero area, there is no annotation
if _has_only_empty_bbox(anno):
return False
# the keypoints task has a slightly different criterion for considering
# whether an annotation is valid
if "keypoints" not in anno[0]:
return True
# for keypoint detection tasks, only consider valid images those
# containing at least min_keypoints_per_image
if _count_visible_keypoints(anno) >= min_keypoints_per_image:
return True
return False
assert isinstance(dataset, torchvision.datasets.CocoDetection)
ids = []
for ds_idx, img_id in enumerate(dataset.ids):
ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
anno = dataset.coco.loadAnns(ann_ids)
if cat_list:
anno = [obj for obj in anno if obj["category_id"] in cat_list]
if _has_valid_annotation(anno):
ids.append(ds_idx)
dataset = torch.utils.data.Subset(dataset, ids)
return dataset
def convert_to_coco_api(ds, bbox_fmt='voc'):
"""
"""
print("in function convert_to_coco_api...")
coco_ds = COCO()
# annotation IDs need to start at 1, not 0, see torchvision issue #1530
ann_id = 1
dataset = {'images': [], 'categories': [], 'annotations': []}
categories = set()
for img_idx in range(len(ds)):
# find better way to get target
# targets = ds.get_annotations(img_idx)
img, targets = ds[img_idx]
image_id = targets["image_id"].item()
img_dict = {}
img_dict['id'] = image_id
img_dict['height'] = img.shape[-2]
img_dict['width'] = img.shape[-1]
dataset['images'].append(img_dict)
bboxes = targets["boxes"]
# to coco format: xmin, ymin, w, h
if bbox_fmt.lower() == "voc": # xmin, ymin, xmax, ymax
bboxes[:, 2:] -= bboxes[:, :2]
elif bbox_fmt.lower() == "yolo": # xcen, ycen, w, h
bboxes[:, :2] = bboxes[:, :2] - bboxes[:, 2:]/2
elif bbox_fmt.lower() == "coco":
pass
else:
raise ValueError(f"bounding box format {bbox_fmt} not supported!")
bboxes = bboxes.tolist()
labels = targets['labels'].tolist()
areas = targets['area'].tolist()
iscrowd = targets['iscrowd'].tolist()
if 'masks' in targets:
masks = targets['masks']
# make masks Fortran contiguous for coco_mask
masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
if 'keypoints' in targets:
keypoints = targets['keypoints']
keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
num_objs = len(bboxes)
for i in range(num_objs):
ann = {}
ann['image_id'] = image_id
ann['bbox'] = bboxes[i]
ann['category_id'] = labels[i]
categories.add(labels[i])
ann['area'] = areas[i]
ann['iscrowd'] = iscrowd[i]
ann['id'] = ann_id
if 'masks' in targets:
ann["segmentation"] = coco_mask.encode(masks[i].numpy())
if 'keypoints' in targets:
ann['keypoints'] = keypoints[i]
ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3])
dataset['annotations'].append(ann)
ann_id += 1
dataset['categories'] = [{'id': i} for i in sorted(categories)]
coco_ds.dataset = dataset
coco_ds.createIndex()
return coco_ds
def get_coco_api_from_dataset(dataset):
for _ in range(10):
if isinstance(dataset, torchvision.datasets.CocoDetection):
break
if isinstance(dataset, torch.utils.data.Subset):
dataset = dataset.dataset
if isinstance(dataset, torchvision.datasets.CocoDetection):
return dataset.coco
return convert_to_coco_api(dataset)
class CocoDetection(torchvision.datasets.CocoDetection):
def __init__(self, img_folder, ann_file, transforms):
super(CocoDetection, self).__init__(img_folder, ann_file)
self._transforms = transforms
def __getitem__(self, idx):
img, target = super(CocoDetection, self).__getitem__(idx)
image_id = self.ids[idx]
target = dict(image_id=image_id, annotations=target)
if self._transforms is not None:
img, target = self._transforms(img, target)
return img, target
def get_coco(root, image_set, transforms, mode='instances'):
anno_file_template = "{}_{}2017.json"
PATHS = {
"train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
"val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
# "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
}
t = [ConvertCocoPolysToMask()]
if transforms is not None:
t.append(transforms)
transforms = T.Compose(t)
img_folder, ann_file = PATHS[image_set]
img_folder = os.path.join(root, img_folder)
ann_file = os.path.join(root, ann_file)
dataset = CocoDetection(img_folder, ann_file, transforms=transforms)
if image_set == "train":
dataset = _coco_remove_images_without_annotations(dataset)
# dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])
return dataset
def get_coco_kp(root, image_set, transforms):
return get_coco(root, image_set, transforms, mode="person_keypoints")
import math
import sys
import time
import torch
import torchvision.models.detection.mask_rcnn
from .coco_utils import get_coco_api_from_dataset
from .coco_eval import CocoEvaluator
from . import utils
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
header = 'Epoch: [{}]'.format(epoch)
lr_scheduler = None
if epoch == 0:
warmup_factor = 1. / 1000
warmup_iters = min(1000, len(data_loader) - 1)
lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
for images, targets in metric_logger.log_every(data_loader, print_freq, header):
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print("Loss is {}, stopping training".format(loss_value))
print(loss_dict_reduced)
sys.exit(1)
optimizer.zero_grad()
losses.backward()
optimizer.step()
if lr_scheduler is not None:
lr_scheduler.step()
metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
metric_logger.update(lr=optimizer.param_groups[0]["lr"])
return metric_logger
def _get_iou_types(model):
model_without_ddp = model
if isinstance(model, torch.nn.parallel.DistributedDataParallel):
model_without_ddp = model.module
iou_types = ["bbox"]
if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
iou_types.append("segm")
if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
iou_types.append("keypoints")
return iou_types
@torch.no_grad()
def evaluate(model, data_loader, device):
n_threads = torch.get_num_threads()
# FIXME remove this and make paste_masks_in_image run on the GPU
torch.set_num_threads(1)
cpu_device = torch.device("cpu")
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
header = 'Test:'
coco = get_coco_api_from_dataset(data_loader.dataset)
iou_types = _get_iou_types(model)
coco_evaluator = CocoEvaluator(coco, iou_types)
for images, targets in metric_logger.log_every(data_loader, 100, header):
images = list(img.to(device) for img in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
torch.cuda.synchronize()
model_time = time.time()
outputs = model(images)
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
model_time = time.time() - model_time
res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
evaluator_time = time.time()
coco_evaluator.update(res)
evaluator_time = time.time() - evaluator_time
metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)
coco_evaluator.synchronize_between_processes()
# accumulate predictions from all images
coco_evaluator.accumulate()
coco_evaluator.summarize()
torch.set_num_threads(n_threads)
return coco_evaluator
import bisect
from collections import defaultdict
import copy
from itertools import repeat, chain
import math
import numpy as np
import torch
import torch.utils.data
from torch.utils.data.sampler import BatchSampler, Sampler
from torch.utils.model_zoo import tqdm
import torchvision
from PIL import Image
def _repeat_to_at_least(iterable, n):
repeat_times = math.ceil(n / len(iterable))
repeated = chain.from_iterable(repeat(iterable, repeat_times))
return list(repeated)
class GroupedBatchSampler(BatchSampler):
"""
Wraps another sampler to yield a mini-batch of indices.
It enforces that the batch only contain elements from the same group.
It also tries to provide mini-batches that follow an ordering as close as
possible to the ordering of the original sampler.
Arguments:
sampler (Sampler): Base sampler.
group_ids (list[int]): If the sampler produces indices in range [0, N),
`group_ids` must be a list of `N` ints which contains the group id of each sample.
The group ids must be a continuous set of integers starting from
0, i.e. they must be in the range [0, num_groups).
batch_size (int): Size of mini-batch.
"""
def __init__(self, sampler, group_ids, batch_size):
if not isinstance(sampler, Sampler):
raise ValueError(
"sampler should be an instance of "
"torch.utils.data.Sampler, but got sampler={}".format(sampler)
)
self.sampler = sampler
self.group_ids = group_ids
self.batch_size = batch_size
def __iter__(self):
buffer_per_group = defaultdict(list)
samples_per_group = defaultdict(list)
num_batches = 0
for idx in self.sampler:
group_id = self.group_ids[idx]
buffer_per_group[group_id].append(idx)
samples_per_group[group_id].append(idx)
if len(buffer_per_group[group_id]) == self.batch_size:
yield buffer_per_group[group_id]
num_batches += 1
del buffer_per_group[group_id]
assert len(buffer_per_group[group_id]) < self.batch_size
# now we have run out of elements that satisfy
# the group criteria, let's return the remaining
# elements so that the size of the sampler is
# deterministic
expected_num_batches = len(self)
num_remaining = expected_num_batches - num_batches
if num_remaining > 0:
# for the remaining batches, take first the buffers with largest number
# of elements
for group_id, _ in sorted(buffer_per_group.items(),
key=lambda x: len(x[1]), reverse=True):
remaining = self.batch_size - len(buffer_per_group[group_id])
samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)
buffer_per_group[group_id].extend(samples_from_group_id[:remaining])
assert len(buffer_per_group[group_id]) == self.batch_size
yield buffer_per_group[group_id]
num_remaining -= 1
if num_remaining == 0:
break
assert num_remaining == 0
def __len__(self):
return len(self.sampler) // self.batch_size
def _compute_aspect_ratios_slow(dataset, indices=None):
print("Your dataset doesn't support the fast path for "
"computing the aspect ratios, so will iterate over "
"the full dataset and load every image instead. "
"This might take some time...")
if indices is None:
indices = range(len(dataset))
class SubsetSampler(Sampler):
def __init__(self, indices):
self.indices = indices
def __iter__(self):
return iter(self.indices)
def __len__(self):
return len(self.indices)
sampler = SubsetSampler(indices)
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=1, sampler=sampler,
num_workers=14, # you might want to increase it for faster processing
collate_fn=lambda x: x[0])
aspect_ratios = []
with tqdm(total=len(dataset)) as pbar:
for _i, (img, _) in enumerate(data_loader):
pbar.update(1)
height, width = img.shape[-2:]
aspect_ratio = float(width) / float(height)
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
aspect_ratios = []
for i in indices:
height, width = dataset.get_height_and_width(i)
aspect_ratio = float(width) / float(height)
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
aspect_ratios = []
for i in indices:
img_info = dataset.coco.imgs[dataset.ids[i]]
aspect_ratio = float(img_info["width"]) / float(img_info["height"])
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
aspect_ratios = []
for i in indices:
# this doesn't load the data into memory, because PIL loads it lazily
width, height = Image.open(dataset.images[i]).size
aspect_ratio = float(width) / float(height)
aspect_ratios.append(aspect_ratio)
return aspect_ratios
def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
if indices is None:
indices = range(len(dataset))
ds_indices = [dataset.indices[i] for i in indices]
return compute_aspect_ratios(dataset.dataset, ds_indices)
def compute_aspect_ratios(dataset, indices=None):
if hasattr(dataset, "get_height_and_width"):
return _compute_aspect_ratios_custom_dataset(dataset, indices)
if isinstance(dataset, torchvision.datasets.CocoDetection):
return _compute_aspect_ratios_coco_dataset(dataset, indices)
if isinstance(dataset, torchvision.datasets.VOCDetection):
return _compute_aspect_ratios_voc_dataset(dataset, indices)
if isinstance(dataset, torch.utils.data.Subset):
return _compute_aspect_ratios_subset_dataset(dataset, indices)
# slow path
return _compute_aspect_ratios_slow(dataset, indices)
def _quantize(x, bins):
bins = copy.deepcopy(bins)
bins = sorted(bins)
quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
return quantized
def create_aspect_ratio_groups(dataset, k=0):
aspect_ratios = compute_aspect_ratios(dataset)
bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]
groups = _quantize(aspect_ratios, bins)
# count number of elements per group
counts = np.unique(groups, return_counts=True)[1]
fbins = [0] + bins + [np.inf]
print("Using {} as bins for aspect ratio quantization".format(fbins))
print("Count of instances per bin: {}".format(counts))
return groups
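# Usage sketch (added; `dataset` is any detection dataset supported by
# compute_aspect_ratios, not a name defined in this repo): wire the grouping helpers
# above into a DataLoader so every mini-batch only mixes images of similar aspect ratio.
def _demo_grouped_loader(dataset, batch_size=2, k=3):
    group_ids = create_aspect_ratio_groups(dataset, k=k)
    sampler = torch.utils.data.RandomSampler(dataset)
    batch_sampler = GroupedBatchSampler(sampler, group_ids, batch_size)
    return torch.utils.data.DataLoader(
        dataset, batch_sampler=batch_sampler,
        collate_fn=lambda batch: tuple(zip(*batch)))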
r"""PyTorch Detection Training.
To run in a multi-gpu environment, use the distributed launcher::
python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
train.py ... --world-size $NGPU
The default hyperparameters are tuned for training on 8 GPUs with 2 images per GPU:
--lr 0.02 --batch-size 2 --world-size 8
If you use a different number of GPUs, scale the learning rate to 0.02/8*$NGPU.
On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
--epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3
Also, if you train Keypoint R-CNN, the default hyperparameters are
--epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
Because the number of images is smaller in the person keypoint subset of COCO,
the number of epochs should be adapted so that we have the same number of iterations.
"""
import datetime
import os
import time
import torch
import torch.utils.data
from torch import nn
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn
from .coco_utils import get_coco, get_coco_kp
from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from .engine import train_one_epoch, evaluate
from . import utils
from . import transforms as T
def get_dataset(name, image_set, transform, data_path):
paths = {
"coco": (data_path, get_coco, 91),
"coco_kp": (data_path, get_coco_kp, 2)
}
p, ds_fn, num_classes = paths[name]
ds = ds_fn(p, image_set=image_set, transforms=transform)
return ds, num_classes
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
def main(args):
utils.init_distributed_mode(args)
print(args)
device = torch.device(args.device)
# Data loading code
print("Loading data")
dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True), args.data_path)
dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False), args.data_path)
print("Creating data loaders")
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
else:
train_sampler = torch.utils.data.RandomSampler(dataset)
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
if args.aspect_ratio_group_factor >= 0:
group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
else:
train_batch_sampler = torch.utils.data.BatchSampler(
train_sampler, args.batch_size, drop_last=True)
data_loader = torch.utils.data.DataLoader(
dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1,
sampler=test_sampler, num_workers=args.workers,
collate_fn=utils.collate_fn)
print("Creating model")
model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes,
pretrained=args.pretrained)
model.to(device)
model_without_ddp = model
if args.distributed:
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
if args.resume:
checkpoint = torch.load(args.resume, map_location='cpu')
model_without_ddp.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
args.start_epoch = checkpoint['epoch'] + 1
if args.test_only:
evaluate(model, data_loader_test, device=device)
return
print("Start training")
start_time = time.time()
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
lr_scheduler.step()
if args.output_dir:
utils.save_on_master({
'model': model_without_ddp.state_dict(),
'optimizer': optimizer.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(),
'args': args,
'epoch': epoch},
os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
# evaluate after every epoch
evaluate(model, data_loader_test, device=device)
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
description=__doc__)
parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset')
parser.add_argument('--dataset', default='coco', help='dataset')
parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
parser.add_argument('--device', default='cuda', help='device')
parser.add_argument('-b', '--batch-size', default=2, type=int,
help='images per gpu, the total batch size is $NGPU x batch_size')
parser.add_argument('--epochs', default=26, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--lr', default=0.02, type=float,
help='initial learning rate, 0.02 is the default value for training '
'on 8 gpus and 2 images_per_gpu')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int, help='decrease lr every step-size epochs')
parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
parser.add_argument('--output-dir', default='.', help='path where to save')
parser.add_argument('--resume', default='', help='resume from checkpoint')
parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument(
"--pretrained",
dest="pretrained",
help="Use pre-trained models from the modelzoo",
action="store_true",
)
# distributed training parameters
parser.add_argument('--world-size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
args = parser.parse_args()
if args.output_dir:
utils.mkdir(args.output_dir)
main(args)
import random
import torch
from torchvision.transforms import functional as F
def _flip_coco_person_keypoints(kps, width):
flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
flipped_data = kps[:, flip_inds]
flipped_data[..., 0] = width - flipped_data[..., 0]
# Maintain COCO convention that if visibility == 0, then x, y = 0
inds = flipped_data[..., 2] == 0
flipped_data[inds] = 0
return flipped_data
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, target):
for t in self.transforms:
image, target = t(image, target)
return image, target
class RandomHorizontalFlip(object):
def __init__(self, prob):
self.prob = prob
def __call__(self, image, target):
if random.random() < self.prob:
height, width = image.shape[-2:]
image = image.flip(-1)
bbox = target["boxes"]
bbox[:, [0, 2]] = width - bbox[:, [2, 0]]
target["boxes"] = bbox
if "masks" in target:
target["masks"] = target["masks"].flip(-1)
if "keypoints" in target:
keypoints = target["keypoints"]
keypoints = _flip_coco_person_keypoints(keypoints, width)
target["keypoints"] = keypoints
return image, target
class ToTensor(object):
def __call__(self, image, target):
image = F.to_tensor(image)
return image, target
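# Usage sketch (added; the image and target are dummies): train.py composes the
# transforms above this way; here the flip probability is 1.0 so the effect is deterministic.
def _demo_transforms():
    from PIL import Image
    image = Image.new('RGB', (64, 48))
    target = {"boxes": torch.tensor([[10., 10., 30., 40.]]),
              "labels": torch.tensor([1])}
    tfm = Compose([ToTensor(), RandomHorizontalFlip(prob=1.0)])
    # after the flip the box becomes [[34., 10., 54., 40.]] (x coordinates mirrored)
    return tfm(image, target)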
from collections import defaultdict, deque
import datetime
import pickle
import time
import torch
import torch.distributed as dist
import errno
import os
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
def __init__(self, window_size=20, fmt=None):
if fmt is None:
fmt = "{median:.4f} ({global_avg:.4f})"
self.deque = deque(maxlen=window_size)
self.total = 0.0
self.count = 0
self.fmt = fmt
def update(self, value, n=1):
self.deque.append(value)
self.count += n
self.total += value * n
def synchronize_between_processes(self):
"""
Warning: does not synchronize the deque!
"""
if not is_dist_avail_and_initialized():
return
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
dist.barrier()
dist.all_reduce(t)
t = t.tolist()
self.count = int(t[0])
self.total = t[1]
@property
def median(self):
d = torch.tensor(list(self.deque))
return d.median().item()
@property
def avg(self):
d = torch.tensor(list(self.deque), dtype=torch.float32)
return d.mean().item()
@property
def global_avg(self):
return self.total / self.count
@property
def max(self):
return max(self.deque)
@property
def value(self):
return self.deque[-1]
def __str__(self):
return self.fmt.format(
median=self.median,
avg=self.avg,
global_avg=self.global_avg,
max=self.max,
value=self.value)
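# Small usage sketch (added; the loss values are made up): SmoothedValue tracks both a
# windowed median/average and the global average of everything it has seen.
def _demo_smoothed_value():
    sv = SmoothedValue(window_size=3, fmt='{median:.2f} ({global_avg:.2f})')
    for loss in (1.0, 0.5, 0.25, 0.125):
        sv.update(loss)
    # median over the last 3 values is 0.25, global average over all 4 is ~0.47
    return str(sv)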
def all_gather(data):
"""
Run all_gather on arbitrary picklable data (not necessarily tensors)
Args:
data: any picklable object
Returns:
list[data]: list of data gathered from each rank
"""
world_size = get_world_size()
if world_size == 1:
return [data]
# serialized to a Tensor
buffer = pickle.dumps(data)
storage = torch.ByteStorage.from_buffer(buffer)
tensor = torch.ByteTensor(storage).to("cuda")
# obtain Tensor size of each rank
local_size = torch.tensor([tensor.numel()], device="cuda")
size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
dist.all_gather(size_list, local_size)
size_list = [int(size.item()) for size in size_list]
max_size = max(size_list)
# receiving Tensor from all ranks
# we pad the tensor because torch all_gather does not support
# gathering tensors of different shapes
tensor_list = []
for _ in size_list:
tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
if local_size != max_size:
padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
tensor = torch.cat((tensor, padding), dim=0)
dist.all_gather(tensor_list, tensor)
data_list = []
for size, tensor in zip(size_list, tensor_list):
buffer = tensor.cpu().numpy().tobytes()[:size]
data_list.append(pickle.loads(buffer))
return data_list
def reduce_dict(input_dict, average=True):
"""
Args:
input_dict (dict): all the values will be reduced
average (bool): whether to do average or sum
Reduce the values in the dictionary from all processes so that all processes
have the averaged results. Returns a dict with the same fields as
input_dict, after reduction.
"""
world_size = get_world_size()
if world_size < 2:
return input_dict
with torch.no_grad():
names = []
values = []
# sort the keys so that they are consistent across processes
for k in sorted(input_dict.keys()):
names.append(k)
values.append(input_dict[k])
values = torch.stack(values, dim=0)
dist.all_reduce(values)
if average:
values /= world_size
reduced_dict = {k: v for k, v in zip(names, values)}
return reduced_dict
class MetricLogger(object):
def __init__(self, delimiter="\t"):
self.meters = defaultdict(SmoothedValue)
self.delimiter = delimiter
def update(self, **kwargs):
for k, v in kwargs.items():
if isinstance(v, torch.Tensor):
v = v.item()
assert isinstance(v, (float, int))
self.meters[k].update(v)
def __getattr__(self, attr):
if attr in self.meters:
return self.meters[attr]
if attr in self.__dict__:
return self.__dict__[attr]
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, attr))
def __str__(self):
loss_str = []
for name, meter in self.meters.items():
loss_str.append(
"{}: {}".format(name, str(meter))
)
return self.delimiter.join(loss_str)
def synchronize_between_processes(self):
for meter in self.meters.values():
meter.synchronize_between_processes()
def add_meter(self, name, meter):
self.meters[name] = meter
def log_every(self, iterable, print_freq, header=None):
i = 0
if not header:
header = ''
start_time = time.time()
end = time.time()
iter_time = SmoothedValue(fmt='{avg:.4f}')
data_time = SmoothedValue(fmt='{avg:.4f}')
space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
if torch.cuda.is_available():
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}',
'max mem: {memory:.0f}'
])
else:
log_msg = self.delimiter.join([
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}'
])
MB = 1024.0 * 1024.0
for obj in iterable:
data_time.update(time.time() - end)
yield obj
iter_time.update(time.time() - end)
if i % print_freq == 0 or i == len(iterable) - 1:
eta_seconds = iter_time.global_avg * (len(iterable) - i)
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
if torch.cuda.is_available():
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time),
memory=torch.cuda.max_memory_allocated() / MB))
else:
print(log_msg.format(
i, len(iterable), eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time)))
i += 1
end = time.time()
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('{} Total time: {} ({:.4f} s / it)'.format(
header, total_time_str, total_time / len(iterable)))
def collate_fn(batch):
return tuple(zip(*batch))
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
def f(x):
if x >= warmup_iters:
return 1
alpha = float(x) / warmup_iters
return warmup_factor * (1 - alpha) + alpha
return torch.optim.lr_scheduler.LambdaLR(optimizer, f)
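# Worked example (added): with warmup_iters=1000 and warmup_factor=1/1000 the factor
# f(x) grows linearly from 0.001 at iteration 0 to 1.0 at iteration 1000; for example
# f(500) = 0.001 * (1 - 0.5) + 0.5 = 0.5005, and f(x) = 1 for every x >= 1000.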
def mkdir(path):
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def setup_for_distributed(is_master):
"""
This function disables printing when not in master process
"""
import builtins as __builtin__
builtin_print = __builtin__.print
def print(*args, **kwargs):
force = kwargs.pop('force', False)
if is_master or force:
builtin_print(*args, **kwargs)
__builtin__.print = print
def is_dist_avail_and_initialized():
if not dist.is_available():
return False
if not dist.is_initialized():
return False
return True
def get_world_size():
if not is_dist_avail_and_initialized():
return 1
return dist.get_world_size()
def get_rank():
if not is_dist_avail_and_initialized():
return 0
return dist.get_rank()
def is_main_process():
return get_rank() == 0
def save_on_master(*args, **kwargs):
if is_main_process():
torch.save(*args, **kwargs)
def init_distributed_mode(args):
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
args.rank = int(os.environ["RANK"])
args.world_size = int(os.environ['WORLD_SIZE'])
args.gpu = int(os.environ['LOCAL_RANK'])
elif 'SLURM_PROCID' in os.environ:
args.rank = int(os.environ['SLURM_PROCID'])
args.gpu = args.rank % torch.cuda.device_count()
else:
print('Not using distributed mode')
args.distributed = False
return
args.distributed = True
torch.cuda.set_device(args.gpu)
args.dist_backend = 'nccl'
print('| distributed init (rank {}): {}'.format(
args.rank, args.dist_url), flush=True)
torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
torch.distributed.barrier()
setup_for_distributed(args.rank == 0)
import sys
import os
import time
import math
import numpy as np
import itertools
import struct # get_image_size
import imghdr # get_image_size
def sigmoid(x):
return 1.0 / (np.exp(-x) + 1.)
def softmax(x):
x = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1))
x = x / np.expand_dims(x.sum(axis=1), axis=1)
return x
def bbox_iou(box1, box2, x1y1x2y2=True):
# print('iou box1:', box1)
# print('iou box2:', box2)
if x1y1x2y2:
mx = min(box1[0], box2[0])
Mx = max(box1[2], box2[2])
my = min(box1[1], box2[1])
My = max(box1[3], box2[3])
w1 = box1[2] - box1[0]
h1 = box1[3] - box1[1]
w2 = box2[2] - box2[0]
h2 = box2[3] - box2[1]
else:
w1 = box1[2]
h1 = box1[3]
w2 = box2[2]
h2 = box2[3]
mx = min(box1[0], box2[0])
Mx = max(box1[0] + w1, box2[0] + w2)
my = min(box1[1], box2[1])
My = max(box1[1] + h1, box2[1] + h2)
uw = Mx - mx
uh = My - my
cw = w1 + w2 - uw
ch = h1 + h2 - uh
carea = 0
if cw <= 0 or ch <= 0:
return 0.0
area1 = w1 * h1
area2 = w2 * h2
carea = cw * ch
uarea = area1 + area2 - carea
return carea / uarea
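# Illustrative sketch (added; the boxes are made up): IoU of two overlapping squares in
# x1y1x2y2 format, computed with the function above.
def _demo_bbox_iou():
    box1 = [0.0, 0.0, 2.0, 2.0]
    box2 = [1.0, 1.0, 3.0, 3.0]
    # intersection = 1, union = 4 + 4 - 1 = 7, so the result is 1/7 ~= 0.143
    return bbox_iou(box1, box2, x1y1x2y2=True)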
def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
# print(boxes.shape)
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1) * (y2 - y1)
order = confs.argsort()[::-1]
keep = []
while order.size > 0:
idx_self = order[0]
idx_other = order[1:]
keep.append(idx_self)
xx1 = np.maximum(x1[idx_self], x1[idx_other])
yy1 = np.maximum(y1[idx_self], y1[idx_other])
xx2 = np.minimum(x2[idx_self], x2[idx_other])
yy2 = np.minimum(y2[idx_self], y2[idx_other])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
inter = w * h
if min_mode:
over = inter / np.minimum(areas[order[0]], areas[order[1:]])
else:
over = inter / (areas[order[0]] + areas[order[1:]] - inter)
inds = np.where(over <= nms_thresh)[0]
order = order[inds + 1]
return np.array(keep)
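# Illustrative sketch (added; boxes and scores are made up): two heavily overlapping
# boxes plus one isolated box; with nms_thresh=0.5 only the higher-scoring box of the
# overlapping pair and the isolated box are kept.
def _demo_nms_cpu():
    boxes = np.array([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
    confs = np.array([0.9, 0.8, 0.7])
    return nms_cpu(boxes, confs, nms_thresh=0.5)   # expected: array([0, 2])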
def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
import cv2
img = np.copy(img)
colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)
def get_color(c, x, max_val):
ratio = float(x) / max_val * 5
i = int(math.floor(ratio))
j = int(math.ceil(ratio))
ratio = ratio - i
r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]
return int(r * 255)
width = img.shape[1]
height = img.shape[0]
for i in range(len(boxes)):
box = boxes[i]
x1 = int(box[0] * width)
y1 = int(box[1] * height)
x2 = int(box[2] * width)
y2 = int(box[3] * height)
if color:
rgb = color
else:
rgb = (255, 0, 0)
if len(box) >= 7 and class_names:
cls_conf = box[5]
cls_id = box[6]
print('%s: %f' % (class_names[cls_id], cls_conf))
classes = len(class_names)
offset = cls_id * 123457 % classes
red = get_color(2, offset, classes)
green = get_color(1, offset, classes)
blue = get_color(0, offset, classes)
if color is None:
rgb = (red, green, blue)
img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1)
img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1)
if savename:
print("save plot results to %s" % savename)
cv2.imwrite(savename, img)
return img
def read_truths(lab_path):
if not os.path.exists(lab_path):
return np.array([])
if os.path.getsize(lab_path):
truths = np.loadtxt(lab_path)
truths = truths.reshape(truths.size // 5, 5)  # to avoid single truth problem (integer division for Python 3)
return truths
else:
return np.array([])
def load_class_names(namesfile):
class_names = []
with open(namesfile, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.rstrip()
class_names.append(line)
return class_names
def post_processing(img, conf_thresh, nms_thresh, output):
# anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
# num_anchors = 9
# anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
# strides = [8, 16, 32]
# anchor_step = len(anchors) // num_anchors
# [batch, num, 1, 4]
box_array = output[0]
# [batch, num, num_classes]
confs = output[1]
t1 = time.time()
if type(box_array).__name__ != 'ndarray':
box_array = box_array.cpu().detach().numpy()
confs = confs.cpu().detach().numpy()
num_classes = confs.shape[2]
# [batch, num, 4]
box_array = box_array[:, :, 0]
# [batch, num, num_classes] --> [batch, num]
max_conf = np.max(confs, axis=2)
max_id = np.argmax(confs, axis=2)
t2 = time.time()
bboxes_batch = []
for i in range(box_array.shape[0]):
argwhere = max_conf[i] > conf_thresh
l_box_array = box_array[i, argwhere, :]
l_max_conf = max_conf[i, argwhere]
l_max_id = max_id[i, argwhere]
bboxes = []
# nms for each class
for j in range(num_classes):
cls_argwhere = l_max_id == j
ll_box_array = l_box_array[cls_argwhere, :]
ll_max_conf = l_max_conf[cls_argwhere]
ll_max_id = l_max_id[cls_argwhere]
keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)
if (keep.size > 0):
ll_box_array = ll_box_array[keep, :]
ll_max_conf = ll_max_conf[keep]
ll_max_id = ll_max_id[keep]
for k in range(ll_box_array.shape[0]):
bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]])
bboxes_batch.append(bboxes)
t3 = time.time()
print('-----------------------------------')
print(' max and argmax : %f' % (t2 - t1))
print(' nms : %f' % (t3 - t2))
print('Post processing total : %f' % (t3 - t1))
print('-----------------------------------')
return bboxes_batch
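# Minimal sketch (added; the tensors are random and the thresholds arbitrary): calling
# post_processing on dummy outputs with the layout described above, i.e. boxes of shape
# [batch, num, 1, 4] and class confidences of shape [batch, num, num_classes].
def _demo_post_processing():
    batch, num, num_classes = 1, 100, 3
    box_array = np.random.rand(batch, num, 1, 4).astype(np.float32)
    confs = np.random.rand(batch, num, num_classes).astype(np.float32)
    return post_processing(None, conf_thresh=0.4, nms_thresh=0.6,
                           output=[box_array, confs])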
# -*- coding: utf-8 -*-
'''
Bounding-box overlap measures (IoU, GIoU, DIoU, CIoU) implemented on PyTorch tensors
for boxes in 'voc', 'yolo' and 'coco' formats.
'''
import torch
import os, sys
from torch.nn import functional as F
import numpy as np
from packaging import version
__all__ = [
"bboxes_iou",
"bboxes_giou",
"bboxes_diou",
"bboxes_ciou",
]
if version.parse(torch.__version__) >= version.parse('1.5.0'):
def _true_divide(dividend, divisor):
return torch.true_divide(dividend, divisor)
else:
def _true_divide(dividend, divisor):
return dividend / divisor
def bboxes_iou(bboxes_a, bboxes_b, fmt='voc', iou_type='iou'):
"""Calculate the Intersection of Unions (IoUs) between bounding boxes.
IoU is calculated as a ratio of area of the intersection
and area of the union.
Args:
bbox_a (array): An array whose shape is :math:`(N, 4)`.
:math:`N` is the number of bounding boxes.
The dtype should be :obj:`numpy.float32`.
bbox_b (array): An array similar to :obj:`bbox_a`,
whose shape is :math:`(K, 4)`.
The dtype should be :obj:`numpy.float32`.
Returns:
array:
An array whose shape is :math:`(N, K)`. \
An element at index :math:`(n, k)` contains IoUs between \
:math:`n` th bounding box in :obj:`bbox_a` and :math:`k` th bounding \
box in :obj:`bbox_b`.
from: https://github.com/chainer/chainercv
"""
if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
raise IndexError
N, K = bboxes_a.shape[0], bboxes_b.shape[0]
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
# top left
tl_intersect = torch.max(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
) # of shape `(N,K,2)`
# bottom right
br_intersect = torch.min(
bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, 2:]
)
bb_a = bboxes_a[:, 2:] - bboxes_a[:, :2]
bb_b = bboxes_b[:, 2:] - bboxes_b[:, :2]
# bb_* can also be seen vectors representing box_width, box_height
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
# top left
tl_intersect = torch.max(
bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] - bboxes_b[:, 2:] / 2
)
# bottom right
br_intersect = torch.min(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] + bboxes_b[:, 2:] / 2
)
bb_a = bboxes_a[:, 2:]
bb_b = bboxes_b[:, 2:]
elif fmt.lower() == 'coco': # xmin, ymin, w, h
# top left
tl_intersect = torch.max(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
)
# bottom right
br_intersect = torch.min(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, :2] + bboxes_b[:, 2:]
)
bb_a = bboxes_a[:, 2:]
bb_b = bboxes_b[:, 2:]
area_a = torch.prod(bb_a, 1)
area_b = torch.prod(bb_b, 1)
# torch.prod(input, dim, keepdim=False, dtype=None) → Tensor
# Returns the product of each row of the input tensor in the given dimension dim
# if tl, br do not form a non-degenerate rectangle, the corresponding element of the `prod` is 0
en = (tl_intersect < br_intersect).type(tl_intersect.type()).prod(dim=2) # shape `(N,K,2)` ---> shape `(N,K)`
area_intersect = torch.prod(br_intersect - tl_intersect, 2) * en # * ((tl < br).all())
area_union = (area_a[:, np.newaxis] + area_b - area_intersect)
iou = _true_divide(area_intersect, area_union)
if iou_type.lower() == 'iou':
return iou
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
# top left
tl_union = torch.min(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
) # of shape `(N,K,2)`
# bottom right
br_union = torch.max(
bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, 2:]
)
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
# top left
tl_union = torch.min(
bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] - bboxes_b[:, 2:] / 2
)
# bottom right
br_union = torch.max(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2,
bboxes_b[:, :2] + bboxes_b[:, 2:] / 2
)
elif fmt.lower() == 'coco': # xmin, ymin, w, h
# top left
tl_union = torch.min(
bboxes_a[:, np.newaxis, :2],
bboxes_b[:, :2]
)
# bottom right
br_union = torch.max(
bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:],
bboxes_b[:, :2] + bboxes_b[:, 2:]
)
# c for covering, of shape `(N,K,2)`
# the last dim is box width, box height
bboxes_c = br_union - tl_union
area_covering = torch.prod(bboxes_c, 2) # shape `(N,K)`
giou = iou - _true_divide(area_covering - area_union, area_covering)
if iou_type.lower() == 'giou':
return giou
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
centre_a = (bboxes_a[..., 2 :] + bboxes_a[..., : 2]) / 2
centre_b = (bboxes_b[..., 2 :] + bboxes_b[..., : 2]) / 2
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
centre_a = bboxes_a[..., : 2]
centre_b = bboxes_b[..., : 2]
elif fmt.lower() == 'coco': # xmin, ymin, w, h
centre_a = bboxes_a[..., 2 :] + bboxes_a[..., : 2]/2
centre_b = bboxes_b[..., 2 :] + bboxes_b[..., : 2]/2
centre_dist = torch.norm(centre_a[:, np.newaxis] - centre_b, p='fro', dim=2)
diag_len = torch.norm(bboxes_c, p='fro', dim=2)
diou = iou - _true_divide(centre_dist.pow(2), diag_len.pow(2))
if iou_type.lower() == 'diou':
return diou
""" the legacy custom cosine similarity:
# bb_a of shape `(N,2)`, bb_b of shape `(K,2)`
v = torch.einsum('nm,km->nk', bb_a, bb_b)
v = _true_divide(v, (torch.norm(bb_a, p='fro', dim=1)[:,np.newaxis] * torch.norm(bb_b, p='fro', dim=1)))
# avoid nan for torch.acos near \pm 1
# https://github.com/pytorch/pytorch/issues/8069
eps = 1e-7
v = torch.clamp(v, -1+eps, 1-eps)
"""
v = F.cosine_similarity(bb_a[:,np.newaxis,:], bb_b, dim=-1)
v = (_true_divide(2*torch.acos(v), np.pi)).pow(2)
with torch.no_grad():
alpha = (_true_divide(v, 1-iou+v)) * ((iou>=0.5).type(iou.type()))
ciou = diou - alpha * v
if iou_type.lower() == 'ciou':
return ciou
def bboxes_giou(bboxes_a, bboxes_b, fmt='voc'):
return bboxes_iou(bboxes_a, bboxes_b, fmt, 'giou')
def bboxes_diou(bboxes_a, bboxes_b, fmt='voc'):
return bboxes_iou(bboxes_a, bboxes_b, fmt, 'diou')
def bboxes_ciou(bboxes_a, bboxes_b, fmt='voc'):
return bboxes_iou(bboxes_a, bboxes_b, fmt, 'ciou')
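# Illustrative sketch (added; the boxes are made up): the helpers above return N x K
# matrices; here N = K = 1 with boxes in 'voc' (xmin, ymin, xmax, ymax) format.
def _demo_bboxes_iou():
    a = torch.tensor([[0., 0., 2., 2.]])
    b = torch.tensor([[1., 1., 4., 3.]])
    return {
        'iou': bboxes_iou(a, b, fmt='voc', iou_type='iou'),   # 1/9 ~= 0.111
        'giou': bboxes_giou(a, b, fmt='voc'),
        'diou': bboxes_diou(a, b, fmt='voc'),
        'ciou': bboxes_ciou(a, b, fmt='voc'),
    }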
# -*- coding: utf-8 -*-
'''
Test utilities, with printing and optional plotting, for the bounding-box IoU functions.
'''
import torch
import os, sys
from torch.nn import functional as F
from easydict import EasyDict as ED
import numpy as np
from packaging import version
if version.parse(torch.__version__) >= version.parse('1.5.0'):
def _true_divide(dividend, divisor):
return torch.true_divide(dividend, divisor)
else:
def _true_divide(dividend, divisor):
return dividend / divisor
def bboxes_iou_test(bboxes_a, bboxes_b, fmt='voc', iou_type='iou'):
"""
test function for the bboxes_iou function in `train_acne.py`,
with message printing and plot
"""
if 'plt' not in dir():
import matplotlib.pyplot as plt
if 'cv2' not in dir():
try:
import cv2
except ModuleNotFoundError:
cv2 = None
from PIL import Image, ImageDraw
assert iou_type.lower() in ['iou', 'giou', 'diou', 'ciou']
if isinstance(bboxes_a, np.ndarray):
bboxes_a = torch.Tensor(bboxes_a)
if isinstance(bboxes_b, np.ndarray):
bboxes_b = torch.Tensor(bboxes_b)
if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
raise IndexError
N, K = bboxes_a.shape[0], bboxes_b.shape[0]
# if N, K all equal 1, then plot
# top left
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
# top left
tl_intersect = torch.max(bboxes_a[:, np.newaxis, :2], bboxes_b[:, :2]) # of shape `(N,K,2)`
# bottom right
br_intersect = torch.min(bboxes_a[:, np.newaxis, 2:], bboxes_b[:, 2:])
bb_a = bboxes_a[:, 2:] - bboxes_a[:, :2] # w, h
bb_b = bboxes_b[:, 2:] - bboxes_b[:, :2] # w, h
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
tl_intersect = torch.max((bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2),
(bboxes_b[:, :2] - bboxes_b[:, 2:] / 2))
# bottom right
br_intersect = torch.min((bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2),
(bboxes_b[:, :2] + bboxes_b[:, 2:] / 2))
bb_a = bboxes_a[:, 2:]
bb_b = bboxes_b[:, 2:]
elif fmt.lower() == 'coco': # xmin, ymin, w, h
# top left
tl_intersect = torch.max(bboxes_a[:, np.newaxis, :2], bboxes_b[:, :2])
# bottom right
br_intersect = torch.min((bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:]),
(bboxes_b[:, :2] + bboxes_b[:, 2:]))
bb_a = bboxes_a[:, 2:]
bb_b = bboxes_b[:, 2:]
area_a = torch.prod(bb_a, 1)
area_b = torch.prod(bb_b, 1)
# torch.prod(input, dim, keepdim=False, dtype=None) → Tensor
# Returns the product of each row of the input tensor in the given dimension dim
# if tl, br do not form a non-degenerate rectangle, the corresponding element of the `prod` is 0
en = (tl_intersect < br_intersect).type(tl_intersect.type()).prod(dim=2) # shape `(N,K,2)` ---> shape `(N,K)`
area_intersect = torch.prod(br_intersect - tl_intersect, 2) * en # * ((tl < br).all())
area_union = (area_a[:, np.newaxis] + area_b - area_intersect)
iou = _true_divide(area_intersect, area_union)
# if iou_type.lower() == 'iou':
# return iou
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
# top left
tl_union = torch.min(bboxes_a[:, np.newaxis, :2], bboxes_b[:, :2]) # of shape `(N,K,2)`
# bottom right
br_union = torch.max(bboxes_a[:, np.newaxis, 2:], bboxes_b[:, 2:])
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
tl_union = torch.min((bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2),
(bboxes_b[:, :2] - bboxes_b[:, 2:] / 2))
# bottom right
br_union = torch.max((bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2),
(bboxes_b[:, :2] + bboxes_b[:, 2:] / 2))
elif fmt.lower() == 'coco': # xmin, ymin, w, h
# top left
tl_union = torch.min(bboxes_a[:, np.newaxis, :2], bboxes_b[:, :2])
# bottom right
br_union = torch.max((bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:]),
(bboxes_b[:, :2] + bboxes_b[:, 2:]))
# c for covering, of shape `(N,K,2)`
# the last dim is box width, box height
bboxes_c = br_union - tl_union
area_covering = torch.prod(bboxes_c, 2) # shape `(N,K)`
giou = iou - (area_covering - area_union) / area_covering
print(f"tl_union.shape = {tl_union.shape}")
print(f"br_union.shape = {br_union.shape}")
print(f"bboxes_c.shape = {bboxes_c.shape}")
# if iou_type.lower() == 'giou':
# return giou
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
centre_a = (bboxes_a[..., 2 :] + bboxes_a[..., : 2]) / 2
centre_b = (bboxes_b[..., 2 :] + bboxes_b[..., : 2]) / 2
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
centre_a = (bboxes_a[..., : 2] + bboxes_a[..., 2 :]) / 2
centre_b = (bboxes_b[..., : 2] + bboxes_b[..., 2 :]) / 2
elif fmt.lower() == 'coco': # xmin, ymin, w, h
centre_a = bboxes_a[..., 2 :] + bboxes_a[..., : 2]/2
centre_b = bboxes_b[..., 2 :] + bboxes_b[..., : 2]/2
centre_dist = torch.norm(centre_a[:, np.newaxis] - centre_b, p='fro', dim=2)
diag_len = torch.norm(bboxes_c, p='fro', dim=2)
diou = iou - centre_dist.pow(2) / diag_len.pow(2)
# if iou_type.lower() == 'diou':
# return diou
""" the legacy custom cosine similarity:
# bb_a of shape `(N,2)`, bb_b of shape `(K,2)`
v = torch.einsum('nm,km->nk', bb_a, bb_b)
v = _true_divide(v, (torch.norm(bb_a, p='fro', dim=1)[:,np.newaxis] * torch.norm(bb_b, p='fro', dim=1)))
# avoid nan for torch.acos near \pm 1
# https://github.com/pytorch/pytorch/issues/8069
eps = 1e-7
v = torch.clamp(v, -1+eps, 1-eps)
"""
v = F.cosine_similarity(bb_a[:,np.newaxis,:], bb_b, dim=-1)
v = (_true_divide(2*torch.acos(v), np.pi)).pow(2)
alpha = (_true_divide(v, 1-iou+v))*((iou>=0.5).type(iou.type()))
ciou = diou - alpha * v
if N==K==1:
print("\n"+"*"*50)
print(f"bboxes_a = {bboxes_a}")
print(f"bboxes_b = {bboxes_b}")
print(f"area_a = {area_a}")
print(f"area_b = {area_b}")
print(f"area_intersect = {area_intersect}")
print(f"area_union = {area_union}")
print(f"tl_intersect = {tl_intersect}")
print(f"br_intersect = {br_intersect}")
print(f"tl_union = {tl_union}")
print(f"br_union = {br_union}")
print(f"area_covering (area of bboxes_c) = {area_covering}")
print(f"centre_dist = {centre_dist}")
print(f"diag_len = {diag_len}")
print("for computing ciou")
inner_product = torch.einsum('nm,km->nk', bb_a, bb_b)
product_of_lengths = torch.norm(bb_a, p='fro', dim=1)[:,np.newaxis] * torch.norm(bb_b, p='fro', dim=1)
print(f"inner product of bb_a and bb_b is {inner_product}")
print(f"product of lengths of bb_a and bb_b is {product_of_lengths}")
print(f"inner product divided by product of lengths equals {_true_divide(inner_product, product_of_lengths)}")
print(f"normalized angle distance = {v}")
print(f"alpha = {alpha}")
print(f"v = {v}")
print(f"alpha = {alpha}")
bc = ED({"xmin":tl_union.numpy().astype(int)[0][0][0], "ymin":tl_union.numpy().astype(int)[0][0][1], "xmax":br_union.numpy().astype(int)[0][0][0], "ymax":br_union.numpy().astype(int)[0][0][1]})
adjust_x = bc.xmin - int(0.25*(bc.xmax-bc.xmin))
adjust_y = bc.ymin - int(0.25*(bc.ymax-bc.ymin))
print(f"adjust_x = {adjust_x}")
print(f"adjust_y = {adjust_y}")
bc.xmin, bc.ymin, bc.xmax, bc.ymax = bc.xmin-adjust_x, bc.ymin-adjust_y, bc.xmax-adjust_x, bc.ymax-adjust_y
ba, bb = bboxes_a.numpy().astype(int)[0], bboxes_b.numpy().astype(int)[0]
if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
ba = ED({"xmin":ba[0]-adjust_x, "ymin":ba[1]-adjust_y, "xmax":ba[2]-adjust_x, "ymax":ba[3]-adjust_y})
bb = ED({"xmin":bb[0]-adjust_x, "ymin":bb[1]-adjust_y, "xmax":bb[2]-adjust_x, "ymax":bb[3]-adjust_y})
elif fmt.lower() == 'yolo': # xcen, ycen, w, h
ba = ED({"xmin":ba[0]-ba[2]//2-adjust_x, "ymin":ba[1]-ba[3]//2-adjust_y, "xmax":ba[0]+ba[2]//2-adjust_x, "ymax":ba[1]+ba[3]//2-adjust_y})
bb = ED({"xmin":bb[0]-bb[2]//2-adjust_x, "ymin":bb[1]-bb[3]//2-adjust_y, "xmax":bb[0]+bb[2]//2-adjust_x, "ymax":bb[1]+bb[3]//2-adjust_y})
elif fmt.lower() == 'coco': # xmin, ymin, w, h
ba = ED({"xmin":ba[0]-adjust_x, "ymin":ba[1]-adjust_y, "xmax":ba[0]+ba[2]-adjust_x, "ymax":ba[1]+ba[3]-adjust_y})
bb = ED({"xmin":bb[0]-adjust_x, "ymin":bb[1]-adjust_y, "xmax":bb[0]+bb[2]-adjust_x, "ymax":bb[1]+bb[3]-adjust_y})
print(f"ba = {ba}")
print(f"bb = {bb}")
print(f"bc = {bc}")
plane = np.full(shape=(int(1.5*(bc.ymax-bc.ymin)),int(1.5*(bc.xmax-bc.xmin)),3), fill_value=255, dtype=np.uint8)
img_with_boxes = plane.copy()
line_size = 1
if cv2:
cv2.rectangle(img_with_boxes, (ba.xmin, ba.ymin), (ba.xmax, ba.ymax), (0, 255, 0), line_size)
cv2.rectangle(img_with_boxes, (bb.xmin, bb.ymin), (bb.xmax, bb.ymax), (0, 0, 255), line_size)
cv2.rectangle(img_with_boxes, (max(0,bc.xmin-1), max(0,bc.ymin-1)), (bc.xmax, bc.ymax), (255, 0, 0), line_size)
else:
img_with_boxes = Image.fromarray(img_with_boxes)
drawer = ImageDraw.Draw(img_with_boxes)
# drawer.line([(ba.xmin, ba.ymin), (ba.xmin, ba.ymax), (ba.xmax, ba.ymax), (ba.xmax, ba.ymin), (ba.xmin, ba.ymin)], fill='green', width=line_size)
# drawer.line([(bb.xmin, bb.ymin), (bb.xmin, bb.ymax), (bb.xmax, bb.ymax), (bb.xmax, bb.ymin), (bb.xmin, bb.ymin)], fill='blue', width=line_size)
# drawer.line([(max(0,bc.xmin-1), max(0,bc.ymin-1)), (max(0,bc.xmin-1), bc.ymax), (bc.xmax, bc.ymax), (bc.xmax, max(0,bc.ymin-1)), (max(0,bc.xmin-1), max(0,bc.ymin-1))], fill='red', width=line_size)
drawer.rectangle([(ba.xmin, ba.ymin), (ba.xmax, ba.ymax)], outline='green', width=line_size)
drawer.rectangle([(bb.xmin, bb.ymin), (bb.xmax, bb.ymax)], outline='blue', width=line_size)
drawer.rectangle([(max(0,bc.xmin-1), max(0,bc.ymin-1)), (bc.xmax+1, bc.ymax+1)], outline='red', width=line_size)
img_with_boxes = np.array(img_with_boxes)
del drawer
plt.figure(figsize=(7,7))
plt.imshow(img_with_boxes)
plt.show()
print(f"iou = {iou}")
print(f"giou = {giou}")
print(f"diou = {diou}")
print(f"ciou = {ciou}")
if iou_type.lower() == 'ciou':
return ciou
elif iou_type.lower() == 'diou':
return diou
elif iou_type.lower() == 'giou':
return giou
elif iou_type.lower() == 'iou':
return iou
def original_iou_test(bboxes_a, bboxes_b, xyxy=True):
"""
test function for the original iou function in `train.py`
"""
if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
raise IndexError
if isinstance(bboxes_a, np.ndarray):
bboxes_a = torch.Tensor(bboxes_a)
if isinstance(bboxes_b, np.ndarray):
bboxes_b = torch.Tensor(bboxes_b)
N, K = bboxes_a.shape[0], bboxes_b.shape[0]
# if N, K all equal 1, then plot
# top left
if xyxy:
tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
# bottom right
br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
else:
tl = torch.max((bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
(bboxes_b[:, :2] - bboxes_b[:, 2:] / 2))
# bottom right
br = torch.min((bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
(bboxes_b[:, :2] + bboxes_b[:, 2:] / 2))
area_a = torch.prod(bboxes_a[:, 2:], 1)
area_b = torch.prod(bboxes_b[:, 2:], 1)
en = (tl < br).type(tl.type()).prod(dim=2)
area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all())
print(f"tl.shape = {tl.shape}")
print(f"br.shape = {br.shape}")
print(f"area_a.shape = {area_a.shape}")
print(f"area_b.shape = {area_b.shape}")
print(f"en.shape = {en.shape}")
print(f"area_i.shape = {area_i.shape}")
if N == K == 1:
pass
return area_i / (area_a[:, None] + area_b - area_i)
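if __name__ == "__main__":
    # Hedged usage sketch for `original_iou_test` (the box values below are made up for
    # illustration): two partially overlapping boxes in xyxy format give a pairwise IoU
    # matrix of shape (1, 1).
    _demo_a = torch.Tensor([[10., 10., 50., 50.]])  # xmin, ymin, xmax, ymax
    _demo_b = torch.Tensor([[30., 30., 70., 70.]])
    print(original_iou_test(_demo_a, _demo_b, xyxy=True))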
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from tool.torch_utils import *
def yolo_forward(output, conf_thresh, num_classes, anchors, num_anchors, scale_x_y, only_objectness=1,
validation=False):
# Output would be invalid if it does not satisfy this assert
# assert (output.size(1) == (5 + num_classes) * num_anchors)
# print(output.size())
# Slice the second dimension (channel) of output into:
# [ 2, 2, 1, num_classes, 2, 2, 1, num_classes, 2, 2, 1, num_classes ]
# And then into
# bxy = [ 6 ] bwh = [ 6 ] det_conf = [ 3 ] cls_conf = [ num_classes * 3 ]
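# e.g. with num_classes = 80 and num_anchors = 3 the channel dimension is 3 * (5 + 80) = 255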
batch = output.size(0)
H = output.size(2)
W = output.size(3)
bxy_list = []
bwh_list = []
det_confs_list = []
cls_confs_list = []
for i in range(num_anchors):
begin = i * (5 + num_classes)
end = (i + 1) * (5 + num_classes)
bxy_list.append(output[:, begin : begin + 2])
bwh_list.append(output[:, begin + 2 : begin + 4])
det_confs_list.append(output[:, begin + 4 : begin + 5])
cls_confs_list.append(output[:, begin + 5 : end])
# Shape: [batch, num_anchors * 2, H, W]
bxy = torch.cat(bxy_list, dim=1)
# Shape: [batch, num_anchors * 2, H, W]
bwh = torch.cat(bwh_list, dim=1)
# Shape: [batch, num_anchors, H, W]
det_confs = torch.cat(det_confs_list, dim=1)
# Shape: [batch, num_anchors * H * W]
det_confs = det_confs.view(batch, num_anchors * H * W)
# Shape: [batch, num_anchors * num_classes, H, W]
cls_confs = torch.cat(cls_confs_list, dim=1)
# Shape: [batch, num_anchors, num_classes, H * W]
cls_confs = cls_confs.view(batch, num_anchors, num_classes, H * W)
# Shape: [batch, num_anchors, num_classes, H * W] --> [batch, num_anchors * H * W, num_classes]
cls_confs = cls_confs.permute(0, 1, 3, 2).reshape(batch, num_anchors * H * W, num_classes)
# Apply sigmoid() to the xy offsets and the confidences, and exp() to the wh scales
#
bxy = torch.sigmoid(bxy) * scale_x_y - 0.5 * (scale_x_y - 1)
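# Here, scale_x_y > 1 stretches the sigmoid output around 0.5 so predicted centre offsets
# can actually reach the borders of a grid cell (the "grid sensitivity" adjustment used by YOLOv4).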
bwh = torch.exp(bwh)
det_confs = torch.sigmoid(det_confs)
cls_confs = torch.sigmoid(cls_confs)
# Prepare C-x, C-y, P-w, P-h (None of them are torch related)
grid_x = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, W - 1, W), axis=0).repeat(H, 0), axis=0), axis=0)
grid_y = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, H - 1, H), axis=1).repeat(W, 1), axis=0), axis=0)
# grid_x = torch.linspace(0, W - 1, W).reshape(1, 1, 1, W).repeat(1, 1, H, 1)
# grid_y = torch.linspace(0, H - 1, H).reshape(1, 1, H, 1).repeat(1, 1, 1, W)
anchor_w = []
anchor_h = []
for i in range(num_anchors):
anchor_w.append(anchors[i * 2])
anchor_h.append(anchors[i * 2 + 1])
device = None
cuda_check = output.is_cuda
if cuda_check:
device = output.get_device()
bx_list = []
by_list = []
bw_list = []
bh_list = []
# Apply C-x, C-y, P-w, P-h
for i in range(num_anchors):
ii = i * 2
# Shape: [batch, 1, H, W]
bx = bxy[:, ii : ii + 1] + torch.tensor(grid_x, device=device, dtype=torch.float32) # grid_x.to(device=device, dtype=torch.float32)
# Shape: [batch, 1, H, W]
by = bxy[:, ii + 1 : ii + 2] + torch.tensor(grid_y, device=device, dtype=torch.float32) # grid_y.to(device=device, dtype=torch.float32)
# Shape: [batch, 1, H, W]
bw = bwh[:, ii : ii + 1] * anchor_w[i]
# Shape: [batch, 1, H, W]
bh = bwh[:, ii + 1 : ii + 2] * anchor_h[i]
bx_list.append(bx)
by_list.append(by)
bw_list.append(bw)
bh_list.append(bh)
########################################
# Figure out bboxes from slices #
########################################
# Shape: [batch, num_anchors, H, W]
bx = torch.cat(bx_list, dim=1)
# Shape: [batch, num_anchors, H, W]
by = torch.cat(by_list, dim=1)
# Shape: [batch, num_anchors, H, W]
bw = torch.cat(bw_list, dim=1)
# Shape: [batch, num_anchors, H, W]
bh = torch.cat(bh_list, dim=1)
# Shape: [batch, 2 * num_anchors, H, W]
bx_bw = torch.cat((bx, bw), dim=1)
# Shape: [batch, 2 * num_anchors, H, W]
by_bh = torch.cat((by, bh), dim=1)
# normalize coordinates to [0, 1]
bx_bw /= W
by_bh /= H
# Shape: [batch, num_anchors * H * W, 1]
bx = bx_bw[:, :num_anchors].view(batch, num_anchors * H * W, 1)
by = by_bh[:, :num_anchors].view(batch, num_anchors * H * W, 1)
bw = bx_bw[:, num_anchors:].view(batch, num_anchors * H * W, 1)
bh = by_bh[:, num_anchors:].view(batch, num_anchors * H * W, 1)
bx1 = bx - bw * 0.5
by1 = by - bh * 0.5
bx2 = bx1 + bw
by2 = by1 + bh
# Shape: [batch, num_anchors * h * w, 4] -> [batch, num_anchors * h * w, 1, 4]
boxes = torch.cat((bx1, by1, bx2, by2), dim=2).view(batch, num_anchors * H * W, 1, 4)
# boxes = boxes.repeat(1, 1, num_classes, 1)
# boxes: [batch, num_anchors * H * W, 1, 4]
# cls_confs: [batch, num_anchors * H * W, num_classes]
# det_confs: [batch, num_anchors * H * W]
det_confs = det_confs.view(batch, num_anchors * H * W, 1)
confs = cls_confs * det_confs
# boxes: [batch, num_anchors * H * W, 1, 4]
# confs: [batch, num_anchors * H * W, num_classes]
return boxes, confs
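# Hedged shape summary for `yolo_forward` (sizes are illustrative): given a head output of
# shape [batch, num_anchors * (5 + num_classes), H, W], it returns normalized boxes of shape
# [batch, num_anchors * H * W, 1, 4] and class scores of shape
# [batch, num_anchors * H * W, num_classes]. `yolo_forward_dynamic` below performs the same
# decoding but reads batch, H and W from output.size() at runtime, so no fixed spatial size
# is baked into the graph (useful, e.g., when exporting with dynamic input shapes).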
def yolo_forward_dynamic(output, conf_thresh, num_classes, anchors, num_anchors, scale_x_y, only_objectness=1,
validation=False):
# Output would be invalid if it does not satisfy this assert
# assert (output.size(1) == (5 + num_classes) * num_anchors)
# print(output.size())
# Slice the second dimension (channel) of output into:
# [ 2, 2, 1, num_classes, 2, 2, 1, num_classes, 2, 2, 1, num_classes ]
# And then into
# bxy = [ 6 ] bwh = [ 6 ] det_conf = [ 3 ] cls_conf = [ num_classes * 3 ]
# batch = output.size(0)
# H = output.size(2)
# W = output.size(3)
bxy_list = []
bwh_list = []
det_confs_list = []
cls_confs_list = []
for i in range(num_anchors):
begin = i * (5 + num_classes)
end = (i + 1) * (5 + num_classes)
bxy_list.append(output[:, begin : begin + 2])
bwh_list.append(output[:, begin + 2 : begin + 4])
det_confs_list.append(output[:, begin + 4 : begin + 5])
cls_confs_list.append(output[:, begin + 5 : end])
# Shape: [batch, num_anchors * 2, H, W]
bxy = torch.cat(bxy_list, dim=1)
# Shape: [batch, num_anchors * 2, H, W]
bwh = torch.cat(bwh_list, dim=1)
# Shape: [batch, num_anchors, H, W]
det_confs = torch.cat(det_confs_list, dim=1)
# Shape: [batch, num_anchors * H * W]
det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3))
# Shape: [batch, num_anchors * num_classes, H, W]
cls_confs = torch.cat(cls_confs_list, dim=1)
# Shape: [batch, num_anchors, num_classes, H * W]
cls_confs = cls_confs.view(output.size(0), num_anchors, num_classes, output.size(2) * output.size(3))
# Shape: [batch, num_anchors, num_classes, H * W] --> [batch, num_anchors * H * W, num_classes]
cls_confs = cls_confs.permute(0, 1, 3, 2).reshape(output.size(0), num_anchors * output.size(2) * output.size(3), num_classes)
# Apply sigmoid() to the xy offsets and the confidences, and exp() to the wh scales
#
bxy = torch.sigmoid(bxy) * scale_x_y - 0.5 * (scale_x_y - 1)
bwh = torch.exp(bwh)
det_confs = torch.sigmoid(det_confs)
cls_confs = torch.sigmoid(cls_confs)
# Prepare C-x, C-y, P-w, P-h (None of them are torch related)
grid_x = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, output.size(3) - 1, output.size(3)), axis=0).repeat(output.size(2), 0), axis=0), axis=0)
grid_y = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, output.size(2) - 1, output.size(2)), axis=1).repeat(output.size(3), 1), axis=0), axis=0)
# grid_x = torch.linspace(0, W - 1, W).reshape(1, 1, 1, W).repeat(1, 1, H, 1)
# grid_y = torch.linspace(0, H - 1, H).reshape(1, 1, H, 1).repeat(1, 1, 1, W)
anchor_w = []
anchor_h = []
for i in range(num_anchors):
anchor_w.append(anchors[i * 2])
anchor_h.append(anchors[i * 2 + 1])
device = None
cuda_check = output.is_cuda
if cuda_check:
device = output.get_device()
bx_list = []
by_list = []
bw_list = []
bh_list = []
# Apply C-x, C-y, P-w, P-h
for i in range(num_anchors):
ii = i * 2
# Shape: [batch, 1, H, W]
bx = bxy[:, ii : ii + 1] + torch.tensor(grid_x, device=device, dtype=torch.float32) # grid_x.to(device=device, dtype=torch.float32)
# Shape: [batch, 1, H, W]
by = bxy[:, ii + 1 : ii + 2] + torch.tensor(grid_y, device=device, dtype=torch.float32) # grid_y.to(device=device, dtype=torch.float32)
# Shape: [batch, 1, H, W]
bw = bwh[:, ii : ii + 1] * anchor_w[i]
# Shape: [batch, 1, H, W]
bh = bwh[:, ii + 1 : ii + 2] * anchor_h[i]
bx_list.append(bx)
by_list.append(by)
bw_list.append(bw)
bh_list.append(bh)
########################################
# Figure out bboxes from slices #
########################################
# Shape: [batch, num_anchors, H, W]
bx = torch.cat(bx_list, dim=1)
# Shape: [batch, num_anchors, H, W]
by = torch.cat(by_list, dim=1)
# Shape: [batch, num_anchors, H, W]
bw = torch.cat(bw_list, dim=1)
# Shape: [batch, num_anchors, H, W]
bh = torch.cat(bh_list, dim=1)
# Shape: [batch, 2 * num_anchors, H, W]
bx_bw = torch.cat((bx, bw), dim=1)
# Shape: [batch, 2 * num_anchors, H, W]
by_bh = torch.cat((by, bh), dim=1)
# normalize coordinates to [0, 1]
bx_bw /= output.size(3)
by_bh /= output.size(2)
# Shape: [batch, num_anchors * H * W, 1]
bx = bx_bw[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
by = by_bh[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
bw = bx_bw[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
bh = by_bh[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
bx1 = bx - bw * 0.5
by1 = by - bh * 0.5
bx2 = bx1 + bw
by2 = by1 + bh
# Shape: [batch, num_anchors * h * w, 4] -> [batch, num_anchors * h * w, 1, 4]
boxes = torch.cat((bx1, by1, bx2, by2), dim=2).view(output.size(0), num_anchors * output.size(2) * output.size(3), 1, 4)
# boxes = boxes.repeat(1, 1, num_classes, 1)
# boxes: [batch, num_anchors * H * W, 1, 4]
# cls_confs: [batch, num_anchors * H * W, num_classes]
# det_confs: [batch, num_anchors * H * W]
det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
confs = cls_confs * det_confs
# boxes: [batch, num_anchors * H * W, 1, 4]
# confs: [batch, num_anchors * H * W, num_classes]
return boxes, confs
class YoloLayer(nn.Module):
    ''' Yolo layer
    model_out: during inference, whether post-processing is done inside or
    outside the model (True: outside)
    '''
def __init__(self, anchor_mask=[], num_classes=0, anchors=[], num_anchors=1, stride=32, model_out=False):
super(YoloLayer, self).__init__()
self.anchor_mask = anchor_mask
self.num_classes = num_classes
self.anchors = anchors
self.num_anchors = num_anchors
self.anchor_step = len(anchors) // num_anchors
self.coord_scale = 1
self.noobject_scale = 1
self.object_scale = 5
self.class_scale = 1
self.thresh = 0.6
self.stride = stride
self.seen = 0
self.scale_x_y = 1
self.model_out = model_out
def forward(self, output, target=None):
if self.training:
return output
masked_anchors = []
for m in self.anchor_mask:
masked_anchors += self.anchors[m * self.anchor_step:(m + 1) * self.anchor_step]
masked_anchors = [anchor / self.stride for anchor in masked_anchors]
return yolo_forward_dynamic(output, self.thresh, self.num_classes, masked_anchors, len(self.anchor_mask),scale_x_y=self.scale_x_y)
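if __name__ == "__main__":
    # Hedged smoke test (anchor values and sizes below are illustrative assumptions, not
    # taken from any config in this repo): decode one 76x76 head of an 80-class model.
    _num_classes = 80
    _anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55,
                72, 146, 142, 110, 192, 243, 459, 401]
    _layer = YoloLayer(anchor_mask=[0, 1, 2], num_classes=_num_classes,
                       anchors=_anchors, num_anchors=9, stride=8)
    _layer.eval()  # in training mode the layer returns its input unchanged
    _dummy = torch.randn(1, 3 * (5 + _num_classes), 76, 76)
    _boxes, _confs = _layer(_dummy)
    print(_boxes.shape, _confs.shape)  # (1, 3*76*76, 1, 4) and (1, 3*76*76, 80)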