
54. Notes on YOLOv7: training, and deployment to ncnn, MNN, the RK3399 NPU, OpenVINO, the OAK VPU, and TensorRT


Basic idea: document training a YOLOv7 head detector and deploying it to the OAK VPU camera and the RK3399 Pro development board to meet the project requirements.

1. Prepare the dataset. It comes from an official dataset; it had some problems, which have already been fixed.

Link: /s/1SUGKRgxeV7ddZpLdILhOwA?pwd=atjt  Extraction code: atjt

Experimental data and models

Link: /s/1mjYitnFfGvzLLcCzSQwLbg?pwd=7yaw  Extraction code: 7yaw

2. Convert the existing dataset to a YOLO dataset

ubuntu@ubuntu:~$ git clone /WongKinYiu/yolov7.git
Cloning into 'yolov7'...
remote: Enumerating objects: 1094, done.
remote: Total 1094 (delta 0), reused 0 (delta 0), pack-reused 1094
Receiving objects: 100% (1094/1094), 69.89 MiB | 2.21 MiB/s, done.
Resolving deltas: 100% (517/517), done.

Download the weights and test them.

ubuntu@ubuntu:~/yolov7/weights$ wget /WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
ubuntu@ubuntu:~/yolov7/weights$ cd ..

Run a quick test.

ssh://ubuntu@10.10.71.226:22/usr/bin/python -u /home/ubuntu/yolov7/detect.py
Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', exist_ok=False, img_size=640, iou_thres=0.45, name='exp', no_trace=False, nosave=False, project='runs/detect', save_conf=False, save_txt=False, source='inference/images', update=False, view_img=False, weights='yolov7.pt')
YOLOR 🚀 v0.1-116-g8c0bf3f torch 1.9.0+cu111 CUDA:0 (NVIDIA GeForce RTX 3060, 12053.0MB)

Fusing layers...
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Model Summary: 306 layers, 36905341 parameters, 6652669 gradients, 104.5 GFLOPS
Convert model to Traced-model...
traced_script_module saved!
model is traced!

4 persons, 1 bus, 1 tie, Done. (11.6ms) Inference, (3.0ms) NMS
The image with the result is saved in: runs/detect/exp2/bus.jpg
5 horses, Done. (10.8ms) Inference, (0.4ms) NMS
The image with the result is saved in: runs/detect/exp2/horses.jpg
2 persons, 1 tie, 1 cake, Done. (12.3ms) Inference, (0.4ms) NMS
The image with the result is saved in: runs/detect/exp2/image1.jpg
2 persons, 1 sports ball, Done. (10.9ms) Inference, (0.3ms) NMS
The image with the result is saved in: runs/detect/exp2/image2.jpg
1 dog, 1 horse, Done. (10.7ms) Inference, (0.3ms) NMS
The image with the result is saved in: runs/detect/exp2/image3.jpg
3 persons, 1 tie, Done. (9.6ms) Inference, (0.4ms) NMS
The image with the result is saved in: runs/detect/exp2/zidane.jpg
Done. (0.335s)

Process finished with exit code 0

Test results

3. Convert the dataset

1) Split the files

# coding:utf-8
import os
import random
import argparse

parser = argparse.ArgumentParser()
# path to the xml files; change for your own data (xml files usually live under Annotations)
parser.add_argument('--xml_path', default='/home/ubuntu/yolov7/trainData/Annotations', type=str, help='input xml label path')
# dataset split output; point to ImageSets/Main under your own data
parser.add_argument('--txt_path', default='/home/ubuntu/yolov7/trainData/ImageSets/Main', type=str, help='output txt label path')
opt = parser.parse_args()

trainval_percent = 1.0
train_percent = 0.9
xmlfilepath = opt.xml_path
txtsavepath = opt.txt_path
total_xml = os.listdir(xmlfilepath)
if not os.path.exists(txtsavepath):
    os.makedirs(txtsavepath)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list_index, tv)
train = random.sample(trainval, tr)

file_train = open(txtsavepath + '/train.txt', 'w')
file_val = open(txtsavepath + '/val.txt', 'w')

for i in list_index:
    name = total_xml[i][:-4] + '\n'
    if i in trainval:
        if i in train:
            file_train.write(name)
        else:
            file_val.write(name)

file_train.close()
file_val.close()

2) Generate the labels

# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
from os import getcwd

sets = ['train', 'val']
classes = ["person"]  # change to your own classes


def convert(size, box):
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return x, y, w, h


def convert_annotation(image_id):
    in_file = open('/home/ubuntu/yolov7/trainData/Annotations/%s.xml' % (image_id))
    out_file = open('/home/ubuntu/yolov7/trainData/labels/%s.txt' % (image_id), 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        difficult = None
        if obj.find('Difficult') is None:
            difficult = obj.find('difficult').text
        else:
            difficult = obj.find('Difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        b1, b2, b3, b4 = b
        # clip out-of-bounds annotations
        if b2 > w:
            b2 = w
        if b4 > h:
            b4 = h
        b = (b1, b2, b3, b4)
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


wd = getcwd()
for image_set in sets:
    if not os.path.exists('/home/ubuntu/yolov7/trainData/labels/'):
        os.makedirs('/home/ubuntu/yolov7/trainData/labels/')
    image_ids = open('/home/ubuntu/yolov7/trainData/ImageSets/Main/%s.txt' % (image_set)).read().strip().split()
    list_file = open('/home/ubuntu/yolov7/trainData/%s.txt' % (image_set), 'w')
    for image_id in image_ids:
        list_file.write('/home/ubuntu/yolov7/trainData/images/%s.jpg\n' % (image_id))
        convert_annotation(image_id)
    list_file.close()

print("voc_scrip finish")
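As a sanity check for the convert() helper above, here is a small worked example with made-up numbers (not taken from the dataset):

# Worked example for convert() above; the image size and box are invented for illustration.
# A 640x480 image with a VOC box xmin=100, xmax=300, ymin=50, ymax=250:
x, y, w, h = convert((640, 480), (100.0, 300.0, 50.0, 250.0))
print(x, y, w, h)  # ~0.311 0.310 0.3125 0.4167 -> YOLO label line "0 0.311 0.310 0.3125 0.4167"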

4. Modify coco.yaml

# COCO dataset
# download command/URL (optional)
#download: bash ./scripts/get_coco.sh

# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: /home/ubuntu/yolov7/trainData/train.txt  # 118287 images
val: /home/ubuntu/yolov7/trainData/val.txt  # 5000 images
#test: ./coco/test-dev.txt  # 20288 of 40670 images, submit to /competitions/20794

# number of classes
nc: 1

# class names
names: [ 'person' ]

Modify yolov7.yaml

nc: 1 # number of classes
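For reference, the top of cfg/training/yolov7.yaml then looks roughly like this (the anchor values below are the same ones used in the post-processing code later in this post):

nc: 1  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
anchors:
  - [12,16, 19,36, 40,28]        # P3/8
  - [36,75, 76,55, 72,146]       # P4/16
  - [142,110, 192,243, 459,401]  # P5/32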

5. Start training

ubuntu@ubuntu:~/yolov7$ python3 train.py --weights weights/yolov7.pt --cfg cfg/training/yolov7.yaml --data data/coco.yaml --device 0 --batch-size 8 --epoch 1000 --hyp data/hyp.scratch.p5.yaml

Test

ubuntu@ubuntu:~/yolov7$ python3 detect.py --weights runs/train/exp3/weights/best.pt
Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', exist_ok=False, img_size=640, iou_thres=0.45, name='exp', no_trace=False, nosave=False, project='runs/detect', save_conf=False, save_txt=False, source='inference/images', update=False, view_img=False, weights=['runs/train/exp3/weights/best.pt'])
YOLOR 🚀 v0.1-116-g8c0bf3f torch 1.9.0+cu111 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 3910.5MB)

Fusing layers...
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Model Summary: 314 layers, 36481772 parameters, 6194944 gradients, 103.2 GFLOPS
Convert model to Traced-model...
traced_script_module saved!
model is traced!

3 persons, Done. (22.3ms) Inference, (5.9ms) NMS
The image with the result is saved in: runs/detect/exp3/bus.jpg
Done. (18.4ms) Inference, (0.2ms) NMS
The image with the result is saved in: runs/detect/exp3/horses.jpg
4 persons, Done. (21.1ms) Inference, (0.6ms) NMS
The image with the result is saved in: runs/detect/exp3/image1.jpg
2 persons, Done. (18.7ms) Inference, (0.6ms) NMS
The image with the result is saved in: runs/detect/exp3/image2.jpg
2 persons, Done. (17.9ms) Inference, (0.6ms) NMS
The image with the result is saved in: runs/detect/exp3/image3.jpg
2 persons, Done. (15.5ms) Inference, (0.6ms) NMS
The image with the result is saved in: runs/detect/exp3/zidane.jpg
Done. (0.550s)

Test results (embarrassingly, it detected the horse's face and the dog's face as human heads...)

6. Convert the model to ONNX; do not include NMS in the export

ubuntu@ubuntu:~/yolov7$ python3 export.py --weights runs/train/exp3/weights/best.pt --simplify --img-size 640
Import onnx_graphsurgeon failure: No module named 'onnx_graphsurgeon'
Namespace(batch_size=1, conf_thres=0.25, device='cpu', dynamic=False, dynamic_batch=False, end2end=False, fp16=False, grid=False, img_size=[640, 640], include_nms=False, int8=False, iou_thres=0.45, max_wh=None, simplify=True, topk_all=100, weights='runs/train/exp3/weights/best.pt')
YOLOR 🚀 v0.1-116-g8c0bf3f torch 1.9.0+cu111 CPU

Fusing layers...
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Model Summary: 314 layers, 36481772 parameters, 6194944 gradients, 103.2 GFLOPS

Starting TorchScript export with torch 1.9.0+cu111...
TorchScript export success, saved as runs/train/exp3/weights/best.torchscript.pt
CoreML export failure: No module named 'coremltools'

Starting TorchScript-Lite export with torch 1.9.0+cu111...
TorchScript-Lite export success, saved as runs/train/exp3/weights/best.torchscript.ptl

Starting ONNX export with onnx 1.12.0...
Starting to simplify ONNX...
ONNX export success, saved as runs/train/exp3/weights/best.onnx

Export complete (11.29s). Visualize with /lutzroeder/netron.

7. Convert to ncnn; the official ncnn example can be used directly

ubuntu@ubuntu:~/ncnn/build/install/bin$ ./onnx2ncnn /home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx /home/ubuntu/yolov7/runs/train/exp3/weights/best.param /home/ubuntu/yolov7/runs/train/exp3/weights/best.bin

Reference source code: /Tencent/ncnn/tree/master/examples

The tail of the generated .param file needs to be edited: the three values marked by the red boxes in the original screenshot (not reproduced here) must be changed to -1, otherwise the output boxes come out garbled. The kind of edit is illustrated below.
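A minimal, hypothetical illustration of that edit, assuming the usual yolov5/yolov7-style output Reshape layers in the ncnn param file; the layer names, blob numbers and remaining parameters will differ in your own best.param:

# before (hypothetical line; only the 0= parameter is shown, other fields left out)
Reshape          Reshape_245    1 1 470 471 0=6400
# after: -1 lets ncnn infer the element count at runtime instead of hard-coding it
Reshape          Reshape_245    1 1 470 471 0=-1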

cmakelists.txt

cmake_minimum_required(VERSION 3.16)
project(untitled22)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_STANDARD 11)

include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/include/ncnn)

find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
# add OpenCV header files
include_directories(${OpenCV_INCLUDE_DIRS})

# link the ncnn static library
add_library(libncnn STATIC IMPORTED)
set_target_properties(libncnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libncnn.a)

add_executable(untitled22 main.cpp)
target_link_libraries(untitled22 ${OpenCV_LIBS} libncnn)

main.cpp

// Tencent is pleased to support the open source community by making ncnn available.//// Copyright (C) THL A29 Limited, a Tencent company. All rights reserved.//// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except// in compliance with the License. You may obtain a copy of the License at//// /licenses/BSD-3-Clause//// Unless required by applicable law or agreed to in writing, software distributed// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR// CONDITIONS OF ANY KIND, either express or implied. See the License for the// specific language governing permissions and limitations under the License.#include "layer.h"#include "net.h"#if defined(USE_NCNN_SIMPLEOCV)#include "simpleocv.h"#else#include <opencv2/core/core.hpp>#include <opencv2/highgui/highgui.hpp>#include <opencv2/imgproc/imgproc.hpp>#endif#include <float.h>#include <stdio.h>#include <vector>#define MAX_STRIDE 32struct Object{cv::Rect_<float> rect;int label;float prob;};static inline float intersection_area(const Object& a, const Object& b){cv::Rect_<float> inter = a.rect & b.rect;return inter.area();}static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right){int i = left;int j = right;float p = objects[(left + right) / 2].prob;while (i <= j){while (objects[i].prob > p)i++;while (objects[j].prob < p)j--;if (i <= j){// swapstd::swap(objects[i], objects[j]);i++;j--;}}#pragma omp parallel sections{#pragma omp section{if (left < j) qsort_descent_inplace(objects, left, j);}#pragma omp section{if (i < right) qsort_descent_inplace(objects, i, right);}}}static void qsort_descent_inplace(std::vector<Object>& objects){if (objects.empty())return;qsort_descent_inplace(objects, 0, objects.size() - 1);}static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false){picked.clear();const int n = faceobjects.size();std::vector<float> areas(n);for (int i = 0; i < n; i++){areas[i] = faceobjects[i].rect.area();}for (int i = 0; i < n; i++){const Object& a = faceobjects[i];int keep = 1;for (int j = 0; j < (int)picked.size(); j++){const Object& b = faceobjects[picked[j]];if (!agnostic && a.label != b.label)continue;// intersection over unionfloat inter_area = intersection_area(a, b);float union_area = areas[i] + areas[picked[j]] - inter_area;// float IoU = inter_area / union_areaif (inter_area / union_area > nms_threshold)keep = 0;}if (keep)picked.push_back(i);}}static inline float sigmoid(float x){return static_cast<float>(1.f / (1.f + exp(-x)));}static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects){const int num_grid = feat_blob.h;int num_grid_x;int num_grid_y;if (in_pad.w > in_pad.h){num_grid_x = in_pad.w / stride;num_grid_y = num_grid / num_grid_x;}else{num_grid_y = in_pad.h / stride;num_grid_x = num_grid / num_grid_y;}const int num_class = feat_blob.w - 5;const int num_anchors = anchors.w / 2;for (int q = 0; q < num_anchors; q++){const float anchor_w = anchors[q * 2];const float anchor_h = anchors[q * 2 + 1];const ncnn::Mat feat = feat_blob.channel(q);for (int i = 0; i < num_grid_y; i++){for (int j = 0; j < num_grid_x; j++){const float* featptr = feat.row(i * num_grid_x + j);float box_confidence = sigmoid(featptr[4]);if (box_confidence >= prob_threshold){// find class index with max class scoreint class_index = 0;float class_score = -FLT_MAX;for (int k = 0; k < 
num_class; k++){float score = featptr[5 + k];if (score > class_score){class_index = k;class_score = score;}}float confidence = box_confidence * sigmoid(class_score);if (confidence >= prob_threshold){float dx = sigmoid(featptr[0]);float dy = sigmoid(featptr[1]);float dw = sigmoid(featptr[2]);float dh = sigmoid(featptr[3]);float pb_cx = (dx * 2.f - 0.5f + j) * stride;float pb_cy = (dy * 2.f - 0.5f + i) * stride;float pb_w = pow(dw * 2.f, 2) * anchor_w;float pb_h = pow(dh * 2.f, 2) * anchor_h;float x0 = pb_cx - pb_w * 0.5f;float y0 = pb_cy - pb_h * 0.5f;float x1 = pb_cx + pb_w * 0.5f;float y1 = pb_cy + pb_h * 0.5f;Object obj;obj.rect.x = x0;obj.rect.y = y0;obj.rect.width = x1 - x0;obj.rect.height = y1 - y0;obj.label = class_index;obj.prob = confidence;objects.push_back(obj);}}}}}}static int detect_yolov7(const cv::Mat& bgr, std::vector<Object>& objects){ncnn::Net yolov7;yolov7.opt.use_vulkan_compute = true;// yolov7.opt.use_bf16_storage = true;// original pretrained model from /WongKinYiu/yolov7// the ncnn model /nihui/ncnn-assets/tree/master/modelsyolov7.load_param("/home/ubuntu/yolov7/runs/train/exp3/weights/best.param");yolov7.load_model("/home/ubuntu/yolov7/runs/train/exp3/weights/best.bin");const int target_size = 640;const float prob_threshold = 0.25f;const float nms_threshold = 0.45f;int img_w = bgr.cols;int img_h = bgr.rows;// letterbox pad to multiple of MAX_STRIDEint w = img_w;int h = img_h;float scale = 1.f;if (w > h){scale = (float)target_size / w;w = target_size;h = h * scale;}else{scale = (float)target_size / h;h = target_size;w = w * scale;}ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w;int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h;ncnn::Mat in_pad;ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};in_pad.substract_mean_normalize(0, norm_vals);ncnn::Extractor ex = yolov7.create_extractor();ex.input("images", in_pad);std::vector<Object> proposals;// stride 8{ncnn::Mat out;ex.extract("output", out);ncnn::Mat anchors(6);anchors[0] = 12.f;anchors[1] = 16.f;anchors[2] = 19.f;anchors[3] = 36.f;anchors[4] = 40.f;anchors[5] = 28.f;std::vector<Object> objects8;generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);proposals.insert(proposals.end(), objects8.begin(), objects8.end());}// stride 16{ncnn::Mat out;ex.extract("534", out);ncnn::Mat anchors(6);anchors[0] = 36.f;anchors[1] = 75.f;anchors[2] = 76.f;anchors[3] = 55.f;anchors[4] = 72.f;anchors[5] = 146.f;std::vector<Object> objects16;generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);proposals.insert(proposals.end(), objects16.begin(), objects16.end());}// stride 32{ncnn::Mat out;ex.extract("554", out);ncnn::Mat anchors(6);anchors[0] = 142.f;anchors[1] = 110.f;anchors[2] = 192.f;anchors[3] = 243.f;anchors[4] = 459.f;anchors[5] = 401.f;std::vector<Object> objects32;generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);proposals.insert(proposals.end(), objects32.begin(), objects32.end());}// sort all proposals by score from highest to lowestqsort_descent_inplace(proposals);// apply nms with nms_thresholdstd::vector<int> picked;nms_sorted_bboxes(proposals, picked, nms_threshold);int count = picked.size();objects.resize(count);for (int i = 0; i < count; i++){objects[i] = proposals[picked[i]];// adjust offset to original 
unpaddedfloat x0 = (objects[i].rect.x - (wpad / 2)) / scale;float y0 = (objects[i].rect.y - (hpad / 2)) / scale;float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;// clipx0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);objects[i].rect.x = x0;objects[i].rect.y = y0;objects[i].rect.width = x1 - x0;objects[i].rect.height = y1 - y0;}return 0;}static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects){static const char* class_names[] = {"person"};static const unsigned char colors[19][3] = {{54, 67, 244},{99, 30, 233},{176, 39, 156},{183, 58, 103},{181, 81, 63},{243, 150, 33},{244, 169, 3},{212, 188, 0},{136, 150, 0},{80, 175, 76},{74, 195, 139},{57, 220, 205},{59, 235, 255},{7, 193, 255},{0, 152, 255},{34, 87, 255},{72, 85, 121},{158, 158, 158},{139, 125, 96}};int color_index = 0;cv::Mat image = bgr.clone();for (size_t i = 0; i < objects.size(); i++){const Object& obj = objects[i];const unsigned char* color = colors[color_index % 19];color_index++;cv::Scalar cc(color[0], color[1], color[2]);fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);cv::rectangle(image, obj.rect, cc, 2);char text[256];sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);int baseLine = 0;cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);int x = obj.rect.x;int y = obj.rect.y - label_size.height - baseLine;if (y < 0)y = 0;if (x + label_size.width > image.cols)x = image.cols - label_size.width;cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),cc, -1);cv::putText(image, text, cv::Point(x, y + label_size.height),cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255));}cv::imshow("image", image);cv::waitKey(0);}int main(int argc, char** argv){cv::Mat m = cv::imread("/home/ubuntu/yolov7/inference/images/bus.jpg");if (m.empty()){return -1;}std::vector<Object> objects;detect_yolov7(m, objects);draw_objects(m, objects);return 0;}

Test results

8. MNN test results

ubuntu@ubuntu:~/MNN/build$ ./MNNConvert -f ONNX --modelFile /home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx --MNNModel /home/ubuntu/yolov7/runs/train/exp3/weights/best.mnn --bizCode MNN
Start to Convert Other Model Format To MNN Model...
[10:36:41] /home/ubuntu/MNN/tools/converter/source/onnx/onnxConverter.cpp:40: ONNX Model ir version: 6
Start to Optimize the MNN Net...
inputTensors : [ images, ]
outputTensors: [ 534, 554, output, ]
Converted Success!

cmakelists.txt

cmake_minimum_required(VERSION 3.16)
project(untitled22)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_STANDARD 11)

include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/include/MNN)

find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
# add OpenCV header files
include_directories(${OpenCV_INCLUDE_DIRS})

# link the MNN shared library
add_library(libmnn SHARED IMPORTED)
set_target_properties(libmnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libMNN.so)

add_executable(untitled22 main.cpp)
target_link_libraries(untitled22 ${OpenCV_LIBS} libmnn)

main.cpp

#include <iostream>#include <algorithm>#include <vector>#include <opencv2/core/core.hpp>#include <opencv2/highgui/highgui.hpp>#include <opencv2/imgproc.hpp>#include <opencv2/opencv.hpp>#include<MNN/Interpreter.hpp>#include<MNN/ImageProcess.hpp>using namespace std;using namespace cv;typedef struct {int width;int height;} YoloSize;typedef struct {std::string name;int stride;std::vector<YoloSize> anchors;} YoloLayerData;class BoxInfo{public:int x1,y1,x2,y2,label,id;float score;};static inline float sigmoid(float x){return static_cast<float>(1.f / (1.f + exp(-x)));}double GetIOU(cv::Rect_<float> bb_test, cv::Rect_<float> bb_gt){float in = (bb_test & bb_gt).area();float un = bb_test.area() + bb_gt.area() - in;if (un < DBL_EPSILON)return 0;return (double)(in / un);}std::vector<BoxInfo> decode_infer(MNN::Tensor & data, int stride, int net_size, int num_classes,const std::vector<YoloSize> &anchors, float threshold){std::vector<BoxInfo> result;int batchs, channels, height, width, pred_item ;batchs = data.shape()[0];channels = data.shape()[1];height = data.shape()[2];width = data.shape()[3];pred_item = data.shape()[4];auto data_ptr = data.host<float>();for(int bi=0; bi<batchs; bi++){auto batch_ptr = data_ptr + bi*(channels*height*width*pred_item);for(int ci=0; ci<channels; ci++){auto channel_ptr = batch_ptr + ci*(height*width*pred_item);for(int hi=0; hi<height; hi++){auto height_ptr = channel_ptr + hi*(width * pred_item);for(int wi=0; wi<width; wi++){auto width_ptr = height_ptr + wi*pred_item;auto cls_ptr = width_ptr + 5;auto confidence = sigmoid(width_ptr[4]);for(int cls_id=0; cls_id<num_classes; cls_id++){float score = sigmoid(cls_ptr[cls_id]) * confidence;if(score > threshold){float cx = (sigmoid(width_ptr[0]) * 2.f - 0.5f + wi) * (float) stride;float cy = (sigmoid(width_ptr[1]) * 2.f - 0.5f + hi) * (float) stride;float w = pow(sigmoid(width_ptr[2]) * 2.f, 2) * anchors[ci].width;float h = pow(sigmoid(width_ptr[3]) * 2.f, 2) * anchors[ci].height;BoxInfo box;box.x1 = std::max(0, std::min(net_size, int((cx - w / 2.f) )));box.y1 = std::max(0, std::min(net_size, int((cy - h / 2.f) )));box.x2 = std::max(0, std::min(net_size, int((cx + w / 2.f) )));box.y2 = std::max(0, std::min(net_size, int((cy + h / 2.f) )));box.score = score;box.label = cls_id;result.push_back(box);}}}}}}return result;}void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });std::vector<float> vArea(input_boxes.size());for (int i = 0; i < int(input_boxes.size()); ++i) {vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);}for (int i = 0; i < int(input_boxes.size()); ++i) {for (int j = i + 1; j < int(input_boxes.size());) {float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);float w = std::max(float(0), xx2 - xx1 + 1);float h = std::max(float(0), yy2 - yy1 + 1);float inter = w * h;float ovr = inter / (vArea[i] + vArea[j] - inter);if (ovr >= NMS_THRESH) {input_boxes.erase(input_boxes.begin() + j);vArea.erase(vArea.begin() + j);} else {j++;}}}}void scale_coords(std::vector<BoxInfo> &boxes, int w_from, int h_from, int w_to, int h_to){float w_ratio = float(w_to)/float(w_from);float h_ratio = float(h_to)/float(h_from);for(auto &box: boxes){box.x1 *= w_ratio;box.x2 *= w_ratio;box.y1 *= 
h_ratio;box.y2 *= h_ratio;}return ;}cv::Mat draw_box(cv::Mat & cv_mat, std::vector<BoxInfo> &boxes, const std::vector<std::string> &labels,unsigned char colors[][3]){for(auto box : boxes){int width = box.x2-box.x1;int height = box.y2-box.y1;cv::Point p = cv::Point(box.x1, box.y1);cv::Rect rect = cv::Rect(box.x1, box.y1, width, height);cv::rectangle(cv_mat, rect, cv::Scalar(colors[box.label][0],colors[box.label][1],colors[box.label][2]));string text = labels[box.label] + ":" + std::to_string(box.score) ;cv::putText(cv_mat, text, p, cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(colors[box.label][0],colors[box.label][1],colors[box.label][2]));}return cv_mat;}int main(int argc, char **argv) {std::vector<std::string> labels = {"person"};unsigned char colors[][3] = {{255, 0, 0}};cv::Mat bgr = cv::imread("/home/ubuntu/yolov7/inference/images/bus.jpg");;// 预处理和源码不太一样,所以影响了后面的int target_size = 640;cv::Mat resize_img;cv::resize(bgr, resize_img, cv::Size(target_size, target_size));float cls_threshold = 0.25;// MNN inferenceauto mnnNet = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile("/home/ubuntu/yolov7/runs/train/exp3/weights/best.mnn"));auto t1 = std::chrono::steady_clock::now();MNN::ScheduleConfig netConfig;netConfig.type = MNN_FORWARD_CPU;netConfig.numThread = 4;auto session = mnnNet->createSession(netConfig);auto input = mnnNet->getSessionInput(session, nullptr);mnnNet->resizeTensor(input, {1, 3, (int) target_size, (int) target_size});mnnNet->resizeSession(session);MNN::CV::ImageProcess::Config config;const float mean_vals[3] = {0, 0, 0};const float norm_255[3] = {1.f / 255, 1.f / 255.f, 1.f / 255};std::shared_ptr<MNN::CV::ImageProcess> pretreat(MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::RGB, mean_vals, 3,norm_255, 3));pretreat->convert(resize_img.data, (int) target_size, (int) target_size, resize_img.step[0], input);mnnNet->runSession(session);std::vector<YoloLayerData> yolov7_layers{{"554", 32, {{142, 110}, {192, 243}, {459, 401}}},{"534", 16, {{36, 75}, {76, 55}, {72, 146}}},{"output", 8, {{12, 16}, {19, 36}, {40, 28}}},};auto output = mnnNet->getSessionOutput(session, yolov7_layers[2].name.c_str());MNN::Tensor outputHost(output, output->getDimensionType());output->copyToHostTensor(&outputHost);//毫秒级std::vector<float> vec_scores;std::vector<float> vec_new_scores;std::vector<int> vec_labels;int outputHost_shape_c = outputHost.channel();int outputHost_shape_d = outputHost.dimensions();int outputHost_shape_w = outputHost.width();int outputHost_shape_h = outputHost.height();printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d outputHost.elementSize()=%d\n", outputHost_shape_d,outputHost_shape_c, outputHost_shape_h, outputHost_shape_w, outputHost.elementSize());auto yolov7_534 = mnnNet->getSessionOutput(session, yolov7_layers[1].name.c_str());MNN::Tensor output_534_Host(yolov7_534, yolov7_534->getDimensionType());yolov7_534->copyToHostTensor(&output_534_Host);outputHost_shape_c = output_534_Host.channel();outputHost_shape_d = output_534_Host.dimensions();outputHost_shape_w = output_534_Host.width();outputHost_shape_h = output_534_Host.height();printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d output_534_Host.elementSize()=%d\n", outputHost_shape_d,outputHost_shape_c, outputHost_shape_h, outputHost_shape_w, output_534_Host.elementSize());auto yolov7_554 = mnnNet->getSessionOutput(session, yolov7_layers[0].name.c_str());MNN::Tensor output_544_Host(yolov7_554, yolov7_554->getDimensionType());yolov7_554->copyToHostTensor(&output_544_Host);outputHost_shape_c = 
output_544_Host.channel();outputHost_shape_d = output_544_Host.dimensions();outputHost_shape_w = output_544_Host.width();outputHost_shape_h = output_544_Host.height();printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d output_544_Host.elementSize()=%d\n", outputHost_shape_d,outputHost_shape_c, outputHost_shape_h, outputHost_shape_w, output_544_Host.elementSize());std::vector<YoloLayerData> & layers = yolov7_layers;std::vector<BoxInfo> result;std::vector<BoxInfo> boxes;float threshold = 0.3;float nms_threshold = 0.7;boxes = decode_infer(outputHost, layers[2].stride, target_size, labels.size(), layers[2].anchors, threshold);result.insert(result.begin(), boxes.begin(), boxes.end());boxes = decode_infer(output_534_Host, layers[1].stride, target_size, labels.size(), layers[1].anchors, threshold);result.insert(result.begin(), boxes.begin(), boxes.end());boxes = decode_infer(output_544_Host, layers[0].stride, target_size, labels.size(), layers[0].anchors, threshold);result.insert(result.begin(), boxes.begin(), boxes.end());nms(result, nms_threshold);scale_coords(result, target_size, target_size, bgr.cols, bgr.rows);cv::Mat frame_show = draw_box(bgr, result, labels,colors);cv::imshow("out",bgr);cv::imwrite("dp.jpg",bgr);cv::waitKey(0);mnnNet->releaseModel();mnnNet->releaseSession(session);return 0;}

Test results

9. RK3399 Pro

Convert the model

from rknn.api import RKNN

ONNX_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx'
RKNN_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.rknn'

if __name__ == '__main__':
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], reorder_channel='0 1 2',
                target_platform='rk3399pro',
                quantized_dtype='asymmetric_affine-u8', optimization_level=3, output_optimize=1)
    print('done')

    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset='dataset.txt')  # ,pre_compile=True
    if ret != 0:
        print('Build yolov5s failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export RKNN model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export yolov5s.rknn failed!')
        exit(ret)
    print('done')

    rknn.release()
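Note that rknn.build() quantizes against dataset.txt, which the post never shows; it is just a plain-text list of calibration image paths, one per line, for example (file names here are only illustrative):

./bus.jpg
./head_0001.jpg
./head_0002.jpg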

Python test

import osimport numpy as npimport cv2from rknn.api import RKNNONNX_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx'RKNN_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.rknn'IMG_PATH = './bus.jpg'DATASET = './dataset.txt'QUANTIZE_ON = TrueBOX_THRESH = 0.5NMS_THRESH = 0.6IMG_SIZE = 640CLASSES = ("person")def sigmoid(x):return 1 / (1 + np.exp(-x))def xywh2xyxy(x):# Convert [x, y, w, h] to [x1, y1, x2, y2]y = np.copy(x)y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left xy[:, 1] = x[:, 1] - x[:, 3] / 2 # top left yy[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right xy[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right yreturn ydef process(input, mask, anchors):anchors = [anchors[i] for i in mask]grid_h, grid_w = map(int, input.shape[0:2])box_confidence = sigmoid(input[..., 4])box_confidence = np.expand_dims(box_confidence, axis=-1)box_class_probs = sigmoid(input[..., 5:])box_xy = sigmoid(input[..., :2])*2 - 0.5col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)grid = np.concatenate((col, row), axis=-1)box_xy += gridbox_xy *= int(IMG_SIZE/grid_h)box_wh = pow(sigmoid(input[..., 2:4])*2, 2)box_wh = box_wh * anchorsbox = np.concatenate((box_xy, box_wh), axis=-1)return box, box_confidence, box_class_probsdef filter_boxes(boxes, box_confidences, box_class_probs):"""Filter boxes with box threshold. It's a bit different with origin yolov5 post process!# Argumentsboxes: ndarray, boxes of objects.box_confidences: ndarray, confidences of objects.box_class_probs: ndarray, class_probs of objects.# Returnsboxes: ndarray, filtered boxes.classes: ndarray, classes for boxes.scores: ndarray, scores for boxes."""box_classes = np.argmax(box_class_probs, axis=-1)box_class_scores = np.max(box_class_probs, axis=-1)pos = np.where(box_confidences[...,0] >= BOX_THRESH)boxes = boxes[pos]classes = box_classes[pos]scores = box_class_scores[pos]return boxes, classes, scoresdef nms_boxes(boxes, scores):"""Suppress non-maximal boxes.# Argumentsboxes: ndarray, boxes of objects.scores: ndarray, scores of objects.# Returnskeep: ndarray, index of effective boxes."""x = boxes[:, 0]y = boxes[:, 1]w = boxes[:, 2] - boxes[:, 0]h = boxes[:, 3] - boxes[:, 1]areas = w * horder = scores.argsort()[::-1]keep = []while order.size > 0:i = order[0]keep.append(i)xx1 = np.maximum(x[i], x[order[1:]])yy1 = np.maximum(y[i], y[order[1:]])xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)inter = w1 * h1ovr = inter / (areas[i] + areas[order[1:]] - inter)inds = np.where(ovr <= NMS_THRESH)[0]order = order[inds + 1]keep = np.array(keep)return keepdef yolov7_post_process(input_data):masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]anchors = [[12,16], [ 19,36], [40,28],[36,75], [76,55],[72,146],[142,110], [192,243], [459,401]]boxes, classes, scores = [], [], []for input,mask in zip(input_data, masks):b, c, s = process(input, mask, anchors)b, c, s = filter_boxes(b, c, s)boxes.append(b)classes.append(c)scores.append(s)boxes = np.concatenate(boxes)boxes = xywh2xyxy(boxes)classes = np.concatenate(classes)scores = np.concatenate(scores)nboxes, nclasses, nscores = [], [], []for c in set(classes):inds = np.where(classes == c)b = boxes[inds]c = classes[inds]s = scores[inds]keep = nms_boxes(b, 
s)nboxes.append(b[keep])nclasses.append(c[keep])nscores.append(s[keep])if not nclasses and not nscores:return None, None, Noneboxes = np.concatenate(nboxes)classes = np.concatenate(nclasses)scores = np.concatenate(nscores)return boxes, classes, scoresdef draw(image, boxes, scores, classes):"""Draw the boxes on the image.# Argument:image: original image.boxes: ndarray, boxes of objects.classes: ndarray, classes of objects.scores: ndarray, scores of objects.all_classes: all classes name."""for box, score, cl in zip(boxes, scores, classes):top, left, right, bottom = boxprint('class: {}, score: {}'.format(CLASSES[cl], score))print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(top, left, right, bottom))top = int(top)left = int(left)right = int(right)bottom = int(bottom)cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),(top, left - 6),cv2.FONT_HERSHEY_SIMPLEX,0.6, (0, 0, 255), 2)def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):# Resize and pad image while meeting stride-multiple constraintsshape = im.shape[:2] # current shape [height, width]if isinstance(new_shape, int):new_shape = (new_shape, new_shape)# Scale ratio (new / old)r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])# Compute paddingratio = r, r # width, height ratiosnew_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh paddingdw /= 2 # divide padding into 2 sidesdh /= 2if shape[::-1] != new_unpad: # resizeim = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))left, right = int(round(dw - 0.1)), int(round(dw + 0.1))im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add borderreturn im, ratio, (dw, dh)if __name__ == '__main__':# Create RKNN objectrknn = RKNN()if not os.path.exists(RKNN_MODEL):print("model not exist")exit(-1)# Load ONNX modelprint("--> Loading model")ret = rknn.load_rknn(RKNN_MODEL)if ret != 0:print("Load rknn model failed!")exit(ret)print("done")# init runtime environmentprint('--> Init runtime environment')ret = rknn.init_runtime()# ret = rknn.init_runtime('rk1808', device_id='1808')if ret != 0:print('Init runtime environment failed')exit(ret)print('done')# Set inputsimg = cv2.imread(IMG_PATH)# img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)img = cv2.resize(img,(IMG_SIZE, IMG_SIZE))# Inferenceprint('--> Running model')outputs = rknn.inference(inputs=[img])# post processinput0_data = outputs[0]input1_data = outputs[1]input2_data = outputs[2]input0_data=np.transpose(input0_data, (0, 1, 4, 2,3))input1_data=np.transpose(input1_data, (0, 1, 4, 2,3))input2_data =np.transpose(input2_data, (0, 1, 4, 2,3))input0_data = input0_data.reshape([3,-1]+list(input0_data.shape[-2:]))input1_data = input1_data.reshape([3,-1]+list(input1_data.shape[-2:]))input2_data = input2_data.reshape([3,-1]+list(input2_data.shape[-2:]))input_data = list()input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))boxes, classes, scores = yolov7_post_process(input_data)img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)if boxes is not None:draw(img_1, boxes, scores, classes)cv2.imshow("post process result", img_1)cv2.imwrite("post.jpg",img_1)cv2.waitKeyEx(0)rknn.release()

Test results

Surprisingly, inspecting the model in Netron shows that

after the Reshape node the tensor is already 1×3×6×20×20, so the post-processing can be indexed accordingly and the official demo model does not need to be modified (a small numpy sketch of the equivalence is below).
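A quick numpy sketch (shapes only, random data) of why this works: taking the 1×3×6×20×20 tensor before the Transpose and taking the official 1×3×20×20×6 output both lead to the same (20, 20, 3, 6) array that yolov7_post_process() consumes.

import numpy as np

# Random stand-in for one RKNN output head at the 20x20 scale (1 class -> 6 channels).
official = np.random.rand(1, 3, 20, 20, 6).astype(np.float32)       # official graph output
pre_transpose = np.transpose(official, (0, 1, 4, 2, 3))             # node before Transpose: 1x3x6x20x20

# Path used when keeping the official output (first test script above).
a = np.transpose(official, (0, 1, 4, 2, 3)).reshape([3, -1, 20, 20])
a = np.transpose(a, (2, 3, 0, 1))                                   # (20, 20, 3, 6)

# Path when the output is already 1x3x6x20x20: the extra transpose is unnecessary.
b = pre_transpose.reshape([3, -1, 20, 20])
b = np.transpose(b, (2, 3, 0, 1))                                   # (20, 20, 3, 6)

print(np.allclose(a, b))  # True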

There are three options here: (1) take the intermediate tensors directly by node name when loading the model, which works from Python but is inconvenient elsewhere; (2) modify the PyTorch source so the export no longer generates the Transpose; (3) edit the ONNX graph and simply remove the two trailing nodes. A sketch of option (3) follows.
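One way to get the effect of option (3) is onnx.utils.extract_model, a rough sketch under the assumption that the tensor names '513', '533' and '553' seen in Netron feed the Reshape/Transpose tail (they are the same names passed to load_onnx below and will differ for your own export; best_truncated.onnx is a made-up output path):

import onnx.utils

# Cut the exported graph so the tensors feeding the Reshape/Transpose tail become the outputs.
# Verify the tensor names for your own model in Netron first.
onnx.utils.extract_model(
    '/home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx',
    '/home/ubuntu/yolov7/runs/train/exp3/weights/best_truncated.onnx',
    input_names=['images'],
    output_names=['513', '533', '553'],
)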

You can also do it this way:

from rknn.api import RKNN

ONNX_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx'
RKNN_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best_remove_transpose.rknn'

if __name__ == '__main__':
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], reorder_channel='0 1 2',
                target_platform='rk3399pro',
                quantized_dtype='asymmetric_affine-u8', optimization_level=3, output_optimize=1)
    print('done')

    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL, outputs=[513, 533, 553])
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset='dataset.txt')  # ,pre_compile=True
    if ret != 0:
        print('Build yolov5s failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export RKNN model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export yolov5s.rknn failed!')
        exit(ret)
    print('done')

    rknn.release()

For testing, the YOLOv5 test code can be reused directly.

import osimport numpy as npimport cv2from rknn.api import RKNNONNX_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx'RKNN_MODEL = '/home/ubuntu/yolov7/runs/train/exp3/weights/best_remove_transpose.rknn'IMG_PATH = './bus.jpg'DATASET = './dataset.txt'QUANTIZE_ON = TrueBOX_THRESH = 0.5NMS_THRESH = 0.6IMG_SIZE = 640CLASSES = ("person")def sigmoid(x):return 1 / (1 + np.exp(-x))def xywh2xyxy(x):# Convert [x, y, w, h] to [x1, y1, x2, y2]y = np.copy(x)y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left xy[:, 1] = x[:, 1] - x[:, 3] / 2 # top left yy[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right xy[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right yreturn ydef process(input, mask, anchors):anchors = [anchors[i] for i in mask]grid_h, grid_w = map(int, input.shape[0:2])box_confidence = sigmoid(input[..., 4])box_confidence = np.expand_dims(box_confidence, axis=-1)box_class_probs = sigmoid(input[..., 5:])box_xy = sigmoid(input[..., :2])*2 - 0.5col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)grid = np.concatenate((col, row), axis=-1)box_xy += gridbox_xy *= int(IMG_SIZE/grid_h)box_wh = pow(sigmoid(input[..., 2:4])*2, 2)box_wh = box_wh * anchorsbox = np.concatenate((box_xy, box_wh), axis=-1)return box, box_confidence, box_class_probsdef filter_boxes(boxes, box_confidences, box_class_probs):"""Filter boxes with box threshold. It's a bit different with origin yolov5 post process!# Argumentsboxes: ndarray, boxes of objects.box_confidences: ndarray, confidences of objects.box_class_probs: ndarray, class_probs of objects.# Returnsboxes: ndarray, filtered boxes.classes: ndarray, classes for boxes.scores: ndarray, scores for boxes."""box_classes = np.argmax(box_class_probs, axis=-1)box_class_scores = np.max(box_class_probs, axis=-1)pos = np.where(box_confidences[...,0] >= BOX_THRESH)boxes = boxes[pos]classes = box_classes[pos]scores = box_class_scores[pos]return boxes, classes, scoresdef nms_boxes(boxes, scores):"""Suppress non-maximal boxes.# Argumentsboxes: ndarray, boxes of objects.scores: ndarray, scores of objects.# Returnskeep: ndarray, index of effective boxes."""x = boxes[:, 0]y = boxes[:, 1]w = boxes[:, 2] - boxes[:, 0]h = boxes[:, 3] - boxes[:, 1]areas = w * horder = scores.argsort()[::-1]keep = []while order.size > 0:i = order[0]keep.append(i)xx1 = np.maximum(x[i], x[order[1:]])yy1 = np.maximum(y[i], y[order[1:]])xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)inter = w1 * h1ovr = inter / (areas[i] + areas[order[1:]] - inter)inds = np.where(ovr <= NMS_THRESH)[0]order = order[inds + 1]keep = np.array(keep)return keepdef yolov7_post_process(input_data):masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]anchors = [[12,16], [ 19,36], [40,28],[36,75], [76,55],[72,146],[142,110], [192,243], [459,401]]boxes, classes, scores = [], [], []for input,mask in zip(input_data, masks):b, c, s = process(input, mask, anchors)b, c, s = filter_boxes(b, c, s)boxes.append(b)classes.append(c)scores.append(s)boxes = np.concatenate(boxes)boxes = xywh2xyxy(boxes)classes = np.concatenate(classes)scores = np.concatenate(scores)nboxes, nclasses, nscores = [], [], []for c in set(classes):inds = np.where(classes == c)b = boxes[inds]c = classes[inds]s = scores[inds]keep = nms_boxes(b, 
s)nboxes.append(b[keep])nclasses.append(c[keep])nscores.append(s[keep])if not nclasses and not nscores:return None, None, Noneboxes = np.concatenate(nboxes)classes = np.concatenate(nclasses)scores = np.concatenate(nscores)return boxes, classes, scoresdef draw(image, boxes, scores, classes):"""Draw the boxes on the image.# Argument:image: original image.boxes: ndarray, boxes of objects.classes: ndarray, classes of objects.scores: ndarray, scores of objects.all_classes: all classes name."""for box, score, cl in zip(boxes, scores, classes):top, left, right, bottom = boxprint('class: {}, score: {}'.format(CLASSES[cl], score))print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(top, left, right, bottom))top = int(top)left = int(left)right = int(right)bottom = int(bottom)cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),(top, left - 6),cv2.FONT_HERSHEY_SIMPLEX,0.6, (0, 0, 255), 2)def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):# Resize and pad image while meeting stride-multiple constraintsshape = im.shape[:2] # current shape [height, width]if isinstance(new_shape, int):new_shape = (new_shape, new_shape)# Scale ratio (new / old)r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])# Compute paddingratio = r, r # width, height ratiosnew_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh paddingdw /= 2 # divide padding into 2 sidesdh /= 2if shape[::-1] != new_unpad: # resizeim = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))left, right = int(round(dw - 0.1)), int(round(dw + 0.1))im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add borderreturn im, ratio, (dw, dh)if __name__ == '__main__':# Create RKNN objectrknn = RKNN()if not os.path.exists(RKNN_MODEL):print("model not exist")exit(-1)# Load ONNX modelprint("--> Loading model")ret = rknn.load_rknn(RKNN_MODEL)if ret != 0:print("Load rknn model failed!")exit(ret)print("done")# init runtime environmentprint('--> Init runtime environment')ret = rknn.init_runtime()# ret = rknn.init_runtime('rk1808', device_id='1808')if ret != 0:print('Init runtime environment failed')exit(ret)print('done')# Set inputsimg = cv2.imread(IMG_PATH)# img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)img = cv2.resize(img,(IMG_SIZE, IMG_SIZE))# Inferenceprint('--> Running model')outputs = rknn.inference(inputs=[img])# post processinput0_data = outputs[0]input1_data = outputs[1]input2_data = outputs[2]# input0_data=np.transpose(input0_data, (0, 1, 4, 2,3))# input1_data=np.transpose(input1_data, (0, 1, 4, 2,3))# input2_data =np.transpose(input2_data, (0, 1, 4, 2,3))input0_data = input0_data.reshape([3,-1]+list(input0_data.shape[-2:]))input1_data = input1_data.reshape([3,-1]+list(input1_data.shape[-2:]))input2_data = input2_data.reshape([3,-1]+list(input2_data.shape[-2:]))print(input0_data.shape)print(input1_data.shape)print(input2_data.shape)input_data = list()input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))print(np.transpose(input0_data, (2, 3, 0, 1)).shape)print(np.transpose(input1_data, (2, 3, 0, 1)).shape)print(np.transpose(input2_data, (2, 3, 0, 1)).shape)boxes, classes, scores = 
yolov7_post_process(input_data)img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)if boxes is not None:draw(img_1, boxes, scores, classes)cv2.imshow("post process result", img_1)cv2.waitKeyEx(0)rknn.release()

The result is the same.

C++ test for RK3399 Pro & RV1126. The walkthrough is written against the RK3399 Pro, but the hardware I actually used is an RV1126.

The test hardware is an RV1126 camera development board from 深圳思核科技有限公司; the work below builds on top of it.

Step 1: get into the system over adb and change the IP address. My LAN is on the 192.168.10.* segment and I could not find the board by scanning, no matter what I tried; the manual says you can first look up its current IP and then change it, so that is the approach here.

Scanning the subnet for the board's IP with the Python script below failed, so adb had to be used instead.

import platform
import sys
import os
import time
import threading

live_ip = 0


def get_os():
    os = platform.system()
    if os == "Windows":
        return "n"
    else:
        return "c"


def ping_ip(ip_str):
    cmd = ["ping", "-{op}".format(op=get_os()), "1", ip_str]
    output = os.popen(" ".join(cmd)).readlines()
    for line in output:
        if str(line).upper().find("TTL") >= 0:
            print("ip: %s is ok ***" % ip_str)
            global live_ip
            live_ip += 1
            break


def find_ip(ip_prefix):
    """Given a prefix such as 192.168.10, scan every address in that /24 segment."""
    threads = []
    for i in range(1, 256):
        ip = '%s.%s' % (ip_prefix, i)
        threads.append(threading.Thread(target=ping_ip, args={ip, }))
    for i in threads:
        i.start()
    for i in threads:
        i.join()


if __name__ == "__main__":
    print("start time %s" % time.ctime())
    cmd_args = sys.argv[1:]
    args = "".join(cmd_args)
    ip_pre = "192.168.10"
    find_ip(ip_pre)
    print("end time %s" % time.ctime())
    print('This scan found %s live devices on the network' % live_ip)

Enter the system using adb.

C:\Users\Administrator>f:

F:\>adb.exe
Android Debug Bridge version 1.0.41
Version 29.0.5-5949299
Installed as F:\adb.exe

global options:
 -a         listen on all network interfaces, not just localhost
 -d         use USB device (error if multiple devices connected)
 -e         use TCP/IP device (error if multiple TCP/IP devices available)
 -s SERIAL  use device with given serial (overrides $ANDROID_SERIAL)
 -t ID      use device with given transport id
 -H         name of adb server host [default=localhost]
 -P         port of adb server [default=5037]
 -L SOCKET  listen on given socket for adb server [default=tcp:localhost:5037]

F:\>adb shell
* daemon not running; starting now at tcp:5037
* daemon started successfully
[root@owlvtech:/]# ifconfig
eth0      Link encap:Ethernet  HWaddr F6:F9:51:F0:5B:C7
          inet addr:192.168.1.100  Bcast:192.168.1.255  Mask:255.255.255.0
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:3289 errors:0 dropped:0 overruns:0 frame:0
          TX packets:117 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:247207 (241.4 KiB)  TX bytes:10048 (9.8 KiB)
          Interrupt:61

lo        Link encap:Local Loopback
          inet addr:127.0.0.1  Mask:255.0.0.0
          UP LOOPBACK RUNNING  MTU:65536  Metric:1
          RX packets:57 errors:0 dropped:0 overruns:0 frame:0
          TX packets:57 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:5472 (5.3 KiB)  TX bytes:5472 (5.3 KiB)

[root@owlvtech:/]# df -h
Filesystem                Size  Used Avail Use% Mounted on
/dev/root                 944M  169M  717M  20% /
devtmpfs                  239M     0  239M   0% /dev
tmpfs                     240M     0  240M   0% /dev/shm
tmpfs                     240M  312K  239M   1% /tmp
tmpfs                     240M  268K  239M   1% /run
/dev/mmcblk0p7            183M  138M   33M  82% /oem
/dev/mmcblk0p8           1008M  3.0M 1005M   1% /userdata
/dev/block/by-name/media  5.0G   11M  4.7G   1% /userdata/media
[root@owlvtech:/]#
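The post does not show the actual IP change; on this kind of busybox system it is typically done with something like the following inside the adb shell (the address is the one used for the board later in this post, and the setting is not persistent across reboots):

ifconfig eth0 192.168.10.100 netmask 255.255.255.0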

Step 2: pull a video stream first to check. The official documentation says RTSP streaming is supported; I was going through a Wi-Fi-to-Ethernet adapter, so the network was poor, but the test confirmed it works.

C:\Users\Administrator>ffplay rtsp://192.168.10.100/live/mainstream -x 640 -y 480

Step 3: try porting the model over.

ubuntu@sxj731533730:~$ ssh root@192.168.10.100
root@192.168.10.100's password:
rochchip
route: SIOCADDRT: File exists
[root@owlvtech:~]#

Cross-compilation is required; build it yourself, only the source code is provided here. A typical invocation is sketched below.
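A rough out-of-tree cross build, assuming a CMake toolchain file from your RV1126/RK3399 SDK; the paths below are placeholders, and OpenCV must itself be a cross-compiled build:

mkdir build && cd build
cmake .. -DCMAKE_TOOLCHAIN_FILE=/path/to/sdk/arm-linux-gnueabihf.toolchain.cmake \
         -DOpenCV_DIR=/path/to/cross-built/opencv/lib/cmake/opencv4
make -j4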

cmakelists.txt

cmake_minimum_required(VERSION 3.16)
project(untitled10)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")

include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)

find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
# add OpenCV header files
include_directories(${OpenCV_INCLUDE_DIRS})

# link the rknn_api shared library
add_library(librknn_api SHARED IMPORTED)
set_target_properties(librknn_api PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib64/librknn_api.so)

add_executable(untitled10 main.cpp)
target_link_libraries(untitled10 ${OpenCV_LIBS} librknn_api)

main.cpp

#include <stdio.h>#include <stdint.h>#include <stdlib.h>#include <queue>#include "rknn_api.h"#include "opencv2/core/core.hpp"#include "opencv2/imgproc/imgproc.hpp"#include "opencv2/highgui/highgui.hpp"#include <chrono>#define OBJ_NAME_MAX_SIZE 16#define OBJ_NUMB_MAX_SIZE 200#define OBJ_CLASS_NUM1#define PROP_BOX_SIZE(5+OBJ_CLASS_NUM)using namespace std;typedef struct _BOX_RECT {int left;int right;int top;int bottom;} BOX_RECT;typedef struct __detect_result_t {char name[OBJ_NAME_MAX_SIZE];int class_index;BOX_RECT box;float prop;} detect_result_t;typedef struct _detect_result_group_t {int id;int count;detect_result_t results[OBJ_NUMB_MAX_SIZE];} detect_result_group_t;const int anchor0[6] = {12,16, 19,36, 40,28};const int anchor1[6] = {36,75, 76,55, 72,146};const int anchor2[6] = {142,110, 192,243, 459,401};void printRKNNTensor(rknn_tensor_attr *attr) {printf("index=%d name=%s n_dims=%d dims=[%d %d %d %d] n_elems=%d size=%d ""fmt=%d type=%d qnt_type=%d fl=%d zp=%d scale=%f\n",attr->index, attr->name, attr->n_dims, attr->dims[3], attr->dims[2],attr->dims[1], attr->dims[0], attr->n_elems, attr->size, 0, attr->type,attr->qnt_type, attr->fl, attr->zp, attr->scale);}float sigmoid(float x) {return 1.0 / (1.0 + expf(-x));}float unsigmoid(float y) {return -1.0 * logf((1.0 / y) - 1.0);}int process_fp(float *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride,std::vector<float> &boxes, std::vector<float> &boxScores, std::vector<int> &classId,float threshold) {int validCount = 0;int grid_len = grid_h * grid_w;float thres_sigmoid = unsigmoid(threshold);for (int a = 0; a < 3; a++) {for (int i = 0; i < grid_h; i++) {for (int j = 0; j < grid_w; j++) {float box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j];if (box_confidence >= thres_sigmoid) {int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;float *in_ptr = input + offset;float box_x = sigmoid(*in_ptr) * 2.0 - 0.5;float box_y = sigmoid(in_ptr[grid_len]) * 2.0 - 0.5;float box_w = sigmoid(in_ptr[2 * grid_len]) * 2.0;float box_h = sigmoid(in_ptr[3 * grid_len]) * 2.0;box_x = (box_x + j) * (float) stride;box_y = (box_y + i) * (float) stride;box_w = box_w * box_w * (float) anchor[a * 2];box_h = box_h * box_h * (float) anchor[a * 2 + 1];box_x -= (box_w / 2.0);box_y -= (box_h / 2.0);boxes.push_back(box_x);boxes.push_back(box_y);boxes.push_back(box_w);boxes.push_back(box_h);float maxClassProbs = in_ptr[5 * grid_len];int maxClassId = 0;for (int k = 1; k < OBJ_CLASS_NUM; ++k) {float prob = in_ptr[(5 + k) * grid_len];if (prob > maxClassProbs) {maxClassId = k;maxClassProbs = prob;}}float box_conf_f32 = sigmoid(box_confidence);float class_prob_f32 = sigmoid(maxClassProbs);boxScores.push_back(box_conf_f32 * class_prob_f32);classId.push_back(maxClassId);validCount++;}}}}return validCount;}float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,float ymax1) {float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);float i = w * h;float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;return u <= 0.f ? 
0.f : (i / u);}int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> &order, float threshold) {for (int i = 0; i < validCount; ++i) {if (order[i] == -1) {continue;}int n = order[i];for (int j = i + 1; j < validCount; ++j) {int m = order[j];if (m == -1) {continue;}float xmin0 = outputLocations[n * 4 + 0];float ymin0 = outputLocations[n * 4 + 1];float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];float xmin1 = outputLocations[m * 4 + 0];float ymin1 = outputLocations[m * 4 + 1];float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);if (iou > threshold) {order[j] = -1;}}}return 0;}int quick_sort_indice_inverse(std::vector<float> &input,int left,int right,std::vector<int> &indices) {float key;int key_index;int low = left;int high = right;if (left < right) {key_index = indices[left];key = input[left];while (low < high) {while (low < high && input[high] <= key) {high--;}input[low] = input[high];indices[low] = indices[high];while (low < high && input[low] >= key) {low++;}input[high] = input[low];indices[high] = indices[low];}input[low] = key;indices[low] = key_index;quick_sort_indice_inverse(input, left, low - 1, indices);quick_sort_indice_inverse(input, low + 1, right, indices);}return low;}int clamp(float val, int min, int max) {return val > min ? (val < max ? val : max) : min;}int post_process_fp(float *input0, float *input1, float *input2, int model_in_h, int model_in_w,int h_offset, int w_offset, float resize_scale, float conf_threshold, float nms_threshold,detect_result_group_t *group, const char *labels[]) {memset(group, 0, sizeof(detect_result_group_t));std::vector<float> filterBoxes;std::vector<float> boxesScore;std::vector<int> classId;int stride0 = 8;int grid_h0 = model_in_h / stride0;int grid_w0 = model_in_w / stride0;int validCount0 = 0;validCount0 = process_fp(input0, (int *) anchor0, grid_h0, grid_w0, model_in_h, model_in_w,stride0, filterBoxes, boxesScore, classId, conf_threshold);int stride1 = 16;int grid_h1 = model_in_h / stride1;int grid_w1 = model_in_w / stride1;int validCount1 = 0;validCount1 = process_fp(input1, (int *) anchor1, grid_h1, grid_w1, model_in_h, model_in_w,stride1, filterBoxes, boxesScore, classId, conf_threshold);int stride2 = 32;int grid_h2 = model_in_h / stride2;int grid_w2 = model_in_w / stride2;int validCount2 = 0;validCount2 = process_fp(input2, (int *) anchor2, grid_h2, grid_w2, model_in_h, model_in_w,stride2, filterBoxes, boxesScore, classId, conf_threshold);int validCount = validCount0 + validCount1 + validCount2;// no object detectif (validCount <= 0) {return 0;}std::vector<int> indexArray;for (int i = 0; i < validCount; ++i) {indexArray.push_back(i);}quick_sort_indice_inverse(boxesScore, 0, validCount - 1, indexArray);nms(validCount, filterBoxes, indexArray, nms_threshold);int last_count = 0;/* box valid detect target */for (int i = 0; i < validCount; ++i) {if (indexArray[i] == -1 || boxesScore[i] < conf_threshold || last_count >= OBJ_NUMB_MAX_SIZE) {continue;}int n = indexArray[i];float x1 = filterBoxes[n * 4 + 0];float y1 = filterBoxes[n * 4 + 1];float x2 = x1 + filterBoxes[n * 4 + 2];float y2 = y1 + filterBoxes[n * 4 + 3];int id = classId[n];group->results[last_count].box.left = (int) ((clamp(x1, 0, model_in_w) - w_offset) / resize_scale);group->results[last_count].box.top 
= (int) ((clamp(y1, 0, model_in_h) - h_offset) / resize_scale);group->results[last_count].box.right = (int) ((clamp(x2, 0, model_in_w) - w_offset) / resize_scale);group->results[last_count].box.bottom = (int) ((clamp(y2, 0, model_in_h) - h_offset) / resize_scale);group->results[last_count].prop = boxesScore[i];group->results[last_count].class_index = id;const char *label = labels[id];strncpy(group->results[last_count].name, label, OBJ_NAME_MAX_SIZE);// printf("result %2d: (%4d, %4d, %4d, %4d), %s\n", i, group->results[last_count].box.left, group->results[last_count].box.top,// group->results[last_count].box.right, group->results[last_count].box.bottom, label);last_count++;}group->count = last_count;return 0;}float deqnt_affine_to_f32(uint8_t qnt, uint8_t zp, float scale) {return ((float) qnt - (float) zp) * scale;}int32_t __clip(float val, float min, float max) {float f = val <= min ? min : (val >= max ? max : val);return f;}uint8_t qnt_f32_to_affine(float f32, uint8_t zp, float scale) {float dst_val = (f32 / scale) + zp;uint8_t res = (uint8_t) __clip(dst_val, 0, 255);return res;}int process_u8(uint8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride,std::vector<float> &boxes, std::vector<float> &boxScores, std::vector<int> &classId,float threshold, uint8_t zp, float scale) {int validCount = 0;int grid_len = grid_h * grid_w;float thres = unsigmoid(threshold);uint8_t thres_u8 = qnt_f32_to_affine(thres, zp, scale);for (int a = 0; a < 3; a++) {for (int i = 0; i < grid_h; i++) {for (int j = 0; j < grid_w; j++) {uint8_t box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j];if (box_confidence >= thres_u8) {int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;uint8_t *in_ptr = input + offset;float box_x = sigmoid(deqnt_affine_to_f32(*in_ptr, zp, scale)) * 2.0 - 0.5;float box_y = sigmoid(deqnt_affine_to_f32(in_ptr[grid_len], zp, scale)) * 2.0 - 0.5;float box_w = sigmoid(deqnt_affine_to_f32(in_ptr[2 * grid_len], zp, scale)) * 2.0;float box_h = sigmoid(deqnt_affine_to_f32(in_ptr[3 * grid_len], zp, scale)) * 2.0;box_x = (box_x + j) * (float) stride;box_y = (box_y + i) * (float) stride;box_w = box_w * box_w * (float) anchor[a * 2];box_h = box_h * box_h * (float) anchor[a * 2 + 1];box_x -= (box_w / 2.0);box_y -= (box_h / 2.0);boxes.push_back(box_x);boxes.push_back(box_y);boxes.push_back(box_w);boxes.push_back(box_h);uint8_t maxClassProbs = in_ptr[5 * grid_len];int maxClassId = 0;for (int k = 1; k < OBJ_CLASS_NUM; ++k) {uint8_t prob = in_ptr[(5 + k) * grid_len];if (prob > maxClassProbs) {maxClassId = k;maxClassProbs = prob;}}float box_conf_f32 = sigmoid(deqnt_affine_to_f32(box_confidence, zp, scale));float class_prob_f32 = sigmoid(deqnt_affine_to_f32(maxClassProbs, zp, scale));boxScores.push_back(box_conf_f32 * class_prob_f32);classId.push_back(maxClassId);validCount++;}}}}return validCount;}int post_process_u8(uint8_t *input0, uint8_t *input1, uint8_t *input2, int model_in_h, int model_in_w,int h_offset, int w_offset, float resize_scale, float conf_threshold, float nms_threshold,std::vector<uint8_t> &qnt_zps, std::vector<float> &qnt_scales,detect_result_group_t *group, const char *labels[]) {memset(group, 0, sizeof(detect_result_group_t));std::vector<float> filterBoxes;std::vector<float> boxesScore;std::vector<int> classId;int stride0 = 8;int grid_h0 = model_in_h / stride0;int grid_w0 = model_in_w / stride0;int validCount0 = 0;validCount0 = process_u8(input0, (int *) anchor0, grid_h0, grid_w0, model_in_h, model_in_w,stride0, filterBoxes, 
boxesScore, classId, conf_threshold, qnt_zps[0], qnt_scales[0]);int stride1 = 16;int grid_h1 = model_in_h / stride1;int grid_w1 = model_in_w / stride1;int validCount1 = 0;validCount1 = process_u8(input1, (int *) anchor1, grid_h1, grid_w1, model_in_h, model_in_w,stride1, filterBoxes, boxesScore, classId, conf_threshold, qnt_zps[1], qnt_scales[1]);int stride2 = 32;int grid_h2 = model_in_h / stride2;int grid_w2 = model_in_w / stride2;int validCount2 = 0;validCount2 = process_u8(input2, (int *) anchor2, grid_h2, grid_w2, model_in_h, model_in_w,stride2, filterBoxes, boxesScore, classId, conf_threshold, qnt_zps[2], qnt_scales[2]);int validCount = validCount0 + validCount1 + validCount2;// no object detectif (validCount <= 0) {return 0;}std::vector<int> indexArray;for (int i = 0; i < validCount; ++i) {indexArray.push_back(i);}quick_sort_indice_inverse(boxesScore, 0, validCount - 1, indexArray);nms(validCount, filterBoxes, indexArray, nms_threshold);int last_count = 0;group->count = 0;/* box valid detect target */for (int i = 0; i < validCount; ++i) {if (indexArray[i] == -1 || boxesScore[i] < conf_threshold || last_count >= OBJ_NUMB_MAX_SIZE) {continue;}int n = indexArray[i];float x1 = filterBoxes[n * 4 + 0];float y1 = filterBoxes[n * 4 + 1];float x2 = x1 + filterBoxes[n * 4 + 2];float y2 = y1 + filterBoxes[n * 4 + 3];int id = classId[n];group->results[last_count].box.left = (int) ((clamp(x1, 0, model_in_w) - w_offset) / resize_scale);group->results[last_count].box.top = (int) ((clamp(y1, 0, model_in_h) - h_offset) / resize_scale);group->results[last_count].box.right = (int) ((clamp(x2, 0, model_in_w) - w_offset) / resize_scale);group->results[last_count].box.bottom = (int) ((clamp(y2, 0, model_in_h) - h_offset) / resize_scale);group->results[last_count].prop = boxesScore[i];group->results[last_count].class_index = id;const char *label = labels[id];strncpy(group->results[last_count].name, label, OBJ_NAME_MAX_SIZE);// printf("result %2d: (%4d, %4d, %4d, %4d), %s\n", i, group->results[last_count].box.left, group->results[last_count].box.top,// group->results[last_count].box.right, group->results[last_count].box.bottom, label);last_count++;}group->count = last_count;return 0;}void letterbox(cv::Mat rgb,cv::Mat &img_resize,int target_width,int target_height){float shape_0=rgb.rows;float shape_1=rgb.cols;float new_shape_0=target_height;float new_shape_1=target_width;float r=std::min(new_shape_0/shape_0,new_shape_1/shape_1);float new_unpad_0=int(round(shape_1*r));float new_unpad_1=int(round(shape_0*r));float dw=new_shape_1-new_unpad_0;float dh=new_shape_0-new_unpad_1;dw=dw/2;dh=dh/2;cv::Mat copy_rgb=rgb.clone();if(int(shape_0)!=int(new_unpad_0)&&int(shape_1)!=int(new_unpad_1)){cv::resize(copy_rgb,img_resize,cv::Size(new_unpad_0,new_unpad_1));copy_rgb=img_resize;}int top=int(round(dh-0.1));int bottom=int(round(dh+0.1));int left=int(round(dw-0.1));int right=int(round(dw+0.1));cv::copyMakeBorder(copy_rgb, img_resize,top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(0,0,0));}int main(int argc, char **argv) {const char *img_path = "../bus.jpg";const char *model_path = "../best_remove_transpose.rknn";const char *post_process_type = "fp";//fpconst int target_width = 640;const int target_height = 640;const char *image_process_mode = "letter_box";float resize_scale = 0;int h_pad=0;int w_pad=0;const float nms_threshold = 0.6;const float conf_threshold = 0.3;const char *labels[] = {"person"};// Load imagecv::Mat bgr = cv::imread(img_path);if (!bgr.data) {printf("cv::imread %s fail!\n", img_path);return 
-1;}cv::Mat rgb;//BGR->RGBcv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGB);cv::Mat img_resize;float correction[2] = {0, 0};float scale_factor[] = {0, 0};int width=rgb.cols;int height=rgb.rows;// Letter box resizefloat img_wh_ratio = (float) width / (float) height;float input_wh_ratio = (float) target_width / (float) target_height;int resize_width;int resize_height;if (img_wh_ratio >= input_wh_ratio) {//pad height dimresize_scale = (float) target_width / (float) width;resize_width = target_width;resize_height = (int) ((float) height * resize_scale);w_pad = 0;h_pad = (target_height - resize_height) / 2;} else {//pad width dimresize_scale = (float) target_height / (float) height;resize_width = (int) ((float) width * resize_scale);resize_height = target_height;w_pad = (target_width - resize_width) / 2;;h_pad = 0;}if(strcmp(image_process_mode,"letter_box")==0){letterbox(rgb,img_resize,target_width,target_height);}else {cv::resize(rgb, img_resize, cv::Size(target_width, target_height));}// Load modelFILE *fp = fopen(model_path, "rb");if (fp == NULL) {printf("fopen %s fail!\n", model_path);return -1;}fseek(fp, 0, SEEK_END);int model_len = ftell(fp);void *model = malloc(model_len);fseek(fp, 0, SEEK_SET);if (model_len != fread(model, 1, model_len, fp)) {printf("fread %s fail!\n", model_path);free(model);return -1;}rknn_context ctx = 0;int ret = rknn_init(&ctx, model, model_len, 0);if (ret < 0) {printf("rknn_init fail! ret=%d\n", ret);return -1;}/* Query sdk version */rknn_sdk_version version;ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &version,sizeof(rknn_sdk_version));if (ret < 0) {printf("rknn_init error ret=%d\n", ret);return -1;}printf("sdk version: %s driver version: %s\n", version.api_version,version.drv_version);/* Get input,output attr */rknn_input_output_num io_num;ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));if (ret < 0) {printf("rknn_init error ret=%d\n", ret);return -1;}printf("model input num: %d, output num: %d\n", io_num.n_input,io_num.n_output);rknn_tensor_attr input_attrs[io_num.n_input];memset(input_attrs, 0, sizeof(input_attrs));for (int i = 0; i < io_num.n_input; i++) {input_attrs[i].index = i;ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]),sizeof(rknn_tensor_attr));if (ret < 0) {printf("rknn_init error ret=%d\n", ret);return -1;}printRKNNTensor(&(input_attrs[i]));}rknn_tensor_attr output_attrs[io_num.n_output];memset(output_attrs, 0, sizeof(output_attrs));for (int i = 0; i < io_num.n_output; i++) {output_attrs[i].index = i;ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),sizeof(rknn_tensor_attr));printRKNNTensor(&(output_attrs[i]));}int input_channel = 3;int input_width = 0;int input_height = 0;if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) {printf("model is NCHW input fmt\n");input_width = input_attrs[0].dims[0];input_height = input_attrs[0].dims[1];printf("input_width=%d input_height=%d\n", input_width, input_height);} else {printf("model is NHWC input fmt\n");input_width = input_attrs[0].dims[1];input_height = input_attrs[0].dims[2];printf("input_width=%d input_height=%d\n", input_width, input_height);}printf("model input height=%d, width=%d, channel=%d\n", input_height, input_width,input_channel);/* Init input tensor */rknn_input inputs[1];memset(inputs, 0, sizeof(inputs));inputs[0].index = 0;inputs[0].buf = img_resize.data;inputs[0].type = RKNN_TENSOR_UINT8;inputs[0].size = input_width * input_height * input_channel;inputs[0].fmt = RKNN_TENSOR_NHWC;inputs[0].pass_through = 0;/* Init output tensor */rknn_output 
outputs[io_num.n_output];memset(outputs, 0, sizeof(outputs));for (int i = 0; i < io_num.n_output; i++) {if (strcmp(post_process_type, "fp") == 0) {outputs[i].want_float = 1;} else if (strcmp(post_process_type, "u8") == 0) {outputs[i].want_float = 0;}}printf("img.cols: %d, img.rows: %d\n", img_resize.cols, img_resize.rows);auto t1=std::chrono::steady_clock::now();rknn_inputs_set(ctx, io_num.n_input, inputs);ret = rknn_run(ctx, NULL);if (ret < 0) {printf("ctx error ret=%d\n", ret);return -1;}ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);if (ret < 0) {printf("outputs error ret=%d\n", ret);return -1;}/* Post process */std::vector<float> out_scales;std::vector<uint8_t> out_zps;for (int i = 0; i < io_num.n_output; ++i) {out_scales.push_back(output_attrs[i].scale);out_zps.push_back(output_attrs[i].zp);}detect_result_group_t detect_result_group;if (strcmp(post_process_type, "u8") == 0) {post_process_u8((uint8_t *) outputs[0].buf, (uint8_t *) outputs[1].buf, (uint8_t *) outputs[2].buf,input_height, input_width,h_pad, w_pad, resize_scale, conf_threshold, nms_threshold, out_zps, out_scales,&detect_result_group, labels);} else if (strcmp(post_process_type, "fp") == 0) {post_process_fp((float *) outputs[0].buf, (float *) outputs[1].buf, (float *) outputs[2].buf, input_height,input_width,h_pad, w_pad, resize_scale, conf_threshold, nms_threshold, &detect_result_group, labels);}//毫秒级auto t2=std::chrono::steady_clock::now();double dr_ms=std::chrono::duration<double,std::milli>(t2-t1).count();printf("%lf ms\n",dr_ms);for (int i = 0; i < detect_result_group.count; i++) {detect_result_t *det_result = &(detect_result_group.results[i]);printf("%s @ (%d %d %d %d) %f\n",det_result->name,det_result->box.left, det_result->box.top, det_result->box.right, det_result->box.bottom,det_result->prop);int bx1 = det_result->box.left;int by1 = det_result->box.top;int bx2 = det_result->box.right;int by2 = det_result->box.bottom;cv::rectangle(bgr, cv::Point(bx1, by1), cv::Point(bx2, by2), cv::Scalar(231, 232, 143)); //两点的方式char text[256];sprintf(text, "%s %.1f%% ", det_result->name, det_result->prop * 100);int baseLine = 0;cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);int x = bx1;int y = by1 - label_size.height - baseLine;if (y < 0)y = 0;if (x + label_size.width > bgr.cols)x = bgr.cols - label_size.width;cv::rectangle(bgr, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),cv::Scalar(0, 0, 255), -1);cv::putText(bgr, text, cv::Point(x, y + label_size.height),cv::FONT_HERSHEY_DUPLEX, 0.4, cv::Scalar(255, 255, 255), 1, cv::LINE_AA);cv::imwrite("../bgr9.jpg", bgr);}ret = rknn_outputs_release(ctx, io_num.n_output, outputs);if (ret < 0) {printf("rknn_query fail! ret=%d\n", ret);goto Error;}Error:if (ctx > 0)rknn_destroy(ctx);if (model)free(model);if (fp)fclose(fp);return 0;}

Test results

10. OpenVINO deployment

Convert the model to an OpenVINO IR (xml):

ubuntu@ubuntu:~/yolov7$ python /opt/intel/openvino_/deployment_tools/model_optimizer/mo.py --input_model /home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx --output_dir /home/ubuntu/yolov7/runs/train/exp3/weights/FP16 --input_shape [1,3,640,640] --data_type FP16

Reading and debugging the code with CLion (Debug)

First change into the directory containing clion.sh (/home/zranguai/software/CLion-.3.2/clion-.3.2/bin), then run sh clion.sh. Source the OpenVINO environment beforehand so CMake can locate InferenceEngine:

source /opt/intel/openvino_/bin/setupvars.sh

CMakeLists.txt

cmake_minimum_required(VERSION 3.4.1)
set(CMAKE_CXX_STANDARD 14)
project(untitled23)

find_package(OpenCV REQUIRED)
find_package(ngraph REQUIRED)
find_package(InferenceEngine REQUIRED)

include_directories(
        ${OpenCV_INCLUDE_DIRS}
        ${CMAKE_CURRENT_SOURCE_DIR}
        ${CMAKE_CURRENT_BINARY_DIR}
)

add_executable(untitled23 main.cpp)

target_link_libraries(untitled23
        ${InferenceEngine_LIBRARIES}
        ${NGRAPH_LIBRARIES}
        ${OpenCV_LIBS}
)

main.cpp

#include <opencv2/opencv.hpp>#include <inference_engine.hpp>#include <iostream>#include <chrono>#include <opencv2/dnn/dnn.hpp>#include <cmath>using namespace std;using namespace cv;using namespace InferenceEngine;typedef struct {int width;int height;} YoloSize;typedef struct {std::string name;int stride;std::vector<YoloSize> anchors;} YoloLayerData;class BoxInfo{public:int x1,y1,x2,y2,label,id;float score;};static inline float sigmoid(float x){return static_cast<float>(1.f / (1.f + exp(-x)));}double GetIOU(cv::Rect_<float> bb_test, cv::Rect_<float> bb_gt){float in = (bb_test & bb_gt).area();float un = bb_test.area() + bb_gt.area() - in;if (un < DBL_EPSILON)return 0;return (double)(in / un);}std::vector<BoxInfo> decode_infer(const float *data_ptr, int stride, int net_size, int num_classes,const std::vector<YoloSize> &anchors, float threshold,int idx){int s[3]={20,40,80};std::vector<BoxInfo> result;int batchs, channels, height, width, pred_item ;batchs = 1;channels = 3;height = s[idx];width = s[idx];pred_item = 6;std::cout<<batchs<<" "<<channels<<" "<<height<<" "<<width<<" "<<pred_item<<" "<<std::endl;for(int i=0;i<20;i++){std::cout<<data_ptr[i]<<" ";}std::cout<<std::endl;for(int bi=0; bi<batchs; bi++){auto batch_ptr = data_ptr + bi*(channels*height*width*pred_item);for(int ci=0; ci<channels; ci++){auto channel_ptr = batch_ptr + ci*(height*width*pred_item);for(int hi=0; hi<height; hi++){auto height_ptr = channel_ptr + hi*(width * pred_item);for(int wi=0; wi<width; wi++){auto width_ptr = height_ptr + wi*pred_item;auto cls_ptr = width_ptr + 5;auto confidence = sigmoid(width_ptr[4]);for(int cls_id=0; cls_id<num_classes; cls_id++){float score = sigmoid(cls_ptr[cls_id]) * confidence;if(score > threshold){float cx = (sigmoid(width_ptr[0]) * 2.f - 0.5f + wi) * (float) stride;float cy = (sigmoid(width_ptr[1]) * 2.f - 0.5f + hi) * (float) stride;float w = pow(sigmoid(width_ptr[2]) * 2.f, 2) * anchors[ci].width;float h = pow(sigmoid(width_ptr[3]) * 2.f, 2) * anchors[ci].height;BoxInfo box;box.x1 = std::max(0, std::min(net_size, int((cx - w / 2.f) )));box.y1 = std::max(0, std::min(net_size, int((cy - h / 2.f) )));box.x2 = std::max(0, std::min(net_size, int((cx + w / 2.f) )));box.y2 = std::max(0, std::min(net_size, int((cy + h / 2.f) )));box.score = score;box.label = cls_id;result.push_back(box);}}}}}}return result;}void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });std::vector<float> vArea(input_boxes.size());for (int i = 0; i < int(input_boxes.size()); ++i) {vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);}for (int i = 0; i < int(input_boxes.size()); ++i) {for (int j = i + 1; j < int(input_boxes.size());) {float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);float w = std::max(float(0), xx2 - xx1 + 1);float h = std::max(float(0), yy2 - yy1 + 1);float inter = w * h;float ovr = inter / (vArea[i] + vArea[j] - inter);if (ovr >= NMS_THRESH) {input_boxes.erase(input_boxes.begin() + j);vArea.erase(vArea.begin() + j);} else {j++;}}}}void scale_coords(std::vector<BoxInfo> &boxes, int w_from, int h_from, int w_to, int h_to){float w_ratio = float(w_to)/float(w_from);float h_ratio = float(h_to)/float(h_from);for(auto &box: boxes){box.x1 *= w_ratio;box.x2 
*= w_ratio;box.y1 *= h_ratio;box.y2 *= h_ratio;}return ;}cv::Mat draw_box(cv::Mat & cv_mat, std::vector<BoxInfo> &boxes, const std::vector<std::string> &labels,unsigned char colors[][3]){for(auto box : boxes){int width = box.x2-box.x1;int height = box.y2-box.y1;cv::Point p = cv::Point(box.x1, box.y1);cv::Rect rect = cv::Rect(box.x1, box.y1, width, height);cv::rectangle(cv_mat, rect, cv::Scalar(colors[box.label][0],colors[box.label][1],colors[box.label][2]));string text = labels[box.label] + ":" + std::to_string(box.score) ;cv::putText(cv_mat, text, p, cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(colors[box.label][0],colors[box.label][1],colors[box.label][2]));}return cv_mat;}int main(int argc, char const *argv[]) {string _xml_path = "/home/ubuntu/yolov7/runs/train/exp3/weights/FP16/best.xml";ExecutableNetwork _network;OutputsDataMap _outputinfo;//参数区double _cof_threshold = 0.3;double _nms_area_threshold = 0.5;Core ie;auto cnnNetwork = ie.ReadNetwork(_xml_path);//输入设置InputsDataMap inputInfo(cnnNetwork.getInputsInfo());InputInfo::Ptr& input = inputInfo.begin()->second;string _input_name = inputInfo.begin()->first;input->setPrecision(Precision::FP32);input->getInputData()->setLayout(Layout::NCHW);ICNNNetwork::InputShapes inputShapes = cnnNetwork.getInputShapes();SizeVector& inSizeVector = inputShapes.begin()->second;cnnNetwork.reshape(inputShapes);//输出设置_outputinfo = OutputsDataMap(cnnNetwork.getOutputsInfo());for (auto &output : _outputinfo) {output.second->setPrecision(Precision::FP32);}//获取可执行网络//_network = ie.LoadNetwork(cnnNetwork, "GPU");_network = ie.LoadNetwork(cnnNetwork, "CPU");Mat src = imread("/home/ubuntu/yolov7/inference/images/bus.jpg");Mat inframe;resize(src, inframe, Size(640, 640));auto start = chrono::high_resolution_clock::now();if(inframe.empty()){cout << "无效图片输入" << endl;return false;}cvtColor(inframe,inframe,COLOR_BGR2RGB);size_t img_size = 640*640;InferRequest::Ptr infer_request = _network.CreateInferRequestPtr();Blob::Ptr frameBlob = infer_request->GetBlob(_input_name);InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(frameBlob)->wmap();float* blob_data = blobMapped.as<float*>();//nchwfor(size_t row =0;row<640;row++){for(size_t col=0;col<640;col++){for(size_t ch =0;ch<3;ch++){blob_data[img_size*ch + row*640 + col] = float(inframe.at<Vec3b>(row,col)[ch])/255.0f;}}}//执行预测infer_request->Infer();//获取各层结果std::vector<YoloLayerData> yolov7_layers{{"554", 32, {{142, 110}, {192, 243}, {459, 401}}},{"534", 16, {{36, 75}, {76, 55}, {72, 146}}},{"output", 8, {{12, 16}, {19, 36}, {40, 28}}},};vector<float> origin_rect_cof;std::vector<YoloLayerData> & layers = yolov7_layers;std::vector<BoxInfo> result;std::vector<BoxInfo> boxes;float threshold = 0.3;float nms_threshold = 0.7;int i=0;std::vector<std::string> labels = {"person"};unsigned char colors[][3] = {{0, 255, 0}};for (auto &output : _outputinfo) {auto output_name = output.first;std::cout<<output_name<<std::endl;Blob::Ptr blob = infer_request->GetBlob(yolov7_layers[2-i].name.c_str());LockedMemory<const void> blobMapped = as<MemoryBlob>(blob)->rmap();const float *output_blob = blobMapped.as<float *>();boxes = decode_infer(output_blob, layers[2-i].stride, 640, labels.size(), layers[2-i].anchors, threshold,2-i);result.insert(result.begin(), boxes.begin(), boxes.end());++i;std::cout<<std::endl;}nms(result, nms_threshold);scale_coords(result, 640, 640, src.cols, src.rows);cv::Mat frame_show = draw_box(src, result, 
labels,colors);cv::imshow("out",src);cv::imwrite("dp.jpg",src);cv::waitKey(0);auto end = chrono::high_resolution_clock::now();std::chrono::duration<double> diff = end - start;cout << "use " << diff.count() << " s" << endl;return 0;}

Test results

11. Converting the model for OAK (OAK-D4T)

It seems the official repo already provides a version:

depthai_yolo · master · OAK中国_官方 / DepthAI examples · GitCode

But I still wanted to roll my own, so let's get to it.

ubuntu@ubuntu:~$ python3 /opt/intel/openvino_/deployment_tools/model_optimizer/mo.py --input_model /home/ubuntu/yolov7/runs/train/exp3/weights/best.onnx --output_dir /home/ubuntu/yolov7/runs/train/exp3/weights/F16_OAK --input_shape [1,3,640,640] --data_type FP16 --scale_values [255.0,255.0,255.0] --mean_values [0,0,0]
ubuntu@ubuntu:/opt/intel/openvino_/deployment_tools/tools/compile_tool$ ./compile_tool -m /home/ubuntu/yolov7/runs/train/exp3/weights/F16_OAK/best.xml -ip U8 -d MYRIAD -VPU_NUMBER_OF_SHAVES 4 -VPU_NUMBER_OF_CMX_SLICES 4
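
As an alternative to invoking compile_tool by hand, the pip-installable blobconverter helper from the DepthAI ecosystem can compile the same IR into a .blob. This is not part of the original workflow, only a hedged option (it uploads the model to Luxonis' online converter service), sketched under those assumptions:

# Assumed alternative: compile the IR to a MYRIAD blob with the same 4 SHAVE / 4 CMX setting.
import blobconverter

blob_path = blobconverter.from_openvino(
    xml="/home/ubuntu/yolov7/runs/train/exp3/weights/F16_OAK/best.xml",
    bin="/home/ubuntu/yolov7/runs/train/exp3/weights/F16_OAK/best.bin",
    data_type="FP16",
    shaves=4,
)
print(blob_path)  # path to the generated blob, usable with net->setBlob(...) in the C++ demo below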

Test code, adapted from the YOLOX example:

from pathlib import Pathimport numpy as npimport cv2import depthai as daiimport timesyncNN = FalseSHAPE = 640labelMap = ["person"]BOX_THRESH=0.5NMS_THRESH=0.5def sigmoid(x):return 1 / (1 + np.exp(-x))def xywh2xyxy(x,ratio):# Convert [x, y, w, h] to [x1, y1, x2, y2]y = np.copy(x)y[:, 0] = (x[:, 0] - x[:, 2] / 2 ) /ratio# top left xy[:, 1] = (x[:, 1] - x[:, 3] / 2 )/ratio# top left yy[:, 2] = (x[:, 0] + x[:, 2] / 2 ) /ratio# bottom right xy[:, 3] = (x[:, 1] + x[:, 3] / 2 )/ratio # bottom right yreturn ydef process(input, mask, anchors):anchors = [anchors[i] for i in mask]grid_h, grid_w = map(int, input.shape[0:2])box_confidence = sigmoid(input[..., 4])box_confidence = np.expand_dims(box_confidence, axis=-1)box_class_probs = sigmoid(input[..., 5:])box_xy = sigmoid(input[..., :2])*2 - 0.5col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)grid = np.concatenate((col, row), axis=-1)box_xy += gridbox_xy *= int(SHAPE/grid_h)box_wh = pow(sigmoid(input[..., 2:4])*2, 2)box_wh = box_wh * anchorsbox = np.concatenate((box_xy, box_wh), axis=-1)return box, box_confidence, box_class_probsdef filter_boxes(boxes, box_confidences, box_class_probs):"""Filter boxes with box threshold. It's a bit different with origin yolov5 post process!# Argumentsboxes: ndarray, boxes of objects.box_confidences: ndarray, confidences of objects.box_class_probs: ndarray, class_probs of objects.# Returnsboxes: ndarray, filtered boxes.classes: ndarray, classes for boxes.scores: ndarray, scores for boxes."""box_classes = np.argmax(box_class_probs, axis=-1)box_class_scores = np.max(box_class_probs, axis=-1)pos = np.where(box_confidences[...,0] >= BOX_THRESH)boxes = boxes[pos]classes = box_classes[pos]scores = box_class_scores[pos]return boxes, classes, scoresdef nms_boxes(boxes, scores):"""Suppress non-maximal boxes.# Argumentsboxes: ndarray, boxes of objects.scores: ndarray, scores of objects.# Returnskeep: ndarray, index of effective boxes."""x = boxes[:, 0]y = boxes[:, 1]w = boxes[:, 2] - boxes[:, 0]h = boxes[:, 3] - boxes[:, 1]areas = w * horder = scores.argsort()[::-1]keep = []while order.size > 0:i = order[0]keep.append(i)xx1 = np.maximum(x[i], x[order[1:]])yy1 = np.maximum(y[i], y[order[1:]])xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)inter = w1 * h1ovr = inter / (areas[i] + areas[order[1:]] - inter)inds = np.where(ovr <= NMS_THRESH)[0]order = order[inds + 1]keep = np.array(keep)return keepdef yolov7_post_process(input_data,ratio):masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]anchors = [[12,16], [ 19,36], [40,28],[36,75], [76,55],[72,146],[142,110], [192,243], [459,401]]boxes, classes, scores = [], [], []for input,mask in zip(input_data, masks):b, c, s = process(input, mask, anchors)b, c, s = filter_boxes(b, c, s)boxes.append(b)classes.append(c)scores.append(s)boxes = np.concatenate(boxes)boxes = xywh2xyxy(boxes,ratio)classes = np.concatenate(classes)scores = np.concatenate(scores)nboxes, nclasses, nscores = [], [], []for c in set(classes):inds = np.where(classes == c)b = boxes[inds]c = classes[inds]s = scores[inds]keep = nms_boxes(b, s)nboxes.append(b[keep])nclasses.append(c[keep])nscores.append(s[keep])if not nclasses and not nscores:return None, None, Noneboxes = np.concatenate(nboxes)classes = 
np.concatenate(nclasses)scores = np.concatenate(nscores)return boxes, classes, scoresdef draw(image, boxes, scores, classes):"""Draw the boxes on the image.# Argument:image: original image.boxes: ndarray, boxes of objects.classes: ndarray, classes of objects.scores: ndarray, scores of objects.all_classes: all classes name."""for box, score, cl in zip(boxes, scores, classes):top, left, right, bottom = boxprint('class: {}, score: {}'.format(labelMap[cl], score))print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(top, left, right, bottom))top = int(top)left = int(left)right = int(right)bottom = int(bottom)cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)cv2.putText(image, '{0} {1:.2f}'.format(labelMap[cl], score),(top, left - 6),cv2.FONT_HERSHEY_SIMPLEX,0.6, (0, 0, 255), 2)def preproc(image, input_size, mean, std, swap=(2, 0, 1)):if len(image.shape) == 3:padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0else:padded_img = np.ones(input_size) * 114.0img = np.array(image)r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])resized_img = cv2.resize(img,(int(img.shape[1] * r), int(img.shape[0] * r)),interpolation=cv2.INTER_LINEAR,).astype(np.float32)padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_imgpadded_img = padded_img[:, :, ::-1]if mean is not None:padded_img -= meanif std is not None:padded_img /= stdpadded_img = padded_img.transpose(swap)padded_img = np.ascontiguousarray(padded_img, dtype=np.float16)return padded_img, rp = dai.Pipeline()p.setOpenVINOVersion(dai.OpenVINO.VERSION__4)class FPSHandler:def __init__(self, cap=None):self.timestamp = time.time()self.start = time.time()self.frame_cnt = 0def next_iter(self):self.timestamp = time.time()self.frame_cnt += 1def fps(self):return self.frame_cnt / (self.timestamp - self.start)camera = p.create(dai.node.ColorCamera)camera.setPreviewSize(SHAPE, SHAPE)camera.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)camera.setInterleaved(False)camera.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)camera.setFps(60)nn = p.create(dai.node.NeuralNetwork)nn.setBlobPath(str(Path("/home/ubuntu/yolov7/runs/train/exp3/weights/F16_OAK/best.blob").resolve().absolute()))nn.setNumInferenceThreads(2)nn.input.setBlocking(True)# Send camera frames to the hostcamera_xout = p.create(dai.node.XLinkOut)camera_xout.setStreamName("camera")camera.preview.link(camera_xout.input)# Send converted frames from the host to the NNnn_xin = p.create(dai.node.XLinkIn)nn_xin.setStreamName("nnInput")nn_xin.out.link(nn.input)# Send bounding boxes from the NN to the host via XLinknn_xout = p.create(dai.node.XLinkOut)nn_xout.setStreamName("nn")nn.out.link(nn_xout.input)# Pipeline is defined, now we can connect to the devicewith dai.Device(p) as device:qCamera = device.getOutputQueue(name="camera", maxSize=4, blocking=False)qNnInput = device.getInputQueue("nnInput", maxSize=4, blocking=False)qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=True)fps = FPSHandler()while True:inRgb = qCamera.get()frame = inRgb.getCvFrame()frame=cv2.imread("/home/ubuntu/yolov7/inference/images/bus.jpg")#可以注释掉测试视频# Set these according to your datasetmean = (0,0,0)std = (1, 1, 1)image, ratio = preproc(frame, (SHAPE, SHAPE), mean, std)#print(ratio)#frame=cv2.resize(frame,(640,640))# NOTE: The model expects an FP16 input image, but ImgFrame accepts a list of ints only. 
I work around this by# spreading the FP16 across two ints#print(ratio)image = list(image)dai_frame = dai.ImgFrame()dai_frame.setHeight(SHAPE)dai_frame.setWidth(SHAPE)dai_frame.setData(image)qNnInput.send(dai_frame)if syncNN:in_nn = qNn.get()else:in_nn = qNn.tryGet()if in_nn is not None:fps.next_iter()cv2.putText(frame, "Fps: {:.2f}".format(fps.fps()), (2, SHAPE - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color=(255, 255, 255))layers = in_nn.getAllLayers()for item in layers:print(item.name,item.dims)data_output = np.array(in_nn.getLayerFp16('output')).reshape(1, 3, 80, 80, len(labelMap)+5)data_534 = np.array(in_nn.getLayerFp16('534')).reshape(1, 3, 40, 40, len(labelMap)+5)data_554 = np.array(in_nn.getLayerFp16('554')).reshape(1, 3, 20, 20, len(labelMap)+5)outputs=[]outputs.append(data_output)outputs.append(data_534)outputs.append(data_554)# post processinput0_data = outputs[0]input1_data = outputs[1]input2_data = outputs[2]input0_data=np.transpose(input0_data, (0, 1, 4, 2,3))input1_data=np.transpose(input1_data, (0, 1, 4, 2,3))input2_data =np.transpose(input2_data, (0, 1, 4, 2,3))input0_data = input0_data.reshape([3, -1] + list(input0_data.shape[-2:]))input1_data = input1_data.reshape([3, -1] + list(input1_data.shape[-2:]))input2_data = input2_data.reshape([3, -1] + list(input2_data.shape[-2:]))print(input0_data.shape)print(input1_data.shape)print(input2_data.shape)input_data = list()input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))print(np.transpose(input0_data, (2, 3, 0, 1)).shape)print(np.transpose(input1_data, (2, 3, 0, 1)).shape)print(np.transpose(input2_data, (2, 3, 0, 1)).shape)boxes, classes, scores = yolov7_post_process(input_data,ratio)if boxes is not None:draw(frame, boxes, scores, classes)cv2.imshow("rgb", frame)if cv2.waitKey(1) == ord('q'):break

Test results

C++ OAK version

CMakeLists.txt

cmake_minimum_required(VERSION 3.16)
project(depthai)
set(CMAKE_CXX_STANDARD 11)

find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})

# add header include paths
include_directories(${OpenCV_INCLUDE_DIRS})
include_directories(${CMAKE_SOURCE_DIR}/include/utility)

# link the OpenCV and depthai libraries
find_package(depthai CONFIG REQUIRED)
add_executable(depthai main.cpp include/utility/utility.cpp)
target_link_libraries(depthai ${OpenCV_LIBS} depthai::opencv)

main.cpp

#include <stdio.h>#include <string>#include <iostream>#include <opencv2/core/core.hpp>#include <opencv2/highgui/highgui.hpp>#include <opencv2/imgproc/imgproc.hpp>#include "utility.hpp"#include <vector>#include "depthai/depthai.hpp"using namespace std;using namespace std::chrono;using namespace cv;typedef struct {int width;int height;} YoloSize;typedef struct {std::string name;int stride;std::vector<YoloSize> anchors;} YoloLayerData;class BoxInfo{public:int x1,y1,x2,y2,label,id;float score;};static inline float sigmoid(float x){return static_cast<float>(1.f / (1.f + exp(-x)));}double GetIOU(cv::Rect_<float> bb_test, cv::Rect_<float> bb_gt){float in = (bb_test & bb_gt).area();float un = bb_test.area() + bb_gt.area() - in;if (un < DBL_EPSILON)return 0;return (double)(in / un);}std::vector<BoxInfo> decode_infer(vector<float> data_pt, int stride, int net_size, int num_classes,const std::vector<YoloSize> &anchors, float threshold,int idx){const int s[3]={20,40,80};std::vector<BoxInfo> result;int batchs, channels, height, width, pred_item ;batchs = 1;channels = 3;height = s[idx];width = s[idx];pred_item = 6;float data_ptr[batchs*channels*height*width*pred_item];std::cout<<batchs*channels*height*width*pred_item<<std::endl<<data_pt.size()<<std::endl;for(int i=0;i<data_pt.size();i++){data_ptr[i]=data_pt[i];}std::cout<<batchs<<" "<<channels<<" "<<height<<" "<<width<<" "<<pred_item<<" "<<std::endl;for(int i=0;i<20;i++){std::cout<<data_ptr[i]<<" ";}std::cout<<std::endl;for(int bi=0; bi<batchs; bi++){auto batch_ptr = data_ptr + bi*(channels*height*width*pred_item);for(int ci=0; ci<channels; ci++){auto channel_ptr = batch_ptr + ci*(height*width*pred_item);for(int hi=0; hi<height; hi++){auto height_ptr = channel_ptr + hi*(width * pred_item);for(int wi=0; wi<width; wi++){auto width_ptr = height_ptr + wi*pred_item;auto cls_ptr = width_ptr + 5;auto confidence = sigmoid(width_ptr[4]);for(int cls_id=0; cls_id<num_classes; cls_id++){float score = sigmoid(cls_ptr[cls_id]) * confidence;if(score > threshold){float cx = (sigmoid(width_ptr[0]) * 2.f - 0.5f + wi) * (float) stride;float cy = (sigmoid(width_ptr[1]) * 2.f - 0.5f + hi) * (float) stride;float w = pow(sigmoid(width_ptr[2]) * 2.f, 2) * anchors[ci].width;float h = pow(sigmoid(width_ptr[3]) * 2.f, 2) * anchors[ci].height;BoxInfo box;box.x1 = std::max(0, std::min(net_size, int((cx - w / 2.f) )));box.y1 = std::max(0, std::min(net_size, int((cy - h / 2.f) )));box.x2 = std::max(0, std::min(net_size, int((cx + w / 2.f) )));box.y2 = std::max(0, std::min(net_size, int((cy + h / 2.f) )));box.score = score;box.label = cls_id;result.push_back(box);}}}}}}return result;}void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });std::vector<float> vArea(input_boxes.size());for (int i = 0; i < int(input_boxes.size()); ++i) {vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);}for (int i = 0; i < int(input_boxes.size()); ++i) {for (int j = i + 1; j < int(input_boxes.size());) {float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);float w = std::max(float(0), xx2 - xx1 + 1);float h = std::max(float(0), yy2 - yy1 + 1);float inter = w * h;float ovr = inter / (vArea[i] + vArea[j] - inter);if (ovr >= NMS_THRESH) 
{input_boxes.erase(input_boxes.begin() + j);vArea.erase(vArea.begin() + j);} else {j++;}}}}void scale_coords(std::vector<BoxInfo> &boxes, int w_from, int h_from, int w_to, int h_to){float w_ratio = float(w_to)/float(w_from);float h_ratio = float(h_to)/float(h_from);for(auto &box: boxes){box.x1 *= w_ratio;box.x2 *= w_ratio;box.y1 *= h_ratio;box.y2 *= h_ratio;}return ;}cv::Mat draw_box(cv::Mat & cv_mat, std::vector<BoxInfo> &boxes, const std::vector<std::string> &labels,unsigned char colors[][3]){for(auto box : boxes){int width = box.x2-box.x1;int height = box.y2-box.y1;cv::Point p = cv::Point(box.x1, box.y1);cv::Rect rect = cv::Rect(box.x1, box.y1, width, height);cv::rectangle(cv_mat, rect, cv::Scalar(colors[box.label][0],colors[box.label][1],colors[box.label][2]));string text = labels[box.label] + ":" + std::to_string(box.score) ;cv::putText(cv_mat, text, p, cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(colors[box.label][0],colors[box.label][1],colors[box.label][2]));}return cv_mat;}int main(int argc, char **argv) {std::vector<YoloLayerData> yolov7_layers{{"554", 32, {{142, 110}, {192, 243}, {459, 401}}},{"534", 16, {{36, 75}, {76, 55}, {72, 146}}},{"output", 8, {{12, 16}, {19, 36}, {40, 28}}},};vector<float> origin_rect_cof;std::vector<YoloLayerData> & layers = yolov7_layers;float threshold = 0.3;float nms_threshold = 0.7;int i=0;std::vector<std::string> labels = {"person"};unsigned char colors[][3] = {{0, 255, 0}};int target_width=640;int target_height=640;dai::Pipeline pipeline;//定义auto cam = pipeline.create<dai::node::XLinkIn>();cam->setStreamName("inFrame");auto net = pipeline.create<dai::node::NeuralNetwork>();dai::OpenVINO::Blob blob("/home/ubuntu/yolov7/runs/train/exp3/weights/F16_OAK/best.blob");net->setBlob(blob);net->input.setBlocking(false);//基本熟练明白oak的函数使用了cam->out.link(net->input);//定义输出auto xlinkParserOut = pipeline.create<dai::node::XLinkOut>();xlinkParserOut->setStreamName("parseOut");auto xlinkoutOut = pipeline.create<dai::node::XLinkOut>();xlinkoutOut->setStreamName("out");auto xlinkoutpassthroughOut = pipeline.create<dai::node::XLinkOut>();xlinkoutpassthroughOut->setStreamName("passthrough");net->out.link(xlinkParserOut->input);net->passthrough.link(xlinkoutpassthroughOut->input);//结构推送相机dai::Device device(pipeline);//取帧显示auto inqueue = device.getInputQueue("inFrame");//maxsize 代表缓冲数据auto detqueue = device.getOutputQueue("parseOut", 8, false);//maxsize 代表缓冲数据bool printOutputLayersOnce=true;while(true) {cv::Mat frame=cv::imread("/home/ubuntu/yolov7/inference/images/bus.jpg");if(frame.empty()) break;cv::Mat bgr;cv::cvtColor(frame,bgr,cv::COLOR_BGR2RGB);cv::resize(bgr,bgr,cv::Size(target_width,target_height));auto img = std::make_shared<dai::ImgFrame>();//bgr = resizeKeepAspectRatio(bgr, cv::Size(target_height, target_width), cv::Scalar(0));toPlanar(bgr, img->getData());img->setTimestamp(steady_clock::now());img->setWidth(target_height);img->setHeight(target_width);inqueue->send(img);auto inNN = detqueue->get<dai::NNData>();if( printOutputLayersOnce&&inNN) {std::cout << "Output layer names: ";for(const auto& ten : inNN->getAllLayerNames()) {std::cout << ten << ", ";}std::cout << std::endl;printOutputLayersOnce = false;}std::vector<BoxInfo> result;std::vector<BoxInfo> boxes;auto output=inNN->getLayerFp16(inNN->getAllLayerNames()[2]);boxes = decode_infer(output, layers[2].stride, 640, labels.size(), layers[2].anchors, threshold,2);result.insert(result.begin(), boxes.begin(), boxes.end());auto output_534=inNN->getLayerFp16(inNN->getAllLayerNames()[0]);boxes = decode_infer(output_534, 
layers[1].stride, 640, labels.size(), layers[1].anchors, threshold,1);result.insert(result.begin(), boxes.begin(), boxes.end());auto output_554=inNN->getLayerFp16(inNN->getAllLayerNames()[1]);boxes = decode_infer(output_554, layers[0].stride, 640, labels.size(), layers[0].anchors, threshold,0);result.insert(result.begin(), boxes.begin(), boxes.end());nms(result, nms_threshold);scale_coords(result, 640, 640, frame.cols, frame.rows);cv::Mat frame_show = draw_box(frame, result, labels,colors);cv::imshow("demo", frame_show);int key = cv::waitKey(1);if(key == 'q' || key == 'Q') return 0;}// while (true) {////// auto ImgFrame = outqueue->get<dai::ImgFrame>();// auto frame = ImgFrame->getCvFrame();//// auto inNN = detqueue->get<dai::NNData>();// if( printOutputLayersOnce&&inNN) {// std::cout << "Output layer names: ";// for(const auto& ten : inNN->getAllLayerNames()) {//std::cout << ten << ", ";// }// std::cout << std::endl;// printOutputLayersOnce = false;// }//// std::vector<BoxInfo> result;// std::vector<BoxInfo> boxes;// auto output=inNN->getLayerFp16(inNN->getAllLayerNames()[2]);// boxes = decode_infer(output, layers[2].stride, 640, labels.size(), layers[2].anchors, threshold,2);// result.insert(result.begin(), boxes.begin(), boxes.end());////// auto output_534=inNN->getLayerFp16(inNN->getAllLayerNames()[0]);// boxes = decode_infer(output_534, layers[1].stride, 640, labels.size(), layers[1].anchors, threshold,1);// result.insert(result.begin(), boxes.begin(), boxes.end());//// auto output_554=inNN->getLayerFp16(inNN->getAllLayerNames()[1]);// boxes = decode_infer(output_554, layers[0].stride, 640, labels.size(), layers[0].anchors, threshold,0);// result.insert(result.begin(), boxes.begin(), boxes.end());// nms(result, nms_threshold);// scale_coords(result, 640, 640, frame.cols, frame.rows);// cv::Mat frame_show = draw_box(frame, result, labels,colors);//// cv::imshow("demo", frame_show);//// int key = cv::waitKey(1);// if(key == 'q' || key == 'Q') return 0;// cv::waitKey(1);////// }return 0;}

Test results

YOLOv7-tiny runs at around 13 fps; the live-camera version is below:

#include <stdio.h>#include <string>#include <iostream>#include <opencv2/core/core.hpp>#include <opencv2/highgui/highgui.hpp>#include <opencv2/imgproc/imgproc.hpp>#include "utility.hpp"#include <vector>#include "depthai/depthai.hpp"using namespace std;using namespace std::chrono;using namespace cv;typedef struct {int width;int height;} YoloSize;typedef struct {std::string name;int stride;std::vector<YoloSize> anchors;} YoloLayerData;class BoxInfo {public:int x1, y1, x2, y2, label, id;float score;};static inline float sigmoid(float x) {return static_cast<float>(1.f / (1.f + exp(-x)));}double GetIOU(cv::Rect_<float> bb_test, cv::Rect_<float> bb_gt) {float in = (bb_test & bb_gt).area();float un = bb_test.area() + bb_gt.area() - in;if (un < DBL_EPSILON)return 0;return (double) (in / un);}std::vector<BoxInfo> decode_infer(vector<float> data_pt, int stride, int net_size, int num_classes,const std::vector<YoloSize> &anchors, float threshold, int idx) {const int s[3] = {20, 40, 80};std::vector<BoxInfo> result;int batchs, channels, height, width, pred_item;batchs = 1;channels = 3;height = s[idx];width = s[idx];pred_item = 6;float data_ptr[batchs * channels * height * width * pred_item];std::cout << batchs * channels * height * width * pred_item << std::endl << data_pt.size() << std::endl;for (int i = 0; i < data_pt.size(); i++) {data_ptr[i] = data_pt[i];}std::cout << batchs << " " << channels << " " << height << " " << width << " " << pred_item << " " << std::endl;for (int i = 0; i < 20; i++) {std::cout << data_ptr[i] << " ";}std::cout << std::endl;for (int bi = 0; bi < batchs; bi++) {auto batch_ptr = data_ptr + bi * (channels * height * width * pred_item);for (int ci = 0; ci < channels; ci++) {auto channel_ptr = batch_ptr + ci * (height * width * pred_item);for (int hi = 0; hi < height; hi++) {auto height_ptr = channel_ptr + hi * (width * pred_item);for (int wi = 0; wi < width; wi++) {auto width_ptr = height_ptr + wi * pred_item;auto cls_ptr = width_ptr + 5;auto confidence = sigmoid(width_ptr[4]);for (int cls_id = 0; cls_id < num_classes; cls_id++) {float score = sigmoid(cls_ptr[cls_id]) * confidence;if (score > threshold) {float cx = (sigmoid(width_ptr[0]) * 2.f - 0.5f + wi) * (float) stride;float cy = (sigmoid(width_ptr[1]) * 2.f - 0.5f + hi) * (float) stride;float w = pow(sigmoid(width_ptr[2]) * 2.f, 2) * anchors[ci].width;float h = pow(sigmoid(width_ptr[3]) * 2.f, 2) * anchors[ci].height;BoxInfo box;box.x1 = std::max(0, std::min(net_size, int((cx - w / 2.f))));box.y1 = std::max(0, std::min(net_size, int((cy - h / 2.f))));box.x2 = std::max(0, std::min(net_size, int((cx + w / 2.f))));box.y2 = std::max(0, std::min(net_size, int((cy + h / 2.f))));box.score = score;box.label = cls_id;result.push_back(box);}}}}}}return result;}void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });std::vector<float> vArea(input_boxes.size());for (int i = 0; i < int(input_boxes.size()); ++i) {vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);}for (int i = 0; i < int(input_boxes.size()); ++i) {for (int j = i + 1; j < int(input_boxes.size());) {float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);float w = std::max(float(0), xx2 - xx1 + 1);float h = std::max(float(0), 
yy2 - yy1 + 1);float inter = w * h;float ovr = inter / (vArea[i] + vArea[j] - inter);if (ovr >= NMS_THRESH) {input_boxes.erase(input_boxes.begin() + j);vArea.erase(vArea.begin() + j);} else {j++;}}}}void scale_coords(std::vector<BoxInfo> &boxes, int w_from, int h_from, int w_to, int h_to) {float w_ratio = float(w_to) / float(w_from);float h_ratio = float(h_to) / float(h_from);for (auto &box: boxes) {box.x1 *= w_ratio;box.x2 *= w_ratio;box.y1 *= h_ratio;box.y2 *= h_ratio;}return;}cv::Mat draw_box(cv::Mat &cv_mat, std::vector<BoxInfo> &boxes, const std::vector<std::string> &labels,unsigned char colors[][3]) {for (auto box : boxes) {int width = box.x2 - box.x1;int height = box.y2 - box.y1;cv::Point p = cv::Point(box.x1, box.y1);cv::Rect rect = cv::Rect(box.x1, box.y1, width, height);cv::rectangle(cv_mat, rect, cv::Scalar(colors[box.label][0], colors[box.label][1], colors[box.label][2]));string text = labels[box.label] + ":" + std::to_string(box.score);cv::putText(cv_mat, text, p, cv::FONT_HERSHEY_PLAIN, 1,cv::Scalar(colors[box.label][0], colors[box.label][1], colors[box.label][2]));}return cv_mat;}int main(int argc, char **argv) {std::vector<YoloLayerData> yolov7_layers{{"554", 32, {{116, 90}, {156, 198}, {373, 326}}},{"534", 16, {{30, 61}, {62, 45}, {59, 119}}},{"output", 8, {{12, 16}, {16, 30}, {33, 23}}},};vector<float> origin_rect_cof;std::vector<YoloLayerData> &layers = yolov7_layers;float threshold = 0.4;float nms_threshold = 0.5;int i = 0;std::vector<std::string> labels = {"person"};unsigned char colors[][3] = {{0, 255, 0}};int target_width = 640;int target_height = 640;dai::Pipeline pipeline;auto camRgb = pipeline.create<dai::node::ColorCamera>();camRgb->setPreviewSize(640, 640);camRgb->setBoardSocket(dai::CameraBoardSocket::RGB);camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);camRgb->setInterleaved(false);camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::RGB);camRgb->setFps(60);//定义//auto cam = pipeline.create<dai::node::XLinkIn>();//camRgb->setStreamName("inFrame");auto net = pipeline.create<dai::node::NeuralNetwork>();dai::OpenVINO::Blob blob("/home/ubuntu/experiment_yolov7/exp7_YOLOV7Tiny_rknn/weights/F16_OAK/best.blob");net->setBlob(blob);net->input.setBlocking(false);//基本熟练明白oak的函数使用了camRgb->preview.link(net->input);//定义输出auto xlinkParserOut = pipeline.create<dai::node::XLinkOut>();xlinkParserOut->setStreamName("parseOut");auto xlinkoutOut = pipeline.create<dai::node::XLinkOut>();xlinkoutOut->setStreamName("out");auto xlinkoutpassthroughOut = pipeline.create<dai::node::XLinkOut>();xlinkoutpassthroughOut->setStreamName("passthrough");net->out.link(xlinkParserOut->input);net->passthrough.link(xlinkoutpassthroughOut->input);//结构推送相机dai::Device device(pipeline);//取帧显示// auto inqueue = device.getInputQueue("inFrame");//maxsize 代表缓冲数据auto detqueue = device.getOutputQueue("parseOut", 8, false);//maxsize 代表缓冲数据auto passthrough = device.getOutputQueue("passthrough", 8, false);//maxsize 代表缓冲数据bool printOutputLayersOnce = true;// while(true) {// cv::Mat frame=cv::imread("/home/ubuntu/yolov7/inference/images/bus.jpg");// if(frame.empty()) break;// cv::Mat bgr;// cv::cvtColor(frame,bgr,cv::COLOR_BGR2RGB);// cv::resize(bgr,bgr,cv::Size(target_width,target_height));// auto img = std::make_shared<dai::ImgFrame>();bgr = resizeKeepAspectRatio(bgr, cv::Size(target_height, target_width), cv::Scalar(0));// toPlanar(bgr, img->getData());// img->setTimestamp(steady_clock::now());// img->setWidth(target_height);// img->setHeight(target_width);// 
inqueue->send(img);//// auto inNN = detqueue->get<dai::NNData>();// if( printOutputLayersOnce&&inNN) {// std::cout << "Output layer names: ";// for(const auto& ten : inNN->getAllLayerNames()) {//std::cout << ten << ", ";// }// std::cout << std::endl;// printOutputLayersOnce = false;// }//// std::vector<BoxInfo> result;// std::vector<BoxInfo> boxes;// auto output=inNN->getLayerFp16(inNN->getAllLayerNames()[2]);// boxes = decode_infer(output, layers[2].stride, 640, labels.size(), layers[2].anchors, threshold,2);// result.insert(result.begin(), boxes.begin(), boxes.end());////// auto output_534=inNN->getLayerFp16(inNN->getAllLayerNames()[0]);// boxes = decode_infer(output_534, layers[1].stride, 640, labels.size(), layers[1].anchors, threshold,1);// result.insert(result.begin(), boxes.begin(), boxes.end());//// auto output_554=inNN->getLayerFp16(inNN->getAllLayerNames()[1]);// boxes = decode_infer(output_554, layers[0].stride, 640, labels.size(), layers[0].anchors, threshold,0);// result.insert(result.begin(), boxes.begin(), boxes.end());// nms(result, nms_threshold);// scale_coords(result, 640, 640, frame.cols, frame.rows);// cv::Mat frame_show = draw_box(frame, result, labels,colors);//// cv::imshow("demo", frame_show);// cv::imwrite("demo.jpg",frame_show);// int key = cv::waitKey(1);// if(key == 'q' || key == 'Q') return 0;// }auto startTime = steady_clock::now();int counter = 0;float fps = 0;while (true) {auto ImgFrame = passthrough->get<dai::ImgFrame>();auto frame = ImgFrame->getCvFrame();auto inNN = detqueue->get<dai::NNData>();if (printOutputLayersOnce && inNN) {std::cout << "Output layer names: ";for (const auto &ten : inNN->getAllLayerNames()) {std::cout << ten << ", ";}std::cout << std::endl;printOutputLayersOnce = false;}std::vector<BoxInfo> result;std::vector<BoxInfo> boxes;auto output = inNN->getLayerFp16(inNN->getAllLayerNames()[2]);boxes = decode_infer(output, layers[2].stride, 640, labels.size(), layers[2].anchors, threshold, 2);result.insert(result.begin(), boxes.begin(), boxes.end());auto output_534 = inNN->getLayerFp16(inNN->getAllLayerNames()[0]);boxes = decode_infer(output_534, layers[1].stride, 640, labels.size(), layers[1].anchors, threshold, 1);result.insert(result.begin(), boxes.begin(), boxes.end());auto output_554 = inNN->getLayerFp16(inNN->getAllLayerNames()[1]);boxes = decode_infer(output_554, layers[0].stride, 640, labels.size(), layers[0].anchors, threshold, 0);result.insert(result.begin(), boxes.begin(), boxes.end());nms(result, nms_threshold);scale_coords(result, 640, 640, frame.cols, frame.rows);cv::Mat frame_show = draw_box(frame, result, labels, colors);counter++;auto currentTime = steady_clock::now();auto elapsed = duration_cast<duration<float>>(currentTime - startTime);if (elapsed > seconds(1)) {fps = counter / elapsed.count();counter = 0;startTime = currentTime;}std::stringstream fpsStr;fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;cv::putText(frame, fpsStr.str(), cv::Point(2, frame_show.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4,cv::Scalar(255, 255, 255));cv::imshow("demo", frame_show);int key = cv::waitKey(1);if (key == 'q' || key == 'Q') return 0;cv::waitKey(1);}return 0;}

I also made a modified version: yolov7_tiny head detection combined with spatial depth measurement:

#include <stdio.h>#include <string>#include <iostream>#include <opencv2/core/core.hpp>#include <opencv2/highgui/highgui.hpp>#include <opencv2/imgproc/imgproc.hpp>#include "utility.hpp"#include <vector>#include "depthai/depthai.hpp"static std::atomic<bool> newConfig{false};using namespace std;using namespace std::chrono;using namespace cv;typedef struct {int width;int height;} YoloSize;typedef struct {std::string name;int stride;std::vector<YoloSize> anchors;} YoloLayerData;class BoxInfo {public:int x1, y1, x2, y2, label, id;float score;};static inline float sigmoid(float x) {return static_cast<float>(1.f / (1.f + exp(-x)));}double GetIOU(cv::Rect_<float> bb_test, cv::Rect_<float> bb_gt) {float in = (bb_test & bb_gt).area();float un = bb_test.area() + bb_gt.area() - in;if (un < DBL_EPSILON)return 0;return (double) (in / un);}std::vector<BoxInfo> decode_infer(vector<float> data_pt, int stride, int net_size, int num_classes,const std::vector<YoloSize> &anchors, float threshold, int idx) {const int s[3] = {20, 40, 80};std::vector<BoxInfo> result;int batchs, channels, height, width, pred_item;batchs = 1;channels = 3;height = s[idx];width = s[idx];pred_item = 6;float data_ptr[batchs * channels * height * width * pred_item];//std::cout << batchs * channels * height * width * pred_item << std::endl << data_pt.size() << std::endl;for (int i = 0; i < data_pt.size(); i++) {data_ptr[i] = data_pt[i];}//std::cout << batchs << " " << channels << " " << height << " " << width << " " << pred_item << " " << std::endl;// for (int i = 0; i < 20; i++) {// std::cout << data_ptr[i] << " ";// }std::cout << std::endl;for (int bi = 0; bi < batchs; bi++) {auto batch_ptr = data_ptr + bi * (channels * height * width * pred_item);for (int ci = 0; ci < channels; ci++) {auto channel_ptr = batch_ptr + ci * (height * width * pred_item);for (int hi = 0; hi < height; hi++) {auto height_ptr = channel_ptr + hi * (width * pred_item);for (int wi = 0; wi < width; wi++) {auto width_ptr = height_ptr + wi * pred_item;auto cls_ptr = width_ptr + 5;auto confidence = sigmoid(width_ptr[4]);for (int cls_id = 0; cls_id < num_classes; cls_id++) {float score = sigmoid(cls_ptr[cls_id]) * confidence;if (score > threshold) {float cx = (sigmoid(width_ptr[0]) * 2.f - 0.5f + wi) * (float) stride;float cy = (sigmoid(width_ptr[1]) * 2.f - 0.5f + hi) * (float) stride;float w = pow(sigmoid(width_ptr[2]) * 2.f, 2) * anchors[ci].width;float h = pow(sigmoid(width_ptr[3]) * 2.f, 2) * anchors[ci].height;BoxInfo box;box.x1 = std::max(0, std::min(net_size, int((cx - w / 2.f))));box.y1 = std::max(0, std::min(net_size, int((cy - h / 2.f))));box.x2 = std::max(0, std::min(net_size, int((cx + w / 2.f))));box.y2 = std::max(0, std::min(net_size, int((cy + h / 2.f))));box.score = score;box.label = cls_id;result.push_back(box);}}}}}}return result;}void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });std::vector<float> vArea(input_boxes.size());for (int i = 0; i < int(input_boxes.size()); ++i) {vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);}for (int i = 0; i < int(input_boxes.size()); ++i) {for (int j = i + 1; j < int(input_boxes.size());) {float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);float w = 
std::max(float(0), xx2 - xx1 + 1);float h = std::max(float(0), yy2 - yy1 + 1);float inter = w * h;float ovr = inter / (vArea[i] + vArea[j] - inter);if (ovr >= NMS_THRESH) {input_boxes.erase(input_boxes.begin() + j);vArea.erase(vArea.begin() + j);} else {j++;}}}}void scale_coords(std::vector<BoxInfo> &boxes, int w_from, int h_from, int w_to, int h_to) {float w_ratio = float(w_to) / float(w_from);float h_ratio = float(h_to) / float(h_from);for (auto &box: boxes) {box.x1 *= w_ratio;box.x2 *= w_ratio;box.y1 *= h_ratio;box.y2 *= h_ratio;}return;}cv::Mat draw_box(cv::Mat &cv_mat, std::vector<BoxInfo> &boxes, const std::vector<std::string> &labels,unsigned char colors[][3]) {for (auto box : boxes) {int width = box.x2 - box.x1;int height = box.y2 - box.y1;cv::Point p = cv::Point(box.x1, box.y1);cv::Rect rect = cv::Rect(box.x1, box.y1, width, height);cv::rectangle(cv_mat, rect, cv::Scalar(colors[box.label][0], colors[box.label][1], colors[box.label][2]));string text = labels[box.label] + ":" + std::to_string(box.score);cv::putText(cv_mat, text, p, cv::FONT_HERSHEY_PLAIN, 1,cv::Scalar(colors[box.label][0], colors[box.label][1], colors[box.label][2]));}return cv_mat;}int main(int argc, char **argv) {std::vector<YoloLayerData> yolov7_layers{{"554", 32, {{116, 90}, {156, 198}, {373, 326}}},{"534", 16, {{30, 61}, {62, 45}, {59, 119}}},{"output", 8, {{12, 16}, {16, 30}, {33, 23}}},};vector<float> origin_rect_cof;std::vector<YoloLayerData> &layers = yolov7_layers;float threshold = 0.4;float nms_threshold = 0.5;int i = 0;std::vector<std::string> labels = {"head"};unsigned char colors[][3] = {{0, 255, 0}};float target_width = 640;float target_height = 640;dai::Pipeline pipeline;auto camRgb = pipeline.create<dai::node::ColorCamera>();auto rgbOut=pipeline.create<dai::node::XLinkOut>();rgbOut->setStreamName("rgbOut");camRgb->video.link(rgbOut->input);camRgb->setPreviewSize(640, 640);camRgb->setBoardSocket(dai::CameraBoardSocket::RGB);camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);camRgb->setInterleaved(false);camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::RGB);camRgb->setFps(60);camRgb->setPreviewKeepAspectRatio(false);auto monoLeft = pipeline.create<dai::node::MonoCamera>();auto monoRight = pipeline.create<dai::node::MonoCamera>();auto stereo = pipeline.create<dai::node::StereoDepth>();auto spatialDataCalculator = pipeline.create<dai::node::SpatialLocationCalculator>();//定义//auto cam = pipeline.create<dai::node::XLinkIn>();//camRgb->setStreamName("inFrame");auto net = pipeline.create<dai::node::NeuralNetwork>();dai::OpenVINO::Blob blob("../best.blob");net->setBlob(blob);net->input.setBlocking(false);//基本熟练明白oak的函数使用了camRgb->preview.link(net->input);auto xoutDepth = pipeline.create<dai::node::XLinkOut>();auto xoutSpatialData = pipeline.create<dai::node::XLinkOut>();auto xinSpatialCalcConfig = pipeline.create<dai::node::XLinkIn>();xoutDepth->setStreamName("depth");xoutSpatialData->setStreamName("spatialData");xinSpatialCalcConfig->setStreamName("spatialCalcConfig");monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);monoLeft->setBoardSocket(dai::CameraBoardSocket::LEFT);monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);monoRight->setBoardSocket(dai::CameraBoardSocket::RIGHT);stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);stereo->setLeftRightCheck(true);stereo->setDepthAlign(dai::CameraBoardSocket::RGB);stereo->setExtendedDisparity(true);// LR-check is required for depth 
alignmentdai::Point2f topLeft(0.4f, 0.4f);dai::Point2f bottomRight(0.6f, 0.6f);dai::SpatialLocationCalculatorConfigData config;config.depthThresholds.lowerThreshold = 100;config.depthThresholds.upperThreshold = 10000;config.roi = dai::Rect(topLeft, bottomRight);spatialDataCalculator->inputConfig.setWaitForMessage(false);spatialDataCalculator->initialConfig.addROI(config);// LinkingmonoLeft->out.link(stereo->left);monoRight->out.link(stereo->right);spatialDataCalculator->passthroughDepth.link(xoutDepth->input);stereo->depth.link(spatialDataCalculator->inputDepth);spatialDataCalculator->out.link(xoutSpatialData->input);xinSpatialCalcConfig->out.link(spatialDataCalculator->inputConfig);//定义输出auto xlinkParserOut = pipeline.create<dai::node::XLinkOut>();xlinkParserOut->setStreamName("parseOut");auto xlinkoutOut = pipeline.create<dai::node::XLinkOut>();xlinkoutOut->setStreamName("out");auto xlinkoutpassthroughOut = pipeline.create<dai::node::XLinkOut>();xlinkoutpassthroughOut->setStreamName("passthrough");net->out.link(xlinkParserOut->input);net->passthrough.link(xlinkoutpassthroughOut->input);//结构推送相机dai::Device device(pipeline);//取帧显示// auto inqueue = device.getInputQueue("inFrame");//maxsize 代表缓冲数据auto detqueue = device.getOutputQueue("parseOut", 8, false);//maxsize 代表缓冲数据auto passthrough = device.getOutputQueue("passthrough", 8, false);//maxsize 代表缓冲数据auto depthQueue = device.getOutputQueue("depth", 8, false);auto spatialCalcQueue = device.getOutputQueue("spatialData", 8, false);auto spatialCalcConfigInQueue = device.getInputQueue("spatialCalcConfig");auto videoqueue=device.getOutputQueue("rgbOut",8,false);bool printOutputLayersOnce = true;auto color = cv::Scalar(0, 255, 0);auto startTime = steady_clock::now();int counter = 0;float fps = 0;while (true) {auto inDepth = depthQueue->get<dai::ImgFrame>();auto ImgFrame = videoqueue->get<dai::ImgFrame>();// auto ImgFrame = passthrough->get<dai::ImgFrame>();auto frame = ImgFrame->getCvFrame();target_height=frame.rows*1.0;target_width=frame.cols*1.0;cv::Mat depthFrameColor;cv::Mat depthFrame = inDepth->getFrame(); // depthFrame values are in millimeterscv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);cv::equalizeHist(depthFrameColor, depthFrameColor);cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);auto inNN = detqueue->get<dai::NNData>();if (printOutputLayersOnce && inNN) {std::cout << "Output layer names: ";for (const auto &ten : inNN->getAllLayerNames()) {std::cout << ten << ", ";}std::cout << std::endl;printOutputLayersOnce = false;}std::vector<BoxInfo> result;std::vector<BoxInfo> boxes;auto output = inNN->getLayerFp16(inNN->getAllLayerNames()[2]);boxes = decode_infer(output, layers[2].stride, 640, labels.size(), layers[2].anchors, threshold, 2);result.insert(result.begin(), boxes.begin(), boxes.end());auto output_534 = inNN->getLayerFp16(inNN->getAllLayerNames()[0]);boxes = decode_infer(output_534, layers[1].stride, 640, labels.size(), layers[1].anchors, threshold, 1);result.insert(result.begin(), boxes.begin(), boxes.end());auto output_554 = inNN->getLayerFp16(inNN->getAllLayerNames()[1]);boxes = decode_infer(output_554, layers[0].stride, 640, labels.size(), layers[0].anchors, threshold, 0);result.insert(result.begin(), boxes.begin(), boxes.end());nms(result, nms_threshold);scale_coords(result, 640, 640, frame.cols, frame.rows);cv::Mat frame_show = draw_box(frame, result, labels, colors);counter++;auto currentTime = steady_clock::now();auto elapsed = duration_cast<duration<float>>(currentTime - 
startTime);if (elapsed > seconds(1)) {fps = counter / elapsed.count();counter = 0;startTime = currentTime;}std::stringstream fpsStr;fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;printf("fps %f\n",fps);cv::putText(frame, fpsStr.str(), cv::Point(2, frame_show.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4,cv::Scalar(0, 255, 0));for(auto &item:result){topLeft.x = item.x1 * depthFrameColor.cols / target_width / depthFrameColor.cols;topLeft.y = item.y1 * depthFrameColor.rows / target_height / depthFrame.rows;bottomRight.x = ( item.x2 * depthFrameColor.cols / target_width) /depthFrameColor.cols;bottomRight.y = ( item.y2 * depthFrameColor.rows / target_height ) /depthFrameColor.rows;//std::cout<<topLeft.x<<" "<<topLeft.y<<" "<<bottomRight.x<<" "<<bottomRight.y<<" "<<std::endl;config.roi = dai::Rect(topLeft, bottomRight);dai::SpatialLocationCalculatorConfig cfg;cfg.addROI(config);spatialCalcConfigInQueue->send(cfg);cv::Mat depthFrame = inDepth->getFrame(); // depthFrame values are in millimeterscv::Mat depthFrameColor;cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);cv::equalizeHist(depthFrameColor, depthFrameColor);cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);auto spatialData = spatialCalcQueue->get<dai::SpatialLocationCalculatorData>()->getSpatialLocations();for(auto depthData : spatialData) {auto roi = depthData.config.roi;roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);auto xmin = (int)roi.topLeft().x;auto ymin = (int)roi.topLeft().y;auto xmax = (int)roi.bottomRight().x;auto ymax = (int)roi.bottomRight().y;auto depthMin = depthData.depthMin;auto depthMax = depthData.depthMax;auto coords = depthData.spatialCoordinates;auto distance = std::sqrt(coords.x * coords.x + coords.y * coords.y + coords.z * coords.z);auto fontType = cv::FONT_HERSHEY_TRIPLEX;std::stringstream depthDistance;depthDistance.precision(2);depthDistance << fixed << static_cast<float>(distance / 1000.0f) << "m";cv::putText(depthFrameColor, depthDistance.str(), cv::Point(item.x1 + 10, item.y1 + 70), fontType, 0.5, color);std::stringstream rgbDistance;rgbDistance.precision(2);rgbDistance << fixed << static_cast<float>(distance / 1000.0f) << "m";cv::putText(frame, rgbDistance.str(), cv::Point(item.x1 + 10, item.y1 + 70), fontType, 0.5, color);cv::rectangle(frame, cv::Rect(cv::Point(item.x1, item.y1), cv::Point(item.x2, item.y2)), color, cv::FONT_HERSHEY_SIMPLEX);std::stringstream rgb_depthX,depthX;rgb_depthX << "X: " << (int)depthData.spatialCoordinates.x << " mm";cv::putText(frame, rgb_depthX.str(), cv::Point(item.x1 + 10, item.y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);std::stringstream rgb_depthY,depthY;rgb_depthY << "Y: " << (int)depthData.spatialCoordinates.y << " mm";cv::putText(frame, rgb_depthY.str(), cv::Point(item.x1 + 10, item.y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);std::stringstream rgb_depthZ,depthZ;rgb_depthZ << "Z: " << (int)depthData.spatialCoordinates.z << " mm";cv::putText(frame, rgb_depthZ.str(), cv::Point(item.x1 + 10, item.y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, cv::FONT_HERSHEY_SIMPLEX);depthX << "X: " << (int)depthData.spatialCoordinates.x << " mm";cv::putText(depthFrameColor, depthX.str(), cv::Point(xmin + 10, ymin + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);depthY << "Y: " << (int)depthData.spatialCoordinates.y << " mm";cv::putText(depthFrameColor, depthY.str(), cv::Point(xmin+ 10, ymin + 35), 
cv::FONT_HERSHEY_TRIPLEX, 0.5, color);depthZ << "Z: " << (int)depthData.spatialCoordinates.z << " mm";cv::putText(depthFrameColor, depthZ.str(), cv::Point(xmin + 10, ymin + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);}// Show the framecv::imshow("depth", depthFrameColor);cv::imwrite("depth.jpg",depthFrameColor);}cv::imshow("demo", frame_show);cv::imwrite("demo.jpg",frame_show);int key = cv::waitKey(1);if (key == 'q' || key == 'Q') return 0;cv::waitKey(1);}return 0;}
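The ROI that each detection feeds into the SpatialLocationCalculator is computed with a needlessly long expression: item.x1 * depthFrameColor.cols / target_width / depthFrameColor.cols simply reduces to item.x1 / target_width (the y term divides by depthFrame.rows instead of depthFrameColor.rows, but the two mats have the same size, so the value is unchanged). A minimal sketch of the equivalent normalization, assuming a box already scaled back to the RGB frame size; the helper name normalizeRoi is mine and not part of the demo:

#include <algorithm>

// Hypothetical helper: turn a pixel-space box on the RGB frame into the
// normalized [0,1] ROI that dai::SpatialLocationCalculatorConfigData expects.
struct NormRoi { float x1, y1, x2, y2; };

static NormRoi normalizeRoi(float x1, float y1, float x2, float y2,
                            float frame_w, float frame_h) {
    auto clamp01 = [](float v) { return std::min(1.f, std::max(0.f, v)); };
    // Same value as item.x1 * depthFrameColor.cols / target_width / depthFrameColor.cols above.
    return { clamp01(x1 / frame_w), clamp01(y1 / frame_h),
             clamp01(x2 / frame_w), clamp01(y2 / frame_h) };
}

Because stereo->setDepthAlign(dai::CameraBoardSocket::RGB) aligns the depth map to the color camera, normalizing against the RGB frame size is enough; no extra remapping between the two images should be needed.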

Test results

Test model link: /s/1Fapu0m68M7u_45XxLl7IyQ?pwd=g3f9 access code: g3f9

The 1080p head-detection and depth-map screenshots are omitted here.

Cup-detection code link: /s/10SuFJ5dVAYCF4em8aUSlDw?pwd=4va5 access code: 4va5

#include <stdio.h>#include <string>#include <iostream>#include <opencv2/core/core.hpp>#include <opencv2/highgui/highgui.hpp>#include <opencv2/imgproc/imgproc.hpp>#include "utility.hpp"#include <vector>#include "depthai/depthai.hpp"static std::atomic<bool> newConfig{false};using namespace std;using namespace std::chrono;using namespace cv;typedef struct {int width;int height;} YoloSize;typedef struct {std::string name;int stride;std::vector<YoloSize> anchors;} YoloLayerData;class BoxInfo {public:int x1, y1, x2, y2, label, id;float score;};static inline float sigmoid(float x) {return static_cast<float>(1.f / (1.f + exp(-x)));}double GetIOU(cv::Rect_<float> bb_test, cv::Rect_<float> bb_gt) {float in = (bb_test & bb_gt).area();float un = bb_test.area() + bb_gt.area() - in;if (un < DBL_EPSILON)return 0;return (double) (in / un);}std::vector<BoxInfo> decode_infer(vector<float> data_pt, int stride, int net_size, int num_classes,const std::vector<YoloSize> &anchors, float threshold, int idx) {const int s[3] = {20, 40, 80};std::vector<BoxInfo> result;int batchs, channels, height, width, pred_item;batchs = 1;channels = 3;height = s[idx];width = s[idx];pred_item = 6;float data_ptr[batchs * channels * height * width * pred_item];//std::cout << batchs * channels * height * width * pred_item << std::endl << data_pt.size() << std::endl;for (int i = 0; i < data_pt.size(); i++) {data_ptr[i] = data_pt[i];}//std::cout << batchs << " " << channels << " " << height << " " << width << " " << pred_item << " " << std::endl;// for (int i = 0; i < 20; i++) {// std::cout << data_ptr[i] << " ";// }std::cout << std::endl;for (int bi = 0; bi < batchs; bi++) {auto batch_ptr = data_ptr + bi * (channels * height * width * pred_item);for (int ci = 0; ci < channels; ci++) {auto channel_ptr = batch_ptr + ci * (height * width * pred_item);for (int hi = 0; hi < height; hi++) {auto height_ptr = channel_ptr + hi * (width * pred_item);for (int wi = 0; wi < width; wi++) {auto width_ptr = height_ptr + wi * pred_item;auto cls_ptr = width_ptr + 5;auto confidence = sigmoid(width_ptr[4]);for (int cls_id = 0; cls_id < num_classes; cls_id++) {float score = sigmoid(cls_ptr[cls_id]) * confidence;if (score > threshold) {float cx = (sigmoid(width_ptr[0]) * 2.f - 0.5f + wi) * (float) stride;float cy = (sigmoid(width_ptr[1]) * 2.f - 0.5f + hi) * (float) stride;float w = pow(sigmoid(width_ptr[2]) * 2.f, 2) * anchors[ci].width;float h = pow(sigmoid(width_ptr[3]) * 2.f, 2) * anchors[ci].height;BoxInfo box;box.x1 = std::max(0, std::min(net_size, int((cx - w / 2.f))));box.y1 = std::max(0, std::min(net_size, int((cy - h / 2.f))));box.x2 = std::max(0, std::min(net_size, int((cx + w / 2.f))));box.y2 = std::max(0, std::min(net_size, int((cy + h / 2.f))));box.score = score;box.label = cls_id;result.push_back(box);}}}}}}return result;}void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });std::vector<float> vArea(input_boxes.size());for (int i = 0; i < int(input_boxes.size()); ++i) {vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);}for (int i = 0; i < int(input_boxes.size()); ++i) {for (int j = i + 1; j < int(input_boxes.size());) {float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);float w = 
std::max(float(0), xx2 - xx1 + 1);float h = std::max(float(0), yy2 - yy1 + 1);float inter = w * h;float ovr = inter / (vArea[i] + vArea[j] - inter);if (ovr >= NMS_THRESH) {input_boxes.erase(input_boxes.begin() + j);vArea.erase(vArea.begin() + j);} else {j++;}}}}void scale_coords(std::vector<BoxInfo> &boxes, int w_from, int h_from, int w_to, int h_to) {float w_ratio = float(w_to) / float(w_from);float h_ratio = float(h_to) / float(h_from);for (auto &box: boxes) {box.x1 *= w_ratio;box.x2 *= w_ratio;box.y1 *= h_ratio;box.y2 *= h_ratio;}return;}cv::Mat draw_box(cv::Mat &cv_mat, std::vector<BoxInfo> &boxes, const std::vector<std::string> &labels,unsigned char colors[][3]) {for (auto box : boxes) {int width = box.x2 - box.x1;int height = box.y2 - box.y1;cv::Point p = cv::Point(box.x1, box.y1);cv::Rect rect = cv::Rect(box.x1, box.y1, width, height);cv::rectangle(cv_mat, rect, cv::Scalar(colors[box.label][0], colors[box.label][1], colors[box.label][2]));string text = labels[box.label] + ":" + std::to_string(box.score);cv::putText(cv_mat, text, p, cv::FONT_HERSHEY_PLAIN, 1,cv::Scalar(colors[box.label][0], colors[box.label][1], colors[box.label][2]));}return cv_mat;}int main(int argc, char **argv) {std::vector<YoloLayerData> yolov7_layers{{"324", 32, {{116, 90}, {156, 198}, {373, 326}}},{"304", 16, {{30, 61}, {62, 45}, {59, 119}}},{"output", 8, {{12, 16}, {16, 30}, {33, 23}}},};vector<float> origin_rect_cof;std::vector<YoloLayerData> &layers = yolov7_layers;float threshold = 0.4;float nms_threshold = 0.5;int i = 0;std::vector<std::string> labels = {"cup"};unsigned char colors[][3] = {{0, 255, 0}};float target_width = 640;float target_height = 640;dai::Pipeline pipeline;auto camRgb = pipeline.create<dai::node::ColorCamera>();auto rgbOut=pipeline.create<dai::node::XLinkOut>();rgbOut->setStreamName("rgbOut");camRgb->video.link(rgbOut->input);camRgb->setPreviewSize(640, 640);camRgb->setBoardSocket(dai::CameraBoardSocket::RGB);camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);camRgb->setInterleaved(false);camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::RGB);camRgb->setFps(60);camRgb->setPreviewKeepAspectRatio(false);auto monoLeft = pipeline.create<dai::node::MonoCamera>();auto monoRight = pipeline.create<dai::node::MonoCamera>();auto stereo = pipeline.create<dai::node::StereoDepth>();auto spatialDataCalculator = pipeline.create<dai::node::SpatialLocationCalculator>();//定义//auto cam = pipeline.create<dai::node::XLinkIn>();//camRgb->setStreamName("inFrame");auto net = pipeline.create<dai::node::NeuralNetwork>();dai::OpenVINO::Blob blob("../model_300_300/best_cup.blob");net->setBlob(blob);net->input.setBlocking(false);//基本熟练明白oak的函数使用了camRgb->preview.link(net->input);auto xoutDepth = pipeline.create<dai::node::XLinkOut>();auto xoutSpatialData = pipeline.create<dai::node::XLinkOut>();auto xinSpatialCalcConfig = pipeline.create<dai::node::XLinkIn>();xoutDepth->setStreamName("depth");xoutSpatialData->setStreamName("spatialData");xinSpatialCalcConfig->setStreamName("spatialCalcConfig");monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);monoLeft->setBoardSocket(dai::CameraBoardSocket::LEFT);monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);monoRight->setBoardSocket(dai::CameraBoardSocket::RIGHT);stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_ACCURACY);stereo->setLeftRightCheck(true);stereo->setDepthAlign(dai::CameraBoardSocket::RGB);stereo->setExtendedDisparity(true);// LR-check is 
required for depth alignmentdai::Point2f topLeft(0.4f, 0.4f);dai::Point2f bottomRight(0.6f, 0.6f);dai::SpatialLocationCalculatorConfigData config;config.depthThresholds.lowerThreshold = 100;config.depthThresholds.upperThreshold = 10000;config.roi = dai::Rect(topLeft, bottomRight);spatialDataCalculator->inputConfig.setWaitForMessage(false);spatialDataCalculator->initialConfig.addROI(config);// LinkingmonoLeft->out.link(stereo->left);monoRight->out.link(stereo->right);spatialDataCalculator->passthroughDepth.link(xoutDepth->input);stereo->depth.link(spatialDataCalculator->inputDepth);spatialDataCalculator->out.link(xoutSpatialData->input);xinSpatialCalcConfig->out.link(spatialDataCalculator->inputConfig);//定义输出auto xlinkParserOut = pipeline.create<dai::node::XLinkOut>();xlinkParserOut->setStreamName("parseOut");auto xlinkoutOut = pipeline.create<dai::node::XLinkOut>();xlinkoutOut->setStreamName("out");auto xlinkoutpassthroughOut = pipeline.create<dai::node::XLinkOut>();xlinkoutpassthroughOut->setStreamName("passthrough");net->out.link(xlinkParserOut->input);net->passthrough.link(xlinkoutpassthroughOut->input);//结构推送相机dai::Device device(pipeline);//取帧显示// auto inqueue = device.getInputQueue("inFrame");//maxsize 代表缓冲数据auto detqueue = device.getOutputQueue("parseOut", 8, false);//maxsize 代表缓冲数据auto passthrough = device.getOutputQueue("passthrough", 8, false);//maxsize 代表缓冲数据auto depthQueue = device.getOutputQueue("depth", 8, false);auto spatialCalcQueue = device.getOutputQueue("spatialData", 8, false);auto spatialCalcConfigInQueue = device.getInputQueue("spatialCalcConfig");auto videoqueue=device.getOutputQueue("rgbOut",8,false);bool printOutputLayersOnce = true;auto color = cv::Scalar(0, 255, 0);auto startTime = steady_clock::now();int counter = 0;float fps = 0;while (true) {auto inDepth = depthQueue->get<dai::ImgFrame>();auto ImgFrame = videoqueue->get<dai::ImgFrame>();// auto ImgFrame = passthrough->get<dai::ImgFrame>();auto frame = ImgFrame->getCvFrame();target_height=frame.rows*1.0;target_width=frame.cols*1.0;cv::Mat depthFrameColor;cv::Mat depthFrame = inDepth->getFrame(); // depthFrame values are in millimeterscv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);cv::equalizeHist(depthFrameColor, depthFrameColor);cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);auto inNN = detqueue->get<dai::NNData>();if (printOutputLayersOnce && inNN) {std::cout << "Output layer names: ";for (const auto &ten : inNN->getAllLayerNames()) {std::cout << ten << ", ";}std::cout << std::endl;printOutputLayersOnce = false;}std::vector<BoxInfo> result;std::vector<BoxInfo> boxes;auto output = inNN->getLayerFp16(inNN->getAllLayerNames()[2]);boxes = decode_infer(output, layers[2].stride, 640, labels.size(), layers[2].anchors, threshold, 2);result.insert(result.begin(), boxes.begin(), boxes.end());auto output_534 = inNN->getLayerFp16(inNN->getAllLayerNames()[0]);boxes = decode_infer(output_534, layers[1].stride, 640, labels.size(), layers[1].anchors, threshold, 1);result.insert(result.begin(), boxes.begin(), boxes.end());auto output_554 = inNN->getLayerFp16(inNN->getAllLayerNames()[1]);boxes = decode_infer(output_554, layers[0].stride, 640, labels.size(), layers[0].anchors, threshold, 0);result.insert(result.begin(), boxes.begin(), boxes.end());nms(result, nms_threshold);scale_coords(result, 640, 640, frame.cols, frame.rows);cv::Mat frame_show = draw_box(frame, result, labels, colors);counter++;auto currentTime = steady_clock::now();auto elapsed = 
duration_cast<duration<float>>(currentTime - startTime);if (elapsed > seconds(1)) {fps = counter / elapsed.count();counter = 0;startTime = currentTime;}std::stringstream fpsStr;fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;printf("fps %f\n",fps);cv::putText(frame, fpsStr.str(), cv::Point(2, frame_show.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4,cv::Scalar(0, 255, 0));for(auto &item:result){topLeft.x = item.x1 * depthFrameColor.cols / target_width / depthFrameColor.cols;topLeft.y = item.y1 * depthFrameColor.rows / target_height / depthFrame.rows;bottomRight.x = ( item.x2 * depthFrameColor.cols / target_width) /depthFrameColor.cols;bottomRight.y = ( item.y2 * depthFrameColor.rows / target_height ) /depthFrameColor.rows;//std::cout<<topLeft.x<<" "<<topLeft.y<<" "<<bottomRight.x<<" "<<bottomRight.y<<" "<<std::endl;config.roi = dai::Rect(topLeft, bottomRight);dai::SpatialLocationCalculatorConfig cfg;cfg.addROI(config);spatialCalcConfigInQueue->send(cfg);auto spatialData = spatialCalcQueue->get<dai::SpatialLocationCalculatorData>()->getSpatialLocations();for(auto depthData : spatialData) {auto roi = depthData.config.roi;roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);auto xmin = (int)roi.topLeft().x;auto ymin = (int)roi.topLeft().y;auto xmax = (int)roi.bottomRight().x;auto ymax = (int)roi.bottomRight().y;auto depthMin = depthData.depthMin;auto depthMax = depthData.depthMax;auto coords = depthData.spatialCoordinates;auto distance = std::sqrt(coords.x * coords.x + coords.y * coords.y + coords.z * coords.z);auto fontType = cv::FONT_HERSHEY_TRIPLEX;std::stringstream depthDistance;depthDistance.precision(2);depthDistance << fixed << static_cast<float>(distance / 1000.0f) << "m";cv::putText(depthFrameColor, depthDistance.str(), cv::Point(item.x1 + 10, item.y1 + 70), fontType, 0.5, color);std::stringstream rgbDistance;rgbDistance.precision(2);rgbDistance << fixed << static_cast<float>(distance / 1000.0f) << "m";cv::putText(frame, rgbDistance.str(), cv::Point(item.x1 + 10, item.y1 + 70), fontType, 0.5, color);cv::rectangle(frame, cv::Rect(cv::Point(item.x1, item.y1), cv::Point(item.x2, item.y2)), color, cv::FONT_HERSHEY_SIMPLEX);std::stringstream rgb_depthX,depthX;rgb_depthX << "X: " << (int)depthData.spatialCoordinates.x << " mm";cv::putText(frame, rgb_depthX.str(), cv::Point(item.x1 + 10, item.y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);std::stringstream rgb_depthY,depthY;rgb_depthY << "Y: " << (int)depthData.spatialCoordinates.y << " mm";cv::putText(frame, rgb_depthY.str(), cv::Point(item.x1 + 10, item.y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);std::stringstream rgb_depthZ,depthZ;rgb_depthZ << "Z: " << (int)depthData.spatialCoordinates.z << " mm";cv::putText(frame, rgb_depthZ.str(), cv::Point(item.x1 + 10, item.y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, cv::FONT_HERSHEY_SIMPLEX);depthX << "X: " << (int)depthData.spatialCoordinates.x << " mm";cv::putText(depthFrameColor, depthX.str(), cv::Point(xmin + 10, ymin + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);depthY << "Y: " << (int)depthData.spatialCoordinates.y << " mm";cv::putText(depthFrameColor, depthY.str(), cv::Point(xmin+ 10, ymin + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);depthZ << "Z: " << (int)depthData.spatialCoordinates.z << " mm";cv::putText(depthFrameColor, depthZ.str(), cv::Point(xmin + 10, ymin + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);}// Show the frame}cv::imshow("depth", 
depthFrameColor);cv::imshow("demo", frame_show);cv::imwrite("demo.jpg",frame_show);int key = cv::waitKey(1);if (key == 'q' || key == 'Q') return 0;cv::waitKey(1);}return 0;}
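One fragile point in both demos: the three output tensors are fetched by position in getAllLayerNames() (indices 2, 0, 1), so the decode order silently depends on how that list happens to be sorted. getLayerFp16 already accepts a layer name (that is exactly what gets passed above), so a slightly more robust sketch is to key the lookup on the names stored in yolov7_layers — "554"/"534"/"output" for the head model, "324"/"304"/"output" for the cup model. This is only a sketch under those assumptions, reusing the surrounding definitions, not a change the original code needs in order to work:

// Sketch: decode each YOLO head by its layer name instead of a positional index.
// Assumes the YoloLayerData / decode_infer / nms definitions above and an
// inNN of type std::shared_ptr<dai::NNData> already taken from detqueue.
std::vector<BoxInfo> result;
for (int idx = 0; idx < (int)layers.size(); idx++) {
    // layers[0]: stride 32 -> 20x20 grid, layers[1]: stride 16 -> 40x40, layers[2]: stride 8 -> 80x80
    auto tensor = inNN->getLayerFp16(layers[idx].name);
    auto boxes  = decode_infer(tensor, layers[idx].stride, 640, labels.size(),
                               layers[idx].anchors, threshold, idx);
    result.insert(result.end(), boxes.begin(), boxes.end());
}
nms(result, nms_threshold);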

Result screenshot

References

mnn-yolov5: Deploy YOLOv5 with MNN.
