工具类
#pragma once
#include <ncnn/layer.h>
#include <ncnn/net.h>
#include <vector>
#include <algorithm>
#include <cmath>
#include <ranges>
struct BoundingBox
{
float left = 0.f;
float top = 0.f;
float right = 0.f;
float bottom = 0.f;
float confidence = 0.f;
int classLabel = -1;
BoundingBox() = default;
BoundingBox(float left, float top, float right, float bottom, float confidence, int classLabel)
: left(left), top(top), right(right), bottom(bottom), confidence(confidence), classLabel(classLabel) {}
};
inline float sigmoid(float x)
{
return static_cast<float>(1.f / (1.f + std::exp(-x)));
}
// 生成检测框
void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& input, const ncnn::Mat& featureBlob, float confidenceThreshold, std::vector<BoundingBox>& objects)
{
const int numGrid = featureBlob.h;
int numGridX;
int numGridY;
numGridY = input.h / stride;
numGridX = numGrid / numGridY;
const int numClass = featureBlob.w - 5;
const int numAnchors = anchors.w / 2;
for (int anchorIndex = 0; anchorIndex < numAnchors; anchorIndex++)
{
const float anchorW = anchors[anchorIndex * 2];
const float anchorH = anchors[anchorIndex * 2 + 1];
const ncnn::Mat feat = featureBlob.channel(anchorIndex);
for (int i = 0; i < numGridY; i++)
{
for (int j = 0; j < numGridX; j++)
{
const float* featPtr = feat.row(i * numGridX + j);
float boxConfidence = sigmoid(featPtr[4]);
if (boxConfidence >= confidenceThreshold)
{
int classIndex = 0;
float classScore = -FLT_MAX;
for (int k = 0; k < numClass; k++)
{
float score = featPtr[5 + k];
if (score > classScore)
{
classIndex = k;
classScore = score;
}
}
float confidence = sigmoid(classScore);
if (confidence >= confidenceThreshold)
{
float dx = sigmoid(featPtr[0]);
float dy = sigmoid(featPtr[1]);
float dw = sigmoid(featPtr[2]);
float dh = sigmoid(featPtr[3]);
float centerX = (dx * 2.f - 0.5f + j) * stride;
float centerY = (dy * 2.f - 0.5f + i) * stride;
float width = std::pow(dw * 2.f, 2) * anchorW;
float height = std::pow(dh * 2.f, 2) * anchorH;
float x0 = centerX - width * 0.5f;
float y0 = centerY - height * 0.5f;
float x1 = centerX + width * 0.5f;
float y1 = centerY + height * 0.5f;
BoundingBox obj;
obj.left = x0;
obj.top = y0;
obj.right = x1;
obj.bottom = y1;
obj.classLabel = classIndex;
obj.confidence = confidence;
objects.push_back(obj);
}
}
}
}
}
}
// 计算交并比
float iou(const BoundingBox& box1, const BoundingBox& box2)
{
float interLeft = std::max(box1.left, box2.left);
float interTop = std::max(box1.top, box2.top);
float interRight = std::min(box1.right, box2.right);
float interBottom = std::min(box1.bottom, box2.bottom);
float interArea = std::max(interRight - interLeft, 0.0f) * std::max(interBottom - interTop, 0.0f);
if (interArea == 0.0f)
return 0.0f;
float box1Area = std::max(0.0f, box1.right - box1.left) * std::max(0.0f, box1.bottom - box1.top);
float box2Area = std::max(0.0f, box2.right - box2.left) * std::max(0.0f, box2.bottom - box2.top);
return interArea / (box1Area + box2Area - interArea);
}
// 非最大抑制
std::vector<BoundingBox> nms(std::vector<BoundingBox>& boxes, float threshold) {
std::ranges::sort(boxes, [](const BoundingBox& box1, const BoundingBox& box2)
{
return box1.confidence > box2.confidence;
});
std::vector<BoundingBox> output;
output.reserve(boxes.size());
std::vector<bool> remove_flags(boxes.size());
for (int i = 0; i < boxes.size(); ++i) {
if (remove_flags[i])
continue;
auto& box1 = boxes[i];
output.emplace_back(box1);
for (int j = i + 1; j < boxes.size(); ++j) {
if (remove_flags[j])
continue;
auto& box2 = boxes[j];
if (box2.classLabel == box1.classLabel) {
if (iou(box1, box2) >= threshold)
remove_flags[j] = true;
}
}
}
return output;
}
使用代码
#include <layer.h>
#include <net.h>
#include <opencv2/opencv.hpp>
#include <float.h>
#include <stdio.h>
#include <vector>
#include "NcnnUtil.hpp" // 解析库
#include "cap.h" // 包含capture库
int main()
{
ncnn::Net yolov5; // 创建ncnn::Net对象yolov5
yolov5.opt.use_vulkan_compute = true; // 使用GPU计算,注意需要NCNN的库支持才行,不支持则需要自己重新编译一下,具体可以看我的另一篇文章
if (yolov5.load_param("best-sim-opt.param")) // 加载参数文件
return -1;
if (yolov5.load_model("best-sim-opt.bin")) // 加载模型文件
return -1;
const int imgsize = 640; // 推理大小
const float probThreshold = 0.25f; // 定义概率阈值
const float nms_threshold = 0.5f; // 定义非极大值抑制的阈值
int gamex = 2560; // 定义窗口的x轴大小
int gamey = 1440; // 定义窗口的y轴大小
capture cap(gamex, gamey, imgsize, imgsize, "窗口名");
ncnn::Mat anchors1(6);
anchors1[0] = 10.f;
anchors1[1] = 13.f;
anchors1[2] = 16.f;
anchors1[3] = 30.f;
anchors1[4] = 33.f;
anchors1[5] = 23.f;
ncnn::Mat anchors2(6);
anchors2[0] = 30.f;
anchors2[1] = 61.f;
anchors2[2] = 62.f;
anchors2[3] = 45.f;
anchors2[4] = 59.f;
anchors2[5] = 119.f;
ncnn::Mat anchors3(6);
anchors3[0] = 116.f;
anchors3[1] = 90.f;
anchors3[2] = 156.f;
anchors3[3] = 198.f;
anchors3[4] = 373.f;
anchors3[5] = 326.f;
const float normalVals[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f }; // 定义归一化值的数组
while (true)
{
// 获取目标窗口的图像
auto img = (BYTE*)cap.cap();
ncnn::Mat inPad = ncnn::Mat::from_pixels(img, ncnn::Mat::PIXEL_BGR2RGB, imgsize, imgsize); // 创建ncnn::Mat对象inPad
inPad.substract_mean_normalize(0, normalVals); // 对inPad进行减去均值并归一化操作
auto t1 = std::chrono::steady_clock::now();
ncnn::Extractor ex = yolov5.create_extractor();
ex.input("images", inPad);
std::vector<BoundingBox> proposals; // 创建保存边界框的向量box
// stride 8
{
ncnn::Mat out;
ex.extract("output", out); // 模型参数中Permute的值,需要一致
std::vector<BoundingBox> objects8;
generate_proposals(anchors1, 8, inPad, out, probThreshold, objects8);
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
}
// stride 16
{
ncnn::Mat out;
ex.extract("output1", out); // 模型参数中Permute的值,需要一致
std::vector<BoundingBox> objects16;
generate_proposals(anchors2, 16, inPad, out, probThreshold, objects16);
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
}
// stride 32
{
ncnn::Mat out;
ex.extract("output2", out); // 模型参数中Permute的值,需要一致
std::vector<BoundingBox> objects32;
generate_proposals(anchors3, 32, inPad, out, probThreshold, objects32);
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
}
auto t2 = std::chrono::steady_clock::now();
double dr_ms = std::chrono::duration<double, std::milli>(t2 - t1).count();
cout <<"infer time : "<< dr_ms << "ms" << endl; // 输出推理时间
vector<BoundingBox> newbox = cpu_nms(proposals, nms_threshold); // 对proposals进行非极大值抑制操作,保存在newbox中
cv::Mat img = cv::Mat(imgsize, imgsize, CV_8UC3, img); // 创建cv::Mat对象a,用于显示图像
if (!newbox.empty()) // 如果newbox不为空
{
for (const BoundingBox& detection : newbox) // 遍历newbox中的每个边界框
{
cv::rectangle(img, cv::Point((int)detection.left, (int)detection.top), cv::Point((int)detection.right, (int)detection.bottom), cv::Scalar(0, 255, 0), 1); // 在图像a上绘制边界框
cv::Point screen_center = cv::Point(img.cols / 2, img.rows / 2);
// 边界框中心点坐标
cv::Point center = cv::Point((detection.left + detection.right) / 2, (detection.top + detection.bottom) / 2);
// 计算x距离
double x_distance = center.x - screen_center.x;
// 计算y距离
double y_distance = center.y - screen_center.y;
std::cout << "(x=" << x_distance << ", y=" << y_distance << ")" << std::endl;
}
}
cv::imshow("c", img);
cv::waitKey(1);
}
return 0;
}
Comments