工具类

#pragma once
#include <ncnn/layer.h>
#include <ncnn/net.h>
#include <vector>
#include <algorithm>
#include <cmath>
#include <ranges>

struct BoundingBox
{
    float left = 0.f;
    float top = 0.f;
    float right = 0.f;
    float bottom = 0.f;
    float confidence = 0.f;
    int classLabel = -1;

    BoundingBox() = default;
    BoundingBox(float left, float top, float right, float bottom, float confidence, int classLabel)
        : left(left), top(top), right(right), bottom(bottom), confidence(confidence), classLabel(classLabel) {}
};

inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (1.f + std::exp(-x)));
}

// 生成检测框
void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& input, const ncnn::Mat& featureBlob, float confidenceThreshold, std::vector<BoundingBox>& objects)
{
    const int numGrid = featureBlob.h;

    int numGridX;
    int numGridY;

    numGridY = input.h / stride;
    numGridX = numGrid / numGridY;

    const int numClass = featureBlob.w - 5;
    const int numAnchors = anchors.w / 2;

    for (int anchorIndex = 0; anchorIndex < numAnchors; anchorIndex++)
    {
        const float anchorW = anchors[anchorIndex * 2];
        const float anchorH = anchors[anchorIndex * 2 + 1];

        const ncnn::Mat feat = featureBlob.channel(anchorIndex);
        for (int i = 0; i < numGridY; i++)
        {
            for (int j = 0; j < numGridX; j++)
            {
                const float* featPtr = feat.row(i * numGridX + j);
                float boxConfidence = sigmoid(featPtr[4]);
                if (boxConfidence >= confidenceThreshold)
                {
                    int classIndex = 0;
                    float classScore = -FLT_MAX;
                    for (int k = 0; k < numClass; k++)
                    {
                        float score = featPtr[5 + k];
                        if (score > classScore)
                        {
                            classIndex = k;
                            classScore = score;
                        }
                    }
                    float confidence = sigmoid(classScore);

                    if (confidence >= confidenceThreshold)
                    {
                        float dx = sigmoid(featPtr[0]);
                        float dy = sigmoid(featPtr[1]);
                        float dw = sigmoid(featPtr[2]);
                        float dh = sigmoid(featPtr[3]);

                        float centerX = (dx * 2.f - 0.5f + j) * stride;
                        float centerY = (dy * 2.f - 0.5f + i) * stride;

                        float width = std::pow(dw * 2.f, 2) * anchorW;
                        float height = std::pow(dh * 2.f, 2) * anchorH;

                        float x0 = centerX - width * 0.5f;
                        float y0 = centerY - height * 0.5f;
                        float x1 = centerX + width * 0.5f;
                        float y1 = centerY + height * 0.5f;

                        BoundingBox obj;
                        obj.left = x0;
                        obj.top = y0;
                        obj.right = x1;
                        obj.bottom = y1;
                        obj.classLabel = classIndex;
                        obj.confidence = confidence;
                        objects.push_back(obj);
                    }
                }
            }
        }
    }
}

// 计算交并比
float iou(const BoundingBox& box1, const BoundingBox& box2)
{
    float interLeft = std::max(box1.left, box2.left);
    float interTop = std::max(box1.top, box2.top);
    float interRight = std::min(box1.right, box2.right);
    float interBottom = std::min(box1.bottom, box2.bottom);

    float interArea = std::max(interRight - interLeft, 0.0f) * std::max(interBottom - interTop, 0.0f);
    if (interArea == 0.0f)
        return 0.0f;

    float box1Area = std::max(0.0f, box1.right - box1.left) * std::max(0.0f, box1.bottom - box1.top);
    float box2Area = std::max(0.0f, box2.right - box2.left) * std::max(0.0f, box2.bottom - box2.top);
    return interArea / (box1Area + box2Area - interArea);
}

// 非最大抑制
std::vector<BoundingBox> nms(std::vector<BoundingBox>& boxes, float threshold) {

    std::ranges::sort(boxes, [](const BoundingBox& box1, const BoundingBox& box2)
        {
            return box1.confidence > box2.confidence;
        });

    std::vector<BoundingBox> output;
    output.reserve(boxes.size());
    std::vector<bool> remove_flags(boxes.size());
    for (int i = 0; i < boxes.size(); ++i) {
        if (remove_flags[i])
            continue;

        auto& box1 = boxes[i];
        output.emplace_back(box1);
        for (int j = i + 1; j < boxes.size(); ++j) {
            if (remove_flags[j])
                continue;

            auto& box2 = boxes[j];
            if (box2.classLabel == box1.classLabel) {
                if (iou(box1, box2) >= threshold)
                    remove_flags[j] = true;
            }
        }
    }
    return output;
}

使用代码

#include <layer.h>
#include <net.h>

#include <opencv2/opencv.hpp>

#include <float.h>
#include <stdio.h>
#include <vector>

#include "NcnnUtil.hpp"  //  解析库
#include "cap.h"  // 包含capture库

int main()
{
    ncnn::Net yolov5;   // 创建ncnn::Net对象yolov5
    yolov5.opt.use_vulkan_compute = true;   // 使用GPU计算,注意需要NCNN的库支持才行,不支持则需要自己重新编译一下,具体可以看我的另一篇文章
    if (yolov5.load_param("best-sim-opt.param"))   // 加载参数文件
        return -1;
    if (yolov5.load_model("best-sim-opt.bin"))     // 加载模型文件
        return -1;

    const int imgsize = 640;    // 推理大小
    const float probThreshold = 0.25f; // 定义概率阈值
    const float nms_threshold = 0.5f;    // 定义非极大值抑制的阈值
    int gamex = 2560;  // 定义窗口的x轴大小
    int gamey = 1440;   // 定义窗口的y轴大小

    capture cap(gamex, gamey, imgsize, imgsize, "窗口名");

    ncnn::Mat anchors1(6);
    anchors1[0] = 10.f;
    anchors1[1] = 13.f;
    anchors1[2] = 16.f;
    anchors1[3] = 30.f;
    anchors1[4] = 33.f;
    anchors1[5] = 23.f;
    ncnn::Mat anchors2(6);
    anchors2[0] = 30.f;
    anchors2[1] = 61.f;
    anchors2[2] = 62.f;
    anchors2[3] = 45.f;
    anchors2[4] = 59.f;
    anchors2[5] = 119.f;
    ncnn::Mat anchors3(6);
    anchors3[0] = 116.f;
    anchors3[1] = 90.f;
    anchors3[2] = 156.f;
    anchors3[3] = 198.f;
    anchors3[4] = 373.f;
    anchors3[5] = 326.f;

    const float normalVals[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f }; // 定义归一化值的数组

    while (true)
    {
      // 获取目标窗口的图像
      auto img = (BYTE*)cap.cap();

      ncnn::Mat inPad = ncnn::Mat::from_pixels(img, ncnn::Mat::PIXEL_BGR2RGB, imgsize, imgsize); // 创建ncnn::Mat对象inPad
      inPad.substract_mean_normalize(0, normalVals);   // 对inPad进行减去均值并归一化操作

      auto t1 = std::chrono::steady_clock::now();

        ncnn::Extractor ex = yolov5.create_extractor();
        ex.input("images", inPad);

        std::vector<BoundingBox> proposals;     // 创建保存边界框的向量box
        // stride 8
        {
            ncnn::Mat out;
            ex.extract("output", out);  //  模型参数中Permute的值,需要一致
            std::vector<BoundingBox> objects8; 
            generate_proposals(anchors1, 8, inPad, out, probThreshold, objects8);   
            proposals.insert(proposals.end(), objects8.begin(), objects8.end());
        }
        // stride 16
        {
            ncnn::Mat out;
            ex.extract("output1", out);  //  模型参数中Permute的值,需要一致
            std::vector<BoundingBox> objects16;   
            generate_proposals(anchors2, 16, inPad, out, probThreshold, objects16);  
            proposals.insert(proposals.end(), objects16.begin(), objects16.end());
        }
        // stride 32
        {
            ncnn::Mat out;
            ex.extract("output2", out); //  模型参数中Permute的值,需要一致
            std::vector<BoundingBox> objects32;
            generate_proposals(anchors3, 32, inPad, out, probThreshold, objects32);
            proposals.insert(proposals.end(), objects32.begin(), objects32.end());
        }

        auto t2 = std::chrono::steady_clock::now(); 
        double dr_ms = std::chrono::duration<double, std::milli>(t2 - t1).count();   
        cout  <<"infer time : "<< dr_ms << "ms" << endl;    // 输出推理时间
        vector<BoundingBox> newbox = cpu_nms(proposals, nms_threshold);        // 对proposals进行非极大值抑制操作,保存在newbox中

        cv::Mat img = cv::Mat(imgsize, imgsize, CV_8UC3, img); // 创建cv::Mat对象a,用于显示图像
        if (!newbox.empty())    // 如果newbox不为空
        {
            for (const BoundingBox& detection : newbox)     // 遍历newbox中的每个边界框
            {
                cv::rectangle(img, cv::Point((int)detection.left, (int)detection.top), cv::Point((int)detection.right, (int)detection.bottom), cv::Scalar(0, 255, 0), 1);     // 在图像a上绘制边界框
                cv::Point screen_center = cv::Point(img.cols / 2, img.rows / 2);
                // 边界框中心点坐标
                cv::Point center = cv::Point((detection.left + detection.right) / 2, (detection.top + detection.bottom) / 2);
                // 计算x距离
                double x_distance = center.x - screen_center.x;
                // 计算y距离
                double y_distance = center.y - screen_center.y;
                std::cout << "(x=" << x_distance << ", y=" << y_distance << ")" << std::endl;
            }
        }
        cv::imshow("c", img);  
        cv::waitKey(1); 
    }
    return 0;
}