| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445 |
- #include "../pch/pch.h"
- #include "YoloFeatureManager.h"
- #include <fstream>
- #include <algorithm>
- #include <iostream>
- #include <functional>
- #include <numeric>
- #include <sstream>
- #include "../tool/debuglog.h"
- #include "../worker/CVideoCaptureWorker.h"
- #include "YoloClassName.h"
- YoloFeatureManager::YoloFeatureManager()
- {
- inputWidth = 320;
- inputHeight = 320;
- }
- YoloFeatureManager::~YoloFeatureManager()
- {
-
- }
- void YoloFeatureManager::loadModel(const std::string & modelPath)
- {
- try
- {
- net = cv::dnn::readNetFromONNX(modelPath);
- CONF_THRESHOLD = 0.5f; // 可以根据需要调整置信度阈值
- NMS_THRESHOLD = 0.4f; // 可以根据需要调整NMS阈值
- FRUIT_VEGETABLE_COUNT = sizeof(FRUIT_VEGETABLE_NAMES) / sizeof(FRUIT_VEGETABLE_NAMES[0]);
- }
- catch (const std::exception& e)
- {
- std::string aa = std::string(e.what());
- DEBUG_LOG(("加载模型失败: " + std::string(e.what())).c_str());
- return;
- }
- }
- void YoloFeatureManager::loadModel(const std::string& modelPath, const std::string& configPath)
- {
- try
- {
- net = cv::dnn::readNetFromModelOptimizer(modelPath, configPath);
- // 设置目标设备 (可选: CPU, GPU, MYRIAD等)
- net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
- net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); // 或DNN_TARGET_MYRIAD等
- CONF_THRESHOLD = 0.5f; // 可以根据需要调整置信度阈值
- NMS_THRESHOLD = 0.4f; // 可以根据需要调整NMS阈值
- FRUIT_VEGETABLE_COUNT = sizeof(FRUIT_VEGETABLE_NAMES) / sizeof(FRUIT_VEGETABLE_NAMES[0]);
- }
- catch (const std::exception& e)
- {
- std::string aa = std::string(e.what());
- DEBUG_LOG(("加载模型失败: " + std::string(e.what())).c_str());
- return;
- }
- }
- // 寻找置信度最高的类别
- int YoloFeatureManager::getTopClass(const cv::Mat& output)
- {
- // 将输出展平为一维数组
- cv::Mat flatOutput = output.reshape(1, 1);
- double maxVal;
- cv::Point maxLoc;
- // 找到最大值的位置(即最高置信度类别索引)
- cv::minMaxLoc(flatOutput, nullptr, &maxVal, nullptr, &maxLoc);
- return maxLoc.x;
- }
- // 获取类别名称
- std::string YoloFeatureManager::getClassName(std::size_t classId) const
- {
- if (classId >= 0 && classId < FRUIT_VEGETABLE_COUNT)
- {
- std::string englishName = FRUIT_VEGETABLE_NAMES[classId];
- // 这里可以添加一个映射表,将英文名称映射到中文名称
- auto it = FRUIT_VEGETABLE_CN_MAP.find(englishName);
- if (it != FRUIT_VEGETABLE_CN_MAP.end())
- {
- return it->second; // 找到则返回中文名称
- }
- else
- {
- return "Unknown"; // 未找到则返回默认值
- }
- }
- return "Unknown";
- }
- std::vector<float> YoloFeatureManager::extractFeatures(const std::string & imagePath)
- {
- try
- {
- auto time_1 = std::chrono::high_resolution_clock::now();
- cv::Mat image = cv::imread(imagePath);
- if (image.empty())
- {
- throw std::runtime_error("Could not load image: " + imagePath);
- }
- // 转换为blob(归一化+通道转换)
- cv::Mat blob;
- cv::dnn::blobFromImage(image, blob, 1.0 / 255, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
- net.setInput(blob);
- auto time_2 = std::chrono::high_resolution_clock::now();
- //获取模型的所有层名称(调试用)
- //std::vector<cv::String> layerNames = net.getLayerNames();
- // 获取Flatten层输出(yolo26s-cls的Flatten层名称为 "onnx_node!/model.10/Flatten",这是GAP后分类头前的一层)'
- // GAP层是onnx_node!/model.10/pool/GlobalAveragePool
- cv::Mat featureMat = net.forward("onnx_node!/model.10/Flatten");
- // 检查输出是否有效
- if (featureMat.empty())
- {
- throw std::runtime_error("模型前向传播未产生有效输出");
- }
- if (featureMat.type() != CV_32F)
- {
- throw std::runtime_error("Mat类型错误");
- }
- float norm_before = cv::norm(featureMat, cv::NORM_L2);
- DEBUG_HELPER::debug_printf("归一化前 norm:%.6f\n", norm_before);
- cv::normalize(featureMat, featureMat, 1.0, 0.0, cv::NORM_L2); //L2归一化
- float norm_after = cv::norm(featureMat, cv::NORM_L2);
- DEBUG_HELPER::debug_printf("归一化后 norm:%.6f\n", norm_after);
- // 将Mat格式的特征转换为vector<float>(方便后续计算/存储)
- std::vector<float> feature_vector;
- feature_vector.assign((float *)featureMat.data, (float *)featureMat.data + featureMat.total());
- //进行时间统计
- auto time_3 = std::chrono::high_resolution_clock::now();
- auto duration_1 = std::chrono::duration_cast<std::chrono::milliseconds>(time_2 - time_1);
- std::wstring msg = L"图片处理耗时: " + std::to_wstring(duration_1.count()) + L" 毫秒";
- DEBUG_LOG(msg.c_str());
- auto duration_2 = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_2);
- std::wstring msg2 = L"模型推理耗时: " + std::to_wstring(duration_2.count()) + L" 毫秒";
- DEBUG_LOG(msg2.c_str());
- auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_1);
- std::wstring msg4 = L"总耗时: " + std::to_wstring(totalDuration.count()) + L" 毫秒";
- DEBUG_LOG(msg4.c_str());
- return feature_vector;
- }
- catch (const std::exception & e)
- {
- std::string aa = std::string(e.what());
- DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
- return {};
- }
- }
- void YoloFeatureManager::Detection(const std::string & imagePath)
- {
- cv::Mat image = cv::imread(imagePath);
- if (image.empty())
- {
- throw std::runtime_error("Could not load image: " + imagePath);
- }
- // 构造输入blob(图像预处理)
- // 参数说明:输入图像、缩放因子、输入尺寸、均值归一化、是否交换RB通道、是否裁剪
- cv::Mat blob;
- cv::dnn::blobFromImage(image, blob, 1.0 / 255, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
- // -------------------------- 4. 模型推理 --------------------------
- // 设置网络输入
- net.setInput(blob);
- // 获取输出层名称
- std::vector<std::string> outLayerNames = net.getUnconnectedOutLayersNames();
- // 前向推理
- std::vector<cv::Mat> outs;
- net.forward(outs, outLayerNames);
- // -------------------------- 5. 解析推理结果 --------------------------
- std::vector<cv::Rect> boxes; // 检测框
- std::vector<int> classIds; // 类别ID
- std::vector<float> confidences; // 置信度
- // 遍历所有输出层的结果
- for (const cv::Mat & out : outs)
- {
- float * data = (float *)out.data;
- // 遍历每个检测结果
- for (int i = 0; i < out.rows; i++, data += out.cols)
- {
- // 获取类别置信度
- cv::Mat scores = out.row(i).colRange(5, out.cols);
- cv::Point classIdPoint;
- double confidence;
- // 找到最大置信度对应的类别
- cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
- // 过滤低置信度结果
- if (confidence > CONF_THRESHOLD)
- {
- // 解析检测框坐标(YOLO输出的是相对坐标,需转换为绝对坐标)
- int centerX = (int)(data[0] * image.cols);
- int centerY = (int)(data[1] * image.rows);
- int width = (int)(data[2] * image.cols);
- int height = (int)(data[3] * image.rows);
- // 计算检测框左上角坐标
- int left = centerX - width / 2;
- int top = centerY - height / 2;
- // 保存结果
- boxes.push_back(cv::Rect(left, top, width, height));
- classIds.push_back(classIdPoint.x);
- confidences.push_back((float)confidence);
- }
- }
- }
- // -------------------------- 6. 非极大值抑制(NMS) --------------------------
- std::vector<int> indices;
- cv::dnn::NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD, indices);
- // 提取NMS后的结果
- std::vector<cv::Rect> finalBoxes;
- std::vector<int> finalClassIds;
- std::vector<float> finalConfidences;
- for (int idx : indices)
- {
- finalBoxes.push_back(boxes[idx]);
- finalClassIds.push_back(classIds[idx]);
- finalConfidences.push_back(confidences[idx]);
- }
- // -------------------------- 7. 绘制并显示结果 --------------------------
- drawDetection(image, finalBoxes, finalClassIds, finalConfidences);
- // 显示检测结果
- cv::imshow("YOLO Detection Result", image);
- // 保存检测结果
- cv::imwrite("result.jpg", image);
- cv::waitKey(0);
- cv::destroyAllWindows();
- }
- // 绘制检测结果
- void YoloFeatureManager::drawDetection(cv::Mat & img, const std::vector<cv::Rect> & boxes, const std::vector<int> & classIds,
- const std::vector<float> & confidences)
- {
- // 生成随机颜色(每个类别一种颜色)
- std::vector<cv::Scalar> colors;
- srand(time(0));
- for (std::size_t i = 0; i < FRUIT_VEGETABLE_COUNT; i++)
- {
- int r = rand() % 256;
- int g = rand() % 256;
- int b = rand() % 256;
- colors.push_back(cv::Scalar(r, g, b));
- }
- // 绘制每个检测框
- for (size_t i = 0; i < boxes.size(); i++)
- {
- cv::Rect box = boxes[i];
- // 绘制矩形框
- cv::rectangle(img, box, colors[classIds[i]], 2);
- // 构造标签文本(类别 + 置信度)
- std::string label = FRUIT_VEGETABLE_NAMES[classIds[i]] + ": " + std::to_string(confidences[i]).substr(0, 4);
- int baseLine;
- cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
- // 绘制标签背景
- cv::rectangle(img, cv::Point(box.x, box.y - labelSize.height),
- cv::Point(box.x + labelSize.width, box.y + baseLine),
- colors[classIds[i]], cv::FILLED);
- // 绘制标签文本
- cv::putText(img, label, cv::Point(box.x, box.y), cv::FONT_HERSHEY_SIMPLEX,
- 0.5, cv::Scalar(255, 255, 255), 1);
- }
- }
- std::string YoloFeatureManager::Class(cv::Mat & image)
- {
- try
- {
- std::string className = "";
- // ====================== 图像预处理 ======================
- // 转换为blob格式:归一化(0-1)、通道转换(BGR->RGB)、调整尺寸
- cv::Mat blob;
- cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
- net.setInput(blob);
- // ====================== 模型推理 ======================
- cv::Mat output = net.forward(); // 输出形状:1x1000(对应ImageNet 1000类)
- // ====================== 解析结果 ======================
- int topClassIdx = this->getTopClass(output);
- float topConfidence = output.at<float>(topClassIdx);
- // 只显示置信度高于阈值的结果
- if (topConfidence > 0.8)
- {
- className = this->getClassName(topClassIdx);
- }
- else
- {
- className = "Unknown";
- }
- // 在画面上绘制分类结果
- //std::wstring resultText = CLewaimaiString::ANSIToUnicode(className) + L" : " + std::to_wstring(round(topConfidence * 10000) / 100) + L"%";
- //this->drawChineseText(image, resultText.c_str(), cv::Point(20, 50), cv::Scalar(0, 255, 0), 24);
- //cv::imshow("yolo26n-cls 实时图像分类", image);
- //if (cv::waitKey(30) >= 0); // 按任意键退出
- return className;
- }
- catch (const std::exception& e)
- {
- std::string aa = std::string(e.what());
- DEBUG_LOG(("YOLO分类失败: " + std::string(e.what())).c_str());
- return {};
- }
-
- }
- std::string YoloFeatureManager::ClassFromVideoCapture()
- {
- try
- {
- cv::Mat image;
- CVideoCaptureWorker::GetInstance()->GetFrame(image);
- if (image.empty())
- {
- //DEBUG_LOG("从摄像头获取帧失败");
- return "Unknown";
- }
- std::string name = this->Class(image);
- return name;
- }
- catch (const std::exception& e)
- {
- std::string aa = std::string(e.what());
- DEBUG_LOG(("YOLO分类失败: " + std::string(e.what())).c_str());
- return {};
- }
-
- }
- void YoloFeatureManager::drawChineseText(cv::Mat & img, const wchar_t * text, cv::Point pos, cv::Scalar color, int fontSize)
- {
- // 1. 检查输入有效性
- if (img.empty() || text == nullptr || wcslen(text) == 0)
- {
- return;
- }
- if (img.type() != CV_8UC3)
- {
- // 仅支持 3 通道彩色图像
- cvtColor(img, img, cv::COLOR_GRAY2BGR);
- }
- // 2. 创建内存 DC 并关联临时位图(关键:基于图像的 DC 创建,而非屏幕 DC)
- HDC hScreenDC = GetDC(NULL);
- HDC hMemDC = CreateCompatibleDC(hScreenDC);
- // 创建与原图像尺寸、格式匹配的位图
- HBITMAP hMemBmp = CreateCompatibleBitmap(hScreenDC, img.cols, img.rows);
- // 保存原始位图句柄,用于后续恢复
- HBITMAP hOldBmp = (HBITMAP)SelectObject(hMemDC, hMemBmp);
- // 3. 将 OpenCV 图像数据复制到内存位图(保留原图像内容,而非黑色)
- BITMAPINFO bmi = { 0 };
- bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
- bmi.bmiHeader.biWidth = img.cols;
- bmi.bmiHeader.biHeight = -img.rows; // 翻转 Y 轴(OpenCV 与 GDI 坐标方向相反)
- bmi.bmiHeader.biPlanes = 1;
- bmi.bmiHeader.biBitCount = 24;
- bmi.bmiHeader.biCompression = BI_RGB;
- // 将 OpenCV 图像写入内存位图
- SetDIBits(hScreenDC, hMemBmp, 0, img.rows, img.data, &bmi, DIB_RGB_COLORS);
- // 4. 设置中文字体(修复字体创建参数,增加容错)
- HFONT hFont = CreateFont(
- fontSize, 0, 0, 0, FW_NORMAL, 0, 0, 0,
- GB2312_CHARSET, OUT_DEFAULT_PRECIS, CLIP_DEFAULT_PRECIS,
- DEFAULT_QUALITY, DEFAULT_PITCH | FF_DONTCARE, L"黑体"
- );
- HFONT hOldFont = (HFONT)SelectObject(hMemDC, hFont);
- // 5. 设置文字绘制属性(背景透明、颜色正确)
- SetBkMode(hMemDC, TRANSPARENT);
- // OpenCV 是 BGR,GDI 是 RGB,需转换
- SetTextColor(hMemDC, RGB((int)color[2], (int)color[1], (int)color[0]));
- // 6. 绘制中文字符(确保坐标在图像范围内)
- int textLen = wcslen(text);
- if (pos.x >= 0 && pos.y >= 0 && pos.x < img.cols && pos.y < img.rows)
- {
- TextOutW(hMemDC, pos.x, pos.y, text, textLen);
- }
- // 7. 将绘制后的位图数据复制回 OpenCV 图像
- GetDIBits(hScreenDC, hMemBmp, 0, img.rows, img.data, &bmi, DIB_RGB_COLORS);
- // 8. 释放资源(关键:恢复原始句柄后再删除,避免内存泄漏)
- SelectObject(hMemDC, hOldFont); // 恢复原始字体
- DeleteObject(hFont); // 删除自定义字体
- SelectObject(hMemDC, hOldBmp); // 恢复原始位图
- DeleteObject(hMemBmp); // 删除内存位图
- DeleteDC(hMemDC); // 删除内存 DC
- ReleaseDC(NULL, hScreenDC); // 释放屏幕 DC
- }
|