zhangyang
/
zhipuzi_pos_windows


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
							#include "../pch/pch.h"

#include "YoloFeatureManager.h"
#include <fstream>
#include <algorithm>
#include <iostream>
#include <functional>
#include <numeric>
#include <sstream>

#include "../tool/debuglog.h"

#include "../worker/CVideoCaptureWorker.h"

#include "YoloClassName.h"

// Constructs the manager with a square 320x320 network input size.
// The network itself is not loaded here; see loadModel().
YoloFeatureManager::YoloFeatureManager()
{
	// Both dimensions use the same edge length.
	constexpr int kInputEdge = 320;

	inputWidth = kInputEdge;
	inputHeight = kInputEdge;
}

// No explicit cleanup is performed; members release themselves.
YoloFeatureManager::~YoloFeatureManager() = default;

/// Loads an ONNX model into `net` and (re)sets the detection thresholds and
/// the class count.
///
/// @param modelPath  Path of the ONNX file handed to cv::dnn::readNetFromONNX.
///
/// Failures are not propagated: any std::exception raised while reading the
/// model is logged through DEBUG_LOG and `net` keeps its previous value.
void YoloFeatureManager::loadModel(const std::string & modelPath)
{
	// These settings do not depend on the model, so assign them before the
	// load: they stay well-defined even when reading the model throws.
	CONF_THRESHOLD = 0.5f; // confidence threshold, adjust as needed
	NMS_THRESHOLD = 0.4f;  // NMS threshold, adjust as needed

	FRUIT_VEGETABLE_COUNT = sizeof(FRUIT_VEGETABLE_NAMES) / sizeof(FRUIT_VEGETABLE_NAMES[0]);

	try
	{
		net = cv::dnn::readNetFromONNX(modelPath);
	}
	catch (const std::exception& e)
	{
		DEBUG_LOG(("加载模型失败: " + std::string(e.what())).c_str());
	}
}

/// Loads a Model Optimizer (OpenVINO IR) network into `net`, selects the
/// Inference Engine backend on the CPU target, and (re)sets the detection
/// thresholds and the class count.
///
/// @param modelPath   First argument forwarded to cv::dnn::readNetFromModelOptimizer.
/// @param configPath  Second argument forwarded to cv::dnn::readNetFromModelOptimizer.
///
/// NOTE(review): OpenCV documents that function as (xml, bin) — confirm that
/// callers pass the .xml path as modelPath and the .bin path as configPath.
///
/// Failures are not propagated: any std::exception raised while reading or
/// configuring the network is logged through DEBUG_LOG.
void YoloFeatureManager::loadModel(const std::string& modelPath, const std::string& configPath)
{
	// These settings do not depend on the model, so assign them before the
	// load: they stay well-defined even when reading the model throws.
	CONF_THRESHOLD = 0.5f; // confidence threshold, adjust as needed
	NMS_THRESHOLD = 0.4f;  // NMS threshold, adjust as needed

	FRUIT_VEGETABLE_COUNT = sizeof(FRUIT_VEGETABLE_NAMES) / sizeof(FRUIT_VEGETABLE_NAMES[0]);

	try
	{
		net = cv::dnn::readNetFromModelOptimizer(modelPath, configPath);

		// Select the execution device (other options: GPU, MYRIAD, ...).
		net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);  // or DNN_TARGET_MYRIAD etc.
	}
	catch (const std::exception& e)
	{
		DEBUG_LOG(("加载模型失败: " + std::string(e.what())).c_str());
	}
}

// Returns the index of the largest element of `output`, i.e. the class with
// the highest score.
int YoloFeatureManager::getTopClass(const cv::Mat& output)
{
	cv::Point bestLocation;

	// View the scores as a single-channel, single-row matrix so that the
	// x coordinate of the maximum is the flat class index.
	const cv::Mat scoresRow = output.reshape(1, 1);

	// Only the location of the maximum is needed; the other outputs are skipped.
	cv::minMaxLoc(scoresRow, nullptr, nullptr, nullptr, &bestLocation);

	return bestLocation.x;
}

/// Maps a class index to its display name.
///
/// @param classId  Index into FRUIT_VEGETABLE_NAMES.
/// @return The entry of FRUIT_VEGETABLE_CN_MAP keyed by the class's English
///         name, or "Unknown" when the index is out of range or the name has
///         no entry in the map.
std::string YoloFeatureManager::getClassName(std::size_t classId) const
{
	// classId is unsigned, so only the upper bound needs checking.
	if (classId >= FRUIT_VEGETABLE_COUNT)
	{
		return "Unknown";
	}

	// Look the English name up directly instead of copying it first.
	const auto it = FRUIT_VEGETABLE_CN_MAP.find(FRUIT_VEGETABLE_NAMES[classId]);
	if (it == FRUIT_VEGETABLE_CN_MAP.end())
	{
		return "Unknown"; // no translation registered for this name
	}

	return it->second;
}

std::vector<float> YoloFeatureManager::extractFeatures(const std::string & imagePath)
{
	try
	{
		auto time_1 = std::chrono::high_resolution_clock::now();

		cv::Mat image = cv::imread(imagePath);
		if (image.empty())
		{
			throw std::runtime_error("Could not load image: " + imagePath);
		}

		// 转换为blob（归一化+通道转换）
		cv::Mat blob;
		cv::dnn::blobFromImage(image, blob, 1.0 / 255, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
		net.setInput(blob);

		auto time_2 = std::chrono::high_resolution_clock::now();

		//获取模型的所有层名称（调试用）
		//std::vector<cv::String> layerNames = net.getLayerNames();

		// 获取Flatten层输出（yolo26s-cls的Flatten层名称为 "onnx_node!/model.10/Flatten"，这是GAP后分类头前的一层）'
		// GAP层是onnx_node!/model.10/pool/GlobalAveragePool
		cv::Mat featureMat = net.forward("onnx_node!/model.10/Flatten");

		// 检查输出是否有效
		if (featureMat.empty())
		{
			throw std::runtime_error("模型前向传播未产生有效输出");
		}

		if (featureMat.type() != CV_32F)
		{
			throw std::runtime_error("Mat类型错误");
		}

		float norm_before = cv::norm(featureMat, cv::NORM_L2);
		DEBUG_HELPER::debug_printf("归一化前 norm：%.6f\n", norm_before);

		cv::normalize(featureMat, featureMat, 1.0, 0.0, cv::NORM_L2); //L2归一化

		float norm_after = cv::norm(featureMat, cv::NORM_L2);
		DEBUG_HELPER::debug_printf("归一化后 norm：%.6f\n", norm_after);

		// 将Mat格式的特征转换为vector<float>（方便后续计算/存储）
		std::vector<float> feature_vector;
		feature_vector.assign((float *)featureMat.data, (float *)featureMat.data + featureMat.total());

		//进行时间统计
		auto time_3 = std::chrono::high_resolution_clock::now();

		auto duration_1 = std::chrono::duration_cast<std::chrono::milliseconds>(time_2 - time_1);
		std::wstring msg = L"图片处理耗时: " + std::to_wstring(duration_1.count()) + L" 毫秒";
		DEBUG_LOG(msg.c_str());

		auto duration_2 = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_2);
		std::wstring msg2 = L"模型推理耗时: " + std::to_wstring(duration_2.count()) + L" 毫秒";
		DEBUG_LOG(msg2.c_str());

		auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_1);
		std::wstring msg4 = L"总耗时: " + std::to_wstring(totalDuration.count()) + L" 毫秒";
		DEBUG_LOG(msg4.c_str());

		return feature_vector;
	}
	catch (const std::exception & e)
	{
		std::string aa = std::string(e.what());
		DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
		return {};
	}
}

void YoloFeatureManager::Detection(const std::string & imagePath)
{
	cv::Mat image = cv::imread(imagePath);
	if (image.empty())
	{
		throw std::runtime_error("Could not load image: " + imagePath);
	}

	// 构造输入blob（图像预处理）
	// 参数说明：输入图像、缩放因子、输入尺寸、均值归一化、是否交换RB通道、是否裁剪
	cv::Mat blob;
	cv::dnn::blobFromImage(image, blob, 1.0 / 255, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);

	// -------------------------- 4. 模型推理 --------------------------
	// 设置网络输入
	net.setInput(blob);

	// 获取输出层名称
	std::vector<std::string> outLayerNames = net.getUnconnectedOutLayersNames();

	// 前向推理
	std::vector<cv::Mat> outs;
	net.forward(outs, outLayerNames);

	// -------------------------- 5. 解析推理结果 --------------------------
	std::vector<cv::Rect> boxes;        // 检测框
	std::vector<int> classIds;      // 类别ID
	std::vector<float> confidences; // 置信度

	// 遍历所有输出层的结果
	for (const cv::Mat & out : outs)
	{
		float * data = (float *)out.data;
		// 遍历每个检测结果
		for (int i = 0; i < out.rows; i++, data += out.cols)
		{
			// 获取类别置信度
			cv::Mat scores = out.row(i).colRange(5, out.cols);
			cv::Point classIdPoint;
			double confidence;

			// 找到最大置信度对应的类别
			cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);

			// 过滤低置信度结果
			if (confidence > CONF_THRESHOLD)
			{
				// 解析检测框坐标（YOLO输出的是相对坐标，需转换为绝对坐标）
				int centerX = (int)(data[0] * image.cols);
				int centerY = (int)(data[1] * image.rows);
				int width = (int)(data[2] * image.cols);
				int height = (int)(data[3] * image.rows);

				// 计算检测框左上角坐标
				int left = centerX - width / 2;
				int top = centerY - height / 2;

				// 保存结果
				boxes.push_back(cv::Rect(left, top, width, height));
				classIds.push_back(classIdPoint.x);
				confidences.push_back((float)confidence);
			}
		}
	}

	// -------------------------- 6. 非极大值抑制（NMS） --------------------------
	std::vector<int> indices;
	cv::dnn::NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD, indices);

	// 提取NMS后的结果
	std::vector<cv::Rect> finalBoxes;
	std::vector<int> finalClassIds;
	std::vector<float> finalConfidences;
	for (int idx : indices)
	{
		finalBoxes.push_back(boxes[idx]);
		finalClassIds.push_back(classIds[idx]);
		finalConfidences.push_back(confidences[idx]);
	}

	// -------------------------- 7. 绘制并显示结果 --------------------------
	drawDetection(image, finalBoxes, finalClassIds, finalConfidences);

	// 显示检测结果
	cv::imshow("YOLO Detection Result", image);
	// 保存检测结果
	cv::imwrite("result.jpg", image);

	cv::waitKey(0);
	cv::destroyAllWindows();
}

/// Draws detection boxes with "<name>: <confidence>" labels onto `img`.
///
/// @param img          Image to draw on (modified in place).
/// @param boxes        Box rectangles in pixel coordinates.
/// @param classIds     Class id for each box, parallel to `boxes`.
/// @param confidences  Confidence for each box, parallel to `boxes`.
///
/// One random colour is generated per known class on every call; note that
/// this reseeds the global C RNG via srand(). If the three vectors differ in
/// length only the common prefix is drawn. A class id outside
/// [0, FRUIT_VEGETABLE_COUNT) is drawn with a fixed colour and a
/// "class <id>" label instead of indexing out of bounds.
void YoloFeatureManager::drawDetection(cv::Mat & img, const std::vector<cv::Rect> & boxes, const std::vector<int> & classIds,
	const std::vector<float> & confidences)
{
	// One random colour per class.
	std::vector<cv::Scalar> colors;
	srand(static_cast<unsigned int>(time(0)));
	for (std::size_t i = 0; i < FRUIT_VEGETABLE_COUNT; i++)
	{
		int r = rand() % 256;
		int g = rand() % 256;
		int b = rand() % 256;
		colors.push_back(cv::Scalar(r, g, b));
	}

	// Colour used when the model reports a class id we have no entry for.
	const cv::Scalar fallbackColor(0, 0, 255);

	// Never read past the shortest of the three parallel vectors.
	const std::size_t drawCount = std::min(boxes.size(), std::min(classIds.size(), confidences.size()));

	for (std::size_t i = 0; i < drawCount; i++)
	{
		const cv::Rect & box = boxes[i];
		const int classId = classIds[i];
		const bool knownClass = classId >= 0 && static_cast<std::size_t>(classId) < colors.size();
		const cv::Scalar & color = knownClass ? colors[classId] : fallbackColor;

		// Box outline.
		cv::rectangle(img, box, color, 2);

		// Label text: class name + confidence truncated to 4 characters.
		std::string label = knownClass ? std::string(FRUIT_VEGETABLE_NAMES[classId]) : "class " + std::to_string(classId);
		label += ": ";
		label += std::to_string(confidences[i]).substr(0, 4);

		int baseLine;
		cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

		// Filled background behind the label.
		cv::rectangle(img, cv::Point(box.x, box.y - labelSize.height),
			cv::Point(box.x + labelSize.width, box.y + baseLine),
			color, cv::FILLED);

		// Label text in white.
		cv::putText(img, label, cv::Point(box.x, box.y), cv::FONT_HERSHEY_SIMPLEX,
			0.5, cv::Scalar(255, 255, 255), 1);
	}
}

/// Classifies `image` with the loaded network.
///
/// @param image  Frame to classify; it is only read by this function.
/// @return The display name from getClassName() for the top-scoring class
///         when its score exceeds 0.8, "Unknown" when it does not, and an
///         empty string when any std::exception is thrown (the failure is
///         logged through DEBUG_LOG).
std::string YoloFeatureManager::Class(cv::Mat & image)
{
	// Minimum top score for a result to be reported.
	constexpr double kMinConfidence = 0.8;

	try
	{
		// ---- Pre-processing ----
		// Blob conversion: scale to 0-1, swap R/B channels, resize.
		cv::Mat blob;
		cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
		net.setInput(blob);

		// ---- Inference ----
		// NOTE(review): presumably one score per class (1 x N) — confirm
		// against the loaded model.
		cv::Mat output = net.forward();

		// ---- Interpret the result ----
		const int topClassIdx = this->getTopClass(output);
		const float topConfidence = output.at<float>(topClassIdx);

		// Only report results above the confidence threshold.
		if (topConfidence > kMinConfidence)
		{
			return this->getClassName(topClassIdx);
		}

		return "Unknown";
	}
	catch (const std::exception& e)
	{
		DEBUG_LOG(("YOLO分类失败: " + std::string(e.what())).c_str());
		return {};
	}
}

/// Grabs the current frame from CVideoCaptureWorker and classifies it.
///
/// @return "Unknown" when no frame is available, otherwise whatever Class()
///         returns for the frame; an empty string when a std::exception
///         escapes (the failure is logged through DEBUG_LOG).
std::string YoloFeatureManager::ClassFromVideoCapture()
{
	try
	{
		cv::Mat image;
		CVideoCaptureWorker::GetInstance()->GetFrame(image);
		if (image.empty())
		{
			// No frame from the camera; deliberately not logged.
			return "Unknown";
		}

		return this->Class(image);
	}
	catch (const std::exception& e)
	{
		DEBUG_LOG(("YOLO分类失败: " + std::string(e.what())).c_str());
		return {};
	}
}

/// Draws wide-character (e.g. Chinese) text onto an OpenCV image using GDI.
///
/// @param img       Image to draw on. An image that is not CV_8UC3 is first
///                  passed through cvtColor(COLOR_GRAY2BGR), in place.
/// @param text      NUL-terminated wide string; null or empty is a no-op.
/// @param pos       Top-left anchor of the text; nothing is drawn when it
///                  lies outside the image.
/// @param color     Text colour in OpenCV channel order (B, G, R).
/// @param fontSize  Font height passed to CreateFontW.
///
/// The pixels are round-tripped through a 24-bit DIB staging buffer whose
/// scanlines are padded to 4 bytes, because GDI requires DWORD-aligned rows
/// while cv::Mat rows may be tightly packed or belong to a larger parent
/// image. If any GDI call fails the image is left unchanged.
void YoloFeatureManager::drawChineseText(cv::Mat & img, const wchar_t * text, cv::Point pos, cv::Scalar color, int fontSize)
{
	// 1. Validate the input.
	if (img.empty() || text == nullptr)
	{
		return;
	}
	const std::size_t textLen = wcslen(text);
	if (textLen == 0)
	{
		return;
	}
	if (img.type() != CV_8UC3)
	{
		// Only 3-channel 8-bit images are supported below.
		cv::cvtColor(img, img, cv::COLOR_GRAY2BGR);
		if (img.type() != CV_8UC3)
		{
			return;
		}
	}

	// The text is only drawn when its anchor is inside the image.
	if (pos.x < 0 || pos.y < 0 || pos.x >= img.cols || pos.y >= img.rows)
	{
		return;
	}

	// 2. Stage the pixels in a buffer with DWORD-aligned scanlines. This is
	//    allocated before any GDI handle so an allocation failure cannot
	//    leak handles.
	const std::size_t rowBytes = static_cast<std::size_t>(img.cols) * 3;
	const std::size_t dibStride = (rowBytes + 3) & ~static_cast<std::size_t>(3);
	std::vector<unsigned char> dib(dibStride * static_cast<std::size_t>(img.rows), 0);
	for (int y = 0; y < img.rows; ++y)
	{
		std::copy_n(img.ptr<unsigned char>(y), rowBytes, dib.data() + static_cast<std::size_t>(y) * dibStride);
	}

	// 3. Create a memory DC and a bitmap compatible with the screen.
	HDC hScreenDC = GetDC(NULL);
	if (hScreenDC == NULL)
	{
		return;
	}
	HDC hMemDC = CreateCompatibleDC(hScreenDC);
	HBITMAP hMemBmp = CreateCompatibleBitmap(hScreenDC, img.cols, img.rows);

	if (hMemDC != NULL && hMemBmp != NULL)
	{
		BITMAPINFO bmi = { 0 };
		bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
		bmi.bmiHeader.biWidth = img.cols;
		bmi.bmiHeader.biHeight = -img.rows; // negative height: top-down rows, like cv::Mat
		bmi.bmiHeader.biPlanes = 1;
		bmi.bmiHeader.biBitCount = 24;
		bmi.bmiHeader.biCompression = BI_RGB;

		// 4. Upload the image. The bitmap must not be selected into a DC
		//    while SetDIBits/GetDIBits run, so do this before SelectObject.
		if (SetDIBits(hScreenDC, hMemBmp, 0, static_cast<UINT>(img.rows), dib.data(), &bmi, DIB_RGB_COLORS) != 0)
		{
			HBITMAP hOldBmp = static_cast<HBITMAP>(SelectObject(hMemDC, hMemBmp));

			// 5. Select the Chinese font; fall back to the DC's default
			//    font if it cannot be created.
			HFONT hFont = CreateFontW(
				fontSize, 0, 0, 0, FW_NORMAL, 0, 0, 0,
				GB2312_CHARSET, OUT_DEFAULT_PRECIS, CLIP_DEFAULT_PRECIS,
				DEFAULT_QUALITY, DEFAULT_PITCH | FF_DONTCARE, L"黑体"
			);
			HFONT hOldFont = NULL;
			if (hFont != NULL)
			{
				hOldFont = static_cast<HFONT>(SelectObject(hMemDC, hFont));
			}

			// 6. Transparent background; convert the colour from OpenCV's
			//    BGR order to GDI's RGB.
			SetBkMode(hMemDC, TRANSPARENT);
			SetTextColor(hMemDC, RGB((int)color[2], (int)color[1], (int)color[0]));

			// 7. Draw the text.
			TextOutW(hMemDC, pos.x, pos.y, text, static_cast<int>(textLen));

			// 8. Restore the DC's original objects; this also deselects the
			//    bitmap so it can be read back.
			if (hFont != NULL)
			{
				SelectObject(hMemDC, hOldFont);
				DeleteObject(hFont);
			}
			SelectObject(hMemDC, hOldBmp);

			// 9. Download the pixels and copy them back row by row.
			if (GetDIBits(hScreenDC, hMemBmp, 0, static_cast<UINT>(img.rows), dib.data(), &bmi, DIB_RGB_COLORS) != 0)
			{
				for (int y = 0; y < img.rows; ++y)
				{
					std::copy_n(dib.data() + static_cast<std::size_t>(y) * dibStride, rowBytes, img.ptr<unsigned char>(y));
				}
			}
		}
	}

	// 10. Release whatever was created.
	if (hMemBmp != NULL)
	{
		DeleteObject(hMemBmp);
	}
	if (hMemDC != NULL)
	{
		DeleteDC(hMemDC);
	}
	ReleaseDC(NULL, hScreenDC);
}