张洋 4 napja
szülő
commit
e4c8ddbb21

BIN
bin/Win32/Release/zhipuzi_pos_windows/3.jpg


BIN
bin/Win32/Release/zhipuzi_pos_windows/ai/best.onnx


BIN
bin/Win32/Release/zhipuzi_pos_windows/ai/best_800.onnx


BIN
bin/Win32/Release/zhipuzi_pos_windows/ai/yolo26n-cls.onnx


BIN
bin/Win32/Release/zhipuzi_pos_windows/ai/yolo26n.onnx


BIN
res/ai/yolo26n-cls-fruit.onnx


BIN
bin/Win32/Release/zhipuzi_pos_windows/image_features.db


BIN
res/ai/best.onnx


BIN
res/ai/best_800.onnx


BIN
res/ai/yolo26n-cls.onnx


BIN
res/ai/yolo26n.onnx


BIN
res/ai/yolo26s-cls.onnx


BIN
res/images/jidan/1.jpeg


BIN
res/images/jidan/2.webp


BIN
res/images/jidan/3.png


BIN
res/images/jidan/4.webp


+ 78 - 193
zhipuzi_pos_windows/ai/YoloFeatureExtractor.cpp

@@ -6,17 +6,18 @@
 #include <iostream>
 #include <iostream>
 #include <functional>
 #include <functional>
 #include <numeric>
 #include <numeric>
+#include <sstream>
 
 
 #include "../tool/debuglog.h"
 #include "../tool/debuglog.h"
 
 
-YoloFeatureExtractor::YoloFeatureExtractor(const std::string & modelPath, const std::string & classesPath)
-	: inputWidth(224), inputHeight(224)
+YoloFeatureExtractor::YoloFeatureExtractor(const std::string& modelPath, const std::string& classesPath)
+	: inputWidth(640), inputHeight(640)
 {
 {
 	net = cv::dnn::readNetFromONNX(modelPath);
 	net = cv::dnn::readNetFromONNX(modelPath);
 	loadClassNames(classesPath);
 	loadClassNames(classesPath);
 }
 }
 
 
-void YoloFeatureExtractor::loadClassNames(const std::string & file)
+void YoloFeatureExtractor::loadClassNames(const std::string& file)
 {
 {
 	std::ifstream ifs(file);
 	std::ifstream ifs(file);
 	std::string line;
 	std::string line;
@@ -26,7 +27,7 @@ void YoloFeatureExtractor::loadClassNames(const std::string & file)
 	}
 	}
 }
 }
 
 
-std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat & featureMap)
+std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat& featureMap)
 {
 {
 	std::vector<float> features;
 	std::vector<float> features;
 
 
@@ -61,7 +62,16 @@ std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat & fe
 	return pooled.reshape(1, 1);
 	return pooled.reshape(1, 1);
 }
 }
 
 
-std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
+// L2 normalization: scales `feat` in place by the reciprocal of its L2 norm.
+// The matrix is left untouched when it is empty, when its type() is not
+// CV_32F, or when its norm does not exceed 1e-6.
+void YoloFeatureExtractor::normalizeL2(cv::Mat& feat)
+{
+	const bool usable = !feat.empty() && feat.type() == CV_32F;
+	if (!usable)
+	{
+		return;
+	}
+
+	// Skip the division for a (near-)zero vector so the result stays finite.
+	float magnitude = cv::norm(feat, cv::NORM_L2);
+	if (magnitude > 1e-6)
+	{
+		feat /= magnitude;
+	}
+}
+
+std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string& imagePath)
 {
 {
 	try
 	try
 	{
 	{
@@ -73,58 +83,87 @@ std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & ima
 			throw std::runtime_error("Could not load image: " + imagePath);
 			throw std::runtime_error("Could not load image: " + imagePath);
 		}
 		}
 
 
-		cv::Mat resizedImage;
-		//cv::resize(image, resizedImage, cv::Size(inputWidth, inputHeight));
+		cv::resize(image, image, cv::Size(inputWidth, inputHeight));
 
 
 		cv::Mat blob;
 		cv::Mat blob;
 		cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
 		cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
 		net.setInput(blob);
 		net.setInput(blob);
 
 
+		std::string info = custom_printf("✅ blob 形状:%d×%d×%d×%d\n", blob.size[0], blob.size[1], blob.size[2], blob.size[3]);
+		DEBUG_LOG(info.c_str());
+
 		auto time_2 = std::chrono::high_resolution_clock::now();
 		auto time_2 = std::chrono::high_resolution_clock::now();
 
 
 		std::vector<cv::String> layerNames = net.getLayerNames();
 		std::vector<cv::String> layerNames = net.getLayerNames();
 		std::vector<cv::String> outputNames;
 		std::vector<cv::String> outputNames;
 
 
-		// 选择GAP层(对于yolo2026,通常是倒数第6层)的输出作为特征向量
-		outputNames.push_back(layerNames[layerNames.size() - 6]);
+		std::vector<float> features;
 
 
-		std::vector<cv::Mat> outputs;
-		net.forward(outputs, outputNames);
+		int layerIndex = -6;
+		if (layerIndex == -1)
+		{
+			outputNames.push_back(layerNames[layerNames.size() - 1]);
 
 
-		auto time_3 = std::chrono::high_resolution_clock::now();
+			std::vector<cv::Mat> outputs;
+			net.forward(outputs, outputNames);
 
 
-		// 检查输出是否有效
-		if (outputs.empty() || outputs[0].empty())
-		{
-			throw std::runtime_error("模型前向传播未产生有效输出");
+			for (size_t i = 0; i < outputs.size(); ++i)
+			{
+				cv::Mat output = outputs[i];
+				features.reserve(features.size() + output.total());
+				for (int j = 0; j < output.total(); ++j)
+				{
+					features.push_back(output.at<float>(j));
+				}
+			}
 		}
 		}
+		else if (layerIndex == -6)
+		{
+			// 选择GAP层(对于yolo2026,通常是倒数第6层)的输出作为特征向量
+			outputNames.push_back(layerNames[layerNames.size() - 6]);
+
+			std::vector<cv::Mat> outputs;
+			net.forward(outputs, outputNames);
 
 
-		// 获取GAP层输出并转换为特征向量
-		//cv::Mat featuresMat = outputs[0].reshape(1, 1);
-		cv::Mat featuresMat = outputs[0];
-		cv::normalize(featuresMat, featuresMat, 1.0, 0.0, cv::NORM_L2);
+			// 检查输出是否有效
+			if (outputs.empty() || outputs[0].empty())
+			{
+				throw std::runtime_error("模型前向传播未产生有效输出");
+			}
 
 
-		// 转换为std::vector<float>
-		std::vector<float> features(featuresMat.begin<float>(), featuresMat.end<float>());
+			// 获取GAP层输出并转换为特征向量
+			cv::Mat featuresMat = outputs[0];
+			info = printf("✅ 原始特征形状:%d×%d,类型:%d(CV_32F=5)\n", featuresMat.cols, featuresMat.rows, featuresMat.type());
+			DEBUG_LOG(info.c_str());
+			
+			cv::Mat featuresMatVec = featuresMat.reshape(1, 1);
+			info = printf("✅ 重塑后特征形状:%d×%d,类型:%d\n", featuresMatVec.cols, featuresMatVec.rows, featuresMatVec.type());
+			DEBUG_LOG(info.c_str());
 
 
-		/*
-		// 应用全局平均池化获取特征向量
-		//std::vector<float> features = globalAveragePooling(outputs[0]);
+			float norm_before = cv::norm(featuresMatVec, cv::NORM_L2);
+			printf("📌 归一化前 norm:%.6f\n", norm_before);
 
 
-		// L2归一化特征向量
-		if (!features.empty())
-		{
-			float norm = std::sqrt(std::inner_product(features.begin(), features.end(), features.begin(), 0.0f));
-			if (norm > 1e-6)
+
+			normalizeL2(featuresMatVec);
+			//cv::normalize(featuresMat, featuresMat, 1.0, 0.0, cv::NORM_L2);
+
+			float norm_after = cv::norm(featuresMatVec, cv::NORM_L2);
+			printf("📌 归一化后 norm:%.6f\n", norm_after);
+
+			features.reserve(features.size() + featuresMat.total());
+			for (int j = 0; j < featuresMat.total(); ++j)
 			{
 			{
-				for (auto & val : features)
-				{
-					val /= norm;
-				}
+				features.push_back(featuresMat.at<float>(j));
 			}
 			}
-		}*/
 
 
-		auto time_4 = std::chrono::high_resolution_clock::now();
+			int a = 1;
+
+			// 转换为std::vector<float>
+			//features = std::vector<float>(featuresMat.begin<float>(), featuresMat.end<float>());
+		}
+
+
+		auto time_3 = std::chrono::high_resolution_clock::now();
 
 
 
 
 		auto duration_1 = std::chrono::duration_cast<std::chrono::milliseconds>(time_2 - time_1);
 		auto duration_1 = std::chrono::duration_cast<std::chrono::milliseconds>(time_2 - time_1);
@@ -132,173 +171,19 @@ std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & ima
 		DEBUG_LOG(msg.c_str());
 		DEBUG_LOG(msg.c_str());
 
 
 		auto duration_2 = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_2);
 		auto duration_2 = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_2);
-		std::wstring msg2 = L"模型前向传播完成,耗时: " + std::to_wstring(duration_2.count()) + L" 毫秒";
+		std::wstring msg2 = L"模型推理完成,耗时: " + std::to_wstring(duration_2.count()) + L" 毫秒";
 		DEBUG_LOG(msg2.c_str());
 		DEBUG_LOG(msg2.c_str());
 
 
-		auto duration_3 = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_3);
-		std::wstring msg3 = L"特征处理完成,耗时: " + std::to_wstring(duration_3.count()) + L" 毫秒";
-		DEBUG_LOG(msg3.c_str());
-
-		auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_1);
+		auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_1);
 		std::wstring msg4 = L"总耗时: " + std::to_wstring(totalDuration.count()) + L" 毫秒";
 		std::wstring msg4 = L"总耗时: " + std::to_wstring(totalDuration.count()) + L" 毫秒";
 		DEBUG_LOG(msg4.c_str());
 		DEBUG_LOG(msg4.c_str());
 
 
 		return features;
 		return features;
 	}
 	}
-	catch (const std::exception & e)
+	catch (const std::exception& e)
 	{
 	{
 		std::string aa = std::string(e.what());
 		std::string aa = std::string(e.what());
 		DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
 		DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
 		return {};
 		return {};
 	}
 	}
-}
-
-std::vector<float> YoloFeatureExtractor::extractBackboneFeatures(const std::string & imagePath)
-{
-	cv::Mat image = cv::imread(imagePath);
-	if (image.empty())
-	{
-		throw std::runtime_error("Could not load image: " + imagePath);
-	}
-
-	cv::Mat blob;
-	cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
-	net.setInput(blob);
-
-	std::vector<cv::String> layerNames = net.getLayerNames();
-	std::vector<cv::String> backboneLayers;
-
-	for (const auto & name : layerNames)
-	{
-		if (name.find("backbone") != std::string::npos ||
-			name.find("conv") != std::string::npos ||
-			name.find("stage") != std::string::npos)
-		{
-			backboneLayers.push_back(name);
-		}
-	}
-
-	if (backboneLayers.empty())
-	{
-		backboneLayers.push_back(layerNames[layerNames.size() / 2]);
-	}
-
-	std::vector<cv::Mat> outputs;
-	net.forward(outputs, backboneLayers);
-
-	std::vector<float> features;
-	for (size_t i = 0; i < outputs.size(); ++i)
-	{
-		cv::Mat output = outputs[i];
-		features.reserve(features.size() + output.total());
-		for (int j = 0; j < output.total(); ++j)
-		{
-			features.push_back(output.at<float>(j));
-		}
-	}
-
-	return features;
-}
-
-std::vector<std::vector<float>> YoloFeatureExtractor::extractROIFeatures(const std::string & imagePath)
-{
-	cv::Mat image = cv::imread(imagePath);
-	if (image.empty())
-	{
-		throw std::runtime_error("Could not load image: " + imagePath);
-	}
-
-	cv::Mat blob;
-	cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
-	net.setInput(blob);
-
-	std::vector<cv::Mat> outputs;
-	net.forward(outputs, net.getUnconnectedOutLayersNames());
-
-	const float CONFIDENCE_THRESHOLD = 0.5;
-	const float NMS_THRESHOLD = 0.4;
-
-	std::vector<int> classIds;
-	std::vector<float> confidences;
-	std::vector<cv::Rect> boxes;
-
-	float x_factor = static_cast<float>(image.cols) / inputWidth;
-	float y_factor = static_cast<float>(image.rows) / inputHeight;
-
-	for (size_t outputIdx = 0; outputIdx < outputs.size(); ++outputIdx)
-	{
-		float * data = (float *)outputs[outputIdx].data;
-		int rows = outputs[outputIdx].rows;
-		int dimensions = outputs[outputIdx].cols;
-
-		for (int i = 0; i < rows; ++i)
-		{
-			float objectness = data[4];
-			if (objectness >= CONFIDENCE_THRESHOLD)
-			{
-				std::vector<float> probs;
-				for (int c = 5; c < dimensions; ++c)
-				{
-					probs.push_back(data[c]);
-				}
-
-				int maxClassId = 0;
-				float maxScore = probs[0];
-				for (size_t p = 1; p < probs.size(); ++p)
-				{
-					if (probs[p] > maxScore)
-					{
-						maxScore = probs[p];
-						maxClassId = static_cast<int>(p);
-					}
-				}
-
-				if (maxScore > CONFIDENCE_THRESHOLD)
-				{
-					confidences.push_back(objectness * maxScore);
-					classIds.push_back(maxClassId);
-
-					float x = data[0];
-					float y = data[1];
-					float w = data[2];
-					float h = data[3];
-
-					int left = static_cast<int>((x - 0.5 * w) * x_factor);
-					int top = static_cast<int>((y - 0.5 * h) * y_factor);
-					int width = static_cast<int>(w * x_factor);
-					int height = static_cast<int>(h * y_factor);
-
-					boxes.push_back(cv::Rect(left, top, width, height));
-				}
-			}
-			data += dimensions;
-		}
-	}
-
-	std::vector<int> nms_result;
-	cv::dnn::NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD, nms_result);
-
-	std::vector<std::vector<float>> roiFeatures;
-	for (size_t i = 0; i < nms_result.size(); ++i)
-	{
-		int idx = nms_result[i];
-		cv::Rect box = boxes[idx];
-
-		box.x = std::max(0, std::min(box.x, image.cols - 1));
-		box.y = std::max(0, std::min(box.y, image.rows - 1));
-		box.width = std::max(0, std::min(box.width, image.cols - box.x));
-		box.height = std::max(0, std::min(box.height, image.rows - box.y));
-
-		std::vector<float> roiFeature;
-		roiFeature.push_back(static_cast<float>(box.x) / image.cols);
-		roiFeature.push_back(static_cast<float>(box.y) / image.rows);
-		roiFeature.push_back(static_cast<float>(box.width) / image.cols);
-		roiFeature.push_back(static_cast<float>(box.height) / image.rows);
-		roiFeature.push_back(confidences[idx]);
-		roiFeature.push_back(static_cast<float>(classIds[idx]));
-
-		roiFeatures.push_back(roiFeature);
-	}
-
-	return roiFeatures;
 }
 }

+ 1 - 0
zhipuzi_pos_windows/ai/YoloFeatureExtractor.h

@@ -19,6 +19,7 @@ public:
 
 
 	void loadClassNames(const std::string & file);
 	void loadClassNames(const std::string & file);
 	std::vector<float> globalAveragePooling(const cv::Mat & featureMap);
 	std::vector<float> globalAveragePooling(const cv::Mat & featureMap);
+	void normalizeL2(cv::Mat& feat);
 	std::vector<float> extractFeatures(const std::string & imagePath);
 	std::vector<float> extractFeatures(const std::string & imagePath);
 	std::vector<float> extractBackboneFeatures(const std::string & imagePath);
 	std::vector<float> extractBackboneFeatures(const std::string & imagePath);
 	std::vector<std::vector<float>> extractROIFeatures(const std::string & imagePath);
 	std::vector<std::vector<float>> extractROIFeatures(const std::string & imagePath);

+ 5 - 5
zhipuzi_pos_windows/ai/test.cpp

@@ -31,7 +31,7 @@ int AITest()
 		//用于测试的图片目录
 		//用于测试的图片目录
 		std::string galleryDir = (mainDir.parent_path().parent_path().parent_path().parent_path() /"res"/"images").string();       // 图库目录路径
 		std::string galleryDir = (mainDir.parent_path().parent_path().parent_path().parent_path() /"res"/"images").string();       // 图库目录路径
 
 
-		std::string modelPath = sMainDir + "/ai/yolo26n-cls.onnx";           // YOLO2026模型路径
+		std::string modelPath = sMainDir + "/ai/best.onnx";           // YOLO2026模型路径
 		std::string classesPath = sMainDir + "/ai/cls.names";             // 类别文件路径
 		std::string classesPath = sMainDir + "/ai/cls.names";             // 类别文件路径
 		std::string searchImagePath = sMainDir + "/3.jpg"; // 搜索图片路径
 		std::string searchImagePath = sMainDir + "/3.jpg"; // 搜索图片路径
 
 
@@ -83,9 +83,6 @@ int AITest()
 			int featureDimension = 0;
 			int featureDimension = 0;
 			int processedCount = 0;
 			int processedCount = 0;
 
 
-			// 初始化数据库表结构
-			vecManager.initializeDatabase(1280); // 假设特征维度为1000,实际会在第一次处理时确定
-
 			for (size_t i = 0; i < galleryImages.size(); ++i)
 			for (size_t i = 0; i < galleryImages.size(); ++i)
 			{
 			{
 				try
 				try
@@ -97,6 +94,9 @@ int AITest()
 						{
 						{
 							featureDimension = static_cast<int>(features.size());
 							featureDimension = static_cast<int>(features.size());
 							std::cout << "特征维度: " << featureDimension << std::endl;
 							std::cout << "特征维度: " << featureDimension << std::endl;
+
+							// 初始化数据库表结构
+							vecManager.initializeDatabase(featureDimension);
 						}
 						}
 						vecManager.addFeatureVector(features, galleryImages[i]);
 						vecManager.addFeatureVector(features, galleryImages[i]);
 						processedCount++;
 						processedCount++;
@@ -141,7 +141,7 @@ int AITest()
 
 
 		// 进行相似性搜索
 		// 进行相似性搜索
 		std::cout << "正在进行相似性搜索..." << std::endl;
 		std::cout << "正在进行相似性搜索..." << std::endl;
-		std::vector<std::pair<std::string, float>> searchResults = vecManager.searchSimilarVectors(queryFeatures, 5);
+		std::vector<std::pair<std::string, float>> searchResults = vecManager.searchSimilarVectors(queryFeatures, 20);
 
 
 		// 显示搜索结果
 		// 显示搜索结果
 		std::cout << "\n=== 搜索结果 ===" << std::endl;
 		std::cout << "\n=== 搜索结果 ===" << std::endl;

+ 16 - 2
zhipuzi_pos_windows/tool/debuglog.h

@@ -1,4 +1,4 @@
-#pragma once
+#pragma once
 #include <windows.h>
 #include <windows.h>
 #include <sstream>
 #include <sstream>
 #include <cstdio>
 #include <cstdio>
@@ -23,4 +23,18 @@
     wchar __fullmsg[DEBUG_LOG_BUFFER_SIZE + 128]; \
     wchar __fullmsg[DEBUG_LOG_BUFFER_SIZE + 128]; \
     std::swprintf(__fullmsg, sizeof(__fullmsg), L"[%s]@%s:%d: %s\n", __func__, __FILE__, __LINE__, __logbuf); \
     std::swprintf(__fullmsg, sizeof(__fullmsg), L"[%s]@%s:%d: %s\n", __func__, __FILE__, __LINE__, __logbuf); \
     OutputDebugString(__fullmsg); \
     OutputDebugString(__fullmsg); \
-} while(0)
+} while(0)
+
+/// Formats printf-style arguments, echoes the text to stdout, and returns it.
+///
+/// @param format printf-style format string; a null pointer yields an empty string.
+/// @param ...    arguments consumed by the conversion specifiers in `format`.
+/// @return the complete formatted text (no fixed length limit); empty when
+///         formatting fails.
+inline std::string custom_printf(const char* format, ...) {
+    if (format == nullptr) {
+        return std::string();
+    }
+
+    va_list args;
+    va_start(args, format);
+
+    // A va_list may be traversed only once, so the sizing pass works on a copy.
+    va_list sizing;
+    va_copy(sizing, args);
+    const int needed = std::vsnprintf(nullptr, 0, format, sizing);
+    va_end(sizing);
+
+    std::string text;
+    if (needed > 0) {
+        // One extra byte for the terminator vsnprintf always writes.
+        text.resize(static_cast<std::size_t>(needed) + 1);
+        std::vsnprintf(&text[0], text.size(), format, args);
+        text.resize(static_cast<std::size_t>(needed));
+    }
+    va_end(args);
+
+    // Echo exactly what is returned, so console output and result never diverge.
+    if (!text.empty()) {
+        std::fwrite(text.data(), 1, text.size(), stdout);
+    }
+    return text;
+}