1 dag sedan · 541f7cd930
--- a/bin/Win32/Debug/zhipuzi_pos_windows/3.jpg
+++ b/bin/Win32/Debug/zhipuzi_pos_windows/3.jpg
--- a/bin/Win32/Debug/zhipuzi_pos_windows/ai/yolo26n-cls.onnx
+++ b/bin/Win32/Debug/zhipuzi_pos_windows/ai/yolo26n-cls.onnx
--- a/bin/Win32/Debug/zhipuzi_pos_windows/image_features.db
+++ b/bin/Win32/Debug/zhipuzi_pos_windows/image_features.db
--- a/bin/Win32/Release/zhipuzi_pos_windows/ai/yolo26n-cls.onnx
+++ b/bin/Win32/Release/zhipuzi_pos_windows/ai/yolo26n-cls.onnx
--- a/bin/Win32/Release/zhipuzi_pos_windows/image_features.db
+++ b/bin/Win32/Release/zhipuzi_pos_windows/image_features.db
--- a/res/ai/yolo26n-cls.onnx
+++ b/res/ai/yolo26n-cls.onnx
--- a/zhipuzi_pos_windows/ai/YoloFeatureExtractor.cpp
+++ b/zhipuzi_pos_windows/ai/YoloFeatureExtractor.cpp
@@ -4,6 +4,8 @@
 
				 #include <fstream>
			
 
				 #include <algorithm>
			
 
				 #include <iostream>
			
 
				+#include <functional>
			
 
				+#include <numeric>
			
 
				 
			
 
				 #include "../tool/debuglog.h"
			
 
				 
			
@@ -24,41 +26,156 @@ void YoloFeatureExtractor::loadClassNames(const std::string & file)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
			
 
				+std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat & featureMap)
			
 
				 {
			
 
				-	auto start_time = std::chrono::high_resolution_clock::now();
			
 
				+	std::vector<float> features;
			
 
				 
			
 
				-	cv::Mat image = cv::imread(imagePath);
			
 
				-	if (image.empty())
			
 
				+	// 检查特征图是否为空
			
 
				+	if (featureMap.empty())
			
 
				 	{
			
 
				-		throw std::runtime_error("Could not load image: " + imagePath);
			
 
				+		std::cerr << "特征图为空" << std::endl;
			
 
				+		return features;
			
 
				 	}
			
 
				 
			
 
				-	cv::Mat blob;
			
 
				-	cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
			
 
				-	net.setInput(blob);
			
 
				+	// 获取特征图维度信息
			
 
				+	int dims = featureMap.dims;
			
 
				+	if (dims < 2)
			
 
				+	{
			
 
				+		std::cerr << "特征图维度不足" << std::endl;
			
 
				+		return features;
			
 
				+	}
			
 
				 
			
 
				-	std::vector<cv::Mat> outputs;
			
 
				-	net.forward(outputs, net.getUnconnectedOutLayersNames());
			
 
				+	// 获取各维度大小
			
 
				+	const int * sizes = featureMap.size.p;
			
 
				+	int batchSize = sizes[0];
			
 
				+	int channels = (dims > 1) ? sizes[1] : 1;
			
 
				+	int height = (dims > 2) ? sizes[2] : 1;
			
 
				+	int width = (dims > 3) ? sizes[3] : 1;
			
 
				 
			
 
				-	std::vector<float> features;
			
 
				-	for (size_t i = 0; i < outputs.size(); ++i)
			
 
				+	// 验证维度有效性
			
 
				+	if (batchSize <= 0 || channels <= 0 || height <= 0 || width <= 0)
			
 
				+	{
			
 
				+		std::cerr << "特征图维度无效" << std::endl;
			
 
				+		return features;
			
 
				+	}
			
 
				+
			
 
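				+	// View the raw buffer as floats. NOTE(review): the element type and continuity of featureMap are not checked in this function - confirm callers only pass dense 32-bit float blobs.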
			
 
				+	const float * data = reinterpret_cast<const float *>(featureMap.data);
			
 
				+
			
 
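				+	// Global average pooling per channel. The offset below has no batch term, so only the first sample (batch index 0) is pooled.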
			
 
				+	for (int c = 0; c < channels; ++c)
			
 
				 	{
			
 
				-		float * data = (float *)outputs[i].data;
			
 
				-		int totalElements = outputs[i].total();
			
 
				-		for (int idx = 0; idx < totalElements; ++idx)
			
 
				+		float sum = 0.0f;
			
 
				+		int channelOffset = c * height * width;
			
 
				+
			
 
				+		for (int h = 0; h < height; ++h)
			
 
				 		{
			
 
				-			features.push_back(data[idx]);
			
 
				+			int rowOffset = channelOffset + h * width;
			
 
				+			for (int w = 0; w < width; ++w)
			
 
				+			{
			
 
				+				sum += data[rowOffset + w];
			
 
				+			}
			
 
				 		}
			
 
				+
			
 
				+		// 计算平均值并添加到特征向量
			
 
				+		features.push_back(sum / (height * width));
			
 
				 	}
			
 
				 
			
 
				-	auto end_time = std::chrono::high_resolution_clock::now();
			
 
				-	auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
			
 
				+	return features;
			
 
				+}
			
 
				+
			
 
				+std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
			
 
				+{
			
 
				+	try
			
 
				+	{
			
 
				+		auto time_1 = std::chrono::high_resolution_clock::now();
			
 
				+
			
 
				+		cv::Mat image = cv::imread(imagePath);
			
 
				+		if (image.empty())
			
 
				+		{
			
 
				+			throw std::runtime_error("Could not load image: " + imagePath);
			
 
				+		}
			
 
				 
			
 
				-	std::wstring msg = L"特征提取完成，耗时: " + std::to_wstring(duration.count()) + L" 毫秒";
			
 
				-	DEBUG_LOG(msg.c_str());
			
 
				+		cv::Mat resizedImage;
			
 
				+		cv::resize(image, resizedImage, cv::Size(inputWidth, inputHeight));
			
 
				 
			
 
				-	return features;
			
 
				+		cv::Mat blob;
			
 
				+		cv::dnn::blobFromImage(resizedImage, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
			
 
				+		net.setInput(blob);
			
 
				+
			
 
				+		auto time_2 = std::chrono::high_resolution_clock::now();
			
 
				+
			
 
				+		std::vector<cv::String> layerNames = net.getLayerNames();
			
 
				+		std::vector<cv::String> outputNames;
			
 
				+
			
 
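				+		// Pick the layer whose output is used as the feature map: index size-7 when the net has at least 8 layers, otherwise size-2, otherwise the last layer.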
			
 
				+		if (layerNames.size() >= 8)
			
 
				+		{
			
 
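				+			// size-7 is the 7th layer from the end. NOTE(review): this offset is a heuristic tied to the loaded model's layer list - confirm it is the intended feature layer.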
			
 
				+			outputNames.push_back(layerNames[layerNames.size() - 7]);
			
 
				+		}
			
 
				+		else if (layerNames.size() >= 2)
			
 
				+		{
			
 
				+			outputNames.push_back(layerNames[layerNames.size() - 2]);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			outputNames.push_back(layerNames.back());
			
 
				+		}
			
 
				+
			
 
				+		std::vector<cv::Mat> outputs;
			
 
				+		net.forward(outputs, outputNames);
			
 
				+
			
 
				+		auto time_3 = std::chrono::high_resolution_clock::now();
			
 
				+
			
 
				+		// 检查输出是否有效
			
 
				+		if (outputs.empty() || outputs[0].empty())
			
 
				+		{
			
 
				+			throw std::runtime_error("模型前向传播未产生有效输出");
			
 
				+		}
			
 
				+
			
 
				+		// 应用全局平均池化获取特征向量
			
 
				+		std::vector<float> features = globalAveragePooling(outputs[0]);
			
 
				+
			
 
				+		// L2归一化特征向量
			
 
				+		if (!features.empty())
			
 
				+		{
			
 
				+			float norm = std::sqrt(std::inner_product(features.begin(), features.end(), features.begin(), 0.0f));
			
 
				+			if (norm > 1e-6)
			
 
				+			{
			
 
				+				for (auto & val : features)
			
 
				+				{
			
 
				+					val /= norm;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		auto time_4 = std::chrono::high_resolution_clock::now();
			
 
				+
			
 
				+
			
 
				+		auto duration_1 = std::chrono::duration_cast<std::chrono::milliseconds>(time_2 - time_1);
			
 
				+		std::wstring msg = L"图片处理完成，耗时: " + std::to_wstring(duration_1.count()) + L" 毫秒";
			
 
				+		DEBUG_LOG(msg.c_str());
			
 
				+
			
 
				+		auto duration_2 = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_2);
			
 
				+		std::wstring msg2 = L"模型前向传播完成，耗时: " + std::to_wstring(duration_2.count()) + L" 毫秒";
			
 
				+		DEBUG_LOG(msg2.c_str());
			
 
				+
			
 
				+		auto duration_3 = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_3);
			
 
				+		std::wstring msg3 = L"特征处理完成，耗时: " + std::to_wstring(duration_3.count()) + L" 毫秒";
			
 
				+		DEBUG_LOG(msg3.c_str());
			
 
				+
			
 
				+		auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_1);
			
 
				+		std::wstring msg4 = L"总耗时: " + std::to_wstring(totalDuration.count()) + L" 毫秒";
			
 
				+		DEBUG_LOG(msg4.c_str());
			
 
				+
			
 
				+		return features;
			
 
				+	}
			
 
				+	catch (const std::exception & e)
			
 
				+	{
			
 
				+		DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
			
 
				+		return {};
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 std::vector<float> YoloFeatureExtractor::extractBackboneFeatures(const std::string & imagePath)
			
--- a/zhipuzi_pos_windows/ai/YoloFeatureExtractor.h
+++ b/zhipuzi_pos_windows/ai/YoloFeatureExtractor.h
@@ -18,6 +18,7 @@ public:
 
				 	~YoloFeatureExtractor() = default;
			
 
				 
			
 
				 	void loadClassNames(const std::string & file);
			
 
				+	std::vector<float> globalAveragePooling(const cv::Mat & featureMap);
			
 
				 	std::vector<float> extractFeatures(const std::string & imagePath);
			
 
				 	std::vector<float> extractBackboneFeatures(const std::string & imagePath);
			
 
				 	std::vector<std::vector<float>> extractROIFeatures(const std::string & imagePath);
			
--- a/zhipuzi_pos_windows/ai/test.cpp
+++ b/zhipuzi_pos_windows/ai/test.cpp
@@ -84,22 +84,19 @@ int AITest()
 
				 			int processedCount = 0;
			
 
				 
			
 
				 			// 初始化数据库表结构
			
 
				-			vecManager.initializeDatabase(45); // 假设特征维度为1000，实际会在第一次处理时确定
			
 
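				+			vecManager.initializeDatabase(1280); // Feature dimension is hard-coded to 1280 and is no longer re-initialised from the first extracted vector. NOTE(review): confirm extractFeatures() really returns 1280 values.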
			
 
				 
			
 
				 			for (size_t i = 0; i < galleryImages.size(); ++i)
			
 
				 			{
			
 
				 				try
			
 
				 				{
			
 
				 					std::vector<float> features = extractor.extractFeatures(galleryImages[i]);
			
 
				-					int b = 1;
			
 
				 					if (!features.empty())
			
 
				 					{
			
 
				 						if (featureDimension == 0)
			
 
				 						{
			
 
				 							featureDimension = static_cast<int>(features.size());
			
 
				 							std::cout << "特征维度: " << featureDimension << std::endl;
			
 
				-							// 重新初始化数据库以匹配实际维度
			
 
				-							vecManager.initializeDatabase(featureDimension);
			
 
				 						}
			
 
				 						vecManager.addFeatureVector(features, galleryImages[i]);
			
 
				 						processedCount++;