张洋 1 dag sedan
förälder
incheckning
541f7cd930

BIN
bin/Win32/Debug/zhipuzi_pos_windows/3.jpg


BIN
bin/Win32/Debug/zhipuzi_pos_windows/ai/yolo26n-cls.onnx


BIN
bin/Win32/Debug/zhipuzi_pos_windows/image_features.db


BIN
bin/Win32/Release/zhipuzi_pos_windows/ai/yolo26n-cls.onnx


BIN
bin/Win32/Release/zhipuzi_pos_windows/image_features.db


BIN
res/ai/yolo26n-cls.onnx


+ 138 - 21
zhipuzi_pos_windows/ai/YoloFeatureExtractor.cpp

@@ -4,6 +4,8 @@
 #include <fstream>
 #include <algorithm>
 #include <iostream>
+#include <functional>
+#include <numeric>
 
 #include "../tool/debuglog.h"
 
@@ -24,41 +26,156 @@ void YoloFeatureExtractor::loadClassNames(const std::string & file)
 	}
 }
 
-std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
+std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat & featureMap)
 {
-	auto start_time = std::chrono::high_resolution_clock::now();
+	std::vector<float> features;
 
-	cv::Mat image = cv::imread(imagePath);
-	if (image.empty())
+	// 检查特征图是否为空
+	if (featureMap.empty())
 	{
-		throw std::runtime_error("Could not load image: " + imagePath);
+		std::cerr << "特征图为空" << std::endl;
+		return features;
 	}
 
-	cv::Mat blob;
-	cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
-	net.setInput(blob);
+	// 获取特征图维度信息
+	int dims = featureMap.dims;
+	if (dims < 2)
+	{
+		std::cerr << "特征图维度不足" << std::endl;
+		return features;
+	}
 
-	std::vector<cv::Mat> outputs;
-	net.forward(outputs, net.getUnconnectedOutLayersNames());
+	// 获取各维度大小
+	const int * sizes = featureMap.size.p;
+	int batchSize = sizes[0];
+	int channels = (dims > 1) ? sizes[1] : 1;
+	int height = (dims > 2) ? sizes[2] : 1;
+	int width = (dims > 3) ? sizes[3] : 1;
 
-	std::vector<float> features;
-	for (size_t i = 0; i < outputs.size(); ++i)
+	// 验证维度有效性
+	if (batchSize <= 0 || channels <= 0 || height <= 0 || width <= 0)
+	{
+		std::cerr << "特征图维度无效" << std::endl;
+		return features;
+	}
+
+	// 获取数据指针
+	const float * data = reinterpret_cast<const float *>(featureMap.data);
+
+	// 对每个通道执行全局平均池化
+	for (int c = 0; c < channels; ++c)
 	{
-		float * data = (float *)outputs[i].data;
-		int totalElements = outputs[i].total();
-		for (int idx = 0; idx < totalElements; ++idx)
+		float sum = 0.0f;
+		int channelOffset = c * height * width;
+
+		for (int h = 0; h < height; ++h)
 		{
-			features.push_back(data[idx]);
+			int rowOffset = channelOffset + h * width;
+			for (int w = 0; w < width; ++w)
+			{
+				sum += data[rowOffset + w];
+			}
 		}
+
+		// 计算平均值并添加到特征向量
+		features.push_back(sum / (height * width));
 	}
 
-	auto end_time = std::chrono::high_resolution_clock::now();
-	auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
+	return features;
+}
+
+std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
+{
+	try
+	{
+		auto time_1 = std::chrono::high_resolution_clock::now();
+
+		cv::Mat image = cv::imread(imagePath);
+		if (image.empty())
+		{
+			throw std::runtime_error("Could not load image: " + imagePath);
+		}
 
-	std::wstring msg = L"特征提取完成,耗时: " + std::to_wstring(duration.count()) + L" 毫秒";
-	DEBUG_LOG(msg.c_str());
+		cv::Mat resizedImage;
+		cv::resize(image, resizedImage, cv::Size(inputWidth, inputHeight));
 
-	return features;
+		cv::Mat blob;
+		cv::dnn::blobFromImage(resizedImage, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
+		net.setInput(blob);
+
+		auto time_2 = std::chrono::high_resolution_clock::now();
+
+		std::vector<cv::String> layerNames = net.getLayerNames();
+		std::vector<cv::String> outputNames;
+
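+		// Choose the feature layer by its position from the end of the layer-name list: the 7th from last when there are at least 8 layers, otherwise the 2nd from last, otherwise the last layer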
+		if (layerNames.size() >= 8)
+		{
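+			// layerNames.size() - 7 selects the 7th layer from the end; presumably a good feature layer for this model — TODO confirm against the actual network graph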
+			outputNames.push_back(layerNames[layerNames.size() - 7]);
+		}
+		else if (layerNames.size() >= 2)
+		{
+			outputNames.push_back(layerNames[layerNames.size() - 2]);
+		}
+		else
+		{
+			outputNames.push_back(layerNames.back());
+		}
+
+		std::vector<cv::Mat> outputs;
+		net.forward(outputs, outputNames);
+
+		auto time_3 = std::chrono::high_resolution_clock::now();
+
+		// 检查输出是否有效
+		if (outputs.empty() || outputs[0].empty())
+		{
+			throw std::runtime_error("模型前向传播未产生有效输出");
+		}
+
+		// 应用全局平均池化获取特征向量
+		std::vector<float> features = globalAveragePooling(outputs[0]);
+
+		// L2归一化特征向量
+		if (!features.empty())
+		{
+			float norm = std::sqrt(std::inner_product(features.begin(), features.end(), features.begin(), 0.0f));
+			if (norm > 1e-6)
+			{
+				for (auto & val : features)
+				{
+					val /= norm;
+				}
+			}
+		}
+
+		auto time_4 = std::chrono::high_resolution_clock::now();
+
+
+		auto duration_1 = std::chrono::duration_cast<std::chrono::milliseconds>(time_2 - time_1);
+		std::wstring msg = L"图片处理完成,耗时: " + std::to_wstring(duration_1.count()) + L" 毫秒";
+		DEBUG_LOG(msg.c_str());
+
+		auto duration_2 = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_2);
+		std::wstring msg2 = L"模型前向传播完成,耗时: " + std::to_wstring(duration_2.count()) + L" 毫秒";
+		DEBUG_LOG(msg2.c_str());
+
+		auto duration_3 = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_3);
+		std::wstring msg3 = L"特征处理完成,耗时: " + std::to_wstring(duration_3.count()) + L" 毫秒";
+		DEBUG_LOG(msg3.c_str());
+
+		auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_1);
+		std::wstring msg4 = L"总耗时: " + std::to_wstring(totalDuration.count()) + L" 毫秒";
+		DEBUG_LOG(msg4.c_str());
+
+		return features;
+	}
+	catch (const std::exception & e)
+	{
+		DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
+		return {};
+	}
 }
 
 std::vector<float> YoloFeatureExtractor::extractBackboneFeatures(const std::string & imagePath)

+ 1 - 0
zhipuzi_pos_windows/ai/YoloFeatureExtractor.h

@@ -18,6 +18,7 @@ public:
 	~YoloFeatureExtractor() = default;
 
 	void loadClassNames(const std::string & file);
+	std::vector<float> globalAveragePooling(const cv::Mat & featureMap);
 	std::vector<float> extractFeatures(const std::string & imagePath);
 	std::vector<float> extractBackboneFeatures(const std::string & imagePath);
 	std::vector<std::vector<float>> extractROIFeatures(const std::string & imagePath);

+ 1 - 4
zhipuzi_pos_windows/ai/test.cpp

@@ -84,22 +84,19 @@ int AITest()
 			int processedCount = 0;
 
 			// 初始化数据库表结构
-			vecManager.initializeDatabase(45); // 假设特征维度为1000,实际会在第一次处理时确定
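+			vecManager.initializeDatabase(1280); // Fixed feature dimension of 1280; the database is no longer re-initialized from the first extracted vector, so this presumably must match the extractor's output size — TODO confirm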
 
 			for (size_t i = 0; i < galleryImages.size(); ++i)
 			{
 				try
 				{
 					std::vector<float> features = extractor.extractFeatures(galleryImages[i]);
-					int b = 1;
 					if (!features.empty())
 					{
 						if (featureDimension == 0)
 						{
 							featureDimension = static_cast<int>(features.size());
 							std::cout << "特征维度: " << featureDimension << std::endl;
-							// 重新初始化数据库以匹配实际维度
-							vecManager.initializeDatabase(featureDimension);
 						}
 						vecManager.addFeatureVector(features, galleryImages[i]);
 						processedCount++;