Pārlūkot izejas kodu

模型输入尺寸改为800

向量数据库改为余弦距离
张洋 16 stundas atpakaļ
vecāks
revīzija
188ed25650

BIN
res/ai/best.onnx


BIN
res/images/kuerlexiangli/006c1QKIly1hrydiqne0kj30m80m876s.jpg


+ 2 - 3
zhipuzi_pos_windows/ai/SQLiteVecManager.cpp

@@ -143,9 +143,8 @@ std::vector<std::pair<std::string, float>> SQLiteVecManager::searchSimilarVector
 		// 使用sqlite-vec的向量搜索功能
 		std::string blobData = vectorToBlob(queryVector);
 
-		std::string sql = "SELECT image_path, distance FROM image_features "
-			"WHERE feature_vector MATCH vec_f32(?) "
-			"ORDER BY distance "
+		std::string sql = "SELECT image_path, vec_distance_cosine(feature_vector, ?) AS distance FROM image_features "
+			"ORDER BY distance ASC "
 			"LIMIT " + std::to_string(k) + ";";
 
 		sqlite3_stmt * stmt;

+ 28 - 40
zhipuzi_pos_windows/ai/YoloFeatureExtractor.cpp

@@ -10,7 +10,7 @@
 #include "../tool/debuglog.h"
 
 YoloFeatureExtractor::YoloFeatureExtractor(const std::string & modelPath, const std::string & classesPath)
-	: inputWidth(640), inputHeight(640)
+	: inputWidth(800), inputHeight(800)
 {
 	net = cv::dnn::readNetFromONNX(modelPath);
 	loadClassNames(classesPath);
@@ -45,43 +45,20 @@ std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat & fe
 		return features;
 	}
 
-	// 获取各维度大小
-	const int * sizes = featureMap.size.p;
-	int batchSize = sizes[0];
-	int channels = (dims > 1) ? sizes[1] : 1;
-	int height = (dims > 2) ? sizes[2] : 1;
-	int width = (dims > 3) ? sizes[3] : 1;
+	// featureMap形状: [1, channels, height, width]
+	int channels = featureMap.size[1];
+	int height = featureMap.size[2];
+	int width = featureMap.size[3];
 
-	// 验证维度有效性
-	if (batchSize <= 0 || channels <= 0 || height <= 0 || width <= 0)
-	{
-		std::cerr << "特征图维度无效" << std::endl;
-		return features;
-	}
-
-	// 获取数据指针
-	const float * data = reinterpret_cast<const float *>(featureMap.data);
-
-	// 对每个通道执行全局平均池化
-	for (int c = 0; c < channels; ++c)
-	{
-		float sum = 0.0f;
-		int channelOffset = c * height * width;
-
-		for (int h = 0; h < height; ++h)
-		{
-			int rowOffset = channelOffset + h * width;
-			for (int w = 0; w < width; ++w)
-			{
-				sum += data[rowOffset + w];
-			}
-		}
+	// 重塑为 [channels, height*width]
+	cv::Mat reshaped = featureMap.reshape(1, channels);
+	cv::Mat pooled;
 
-		// 计算平均值并添加到特征向量
-		features.push_back(sum / (height * width));
-	}
+	// 对每个通道进行平均池化
+	cv::reduce(reshaped, pooled, 1, cv::REDUCE_AVG);
 
-	return features;
+	// 重塑为 [1, channels] 特征向量
+	return pooled.reshape(1, 1);
 }
 
 std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
@@ -97,10 +74,10 @@ std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & ima
 		}
 
 		cv::Mat resizedImage;
-		cv::resize(image, resizedImage, cv::Size(inputWidth, inputHeight));
+		//cv::resize(image, resizedImage, cv::Size(inputWidth, inputHeight));
 
 		cv::Mat blob;
-		cv::dnn::blobFromImage(resizedImage, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
+		cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
 		net.setInput(blob);
 
 		auto time_2 = std::chrono::high_resolution_clock::now();
@@ -112,12 +89,13 @@ std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & ima
 		if (layerNames.size() >= 8)
 		{
 			// 通常倒数第8层左右是较好的特征层
-			outputNames.push_back(layerNames[layerNames.size() - 7]);
+			outputNames.push_back(layerNames[layerNames.size() - 6]);
 		}
 		else if (layerNames.size() >= 2)
 		{
 			outputNames.push_back(layerNames[layerNames.size() - 2]);
 		}
+		
 		else
 		{
 			outputNames.push_back(layerNames.back());
@@ -134,8 +112,17 @@ std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & ima
 			throw std::runtime_error("模型前向传播未产生有效输出");
 		}
 
+		// 获取GAP层输出并转换为特征向量
+		//cv::Mat featuresMat = outputs[0].reshape(1, 1);
+		cv::Mat featuresMat = outputs[0];
+		cv::normalize(featuresMat, featuresMat, 1.0, 0.0, cv::NORM_L2);
+
+		// 转换为std::vector<float>
+		std::vector<float> features(featuresMat.begin<float>(), featuresMat.end<float>());
+
+		/*
 		// 应用全局平均池化获取特征向量
-		std::vector<float> features = globalAveragePooling(outputs[0]);
+		//std::vector<float> features = globalAveragePooling(outputs[0]);
 
 		// L2归一化特征向量
 		if (!features.empty())
@@ -148,7 +135,7 @@ std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & ima
 					val /= norm;
 				}
 			}
-		}
+		}*/
 
 		auto time_4 = std::chrono::high_resolution_clock::now();
 
@@ -173,6 +160,7 @@ std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & ima
 	}
 	catch (const std::exception & e)
 	{
+		std::string aa = std::string(e.what());
 		DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
 		return {};
 	}