|
@@ -4,6 +4,8 @@
|
|
|
#include <fstream>
|
|
#include <fstream>
|
|
|
#include <algorithm>
|
|
#include <algorithm>
|
|
|
#include <iostream>
|
|
#include <iostream>
|
|
|
|
|
+#include <functional>
|
|
|
|
|
+#include <numeric>
|
|
|
|
|
|
|
|
#include "../tool/debuglog.h"
|
|
#include "../tool/debuglog.h"
|
|
|
|
|
|
|
@@ -24,41 +26,156 @@ void YoloFeatureExtractor::loadClassNames(const std::string & file)
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
|
|
|
|
|
|
|
+std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat & featureMap)
|
|
|
{
|
|
{
|
|
|
- auto start_time = std::chrono::high_resolution_clock::now();
|
|
|
|
|
|
|
+ std::vector<float> features;
|
|
|
|
|
|
|
|
- cv::Mat image = cv::imread(imagePath);
|
|
|
|
|
- if (image.empty())
|
|
|
|
|
|
|
+ // 检查特征图是否为空
|
|
|
|
|
+ if (featureMap.empty())
|
|
|
{
|
|
{
|
|
|
- throw std::runtime_error("Could not load image: " + imagePath);
|
|
|
|
|
|
|
+ std::cerr << "特征图为空" << std::endl;
|
|
|
|
|
+ return features;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- cv::Mat blob;
|
|
|
|
|
- cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
|
|
|
|
|
- net.setInput(blob);
|
|
|
|
|
|
|
+ // 获取特征图维度信息
|
|
|
|
|
+ int dims = featureMap.dims;
|
|
|
|
|
+ if (dims < 2)
|
|
|
|
|
+ {
|
|
|
|
|
+ std::cerr << "特征图维度不足" << std::endl;
|
|
|
|
|
+ return features;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- std::vector<cv::Mat> outputs;
|
|
|
|
|
- net.forward(outputs, net.getUnconnectedOutLayersNames());
|
|
|
|
|
|
|
+ // 获取各维度大小
|
|
|
|
|
+ const int * sizes = featureMap.size.p;
|
|
|
|
|
+ int batchSize = sizes[0];
|
|
|
|
|
+ int channels = (dims > 1) ? sizes[1] : 1;
|
|
|
|
|
+ int height = (dims > 2) ? sizes[2] : 1;
|
|
|
|
|
+ int width = (dims > 3) ? sizes[3] : 1;
|
|
|
|
|
|
|
|
- std::vector<float> features;
|
|
|
|
|
- for (size_t i = 0; i < outputs.size(); ++i)
|
|
|
|
|
|
|
+ // 验证维度有效性
|
|
|
|
|
+ if (batchSize <= 0 || channels <= 0 || height <= 0 || width <= 0)
|
|
|
|
|
+ {
|
|
|
|
|
+ std::cerr << "特征图维度无效" << std::endl;
|
|
|
|
|
+ return features;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 获取数据指针
|
|
|
|
|
+ const float * data = reinterpret_cast<const float *>(featureMap.data);
|
|
|
|
|
+
|
|
|
|
|
+ // 对每个通道执行全局平均池化
|
|
|
|
|
+ for (int c = 0; c < channels; ++c)
|
|
|
{
|
|
{
|
|
|
- float * data = (float *)outputs[i].data;
|
|
|
|
|
- int totalElements = outputs[i].total();
|
|
|
|
|
- for (int idx = 0; idx < totalElements; ++idx)
|
|
|
|
|
|
|
+ float sum = 0.0f;
|
|
|
|
|
+ int channelOffset = c * height * width;
|
|
|
|
|
+
|
|
|
|
|
+ for (int h = 0; h < height; ++h)
|
|
|
{
|
|
{
|
|
|
- features.push_back(data[idx]);
|
|
|
|
|
|
|
+ int rowOffset = channelOffset + h * width;
|
|
|
|
|
+ for (int w = 0; w < width; ++w)
|
|
|
|
|
+ {
|
|
|
|
|
+ sum += data[rowOffset + w];
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ // 计算平均值并添加到特征向量
|
|
|
|
|
+ features.push_back(sum / (height * width));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- auto end_time = std::chrono::high_resolution_clock::now();
|
|
|
|
|
- auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
|
|
|
|
|
|
+ return features;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
|
|
|
|
|
+{
|
|
|
|
|
+ try
|
|
|
|
|
+ {
|
|
|
|
|
+ auto time_1 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
+
|
|
|
|
|
+ cv::Mat image = cv::imread(imagePath);
|
|
|
|
|
+ if (image.empty())
|
|
|
|
|
+ {
|
|
|
|
|
+ throw std::runtime_error("Could not load image: " + imagePath);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- std::wstring msg = L"特征提取完成,耗时: " + std::to_wstring(duration.count()) + L" 毫秒";
|
|
|
|
|
- DEBUG_LOG(msg.c_str());
|
|
|
|
|
|
|
+ cv::Mat resizedImage;
|
|
|
|
|
+ cv::resize(image, resizedImage, cv::Size(inputWidth, inputHeight));
|
|
|
|
|
|
|
|
- return features;
|
|
|
|
|
|
|
+ cv::Mat blob;
|
|
|
|
|
+ cv::dnn::blobFromImage(resizedImage, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
|
|
|
|
|
+ net.setInput(blob);
|
|
|
|
|
+
|
|
|
|
|
+ auto time_2 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
+
|
|
|
|
|
+ std::vector<cv::String> layerNames = net.getLayerNames();
|
|
|
|
|
+ std::vector<cv::String> outputNames;
|
|
|
|
|
+
|
|
|
|
|
+ // 选择合适的特征层(使用倒数第二个卷积层作为特征提取层)
|
|
|
|
|
+ if (layerNames.size() >= 8)
|
|
|
|
|
+ {
|
|
|
|
|
+ // 通常倒数第8层左右是较好的特征层
|
|
|
|
|
+ outputNames.push_back(layerNames[layerNames.size() - 7]);
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (layerNames.size() >= 2)
|
|
|
|
|
+ {
|
|
|
|
|
+ outputNames.push_back(layerNames[layerNames.size() - 2]);
|
|
|
|
|
+ }
|
|
|
|
|
+ else
|
|
|
|
|
+ {
|
|
|
|
|
+ outputNames.push_back(layerNames.back());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ std::vector<cv::Mat> outputs;
|
|
|
|
|
+ net.forward(outputs, outputNames);
|
|
|
|
|
+
|
|
|
|
|
+ auto time_3 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
+
|
|
|
|
|
+ // 检查输出是否有效
|
|
|
|
|
+ if (outputs.empty() || outputs[0].empty())
|
|
|
|
|
+ {
|
|
|
|
|
+ throw std::runtime_error("模型前向传播未产生有效输出");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 应用全局平均池化获取特征向量
|
|
|
|
|
+ std::vector<float> features = globalAveragePooling(outputs[0]);
|
|
|
|
|
+
|
|
|
|
|
+ // L2归一化特征向量
|
|
|
|
|
+ if (!features.empty())
|
|
|
|
|
+ {
|
|
|
|
|
+ float norm = std::sqrt(std::inner_product(features.begin(), features.end(), features.begin(), 0.0f));
|
|
|
|
|
+ if (norm > 1e-6)
|
|
|
|
|
+ {
|
|
|
|
|
+ for (auto & val : features)
|
|
|
|
|
+ {
|
|
|
|
|
+ val /= norm;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ auto time_4 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+ auto duration_1 = std::chrono::duration_cast<std::chrono::milliseconds>(time_2 - time_1);
|
|
|
|
|
+ std::wstring msg = L"图片处理完成,耗时: " + std::to_wstring(duration_1.count()) + L" 毫秒";
|
|
|
|
|
+ DEBUG_LOG(msg.c_str());
|
|
|
|
|
+
|
|
|
|
|
+ auto duration_2 = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_2);
|
|
|
|
|
+ std::wstring msg2 = L"模型前向传播完成,耗时: " + std::to_wstring(duration_2.count()) + L" 毫秒";
|
|
|
|
|
+ DEBUG_LOG(msg2.c_str());
|
|
|
|
|
+
|
|
|
|
|
+ auto duration_3 = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_3);
|
|
|
|
|
+ std::wstring msg3 = L"特征处理完成,耗时: " + std::to_wstring(duration_3.count()) + L" 毫秒";
|
|
|
|
|
+ DEBUG_LOG(msg3.c_str());
|
|
|
|
|
+
|
|
|
|
|
+ auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_1);
|
|
|
|
|
+ std::wstring msg4 = L"总耗时: " + std::to_wstring(totalDuration.count()) + L" 毫秒";
|
|
|
|
|
+ DEBUG_LOG(msg4.c_str());
|
|
|
|
|
+
|
|
|
|
|
+ return features;
|
|
|
|
|
+ }
|
|
|
|
|
+ catch (const std::exception & e)
|
|
|
|
|
+ {
|
|
|
|
|
+ DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
|
|
|
|
|
+ return {};
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
std::vector<float> YoloFeatureExtractor::extractBackboneFeatures(const std::string & imagePath)
|
|
std::vector<float> YoloFeatureExtractor::extractBackboneFeatures(const std::string & imagePath)
|