|
@@ -6,17 +6,18 @@
|
|
|
#include <iostream>
|
|
#include <iostream>
|
|
|
#include <functional>
|
|
#include <functional>
|
|
|
#include <numeric>
|
|
#include <numeric>
|
|
|
|
|
+#include <sstream>
|
|
|
|
|
|
|
|
#include "../tool/debuglog.h"
|
|
#include "../tool/debuglog.h"
|
|
|
|
|
|
|
|
-YoloFeatureExtractor::YoloFeatureExtractor(const std::string & modelPath, const std::string & classesPath)
|
|
|
|
|
- : inputWidth(224), inputHeight(224)
|
|
|
|
|
|
|
+YoloFeatureExtractor::YoloFeatureExtractor(const std::string& modelPath, const std::string& classesPath)
|
|
|
|
|
+ : inputWidth(640), inputHeight(640)
|
|
|
{
|
|
{
|
|
|
net = cv::dnn::readNetFromONNX(modelPath);
|
|
net = cv::dnn::readNetFromONNX(modelPath);
|
|
|
loadClassNames(classesPath);
|
|
loadClassNames(classesPath);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-void YoloFeatureExtractor::loadClassNames(const std::string & file)
|
|
|
|
|
|
|
+void YoloFeatureExtractor::loadClassNames(const std::string& file)
|
|
|
{
|
|
{
|
|
|
std::ifstream ifs(file);
|
|
std::ifstream ifs(file);
|
|
|
std::string line;
|
|
std::string line;
|
|
@@ -26,7 +27,7 @@ void YoloFeatureExtractor::loadClassNames(const std::string & file)
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat & featureMap)
|
|
|
|
|
|
|
+std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat& featureMap)
|
|
|
{
|
|
{
|
|
|
std::vector<float> features;
|
|
std::vector<float> features;
|
|
|
|
|
|
|
@@ -61,7 +62,16 @@ std::vector<float> YoloFeatureExtractor::globalAveragePooling(const cv::Mat & fe
|
|
|
return pooled.reshape(1, 1);
|
|
return pooled.reshape(1, 1);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
|
|
|
|
|
|
|
+// L2 归一化
|
|
|
|
|
+void YoloFeatureExtractor::normalizeL2(cv::Mat& feat)
|
|
|
|
|
+{
|
|
|
|
|
+ if (feat.empty() || feat.type() != CV_32F) return;
|
|
|
|
|
+ float norm = cv::norm(feat, cv::NORM_L2);
|
|
|
|
|
+
|
|
|
|
|
+ if (norm > 1e-6) feat /= norm;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string& imagePath)
|
|
|
{
|
|
{
|
|
|
try
|
|
try
|
|
|
{
|
|
{
|
|
@@ -73,58 +83,87 @@ std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & ima
|
|
|
throw std::runtime_error("Could not load image: " + imagePath);
|
|
throw std::runtime_error("Could not load image: " + imagePath);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- cv::Mat resizedImage;
|
|
|
|
|
- //cv::resize(image, resizedImage, cv::Size(inputWidth, inputHeight));
|
|
|
|
|
|
|
+ cv::resize(image, image, cv::Size(inputWidth, inputHeight));
|
|
|
|
|
|
|
|
cv::Mat blob;
|
|
cv::Mat blob;
|
|
|
cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
|
|
cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
|
|
|
net.setInput(blob);
|
|
net.setInput(blob);
|
|
|
|
|
|
|
|
|
|
+ std::string info = custom_printf("✅ blob 形状:%d×%d×%d×%d\n", blob.size[0], blob.size[1], blob.size[2], blob.size[3]);
|
|
|
|
|
+ DEBUG_LOG(info.c_str());
|
|
|
|
|
+
|
|
|
auto time_2 = std::chrono::high_resolution_clock::now();
|
|
auto time_2 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
|
|
|
std::vector<cv::String> layerNames = net.getLayerNames();
|
|
std::vector<cv::String> layerNames = net.getLayerNames();
|
|
|
std::vector<cv::String> outputNames;
|
|
std::vector<cv::String> outputNames;
|
|
|
|
|
|
|
|
- // 选择GAP层(对于yolo2026,通常是倒数第6层)的输出作为特征向量
|
|
|
|
|
- outputNames.push_back(layerNames[layerNames.size() - 6]);
|
|
|
|
|
|
|
+ std::vector<float> features;
|
|
|
|
|
|
|
|
- std::vector<cv::Mat> outputs;
|
|
|
|
|
- net.forward(outputs, outputNames);
|
|
|
|
|
|
|
+ int layerIndex = -6;
|
|
|
|
|
+ if (layerIndex == -1)
|
|
|
|
|
+ {
|
|
|
|
|
+ outputNames.push_back(layerNames[layerNames.size() - 1]);
|
|
|
|
|
|
|
|
- auto time_3 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
|
|
+ std::vector<cv::Mat> outputs;
|
|
|
|
|
+ net.forward(outputs, outputNames);
|
|
|
|
|
|
|
|
- // 检查输出是否有效
|
|
|
|
|
- if (outputs.empty() || outputs[0].empty())
|
|
|
|
|
- {
|
|
|
|
|
- throw std::runtime_error("模型前向传播未产生有效输出");
|
|
|
|
|
|
|
+ for (size_t i = 0; i < outputs.size(); ++i)
|
|
|
|
|
+ {
|
|
|
|
|
+ cv::Mat output = outputs[i];
|
|
|
|
|
+ features.reserve(features.size() + output.total());
|
|
|
|
|
+ for (int j = 0; j < output.total(); ++j)
|
|
|
|
|
+ {
|
|
|
|
|
+ features.push_back(output.at<float>(j));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
+ else if (layerIndex == -6)
|
|
|
|
|
+ {
|
|
|
|
|
+ // 选择GAP层(对于yolo2026,通常是倒数第6层)的输出作为特征向量
|
|
|
|
|
+ outputNames.push_back(layerNames[layerNames.size() - 6]);
|
|
|
|
|
+
|
|
|
|
|
+ std::vector<cv::Mat> outputs;
|
|
|
|
|
+ net.forward(outputs, outputNames);
|
|
|
|
|
|
|
|
- // 获取GAP层输出并转换为特征向量
|
|
|
|
|
- //cv::Mat featuresMat = outputs[0].reshape(1, 1);
|
|
|
|
|
- cv::Mat featuresMat = outputs[0];
|
|
|
|
|
- cv::normalize(featuresMat, featuresMat, 1.0, 0.0, cv::NORM_L2);
|
|
|
|
|
|
|
+ // 检查输出是否有效
|
|
|
|
|
+ if (outputs.empty() || outputs[0].empty())
|
|
|
|
|
+ {
|
|
|
|
|
+ throw std::runtime_error("模型前向传播未产生有效输出");
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- // 转换为std::vector<float>
|
|
|
|
|
- std::vector<float> features(featuresMat.begin<float>(), featuresMat.end<float>());
|
|
|
|
|
|
|
+ // 获取GAP层输出并转换为特征向量
|
|
|
|
|
+ cv::Mat featuresMat = outputs[0];
|
|
|
|
|
+ info = printf("✅ 原始特征形状:%d×%d,类型:%d(CV_32F=5)\n", featuresMat.cols, featuresMat.rows, featuresMat.type());
|
|
|
|
|
+ DEBUG_LOG(info.c_str());
|
|
|
|
|
+
|
|
|
|
|
+ cv::Mat featuresMatVec = featuresMat.reshape(1, 1);
|
|
|
|
|
+ info = printf("✅ 重塑后特征形状:%d×%d,类型:%d\n", featuresMatVec.cols, featuresMatVec.rows, featuresMatVec.type());
|
|
|
|
|
+ DEBUG_LOG(info.c_str());
|
|
|
|
|
|
|
|
- /*
|
|
|
|
|
- // 应用全局平均池化获取特征向量
|
|
|
|
|
- //std::vector<float> features = globalAveragePooling(outputs[0]);
|
|
|
|
|
|
|
+ float norm_before = cv::norm(featuresMatVec, cv::NORM_L2);
|
|
|
|
|
+ printf("📌 归一化前 norm:%.6f\n", norm_before);
|
|
|
|
|
|
|
|
- // L2归一化特征向量
|
|
|
|
|
- if (!features.empty())
|
|
|
|
|
- {
|
|
|
|
|
- float norm = std::sqrt(std::inner_product(features.begin(), features.end(), features.begin(), 0.0f));
|
|
|
|
|
- if (norm > 1e-6)
|
|
|
|
|
|
|
+
|
|
|
|
|
+ normalizeL2(featuresMatVec);
|
|
|
|
|
+ //cv::normalize(featuresMat, featuresMat, 1.0, 0.0, cv::NORM_L2);
|
|
|
|
|
+
|
|
|
|
|
+ float norm_after = cv::norm(featuresMatVec, cv::NORM_L2);
|
|
|
|
|
+ printf("📌 归一化后 norm:%.6f\n", norm_after);
|
|
|
|
|
+
|
|
|
|
|
+ features.reserve(features.size() + featuresMat.total());
|
|
|
|
|
+ for (int j = 0; j < featuresMat.total(); ++j)
|
|
|
{
|
|
{
|
|
|
- for (auto & val : features)
|
|
|
|
|
- {
|
|
|
|
|
- val /= norm;
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ features.push_back(featuresMat.at<float>(j));
|
|
|
}
|
|
}
|
|
|
- }*/
|
|
|
|
|
|
|
|
|
|
- auto time_4 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
|
|
+ int a = 1;
|
|
|
|
|
+
|
|
|
|
|
+ // 转换为std::vector<float>
|
|
|
|
|
+ //features = std::vector<float>(featuresMat.begin<float>(), featuresMat.end<float>());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+ auto time_3 = std::chrono::high_resolution_clock::now();
|
|
|
|
|
|
|
|
|
|
|
|
|
auto duration_1 = std::chrono::duration_cast<std::chrono::milliseconds>(time_2 - time_1);
|
|
auto duration_1 = std::chrono::duration_cast<std::chrono::milliseconds>(time_2 - time_1);
|
|
@@ -132,173 +171,19 @@ std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & ima
|
|
|
DEBUG_LOG(msg.c_str());
|
|
DEBUG_LOG(msg.c_str());
|
|
|
|
|
|
|
|
auto duration_2 = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_2);
|
|
auto duration_2 = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_2);
|
|
|
- std::wstring msg2 = L"模型前向传播完成,耗时: " + std::to_wstring(duration_2.count()) + L" 毫秒";
|
|
|
|
|
|
|
+ std::wstring msg2 = L"模型推理完成,耗时: " + std::to_wstring(duration_2.count()) + L" 毫秒";
|
|
|
DEBUG_LOG(msg2.c_str());
|
|
DEBUG_LOG(msg2.c_str());
|
|
|
|
|
|
|
|
- auto duration_3 = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_3);
|
|
|
|
|
- std::wstring msg3 = L"特征处理完成,耗时: " + std::to_wstring(duration_3.count()) + L" 毫秒";
|
|
|
|
|
- DEBUG_LOG(msg3.c_str());
|
|
|
|
|
-
|
|
|
|
|
- auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(time_4 - time_1);
|
|
|
|
|
|
|
+ auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(time_3 - time_1);
|
|
|
std::wstring msg4 = L"总耗时: " + std::to_wstring(totalDuration.count()) + L" 毫秒";
|
|
std::wstring msg4 = L"总耗时: " + std::to_wstring(totalDuration.count()) + L" 毫秒";
|
|
|
DEBUG_LOG(msg4.c_str());
|
|
DEBUG_LOG(msg4.c_str());
|
|
|
|
|
|
|
|
return features;
|
|
return features;
|
|
|
}
|
|
}
|
|
|
- catch (const std::exception & e)
|
|
|
|
|
|
|
+ catch (const std::exception& e)
|
|
|
{
|
|
{
|
|
|
std::string aa = std::string(e.what());
|
|
std::string aa = std::string(e.what());
|
|
|
DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
|
|
DEBUG_LOG(("提取特征失败: " + std::string(e.what())).c_str());
|
|
|
return {};
|
|
return {};
|
|
|
}
|
|
}
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-std::vector<float> YoloFeatureExtractor::extractBackboneFeatures(const std::string & imagePath)
|
|
|
|
|
-{
|
|
|
|
|
- cv::Mat image = cv::imread(imagePath);
|
|
|
|
|
- if (image.empty())
|
|
|
|
|
- {
|
|
|
|
|
- throw std::runtime_error("Could not load image: " + imagePath);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- cv::Mat blob;
|
|
|
|
|
- cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
|
|
|
|
|
- net.setInput(blob);
|
|
|
|
|
-
|
|
|
|
|
- std::vector<cv::String> layerNames = net.getLayerNames();
|
|
|
|
|
- std::vector<cv::String> backboneLayers;
|
|
|
|
|
-
|
|
|
|
|
- for (const auto & name : layerNames)
|
|
|
|
|
- {
|
|
|
|
|
- if (name.find("backbone") != std::string::npos ||
|
|
|
|
|
- name.find("conv") != std::string::npos ||
|
|
|
|
|
- name.find("stage") != std::string::npos)
|
|
|
|
|
- {
|
|
|
|
|
- backboneLayers.push_back(name);
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- if (backboneLayers.empty())
|
|
|
|
|
- {
|
|
|
|
|
- backboneLayers.push_back(layerNames[layerNames.size() / 2]);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- std::vector<cv::Mat> outputs;
|
|
|
|
|
- net.forward(outputs, backboneLayers);
|
|
|
|
|
-
|
|
|
|
|
- std::vector<float> features;
|
|
|
|
|
- for (size_t i = 0; i < outputs.size(); ++i)
|
|
|
|
|
- {
|
|
|
|
|
- cv::Mat output = outputs[i];
|
|
|
|
|
- features.reserve(features.size() + output.total());
|
|
|
|
|
- for (int j = 0; j < output.total(); ++j)
|
|
|
|
|
- {
|
|
|
|
|
- features.push_back(output.at<float>(j));
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- return features;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-std::vector<std::vector<float>> YoloFeatureExtractor::extractROIFeatures(const std::string & imagePath)
|
|
|
|
|
-{
|
|
|
|
|
- cv::Mat image = cv::imread(imagePath);
|
|
|
|
|
- if (image.empty())
|
|
|
|
|
- {
|
|
|
|
|
- throw std::runtime_error("Could not load image: " + imagePath);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- cv::Mat blob;
|
|
|
|
|
- cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
|
|
|
|
|
- net.setInput(blob);
|
|
|
|
|
-
|
|
|
|
|
- std::vector<cv::Mat> outputs;
|
|
|
|
|
- net.forward(outputs, net.getUnconnectedOutLayersNames());
|
|
|
|
|
-
|
|
|
|
|
- const float CONFIDENCE_THRESHOLD = 0.5;
|
|
|
|
|
- const float NMS_THRESHOLD = 0.4;
|
|
|
|
|
-
|
|
|
|
|
- std::vector<int> classIds;
|
|
|
|
|
- std::vector<float> confidences;
|
|
|
|
|
- std::vector<cv::Rect> boxes;
|
|
|
|
|
-
|
|
|
|
|
- float x_factor = static_cast<float>(image.cols) / inputWidth;
|
|
|
|
|
- float y_factor = static_cast<float>(image.rows) / inputHeight;
|
|
|
|
|
-
|
|
|
|
|
- for (size_t outputIdx = 0; outputIdx < outputs.size(); ++outputIdx)
|
|
|
|
|
- {
|
|
|
|
|
- float * data = (float *)outputs[outputIdx].data;
|
|
|
|
|
- int rows = outputs[outputIdx].rows;
|
|
|
|
|
- int dimensions = outputs[outputIdx].cols;
|
|
|
|
|
-
|
|
|
|
|
- for (int i = 0; i < rows; ++i)
|
|
|
|
|
- {
|
|
|
|
|
- float objectness = data[4];
|
|
|
|
|
- if (objectness >= CONFIDENCE_THRESHOLD)
|
|
|
|
|
- {
|
|
|
|
|
- std::vector<float> probs;
|
|
|
|
|
- for (int c = 5; c < dimensions; ++c)
|
|
|
|
|
- {
|
|
|
|
|
- probs.push_back(data[c]);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- int maxClassId = 0;
|
|
|
|
|
- float maxScore = probs[0];
|
|
|
|
|
- for (size_t p = 1; p < probs.size(); ++p)
|
|
|
|
|
- {
|
|
|
|
|
- if (probs[p] > maxScore)
|
|
|
|
|
- {
|
|
|
|
|
- maxScore = probs[p];
|
|
|
|
|
- maxClassId = static_cast<int>(p);
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- if (maxScore > CONFIDENCE_THRESHOLD)
|
|
|
|
|
- {
|
|
|
|
|
- confidences.push_back(objectness * maxScore);
|
|
|
|
|
- classIds.push_back(maxClassId);
|
|
|
|
|
-
|
|
|
|
|
- float x = data[0];
|
|
|
|
|
- float y = data[1];
|
|
|
|
|
- float w = data[2];
|
|
|
|
|
- float h = data[3];
|
|
|
|
|
-
|
|
|
|
|
- int left = static_cast<int>((x - 0.5 * w) * x_factor);
|
|
|
|
|
- int top = static_cast<int>((y - 0.5 * h) * y_factor);
|
|
|
|
|
- int width = static_cast<int>(w * x_factor);
|
|
|
|
|
- int height = static_cast<int>(h * y_factor);
|
|
|
|
|
-
|
|
|
|
|
- boxes.push_back(cv::Rect(left, top, width, height));
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- data += dimensions;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- std::vector<int> nms_result;
|
|
|
|
|
- cv::dnn::NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD, nms_result);
|
|
|
|
|
-
|
|
|
|
|
- std::vector<std::vector<float>> roiFeatures;
|
|
|
|
|
- for (size_t i = 0; i < nms_result.size(); ++i)
|
|
|
|
|
- {
|
|
|
|
|
- int idx = nms_result[i];
|
|
|
|
|
- cv::Rect box = boxes[idx];
|
|
|
|
|
-
|
|
|
|
|
- box.x = std::max(0, std::min(box.x, image.cols - 1));
|
|
|
|
|
- box.y = std::max(0, std::min(box.y, image.rows - 1));
|
|
|
|
|
- box.width = std::max(0, std::min(box.width, image.cols - box.x));
|
|
|
|
|
- box.height = std::max(0, std::min(box.height, image.rows - box.y));
|
|
|
|
|
-
|
|
|
|
|
- std::vector<float> roiFeature;
|
|
|
|
|
- roiFeature.push_back(static_cast<float>(box.x) / image.cols);
|
|
|
|
|
- roiFeature.push_back(static_cast<float>(box.y) / image.rows);
|
|
|
|
|
- roiFeature.push_back(static_cast<float>(box.width) / image.cols);
|
|
|
|
|
- roiFeature.push_back(static_cast<float>(box.height) / image.rows);
|
|
|
|
|
- roiFeature.push_back(confidences[idx]);
|
|
|
|
|
- roiFeature.push_back(static_cast<float>(classIds[idx]));
|
|
|
|
|
-
|
|
|
|
|
- roiFeatures.push_back(roiFeature);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- return roiFeatures;
|
|
|
|
|
}
|
|
}
|