YoloFeatureExtractor.cpp 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. 
  2. #include "YoloFeatureExtractor.h"
  3. #include <fstream>
  4. #include <algorithm>
  5. #include <iostream>
  6. #include <opencv2/core/ocl.hpp>
  7. YoloFeatureExtractor::YoloFeatureExtractor(const std::string & modelPath, const std::string & classesPath)
  8. : inputWidth(448), inputHeight(448)
  9. {
  10. try
  11. {
  12. net = cv::dnn::readNetFromONNX(modelPath);
  13. //net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
  14. //net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); // 或DNN_TARGET_MYRIAD等
  15. loadClassNames(classesPath);
  16. }
  17. catch (const std::exception& e)
  18. {
  19. std::string aa = std::string(e.what());
  20. std::cout << "Error loading model: " << aa << std::endl;
  21. }
  22. }
  23. YoloFeatureExtractor::YoloFeatureExtractor(const std::string& modelPath, const std::string& configPath, const std::string& classesPath)
  24. {
  25. try
  26. {
  27. net = cv::dnn::readNetFromModelOptimizer(modelPath, configPath);
  28. // 设置目标设备 (可选: CPU, GPU, MYRIAD等)
  29. net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
  30. net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); // 或DNN_TARGET_MYRIAD等
  31. loadClassNames(classesPath);
  32. }
  33. catch (const std::exception& e)
  34. {
  35. std::string aa = std::string(e.what());
  36. std::cout << "Error loading model: " << aa << std::endl;
  37. }
  38. }
  39. void YoloFeatureExtractor::initOpenCL()
  40. {
  41. if (cv::ocl::haveOpenCL())
  42. {
  43. std::cout << "OpenCL is available. Enabling OpenCL for DNN module." << std::endl;
  44. net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
  45. net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
  46. }
  47. else
  48. {
  49. std::cout << "OpenCL is not available. Using default backend and target." << std::endl;
  50. net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
  51. net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
  52. }
  53. }
  54. void YoloFeatureExtractor::loadClassNames(const std::string & file)
  55. {
  56. std::ifstream ifs(file);
  57. std::string line;
  58. while (std::getline(ifs, line))
  59. {
  60. classNames.push_back(line);
  61. }
  62. }
  63. std::vector<float> YoloFeatureExtractor::extractFeatures(const std::string & imagePath)
  64. {
  65. auto start_time = std::chrono::high_resolution_clock::now();
  66. cv::Mat image = cv::imread(imagePath);
  67. if (image.empty())
  68. {
  69. throw std::runtime_error("Could not load image: " + imagePath);
  70. }
  71. cv::Mat blob;
  72. cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
  73. net.setInput(blob);
  74. auto start_time2 = std::chrono::high_resolution_clock::now();
  75. std::vector<cv::Mat> outputs;
  76. net.forward(outputs, net.getUnconnectedOutLayersNames());
  77. auto start_time3 = std::chrono::high_resolution_clock::now();
  78. std::vector<float> features;
  79. for (size_t i = 0; i < outputs.size(); ++i)
  80. {
  81. float * data = (float *)outputs[i].data;
  82. int totalElements = outputs[i].total();
  83. for (int idx = 0; idx < totalElements; ++idx)
  84. {
  85. features.push_back(data[idx]);
  86. }
  87. }
  88. auto duration1 = std::chrono::duration_cast<std::chrono::milliseconds>(start_time2 - start_time);
  89. auto duration2 = std::chrono::duration_cast<std::chrono::milliseconds>(start_time3 - start_time2);
  90. auto totalDuration = std::chrono::duration_cast<std::chrono::milliseconds>(start_time3 - start_time);
  91. std::cout << "图片前期处理耗时: " << duration1.count() << " 毫秒" << std::endl;
  92. std::cout << "模型推理耗时: " << duration2.count() << " 毫秒" << std::endl;
  93. std::cout << "总耗时: " << totalDuration.count() << " 毫秒" << std::endl;
  94. return features;
  95. }
  96. std::vector<float> YoloFeatureExtractor::extractBackboneFeatures(const std::string & imagePath)
  97. {
  98. cv::Mat image = cv::imread(imagePath);
  99. if (image.empty())
  100. {
  101. throw std::runtime_error("Could not load image: " + imagePath);
  102. }
  103. cv::Mat blob;
  104. cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
  105. net.setInput(blob);
  106. std::vector<cv::String> layerNames = net.getLayerNames();
  107. std::vector<cv::String> backboneLayers;
  108. for (const auto & name : layerNames)
  109. {
  110. if (name.find("backbone") != std::string::npos ||
  111. name.find("conv") != std::string::npos ||
  112. name.find("stage") != std::string::npos)
  113. {
  114. backboneLayers.push_back(name);
  115. }
  116. }
  117. if (backboneLayers.empty())
  118. {
  119. backboneLayers.push_back(layerNames[layerNames.size() / 2]);
  120. }
  121. std::vector<cv::Mat> outputs;
  122. net.forward(outputs, backboneLayers);
  123. std::vector<float> features;
  124. for (size_t i = 0; i < outputs.size(); ++i)
  125. {
  126. cv::Mat output = outputs[i];
  127. features.reserve(features.size() + output.total());
  128. for (int j = 0; j < output.total(); ++j)
  129. {
  130. features.push_back(output.at<float>(j));
  131. }
  132. }
  133. return features;
  134. }
  135. std::vector<std::vector<float>> YoloFeatureExtractor::extractROIFeatures(const std::string & imagePath)
  136. {
  137. cv::Mat image = cv::imread(imagePath);
  138. if (image.empty())
  139. {
  140. throw std::runtime_error("Could not load image: " + imagePath);
  141. }
  142. cv::Mat blob;
  143. cv::dnn::blobFromImage(image, blob, 1.0 / 255.0, cv::Size(inputWidth, inputHeight), cv::Scalar(0, 0, 0), true, false);
  144. net.setInput(blob);
  145. std::vector<cv::Mat> outputs;
  146. net.forward(outputs, net.getUnconnectedOutLayersNames());
  147. const float CONFIDENCE_THRESHOLD = 0.5;
  148. const float NMS_THRESHOLD = 0.4;
  149. std::vector<int> classIds;
  150. std::vector<float> confidences;
  151. std::vector<cv::Rect> boxes;
  152. float x_factor = static_cast<float>(image.cols) / inputWidth;
  153. float y_factor = static_cast<float>(image.rows) / inputHeight;
  154. for (size_t outputIdx = 0; outputIdx < outputs.size(); ++outputIdx)
  155. {
  156. float * data = (float *)outputs[outputIdx].data;
  157. int rows = outputs[outputIdx].rows;
  158. int dimensions = outputs[outputIdx].cols;
  159. for (int i = 0; i < rows; ++i)
  160. {
  161. float objectness = data[4];
  162. if (objectness >= CONFIDENCE_THRESHOLD)
  163. {
  164. std::vector<float> probs;
  165. for (int c = 5; c < dimensions; ++c)
  166. {
  167. probs.push_back(data[c]);
  168. }
  169. int maxClassId = 0;
  170. float maxScore = probs[0];
  171. for (size_t p = 1; p < probs.size(); ++p)
  172. {
  173. if (probs[p] > maxScore)
  174. {
  175. maxScore = probs[p];
  176. maxClassId = static_cast<int>(p);
  177. }
  178. }
  179. if (maxScore > CONFIDENCE_THRESHOLD)
  180. {
  181. confidences.push_back(objectness * maxScore);
  182. classIds.push_back(maxClassId);
  183. float x = data[0];
  184. float y = data[1];
  185. float w = data[2];
  186. float h = data[3];
  187. int left = static_cast<int>((x - 0.5 * w) * x_factor);
  188. int top = static_cast<int>((y - 0.5 * h) * y_factor);
  189. int width = static_cast<int>(w * x_factor);
  190. int height = static_cast<int>(h * y_factor);
  191. boxes.push_back(cv::Rect(left, top, width, height));
  192. }
  193. }
  194. data += dimensions;
  195. }
  196. }
  197. std::vector<int> nms_result;
  198. cv::dnn::NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD, nms_result);
  199. std::vector<std::vector<float>> roiFeatures;
  200. for (size_t i = 0; i < nms_result.size(); ++i)
  201. {
  202. int idx = nms_result[i];
  203. cv::Rect box = boxes[idx];
  204. box.x = std::max(0, std::min(box.x, image.cols - 1));
  205. box.y = std::max(0, std::min(box.y, image.rows - 1));
  206. box.width = std::max(0, std::min(box.width, image.cols - box.x));
  207. box.height = std::max(0, std::min(box.height, image.rows - box.y));
  208. std::vector<float> roiFeature;
  209. roiFeature.push_back(static_cast<float>(box.x) / image.cols);
  210. roiFeature.push_back(static_cast<float>(box.y) / image.rows);
  211. roiFeature.push_back(static_cast<float>(box.width) / image.cols);
  212. roiFeature.push_back(static_cast<float>(box.height) / image.rows);
  213. roiFeature.push_back(confidences[idx]);
  214. roiFeature.push_back(static_cast<float>(classIds[idx]));
  215. roiFeatures.push_back(roiFeature);
  216. }
  217. return roiFeatures;
  218. }