// yolov5.cpp
  #include <algorithm>
  #include <cmath>
  #include <cstdio>
  #include <iostream>
  #include <string>
  #include <utility>
  #include <vector>

  #include <opencv2/opencv.hpp>
  #include <opencv2/dnn/dnn.hpp>

  using cv::Mat;
  using std::cout;
  using std::endl;
  using std::string;
  using std::vector;
  // Class labels, indexed by class id.
  static const vector<string> class_name = {"person"};

  // Alternative anchor set, currently disabled:
  // const vector<vector<vector<float>>> anchors =
  // {{{3.90, 5.848}, {6.684, 6.81}, {30.48, 25.40}},
  // {{38.40, 32}, {48.38, 40.32}, {60.96, 50.80}},
  // {{76.8, 64}, {96.76, 80.63}, {121.91, 101.59}}};

  // anchors[head][anchor] holds two factors; postprocess() multiplies output
  // channel 2 by the first and channel 3 by the second.
  const vector<vector<vector<float>>> anchors = {
      {{3.90, 5.848}, {6.684, 6.81}, {5.79, 9.88}},
      {{7.754, 11.71}, {10.25, 16.5}, {13.66, 12.74}},
      {{14.73, 22.51}, {19.187, 33.625}, {33.906, 54.719}},
  };

  // NOTE(review): not referenced by the code visible in this file; the decode
  // step uses stride_list instead.
  const vector<int> strides = {8, 16, 32};

  // Grid side length handed to make_grid() for each output head.
  const vector<int> grid_sizes = {160, 80, 40};

  // Per-head multiplier applied to the decoded x/y channels in postprocess().
  const vector<float> stride_list = {4, 8, 16};
  // Logistic function: returns 1 / (1 + e^(-x)).
  // The exponential is evaluated in float; the division is done in double
  // and the quotient is narrowed back to float on return.
  float sigmoid(float x) {
    const float decay = std::exp(-x);
    const double denominator = 1.0 + decay;
    return static_cast<float>(1.0 / denominator);
  }
  // Builds a coordinate lookup table shaped [1][1][ny][nx][2].
  // For every cell, element 0 is the column index (x) and element 1 is the
  // row index (y).
  std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> make_grid(int nx = 20, int ny = 20) {
    using Cell = std::vector<float>;
    using Row = std::vector<Cell>;
    using Plane = std::vector<Row>;
    using Volume = std::vector<Plane>;
    using Grid = std::vector<Volume>;

    Grid grid(1, Volume(1, Plane(ny, Row(nx, Cell(2)))));

    Plane &plane = grid[0][0];
    for (int row = 0; row < ny; ++row) {
      Row &cells = plane[row];
      for (int col = 0; col < nx; ++col) {
        Cell &cell = cells[col];
        cell[0] = static_cast<float>(col);  // xv
        cell[1] = static_cast<float>(row);  // yv
      }
    }
    return grid;
  }
  40. vector<Mat> postprocess(const vector<Mat> &out, float conf = 0.7, int len_data = 6){
  41. int nc = 1;
  42. int no = nc + 5;
  43. int nl = anchors.size();
  44. //int nl = 1;
  45. int na = anchors[0].size() / 2;
  46. vector<Mat> result;
  47. for(int i = 0; i < nl; i++){
  48. std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> grid = make_grid(grid_sizes[i], grid_sizes[i]);
  49. cv::Mat x = out[i];
  50. cv::Mat x_sigmoid = x.clone();
  51. for (int n = 0; n < x.size[0]; n++) {
  52. for (int j = 0; j < x_sigmoid.size[1]; j++) {
  53. for (int k = 0; k < x_sigmoid.size[2]; k++) {
  54. for (int l = 0; l < x_sigmoid.size[3]; l++) {
  55. for (int m = 0; m < x_sigmoid.size[4]; m++) {
  56. // 假设数据是多通道的二维数据
  57. // int channel = l * x_sigmoid.size[3] + m;
  58. int offset = j * x_sigmoid.size[2]*x_sigmoid.size[3]*x_sigmoid.size[4] + k * x_sigmoid.size[3] * x_sigmoid.size[4] + l *x_sigmoid.size[4] + m;
  59. x_sigmoid.at<float>(n, offset) = sigmoid(x.at<float>(n, offset));
  60. float x_sigmoid_before = x_sigmoid.at<float>(n, offset);
  61. if(m < 2){
  62. x_sigmoid.at<float>(n, offset) = ((x_sigmoid.at<float>(n, offset) * 2.0) - 0.5 + grid[0][0][k][l][m]) * stride_list[i];
  63. }else if(m < 4){
  64. x_sigmoid.at<float>(n, offset) = (x_sigmoid.at<float>(n, offset) * 2.0)*(x_sigmoid.at<float>(n, offset) * 2) * anchors[i][j][m-2];
  65. }
  66. //printf("j: %d, k: %d, l: %d, m: %d, before: %f, data: %f \n", j, k, l, m, x_sigmoid_before, x_sigmoid.at<float>(j, offset));
  67. }
  68. }
  69. }
  70. }
  71. }
  72. //printf("x_sigmoid.size: %d %d %d %d\n", x_sigmoid.size[1], x_sigmoid.size[2], x_sigmoid.size[3], x_sigmoid.size[4]);
  73. //printf("x_sigmoid value: %f\n", x_sigmoid.at<cv::Vec<float, 1>>(0, 0)[0]);
  74. x_sigmoid = x_sigmoid.reshape(1, {x_sigmoid.size[1]*x_sigmoid.size[2]*x_sigmoid.size[3], 6});
  75. result.push_back(x_sigmoid);
  76. }
  77. return result;
  78. };
  79. void print_result(const Mat &result, float conf = 0.7, int len_data = 6)
  80. {
  81. float *pdata = (float *)result.data;
  82. for (int i = 0; i < result.total() / len_data; i++)
  83. {
  84. if (pdata[4] > conf)
  85. {
  86. for (int j = 0; j < len_data; j++)
  87. {
  88. cout << pdata[j] << " ";
  89. }
  90. cout << endl;
  91. }
  92. pdata += len_data;
  93. }
  94. return;
  95. }
  96. vector<vector<float>> get_info(const Mat &result, float conf = 0.7, int len_data = 6)
  97. {
  98. float *pdata = (float *)result.data;
  99. vector<vector<float>> info;
  100. for (int i = 0; i < result.total() / len_data; i++)
  101. {
  102. if (pdata[4] > conf)
  103. {
  104. vector<float> info_line;
  105. for (int j = 0; j < len_data; j++)
  106. {
  107. // cout << pdata[j] << " ";
  108. info_line.push_back(pdata[j]);
  109. }
  110. // cout << endl;
  111. info.push_back(info_line);
  112. }
  113. pdata += len_data;
  114. }
  115. return info;
  116. }
  // Rewrites each row in place from
  //   [cx, cy, w, h, score, class scores...]
  // to
  //   [x1, y1, x2, y2, score, class id].
  //
  // info         : rows to convert; every row ends up with exactly 6 values.
  // witdh_scale  : factor applied to both x coordinates.
  // height_scale : factor applied to both y coordinates.
  //
  // The class id is the position of the largest value among the elements from
  // index 5 onward. Rows with fewer than 6 values are zero-padded first, so a
  // row without class scores gets class id 0 instead of being read and written
  // out of bounds.
  void info_simplify(vector<vector<float>> &info, float witdh_scale = 1.0, float height_scale = 1.0)
  {
    for (vector<float> &row : info)
    {
      if (row.size() < 6)
      {
        row.resize(6, 0.0f);
      }
      const auto first_score = row.cbegin() + 5;
      const auto best_score = std::max_element(first_score, row.cend());
      row[5] = static_cast<float>(best_score - first_score);
      row.resize(6);

      const float x = row[0];
      const float y = row[1];
      const float w = row[2];
      const float h = row[3];
      row[0] = static_cast<float>((x - w / 2.0) * witdh_scale);
      row[1] = static_cast<float>((y - h / 2.0) * height_scale);
      row[2] = static_cast<float>((x + w / 2.0) * witdh_scale);
      row[3] = static_cast<float>((y + h / 2.0) * height_scale);
    }
  }
  // Groups detection rows by class id (element 5, truncated to int).
  // Groups appear in the order their class id is first seen, and rows keep
  // their relative order inside each group.
  vector<vector<vector<float>>> split_info(vector<vector<float>> &info)
  {
    vector<int> seen_ids;
    vector<vector<vector<float>>> groups;
    for (const vector<float> &row : info)
    {
      const int id = (int)row[5];
      const vector<int>::size_type slot =
          static_cast<vector<int>::size_type>(std::find(seen_ids.begin(), seen_ids.end(), id) - seen_ids.begin());
      if (slot == seen_ids.size())
      {
        // First row of this class: open a new, empty group for it.
        seen_ids.push_back(id);
        groups.push_back(vector<vector<float>>());
      }
      groups[slot].push_back(row);
    }
    return groups;
  }
  // Greedy non-maximum suppression, applied in place.
  //
  // info : rows laid out as [x1, y1, x2, y2, score, ...]; every row must have
  //        at least 5 values.
  // iou  : a box is dropped when its intersection-over-union with an
  //        already-kept, higher-scoring box is not below this threshold.
  //
  // On return `info` holds the surviving rows sorted by descending score.
  // Rows with equal scores keep their input order.
  void nms(vector<vector<float>> &info, float iou = 0.45)
  {
    // Sort once by descending score; the comparator takes references so no row
    // is copied while sorting.
    std::stable_sort(info.begin(), info.end(),
                     [](const vector<float> &lhs, const vector<float> &rhs)
                     { return lhs[4] > rhs[4]; });

    vector<vector<float>> kept;
    kept.reserve(info.size());

    for (vector<float> &candidate : info)
    {
      bool suppressed = false;
      for (const vector<float> &winner : kept)
      {
        const float x1 = winner[0];
        const float y1 = winner[1];
        const float x2 = winner[2];
        const float y2 = winner[3];
        // Strictly separated boxes can never suppress each other.
        if (candidate[0] > x2 || candidate[2] < x1 || candidate[1] > y2 || candidate[3] < y1)
        {
          continue;
        }
        const float over_x1 = std::max(x1, candidate[0]);
        const float over_y1 = std::max(y1, candidate[1]);
        const float over_x2 = std::min(x2, candidate[2]);
        const float over_y2 = std::min(y2, candidate[3]);
        const float s_over = (over_x2 - over_x1) * (over_y2 - over_y1);
        const float s_total = (x2 - x1) * (y2 - y1) +
                              (candidate[2] - candidate[0]) * (candidate[3] - candidate[1]) - s_over;
        // A non-positive union means both boxes are degenerate and coincide;
        // treat the candidate as a duplicate instead of dividing by zero.
        if (s_total <= 0.0f || !(s_over / s_total < iou))
        {
          suppressed = true;
          break;
        }
      }
      if (!suppressed)
      {
        kept.push_back(std::move(candidate));
      }
    }
    info = std::move(kept);
  }
  // Writes every row of `info` to standard output: each value is followed by
  // one space, and each row is terminated by a flushed newline.
  void print_info(const vector<vector<float>> &info)
  {
    for (const vector<float> &row : info)
    {
      for (const float value : row)
      {
        cout << value << " ";
      }
      cout << endl;
    }
  }
  212. void draw_box(Mat &img, const vector<vector<float>> &info)
  213. {
  214. for (int i = 0; i < info.size(); i++)
  215. {
  216. cv::rectangle(img, cv::Point(info[i][0], info[i][1]), cv::Point(info[i][2], info[i][3]), cv::Scalar(0, 255, 0));
  217. // string label;
  218. // label += class_name[info[i][5]];
  219. // label += " ";
  220. // std::stringstream oss;
  221. // oss << info[i][4];
  222. // label += oss.str();
  223. // cv::putText(img, label, cv::Point(info[i][0], info[i][1]), 1, 2, cv::Scalar(0, 255, 0), 2);
  224. }
  225. // 在左上角显示总数
  226. string head_info = "Total: " + std::to_string(info.size());
  227. cv::putText(img, head_info, cv::Point(10, 20), 1, 2, cv::Scalar(0, 255, 0), 2);
  228. }
  229. int main()
  230. {
  231. cv::dnn::Net net = cv::dnn::readNet("yolo-crowd-n.onnx");
  232. printf("net loaded\n");
  233. Mat img = cv::imread("image.jpg", -1);
  234. if (img.empty()) {
  235. std::cout << "Failed to load image" << std::endl;
  236. return -1;
  237. }
  238. // 获取图像的高度、宽度和通道数
  239. int height = img.rows; // 图像高度
  240. int width = img.cols; // 图像宽度
  241. int channels = img.channels(); // 图像通道数
  242. // 输出图像信息
  243. std::cout << "Image size: " << height << " x " << width << ", Channels: " << channels << std::endl;
  244. cv::resize(img, img, cv::Size(640, 640));
  245. Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true); // bgr rgb
  246. float height_scale = height / 640.0;
  247. float width_scale = width / 640.0;
  248. net.setInput(blob);
  249. vector<Mat> netoutput;
  250. vector<string> out_name = {"output", "947", "961"};
  251. net.forward(netoutput, out_name);
  252. printf("netouput_size:%ld \n", netoutput.size());
  253. // 将输出根据grid和anchor进行计算,得到预测框的坐标和置信度
  254. vector<Mat> result_list = postprocess(netoutput);
  255. // 拼接
  256. Mat concatMAT;
  257. vconcat(result_list[0], result_list[1], concatMAT);
  258. vconcat(concatMAT, result_list[2], concatMAT);
  259. Mat result = concatMAT;
  260. // 根据置信度 获得有效框
  261. vector<vector<float>> info = get_info(result, 0.25, 6);
  262. info_simplify(info, width_scale, height_scale);
  263. vector<vector<vector<float>>> info_split = split_info(info);
  264. printf("info size: %ld\n", info_split.size());
  265. cv::resize(img, img, cv::Size(width, height));
  266. for(auto i=0; i < info_split.size(); i++)
  267. {
  268. printf("class %d size: %ld\n", i, info_split[i].size());
  269. // 根据阈值进行nms
  270. nms(info_split[i], 0.45);
  271. draw_box(img, info_split[i]);
  272. printf("class %d size: %ld\n", i, info_split[i].size());
  273. }
  274. cv::imwrite("result.jpg", img);
  275. return 0;
  276. }