// yolov5_trt.cpp
  #include <algorithm>
  #include <cassert>
  #include <cmath>
  #include <cstddef>
  #include <cstdio>
  #include <cstdlib>
  #include <cstring>
  #include <fstream>
  #include <iostream>
  #include <sstream>
  #include <string>
  #include <vector>

  #include <opencv2/opencv.hpp>
  #include <opencv2/dnn/dnn.hpp>

  #include <NvInfer.h>
  #include </home/cl/package/TensorRT-8.6.1.6/samples/common/logger.h>

  using cv::Mat;
  using std::cout;
  using std::endl;
  using std::string;
  using std::vector;
  using namespace nvinfer1;
  // Class labels. In the visible code this table is only referenced from the
  // commented-out label-drawing code in draw_box().
  static const std::vector<std::string> class_name = {"person"};

  // Alternative anchor set, kept for reference (disabled):
  //   {{{3.90, 5.848}, {6.684, 6.81}, {30.48, 25.40}},
  //    {{38.40, 32}, {48.38, 40.32}, {60.96, 50.80}},
  //    {{76.8, 64}, {96.76, 80.63}, {121.91, 101.59}}}

  // Anchor sizes used by postprocess(), indexed as anchors[level][anchor][0 = w, 1 = h].
  const std::vector<std::vector<std::vector<float>>> anchors = {
      {{3.90, 5.848}, {6.684, 6.81}, {5.79, 9.88}},
      {{7.754, 11.71}, {10.25, 16.5}, {13.66, 12.74}},
      {{14.73, 22.51}, {19.187, 33.625}, {33.906, 54.719}},
  };

  // Not referenced by any function visible in this file; postprocess() reads
  // stride_list below instead.
  const std::vector<int> strides = {8, 16, 32};

  // Side length (in cells) of the square grid of each detection level.
  const std::vector<int> grid_sizes = {160, 80, 40};

  // Pixels per grid cell for each detection level (640 / grid_sizes[level]).
  const std::vector<float> stride_list = {4, 8, 16};
  /**
   * Logistic sigmoid 1 / (1 + e^-x).
   *
   * The exponential is evaluated on the float argument; the addition and the
   * division are carried out in double and the quotient is narrowed to float
   * on return.
   *
   * @param x raw value
   * @return sigmoid of x, in [0, 1]
   */
  float sigmoid(float x) {
      const double denominator = 1.0 + std::exp(-x);
      return 1.0 / denominator;
  }
  /**
   * Builds a coordinate grid of shape [1][1][ny][nx][2].
   *
   * Element [0][0][y][x] holds the pair {x, y}, i.e. the column index in slot 0
   * and the row index in slot 1.
   *
   * @param nx number of columns (default 20)
   * @param ny number of rows (default 20)
   * @return the nested-vector grid
   */
  std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> make_grid(int nx = 20, int ny = 20) {
      using Cell = std::vector<float>;
      using Row = std::vector<Cell>;
      using Plane = std::vector<Row>;

      // Allocate the full shape up front; the two outer dimensions are singletons.
      std::vector<std::vector<Plane>> grid(1, std::vector<Plane>(1, Plane(ny, Row(nx, Cell(2)))));

      Plane &plane = grid[0][0];
      for (int row = 0; row < ny; ++row) {
          Row &cells = plane[row];
          for (int col = 0; col < nx; ++col) {
              cells[col][0] = col;  // x index
              cells[col][1] = row;  // y index
          }
      }
      return grid;
  }
  47. float* postprocess(float *results, float conf = 0.7, int len_data = 6){
  48. int nc = 1;
  49. int no = nc + 5;
  50. int nl = anchors.size();
  51. //int nl = 1;
  52. int na = anchors[0].size() / 2;
  53. int count = 0;
  54. for(int i = 0; i < nl; i++){
  55. std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> grid = make_grid(grid_sizes[i], grid_sizes[i]);
  56. for (int n = 0; n < 1; n++) {
  57. for (int j = 0; j < 3; j++) {
  58. for (int k = 0; k < grid_sizes[i]; k++) {
  59. for (int l = 0; l < grid_sizes[i]; l++) {
  60. for (int m = 0; m < 6; m++) {
  61. // 假设数据是多通道的二维数据
  62. // int channel = l * x_sigmoid.size[3] + m;
  63. int offset;
  64. if(i == 0){
  65. offset = j * grid_sizes[i] * grid_sizes[i] * 6 + k * grid_sizes[i] * 6 + l * 6 + m;
  66. }else if(i == 1){
  67. int zero_offset = 3 * 160 * 160 * 6;
  68. offset = zero_offset + j * grid_sizes[i] * grid_sizes[i] * 6 + k * grid_sizes[i] * 6 + l * 6 + m;
  69. }else if(i == 2){
  70. int zero_offset = 3 * 160 * 160 * 6 + 3 * 80 * 80 * 6;
  71. offset = zero_offset + j * grid_sizes[i] * grid_sizes[i] * 6 + k * grid_sizes[i] * 6 + l * 6 + m;
  72. }
  73. float x = results[offset];
  74. x = sigmoid(x);
  75. float x_sigmoid_before = results[offset];
  76. if(m < 2){
  77. x = ((x * 2.0) - 0.5 + grid[0][0][k][l][m]) * stride_list[i];
  78. }else if(m < 4){
  79. x = (x * 2.0)*(x * 2.0) * anchors[i][j][m-2];
  80. }
  81. results[offset] = x;
  82. count++;
  83. //printf("j: %d, k: %d, l: %d, m: %d, before: %f, data: %f \n", j, k, l, m, x_sigmoid_before, results[offset]);
  84. //printf("j: %d, k: %d, l: %d, m: %d, before: %f, data: %f \n", j, k, l, m, x_sigmoid_before, results[offset]);
  85. }
  86. }
  87. }
  88. }
  89. }
  90. }
  91. printf("postprocess count: %d\n", count);
  92. return results;
  93. };
  94. void print_result(const Mat &result, float conf = 0.7, int len_data = 6)
  95. {
  96. float *pdata = (float *)result.data;
  97. for (int i = 0; i < result.total() / len_data; i++)
  98. {
  99. if (pdata[4] > conf)
  100. {
  101. for (int j = 0; j < len_data; j++)
  102. {
  103. cout << pdata[j] << " ";
  104. }
  105. cout << endl;
  106. }
  107. pdata += len_data;
  108. }
  109. return;
  110. }
  /**
   * Collects the records whose value at index 4 exceeds `conf`.
   *
   * `result` is read as `total_len / len_data` consecutive records of
   * `len_data` floats; every record with record[4] > conf is copied into the
   * returned list, in buffer order.
   *
   * Changes over the previous version: the buffer length is a defaulted
   * parameter instead of a literal buried in the loop bound, and the function
   * returns an empty list instead of dividing by zero (len_data == 0) or
   * reading record[4] outside the record (len_data < 5).
   *
   * @param result    buffer of at least `total_len` floats
   * @param conf      threshold compared against record[4]
   * @param len_data  floats per record; must be at least 5
   * @param total_len number of floats in `result` (default 604800, the value the
   *                  previous version used unconditionally)
   * @return selected records, each `len_data` long
   */
  std::vector<std::vector<float>> get_info(float *result, float conf = 0.7, int len_data = 6, int total_len = 604800)
  {
      std::vector<std::vector<float>> info;
      if (result == nullptr || len_data < 5 || total_len < len_data)
      {
          return info;
      }

      const int record_count = total_len / len_data;
      const float *record = result;
      for (int r = 0; r < record_count; ++r, record += len_data)
      {
          if (record[4] > conf)
          {
              info.emplace_back(record, record + len_data);
          }
      }
      return info;
  }
  /**
   * Reduces each detection to 6 values and converts its box to scaled corners.
   *
   * For every row: slot 5 is replaced by the position of the largest value in
   * row[5..end) (relative to slot 5), the row is truncated to 6 values, and the
   * centre/size box in slots 0..3 (x, y, w, h) is rewritten as
   * (x - w/2, y - h/2, x + w/2, y + h/2), with x values multiplied by
   * `witdh_scale` and y values by `height_scale`.
   *
   * @param info         detections, modified in place; rows need at least 6 values
   * @param witdh_scale  factor applied to both x coordinates
   * @param height_scale factor applied to both y coordinates
   */
  void info_simplify(std::vector<std::vector<float>> &info, float witdh_scale = 1.0, float height_scale = 1.0)
  {
      for (std::vector<float> &det : info)
      {
          const auto scores_begin = det.cbegin() + 5;
          det[5] = std::max_element(scores_begin, det.cend()) - scores_begin;
          det.resize(6);

          const float cx = det[0];
          const float cy = det[1];
          const double half_w = det[2] / 2.0;
          const double half_h = det[3] / 2.0;

          det[0] = (cx - half_w) * witdh_scale;
          det[1] = (cy - half_h) * height_scale;
          det[2] = (cx + half_w) * witdh_scale;
          det[3] = (cy + half_h) * height_scale;
      }
  }
  /**
   * Groups detections by the class id stored (as a float) in slot 5.
   *
   * Groups appear in the order in which their class id is first seen, and rows
   * keep their input order within a group.
   *
   * @param info detections; each row needs at least 6 values
   * @return one list of rows per distinct class id
   */
  std::vector<std::vector<std::vector<float>>> split_info(std::vector<std::vector<float>> &info)
  {
      std::vector<std::vector<std::vector<float>>> groups;
      std::vector<int> seen_ids;  // seen_ids[g] is the class id of groups[g]
      for (const std::vector<float> &det : info)
      {
          const int id = (int)det[5];
          const std::size_t slot = std::find(seen_ids.begin(), seen_ids.end(), id) - seen_ids.begin();
          if (slot == seen_ids.size())
          {
              // First time this class id shows up: open a new group for it.
              seen_ids.push_back(id);
              groups.push_back(std::vector<std::vector<float>>());
          }
          groups[slot].push_back(det);
      }
      return groups;
  }
  /**
   * Greedy non-maximum suppression, in place.
   *
   * Rows are boxes in corner form: slots 0..3 are x1, y1, x2, y2 and slot 4 is
   * the confidence. The rows are sorted by descending confidence; a row is then
   * kept only if, against every row already kept, it is either disjoint or its
   * intersection-over-union is below `iou`.
   *
   * Changes over the previous version: that code re-sorted the whole list on
   * every pass with a comparator that copied both rows per comparison, and
   * rebuilt the full result vector once per surviving box. The suppression rule
   * is unchanged; the work is now one sort plus one pass over a keep-list.
   *
   * @param info boxes to filter; replaced by the surviving boxes, highest
   *             confidence first
   * @param iou  overlap threshold at or above which a box is suppressed
   */
  void nms(std::vector<std::vector<float>> &info, float iou = 0.45)
  {
      // Highest confidence first.
      std::sort(info.begin(), info.end(),
                [](const std::vector<float> &a, const std::vector<float> &b)
                { return a[4] > b[4]; });

      std::vector<std::vector<float>> kept;
      kept.reserve(info.size());

      for (const std::vector<float> &cand : info)
      {
          bool suppressed = false;
          for (const std::vector<float> &ref : kept)
          {
              // Boxes that do not touch at all can never suppress each other.
              if (cand[0] > ref[2] || cand[2] < ref[0] || cand[1] > ref[3] || cand[3] < ref[1])
              {
                  continue;
              }
              const float over_x1 = std::max(ref[0], cand[0]);
              const float over_y1 = std::max(ref[1], cand[1]);
              const float over_x2 = std::min(ref[2], cand[2]);
              const float over_y2 = std::min(ref[3], cand[3]);
              const float s_over = (over_x2 - over_x1) * (over_y2 - over_y1);
              const float s_total = (ref[2] - ref[0]) * (ref[3] - ref[1]) +
                                    (cand[2] - cand[0]) * (cand[3] - cand[1]) - s_over;
              // Written as !(ratio < iou) so that a 0/0 ratio (two coincident
              // zero-area boxes) suppresses the candidate, as it did before.
              if (!(s_over / s_total < iou))
              {
                  suppressed = true;
                  break;
              }
          }
          if (!suppressed)
          {
              kept.push_back(cand);
          }
      }
      info.swap(kept);
  }
  /**
   * Writes every row of `info` to std::cout, one row per line, each value
   * followed by a single space.
   *
   * @param info rows to print
   */
  void print_info(const std::vector<std::vector<float>> &info)
  {
      for (const std::vector<float> &row : info)
      {
          for (const float value : row)
          {
              std::cout << value << " ";
          }
          std::cout << std::endl;
      }
  }
  227. void draw_box(Mat &img, const vector<vector<float>> &info)
  228. {
  229. for (int i = 0; i < info.size(); i++)
  230. {
  231. cv::rectangle(img, cv::Point(info[i][0], info[i][1]), cv::Point(info[i][2], info[i][3]), cv::Scalar(0, 255, 0));
  232. // string label;
  233. // label += class_name[info[i][5]];
  234. // label += " ";
  235. // std::stringstream oss;
  236. // oss << info[i][4];
  237. // label += oss.str();
  238. // cv::putText(img, label, cv::Point(info[i][0], info[i][1]), 1, 2, cv::Scalar(0, 255, 0), 2);
  239. }
  240. // 在左上角显示总数
  241. string head_info = "Total: " + std::to_string(info.size());
  242. cv::putText(img, head_info, cv::Point(10, 20), 1, 2, cv::Scalar(0, 255, 0), 2);
  243. }
  /**
   * Aborts the process when `status` is non-zero.
   *
   * On failure the numeric status is reported on std::cerr before std::abort()
   * is called; a zero status returns normally.
   *
   * @param status status code to test (0 means success)
   */
  void CHECK(int status) {
      if (status == 0) {
          return;
      }
      std::cerr << "Cuda failure: " << status << std::endl;
      std::abort();
  }
  250. using namespace nvinfer1;
  251. using namespace sample;
  252. const char* IN_NAME = "images";
  253. const char* OUT_NAME1 = "output";
  254. const char* OUT_NAME2 = "947";
  255. const char* OUT_NAME3 = "961";
  256. static const int IN_H = 640;
  257. static const int IN_W = 640;
  258. static const int BATCH_SIZE = 1;
  259. static const int EXPLICIT_BATCH = 1 << (int)(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
  260. void doInference(IExecutionContext& context, float* input, float* output, int batchSize)
  261. {
  262. const ICudaEngine& engine = context.getEngine();
  263. // Pointers to input and output device buffers to pass to engine.
  264. // Engine requires exactly IEngine::getNbBindings() number of buffers.
  265. assert(engine.getNbBindings() == 4);
  266. void* buffers[4];
  267. // In order to bind the buffers, we need to know the names of the input and output tensors.
  268. // Note that indices are guaranteed to be less than IEngine::getNbBindings()
  269. const int inputIndex = engine.getBindingIndex(IN_NAME);
  270. const int outputIndex1 = engine.getBindingIndex(OUT_NAME1);
  271. const int outputIndex2 = engine.getBindingIndex(OUT_NAME2);
  272. const int outputIndex3 = engine.getBindingIndex(OUT_NAME3);
  273. printf("outputIndex1: %d, outputIndex2: %d, outputIndex3: %d\n", outputIndex1, outputIndex2, outputIndex3);
  274. // Create GPU buffers on device
  275. CHECK(cudaMalloc(&buffers[inputIndex], batchSize * 3 * IN_H * IN_W * sizeof(float)));
  276. CHECK(cudaMalloc(&buffers[outputIndex1], batchSize * 3 * IN_H/4 * IN_W /4 * 6 * sizeof(float)));
  277. CHECK(cudaMalloc(&buffers[outputIndex2], batchSize * 3 * IN_H/8 * IN_W /8 * 6 * sizeof(float)));
  278. CHECK(cudaMalloc(&buffers[outputIndex3], batchSize * 3 * IN_H/16 * IN_W /16 * 6 * sizeof(float)));
  279. // Create stream
  280. cudaStream_t stream;
  281. CHECK(cudaStreamCreate(&stream));
  282. // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
  283. CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * IN_H * IN_W * sizeof(float), cudaMemcpyHostToDevice, stream));
  284. context.enqueue(batchSize, buffers, stream, nullptr);
  285. CHECK(cudaMemcpyAsync(output, buffers[outputIndex1], batchSize * 3 * IN_H/4 * IN_W /4 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream));
  286. CHECK(cudaMemcpyAsync(output + 3 * (IN_H/4 * IN_W/4) * 6, buffers[outputIndex2], batchSize * 3 * IN_H/8 * IN_W /8 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream));
  287. CHECK(cudaMemcpyAsync(output + 3 * (IN_H/4 * IN_W/4 + IN_H/8 * IN_W/8) * 6, buffers[outputIndex3], batchSize * 3 * IN_H/16 * IN_W /16 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream));
  288. cudaStreamSynchronize(stream);
  289. // Release stream and buffers
  290. cudaStreamDestroy(stream);
  291. CHECK(cudaFree(buffers[inputIndex]));
  292. CHECK(cudaFree(buffers[outputIndex1]));
  293. CHECK(cudaFree(buffers[outputIndex2]));
  294. CHECK(cudaFree(buffers[outputIndex3]));
  295. }
  296. int main()
  297. {
  298. // create a model using the API directly and serialize it to a stream
  299. char *trtModelStream{ nullptr };
  300. size_t size{ 0 };
  301. // 读取engine文件
  302. std::ifstream file("yolov5-crowd-n.engine", std::ios::binary);
  303. if (file.good()) {
  304. file.seekg(0, file.end);
  305. size = file.tellg();
  306. file.seekg(0, file.beg);
  307. trtModelStream = new char[size];
  308. assert(trtModelStream);
  309. file.read(trtModelStream, size);
  310. file.close();
  311. }
  312. Logger m_logger;
  313. IRuntime* runtime = createInferRuntime(m_logger);
  314. assert(runtime != nullptr);
  315. ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
  316. assert(engine != nullptr);
  317. IExecutionContext* context = engine->createExecutionContext();
  318. assert(context != nullptr);
  319. // generate input data
  320. float data[BATCH_SIZE * 3 * IN_H * IN_W];
  321. // Run inference
  322. int num_total = 3 * (IN_H/4 * IN_W/4 + IN_H/8 * IN_W/8 + IN_H/16 * IN_W/16) * 6;
  323. // 存储推理结果
  324. float prob[num_total];
  325. printf("num_total: %d\n", num_total);
  326. Mat img = cv::imread("image.jpg", -1);
  327. if (img.empty()) {
  328. std::cout << "Failed to load image" << std::endl;
  329. return -1;
  330. }
  331. // 获取图像的高度、宽度和通道数
  332. int height = img.rows; // 图像高度
  333. int width = img.cols; // 图像宽度
  334. int channels = img.channels(); // 图像通道数
  335. // 输出图像信息
  336. std::cout << "Image size: " << height << " x " << width << ", Channels: " << channels << std::endl;
  337. cv::resize(img, img, cv::Size(640, 640));
  338. Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true); // bgr rgb
  339. std::memcpy(data, blob.data, 3 * 640 * 640 * sizeof(float));
  340. float height_scale = height / 640.0;
  341. float width_scale = width / 640.0;
  342. doInference(*context, data, prob, BATCH_SIZE);
  343. printf("inference done");
  344. // Destroy the engine
  345. context->destroy();
  346. engine->destroy();
  347. runtime->destroy();
  348. // 将输出根据grid和anchor进行计算,得到预测框的坐标和置信度
  349. float* result = postprocess(prob); // prob -> 604800
  350. float* prob_ptr = result;
  351. int count = 0;
  352. int invalid_count = 0;
  353. for (int i = 0; i < num_total / 6; i++)
  354. {
  355. {
  356. for (int j = 0; j < 6; j++)
  357. {
  358. //printf("%f ", prob_ptr[j]);
  359. //count++;
  360. }
  361. if( prob_ptr[4] < 0){
  362. invalid_count++;
  363. }else if(prob_ptr[4] > 0.25){
  364. count++;
  365. }
  366. //printf("\n");
  367. }
  368. prob_ptr += 6;
  369. }
  370. printf("invalid_count: %d\n", invalid_count);
  371. printf("count: %d\n", count);
  372. // 根据置信度 获得有效框
  373. vector<vector<float>> info = get_info(result, 0.25, 6);
  374. // 将推理的xywh 转换为原图中的 xyxy
  375. info_simplify(info, width_scale, height_scale);
  376. // 将推理结果按类别分割
  377. vector<vector<vector<float>>> info_split = split_info(info);
  378. printf("info size: %ld\n", info_split.size());
  379. cv::resize(img, img, cv::Size(width, height));
  380. for(auto i=0; i < info_split.size(); i++)
  381. {
  382. printf("class %d size: %ld\n", i, info_split[i].size());
  383. // 根据阈值进行nms
  384. nms(info_split[i], 0.45);
  385. draw_box(img, info_split[i]);
  386. printf("class %d size: %ld\n", i, info_split[i].size());
  387. }
  388. cv::imwrite("result.jpg", img);
  389. return 0;
  390. }