- #include <algorithm>
- #include <cassert>
- #include <cmath>
- #include <cstring>
- #include <fstream>
- #include <iostream>
- #include <sstream>
- #include <string>
- #include <vector>
- #include <opencv2/opencv.hpp>
- #include <opencv2/dnn/dnn.hpp>
- #include <cuda_runtime_api.h>
- #include <NvInfer.h>
- #include </home/cl/package/TensorRT-8.6.1.6/samples/common/logger.h>
- using cv::Mat;
- using std::cout;
- using std::endl;
- using std::string;
- using std::vector;
- using namespace nvinfer1;
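- // Model-specific constants for yolov5-crowd-n: a single "person" class and three
- // anchor (width, height) pairs per detection level, consumed by postprocess() below.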
- static const vector<string> class_name = {"person"};
- // const vector<vector<vector<float>>> anchors =
- // {{{3.90, 5.848}, {6.684, 6.81}, {30.48, 25.40}},
- // {{38.40, 32}, {48.38, 40.32}, {60.96, 50.80}},
- // {{76.8, 64}, {96.76, 80.63}, {121.91, 101.59}}};
- const vector<vector<vector<float>>> anchors =
- {{{3.90, 5.848}, {6.684, 6.81}, {5.79, 9.88}},
- {{7.754, 11.71}, {10.25, 16.5}, {13.66, 12.74}},
- {{14.73, 22.51}, {19.187, 33.625}, {33.906, 54.719}}};
- const vector<int> strides = {8, 16, 32}; // standard yolov5 strides; not used by the decode below
- const vector<int> grid_sizes = {160, 80, 40}; // feature-map sizes of the three output heads for a 640x640 input
- const vector<float> stride_list = {4, 8, 16}; // strides actually applied in postprocess() (640 / grid_size)
- float sigmoid(float x) {
- return 1.0 / (1.0 + std::exp(-x));
- };
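- // Build a [1][1][ny][nx][2] grid holding the (x, y) cell index of every cell,
- // analogous to the _make_grid() helper in the yolov5 Python implementation.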
- std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> make_grid(int nx = 20, int ny = 20) {
- std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> grid(1,
- std::vector<std::vector<std::vector<std::vector<float>>>>(1,
- std::vector<std::vector<std::vector<float>>>(ny,
- std::vector<std::vector<float>>(nx,
- std::vector<float>(2)))));
- for (int y = 0; y < ny; ++y) {
- for (int x = 0; x < nx; ++x) {
- grid[0][0][y][x][0] = x; // xv
- grid[0][0][y][x][1] = y; // yv
- }
- }
- return grid;
- };
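- // Decode the raw network output in place using the yolov5 transform:
- // xy = (sigmoid(v) * 2 - 0.5 + grid) * stride, wh = (sigmoid(v) * 2)^2 * anchor,
- // while objectness and class scores only pass through sigmoid. The three heads are
- // assumed to be concatenated as [3*160*160*6][3*80*80*6][3*40*40*6] in `results`.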
- float* postprocess(float *results, float conf = 0.7, int len_data = 6){
- int nc = 1; // number of classes
- int no = nc + 5; // values per prediction: x, y, w, h, obj, cls scores
- int nl = anchors.size(); // number of detection levels
- //int nl = 1;
- int na = anchors[0].size(); // anchors per level (not used below)
- int count = 0;
- for(int i = 0; i < nl; i++){
- std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> grid = make_grid(grid_sizes[i], grid_sizes[i]);
- for (int n = 0; n < 1; n++) {
- for (int j = 0; j < 3; j++) {
- for (int k = 0; k < grid_sizes[i]; k++) {
- for (int l = 0; l < grid_sizes[i]; l++) {
- for (int m = 0; m < 6; m++) {
- // treat the flat buffer as multi-channel 2-D data indexed as [anchor j][row k][col l][element m]
- // int channel = l * x_sigmoid.size[3] + m;
- int offset;
- if(i == 0){
- offset = j * grid_sizes[i] * grid_sizes[i] * 6 + k * grid_sizes[i] * 6 + l * 6 + m;
- }else if(i == 1){
- int zero_offset = 3 * 160 * 160 * 6;
- offset = zero_offset + j * grid_sizes[i] * grid_sizes[i] * 6 + k * grid_sizes[i] * 6 + l * 6 + m;
- }else if(i == 2){
- int zero_offset = 3 * 160 * 160 * 6 + 3 * 80 * 80 * 6;
- offset = zero_offset + j * grid_sizes[i] * grid_sizes[i] * 6 + k * grid_sizes[i] * 6 + l * 6 + m;
- }
- float x = results[offset];
- x = sigmoid(x);
- float x_sigmoid_before = results[offset];
- if(m < 2){
- x = ((x * 2.0) - 0.5 + grid[0][0][k][l][m]) * stride_list[i];
- }else if(m < 4){
- x = (x * 2.0)*(x * 2.0) * anchors[i][j][m-2];
- }
- results[offset] = x;
- count++;
- //printf("j: %d, k: %d, l: %d, m: %d, before: %f, data: %f \n", j, k, l, m, x_sigmoid_before, results[offset]);
- //printf("j: %d, k: %d, l: %d, m: %d, before: %f, data: %f \n", j, k, l, m, x_sigmoid_before, results[offset]);
- }
- }
- }
- }
- }
- }
- printf("postprocess count: %d\n", count);
- return results;
- };
- void print_result(const Mat &result, float conf = 0.7, int len_data = 6)
- {
- float *pdata = (float *)result.data;
- for (int i = 0; i < result.total() / len_data; i++)
- {
- if (pdata[4] > conf)
- {
- for (int j = 0; j < len_data; j++)
- {
- cout << pdata[j] << " ";
- }
- cout << endl;
- }
- pdata += len_data;
- }
- return;
- }
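- // Collect every decoded prediction whose objectness (index 4) exceeds `conf`
- // into a vector of [x, y, w, h, obj, cls] rows.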
- vector<vector<float>> get_info(float *result, float conf = 0.7, int len_data = 6)
- {
- float *pdata = result;
- vector<vector<float>> info;
- for (int i = 0; i < 604800 / len_data; i++) // 604800 = 3 * (160*160 + 80*80 + 40*40) * 6
- {
- if (pdata[4] > conf)
- {
- vector<float> info_line;
- for (int j = 0; j < len_data; j++)
- {
- // cout << pdata[j] << " ";
- info_line.push_back(pdata[j]);
- }
- // cout << endl;
- info.push_back(info_line);
- }
- pdata += len_data;
- }
- return info;
- }
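- // Replace the class scores with the argmax class id, then convert the center-format
- // xywh box into corner-format xyxy and rescale it to the original image size.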
- void info_simplify(vector<vector<float>> &info, float width_scale = 1.0, float height_scale = 1.0)
- {
- for (auto i = 0; i < info.size(); i++)
- {
- info[i][5] = std::max_element(info[i].cbegin() + 5, info[i].cend()) - (info[i].cbegin() + 5);
- info[i].resize(6);
- float x = info[i][0];
- float y = info[i][1];
- float w = info[i][2];
- float h = info[i][3];
- info[i][0] = (x - w / 2.0) * width_scale;
- info[i][1] = (y - h / 2.0) * height_scale;
- info[i][2] = (x + w / 2.0) * width_scale;
- info[i][3] = (y + h / 2.0) * height_scale;
- }
- }
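- // Split the detections into one list per class id so that NMS can run per class.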
- vector<vector<vector<float>>> split_info(vector<vector<float>> &info)
- {
- vector<vector<vector<float>>> info_split;
- vector<int> class_id;
- for (auto i = 0; i < info.size(); i++)
- {
- if (std::find(class_id.begin(), class_id.end(), (int)info[i][5]) == class_id.end())
- {
- class_id.push_back((int)info[i][5]);
- vector<vector<float>> info_;
- info_split.push_back(info_);
- }
- info_split[std::find(class_id.begin(), class_id.end(), (int)info[i][5]) - class_id.begin()].push_back(info[i]);
- }
- return info_split;
- }
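- // Greedy NMS: repeatedly take the highest-confidence remaining box and drop every
- // other box whose IoU with it reaches the `iou` threshold.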
- void nms(vector<vector<float>> &info, float iou = 0.45)
- {
- int counter = 0;
- vector<vector<float>> return_info;
- while (counter < info.size())
- {
- return_info.clear();
- float x1 = 0;
- float x2 = 0;
- float y1 = 0;
- float y2 = 0;
- // sort the remaining boxes by confidence in descending order
- std::sort(info.begin(), info.end(), [](const vector<float> &p1, const vector<float> &p2)
- { return p1[4] > p2[4]; });
- for (auto i = 0; i < info.size(); i++)
- {
- if (i < counter)
- {
- return_info.push_back(info[i]);
- continue;
- }
- if (i == counter)
- {
- x1 = info[i][0];
- y1 = info[i][1];
- x2 = info[i][2];
- y2 = info[i][3];
- return_info.push_back(info[i]);
- continue;
- }
- if (info[i][0] > x2 or info[i][2] < x1 or info[i][1] > y2 or info[i][3] < y1)
- {
- return_info.push_back(info[i]);
- }
- else
- {
- float over_x1 = std::max(x1, info[i][0]);
- float over_y1 = std::max(y1, info[i][1]);
- float over_x2 = std::min(x2, info[i][2]);
- float over_y2 = std::min(y2, info[i][3]);
- float s_over = (over_x2 - over_x1) * (over_y2 - over_y1);
- float s_total = (x2 - x1) * (y2 - y1) + (info[i][0] - info[i][2]) * (info[i][1] - info[i][3]) - s_over;
- if (s_over / s_total < iou)
- {
- return_info.push_back(info[i]);
- }
- }
- }
- info = return_info;
- counter += 1;
- }
- }
- void print_info(const vector<vector<float>> &info)
- {
- for (auto i = 0; i < info.size(); i++)
- {
- for (auto j = 0; j < info[i].size(); j++)
- {
- cout << info[i][j] << " ";
- }
- cout << endl;
- }
- }
- void draw_box(Mat &img, const vector<vector<float>> &info)
- {
- for (int i = 0; i < info.size(); i++)
- {
- cv::rectangle(img, cv::Point(info[i][0], info[i][1]), cv::Point(info[i][2], info[i][3]), cv::Scalar(0, 255, 0));
- // string label;
- // label += class_name[info[i][5]];
- // label += " ";
- // std::stringstream oss;
- // oss << info[i][4];
- // label += oss.str();
- // cv::putText(img, label, cv::Point(info[i][0], info[i][1]), 1, 2, cv::Scalar(0, 255, 0), 2);
- }
- // display the total number of detections in the top-left corner
- string head_info = "Total: " + std::to_string(info.size());
- cv::putText(img, head_info, cv::Point(10, 20), 1, 2, cv::Scalar(0, 255, 0), 2);
- }
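- // Abort if a CUDA runtime call below returns a non-zero status code.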
- void CHECK(int status) {
- if (status != 0) {
- std::cerr << "Cuda failure: " << status << std::endl;
- std::abort();
- }
- }
- using namespace sample;
- const char* IN_NAME = "images";
- const char* OUT_NAME1 = "output";
- const char* OUT_NAME2 = "947";
- const char* OUT_NAME3 = "961";
- static const int IN_H = 640;
- static const int IN_W = 640;
- static const int BATCH_SIZE = 1;
- static const int EXPLICIT_BATCH = 1 << (int)(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
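- // Copy the preprocessed input to the GPU, enqueue the engine on a CUDA stream, and
- // copy the three output heads back into one contiguous host buffer
- // (stride-4 head first, then stride-8, then stride-16).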
- void doInference(IExecutionContext& context, float* input, float* output, int batchSize)
- {
- const ICudaEngine& engine = context.getEngine();
- // Pointers to input and output device buffers to pass to engine.
- // Engine requires exactly IEngine::getNbBindings() number of buffers.
- assert(engine.getNbBindings() == 4);
- void* buffers[4];
- // In order to bind the buffers, we need to know the names of the input and output tensors.
- // Note that indices are guaranteed to be less than IEngine::getNbBindings()
- const int inputIndex = engine.getBindingIndex(IN_NAME);
- const int outputIndex1 = engine.getBindingIndex(OUT_NAME1);
- const int outputIndex2 = engine.getBindingIndex(OUT_NAME2);
- const int outputIndex3 = engine.getBindingIndex(OUT_NAME3);
- printf("outputIndex1: %d, outputIndex2: %d, outputIndex3: %d\n", outputIndex1, outputIndex2, outputIndex3);
- // Create GPU buffers on device
- CHECK(cudaMalloc(&buffers[inputIndex], batchSize * 3 * IN_H * IN_W * sizeof(float)));
- CHECK(cudaMalloc(&buffers[outputIndex1], batchSize * 3 * IN_H/4 * IN_W /4 * 6 * sizeof(float)));
- CHECK(cudaMalloc(&buffers[outputIndex2], batchSize * 3 * IN_H/8 * IN_W /8 * 6 * sizeof(float)));
- CHECK(cudaMalloc(&buffers[outputIndex3], batchSize * 3 * IN_H/16 * IN_W /16 * 6 * sizeof(float)));
- // Create stream
- cudaStream_t stream;
- CHECK(cudaStreamCreate(&stream));
- // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
- CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * IN_H * IN_W * sizeof(float), cudaMemcpyHostToDevice, stream));
- context.enqueue(batchSize, buffers, stream, nullptr);
- CHECK(cudaMemcpyAsync(output, buffers[outputIndex1], batchSize * 3 * IN_H/4 * IN_W /4 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream));
- CHECK(cudaMemcpyAsync(output + 3 * (IN_H/4 * IN_W/4) * 6, buffers[outputIndex2], batchSize * 3 * IN_H/8 * IN_W /8 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream));
- CHECK(cudaMemcpyAsync(output + 3 * (IN_H/4 * IN_W/4 + IN_H/8 * IN_W/8) * 6, buffers[outputIndex3], batchSize * 3 * IN_H/16 * IN_W /16 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream));
- cudaStreamSynchronize(stream);
- // Release stream and buffers
- cudaStreamDestroy(stream);
- CHECK(cudaFree(buffers[inputIndex]));
- CHECK(cudaFree(buffers[outputIndex1]));
- CHECK(cudaFree(buffers[outputIndex2]));
- CHECK(cudaFree(buffers[outputIndex3]));
- }
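- // End-to-end pipeline: deserialize the engine, preprocess the image, run inference,
- // decode and filter the predictions, apply per-class NMS, and save the drawn result.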
- int main()
- {
- // create a model using the API directly and serialize it to a stream
- char *trtModelStream{ nullptr };
- size_t size{ 0 };
- // read the serialized engine file
- std::ifstream file("yolov5-crowd-n.engine", std::ios::binary);
- if (file.good()) {
- file.seekg(0, file.end);
- size = file.tellg();
- file.seekg(0, file.beg);
- trtModelStream = new char[size];
- assert(trtModelStream);
- file.read(trtModelStream, size);
- file.close();
- }
- Logger m_logger;
- IRuntime* runtime = createInferRuntime(m_logger);
- assert(runtime != nullptr);
- ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
- assert(engine != nullptr);
- delete[] trtModelStream; // the runtime copies the data, so the host buffer can be freed
- IExecutionContext* context = engine->createExecutionContext();
- assert(context != nullptr);
- // host-side input buffer (1 x 3 x 640 x 640); static storage avoids a ~4.7 MB stack allocation
- static float data[BATCH_SIZE * 3 * IN_H * IN_W];
- // Run inference
- constexpr int num_total = 3 * (IN_H/4 * IN_W/4 + IN_H/8 * IN_W/8 + IN_H/16 * IN_W/16) * 6;
- // host-side buffer for the inference results (the three heads concatenated); static storage keeps it off the stack
- static float prob[num_total];
- printf("num_total: %d\n", num_total);
- Mat img = cv::imread("image.jpg", cv::IMREAD_COLOR); // force 3-channel BGR so the 3*640*640 copy below stays valid
- if (img.empty()) {
- std::cout << "Failed to load image" << std::endl;
- return -1;
- }
- // get the original image height, width and channel count
- int height = img.rows; // image height
- int width = img.cols; // image width
- int channels = img.channels(); // number of channels
- // print the image information
- std::cout << "Image size: " << height << " x " << width << ", Channels: " << channels << std::endl;
- cv::resize(img, img, cv::Size(640, 640));
-
- Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true); // scale to [0,1], swapRB converts BGR to RGB
- std::memcpy(data, blob.data, 3 * 640 * 640 * sizeof(float));
- float height_scale = height / 640.0;
- float width_scale = width / 640.0;
- doInference(*context, data, prob, BATCH_SIZE);
- printf("inference done");
- // Destroy the engine
- context->destroy();
- engine->destroy();
- runtime->destroy();
- // decode the raw output with the grids and anchors to obtain box coordinates and confidences
- float* result = postprocess(prob); // prob -> 604800
- float* prob_ptr = result;
- int count = 0;
- int invalid_count = 0;
- for (int i = 0; i < num_total / 6; i++)
- {
- // each decoded row is [x, y, w, h, obj, cls]; count rows by objectness
- if (prob_ptr[4] < 0) {
- invalid_count++;
- } else if (prob_ptr[4] > 0.25) {
- count++;
- }
- prob_ptr += 6;
- }
- printf("invalid_count: %d\n", invalid_count);
- printf("count: %d\n", count);
- // keep only the boxes whose objectness exceeds the confidence threshold
- vector<vector<float>> info = get_info(result, 0.25, 6);
- // convert the predicted xywh (in the 640x640 input space) to xyxy in the original image
- info_simplify(info, width_scale, height_scale);
- // group the detections by class id
- vector<vector<vector<float>>> info_split = split_info(info);
- printf("info size: %ld\n", info_split.size());
- cv::resize(img, img, cv::Size(width, height));
- for(auto i=0; i < info_split.size(); i++)
- {
- printf("class %d size: %ld\n", i, info_split[i].size());
- // run NMS with the given IoU threshold
- nms(info_split[i], 0.45);
- draw_box(img, info_split[i]);
- printf("class %d size: %ld\n", i, info_split[i].size());
- }
- cv::imwrite("result.jpg", img);
- return 0;
- }