#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <cuda_runtime_api.h>
#include <NvInfer.h>
#include "logging.h" // TensorRT sample logger, assumed to provide sample::Logger

using cv::Mat;
using std::cout;
using std::endl;
using std::string;
using std::vector;
using namespace nvinfer1;

static const vector<string> class_name = {"person"};

// const vector<vector<vector<float>>> anchors =
//     {{{3.90, 5.848}, {6.684, 6.81}, {30.48, 25.40}},
//      {{38.40, 32}, {48.38, 40.32}, {60.96, 50.80}},
//      {{76.8, 64}, {96.76, 80.63}, {121.91, 101.59}}};
const vector<vector<vector<float>>> anchors =
    {{{3.90, 5.848}, {6.684, 6.81}, {5.79, 9.88}},
     {{7.754, 11.71}, {10.25, 16.5}, {13.66, 12.74}},
     {{14.73, 22.51}, {19.187, 33.625}, {33.906, 54.719}}};

const vector<int> strides = {8, 16, 32};
const vector<int> grid_sizes = {160, 80, 40};
const vector<int> stride_list = {4, 8, 16};

float sigmoid(float x)
{
    return 1.0f / (1.0f + std::exp(-x));
}

// Build a [1][1][ny][nx][2] grid holding the (x, y) cell indices,
// mirroring YOLOv5's _make_grid().
std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>>
make_grid(int nx = 20, int ny = 20)
{
    std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> grid(
        1, std::vector<std::vector<std::vector<std::vector<float>>>>(
               1, std::vector<std::vector<std::vector<float>>>(
                      ny, std::vector<std::vector<float>>(
                              nx, std::vector<float>(2)))));
    for (int y = 0; y < ny; ++y)
    {
        for (int x = 0; x < nx; ++x)
        {
            grid[0][0][y][x][0] = x; // xv
            grid[0][0][y][x][1] = y; // yv
        }
    }
    return grid;
}

// Decode the raw network output in place: apply sigmoid everywhere, then map
// xy/wh from grid space to pixel space using the grid and the anchors.
float *postprocess(float *results, float conf = 0.7, int len_data = 6)
{
    int nc = 1;                 // number of classes
    int no = nc + 5;            // values per box: x, y, w, h, obj, class scores
    int nl = anchors.size();    // number of detection levels
    int na = anchors[0].size(); // anchors per level
    int count = 0;
    for (int i = 0; i < nl; i++)
    {
        std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> grid =
            make_grid(grid_sizes[i], grid_sizes[i]);
        for (int n = 0; n < 1; n++)
        {
            for (int j = 0; j < 3; j++)
            {
                for (int k = 0; k < grid_sizes[i]; k++)
                {
                    for (int l = 0; l < grid_sizes[i]; l++)
                    {
                        for (int m = 0; m < 6; m++)
                        {
                            // The per-level outputs are laid out as
                            // [anchor][row][col][channel] and concatenated, so
                            // each level starts after all values of the
                            // previous ones.
                            int offset = 0;
                            if (i == 0)
                            {
                                offset = j * grid_sizes[i] * grid_sizes[i] * 6 + k * grid_sizes[i] * 6 + l * 6 + m;
                            }
                            else if (i == 1)
                            {
                                int zero_offset = 3 * 160 * 160 * 6;
                                offset = zero_offset + j * grid_sizes[i] * grid_sizes[i] * 6 + k * grid_sizes[i] * 6 + l * 6 + m;
                            }
                            else if (i == 2)
                            {
                                int zero_offset = 3 * 160 * 160 * 6 + 3 * 80 * 80 * 6;
                                offset = zero_offset + j * grid_sizes[i] * grid_sizes[i] * 6 + k * grid_sizes[i] * 6 + l * 6 + m;
                            }
                            float x_sigmoid_before = results[offset]; // raw logit, for the debug print below
                            float x = sigmoid(x_sigmoid_before);
                            if (m < 2)
                            {
                                // xy: (sigmoid(x) * 2 - 0.5 + cell index) * stride
                                x = ((x * 2.0f) - 0.5f + grid[0][0][k][l][m]) * stride_list[i];
                            }
                            else if (m < 4)
                            {
                                // wh: (sigmoid(x) * 2)^2 * anchor
                                x = (x * 2.0f) * (x * 2.0f) * anchors[i][j][m - 2];
                            }
                            results[offset] = x;
                            count++;
                            // printf("j: %d, k: %d, l: %d, m: %d, before: %f, data: %f\n", j, k, l, m, x_sigmoid_before, results[offset]);
                        }
                    }
                }
            }
        }
    }
    printf("postprocess count: %d\n", count);
    return results;
}

void print_result(const Mat &result, float conf = 0.7, int len_data = 6)
{
    float *pdata = (float *)result.data;
    for (int i = 0; i < (int)(result.total() / len_data); i++)
    {
        if (pdata[4] > conf)
        {
            for (int j = 0; j < len_data; j++)
            {
                cout << pdata[j] << " ";
            }
            cout << endl;
        }
        pdata += len_data;
    }
}

// Collect all boxes whose objectness exceeds conf.
// 604800 = 3 * (160*160 + 80*80 + 40*40) * 6 values in total.
vector<vector<float>> get_info(float *result, float conf = 0.7, int len_data = 6)
{
    float *pdata = result;
    vector<vector<float>> info;
    for (int i = 0; i < 604800 / len_data; i++)
    {
        if (pdata[4] > conf)
        {
            vector<float> info_line;
            for (int j = 0; j < len_data; j++)
            {
                info_line.push_back(pdata[j]);
            }
            info.push_back(info_line);
        }
        pdata += len_data;
    }
    return info;
}
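// For reference, the offset arithmetic in postprocess() can be written as a
// single helper. This is a sketch, not used by the pipeline; flat_offset is a
// hypothetical name, and it assumes the same [3][grid][grid][6] per-level
// layout with the three levels concatenated in grid_sizes order (160, 80, 40).
static inline int flat_offset(int level, int a, int gy, int gx, int ch)
{
    int base = 0;
    for (int p = 0; p < level; ++p)
        base += 3 * grid_sizes[p] * grid_sizes[p] * 6; // skip earlier levels
    int g = grid_sizes[level];
    return base + a * g * g * 6 + gy * g * 6 + gx * 6 + ch;
}
// Example: flat_offset(1, 0, 0, 0, 4) == 3 * 160 * 160 * 6 + 4, the objectness
// of the first cell of the second level, matching zero_offset above.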
// Convert (cx, cy, w, h) to corner coordinates, keep the best class id,
// and scale the box back to the original image size.
void info_simplify(vector<vector<float>> &info, float width_scale = 1.0, float height_scale = 1.0)
{
    for (size_t i = 0; i < info.size(); i++)
    {
        info[i][5] = std::max_element(info[i].cbegin() + 5, info[i].cend()) - (info[i].cbegin() + 5);
        info[i].resize(6);
        float x = info[i][0];
        float y = info[i][1];
        float w = info[i][2];
        float h = info[i][3];
        info[i][0] = (x - w / 2.0) * width_scale;
        info[i][1] = (y - h / 2.0) * height_scale;
        info[i][2] = (x + w / 2.0) * width_scale;
        info[i][3] = (y + h / 2.0) * height_scale;
    }
}

// Group boxes by class id so NMS can run per class.
vector<vector<vector<float>>> split_info(vector<vector<float>> &info)
{
    vector<vector<vector<float>>> info_split;
    vector<int> class_id;
    for (size_t i = 0; i < info.size(); i++)
    {
        if (std::find(class_id.begin(), class_id.end(), (int)info[i][5]) == class_id.end())
        {
            class_id.push_back((int)info[i][5]);
            vector<vector<float>> info_;
            info_split.push_back(info_);
        }
        info_split[std::find(class_id.begin(), class_id.end(), (int)info[i][5]) - class_id.begin()].push_back(info[i]);
    }
    return info_split;
}

// Greedy non-maximum suppression: repeatedly take the highest-confidence
// remaining box and drop every box whose IoU with it reaches the threshold.
void nms(vector<vector<float>> &info, float iou = 0.45)
{
    int counter = 0;
    vector<vector<float>> return_info;
    while (counter < (int)info.size())
    {
        return_info.clear();
        float x1 = 0, x2 = 0, y1 = 0, y2 = 0;
        // Sort by confidence in descending order.
        std::sort(info.begin(), info.end(),
                  [](const vector<float> &p1, const vector<float> &p2) { return p1[4] > p2[4]; });
        for (size_t i = 0; i < info.size(); i++)
        {
            if ((int)i < counter)
            {
                return_info.push_back(info[i]);
                continue;
            }
            if ((int)i == counter)
            {
                // Current reference box.
                x1 = info[i][0];
                y1 = info[i][1];
                x2 = info[i][2];
                y2 = info[i][3];
                return_info.push_back(info[i]);
                continue;
            }
            if (info[i][0] > x2 || info[i][2] < x1 || info[i][1] > y2 || info[i][3] < y1)
            {
                // No overlap with the reference box: keep it.
                return_info.push_back(info[i]);
            }
            else
            {
                float over_x1 = std::max(x1, info[i][0]);
                float over_y1 = std::max(y1, info[i][1]);
                float over_x2 = std::min(x2, info[i][2]);
                float over_y2 = std::min(y2, info[i][3]);
                float s_over = (over_x2 - over_x1) * (over_y2 - over_y1);
                float s_total = (x2 - x1) * (y2 - y1) + (info[i][2] - info[i][0]) * (info[i][3] - info[i][1]) - s_over;
                if (s_over / s_total < iou)
                {
                    return_info.push_back(info[i]);
                }
            }
        }
        info = return_info;
        counter += 1;
    }
}

void print_info(const vector<vector<float>> &info)
{
    for (size_t i = 0; i < info.size(); i++)
    {
        for (size_t j = 0; j < info[i].size(); j++)
        {
            cout << info[i][j] << " ";
        }
        cout << endl;
    }
}

void draw_box(Mat &img, const vector<vector<float>> &info)
{
    for (size_t i = 0; i < info.size(); i++)
    {
        cv::rectangle(img, cv::Point(info[i][0], info[i][1]), cv::Point(info[i][2], info[i][3]), cv::Scalar(0, 255, 0));
        // string label;
        // label += class_name[info[i][5]];
        // label += " ";
        // std::stringstream oss;
        // oss << info[i][4];
        // label += oss.str();
        // cv::putText(img, label, cv::Point(info[i][0], info[i][1]), 1, 2, cv::Scalar(0, 255, 0), 2);
    }
    // Show the total count in the top-left corner.
    string head_info = "Total: " + std::to_string(info.size());
    cv::putText(img, head_info, cv::Point(10, 20), 1, 2, cv::Scalar(0, 255, 0), 2);
}

void CHECK(int status)
{
    if (status != 0)
    {
        std::cerr << "Cuda failure: " << status << std::endl;
        std::abort();
    }
}

using namespace sample;

const char* IN_NAME = "images";
const char* OUT_NAME1 = "output";
const char* OUT_NAME2 = "947";
const char* OUT_NAME3 = "961";
static const int IN_H = 640;
static const int IN_W = 640;
static const int BATCH_SIZE = 1;
static const int EXPLICIT_BATCH = 1 << (int)(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);

void doInference(IExecutionContext& context, float* input, float* output, int batchSize)
{
    const ICudaEngine& engine = context.getEngine();

    // Pointers to input and output device buffers to pass to the engine.
    // The engine requires exactly IEngine::getNbBindings() buffers.
    assert(engine.getNbBindings() == 4);
    void* buffers[4];

    // To bind the buffers we need the names of the input and output tensors;
    // indices are guaranteed to be less than IEngine::getNbBindings().
    const int inputIndex = engine.getBindingIndex(IN_NAME);
    const int outputIndex1 = engine.getBindingIndex(OUT_NAME1);
    const int outputIndex2 = engine.getBindingIndex(OUT_NAME2);
    const int outputIndex3 = engine.getBindingIndex(OUT_NAME3);
    printf("outputIndex1: %d, outputIndex2: %d, outputIndex3: %d\n", outputIndex1, outputIndex2, outputIndex3);

    // Create GPU buffers on the device, one per binding.
    CHECK(cudaMalloc(&buffers[inputIndex], batchSize * 3 * IN_H * IN_W * sizeof(float)));
    CHECK(cudaMalloc(&buffers[outputIndex1], batchSize * 3 * IN_H / 4 * IN_W / 4 * 6 * sizeof(float)));
    CHECK(cudaMalloc(&buffers[outputIndex2], batchSize * 3 * IN_H / 8 * IN_W / 8 * 6 * sizeof(float)));
    CHECK(cudaMalloc(&buffers[outputIndex3], batchSize * 3 * IN_H / 16 * IN_W / 16 * 6 * sizeof(float)));

    // Create stream
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // DMA the input batch to the device, infer asynchronously, and copy the
    // three output levels back to host, packed one after another.
    CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * IN_H * IN_W * sizeof(float), cudaMemcpyHostToDevice, stream));
    context.enqueue(batchSize, buffers, stream, nullptr);
    CHECK(cudaMemcpyAsync(output, buffers[outputIndex1], batchSize * 3 * IN_H / 4 * IN_W / 4 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream));
    CHECK(cudaMemcpyAsync(output + 3 * (IN_H / 4 * IN_W / 4) * 6, buffers[outputIndex2], batchSize * 3 * IN_H / 8 * IN_W / 8 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream));
    CHECK(cudaMemcpyAsync(output + 3 * (IN_H / 4 * IN_W / 4 + IN_H / 8 * IN_W / 8) * 6, buffers[outputIndex3], batchSize * 3 * IN_H / 16 * IN_W / 16 * 6 * sizeof(float), cudaMemcpyDeviceToHost, stream));
    cudaStreamSynchronize(stream);

    // Release stream and buffers
    cudaStreamDestroy(stream);
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex1]));
    CHECK(cudaFree(buffers[outputIndex2]));
    CHECK(cudaFree(buffers[outputIndex3]));
}
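// Binding indices depend on how the network was exported, so it can help to
// dump them once instead of trusting hard-coded tensor names like "947".
// A minimal sketch, assuming a TensorRT version where the binding API
// (getNbBindings, getBindingName, bindingIsInput, getBindingDimensions) is
// still available; printBindings is a hypothetical helper, not part of the
// original pipeline.
void printBindings(const ICudaEngine& engine)
{
    for (int b = 0; b < engine.getNbBindings(); ++b)
    {
        Dims d = engine.getBindingDimensions(b);
        printf("binding %d: %s (%s), dims:", b, engine.getBindingName(b),
               engine.bindingIsInput(b) ? "input" : "output");
        for (int k = 0; k < d.nbDims; ++k)
            printf(" %d", (int)d.d[k]);
        printf("\n");
    }
}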
int main()
{
    // Read the serialized engine from disk.
    char *trtModelStream{ nullptr };
    size_t size{ 0 };
    std::ifstream file("yolov5-crowd-n.engine", std::ios::binary);
    if (file.good())
    {
        file.seekg(0, file.end);
        size = file.tellg();
        file.seekg(0, file.beg);
        trtModelStream = new char[size];
        assert(trtModelStream);
        file.read(trtModelStream, size);
        file.close();
    }
    else
    {
        std::cerr << "Failed to open engine file" << std::endl;
        return -1;
    }

    Logger m_logger;
    IRuntime* runtime = createInferRuntime(m_logger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream; // the engine owns its own copy now

    // Host-side input and output buffers. These are several megabytes, so use
    // heap-backed vectors instead of stack arrays (the output size is also a
    // runtime value, which made the previous stack array a non-standard VLA).
    std::vector<float> data(BATCH_SIZE * 3 * IN_H * IN_W);
    int num_total = 3 * (IN_H / 4 * IN_W / 4 + IN_H / 8 * IN_W / 8 + IN_H / 16 * IN_W / 16) * 6;
    std::vector<float> prob(num_total);
    printf("num_total: %d\n", num_total);

    Mat img = cv::imread("image.jpg", -1);
    if (img.empty())
    {
        std::cout << "Failed to load image" << std::endl;
        return -1;
    }

    // Get the image height, width and channel count.
    int height = img.rows;
    int width = img.cols;
    int channels = img.channels();
    std::cout << "Image size: " << height << " x " << width << ", Channels: " << channels << std::endl;

    cv::resize(img, img, cv::Size(640, 640));
    // Scale to [0, 1], swap BGR to RGB and pack as NCHW.
    Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true);
    std::memcpy(data.data(), blob.data, 3 * 640 * 640 * sizeof(float));
    float height_scale = height / 640.0;
    float width_scale = width / 640.0;

    // Run inference.
    doInference(*context, data.data(), prob.data(), BATCH_SIZE);
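    // Quick sanity check on the raw output before decoding: prob still holds
    // logits here, so sigmoid(prob[4]) is the objectness of the first box.
    // A hedged debug print, assuming the [x, y, w, h, obj, cls] channel order
    // that postprocess() below relies on:
    printf("first box objectness (sigmoid): %f\n", sigmoid(prob[4]));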
    printf("inference done\n");

    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();

    // Decode the raw output with the grid and anchors to get box coordinates
    // and confidences.
    float* result = postprocess(prob.data()); // prob holds 604800 floats

    // Count decoded boxes with negative (invalid) or high (>0.25) objectness.
    float* prob_ptr = result;
    int count = 0;
    int invalid_count = 0;
    for (int i = 0; i < num_total / 6; i++)
    {
        if (prob_ptr[4] < 0)
        {
            invalid_count++;
        }
        else if (prob_ptr[4] > 0.25)
        {
            count++;
        }
        prob_ptr += 6;
    }
    printf("invalid_count: %d\n", invalid_count);
    printf("count: %d\n", count);

    // Keep boxes above the confidence threshold.
    vector<vector<float>> info = get_info(result, 0.25, 6);
    info_simplify(info, width_scale, height_scale);
    vector<vector<vector<float>>> info_split = split_info(info);
    printf("info size: %zu\n", info_split.size());

    cv::resize(img, img, cv::Size(width, height));
    for (size_t i = 0; i < info_split.size(); i++)
    {
        printf("class %zu size: %zu\n", i, info_split[i].size());
        // Per-class NMS with IoU threshold 0.45.
        nms(info_split[i], 0.45);
        draw_box(img, info_split[i]);
        printf("class %zu size: %zu\n", i, info_split[i].size());
    }
    cv::imwrite("result.jpg", img);
    return 0;
}
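// For reference, a build line along these lines should work, assuming OpenCV,
// CUDA and TensorRT are installed in the usual system locations and logging.h
// from the TensorRT samples is on the include path (the paths, the output
// name yolov5_trt and the pkg-config module are assumptions, adjust to the
// local setup):
//
//   g++ -O2 -o yolov5_trt main.cpp \
//       $(pkg-config --cflags --libs opencv4) \
//       -I/usr/local/cuda/include -L/usr/local/cuda/lib64 \
//       -lnvinfer -lcudart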