#include #include #include #include #include using cv::Mat; using std::cout; using std::endl; using std::string; using std::vector; static const vector class_name = {"person"}; // const vector>> anchors = // {{{3.90, 5.848}, {6.684, 6.81}, {30.48, 25.40}}, // {{38.40, 32}, {48.38, 40.32}, {60.96, 50.80}}, // {{76.8, 64}, {96.76, 80.63}, {121.91, 101.59}}}; const vector>> anchors = {{{3.90, 5.848}, {6.684, 6.81}, {5.79, 9.88}}, {{7.754, 11.71}, {10.25, 16.5}, {13.66, 12.74}}, {{14.73, 22.51}, {19.187, 33.625}, {33.906, 54.719}}}; const vector strides = {8, 16, 32}; const vector grid_sizes = {160, 80, 40}; const vector stride_list = {4, 8, 16}; float sigmoid(float x) { return 1.0 / (1.0 + std::exp(-x)); }; std::vector>>>> make_grid(int nx = 20, int ny = 20) { std::vector>>>> grid(1, std::vector>>>(1, std::vector>>(ny, std::vector>(nx, std::vector(2))))); for (int y = 0; y < ny; ++y) { for (int x = 0; x < nx; ++x) { grid[0][0][y][x][0] = x; // xv grid[0][0][y][x][1] = y; // yv } } return grid; }; vector postprocess(const vector &out, float conf = 0.7, int len_data = 6){ int nc = 1; int no = nc + 5; int nl = anchors.size(); //int nl = 1; int na = anchors[0].size() / 2; vector result; for(int i = 0; i < nl; i++){ std::vector>>>> grid = make_grid(grid_sizes[i], grid_sizes[i]); cv::Mat x = out[i]; cv::Mat x_sigmoid = x.clone(); for (int n = 0; n < x.size[0]; n++) { for (int j = 0; j < x_sigmoid.size[1]; j++) { for (int k = 0; k < x_sigmoid.size[2]; k++) { for (int l = 0; l < x_sigmoid.size[3]; l++) { for (int m = 0; m < x_sigmoid.size[4]; m++) { // 假设数据是多通道的二维数据 // int channel = l * x_sigmoid.size[3] + m; int offset = j * x_sigmoid.size[2]*x_sigmoid.size[3]*x_sigmoid.size[4] + k * x_sigmoid.size[3] * x_sigmoid.size[4] + l *x_sigmoid.size[4] + m; x_sigmoid.at(n, offset) = sigmoid(x.at(n, offset)); float x_sigmoid_before = x_sigmoid.at(n, offset); if(m < 2){ x_sigmoid.at(n, offset) = ((x_sigmoid.at(n, offset) * 2.0) - 0.5 + grid[0][0][k][l][m]) * stride_list[i]; }else if(m < 4){ x_sigmoid.at(n, offset) = (x_sigmoid.at(n, offset) * 2.0)*(x_sigmoid.at(n, offset) * 2) * anchors[i][j][m-2]; } //printf("j: %d, k: %d, l: %d, m: %d, before: %f, data: %f \n", j, k, l, m, x_sigmoid_before, x_sigmoid.at(j, offset)); } } } } } //printf("x_sigmoid.size: %d %d %d %d\n", x_sigmoid.size[1], x_sigmoid.size[2], x_sigmoid.size[3], x_sigmoid.size[4]); //printf("x_sigmoid value: %f\n", x_sigmoid.at>(0, 0)[0]); x_sigmoid = x_sigmoid.reshape(1, {x_sigmoid.size[1]*x_sigmoid.size[2]*x_sigmoid.size[3], 6}); result.push_back(x_sigmoid); } return result; }; void print_result(const Mat &result, float conf = 0.7, int len_data = 6) { float *pdata = (float *)result.data; for (int i = 0; i < result.total() / len_data; i++) { if (pdata[4] > conf) { for (int j = 0; j < len_data; j++) { cout << pdata[j] << " "; } cout << endl; } pdata += len_data; } return; } vector> get_info(const Mat &result, float conf = 0.7, int len_data = 6) { float *pdata = (float *)result.data; vector> info; for (int i = 0; i < result.total() / len_data; i++) { if (pdata[4] > conf) { vector info_line; for (int j = 0; j < len_data; j++) { // cout << pdata[j] << " "; info_line.push_back(pdata[j]); } // cout << endl; info.push_back(info_line); } pdata += len_data; } return info; } void info_simplify(vector> &info, float witdh_scale = 1.0, float height_scale = 1.0) { for (auto i = 0; i < info.size(); i++) { info[i][5] = std::max_element(info[i].cbegin() + 5, info[i].cend()) - (info[i].cbegin() + 5); info[i].resize(6); float x = info[i][0]; float y = info[i][1]; float w = info[i][2]; float h = info[i][3]; info[i][0] = (x - w / 2.0) * witdh_scale; info[i][1] = (y - h / 2.0) * height_scale; info[i][2] = (x + w / 2.0) * witdh_scale; info[i][3] = (y + h / 2.0) * height_scale; } } vector>> split_info(vector> &info) { vector>> info_split; vector class_id; for (auto i = 0; i < info.size(); i++) { if (std::find(class_id.begin(), class_id.end(), (int)info[i][5]) == class_id.end()) { class_id.push_back((int)info[i][5]); vector> info_; info_split.push_back(info_); } info_split[std::find(class_id.begin(), class_id.end(), (int)info[i][5]) - class_id.begin()].push_back(info[i]); } return info_split; } void nms(vector> &info, float iou = 0.45) { int counter = 0; vector> return_info; while (counter < info.size()) { return_info.clear(); float x1 = 0; float x2 = 0; float y1 = 0; float y2 = 0; // 按置信度降序排列 std::sort(info.begin(), info.end(), [](vector p1, vector p2) { return p1[4] > p2[4]; }); for (auto i = 0; i < info.size(); i++) { if (i < counter) { return_info.push_back(info[i]); continue; } if (i == counter) { x1 = info[i][0]; y1 = info[i][1]; x2 = info[i][2]; y2 = info[i][3]; return_info.push_back(info[i]); continue; } if (info[i][0] > x2 or info[i][2] < x1 or info[i][1] > y2 or info[i][3] < y1) { return_info.push_back(info[i]); } else { float over_x1 = std::max(x1, info[i][0]); float over_y1 = std::max(y1, info[i][1]); float over_x2 = std::min(x2, info[i][2]); float over_y2 = std::min(y2, info[i][3]); float s_over = (over_x2 - over_x1) * (over_y2 - over_y1); float s_total = (x2 - x1) * (y2 - y1) + (info[i][0] - info[i][2]) * (info[i][1] - info[i][3]) - s_over; if (s_over / s_total < iou) { return_info.push_back(info[i]); } } } info = return_info; counter += 1; } } void print_info(const vector> &info) { for (auto i = 0; i < info.size(); i++) { for (auto j = 0; j < info[i].size(); j++) { cout << info[i][j] << " "; } cout << endl; } } void draw_box(Mat &img, const vector> &info) { for (int i = 0; i < info.size(); i++) { cv::rectangle(img, cv::Point(info[i][0], info[i][1]), cv::Point(info[i][2], info[i][3]), cv::Scalar(0, 255, 0)); // string label; // label += class_name[info[i][5]]; // label += " "; // std::stringstream oss; // oss << info[i][4]; // label += oss.str(); // cv::putText(img, label, cv::Point(info[i][0], info[i][1]), 1, 2, cv::Scalar(0, 255, 0), 2); } // 在左上角显示总数 string head_info = "Total: " + std::to_string(info.size()); cv::putText(img, head_info, cv::Point(10, 20), 1, 2, cv::Scalar(0, 255, 0), 2); } int main() { cv::dnn::Net net = cv::dnn::readNet("yolo-crowd-n.onnx"); printf("net loaded\n"); Mat img = cv::imread("image.jpg", -1); if (img.empty()) { std::cout << "Failed to load image" << std::endl; return -1; } // 获取图像的高度、宽度和通道数 int height = img.rows; // 图像高度 int width = img.cols; // 图像宽度 int channels = img.channels(); // 图像通道数 // 输出图像信息 std::cout << "Image size: " << height << " x " << width << ", Channels: " << channels << std::endl; cv::resize(img, img, cv::Size(640, 640)); Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true); // bgr rgb float height_scale = height / 640.0; float width_scale = width / 640.0; net.setInput(blob); vector netoutput; vector out_name = {"output", "947", "961"}; net.forward(netoutput, out_name); printf("netouput_size:%ld \n", netoutput.size()); // 将输出根据grid和anchor进行计算,得到预测框的坐标和置信度 vector result_list = postprocess(netoutput); // 拼接 Mat concatMAT; vconcat(result_list[0], result_list[1], concatMAT); vconcat(concatMAT, result_list[2], concatMAT); Mat result = concatMAT; // 根据置信度 获得有效框 vector> info = get_info(result, 0.25, 6); info_simplify(info, width_scale, height_scale); vector>> info_split = split_info(info); printf("info size: %ld\n", info_split.size()); cv::resize(img, img, cv::Size(width, height)); for(auto i=0; i < info_split.size(); i++) { printf("class %d size: %ld\n", i, info_split[i].size()); // 根据阈值进行nms nms(info_split[i], 0.45); draw_box(img, info_split[i]); printf("class %d size: %ld\n", i, info_split[i].size()); } cv::imwrite("result.jpg", img); return 0; }