123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319 |
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/opencv.hpp>
#include <opencv2/dnn/dnn.hpp>
- using cv::Mat;
- using std::cout;
- using std::endl;
- using std::string;
- using std::vector;
// Class labels indexed by class id; this model has a single class.
static const std::vector<std::string> class_name = {"person"};

// Alternative anchor set, currently disabled:
// const vector<vector<vector<float>>> anchors =
//     {{{3.90, 5.848}, {6.684, 6.81}, {30.48, 25.40}},
//      {{38.40, 32}, {48.38, 40.32}, {60.96, 50.80}},
//      {{76.8, 64}, {96.76, 80.63}, {121.91, 101.59}}};

// Anchor sizes, addressed as anchors[level][anchor] = {width, height}.
const std::vector<std::vector<std::vector<float>>> anchors = {
    {{3.90, 5.848}, {6.684, 6.81}, {5.79, 9.88}},
    {{7.754, 11.71}, {10.25, 16.5}, {13.66, 12.74}},
    {{14.73, 22.51}, {19.187, 33.625}, {33.906, 54.719}},
};

// NOTE(review): `strides` is not referenced by any code visible in this file
// and disagrees with `stride_list` below -- confirm which one is intended.
const std::vector<int> strides = {8, 16, 32};

// Feature-map side length per detection level (640 / stride_list[level]).
const std::vector<int> grid_sizes = {160, 80, 40};

// Per-level multiplier applied to decoded box centres.
const std::vector<float> stride_list = {4, 8, 16};
/// Logistic function 1 / (1 + e^(-x)).
///
/// The sum and the division are carried out in double precision (the literals
/// are doubles); the quotient is narrowed back to float on return.
///
/// @param x Raw activation.
/// @return Value in the closed range [0, 1].
float sigmoid(float x) {
    const double denominator = 1.0 + std::exp(-x);
    return static_cast<float>(1.0 / denominator);
}
/// Builds a coordinate grid with shape [1][1][ny][nx][2].
///
/// For every cell, grid[0][0][y][x] holds {x, y} as floats, i.e. the column
/// index first and the row index second.
///
/// @param nx Number of columns.
/// @param ny Number of rows.
/// @return The nested-vector grid described above.
std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> make_grid(int nx = 20, int ny = 20) {
    std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> grid(1);

    // One zero-filled row of nx two-element cells, replicated ny times.
    const std::vector<std::vector<float>> blank_row(nx, std::vector<float>(2));
    grid[0].emplace_back(ny, blank_row);

    std::vector<std::vector<std::vector<float>>> &plane = grid[0].front();
    for (int row = 0; row < ny; ++row) {
        std::vector<std::vector<float>> &cells = plane[row];
        for (int col = 0; col < nx; ++col) {
            cells[col][0] = static_cast<float>(col);  // x coordinate
            cells[col][1] = static_cast<float>(row);  // y coordinate
        }
    }
    return grid;
}
- vector<Mat> postprocess(const vector<Mat> &out, float conf = 0.7, int len_data = 6){
- int nc = 1;
- int no = nc + 5;
- int nl = anchors.size();
- //int nl = 1;
- int na = anchors[0].size() / 2;
- vector<Mat> result;
- for(int i = 0; i < nl; i++){
- std::vector<std::vector<std::vector<std::vector<std::vector<float>>>>> grid = make_grid(grid_sizes[i], grid_sizes[i]);
- cv::Mat x = out[i];
- cv::Mat x_sigmoid = x.clone();
- for (int n = 0; n < x.size[0]; n++) {
- for (int j = 0; j < x_sigmoid.size[1]; j++) {
- for (int k = 0; k < x_sigmoid.size[2]; k++) {
- for (int l = 0; l < x_sigmoid.size[3]; l++) {
- for (int m = 0; m < x_sigmoid.size[4]; m++) {
- // 假设数据是多通道的二维数据
- // int channel = l * x_sigmoid.size[3] + m;
- int offset = j * x_sigmoid.size[2]*x_sigmoid.size[3]*x_sigmoid.size[4] + k * x_sigmoid.size[3] * x_sigmoid.size[4] + l *x_sigmoid.size[4] + m;
- x_sigmoid.at<float>(n, offset) = sigmoid(x.at<float>(n, offset));
- float x_sigmoid_before = x_sigmoid.at<float>(n, offset);
- if(m < 2){
- x_sigmoid.at<float>(n, offset) = ((x_sigmoid.at<float>(n, offset) * 2.0) - 0.5 + grid[0][0][k][l][m]) * stride_list[i];
- }else if(m < 4){
- x_sigmoid.at<float>(n, offset) = (x_sigmoid.at<float>(n, offset) * 2.0)*(x_sigmoid.at<float>(n, offset) * 2) * anchors[i][j][m-2];
- }
- //printf("j: %d, k: %d, l: %d, m: %d, before: %f, data: %f \n", j, k, l, m, x_sigmoid_before, x_sigmoid.at<float>(j, offset));
- }
- }
- }
- }
- }
-
- //printf("x_sigmoid.size: %d %d %d %d\n", x_sigmoid.size[1], x_sigmoid.size[2], x_sigmoid.size[3], x_sigmoid.size[4]);
- //printf("x_sigmoid value: %f\n", x_sigmoid.at<cv::Vec<float, 1>>(0, 0)[0]);
- x_sigmoid = x_sigmoid.reshape(1, {x_sigmoid.size[1]*x_sigmoid.size[2]*x_sigmoid.size[3], 6});
- result.push_back(x_sigmoid);
- }
-
- return result;
- };
- void print_result(const Mat &result, float conf = 0.7, int len_data = 6)
- {
- float *pdata = (float *)result.data;
- for (int i = 0; i < result.total() / len_data; i++)
- {
- if (pdata[4] > conf)
- {
- for (int j = 0; j < len_data; j++)
- {
- cout << pdata[j] << " ";
- }
- cout << endl;
- }
- pdata += len_data;
- }
- return;
- }
- vector<vector<float>> get_info(const Mat &result, float conf = 0.7, int len_data = 6)
- {
- float *pdata = (float *)result.data;
- vector<vector<float>> info;
- for (int i = 0; i < result.total() / len_data; i++)
- {
- if (pdata[4] > conf)
- {
- vector<float> info_line;
- for (int j = 0; j < len_data; j++)
- {
- // cout << pdata[j] << " ";
- info_line.push_back(pdata[j]);
- }
- // cout << endl;
- info.push_back(info_line);
- }
- pdata += len_data;
- }
- return info;
- }
/// Rewrites each row in place from {cx, cy, w, h, conf, score...} to
/// {x1, y1, x2, y2, conf, class_index}.
///
/// The class index is the position of the largest value among the elements
/// from index 5 onward. The box is converted from centre/size form to corner
/// form and scaled by the given factors. Rows must hold at least 6 values.
///
/// @param info         Rows to rewrite; every row ends up with 6 elements.
/// @param witdh_scale  Factor applied to both x coordinates.
/// @param height_scale Factor applied to both y coordinates.
void info_simplify(std::vector<std::vector<float>> &info, float witdh_scale = 1.0, float height_scale = 1.0)
{
    for (std::vector<float> &row : info)
    {
        // Replace the per-class scores with the index of the best one.
        const auto scores_begin = row.cbegin() + 5;
        const auto best_score = std::max_element(scores_begin, row.cend());
        row[5] = static_cast<float>(best_score - scores_begin);
        row.resize(6);

        const float centre_x = row[0];
        const float centre_y = row[1];
        const double half_w = row[2] / 2.0;
        const double half_h = row[3] / 2.0;

        row[0] = static_cast<float>((centre_x - half_w) * witdh_scale);
        row[1] = static_cast<float>((centre_y - half_h) * height_scale);
        row[2] = static_cast<float>((centre_x + half_w) * witdh_scale);
        row[3] = static_cast<float>((centre_y + half_h) * height_scale);
    }
}
/// Groups rows by class id (element 5 truncated to int).
///
/// Groups appear in the order their class id is first encountered, and rows
/// keep their relative order inside each group. `info` itself is not modified.
///
/// @param info Rows carrying a class id at index 5.
/// @return One list of row copies per distinct class id.
std::vector<std::vector<std::vector<float>>> split_info(std::vector<std::vector<float>> &info)
{
    std::vector<std::vector<std::vector<float>>> groups;
    std::vector<int> seen_ids;  // seen_ids[k] is the class id held by groups[k]

    for (const std::vector<float> &row : info)
    {
        const int id = static_cast<int>(row[5]);
        const auto hit = std::find(seen_ids.begin(), seen_ids.end(), id);
        if (hit != seen_ids.end())
        {
            groups[hit - seen_ids.begin()].push_back(row);
            continue;
        }
        // First row of a new class: open a fresh group for it.
        seen_ids.push_back(id);
        groups.emplace_back();
        groups.back().push_back(row);
    }
    return groups;
}
/// Greedy non-maximum suppression, performed in place.
///
/// Rows are {x1, y1, x2, y2, conf, ...}. They are ordered by descending
/// confidence (ties keep their input order), then each row is kept only if its
/// IoU with every already-kept row is below `iou`. Rows that do not intersect
/// a kept row at all are never suppressed by it.
///
/// The list is sorted once and the comparator takes its arguments by
/// reference, so no row is copied during sorting.
///
/// @param info Candidate boxes; replaced by the surviving boxes, sorted by
///             descending confidence. Every row must hold at least 5 values.
/// @param iou  Overlap threshold; a candidate whose IoU with a kept box is not
///             below this value is discarded.
void nms(std::vector<std::vector<float>> &info, float iou = 0.45)
{
    std::stable_sort(info.begin(), info.end(),
                     [](const std::vector<float> &lhs, const std::vector<float> &rhs)
                     { return lhs[4] > rhs[4]; });

    std::vector<std::vector<float>> kept;
    kept.reserve(info.size());

    for (std::vector<float> &candidate : info)
    {
        bool suppressed = false;
        for (const std::vector<float> &winner : kept)
        {
            const float x1 = winner[0];
            const float y1 = winner[1];
            const float x2 = winner[2];
            const float y2 = winner[3];

            // Disjoint boxes cannot suppress each other.
            if (candidate[0] > x2 || candidate[2] < x1 || candidate[1] > y2 || candidate[3] < y1)
            {
                continue;
            }

            const float over_x1 = std::max(x1, candidate[0]);
            const float over_y1 = std::max(y1, candidate[1]);
            const float over_x2 = std::min(x2, candidate[2]);
            const float over_y2 = std::min(y2, candidate[3]);
            const float s_over = (over_x2 - over_x1) * (over_y2 - over_y1);
            const float s_total = (x2 - x1) * (y2 - y1) +
                                  (candidate[0] - candidate[2]) * (candidate[1] - candidate[3]) - s_over;

            // Written as a negated "<" so a NaN ratio (two degenerate boxes)
            // counts as suppressed rather than kept.
            if (!(s_over / s_total < iou))
            {
                suppressed = true;
                break;
            }
        }
        if (!suppressed)
        {
            kept.push_back(std::move(candidate));
        }
    }
    info = std::move(kept);
}
/// Writes every row to std::cout on its own line, with each value followed by
/// a single space.
///
/// @param info Rows to print.
void print_info(const std::vector<std::vector<float>> &info)
{
    for (const std::vector<float> &row : info)
    {
        for (const float value : row)
        {
            std::cout << value << " ";
        }
        std::cout << std::endl;
    }
}
- void draw_box(Mat &img, const vector<vector<float>> &info)
- {
- for (int i = 0; i < info.size(); i++)
- {
- cv::rectangle(img, cv::Point(info[i][0], info[i][1]), cv::Point(info[i][2], info[i][3]), cv::Scalar(0, 255, 0));
- // string label;
- // label += class_name[info[i][5]];
- // label += " ";
- // std::stringstream oss;
- // oss << info[i][4];
- // label += oss.str();
- // cv::putText(img, label, cv::Point(info[i][0], info[i][1]), 1, 2, cv::Scalar(0, 255, 0), 2);
- }
- // 在左上角显示总数
- string head_info = "Total: " + std::to_string(info.size());
- cv::putText(img, head_info, cv::Point(10, 20), 1, 2, cv::Scalar(0, 255, 0), 2);
- }
- int main()
- {
- cv::dnn::Net net = cv::dnn::readNet("yolo-crowd-n.onnx");
- printf("net loaded\n");
- Mat img = cv::imread("image.jpg", -1);
- if (img.empty()) {
- std::cout << "Failed to load image" << std::endl;
- return -1;
- }
- // 获取图像的高度、宽度和通道数
- int height = img.rows; // 图像高度
- int width = img.cols; // 图像宽度
- int channels = img.channels(); // 图像通道数
- // 输出图像信息
- std::cout << "Image size: " << height << " x " << width << ", Channels: " << channels << std::endl;
- cv::resize(img, img, cv::Size(640, 640));
-
- Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true); // bgr rgb
- float height_scale = height / 640.0;
- float width_scale = width / 640.0;
- net.setInput(blob);
- vector<Mat> netoutput;
- vector<string> out_name = {"output", "947", "961"};
- net.forward(netoutput, out_name);
- printf("netouput_size:%ld \n", netoutput.size());
- // 将输出根据grid和anchor进行计算,得到预测框的坐标和置信度
- vector<Mat> result_list = postprocess(netoutput);
- // 拼接
- Mat concatMAT;
- vconcat(result_list[0], result_list[1], concatMAT);
- vconcat(concatMAT, result_list[2], concatMAT);
- Mat result = concatMAT;
- // 根据置信度 获得有效框
- vector<vector<float>> info = get_info(result, 0.25, 6);
- info_simplify(info, width_scale, height_scale);
- vector<vector<vector<float>>> info_split = split_info(info);
- printf("info size: %ld\n", info_split.size());
- cv::resize(img, img, cv::Size(width, height));
- for(auto i=0; i < info_split.size(); i++)
- {
- printf("class %d size: %ld\n", i, info_split[i].size());
- // 根据阈值进行nms
- nms(info_split[i], 0.45);
- draw_box(img, info_split[i]);
- printf("class %d size: %ld\n", i, info_split[i].size());
- }
- cv::imwrite("result.jpg", img);
- return 0;
- }
|