123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
#include "onnxruntime_cxx_api.h"

#include <assert.h>

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <list>
#include <string>
#include <utility>
#include <vector>

using namespace std;
using namespace Ort;
/**
 * In-memory image of the 44-byte RIFF/WAVE header that floatToWav() writes
 * verbatim at the start of its output file.
 *
 * The members are laid out in file order; the struct is dumped with a single
 * raw write, so it must contain no padding (enforced below).
 */
struct WavHeader {
    char riff[4];            // Chunk tag: "RIFF"
    uint32_t chunkSize;      // Header size minus 8, plus the sample payload size in bytes
    char wave[4];            // Form type: "WAVE"
    char fmt[4];             // Format sub-chunk tag: "fmt " (note the trailing space)
    uint32_t subchunk1Size;  // Size of the format sub-chunk body (floatToWav stores 16)
    uint16_t audioFormat;    // Sample encoding code (floatToWav stores 3 = IEEE float)
    uint16_t numChannels;    // Number of audio channels
    uint32_t sampleRate;     // Samples per second
    uint32_t byteRate;       // sampleRate * numChannels * bitsPerSample / 8
    uint16_t blockAlign;     // numChannels * bitsPerSample / 8
    uint16_t bitsPerSample;  // Bits in one sample of one channel
    char data[4];            // Data sub-chunk tag: "data"
    uint32_t subchunk2Size;  // Size of the sample payload in bytes
};

// The header is written to disk byte-for-byte; any compiler-inserted padding
// would shift the fields and corrupt the file, so fail the build instead.
static_assert(sizeof(WavHeader) == 44, "WavHeader must be exactly 44 bytes with no padding");
- void floatToWav(const std::vector<float>& data, const std::string& filename, uint32_t sampleRate = 22050, uint16_t numChannels = 1) {
- std::ofstream file(filename, std::ios::binary);
- if (!file) {
- std::cerr << "Failed to open file for writing." << std::endl;
- return;
- }
- WavHeader header;
- std::copy(std::begin("RIFF"), std::end("RIFF"), header.riff);
- header.chunkSize = sizeof(WavHeader) - 8 + data.size() * sizeof(float);
- std::copy(std::begin("WAVE"), std::end("WAVE"), header.wave);
- std::copy(std::begin("fmt "), std::end("fmt "), header.fmt);
- header.subchunk1Size = 16;
- header.audioFormat = 3; // IEEE float
- header.numChannels = numChannels;
- header.sampleRate = sampleRate;
- header.bitsPerSample = 32; // 32-bit float
- header.byteRate = header.sampleRate * header.numChannels * header.bitsPerSample / 8;
- header.blockAlign = header.numChannels * header.bitsPerSample / 8;
- std::copy(std::begin("data"), std::end("data"), header.data);
- header.subchunk2Size = data.size() * sizeof(float);
- file.write(reinterpret_cast<char*>(&header), sizeof(header));
- file.write(reinterpret_cast<const char*>(data.data()), data.size() * sizeof(float));
- if (!file) {
- std::cerr << "Error writing to file." << std::endl;
- }
- }
/**
 * Dumps a sequence of floats to a text file, one value per line, using the
 * stream's default float formatting.
 *
 * @param data     Values to write, in order.
 * @param filename Path of the file to create or overwrite.
 *
 * Failures to open or write are reported on std::cerr; nothing is thrown.
 */
void writeVectorToFile(const std::vector<float>& data, const std::string& filename) {
    std::ofstream outFile(filename);
    if (!outFile) {
        std::cerr << "Error opening file for writing: " << filename << std::endl;
        return;
    }

    // '\n' instead of std::endl: same file content without forcing a flush
    // after every single value.
    for (const float value : data) {
        outFile << value << '\n';
    }

    // One explicit flush so write errors surface here; the stream itself is
    // closed by its destructor.
    outFile.flush();
    if (!outFile) {
        std::cerr << "Error writing to file: " << filename << std::endl;
    }
}
- int main() {
- // Create a new environment
- Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "test");
- // Create a new session and load the model
- Ort::SessionOptions session_options;
- session_options.SetIntraOpNumThreads(1);
- const char* model_path = "./model_184000_audio_len.onnx";
- Ort::Session session(env, model_path, session_options);
- Ort::AllocatorWithDefaultOptions allocator;
- //model info
- // 获得模型又多少个输入和输出,一般是指对应网络层的数目
- // 一般输入只有图像的话input_nodes为1
- size_t num_input_nodes = session.GetInputCount();
- // 如果是多输出网络,就会是对应输出的数目
- size_t num_output_nodes = session.GetOutputCount();
- printf("Number of inputs = %zu\n", num_input_nodes);
- printf("Number of output = %zu\n", num_output_nodes);
- // 自动获取维度数量
- auto input_dims = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
- auto output_dims = session.GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
- std::cout << "input_dims:" << input_dims[0] << std::endl;
- std::cout << "output_dims:" << output_dims[0] << std::endl;
- std::vector<const char*> input_node_names = {"input", "input_lengths", "scales", "sid"};
- std::vector<const char*> output_node_names = {"output", "output_lengths"};
- // printf("inputs init\n");
- // Input text
- string text = "一号哨,发生犯人爆狱!";
- // python预处理之后的 输入数据----------------------------------------------------------------------------------------------------------
- int64_t input_data[] = {0, 51, 0, 198, 0, 66, 0, 96, 0, 162, 0, 196, 0, 16,
- 0, 61, 0, 96, 0, 162, 0, 196, 0, 3, 0, 16, 0, 48,
- 0, 43, 0, 198, 0, 61, 0, 110, 0, 139, 0, 198, 0, 16,
- 0, 48, 0, 43, 0, 56, 0, 196, 0, 150, 0, 110, 0, 56,
- 0, 197, 0, 16, 0, 58, 0, 96, 0, 162, 0, 196, 0, 126,
- 0, 196, 0, 5, 0};
- std::vector<int64_t> input_node_dims = {1, sizeof(input_data)/sizeof(input_data[0])};
- size_t input_tensor_size = sizeof(input_data)/sizeof(input_data[0]);
- std::vector<int32_t> input_tensor_values(input_tensor_size);
- // Prepare input data
-
- auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
- // Create input tensor object from data
- Ort::Value input_tensor = Ort::Value::CreateTensor<int64_t>(memory_info, input_data, input_tensor_size, input_node_dims.data(), input_node_dims.size());
- assert(input_tensor.IsTensor());
- std::vector<int64_t> input_lengh_dims = {1};
- int64_t input_lengths[] = {sizeof(input_data)/sizeof(input_data[0])};
- auto lengths_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
- Ort::Value input_lengths_tensor = Ort::Value::CreateTensor<int64_t>(lengths_memory_info, input_lengths, 1, input_lengh_dims.data(), input_lengh_dims.size());
- assert(input_lengths_tensor.IsTensor());
-
- std::vector<int64_t> scales_dims = {3};
- std::vector<float> scales_data = {0.667, 0.8, 1.0};
- auto scales_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
- Ort::Value scales_tensor = Ort::Value::CreateTensor<float>(scales_memory_info, scales_data.data(), scales_data.size(), scales_dims.data(), scales_dims.size());
- assert(scales_tensor.IsTensor());
- std::vector<int64_t> sid_dims = {1};
- int64_t sid[] = {25};
- auto sid_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
- Ort::Value sid_tensor = Ort::Value::CreateTensor<int64_t>(sid_memory_info, sid, 1, sid_dims.data(), sid_dims.size());
- assert(sid_tensor.IsTensor());
- std::vector<Ort::Value> ort_inputs;
- ort_inputs.push_back(std::move(input_tensor));
- ort_inputs.push_back(std::move(input_lengths_tensor));
- ort_inputs.push_back(std::move(scales_tensor));
- ort_inputs.push_back(std::move(sid_tensor));
- // Run model
- auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), ort_inputs.data(), ort_inputs.size(), output_node_names.data(), output_node_names.size());
- // Get pointer to output tensor
- float* audio = output_tensors[0].GetTensorMutableData<float>();
- int* audio_lengths = output_tensors[1].GetTensorMutableData<int32_t>();
- // Print output
- int len = audio_lengths[0]*256;
- printf("audio_length: %d\n", len);
- std::vector<float> audioData(len);
- for(int i=0; i<len; i++){
- audioData[i] = audio[i];
- }
- //writeVectorToFile(audioData, "datas_cpp.txt");
- floatToWav(audioData, "output.wav");
- return 0;
-
- }
|