// TTS inference demo: runs a VITS-style ONNX model (text-symbol ids in,
// float waveform out) and saves the result as a 32-bit IEEE-float WAV file.
#include "onnxruntime_cxx_api.h"

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Canonical 44-byte RIFF/WAVE header. Every field is naturally aligned at
// its offset, so the struct is exactly 44 bytes without any packing pragma.
struct WavHeader {
    char riff[4];            // "RIFF"
    uint32_t chunkSize;      // total file size - 8
    char wave[4];            // "WAVE"
    char fmt[4];             // "fmt "
    uint32_t subchunk1Size;  // 16 for PCM / IEEE-float
    uint16_t audioFormat;    // 3 = IEEE float
    uint16_t numChannels;
    uint32_t sampleRate;
    uint32_t byteRate;       // sampleRate * numChannels * bytesPerSample
    uint16_t blockAlign;     // numChannels * bytesPerSample
    uint16_t bitsPerSample;
    char data[4];            // "data"
    uint32_t subchunk2Size;  // payload size in bytes
};

// Writes `data` as a (mono, 22050 Hz by default) 32-bit IEEE-float WAV file.
// Errors are reported to stderr; the function does not throw.
void floatToWav(const std::vector<float>& data, const std::string& filename,
                uint32_t sampleRate = 22050, uint16_t numChannels = 1) {
    std::ofstream file(filename, std::ios::binary);
    if (!file) {
        std::cerr << "Failed to open file for writing." << std::endl;
        return;
    }

    static_assert(sizeof(WavHeader) == 44, "WavHeader must be exactly 44 bytes");

    WavHeader header{};  // zero-init so no field is left indeterminate
    // memcpy copies exactly 4 bytes; iterating a string literal with
    // std::begin/std::end would copy its trailing NUL as a 5th byte and
    // overflow the 4-byte tag fields.
    std::memcpy(header.riff, "RIFF", 4);
    header.chunkSize =
        static_cast<uint32_t>(sizeof(WavHeader) - 8 + data.size() * sizeof(float));
    std::memcpy(header.wave, "WAVE", 4);
    std::memcpy(header.fmt, "fmt ", 4);
    header.subchunk1Size = 16;
    header.audioFormat = 3;  // IEEE float
    header.numChannels = numChannels;
    header.sampleRate = sampleRate;
    header.bitsPerSample = 32;  // 32-bit float samples
    header.byteRate = header.sampleRate * header.numChannels * header.bitsPerSample / 8;
    header.blockAlign =
        static_cast<uint16_t>(header.numChannels * header.bitsPerSample / 8);
    std::memcpy(header.data, "data", 4);
    header.subchunk2Size = static_cast<uint32_t>(data.size() * sizeof(float));

    file.write(reinterpret_cast<const char*>(&header), sizeof(header));
    file.write(reinterpret_cast<const char*>(data.data()),
               static_cast<std::streamsize>(data.size() * sizeof(float)));
    if (!file) {
        std::cerr << "Error writing to file." << std::endl;
    }
}

// Dumps each sample of `data` on its own line to a text file (debug aid).
void writeVectorToFile(const std::vector<float>& data, const std::string& filename) {
    std::ofstream outFile(filename);
    if (!outFile) {
        std::cerr << "Error opening file for writing: " << filename << std::endl;
        return;
    }
    for (const auto& item : data) {
        outFile << item << '\n';
    }
}

int main() {
    // Create the ORT environment and a single-threaded session for the model.
    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "test");
    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(1);
    const char* model_path = "./model_184000_audio_len.onnx";
    Ort::Session session(env, model_path, session_options);
    Ort::AllocatorWithDefaultOptions allocator;

    // Model info: number of graph inputs/outputs (a single-image network
    // typically has one input; multi-output networks report more).
    size_t num_input_nodes = session.GetInputCount();
    size_t num_output_nodes = session.GetOutputCount();
    printf("Number of inputs = %zu\n", num_input_nodes);
    printf("Number of output = %zu\n", num_output_nodes);

    // Query the declared shapes of the first input/output.
    auto input_dims = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
    auto output_dims = session.GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
    std::cout << "input_dims:" << input_dims[0] << std::endl;
    std::cout << "output_dims:" << output_dims[0] << std::endl;

    std::vector<const char*> input_node_names = {"input", "input_lengths", "scales", "sid"};
    std::vector<const char*> output_node_names = {"output", "output_lengths"};

    // Input text (for reference only; the ids below are its preprocessed form).
    std::string text = "一号哨,发生犯人爆狱!";

    // Symbol-id sequence produced by the Python-side text preprocessing.
    int64_t input_data[] = {0, 51, 0, 198, 0, 66, 0, 96, 0, 162, 0, 196, 0, 16,
                            0, 61, 0, 96, 0, 162, 0, 196, 0, 3, 0, 16, 0, 48,
                            0, 43, 0, 198, 0, 61, 0, 110, 0, 139, 0, 198, 0, 16,
                            0, 48, 0, 43, 0, 56, 0, 196, 0, 150, 0, 110, 0, 56,
                            0, 197, 0, 16, 0, 58, 0, 96, 0, 162, 0, 196, 0, 126,
                            0, 196, 0, 5, 0};
    size_t input_tensor_size = sizeof(input_data) / sizeof(input_data[0]);
    std::vector<int64_t> input_node_dims = {1, static_cast<int64_t>(input_tensor_size)};

    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    // "input": [1, T] int64 symbol ids.
    Ort::Value input_tensor = Ort::Value::CreateTensor<int64_t>(
        memory_info, input_data, input_tensor_size,
        input_node_dims.data(), input_node_dims.size());
    assert(input_tensor.IsTensor());

    // "input_lengths": [1] int64, the sequence length T.
    std::vector<int64_t> input_lengh_dims = {1};
    int64_t input_lengths[] = {static_cast<int64_t>(input_tensor_size)};
    auto lengths_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::Value input_lengths_tensor = Ort::Value::CreateTensor<int64_t>(
        lengths_memory_info, input_lengths, 1,
        input_lengh_dims.data(), input_lengh_dims.size());
    assert(input_lengths_tensor.IsTensor());

    // "scales": [3] float — noise scale, length scale, noise-w (VITS controls).
    std::vector<int64_t> scales_dims = {3};
    std::vector<float> scales_data = {0.667f, 0.8f, 1.0f};
    auto scales_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::Value scales_tensor = Ort::Value::CreateTensor<float>(
        scales_memory_info, scales_data.data(), scales_data.size(),
        scales_dims.data(), scales_dims.size());
    assert(scales_tensor.IsTensor());

    // "sid": [1] int64 speaker id.
    std::vector<int64_t> sid_dims = {1};
    int64_t sid[] = {25};
    auto sid_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::Value sid_tensor = Ort::Value::CreateTensor<int64_t>(
        sid_memory_info, sid, 1, sid_dims.data(), sid_dims.size());
    assert(sid_tensor.IsTensor());

    std::vector<Ort::Value> ort_inputs;
    ort_inputs.push_back(std::move(input_tensor));
    ort_inputs.push_back(std::move(input_lengths_tensor));
    ort_inputs.push_back(std::move(scales_tensor));
    ort_inputs.push_back(std::move(sid_tensor));

    // Run inference.
    auto output_tensors = session.Run(
        Ort::RunOptions{nullptr},
        input_node_names.data(), ort_inputs.data(), ort_inputs.size(),
        output_node_names.data(), output_node_names.size());

    // "output": float waveform; "output_lengths": produced length in frames.
    float* audio = output_tensors[0].GetTensorMutableData<float>();
    // NOTE(review): element type of output_lengths assumed int per original
    // declaration — confirm against the exported model (often int64).
    int* audio_lengths = output_tensors[1].GetTensorMutableData<int>();

    // 256 samples per frame (vocoder hop size) — presumably; verify against
    // the model's export configuration.
    int len = audio_lengths[0] * 256;
    printf("audio_length: %d\n", len);

    // NOTE(review): the original source was truncated here; the tail below is
    // a minimal reconstruction — copy the samples and write them to a WAV file.
    std::vector<float> audioData(len);
    for (int i = 0; i < len; i++) {
        audioData[i] = audio[i];
    }
    floatToWav(audioData, "output.wav");

    return 0;
}