TensorRT(4)：NvInferRuntime.h接口头文件分析_综合

文章目录

- 一、ICudaEngine类

记录NvInferRuntime.h头文件中的几个重要的接口类以及类中各接口函数的作用
头文件中都有英文注释，这里只是简单中文翻译一下，如有误，欢迎讨论。业余时间，看多少记录多少，纯属笔记。

NvInferRuntime.h 是运行 TensorRT 最重要的接口头文件之一，NVIDIA 在该头文件的注释中写道：
"This is the top-level API file for TensorRT extended runtime library."

介绍几个重要的类
1、ICudaEngine
2、IExecutionContext
3、IPluginFactory

一、ICudaEngine类

用于在已构建好的网络上执行推理的引擎。官方提示：不要从这个类继承，否则会破坏 API 和 ABI 的向前兼容性（forward-compatibility，注意不是"前向传播"）。
介绍几个常用的类函数，为了排版，删除原有的英文注释，笔者看不懂的部分，直接贴上英文注释。

class ICudaEngine
{
    
public:// 获取绑定索引的个数，比如input绑定了一个，output绑定了一个，那索引总数就为2virtual int getNbBindings() const noexcept = 0;// 根据节点名，获取绑定的索引virtual int getBindingIndex(const char* name) const noexcept = 0;//! 根据索引，获取绑定的节点名virtual const char* getBindingName(int bindingIndex) const noexcept = 0;//! 确定是否是输入节点，virtual bool bindingIsInput(int bindingIndex) const noexcept = 0;//! 获取节点对应的维度virtual Dims getBindingDimensions(int bindingIndex) const noexcept = 0;//! 获取索引对于的数据类型virtual DataType getBindingDataType(int bindingIndex) const noexcept = 0;//! 获得最大batchSize的大小virtual int getMaxBatchSize() const noexcept = 0;//! \return The number of layers in the network.//! virtual int getNbLayers() const noexcept = 0;//! 获取工作空间的大小, 通常小于设置的值TRT_DEPRECATEDvirtual std::size_t getWorkspaceSize() const noexcept = 0;//! 序列化模型，序列化之后可以保存到本地virtual IHostMemory* serialize() const noexcept = 0;//! 创建执行文件，创建好后，可以推断模型，见IExecutionContextvirtual IExecutionContext* createExecutionContext() noexcept = 0;//! 销毁对象，释放空间virtual void destroy() noexcept = 0;//! 获取索引对应的tensor在gpu上还是cpu上virtual TensorLocation getLocation(int bindingIndex) const noexcept = 0;protected:virtual ~ICudaEngine() {
    }public://! \see getDeviceMemorySize() IExecutionContext::setDeviceMemory()//! 默认情况下，创建IExecutionContext时，会分配持久设备内存来保留激活数据// 如果不想这样，可以通过这个函数创建，并通过IExecutionContext::setDeviceMemory()设置空间virtual IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept = 0;//! 获取设备内存大小virtual size_t getDeviceMemorySize() const noexcept = 0;//! 返回这个engine能否被修改virtual bool isRefittable() const noexcept = 0;//! 返回元素每个组成部分的字节数virtual int getBindingBytesPerComponent(int bindingIndex) const noexcept = 0;//!//! \brief Return the number of components included in one element.//!//! The number of elements in the vectors is returned if getBindingVectorizedDim() != -1.//!//! \param bindingIndex The binding Index.//!//! \see ICudaEngine::getBindingVectorizedDim()//!virtual int getBindingComponentsPerElement(int bindingIndex) const noexcept = 0;//!//! \brief Return the binding format.//!//! \param bindingIndex The binding Index.//! 返回绑定数据格式，如NCHWvirtual TensorFormat getBindingFormat(int bindingIndex) const noexcept = 0;//!//! \brief Return the human readable description of the tensor format.//!//! The description includes the order, vectorization, data type, strides,//! and etc. Examples are shown as follows://! Example 1: kCHW + FP32//! "Row major linear FP32 format"//! Example 2: kCHW2 + FP16//! "Two wide channel vectorized row major FP16 format"//! Example 3: kHWC8 + FP16 + Line Stride = 32//! "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0"//!//! \param bindingIndex The binding Index.//! 返回绑定数据格式，如NCHW, 和以上不同的是，这边返回的是字符串，前面返回的是枚举// 如NCHW fp32返回的是 　Row major linear FP32 format (kLINEAR)virtual const char* getBindingFormatDesc(int bindingIndex) const noexcept = 0;//!//! \brief Return the dimension index that the buffer is vectorized.//!//! Specifically -1 is returned if scalars per vector is 1.//!//! \param bindingIndex The binding Index.//! 返回向量化索引内存virtual int getBindingVectorizedDim(int bindingIndex) const noexcept = 0;//!//! 
\brief Returns the name of the network associated with the engine.//!//! The name is set during network creation and is retrieved after//! building or deserialization.//!//! \see INetworkDefinition::setName(), INetworkDefinition::getName()//!//! \return A zero delimited C-style string representing the name of the network.//!返回与引擎关联的网络的名称，名称是在网络创建期间设置的，并在之后创建建立或反序列化。virtual const char* getName() const noexcept = 0;//!//! \brief Get the number of optimization profiles defined for this engine.//!//! \return Number of optimization profiles. It is always at least 1.//!//! \see IExecutionContext::setOptimizationProfile()virtual int getNbOptimizationProfiles() const noexcept = 0;//!//! \brief Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile.//!//! \param bindingIndex The binding index (must be between 0 and getNbBindings() - 1)//!//! \param profileIndex The profile index (must be between 0 and getNbOptimizationProfiles()-1)//!//! \param select Whether to query the minimum, optimum, or maximum dimensions for this binding.//!//! \return The minimum / optimum / maximum dimensions for this binding in this profile.virtual Dims getProfileDimensions(int bindingIndex, int profileIndex, OptProfileSelector select) const noexcept = 0;//!//! \brief Get minimum / optimum / maximum values for an input shape binding under an optimization profile.//!//! \param profileIndex The profile index (must be between 0 and getNbOptimizationProfiles()-1)//!//! \param inputIndex The input index (must be between 0 and getNbBindings() - 1)//!//! \param select Whether to query the minimum, optimum, or maximum shape values for this binding.//!//! \return If the binding is an input shape binding, return a pointer to an array that has//! the same number of elements as the corresponding tensor, i.e. 1 if dims.nbDims == 0, or dims.d[0]//! if dims.nbDims == 1, where dims = getBindingDimensions(inputIndex). The array contains//! 
the elementwise minimum / optimum / maximum values for this shape binding under the profile.//! If either of the indices is out of range, or if the binding is not an input shape binding, return//! nullptr.virtual const int32_t* getProfileShapeValues(int profileIndex, int inputIndex, OptProfileSelector select) constnoexcept= 0;//!//! \brief True if tensor is required as input for shape calculations or output from them.//!//! TensorRT evaluates a network in two phases://!//! 1. Compute shape information required to determine memory allocation requirements//! and validate that runtime sizes make sense.//!//! 2. Process tensors on the device.//!//! Some tensors are required in phase 1. These tensors are called "shape tensors", and always//! have type Int32 and no more than one dimension. These tensors are not always shapes//! themselves, but might be used to calculate tensor shapes for phase 2.//!//! isShapeBinding(i) returns true if the tensor is a required input or an output computed in phase 1.//! isExecutionBinding(i) returns true if the tensor is a required input or an output computed in phase 2.//!//! For example, if a network uses an input tensor with binding i as an addend//! to an IElementWiseLayer that computes the "reshape dimensions" for IShuffleLayer,//! then isShapeBinding(i) == true.//!//! It's possible to have a tensor be required by both phases. For instance, a tensor//! can be used for the "reshape dimensions" and as the indices for an IGatherLayer//! collecting floating-point data.//!//! It's also possible to have a tensor be required by neither phase, but nonetheless//! shows up in the engine's inputs. For example, if an input tensor is used only//! as an input to IShapeLayer, only its shape matters and its values are irrelevant.//!//! \see isExecutionBinding()//!virtual bool isShapeBinding(int bindingIndex) const noexcept = 0;//!//! \brief True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.//!//! 
For example, if a network uses an input tensor with binding i ONLY as the "reshape dimensions"//! input of IShuffleLayer, then isExecutionBinding(i) is false, and a nullptr can be//! supplied for it when calling IExecutionContext::execute or IExecutionContext::enqueue.//!//! \see isShapeBinding()//!virtual bool isExecutionBinding(int bindingIndex) const noexcept = 0;//!//! \brief determine that execution capability this engine has.//!//! If the engine has EngineCapability::kDEFAULT, then all engine functionality is valid..//! If the engine has EngineCapability::kSAFE_GPU, then only the functionality in safe::ICudaEngine is valid.//! If the engine has EngineCapability::kSAFE_DLA, then only serialize, destroy, and const-accessor functions are valid.//!//! \return The EngineCapability flag that the engine was built for.//!virtual EngineCapability getEngineCapability() const noexcept = 0;//! \brief Set the ErrorRecorder for this interface//!//! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.//! This function will call incRefCount of the registered ErrorRecorder at least once. Setting//! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if//! a recorder has been registered.//!//! \param recorder The error recorder to register with this interface.////! \see getErrorRecorder//!virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;//!//! \brief get the ErrorRecorder assigned to this interface.//!//! Retrieves the assigned error recorder object for the given class. A default error recorder does not exist,//! so a nullptr will be returned if setErrorRecorder has not been called.//!//! \return A pointer to the IErrorRecorder object that has been registered.//!//! \see setErrorRecorder//!virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;//!//! \brief Query whether the engine was built with an implicit batch dimension.//!//! 
\return True if tensors have implicit batch dimension, false otherwise.//!//! This is an engine-wide property. Either all tensors in the engine//! have an implicit batch dimension or none of them do.//!//! hasImplicitBatchDimension() is true if and only if the INetworkDefinition//! from which this engine was built was created with createNetwork() or//! createNetworkV2() without NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag.//!//! \see createNetworkV2//!virtual bool hasImplicitBatchDimension() const TRTNOEXCEPT = 0;
};

举例：

/*====================================================================
 * File    : sampleCaffeClassf.cc
 * Purpose : TensorRT study series, part 4 - the ICudaEngine interface class
 *====================================================================*/
#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "NvInferRuntimeCommon.h"
#include "logger.h"
#include "cuda_runtime_api.h"
#include "common.h"

#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <sstream>
#include <string>
#include <opencv2/opencv.hpp>

using namespace nvinfer1;
using namespace plugin;
using namespace nvcaffeparser1;

const int MODEL_HEIGHT = 256;
const int MODEL_WIDTH = 256;
const int MODEL_CHANNEL = 3;
const int MODEL_OUTPUT_SIZE = 5; // 5-class classifier

namespace
{

// Deleter for TensorRT interface objects, which are released through
// destroy() rather than through operator delete.
struct TrtDestroyer
{
    template <typename T>
    void operator()(T* pObj) const noexcept
    {
        if (pObj != nullptr)
        {
            pObj->destroy();
        }
    }
};

// Owning pointer that calls destroy() on scope exit, including early returns.
template <typename T>
using TrtPtr = std::unique_ptr<T, TrtDestroyer>;

} // namespace

// Loads a serialized engine from ./savedTrt.trt, deserializes it and prints
// the properties that ICudaEngine reports for the "data" input binding.
// Returns EXIT_FAILURE if the file cannot be read, if any TensorRT object
// cannot be created, or if the "data" binding does not exist; 0 otherwise.
int main()
{
    const std::string strTrtSavedPath = "./savedTrt.trt";

    // gLogger is the logger instance the runtime requires; a custom logger
    // can be supplied instead by deriving from the logger interface.
    // Declaration order matters: objects are destroyed in reverse order
    // (context, then engine, then runtime).
    TrtPtr<IRuntime> runtime{createInferRuntime(gLogger)};
    if (!runtime)
    {
        std::cerr << "failed to create the TensorRT runtime" << std::endl;
        return EXIT_FAILURE;
    }

    // 1. Read the whole serialized engine into memory. The file is binary,
    //    so it must be opened in binary mode to avoid newline translation.
    std::ifstream fin(strTrtSavedPath, std::ios::binary);
    if (!fin)
    {
        std::cerr << "failed to open engine file: " << strTrtSavedPath << std::endl;
        return EXIT_FAILURE;
    }
    const std::string modelData((std::istreambuf_iterator<char>(fin)), std::istreambuf_iterator<char>());
    fin.close();
    if (modelData.empty())
    {
        std::cerr << "engine file is empty: " << strTrtSavedPath << std::endl;
        return EXIT_FAILURE;
    }

    // 2. Deserialize the engine so that it can be used for inference.
    TrtPtr<ICudaEngine> engine{runtime->deserializeCudaEngine(modelData.data(), modelData.size(), nullptr)};
    if (!engine)
    {
        std::cerr << "failed to deserialize the engine" << std::endl;
        return EXIT_FAILURE;
    }

    // Execution context used for inference.
    TrtPtr<IExecutionContext> context{engine->createExecutionContext()};
    if (!context)
    {
        std::cerr << "failed to create the execution context" << std::endl;
        return EXIT_FAILURE;
    }

    const int nInputIdx = engine->getBindingIndex("data");    // input binding index
    const int nOutputIndex = engine->getBindingIndex("prob"); // output binding index
    const int nNumIndex = engine->getNbBindings();            // total number of bindings
    if (nInputIdx < 0)
    {
        // Every query below is keyed on the input binding, so stop here.
        std::cerr << "binding \"data\" not found in the engine" << std::endl;
        return EXIT_FAILURE;
    }

    // Name of the tensor bound at the input index; guard against a null result.
    const char* pchInputTensorName = engine->getBindingName(nInputIdx);
    const std::string strInputTensorName = (pchInputTensorName != nullptr) ? pchInputTensorName : "";

    const bool bIsInputTensor = engine->bindingIsInput(nInputIdx);           // is it an input binding?
    const Dims tDimInput = engine->getBindingDimensions(nInputIdx);          // input dimensions
    const DataType emInputDataType = engine->getBindingDataType(nInputIdx);  // input data type
    const int nMaxBatchSize = engine->getMaxBatchSize();                     // maximum batch size
    const int nNumLayers = engine->getNbLayers();                            // number of network layers
    const std::size_t nWorkSpaceSize = engine->getWorkspaceSize();           // workspace size
    const TensorLocation emLocattion = engine->getLocation(nInputIdx);       // tensor on GPU or CPU
    const int nBindingSize = engine->getBindingBytesPerComponent(nInputIdx); // bytes per component
    const TensorFormat emTensorFormat = engine->getBindingFormat(nInputIdx); // data format (enum)

    // Queried only to demonstrate the API; the values are not printed.
    static_cast<void>(bIsInputTensor);
    static_cast<void>(nMaxBatchSize);

    std::cout << " 输入节点索引 nInputIdx = " << nInputIdx << std::endl;
    std::cout << " 输出节点索引 nOutputIdx = " << nOutputIndex << std::endl;
    std::cout << " 总索引个数 nNumIndex = " << nNumIndex << std::endl;
    std::cout << " input 节点名 = " << strInputTensorName << std::endl;
    for (int i = 0; i < tDimInput.nbDims; ++i)
    {
        std::cout << " 输入维度 dim[" << i << "] = " << tDimInput.d[i] << std::endl;
    }
    std::cout << " 输入数据类型为 " << int(emInputDataType) << std::endl;
    std::cout << " 工作空间大小为 " << nWorkSpaceSize << std::endl;
    std::cout << " 输入数据在 " << int(emLocattion) << std::endl;
    std::cout << " 每个元素空间字节大小：" << nBindingSize << std::endl;
    std::cout << " 输入数据格式： " << int(emTensorFormat) << std::endl;
    std::cout << " 输入数据格式：　" << engine->getBindingFormatDesc(nInputIdx) << std::endl;
    std::cout << " 网络layer的个数：" << nNumLayers << std::endl;

    std::cout << "end ... hello world \n";
    return 0;
}