pip install -i https://mirrors.cloud.tencent.com/pypi/simple tiinfer
```python
from typing import Dict

import mosec
import tiinfer

### Log Code ###
# Add the following code to print the request log recorded by the framework to the terminal.
import logging
import sys

logger_formatter = "%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s"
logging.basicConfig(stream=sys.stdout, format=logger_formatter, level=logging.DEBUG)
### Log Code ###

# tiinfer supports the native mosec.Worker.
class HelloWorld(mosec.Worker):
    def forward(self, req: Dict) -> Dict:
        return {"hello": f"world. raw req is {req}"}

# Launch two processes to process requests simultaneously.
tiinfer.append_worker(HelloWorld, num=2)
```
TI_MODEL_DIR=$(pwd) python3 -m tiinfer --timeout 30000
By default, the service listens on port 8501 and the request path is `/v1/models/m:predict`. Use the following command to access it:

```bash
> curl -X POST -d '{"key": "values"}' http://127.0.0.1:8501/v1/models/m:predict
{"hello": "world. raw req is {'key': 'values'}"}
```
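The same request can also be issued from Python. Below is a minimal client sketch using only the standard library; it assumes the service from the previous step is still running locally on port 8501.

```python
import json
from urllib.request import Request, urlopen

# Build a POST request with the same JSON body as the curl example above.
req = Request(
    "http://127.0.0.1:8501/v1/models/m:predict",
    data=json.dumps({"key": "values"}).encode("utf8"),
    headers={"Content-Type": "application/json"},
)
with urlopen(req) as resp:
    # Expected response: {'hello': "world. raw req is {'key': 'values'}"}
    print(json.loads(resp.read()))
```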


Environment Variables | Description | Default Value |
TI_MODEL_DIR | Model path | /data/model/ |
TI_PREPROCESS_NUMS | Number of pre-processing processes | 0 |
TI_INFERENCE_NUMS | Number of inference processes | 1 |
TI_POSTPROCESS_NUMS | Number of post-processing processes | 0 |
TI_INFERENCE_MAX_BATCH_SIZE | Maximum batch size for inference | 1 |
A worker can override the `__init__` function to perform some initialization work. Only override it when necessary. Note that you need to first call `super().__init__()` to complete the parent class initialization.

```python
class Worker:
    def __init__(self): ...
    def forward(self, data): ...
```
Use the `tiinfer.append_worker()` function for orchestration.

```python
def append_worker(
    worker: Type[mosec.Worker],
    num: int = 1,
    start_method: str = "spawn",
    env: Union[None, List[Dict[str, str]]] = None,
) -> None:
    """
    worker: A processing worker that inherits mosec.Worker and implements the forward method.
    num: Number of processes for parallel computing (≥ 1).
    start_method: Process startup method ("spawn" or "fork").
    env: Environment variables required before process startup.
    """
```
```python
import tiinfer
from mosec import Worker
from typing import Dict, Any


class MyPreprocess(Worker):
    def forward(self, data: Dict) -> Any:
        # Input is a Dict converted from JSON and requires some necessary pre-processing.
        ...


class MyPredict(Worker):
    def __init__(self):
        super().__init__()
        # Read and load the model.

    def forward(self, data: Any) -> Any:
        # The input is the result of pre-processing. The inference result is obtained
        # after inference by calling the model.
        ...


class MyPostprocess(Worker):
    def forward(self, data: Any) -> Dict:
        # The input is the inference result. Convert it into a Dict through post-processing
        # and return it as JSON to the caller.
        ...


# Orchestration handling process: 1 x pre-processing worker -> 2 x inference workers -> 1 x post-processing worker
tiinfer.append_worker(MyPreprocess, 1)
tiinfer.append_worker(MyPredict, 2)
tiinfer.append_worker(MyPostprocess, 1)
```
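The `start_method` and `env` parameters of `append_worker()` described above can further customize how each worker process is launched. The sketch below is illustrative only: it assumes, as in mosec, that `env` takes one dict per started process, and pinning each inference process to its own GPU via `CUDA_VISIBLE_DEVICES` is just one example of what such per-process variables could be used for.

```python
# Illustrative sketch: append two MyPredict inference workers, each started with its
# own environment (here, a different GPU per process). The one-dict-per-process
# semantics for `env` is an assumption based on mosec's append_worker behavior.
tiinfer.append_worker(
    MyPredict,
    num=2,
    start_method="spawn",
    env=[{"CUDA_VISIBLE_DEVICES": "0"}, {"CUDA_VISIBLE_DEVICES": "1"}],
)
```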
```python
import logging
import os
import time
from typing import Dict, List
from urllib.request import urlretrieve

import cv2  # type: ignore
import numpy as np  # type: ignore
import torch  # type: ignore

import tiinfer
import tiinfer.utils
import mosec

### Log Code ###
# Add the following code to print the request log recorded by the framework to the terminal.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    "%(asctime)s - %(process)d - %(levelname)s - %(filename)s:%(lineno)s - %(message)s"
)
sh = logging.StreamHandler()
sh.setFormatter(formatter)
logger.addHandler(sh)
### Log Code ###


# The pre-processing procedure decodes the input Base64-encoded string and performs
# some scaling operations as required by the model.
class Pre(mosec.Worker):
    def forward(self, req: Dict) -> cv2.Mat:
        # The pre-processed input data is a Dict converted from JSON.
        img_base64_bytes = req["image"]
        img = tiinfer.utils.image_to_cv2_mat(img_base64_bytes)
        # bgr -> rgb
        img = img[:, :, ::-1]
        # Perform some pre-processing on the image.
        img = cv2.resize(img, (256, 256))
        crop_img = (
            img[16 : 16 + 224, 16 : 16 + 224].astype(np.float32) / 255
        )  # center crop
        crop_img -= [0.485, 0.456, 0.406]
        crop_img /= [0.229, 0.224, 0.225]
        crop_img = np.transpose(crop_img, (2, 0, 1))
        return crop_img


# Load the model, perform inference on the pre-processed results, convert the results
# into the final format, and then pass them to the caller.
class Infer(mosec.Worker):
    def __init__(self) -> None:
        super().__init__()
        # Retrieve the current directory and load the model as needed.
        self.root_path = tiinfer.TI_MODEL_DIR
        # Preferentially use a GPU.
        self.device = (
            torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        )
        ### Start Loading Non-accelerated Models ###
        # The model exists under the model directory.
        model_file = os.path.join(self.root_path, "model/resNet50.pt")
        ### End Loading Non-accelerated Models ###

        ### Start Loading Accelerated Models ###
        # # Additionally import tiacc_inference for accelerated models.
        # import tiacc_inference
        # model_file = os.path.join(self.root_path, "model/tiacc.pt")
        ### End Loading Accelerated Models ###

        # Load the model.
        self.model = torch.jit.load(model_file)
        self.model.eval()
        # Categorization requires knowledge of the final category.
        self.categories = load_categories()

    def forward(self, img: cv2.Mat) -> Dict:
        with torch.no_grad():
            batch = torch.stack(
                [torch.tensor(arr, device=self.device) for arr in [img]]
            )
            pred_results = self.model(batch)
            prob = torch.nn.functional.softmax(pred_results, dim=1)
            top1_prob, top1_catid = torch.topk(prob, 1)
            return [
                {
                    "confidence": top1_prob[i].tolist()[0],
                    "pred": self.categories[top1_catid[i].tolist()[0]],
                }
                for i in range(top1_prob.size(0))
            ][0]


# Read the category information corresponding to the tag ID from the tag file.
def load_categories() -> List[str]:
    logging.info("loading categories file...")
    local_filename = "imagenet_classes.txt"
    if not os.path.exists("imagenet_classes.txt"):
        local_filename, _ = urlretrieve(
            "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
        )
    with open(local_filename, encoding="utf8") as file:
        return list(map(lambda x: x.strip(), file.readlines()))


# Orchestration handling process: 2 x pre-processing workers -> 1 x inference worker.
tiinfer.append_worker(Pre, 2)
tiinfer.append_worker(Infer, 1)
```
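With the service above running, a client sends the image as a Base64-encoded string under the `image` key, since that is what `Pre.forward` reads. The following sketch uses only the standard library and assumes the service is reachable locally on port 8501 and that `test.jpg` is a local image file; both the file name and the example output are illustrative.

```python
import base64
import json
from urllib.request import Request, urlopen

# Encode a local image as a Base64 string ("test.jpg" is an illustrative file name).
with open("test.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode("utf8")

# POST it under the "image" key, which Pre.forward expects.
req = Request(
    "http://127.0.0.1:8501/v1/models/m:predict",
    data=json.dumps({"image": img_b64}).encode("utf8"),
    headers={"Content-Type": "application/json"},
)
with urlopen(req) as resp:
    # Example shape of the response: {"confidence": ..., "pred": ...}
    print(json.loads(resp.read()))
```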
Format | Scenario | Download Address |
TorchScript | Classification | |
TorchScript | Detection | |
TorchScript | NLP | |
TorchScript | OCR | |
Detectron2 | Detection | |
MMDetection | Detection | |
HuggingFace | NLP | |
SavedModel | NLP | |
SavedModel | Recommendation | |
FrozenGraph | NLP | |
ONNX | Detection | |
```python
from mmdet.apis import init_detector

model = init_detector(config, checkpoint, device=device)
```
```python
import tiacc_inference

model = tiacc_inference.load('tiacc.pt')  # tiacc.pt is the new model generated after model optimization.
```
```python
import torch

model = torch.load(checkpoint)  # .pth model file
```
```python
import tiacc_inference

model = tiacc_inference.load('tiacc.pt')  # tiacc.pt is the new model generated after model optimization.
```
```python
from detectron2.config import get_cfg
from detectron2.modeling import build_model

cfg = get_cfg()
cfg.MODEL.DEVICE = device
cfg.MODEL.WEIGHTS = checkpoint
model = build_model(cfg)
```
```python
import tiacc_inference

model = tiacc_inference.load('tiacc.pt')  # tiacc.pt is the new model generated after model optimization.
```