Retrieving Object Data (position) from Custom Model Detection

Hello everyone,

I’ve successfully trained a custom model using YOLOv5 and loaded it using torch.hub.load. Following the Stereolabs tutorial here, I have a couple of questions:

  1. I’ve detected objects and passed their information to the ingest_custom_box_objects function, but I’m uncertain about its actual effect. How can I visualize the impact of this function?
  2. After calling the ingest function, I tried to access the positions of the detected objects using objects = sl.Objects() and zed.retrieve_objects(objects, obj_runtime_param), as the tutorial says. However, when I check objects.object_list, it is empty. How can I retrieve the positions of the objects detected by my custom model?

You can see the code below.

Thank you for your assistance.

import pyzed.sl as sl
from typing import Any
import numpy as np
import torch
import cv2
import time

def init_zed():
    """Open the ZED camera and return the ready-to-use handle.

    The camera is configured for HD1080 at 30 FPS with metric units.
    If opening fails, the error code is printed, the camera is closed and
    the process exits with status 1.
    """
    camera = sl.Camera()

    settings = sl.InitParameters()
    settings.coordinate_units = sl.UNIT.METER
    settings.camera_resolution = sl.RESOLUTION.HD1080
    settings.camera_fps = 30

    status = camera.open(settings)
    if status != sl.ERROR_CODE.SUCCESS:
        # Nothing useful can be done without a camera: report and bail out.
        print(repr(status))
        camera.close()
        exit(1)

    print("Zed Camera Started!")
    return camera

def load_model(model_name):
    """Load custom YOLOv5 weights from ``model_name`` through torch.hub.

    ``force_reload=True`` is passed, so the hub repository is re-fetched on
    every call instead of being served from the local cache.
    """
    return torch.hub.load(
        "ultralytics/yolov5",
        "custom",
        path=model_name,
        force_reload=True,
    )

def detector(model, frame, device):
    """Run ``model`` on a single frame and split its detections.

    The model is moved to ``device``, called with a one-element batch, and
    the first entry of ``results.xyxyn`` is split column-wise.

    Returns:
        A ``(labels, cord)`` pair: ``labels`` is the last column of the
        detection table, ``cord`` is every column before it.
    """
    model.to(device)
    detections = model([frame]).xyxyn[0]
    return detections[:, -1], detections[:, :-1]

def main():
    """Feed custom YOLOv5 detections to the ZED SDK and print 3D positions.

    Opens the camera, enables the CUSTOM_BOX_OBJECTS detection module and,
    for every successfully grabbed frame, runs the YOLOv5 model on the left
    image, ingests the resulting 2D boxes into the SDK, retrieves the
    objects and prints each object's id and position.

    The loop runs until the process is interrupted; the camera is closed on
    the way out. Exits with status 1 if object detection cannot be enabled.
    """
    model_name = "model/yolov5/bestModel.pt"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Initializing the ZED camera and the per-frame containers
    zed = init_zed()
    try:
        runtime_params = sl.RuntimeParameters()
        image_left = sl.Mat()

        # Setting object detection parameters
        obj_params = sl.ObjectDetectionParameters()
        obj_params.detection_model = sl.OBJECT_DETECTION_MODEL.CUSTOM_BOX_OBJECTS
        if obj_params.enable_tracking:
            # Object tracking relies on the positional tracking module, so it
            # has to be enabled before object detection.
            zed.enable_positional_tracking(sl.PositionalTrackingParameters())
        err = zed.enable_object_detection(obj_params)
        if err != sl.ERROR_CODE.SUCCESS:
            print(repr(err))
            exit(1)

        obj_runtime_params = sl.ObjectDetectionRuntimeParameters()
        objects = sl.Objects()  # Structure containing all the detected objects

        model = load_model(model_name)
        while True:
            if zed.grab(runtime_params) != sl.ERROR_CODE.SUCCESS:
                continue

            zed.retrieve_image(image_left, sl.VIEW.LEFT)
            image = image_left.get_data()
            height, width = image.shape[:2]

            # The ZED left view is a 4-channel BGRA array, while the YOLOv5
            # hub wrapper expects RGB for numpy inputs.
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
            labels, cords = detector(model, rgb_image, device)

            objects_in = []
            for label, cord in zip(labels.tolist(), cords.tolist()):
                xmin, ymin, xmax, ymax, conf = cord
                # detector() returns coordinates normalized to [0, 1]; the
                # SDK expects the box corners in pixels of the ingested image.
                left, right = xmin * width, xmax * width
                top, bottom = ymin * height, ymax * height

                # Fill the detection into the SDK format
                tmp = sl.CustomBoxObjectData()
                tmp.unique_object_id = sl.generate_unique_id()
                tmp.probability = conf
                tmp.label = int(label)
                # Corner order: top-left, top-right, bottom-right, bottom-left
                tmp.bounding_box_2d = np.array(
                    [[left, top], [right, top], [right, bottom], [left, bottom]]
                )
                tmp.is_grounded = True
                objects_in.append(tmp)
            zed.ingest_custom_box_objects(objects_in)

            # Retrieve the 3D tracked objects built from the ingested boxes
            zed.retrieve_objects(objects, obj_runtime_params)
            for obj in objects.object_list:
                print("{} {}".format(obj.id, obj.position))
    finally:
        # Release the camera even when the loop is interrupted or setup fails.
        zed.close()

Hi @aseris, welcome to the forums!

Do you get any error at all?
I tried running your code, and I had to fix a typo at the line: (lables → labels)

labelL = lables.tolist()

Also, did you try our GitHub sample for custom Object Detection?
It’s a complete, working implementation that you can reference from: