Show point cloud from .svo at a specific time

I’d like to visualise a point cloud of a video in a specific time (‘–desired_time’)
Currently, I have the following code but it shows point cloud from that time while I want the point cloud to be frozen at the right timestamp:

import sys
import depth_sensing_viewer.viewer as gl
import pyzed.sl as sl
import argparse
import cv2

#python "Visualise_points_cloud\mono_camera\depth_sensing_given_tmstmp.py" --input_svo_file "VideoToBodies\Files\3cameras\14-08-2023_15h55\cam_14-08-2023_15h55_0.svo" --desired_time 10

def parse_args(init, desired_time):
    if len(opt.input_svo_file) > 0 and opt.input_svo_file.endswith(".svo"):
        init.set_from_svo_file(opt.input_svo_file)
        print("[Sample] Using SVO File input: {0}".format(opt.input_svo_file))
    elif len(opt.ip_address)>0 :
        ip_str = opt.ip_address
        if ip_str.replace(':','').replace('.','').isdigit() and len(ip_str.split('.'))==4 and len(ip_str.split(':'))==2:
            init.set_from_stream(ip_str.split(':')[0],int(ip_str.split(':')[1]))
            print("[Sample] Using Stream input, IP : ",ip_str)
        elif ip_str.replace(':','').replace('.','').isdigit() and len(ip_str.split('.'))==4:
            init.set_from_stream(ip_str)
            print("[Sample] Using Stream input, IP : ",ip_str)
        else :
            print("Unvalid IP format. Using live stream")
    if ("HD2K" in opt.resolution):
        init.camera_resolution = sl.RESOLUTION.HD2K
        print("[Sample] Using Camera in resolution HD2K")
    elif ("HD1200" in opt.resolution):
        init.camera_resolution = sl.RESOLUTION.HD1200
        print("[Sample] Using Camera in resolution HD1200")
    elif ("HD1080" in opt.resolution):
        init.camera_resolution = sl.RESOLUTION.HD1080
        print("[Sample] Using Camera in resolution HD1080")
    elif ("HD720" in opt.resolution):
        init.camera_resolution = sl.RESOLUTION.HD720
        print("[Sample] Using Camera in resolution HD720")
    elif ("SVGA" in opt.resolution):
        init.camera_resolution = sl.RESOLUTION.SVGA
        print("[Sample] Using Camera in resolution SVGA")
    elif ("VGA" in opt.resolution):
        init.camera_resolution = sl.RESOLUTION.VGA
        print("[Sample] Using Camera in resolution VGA")
    elif len(opt.resolution)>0: 
        print("[Sample] No valid resolution entered. Using default")
    else : 
        print("[Sample] Using default resolution")

def main(desired_time):
    print("Running Depth Sensing sample ... Press 'Esc' to quit\nPress 's' to save the point cloud")

    init = sl.InitParameters(depth_mode=sl.DEPTH_MODE.ULTRA,
                             coordinate_units=sl.UNIT.METER,
                             coordinate_system=sl.COORDINATE_SYSTEM.RIGHT_HANDED_Y_UP)
    parse_args(init, desired_time)
    zed = sl.Camera()
    status = zed.open(init)
    if status != sl.ERROR_CODE.SUCCESS:
        print(repr(status))
        exit()


    svo_fps = zed.get_camera_information().camera_configuration.fps
    desired_frame = int(svo_fps * desired_time)

    # Set the SVO position to the desired frame
    zed.set_svo_position(desired_frame)

    res = sl.Resolution()
    res.width = 720
    res.height = 404

    camera_model = zed.get_camera_information().camera_model
    point_cloud = sl.Mat(res.width, res.height, sl.MAT_TYPE.F32_C4, sl.MEM.CPU)


    viewer = gl.GLViewer()
    viewer.init(1, sys.argv, camera_model, res)

    #point_cloud = sl.Mat(res.width, res.height, sl.MAT_TYPE.F32_C4, sl.MEM.CPU)

    while viewer.is_available():
        if zed.grab() == sl.ERROR_CODE.SUCCESS:
            zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA,sl.MEM.CPU, res)
            viewer.updateData(point_cloud)
    viewer.exit()
    zed.close()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_svo_file', type=str, help='Path to an .svo file', default='')
    parser.add_argument('--ip_address', type=str, help='IP Address for streaming setup', default='')
    parser.add_argument('--resolution', type=str, help='Resolution setting', default='')
    parser.add_argument('--desired_time', type=float, help='Desired time in seconds to display point cloud', default=10.0)
    opt = parser.parse_args()

    if len(opt.input_svo_file) > 0 and len(opt.ip_address) > 0:
        print("Specify only input_svo_file or ip_address, not both. Exit program")
        exit()

    main(opt.desired_time)

Hi,

Each time you call the grab() method, the SDK will go to the next frame.

To stay on a specific frame, either call grab only once after calling setSVOPosition(), or call re-set the svo position at the end of each loop.

Thanks for reply.

When resseting in the loop, I can effectively display point cloud. However, it is a bit overkilling to retrieve point cloud every iteration in the loop.
Moreover, it is really important to me to retrieve the point cloud with its color information for a given timestamp. This is my current code:

def remove_invalid_points(point_cloud):
    """
    Removes NaN and infinite values from a point cloud.

    :param point_cloud: The input point cloud as a numpy array.
    :return: The cleaned point cloud with NaN and infinite values removed.
    """
    # Remove NaN values
    point_cloud = point_cloud[~np.isnan(point_cloud).any(axis=1)]
    # Remove infinite values
    point_cloud = point_cloud[~np.isinf(point_cloud).any(axis=1)]
    return point_cloud


def test01_works(timestamp):
    svo_file1 = 'VideoToBodies/Files/3cameras/14-08-2023_15h55/cam_14-08-2023_15h55_0.svo'

    print("Initializing ZED cameras...")
    camera = init_camera_from_svo(svo_file1)
    fps = 30

    print("Setting SVO position...")
    set_svo_position(camera, timestamp, fps)

    res = sl.Resolution()
    res.width = camera.get_camera_information().camera_configuration.resolution.width
    res.height = camera.get_camera_information().camera_configuration.resolution.height
    depth = sl.Mat(res.width, res.height, sl.MAT_TYPE.F32_C4, sl.MEM.CPU)
    image = sl.Mat()

    key= ''
    while key != 113:  # for 'q' key
        if camera.grab() == sl.ERROR_CODE.SUCCESS:
            camera.retrieve_measure(depth, sl.MEASURE.XYZRGBA,sl.MEM.CPU, res)
            camera.retrieve_image(image, sl.VIEW.LEFT, sl.MEM.CPU, res)

            # Step 1: Convert sl.Mat to numpy array
            point_cloud_np = depth.get_data()
            rgba_channel = point_cloud_np[:,:, 3].view(np.uint32)

            # Extract R, G, B, and alpha components
            # Note: & is the bitwise AND operator, >> is the right shift operator
            r = ((rgba_channel >> 24) & 0xFF) / 255.0  # Extract the first 8 bits
            g = ((rgba_channel >> 16) & 0xFF) / 255.0   # Extract the next 8 bits
            b = ((rgba_channel >> 8) & 0xFF) / 255.0    # Extract the next 8 bits

            # Combine the extracted R, G, B values into an Nx3 array (assuming alpha is not needed)
            rgb = np.stack([r, g, b], axis=-1).reshape(-1, 3)

            print("reshaped : xtracted RGB values:\n", rgb)

            # Extract the x, y, z coordinates and colors
            xyz = point_cloud_np[:, :, :3].reshape(-1, 3)
            xyz = remove_invalid_points(xyz)
            

            # Create an Open3D point cloud object
            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(xyz)
            pcd.colors = o3d.utility.Vector3dVector(rgb)

            # Visualize the point cloud
            o3d.visualization.draw_geometries([pcd])

            key = input("Press 'q' to exit...")

Currently, it shows an unexpected image looking as follows:

While the image input is the following :


I would have expected the colors of the images to be projected on my point cloud

Did you try the other solution which is to call the grab() only once in the loop ?

For the color information, I would suggest to take a look at our Depth sensing sample (https://github.com/stereolabs/zed-sdk/blob/master/depth%20sensing/depth%20sensing/python/depth_sensing.py).

Let us know if you need more assistance in that regard.

Yess but couldn’t manage to make it work. On the other hand, I was able to make it in the following way :

import pyzed.sl as sl
import numpy as np
import open3d as o3d
import numpy as np
import matplotlib.pyplot as plt
import cv2
from matplotlib import pyplot as plt
import math

def init_camera_from_svo(svo_file):
    cam = sl.Camera()
    input_type = sl.InputType()
    input_type.set_from_svo_file(svo_file)
    init_params = sl.InitParameters(input_t=input_type, svo_real_time_mode=False)
    init_params.depth_mode = sl.DEPTH_MODE.ULTRA

    err = cam.open(init_params)
    if err != sl.ERROR_CODE.SUCCESS:
        print(f"Failed to open SVO file: {svo_file}")
        exit(1)

    return cam

def set_svo_position(camera, timestamp, fps):
    # Convert timestamp to frame number
    frame_number = int(timestamp * fps)
    camera.set_svo_position(frame_number)
    
    # Attempt to grab a frame to check if the seek was successful
    if camera.grab() == sl.ERROR_CODE.SUCCESS:
        print(f"Successfully seeked to Frame number: {frame_number}")
        # Reset SVO position to the desired frame after checking
        camera.set_svo_position(frame_number)
    else:
        print(f"Failed to seek to Frame number: {frame_number}")

def main(svo_path , timestamp):

    print("Initializing ZED cameras...")
    camera = init_camera_from_svo(svo_path)
    
    print("Setting SVO position...")
    set_svo_position(camera, timestamp, 30)  # Assuming 30 FPS

    res = sl.Resolution()
    res.width = camera.get_camera_information().camera_configuration.resolution.width
    res.height = camera.get_camera_information().camera_configuration.resolution.height

    point_cloud = sl.Mat(res.width, res.height, sl.MAT_TYPE.F32_C4, sl.MEM.CPU) 
    #depth = sl.Mat(res.width, res.height, sl.MAT_TYPE.F32_C4, sl.MEM.CPU)
    image = sl.Mat(res.width, res.height)

    if camera.grab() == sl.ERROR_CODE.SUCCESS:
        # Retrieve the left image and the colored point cloud
        camera.retrieve_image(image, sl.VIEW.LEFT)
        camera.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA)

        # Convert to NumPy arrays
        image_np = image.get_data()
        point_cloud_np = point_cloud.get_data()

        # Extract RGB and XYZ directly using vectorized operations
        # Assuming point_cloud_np structure is [Height, Width, 4] where last dimension is XYZA
        xyz = point_cloud_np[:, :, :3].reshape(-1, 3)
        bgra = image_np.reshape(-1, 4)
        rgb = bgra[:, :3][:, ::-1] / 255.0   # Normalize RGB values to 0-1 range

        # Filter out points with invalid depth directly using NumPy
        valid_points = np.isfinite(xyz[:, 2])
        xyz_filtered = xyz[valid_points]
        rgb_filtered = rgb[valid_points]

        # Create an Open3D point cloud
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(xyz_filtered)
        pcd.colors = o3d.utility.Vector3dVector(rgb_filtered)

        # Visualize the point cloud
        o3d.visualization.draw_geometries([pcd])

Here below the expected point cloud output that works (see above picture)

My ultimate wish would be to be able to use image from left AND right camera instead of solely left :slight_smile:

Hey @Adrien
I think you could use the XYZRGBA_RIGHT measure in retrieve_measure to get started on that (along with the RIGHT view should you need it).

1 Like