ROS2 pointcloud slow down node

Currently running a node, which subscribe to the topic of the left image as well as the point cloud. The node use machine learning for object detection, when a object is found it uses the point cloud to determine the position of the object. and then publish the positions of the object as well as a image stream with bounding boxes. When re moving the point cloud part in the code the speed of the node is not perfect but at around 0.3 s, but with the use of the point cloud it increase to 2 second per publish. How to increase the speed? Am i missing something in the publisher? the common.yaml is fully with default value.

This is the code:

import rclpy
from rclpy.node import Node
from cv_bridge import CvBridge
from rclpy.time import Time
from sensor_msgs.msg import Image, PointCloud2
import cv2

import torch
import numpy as np
from numpy import random
from models.experimental import attempt_load
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.plots import plot_one_box
from utils.torch_utils import select_device, time_synchronized, TracedModel

from detection_interfaces.msg import Detections
from detection_interfaces.msg import Detected

import RPi.GPIO as GPIO
led_strip = GPIO.PWM(32,500)

class ImageSubscriber(Node):
def init(self):
self.pcd_as_numpy_array = np.array([], [])
self.pcd_subscription = self.create_subscription(PointCloud2, ‘zed2/zed_node/point_cloud/cloud_registered’, self.listener_callback_pcb, 10)
self.subscription = self.create_subscription(Image, ‘zed2/zed_node/left/image_rect_color’, self.listener_callback, 10)
self.subscription # prevent unused variable warning
self.publisher = self.create_publisher(Image, ‘Yolo_result’, 10)
self.publisher_detections = self.create_publisher(Detections, “detections”, 10)

    self.bridge = CvBridge()

    device = 'cuda:0'
    device = select_device('0')
    self.device = device
    half = device.type != 'cpu'
    self.half = half
    weights = ''
    imgsz = 416
    model = attempt_load(weights, map_location=device)
    stride = int(model.stride.max())
    imgsz = check_img_size(imgsz, s=stride)
    model = TracedModel(model, device, img_size=imgsz)
    names = model.module.names if hasattr(model, 'module') else model.names
    self.names = names
    colors = [[random.randint(0,255) for _ in range(3)] for _ in names]
    self.colors = colors
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))
    self.model = model

def listener_callback(self, data):
    msg = Detections()
    msg.header.stamp = self.get_clock().now().to_msg()
    self.get_logger().info('Receiving image')
    img0 = self.bridge.imgmsg_to_cv2(data)
    img1= cv2.cvtColor(img0, cv2.COLOR_RGBA2RGB) #change rgba image to rgb, needed for the yolo model to work
    img = img1[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(self.device)
    img = img.half() if self.half else img.float()
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)
    with torch.no_grad():
        pred = self.model(img, augment=False)[0]
    conf_thres = 0.1
    iou_thres = 0.45
    pred = non_max_suppression(pred, conf_thres, iou_thres, classes=None, agnostic=False)
    for i, det in enumerate(pred):  # detections per image
        s = ""
        gn = torch.tensor(img0.shape)[[1,0,1,0]]
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
            for *xyxy, conf, cls in reversed(det):                   
                i, j = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))  #get the pixels of the bounding box
                center_point = round((i[0]+j[0])/2), round((i[1]+j[1])/2)    #get the middle point od the bounding box
                circle =, center_point, 2, (0,255,0), 1)  #place a circle at the middle point
                xyz_pos = (self.pcd_as_numpy_array[center_point[0]*center_point[1]][0], self.pcd_as_numpy_array[center_point[0]*center_point[1]][1], self.pcd_as_numpy_array[center_point[0]*center_point[1]][2])  #gets xyz poistion of middle point
                label = f'{self.names[int(cls)]} {conf:.2f} {xyz_pos}'
                plot_one_box(xyxy, img0, label=label, color=self.colors[int(cls)], line_thickness=1)
                #text_coord = cv2.putText(img0, str(xyz_pos), center_point, cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255))
                msg.detected.append(detection_to_msg(self.names[int(cls)], xyz_pos[0], xyz_pos[1], xyz_pos[2]))
            self.publisher.publish(self.bridge.cv2_to_imgmsg(img0, "bgra8"))
def listener_callback_pcb(self, data):
    pcd = np.array(list(read_points(data)))
    self.pcd_as_numpy_array = np.around(pcd, decimals=4)

The code below is “ported” from

common_msgs/sensor_msgs/src/sensor_msgs at noetic-devel · ros/common_msgs · GitHub

I’ll make an official port and PR to this repo later:

GitHub - ros2/common_interfaces: A set of packages which contain common interface files (.msg and .srv).

import sys
from collections import namedtuple
import ctypes
import math
import struct
from sensor_msgs.msg import PointCloud2, PointField

_DATATYPES[PointField.INT8] = (‘b’, 1)
_DATATYPES[PointField.UINT8] = (‘B’, 1)
_DATATYPES[PointField.INT16] = (‘h’, 2)
_DATATYPES[PointField.UINT16] = (‘H’, 2)
_DATATYPES[PointField.INT32] = (‘i’, 4)
_DATATYPES[PointField.UINT32] = (‘I’, 4)
_DATATYPES[PointField.FLOAT32] = (‘f’, 4)
_DATATYPES[PointField.FLOAT64] = (‘d’, 8)

def read_points(cloud, field_names=None, skip_nans=False, uvs=[]):
Read points from a L{sensor_msgs.PointCloud2} message.
@param cloud: The point cloud to read from.
@type cloud: L{sensor_msgs.PointCloud2}
@param field_names: The names of fields to read. If None, read all fields. [default: None]
@type field_names: iterable
@param skip_nans: If True, then don’t return any point with a NaN value.
@type skip_nans: bool [default: False]
@param uvs: If specified, then only return the points at the given coordinates. [default: empty list]
@type uvs: iterable
@return: Generator which yields a list of values for each point.
@rtype: generator
assert isinstance(cloud, PointCloud2), ‘cloud is not a sensor_msgs.msg.PointCloud2’
fmt = _get_struct_fmt(cloud.is_bigendian, cloud.fields, field_names)
width, height, point_step, row_step, data, isnan = cloud.width, cloud.height, cloud.point_step, cloud.row_step,, math.isnan
unpack_from = struct.Struct(fmt).unpack_from

if skip_nans:
    if uvs:
        for u, v in uvs:
            p = unpack_from(data, (row_step * v) + (point_step * u))
            has_nan = False
            for pv in p:
                if isnan(pv):
                    has_nan = True
            if not has_nan:
                yield p
        for v in range(height):
            offset = row_step * v
            for u in range(width):
                p = unpack_from(data, offset)
                has_nan = False
                for pv in p:
                    if isnan(pv):
                        has_nan = True
                if not has_nan:
                    yield p
                offset += point_step
    if uvs:
        for u, v in uvs:
            yield unpack_from(data, (row_step * v) + (point_step * u))
        for v in range(height):
            offset = row_step * v
            for u in range(width):
                yield unpack_from(data, offset)
                offset += point_step

def _get_struct_fmt(is_bigendian, fields, field_names=None):
fmt = ‘>’ if is_bigendian else ‘<’

offset = 0
for field in (f for f in sorted(fields, key=lambda f: f.offset) if field_names is None or in field_names):
    if offset < field.offset:
        fmt += 'x' * (field.offset - offset)
        offset = field.offset
    if field.datatype not in _DATATYPES:
        print('Skipping unknown PointField datatype [%d]' % field.datatype, file=sys.stderr)
        datatype_fmt, datatype_length = _DATATYPES[field.datatype]
        fmt    += field.count * datatype_fmt
        offset += field.count * datatype_length

return fmt

def detection_to_msg(type, x, y, z):
detection_msg = Detected()
detection_msg.type = type
detection_msg.position.x = x
detection_msg.position.y = y
detection_msg.position.z = z
return detection_msg

def main(args=None):
image_subscriber = ImageSubscriber()

if name == “main”:

Hi @Kars007
Welcome to the Stereolabs community.

High-definition point cloud processing is a high-demanding operation, so depending on the CPU power it can take a lot of time if the code is not well optimized.

You can try to reduce the amount of data to be parsed by reducing the output resolution:

A note: it’s not easy to read your code because it’s not well formatted, and not formatted Python code can have no sense. Can you please edit your post to make it readable?

Hey Myzhar,

Thank you for your respons and sorry for the formatting.I tried change it but couldn’t find the edit buttom. The read_points() function was slowing down the publisher drastically, which slowed down the publisher of the zed wrapper as well. I fixed it by using the ros2_numpy(GitHub - Box-Robotics/ros2_numpy: Tools for converting ROS messages to and from numpy arrays) with the function rnp.point_cloud2.get_xyz_points(rnp.point_cloud2.pointcloud2_to_array(data), remove_nans=False). This makes it possible to published at a speed of around 10Hz.

Best regards,