ZED X on ZED Box Orin NX: grab() ≈ 20 ms, max ~35 FPS – Hardware limit?

Cubia · May 28, 2025, 10:19pm

Environment:

Camera: ZED X
Host: ZED Box Orin NX 16 GB (JetPack 6.0, MAXN/jetson_clocks)
Depth Mode: NEURAL_LIGHT, SVGA@60
Display: HDMI output active during execution

Code (C++):

#include "m3t/zed_camera.h"

// Closes the ZED device, but only when SetUp() previously opened it
// successfully (initial_set_up_ is set only after a successful open).
ZedCamera::~ZedCamera() {
  if (!initial_set_up_) return;
  zed_.close();
}

// Returns the single shared ZedCamera object, which is constructed the
// first time this function is called.
ZedCamera &ZedCamera::GetInstance() {
  static ZedCamera shared_instance;
  return shared_instance;
}

// Marks the color stream as requested; SetUp() and UpdateCapture() consult
// this flag to decide whether to retrieve the left image.
void ZedCamera::UseColorCamera() {
  use_color_camera_ = true;
}

// Marks the depth stream as requested; SetUp() and UpdateCapture() consult
// this flag to decide whether to retrieve the depth measure.
void ZedCamera::UseDepthCamera() {
  use_depth_camera_ = true;
}

// Grabs a new frame from the ZED using the stored runtime parameters.
//
// Returns true when the SDK reports SUCCESS, and false when the camera is
// not flagged as opened or the grab fails.
//
// The previous version had no return statement on the success path, which
// is undefined behaviour for a function returning bool.
//
// NOTE(review): is_opened_ is not assigned anywhere in this translation
// unit (SetUp() sets initial_set_up_ instead) -- confirm it is maintained
// elsewhere, otherwise this function can never succeed.
bool ZedCamera::GrabFrame() {
  if (!is_opened_) return false;
  return zed_.grab(runtime_params_) == sl::ERROR_CODE::SUCCESS;
}

// Hands out the next consumer ID and records it in update_capture_ids_
// with its flag initially set to true. Guarded by mutex_.
int ZedCamera::RegisterID() {
  const std::lock_guard<std::mutex> guard{mutex_};
  const int assigned_id = next_id_;
  update_capture_ids_.insert({assigned_id, true});
  ++next_id_;
  return assigned_id;
}

// Removes a consumer ID from update_capture_ids_. Guarded by mutex_.
// Returns true if an entry with that ID existed and was erased.
bool ZedCamera::UnregisterID(int id) {
  const std::lock_guard<std::mutex> guard{mutex_};
  const auto erased_count = update_capture_ids_.erase(id);
  return erased_count != 0;
}

// Configures and opens the ZED device (or an SVO recording) and caches the
// calibration for the requested streams.
//
// svo_mode: when true, the input is taken from the file at svo_path.
// svo_path: path of the SVO file; only used when svo_mode is true.
//
// Only the first successful call does any work; once initial_set_up_ is set
// the function returns true immediately. Returns false if the SDK fails to
// open the input. Guarded by mutex_.
bool ZedCamera::SetUp(bool svo_mode, const std::string &svo_path) {
  const std::lock_guard<std::mutex> guard{mutex_};
  if (initial_set_up_) return true;

  // Capture configuration: SVGA at 60 FPS, NEURAL_LIGHT depth in meters.
  init_params_.camera_resolution = sl::RESOLUTION::SVGA;
  init_params_.camera_fps = 60;
  init_params_.depth_mode = sl::DEPTH_MODE::NEURAL_LIGHT;
  init_params_.coordinate_units = sl::UNIT::METER;
  // Maximum depth perception distance, expressed in coordinate_units.
  // depth_minimum_distance is deliberately not overridden here (an earlier
  // 0.5 override was disabled).
  init_params_.depth_maximum_distance = 20;

  if (svo_mode) {
    init_params_.input.setFromSVOFile(svo_path.c_str());
  }

  const bool opened = zed_.open(init_params_) == sl::ERROR_CODE::SUCCESS;
  if (!opened) return false;

  // Runtime depth settings applied on every grab().
  runtime_params_.enable_fill_mode = true;
  runtime_params_.confidence_threshold = 100;
  runtime_params_.texture_confidence_threshold = 100;

  // NOTE(review): the retrieve calls below run before any grab();
  // presumably they only serve to size the CPU buffers -- confirm against
  // the SDK documentation.
  if (use_color_camera_) {
    zed_.retrieveImage(color_image_, sl::VIEW::LEFT, sl::MEM::CPU);
    color_intrinsics_ = zed_.getCameraInformation()
                            .camera_configuration.calibration_parameters.left_cam;
  }

  // The depth stream takes its intrinsics from the left camera as well.
  if (use_depth_camera_) {
    zed_.retrieveMeasure(depth_image_, sl::MEASURE::DEPTH, sl::MEM::CPU);
    depth_intrinsics_ = zed_.getCameraInformation()
                            .camera_configuration.calibration_parameters.left_cam;
  }

  initial_set_up_ = true;
  return true;
}

// Refreshes the shared color/depth buffers on behalf of consumer `id`.
//
// id:   consumer ID obtained from RegisterID(); looked up with at(), so an
//       unknown ID throws.
// type: when true, a new frame is grabbed before the buffers are retrieved;
//       when false, the buffers are retrieved without grabbing.
//
// If the consumer's flag is set, the buffers are refreshed and every
// consumer's flag is cleared. In all successful cases the caller's own flag
// is set again before returning. Returns false when the camera has not been
// set up or when the requested grab fails. Guarded by mutex_.
bool ZedCamera::UpdateCapture(int id, bool type) {
  const std::lock_guard<std::mutex> guard{mutex_};
  if (!initial_set_up_) return false;

  auto &caller_flag = update_capture_ids_.at(id);
  if (caller_flag) {
    const bool grab_failed =
        type && zed_.grab(runtime_params_) != sl::ERROR_CODE::SUCCESS;
    if (grab_failed) return false;

    if (use_color_camera_) {
      zed_.retrieveImage(color_image_, sl::VIEW::LEFT, sl::MEM::CPU);
    }
    if (use_depth_camera_) {
      zed_.retrieveMeasure(depth_image_, sl::MEASURE::DEPTH, sl::MEM::CPU);
    }

    // Clear the flag of every registered consumer.
    for (auto &entry : update_capture_ids_) entry.second = false;
  }

  caller_flag = true;
  return true;
}

// True once UseColorCamera() has been called.
bool ZedCamera::use_color_camera() const {
  return use_color_camera_;
}

// True once UseDepthCamera() has been called.
bool ZedCamera::use_depth_camera() const {
  return use_depth_camera_;
}

// Read-only access to the buffer that retrieveImage() fills with the left
// view.
const sl::Mat &ZedCamera::color_image() const {
  return color_image_;
}

// Read-only access to the buffer that retrieveMeasure() fills with the
// depth measure.
const sl::Mat &ZedCamera::depth_image() const {
  return depth_image_;
}

// Left-camera calibration cached by SetUp() when the color stream is in
// use.
const sl::CameraParameters &ZedCamera::color_intrinsics() const {
  return color_intrinsics_;
}

// Left-camera calibration cached by SetUp() when the depth stream is in
// use.
const sl::CameraParameters &ZedCamera::depth_intrinsics() const {
  return depth_intrinsics_;
}

// Returns the stored depth scale factor.
float ZedCamera::depth_scale() const {
  return depth_scale_;
}

// Returns the frame rate measured by the ZED SDK (the no-argument
// getCurrentFPS() query).
float ZedCamera::get_image_fps() {
  const float measured_fps = zed_.getCurrentFPS();
  return measured_fps;
}

// Creates a color camera bound to the shared ZedCamera.
//
// name:        forwarded to the ColorCamera base.
// image_scale: stored and later applied to the focal lengths in
//              GetIntrinsics().
//
// Requests the color stream and registers this object as a capture
// consumer.
ZedColorCamera::ZedColorCamera(const std::string &name, float image_scale)
    : ColorCamera{name},
      image_scale_{image_scale},
      zed_{ZedCamera::GetInstance()} {
  zed_.UseColorCamera();
  zed_id_ = zed_.RegisterID();
}

// Creates a color camera bound to the shared ZedCamera, configured from a
// metafile.
//
// name:          forwarded to the ColorCamera base.
// metafile_path: forwarded to the ColorCamera base; read by LoadMetaData()
//                during SetUp().
//
// Requests the color stream and registers this object as a capture
// consumer.
ZedColorCamera::ZedColorCamera(const std::string &name,
                               const std::filesystem::path &metafile_path)
    : ColorCamera{name, metafile_path},
      zed_{ZedCamera::GetInstance()} {
  zed_.UseColorCamera();
  zed_id_ = zed_.RegisterID();
}

// Removes this camera's consumer ID from the shared ZedCamera. The result
// of the removal is intentionally not inspected.
ZedColorCamera::~ZedColorCamera() {
  static_cast<void>(zed_.UnregisterID(zed_id_));
}

bool ZedColorCamera::SetUp() {
  set_up_ = false;
  if (!metafile_path_.empty()) if (!LoadMetaData()) return false;
  if (!initial_set_up_ && !zed_.SetUp()) return false;

  GetIntrinsics();

  {
    // 1) leggi i parametri raw dalla ZED
    const sl::CameraParameters &p = zed_.color_intrinsics();

    // 2) la camera matrix non rettificata
    cv::Mat1f K(3,3);
    K << p.fx,   0.f,  p.cx,
          0.f, p.fy,  p.cy,
          0.f,   0.f,    1.f;

    // 3) il vettore di distorsione completo
    cv::Mat1f D(1,12);
    for (int i = 0; i < 12; ++i)  
      D(0,i) = static_cast<float>(p.disto[i]);

    // 4) la camera matrix rettificata (eventualmente scalata)
    cv::Mat1f Kn(3,3);
    Kn << intrinsics_.fu, 0.f,            intrinsics_.ppu,
          0.f,            intrinsics_.fv, intrinsics_.ppv,
          0.f,            0.f,            1.f;

    // 5) costruisci le mappe di remap
    cv::initUndistortRectifyMap(
      K, D, cv::Mat(),   // no R
      Kn,
      cv::Size{intrinsics_.width, intrinsics_.height},
      CV_32FC1,
      distortion_map1_,  // membro cv::Mat
      distortion_map2_   // membro cv::Mat
    );

  }
  
  set_camera2world_pose(Transform3fA::Identity());

  SaveMetaDataIfDesired();
  set_up_ = true;
  initial_set_up_ = true;
  return UpdateImage(true);
}

bool ZedColorCamera::UpdateImage(bool synchronized) {

  using Clock = std::chrono::high_resolution_clock;
  auto t0 = Clock::now();

  if (!set_up_) return false;
  zed_.UpdateCapture(zed_id_, true);

  const sl::Mat &zed_image = zed_.color_image();
  cv::Mat rgba(
    zed_image.getHeight(),
    zed_image.getWidth(),
    CV_8UC4,
    zed_image.getPtr<sl::uchar1>(sl::MEM::CPU),
    zed_image.getStepBytes(sl::MEM::CPU)
  );
  
  //cv::Mat rgb;
  cv::cvtColor(rgba, image_, cv::COLOR_RGBA2RGB);
  auto t1 = Clock::now();

  //std::cout << "[TIMING UPDATE COLOR]" << std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count() << "ms";
  //std::cerr << "Update image color";

  // 2) undistort
  //cv::remap(rgb, image_, distortion_map1_, distortion_map2_,
    //        cv::INTER_LINEAR, cv::BORDER_CONSTANT);

  return true;
}

// Stores a new image scale. The camera is marked as not set up, so SetUp()
// has to be called again before UpdateImage() succeeds.
void ZedColorCamera::set_image_scale(float image_scale) {
  set_up_ = false;
  image_scale_ = image_scale;
}

// Returns the currently stored image scale.
float ZedColorCamera::image_scale() const {
  return image_scale_;
}

bool ZedColorCamera::LoadMetaData() {
  cv::FileStorage fs;
  if (!OpenYamlFileStorage(metafile_path_, &fs)) return false;
  ReadOptionalValueFromYaml(fs, "camera2world_pose", &camera2world_pose_);
  ReadOptionalValueFromYaml(fs, "save_directory", &save_directory_);
  ReadOptionalValueFromYaml(fs, "save_index", &save_index_);
  ReadOptionalValueFromYaml(fs, "save_image_type", &save_image_type_);
  ReadOptionalValueFromYaml(fs, "save_images", &save_images_);
  ReadOptionalValueFromYaml(fs, "image_scale", &image_scale_);
  fs.release();
  if (save_directory_.is_relative())
    save_directory_ = metafile_path_.parent_path() / save_directory_;
  world2camera_pose_ = camera2world_pose_.inverse();
  return true;
}

void ZedColorCamera::GetIntrinsics() {
  const sl::CameraParameters &param = zed_.color_intrinsics();
  intrinsics_.fu = param.fx * image_scale_;
  intrinsics_.fv = param.fy * image_scale_;
  intrinsics_.ppu = param.cx;
  intrinsics_.ppv = param.cy;
  intrinsics_.width = param.image_size.width;
  intrinsics_.height = param.image_size.height;
}

// Creates a depth camera bound to the shared ZedCamera.
//
// name:         forwarded to the DepthCamera base.
// image_scale:  stored and later applied to the focal lengths in
//               GetIntrinsics().
// depth_offset: stored and later added to every depth image in
//               UpdateImage().
//
// Requests the depth stream and registers this object as a capture
// consumer.
ZedDepthCamera::ZedDepthCamera(const std::string &name, float image_scale,
                               float depth_offset)
    : DepthCamera{name},
      image_scale_{image_scale},
      depth_offset_{depth_offset},
      zed_{ZedCamera::GetInstance()} {
  zed_.UseDepthCamera();
  zed_id_ = zed_.RegisterID();
}

// Creates a depth camera bound to the shared ZedCamera, configured from a
// metafile.
//
// name:          forwarded to the DepthCamera base.
// metafile_path: forwarded to the DepthCamera base; read by LoadMetaData()
//                during SetUp().
//
// Requests the depth stream and registers this object as a capture
// consumer.
ZedDepthCamera::ZedDepthCamera(const std::string &name,
                               const std::filesystem::path &metafile_path)
    : DepthCamera{name, metafile_path},
      zed_{ZedCamera::GetInstance()} {
  zed_.UseDepthCamera();
  zed_id_ = zed_.RegisterID();
}

// Removes this camera's consumer ID from the shared ZedCamera. The result
// of the removal is intentionally not inspected.
ZedDepthCamera::~ZedDepthCamera() {
  static_cast<void>(zed_.UnregisterID(zed_id_));
}

// Prepares the depth camera for use.
//
// Loads the metafile (if one was given), makes sure the shared ZedCamera is
// set up, derives the intrinsics, builds the undistortion maps, resets the
// camera pose to identity and finally fetches a first depth image.
//
// Returns false if the metafile cannot be loaded, the ZED cannot be set up,
// or the first UpdateImage() call fails.
bool ZedDepthCamera::SetUp() {
  set_up_ = false;
  if (!metafile_path_.empty()) if (!LoadMetaData()) return false;
  if (!initial_set_up_ && !zed_.SetUp()) return false;

  GetIntrinsics();

  {
    // 1) Read the raw calibration from the ZED. Use the depth intrinsics
    //    (as GetIntrinsics() does): ZedCamera::SetUp() only fills the color
    //    intrinsics when a color camera is registered, so the previous use
    //    of color_intrinsics() read unset calibration in depth-only setups.
    const sl::CameraParameters &p = zed_.depth_intrinsics();

    // 2) Source camera matrix built from that calibration.
    cv::Mat1f K(3,3);
    K << p.fx,   0.f,  p.cx,
          0.f, p.fy,  p.cy,
          0.f,   0.f,    1.f;

    // 3) Full 12-element distortion vector, narrowed to float.
    cv::Mat1f D(1,12);
    for (int i = 0; i < 12; ++i)
      D(0,i) = static_cast<float>(p.disto[i]);

    // 4) Target camera matrix taken from intrinsics_ (focal lengths may
    //    have been scaled by image_scale_ in GetIntrinsics()).
    cv::Mat1f Kn(3,3);
    Kn << intrinsics_.fu, 0.f,            intrinsics_.ppu,
          0.f,            intrinsics_.fv, intrinsics_.ppv,
          0.f,            0.f,            1.f;

    // 5) Build the remap tables; no rectification rotation is supplied.
    cv::initUndistortRectifyMap(
      K, D, cv::Mat(),
      Kn,
      cv::Size{intrinsics_.width, intrinsics_.height},
      CV_32FC1,
      distortion_map1_,
      distortion_map2_
    );
  }

  set_camera2world_pose(Transform3fA::Identity());

  SaveMetaDataIfDesired();
  set_up_ = true;
  initial_set_up_ = true;
  return UpdateImage(true);
}

// Copies the latest depth measure from the shared ZedCamera into image_.
//
// synchronized: currently unused; the capture is always requested without
//               a grab (UpdateCapture(..., false)).
//
// image_ ends up as a single-channel 32-bit float matrix; when
// depth_offset_ is non-zero, depth_offset_ / depth_scale_ is added to every
// element.
//
// Returns false if the camera has not been set up or if the capture could
// not be updated. Previously the result of UpdateCapture() was ignored.
bool ZedDepthCamera::UpdateImage(bool synchronized) {
  if (!set_up_) return false;
  if (!zed_.UpdateCapture(zed_id_, false)) return false;

  // Wrap the ZED CPU buffer without copying.
  const sl::Mat &zed_depth = zed_.depth_image();
  cv::Mat depth_mat(
    zed_depth.getHeight(),
    zed_depth.getWidth(),
    CV_32FC1,
    zed_depth.getPtr<sl::float1>(sl::MEM::CPU),
    zed_depth.getStepBytes(sl::MEM::CPU)
  );

  // Deep copy, so image_ does not alias the buffer owned by the ZED wrapper.
  image_ = depth_mat.clone();

  // Apply the configured offset directly on the float values.
  if (depth_offset_ != 0.0f) {
    image_ += (depth_offset_ / depth_scale_);
  }

  // Undistortion via cv::remap with distortion_map1_/distortion_map2_ is
  // currently disabled; the float depth values are kept as retrieved.
  return true;
}

// Stores a new image scale. The camera is marked as not set up, so SetUp()
// has to be called again before UpdateImage() succeeds.
void ZedDepthCamera::set_image_scale(float image_scale) {
  set_up_ = false;
  image_scale_ = image_scale;
}

// Stores the offset that UpdateImage() adds to each depth image. Unlike
// set_image_scale(), this does not reset set_up_.
void ZedDepthCamera::set_depth_offset(float depth_offset)
{
  depth_offset_ = depth_offset;
}

// Returns the currently stored image scale.
float ZedDepthCamera::image_scale() const {
  return image_scale_;
}

// Returns the currently stored depth offset.
float ZedDepthCamera::depth_offset() const {
  return depth_offset_;
}

bool ZedDepthCamera::LoadMetaData() {
  cv::FileStorage fs;
  if (!OpenYamlFileStorage(metafile_path_, &fs)) return false;
  ReadOptionalValueFromYaml(fs, "camera2world_pose", &camera2world_pose_);
  ReadOptionalValueFromYaml(fs, "save_directory", &save_directory_);
  ReadOptionalValueFromYaml(fs, "save_index", &save_index_);
  ReadOptionalValueFromYaml(fs, "save_image_type", &save_image_type_);
  ReadOptionalValueFromYaml(fs, "save_images", &save_images_);
  ReadOptionalValueFromYaml(fs, "image_scale", &image_scale_);
  ReadOptionalValueFromYaml(fs, "depth_offset", &depth_offset_);
  fs.release();
  if (save_directory_.is_relative())
    save_directory_ = metafile_path_.parent_path() / save_directory_;
  world2camera_pose_ = camera2world_pose_.inverse();
  return true;
}

void ZedDepthCamera::GetIntrinsics() {
  const sl::CameraParameters &param = zed_.depth_intrinsics();
  intrinsics_.fu = param.fx * image_scale_;
  intrinsics_.fv = param.fy * image_scale_;
  intrinsics_.ppu = param.cx;
  intrinsics_.ppv = param.cy;
  intrinsics_.width = param.image_size.width;
  intrinsics_.height = param.image_size.height;
}

}

Observed:

grab() consistently takes 18–22 ms → ~45 FPS theoretical.
Full pipeline (grab + retrieveImage + cv::imshow) tops out at ≈ 35 FPS.
ZED Depth Viewer on the same hardware shows RGB 60 FPS + depth 30 FPS in “Neural” mode, but custom C++ can’t reproduce better than ~35 FPS.

Question for StereoLabs / community:
Is there any way to further accelerate the ZED SDK’s Neural Light (or Neural) depth path on Orin NX (e.g. TensorRT/DLA flags, compile‐time options), or is the ≈ 20 ms grab latency a hard hardware limit on the ZED Box?

Thanks!

Cubia · May 29, 2025, 8:38pm

Any suggestions on this? I reported the performance with only one ZED X, but my actual goal is to capture from 3–4 cameras simultaneously. Any kind of help would be really appreciated!

hbeaumont · June 3, 2025, 12:12pm

Hi Cubia,

Thank you for reaching out to us.
As you did not send the header file, I cannot run your sample to test it. Could you please check the runtime of grab() using this sample: zed-sdk/depth sensing/depth sensing/cpp at master · stereolabs/zed-sdk · GitHub, and compare it with your sample?

Also, do not hesitate to run the ZED_Diagnostic tool and send us your JSON report, so we can check this too.

Best regards,