Hello!
We have a reliability issue with the ZED cameras. Our code replicates the SDK example: zed-sdk/object detection/image viewer/cpp at master · stereolabs/zed-sdk · GitHub
On Windows 11 IoT Enterprise machines, we process the USB feed with NVIDIA RTX A4000 and RTX A5000 graphics cards.
However, at some point after the third hour or so, grabFrame() returns an error reporting that a CUDA error has occurred. From then on we get no feed, and we cannot close and reopen the camera; we have found no recovery option. If we close and reopen the application, including the OpenGL context, it complains about the architecture of the graphics card and says we need a newer one to be able to open the camera.
Ideally we would like to run this setup around the clock. Thank you in advance
Our opening parameters:
ZED_TRACE(QString("OPENING CAMERA [%1], Mode: %2").arg(m_serial, modeStr()));
m_camera = new sl::Camera;
sl::RESOLUTION res = sl::RESOLUTION::HD720;
switch (m_mode) {
case ZedLow:
m_fps = 15;
res = sl::RESOLUTION::HD720;
break;
case ZedMed:
m_fps = 30;
res = sl::RESOLUTION::HD720;
break;
case ZedMed2:
m_fps = 15;
res = sl::RESOLUTION::HD1080;
break;
case ZedHigh:
m_fps = 30;
res = sl::RESOLUTION::HD720;
break;
case ZedHigh2:
m_fps = 30;
res = sl::RESOLUTION::HD1080;
break;
}
sl::InitParameters initParams {};
initParams.sdk_verbose = 0;
#ifdef ZED_NOISY_DEBUG
initParams.sdk_verbose = 1;
#endif
initParams.camera_fps = m_fps;
initParams.camera_resolution = res;
initParams.open_timeout_sec = 10.f;
initParams.camera_disable_self_calib = true;
initParams.depth_mode = sl::DEPTH_MODE::NEURAL_LIGHT;
initParams.coordinate_units = sl::UNIT::METER;
// initParams.grab_compute_capping_fps = qMin(m_fps, 30); /// REVIEW WHEN OTHER PROBLEMS SOLVED
initParams.async_grab_camera_recovery = false; // We manage recovery
initParams.enable_image_enhancement = false;
initParams.enable_image_validity_check = false;
initParams.depth_stabilization = 0;
initParams.input.setFromSerialNumber(m_serial.toInt());
emit startOpenTimerRequested();
auto const err = m_camera->open(initParams);
emit stopOpenTimerRequested();
if (err != sl::ERROR_CODE::SUCCESS)
{
// *** Errors Found:
// - CAMERA_NOT_DETECTED: Bad USB connection. If Camera not connected, this point is not reached
// - CORRUPTED_SDK_INSTALLATION: AI Resource Not Found, error trying to download it. No internet connection.
// *** Possible Errors:
// - NOT_ENOUGH_GPU_MEMORY, CUDA_ERROR, MODULE_NOT_COMPATIBLE_WITH_CUDA_VERSION
closeCamera();
return err;
}
m_cameraReady = true;
ZED_TRACE("OPENED");
return sl::ERROR_CODE::SUCCESS;
}
And our loop function, which runs on another thread:
void ZEDCamera::loop()
{
ZED_TRACE("Start Camera Loop Thread");
setStatus(OpeningCamera);
// --- Phase 1: Graphics Initialization Sync ---
while (!m_gfxReady && m_isAlive) {
if (m_instances == 0 && m_trackings == 0) {
m_thread->msleep(100);
continue;
}
emit gfxInitializationRequested();
m_thread->msleep(64); // Wait for GL context to bind
}
// Loop variables
sl::ERROR_CODE err = sl::ERROR_CODE::SUCCESS;
int frameErrorCount = 0;
const int errorTries = 5000;
int openRetryCount = 0;
int retryOnSecs = 30;
bool isFirstOpen = true;
QTime retryTime = QTime::currentTime();
auto triggerRestart = [&](int seconds) {
ZED_TRACE(QString("Restarting Camera (Delay: %1s)...").arg(seconds));
closeCamera(); // 1. MUST BE FIRST
sl::Camera::reboot(m_serial.toInt()); // 2. Hardware reset
m_restartCamera = false;
retryTime = QTime::currentTime();
retryOnSecs = seconds;
setCameraInfo("🟡 Hardware reset triggered. Waiting for USB re-enumeration...");
};
// --- Phase 2: Main Operational Loop ---
while (m_isAlive.load(std::memory_order_relaxed)) {
// A) CONNECTION MANAGEMENT
if (!m_cameraReady) {
if (!isFirstOpen && retryTime.secsTo(QTime::currentTime()) < retryOnSecs) {
m_thread->msleep(200);
continue;
}
setCameraInfo("🟡 Initializing camera...");
if (!isFirstOpen) {
ZED_TRACE(QString("Retry Attempt %1. Cooldown: %2s")
.arg(openRetryCount).arg(retryOnSecs));
}
err = openCamera();
if (err != sl::ERROR_CODE::SUCCESS) {
isFirstOpen = false;
openRetryCount++;
retryTime = QTime::currentTime();
setCameraError(
err,
"Open Camera",
QString(" Connection retry on %1 secs").arg(retryOnSecs)
);
if (openRetryCount > 1000) {
setCameraInfo("🔴 Camera Not Found");
break;
}
continue;
}
// Success
isFirstOpen = false;
openRetryCount = 0;
frameErrorCount = 0;
setStatus(CameraOk);
setCameraInfo("🟢 Ready");
}
// B) TRACKING & AI SYNC
if (m_cameraReady) {
if (m_trackings > 0 && !m_trackingReady && m_status != ErrorTracking) {
setStatus(InitializingTracking);
setCameraInfo("🟡 Initializing Camera Tracking...");
err = initializeTracking();
if (err != sl::ERROR_CODE::SUCCESS) {
setStatus(ErrorTracking);
setCameraError(err, "AI Optimization");
} else {
setStatus(CameraOk);
setCameraInfo("🟢 Ready");
}
}
else if (m_trackings == 0 && m_trackingReady) {
finalizeTracking();
}
}
// C) MANUAL RESTART CHECK
if (m_restartCamera) {
triggerRestart(15);
continue;
}
// D) GRAB & PROCESS
sl::ERROR_CODE grabErr = grabFrame();
sl::ERROR_CODE viewErr =
(grabErr == sl::ERROR_CODE::SUCCESS)
? generateFrameViews()
: grabErr;
if (grabErr == sl::ERROR_CODE::SUCCESS &&
viewErr == sl::ERROR_CODE::SUCCESS)
{
frameErrorCount = 0;
if (generateBoxTracking()) {
static int frameGood = 0;
if (++frameGood % 600 == 0) {
emit keepAliveConfirmed();
frameGood = 0;
}
}
m_thread->msleep(2);
continue;
}
// E) ERROR HANDLING
err = (grabErr != sl::ERROR_CODE::SUCCESS) ? grabErr : viewErr;
frameErrorCount++;
if (err == sl::ERROR_CODE::CAMERA_REBOOTING ||
err == sl::ERROR_CODE::CUDA_ERROR)
{
ZED_TRACE(QString(">>> CRITICAL ERROR [%1]. Forcing Reset.")
.arg(skCameraError(err)));
setStatus(ErrorTracking);
setCameraInfo("🔴 Camera Disconnected.\n Trying to reconnect. Please wait.");
triggerRestart(30);
} else {
if (frameErrorCount % 1000 == 0) {
ZED_TRACE(QString(">>> Grab Error: %1. Counter: %2/%3")
.arg(skCameraError(err))
.arg(frameErrorCount)
.arg(errorTries));
setCameraError(err, "Grab Frame");
}
if (frameErrorCount >= errorTries) {
ZED_TRACE(">>> Error threshold reached. Restarting...");
setStatus(ErrorTracking);
triggerRestart(30);
} else {
m_thread->msleep(500);
}
}
}
// --- Phase 3: Exit Logic ---
closeCamera();
if (m_status != ErrorOpen && m_status != ErrorTracking) {
setStatus(CameraClosed);
}
emit gfxFinalizationRequested();
ZED_TRACE("End Loop Thread");
}
Myzhar
March 30, 2026, 11:13am
3
Hi @Tomas-Skandal
What ZED SDK version are you using?
I recommend you perform a more reliable grab error check on the sign of the returned value of the grab function:
Negative values are warnings
Positive values are errors
0 is good, no errors
If you enabled the frame status check, it can return warnings (sl::ERROR_CODE::CORRUPTED_FRAME) when the scene is dark, and your application would stop.
Thank you, we changed it. However, the codes we got are CUDA_ERROR (error code 18) and NO_GPU_COMPATIBLE (error code 2), which are both errors. We get CUDA_ERROR on grabFrame() until we close and reopen the camera; after that we get NO_GPU_COMPATIBLE.
Also, we have tried two ZED SDK setups: 5.2.2 with TensorRT 10.13_v5, CUDA 13.2 and NVIDIA drivers 595.71, and 5.0.0 with TensorRT 10.9_v5, CUDA 12.8 and NVIDIA drivers 573.96.
Regardless of which one we use, we end up with this behavior.
Myzhar
March 30, 2026, 12:51pm
6
Are you using a laptop? Is it possible that it disables the GPU for energy saving?
We are using Windows 11 IoT Enterprise servers with USB power saving and sleep disabled via Device Manager and the power plan settings. Every feature related to USB power saving is also disabled in the BIOS.
These servers are, of course, not laptops.
Myzhar
March 31, 2026, 5:24pm
9
This is the weird behavior. If you had a problem with drivers or CUDA, you would get this error from the very beginning, not after hours of flawless run.
Do you have a log of the application showing the error?
Hello!
I have just left it running on two machines with the sdk_verbose flag enabled, and I will get back to you. Unfortunately, at startup it reports that the verbosity log level is INFO. Is there a more verbose DEBUG level we should be aware of? In camera.hpp only sdk_verbose = 1 is mentioned.