Moving object's location between frames for clustering

I am aiming to map a 180-degree sector using a ZED 2i and object detection (not the built-in custom object detection). The camera rotates slowly on its stand from left to right until the area of interest is covered.

Right now I’m only interested in showing the locations of the objects in the 2D plane. Currently, the code is run offline over an SVO file.

I have no interest in using object tracking if it is not necessary. I’m only interested in knowing the locations on the map, and since there are multiple frames and locations, I’m trying to use the camera’s odometry to transform the detections into the same coordinate system.

Is there something I’m doing wrong here? I’m worried that the error accumulates after every frame iteration. Detections and distances in single frames tend to match the ground truth, so I’m not worried about finding xyz coordinates in single frames.
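For context, the transformation I’m doing is essentially the following, here in plain NumPy with invented names (`rel_pose` is a hand-made stand-in for the per-frame relative pose the SDK returns; it is not a ZED API):

```python
import numpy as np

def rel_pose(yaw_rad, t):
    """Hypothetical per-frame delta: a yaw rotation about y plus a translation."""
    c, s = np.cos(yaw_rad), np.sin(yaw_rad)
    T = np.eye(4)
    T[:3, :3] = [[c, 0, s], [0, 1, 0], [-s, 0, c]]
    T[:3, 3] = t
    return T

# Compose per-frame deltas into the total transform
# (mirrors transform_total_api = transform_api * transform_total_api)
T_total = np.eye(4)
for delta in [rel_pose(np.pi / 8, [0, 0, 0]), rel_pose(np.pi / 8, [0, 0, 0])]:
    T_total = delta @ T_total

# A detection in the current camera frame, as a homogeneous vector
p_cam = np.array([0.0, 0.0, 2.0, 1.0])

# Map it back into the first frame with the inverse of the total transform
p_ref = np.linalg.inv(T_total) @ p_cam
```

Every grab multiplies another delta into `T_total`, which is exactly where I suspect the error piles up.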

Here is the code in stripped form:

import numpy as np
from pyzed import sl
import sys
import cv2

def zed_init(filepath):
    """Init ZED camera parameters and positional tracking."""

    input_type = sl.InputType()
    input_type.set_from_svo_file(filepath)
    init = sl.InitParameters(input_t=input_type, svo_real_time_mode=False)
    init.camera_resolution = sl.RESOLUTION.HD1080
    init.coordinate_units = sl.UNIT.METER
    init.depth_maximum_distance = 15
    init.depth_mode = sl.DEPTH_MODE.NEURAL
    cam = sl.Camera()
    status = cam.open(init)

    if status != sl.ERROR_CODE.SUCCESS:
        print(repr(status))
        exit(1)

    tracking_parameters = sl.PositionalTrackingParameters()
    err = cam.enable_positional_tracking(tracking_parameters)
    if err != sl.ERROR_CODE.SUCCESS:
        print(repr(err))
        exit(1)
    return cam

def main():
    """Read the SVO file: extract left RGB and depth,
    track camera pose movement, run inference and save results."""
    detector = initialize_detector()

    if len(sys.argv) != 2:
        print("Please specify path to .svo file.")
        exit(1)
    filepath = sys.argv[1]
    cam = zed_init(filepath)

    mat = sl.Mat()
    zed_pose_cam = sl.Pose()

    key = ""
    stop_counter = 0
    transform_total_api = None
    runtime = sl.RuntimeParameters()
    cluster_data = None
    cluster_data_total = []
    started = False

    counter = 30  # run inference on every 30th frame

    while key != 113:  # 113 == ord('q')
        err = cam.grab(runtime)

        if err == sl.ERROR_CODE.END_OF_SVOFILE_REACHED:  # enum name may vary by SDK version
            break
        elif err == sl.ERROR_CODE.SUCCESS:

            stop_counter += 1

            # Retrieve and transform the pose data into a new frame located at the center of the camera
            err = cam.get_position(zed_pose_cam, sl.REFERENCE_FRAME.CAMERA)
            if err != sl.POSITIONAL_TRACKING_STATE.OK and started:
                print("Location lost, stopping the camera")
                break

            elif err == sl.POSITIONAL_TRACKING_STATE.OK and not started:
                print("Camera found location, starting the tracking")
                started = True

            # relative pose of this frame as a 4x4 transformation matrix
            transform_api = zed_pose_cam.pose_data(sl.Transform())

            # update total transform between frames
            if transform_total_api is not None:
                transform_total_api = transform_api * transform_total_api

            if stop_counter == counter:

                cam.retrieve_image(mat, sl.VIEW.LEFT)
                img = mat.get_data()

                # inference logic
                xyz_coordinates = run_inference(cam, detector)

                stop_counter = 0
                xyz_thresh = [(x * -1, y, z) for (x, y, z) in xyz_coordinates]
                if transform_total_api is None:

                    # init identity matrix
                    transform_total_api = sl.Transform()

                    # first inference does not need any transformation
                    bottom_xyz_ref = np.array(xyz_thresh)
                else:
                    # map the points back into the first frame's coordinates
                    inverse_transform_total = np.linalg.inv(transform_total_api.m)
                    bottom_xyz_ref = []
                    for point in xyz_thresh:
                        point_4x1 = np.append(point, 1)
                        point_ref = np.matmul(inverse_transform_total, point_4x1)
                        bottom_xyz_ref.append(point_ref[:3])

                if len(bottom_xyz_ref) > 0:
                    cluster_data = cluster_preprocess(bottom_xyz_ref)
                    cluster_data_total.append(cluster_data)

            key = cv2.waitKey(1)

    # Stop the stream and clean up
    cam.disable_positional_tracking()
    cam.close()

if __name__ == "__main__":
    main()

Since the ZED uses an IMU, the error can indeed accumulate during the iterations. But for a simple 180° rotation, that should not be an issue. What is your problem exactly?

Also, if you are trying to map a bigger FoV than what a ZED can do, be aware that we will soon release an SDK that is compatible with multiple cameras. This way, you’ll be able to fuse your data and cover your 180° easily.


Apologies for the lousy question phrasing. Basically, I’m interested in knowing/validating whether there’s a better way of doing the object’s location transformation with the ZED SDK. Technically this works well enough, but could it be done, for example, without updating the transform matrix between every single frame?

In the state of the current project, it’s not possible to add the extra cameras for mapping, but great to hear that the SDK will have that in the future!

Internally, the IMU runs at 400 Hz, so the positional tracking updates the position faster than just between two frames.

About your way of doing this: I think you should use sl.REFERENCE_FRAME.WORLD instead of sl.REFERENCE_FRAME.CAMERA. This way, you’ll have an absolute position, and not a position relative to the previous frame.
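In NumPy terms, with a world-frame pose each detection needs only one multiplication by the current camera-to-world matrix, and nothing accumulates in your own code. A sketch (the matrix below is a hand-made stand-in for what `zed_pose_cam.pose_data(sl.Transform()).m` would contain after a WORLD-frame `get_position` call):

```python
import numpy as np

# Stand-in camera-to-world pose: camera yawed 90 degrees about y
# and moved 1 m along the world x axis.
T_world_cam = np.array([
    [0.0, 0.0, 1.0, 1.0],
    [0.0, 1.0, 0.0, 0.0],
    [-1.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 1.0],
])

def cam_to_world(points_cam, T):
    """Map Nx3 camera-frame detections into the world frame via one homogeneous multiply."""
    pts = np.hstack([points_cam, np.ones((len(points_cam), 1))])
    return (T @ pts.T).T[:, :3]

detections = np.array([[0.0, 0.0, 2.0]])  # one object 2 m in front of the camera
world = cam_to_world(detections, T_world_cam)
```

No inverse of an accumulated product is needed; each inference frame is handled independently with whatever pose the tracker reports at that moment.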