Hi team, I trained a custom object detection model with YOLOv7 (PyTorch) and am running it with the ZED SDK. Here is the code:
#!/usr/bin/env python3
import sys
import numpy as np
from OpenGL.GLUT import *
import argparse
import torch
import cv2
import pyzed.sl as sl
import torch.backends.cudnn as cudnn

sys.path.insert(0, './yolov7-gpu')
from models.experimental import attempt_load
from utils.general import check_img_size, non_max_suppression, scale_coords, xyxy2xywh
from utils.torch_utils import select_device
from utils.datasets import letterbox

from threading import Lock, Thread
from time import sleep

import ogl_viewer.viewer as gl
import cv_viewer.tracking_viewer as cv_viewer

lock = Lock()
run_signal = False
exit_signal = False


def img_preprocess(img, device, half, net_size):
    net_image, ratio, pad = letterbox(img[:, :, :3], net_size, auto=False)
    net_image = net_image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    net_image = np.ascontiguousarray(net_image)
    img = torch.from_numpy(net_image).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)
    return img, ratio, pad

def xywh2abcd(xywh, im_shape):
    output = np.zeros((4, 2))

    # Center / Width / Height -> BBox corners coordinates
    x_min = (xywh[0] - 0.5 * xywh[2]) * im_shape[1]
    x_max = (xywh[0] + 0.5 * xywh[2]) * im_shape[1]
    y_min = (xywh[1] - 0.5 * xywh[3]) * im_shape[0]
    y_max = (xywh[1] + 0.5 * xywh[3]) * im_shape[0]

    # A ------ B
    # | Object |
    # D ------ C
    output[0][0] = x_min
    output[0][1] = y_min
    output[1][0] = x_max
    output[1][1] = y_min
    output[2][0] = x_min
    output[2][1] = y_max
    output[3][0] = x_max
    output[3][1] = y_max
    return output

def detections_to_custom_box(detections, im, im0):
    output = []
    for i, det in enumerate(detections):
        if len(det):
            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            for *xyxy, conf, cls in reversed(det):
                xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                # Creating ingestable objects for the ZED SDK
                obj = sl.CustomBoxObjectData()
                obj.bounding_box_2d = xywh2abcd(xywh, im0.shape)
                obj.label = cls
                obj.probability = conf
                obj.is_grounded = False
                output.append(obj)
    return output

def torch_thread(weights, img_size, conf_thres=0.2, iou_thres=0.45):
    global image_net, exit_signal, run_signal, detections

    print("Initializing Network...")

    device = select_device()
    half = device.type != 'cpu'  # half precision only supported on CUDA
    imgsz = img_size

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16
    cudnn.benchmark = True

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once

    while not exit_signal:
        if run_signal:
            lock.acquire()
            img, ratio, pad = img_preprocess(image_net, device, half, imgsz)
            pred = model(img)[0]
            det = non_max_suppression(pred, conf_thres, iou_thres)
            # ZED CustomBox format (with inverse letterboxing tf applied)
            detections = detections_to_custom_box(det, img, image_net)
            lock.release()
            run_signal = False
        sleep(0.01)

def main():
    global image_net, exit_signal, run_signal, detections

    capture_thread = Thread(target=torch_thread,
                            kwargs={'weights': opt.weights, 'img_size': opt.img_size, "conf_thres": opt.conf_thres})
    capture_thread.start()

    print("Initializing Camera...")

    zed = sl.Camera()

    input_type = sl.InputType()
    if opt.svo is not None:
        input_type.set_from_svo_file(opt.svo)

    # Create an InitParameters object and set configuration parameters
    init_params = sl.InitParameters(input_t=input_type, svo_real_time_mode=True)
    init_params.camera_resolution = sl.RESOLUTION.HD720
    init_params.coordinate_units = sl.UNIT.METER
    init_params.depth_mode = sl.DEPTH_MODE.ULTRA  # QUALITY
    init_params.coordinate_system = sl.COORDINATE_SYSTEM.RIGHT_HANDED_Y_UP
    init_params.depth_maximum_distance = 50

    runtime_params = sl.RuntimeParameters()
    status = zed.open(init_params)

    if status != sl.ERROR_CODE.SUCCESS:
        print(repr(status))
        exit()

    image_left_tmp = sl.Mat()

    print("Initialized Camera")

    positional_tracking_parameters = sl.PositionalTrackingParameters()
    # If the camera is static, uncomment the following line for better performance and boxes stuck to the ground.
    # positional_tracking_parameters.set_as_static = True
    zed.enable_positional_tracking(positional_tracking_parameters)

    obj_param = sl.ObjectDetectionParameters()
    obj_param.detection_model = sl.DETECTION_MODEL.CUSTOM_BOX_OBJECTS
    obj_param.enable_tracking = True
    zed.enable_object_detection(obj_param)

    objects = sl.Objects()
    obj_runtime_param = sl.ObjectDetectionRuntimeParameters()

    # Display
    camera_infos = zed.get_camera_information()
    # Create OpenGL viewer
    viewer = gl.GLViewer()
    point_cloud_res = sl.Resolution(min(camera_infos.camera_resolution.width, 720),
                                    min(camera_infos.camera_resolution.height, 404))
    point_cloud_render = sl.Mat()
    viewer.init(camera_infos.camera_model, point_cloud_res, obj_param.enable_tracking)
    point_cloud = sl.Mat(point_cloud_res.width, point_cloud_res.height, sl.MAT_TYPE.F32_C4, sl.MEM.CPU)
    image_left = sl.Mat()

    # Utilities for 2D display
    display_resolution = sl.Resolution(min(camera_infos.camera_resolution.width, 1280),
                                       min(camera_infos.camera_resolution.height, 720))
    image_scale = [display_resolution.width / camera_infos.camera_resolution.width,
                   display_resolution.height / camera_infos.camera_resolution.height]
    image_left_ocv = np.full((display_resolution.height, display_resolution.width, 4), [245, 239, 239, 255], np.uint8)

    # Utilities for tracks view
    camera_config = zed.get_camera_information().camera_configuration
    tracks_resolution = sl.Resolution(400, display_resolution.height)
    track_view_generator = cv_viewer.TrackingViewer(tracks_resolution, camera_config.camera_fps,
                                                    init_params.depth_maximum_distance)
    track_view_generator.set_camera_calibration(camera_config.calibration_parameters)
    image_track_ocv = np.zeros((tracks_resolution.height, tracks_resolution.width, 4), np.uint8)

    # Camera pose
    cam_w_pose = sl.Pose()

    while viewer.is_available() and not exit_signal:
        if zed.grab(runtime_params) == sl.ERROR_CODE.SUCCESS:
            # -- Get the image
            lock.acquire()
            zed.retrieve_image(image_left_tmp, sl.VIEW.LEFT)
            image_net = image_left_tmp.get_data()
            lock.release()
            run_signal = True

            # -- Detection running on the other thread
            while run_signal:
                sleep(0.001)

            # Wait for detections
            lock.acquire()
            # -- Ingest detections
            zed.ingest_custom_box_objects(detections)
            lock.release()
            zed.retrieve_objects(objects, obj_runtime_param)

            # -- Display
            # Retrieve display data
            zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA, sl.MEM.CPU, point_cloud_res)
            point_cloud.copy_to(point_cloud_render)
            zed.retrieve_image(image_left, sl.VIEW.LEFT, sl.MEM.CPU, display_resolution)
            zed.get_position(cam_w_pose, sl.REFERENCE_FRAME.WORLD)

            # 3D rendering
            viewer.updateData(point_cloud_render, objects)
            # 2D rendering
            np.copyto(image_left_ocv, image_left.get_data())
            cv_viewer.render_2D(image_left_ocv, image_scale, objects, obj_param.enable_tracking)
            global_image = cv2.hconcat([image_left_ocv, image_track_ocv])
            # Tracking view
            track_view_generator.generate_view(objects, cam_w_pose, image_track_ocv, objects.is_tracked)

            cv2.imshow("ZED | 2D View and Birds View", global_image)
            key = cv2.waitKey(10)
            if key == 27:
                exit_signal = True
        else:
            exit_signal = True

    viewer.exit()
    exit_signal = True
    zed.close()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='…/weights/best.pt', help='model.pt path(s)')
    parser.add_argument('--svo', type=str, default=None, help='optional svo file')
    parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)')
    parser.add_argument('--conf_thres', type=float, default=0.4, help='object confidence threshold')
    opt = parser.parse_args()

    with torch.no_grad():
        main()
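
For reference, I launch the script roughly like this (the paths below are placeholders, not my exact ones):

python zed44.py --weights best.pt --img_size 416 --conf_thres 0.4 --svo recording.svo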
But I am getting an "INVALID SVO FILE" error and an "IndexError: list index out of range". Here is the console output:
Initializing Network...
Initializing Camera...
INVALID SVO FILE
Fusing layers...
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
Exception in thread Thread-1 (torch_thread):
Traceback (most recent call last):
  File "C:\Users\shirs\Documents\Pythonobjdetection\yolov7-gpu\utils\google_utils.py", line 26, in attempt_download
    assets = [x['name'] for x in response['assets']]  # release assets
KeyError: 'assets'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\shirs\anaconda3\envs\yolov7-gpu-env\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "C:\Users\shirs\anaconda3\envs\yolov7-gpu-env\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\shirs\Documents\Pythonobjdetection\yolov7-gpu\zed44.py", line 100, in torch_thread
    model = attempt_load(weights, map_location=device)  # load FP32
  File "C:\Users\shirs\Documents\Pythonobjdetection\yolov7-gpu\models\experimental.py", line 251, in attempt_load
    attempt_download(w)
  File "C:\Users\shirs\Documents\Pythonobjdetection\yolov7-gpu\utils\google_utils.py", line 31, in attempt_download
    tag = subprocess.check_output('git tag', shell=True).decode().split()[-1]
IndexError: list index out of range
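
From the traceback, could the problem simply be that the weights path never resolves, so attempt_load() falls back to attempt_download(), which then dies on 'git tag'? A quick sanity check right after opt = parser.parse_args() (a minimal sketch; note that --weights uses nargs='+', so opt.weights comes back as a list) would be:

import os

# Sketch: confirm the weights file actually exists before attempt_load()
# falls back to attempt_download(). opt.weights is a list because the
# argument is declared with nargs='+'.
weights_path = opt.weights[0] if isinstance(opt.weights, list) else opt.weights
print("weights file exists:", os.path.isfile(weights_path))

If that prints False, the weights path (and, given the INVALID SVO FILE message, perhaps the --svo path too) would be the first thing to fix.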
Please, can anyone suggest a solution?