Hello everyone.
I’m trying to set my own model in custom detector api (python). I’ve already trained my net and it works well with images as inputs, however I’d like to use Zed2 camera to detect my custom objects.
I tried to manage with tutorial from stereolabs and also git samples, however finally I got stuck and can’t find solution, why api doesn’t work properly.
Detector worked well with original model and detected human, bag, etc. Now it doesn’t detect anything.
Can anyone help me what I made wrong and how can I properly define my own models for detection?
Here is my code:
# Import packages
from collections import namedtuple
import os
import cv2
import numpy as np
import tensorflow.compat.v1 as tf
import sys
import pyzed.sl as sl
import xml.etree.ElementTree as ET
def main():
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# Import utilites
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
# Name of the directory containing the object detection module we're using
MODEL_NAME = 'inference_graph_3'
IMAGE_NAME = 'PC046572.JPG'
# Grab path to current working directory
CWD_PATH = os.getcwd()
# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,'data','labelmap.pbtxt')
Detection = namedtuple("Detection", ["image_path", "gt", "pred"])
# Number of classes the object detector can identify
NUM_CLASSES = 1
# Load the label map.
# Label maps map indices to category names, so that when our convolution
# network predicts `5`, we know that this corresponds to `king`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
sess = tf.Session(graph=detection_graph)
# Define input and output tensors (i.e. data) for the object detection classifier
# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Create a Camera object
zed = sl.Camera()
# Create a InitParameters object and set configuration parameters
init_params = sl.InitParameters()
init_params.camera_resolution = sl.RESOLUTION.HD720 # Use HD720 video mode
init_params.depth_mode = sl.DEPTH_MODE.PERFORMANCE
init_params.coordinate_units = sl.UNIT.METER
init_params.sdk_verbose = True
# Open the camera
err = zed.open(init_params)
if err != sl.ERROR_CODE.SUCCESS:
exit(1)
obj_param = sl.ObjectDetectionParameters()
obj_param.detection_model = sl.DETECTION_MODEL.CUSTOM_BOX_OBJECTS
obj_param.enable_tracking=True
obj_param.enable_mask_output=True
camera_infos = zed.get_camera_information()
if obj_param.enable_tracking :
positional_tracking_param = sl.PositionalTrackingParameters()
#positional_tracking_param.set_as_static = True
positional_tracking_param.set_floor_as_origin = True
zed.enable_positional_tracking(positional_tracking_param)
print("Object Detection: Loading Module...")
err = zed.enable_object_detection(obj_param)
if err != sl.ERROR_CODE.SUCCESS :
print (repr(err))
zed.close()
exit(1)
# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represents level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
objects = sl.Objects()
obj_runtime_param = sl.ObjectDetectionRuntimeParameters()
obj_runtime_param.detection_confidence_threshold = 40
while zed.grab() == sl.ERROR_CODE.SUCCESS:
objects_in = []
# The "detections" variable contains your custom 2D detections
for it in num_detections:
tmp = sl.CustomBoxObjectData()
# Fill the detections into the correct SDK format
tmp.unique_object_id = num_detections
tmp.probability = detection_scores
tmp.label = detection_classes
tmp.bounding_box_2d = detection_boxes
tmp.is_grounded = False
objects_in.append(tmp)
zed.ingest_custom_box_objects(objects_in)
for neighbor in root.iter('bndbox'):
xmin = int(neighbor.find('xmin').text)
ymin = int(neighbor.find('ymin').text)
xmax = int(neighbor.find('xmax').text)
ymax = int(neighbor.find('ymax').text)
annotations.append([xmin, ymin, xmax, ymax])
err = zed.retrieve_objects(objects, obj_runtime_param)
if objects.is_new :
obj_array = objects.object_list
print(str(len(obj_array))+" Object(s) detected\n")
if len(obj_array) > 0 :
first_object = obj_array[0]
print("First object attributes:")
print(" Label '"+repr(first_object.label)+"' (conf. "+str(int(first_object.confidence))+"/100)")
if obj_param.enable_tracking :
print(" Tracking ID: "+str(int(first_object.id))+" tracking state: "+repr(first_object.tracking_state)+" / "+repr(first_object.action_state))
position = first_object.position
velocity = first_object.velocity
dimensions = first_object.dimensions
print(" 3D position: [{0},{1},{2}]\n Velocity: [{3},{4},{5}]\n 3D dimentions: [{6},{7},{8}]".format(position[0],position[1],position[2],velocity[0],velocity[1],velocity[2],dimensions[0],dimensions[1],dimensions[2]))
if first_object.mask.is_init():
print(" 2D mask available")
print(" Bounding Box 2D ")
bounding_box_2d = first_object.bounding_box_2d
for it in bounding_box_2d :
print(" "+str(it),end='')
print("\n Bounding Box 3D ")
bounding_box = first_object.bounding_box
for it in bounding_box :
print(" "+str(it),end='')
input('\nPress enter to continue: ')
# Close the camera
zed.close()