Ich bin gerade dabei, eine Object Detection zu programmieren, die stabil und zuverlässig nur Personen erkennt. Ich realisiere das Ganze mit einer kleinen Logitech-Webcam und einem NVIDIA Jetson TX1. Mein jetziger Stand ist, dass die Erkennung von Personen auf dem Jetson TX1 läuft, ich diese aber noch durch Object Tracking unterstützen möchte. Das Object Tracking habe ich einfachheitshalber erst einmal auf meinem Rechner zum Laufen gebracht. Sprich: Es funktioniert an sich.
Mein Problem ist jetzt, dass ich 1. es nicht hinbekomme, das Object Tracking auf dem Jetson TX1 zum Laufen zu bringen, und 2. nicht weiß, wie ich das Object Tracking am besten in den Code meiner Object Detection einbringe.
Ich würde euch einfach mal beide Module zeigen.
P.S.: Ich bin neu im Forum. Falls ich etwas schlecht erklärt oder wichtige Teile meines Projekts vergessen habe, kann ich diese noch nachträglich einfügen. Vielen Dank!
Object_detection.py
Code: Alles auswählen
## @package object detection
import tensorflow as tf
from models.research.object_detection.utils import label_map_util
from models.research.object_detection.utils import visualization_utils as vis_util
from models.research.object_detection.utils import ops as utils_ops
import os
import tarfile
import six.moves.urllib as urllib
import numpy as np
import cv2 as cv
## ObjectDetection Class
#
# This class uses a tensorflow model to perform object detection on images.
#
# Any model from
# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
# can be used
# and is automatically downloaded if the correct model path is entered.
#
# See main function for usage.
# Some of the models were tested already on intersection images. faster_rcnn_resnet101_coco_2018_01_28 yielded good detections results at
# 100 ms cycle time using 1080Ti.
# faster_rcnn_nas_coco_2018_01_28 yielded very good results, however, at a cycle time of 1 s.
class ObjectDetection:
    ## The constructor
    #
    # Makes sure the frozen graph is on disk (the model archive is downloaded
    # and unpacked only when the graph file is missing), imports it into a
    # dedicated tf.Graph, opens a session, collects the output tensors and
    # runs one inference on a black image.
    #
    # @param self the object pointer
    # @param model_name name of the model to be used (retrieve name from model zoo website)
    # @param path_to_frozen_graph directory below which
    #        <model_name>/frozen_inference_graph.pb is stored
    # @param image_width width in pixels of the frames fed to the detector
    # @param image_height height in pixels of the frames fed to the detector
    def __init__(self, model_name, path_to_frozen_graph,
                 image_width=int(1920 * 0.5), image_height=int(1080 * 0.5)):
        print("init")
        # What model to download.
        self.model_name = model_name
        self.MODEL_FILE = self.model_name + '.tar.gz'
        self.DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
        # Path to frozen detection graph. This is the actual model that is used for the object detection.
        self.PATH_TO_FROZEN_GRAPH = path_to_frozen_graph + '/' + self.model_name + '/frozen_inference_graph.pb'
        # List of the strings that is used to add correct label for each box.
        self.PATH_TO_LABELS = path_to_frozen_graph + '/' + os.path.join('/models/research/object_detection/data',
                                                                        'mscoco_label_map.pbtxt')
        self.image_width = image_width
        self.image_height = image_height

        # Downloading on every start is slow and needs a network connection,
        # so only fetch the archive when the graph file is not there yet.
        if not os.path.isfile(self.PATH_TO_FROZEN_GRAPH):
            self._download_frozen_graph(path_to_frozen_graph)

        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(self.PATH_TO_FROZEN_GRAPH, 'rb') as fid:
                od_graph_def.ParseFromString(fid.read())
            tf.import_graph_def(od_graph_def, name='')

        self.category_index = label_map_util.create_category_index_from_labelmap(
            self.PATH_TO_LABELS, use_display_name=True)

        with self.detection_graph.as_default():
            config = tf.ConfigProto()
            # Let TensorFlow allocate GPU memory on demand instead of all at once.
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            self.tensor_dict = self._build_tensor_dict()
            self.image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')

        # One inference on a black frame; presumably a warm-up so that the
        # first real frame does not pay the start-up cost.
        # Image arrays are indexed (rows, columns, channels) = (height, width, 3).
        img = np.zeros((self.image_height, self.image_width, 3), dtype=np.uint8)
        self.sess.run(self.tensor_dict, feed_dict={self.image_tensor: np.expand_dims(img, 0)})

    ## downloads the model archive and unpacks the frozen graph
    #
    # The graph is extracted below target_dir so that it ends up exactly where
    # PATH_TO_FROZEN_GRAPH expects it.
    #
    # @param self the object pointer
    # @param target_dir directory receiving the archive and the extracted graph
    def _download_frozen_graph(self, target_dir):
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        archive_path = os.path.join(target_dir, self.MODEL_FILE)
        opener = urllib.request.URLopener()
        opener.retrieve(self.DOWNLOAD_BASE + self.MODEL_FILE, archive_path)
        # The with-statement closes the archive even if extraction fails.
        with tarfile.open(archive_path) as tar_file:
            for member in tar_file.getmembers():
                if 'frozen_inference_graph.pb' in os.path.basename(member.name):
                    tar_file.extract(member, target_dir)

    ## collects the output tensors offered by the loaded graph
    #
    # Must be called while self.detection_graph is the default graph because
    # the mask post-processing adds new ops to it.
    #
    # @param self the object pointer
    # @return dict mapping output name to tensor
    def _build_tensor_dict(self):
        all_tensor_names = {output.name
                            for op in self.detection_graph.get_operations()
                            for output in op.outputs}
        tensor_dict = {}
        for key in ['num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks']:
            tensor_name = key + ':0'
            # Not every model provides every output (e.g. masks).
            if tensor_name in all_tensor_names:
                tensor_dict[key] = self.detection_graph.get_tensor_by_name(tensor_name)

        if 'detection_masks' in tensor_dict:
            # The following processing is only for single image
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
            # The helper takes the image height first, then the width.
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, self.image_height, self.image_width)
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0)
        return tensor_dict

    ## releases the TensorFlow session
    #
    # @param self the object pointer
    def close(self):
        self.sess.close()

    ## loads image to numpy array
    #
    # @param self the object pointer
    # @param image image to load to numpy array; must offer `size` as
    #        (width, height) and `getdata()` with three values per pixel
    # @return uint8 array of shape (height, width, 3)
    def load_image_into_numpy_array(self, image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    ## creates output boxes for one image
    #
    # Prints the duration of the session call in seconds.
    #
    # @param self the object pointer
    # @param image image to run inference on
    # @return dict with 'num_detections' (int), 'detection_classes' (uint8),
    #         'detection_boxes', 'detection_scores' and, if the model
    #         provides them, 'detection_masks'; the batch dimension is removed
    def run_inference_for_single_image(self, image):
        import time
        start = time.time()
        output_dict = self.sess.run(self.tensor_dict,
                                    feed_dict={self.image_tensor: np.expand_dims(image, 0)})
        end = time.time()
        print(end - start)
        # all outputs are float32 numpy arrays, so convert types as appropriate
        output_dict['num_detections'] = int(output_dict['num_detections'][0])
        output_dict['detection_classes'] = output_dict[
            'detection_classes'][0].astype(np.uint8)
        output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
        output_dict['detection_scores'] = output_dict['detection_scores'][0]
        if 'detection_masks' in output_dict:
            output_dict['detection_masks'] = output_dict['detection_masks'][0]
        return output_dict

    ## draws boxes on images
    #
    # @param self the object pointer
    # @param image image to draw boxes on (modified in place)
    # @param output_dict dict containing boxes, classes, and probabilities
    # @param min_score_thresh detections scoring below this value are not drawn
    # @return the image that was passed in
    def visualize_boxes_and_labels(self, image, output_dict, min_score_thresh=0.1):
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            self.category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=min_score_thresh)
        return image

    ## runs inference and draws on image
    #
    # @param self the object pointer
    # @param image image to run inference on and draw boxes
    # @return the image that was passed in, with the detections drawn on it
    def detect(self, image):
        output_dict = self.run_inference_for_single_image(image)
        # 0.5 is the drawing helper's own default threshold, which this method
        # relied on implicitly before.
        return self.visualize_boxes_and_labels(image, output_dict, min_score_thresh=0.5)
## Filters prediction dict to contain only one class
#
# Instance masks, when present in the input, are filtered the same way so
# that they stay aligned with the remaining boxes.
#
# @param output_dict: prediction from object detection class
# @param class_id: the id of the object (see COCO documentation for more details)
# @return reduced output_dict; 'num_detections' is the number of kept entries
def reduce_to_one_class(output_dict, class_id):
    classes = np.asarray(output_dict['detection_classes'])
    # Positions of all detections that carry the requested class id.
    indices = np.flatnonzero(classes == class_id)
    reduced = {'detection_classes': classes[indices],
               'detection_boxes': np.asarray(output_dict['detection_boxes'])[indices],
               'detection_scores': np.asarray(output_dict['detection_scores'])[indices],
               'num_detections': len(indices)}
    if 'detection_masks' in output_dict:
        reduced['detection_masks'] = np.asarray(output_dict['detection_masks'])[indices]
    return reduced
## keeps most likely object in dict
#
# The entry with the highest value in 'detection_scores' is kept. All arrays
# in the result keep their leading dimension (length 1, or 0 when the input
# holds no detections). Instance masks are carried along when present.
#
# @param output_dict: prediction from object detection class
# @return reduced output_dict
def get_most_likely_object(output_dict):
    scores = np.asarray(output_dict['detection_scores'])
    # argmax raises on an empty array, so an empty prediction maps to an
    # empty selection instead.
    best = [int(np.argmax(scores))] if scores.size else []
    indices = np.array(best, dtype=np.intp)
    reduced = {'detection_classes': np.asarray(output_dict['detection_classes'])[indices],
               'detection_boxes': np.asarray(output_dict['detection_boxes'])[indices],
               'detection_scores': scores[indices],
               'num_detections': len(indices)}
    if 'detection_masks' in output_dict:
        reduced['detection_masks'] = np.asarray(output_dict['detection_masks'])[indices]
    return reduced
## main function containing sample usage
#
# Opens camera 1, runs the detector on every frame, keeps only detections of
# class id 1 and shows the annotated frame in a window. The loop ends when
# ESC or q is pressed or when the camera stops delivering frames.
def main():
    # COCO-trained models
    model_name = 'ssd_mobilenet_v1_coco_2018_01_28'
    path_to_frozen_graph = "/home/nvidia/dev/lecturecam/tf_detection"

    od = ObjectDetection(model_name=model_name, path_to_frozen_graph=path_to_frozen_graph)

    # init cam
    cap = cv.VideoCapture(1)
    if not cap.isOpened():
        print("Failed to open camera")
        return

    try:
        while True:
            # Capture frame-by-frame
            success, img = cap.read()
            if not success:
                # Without this check cv.resize would be handed None.
                print("Failed to read frame from camera")
                break

            # resize img
            img = cv.resize(img, (int(1920 * 0.5), int(1080 * 0.5)))

            output_dict = od.run_inference_for_single_image(img)
            output_dict = reduce_to_one_class(output_dict, class_id=1)
            # To show only the single best detection, additionally pass
            # output_dict through get_most_likely_object() here.

            image = od.visualize_boxes_and_labels(img.copy(), output_dict)
            cv.imshow("output", image)

            # waitKey also gives the window time to repaint.
            key = cv.waitKey(1) & 0xFF
            if key in (27, ord('q')):  # ESC or q
                break
    finally:
        # Free the camera and close the window on every exit path.
        cap.release()
        cv.destroyAllWindows()


if __name__ == "__main__":
    main()
Object_tracking.py
Code: Alles auswählen
from __future__ import print_function
import sys
import cv2 as cv
from random import randint
trackerTypes = ['BOOSTING', 'MIL', 'KCF', 'TLD', 'MEDIANFLOW', 'GOTURN', 'MOSSE', 'CSRT']

# Tracker name -> name of the OpenCV factory function that creates it.
# Only the names are stored so that a factory missing from the installed
# OpenCV build matters only when that particular tracker is requested.
_TRACKER_FACTORIES = {
    'BOOSTING': 'TrackerBoosting_create',
    'MIL': 'TrackerMIL_create',
    'KCF': 'TrackerKCF_create',
    'TLD': 'TrackerTLD_create',
    'MEDIANFLOW': 'TrackerMedianFlow_create',
    'GOTURN': 'TrackerGOTURN_create',
    'MOSSE': 'TrackerMOSSE_create',
    'CSRT': 'TrackerCSRT_create',
}


def createTrackerByName(trackerType):
    """Create an OpenCV tracker from its name.

    The name is matched case-insensitively against ``trackerTypes``. The
    factory is looked up on ``cv`` first and, if absent there, on
    ``cv.legacy``.

    Args:
        trackerType: one of the names listed in ``trackerTypes``.

    Returns:
        A new tracker object, or ``None`` (after printing the list of valid
        names) when the name is unknown.

    Raises:
        AttributeError: if the installed OpenCV build offers the requested
            factory neither on ``cv`` nor on ``cv.legacy``.
    """
    factory_name = _TRACKER_FACTORIES.get(str(trackerType).upper())
    if factory_name is None:
        print('Incorrect tracker name')
        print('Available trackers are:')
        for t in trackerTypes:
            print(t)
        return None

    factory = getattr(cv, factory_name, None)
    if factory is None:
        # Some OpenCV versions expose these factories in the legacy namespace.
        factory = getattr(getattr(cv, 'legacy', None), factory_name, None)
    if factory is None:
        raise AttributeError(
            'This OpenCV build provides neither cv2.{0} nor cv2.legacy.{0}'.format(factory_name))
    return factory()
# Sample video from the original tracking example. main() below reads from
# the webcam and does not use this file.
videoPath = "/home/kav/Desktop/animate-ball-color.gif"

# Probe the sample video once. The names cap, success and frame stay defined
# at module level as before.
cap = cv.VideoCapture(videoPath)
success, frame = cap.read()
# The probe only needs the first frame, so give the file handle back at once.
cap.release()

# A missing sample video must not stop the script: this path only exists on
# the machine the example was written on, and exiting here would keep the
# webcam tracking in main() from ever starting on any other device.
if not success:
    print('Failed to read video')
def main():
    """Select objects on webcam frames by hand and track them with KCF.

    Phase 1 (selection): a frame is grabbed, a box is drawn with the mouse in
    the 'MultiTracker' window and a tracker is initialised for it on that
    same frame. After each box, q starts tracking, ESC quits, any other key
    grabs a new frame for the next box.

    Phase 2 (tracking): every new frame updates all trackers and the boxes
    are drawn until ESC is pressed or the camera stops delivering frames.

    NOTE(review): cv.MultiTracker_create and the tracker factories belong to
    OpenCV's tracking module; presumably the OpenCV build on the Jetson lacks
    it - confirm with cv.getBuildInformation().
    """
    # init cam
    cap = cv.VideoCapture(-1)
    if not cap.isOpened():
        print('Failed to open camera')
        return

    # Specify the tracker type
    trackerType = "KCF"

    # Create MultiTracker object
    multiTracker = cv.MultiTracker_create()

    bboxes = []
    colors = []

    try:
        # OpenCV's selectROI function doesn't work for selecting multiple objects in Python
        # So we will call this function in a loop till we are done selecting all objects
        while True:
            # Capture frame-by-frame
            success, frame = cap.read()
            if not success:
                # selectROI cannot work without an image.
                print('Failed to read video')
                return

            # selectROI's default behaviour is to draw box starting from the center
            # when fromCenter is set to false, you can draw box starting from top left corner
            bbox = cv.selectROI('MultiTracker', frame)

            # A cancelled selection has zero width and height; skip it.
            if bbox[2] > 0 and bbox[3] > 0:
                # Initialise the tracker on the very frame the box was drawn
                # on, so that the box content matches the image.
                multiTracker.add(createTrackerByName(trackerType), frame, bbox)
                bboxes.append(bbox)
                colors.append((randint(0, 255), randint(0, 255), randint(0, 255)))

            print("Press q to quit selecting boxes and start tracking")
            print("Hold q to stop and relax q to continue")
            print("Press ESC to quit")
            k = cv.waitKey(0) & 0xFF
            if k == ord('q'):  # q is pressed
                break
            if k == 27:  # Esc pressed
                return

        print('Selected bounding boxes {}'.format(bboxes))

        # Process video and track objects
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            # get updated location of objects in subsequent frames
            success, boxes = multiTracker.update(frame)

            # draw tracked objects
            for i, newbox in enumerate(boxes):
                p1 = (int(newbox[0]), int(newbox[1]))
                p2 = (int(newbox[0] + newbox[2]), int(newbox[1] + newbox[3]))
                cv.rectangle(frame, p1, p2, colors[i], 2, 1)

            # show frame
            cv.imshow('MultiTracker', frame)

            # quit on ESC button
            if cv.waitKey(1) & 0xFF == 27:  # Esc pressed
                break

        print('Finish')
    finally:
        # Free the camera and close the window on every exit path.
        cap.release()
        cv.destroyAllWindows()


if __name__ == '__main__':
    main()