Merging Object Detection & Object Tracking

If you're not sure which of the other forums your question belongs in, then this forum for general questions is definitely the right place.
Reply
Oltroi
User
Posts: 2
Registered: Thursday 23 May 2019, 10:16

Hey,
I'm currently programming an object detection that is meant to detect only persons, stably and reliably. I'm implementing the whole thing with a small Logitech webcam and an NVIDIA Jetson TX1. My current status is that the person detection runs on the Jetson TX1, but I would like to support it with object tracking. For simplicity's sake I first got the object tracking running on my desktop PC, so in itself it works.
My problem now is that I can't manage two things: 1. getting the object tracking to run on the Jetson TX1, and 2. figuring out how best to integrate the object tracking into the code of my object detection.
I'll just show you both modules below, together with a small conversion helper and a rough sketch of how I imagine combining them.

P.S. I'm new to the forum; if I've explained anything badly or forgotten important parts of my project, I can add them afterwards. THANK YOU VERY MUCH!! :geek:


Object_detection.py

Code: Select all

## @package object_detection

import tensorflow as tf
from models.research.object_detection.utils import label_map_util
from models.research.object_detection.utils import visualization_utils as vis_util
from models.research.object_detection.utils import ops as utils_ops
import os
import time
import tarfile
import six.moves.urllib as urllib
import numpy as np
import cv2 as cv


## ObjectDetection Class
#
#  This class uses a tensorflow model to perform object detection on images.
#
#  Any model from
#  https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
#  can be used
#  and is automatically downloaded if the correct model path is entered.
#
#  See main function for usage.
#  Some of the models were already tested on intersection images. faster_rcnn_resnet101_coco_2018_01_28 yielded good detection results at
#  a 100 ms cycle time on a 1080 Ti.
#  faster_rcnn_nas_coco_2018_01_28 yielded very good results, however at a cycle time of 1 s.
class ObjectDetection:
    ## The constructor
    #
    #  @param self the object pointer
    #  @param model_name name of the model to be used (retrieve name from model zoo website)
    #  @param path_to_frozen_graph path where model is saved
    def __init__(self, model_name, path_to_frozen_graph):
        print("init")
        # What model to download.
        self.model_name = model_name
        self.MODEL_FILE = self.model_name + '.tar.gz'
        self.DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

        # Path to frozen detection graph. This is the actual model that is used for the object detection.
        self.PATH_TO_FROZEN_GRAPH = path_to_frozen_graph + '/' + self.model_name + '/frozen_inference_graph.pb'

        # List of the strings that are used to add the correct label to each box.
        self.PATH_TO_LABELS = os.path.join(path_to_frozen_graph,
                                           'models/research/object_detection/data',
                                           'mscoco_label_map.pbtxt')
        # download and unpack the model only if it is not already there
        if not os.path.exists(self.PATH_TO_FROZEN_GRAPH):
            opener = urllib.request.URLopener()
            opener.retrieve(self.DOWNLOAD_BASE + self.MODEL_FILE, self.MODEL_FILE)
            tar_file = tarfile.open(self.MODEL_FILE)
            for file in tar_file.getmembers():
                file_name = os.path.basename(file.name)
                if 'frozen_inference_graph.pb' in file_name:
                    tar_file.extract(file, path_to_frozen_graph)

        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(self.PATH_TO_FROZEN_GRAPH, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.category_index = label_map_util.create_category_index_from_labelmap(self.PATH_TO_LABELS,
                                                                                 use_display_name=True)
        with self.detection_graph.as_default():
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            self.tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    self.tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)
            if 'detection_masks' in self.tensor_dict:
                # The following processing is only for a single image
                detection_boxes = tf.squeeze(self.tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(self.tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(self.tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                # reframe_box_masks_to_image_masks expects (image_height, image_width);
                # the frames are resized to 960x540 pixels, so height is int(1080 * 0.5)
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, int(1080 * 0.5), int(1920 * 0.5))
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                self.tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            self.image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
        # warm-up inference so the first real frame is not slowed down by graph
        # initialization; the numpy shape is (height, width, channels)
        img = np.zeros((int(1080 * 0.5), int(1920 * 0.5), 3), dtype=np.uint8)
        self.sess.run(self.tensor_dict, feed_dict={self.image_tensor: np.expand_dims(img, 0)})

    ## loads image to numpy array
    #
    #  @param image image to load to numpy array
    def load_image_into_numpy_array(self, image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    ## creates output boxes for one image
    #
    #  @param image image to run inference on
    def run_inference_for_single_image(self, image):
        start = time.time()
        output_dict = self.sess.run(self.tensor_dict,
                                    feed_dict={self.image_tensor: np.expand_dims(image, 0)})
        print('inference time: {:.3f} s'.format(time.time() - start))

        # all outputs are float32 numpy arrays, so convert types as appropriate
        output_dict['num_detections'] = int(output_dict['num_detections'][0])
        output_dict['detection_classes'] = output_dict[
            'detection_classes'][0].astype(np.uint8)
        output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
        output_dict['detection_scores'] = output_dict['detection_scores'][0]
        if 'detection_masks' in output_dict:
            output_dict['detection_masks'] = output_dict['detection_masks'][0]
        return output_dict

    ## draws boxes on images
    #
    #  @param image image to draw boxes on
    #  @param output_dict dict containing boxes, classes, and probabilities
    def visualize_boxes_and_labels(self, image, output_dict):
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            self.category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.1)

        return image

    ## runs inference and draws on image
    #
    #   @param image image to run inference on and draw boxes
    def detect(self, image):
        output_dict = self.run_inference_for_single_image(image)
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            self.category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8)
        return image

## Filters prediction dict to contain only one class
#   @param output_dict: prediction from object detection class
#   @param class_id: the id of the object (see COCO documentation for more details)
#   @return reduced output_dict
def reduce_to_one_class(output_dict, class_id):
    indices = [i for i, x in enumerate(output_dict['detection_classes']) if x == class_id]
    return {'detection_classes': output_dict['detection_classes'][indices],
            'detection_boxes': output_dict['detection_boxes'][indices],
            'detection_scores': output_dict['detection_scores'][indices],
            'num_detections': len(indices)}


## keeps most likely object in dict
#   @param output_dict: prediction from object detection class
#   @return reduced output_dict
def get_most_likely_object(output_dict):
    # keep only the index of the detection with the highest score
    indices = [int(np.argmax(output_dict['detection_scores']))]
    return {'detection_classes': output_dict['detection_classes'][indices],
            'detection_boxes': output_dict['detection_boxes'][indices],
            'detection_scores': output_dict['detection_scores'][indices],
            'num_detections': len(indices)}


## main function containing sample usage
#
def main():
    # COCO-trained models
    model_name = 'ssd_mobilenet_v1_coco_2018_01_28'

    path_to_frozen_graph = "/home/nvidia/dev/lecturecam/tf_detection"
    #path_to_frozen_graph = "home/nvidia/dev/EmbeddedObjectDetection"
    od = ObjectDetection(model_name=model_name, path_to_frozen_graph=path_to_frozen_graph)

    # init cam
    cap = cv.VideoCapture(1)

    while True:
        # Capture frame-by-frame
        _, img = cap.read()
        # resize img
        img = cv.resize(img, (int(1920 * 0.5), int(1080 * 0.5)))
        output_dict = od.run_inference_for_single_image(img)
        output_dict = reduce_to_one_class(output_dict, class_id=1)
        #output_dict = get_most_likely_object(output_dict)
        image = od.visualize_boxes_and_labels(img.copy(), output_dict)
        cv.imshow("output", image)
        cv.waitKey(1)


if __name__ == "__main__":
    main()
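
One thing I noticed while putting the two parts side by side: the detector returns its boxes normalized as (ymin, xmin, ymax, xmax), while the OpenCV trackers in the second module expect pixel boxes as (x, y, w, h). I wrote myself this small conversion helper for that (the name to_pixel_box is my own, and it is untested on the Jetson so far):

Code: Select all

def to_pixel_box(norm_box, frame_width, frame_height):
    """Convert a normalized (ymin, xmin, ymax, xmax) detection box into
    the (x, y, w, h) pixel tuple that the OpenCV trackers expect."""
    ymin, xmin, ymax, xmax = norm_box
    return (int(xmin * frame_width), int(ymin * frame_height),
            int((xmax - xmin) * frame_width), int((ymax - ymin) * frame_height))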

Object_tracking.py

Code: Select all

from __future__ import print_function
import sys
import cv2 as cv
from random import randint

trackerTypes = ['BOOSTING', 'MIL', 'KCF', 'TLD', 'MEDIANFLOW', 'GOTURN', 'MOSSE', 'CSRT']


def createTrackerByName(trackerType):
    # Create a tracker based on tracker name
    if trackerType == trackerTypes[0]:
        tracker = cv.TrackerBoosting_create()
    elif trackerType == trackerTypes[1]:
        tracker = cv.TrackerMIL_create()
    elif trackerType == trackerTypes[2]:
        tracker = cv.TrackerKCF_create()
    elif trackerType == trackerTypes[3]:
        tracker = cv.TrackerTLD_create()
    elif trackerType == trackerTypes[4]:
        tracker = cv.TrackerMedianFlow_create()
    elif trackerType == trackerTypes[5]:
        tracker = cv.TrackerGOTURN_create()
    elif trackerType == trackerTypes[6]:
        tracker = cv.TrackerMOSSE_create()
    elif trackerType == trackerTypes[7]:
        tracker = cv.TrackerCSRT_create()
    else:
        tracker = None
        print('Incorrect tracker name')
        print('Available trackers are:')
        for t in trackerTypes:
            print(t)

    return tracker

def main():
    # init cam
    cap = cv.VideoCapture(-1)

    # read the first frame; quit if the camera cannot be read
    success, frame = cap.read()
    if not success:
        print('Failed to read video')
        sys.exit(1)


    ## Select boxes
    bboxes = []
    colors = []

    # OpenCV's selectROI function doesn't work for selecting multiple objects in Python,
    # so we call it in a loop until all objects have been selected
    while True:
        # draw a bounding box over an object;
        # selectROI's default behaviour is to draw the box starting from the center,
        # with fromCenter=False you draw it starting from the top-left corner
        success, frame = cap.read()
        bbox = cv.selectROI('MultiTracker', frame)
        bboxes.append(bbox)
        colors.append((randint(0, 255), randint(0, 255), randint(0, 255)))
        print("Press q to quit selecting boxes and start tracking")
        print("Press any other key to select the next object")
        k = cv.waitKey(0) & 0xFF
        if k == 113:  # q is pressed
            break

    print('Selected bounding boxes {}'.format(bboxes))

    # Specify the tracker type
    trackerType = "KCF"

    # Create MultiTracker object
    multiTracker = cv.MultiTracker_create()

    # Initialize MultiTracker
    for bbox in bboxes:
        multiTracker.add(createTrackerByName(trackerType), frame, bbox)

    # Process video and track objects
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        # get updated location of objects in subsequent frames
        success, boxes = multiTracker.update(frame)

        # draw tracked objects
        for i, newbox in enumerate(boxes):
            p1 = (int(newbox[0]), int(newbox[1]))
            p2 = (int(newbox[0] + newbox[2]), int(newbox[1] + newbox[3]))
            cv.rectangle(frame, p1, p2, colors[i], 2, 1)

        # show frame
        cv.imshow('MultiTracker', frame)

        # quit on ESC button
        if cv.waitKey(1) & 0xFF == 27:  # Esc pressed
            break
    print('Finish')


if __name__ == '__main__':
    main()
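
And finally, so you can see what I'm aiming at: a rough, untested sketch of how I imagine gluing the two modules together. The idea is to run the slow detection only every N frames and let the trackers bridge the frames in between. It assumes ObjectDetection and reduce_to_one_class from Object_detection.py, createTrackerByName from Object_tracking.py, and the to_pixel_box helper from above; DETECT_EVERY and the 0.5 score threshold are pure guesses on my part:

Code: Select all

import cv2 as cv
# assumes these names are importable from the two modules above:
# from Object_detection import ObjectDetection, reduce_to_one_class
# from Object_tracking import createTrackerByName

DETECT_EVERY = 10   # re-detect every 10th frame (guess, needs tuning)
SCORE_THRESH = 0.5  # only seed trackers with reasonably sure detections


def main():
    od = ObjectDetection(model_name='ssd_mobilenet_v1_coco_2018_01_28',
                         path_to_frozen_graph='/home/nvidia/dev/lecturecam/tf_detection')
    cap = cv.VideoCapture(1)
    multi_tracker = None
    frame_id = 0

    while True:
        ok, frame = cap.read()
        if not ok:
            break
        frame = cv.resize(frame, (int(1920 * 0.5), int(1080 * 0.5)))
        height, width = frame.shape[:2]

        if frame_id % DETECT_EVERY == 0:
            # slow path: run the detector and re-seed the trackers
            output_dict = od.run_inference_for_single_image(frame)
            output_dict = reduce_to_one_class(output_dict, class_id=1)  # 1 = person in COCO
            multi_tracker = cv.MultiTracker_create()
            for box, score in zip(output_dict['detection_boxes'],
                                  output_dict['detection_scores']):
                if score < SCORE_THRESH:
                    continue
                multi_tracker.add(createTrackerByName('KCF'), frame,
                                  to_pixel_box(box, width, height))
        else:
            # fast path: just update the trackers on the frames in between
            _, boxes = multi_tracker.update(frame)
            for (x, y, w, h) in boxes:
                cv.rectangle(frame, (int(x), int(y)),
                             (int(x + w), int(y + h)), (0, 255, 0), 2)

        cv.imshow('detect + track', frame)
        if cv.waitKey(1) & 0xFF == 27:  # Esc quits
            break
        frame_id += 1


if __name__ == '__main__':
    main()

Is this roughly the right structure, or is there a smarter way to hand the detections over to the trackers?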
Reply