OCR mit der USB-Cam und tkinter

Stellt hier eure Projekte vor.
Internetseiten, Skripte, und alles andere bzgl. Python.
Antworten
Benutzeravatar
kaytec
User
Beiträge: 608
Registriert: Dienstag 13. Februar 2007, 21:57

Hallo,

die Größe des Dokumentes ist natürlich von der Auflösung der Kamera abhängig, doch Teilbereiche gehen auch bei schlechter Auflösung. Der markierte Text wird in der Zwischenablage gespeichert.

Code: Alles auswählen

#! /usr/bin/env python
# -*- coding: utf-8

import tkinter as tk

import cv2
from PIL import Image, ImageTk
from functools import partial
import pyocr

# Size in pixels to which every camera frame is scaled for the preview.
WIDTH = 640
HEIGHT = 480
# Camera index passed to cv2.VideoCapture when none is given explicitly.
DEFAULT_CAM_ID = -1

class CameraError(RuntimeError):
    """Raised when the camera cannot be opened or delivers no frame.

    Derives from ``RuntimeError`` so that existing ``except RuntimeError``
    handlers keep working unchanged.
    """


class OCRCam(object):
    """Camera wrapper that grabs frames and runs OCR on them with pyocr.

    Usable as a context manager; the capture device is released on exit.
    """

    # Property ids passed to ``VideoCapture.get`` to query the frame
    # width and height (cv2.CAP_PROP_FRAME_WIDTH / _HEIGHT).
    PROPID_WIDTH = 3
    PROPID_HEIGHT = 4

    def __init__(self, cam_id=DEFAULT_CAM_ID):
        """Open the camera with index *cam_id* and read its frame size.

        Raises:
            CameraError: if the capture device could not be opened.
        """
        self.cam = cv2.VideoCapture(cam_id)
        if not self.cam.isOpened():
            # Do not leave a half-initialised capture object behind.
            self.cam.release()
            raise CameraError("can not open camera {0!r}".format(cam_id))

        self.width = int(self.cam.get(self.PROPID_WIDTH))
        self.height = int(self.cam.get(self.PROPID_HEIGHT))

    @property
    def size(self):
        """Frame size as a ``(width, height)`` tuple."""
        return self.width, self.height

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.release()

    def get_text(self, lang="deu"):
        """Grab one frame and return the text tesseract recognises in it.

        Args:
            lang: language code handed to tesseract.

        Raises:
            CameraError: if no frame could be read.
        """
        frame = self.get_image()
        # The frame is handed to Pillow as raw BGR data.
        image = Image.frombytes("RGB", self.size, frame, "raw", "BGR")
        return pyocr.tesseract.image_to_string(
            image, lang=lang, builder=pyocr.builders.TextBuilder())

    def get_image(self):
        """Read and return the next frame from the camera.

        Raises:
            CameraError: if the camera did not deliver a frame.
        """
        state, frame = self.cam.read()
        if not state:
            raise CameraError("could not read image")
        return frame

    def release(self):
        """Release the underlying capture device."""
        self.cam.release()

        
class OCRCamUI(tk.Frame):
    """Frame showing a live camera preview with an OCR scan action.

    Args:
        parent: widget that contains this frame; destroyed by ``release``.
        ocr_cam: camera object providing ``size``, ``get_image()`` and
            ``get_text()``.
        width, height: size in pixels the preview image is scaled to.
        update_interval: delay in milliseconds between preview updates.
    """

    def __init__(self, parent, ocr_cam, width, height,
                 update_interval=100):
        tk.Frame.__init__(self, parent)
        self.parent = parent
        self.ocr_cam = ocr_cam
        self.width = width
        self.height = height
        self.update_interval = update_interval
        # Id of the pending ``after`` callback, None while nothing is
        # scheduled.
        self.after_id = None
        # Keep a reference to the displayed image so it is not garbage
        # collected while the label still shows it.
        self.tk_image = None
        self.image_label = tk.Label(self)
        self.image_label.pack()

    def run(self):
        """Show the current camera frame and schedule the next update.

        Stops updating and shows an error text when no frame can be read.
        """
        try:
            frame = self.ocr_cam.get_image()
        except RuntimeError:
            # The callback that brought us here has already fired, so
            # there is nothing left to cancel later on.
            self.after_id = None
            self.raise_cam_id_error()
            return
        image = Image.frombytes(
            "RGB", self.ocr_cam.size, frame, "raw", "BGR"
        ).resize((self.width, self.height))
        self.tk_image = ImageTk.PhotoImage(image)
        self.image_label.config(image=self.tk_image)
        self.after_id = self.after(self.update_interval, self.run)

    def get_text(self):
        """Open a window with the text recognised in the current frame.

        Releasing the left mouse button inside the text box copies the
        selected text to the clipboard.
        """
        top = tk.Toplevel(self)
        top.title("OCR SCAN")
        text_box = tk.Text(top)
        text_box.pack()
        try:
            text = self.ocr_cam.get_text()
        except RuntimeError:
            # Camera read failed: tell the user instead of leaving an
            # empty window and a traceback on stderr.
            text = "NO CAM"
        text_box.insert(tk.END, text)
        text_box.tag_config("sel", background="skyblue")
        text_box.bind("<ButtonRelease-1>", partial(self.paste_to_clipboard,
                                                   top,
                                                   text_box))

    def paste_to_clipboard(self, top, text_box, event):
        """Copy the selection of *text_box* to the clipboard.

        The title of *top* reports whether text was copied. The clipboard
        is left untouched when nothing is selected.
        """
        try:
            selection = text_box.get(tk.SEL_FIRST, tk.SEL_LAST)
        except tk.TclError:
            top.title("OCR SCAN --> NO TEXT SELECTED")
            return
        self.parent.clipboard_clear()
        self.parent.clipboard_append(selection)
        top.title("OCR SCAN--> TEXT COPIED TO CLIPBOARD")

    def raise_cam_id_error(self):
        """Replace the preview image with an error text."""
        self.image_label.config(image="", text='> NO CAM <', font='Arial 60')

    def release(self):
        """Cancel the pending preview update and destroy the parent."""
        # ``after_cancel(None)`` raises ValueError, so only cancel when an
        # update is actually scheduled.
        if self.after_id is not None:
            self.after_cancel(self.after_id)
            self.after_id = None
        self.parent.destroy()
        
        
def main():
    """Build the main window and run the camera preview.

    Shows an error label instead of the preview when the default camera
    cannot be opened.
    """
    root = tk.Tk()
    root.title("OCR CAM")
    root.resizable(0, 0)

    # Only opening the camera is guarded, so unrelated RuntimeErrors are
    # not misreported as a camera problem.
    try:
        ocr_cam = OCRCam()
    except RuntimeError:
        tk.Label(root, text="can not open camera {0!r}".format(
            DEFAULT_CAM_ID), font="Arial 20", height=10).pack()
        root.mainloop()
        return

    with ocr_cam:
        ocr_cam_ui = OCRCamUI(root, ocr_cam, WIDTH, HEIGHT)
        ocr_cam_ui.pack()
        ocr_cam_ui.run()
        tk.Button(root, text="SCAN", command=ocr_cam_ui.get_text).pack()
        # Closing the window must stop the preview loop first.
        root.protocol("WM_DELETE_WINDOW", ocr_cam_ui.release)
        root.mainloop()

# Start the application only when the file is executed as a script.
if __name__ == "__main__":
    main()
Gruß Frank
Benutzeravatar
__blackjack__
User
Beiträge: 13134
Registriert: Samstag 2. Juni 2018, 10:21
Wohnort: 127.0.0.1
Kontaktdaten:

Da fehlt eine `raise_cam_id_error()`-Methode. Und `RuntimeError` zu behandeln ist ziemlich grosszügig. Wobei `RuntimeError` auszulösen etwas ”schwammig” ist. Ich würde dafür eine eigene Ausnahme definieren.
“There will always be things we wish to say in our programs that in all known languages can only be said poorly.” — Alan J. Perlis
Benutzeravatar
kaytec
User
Beiträge: 608
Registriert: Dienstag 13. Februar 2007, 21:57

Danke _blackjack_ !

Code: Alles auswählen

#! /usr/bin/env python
# -*- coding: utf-8

import tkinter as tk

import cv2
from PIL import Image, ImageTk
from functools import partial
import pyocr

# Size in pixels to which every camera frame is scaled for the preview.
WIDTH = 640
HEIGHT = 480
# Camera index passed to cv2.VideoCapture when none is given explicitly.
DEFAULT_CAM_ID = -1


class CameraError(RuntimeError):
    """Raised when the camera cannot be opened or delivers no frame.

    Derives from ``RuntimeError`` so that existing ``except RuntimeError``
    handlers keep working unchanged.
    """


class OCRCam(object):
    """Camera wrapper that grabs frames and runs OCR on them with pyocr.

    Usable as a context manager; the capture device is released on exit.
    """

    # Property ids passed to ``VideoCapture.get`` to query the frame
    # width and height (cv2.CAP_PROP_FRAME_WIDTH / _HEIGHT).
    PROPID_WIDTH = 3
    PROPID_HEIGHT = 4

    def __init__(self, cam_id=DEFAULT_CAM_ID):
        """Open the camera with index *cam_id* and read its frame size.

        Raises:
            CameraError: if the capture device could not be opened.
        """
        self.cam = cv2.VideoCapture(cam_id)
        if not self.cam.isOpened():
            # Do not leave a half-initialised capture object behind.
            self.cam.release()
            raise CameraError("can not open camera {0!r}".format(cam_id))

        self.width = int(self.cam.get(self.PROPID_WIDTH))
        self.height = int(self.cam.get(self.PROPID_HEIGHT))

    @property
    def size(self):
        """Frame size as a ``(width, height)`` tuple."""
        return self.width, self.height

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.release()

    def get_text(self, lang="deu"):
        """Grab one frame and return the text tesseract recognises in it.

        Args:
            lang: language code handed to tesseract.

        Raises:
            CameraError: if no frame could be read.
        """
        frame = self.get_image()
        # The frame is handed to Pillow as raw BGR data.
        image = Image.frombytes("RGB", self.size, frame, "raw", "BGR")
        return pyocr.tesseract.image_to_string(
            image, lang=lang, builder=pyocr.builders.TextBuilder())

    def get_image(self):
        """Read and return the next frame from the camera.

        Raises:
            CameraError: if the camera did not deliver a frame.
        """
        state, frame = self.cam.read()
        if not state:
            raise CameraError("could not read image")
        return frame

    def release(self):
        """Release the underlying capture device."""
        self.cam.release()

        
class OCRCamUI(tk.Frame):
    """Frame showing a live camera preview with an OCR scan action.

    Args:
        parent: widget that contains this frame; destroyed by ``release``.
        ocr_cam: camera object providing ``size``, ``get_image()`` and
            ``get_text()``.
        width, height: size in pixels the preview image is scaled to.
        update_interval: delay in milliseconds between preview updates.
    """

    def __init__(self, parent, ocr_cam, width, height,
                 update_interval=100):
        tk.Frame.__init__(self, parent)
        self.parent = parent
        self.ocr_cam = ocr_cam
        self.width = width
        self.height = height
        self.update_interval = update_interval
        # Id of the pending ``after`` callback, None while nothing is
        # scheduled.
        self.after_id = None
        # Keep a reference to the displayed image so it is not garbage
        # collected while the label still shows it.
        self.tk_image = None
        self.image_label = tk.Label(self)
        self.image_label.pack()

    def run(self):
        """Show the current camera frame and schedule the next update.

        Stops updating and shows an error text when no frame can be read.
        """
        try:
            frame = self.ocr_cam.get_image()
        except RuntimeError:
            # The callback that brought us here has already fired, so
            # there is nothing left to cancel later on.
            self.after_id = None
            self.raise_cam_id_error()
            return
        image = Image.frombytes(
            "RGB", self.ocr_cam.size, frame, "raw", "BGR"
        ).resize((self.width, self.height))
        self.tk_image = ImageTk.PhotoImage(image)
        self.image_label.config(image=self.tk_image)
        self.after_id = self.after(self.update_interval, self.run)

    def get_text(self):
        """Open a window with the text recognised in the current frame.

        Releasing the left mouse button inside the text box copies the
        selected text to the clipboard.
        """
        top = tk.Toplevel(self)
        top.title("OCR SCAN")
        text_box = tk.Text(top)
        text_box.pack()
        try:
            text = self.ocr_cam.get_text()
        except RuntimeError as error:
            print(error)
            text = "NO CAM"
        text_box.insert(tk.END, text)
        text_box.tag_config("sel", background="skyblue")
        text_box.bind("<ButtonRelease-1>", partial(self.paste_to_clipboard,
                                                   top,
                                                   text_box))

    def paste_to_clipboard(self, top, text_box, event):
        """Copy the selection of *text_box* to the clipboard.

        The title of *top* reports whether text was copied. The clipboard
        is left untouched when nothing is selected.
        """
        try:
            selection = text_box.get(tk.SEL_FIRST, tk.SEL_LAST)
        except tk.TclError:
            top.title("OCR SCAN --> NO TEXT SELECTED")
            return
        self.parent.clipboard_clear()
        self.parent.clipboard_append(selection)
        top.title("OCR SCAN--> TEXT COPIED TO CLIPBOARD")

    def raise_cam_id_error(self):
        """Replace the preview image with an error text."""
        self.image_label.config(image="", text='> NO CAM <', font='Arial 60')

    def release(self):
        """Cancel the pending preview update and destroy the parent."""
        # ``after_cancel(None)`` raises ValueError, so only cancel when an
        # update is actually scheduled.
        if self.after_id is not None:
            self.after_cancel(self.after_id)
            self.after_id = None
        self.parent.destroy()
        
def main():
    """Build the main window and run the camera preview.

    Shows an error label instead of the preview when the default camera
    cannot be opened.
    """
    root = tk.Tk()
    root.title("OCR CAM")
    root.resizable(0, 0)

    # Only opening the camera is guarded, so unrelated RuntimeErrors are
    # not misreported as a camera problem.
    try:
        ocr_cam = OCRCam()
    except RuntimeError:
        tk.Label(root, text="can not open camera {0!r}".format(
            DEFAULT_CAM_ID), font="Arial 20", height=10).pack()
        root.mainloop()
        return

    with ocr_cam:
        ocr_cam_ui = OCRCamUI(root, ocr_cam, WIDTH, HEIGHT)
        ocr_cam_ui.pack()
        ocr_cam_ui.run()
        tk.Button(root, text="SCAN", command=ocr_cam_ui.get_text).pack()
        # Closing the window must stop the preview loop first.
        root.protocol("WM_DELETE_WINDOW", ocr_cam_ui.release)
        root.mainloop()

# Start the application only when the file is executed as a script.
if __name__ == "__main__":
    main()
Gruß Frank
Antworten