Die Größe des Dokumentes ist natürlich von der Auflösung der Kamera abhängig, doch Teilbereiche gehen auch bei schlechter Auflösung. Der markierte Text wird in der Zwischenablage gespeichert.
Code: Alles auswählen
#! /usr/bin/env python
# -*- coding: utf-8
import tkinter as tk
import cv2
from PIL import Image, ImageTk
from functools import partial
import pyocr
# Size in pixels that the live preview is scaled to before it is displayed.
WIDTH, HEIGHT = 640, 480

# Camera index handed to cv2.VideoCapture when the caller does not pick one.
# NOTE(review): presumably -1 asks OpenCV to pick a device itself - confirm
# against the OpenCV version in use.
DEFAULT_CAM_ID = -1
class OCRCam(object):
    """Camera wrapper that grabs frames with OpenCV and OCRs them with pyocr.

    Instances work as context managers: leaving the ``with`` block releases
    the capture device.
    """

    # Property ids passed to ``VideoCapture.get`` to query the frame size.
    PROPID_WIDTH = 3
    PROPID_HEIGHT = 4

    def __init__(self, cam_id=DEFAULT_CAM_ID):
        """Open capture device *cam_id* and remember its reported frame size.

        Raises:
            RuntimeError: if the capture device does not report as opened.
        """
        capture = cv2.VideoCapture(cam_id)
        self.cam = capture
        if not capture.isOpened():
            raise RuntimeError("can not open camera {0!r}".format(cam_id))
        self.width = int(capture.get(self.PROPID_WIDTH))
        self.height = int(capture.get(self.PROPID_HEIGHT))

    @property
    def size(self):
        """Return the frame size as a ``(width, height)`` tuple."""
        return (self.width, self.height)

    def __enter__(self):
        """Enter the context; the camera itself is the managed object."""
        return self

    def __exit__(self, *args):
        """Release the capture device when the context is left."""
        self.release()

    def get_text(self, lang="deu"):
        """Grab one frame and return the text tesseract recognises in it.

        Args:
            lang: language code forwarded to tesseract.

        Raises:
            RuntimeError: if no frame could be read from the camera.
        """
        frame = self.get_image()
        # The frame bytes are decoded as raw BGR data into an RGB image.
        picture = Image.frombytes("RGB", self.size, frame, "raw", "BGR")
        text_builder = pyocr.builders.TextBuilder()
        return pyocr.tesseract.image_to_string(
            picture, lang=lang, builder=text_builder)

    def get_image(self):
        """Read and return the next frame from the camera.

        Raises:
            RuntimeError: if the capture device reports a failed read.
        """
        ok, frame = self.cam.read()
        if ok:
            return frame
        raise RuntimeError("could not read image")

    def release(self):
        """Release the underlying capture device."""
        self.cam.release()
class OCRCamUI(tk.Frame):
    """Live camera preview with an OCR scan action.

    The frame shows the current camera picture in a label, refreshes it
    periodically via ``after`` and can open a window containing the text
    recognised in the current frame.
    """

    def __init__(self, parent, ocr_cam, width, height,
                 update_interval=100):
        """Build the preview widget.

        Args:
            parent: Tk widget owning this frame; destroyed by ``release``.
            ocr_cam: camera object providing ``size``, ``get_image()`` and
                ``get_text()``.
            width: width in pixels the preview picture is resized to.
            height: height in pixels the preview picture is resized to.
            update_interval: delay in milliseconds between two refreshes.
        """
        tk.Frame.__init__(self, parent)
        self.parent = parent
        self.ocr_cam = ocr_cam
        self.width = width
        self.height = height
        self.update_interval = update_interval
        # Id of the pending ``after`` callback, None while none is scheduled.
        self.after_id = None
        self.tk_image = None
        self.image_label = tk.Label(self)
        self.image_label.pack()

    def run(self):
        """Show the current frame and schedule the next refresh.

        When the camera cannot deliver a frame, the preview is replaced by
        an error message and no further refresh is scheduled.
        """
        try:
            frame = self.ocr_cam.get_image()
        except RuntimeError:
            # Nothing is scheduled any more; release() must not cancel.
            self.after_id = None
            self.raise_cam_id_error()
            return
        picture = Image.frombytes(
            "RGB", self.ocr_cam.size, frame, "raw", "BGR"
        ).resize((self.width, self.height))
        # Stored on the instance so the image stays referenced while shown.
        self.tk_image = ImageTk.PhotoImage(picture)
        self.image_label.config(image=self.tk_image, text="")
        self.after_id = self.after(self.update_interval, self.run)

    def raise_cam_id_error(self):
        """Replace the preview picture with a camera error message."""
        self.tk_image = None
        self.image_label.config(image="", text="could not read image")

    def get_text(self):
        """Run OCR on the current frame and show the result in a new window.

        Releasing the left mouse button inside the text box copies the
        selected text to the clipboard.
        """
        # Grab and recognise first, so a failed read does not leave an
        # empty result window behind.
        try:
            recognised = self.ocr_cam.get_text()
        except RuntimeError:
            self.raise_cam_id_error()
            return
        top = tk.Toplevel(self.parent)
        top.title("OCR SCAN")
        text_box = tk.Text(top)
        text_box.pack()
        text_box.insert(tk.END, recognised)
        text_box.tag_config("sel", background="skyblue")
        text_box.bind("<ButtonRelease-1>",
                      partial(self.paste_to_clipboard, top, text_box))

    def paste_to_clipboard(self, top, text_box, event):
        """Copy the selection of *text_box* to the clipboard.

        The title of *top* reports whether text was copied. The clipboard
        is left untouched when nothing is selected.

        Args:
            top: window whose title is used for feedback.
            text_box: text widget holding the OCR result.
            event: Tk event that triggered the call (unused).
        """
        try:
            selection = text_box.get(tk.SEL_FIRST, tk.SEL_LAST)
        except tk.TclError:
            # No "sel" range exists, i.e. nothing is selected.
            top.title("OCR SCAN --> NO TEXT SELECTED")
            return
        top.clipboard_clear()
        top.clipboard_append(selection)
        top.title("OCR SCAN--> TEXT COPIED TO CLIPBOARD")

    def release(self):
        """Stop the refresh loop and destroy the parent widget."""
        # after_cancel rejects None, which is the state before the first
        # refresh was scheduled and after a failed frame read.
        if self.after_id is not None:
            self.after_cancel(self.after_id)
            self.after_id = None
        self.parent.destroy()
def main():
    """Open the default camera and run the OCR preview application.

    If the camera cannot be opened, a window with an error message is shown
    instead of the preview.
    """
    root = tk.Tk()
    root.title("OCR CAM")
    root.resizable(0, 0)

    # Only opening the camera is guarded: a RuntimeError raised later must
    # not be reported as "can not open camera" on a window that may already
    # have been destroyed.
    try:
        ocr_cam = OCRCam()
    except RuntimeError:
        tk.Label(root, text="can not open camera {0!r}".format(
            DEFAULT_CAM_ID), font="Arial 20", height=10).pack()
        root.mainloop()
        return

    # The context manager releases the camera once the main loop has ended.
    with ocr_cam:
        ocr_cam_ui = OCRCamUI(root, ocr_cam, WIDTH, HEIGHT)
        ocr_cam_ui.pack()
        ocr_cam_ui.run()
        tk.Button(root, text="SCAN", command=ocr_cam_ui.get_text).pack()
        # Closing the window stops the refresh loop before destroying root.
        root.protocol("WM_DELETE_WINDOW", ocr_cam_ui.release)
        root.mainloop()


if __name__ == "__main__":
    main()