@__deets__ Ich werde mir das Paper heute Abend mal genauer zu Gemüte führen, danke.
@pillmuncher Super, das schaue ich mir morgen mal genauer an. Hast du da mit deiner Lib vllt. ein kleines Beispiel parat, um in Blöcke und Linien zu gruppieren?
Lib zu supporten (Open Source). Wenn dort dein Code hilfreich ist, verlinke ich das auch sehr gerne.
Mein Support besteht eher darin, einen HOCR-Parser hinzuzufügen, um PDF-Dateien mit Textlayer zu erzeugen (um die XML-Datei erzeugen zu können, müssen die Koordinaten + Ergebnisse korrekt und in der richtigen Reihenfolge sein), sowie danach ein optimiertes Modell zur Texterkennung und zum Extrahieren von Tabellen aus Dokumenten.
Ich füge mal noch Testcode hinzu. Falls du Bock und Zeit hast, gerne ein Beispiel mit deiner Lib (PS: das sind alles nur Tests).
Code: Alles auswählen
import math
import itertools
from scipy.spatial import distance as dist
import cv2
import numpy as np
from PIL import Image
import copy
import quads
from doctr.models import ocr_predictor
from doctr.io import DocumentFile
# The same sample page is loaded twice: as an OpenCV image (used for drawing
# further down) and as a docTR document (fed to the OCR model).
image_path = '/home/felix/Desktop/Data/OCR_Data/1.jpg'
image = cv2.imread(image_path)
img = DocumentFile.from_images(image_path)

# Build the detection + recognition pipeline with pretrained weights and run
# it on the document.
model = ocr_predictor(
    det_arch='db_mobilenet_v3_large',
    reco_arch='crnn_vgg16_bn',
    pretrained=True,
)
result = model(img)
# print(result)
# result.show(img)
print(result.render())
def merge_boxes(box1, box2):
    """Return the smallest box that covers both ``box1`` and ``box2``.

    Entries 0 and 1 of the inputs are combined with ``min``, entries 2 and 3
    with ``max``; presumably the layout is ``[xmin, ymin, xmax, ymax]`` --
    confirm against callers.

    Returns:
        A new four-element list; neither input is modified.
    """
    low_first = min(box1[0], box2[0])
    low_second = min(box1[1], box2[1])
    high_first = max(box1[2], box2[2])
    high_second = max(box1[3], box2[3])
    return [low_first, low_second, high_first, high_second]
# Accumulators filled in lockstep, one entry per recognised word.
points = []               # raw word.geometry values as delivered by the OCR
box_centres = []          # (x, y) box centres in pixels
max_coords = []           # (xmax, ymax) corners in pixels
left_upper_corners = []   # (xmin, ymin) corners in pixels
right_upper_corners = []  # (xmax, ymin) corners in pixels
normalized_boxes = []     # [xmin, ymin, width, height] in pixels
test_boxes = []           # [[xmin, ymin], [xmax, ymax]] in pixels
center_points = []        # same values as box_centres
boxes = []                # (ymin, ymax, xmin, xmax) in pixels
h = None
w = None

for page in result.pages:
    # NOTE(review): h and w are overwritten for every page, so after the loop
    # they hold the dimensions of the last page only.
    h, w = page.dimensions
    page_words = (
        word
        for block in page.blocks
        for line in block.lines
        for word in line.words
    )
    for word in page_words:
        points.append(word.geometry)

        # The geometry is unpacked as two (x, y) pairs; the values are scaled
        # by the page size below, so they are presumably relative
        # coordinates -- confirm against the docTR documentation.
        (rel_xmin, rel_ymin), (rel_xmax, rel_ymax) = word.geometry
        xmin = int(rel_xmin * w)
        ymin = int(rel_ymin * h)
        xmax = int(rel_xmax * w)
        ymax = int(rel_ymax * h)

        boxes.append((ymin, ymax, xmin, xmax))

        y_box_heigth = ymax - ymin
        x_box_width = xmax - xmin
        x_y_width_height = [xmin, ymin, xmax - xmin + 1, ymax - ymin + 1]
        normalized_boxes.append(x_y_width_height)

        # Integer centre of the word box.
        x = (xmin + xmax) // 2
        y = (ymin + ymax) // 2
        center_points.append((x, y))
        box_centres.append((x, y))

        test_boxes.append([[xmin, ymin], [xmax, ymax]])

        left_upper_corner = (xmin, ymin)
        right_upper_corner = (xmax, ymin)
        left_upper_corners.append(left_upper_corner)
        right_upper_corners.append(right_upper_corner)
        max_coords.append((xmax, ymax))

print(h)
print(w)
#print(normalized_boxes)
#print(boxes)
#print(center_points)
#print(result.render())
#zipped_list = zip(center_points, boxes)
#sorted_boxes = sorted(zipped_list, key=min([x[1] for x in zipped_list]))
#print(sorted_boxes)
#boxes = [x[1] for x in sorted_boxes]
# White canvas with the page dimensions; the debug drawings below go here.
img = 255 * np.ones((h, w, 3), np.uint8)
x, y = w // 2, h // 2  # center of image

# boxes holds (ymin, ymax, xmin, xmax) tuples, so this orders them by
# ymax * w first and by ymin second.
boxes.sort(key=lambda box: (box[1] * w, box[0]))

# NOTE(review): only the first 79 boxes are outlined -- looks like a debug
# limit; confirm whether all boxes should be drawn.
for ymin, ymax, xmin, xmax in boxes[:79]:
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
cv2.imwrite('x.jpg', image)

# Order the (x, y) centres by y first, then by x.
center_points = sorted(center_points, key=lambda pt: (pt[1], pt[0]))
# center_points = sorted(center_points, key=lambda x: x[0] + x[1] * w)

# NOTE(review): the last centre is deliberately left out here because the
# original loop stopped one element early -- confirm whether that is intended.
# The corner lists that used to be zipped in were only consumed by drawing
# calls that are disabled, and they have the same length as center_points.
for center in center_points[:-1]:
    cv2.circle(img, center=center, radius=3, color=(0, 0, 255), thickness=3)
cv2.imwrite('y.jpg', img)
def overlap(source, target):
    """Tell whether two boxes overlap on both axes.

    Each box is a pair ``(tl, br)`` of points; index 0 and index 1 of every
    point are compared independently. Boxes that only touch (equal
    coordinates on an axis) are reported as not overlapping.

    Returns:
        ``True`` when the boxes overlap, ``False`` otherwise.
    """
    src_tl, src_br = source
    dst_tl, dst_br = target
    # The boxes are disjoint as soon as one starts at or after the other's
    # end on either axis; anything else is an overlap.
    return not (
        src_tl[0] >= dst_br[0]
        or dst_tl[0] >= src_br[0]
        or src_tl[1] >= dst_br[1]
        or dst_tl[1] >= src_br[1]
    )
def getAllOverlaps(boxes, bounds, index):
    """Return the positions of all boxes that overlap ``bounds``.

    Args:
        boxes: sequence of boxes in the format accepted by ``overlap``.
        bounds: the box every entry of ``boxes`` is tested against.
        index: position in ``boxes`` that is skipped (the box ``bounds`` was
            derived from, so it is not reported as overlapping itself).

    Returns:
        A list of indices into ``boxes`` in ascending order.
    """
    return [
        position
        for position, candidate in enumerate(boxes)
        if position != index and overlap(bounds, candidate)
    ]
def tup(point):
    """Return the first two entries of ``point`` as a 2-tuple."""
    first, second = point[0], point[1]
    return first, second
# Greedy box merging: grow a box by ``merge_margin`` pixels on every side,
# fuse it with every other box the grown rectangle overlaps, then restart the
# scan. The outer loop ends once a complete pass performs no merge.
#
# Work on a shallow copy so the per-word boxes in ``test_boxes`` are not
# consumed by the deletions below (the inner point lists are never mutated).
boxes = list(test_boxes)
index_list = list()
merge_margin = 15

# Restarting after every single merge makes this slow for many boxes.
finished = False
while not finished:
    # Assume this pass is the last one until a merge proves otherwise.
    finished = True

    # check progress
    print("Len Boxes: " + str(len(boxes)))

    index = 0
    while index < len(boxes) - 1:
        # TODO: rewrite this into function and track merged boxes by index -> also possible if we grow only left to right ?
        # TODO: possible to use also for lines -> grow only left to right : two modus -> "blocks" and "lines"
        # TODO: or compute distance from all boxes inside a block (permutations) and then sort them !??
        curr = boxes[index]

        # Copy the corner points before widening them so the stored box
        # itself keeps its original coordinates.
        tl, br = curr[0][:], curr[1][:]
        tl[0] -= merge_margin
        tl[1] -= merge_margin
        br[0] += merge_margin
        br[1] += merge_margin

        # Indices of all other boxes touched by the widened rectangle.
        overlaps = getAllOverlaps(boxes, [tl, br], index)
        print(overlaps)  # TODO: <----- thats the single box indexes !!!!

        if overlaps:
            # Gather the corner points of the current box and of everything
            # it overlaps into one contour-shaped array.
            overlaps.append(index)
            con = []
            for ind in overlaps:
                tl, br = boxes[ind]
                con.append([tl])
                con.append([br])
            con = np.array(con)

            # Bounding rectangle of all gathered corners. Dedicated names are
            # used so the page dimensions ``h`` / ``w`` (and ``x`` / ``y``)
            # defined earlier in the script are not overwritten.
            rect_x, rect_y, rect_w, rect_h = cv2.boundingRect(con)
            merged = [
                [rect_x, rect_y],
                [rect_x + rect_w - 1, rect_y + rect_h - 1],
            ]

            # Delete from the highest index downwards so the remaining
            # indices stay valid while removing.
            overlaps.sort(reverse=True)
            print(overlaps)
            for ind in overlaps:
                del boxes[ind]
            boxes.append(merged)

            # The list changed: request another pass and restart the scan.
            finished = False
            break

        index += 1

# Outline the merged boxes on the canvas and save the result.
for box in boxes:
    cv2.rectangle(img, tup(box[0]), tup(box[1]), (0, 200, 0), 1)
print(boxes)
cv2.imwrite('z.jpg', img)
"""
def calculate_ratio_to_image(h, w, distance):
return {'height_ratio': (distance * 100) / int(h), 'width_ratio': (distance * 100) / int(w) }
line_indexes = list()
temp_list = list()
for idx, corners in enumerate(left_right_upper_corners):
if idx == len(left_right_upper_corners)-1:
break
cv2.line(image, left_right_upper_corners[idx][1], left_right_upper_corners[idx+1][0], (100, 100, 100), thickness=3, lineType=8)
max_value_distance = math.sqrt(((left_right_upper_corners[idx][1][0] - left_right_upper_corners[idx+1][0][0]) ** 2) + ((left_right_upper_corners[idx][1][1] - left_right_upper_corners[idx+1][0][1]) ** 2))
# print(max_value_distance)
ratio_to_image = calculate_ratio_to_image(h, w, max_value_distance)
# if ratio_to_image < 12 and idx not in temp_list and idx+1 not in temp_list:
# temp_list.append(idx)
# temp_list.append(idx+1)
# else:
cv2.imwrite('x.jpg', image)
"""
"""
def compute_box_distances(box_points):
combined_elements = list()
for idx, box_point in enumerate(box_points):
if idx == len(box_points)-1:
break
max_value_distance = math.sqrt(((int(box_points[idx][0])-int(box_points[idx+1][0]))**2)+((int(box_points[idx][1])-int(box_points[idx+1][1]))**2))
ratio_to_image = calculate_ratio_to_image(h, w, max_value_distance)
return None
lines = list()
for idx, max_coord in enumerate(max_coords):
if idx == len(max_coords)-1:
break
max_value_distance = math.sqrt(((int(max_coords[idx][0])-int(max_coords[idx+1][0]))**2)+((int(max_coords[idx][1])-int(max_coords[idx+1][1]))**2))
ratio_to_image = calculate_ratio_to_image(h, w, max_value_distance)
# print(max_value_distance)
# print(ratio_to_image)
blocks = list()
for idx, center in enumerate(box_centres):
if idx == len(box_centres)-1:
break
midpoint_distance = math.sqrt(((int(box_centres[idx][0])-int(box_centres[idx+1][0]))**2)+((int(box_centres[idx][1])-int(box_centres[idx+1][1]))**2))
ratio_to_image = calculate_ratio_to_image(h, w, midpoint_distance)
# print(midpoint_distance)
# print(ratio_to_image)
#print('UP')
#print(box_centres)
#print(len(box_centres))
#permutations = calculate_perm(box_centres)
#print(len(permutations))
#D = dist.euclidean((xA, yA), (xB, yB)) / refObj[2]
#(mX, mY) = midpoint((xA, yA), (xB, yB))
#for perm in permutations:
# dist = calculate_centr_distances(perm[0], perm[1])
#print(dist)
#print(points[0])
cv2.imwrite('x.jpg', image)
def organize_elements(result):
pass
"""