Hoshen-Kopelman Algorithmus zur Cluster Detektion
Verfasst: Montag 17. Juli 2023, 16:37
Hallo zusammen,
ich bin ziemlich neu in der Programmierszene und ziemlich überfordert
Ich möchte den Hoshen-Kopelman Algorithmus zur Cluster Detektion in einer Matrix anwenden und anschließend die "Größe" des größten Clusters bestimmen. Dafür habe folgenden Code dafür geschrieben (siehe unten). Allerdings sind einige Cluster, je nach Beispiel Matrix, falsch gelabelt (in meinem Code müsste z.B. 3 und 6 dasselbe Cluster sein). Könnte mir jemand bei der Fehlerbehebung helfen?
import numpy as np
import matplotlib.pyplot as plt
class UnionFind:
def __init__(self, max_labels):
self.labels = [0] * max_labels
self.labels[0] = 0
self.n_labels = max_labels
def find(self, x):
y = x
while self.labels[y] != y:
y = self.labels[y]
while self.labels[x] != x:
z = self.labels[x]
self.labels[x] = y
x = z
return y
def union(self, x, y):
self.labels[self.find(x)] = self.find(y)
return self.find(x)
def make_set(self):
self.labels[0] += 1
assert self.labels[0] < self.n_labels
self.labels[self.labels[0]] = self.labels[0]
return self.labels[0]
def hoshen_kopelman(matrix, m, n):
uf = UnionFind(m * n // 2)
cluster_sizes = {} # Dictionary zur Speicherung der Clustergrößen
for i in range(m):
for j in range(n):
if matrix[i][j]:
up = matrix[i - 1][j] if i > 0 else 0
left = matrix[i][j - 1] if j > 0 else 0
if up == 0 and left == 0:
matrix[i][j] = uf.make_set()
elif up == 0 or left == 0:
matrix[i][j] = max(up, left)
else:
matrix[i][j] = uf.union(up, left)
label = matrix[i][j]
cluster_sizes[label] = cluster_sizes.get(label, 0) + 1
largest_cluster_size = max(cluster_sizes.values())
matrix_size = m * n
largest_cluster_ratio = largest_cluster_size / matrix_size
return matrix, largest_cluster_ratio
matrix1 = np.array([[1, 0, 0, 1, 0, 1],
[1, 0, 1, 0, 0, 1],
[1, 0, 1, 1, 0, 1],
[0, 1, 0, 0, 1, 1],
[0, 1, 1, 0, 1, 1],
[1, 0, 1, 0, 1, 1]])
hkm, largest_cluster_ratio = hoshen_kopelman(matrix1, 6, 6)
fig, ax = plt.subplots()
min_val = 0
max_val = 6
m = 6
n = 6
ax.matshow(hkm, cmap=plt.cm.Blues)
for i in range(m):
for j in range(n):
c = hkm[i, j]
ax.text(j, i, str(c), va='center', ha='center')
plt.show()
print("size of the largest cluster(ratio to the matrix size):", largest_cluster_ratio)
ich bin ziemlich neu in der Programmierszene und ziemlich überfordert

import numpy as np
import matplotlib.pyplot as plt
class UnionFind:
def __init__(self, max_labels):
self.labels = [0] * max_labels
self.labels[0] = 0
self.n_labels = max_labels
def find(self, x):
y = x
while self.labels[y] != y:
y = self.labels[y]
while self.labels[x] != x:
z = self.labels[x]
self.labels[x] = y
x = z
return y
def union(self, x, y):
self.labels[self.find(x)] = self.find(y)
return self.find(x)
def make_set(self):
self.labels[0] += 1
assert self.labels[0] < self.n_labels
self.labels[self.labels[0]] = self.labels[0]
return self.labels[0]
def hoshen_kopelman(matrix, m, n):
uf = UnionFind(m * n // 2)
cluster_sizes = {} # Dictionary zur Speicherung der Clustergrößen
for i in range(m):
for j in range(n):
if matrix[i][j]:
up = matrix[i - 1][j] if i > 0 else 0
left = matrix[i][j - 1] if j > 0 else 0
if up == 0 and left == 0:
matrix[i][j] = uf.make_set()
elif up == 0 or left == 0:
matrix[i][j] = max(up, left)
else:
matrix[i][j] = uf.union(up, left)
label = matrix[i][j]
cluster_sizes[label] = cluster_sizes.get(label, 0) + 1
largest_cluster_size = max(cluster_sizes.values())
matrix_size = m * n
largest_cluster_ratio = largest_cluster_size / matrix_size
return matrix, largest_cluster_ratio
matrix1 = np.array([[1, 0, 0, 1, 0, 1],
[1, 0, 1, 0, 0, 1],
[1, 0, 1, 1, 0, 1],
[0, 1, 0, 0, 1, 1],
[0, 1, 1, 0, 1, 1],
[1, 0, 1, 0, 1, 1]])
hkm, largest_cluster_ratio = hoshen_kopelman(matrix1, 6, 6)
fig, ax = plt.subplots()
min_val = 0
max_val = 6
m = 6
n = 6
ax.matshow(hkm, cmap=plt.cm.Blues)
for i in range(m):
for j in range(n):
c = hkm[i, j]
ax.text(j, i, str(c), va='center', ha='center')
plt.show()
print("size of the largest cluster(ratio to the matrix size):", largest_cluster_ratio)