Ich habe ein Problem: k-means klassifiziert die Daten nicht korrekt. Ich vermute, dass der Fehler in der kmeans-Methode liegt.
Leider weiß ich nicht mehr weiter.
Mfg
Code: Alles auswählen
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
# In[18]:
# Draw 200 two-dimensional sample points scattered around 4 blob centres.
# The fixed random_state makes the generated data set reproducible.
data = make_blobs(
    n_samples=200,
    n_features=2,
    centers=4,
    cluster_std=1.8,
    random_state=101,
)
# make_blobs returns (samples, true blob index per sample).
feature, cluster = data
# Show the generated points, coloured by the blob they were drawn from.
plt.scatter(feature[:, 0], feature[:, 1], c=cluster, cmap='rainbow')
# In[19]:
# Number of clusters to search for.
k = 4
# Pick k *distinct* sample points as starting centroids. Sampling with
# replacement (the np.random.choice default) can select the same point twice;
# two identical centroids leave one cluster empty and its mean becomes NaN.
random_indices = np.random.choice(len(feature), k, replace=False)
initial_cs = feature[random_indices]
# Working copy, so the initial centroids stay available for comparison.
cs = np.copy(initial_cs)
# In[20]:
def _assign_to_nearest(points, centers):
    """Return, for every row of ``points``, the index of the closest centre."""
    # Broadcasting yields a (k, n_points) matrix of Euclidean distances.
    distances = np.linalg.norm(
        points[np.newaxis, :, :] - centers[:, np.newaxis, :], axis=2
    )
    return np.argmin(distances, axis=0)


def kmeans(data, k, centroids, max_iter=300, tol=1e-8):
    """Cluster ``data`` into ``k`` groups with Lloyd's k-means algorithm.

    Starting from ``centroids``, the assignment step (each point goes to its
    nearest centroid) and the update step (each centroid moves to the mean of
    its points) are repeated until no centroid moves by more than ``tol`` or
    ``max_iter`` update steps have been performed.

    Args:
        data: Array-like of shape (n_points, n_features).
        k: Number of clusters; must match the number of rows in ``centroids``.
        centroids: Array-like of shape (k, n_features) with the starting
            centroids. It is copied and never modified.
        max_iter: Upper bound on the number of update steps. ``max_iter=1``
            performs a single update step.
        tol: Largest per-coordinate centroid movement that still counts as
            "converged".

    Returns:
        A tuple ``(point_classification, new_centroids)``:
        ``point_classification`` holds, for each point, the index of its
        nearest centroid in ``new_centroids``; ``new_centroids`` is the
        (k, n_features) array of final centroids.

    Raises:
        ValueError: If ``k`` is smaller than 1, ``data`` is not 2-D, or
            ``centroids`` does not have shape (k, n_features).
    """
    points = np.asarray(data, dtype=float)
    # np.array copies, so the caller's initial centroids are left untouched.
    centers = np.array(centroids, dtype=float)

    if k < 1:
        raise ValueError("k must be at least 1")
    if points.ndim != 2:
        raise ValueError("data must be a 2-D array of shape (n_points, n_features)")
    if centers.shape != (k, points.shape[1]):
        raise ValueError(
            "centroids must have shape (k, n_features) = "
            f"({k}, {points.shape[1]}), got {centers.shape}"
        )

    for _ in range(max_iter):
        labels = _assign_to_nearest(points, centers)

        updated = centers.copy()
        for cluster_id in range(k):
            members = points[labels == cluster_id]
            # An empty cluster keeps its previous centroid; taking the mean
            # of zero points would turn the centroid into NaN.
            if len(members) > 0:
                updated[cluster_id] = members.mean(axis=0)

        shift = np.abs(updated - centers).max()
        centers = updated
        if shift <= tol:
            break

    # Final assignment so the returned labels always match the returned
    # centroids, even when the loop stopped because of max_iter.
    point_classification = _assign_to_nearest(points, centers)
    return point_classification, centers
# Run k-means from the randomly chosen starting centroids. The original call
# passed the undefined name `initial`, which raises a NameError; the array
# holding the starting centroids is `initial_cs`.
point_classification, new_centroids = kmeans(feature, k, initial_cs)
# Inspect the results:
fig, ax = plt.subplots(figsize=(10, 10))
# Leave a small margin around the data on both axes.
ax.set_xlim(np.min(feature[:, 0]) - 0.2, np.max(feature[:, 0]) + 0.2)
ax.set_ylim(np.min(feature[:, 1]) - 0.2, np.max(feature[:, 1]) + 0.2)
# Points coloured by assigned cluster; centroids drawn as large black crosses.
ax.scatter(feature[:, 0], feature[:, 1], c=point_classification)
ax.plot(new_centroids[:, 0], new_centroids[:, 1], '+', color='black', markersize=20)