
# Code: select all  (forum paste header, kept as a comment so the file parses)
#!/usr/bin/env python
# coding: utf-8
# In[6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
# In[12]:
# Generate a 2-D toy data set: 200 points scattered around 4 blob centres.
# The fixed random_state makes the generated points reproducible.
data = make_blobs(
    n_samples=200,
    n_features=2,
    centers=4,
    cluster_std=1.8,
    random_state=101,
)

# make_blobs returns a (samples, labels) pair; give both parts a name.
feature, cluster = data

# Show the raw points, coloured by the blob each one was generated from.
plt.scatter(feature[:, 0], feature[:, 1], c=cluster, cmap='rainbow')
# In[30]:
# Number of clusters to look for (matches the 4 centres generated above and
# the k passed to kmeans() below).
k = 4

# Pick k *distinct* data points as the initial centroids.  Sampling without
# replacement avoids two centroids starting on the same point, which would
# leave one cluster empty right after the first assignment step.
indices = np.random.choice(len(feature), k, replace=False)

# Indexing with an integer array already yields a fresh array, so the
# centroids can be modified later without touching `feature`.
initial = feature[indices]

# Cluster label per point; -1 marks "not assigned to any cluster yet".
point_classification = np.full(len(feature), -1)
# In[57]:
def kmeans(data, k, centroids):
    """Run a single k-means step: assign points, then update the centroids.

    This performs exactly one assignment/update pass; call it repeatedly
    (feeding the returned centroids back in) to iterate towards convergence.

    Parameters
    ----------
    data : array-like of shape (n_points, n_dims)
        The points to cluster.
    k : int
        Number of clusters; must equal ``len(centroids)``.
    centroids : array-like of shape (k, n_dims)
        Current centroid positions.

    Returns
    -------
    point_classification : numpy.ndarray of shape (n_points,)
        Index of the nearest centroid for every point.
    new_centroids : numpy.ndarray of shape (k, n_dims)
        Mean of the points assigned to each centroid.  A centroid that
        received no points keeps its previous position instead of
        becoming NaN.

    Raises
    ------
    ValueError
        If the number of centroids does not match ``k``.
    """
    data = np.asarray(data)
    centroids = np.asarray(centroids, dtype=float)
    if len(centroids) != k:
        raise ValueError(
            "expected %d centroids, got %d" % (k, len(centroids))
        )

    # Broadcast to (k, n_points, n_dims) and reduce over the coordinate axis,
    # giving the Euclidean distance of every point to every centroid.
    alldistances = np.linalg.norm(
        data[np.newaxis, :, :] - centroids[:, np.newaxis, :], axis=2
    )

    # For each point (column) pick the row, i.e. centroid, with least distance.
    point_classification = np.argmin(alldistances, axis=0)

    # Start from the old positions so empty clusters stay where they were.
    new_centroids = centroids.copy()
    for pointclass in range(k):
        members = data[point_classification == pointclass]
        if len(members):
            new_centroids[pointclass] = members.mean(axis=0)

    return point_classification, new_centroids
# Run one k-means step on the blob data, starting from the initial centroids.
point_classification, new_centroids = kmeans(feature, 4, initial)

# Inspect the results:
fig, ax = plt.subplots(figsize=(10, 10))
# Pad the x-axis by 0.2 on both sides so the outermost points are not drawn
# right on the border of the plot.
ax.set_xlim(feature[:, 0].min() - 0.2, feature[:, 0].max() + 0.2)