ValueError: array must not contain infs or NaNs
Den Datensatz findet ihr hier: https://www.kaggle.com/datasets/varpit9 ... ings-stats
Code: Alles auswählen
from scipy import stats
import numpy as np
import pandas as pd
from pingouin import multivariate_normality
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
rng = np.random.default_rng()
pts = 1000
a = rng.normal(0, 1, size=pts)
b = rng.normal(2, 1, size=pts)
df = pd.read_csv("Football teams.csv")
data = df.drop(['Team','Tournament'], axis=1)
multivariate_normality(data, alpha=.5)
df.head()
# In[25]:
sns.pairplot(data, hue="Rating")
# In[ ]:
# In[87]:
data['Rating'] = np.where(df["Rating"] <= 6.6, 0, 1)
sum(data['Rating'] == 0)
# In[88]:
sns.pairplot(data, hue="Rating")
# In[108]:
K = 2 # number of clusters
means = np.zeros((K,8))
covs = np.zeros((K,8,8))
for k in range(K):
means[k] = np.random.normal(size=(8,))
covs[k] = np.eye(8)
weights = np.ones((K,1))/K
print("Initial mean vectors (one per row):\n" + str(means))
print(data.shape)
# In[107]:
r = np.zeros((K,98)) # will store the responsibilities
X, Y = np.meshgrid(np.linspace(-10,10,100), np.linspace(-10,10,100))
N=98
for em_iter in range(100):
means_old = means.copy()
# E-step: update responsibilities
for k in range(K):
r[k] = weights[k]*multivariate_normal.pdf(mean=means[k,:], cov=covs[k,:,:], x=data)
r = r/np.sum(r, axis=0)
# M-step
N_k = np.sum(r, axis=1)
for k in range(K):
# update means
means[k] = np.sum(r[k]*data.T, axis=1)/N_k[k]
# update covariances
diff = data.T - means[k:k+1].T
_tmp = np.sqrt(r[k:k+1])*diff
covs[k] = np.inner(_tmp, _tmp)/N_k[k]
# weights
weights = N_k/N
# log-likelihood
gmm_nll = 0
for k in range(K):
gmm_nll += weights[k]*multivariate_normal.pdf(mean=means[k,:].ravel(), cov=covs[k,:,:], x=data)
NLL += [-np.sum(np.log(gmm_nll))]
plt.figure()
plt.plot(x, y, 'ko', alpha=0.3)
plt.plot(means[:,0], means[:,1], 'oy', markersize=25)
for k in range(K):
rv = multivariate_normal(means[k,:], covs[k])
plt.contour(X, Y, rv.pdf(pos), alpha = 1.0, zorder=10)
plt.xlabel("$x_1$")
plt.ylabel("$x_2$")
plt.text(x=3.5, y=8, s="EM iteration "+str(em_iter+1))
if la.norm(NLL[em_iter+1]-NLL[em_iter]) < 1e-6:
print("Converged after iteration ", em_iter+1)
break
# plot final the mixture model
plt.figure()
gmm = 0
for k in range(3):
mix_comp = multivariate_normal(means[k,:].ravel(), covs[k,:,:])
gmm += weights[k]*mix_comp.pdf(pos)
plt.plot(data['Goals'], data['yellow_cards'], 'ko', alpha=0.3)
plt.contour(X, Y, gmm, alpha = 1.0, zorder=10)
plt.xlim([-8,8]);
plt.ylim([-6,6]);
# In[ ]: