Seite 1 von 1

Regression

Verfasst: Freitag 31. Juli 2020, 10:35
von patite
Hallo
Ich arbeite mit dem Dataset Boston housing
X, y = load_boston(return_X_y=True)
Hier ist mein code:

Code: Alles auswählen

# 5-fold cross-validation
k_folds = KFold(n_splits=5, random_state=10, shuffle=True)        
    
# performance metrics

def mse_score(y_true, y_pred):
    return np.mean( (y_true - y_pred) ** 2.0)    

def rmse_score(y_true, y_pred):
    return np.sqrt(mse_score(y_true, y_pred))

def mad_score(y_true, y_pred):
    return np.mean( np.abs(y_true - y_pred) )

def pr2_score(y_true, y_pred):                       
    return np.corrcoef(y_true, y_pred)[0, 1] ** 2.0

perf_metrics = {"MSE": mse_score, 
                "RMSE": rmse_score, 
                "Pseudo-R2": pr2_score,
                "MAD": mad_score
                }

# pre-instantiation
df_metrics = pd.DataFrame(index=[0], columns=["Fold", "Metric", "Train", "Test"])

# main loop
k, f = 0, 0
for (train, test) in k_folds.split(X):
    f += 1
   # separate variables and folds
    x_train = X.values[train],    
    x_test = X.values[test]
      
    y_train = y.values[train]
    y_test = y.values[test]
    
    # fit model
    lr = LinearRegression().fit(x_train, y_train)    
    y_train_pred = ml.predict(x_train)
    y_test_pred = ml.predict(x_test)

    # compute metrics
    for pf in list(perf_metrics.keys()):
        df_metrics.loc[k, "Fold"] = f
        df_metrics.loc[k, "Metric"] = pf
       
        df_metrics.loc[k, "Train"] = perf_metrics[pf](y_train.ravel(),y_train_pred.ravel())
        df_metrics.loc[k, "Test"] = perf_metrics[pf](y_test.ravel(), y_test_pred.ravel())
        k += 1
        
# final organization
df_metrics = df_metrics.apply(pd.to_numeric, errors="ignore");
Ich bekomme folgende Fehlermeldung. Weisst jemand wo das Problem liegt?
ValueError Traceback (most recent call last)
<ipython-input-47-3aad90aea50c> in <module>
27
28 df_metrics.loc[k, "Train"] = perf_metrics[pf](y_train.ravel(),y_train_pred.ravel())
---> 29 df_metrics.loc[k, "Test"] = perf_metrics[pf](y_test.ravel(), y_test_pred.ravel())
30 k += 1
31

<ipython-input-8-0c6c11603f73> in mse_score(y_true, y_pred)
9 # homework: https://scikit-learn.org/stable/modules ... on-metrics
10 def mse_score(y_true, y_pred):
---> 11 return np.mean( (y_true - y_pred) ** 2.0) #instead of using the sklearn function mean_squared_error the function is defined
12
13 def rmse_score(y_true, y_pred):

ValueError: operands could not be broadcast together with shapes (102,) (101,)