Ich nutze den multiclass Classifier XGBoost. Im y_all DataFrame stehen Werte wie 0,1,2 oder 3 (4 Klassen). Anhand der Daten von X_all soll der Classifier traniert werden und dann nutzbar sein um neue Daten in einer der 4 Klassen einzustufen.
Kriege folgenden Error:
Code: Alles auswählen
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']
Code: Alles auswählen
[color=#FF0000]ValueError Traceback (most recent call last)
<ipython-input-8-4217ca86886e> in <module>
4 #Boosting refers to this general problem of producing a very accurate prediction rule
5 #by combining rough and moderately inaccurate rules-of-thumb
----> 6 train_predict(clf_xg, X_train, y_train, X_test, y_test)
7 print('')
<ipython-input-7-45b04513073f> in train_predict(clf, X_train, y_train, X_test, y_test)
46
47 # Print the results of prediction for both training and testing
---> 48 f1, acc = predict_labels(clf, X_train, y_train)
49 print(f1, acc)
50 print("F1 score and accuracy score for training set: {:.4f} , {:.4f}.".format(f1 , acc))
<ipython-input-7-45b04513073f> in predict_labels(clf, features, target)
33 print("Made predictions in {:.4f} seconds.".format(end - start))
34 #
---> 35 return f1_score(target, y_pred, pos_label='H'), sum(target == y_pred) / float(len(y_pred))
36
37
~/anacon/anaconda3/envs/eddy_workspace/lib/python3.7/site-packages/sklearn/metrics/_classification.py in f1_score(y_true, y_pred, labels, pos_label, average, sample_weight, zero_division)
1097 pos_label=pos_label, average=average,
1098 sample_weight=sample_weight,
-> 1099 zero_division=zero_division)
1100
1101
~/anacon/anaconda3/envs/eddy_workspace/lib/python3.7/site-packages/sklearn/metrics/_classification.py in fbeta_score(y_true, y_pred, beta, labels, pos_label, average, sample_weight, zero_division)
1224 warn_for=('f-score',),
1225 sample_weight=sample_weight,
-> 1226 zero_division=zero_division)
1227 return f
1228
~/anacon/anaconda3/envs/eddy_workspace/lib/python3.7/site-packages/sklearn/metrics/_classification.py in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight, zero_division)
1482 raise ValueError("beta should be >=0 in the F-beta score")
1483 labels = _check_set_wise_labels(y_true, y_pred, average, labels,
-> 1484 pos_label)
1485
1486 # Calculate tp_sum, pred_sum, true_sum ###
~/anacon/anaconda3/envs/eddy_workspace/lib/python3.7/site-packages/sklearn/metrics/_classification.py in _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
1314 raise ValueError("Target is %s but average='binary'. Please "
1315 "choose another average setting, one of %r."
-> 1316 % (y_type, average_options))
1317 elif pos_label not in (None, 1):
1318 warnings.warn("Note that pos_label (set to %r) is ignored when "
.[/color]
Code: Alles auswählen
def train_classifier(clf, X_train, y_train):
''' Fits a classifier to the training data. '''
# Start the clock, train the classifier, then stop the clock
start = time()
clf.fit(X_train, y_train)
end = time()
# Print the results
print("Trained model in {:.4f} seconds".format(end - start))
def predict_labels(clf, features, target):
''' Makes predictions using a fit classifier based on F1 score. '''
# Start the clock, make predictions, then stop the clock
start = time()
y_pred = clf.predict(features)
end = time()
# Print and return results
print("Made predictions in {:.4f} seconds.".format(end - start))
#
return f1_score(target, y_pred, pos_label='H'), sum(target == y_pred) / float(len(y_pred))
def train_predict(clf, X_train, y_train, X_test, y_test):
''' Train and predict using a classifer based on F1 score. '''
# Indicate the classifier and the training set size
print( "Training a {} using a training set size of {}. . .".format(clf.__class__.__name__, len(X_train)))
# Train the classifier
train_classifier(clf, X_train, y_train)
# Print the results of prediction for both training and testing
f1, acc = predict_labels(clf, X_train, y_train)
print(f1, acc)
print("F1 score and accuracy score for training set: {:.4f} , {:.4f}.".format(f1 , acc))
f1, acc = predict_labels(clf, X_test, y_test)
print("F1 score and accuracy score for test set: {:.4f} , {:.4f}.".format(f1 , acc))
Code: Alles auswählen
from sklearn.model_selection import train_test_split
# Shuffle and split the dataset into training and testing set.
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all,
test_size = 50,
random_state = 2,
stratify = y_all)
Wenn ich average='weighted' setze, ändert sich der Fehler wie folgt:
Code: Alles auswählen
ValueError: Unable to coerce to Series, length must be 1: given 694
Fehler genauer:
Code: Alles auswählen
ValueError Traceback (most recent call last)
<ipython-input-17-4217ca86886e> in <module>
4 #Boosting refers to this general problem of producing a very accurate prediction rule
5 #by combining rough and moderately inaccurate rules-of-thumb
----> 6 train_predict(clf_xg, X_train, y_train, X_test, y_test)
7 print('')
<ipython-input-16-b50ecbd49bb5> in train_predict(clf, X_train, y_train, X_test, y_test)
46
47 # Print the results of prediction for both training and testing
---> 48 f1, acc = predict_labels(clf, X_train, y_train)
49 print(f1, acc)
50 print("F1 score and accuracy score for training set: {:.4f} , {:.4f}.".format(f1 , acc))
<ipython-input-16-b50ecbd49bb5> in predict_labels(clf, features, target)
33 print("Made predictions in {:.4f} seconds.".format(end - start))
34 #
---> 35 return f1_score(target, y_pred, pos_label='H',average='weighted'), sum(target == y_pred) / float(len(y_pred))
36
37
~/anacon/anaconda3/envs/eddy_workspace/lib/python3.7/site-packages/pandas/core/ops/__init__.py in f(self, other)
825 def f(self, other):
826
--> 827 other = _align_method_FRAME(self, other, axis=None)
828
829 if isinstance(other, ABCDataFrame):
~/anacon/anaconda3/envs/eddy_workspace/lib/python3.7/site-packages/pandas/core/ops/__init__.py in _align_method_FRAME(left, right, axis)
645
646 if right.ndim == 1:
--> 647 right = to_series(right)
648
649 elif right.ndim == 2:
~/anacon/anaconda3/envs/eddy_workspace/lib/python3.7/site-packages/pandas/core/ops/__init__.py in to_series(right)
637 if len(left.columns) != len(right):
638 raise ValueError(
--> 639 msg.format(req_len=len(left.columns), given_len=len(right))
640 )
641 right = left._constructor_sliced(right, index=left.columns)