了解您的问题,我会帮您修正代码。
1. 针对不同 C 值初始化逻辑回归模型并进行交叉验证评估
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
# Imported here because this block is the only user of cross_validate.
from sklearn.model_selection import cross_validate

# Candidate values for the regularization parameter C.
C_values = [0.01, 0.1, 1, 10, 100]

# Per-C storage: metric name -> list holding the mean CV score (in percent).
# Lists are kept (rather than bare floats) because later code reads `[0]`.
logistic_scores = {
    C: {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
    for C in C_values
}

# The scorers do not depend on C, so build them once instead of per iteration.
scoring = {
    'accuracy': make_scorer(score_func=accuracy_score),
    'precision': make_scorer(score_func=precision_score, average='macro'),
    'recall': make_scorer(score_func=recall_score, average='macro'),
    'f1': make_scorer(score_func=f1_score, average='macro'),
}

# Train and evaluate one model per C value.
for C in C_values:
    lr = LogisticRegression(C=C, solver='liblinear', random_state=42)
    # A single multi-metric cross_validate call scores every metric on the
    # same 5 folds, instead of refitting the model once per metric.
    cv_results = cross_validate(lr, X_train, y_train, cv=5, scoring=scoring)
    for metric in scoring:
        # Results are keyed as 'test_<scorer name>'; store the fold mean in %.
        mean_score = cv_results[f'test_{metric}'].mean() * 100
        logistic_scores[C][metric].append(mean_score)
2. 选择最优 C 值并打印
# 选择最优的C值,基于准确率
# Collect the mean cross-validated accuracy for each candidate C, in the same
# order as C_values, then take the position of the (first) highest one.
accuracy_by_C = [logistic_scores[c]['accuracy'][0] for c in C_values]
optimal_C_idx = accuracy_by_C.index(max(accuracy_by_C))
optimal_C = C_values[optimal_C_idx]
print(f"Optimal C value for Logistic Regression based on accuracy: {optimal_C}")
3. 绘制性能指标随 C 值变化的图
import matplotlib.pyplot as plt
# Plot how each performance metric changes with the regularization parameter C.
plt.figure(figsize=(12, 8))
for metric in logistic_scores[optimal_C]:
    # Each logistic_scores[C][metric] list holds a single mean score, so the
    # curve has to be assembled across all C values; plotting one C's list
    # against C_values would fail with mismatched x/y lengths.
    values = [logistic_scores[c][metric][0] for c in C_values]
    plt.plot(C_values, values, label=f'{metric} (Optimal C={optimal_C})')
plt.title('Logistic Regression Performance Metrics vs. Regularization Parameter C')
plt.xlabel('Regularization Parameter C')
plt.ylabel('Scores (%)')
plt.legend()
plt.xscale('log')  # C spans several orders of magnitude, so use a log axis
plt.grid(True)
plt.show()
请注意,在第2步中,我们只选择了准确率最高的那个 C 值作为最优 C 值,并在第3步中绘制了该 C 值对应的所有性能指标。如果想要为其他性能指标也找到最优 C 值,您可以根据需要修改这部分代码。