需要提前导入的库:
numpy
import numpy as np
绘图
import matplotlib.pyplot as plt
数据集
from sklearn import datasets
各种机器学习库:
kNN算法
from sklearn.neighbors import KNeighborsClassifier
线性回归
from sklearn.linear_model import LinearRegression
PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(X_train)
X_train_reduction = pca.transform(X_train)
X_test_reduction = pca.transform(X_test)
逻辑回归
from sklearn.linear_model import LogisticRegression
SVM
from sklearn.svm import LinearSVC
使用多项式核函数的SVM
from sklearn.svm import SVC
SVC(kernel='poly', degree=degree, C=C)
SVM解决回归问题
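from sklearn.svm import LinearSVR
OvO(注:下面的 multi_class='multinomial' 实际是多项式 softmax 逻辑回归,并非 OvO;真正的 OvO 可用 sklearn.multiclass.OneVsOneClassifier 包装分类器)
log_reg = LogisticRegression(multi_class='multinomial', solver='newton-cg')
OvR
from sklearn.multiclass import OneVsRestClassifier
ovr = OneVsRestClassifier(log_reg)
决策树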
from sklearn.tree import DecisionTreeClassifier
解决回归问题的决策树
from sklearn.tree import DecisionTreeRegressor
集成学习
from sklearn.ensemble import VotingClassifier
Bagging
from sklearn.ensemble import BaggingClassifier
随机森林
from sklearn.ensemble import RandomForestClassifier
数据处理方法:
train_test_split
from sklearn.model_selection import train_test_split
计算准确度
from sklearn.metrics import accuracy_score
网格搜索
from sklearn.model_selection import GridSearchCV
param_grid = [
{ 'weights':['uniform'], 'n_neighbors':[i for i in range(1, 11)]},
{ 'weights':['distance'], 'n_neighbors':[i for i in range(1, 11)],'p':[i for i in range(1, 6)]}
]
knn_clf = KNeighborsClassifier()
grid_search = GridSearchCV(knn_clf, param_grid)
grid_search.fit(X_train, y_train)
均值方差归一化
from sklearn.preprocessing import StandardScaler
standardScaler = StandardScaler()
standardScaler.fit(X_train)
MSE
from sklearn.metrics import mean_squared_error
MAE
from sklearn.metrics import mean_absolute_error
R Square
from sklearn.metrics import r2_score
添加多项式特征
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2) # 添加二次幂特征
poly.fit(X)
X2 = poly.transform(X)
Pipeline
from sklearn.pipeline import Pipeline
# 传入每一步骤所对应的类 1.多项式的特征 2.数据归一化 3.线性回归
poly_reg = Pipeline([
("poly", PolynomialFeatures(degree=2)),
("std_scaler", StandardScaler()),
("lin_reg", LinearRegression())
])
poly_reg.fit(X, y)
y_predict = poly_reg.predict(X)
交叉验证
from sklearn.model_selection import cross_val_score
岭回归
from sklearn.linear_model import Ridge
LASSO
from sklearn.linear_model import Lasso
绘制决策边界
def plot_decision_boundary(model, axis):
    """Draw a fitted classifier's decision regions as filled contours.

    A dense grid is laid over the rectangle described by ``axis``, the
    model predicts a label for every grid point, and the labels are drawn
    on the current matplotlib axes with ``plt.contourf``.

    Args:
        model: Object with a ``predict`` method; it is called once with an
            array of two columns (one row per grid point).
        axis: Indexable of four numbers read as
            ``[x_min, x_max, y_min, y_max]``.

    Returns:
        None. The contour plot is drawn on the current pyplot figure.
    """
    from matplotlib.colors import ListedColormap

    # Sampling density: 100 grid points per unit of axis length.
    n_cols = int((axis[1] - axis[0]) * 100)
    n_rows = int((axis[3] - axis[2]) * 100)
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], n_cols),
        np.linspace(axis[2], axis[3], n_rows),
    )

    # Flatten the grid into (n_points, 2) so the model can score it in one call.
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    # The original also passed ``linewidth=5``; filled contours take no such
    # keyword (matplotlib ignores it and warns), so it is omitted here.
    plt.contourf(x0, x1, zz, cmap=custom_cmap)


# 混淆矩阵 (confusion matrix): section heading for the import on the next line
from sklearn.metrics import confusion_matrix
精准率
from sklearn.metrics import precision_score
召回率
from sklearn.metrics import recall_score
F1 Score
from sklearn.metrics import f1_score
ROC
from sklearn.metrics import roc_curve
ROC_SCORE
from sklearn.metrics import roc_auc_score
本文标题:scikit-learn常用库与函数整理
本文链接:https://blog.quwenai.cn/post/8060.html
版权声明:本文不使用任何协议授权,您可以任何形式自由转载或使用。






还没有评论,来说两句吧...