常用接口
# Commonly used scikit-learn / SciPy entry points referenced in these notes.
# One import statement per line: several `from ... import ...` statements
# cannot share a physical line without `;` separators.
from sklearn import datasets
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from scipy.stats import pearsonr
KNN K近邻算法
sklearn.neighbors.KNeighborsClassifier分类器
from sklearn.neighbors import KNeighborsClassifier

# Build a k-nearest-neighbours classifier that consults 3 neighbours.
k = KNeighborsClassifier(n_neighbors=3)
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset and keep 70% of the samples for training; the fixed
# random_state makes the split reproducible between runs.
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, train_size=0.7, random_state=0
)

# Fit the scaler on the training split only, then apply the same fitted
# transformation to the test split.
s = StandardScaler()
x_train = s.fit_transform(x_train)
x_test = s.transform(x_test)

# Train a KNN classifier that consults 5 neighbours.
k = KNeighborsClassifier(n_neighbors=5)
k.fit(x_train, y_train)

# Element-wise comparison of the predictions against the true labels.
test = k.predict(x_test)
print(test == y_test)

# Value returned by the classifier's own score() on the held-out split.
score = k.score(x_test, y_test)
print(score)
模型选择–交叉验证与网格搜索
sklearn.model_selection.GridSearchCV
from sklearn.model_selection import GridSearchCV

# Wrap a plain KNN estimator in a grid search that tries n_neighbors = 1..4
# (range(1, 5) excludes 5) with cv=5.
# NOTE: relies on KNeighborsClassifier, x_train and y_train being defined by
# the earlier KNN snippet.
k = KNeighborsClassifier()
new_k = GridSearchCV(
    k,
    param_grid={'n_neighbors': range(1, 5)},
    cv=5,
)
new_k.fit(x_train, y_train)
GridSearchCV 对预估器进行封装,返回一个新的预估器对象,其用法(fit、predict、score)与原始预估器相同。调用 fit 之后,新的预估器还会多出一些属性,例如最佳得分 best_score_、最佳参数 best_params_、最佳预估器 best_estimator_ 以及交叉验证结果 cv_results_。
朴素贝叶斯
决策树与随机森林
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
DecisionTreeClassifier分类器参数: RandomForestClassifier随机森林分类器参数: