from sklearn.datasets import load_iris, fetch_20newsgroups

# Load the iris dataset.
iris = load_iris()
# Load the large 20-newsgroups dataset (the original note says it has
# already been downloaded).
news = fetch_20newsgroups()

# The returned object supports both attribute access and dict-style access.
print("数据集特征值:", iris.data)
print("数据集的目标值:", iris["target"])
print("数据集特征值的名字:", iris.feature_names)
iris["data"]
iris.target
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# API:
# seaborn.lmplot(x=, y=(横纵坐标列名), data=, hue=(目标值按什么分类), fit_reg=(是否进行线性拟合))
# Data visualization: wrap the iris feature matrix in a DataFrame.
iris_d = pd.DataFrame(
    data=iris.data,
    # `columns` sets the column labels (`index` would set the row labels).
    columns=["sepal_length", "sepal_width", "petal_length", "petal_width"],
)
# Append the class labels so plots can be coloured by them.
iris_d["target"] = iris.target
def iris_plot(data, col1, col2):
    """Plot ``col1`` against ``col2`` from ``data``, coloured by ``target``.

    Args:
        data: Table passed to ``sns.lmplot``; it must contain the columns
            named by ``col1`` and ``col2`` plus a ``"target"`` column.
        col1: Name of the column drawn on the x axis.
        col2: Name of the column drawn on the y axis.
    """
    # x/y are passed by keyword because recent seaborn releases no longer
    # accept them positionally; fit_reg=False disables the regression line.
    sns.lmplot(x=col1, y=col2, data=data, hue="target", fit_reg=False)
    plt.show()
# Plot sepal width against petal length.
iris_plot(iris_d, "sepal_width", "petal_length")
# 数据集的划分
# API:sklearn.model_selection.train_test_split(x, y, test_size=, random_state=)
#   x:数据集的特征值 iris.data
#   y:数据集的目标值 iris.target
#   test_size:测试集的大小
#   random_state:随机数种子
#   return:x_train, x_test, y_train, y_test
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set; the fixed random_state makes
# the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=22
)
# 特征值预处理(归一化)
# API:sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))
from sklearn.preprocessing import MinMaxScaler

# 1. Instantiate the transformer; features are rescaled into [3, 5].
#    (StandardScaler() can be swapped in here for standardization.)
transfer = MinMaxScaler(feature_range=(3, 5))
# 2. Fit and transform. The original note indexed an undefined `data` with
#    empty placeholder column names; the iris feature matrix is used here
#    so the snippet runs. A DataFrame column subset works the same way.
ret_data = transfer.fit_transform(iris.data)
print("归一化之后的数据:", ret_data)
# 标准化
# API:
# sklearn.preprocessing.StandardScaler()
# Complete iris workflow: load, split, standardize, train, evaluate.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# Load the data and hold out 20% of it as the test set.
iris = load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=22
)

# Feature preprocessing (standardization).
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
# Only transform the test set: it must be scaled with the statistics learned
# from the training set. Re-fitting on the test set would scale the two sets
# differently.
x_test = transfer.transform(x_test)

# Instantiate an estimator.
estimator = KNeighborsClassifier()

# Train the model.
estimator.fit(x_train, y_train)

# Predict and compare against the true labels element-wise.
y_pre = estimator.predict(x_test)
print("预测值和真实值:", y_test == y_pre)

# Compute the accuracy on the test set.
score = estimator.score(x_test, y_test)
# Case study: load the training CSV.
# The alias must be `pd` because the next line calls `pd.read_csv`.
import pandas as pd
data = pd.read_csv("./data/FBlocation/train.csv")