Palm print recognition: comparing machine learning algorithms on PCA-reduced features
1. Read the data and compute each algorithm's score for comparison
import cv2
import pandas as pd

data = []  # palm print data: 100 people, 6 images each
for i in range(100):
    person_palm = []  # one person's palm prints
    for j in range(1, 7):
        path = './palmbases/P_' + str(i) + '_' + str(j) + '.bmp'
        palm_img = cv2.imread(path)  # cv2.imread returns None if the file is missing
        person_palm.append(palm_img)
    data.append(person_palm)

# Convert each 3-channel image to a 2-D array
data2 = []
target = []  # labels (1-100, one per person)
for i in range(100):
    for j in range(6):
        target.append(i + 1)  # label
        b = []
        for m in range(128):  # images are 128x128
            a = []
            for n in range(128):
                a.append(data[i][j][m][n][1])  # channel 1 of the pixel at row m, column n of person i's j-th image
            b.append(a)
        data2.append(b)

def to_series(data):  # flatten each palm print into a 1-D series
    data_se = []
    for palm in data:
        series = []
        for i in range(128):
            for j in range(128):
                series.append(palm[i][j])
        data_se.append(series)
    return pd.DataFrame(data_se)

data = to_series(data2)
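For reference, the flattening that to_series performs can be written as one vectorized step. A minimal sketch, assuming data2 holds the 600 grayscale 128x128 images built above (data_alt is a hypothetical name):

# Sketch: vectorized equivalent of to_series
import numpy as np
data_alt = pd.DataFrame(np.asarray(data2).reshape(600, 128 * 128))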
2. Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from PcaKnn import pca_de
from PcaRead import target, data

# Random forest on 60-dimensional PCA features
clf = RandomForestClassifier(n_estimators=100, criterion="gini", max_depth=100)
x_train, x_test, y_train, y_test = train_test_split(pca_de(data, 60), target, test_size=0.3, random_state=1)
clf.fit(x_train, y_train)
predict = clf.predict(x_test)
print(clf)
print(accuracy_score(predict, y_test))
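With only six images per person, a single 70/30 split can be noisy. As a hedged alternative, not part of the original comparison, k-fold cross-validation averages over several splits; a sketch reusing pca_de, data, and target from above:

# Sketch: 5-fold cross-validation for a more stable accuracy estimate
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(RandomForestClassifier(n_estimators=100), pca_de(data, 60), target, cv=5)
print(cv_scores.mean(), cv_scores.std())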
3. KNN
from sklearn.decomposition import PCA  # dimensionality reduction
from sklearn.model_selection import train_test_split  # data splitting
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import PcaRead
from sklearn.preprocessing import scale, StandardScaler, Normalizer
# Reduce the data to n dimensions; PCA is unsupervised
def pca_de(data, n: int):
    pca = PCA(n_components=n)
    data = pca.fit_transform(data)
    return data
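pca_de takes the number of components as given; one way to sanity-check a choice such as n=60 is the fitted explained_variance_ratio_. A minimal sketch, assuming PcaRead.data is already loaded (pca_check is a hypothetical name):

# Sketch: fraction of total variance kept by the first 60 principal components
pca_check = PCA(n_components=60)
pca_check.fit(PcaRead.data)
print(pca_check.explained_variance_ratio_.sum())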
# Preprocess the data three ways: scale, StandardScaler, Normalizer
data_s = scale(PcaRead.data)
data_stds = StandardScaler().fit_transform(PcaRead.data)
data_nor = Normalizer().fit_transform(PcaRead.data)
# KNN
def knn(data, n: int):  # n is the target number of dimensions
    score = []
    k = [1, 2, 3, 4, 5, 6, 7, 8]
    data = pca_de(data, n)  # reduce dimensionality
    # split the data, 30% for the test set
    x_train, x_test, y_train, y_test = train_test_split(data, PcaRead.target, test_size=0.3, random_state=1)
    for i in k:
        model = KNeighborsClassifier(n_neighbors=i)
        model.fit(x_train, y_train)
        predict = model.predict(x_test)
        score.append(accuracy_score(predict, y_test))
        print('K_neighbors={}, score={}'.format(i, accuracy_score(predict, y_test)))
    return score
# Choose k
score = knn(PcaRead.data, 60)
plt.plot([1, 2, 3, 4, 5, 6, 7, 8], score, color="r", marker="*")
plt.title("KNN score vs. k (dimension=60, raw data)")
plt.grid()
plt.xlabel("k")
plt.ylabel("score")
plt.ylim([0.5, 1])
plt.show()
# From the plot: accuracy is best at k=1
# Choose the number of dimensions and the preprocessing method
dimension = range(20, 300, 10)

def knn_1(data, n: int):  # KNN model with k=1
    score = []
    k = [1]
    data = pca_de(data, n)  # reduce dimensionality
    # split the data
    x_train, x_test, y_train, y_test = train_test_split(data, PcaRead.target, test_size=0.3, random_state=1)
    for i in k:
        model = KNeighborsClassifier(n_neighbors=i)
        model.fit(x_train, y_train)
        predict = model.predict(x_test)
        score = accuracy_score(predict, y_test)
        print('K_neighbors={}, score={}'.format(i, accuracy_score(predict, y_test)))
    return score

score = []       # raw data
score_s = []     # scale
score_stds = []  # StandardScaler
score_nor = []   # Normalizer
for n in dimension:
    print(n, 'dimensions')
    score.append(knn_1(PcaRead.data, n))
    score_s.append(knn_1(data_s, n))
    score_stds.append(knn_1(data_stds, n))
    score_nor.append(knn_1(data_nor, n))
plt.rcParams.update({'font.size': 30})
plt.figure(figsize=(20, 10), dpi=100)
plt.plot(dimension, score, color="k", marker="o", label="raw data")
plt.plot(dimension, score_s, color="c", marker="o", label="scale")
plt.plot(dimension, score_stds, color="r", marker="o", label="StandardScaler")
plt.plot(dimension, score_nor, color="g", marker="o", label="Normalizer")
plt.xlabel("dimension")
plt.ylabel("score")
plt.xlim([0, 300])
plt.ylim([0.92, 0.97])
plt.grid()
plt.legend(loc="best")
plt.title("KNN: dimension vs. score")
plt.show()
# From the plot above: the model is most accurate (score=0.961) at 60 PCA components, n_neighbors=1, with normalized data
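The manual loops over k and the number of dimensions can also be phrased as a single grid search with cross-validation. A sketch under the assumption that a Pipeline fit is acceptable here; it refits PCA inside each fold, so the numbers may differ slightly from the split-once scores above:

# Sketch: joint search over the PCA dimension and k with 5-fold cross-validation
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
pipe = Pipeline([("pca", PCA()), ("knn", KNeighborsClassifier())])
param_grid = {"pca__n_components": [40, 60, 80], "knn__n_neighbors": [1, 3, 5]}
search = GridSearchCV(pipe, param_grid, cv=5)
search.fit(PcaRead.data, PcaRead.target)
print(search.best_params_, search.best_score_)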
4. Logistic Regression and Ridge Regression
from matplotlib import pyplot as plt
from sklearn.linear_model import RidgeClassifierCV, LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from PcaKnn import pca_de
from PcaRead import target, data

# Logistic regression and ridge regression on normalized PCA features
def ridge(data, model):
    data = Normalizer().fit_transform(data)
    # data = StandardScaler().fit_transform(data)
    x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.3, random_state=1)
    model.fit(x_train, y_train)
    predict = model.predict(x_test)
    print(accuracy_score(predict, y_test))
    return accuracy_score(predict, y_test)

score_r = []
score_l = []
for i in range(10, 300, 10):
    score_r.append(ridge(pca_de(data, i), RidgeClassifierCV()))
    score_l.append(ridge(pca_de(data, i), LogisticRegression()))
plt.figure(figsize=(15, 5))
plt.plot(range(10, 300, 10), score_r, marker="o", label="RidgeClassifierCV")
plt.plot(range(10, 300, 10), score_l, marker="o", label="LogisticRegression")
plt.xlabel("dimension")
plt.ylabel("score")
plt.grid()
plt.xticks(range(10, 300, 10))
plt.legend()
plt.title("RidgeClassifierCV and LogisticRegression vs. score")
plt.show()
5. SVM
import pandas as pd  # data handling
from sklearn.model_selection import train_test_split  # data splitting
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import PcaRead
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.svm import LinearSVC, SVC  # linear SVM classifier and kernel SVM
from PcaKnn import pca_de

# Support vector machines
# LinearSVC
def linear(data, n, model):
    data = pca_de(data, n)  # reduce dimensionality
    if model != 0:  # model=0 means no preprocessing
        data = model.fit_transform(data)
    x_train, x_test, y_train, y_test = train_test_split(data, PcaRead.target, test_size=0.3, random_state=1)
    clf = LinearSVC()
    clf.fit(x_train, y_train)
    predict = clf.predict(x_test)
    print("score:", accuracy_score(predict, y_test))
    return accuracy_score(predict, y_test)
score = []
score_1 = []
score_2 = []
dimension = [20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300]
for i in dimension:
    print("dimension:", i)
    score.append(linear(PcaRead.data, i, 0))
    score_1.append(linear(PcaRead.data, i, StandardScaler()))
    score_2.append(linear(PcaRead.data, i, Normalizer()))

# Collect the results into a table
b = []
for i in range(15):
    a = []
    a.append(i * 20 + 20)
    a.append(score[i])
    a.append(score_1[i])
    a.append(score_2[i])
    b.append(a)
x = pd.DataFrame(b)
x.columns = ["dimension", "raw data", "StandardScaler", "Normalizer"]
print(x)
plt.figure(figsize=(20, 5), dpi=100)
plt.plot(dimension, score, color="b", marker="o", label="raw data")
plt.plot(dimension, score_1, color="r", marker="o", label="StandardScaler")
plt.plot(dimension, score_2, color="y", marker="o", label="Normalizer")
plt.xlabel("dimension")
plt.ylabel("score")
plt.xlim([0, 300])
plt.grid()
plt.legend(loc="best")
plt.show()
# From the plot: the number of dimensions has little effect on the standardized and normalized data, while the raw-data curve levels off above about 150 dimensions. Overall, reducing to 150 dimensions with normalization gives the highest accuracy, about 0.983
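LinearSVC runs above with its default regularization C=1.0. As a hedged extension, C can be varied at the 150-dimension, normalized setting the conclusion singles out (norm_data and clf_c are hypothetical names):

# Sketch: effect of the LinearSVC regularization parameter C at 150 dimensions
norm_data = Normalizer().fit_transform(pca_de(PcaRead.data, 150))
x_tr, x_te, y_tr, y_te = train_test_split(norm_data, PcaRead.target, test_size=0.3, random_state=1)
for c in [0.1, 1.0, 10.0]:
    clf_c = LinearSVC(C=c)
    clf_c.fit(x_tr, y_tr)
    print("C={}, score={}".format(c, accuracy_score(y_te, clf_c.predict(x_te))))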
kers = ['linear', 'poly', 'rbf', 'sigmoid']  # kernel functions to compare
pca_data = pca_de(PcaRead.data, 60)

def svc(data, model):
    if model != 0:  # model=0 means no preprocessing
        data = model.fit_transform(data)
    x_train, x_test, y_train, y_test = train_test_split(data, PcaRead.target, test_size=0.3, random_state=1)
    score = []
    for ker in kers:
        clf = SVC(kernel=ker, tol=1e-5)
        clf.fit(x_train, y_train)
        predict = clf.predict(x_test)
        score.append(accuracy_score(predict, y_test))
        print("kernel={}, accuracy={}".format(ker, accuracy_score(predict, y_test)))
    return score

# Evaluate every kernel on raw data (model=0), standardized data, and normalized data
score = []
for model in [0, StandardScaler(), Normalizer()]:
    score.append(svc(pca_data, model))
score_1 = pd.DataFrame(score)
score_1.columns = kers
score_1.index = ["raw data", "StandardScaler", "Normalizer"]
print(score_1)
def au(x, y):  # annotate each bar with its score
    for a, b, i in zip(x, y, range(len(x))):
        plt.text(a, b + 0.01, "%.3f" % y[i], ha='center', fontsize=10)

plt.rcParams.update({'font.size': 15})
plt.figure(figsize=(10, 5))
plt.bar([1, 5, 9, 13], score[0], label="raw data")
au([1, 5, 9, 13], score[0])
plt.bar([2, 6, 10, 14], score[1], label="StandardScaler")
au([2, 6, 10, 14], score[1])
plt.bar([3, 7, 11, 15], score[2], label="Normalizer")
au([3, 7, 11, 15], score[2])
plt.xlabel("kernel")
plt.ylabel("score")
plt.title("kernel and preprocessing vs. score")
plt.xlim([0, 20])
plt.xticks([2, 6, 10, 14], ['linear', 'poly', 'rbf', 'sigmoid'])
plt.legend(loc="best")
plt.show()
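The RBF result above uses SVC's default C and gamma; as a hedged follow-up, both can be tuned on the same 60-dimensional features (GridSearchCV is an extra import not used elsewhere in this section):

# Sketch: grid search over C and gamma for the RBF kernel
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(SVC(kernel="rbf"), {"C": [1, 10, 100], "gamma": ["scale", 0.001, 0.01]}, cv=5)
grid.fit(pca_data, PcaRead.target)
print(grid.best_params_, grid.best_score_)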
6. Collecting the best scores
from matplotlib import pyplot as plt
from PcaSvm import au

# Best score of each model
score = [0.972, 0.983, 0.994, 0.950, 0.996, 0.777]
model = ["KNN", "LinearSVC", "SVC", "Random Forest", "Ridge", "Logistic Regression"]
plt.figure(figsize=(10, 5))
plt.bar(range(1, 7), score, width=0.3,
        color=['tan', 'khaki', 'pink', 'skyblue', 'lawngreen', 'salmon'])
au(range(1, 7), score)
plt.xticks(range(1, 7), model)
plt.xlabel("models")
plt.ylabel("best-score")
plt.title("best score of models")
plt.show()
7. Comparison of results
Ridge regression (0.996) and kernel SVC (0.994) achieve the highest accuracy on this palm print dataset, with LinearSVC (0.983) and KNN (0.972) close behind; logistic regression performs worst at 0.777.