
A Comparison of Palm Print Recognition Algorithms Using PCA Dimensionality Reduction (Machine Learning)


1. Read the data and compute each algorithm's score for comparison

# PcaRead.py: load the palm print images and flatten them into a DataFrame
import cv2
import pandas as pd

data = []  # palm print data: 100 people, 6 images each
for i in range(100):
    person_palm = []  # one person's palm prints
    for j in range(1, 7):
        path = './palmbases/P_' + str(i) + '_' + str(j) + '.bmp'
        palm_img = cv2.imread(path)
        person_palm.append(palm_img)
    data.append(person_palm)

# Convert each 3-channel (BGR) image into a 2-D single-channel array
data2 = []
target = []  # labels (1-100, one per person)
for i in range(100):
    for j in range(6):
        target.append(i + 1)  # label
        b = []
        for m in range(128):  # images are 128*128
            a = []
            for n in range(128):
                # value at channel index 1 of row m, column n of person i's j-th image
                a.append(data[i][j][m][n][1])
            b.append(a)
        data2.append(b)

def to_series(data):  # flatten each palm image into a one-dimensional series
    data_se = []
    for palm in data:
        series = []
        for i in range(128):
            for j in range(128):
                series.append(palm[i][j])
        data_se.append(series)
    return pd.DataFrame(data_se)

data = to_series(data2)
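The pixel-by-pixel loops above are easy to follow but slow. As a minimal sketch (assuming the same ./palmbases layout and 128×128 BGR images), NumPy slicing builds the same 600×16384 DataFrame in one pass:

import cv2
import numpy as np
import pandas as pd

rows, target = [], []
for i in range(100):
    for j in range(1, 7):
        img = cv2.imread('./palmbases/P_{}_{}.bmp'.format(i, j))
        rows.append(img[:, :, 1].ravel())  # channel 1, flattened to 128*128 values
        target.append(i + 1)
data = pd.DataFrame(np.stack(rows))  # shape (600, 16384)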

2. Random forest

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from PcaKnn import pca_de
from PcaRead import target, data

# random forest
clf = RandomForestClassifier(n_estimators=100, criterion="gini", max_depth=100)
x_train, x_test, y_train, y_test = train_test_split(pca_de(data, 60), target, test_size=0.3, random_state=1)
clf.fit(x_train, y_train)
predict = clf.predict(x_test)
print(clf)
print(accuracy_score(predict, y_test))
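The later sections sweep the reduced dimension for KNN and the linear models; the random forest can be checked the same way. A minimal sketch of that assumed experiment (not run in the original post):

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from PcaKnn import pca_de
from PcaRead import target, data

# sweep the PCA dimension and report the test accuracy at each step
for n in range(20, 300, 40):
    x_train, x_test, y_train, y_test = train_test_split(
        pca_de(data, n), target, test_size=0.3, random_state=1)
    clf = RandomForestClassifier(n_estimators=100).fit(x_train, y_train)
    print(n, accuracy_score(clf.predict(x_test), y_test))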

3. KNN

# PcaKnn.py: PCA reduction, preprocessing, and the KNN experiments
import pandas as pd  # data handling
import matplotlib.pyplot as plt
import PcaRead
from sklearn.decomposition import PCA  # dimensionality reduction
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split  # data splitting
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import scale, StandardScaler, Normalizer

# reduce the data to n dimensions with PCA (unsupervised)
def pca_de(data, n: int):
    pca = PCA(n_components=n)
    data = pca.fit_transform(data)
    return data

# preprocess the data three ways: scale, StandardScaler (standardization), Normalizer (normalization)
data_s = scale(PcaRead.data)
data_stds = StandardScaler().fit_transform(PcaRead.data)
data_nor = Normalizer().fit_transform(PcaRead.data)
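How much of the image variance 60 components retain can be read off PCA's explained variance ratio; a minimal check, assuming PcaRead.data is the 600-sample frame built in step 1:

from sklearn.decomposition import PCA
import PcaRead

pca = PCA(n_components=60).fit(PcaRead.data)
# fraction of the total pixel variance retained by the first 60 components
print(pca.explained_variance_ratio_.sum())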

# KNN
def knn(data, n: int):  # n is the target dimensionality
    score = []
    k = [1, 2, 3, 4, 5, 6, 7, 8]
    data = pca_de(data, n)  # reduce dimensionality
    # split the data, giving 30% to the test set
    x_train, x_test, y_train, y_test = train_test_split(data, PcaRead.target, test_size=0.3, random_state=1)
    for i in k:
        model = KNeighborsClassifier(n_neighbors=i)
        model.fit(x_train, y_train)
        predict = model.predict(x_test)
        score.append(accuracy_score(predict, y_test))
        print('K_neighbors={}, score={}'.format(i, accuracy_score(predict, y_test)))
    return score

# choose k
score = knn(PcaRead.data, 60)

plt.plot([1, 2, 3, 4, 5, 6, 7, 8], score, color="r", marker="*")
plt.title("KNN score vs k (dimension=60, raw data)")
plt.grid()
plt.xlabel("k")
plt.ylabel("score")
plt.ylim([0.5, 1])
plt.show()

# The plot shows that k=1 gives the best accuracy.

# choose the reduced dimension and the preprocessing method
dimensions = range(20, 300, 10)

def knn_1(data, n: int):  # KNN model fixed at k=1
    data = pca_de(data, n)  # reduce dimensionality
    # split the data
    x_train, x_test, y_train, y_test = train_test_split(data, PcaRead.target, test_size=0.3, random_state=1)
    model = KNeighborsClassifier(n_neighbors=1)
    model.fit(x_train, y_train)
    predict = model.predict(x_test)
    score = accuracy_score(predict, y_test)
    print('K_neighbors=1, score={}'.format(score))
    return score

score = []       # raw data
score_s = []     # scale
score_stds = []  # StandardScaler
score_nor = []   # Normalizer
for n in dimensions:
    print(n, 'dimensions')
    score.append(knn_1(PcaRead.data, n))
    score_s.append(knn_1(data_s, n))
    score_stds.append(knn_1(data_stds, n))
    score_nor.append(knn_1(data_nor, n))

plt.rcParams.update({'font.size': 30})
plt.figure(figsize=(20, 10), dpi=100)
plt.plot(dimensions, score, color="k", marker="o", label="raw data")
plt.plot(dimensions, score_s, color="c", marker="o", label="scale")
plt.plot(dimensions, score_stds, color="r", marker="o", label="StandardScaler")
plt.plot(dimensions, score_nor, color="g", marker="o", label="Normalizer")
plt.xlabel("dimension")
plt.ylabel("score")
plt.xlim([0, 300])
plt.ylim([0.92, 0.97])
plt.grid()
plt.legend(loc="best")
plt.title("KNN dimension vs score")
plt.show()

# The plot shows that accuracy is highest (score=0.961) when the data is
# reduced to 60 dimensions, normalized, and classified with n_neighbors=1.
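The same two-way search over k and the reduced dimension can be written more compactly with a Pipeline and a cross-validated grid search. A sketch of this alternative (it scores with 3-fold CV rather than the single 70/30 split above, so the numbers will differ slightly):

from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
import PcaRead

# search jointly over the PCA dimension and the number of neighbors
pipe = Pipeline([("pca", PCA()), ("knn", KNeighborsClassifier())])
grid = GridSearchCV(pipe,
                    {"pca__n_components": [20, 60, 100],
                     "knn__n_neighbors": [1, 3, 5]},
                    cv=3)
grid.fit(PcaRead.data, PcaRead.target)
print(grid.best_params_, grid.best_score_)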

4. Logistic regression and ridge regression

from matplotlib import pyplot as plt
from sklearn.linear_model import RidgeClassifierCV, LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from PcaKnn import pca_de
from PcaRead import target, data

# logistic regression and ridge regression
def ridge(data, model):
    data = Normalizer().fit_transform(data)
    # data = StandardScaler().fit_transform(data)
    x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.3, random_state=1)
    model.fit(x_train, y_train)
    predict = model.predict(x_test)
    print(accuracy_score(predict, y_test))
    return accuracy_score(predict, y_test)

score_r = []
score_l = []
for i in range(10, 300, 10):
    score_r.append(ridge(pca_de(data, i), RidgeClassifierCV()))
    score_l.append(ridge(pca_de(data, i), LogisticRegression()))

plt.figure(figsize=(15, 5))
plt.plot(range(10, 300, 10), score_r, marker="o", label="RidgeClassifierCV")
plt.plot(range(10, 300, 10), score_l, marker="o", label="LogisticRegression")
plt.xlabel("dimension")
plt.ylabel("score")
plt.grid()
plt.xticks(range(10, 300, 10))
plt.legend()
plt.title("RidgeClassifierCV and LogisticRegression vs score")
plt.show()

5. SVM

# PcaSvm.py: support vector machine experiments
import pandas as pd  # data handling
import matplotlib.pyplot as plt
import PcaRead
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split  # data splitting
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.svm import LinearSVC, SVC  # linear SVM classifier and kernel SVM
from PcaKnn import pca_de

# support vector machines
# LinearSVC
def linear(data, n, model):
    data = pca_de(data, n)  # reduce dimensionality
    if model != 0:  # model=0 means raw data; otherwise apply the given scaler
        data = model.fit_transform(data)
    x_train, x_test, y_train, y_test = train_test_split(data, PcaRead.target, test_size=0.3, random_state=1)
    clf = LinearSVC()
    clf.fit(x_train, y_train)
    predict = clf.predict(x_test)
    print("score:", accuracy_score(predict, y_test))
    return accuracy_score(predict, y_test)

score = []
score_1 = []
score_2 = []
dimensions = [20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300]
for i in dimensions:
    print("dimension:", i)
    score.append(linear(PcaRead.data, i, 0))
    score_1.append(linear(PcaRead.data, i, StandardScaler()))
    score_2.append(linear(PcaRead.data, i, Normalizer()))

# tabulate the scores per dimension and preprocessing method
x = pd.DataFrame({
    "dimension": dimensions,
    "raw data": score,
    "StandardScaler": score_1,
    "Normalizer": score_2,
})
print(x)

plt.figure(figsize=(20, 5), dpi=100)
plt.plot(dimensions, score, color="b", marker="o", label="raw data")
plt.plot(dimensions, score_1, color="r", marker="o", label="StandardScaler")
plt.plot(dimensions, score_2, color="y", marker="o", label="Normalizer")
plt.xlabel("dimension")
plt.ylabel("score")
plt.xlim([0, 300])
# plt.ylim([0.92, 0.97])
plt.grid()
plt.legend(loc="best")
plt.show()

# The plot shows that the reduced dimension has little effect on the
# standardized or normalized data, while the raw-data curve flattens out above
# roughly 150 dimensions. Overall, reducing to 150 dimensions and normalizing
# gives a relatively high accuracy of 0.983.

kers = ['linear', 'poly', 'rbf', 'sigmoid']  # kernel functions to compare
pca_data = pca_de(PcaRead.data, 60)

def svc(data, model):
    if model != 0:  # model=0 means raw data; otherwise apply the given scaler
        data = model.fit_transform(data)
    x_train, x_test, y_train, y_test = train_test_split(data, PcaRead.target, test_size=0.3, random_state=1)
    score = []
    for ker in kers:
        clf = SVC(kernel=ker, tol=1e-5)
        clf.fit(x_train, y_train)
        predict = clf.predict(x_test)
        score.append(accuracy_score(predict, y_test))
        print("kernel={}, accuracy={}".format(ker, accuracy_score(predict, y_test)))
    return score

# Run svc on the raw data (model=0), standardized data (StandardScaler()),
# and normalized data (Normalizer()), collecting the per-kernel accuracies.
score = []
for model in [0, StandardScaler(), Normalizer()]:
    score.append(svc(pca_data, model))

score_1 = pd.DataFrame(score)
score_1.columns = kers
score_1.index = ["raw data", "standardized", "normalized"]
print(score_1)

def au(x, y):  # annotate each bar with its score
    for a, b in zip(x, y):
        plt.text(a, b + 0.01, "%.3f" % b, ha='center', fontsize=10)

plt.rcParams.update({'font.size': 15})
plt.figure(figsize=(10, 5))
plt.bar([1, 5, 9, 13], score[0], label="raw data")
au([1, 5, 9, 13], score[0])
plt.bar([2, 6, 10, 14], score[1], label="standardized")
au([2, 6, 10, 14], score[1])
plt.bar([3, 7, 11, 15], score[2], label="normalized")
au([3, 7, 11, 15], score[2])
plt.xlabel("kernel")
plt.ylabel("score")
plt.title("kernel and preprocessing vs score")
plt.xlim([0, 20])
plt.xticks([2, 6, 10, 14], ['linear', 'poly', 'rbf', 'sigmoid'])
plt.legend(loc="best")
plt.show()
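Beyond the choice of kernel, SVC's C and gamma parameters also matter. A sketch of an assumed follow-up grid search on the normalized 60-dimensional data (not part of the original comparison):

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
from PcaKnn import pca_de
import PcaRead

# tune C and gamma for the rbf kernel on the same preprocessed data
X = Normalizer().fit_transform(pca_de(PcaRead.data, 60))
x_train, x_test, y_train, y_test = train_test_split(
    X, PcaRead.target, test_size=0.3, random_state=1)
grid = GridSearchCV(SVC(kernel='rbf'),
                    {'C': [1, 10, 100],
                     'gamma': ['scale', 0.01, 0.1]}, cv=3)
grid.fit(x_train, y_train)
print(grid.best_params_, grid.score(x_test, y_test))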

6. Best score of each model

from matplotlib import pyplot as plt
from PcaSvm import au

# the best score achieved by each model
score = [0.972, 0.983, 0.994, 0.950, 0.996, 0.777]
model = ["KNN", "LinearSVC", "SVC", "random forest", "ridge", "logistic"]
plt.figure(figsize=(10, 5))
plt.bar(range(1, 7), score, width=0.3,
        color=['tan', 'khaki', 'pink', 'skyblue', 'lawngreen', 'salmon'])
au(range(1, 7), score)
plt.xticks(range(1, 7), model)
plt.xlabel("models")
plt.ylabel("best-score")
plt.title("best score of models")
plt.show()

7. Result comparison

Comparing the best configurations, the ridge classifier (0.996) and SVC (0.994) are the most accurate, followed by LinearSVC (0.983), KNN (0.972), and the random forest (0.950); logistic regression trails far behind at 0.777.
