
[Machine Learning] Unsupervised Learning: Dimensionality Reduction with Principal Component Analysis (PCA)



Note: the dataset is provided at the end of the article.

1. A Simple PCA Example

import numpy as np
import matplotlib.pyplot as plt

# Load the data
data = np.genfromtxt("data.csv", delimiter=",")
x_data = data[:, 0]
y_data = data[:, 1]
plt.scatter(x_data, y_data)
plt.show()
print(x_data.shape)

Output: a scatter plot of the raw data, plus the printed shape of x_data.

# Center the data
def zeroMean(dataMat):
    # Mean of each column, i.e. the mean of each feature
    meanVal = np.mean(dataMat, axis=0)
    newData = dataMat - meanVal
    return newData, meanVal

newData, meanVal = zeroMean(data)
# np.cov computes the covariance matrix; rowvar=0 means each row is one sample
covMat = np.cov(newData, rowvar=0)
# Print the covariance matrix
print(covMat)

Output: the 2x2 covariance matrix of the centered data.
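For reference, np.cov with rowvar=0 is just the textbook sample covariance of the centered data, C = XᵀX / (n - 1). A minimal cross-check sketch, assuming the newData array produced by zeroMean above:

import numpy as np

# Assumes newData is the centered (n_samples, 2) array from zeroMean above.
n = newData.shape[0]
# Sample covariance of centered data: C = X^T X / (n - 1)
covManual = newData.T.dot(newData) / (n - 1)
print(np.allclose(covManual, np.cov(newData, rowvar=0)))   # expected: True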

# np.linalg.eig returns the eigenvalues and eigenvectors of a matrix
eigVals, eigVects = np.linalg.eig(np.mat(covMat))
# Print the eigenvalues
print(eigVals)
# Print the eigenvectors
print(eigVects)

Output: the eigenvalues and eigenvectors of the covariance matrix.
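Because a covariance matrix is symmetric, np.linalg.eigh is a common alternative to eig here. A minimal sketch, assuming the covMat computed above; note that eigh already returns the eigenvalues in ascending order:

import numpy as np

# Assumes covMat is the 2x2 covariance matrix computed above.
# eigh is intended for symmetric matrices such as a covariance matrix and
# returns real eigenvalues already sorted in ascending order.
eigValsH, eigVectsH = np.linalg.eigh(covMat)
print(eigValsH)    # eigenvalues, ascending
print(eigVectsH)   # eigenvectors, one per column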

# Sort the eigenvalues in ascending order
eigValIndice = np.argsort(eigVals)
top = 1
# Indices of the top n (largest) eigenvalues
n_eigValIndice = eigValIndice[-1:-(top+1):-1]
# Eigenvectors corresponding to the top n eigenvalues
n_eigVect = eigVects[:, n_eigValIndice]
print(n_eigVect)

Output: the eigenvector belonging to the largest eigenvalue.
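The eigenvalues also tell how much variance each principal component captures. A minimal sketch of the explained-variance ratio, assuming the eigVals array computed above:

import numpy as np

# Assumes eigVals is the array of eigenvalues computed above.
# Each eigenvalue divided by their sum is the fraction of total variance
# captured by the corresponding principal component.
ratios = np.sort(eigVals)[::-1] / np.sum(eigVals)
print(ratios)   # the first entry is the share of variance kept with top = 1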

# Project the data into the low-dimensional feature space
lowDDataMat = newData * n_eigVect
# Reconstruct the data from the low-dimensional representation
reconMat = (lowDDataMat * n_eigVect.T) + meanVal

# Load the data again and plot the original points
data = np.genfromtxt("data.csv", delimiter=",")
x_data = data[:, 0]
y_data = data[:, 1]
plt.scatter(x_data, y_data)
# Plot the reconstructed points in red
x_data = np.array(reconMat)[:, 0]
y_data = np.array(reconMat)[:, 1]
plt.scatter(x_data, y_data, c='r')
plt.show()
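As a sanity check, scikit-learn's PCA gives the same one-component reconstruction. A minimal sketch, assuming data and reconMat from above (sklearn may flip the sign of a component, which does not affect the reconstruction):

from sklearn.decomposition import PCA
import numpy as np

# Assumes data (the raw points) and reconMat (from the code above) exist.
pca_model = PCA(n_components=1)
low = pca_model.fit_transform(data)         # projection onto the first principal component
recon = pca_model.inverse_transform(low)    # back to the original 2-D space
print(np.allclose(recon, np.asarray(reconMat)))   # expected: True up to rounding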

2. PCA on Handwritten Digits: Dimensionality Reduction and Visualization

from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt

digits = load_digits()      # load the data
x_data = digits.data        # features, x_data.shape = (1797, 64)
y_data = digits.target      # labels

# Split the data: 1/4 for testing, 3/4 for training
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data)

# Model
mlp = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500)
mlp.fit(x_train, y_train)

# Center the data
def zeroMean(dataMat):
    # Mean of each column, i.e. the mean of each feature
    meanVal = np.mean(dataMat, axis=0)
    newData = dataMat - meanVal
    return newData, meanVal

def pca(dataMat, top):
    # Center the data
    newData, meanVal = zeroMean(dataMat)
    # np.cov computes the covariance matrix; rowvar=0 means each row is one sample
    covMat = np.cov(newData, rowvar=0)
    # np.linalg.eig returns the eigenvalues and eigenvectors
    eigVals, eigVects = np.linalg.eig(np.mat(covMat))
    # Sort the eigenvalues in ascending order
    eigValIndice = np.argsort(eigVals)
    # Indices of the top n (largest) eigenvalues
    n_eigValIndice = eigValIndice[-1:-(top+1):-1]
    # Eigenvectors corresponding to the top n eigenvalues
    n_eigVect = eigVects[:, n_eigValIndice]
    # Project the data into the low-dimensional feature space
    lowDDataMat = newData * n_eigVect
    # Reconstruct the data from the low-dimensional representation
    reconMat = (lowDDataMat * n_eigVect.T) + meanVal
    # Return the low-dimensional data and the reconstructed matrix
    return lowDDataMat, reconMat

# Reduce to two dimensions
lowDDataMat, reconMat = pca(x_data, 2)
# Plot the projected data
x = np.array(lowDDataMat)[:, 0]
y = np.array(lowDDataMat)[:, 1]
plt.scatter(x, y, c='r')
plt.show()

Output: a 2-D scatter plot of the digits projected onto the first two principal components (all points in red).
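The same two-dimensional embedding comes straight out of sklearn.decomposition.PCA. A minimal sketch, assuming the x_data and y_data loaded from load_digits() above (component signs may be flipped relative to the hand-rolled version):

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Assumes x_data and y_data are the digits features and labels loaded above.
embedding = PCA(n_components=2).fit_transform(x_data)
plt.scatter(embedding[:, 0], embedding[:, 1], c=y_data)
plt.show()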

predictions = mlp.predict(x_data)
# Plot the projected data, coloured by digit label
x = np.array(lowDDataMat)[:, 0]
y = np.array(lowDDataMat)[:, 1]
plt.scatter(x, y, c=y_data)
plt.show()

Output: the same 2-D projection, coloured by the true digit labels.
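Since classification_report and confusion_matrix are already imported, the MLP can also be scored on the held-out quarter of the data. A minimal sketch, assuming the mlp, x_test and y_test objects created above:

from sklearn.metrics import classification_report, confusion_matrix

# Assumes mlp, x_test and y_test come from the training code above.
test_predictions = mlp.predict(x_test)
print(classification_report(y_test, test_predictions))
print(confusion_matrix(y_test, test_predictions))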

# Reduce to three dimensions
lowDDataMat, reconMat = pca(x_data, 3)

from mpl_toolkits.mplot3d import Axes3D

x = np.array(lowDDataMat)[:, 0]
y = np.array(lowDDataMat)[:, 1]
z = np.array(lowDDataMat)[:, 2]
ax = plt.figure().add_subplot(111, projection='3d')
# Points coloured by digit label
ax.scatter(x, y, z, c=y_data, s=10)
plt.show()

Output: a 3-D scatter plot of the digits projected onto the first three principal components, coloured by digit label.
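The reconMat returned by pca(x_data, 3) can also be viewed as images to see how much digit detail three components keep. A minimal sketch, assuming x_data and reconMat from above:

import numpy as np
import matplotlib.pyplot as plt

# Assumes x_data and reconMat (from pca(x_data, 3)) exist as above.
recon = np.asarray(reconMat).real        # drop the matrix wrapper; .real guards against a complex dtype from eig
fig, axes = plt.subplots(1, 2)
axes[0].imshow(x_data[0].reshape(8, 8), cmap='gray')   # one original 8x8 digit
axes[0].set_title('original')
axes[1].imshow(recon[0].reshape(8, 8), cmap='gray')    # its 3-component reconstruction
axes[1].set_title('3 components')
plt.show()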

Dataset ("data.csv"):

32.502345269453031,31.7070058465699253.426804033275019,68.7775959816389161.530358025636438,62.56238229794580347.475639634786098,71.54663223356777759.813207869512318,87.23092513368739355.142188413943821,78.21151827079923252.211796692214001,79.6419730498087439.299566694317065,59.17148932186950848.10504169176825,75.33124229706305652.550014442733818,71.30087988685035345.419730144973755,55.16567714595912354.351634881228918,82.47884675749791944.164049496773352,62.00892324572582558.16847071685779,75.39287042599495756.727208057096611,81.4361921588786448.955888566093719,60.72360244067396544.687196231480904,82.89250373145371560.297326851333466,97.37989686216607845.618643772955828,48.84715331735507238.816817537445637,56.87721318626850666.189816606752601,83.87856466460276365.41605174513407,118.5912173025224947.48120860786787,57.25181946226896941.57564261748702,51.39174407983230751.84518690563943,75.38065166531235759.37082089523,74.76556403215137457.31000343834809,95.45505292257473763.615561251453308,95.22936601755530746.737619407976972,79.05240616956558650.556760148547767,83.43207142132371252.223996085553047,63.35879031749787835.567830047746632,41.41288530370056342.436476944055642,76.61734128007404458.16454011019286,96.76956642610819957.504447615341789,74.08413011660252345.440530725319981,66.58814441422859461.89622268029126,77.76848241779302433.093831736163963,50.71958891231208436.436009511386871,62.12457081807178137.675654860850742,60.81024664990221144.555608383275356,52.68298336638778143.318282631865721,58.56982471769286750.073145632289034,82.90598148507051243.870612645218372,61.42470980433912362.997480747553091,115.2441528007952932.669043763467187,45.57058882337608540.166899008703702,54.08405479622361253.575077531673656,87.99445275811041333.864214971778239,52.72549437590042564.707138666121296,93.57611869265824138.119824026822805,80.16627544737096444.502538064645101,65.10171157056032640.599538384552318,65.56230126040037541.720676356341293,65.28088692082282351.088634678336796,73.43464154632430155.078095904923202,71.1397278586189441.377726534895203,79.10282968354985762.494697427269791,86.52053844034715349.203887540826003,84.74269780782621841.102685187349664,59.35885024862493341.18105169822,61.68403752483362750.186389494880601,69.84760415824918352.378446219236217,86.09829120577410350.135485486286122,59.10883926769964333.644706006191782,69.8996816436276339.557901222906828,44.86249071116439856.130388816875467,85.49806777884022357.362052133238237,95.53668684646721960.269214393997906,70.25193441977158735.678093889410732,52.72173496477498831.588116998132829,50.39267013507989653.66093226167304,63.64239877565775346.682228649471917,72.24725106866236543.10789102464,57.81251297618140270.34607561504933,104.2571015854382244.492855880854073,86.643188257.50453330326841,91.48677800011013536.930076609191808,55.23166088621283655.805733357942742,79.55043667850760938.954769073377065,44.84712424246760156.901214702247074,80.20752313968276356.868900661384046,83.1427497920434634.33312470421609,55.72348926054391459.04974121466681,77.63418251167786457.788223993230673,99.05141484174826954.282328705967409,79.12064627468002751.088719898979143,69.58889785111847550.282836348230731,69.51050331149438944.211741752090113,73.68756431831728538.005488008060688,61.36690453724013132.940479942618296,67.17065576899511853.691639571070056,85.66820314500154268.76573426962166,114.8538712339139446.230966498310252,90.12357206996742368.319360818255362,97.91982103524284850.030174340312143,81.53699078301502849.239765342753763,72.11183246961566350.03957
5939875988,85.2334232567348.149858891028863,66.22495788805463225.128484647772304,53.454394214850524
