一、原理
KL(Karhunen-Loeve)变换,也称为Hotelling变换、特征向量变换(Eigenvector-Based Transform)、主成分分析(PCA Principle Component Analysis)等。
KL变换方法是应用最广泛的一种特征提取方法之一,它是一种利用图像的统计性质的变换。在信号处理、模式识别、数字图像处理等领域已经得到了广泛的应用。其基本思想是提取出空间原始数据中的主要特征,减少数据冗余,使得数据在一个低维的特征空间被处理,同时保持原始数据的绝大部分的信息,从而解决数据空间维数过高的瓶颈问题。常用在数据压缩、特征提取等方面。
图中水平轴和垂直轴表示数据集的自然坐标轴。标号为1和2的旋转坐标轴是应用这个数据集的主变量分析产生的结果。从图可以看出数据集投影到1号轴上抓住了数据的主要特征,即具有双峰(即在它的结构上有两个聚类)的特点。
二、实现流程
输入:多波段的遥感影像,输出:变换后的各成分
三、代码实现
3.1 方法一:基于Eigen库与GDAL库
//eigen实现主成分分析
void featurenormalize(MatrixXd &X)
{
//计算每一维度均值MatrixXd meanval = X.colwise().mean();RowVectorXd meanvecRow = meanval;//样本均值化为0X.rowwise() -= meanvecRow;
}
void computeCov(MatrixXd &X, MatrixXd &C)
{
//计算协方差矩阵C = XTX / n-1;C = X.adjoint() * X;C = C.array() / (X.rows() - 1);
}
void computeEig(MatrixXd &C, MatrixXd &vec, MatrixXd &val)
{
//计算特征值和特征向量,使用selfadjont按照对阵矩阵的算法去计算,可以让产生的vec和val按照有序排列(默认从大到小)SelfAdjointEigenSolver<MatrixXd> eig(C);vec = eig.eigenvectors();val = eig.eigenvalues();
}
int computeDim(MatrixXd &val)
{
//输出信息量达到95%的前n主成分/*int dim;double sum = 0;for (int i = val.rows() - 1; i >= 0; --i){sum += val(i, 0);dim = i;if (sum / val.sum() >= 0.95)break;}return val.rows() - dim;*/return 7;//这里设置输出7个主成分
}void writePcaImg(const char* path, int width, int height, double *pBuff, double *adfGeo, const char *prj, int bandNum, int imageSize, int pcaInd)
{
GDALDriver *pDriver = GetGDALDriverManager()->GetDriverByName("GTiff"); //图像驱动char** ppszOptions = NULL;int depth = 8;//图像位深int dim = 1;//每个图像波段数,这里将每个主成分存储到一个单波段图像GDALDataset* dst = pDriver->Create(path, width, height, dim, GDT_Float64, ppszOptions);//创建图像if (dst == nullptr)printf("Can't Write Image!");dst->SetGeoTransform(adfGeo);//设置坐标dst->SetProjection(prj);//设置投影dst->RasterIO(GF_Write, 0, 0, width, height, &pBuff[(bandNum - pcaInd)*imageSize], width, height,GDT_Float64, dim, nullptr, dim*depth, width * dim * depth, depth);//写入图像GDALClose(dst);
}
int main(int argc, char *argv[])
{
//读取影像char* pszFilename = "G:/before.img";char *outPath = "G:/result/Eigen_PC";GDALDataset *poDataset;GDALAllRegister();poDataset = (GDALDataset *)GDALOpen(pszFilename, GA_ReadOnly);if (poDataset == NULL){
printf_s("read failed!\n");}else{
printf_s("read successful!\n");}double adfGeoTransform[6];if (poDataset->GetGeoTransform(adfGeoTransform) == CE_Failure)//读取坐标信息{
printf("获取参数失败");}const char *prj = poDataset->GetProjectionRef();//读取投影信息int iWidth = poDataset->GetRasterXSize();//图像宽度int iHeight = poDataset->GetRasterYSize();//图像高度int iBandCount = poDataset->GetRasterCount();//波段数int iImageSize = iWidth * iHeight;//图像像元数double *pBuff1 = new double[iImageSize*iBandCount];//开辟空间存储原始图像poDataset->RasterIO(GF_Read, 0, 0, iWidth, iHeight, pBuff1,iWidth, iHeight, GDT_Float64, iBandCount, 0, 0, 0, 0);//读取原始图像MatrixXd staMat = Map<MatrixXd>(pBuff1, iImageSize, iBandCount);//将图像读入eigen矩阵MatrixXd X(iImageSize, iBandCount), C(iBandCount, iBandCount);//按波段存储至X矩阵,构建协方差矩阵CMatrixXd vec, val;//构建特征向量、特征值矩阵vec、valX = MatrixXd(staMat);MatrixXd Y = X;//零均值化featurenormalize(X);//计算协方差computeCov(X, C);//计算特征值和特征向量computeEig(C, vec, val);cout << "特征值为:" << val << endl;cout << "特征向量为:" << vec << endl;//计算损失率,确定降低维数int dim = computeDim(val);//计算结果MatrixXd res = Y * vec.rightCols(dim);//验证结果featurenormalize(res);MatrixXd CY;computeCov(res, CY);MatrixXd vecY, valY;computeEig(CY, vecY, valY);/*cout << "主成分分析后的特征向量:"<<endl << vecY << endl;cout << "主成分分析后的特征值:" << endl<< valY << endl;*///将主成分分量存储至pBuff2double *pBuff2 = new double[iImageSize*iBandCount];for (int i = 0; i < dim; ++i){
for (int j = 0; j < iImageSize; ++j){
pBuff2[i*iImageSize + j] = res(j, i);}}//各个主成分写入图像(包含坐标及投影信息)for (int i = 0; i < iBandCount; i++){
char x[] = " ";strcpy(x, outPath);char dstPath[10] = {
};sprintf(dstPath, "%d.tif", i + 1);strcat(x, dstPath);writePcaImg(x, iWidth, iHeight, pBuff2, adfGeoTransform, prj, 7, iImageSize, i + 1);cout << "pca " << i + 1 << " complete" << endl;}cout << "pca complete!" << endl;cin.get();return 0;
}
3.2 方法二:基于openCV库与GDAL库
//图像灰度化函数
static Mat toGrayscale(InputArray _src) {
Mat src = _src.getMat();// only allow one channelif (src.channels() != 1) {
CV_Error(CV_StsBadArg, "Only Matrices with one channel are supported");}// create and return normalized imageMat dst;cv::normalize(_src, dst, 0, 255, NORM_MINMAX, CV_8UC1);return dst;
}int Cal_PCA(Mat data, float T, String path, Mat &Eigenvector)
{
Mat data_t = data.t(); PCA pca(data.t(), Mat(), CV_PCA_DATA_AS_ROW,T); //这个pca就已经算出来了Mat pca_tMean = pca.mean;//零均值化Mat data_tFloat;data_t.convertTo(data_tFloat, CV_32FC1); //类型转换for (int i = 0; i < data_t.cols; i++){
for (int j = 0; j < data_t.rows;j++){
data_tFloat.at<float>(j, i) -= pca_tMean.at<float>(0, i);}}//零均值化后的数组计算主成分PCA pca_tnew(data_tFloat, Mat(), CV_PCA_DATA_AS_ROW,T);Mat Eigenvalues = pca_tnew.eigenvalues.t(); //特征值Eigenvector = pca_tnew.eigenvectors; //特征向量float sum_Ei;for (int i = 0; i < Eigenvector.rows; i++){
sum_Ei = 0;for (int j = 0; j < Eigenvector.cols; j++){
sum_Ei += abs(Eigenvector.at<float>(i,j));}for (int j = 0; j < Eigenvector.cols; j++){
Eigenvector.at<float>(i,j) = Eigenvector.at<float>(i,j) / sum_Ei;}}cout <<"new_Eigenvector="<< Eigenvector << endl;cout << "Eigenvalues=" << Eigenvalues << endl;//根据需要达到的百分比设置成分个数double Eigenvalues_sum = sum(Eigenvalues).val[0]; //特征值的和,val是干嘛?回答:将一组字符型数据的数字部分转换成相应的数值型数据int Eigenvalues_len = Eigenvalues.cols;float flag_sum = 0;int i = 0;while (flag_sum <= T && i < Eigenvalues_len) // 计算达到要求的主成分数{
flag_sum = flag_sum + Eigenvalues.ptr<float>(0)[i] / Eigenvalues_sum; //从第几个开始就能达到所需要的比重i++;}//主要参数存入FileStorage f_pca(path, FileStorage::WRITE); f_pca << "num" << i;//f_pca << "coff" << coeff;f_pca << "Eigenvector" << Eigenvector;Eigenvalues.release(); //释放内存//coeff.release();f_pca.release();return 0;
}
/* data:输入数据path:数据保存路径Eigenvector:特征向量GDALDataset:数据集iHeight:图像高*/
void Output_PCA(Mat data, String path, Mat Eigenvector, GDALDataset *poDS, int iHeight, int iWidth, int nBands, int imageSize, double *adfGeo, const char *prj)
{
GDALDriver *pDriver = GetGDALDriverManager()->GetDriverByName("GTiff"); //图像驱动FileStorage f_pca(path, FileStorage::READ);int num;const char* sPath = "G:/result/a";//把数据读取出f_pca["num"] >> num;f_pca.release();cout << "Eigenvector=" << Eigenvector << endl;Mat data_float;data.convertTo(data_float, CV_32FC1);/*Mat Eigenvector_8U;Eigenvector.convertTo(Eigenvector_8U, CV_8U);*/Mat Y = Eigenvector * data_float;//进行波段循环,将矩阵形式表示的图像数据以行向量形式表示,如92*112的图像,表示为1*10304,for (int r = 1; r <= num; ++r){
GDALRasterBand *xBand = poDS->GetRasterBand(r);Mat ReCons;ReCons = Y.row(r - 1).reshape(data.channels(), iHeight); // 从行向量重塑为图像形状ReCons = toGrayscale(ReCons);switch (r){
case 1:imwrite("G:/result/Opencv_PC1.tif", ReCons);break;case 2:imwrite("G:/result/Opencv_PC2.tif", ReCons);break;case 3:imwrite("G:/result/Opencv_PC3.tif", ReCons);break;case 4:imwrite("G:/result/Opencv_PC4.tif", ReCons);break;case 5:imwrite("G:/result/Opencv_PC5.tif", ReCons);break;case 6:imwrite("G:/result/Opencv_PC6.tif", ReCons);break;case 7:imwrite("G:/result/Opencv_PC7.tif", ReCons);break;}cout << "PC" << r << "complete" << endl;}
}void UpdateRasterFile(const char* pszRasterFile ,const char* txtPath)
{
//使之支持中文路径CPLSetConfigOption("GDAL_FILENAME_IS_UTF8", "NO");//注册栅格驱动GDALAllRegister();//打开要更新的数据,第二个参数使用介个GDALDataset *poDS = (GDALDataset*)GDALOpen(pszRasterFile, GA_Update);if (poDS == NULL){
cout << "打开图像" << pszRasterFile << "失败";return;}double adfGeoTransform[6];if (poDS->GetGeoTransform(adfGeoTransform) == CE_Failure)//读取坐标信息{
printf("获取参数失败");}const char *prj = poDS->GetProjectionRef();//读取投影信息//获取图像大小int iWidth = poDS->GetRasterXSize();int iHeight = poDS->GetRasterYSize();//获取波段个数int nBands = poDS->GetRasterCount();//创建数组Mat data(nBands, iWidth*iHeight, CV_8U, Scalar::all(0));//进行波段循环,将矩阵形式表示的图像数据以行向量形式表示for (int r = 1; r <= nBands; ++r){
GDALRasterBand *xBand = poDS->GetRasterBand(r);unsigned char *pBuf = new unsigned char[iWidth * iHeight]; //动态指配数组xBand->RasterIO(GF_Read, 0, 0, iWidth, iHeight, pBuf, iWidth, iHeight, GDT_Byte, 0, 0);for (int i = 0; i < data.cols; i++){
data.at<uchar>(r - 1, i) = pBuf[i];}}Mat Eigenvector;Cal_PCA(data, 1, txtPath, Eigenvector); //第一步:进行主成分分析Output_PCA(data, txtPath, Eigenvector, poDS, iHeight, iWidth, nBands, iHeight*iWidth, adfGeoTransform, prj); //第二步:输出主成分图像system("pause");
}
int main()
{
const char *imagePath = "G:/before.img";const char *txtPath = "G:/b.txt";UpdateRasterFile(imagePath,txtPath);return 0;
}
四、处理结果
原始图像
结果图像
用Eigen和GDAL库处理结果(PC1 - PC6):
用openCV和GDAL库处理结果(PC1 - PC6):
五、结果分析
由生成的主成分分析后的图像可以看出第一二主成分所占的信息最多,依次递减,第七主成分所包含的信息最少,基本只剩下噪声。基本符合主成分分析的结果。
不同的库其调用方式不同,在调用openCV库时应该先将GDAL导入的图像矩阵转置,生成的才是正确的特征值,再将其特征向量单位化,才能得到与eigen库相同的结果,从这个方面来看,openCV库主成分分析函数具有更差的适用性。