package com.*.prophet.algorithm.sugar;import com.*.prophet.algorithm.core.KMeans;/*** @Author:langjf* @Date: 2021/9/14* @Desc K——Menaus 判断 位置相距多少米为最优K*/
public class LocationsKMeansHelper {private final static double PI = 3.14159265358979323; // 圆周率private final static double R = 6371229; // 地球的半径public static double getDistance(double lng1, double lat1, double lng2, double lat2) {double x, y, distance;x = (lng2 - lng1) * PI * R* Math.cos(((lat1 + lat2) / 2) * PI / 180) / 180;y = (lat2 - lat1) * PI * R / 180;distance = Math.hypot(x, y);return distance;}//以两个位置之间的距离寻找最优解public static void KMeans(float[][] datas, double range) {//默认簇节点为4com.gwm.prophet.algorithm.core.KMeans s = new KMeans(datas);s.cluster();while (!isClassify(s, range)) {s.setResent(s.getClassCount() + 1, s);}s.printConsole();}//判断是否最优Kpublic static boolean isClassify(KMeans s, double range) {boolean isClassify = true;for (int i = 0; i < s.getInstanceNumber(); i++) {int j = Integer.valueOf(String.valueOf(s.getData()[i][2]).substring(0, 1));float[][] data = s.getData();double distance = getDistance(data[i][0], data[i][1], data[j][0], data[j][1]);if (range < distance) {isClassify = false;break;}}return isClassify;}public static void main(String[] args) {float[][] data = new float[10][3];data[0] = new float[]{(float) 99.75346, (float) 24.09003, 0};data[1] = new float[]{(float) 106.75346, (float) 25.09003, 0};data[2] = new float[]{(float) 107.75346, (float) 27.09003, 0};data[3] = new float[]{(float) 106.75346, (float) 27.09003, 0};data[4] = new float[]{(float) 105.75346, (float) 28.09003, 0};data[5] = new float[]{(float) 104.75346, (float) 25.09003, 0};data[6] = new float[]{(float) 104.75346, (float) 25.09003, 0};data[7] = new float[]{(float) 104.75346, (float) 25.09003, 0};data[8] = new float[]{(float) 104.75346, (float) 21.09003, 0};data[8] = new float[]{(float) 104.75346, (float) 21.09003, 0};KMeans(data, 500);}
}
package com.gwm.prophet.algorithm.core;import lombok.Data;import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;/*** @Author:langjf* @Date: 2021/9/1* @Desc*/class PPoint {public float x;public float y;public int flag = -1;public PPoint() {}public PPoint(float x, float y) {this.x = x;this.y = y;}
}@Data
public class KMeans {//聚类的数目int ClassCount =4;//样本数目(测试集)int InstanceNumber = 9;//样本属性数目(测试)int FieldCount = 2;//设置异常点阈值参数(每一类初始的最小数目为InstanceNumber/ClassCount^t)final static double t = 2.0;//存放数据的矩阵private float[][] data;//每个类的均值中心private float[][] classData;//噪声集合索引private ArrayList<Integer> noises;//存放每次变换结果的矩阵private ArrayList<ArrayList<Integer>> resultList;//数据归1最大值float[] max = new float[FieldCount];public KMeans(float[][] data) {this.data = data;this.classData = new float[ClassCount][FieldCount];this.InstanceNumber=data.length;noises = new ArrayList<Integer>();resultList = new ArrayList<ArrayList<Integer>>(ClassCount);}public KMeans() {//最后一位用来储存结果classData = new float[ClassCount][FieldCount];data = new float[InstanceNumber][FieldCount+1];noises = new ArrayList<Integer>();resultList = new ArrayList<ArrayList<Integer>>(ClassCount);data[0]= new float[]{(float) 99.75346, (float)24.09003,0};data[1]= new float[]{(float) 106.75346,(float)25.09003,0};data[2]= new float[]{(float) 107.75346,(float)27.09003,0};data[3]= new float[]{(float) 106.75346,(float)27.09003,0};data[4]= new float[]{(float) 105.75346,(float)28.09003,0};data[5]= new float[]{(float) 104.75346,(float)25.09003,0};data[6]= new float[]{(float) 104.75346,(float)25.09003,0};data[7]= new float[]{(float) 104.75346,(float)25.09003,0};data[8]= new float[]{(float) 104.75346,(float)21.09003,0};}public static void main(String[] args) {KMeans s= new KMeans();s.cluster();s.printConsole();}public void setResent(int classCount, KMeans s){s.ClassCount = classCount;s.data = data;s.classData = new float[ClassCount][FieldCount];s.InstanceNumber=data.length;s.noises = new ArrayList<Integer>();s.resultList = new ArrayList<ArrayList<Integer>>(ClassCount);s.cluster();}public void cluster() {//数据归一化normalize();//标记是否需要重新找初始点boolean needFindInitials = true;//找初始点的迭代次数int times = 1;//找初始点while (needFindInitials) {needFindInitials = false;resultList.clear();System.out.println("寻找第" + (times++) + "次");//一次找初始点的尝试和根据初始点的分类findInitials();firstClassify();for (int i = 0; i < resultList.size(); i++) {if (resultList.get(i).size() < InstanceNumber / Math.pow(ClassCount, t)) {needFindInitials = true;noises.addAll(resultList.get(i));}}}Adjust();}private void normalize() {// 计算数据每个维度最大值maxfor (int i = 0; i < InstanceNumber; i++) {for (int j = 0; j < FieldCount; j++) {if (data[i][j] > max[j]) {max[j] = data[i][j];}}}// 每个维度归一化值=原始值/maxfor (int i = 0; i < InstanceNumber; i++) {for (int j = 0; j < FieldCount; j++) {data[i][j] = data[i][j] / max[j];}}}/*** 寻找初始聚类中心*/private void findInitials() {int i, j, a, b;i = j = a = b = 0;float maxDis = 0;int alreadyCls = 2;// 选取距离最远的两个点a,b作为聚类中心点ArrayList<Integer> initials = new ArrayList<Integer>();for (; i < InstanceNumber; i++) {// 噪声点不参与计算if (noises.contains(i)) {continue;}j = i + 1;for (; j < InstanceNumber; j++) {// 噪声点不参与计算if (noises.contains(j)) {continue;}float newDis = calDis(data[i], data[j]);if (maxDis < newDis) {a = i;b = j;maxDis = newDis;}}}// initials添加初始聚类中心点序号a,binitials.add(a);initials.add(b);// classData添加聚类中心点data[a],data[b]classData[0] = data[a];classData[1] = data[b];// 新增两个聚类,并添加聚类成员ArrayList<Integer> resultOne = new ArrayList<Integer>();ArrayList<Integer> resultTwo = new ArrayList<Integer>();resultOne.add(a);resultTwo.add(b);resultList.add(resultOne);resultList.add(resultTwo);// 1、计算剩下每个点x与其他点的最小距离l,并记录Map<x,l>// 2、选取Map<x,l>中的最大l,并以对应的点x作为新的聚类中心while (alreadyCls < ClassCount) {i = j = 0;float maxMin = 0;int newClass = -1;for (; i < InstanceNumber; i++) {float min = 0;float newMin = 0;if (initials.contains(i)) {continue;}if (noises.contains(i)) {continue;}for (j = 0; j < alreadyCls; j++) {newMin = calDis(data[i], classData[j]);if (min == 0 || newMin < min) {min = newMin;}}if (min > maxMin) {maxMin = min;newClass = i;}}// initials添加新的聚类中心点序号newClassinitials.add(newClass);// classData添加新的聚类中心点data[newClass]classData[alreadyCls++] = data[newClass];// 新增一个聚类,并添加成员ArrayList<Integer> rslt = new ArrayList<Integer>();rslt.add(newClass);resultList.add(rslt);}}/*** 首次聚类分配* 点x到哪个聚类中心点最近,则划分到哪个聚类*/public void firstClassify() {for (int i = 0; i < InstanceNumber; i++) {float min = 0f;int clsId = -1;for (int j = 0; j < classData.length; j++) {// 欧式距离float newMin = calDis(classData[j], data[i]);if (clsId == -1 || newMin < min) {clsId = j;min = newMin;}}if (!resultList.get(clsId).contains(i)) {resultList.get(clsId).add(i);}}}// 迭代分类,直到各个类的数据不再变化public void Adjust() {// 记录是否发生变化boolean change = true;// 循环的次数int times = 1;while (change) {// 复位change = false;System.out.println("迭代" + (times++) + "次");// 重新计算每个类的均值for (int i = 0; i < ClassCount; i++) {// 原有的数据ArrayList<Integer> cls = resultList.get(i);// 新的均值float[] newMean = new float[FieldCount];// 计算均值for (Integer index : cls) {for (int j = 0; j < FieldCount; j++)newMean[j] += data[index][j];}for (int j = 0; j < FieldCount; j++) {newMean[j] /= cls.size();}if (!compareMean(newMean, classData[i])) {classData[i] = newMean;change = true;}}// 清空之前的数据for (ArrayList<Integer> cls : resultList) {cls.clear();}// 重新分配for (int i = 0; i < InstanceNumber; i++) {float min = 0f;int clsId = -1;for (int j = 0; j < classData.length; j++) {float newMin = calDis(classData[j], data[i]);if (clsId == -1 || newMin < min) {clsId = j;min = newMin;}}data[i][FieldCount] = clsId;resultList.get(clsId).add(i);}}}/*** 计算a样本和b样本的欧式距离作为不相似度** @param aVector 样本a* @param bVector 样本b* @return 欧式距离长度*/private float calDis(float[] aVector, float[] bVector) {double dis = 0;int i = 0;/* 最后一个数据在训练集中为结果,所以不考虑 */for (; i < aVector.length; i++)dis += Math.pow(bVector[i] - aVector[i], 2);dis = Math.pow(dis, 0.5);return (float) dis;}/*** 判断两个均值向量是否相等** @param a 向量a* @param b 向量b* @return*/private boolean compareMean(float[] a, float[] b) {if (a.length != b.length)return false;for (int i = 0; i < a.length; i++) {if (a[i] > 0 && b[i] > 0 && a[i] != b[i]) {return false;}}return true;}/*** 打印结果*/public void printConsole(){for (int i = 0; i < InstanceNumber; i++) {System.out.println(String.valueOf(data[i][FieldCount]).substring(0, 1));}// 统计每类的数目,打印到控制台for (int i = 0; i < ClassCount; i++) {System.out.println("第" + (i + 1) + "类数目: "+ resultList.get(i).size());}}}