AdaBoost-code_综合

在这里插入图片描述
link
创建一个Adaboost_Demonstration类：
初始化参数：
学习率learning_rate；每个样本的初始权重均为1/N（N为样本个数）

class Adaboost_Demonstration:
    """Step-by-step AdaBoost demonstration built from weak classifiers.

    The instance keeps the training data, the current per-sample weights and
    the list of fitted weak classifiers together with their weights (alphas).
    """

    def __init__(self, X, y, learning_rate=1.):
        """Store the training data and initialise the boosting state.

        :param X: data points, an N*2 matrix
        :param y: labels of the data points, a 1-D vector whose values are
            only 1 or -1
        :param learning_rate: shrinkage applied to every weak classifier's
            weight, used to reduce over-fitting
        :raises ValueError: if X is empty or X and y differ in length
        """
        # Coerce to arrays so that list inputs also work in the element-wise
        # arithmetic performed on the labels later on (no copy for ndarrays).
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        if len(self.X) == 0:
            raise ValueError("X must contain at least one sample")
        if len(self.X) != len(self.y):
            raise ValueError("X and y must have the same number of samples")
        # Shrinkage applied to each weak classifier, to avoid over-fitting
        self.learning_rate = learning_rate
        # Number of samples
        self.num_samples = len(self.X)
        # Every sample starts with the same weight 1/N
        self.sample_weight = np.full(self.num_samples, 1 / self.num_samples)
        # Python list holding all fitted weak classifier objects
        self.classifiers = []
        # Error rate of each boosting step
        self.errors_list = []
        # Weight (alpha) of each weak classifier, parallel to self.classifiers
        self.alphas = []

将构建的弱分类器对象，和弱分类器的权重append到列表
预测：每次预测的时候，将所有弱分类器的预测结果*权重，
结合策略：再对结果用sign取符号或者取平均

	def predict(self, data=None, labels=None, reduction="sign"):
	    """Predict the class of data points with the weighted ensemble.

	    :param data: points to classify; when None the training data self.X
	        is used and self.y is used as labels
	    :param labels: optional true labels, used only to compute the f1 score
	    :param reduction: "sign" takes the sign of the alpha-weighted sum of
	        the weak classifiers' outputs, "mean" divides that sum by the
	        number of weak classifiers
	    :return: (predictions, f1) when labels are available and reduction is
	        "sign"; otherwise only predictions
	    """
	    if data is None:
	        data = self.X
	        labels = self.y
	    # Alpha-weighted linear combination of the weak classifiers' outputs
	    predictions = np.zeros(len(data), dtype=float)
	    for classifier, alpha in zip(self.classifiers, self.alphas):
	        predictions += alpha * classifier.predict(data)
	    if reduction == "sign":
	        predictions = np.sign(predictions)
	    elif reduction == "mean" and self.classifiers:
	        # Guarded: with no classifiers yet the sum is all zeros and
	        # dividing by zero would turn it into NaN.
	        predictions /= len(self.classifiers)
	    # Report the f1 score when it can be computed
	    if labels is not None and reduction == "sign":
	        # f1_score expects (y_true, y_pred) in this order
	        f1 = f1_score(labels, predictions)
	        return predictions, f1
	    return predictions

定义弱分类器，直接调用sklearn的决策树，实例化对象之后预测结果

再计算错误率error_rate（按样本权重加权的错误比例）：例如在各样本权重相等时，预测10个结果，猜错了4个，则错误率为0.4

# Weighted error rate of the weak classifier: the average of the
# "prediction differs from label" indicator, weighted by self.sample_weight.
error_rate = np.mean(np.average((predictions != self.y), weights=self.sample_weight))

计算alpha，即当前弱分类器的权重：

# Weight of the current weak classifier: half the log-odds of being correct,
# scaled by the learning rate.
# NOTE(review): divides by zero when error_rate is 0 — confirm whether a
# perfect weak classifier can occur on the data used.
alpha = self.learning_rate * (np.log((1 - error_rate) / error_rate)) / 2

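$$\alpha_t = \eta \cdot \frac{1}{2}\ln\frac{1-\epsilon_t}{\epsilon_t}$$
其中 $\epsilon_t$ 为第t个弱分类器的加权错误率error_rate，$\eta$ 为学习率learning_rate。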

计算W_{t+1}，即下一轮训练弱分类器时使用的样本权重：

# Compute the sample weights for step t+1: each weight is multiplied by
# exp(-alpha * y * h(x)), where h(x) is the weak classifier's prediction
self.sample_weight *= np.exp(-alpha * self.y * predictions)
# Normalise; the normalisation factor Z is sum(self.sample_weight)
self.sample_weight /= np.sum(self.sample_weight)

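$$w_{t+1,i} = \frac{w_{t,i}\,\exp\left(-\alpha_t\, y_i\, h_t(x_i)\right)}{Z_t}$$
其中归一化因子Z为更新后所有样本权重之和：
$$Z_t = \sum_{i=1}^{N} w_{t,i}\,\exp\left(-\alpha_t\, y_i\, h_t(x_i)\right)$$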

	def __next__(self, reduction="mean", plot=True, plot_mode="2d"):
	    """Run one boosting step: fit a weak classifier and re-weight samples.

	    :param reduction: not used by this method; kept so existing callers
	        that pass it keep working
	    :param plot: when True, return the result of self.contour_plot for the
	        current ensemble; when False, return the f1 score on the
	        training data
	    :param plot_mode: forwarded to self.contour_plot as ``mode``
	    :return: the value of self.contour_plot(...) if plot is True,
	        otherwise the f1 score
	    """
	    # Weak classifier: sklearn decision tree with max_depth=1,
	    # i.e. a one-level tree (decision stump)
	    classifier = DecisionTreeClassifier(max_depth=1)
	    # Fit the weak classifier using the current sample weights
	    classifier.fit(self.X, self.y, sample_weight=self.sample_weight)
	    # The weak classifier's output on the training data, i.e. h(x)
	    predictions = classifier.predict(self.X)
	    # Weighted error rate of this weak classifier
	    error_rate = np.average(predictions != self.y, weights=self.sample_weight)
	    # Record the error rate of this step
	    self.errors_list.append(error_rate)
	    # Keep the error rate strictly inside (0, 1): a value of exactly 0 or 1
	    # would make alpha infinite and turn the sample weights into NaN.
	    eps = np.finfo(float).eps
	    bounded_error = np.clip(error_rate, eps, 1 - eps)
	    # Weight of this weak classifier
	    alpha = self.learning_rate * np.log((1 - bounded_error) / bounded_error) / 2
	    # Sample weights for step t+1
	    self.sample_weight *= np.exp(-alpha * self.y * predictions)
	    # Normalise; the normalisation factor Z is sum(self.sample_weight)
	    self.sample_weight /= np.sum(self.sample_weight)
	    # Record the weak classifier and its weight
	    self.classifiers.append(classifier)
	    self.alphas.append(alpha)
	    # f1 score of the ensemble so far on the training data
	    _, f1 = self.predict()
	    if plot:
	        return self.contour_plot(
	            title="adaboost step " + str(len(self.classifiers)) + " f1 score: {:.2f}".format(f1),
	            mode=plot_mode,
	        )
	    return f1

if __name__ == '__main__':
    # Test run on the two-moons toy data set
    X, y = make_moons(n_samples=300, noise=0.2, random_state=3)
    # The model expects labels 1 / -1, so remap label 0 to -1
    y[y == 0] = -1
    model = Adaboost_Demonstration(X, y)
    # 100 boosting steps without plotting the intermediate results
    for _ in range(100):
        model.__next__(plot=False)
    model.contour_plot(mode="3d")

在这里插入图片描述