% ===== sigmoidGradient.m =====
% Gradient of the sigmoid function evaluated element-wise at z.
% For s = sigmoid(z), the derivative is s .* (1 - s).
s = sigmoid(z);     % evaluate sigmoid once instead of twice
g = s .* (1 - s);   % y*(1-y)
% ===== randInitializeWeights.m =====
% Random symmetry-breaking initialization for a layer with L_in inputs and
% L_out units: weights drawn uniformly from [-eps0, eps0]. The extra
% column (1 + L_in) holds the bias weights.
eps0 = 0.11;                                  % half-width of the init interval
W = rand(L_out, 1 + L_in) * 2 * eps0 - eps0;  % uniform on [-eps0, eps0]
% ===== nnCostFunction.m =====
% ---- Feed-forward pass and cost ----
I = eye(num_labels);         % identity matrix used to one-hot encode labels
Y = I(y, :);                 % m x num_labels matrix of 0/1 targets

a1 = [ones(m, 1) X];         % layer-1 activations with bias unit prepended
z2 = a1 * Theta1';
a2 = sigmoid(z2);            % hidden-layer activations
a2 = [ones(m, 1) a2];        % prepend bias unit (a2 always has m rows)
a3 = sigmoid(a2 * Theta2');  % output-layer activations (hypothesis)

% Unregularized cross-entropy cost averaged over the m examples.
J = sum(sum(-Y .* log(a3) - (1 - Y) .* log(1 - a3))) / m;

% Regularization term; the first (bias) column of each Theta is excluded.
regularized = lambda / (2 * m) * (sum(sum(Theta1(:, 2:end) .^ 2)) + sum(sum(Theta2(:, 2:end) .^ 2)));
J = J + regularized;

% ---- Back-propagation ----
delta3 = a3 - Y;                         % output-layer error
delta2 = delta3 * Theta2;
delta2 = delta2(:, 2:end);               % drop the bias column
delta2 = delta2 .* sigmoidGradient(z2);  % chain rule through the hidden layer

% Gradient accumulators, one per weight matrix.
Delta1 = zeros(size(Theta1));
Delta2 = zeros(size(Theta2));
Delta1 = Delta1 + delta2' * a1;
Delta2 = Delta2 + delta3' * a2;

% Regularized gradients: add the penalty to every column, then remove it
% again from the first column so bias weights are not regularized.
Theta1_grad = ((1/m) * Delta1) + ((lambda/m) * Theta1);
Theta2_grad = ((1/m) * Delta2) + ((lambda/m) * Theta2);
Theta1_grad(:, 1) = Theta1_grad(:, 1) - ((lambda/m) * (Theta1(:, 1)));
Theta2_grad(:, 1) = Theta2_grad(:, 1) - ((lambda/m) * (Theta2(:, 1)));