目录
softmax从零开始实现
softmax使用pytorch实现
softmax从零开始实现
1.导入包
import torch
import torchvision
import numpy as np
import sys
sys.path.append("..") # 为了导入上层目录的d2lzh_pytorch
import d2lzh_pytorch as d2l

# Print the installed library versions so that runs can be reproduced.
print(torch.__version__)
print(torchvision.__version__)
2.获取和读取数据
# Number of samples per mini-batch; passed to the data loader here and to the
# training routine later on.
batch_size = 256
# d2l helper returning the Fashion-MNIST training and test iterators; both are
# consumed later as `for X, y in ...` loops over mini-batches.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
3.初始化模型参数
# Length of the flattened input vector and number of output classes.
num_inputs = 784
num_outputs = 10

# Weights are drawn from a normal distribution (mean 0, std 0.01);
# the bias starts at zero.
W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), dtype=torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)

# Both parameters are trained, so autograd must track them.
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
4.实现softmax运算
在PyTorch中,`sum` 可以只对矩阵中同一列(`dim=0`)或同一行(`dim=1`)的元素求和,并通过 `keepdim=True` 在结果中保留行和列这两个维度。
为了表达样本预测各个输出的概率,softmax运算会先通过 exp 函数对每个元素做指数运算,再对 exp 矩阵同行元素求和,最后令矩阵每行各元素与该行元素之和相除。这样一来,最终得到的矩阵每行元素和为1且非负。因此,该矩阵每行都是合法的概率分布。softmax运算的输出矩阵中的任意一行元素代表了一个样本在各个输出类别上的预测概率。
def softmax(X):
    """Apply softmax to every row of a 2-D tensor.

    Each element is exponentiated and divided by the sum of the exponentials
    in its row, so every output row is non-negative and sums to 1.

    Args:
        X: 2-D tensor of scores, one row per sample.

    Returns:
        Tensor of the same shape as ``X`` holding per-row probabilities.
    """
    # Softmax is invariant to adding a constant to a row. Subtracting the row
    # maximum keeps exp() from overflowing to inf (and the result from
    # becoming NaN) when the scores are large.
    shifted = X - X.max(dim=1, keepdim=True)[0]
    X_exp = shifted.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition  # broadcasting divides each row by its own sum
5.定义模型
def net(X):
    """Forward pass of the softmax-regression model.

    Reshapes ``X`` into rows of length ``num_inputs``, applies the affine map
    defined by the module-level ``W`` and ``b``, and feeds the result to
    ``softmax``.
    """
    flat = X.view((-1, num_inputs))
    scores = torch.mm(flat, W) + b
    return softmax(scores)
6.定义损失函数
def cross_entropy(y_hat, y):
    """Return the negative log of the value predicted for each true label.

    Args:
        y_hat: 2-D tensor with one row of predicted values per sample
            (presumably the probabilities produced by ``softmax``).
        y: Integer tensor of label indices, one per row of ``y_hat``.

    Returns:
        Column tensor with one loss value per sample.
    """
    label_index = y.view(-1, 1)
    # gather picks, for every row, the entry at that row's label index.
    picked = y_hat.gather(1, label_index)
    return -torch.log(picked)
7.计算分类准确率
def accuracy(y_hat, y):
    """Return the fraction of rows whose arg-max column equals the label.

    Args:
        y_hat: 2-D tensor of per-class scores, one row per sample.
        y: Tensor of label indices, one per row of ``y_hat``.

    Returns:
        The mean hit rate as a Python float.
    """
    predicted = y_hat.argmax(dim=1)
    hits = (predicted == y).float()
    return hits.mean().item()
def evaluate_accuracy(data_iter, net):
    """Return the accuracy of ``net`` over every batch in ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` mini-batches.
        net: Callable mapping a batch ``X`` to a 2-D tensor of per-class
            scores.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    acc_sum, n = 0.0, 0
    # Evaluation only reads predictions, so skip building autograd graphs.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n


# Accuracy on the test set at this point in the script.
print(evaluate_accuracy(test_iter, net))
8.训练模型
num_epochs, lr = 5, 0.1


# This function is also saved in the d2lzh_pytorch package for later reuse.
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores.
        train_iter: Iterable yielding ``(X, y)`` training mini-batches.
        test_iter: Iterable passed to ``evaluate_accuracy`` after each epoch.
        loss: Callable ``loss(y_hat, y)``; its result is summed before the
            backward pass.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Forwarded to ``d2l.sgd`` when no optimizer is supplied.
        params: Parameter tensors updated by ``d2l.sgd``; used only when
            ``optimizer`` is None.
        lr: Learning rate for ``d2l.sgd``; used only when ``optimizer`` is
            None.
        optimizer: Optional optimizer object; when given, it zeroes the
            gradients and performs the update step instead of ``d2l.sgd``.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Reset the gradients left over from the previous step.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                # Used by the concise softmax-regression implementation.
                optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))


train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)
9.预测
# Take the first mini-batch from the test iterator. The built-in next() is
# used because the iterator protocol does not guarantee a .next() method.
X, y = next(iter(test_iter))

# Convert the true and predicted label indices into text labels.
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
# Each title has the true label on the first line and the prediction below it.
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

# Display the first nine images with their titles.
d2l.show_fashion_mnist(X[0:9], titles[0:9])
softmax使用pytorch实现
1.导入包
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

# Print the installed PyTorch version so that runs can be reproduced.
print(torch.__version__)
2.获取和读取数据
使用Fashion-MNIST数据集和上一节中设置的批量大小。
# Number of samples per mini-batch; passed to the data loader here and to the
# training routine later on.
batch_size = 256
# d2l helper returning the Fashion-MNIST training and test iterators.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
3.定义和初始化模型
softmax回归的输出层是一个全连接层,所以我们用一个线性模块就可以了。因为前面数据迭代器返回的每个批量样本 x 的形状为 (batch_size, 1, 28, 28),所以我们要先用 view() 将 x 的形状转换成 (batch_size, 784),然后再送入全连接层。
from collections import OrderedDict

# Length of the flattened input vector and number of output classes.
num_inputs = 784
num_outputs = 10

# Equivalent single-module formulation, kept for reference:
# class LinearNet(nn.Module):
#     def __init__(self, num_inputs, num_outputs):
#         super(LinearNet, self).__init__()
#         self.linear = nn.Linear(num_inputs, num_outputs)
#     def forward(self, x):  # x shape: (batch, 1, 28, 28)
#         y = self.linear(x.view(x.shape[0], -1))
#         return y
#
# net = LinearNet(num_inputs, num_outputs)


class FlattenLayer(nn.Module):
    """Reshape a batch so every sample becomes a single flat row."""

    def forward(self, x):  # x shape: (batch, *, *, ...)
        # Keep the batch dimension and merge all remaining dimensions.
        return x.view(x.shape[0], -1)


net = nn.Sequential(
    # Unnamed alternative:
    # FlattenLayer(),
    # nn.Linear(num_inputs, num_outputs)
    # Naming the layers makes them reachable as net.flatten and net.linear.
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs)),
    ])
)

# Weights: normal with mean 0 and std 0.01; bias: constant 0.
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
4.softmax和交叉熵损失函数
PyTorch提供了一个包括softmax运算和交叉熵损失计算的函数。
# One loss module that performs both the softmax and the cross-entropy
# computation, so the network itself only has to output raw scores.
loss = nn.CrossEntropyLoss()
5.定义优化算法
使用学习率为0.1的小批量随机梯度下降作为优化算法。
# Mini-batch stochastic gradient descent over all parameters of `net`,
# with a learning rate of 0.1.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
6.训练模型
# Number of passes over the training set.
num_epochs = 5

# The optimizer performs the parameter updates, so the manual-SGD arguments
# (params, lr) are left as None.
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size,
    params=None, lr=None, optimizer=optimizer,
)