LeNet-5是一个较简单的卷积神经网络。下图显示了其结构:输入的二维图像先经过两组"卷积层→池化层",再经过全连接层,最后使用softmax分类作为输出层。(原始LeNet-5的输入是灰度图像;下文代码使用的CIFAR10是3通道彩色图像,因此第一层卷积的输入通道数为3。)
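需要知道卷积后的矩阵尺寸大小计算公式:$N = (W - F + 2P) / S + 1$,其中 $W$ 为输入尺寸,$F$ 为卷积核大小,$P$ 为padding,$S$ 为步长。例如输入为32×32、卷积核为5×5、$P=0$、$S=1$ 时,输出为 $(32-5+0)/1+1=28$,即28×28。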
model.py
import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    """LeNet-style CNN for 3-channel 32x32 images (e.g. CIFAR-10).

    Two conv + max-pool stages are followed by three fully connected
    layers. The network returns raw class scores (logits); no softmax is
    applied here because ``nn.CrossEntropyLoss`` expects logits.

    Args:
        num_classes: Number of output classes. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return class logits of shape [N, num_classes] for input [N, 3, 32, 32]."""
        x = F.relu(self.conv1(x))  # input(3, 32, 32) -> output(16, 28, 28)
        x = self.pool1(x)          # output(16, 14, 14)
        x = F.relu(self.conv2(x))  # output(32, 10, 10)
        x = self.pool2(x)          # output(32, 5, 5)
        # Flatten per sample while keeping the batch dimension fixed, so a
        # wrongly sized input fails at fc1 instead of silently changing the
        # batch size (which view(-1, 32*5*5) would allow).
        x = x.view(x.size(0), -1)  # output(32*5*5)
        x = F.relu(self.fc1(x))    # output(120)
        x = F.relu(self.fc2(x))    # output(84)
        x = self.fc3(x)            # output(num_classes)
        return x
train.py
import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms

# CIFAR-10 class names, in label-index order.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


def main():
    """Train LeNet on CIFAR-10 for 5 epochs and save the weights to ./Lenet.pth.

    The dataset is expected to already exist under ./data (download=False).
    Every 500 training steps the mean training loss over those steps and the
    accuracy on the full validation set are printed.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # 50000 training images. Shuffle so every epoch sees the samples in a
    # different order instead of the fixed on-disk order.
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,
                                               shuffle=True, num_workers=0)

    # 10000 validation images, loaded as one single batch.
    val_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=False, transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=10000,
                                             shuffle=False, num_workers=0)
    # Fetch the validation images and labels once. Use the builtin next():
    # DataLoader iterators no longer provide a .next() method.
    val_image, val_label = next(iter(val_loader))

    net = LeNet()
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    for epoch in range(5):  # loop over the dataset multiple times
        running_loss = 0.0  # accumulated loss since the last report
        for step, data in enumerate(train_loader, start=0):
            # data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if step % 500 == 499:  # report every 500 mini-batches
                # no gradients are needed while evaluating
                with torch.no_grad():
                    outputs = net(val_image)  # [batch, 10]
                    predict_y = torch.max(outputs, dim=1)[1]
                    accuracy = (predict_y == val_label).sum().item() / val_label.size(0)
                print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / 500, accuracy))
                running_loss = 0.0

    print('Finished Training')

    # write the model weights file
    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)


if __name__ == '__main__':
    main()
训练数据集放在当前目录的data文件夹下,采用的是CIFAR10数据集;训练结束后会生成权重文件Lenet.pth。然后在网上找一张飞机的图片(保存为1.jpg)进行预测。
predict.py
import torch
import torchvision.transforms as transforms
from PIL import Image
from model import LeNet

# CIFAR-10 class names, in label-index order.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


def main():
    """Classify the image 1.jpg with the weights in Lenet.pth and print its class name."""
    transform = transforms.Compose([
        transforms.Resize((32, 32)),  # scale the image to 32x32
        transforms.ToTensor(),        # convert to a tensor
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # normalize
    ])

    net = LeNet()
    # load the weights file
    net.load_state_dict(torch.load('Lenet.pth'))

    # Load the image. Force 3 channels: grayscale, palette or RGBA files
    # would otherwise not match the 3-channel Normalize and conv1.
    with Image.open('1.jpg') as img:
        im = img.convert('RGB')
    # preprocess the image
    im = transform(im)  # [C, H, W]
    # add a leading batch dimension
    im = torch.unsqueeze(im, dim=0)  # [N, C, H, W]

    with torch.no_grad():
        outputs = net(im)
        # index of the highest score, as a plain Python int
        predict = torch.max(outputs, dim=1)[1].item()
        # To inspect per-class probabilities instead, apply
        # torch.softmax(outputs, dim=1) and print the result.
    print(classes[predict])


if __name__ == '__main__':
    main()
我是看了B站的一个up主才复现的这个网络,觉得up主讲的很好,推荐一波
视频地址为:https://www.bilibili.com/video/BV187411T7Ye