# 导入需要的库
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
import torchvision
from torch.utils import data
from torchvision import transforms
# 获取和读取数据
def get_dataloader_workers() -> int:
    """Return the number of worker processes to use for data loading.

    Returns:
        0 when ``sys.platform`` starts with ``'win'`` (data is then loaded in
        the main process), otherwise 4.
    """
    if sys.platform.startswith('win'):
        return 0
    return 4
def load_data_fashion_mnist(batch_size, resize=None):
    """Download Fashion-MNIST and return training and test data iterators.

    Args:
        batch_size: Number of samples per mini-batch for both iterators.
        resize: Optional target size passed to ``transforms.Resize``. When
            omitted (or falsy) the images keep their original size.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``DataLoader`` objects. The
        training loader shuffles its samples; the test loader does not.
    """
    # Resize is placed ahead of ToTensor so it is applied to the loaded image
    # before the conversion to a tensor.
    steps = [transforms.ToTensor()]
    if resize:
        steps.insert(0, transforms.Resize(resize))
    transform = transforms.Compose(steps)

    mnist_train = torchvision.datasets.FashionMNIST(
        root="data", train=True, transform=transform, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="data", train=False, transform=transform, download=True)

    workers = get_dataloader_workers()
    return (data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=workers),
            data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=workers))
# Mini-batch size; reused below when calling train_ch3.
batch_size = 256
# Build the training and test iterators over Fashion-MNIST.
train_iter, test_iter = load_data_fashion_mnist(batch_size)
# 定义模型参数
# Fashion-MNIST图像为28×28像素,展平后输入个数为784;类别共10个,故输出个数为10。实验中,设超参数隐藏单元个数为256。
# Layer sizes: flattened input features, output classes, hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Weights are drawn from a normal distribution (mean 0, std 0.01); biases
# start at zero. Shapes: W1 (784, 256), b1 (256,), W2 (256, 10), b2 (10,).
W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)

# Tensors are created with requires_grad=False, so gradient tracking has to be
# switched on explicitly for every trainable parameter.
params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_(True)
# 定义激活函数
# 使用基础的max函数来实现ReLU,而非直接调用relu函数。
def relu(X):
    """Apply ReLU element-wise using the basic ``max`` operation.

    Args:
        X: Input tensor.

    Returns:
        A tensor shaped like ``X`` in which every negative entry is replaced
        by zero.
    """
    # Build the zero from X itself so it shares X's dtype and device; a bare
    # torch.tensor(0.0) is a float CPU scalar and would promote integer inputs.
    zero = X.new_zeros(())
    return torch.max(X, zero)
# 定义模型
def net(X):
    """Forward pass of the one-hidden-layer perceptron.

    Args:
        X: Batch of inputs; each sample is flattened to ``num_inputs`` values.

    Returns:
        Raw (un-normalised) class scores of shape ``(batch, num_outputs)``.
    """
    # reshape (unlike view) also accepts non-contiguous inputs.
    X = X.reshape(-1, num_inputs)
    H = relu(torch.matmul(X, W1) + b1)
    return torch.matmul(H, W2) + b2
# 定义损失函数
# Cross-entropy loss, applied to the raw scores returned by net (net does not
# apply softmax itself). No `reduction` argument is given, so the PyTorch
# default applies (documented as 'mean').
loss = torch.nn.CrossEntropyLoss()
# 训练模型
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    Args:
        data_iter: Iterable of ``(X, y)`` batches, where ``y`` holds the
            class index of each sample.
        net: Callable mapping ``X`` to per-class scores; the prediction is the
            ``argmax`` over dimension 1.

    Returns:
        The accuracy as a float, or 0.0 when ``data_iter`` yields no samples.
    """
    # An nn.Module is switched to evaluation mode for the duration of the
    # measurement and put back into training mode afterwards.
    was_training = isinstance(net, torch.nn.Module) and net.training
    if was_training:
        net.eval()

    correct, total = 0.0, 0
    try:
        # No gradients are needed for evaluation, so skip graph construction.
        with torch.no_grad():
            for X, y in data_iter:
                correct += (net(X).argmax(dim=1) == y).float().sum().item()
                total += y.shape[0]
    finally:
        if was_training:
            net.train()

    return correct / total if total else 0.0
# 3.2.6 定义优化算法
def sgd(params, lr, batch_size):
    """Apply one mini-batch SGD step in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate.
        batch_size: Divisor applied to every gradient.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # Nothing was back-propagated into this parameter.
                continue
            param -= lr * param.grad / batch_size
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` test batches.
        loss: Callable ``loss(y_hat, y)``; its result is summed to a scalar.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Passed to ``sgd`` as the gradient divisor.
        params: Tensors updated by ``sgd``; required when ``optimizer`` is None.
        lr: Learning rate for ``sgd``; required when ``optimizer`` is None.
        optimizer: Optional optimizer object; when given, its ``zero_grad`` and
            ``step`` are used instead of the hand-written ``sgd``.

    Raises:
        ValueError: If ``optimizer`` is None and ``params`` or ``lr`` is missing.
    """
    if optimizer is None and (params is None or lr is None):
        raise ValueError("train_ch3 needs either `optimizer` or both `params` and `lr`")

    # A loss object that reports reduction == 'mean' returns one per-batch
    # average, so it has to be weighted by the batch size before the running
    # total is divided by the sample count below.
    mean_reduced = getattr(loss, "reduction", None) == "mean"

    for epoch in range(num_epochs):
        # Running loss total, number of correct predictions, samples seen.
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()  # reduce to a scalar for backward()

            # Clear gradients left over from the previous step.
            if optimizer is not None:
                optimizer.zero_grad()
            else:
                for param in params:
                    if param.grad is not None:
                        param.grad.zero_()

            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)  # hand-written update
            else:
                optimizer.step()

            num_samples = y.shape[0]
            train_l_sum += l.item() * num_samples if mean_reduced else l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += num_samples

        test_acc = evaluate_accuracy(test_iter, net)
        denom = max(n, 1)  # avoid dividing by zero when train_iter is empty
        print('周期 %d, 损失 %.4f, 数据集准确率 %.3f, 测试集准确率 %.3f'
              % (epoch + 1, train_l_sum / denom, train_acc_sum / denom, test_acc))
# Number of training epochs and the learning rate handed to sgd.
# NOTE(review): lr = 100 looks large, but sgd divides every gradient by
# batch_size; if the loss is already a per-batch mean (the documented default
# of CrossEntropyLoss), the effective step size is lr / batch_size — confirm
# this is intended.
num_epochs, lr = 5, 100
train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)