
Morvan Python PyTorch study notes

PyTorch and NumPy

import torch
import numpy as np

# converting between numpy and torch
np_data = np.arange(6).reshape((2, 3))  # 2 rows, 3 columns
torch_data = torch.from_numpy(np_data)
tensor2array = torch_data.numpy()
print(
    '\nnumpy array:', np_data,          # [[0 1 2], [3 4 5]]
    '\ntorch tensor:', torch_data,      # 0 1 2 \n 3 4 5 [torch.LongTensor of size 2x3]
    '\ntensor to array:', tensor2array, # [[0 1 2], [3 4 5]]
)

If the data is a NumPy ndarray, use torch.from_numpy(data) directly.
If it is a plain Python list, use torch.FloatTensor(data).
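
A minimal sketch of both paths (variable names here are just illustrative); note that from_numpy shares memory with the original array, while FloatTensor copies the list:

import torch
import numpy as np

np_arr = np.array([1.0, 2.0, 3.0])
t_from_np = torch.from_numpy(np_arr)        # shares memory with np_arr
t_from_list = torch.FloatTensor([1, 2, 3])  # copies a plain Python list into a 32-bit float tensor

np_arr[0] = 100.0
print(t_from_np)    # tensor([100., 2., 3.], dtype=torch.float64) -- the change shows up here too
print(t_from_list)  # tensor([1., 2., 3.])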

# math operations

# abs: absolute value
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data)  # convert to a 32-bit float tensor
# or
# data = [-1, -2, 1, 2]
# tensor = torch.from_numpy(np.array(data))

print(
    '\nabs',
    '\nnumpy: ', np.abs(data),      # [1 2 1 2]
    '\ntorch: ', torch.abs(tensor)  # [1 2 1 2]
)

# sin: trigonometric function
print(
    '\nsin',
    '\nnumpy: ', np.sin(data),      # [-0.84147098 -0.90929743 0.84147098 0.90929743]
    '\ntorch: ', torch.sin(tensor)  # [-0.8415 -0.9093 0.8415 0.9093]
)

# mean
print(
    '\nmean',
    '\nnumpy: ', np.mean(data),      # 0.0
    '\ntorch: ', torch.mean(tensor)  # 0.0
)
# matrix multiplication
data = [[1, 2], [3, 4]]
tensor = torch.FloatTensor(data)  # convert to a 32-bit float tensor
# correct method
print(
    '\nmatrix multiplication (matmul)',
    '\nnumpy: ', np.matmul(data, data),    # [[7, 10], [15, 22]]
    '\ntorch: ', torch.mm(tensor, tensor)  # [[7, 10], [15, 22]]
)

# !!!! the method below is wrong !!!!
data = np.array(data)
print(
    '\nmatrix multiplication (dot)',
    '\nnumpy: ', data.dot(data),     # [[7, 10], [15, 22]]  this works in numpy
    '\ntorch: ', tensor.dot(tensor)  # old torch flattened this to [1,2,3,4].dot([1,2,3,4]) = 30.0
    # in current PyTorch, dot only accepts 1-D tensors, so this line simply raises an error
)

Variable

In Torch, a Variable is a container for values that keep changing. Think of it as a basket of eggs where the number of eggs keeps varying; the eggs inside are Torch Tensors. If you compute with a Variable, the result you get back is also a Variable of the same kind.

import torch
from torch.autograd import Variable

tensor = torch.FloatTensor([[1, 2], [3, 4]])
variable = Variable(tensor, requires_grad=True)  # gradients will be computed for this Variable
print(tensor)
print(variable)

tensor([[1., 2.],
[3., 4.]])
tensor([[1., 2.],
[3., 4.]], requires_grad=True)

print(tensor*tensor) # x^2

tensor([[ 1., 4.],
[ 9., 16.]])

t_out = torch.mean(tensor*tensor)       # x^2
v_out = torch.mean(variable*variable) # x^2
print(t_out)
print(v_out) # 7.5

tensor(7.5000)
tensor(7.5000, grad_fn=<MeanBackward0>)

When you compute with Variables, behind the scenes PyTorch quietly builds up a large structure, step by step, called a computational graph. This graph links all computation steps (nodes) together, so that during error backpropagation the update amounts (gradients) of all Variables can be computed in one pass; a plain tensor does not have this ability.

v_out.backward()    # backpropagate the error from v_out

# It's fine if the next two lines aren't clear yet; just remember that a Variable is part of the
# computational graph and can be used to propagate errors.
# v_out = 1/4 * sum(variable*variable)   is the v_out computation step in the graph
# so the gradient w.r.t. variable is d(v_out)/d(variable) = 1/4 * 2 * variable = variable/2

print(variable.grad)  # gradient of the Variable
'''
0.5000 1.0000
1.5000 2.0000
'''
print(variable)  # Variable form
"""
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
"""

print(variable.data)  # tensor form
"""
1 2
3 4
[torch.FloatTensor of size 2x2]
"""

print(variable.data.numpy())  # numpy form
"""
[[ 1. 2.]
[ 3. 4.]]
"""

Activation Functions

Exploding gradients and vanishing gradients

CNN -> ReLU

RNN -> ReLU or tanh

Nonlinear functions

import torch
import torch.nn.functional as F  # activation functions live here
from torch.autograd import Variable
import matplotlib.pyplot as plt

x = torch.linspace(-5, 5, 200)  # 200 evenly spaced points in [-5, 5]
x = Variable(x)

x_np = x.data.numpy()  # for plotting

# activation functions

y_relu = F.relu(x).data.numpy()
y_sigmoid = torch.sigmoid(x).data.numpy()  # F.sigmoid is deprecated
y_tanh = torch.tanh(x).data.numpy()        # F.tanh is deprecated
y_softplus = F.softplus(x).data.numpy()

plt.figure(1, figsize=(8, 6))
plt.subplot(221)
plt.plot(x_np, y_relu, c='red', label='relu')
plt.ylim((-1, 5))
plt.legend(loc='best')

plt.subplot(222)
plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
plt.ylim((-0.2, 1.2))
plt.legend(loc='best')

plt.subplot(223)
plt.plot(x_np, y_tanh, c='red', label='tanh')
plt.ylim((-1.2, 1.2))
plt.legend(loc='best')

plt.subplot(224)
plt.plot(x_np, y_softplus, c='red', label='softplus')
plt.ylim((-0.2, 6))
plt.legend(loc='best')

plt.show()
(Figure: ReLU, sigmoid, tanh, and softplus curves produced by the code above.)

Curve Fitting (Regression)

import torch
import torch.nn.functional as F  # activation functions live here

# turn the 1-D data into 2-D
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
# add noise
y = x.pow(2) + 0.2 * torch.rand(x.size())  # noisy y data (tensor), shape=(100, 1)


class Net(torch.nn.Module):
    def __init__(self, n_features, n_hidden, n_output):
        super(Net, self).__init__()
        # number of inputs and number of hidden neurons
        self.hidden = torch.nn.Linear(n_features, n_hidden)  # hidden layer
        self.predict = torch.nn.Linear(n_hidden, n_output)   # output layer; if only y is output, n_output=1

    def forward(self, x):
        x = F.relu(self.hidden(x))  # apply the activation function to the hidden layer
        x = self.predict(x)         # output value
        return x


net = Net(n_features=1, n_hidden=10, n_output=1)
print(net)

# SGD optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
loss_func = torch.nn.MSELoss()  # mean squared error, used for regression
for t in range(100):  # 100 training steps
    prediction = net(x)  # predicted values

    loss = loss_func(prediction, y)  # error between prediction and ground truth

    optimizer.zero_grad()  # reset all parameter gradients to zero
    loss.backward()        # backpropagation, compute parameter updates
    optimizer.step()       # apply the updates with learning rate lr
    # plotting can be added here to watch the network adjust (see the sketch below)
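
A minimal sketch of the plotting that the last comment refers to, reusing the net, optimizer, loss_func, x and y defined above (redrawing every 5 steps is just one choice):

import matplotlib.pyplot as plt

plt.ion()  # interactive mode so the figure can be updated in place
for t in range(100):
    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if t % 5 == 0:
        plt.cla()
        plt.scatter(x.data.numpy(), y.data.numpy())                    # the noisy data
        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=3)  # current fit
        plt.text(0.5, 0, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 12, 'color': 'red'})
        plt.pause(0.1)
plt.ioff()
plt.show()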

Classification

import torch
import torch.nn.functional as F  # activation functions live here
from torch.autograd import Variable
import matplotlib.pyplot as plt

# fake data
# 100 rows, 2 columns of ones
n_data = torch.ones(100, 2)  # base shape of the data
# normal distribution (means: per-element mean, std: standard deviation, out=None optional output tensor)
x0 = torch.normal(2 * n_data, 1)  # class 0 x data (tensor), shape=(100, 2)

# 1-D row vector
y0 = torch.zeros(100)              # class 0 y data (tensor), shape=(100, )
x1 = torch.normal(-2 * n_data, 1)  # class 1 x data (tensor), shape=(100, 2)
y1 = torch.ones(100)               # class 1 y data (tensor), shape=(100, )

# note: the x, y data must have exactly the form below (torch.cat concatenates the data)
# the 0 means concatenate along rows; when omitted, dim 0 is also the default
x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  # FloatTensor = 32-bit floating
y = torch.cat((y0, y1), ).type(torch.LongTensor)    # LongTensor = 64-bit integer

# two colors, one for y=0 and one for y=1
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
plt.show()

class Net(torch.nn.Module):
    def __init__(self, n_features, n_hidden, n_output):
        super(Net, self).__init__()
        # number of inputs and number of hidden neurons
        self.hidden = torch.nn.Linear(n_features, n_hidden)  # hidden layer
        self.predict = torch.nn.Linear(n_hidden, n_output)   # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))  # apply the activation function to the hidden layer
        x = self.predict(x)         # output value
        return x


net = Net(n_features=2, n_hidden=10, n_output=2)  # 2 classes, so n_output=2
# if the output is [0, 1] the prediction is class 1
# if the output is [1, 0] the prediction is class 0
print(net)

# SGD optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
loss_func = torch.nn.CrossEntropyLoss()  # for classification; applies softmax internally
# e.g. an output like [0.1, 0.2, 0.7] is treated as probabilities and the error is computed from it
for t in range(100):  # 100 training steps
    out = net(x)  # predicted values

    loss = loss_func(out, y)  # error between prediction and ground truth

    optimizer.zero_grad()  # reset all parameter gradients to zero
    loss.backward()        # backpropagation, compute parameter updates
    optimizer.step()       # apply the updates with learning rate lr
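
To check how well the two-unit output separates the classes, a small sketch (uses the net, x and y from above; torch.max picks the index of the larger output, i.e. the predicted class):

out = net(x)
pred_y = torch.max(out, 1)[1].data.numpy()  # index of the larger output = predicted class
target_y = y.data.numpy()
accuracy = float((pred_y == target_y).sum()) / float(target_y.size)
print('accuracy: %.2f' % accuracy)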

Quick Build (Sequential)

net2 = torch.nn.Sequential(
    torch.nn.Linear(2, 10),
    torch.nn.ReLU(),  # here ReLU is a layer class; the result is the same as the class-based Net
    torch.nn.Linear(10, 2)
)

Net(
  (hidden): Linear(in_features=2, out_features=10, bias=True)
  (predict): Linear(in_features=10, out_features=2, bias=True)
)
Sequential(
  (0): Linear(in_features=2, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=2, bias=True)
)

Saving and Loading

torch.manual_seed(1)  # reproducible: fixes the random initialization

# fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2 * torch.rand(x.size())               # noisy y data (tensor), shape=(100, 1)


def save():
    # build the network
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_func = torch.nn.MSELoss()

    # train
    for t in range(100):
        prediction = net1(x)
        loss = loss_func(prediction, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    torch.save(net1, 'net.pkl')                      # save the entire network
    torch.save(net1.state_dict(), 'net_params.pkl')  # save only the parameters (faster, uses less space)


def restore_net():
    # restore entire net1 to net2
    net2 = torch.load('net.pkl')
    prediction = net2(x)


def restore_params():
    # build a new net3
    net3 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )

    # copy the saved parameters into net3
    net3.load_state_dict(torch.load('net_params.pkl'))
    prediction = net3(x)


# save net1 (1. the whole network, 2. parameters only)
save()

# load the whole network
restore_net()

# load the parameters and copy them into a new network
restore_params()

Batch Training

DataLoader iterates over the data in mini-batches.

import torch
import torch.utils.data as Data

torch.manual_seed(1)  # reproducible

BATCH_SIZE = 8  # number of samples per batch

x = torch.linspace(1, 10, 10)  # x data (torch tensor)
y = torch.linspace(10, 1, 10)  # y data (torch tensor)

# first wrap the tensors in a Dataset that torch understands
# training data plus the targets used to compute the error
torch_dataset = Data.TensorDataset(x, y)

# put the dataset into a DataLoader
loader = Data.DataLoader(
    dataset=torch_dataset,  # torch TensorDataset format
    batch_size=BATCH_SIZE,  # mini batch size
    shuffle=True,           # whether to shuffle the data (usually a good idea)
    num_workers=0,          # number of worker processes for loading; 0 is the default
)

for epoch in range(3):  # train over the whole dataset 3 times
    # shuffle controls whether the data is reshuffled each epoch
    # enumerate provides the step index
    for step, (batch_x, batch_y) in enumerate(loader):  # each step the loader yields one mini-batch
        # this is where the training would happen...

        # print some data
        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
              batch_x.numpy(), '| batch y: ', batch_y.numpy())

Epoch: 0 | Step: 0 | batch x: [ 5. 7. 10. 3. 4. 2. 1. 8.] | batch y: [ 6. 4. 1. 8. 7. 9. 10. 3.]
Epoch: 0 | Step: 1 | batch x: [9. 6.] | batch y: [2. 5.]
Epoch: 1 | Step: 0 | batch x: [ 4. 6. 7. 10. 8. 5. 3. 2.] | batch y: [7. 5. 4. 1. 3. 6. 8. 9.]
Epoch: 1 | Step: 1 | batch x: [1. 9.] | batch y: [10. 2.]
Epoch: 2 | Step: 0 | batch x: [ 4. 2. 5. 6. 10. 3. 9. 1.] | batch y: [ 7. 9. 6. 5. 1. 8. 2. 10.]
Epoch: 2 | Step: 1 | batch x: [8. 7.] | batch y: [3. 4.]
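
The last step in each epoch only contains 2 samples because 10 is not divisible by BATCH_SIZE=8. As an aside, DataLoader has a drop_last flag for skipping such incomplete batches:

loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    drop_last=True,  # skip the final incomplete batch of 2 samples
)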

Optimizers

import torch
import torch.utils.data as Data
import torch.nn.functional as F
import matplotlib.pyplot as plt

torch.manual_seed(1)  # reproducible

LR = 0.01
BATCH_SIZE = 32
EPOCH = 12

# fake dataset
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
y = x.pow(2) + 0.1 * torch.normal(torch.zeros(*x.size()))

# plot dataset
plt.scatter(x.numpy(), y.numpy())
plt.show()

# use the data loader from the previous section
torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)


# the default network form
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1, 20)   # hidden layer
        self.predict = torch.nn.Linear(20, 1)  # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))  # activation function for hidden layer
        x = self.predict(x)         # linear output
        return x


# create one net per optimizer
net_SGD = Net()
net_Momentum = Net()
net_RMSprop = Net()
net_Adam = Net()
nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]

# different optimizers
opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]

loss_func = torch.nn.MSELoss()
losses_his = [[], [], [], []]  # record the training loss of each network

for epoch in range(EPOCH):  # EPOCH = 12
    print('Epoch: ', epoch)
    for step, (b_x, b_y) in enumerate(loader):

        # train the network that belongs to each optimizer
        for net, opt, l_his in zip(nets, optimizers, losses_his):  # all three are lists
            output = net(b_x)                # get output for every net
            loss = loss_func(output, b_y)    # compute loss for every net
            opt.zero_grad()                  # clear gradients for next train
            loss.backward()                  # backpropagation, compute gradients
            opt.step()                       # apply gradients
            l_his.append(loss.data.numpy())  # loss recorder
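
To compare the four optimizers afterwards, the recorded losses can be plotted; a small sketch using the losses_his lists filled above (plt is already imported):

labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
for i, l_his in enumerate(losses_his):
    plt.plot(l_his, label=labels[i])
plt.legend(loc='best')
plt.xlabel('Steps')
plt.ylabel('Loss')
plt.ylim((0, 0.2))
plt.show()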

CNN

A CNN gathers information from small patches of an image and summarizes it.

MNIST

The MNIST dataset is placed in the \MNIST\processed folder under root.

import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision  # dataset module
import matplotlib.pyplot as plt

# Hyper Parameters
EPOCH = 1        # how many times to train over the whole dataset; only once here to save time
BATCH_SIZE = 50
LR = 0.001       # learning rate
DOWNLOAD_MNIST = False  # set to False if you have already downloaded the MNIST data

train_data_ = torchvision.datasets.MNIST(
    root='./mnist',
    train=True,
    transform=torchvision.transforms.ToTensor(),  # converts the np array (pixels) to a tensor, (0,255) -> (0,1)
    download=DOWNLOAD_MNIST
)
# a digit image can be displayed like this
plt.imshow(train_data_.train_data[1].numpy(), cmap="gray")
plt.show()
print(train_data_)
print(train_data_.train_data)

Data Processing

import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision  # dataset module
import matplotlib.pyplot as plt

# Hyper Parameters
EPOCH = 1        # how many times to train over the whole dataset; only once here to save time
BATCH_SIZE = 50
LR = 0.001       # learning rate
DOWNLOAD_MNIST = False  # set to False if you have already downloaded the MNIST data

train_data_ = torchvision.datasets.MNIST(
    root='./mnist',
    train=True,
    transform=torchvision.transforms.ToTensor(),  # converts the np array (pixels) to a tensor, (0,255) -> (0,1)
    download=DOWNLOAD_MNIST
)

test_data_ = torchvision.datasets.MNIST(root='./mnist', train=False)
train_loader = Data.DataLoader(dataset=train_data_, batch_size=BATCH_SIZE, shuffle=True)

# to keep things fast, only the first 2000 test samples are used
# convert the integer pixels to floats and divide by 255
test_x = torch.unsqueeze(test_data_.test_data, dim=1).type(torch.FloatTensor)[:2000] / 255
test_y = test_data_.test_labels[:2000]
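
Note: recent torchvision versions deprecate the train_data / test_data / test_labels attributes in favor of .data and .targets; if the lines above warn or fail, an equivalent form is:

test_x = torch.unsqueeze(test_data_.data, dim=1).type(torch.FloatTensor)[:2000] / 255
test_y = test_data_.targets[:2000]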

Building the CNN Model

# CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            # input shape (1, 28, 28)
            nn.Conv2d(            # convolution layer; each filter extracts one kind of feature from a patch
                in_channels=1,    # input depth: 1 for grayscale, 3 for color
                out_channels=16,  # number of filters: 16 features extracted per patch
                kernel_size=5,    # filter is 5x5 pixels, so 25 pixels are scanned at a time
                stride=1,         # the filter moves 1 pixel per step
                padding=2,        # pad the border so edge pixels are also covered
                # to keep the image size unchanged after conv2d, use padding=(kernel_size-1)/2 when stride=1
                # output shape (16, 28, 28)
            ),
            nn.ReLU(),            # nonlinear activation  # (16, 28, 28)
            nn.MaxPool2d(         # pooling layer: keeps the strongest features
                kernel_size=2,    # 2x2 pooling: take the max of each 2x2 patch; halves width and height, depth unchanged
            )                     # (16, 14, 14)
        )
        self.conv2 = nn.Sequential(  # input (16, 14, 14)
            nn.Conv2d(16,  # in_channels = conv1 out_channels = 16
                      32,
                      5,   # kernel_size
                      1,
                      2, ),  # (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),  # (32, 7, 7)
        )

        self.out = nn.Linear(32 * 7 * 7, 10)  # input size, output size

    def forward(self, x):  # x is the input training data
        x = self.conv1(x)
        x = self.conv2(x)          # (batch, 32, 7, 7)
        x = x.view(x.size(0), -1)  # flatten: -1 collapses 32*7*7 into one dimension -> (batch, 32*7*7)
        output = self.out(x)
        return output


cnn = CNN()
print(cnn)

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)
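
To verify the shapes noted in the comments, the convolution output size is (in + 2*padding - kernel_size) / stride + 1, e.g. (28 + 2*2 - 5) / 1 + 1 = 28 for conv1, and each MaxPool2d(2) then halves the width and height. A quick check with a dummy batch (illustrative, not part of the original notes):

dummy = torch.zeros(1, 1, 28, 28)  # (batch, channel, height, width)
h1 = cnn.conv1(dummy)              # expected (1, 16, 14, 14) after conv + pool
h2 = cnn.conv2(h1)                 # expected (1, 32, 7, 7)
print(h1.shape, h2.shape)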

Training

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)  # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted

# training and testing
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):  # assign batch data; x is normalized when iterating train_loader
        output = cnn(b_x)              # cnn output
        loss = loss_func(output, b_y)  # cross entropy loss
        optimizer.zero_grad()          # clear gradients for this training step
        loss.backward()                # backpropagation, compute gradients
        optimizer.step()               # apply gradients

        if step % 50 == 0:
            test_output = cnn(test_x)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = float(sum(pred_y == test_y)) / test_y.size(0)
            print('Epoch:', epoch, '| train loss %.4f' % loss.data, '| test accuracy: %.2f' % accuracy)

Epoch: 0 | train loss 2.2979 | test accuracy: 0.13
Epoch: 0 | train loss 0.2706 | test accuracy: 0.84
Epoch: 0 | train loss 0.2919 | test accuracy: 0.89
Epoch: 0 | train loss 0.1879 | test accuracy: 0.92

……

Output

test_output = cnn(test_x[:10])
"""
torch.max is used in classification to pick the largest probability.
torch.max(input, dim)
input is a tensor, e.g. the output of a softmax
dim is the dimension to reduce: 0 gives the max of each column, 1 the max of each row
the function returns two tensors: the first holds the per-row maxima, the second the indices of those maxima
"""
pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')
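
A tiny standalone example of torch.max(input, dim) (the values are made up purely for illustration):

scores = torch.tensor([[0.1, 0.2, 0.7],
                       [0.9, 0.05, 0.05]])
values, indices = torch.max(scores, 1)
print(values)   # tensor([0.7000, 0.9000])  max of each row
print(indices)  # tensor([2, 0])            index of the max, i.e. the predicted class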

RNN

RNNs model the sequential relationships in data.

Classification

import torch
from torch import nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

torch.manual_seed(1)  # reproducible

# Hyper Parameters
EPOCH = 1        # how many times to train over the whole dataset; only once here to save time
BATCH_SIZE = 64
TIME_STEP = 28   # rnn time steps / image height: one image row is read per step
INPUT_SIZE = 28  # rnn input size per step / pixels per image row: each row has 28 pixels
LR = 0.01        # learning rate
DOWNLOAD_MNIST = True  # set to False if you have already downloaded the MNIST data

train_data = dsets.MNIST(root='./mnist', train=True, transform=transforms.ToTensor(), download=DOWNLOAD_MNIST)
test_data = dsets.MNIST(root='./mnist', train=False)
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

test_x = test_data.data.type(torch.FloatTensor)[:2000] / 255
test_y = test_data.targets[:2000]


class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.LSTM(  # a plain nn.RNN here would not give high accuracy
            input_size=INPUT_SIZE,  # pixels per image row
            hidden_size=64,
            batch_first=True,       # whether batch_size comes first in the input dimensions
        )

        self.out = nn.Linear(64, 10)

    def forward(self, x):
        # x shape (batch, time_step, input_size)
        # r_out shape (batch, time_step, output_size)
        # h_n shape (n_layers, batch, hidden_size)   LSTM has two hidden states: h_n is the branch line, h_c the main line
        # h_c shape (n_layers, batch, hidden_size)
        # None means there is no initial hidden state
        r_out, (h_n, h_c) = self.rnn(x, None)
        # there are 28 outputs, one per time step
        # keep only the last output (after all rows have been read)
        out = self.out(r_out[:, -1, :])  # (batch, time_step, input)
        # print(r_out)
        return out
        # I don't fully understand this part yet and need to look at other material


rnn = RNN()


optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all parameters
loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted

for epoch in range(EPOCH):
    for step, (x, b_y) in enumerate(train_loader):
        b_x = x.view(-1, 28, 28)       # reshape x to (batch, time_step, input_size)
        output = rnn(b_x)              # rnn output
        loss = loss_func(output, b_y)  # cross entropy loss
        optimizer.zero_grad()          # clear gradients for this training step
        loss.backward()                # backpropagation, compute gradients
        optimizer.step()               # apply gradients

        # every 50 steps
        if step % 50 == 0:
            test_output = rnn(test_x)
            # torch.max(input, dim): input is a tensor, e.g. the output of a softmax
            # dim is the dimension to reduce: 0 gives the max of each column, 1 the max of each row
            # it returns two tensors: the per-row maxima and the indices of those maxima
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            # tensor.size() returns (rows, columns)
            accuracy = float(sum(pred_y == test_y)) / test_y.size(0)
            print('Epoch:', epoch, '| train loss %.4f' % loss.data, '| test accuracy: %.2f' % accuracy)

test_output = rnn(test_x[:10].view(-1, 28, 28))
pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
print(pred_y, 'prediction number')
print(test_y[:10], 'real number')

RNN(
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear(in_features=64, out_features=10, bias=True)
)

Epoch: 0 | train loss 2.2883 | test accuracy: 0.10
Epoch: 0 | train loss 0.8795 | test accuracy: 0.57
Epoch: 0 | train loss 1.0830 | test accuracy: 0.76
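
To make the r_out / h_n / h_c shapes concrete, a small standalone check with dummy data (not from the original notes):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=28, hidden_size=64, batch_first=True)
dummy = torch.zeros(5, 28, 28)         # (batch, time_step, input_size)
r_out, (h_n, h_c) = lstm(dummy, None)  # None -> zero initial hidden and cell states
print(r_out.shape)  # torch.Size([5, 28, 64])  one output per time step
print(h_n.shape)    # torch.Size([1, 5, 64])   hidden state after the last step
print(h_c.shape)    # torch.Size([1, 5, 64])   cell state after the last step
# for this single-layer LSTM, r_out[:, -1, :] equals h_n[0], which is why the classifier uses the last time step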

Regression

Use sin to predict cos.

import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

# torch.manual_seed(1)  # reproducible

# Hyper Parameters
TIME_STEP = 10  # rnn time step
INPUT_SIZE = 1  # rnn input size
LR = 0.02       # learning rate

# show data
steps = np.linspace(0, np.pi * 2, 100, dtype=np.float32)  # float32 for converting to a torch FloatTensor
x_np = np.sin(steps)
y_np = np.cos(steps)
plt.plot(steps, y_np, 'r-', label='target (cos)')
plt.plot(steps, x_np, 'b-', label='input (sin)')
plt.legend(loc='best')
plt.show()


class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.RNN(
            input_size=INPUT_SIZE,  # 1: use the sin value to predict the cos value
            hidden_size=32,         # number of rnn hidden units
            num_layers=1,           # number of rnn layers
            batch_first=True,       # input & output have batch_size as the first dimension, e.g. (batch, time_step, input_size)
        )
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):
        # x (batch, time_step, input_size)
        # h_state (n_layers, batch, hidden_size)
        # r_out (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)  # the hidden state is fed back in as input for the next step
        # r_out contains the outputs of all time steps, while h_state only holds the last one

        outs = []  # save all predictions
        for time_step in range(r_out.size(1)):  # calculate output for each time step
            outs.append(self.out(r_out[:, time_step, :]))

        # turn the list into a tensor
        return torch.stack(outs, dim=1), h_state

        # instead, for simplicity, you can replace the code above as follows
        # r_out = r_out.view(-1, 32)
        # outs = self.out(r_out)
        # outs = outs.view(-1, TIME_STEP, 1)
        # return outs, h_state

        # or even simpler, since nn.Linear can accept inputs of any dimension
        # and returns outputs with the same dimensions except for the last
        # outs = self.out(r_out)
        # return outs


rnn = RNN()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None  # initial hidden state

plt.figure(1, figsize=(12, 5))
plt.ion()  # continuously plot

for step in range(100):
    start, end = step * np.pi, (step + 1) * np.pi  # time range
    # use sin to predict cos
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32,
                        endpoint=False)  # float32 for converting to a torch FloatTensor
    x_np = np.sin(steps)
    y_np = np.cos(steps)

    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])  # shape (batch, time_step, input_size)
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)  # rnn output
    # !! the next step is important !!
    h_state = h_state.data  # repack the hidden state, break the connection from the last iteration

    loss = loss_func(prediction, y)  # calculate loss
    optimizer.zero_grad()            # clear gradients for this training step
    loss.backward()                  # backpropagation, compute gradients
    optimizer.step()                 # apply gradients

    # plotting
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.data.numpy().flatten(), 'b-')
    plt.draw()
    plt.pause(0.05)

plt.ioff()
plt.show()
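
The h_state = h_state.data line is what breaks the graph between the 100 sliding windows; in current PyTorch the usual idiom for this "repacking" is detach(), e.g.:

h_state = h_state.detach()  # same effect: stop gradients from flowing back into earlier windows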

LSTM

LSTMs address the vanishing and exploding gradient problems of plain RNNs.

They add an input gate, an output gate, and a forget gate; the standard gate equations are shown below for reference.
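
For reference, the standard LSTM cell update (as documented for nn.LSTM), where $\sigma$ is the sigmoid and $\odot$ is elementwise multiplication:

$$
\begin{aligned}
i_t &= \sigma(W_i x_t + U_i h_{t-1} + b_i) && \text{input gate} \\
f_t &= \sigma(W_f x_t + U_f h_{t-1} + b_f) && \text{forget gate} \\
o_t &= \sigma(W_o x_t + U_o h_{t-1} + b_o) && \text{output gate} \\
g_t &= \tanh(W_g x_t + U_g h_{t-1} + b_g) && \text{candidate cell state} \\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$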

I feel I didn't fully understand the RNN part; I still need to go through additional material.

Author: Michelle19l
Link: https://gitee.com/michelle19l/michelle19l/2021/03/31/python/pytorch/morvan/
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 4.0 unless stated otherwise.