
Morvan Python PyTorch study notes

PyTorch and NumPy

import torch
import numpy as np

# converting between numpy and torch
np_data = np.arange(6).reshape((2, 3))  # 2 rows, 3 columns
torch_data = torch.from_numpy(np_data)
tensor2array = torch_data.numpy()
print(
    '\nnumpy array:', np_data,          # [[0 1 2], [3 4 5]]
    '\ntorch tensor:', torch_data,      # 0 1 2 \n 3 4 5 [torch.LongTensor of size 2x3]
    '\ntensor to array:', tensor2array, # [[0 1 2], [3 4 5]]
)

If the data is a NumPy ndarray, use torch.from_numpy(data) directly.
If it is a plain Python list, use torch.FloatTensor(data).
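
A minimal sketch of both paths (variable names here are just illustrative); note that from_numpy shares memory with the original array, while FloatTensor copies the list:

import torch
import numpy as np

np_arr = np.array([1.0, 2.0, 3.0])
t_from_np = torch.from_numpy(np_arr)        # shares memory with np_arr
t_from_list = torch.FloatTensor([1, 2, 3])  # copies a plain Python list into a 32-bit float tensor

np_arr[0] = 100.0
print(t_from_np)    # tensor([100., 2., 3.], dtype=torch.float64) -- the change shows up here too
print(t_from_list)  # tensor([1., 2., 3.])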

# math operations

# abs: absolute value
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data)  # convert to a 32-bit float tensor
# or
# data = [-1, -2, 1, 2]
# tensor = torch.from_numpy(np.array(data))

print(
    '\nabs',
    '\nnumpy: ', np.abs(data),      # [1 2 1 2]
    '\ntorch: ', torch.abs(tensor)  # [1 2 1 2]
)

# sin: trigonometric function
print(
    '\nsin',
    '\nnumpy: ', np.sin(data),      # [-0.84147098 -0.90929743 0.84147098 0.90929743]
    '\ntorch: ', torch.sin(tensor)  # [-0.8415 -0.9093 0.8415 0.9093]
)

# mean
print(
    '\nmean',
    '\nnumpy: ', np.mean(data),      # 0.0
    '\ntorch: ', torch.mean(tensor)  # 0.0
)
# matrix multiplication
data = [[1, 2], [3, 4]]
tensor = torch.FloatTensor(data)  # convert to a 32-bit float tensor
# correct method
print(
    '\nmatrix multiplication (matmul)',
    '\nnumpy: ', np.matmul(data, data),    # [[7, 10], [15, 22]]
    '\ntorch: ', torch.mm(tensor, tensor)  # [[7, 10], [15, 22]]
)

# !!!! the method below is wrong !!!!
data = np.array(data)
print(
    '\nmatrix multiplication (dot)',
    '\nnumpy: ', data.dot(data),     # [[7, 10], [15, 22]]  this works in numpy
    '\ntorch: ', tensor.dot(tensor)  # old torch flattened this to [1,2,3,4].dot([1,2,3,4]) = 30.0
    # in current PyTorch, dot only accepts 1-D tensors, so this line simply raises an error
)

Variable

In Torch, a Variable is a container for values that keep changing. Think of it as a basket of eggs where the number of eggs keeps varying; the eggs inside are Torch Tensors. If you compute with a Variable, the result you get back is also a Variable of the same kind.

import torch
from torch.autograd import Variable

tensor = torch.FloatTensor([[1, 2], [3, 4]])
variable = Variable(tensor, requires_grad=True)  # gradients will be computed for this Variable
print(tensor)
print(variable)

tensor([[1., 2.],
[3., 4.]])
tensor([[1., 2.],
[3., 4.]], requires_grad=True)

print(tensor*tensor) # x^2

tensor([[ 1., 4.],
[ 9., 16.]])

t_out = torch.mean(tensor*tensor)       # x^2
v_out = torch.mean(variable*variable) # x^2
print(t_out)
print(v_out) # 7.5

tensor(7.5000)
tensor(7.5000, grad_fn=<MeanBackward0>)

When you compute with Variables, behind the scenes PyTorch quietly builds up a large structure, step by step, called a computational graph. This graph links all computation steps (nodes) together, so that during error backpropagation the update amounts (gradients) of all Variables can be computed in one pass; a plain tensor does not have this ability.

v_out.backward()    # backpropagate the error from v_out

# It's fine if the next two lines aren't clear yet; just remember that a Variable is part of the
# computational graph and can be used to propagate errors.
# v_out = 1/4 * sum(variable*variable)   is the v_out computation step in the graph
# so the gradient w.r.t. variable is d(v_out)/d(variable) = 1/4 * 2 * variable = variable/2

print(variable.grad)  # gradient of the Variable
'''
0.5000 1.0000
1.5000 2.0000
'''
print(variable)  # Variable form
"""
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
"""

print(variable.data)  # tensor form
"""
1 2
3 4
[torch.FloatTensor of size 2x2]
"""

print(variable.data.numpy())  # numpy form
"""
[[ 1. 2.]
[ 3. 4.]]
"""

Activation Functions

Exploding gradients and vanishing gradients

CNN -> ReLU

RNN -> ReLU or tanh

Nonlinear functions

import torch
import torch.nn.functional as F  # activation functions live here
from torch.autograd import Variable
import matplotlib.pyplot as plt

x = torch.linspace(-5, 5, 200)  # 200 evenly spaced points in [-5, 5]
x = Variable(x)

x_np = x.data.numpy()  # for plotting

# activation functions

y_relu = F.relu(x).data.numpy()
y_sigmoid = torch.sigmoid(x).data.numpy()  # F.sigmoid is deprecated
y_tanh = torch.tanh(x).data.numpy()        # F.tanh is deprecated
y_softplus = F.softplus(x).data.numpy()

plt.figure(1, figsize=(8, 6))
plt.subplot(221)
plt.plot(x_np, y_relu, c='red', label='relu')
plt.ylim((-1, 5))
plt.legend(loc='best')

plt.subplot(222)
plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
plt.ylim((-0.2, 1.2))
plt.legend(loc='best')

plt.subplot(223)
plt.plot(x_np, y_tanh, c='red', label='tanh')
plt.ylim((-1.2, 1.2))
plt.legend(loc='best')

plt.subplot(224)
plt.plot(x_np, y_softplus, c='red', label='softplus')
plt.ylim((-0.2, 6))
plt.legend(loc='best')

plt.show()
(Figure: ReLU, sigmoid, tanh, and softplus curves produced by the code above.)

Curve Fitting (Regression)

import torch
import torch.nn.functional as F  # activation functions live here

# turn the 1-D data into 2-D
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
# add noise
y = x.pow(2) + 0.2 * torch.rand(x.size())  # noisy y data (tensor), shape=(100, 1)


class Net(torch.nn.Module):
    def __init__(self, n_features, n_hidden, n_output):
        super(Net, self).__init__()
        # number of inputs and number of hidden neurons
        self.hidden = torch.nn.Linear(n_features, n_hidden)  # hidden layer
        self.predict = torch.nn.Linear(n_hidden, n_output)   # output layer; if only y is output, n_output=1

    def forward(self, x):
        x = F.relu(self.hidden(x))  # apply the activation function to the hidden layer
        x = self.predict(x)         # output value
        return x


net = Net(n_features=1, n_hidden=10, n_output=1)
print(net)

# SGD optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
loss_func = torch.nn.MSELoss()  # mean squared error, used for regression
for t in range(100):  # 100 training steps
    prediction = net(x)  # predicted values

    loss = loss_func(prediction, y)  # error between prediction and ground truth

    optimizer.zero_grad()  # reset all parameter gradients to zero
    loss.backward()        # backpropagation, compute parameter updates
    optimizer.step()       # apply the updates with learning rate lr
    # plotting can be added here to watch the network adjust (see the sketch below)
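
A minimal sketch of the plotting that the last comment refers to, reusing the net, optimizer, loss_func, x and y defined above (redrawing every 5 steps is just one choice):

import matplotlib.pyplot as plt

plt.ion()  # interactive mode so the figure can be updated in place
for t in range(100):
    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if t % 5 == 0:
        plt.cla()
        plt.scatter(x.data.numpy(), y.data.numpy())                    # the noisy data
        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=3)  # current fit
        plt.text(0.5, 0, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 12, 'color': 'red'})
        plt.pause(0.1)
plt.ioff()
plt.show()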

Classification

import torch
import torch.nn.functional as F  # activation functions live here
from torch.autograd import Variable
import matplotlib.pyplot as plt

# fake data
# 100 rows, 2 columns of ones
n_data = torch.ones(100, 2)  # base shape of the data
# normal distribution (means: per-element mean, std: standard deviation, out=None optional output tensor)
x0 = torch.normal(2 * n_data, 1)  # class 0 x data (tensor), shape=(100, 2)

# 1-D row vector
y0 = torch.zeros(100)              # class 0 y data (tensor), shape=(100, )
x1 = torch.normal(-2 * n_data, 1)  # class 1 x data (tensor), shape=(100, 2)
y1 = torch.ones(100)               # class 1 y data (tensor), shape=(100, )

# note: the x, y data must have exactly the form below (torch.cat concatenates the data)
# the 0 means concatenate along rows; when omitted, dim 0 is also the default
x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  # FloatTensor = 32-bit floating
y = torch.cat((y0, y1), ).type(torch.LongTensor)    # LongTensor = 64-bit integer

# two colors, one for y=0 and one for y=1
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
plt.show()

class Net(torch.nn.Module):
    def __init__(self, n_features, n_hidden, n_output):
        super(Net, self).__init__()
        # number of inputs and number of hidden neurons
        self.hidden = torch.nn.Linear(n_features, n_hidden)  # hidden layer
        self.predict = torch.nn.Linear(n_hidden, n_output)   # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))  # apply the activation function to the hidden layer
        x = self.predict(x)         # output value
        return x


net = Net(n_features=2, n_hidden=10, n_output=2)  # 2 classes, so n_output=2
# if the output is [0, 1] the prediction is class 1
# if the output is [1, 0] the prediction is class 0
print(net)

# SGD optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
loss_func = torch.nn.CrossEntropyLoss()  # for classification; applies softmax internally
# e.g. an output like [0.1, 0.2, 0.7] is treated as probabilities and the error is computed from it
for t in range(100):  # 100 training steps
    out = net(x)  # predicted values

    loss = loss_func(out, y)  # error between prediction and ground truth

    optimizer.zero_grad()  # reset all parameter gradients to zero
    loss.backward()        # backpropagation, compute parameter updates
    optimizer.step()       # apply the updates with learning rate lr
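
To check how well the two-unit output separates the classes, a small sketch (uses the net, x and y from above; torch.max picks the index of the larger output, i.e. the predicted class):

out = net(x)
pred_y = torch.max(out, 1)[1].data.numpy()  # index of the larger output = predicted class
target_y = y.data.numpy()
accuracy = float((pred_y == target_y).sum()) / float(target_y.size)
print('accuracy: %.2f' % accuracy)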

Quick Build (Sequential)

net2 = torch.nn.Sequential(
    torch.nn.Linear(2, 10),
    torch.nn.ReLU(),  # here ReLU is a layer class; the result is the same as the class-based Net
    torch.nn.Linear(10, 2)
)

Net(
  (hidden): Linear(in_features=2, out_features=10, bias=True)
  (predict): Linear(in_features=10, out_features=2, bias=True)
)
Sequential(
  (0): Linear(in_features=2, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=2, bias=True)
)

Saving and Loading

torch.manual_seed(1)  # reproducible: fixes the random initialization

# fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2 * torch.rand(x.size())               # noisy y data (tensor), shape=(100, 1)


def save():
    # build the network
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_func = torch.nn.MSELoss()

    # train
    for t in range(100):
        prediction = net1(x)
        loss = loss_func(prediction, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    torch.save(net1, 'net.pkl')                      # save the entire network
    torch.save(net1.state_dict(), 'net_params.pkl')  # save only the parameters (faster, uses less space)


def restore_net():
    # restore entire net1 to net2
    net2 = torch.load('net.pkl')
    prediction = net2(x)


def restore_params():
    # build a new net3
    net3 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )

    # copy the saved parameters into net3
    net3.load_state_dict(torch.load('net_params.pkl'))
    prediction = net3(x)


# save net1 (1. the whole network, 2. parameters only)
save()

# load the whole network
restore_net()

# load the parameters and copy them into a new network
restore_params()

Batch Training

DataLoader iterates over the data in mini-batches.

import torch
import torch.utils.data as Data

torch.manual_seed(1)  # reproducible

BATCH_SIZE = 8  # number of samples per batch

x = torch.linspace(1, 10, 10)  # x data (torch tensor)
y = torch.linspace(10, 1, 10)  # y data (torch tensor)

# first wrap the tensors in a Dataset that torch understands
# training data plus the targets used to compute the error
torch_dataset = Data.TensorDataset(x, y)

# put the dataset into a DataLoader
loader = Data.DataLoader(
    dataset=torch_dataset,  # torch TensorDataset format
    batch_size=BATCH_SIZE,  # mini batch size
    shuffle=True,           # whether to shuffle the data (usually a good idea)
    num_workers=0,          # number of worker processes for loading; 0 is the default
)

for epoch in range(3):  # train over the whole dataset 3 times
    # shuffle controls whether the data is reshuffled each epoch
    # enumerate provides the step index
    for step, (batch_x, batch_y) in enumerate(loader):  # each step the loader yields one mini-batch
        # this is where the training would happen...

        # print some data
        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
              batch_x.numpy(), '| batch y: ', batch_y.numpy())

Epoch: 0 | Step: 0 | batch x: [ 5. 7. 10. 3. 4. 2. 1. 8.] | batch y: [ 6. 4. 1. 8. 7. 9. 10. 3.]
Epoch: 0 | Step: 1 | batch x: [9. 6.] | batch y: [2. 5.]
Epoch: 1 | Step: 0 | batch x: [ 4. 6. 7. 10. 8. 5. 3. 2.] | batch y: [7. 5. 4. 1. 3. 6. 8. 9.]
Epoch: 1 | Step: 1 | batch x: [1. 9.] | batch y: [10. 2.]
Epoch: 2 | Step: 0 | batch x: [ 4. 2. 5. 6. 10. 3. 9. 1.] | batch y: [ 7. 9. 6. 5. 1. 8. 2. 10.]
Epoch: 2 | Step: 1 | batch x: [8. 7.] | batch y: [3. 4.]
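
The last step in each epoch only contains 2 samples because 10 is not divisible by BATCH_SIZE=8. As an aside, DataLoader has a drop_last flag for skipping such incomplete batches:

loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    drop_last=True,  # skip the final incomplete batch of 2 samples
)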

Optimizers

import torch
import torch.utils.data as Data
import torch.nn.functional as F
import matplotlib.pyplot as plt

torch.manual_seed(1)  # reproducible

LR = 0.01
BATCH_SIZE = 32
EPOCH = 12

# fake dataset
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
y = x.pow(2) + 0.1 * torch.normal(torch.zeros(*x.size()))

# plot dataset
plt.scatter(x.numpy(), y.numpy())
plt.show()

# use the data loader from the previous section
torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)


# the default network form
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1, 20)   # hidden layer
        self.predict = torch.nn.Linear(20, 1)  # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))  # activation function for hidden layer
        x = self.predict(x)         # linear output
        return x


# create one net per optimizer
net_SGD = Net()
net_Momentum = Net()
net_RMSprop = Net()
net_Adam = Net()
nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]

# different optimizers
opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]

loss_func = torch.nn.MSELoss()
losses_his = [[], [], [], []]  # record the training loss of each network

for epoch in range(EPOCH):  # EPOCH = 12
    print('Epoch: ', epoch)
    for step, (b_x, b_y) in enumerate(loader):

        # train the network that belongs to each optimizer
        for net, opt, l_his in zip(nets, optimizers, losses_his):  # all three are lists
            output = net(b_x)                # get output for every net
            loss = loss_func(output, b_y)    # compute loss for every net
            opt.zero_grad()                  # clear gradients for next train
            loss.backward()                  # backpropagation, compute gradients
            opt.step()                       # apply gradients
            l_his.append(loss.data.numpy())  # loss recorder
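
To compare the four optimizers afterwards, the recorded losses can be plotted; a small sketch using the losses_his lists filled above (plt is already imported):

labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
for i, l_his in enumerate(losses_his):
    plt.plot(l_his, label=labels[i])
plt.legend(loc='best')
plt.xlabel('Steps')
plt.ylabel('Loss')
plt.ylim((0, 0.2))
plt.show()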

CNN

A CNN gathers information from small patches of an image and summarizes it.

MNIST

The MNIST dataset is placed in the \MNIST\processed folder under root.

import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision  # dataset module
import matplotlib.pyplot as plt

# Hyper Parameters
EPOCH = 1        # how many times to train over the whole dataset; only once here to save time
BATCH_SIZE = 50
LR = 0.001       # learning rate
DOWNLOAD_MNIST = False  # set to False if you have already downloaded the MNIST data

train_data_ = torchvision.datasets.MNIST(
    root='./mnist',
    train=True,
    transform=torchvision.transforms.ToTensor(),  # converts the np array (pixels) to a tensor, (0,255) -> (0,1)
    download=DOWNLOAD_MNIST
)
# a digit image can be displayed like this
plt.imshow(train_data_.train_data[1].numpy(), cmap="gray")
plt.show()
print(train_data_)
print(train_data_.train_data)

Data Processing

import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision  # dataset module
import matplotlib.pyplot as plt

# Hyper Parameters
EPOCH = 1        # how many times to train over the whole dataset; only once here to save time
BATCH_SIZE = 50
LR = 0.001       # learning rate
DOWNLOAD_MNIST = False  # set to False if you have already downloaded the MNIST data

train_data_ = torchvision.datasets.MNIST(
    root='./mnist',
    train=True,
    transform=torchvision.transforms.ToTensor(),  # converts the np array (pixels) to a tensor, (0,255) -> (0,1)
    download=DOWNLOAD_MNIST
)

test_data_ = torchvision.datasets.MNIST(root='./mnist', train=False)
train_loader = Data.DataLoader(dataset=train_data_, batch_size=BATCH_SIZE, shuffle=True)

# to keep things fast, only the first 2000 test samples are used
# convert the integer pixels to floats and divide by 255
test_x = torch.unsqueeze(test_data_.test_data, dim=1).type(torch.FloatTensor)[:2000] / 255
test_y = test_data_.test_labels[:2000]
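
Note: recent torchvision versions deprecate the train_data / test_data / test_labels attributes in favor of .data and .targets; if the lines above warn or fail, an equivalent form is:

test_x = torch.unsqueeze(test_data_.data, dim=1).type(torch.FloatTensor)[:2000] / 255
test_y = test_data_.targets[:2000]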

Building the CNN Model

# CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            # input shape (1, 28, 28)
            nn.Conv2d(            # convolution layer; each filter extracts one kind of feature from a patch
                in_channels=1,    # input depth: 1 for grayscale, 3 for color
                out_channels=16,  # number of filters: 16 features extracted per patch
                kernel_size=5,    # filter is 5x5 pixels, so 25 pixels are scanned at a time
                stride=1,         # the filter moves 1 pixel per step
                padding=2,        # pad the border so edge pixels are also covered
                # to keep the image size unchanged after conv2d, use padding=(kernel_size-1)/2 when stride=1
                # output shape (16, 28, 28)
            ),
            nn.ReLU(),            # nonlinear activation  # (16, 28, 28)
            nn.MaxPool2d(         # pooling layer: keeps the strongest features
                kernel_size=2,    # 2x2 pooling: take the max of each 2x2 patch; halves width and height, depth unchanged
            )                     # (16, 14, 14)
        )
        self.conv2 = nn.Sequential(  # input (16, 14, 14)
            nn.Conv2d(16,  # in_channels = conv1 out_channels = 16
                      32,
                      5,   # kernel_size
                      1,
                      2, ),  # (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),  # (32, 7, 7)
        )

        self.out = nn.Linear(32 * 7 * 7, 10)  # input size, output size

    def forward(self, x):  # x is the input training data
        x = self.conv1(x)
        x = self.conv2(x)          # (batch, 32, 7, 7)
        x = x.view(x.size(0), -1)  # flatten: -1 collapses 32*7*7 into one dimension -> (batch, 32*7*7)
        output = self.out(x)
        return output


cnn = CNN()
print(cnn)

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)
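
To verify the shapes noted in the comments, the convolution output size is (in + 2*padding - kernel_size) / stride + 1, e.g. (28 + 2*2 - 5) / 1 + 1 = 28 for conv1, and each MaxPool2d(2) then halves the width and height. A quick check with a dummy batch (illustrative, not part of the original notes):

dummy = torch.zeros(1, 1, 28, 28)  # (batch, channel, height, width)
h1 = cnn.conv1(dummy)              # expected (1, 16, 14, 14) after conv + pool
h2 = cnn.conv2(h1)                 # expected (1, 32, 7, 7)
print(h1.shape, h2.shape)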

Training

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)  # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted

# training and testing
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):  # assign batch data; x is normalized when iterating train_loader
        output = cnn(b_x)              # cnn output
        loss = loss_func(output, b_y)  # cross entropy loss
        optimizer.zero_grad()          # clear gradients for this training step
        loss.backward()                # backpropagation, compute gradients
        optimizer.step()               # apply gradients

        if step % 50 == 0:
            test_output = cnn(test_x)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = float(sum(pred_y == test_y)) / test_y.size(0)
            print('Epoch:', epoch, '| train loss %.4f' % loss.data, '| test accuracy: %.2f' % accuracy)

Epoch: 0 | train loss 2.2979 | test accuracy: 0.13
Epoch: 0 | train loss 0.2706 | test accuracy: 0.84
Epoch: 0 | train loss 0.2919 | test accuracy: 0.89
Epoch: 0 | train loss 0.1879 | test accuracy: 0.92

……

Output

test_output = cnn(test_x[:10])
"""
torch.max is used in classification to pick the largest probability.
torch.max(input, dim)
input is a tensor, e.g. the output of a softmax
dim is the dimension to reduce: 0 gives the max of each column, 1 the max of each row
the function returns two tensors: the first holds the per-row maxima, the second the indices of those maxima
"""
pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')
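
A tiny standalone example of torch.max(input, dim) (the values are made up purely for illustration):

scores = torch.tensor([[0.1, 0.2, 0.7],
                       [0.9, 0.05, 0.05]])
values, indices = torch.max(scores, 1)
print(values)   # tensor([0.7000, 0.9000])  max of each row
print(indices)  # tensor([2, 0])            index of the max, i.e. the predicted class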

RNN

RNNs model the sequential relationships in data.

Classification

import torch
from torch import nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

torch.manual_seed(1)  # reproducible

# Hyper Parameters
EPOCH = 1        # how many times to train over the whole dataset; only once here to save time
BATCH_SIZE = 64
TIME_STEP = 28   # rnn time steps / image height: one image row is read per step
INPUT_SIZE = 28  # rnn input size per step / pixels per image row: each row has 28 pixels
LR = 0.01        # learning rate
DOWNLOAD_MNIST = True  # set to False if you have already downloaded the MNIST data

train_data = dsets.MNIST(root='./mnist', train=True, transform=transforms.ToTensor(), download=DOWNLOAD_MNIST)
test_data = dsets.MNIST(root='./mnist', train=False)
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

test_x = test_data.data.type(torch.FloatTensor)[:2000] / 255
test_y = test_data.targets[:2000]


class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.LSTM(  # a plain nn.RNN here would not give high accuracy
            input_size=INPUT_SIZE,  # pixels per image row
            hidden_size=64,
            batch_first=True,       # whether batch_size comes first in the input dimensions
        )

        self.out = nn.Linear(64, 10)

    def forward(self, x):
        # x shape (batch, time_step, input_size)
        # r_out shape (batch, time_step, output_size)
        # h_n shape (n_layers, batch, hidden_size)   LSTM has two hidden states: h_n is the branch line, h_c the main line
        # h_c shape (n_layers, batch, hidden_size)
        # None means there is no initial hidden state
        r_out, (h_n, h_c) = self.rnn(x, None)
        # there are 28 outputs, one per time step
        # keep only the last output (after all rows have been read)
        out = self.out(r_out[:, -1, :])  # (batch, time_step, input)
        # print(r_out)
        return out
        # I don't fully understand this part yet and need to look at other material


rnn = RNN()


optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all parameters
loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted

for epoch in range(EPOCH):
    for step, (x, b_y) in enumerate(train_loader):
        b_x = x.view(-1, 28, 28)       # reshape x to (batch, time_step, input_size)
        output = rnn(b_x)              # rnn output
        loss = loss_func(output, b_y)  # cross entropy loss
        optimizer.zero_grad()          # clear gradients for this training step
        loss.backward()                # backpropagation, compute gradients
        optimizer.step()               # apply gradients

        # every 50 steps
        if step % 50 == 0:
            test_output = rnn(test_x)
            # torch.max(input, dim): input is a tensor, e.g. the output of a softmax
            # dim is the dimension to reduce: 0 gives the max of each column, 1 the max of each row
            # it returns two tensors: the per-row maxima and the indices of those maxima
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            # tensor.size() returns (rows, columns)
            accuracy = float(sum(pred_y == test_y)) / test_y.size(0)
            print('Epoch:', epoch, '| train loss %.4f' % loss.data, '| test accuracy: %.2f' % accuracy)

test_output = rnn(test_x[:10].view(-1, 28, 28))
pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
print(pred_y, 'prediction number')
print(test_y[:10], 'real number')

RNN(
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear(in_features=64, out_features=10, bias=True)
)

Epoch: 0 | train loss 2.2883 | test accuracy: 0.10
Epoch: 0 | train loss 0.8795 | test accuracy: 0.57
Epoch: 0 | train loss 1.0830 | test accuracy: 0.76
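
To make the r_out / h_n / h_c shapes concrete, a small standalone check with dummy data (not from the original notes):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=28, hidden_size=64, batch_first=True)
dummy = torch.zeros(5, 28, 28)         # (batch, time_step, input_size)
r_out, (h_n, h_c) = lstm(dummy, None)  # None -> zero initial hidden and cell states
print(r_out.shape)  # torch.Size([5, 28, 64])  one output per time step
print(h_n.shape)    # torch.Size([1, 5, 64])   hidden state after the last step
print(h_c.shape)    # torch.Size([1, 5, 64])   cell state after the last step
# for this single-layer LSTM, r_out[:, -1, :] equals h_n[0], which is why the classifier uses the last time step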

Regression

Use sin to predict cos.

import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

# torch.manual_seed(1)  # reproducible

# Hyper Parameters
TIME_STEP = 10  # rnn time step
INPUT_SIZE = 1  # rnn input size
LR = 0.02       # learning rate

# show data
steps = np.linspace(0, np.pi * 2, 100, dtype=np.float32)  # float32 for converting to a torch FloatTensor
x_np = np.sin(steps)
y_np = np.cos(steps)
plt.plot(steps, y_np, 'r-', label='target (cos)')
plt.plot(steps, x_np, 'b-', label='input (sin)')
plt.legend(loc='best')
plt.show()


class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.RNN(
            input_size=INPUT_SIZE,  # 1: use the sin value to predict the cos value
            hidden_size=32,         # number of rnn hidden units
            num_layers=1,           # number of rnn layers
            batch_first=True,       # input & output have batch_size as the first dimension, e.g. (batch, time_step, input_size)
        )
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):
        # x (batch, time_step, input_size)
        # h_state (n_layers, batch, hidden_size)
        # r_out (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)  # the hidden state is fed back in as input for the next step
        # r_out contains the outputs of all time steps, while h_state only holds the last one

        outs = []  # save all predictions
        for time_step in range(r_out.size(1)):  # calculate output for each time step
            outs.append(self.out(r_out[:, time_step, :]))

        # turn the list into a tensor
        return torch.stack(outs, dim=1), h_state

        # instead, for simplicity, you can replace the code above as follows
        # r_out = r_out.view(-1, 32)
        # outs = self.out(r_out)
        # outs = outs.view(-1, TIME_STEP, 1)
        # return outs, h_state

        # or even simpler, since nn.Linear can accept inputs of any dimension
        # and returns outputs with the same dimensions except for the last
        # outs = self.out(r_out)
        # return outs


rnn = RNN()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None  # initial hidden state

plt.figure(1, figsize=(12, 5))
plt.ion()  # continuously plot

for step in range(100):
    start, end = step * np.pi, (step + 1) * np.pi  # time range
    # use sin to predict cos
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32,
                        endpoint=False)  # float32 for converting to a torch FloatTensor
    x_np = np.sin(steps)
    y_np = np.cos(steps)

    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])  # shape (batch, time_step, input_size)
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)  # rnn output
    # !! the next step is important !!
    h_state = h_state.data  # repack the hidden state, break the connection from the last iteration

    loss = loss_func(prediction, y)  # calculate loss
    optimizer.zero_grad()            # clear gradients for this training step
    loss.backward()                  # backpropagation, compute gradients
    optimizer.step()                 # apply gradients

    # plotting
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.data.numpy().flatten(), 'b-')
    plt.draw()
    plt.pause(0.05)

plt.ioff()
plt.show()
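
The h_state = h_state.data line is what breaks the graph between the 100 sliding windows; in current PyTorch the usual idiom for this "repacking" is detach(), e.g.:

h_state = h_state.detach()  # same effect: stop gradients from flowing back into earlier windows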

LSTM

LSTMs address the vanishing and exploding gradient problems of plain RNNs.

They add an input gate, an output gate, and a forget gate; the standard gate equations are shown below for reference.
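
For reference, the standard LSTM cell update (as documented for nn.LSTM), where $\sigma$ is the sigmoid and $\odot$ is elementwise multiplication:

$$
\begin{aligned}
i_t &= \sigma(W_i x_t + U_i h_{t-1} + b_i) && \text{input gate} \\
f_t &= \sigma(W_f x_t + U_f h_{t-1} + b_f) && \text{forget gate} \\
o_t &= \sigma(W_o x_t + U_o h_{t-1} + b_o) && \text{output gate} \\
g_t &= \tanh(W_g x_t + U_g h_{t-1} + b_g) && \text{candidate cell state} \\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$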

I feel I didn't fully understand the RNN part; I still need to go through additional material.

Author: Michelle19l
Link: https://gitee.com/michelle19l/michelle19l/2021/03/31/python/pytorch/morvan/
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 4.0 unless stated otherwise.