A Simple Implementation and Understanding of Gradient Descent

import torch
import numpy as np
import matplotlib.pyplot as plt

fig = plt.figure()
ax = plt.axes(projection="3d")

# Create a grid of a and b values
a_values = np.linspace(-20, 20, 100)
b_values = np.linspace(-20, 20, 100)
a, b = np.meshgrid(a_values, b_values)

# Sum of squared errors for fitting y = a*x + b to the two points (1, 2) and (3, 4)
SSE = (2 - a - b) ** 2 + (4 - 3 * a - b) ** 2

# Plot the loss surface and its contours projected onto the z = 0 plane
ax.plot_surface(a, b, SSE, cmap="rainbow")
ax.contour(a, b, SSE, zdir="z", offset=0, cmap="rainbow")

plt.show()
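# Not in the original post: a quick numeric check (on the grid defined above) that the
# surface bottoms out near the analytic minimum (a, b) = (1, 1), where both residuals vanish.
min_idx = np.unravel_index(np.argmin(SSE), SSE.shape)
print(a[min_idx], b[min_idx], SSE[min_idx])  # roughly 1.01, 1.01, and a value near 0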

# Start gradient descent from the initial guess (a, b) = (-1, -1)
a = torch.tensor(-1.0, requires_grad=True)
b = torch.tensor(-1.0, requires_grad=True)
s0 = torch.pow(2 - a - b, 2) + torch.pow(4 - 3 * a - b, 2)
s0.backward()
print(s0, a.grad, b.grad)
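# Not in the original: the autograd results can be checked against the hand-derived
# partial derivatives of s0 = (2 - a - b)^2 + (4 - 3a - b)^2.
with torch.no_grad():
    grad_a = -2 * (2 - a - b) - 6 * (4 - 3 * a - b)  # ds/da, equals -56 at a = b = -1
    grad_b = -2 * (2 - a - b) - 2 * (4 - 3 * a - b)  # ds/db, equals -24 at a = b = -1
    print(grad_a, grad_b)  # should match a.grad and b.grad above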

# Step against the gradient (learning rate 0.01)
with torch.no_grad():
    a -= 0.01 * a.grad
    b -= 0.01 * b.grad
    print(a, b)

    # Clear the accumulated gradients before the next backward pass
    a.grad.zero_()
    b.grad.zero_()

s1 = torch.pow(2 - a - b, 2) + torch.pow(4 - 3 * a - b, 2)
s1.backward()
print(s1, a.grad, b.grad)

# Step against the gradient again
with torch.no_grad():
    a -= 0.01 * a.grad
    b -= 0.01 * b.grad
    print(a, b)
    a.grad.zero_()
    b.grad.zero_()

s2 = torch.pow(2 - a - b, 2) + torch.pow(4 - 3 * a - b, 2)
s2.backward()
print(s2, a.grad, b.grad)
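# Not in the original: the printed losses shrink at every step (s0 = 80.0,
# s1 roughly 47.2, s2 roughly 27.9), confirming that each update moves (a, b)
# against the gradient.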
print("-" * 30)

# Simplify the code above into a loop
a = torch.tensor(-1.0, requires_grad=True)
b = torch.tensor(-1.0, requires_grad=True)
for i in range(5):
    s = torch.pow(2 - a - b, 2) + torch.pow(4 - 3 * a - b, 2)
    s.backward()
    with torch.no_grad():
        a -= 0.01 * a.grad
        b -= 0.01 * b.grad
        a.grad.zero_()
        b.grad.zero_()
        print(a, b, s)
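# Not in the original post: the manual loop above is vanilla gradient descent, so
# torch.optim.SGD with the same learning rate (and no momentum) performs the identical
# update; a sketch for comparison.
a = torch.tensor(-1.0, requires_grad=True)
b = torch.tensor(-1.0, requires_grad=True)
optimizer = torch.optim.SGD([a, b], lr=0.01)
for i in range(5):
    s = torch.pow(2 - a - b, 2) + torch.pow(4 - 3 * a - b, 2)
    optimizer.zero_grad()
    s.backward()
    optimizer.step()
    print(a, b, s)  # matches the manual updates printed above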

print("-" * 30)

# Use a torch.optim optimizer (Adam) instead of hand-written updates
a = torch.tensor(-1.0, requires_grad=True)
b = torch.tensor(-1.0, requires_grad=True)
optimizer = torch.optim.Adam([a, b], lr=0.01)
for i in range(100):
    s = torch.pow(2 - a - b, 2) + torch.pow(4 - 3 * a - b, 2)
    optimizer.zero_grad()
    s.backward()
    optimizer.step()
    print(i, a, b, s)
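The loss above is the squared error of fitting y = a*x + b through the points (1, 2) and (3, 4), so its minimizer can also be found in closed form. A small sketch (not part of the original post) confirms where the iterates should be heading:

# Solve a + b = 2 and 3a + b = 4 exactly; gradient descent should converge toward this
# point, where the loss is zero.
A = np.array([[1.0, 1.0], [3.0, 1.0]])
y = np.array([2.0, 4.0])
print(np.linalg.solve(A, y))  # [1. 1.]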

Original article: A Simple Implementation and Understanding of Gradient Descent. All rights reserved; please credit the source when reposting: 流沙团 ( https://gyarmy.com/post-832.html )
