ML1: Minimizing Loss with Gradient Descent¶

In [265]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

Goals¶

  • Know how to minimize the linear regression MSE loss using gradient descent.

Setup¶

Recall our linear regression model is: $\hat{y_i} = wx_i + b$, where:

  • $(x_i, y_i)$ is an (input, output) pair, also known as (feature, label)
  • $w, b$ are the weight and bias parameters that we wish to optimize to find the best model
  • $\hat{y_i}$ is the model's predicted label for the data point $x_i$
In [266]:
def model(w, b, x):
    yhat = w * x + b
    return yhat

Recall our loss function is the mean squared error loss: $\text{MSE} = \frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2$

Plug the model (an expression for $\hat{y}_i$) into the loss, and we have:

$$ \frac{1}{n}\sum_{i=1}^{n}(y_i - (wx_i + b))^2 $$

In [267]:
def mse_loss(y, yhat):
    return np.mean((y - yhat) ** 2)

What does this expression depend on - or, what is it a function of?

  • The data $\{(x_1, y_1), (x_2, y_2), \ldots, (x_n, y_n)\}$, and
  • The parameters $w, b$

Since the data is what it is - it never changes as we try to fit it (thank goodness) - we will write the loss as a function of only what we have control over: the parameters $w, b$:

$$ \mathcal{L}(w, b) = \frac{1}{n}\sum_{i=1}^{n}(y_i - (wx_i + b))^2 $$

In [268]:
def loss_for_model(x, y, w, b):
    yhat = model(w, b, x)
    loss = mse_loss(y, yhat)
    return loss

Visualizing the loss¶

Since this is a function of two parameters, I can visualize it with a 2D heat map or a 3D surface plot. Let's create some data:

In [269]:
# Generate synthetic data
n_samples = 20
x = np.linspace(0, 5, n_samples)
# True relationship: y = 2*x + 1
y_true = 2 * x + 1
# Add noise
noise = np.random.normal(0, 0.8, n_samples)
y = y_true + noise

print(f"Generated {n_samples} data points")
print(f"x range: [{x.min():.2f}, {x.max():.2f}]")
print(f"y range: [{y.min():.2f}, {y.max():.2f}]")
print(f"\nTrue parameters: w=2, b=1")
Generated 20 data points
x range: [0.00, 5.00]
y range: [1.02, 11.86]

True parameters: w=2, b=1

Now, we'll brute-force compute the loss over a dense grid of choices of $(w, b)$:

In [270]:
# Create a grid of w and b values
w_range = np.linspace(0, 4, 50)
b_range = np.linspace(-2, 4, 50)
W, B = np.meshgrid(w_range, b_range)

# Compute loss for each (w, b) combination
Loss = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        w_val = W[i, j]
        b_val = B[i, j]
        Loss[i, j] = loss_for_model(x, y, w_val, b_val)

print(f"Loss surface computed: shape {Loss.shape}")
print(f"Loss range: [{Loss.min():.3f}, {Loss.max():.3f}]")
Loss surface computed: shape (50, 50)
Loss range: [0.376, 75.012]

...and visualize it:

In [271]:
%matplotlib widget

# Create 3D surface plot
fig = plt.figure(figsize=(12, 9))
ax = fig.add_subplot(111, projection='3d')

# Plot the surface
surf = ax.plot_surface(W, B, Loss, cmap='viridis', alpha=0.8, edgecolor='none')

# Mark the true minimum
w_true, b_true = 2.0, 1.0
loss_true = loss_for_model(x, y, w_true, b_true)
ax.scatter([w_true], [b_true], [loss_true], color='red', s=200, marker='*', 
           label='True Parameters', zorder=5, edgecolor='darkred', linewidth=2)

ax.set_xlabel('w (weight)', fontsize=11, labelpad=10)
ax.set_ylabel('b (bias)', fontsize=11, labelpad=10)
ax.set_zlabel('MSE Loss', fontsize=11, labelpad=10)
ax.set_title('Loss Landscape: MSE as a Function of (w, b)', fontsize=14, fontweight='bold', pad=20)

# Add colorbar
fig.colorbar(surf, ax=ax, label='Loss', pad=0.1)

# Set viewing angle
ax.view_init(elev=25, azim=45)

ax.legend(loc='upper left', fontsize=10)

plt.tight_layout()
plt.show()

print("Notice: The loss surface looks like a bowl with the minimum at the bottom!")
[Figure: 3D surface plot, "Loss Landscape: MSE as a Function of (w, b)"]
Notice: The loss surface looks like a bowl with the minimum at the bottom!
In [273]:
%matplotlib inline

# Create contour plot (top-down view of the surface)
fig, ax = plt.subplots(figsize=(10, 8))

# Contour plot
contour = ax.contourf(W, B, Loss, levels=30, cmap='viridis', alpha=0.8)
contours = ax.contour(W, B, Loss, levels=15, colors='white', alpha=0.3, linewidths=0.5)
ax.clabel(contours, inline=True, fontsize=8)

# Mark the true minimum
ax.scatter([w_true], [b_true], color='red', s=300, marker='*', 
           label='True Min (w=2, b=1)', zorder=5, edgecolor='darkred', linewidth=2)

ax.set_xlabel('w (weight)', fontsize=12)
ax.set_ylabel('b (bias)', fontsize=12)
ax.set_title('Loss Landscape Contour Plot: Top-Down View', fontsize=14, fontweight='bold')
cbar = fig.colorbar(contour, ax=ax, label='MSE Loss')

ax.legend(loc='upper right', fontsize=10)
ax.grid(True, alpha=0.2)

plt.tight_layout()
plt.show()

print("Contour lines connect points with the same loss.")
print("The minimum is at the center where the contours are tightest.")
[Figure: "Loss Landscape Contour Plot: Top-Down View"]
Contour lines connect points with the same loss.
The minimum is at the center where the contours are tightest.

How do we find the best $w$ and $b$?¶

Roll downhill. Repeat:

  1. Find the direction of the uphill slope
  2. Go the other way a bit (see the update rule just below)
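
In symbols, with a step size $\eta$ (the learning rate, lr in the code below), each repetition nudges the parameters against the slope of the loss:

$$ w \leftarrow w - \eta \frac{\partial \mathcal{L}}{\partial w}, \qquad b \leftarrow b - \eta \frac{\partial \mathcal{L}}{\partial b} $$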

Whiteboard:

  • 1D case: decide whether to go left or right using the sign of the derivative (a minimal code sketch follows this list)
  • 2D case: decide what direction to go in using the direction of the gradient
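
To make the 1D case concrete, here is a minimal sketch (not part of the lecture code) that minimizes a simple one-variable function, $f(w) = (w - 3)^2$, by repeatedly stepping in the direction opposite to its derivative (the sign of the derivative decides left vs. right); the function, starting point, and step size are made up for illustration.

In [ ]:
# Minimal 1D gradient descent: minimize f(w) = (w - 3)^2.
# The derivative f'(w) = 2 * (w - 3) points uphill, so we step the other way.
def f_prime(w):
    return 2 * (w - 3)

w = 0.0     # starting guess
lr = 0.1    # step size ("a bit")
for step in range(25):
    w = w - lr * f_prime(w)   # move opposite to the uphill slope

print(f"Ended near w = {w:.3f} (the true minimum is at w = 3)")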

Homework Problem 4

For a linear regression model under MSE loss, calculate the derivative of the loss with respect to each of the parameters $w$ and $b$. These are partial derivatives, so when differentiating with respect to $w$ we'll just treat $b$ as a constant, and vice versa.

Use the chain rule to break down the problem as follows:

  • $\frac{\partial \mathcal{L}}{\partial w} = \frac{\partial }{\partial \hat{y_i}} \left[ \frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y_i})^2\right] \cdot \frac{\partial}{\partial w} \hat{y_i}$ =

  • $\frac{\partial \mathcal{L}}{\partial b} = \frac{\partial }{\partial \hat{y_i}} \left[ \frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y_i})^2\right] \cdot \frac{\partial}{\partial b} \hat{y_i}$ =

After you've finished your calculations, plug them into the following functions. They'll be used in the visualizations below, so you can check if they're working!

In [278]:
def grad_wrt_w(yhat, x, y):
    # Exact derivative: dL/dw = (2/n) * sum((yhat - y) * x).
    # The constant 2 is dropped here; it only rescales the gradient
    # and can be absorbed into the learning rate.
    return (1/len(y)) * np.sum((yhat - y) * x)

def grad_wrt_b(yhat, x, y):
    # Exact derivative: dL/db = (2/n) * sum(yhat - y), with the 2 dropped as above.
    return (1/len(y)) * np.sum(yhat - y)
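
A quick way to "check if they're working" before looking at the plots is a finite-difference sanity check. This is a minimal sketch (not part of the lecture code): it assumes the x, y, model, loss_for_model, grad_wrt_w, and grad_wrt_b defined above, the test point and eps are arbitrary, and the analytic gradients are multiplied by 2 because the functions above drop that constant factor.

In [ ]:
# Finite-difference sanity check of the analytic gradients at an arbitrary (w0, b0).
eps = 1e-6
w0, b0 = 0.7, -0.3
yhat0 = model(w0, b0, x)

# Numeric partial derivatives of the loss (central differences)
num_dw = (loss_for_model(x, y, w0 + eps, b0) - loss_for_model(x, y, w0 - eps, b0)) / (2 * eps)
num_db = (loss_for_model(x, y, w0, b0 + eps) - loss_for_model(x, y, w0, b0 - eps)) / (2 * eps)

# Analytic gradients, rescaled by the factor of 2 omitted in grad_wrt_w / grad_wrt_b
print(f"dL/dw: numeric {num_dw:.5f} vs analytic {2 * grad_wrt_w(yhat0, x, y):.5f}")
print(f"dL/db: numeric {num_db:.5f} vs analytic {2 * grad_wrt_b(yhat0, x, y):.5f}")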
In [279]:
# Ensure inline plotting for these cells
%matplotlib inline

# State for gradient descent
state = {
    "w": 0.0,
    "b": 0.0,
    "lr": 0.05,
    "history": [],  # (step, loss, w, b)
    "step": 0,
}


def reset_state(w0=0.0, b0=0.0, lr=0.05):
    state["w"] = float(w0)
    state["b"] = float(b0)
    state["lr"] = float(lr)
    state["history"] = []
    state["step"] = 0
    print(f"State reset: w={state['w']:.3f}, b={state['b']:.3f}, lr={state['lr']}")


def plot_state():
    w, b, step = state["w"], state["b"], state["step"]
    yhat = model(w, b, x)
    loss = mse_loss(y, yhat)

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    # Left: data + current line
    ax = axes[0]
    ax.scatter(x, y, s=80, alpha=0.8, label="data", color="black")
    ax.plot(x, yhat, color="tab:blue", linewidth=2, label=f"line (w={w:.2f}, b={b:.2f})")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_title(f"Current fit (step {step}, loss {loss:.3f})")
    ax.legend()

    # Right: loss history
    ax = axes[1]
    if state["history"]:
        steps = [h[0] for h in state["history"]]
        losses = [h[1] for h in state["history"]]
        ax.plot(steps, losses, marker="o", color="tab:orange")
    ax.set_xlabel("step")
    ax.set_ylabel("loss")
    ax.set_title("Loss vs steps")
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


def step_gd(n_steps=1):
    """Take n_steps of gradient descent and plot after stepping."""
    for _ in range(n_steps):
        yhat = model(state['w'], state['b'], x) # calculate wx + b
        
        grad_w = grad_wrt_w(yhat, x, y)
        grad_b = grad_wrt_b(yhat, x, y)
        loss = mse_loss(y, yhat)
        state["w"] -= state["lr"] * grad_w
        state["b"] -= state["lr"] * grad_b
        state["step"] += 1
        state["history"].append((state["step"], loss, state["w"], state["b"]))
    plot_state()
    print(f"After step {state['step']}: w={state['w']:.4f}, b={state['b']:.4f}, loss={loss:.4f}")


# Initialize
reset_state(w0=0.0, b0=0.0, lr=0.05)
plot_state()
State reset: w=0.000, b=0.000, lr=0.05
[Figure: current fit (step 0) and loss-vs-steps plot]
In [316]:
# Run this cell to step through the algorithm:
step_gd(n_steps=1000)
[Figure: current fit and loss-vs-steps plot after stepping]
After step 2035: w=2.1148, b=0.7341, loss=0.3750

More odds and ends (whiteboard, if time):¶

  • Effect of learning rate
  • Stochastic, batch, and minibatch gradient descent (both bullets sketched briefly below)
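
As a starting point for both bullets, here is a minimal sketch (not part of the lecture code) that reruns plain gradient descent from $(w, b) = (0, 0)$ with a few different learning rates, optionally using a random minibatch at each step. It reuses model, loss_for_model, grad_wrt_w, grad_wrt_b, x, and y from above; the learning rates, batch size, and step count are arbitrary choices for illustration. With this data, expect the largest step size shown to be too big (the loss grows instead of shrinking), while the smallest converges only slowly.

In [ ]:
def run_gd(lr, n_steps=200, batch_size=None, seed=0):
    """Gradient descent from (w, b) = (0, 0). If batch_size is given,
    each step uses a random minibatch instead of the full dataset."""
    rng = np.random.default_rng(seed)
    w, b = 0.0, 0.0
    for _ in range(n_steps):
        if batch_size is None:
            xb, yb = x, y                                   # full batch
        else:
            idx = rng.choice(len(x), size=batch_size, replace=False)
            xb, yb = x[idx], y[idx]                         # random minibatch
        yhat = model(w, b, xb)
        w -= lr * grad_wrt_w(yhat, xb, yb)
        b -= lr * grad_wrt_b(yhat, xb, yb)
    return w, b, loss_for_model(x, y, w, b)

# Effect of learning rate (full-batch gradient descent)
for lr in [0.005, 0.05, 0.5]:
    w_end, b_end, loss_end = run_gd(lr)
    print(f"lr={lr:<6} -> w={w_end:.3g}, b={b_end:.3g}, loss={loss_end:.3g}")

# Minibatch version (batch_size=1 would be stochastic gradient descent)
w_end, b_end, loss_end = run_gd(lr=0.05, batch_size=4)
print(f"minibatch (size 4): w={w_end:.3f}, b={b_end:.3f}, loss={loss_end:.3f}")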
In [ ]: