Chapter 6 of 7

Python Code

Each snippet is self-contained — copy it into a .py file or Jupyter notebook and run it directly.

Required packages: numpy, pandas, statsmodels, matplotlib

Fit the linear regression using statsmodels OLS.

All

Linear regression with statsmodels

import numpy as np
import pandas as pd
import statsmodels.api as sm

# Football player dataset: annual salary (M€) paired with market value (M€).
salaries = [
    3.6, 4.6, 5.2, 6.25, 6.67, 7.4, 7.8, 8.0,
    8.3, 8.8, 9.2, 9.5, 10.1, 10.5, 11.2,
]
market_values = [
    18, 20, 28, 22, 30, 35, 32, 25,
    32, 30, 37, 40, 38, 45, 42,
]
data = pd.DataFrame({"salary": salaries, "market_value": market_values})

# Regressors: salary plus a constant column so the fit includes an intercept.
X = sm.add_constant(data["salary"])

# Response: market value, regressed on the design matrix via OLS.
response = data["market_value"]
model = sm.OLS(response, X).fit()

# Print the complete results summary of the fitted model.
print(model.summary())

Recreate the scatter plot and fitted line using matplotlib.

Ch 1

Scatter Plot with Regression Line

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Football player dataset: salary and market value, both in M€.
salaries = [
    3.6, 4.6, 5.2, 6.25, 6.67, 7.4, 7.8, 8.0,
    8.3, 8.8, 9.2, 9.5, 10.1, 10.5, 11.2,
]
market_values = [
    18, 20, 28, 22, 30, 35, 32, 25,
    32, 30, 37, 40, 38, 45, 42,
]
data = pd.DataFrame({"salary": salaries, "market_value": market_values})

# OLS coefficients computed by hand:
#   slope     b1 = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean)^2)
#   intercept b0 = y_mean - b1 * x_mean
x = data["salary"].values
y = data["market_value"].values
x_mean, y_mean = x.mean(), y.mean()
x_dev = x - x_mean
y_dev = y - y_mean
b1 = np.sum(x_dev * y_dev) / np.sum(x_dev ** 2)
b0 = y_mean - b1 * x_mean

fig, ax = plt.subplots(figsize=(8, 5))

# Axis labels and title
ax.set_title("Salary vs Market Value", fontsize=14)
ax.set_xlabel("Annual Salary (M€)", fontsize=12)
ax.set_ylabel("Market Value (M€)", fontsize=12)

# One marker per player, drawn above the grid and the fitted line (zorder=3)
marker_style = {
    "color": "#3b82f6",
    "s": 60,
    "edgecolors": "white",
    "linewidth": 1,
    "zorder": 3,
    "label": "Players",
}
ax.scatter(data["salary"], data["market_value"], **marker_style)

# Fitted line, extended slightly beyond the observed salary range
line_start = data["salary"].min() * 0.9
line_end = data["salary"].max() * 1.05
x_line = np.linspace(line_start, line_end, 100)
y_line = b0 + b1 * x_line
line_label = f"OLS: ŷ = {b0:.1f} + {b1:.2f}x"
ax.plot(x_line, y_line, color="#ef4444", linewidth=2, label=line_label)

# Final touches, then render
ax.grid(alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

Built with SvelteKit + D3.js