Chapter 6 of 7
Python Code
Each snippet is self-contained — copy it into a .py file or Jupyter notebook and run it directly.
Required libraries: numpy, pandas, statsmodels, matplotlib
Run a linear regression using statsmodels OLS.
All
Linear regression with statsmodels
import numpy as np
import pandas as pd
import statsmodels.api as sm
# Football player dataset: salary (M€) vs market value (M€).
# One entry per player; the two lists are aligned by position.
salary_values = [
    3.6, 4.6, 5.2, 6.25, 6.67, 7.4, 7.8, 8.0,
    8.3, 8.8, 9.2, 9.5, 10.1, 10.5, 11.2,
]
market_values = [
    18, 20, 28, 22, 30, 35, 32, 25,
    32, 30, 37, 40, 38, 45, 42,
]
data = pd.DataFrame(
    {
        "salary": salary_values,
        "market_value": market_values,
    }
)
# Add a constant column to the predictor so the model estimates an intercept
X = sm.add_constant(data["salary"])
# Specify the OLS regression of market value on salary, then fit it
ols_spec = sm.OLS(data["market_value"], X)
model = ols_spec.fit()
# Full summary
print(model.summary())
Recreate the scatter plot and fitted line using matplotlib.
Ch 1
Scatter Plot with Regression Line
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Football player dataset.
# One entry per player; the two lists are aligned by position.
salary_values = [
    3.6, 4.6, 5.2, 6.25, 6.67, 7.4, 7.8, 8.0,
    8.3, 8.8, 9.2, 9.5, 10.1, 10.5, 11.2,
]
market_values = [
    18, 20, 28, 22, 30, 35, 32, 25,
    32, 30, 37, 40, 38, 45, 42,
]
data = pd.DataFrame(
    {
        "salary": salary_values,
        "market_value": market_values,
    }
)

# OLS coefficients, computed in closed form:
#   slope     b1 = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean)^2)
#   intercept b0 = y_mean - b1 * x_mean
x = data["salary"].to_numpy()
y = data["market_value"].to_numpy()
x_mean = x.mean()
y_mean = y.mean()

# Deviations of each observation from its column mean
x_dev = x - x_mean
y_dev = y - y_mean

b1 = np.sum(x_dev * y_dev) / np.sum(x_dev ** 2)
b0 = y_mean - b1 * x_mean
# Draw the observations and the fitted line on a single set of axes
fig, ax = plt.subplots(figsize=(8, 5))

# Scatter points: one marker per player
salary_col = data["salary"]
ax.scatter(
    salary_col,
    data["market_value"],
    color="#3b82f6",
    s=60,
    edgecolors="white",
    linewidth=1,
    zorder=3,
    label="Players",
)

# Regression line, evaluated at 100 evenly spaced salaries running from
# 0.9 x the smallest observed salary to 1.05 x the largest
x_line = np.linspace(salary_col.min() * 0.9, salary_col.max() * 1.05, 100)
y_line = b0 + b1 * x_line
fit_label = f"OLS: ŷ = {b0:.1f} + {b1:.2f}x"
ax.plot(x_line, y_line, color="#ef4444", linewidth=2, label=fit_label)

# Axis labels, title, legend and a faint grid
ax.set_xlabel("Annual Salary (M€)", fontsize=12)
ax.set_ylabel("Market Value (M€)", fontsize=12)
ax.set_title("Salary vs Market Value", fontsize=14)
ax.legend()
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()