Marketing Case Study: Email Campaign Uplift¶
Scenario: An e-commerce company sends promotional emails to customers. Not all customers respond equally — some are "persuadables" (they buy only when emailed) while others would buy anyway. We want to identify and target the persuadables in real time as customers arrive.
Problem: Treatment (email = 1 / no email = 0) is partially confounded by customer engagement score. We use OnlineCML to estimate individual uplift and rank customers by predicted CATE.
Methods compared: OnlineIPW, OnlineAIPW, OnlineTLearner, OnlineRLearner
In [1]:
Copied!
import math
import matplotlib
import matplotlib.pyplot as plt
from river.linear_model import LinearRegression
from onlinecml.datasets import HeterogeneousCausalStream
from onlinecml.reweighting import OnlineIPW, OnlineAIPW
from onlinecml.metalearners import OnlineTLearner, OnlineRLearner
from onlinecml.diagnostics import OnlineSMD, OverlapChecker
from onlinecml.evaluation import progressive_causal_score
from onlinecml.evaluation.metrics import ATEError, UpliftAUC
import math
import matplotlib
import matplotlib.pyplot as plt
from river.linear_model import LinearRegression
from onlinecml.datasets import HeterogeneousCausalStream
from onlinecml.reweighting import OnlineIPW, OnlineAIPW
from onlinecml.metalearners import OnlineTLearner, OnlineRLearner
from onlinecml.diagnostics import OnlineSMD, OverlapChecker
from onlinecml.evaluation import progressive_causal_score
from onlinecml.evaluation.metrics import ATEError, UpliftAUC
1. Simulate the campaign stream¶
We use HeterogeneousCausalStream to simulate customers with varying
engagement features and heterogeneous treatment effects.
In [2]:
Copied!
# Simulate 5000 customers arriving over time.
# true_ate = 0.08 (8% average lift in conversion);
# heterogeneity = "nonlinear" means some customers respond far more than others.
# seed is fixed so every stream below replays the identical customer sequence.
STREAM_PARAMS = dict(
    n=5000,
    n_features=5,
    true_ate=0.08,
    heterogeneity="nonlinear",
    confounding_strength=0.6,
    seed=42,
)


def _run_progressive(model):
    """Progressively score *model* on a fresh copy of the campaign stream."""
    return progressive_causal_score(
        stream=HeterogeneousCausalStream(**STREAM_PARAMS),
        model=model,
        metrics=[ATEError(), UpliftAUC()],
        step=500,
    )


# Progressive evaluation: how well do the models rank customers by uplift?
results_t = _run_progressive(
    OnlineTLearner(treated_model=LinearRegression(), control_model=LinearRegression())
)
results_r = _run_progressive(OnlineRLearner(cate_model=LinearRegression()))

# Report the final (end-of-stream) value of each metric.
print(f"{'Method':>12} | {'Final ATEError':>14} | {'Final UpliftAUC':>15}")
print("-" * 48)
print(f"{'T-Learner':>12} | {results_t['ATEError'][-1]:>14.4f} | {results_t['UpliftAUC'][-1]:>15.4f}")
print(f"{'R-Learner':>12} | {results_r['ATEError'][-1]:>14.4f} | {results_r['UpliftAUC'][-1]:>15.4f}")
# Simulate 5000 customers arriving over time
# true_ate = 0.08 (8% average lift in conversion)
# heterogeneity = nonlinear (some customers are much more responsive)
# seed=42 is fixed, so both evaluations below replay the identical customer
# sequence — any difference in the results is due to the learners alone.
STREAM_PARAMS = dict(n=5000, n_features=5, true_ate=0.08,
                     heterogeneity="nonlinear", confounding_strength=0.6, seed=42)
# Progressive evaluation: how well do the models rank customers by uplift?
# T-Learner: separate outcome models for the treated and control arms.
results_t = progressive_causal_score(
    stream = HeterogeneousCausalStream(**STREAM_PARAMS),
    model = OnlineTLearner(treated_model=LinearRegression(), control_model=LinearRegression()),
    metrics = [ATEError(), UpliftAUC()],
    step = 500,  # record a metric snapshot every 500 customers
)
# R-Learner: a single model fitted to the CATE directly.
results_r = progressive_causal_score(
    stream = HeterogeneousCausalStream(**STREAM_PARAMS),
    model = OnlineRLearner(cate_model=LinearRegression()),
    metrics = [ATEError(), UpliftAUC()],
    step = 500,
)
# Report the final (end-of-stream) value of each metric per method.
print(f"{'Method':>12} | {'Final ATEError':>14} | {'Final UpliftAUC':>15}")
print("-" * 48)
print(f"{'T-Learner':>12} | {results_t['ATEError'][-1]:>14.4f} | {results_t['UpliftAUC'][-1]:>15.4f}")
print(f"{'R-Learner':>12} | {results_r['ATEError'][-1]:>14.4f} | {results_r['UpliftAUC'][-1]:>15.4f}")
Method | Final ATEError | Final UpliftAUC ------------------------------------------------ T-Learner | 0.0101 | 0.1088 R-Learner | 0.0252 | 0.1099
/home/lucifer/pCloudDrive/onlinecml/onlinecml/evaluation/progressive.py:66: UserWarning: treated_model has not seen any data yet; CATE estimate may be biased. cate_hat = model.predict_one(x) /home/lucifer/pCloudDrive/onlinecml/onlinecml/evaluation/progressive.py:66: UserWarning: control_model has not seen any data yet; CATE estimate may be biased. cate_hat = model.predict_one(x)
2. Real-time CATE ranking¶
In [3]:
Copied!
# Train a final R-Learner and rank the last 100 arriving customers by their
# predicted individual uplift. Prequential order: predict BEFORE learning.
model = OnlineRLearner(cate_model=LinearRegression())
last_100 = []  # (predicted CATE, true CATE, features) for customers 4900..4999
for idx, (features, treated, outcome, true_tau) in enumerate(
    HeterogeneousCausalStream(**STREAM_PARAMS)
):
    predicted_tau = model.predict_one(features)
    if idx >= 4900:  # keep only the final 100 customers of the stream
        last_100.append((predicted_tau, true_tau, features))
    model.learn_one(features, treated, outcome)

# Highest predicted CATE first.
last_100.sort(key=lambda entry: entry[0], reverse=True)

print("Top-10 customers by predicted uplift:")
print(f"{'Rank':>4} | {'CATE_hat':>9} | {'True CATE':>10}")
print("-" * 30)
for rank, (predicted_tau, actual_tau, _) in enumerate(last_100[:10], 1):
    print(f"{rank:>4} | {predicted_tau:>9.4f} | {actual_tau:>10.4f}")
# Train a final model and rank the last 100 customers by predicted uplift
model = OnlineRLearner(cate_model=LinearRegression())
last_100 = []  # (predicted CATE, true CATE, features) for customers 4900..4999
for i, (x, w, y, tau) in enumerate(HeterogeneousCausalStream(**STREAM_PARAMS)):
    # Prequential order: predict this customer's uplift BEFORE learning from them.
    cate_hat = model.predict_one(x)
    if i >= 4900:
        last_100.append((cate_hat, tau, x))
    model.learn_one(x, w, y)
# Sort by predicted CATE descending
last_100.sort(key=lambda t: t[0], reverse=True)
print("Top-10 customers by predicted uplift:")
print(f"{'Rank':>4} | {'CATE_hat':>9} | {'True CATE':>10}")
print("-" * 30)
for rank, (cate_hat, true_cate, _) in enumerate(last_100[:10], 1):
    print(f"{rank:>4} | {cate_hat:>9.4f} | {true_cate:>10.4f}")
Top-10 customers by predicted uplift: Rank | CATE_hat | True CATE ------------------------------ 1 | 2.6157 | 2.6548 2 | 2.2389 | 2.2985 3 | 1.9188 | 2.3004 4 | 1.9085 | 2.4400 5 | 1.7700 | 1.6502 6 | 1.7223 | 1.9445 7 | 1.4334 | 1.5938 8 | 1.3852 | 1.6978 9 | 1.2568 | 1.5612 10 | 1.2492 | 1.7730
3. Balance diagnostics¶
In [4]:
Copied!
# Fit an online IPW estimator while monitoring covariate balance (weighted
# SMD) and propensity-score overlap on the same single pass over the stream.
ipw = OnlineIPW()
smd = OnlineSMD(covariates=[f"x{i}" for i in range(5)])
overlap = OverlapChecker(ps_min=0.05, ps_max=0.95)
for x, w, y, _ in HeterogeneousCausalStream(**STREAM_PARAMS):
    ps = ipw.ps_model.predict_one(x)
    # Clip the propensity only for weighting: before the ps model has seen
    # data (or at extreme scores) ps can hit 0 or 1, which would divide by
    # zero / explode the IPW weights below.
    ps_w = min(max(ps, 1e-3), 1.0 - 1e-3)
    weight = 1.0 / ps_w if w == 1 else 1.0 / (1.0 - ps_w)
    smd.update(x, treatment=w, weight=weight)
    # The overlap diagnostic sees the raw (unclipped) propensity so that
    # extreme scores are still flagged.
    overlap.update(ps, treatment=w)
    ipw.learn_one(x, w, y)
print(f"Overlap adequate : {overlap.is_overlap_adequate()}")
print(f"Flag rate : {overlap.report()['flag_rate']:.1%}")
print(f"Balance adequate : {smd.is_balanced()}")
print(f"\nIPW ATE : {ipw.predict_ate():.4f}")
print("True ATE : ~0.0800")  # simulated ground truth (true_ate in STREAM_PARAMS)
# Fit an online IPW estimator while monitoring covariate balance (weighted
# SMD) and propensity-score overlap on the same single pass over the stream.
ipw = OnlineIPW()
smd = OnlineSMD(covariates=[f"x{i}" for i in range(5)])
overlap = OverlapChecker(ps_min=0.05, ps_max=0.95)
for x, w, y, _ in HeterogeneousCausalStream(**STREAM_PARAMS):
    ps = ipw.ps_model.predict_one(x)
    # Clip the propensity only for weighting: before the ps model has seen
    # data (or at extreme scores) ps can hit 0 or 1, which would divide by
    # zero / explode the IPW weights below.
    ps_w = min(max(ps, 1e-3), 1.0 - 1e-3)
    weight = 1.0 / ps_w if w == 1 else 1.0 / (1.0 - ps_w)
    smd.update(x, treatment=w, weight=weight)
    # The overlap diagnostic sees the raw (unclipped) propensity so that
    # extreme scores are still flagged.
    overlap.update(ps, treatment=w)
    ipw.learn_one(x, w, y)
print(f"Overlap adequate : {overlap.is_overlap_adequate()}")
print(f"Flag rate : {overlap.report()['flag_rate']:.1%}")
print(f"Balance adequate : {smd.is_balanced()}")
print(f"\nIPW ATE : {ipw.predict_ate():.4f}")
print("True ATE : ~0.0800")  # simulated ground truth (true_ate in STREAM_PARAMS)
Overlap adequate : True Flag rate : 0.0% Balance adequate : True IPW ATE : 0.1685 True ATE : ~0.0800