dml2

import%20marimo%0A%0A__generated_with%20%3D%20%220.11.20%22%0Aapp%20%3D%20marimo.App(width%3D%22medium%22)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20%23%20Relevant%20Imports%0A%0A%20%20%20%20import%20marimo%20as%20mo%0A%20%20%20%20import%20numpy%20as%20np%0A%20%20%20%20import%20pandas%20as%20pd%0A%20%20%20%20import%20statsmodels.formula.api%20as%20smf%0A%20%20%20%20from%20sklearn.ensemble%20import%20GradientBoostingRegressor%2C%20GradientBoostingClassifier%0A%20%20%20%20from%20sklearn.model_selection%20import%20cross_val_predict%0A%20%20%20%20from%20sklearn.metrics%20import%20mean_squared_error%2C%20r2_score%2C%20mean_absolute_error%0A%20%20%20%20import%20graphviz%0A%20%20%20%20import%20matplotlib.pyplot%20as%20plt%0A%20%20%20%20from%20IPython.display%20import%20display%2C%20HTML%0A%0A%20%20%20%20import%20os%0A%0A%20%20%20%20try%3A%0A%20%20%20%20%20%20%20%20os.chdir(%22assets%2Farticles%2Fnotebooks%22)%0A%20%20%20%20except%3A%0A%20%20%20%20%20%20%20%20pass%0A%0A%20%20%20%20np.random.seed(00)%0A%0A%20%20%20%20%23%23%20Helper%20Plots%0A%0A%20%20%20%20COLORS%20%3D%20%5B%22%2300B0F0%22%2C%20%22%23FF0000%22%2C%20%22%23B0F000%22%5D%0A%0A%20%20%20%20def%20plot_effect(%0A%20%20%20%20%20%20%20%20effect_true%2C%20effect_pred%2C%20save_path%2C%20figsize%3D(8%2C%205)%2C%20ylim%3D(-10%2C%20100)%0A%20%20%20%20)%3A%0A%20%20%20%20%20%20%20%20plt.figure(figsize%3Dfigsize)%0A%20%20%20%20%20%20%20%20plt.scatter(effect_true%2C%20effect_pred%2C%20color%3DCOLORS%5B0%5D%2C%20s%3D10)%0A%20%20%20%20%20%20%20%20plt.plot(%0A%20%20%20%20%20%20%20%20%20%20%20%20np.sort(effect_true)%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20np.sort(effect_true)%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20color%3DCOLORS%5B1%5D%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20alpha%3D0.7%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20label%3D%22Perfect%20model%22%2C%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20plt.xlabel(%22True%20effect%22%2C%20fontsize%3D14)%0A%20%20%20%20%20%20%20%20plt.ylabel(%
22Predicted%20effect%22%2C%20fontsize%3D14)%0A%20%20%20%20%20%20%20%20plt.legend()%0A%20%20%20%20%20%20%20%20plt.savefig(save_path%2C%20format%3D%22webp%22%2C%20dpi%3D300%2C%20bbox_inches%3D'tight')%0A%0A%20%20%20%20def%20hist_effect(effect_true%2C%20effect_pred%2C%20save_path%2C%20figsize%3D(8%2C%205))%3A%0A%20%20%20%20%20%20%20%20plt.figure(figsize%3Dfigsize)%0A%0A%20%20%20%20%20%20%20%20plt.hist(%0A%20%20%20%20%20%20%20%20%20%20%20%20effect_pred%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20color%3D%22r%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20alpha%3D0.8%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20density%3DTrue%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20bins%3D50%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20label%3D%22Linear%20DML%20CATE%20Prediction%22%2C%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20plt.hist(%0A%20%20%20%20%20%20%20%20%20%20%20%20effect_true%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20color%3D%22b%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20alpha%3D0.4%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20density%3DTrue%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20bins%3D50%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20label%3D%22True%20CATE%22%2C%0A%20%20%20%20%20%20%20%20)%0A%0A%20%20%20%20%20%20%20%20plt.legend()%0A%20%20%20%20%20%20%20%20plt.savefig(save_path%2C%20format%3D%22webp%22%2C%20dpi%3D300%2C%20bbox_inches%3D'tight')%0A%20%20%20%20return%20(%0A%20%20%20%20%20%20%20%20COLORS%2C%0A%20%20%20%20%20%20%20%20GradientBoostingClassifier%2C%0A%20%20%20%20%20%20%20%20GradientBoostingRegressor%2C%0A%20%20%20%20%20%20%20%20HTML%2C%0A%20%20%20%20%20%20%20%20cross_val_predict%2C%0A%20%20%20%20%20%20%20%20display%2C%0A%20%20%20%20%20%20%20%20graphviz%2C%0A%20%20%20%20%20%20%20%20hist_effect%2C%0A%20%20%20%20%20%20%20%20mean_absolute_error%2C%0A%20%20%20%20%20%20%20%20mean_squared_error%2C%0A%20%20%20%20%20%20%20%20mo%2C%0A%20%20%20%20%20%20%20%20np%2C%0A%20%20%20%20%20%20%20%20os%2C%0A%20%20%20%20%20%20%20%20pd%2C%0A%20%20%20%20%20%20%20%20plot_effect%2C%0A%20%20%20%20%20
%20%20%20plt%2C%0A%20%20%20%20%20%20%20%20r2_score%2C%0A%20%20%20%20%20%20%20%20smf%2C%0A%20%20%20%20)%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20%23%20Double%20Machine%20Learning%2C%20Simplified%3A%20Part%202%20-%20Targeting%20%26%20the%20CATE%20%0A%20%20%20%20%20%20%20%20%3Ccenter%3E%20**Learn%20how%20to%20utilize%20DML%20for%20estimating%20idiosyncratic%20treatment%20effects%20to%20enable%20personalized%20targeting**%20%3C%2Fcenter%3E%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20%23%23%20Introduction%0A%0A%20%20%20%20%20%20%20%20%3E%20This%20article%20is%20the%20**2nd**%20in%20a%202%20part%20series%20on%20simplifying%20and%20democratizing%20Double%20Machine%20Learning.%20In%20the%20%3Ca%20href%3D%22%2Farticles%2Fdml1%22%20target%3D%22_blank%22%20rel%3D%22noopener%20noreferrer%22%3E1st%20part%3C%2Fa%3E%2C%20we%20covered%20the%20fundamentals%20of%20Double%20Machine%20Learning%2C%20along%20with%20two%20basic%20causal%20inference%20applications.%20Now%2C%20in%20pt.%202%2C%20we%20will%20extend%20this%20knowledge%20to%20turn%20our%20causal%20inference%20problem%20into%20a%20prediction%20task%2C%20wherein%20we%20predict%20individual%20level%20treatment%20effects%20to%20aid%20in%20decision%20making%20and%20data-driven%20targeting%0A%0A%20%20%20%20%20%20%20%20Double%20Machine%20Learning%2C%20as%20we%20learned%20in%20%5Bpart%201%5D(%2Farticles%2Fdml1)%20of%20this%20series%2C%20is%20a%20highly%20flexible%20partially-linear%20causal%20inference%20method%20for%20estimating%20the%20average%20treatment%20effect%20(ATE)%20of%20a%20treatment.%20Specifically%2C%20it%20can%20be%20utilized%20to%20model%20highly%20non-linear%20confounding%20relationships%20in%20observational%20data%20(especially%20when%20our%20set%20of%20controls%2Fconfounders%20is%20of%20extre
mely%20high%20dimensionality)%20and%2For%20to%20reduce%20the%20variation%20in%20our%20key%20outcome%20in%20experimental%20settings.%20Estimating%20the%20ATE%20is%20particularly%20useful%20in%20understanding%20the%20average%20impact%20of%20a%20specific%20treatment%2C%20which%20can%20be%20extremely%20useful%20for%20future%20decision%20making.%20However%2C%20extrapolating%20this%20treatment%20effect%20assumes%20a%20degree%20of%20homogeneity%20in%20the%20effect%3B%20that%20is%2C%20regardless%20of%20the%20population%20we%20roll%20treatment%20out%20to%2C%20we%20anticipate%20the%20effect%20to%20be%20similar%20to%20the%20ATE.%20What%20if%20we%20are%20limited%20in%20the%20number%20of%20individuals%20who%20we%20can%20target%20for%20future%20rollout%20and%20thus%20want%20to%20understand%20among%20which%20subpopulations%20the%20treatment%20was%20most%20effective%20to%20drive%20highly%20effective%20rollout%3F%0A%0A%20%20%20%20%20%20%20%20The%20issue%20described%20above%20concerns%20estimating%20treatment%20effect%20heterogeneity.%20That%20is%2C%20how%20does%20our%20treatment%20effect%20impact%20different%20subsets%20of%20the%20population%3F%20Luckily%20for%20us%2C%20DML%20provides%20a%20powerful%20framework%20to%20do%20exactly%20this.%20Specifically%2C%20we%20can%20make%20use%20of%20DML%20to%20estimate%20the%20Conditional%20Average%20Treatment%20Effect%20(CATE).%20First%2C%20let%E2%80%99s%20revisit%20our%20definition%20of%20the%20ATE%2C%20in%20binary%20and%20continuous%20cases%2C%20respectively%3A%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Bequation%7D%0A%20%20%20%20%20%20%20%20%5Ctext%7BATE%7D%3D%5Cmathbb%7BE_n%7D%5By(T%3D1)-y(T%3D0)%5D%0A%20%20%20%20%20%20%20%20%5Ctag%7B1%7D%0A%20%20%20%20%20%20%20%20%5Cend%7Bequation%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Bequation%7D%0A%20%20%20%20%20%20%20%20%5Ctext%7BATE%7D%3D%5Cmathbb%7BE_n%7D%5Cleft%5B%5Cfrac%7B%5Cpartial%20y%7D%7B%5Cpartial%20T
%7D%5Cright%5D%0A%20%20%20%20%20%20%20%20%5Ctag%7B2%7D%0A%20%20%20%20%20%20%20%20%5Cend%7Bequation%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20Now%20with%20the%20CATE%2C%20we%20estimate%20the%20ATE%20conditional%20on%20a%20set%20of%20values%20for%20our%20covariates%2C%20%24%5Cmathbf%7BX%7D%24%3A%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Bequation%7D%0A%20%20%20%20%20%20%20%20%5Ctext%7BCATE%7D%3D%5Cmathbb%7BE_n%7D%5By(T%3D1)-y(T%3D0)%7C%5Cmathbf%7BX%7D%3Dx%5D%20%0A%20%20%20%20%20%20%20%20%5Ctag%7B3%7D%0A%20%20%20%20%20%20%20%20%5Cend%7Bequation%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Bequation%7D%0A%20%20%20%20%20%20%20%20%5Ctext%7BCATE%7D%3D%5Cmathbb%7BE_n%7D%5Cleft%5B%5Cleft.%5Cfrac%7B%5Cpartial%20y%7D%7B%5Cpartial%20T%7D%5Cright%7C%5Cmathbf%7BX%7D%3Dx%5Cright%5D%0A%20%20%20%20%20%20%20%20%5Ctag%7B4%7D%0A%20%20%20%20%20%20%20%20%5Cend%7Bequation%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20For%20example%2C%20if%20we%20wanted%20to%20know%20the%20treatment%20effect%20for%20males%20versus%20females%2C%20we%20can%20estimate%20the%20CATE%20conditional%20on%20the%20covariate%20being%20equal%20to%20each%20subgroup%20of%20interest.%20Note%20that%20we%20can%20estimate%20highly%20aggregated%20CATEs%20(i.e.%2C%20at%20a%20male%20vs.%20female%20level)%2C%20also%20known%20as%20Group%20Average%20Treatment%20Effects%20(GATEs)%2C%20or%20we%20can%20allow%20%24%5Cmathbf%7BX%7D%24%20to%20take%20on%20an%20extremely%20high%20dimensionality%20and%20thus%20closely%20estimate%20each%20individual%E2%80%99s%20treatment%20effect.%20You%20may%20immediately%20notice%20the%20benefits%20in%20being%20able%20to%20do%20this%3A%20we%20can%20utilize%20this%20information%20to%20make%20highly%20informed%20decisions%20in%20future%20targeting%20of%20the%20treatment!%20Even%20more%20notable%2C%20we%20can%20create%20a%20CATE%20function%20to%20make%20predictions%20of%20the%20treatment%20effect%20
on%20previously%20unexposed%20individuals!%0A%0A%20%20%20%20%20%20%20%20Note%20that%20there%20are%20many%20models%20that%20exist%20for%20estimating%20CATEs%2C%20which%20we'll%20cover%20in%20a%20subsequent%20post.%20For%20now%2C%20we'll%20cover%20two%20techniques%20within%20the%20partially%20linear%20DML%20formulation%20for%20estimating%20this%20CATE%20function%3B%20namely%2C%20Linear%20DML%20and%20Non-Parametric%20DML.%20We%20will%20show%20how%20to%20estimate%20the%20CATE%20mathematically%20and%20then%20provide%20examples%20for%20each%20case.%0A%0A%20%20%20%20%20%20%20%20%3E%20Note%3A%20Unbiased%20estimation%20of%20the%20CATE%20still%20requires%20the%20exogeneity%2FCIA%2FIgnorability%20assumption%20to%20hold%20as%20covered%20in%20part%201.%0A%0A%20%20%20%20%20%20%20%20**Everything%20demonstrated%20below%20can%20and%20should%20be%20extended%20to%20the%20experimental%20setting%20(RCT%20or%20A%2FB%20Testing)%2C%20where%20exogeneity%20is%20satisfied%20by%20construction%2C%20as%20covered%20in%20application%202%20of%20part%201.**%0A%0A%20%20%20%20%20%20%20%20%23%23%20Linear%20DML%20for%20Estimating%20the%20CATE%0A%0A%20%20%20%20%20%20%20%20Estimating%20the%20CATE%20in%20the%20linear%20DML%20framework%20is%20a%20simple%20extension%20of%20DML%20for%20estimating%20the%20ATE%3A%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Bequation%7D%0A%20%20%20%20%20%20%20%20y-%5Cmathcal%7BM%7D_y(%5Cmathbf%7BX%7D)%3D%5Cbeta_0%2B%5Cbeta_1(T-%5Cmathcal%7BM%7D_T(%5Cmathbf%7BX%7D))%2B%5Cepsilon%20%0A%20%20%20%20%20%20%20%20%5Ctag%7B5%7D%0A%20%20%20%20%20%20%20%20%5Cend%7Bequation%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20where%20%24y%24%20is%20our%20outcome%2C%20%24T%24%20is%20our%20treatment%2C%20%26%20%24%5Cmathcal%7BM%7D_y%24%20and%20%24%5Cmathcal%7BM%7D_T%24%20are%20both%20flexible%20ML%20models%20(our%20nuisance%20functions)%20to%20predict%20%24y%24%20and%20%24T%24%20given%20confounders%20and%2For%20controls%2C%20%24%5Cmathbf%7BX%7D%24%2
C%20respectively.%20To%20estimate%20the%20CATE%20function%20using%20Linear%20DML%2C%20we%20can%20simply%20include%20interaction%20terms%20of%20the%20treatment%20residuals%20with%20our%20covariates.%20Observe%3A%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Bequation%7D%0A%20%20%20%20%20%20%20%20y-%5Cmathcal%7BM%7D_y(%5Cmathbf%7BX%7D)%3D%5Cbeta_0%2B%5Cbeta_1(T-%5Cmathcal%7BM%7D_T(%5Cmathbf%7BX%7D))%2B(T-%5Cmathcal%7BM%7D_T(%5Cmathbf%7BX%7D))%5Cmathbf%7BX%7D%5Cmathbf%7B%5COmega%7D%20%2B%20%5Cepsilon%20%0A%20%20%20%20%20%20%20%20%5Ctag%7B6%7D%0A%20%20%20%20%20%20%20%20%5Cend%7Bequation%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20where%20%24%5Cmathbf%7B%5COmega%7D%24%20is%20the%20vector%20of%20coefficients%20for%20the%20interaction%20terms.%20Now%20our%20CATE%20function%2C%20call%20it%20%24%5Ctau%24%2C%20takes%20the%20form%20%24%5Ctau(%5Cmathbf%7BX%7D)%20%3D%20%5Cbeta_1%20%2B%20%5Cmathbf%7BX%7D%5Cmathbf%7B%5COmega%7D%24%2C%20where%20we%20can%20predict%20each%20individual%E2%80%99s%20CATE%20given%20%24%5Cmathbf%7BX%7D%24.%20If%20%24T%24%20is%20continuous%2C%20this%20CATE%20function%20is%20for%20a%201%20unit%20increase%20in%20T.%20Note%20that%20%24%5Ctau(%5Cmathbf%7BX%7D)%20%3D%20%5Cbeta_1%24%20in%20eq.%20(5)%20where%20%24%5Ctau(%5Cmathbf%7BX%7D)%24%20is%20assumed%20a%20constant.%20Let%E2%80%99s%20take%20a%20look%20at%20this%20in%20action!%0A%0A%20%20%20%20%20%20%20%20First%2C%20let%E2%80%99s%20use%20the%20same%20causal%20DAG%20from%20part%201%2C%20where%20we%20will%20be%20looking%20at%20the%20effect%20of%20an%20individual%E2%80%99s%20time%20spent%20on%20the%20website%20on%20their%20purchase%20amount%2C%20or%20sales%2C%20in%20the%20past%20month%20(assuming%20we%20observe%20all%20confounders)%3A%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(graphviz%2C%20mo)%3A%0A%20%20%20%20def%20create_dag()%3A%0A%20%20%20%20%20%20%20%20%23%20Create%20a%20directed%20graph%0A%20%20%20%
20%20%20%20%20g%20%3D%20graphviz.Digraph(format%3D%22png%22)%0A%0A%20%20%20%20%20%20%20%20%23%20Add%20nodes%0A%20%20%20%20%20%20%20%20nodes%20%3D%20%5B%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Age%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22%23%20Social%20Media%20Accounts%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Yrs%20Member%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Time%20on%20Website%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Sales%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Z%22%2C%0A%20%20%20%20%20%20%20%20%5D%0A%20%20%20%20%20%20%20%20%5Bg.node(n)%20for%20n%20in%20nodes%5D%0A%0A%20%20%20%20%20%20%20%20g.edge(%22Age%22%2C%20%22Time%20on%20Website%22)%0A%20%20%20%20%20%20%20%20g.edge(%22%23%20Social%20Media%20Accounts%22%2C%20%22Time%20on%20Website%22)%0A%20%20%20%20%20%20%20%20g.edge(%22Yrs%20Member%22%2C%20%22Time%20on%20Website%22)%0A%20%20%20%20%20%20%20%20g.edge(%22Age%22%2C%20%22Sales%22)%0A%20%20%20%20%20%20%20%20g.edge(%22%23%20Social%20Media%20Accounts%22%2C%20%22Sales%22)%0A%20%20%20%20%20%20%20%20g.edge(%22Yrs%20Member%22%2C%20%22Sales%22)%0A%20%20%20%20%20%20%20%20g.edge(%22Time%20on%20Website%22%2C%20%22Sales%22%2C%20color%3D%22red%22)%0A%20%20%20%20%20%20%20%20g.edge(%22Z%22%2C%20%22Sales%22)%0A%0A%20%20%20%20%20%20%20%20g.graph_attr%5B%22dpi%22%5D%20%3D%20%22400%22%0A%0A%20%20%20%20%20%20%20%20%23%20Render%20for%20print%0A%20%20%20%20%20%20%20%20g.render(%22data%2Fdag1%22%2C%20format%3D%22webp%22)%0A%0A%20%20%20%20create_dag()%0A%20%20%20%20mo.image(%22data%2Fdag1.webp%22).center()%0A%20%20%20%20return%20(create_dag%2C)%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Let%E2%80%99s%20then%20simulate%20this%20DGP%20using%20a%20similar%20process%20as%20utilized%20in%20part%201%20(note%20that%20all%20values%20%26%20data%20are%20chosen%20and%20generated%20arbitrarily%20for%20demonstrative%20purposes).%20Observe%20that%20we%20now%20include%20interaction%20terms%20in%20the%20sales%20DGP%20to%20model%20the%20CATE%2C%
20or%20treatment%20effect%20heterogeneity%20(note%20that%20the%20DGP%20in%20part%201%20had%20no%20treatment%20effect%20heterogeneity%20by%20construction)%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(np%2C%20pd)%3A%0A%20%20%20%20%23%20Sample%20Size%0A%20%20%20%20N%20%3D%20100_000%0A%0A%20%20%20%20%23%20Confounders%20(X)%0A%20%20%20%20age%20%3D%20np.random.randint(low%3D18%2C%20high%3D75%2C%20size%3DN)%0A%20%20%20%20num_social_media_profiles%20%3D%20np.random.choice(%0A%20%20%20%20%20%20%20%20%5B0%2C%201%2C%202%2C%203%2C%204%2C%205%2C%206%2C%207%2C%208%2C%209%2C%2010%5D%2C%20size%3DN%0A%20%20%20%20)%0A%20%20%20%20yr_membership%20%3D%20np.random.choice(%5B0%2C%201%2C%202%2C%203%2C%204%2C%205%2C%206%2C%207%2C%208%2C%209%2C%2010%5D%2C%20size%3DN)%0A%0A%20%20%20%20%23%20Arbitrary%20Covariates%20(Z)%0A%20%20%20%20Z%20%3D%20np.random.normal(loc%3D50%2C%20scale%3D25%2C%20size%3DN)%0A%0A%20%20%20%20%23%20Error%20Terms%0A%20%20%20%20%CE%B51%20%3D%20np.random.normal(loc%3D20%2C%20scale%3D5%2C%20size%3DN)%0A%20%20%20%20%CE%B52%20%3D%20np.random.normal(loc%3D40%2C%20scale%3D15%2C%20size%3DN)%0A%0A%20%20%20%20%23%20Treatment%20(T%20%3D%20g(X)%20%2B%20%CE%B51)%0A%20%20%20%20def%20T(age%2C%20num_social_media_profiles%2C%20yr_membership%2C%20%CE%B51)%3A%0A%20%20%20%20%20%20%20%20time_on_website%20%3D%20np.maximum(%0A%20%20%20%20%20%20%20%20%20%20%20%2010%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.01%20*%20age%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.001%20*%20age**2%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%20num_social_media_profiles%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.01%20*%20num_social_media_profiles**2%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.01%20*%20(age%20*%20num_social_media_profiles)%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.2%20*%20yr_membership%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.001%20*%20yr_membership**2%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.01%20*%20(age%20*%20yr_membership)%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%20
0.2%20*%20(num_social_media_profiles%20*%20yr_membership)%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.01%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20(num_social_media_profiles%20*%20np.log(age)%20*%20age%20*%20yr_membership%20**%20(1%20%2F%202))%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%20%CE%B51%2C%0A%20%20%20%20%20%20%20%20%20%20%20%200%2C%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20return%20time_on_website%0A%0A%20%20%20%20time_on_website%20%3D%20T(age%2C%20num_social_media_profiles%2C%20yr_membership%2C%20%CE%B51)%0A%0A%20%20%20%20%23%20Outcome%20(y%20%3D%20f(T%2CX%2CZ)%20%2B%20%CE%B52)%0A%20%20%20%20def%20y(time_on_website%2C%20age%2C%20num_social_media_profiles%2C%20yr_membership%2C%20Z%2C%20%CE%B52)%3A%0A%20%20%20%20%20%20%20%20sales%20%3D%20np.maximum(%0A%20%20%20%20%20%20%20%20%20%20%20%2025%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%205%20*%20time_on_website%20%20%23%20Baseline%20Treatment%20Effect%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.2%20*%20time_on_website%20*%20age%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%202%20*%20time_on_website%20*%20num_social_media_profiles%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%202%20*%20time_on_website%20*%20yr_membership%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.1%20*%20age%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.001%20*%20age**2%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%208%20*%20num_social_media_profiles%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.1%20*%20num_social_media_profiles**2%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.01%20*%20(age%20*%20num_social_media_profiles)%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%202%20*%20yr_membership%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.1%20*%20yr_membership**2%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.01%20*%20(age%20*%20yr_membership)%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%203%20*%20(num_social_media_profiles%20*%20yr_membership)%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200
.1%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20(num_social_media_profiles%20*%20np.log(age)%20*%20age%20*%20yr_membership%20**%20(1%20%2F%202))%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.5%20*%20Z%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%20%CE%B52%2C%0A%20%20%20%20%20%20%20%20%20%20%20%200%2C%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20return%20sales%0A%0A%20%20%20%20sales%20%3D%20y(time_on_website%2C%20age%2C%20num_social_media_profiles%2C%20yr_membership%2C%20Z%2C%20%CE%B52)%0A%0A%20%20%20%20df%20%3D%20pd.DataFrame(%0A%20%20%20%20%20%20%20%20np.array(%0A%20%20%20%20%20%20%20%20%20%20%20%20%5Bsales%2C%20time_on_website%2C%20age%2C%20num_social_media_profiles%2C%20yr_membership%2C%20Z%5D%0A%20%20%20%20%20%20%20%20).T%2C%0A%20%20%20%20%20%20%20%20columns%3D%5B%0A%20%20%20%20%20%20%20%20%20%20%20%20%22sales%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22time_on_website%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22age%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22num_social_media_profiles%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22yr_membership%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Z%22%2C%0A%20%20%20%20%20%20%20%20%5D%2C%0A%20%20%20%20)%0A%20%20%20%20return%20(%0A%20%20%20%20%20%20%20%20N%2C%0A%20%20%20%20%20%20%20%20T%2C%0A%20%20%20%20%20%20%20%20Z%2C%0A%20%20%20%20%20%20%20%20age%2C%0A%20%20%20%20%20%20%20%20df%2C%0A%20%20%20%20%20%20%20%20num_social_media_profiles%2C%0A%20%20%20%20%20%20%20%20sales%2C%0A%20%20%20%20%20%20%20%20time_on_website%2C%0A%20%20%20%20%20%20%20%20y%2C%0A%20%20%20%20%20%20%20%20yr_membership%2C%0A%20%20%20%20%20%20%20%20%CE%B51%2C%0A%20%20%20%20%20%20%20%20%CE%B52%2C%0A%20%20%20%20)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(df)%3A%0A%20%20%20%20df%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Now%2C%20to%20estimate%20our%20CATE%20function%2C%20as%20outlined%20in%20eq.%20(4)%2C%20we%20can%20run%3A%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(GradientBo
ostingRegressor%2C%20cross_val_predict%2C%20df%2C%20smf)%3A%0A%20%20%20%20%23%20DML%20Procedure%20for%20Estimating%20the%20CATE%0A%20%20%20%20M_sales%20%3D%20GradientBoostingRegressor()%0A%20%20%20%20M_time_on_website%20%3D%20GradientBoostingRegressor()%0A%0A%20%20%20%20df%5B%22residualized_sales%22%5D%20%3D%20df%5B%22sales%22%5D%20-%20cross_val_predict(%0A%20%20%20%20%20%20%20%20M_sales%2C%0A%20%20%20%20%20%20%20%20df%5B%5B%22age%22%2C%20%22num_social_media_profiles%22%2C%20%22yr_membership%22%5D%5D%2C%0A%20%20%20%20%20%20%20%20df%5B%22sales%22%5D%2C%0A%20%20%20%20%20%20%20%20cv%3D3%2C%0A%20%20%20%20)%0A%0A%20%20%20%20df%5B%22residualized_time_on_website%22%5D%20%3D%20df%5B%22time_on_website%22%5D%20-%20cross_val_predict(%0A%20%20%20%20%20%20%20%20M_time_on_website%2C%0A%20%20%20%20%20%20%20%20df%5B%5B%22age%22%2C%20%22num_social_media_profiles%22%2C%20%22yr_membership%22%5D%5D%2C%0A%20%20%20%20%20%20%20%20df%5B%22time_on_website%22%5D%2C%0A%20%20%20%20%20%20%20%20cv%3D3%2C%0A%20%20%20%20)%0A%0A%20%20%20%20DML_model%20%3D%20smf.ols(%0A%20%20%20%20%20%20%20%20formula%3D%22residualized_sales%20~%201%20%2B%20residualized_time_on_website%20%2B%20residualized_time_on_website%3Aage%20%2B%20residualized_time_on_website%3Anum_social_media_profiles%20%2B%20residualized_time_on_website%3Ayr_membership%22%2C%0A%20%20%20%20%20%20%20%20data%3Ddf%2C%0A%20%20%20%20).fit()%0A%0A%20%20%20%20print(DML_model.summary())%0A%20%20%20%20return%20DML_model%2C%20M_sales%2C%20M_time_on_website%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Here%20we%20can%20see%20that%20linear%20DML%20closely%20modeled%20the%20true%20DGP%20for%20the%20CATE%20(see%20coefficients%20on%20interaction%20terms%20in%20sales%20DGP).%20Let%E2%80%99s%20evaluate%20the%20performance%20of%20our%20CATE%20function%20by%20comparing%20the%20linear%20DML%20predictions%20to%20the%20true%20CATE%20for%20a%201%20hour%20increase%20in%20time%20on%20the%20spent%20on%20the%20website%3A%22%22%22)%0A%20%20%20%20ret
urn%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20DML_model%2C%0A%20%20%20%20Z%2C%0A%20%20%20%20age%2C%0A%20%20%20%20df%2C%0A%20%20%20%20mean_absolute_error%2C%0A%20%20%20%20mean_squared_error%2C%0A%20%20%20%20num_social_media_profiles%2C%0A%20%20%20%20r2_score%2C%0A%20%20%20%20time_on_website%2C%0A%20%20%20%20y%2C%0A%20%20%20%20yr_membership%2C%0A%20%20%20%20%CE%B52%2C%0A)%3A%0A%20%20%20%20%23%20Predict%20CATE%20of%201%20hour%20increase%0A%20%20%20%20linear_dml_cates%20%3D%20DML_model.predict(%0A%20%20%20%20%20%20%20%20df.assign(%0A%20%20%20%20%20%20%20%20%20%20%20%20residualized_time_on_website%3Dlambda%20x%3A%20x.residualized_time_on_website%20%2B%201%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20)%20-%20DML_model.predict(df)%0A%0A%20%20%20%20%23%20True%20CATE%20of%201%20hour%20increase%0A%20%20%20%20X%20%3D%20%5Bage%2C%20num_social_media_profiles%2C%20yr_membership%2C%20Z%2C%20%CE%B52%5D%0A%20%20%20%20true_cates%20%3D%20y(time_on_website%20%2B%201%2C%20*X)%20-%20y(time_on_website%2C%20*X)%0A%0A%20%20%20%20print(f%22Mean%20Squared%20Error%3A%20%7Bmean_squared_error(true_cates%2C%20linear_dml_cates)%7D%22)%0A%20%20%20%20print(f%22Mean%20Absolute%20Error%3A%20%7Bmean_absolute_error(true_cates%2C%20linear_dml_cates)%7D%22)%0A%20%20%20%20print(f%22R-Squared%3A%20%7Br2_score(true_cates%2C%20linear_dml_cates)%7D%22)%0A%20%20%20%20return%20X%2C%20linear_dml_cates%2C%20true_cates%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Plotting%20the%20distributions%20of%20the%20predicted%20CATE%20and%20true%20CATE%2C%20we%20obtain%3A%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(hist_effect%2C%20linear_dml_cates%2C%20mo%2C%20true_cates)%3A%0A%20%20%20%20hist_effect(true_cates%2C%20linear_dml_cates%2C%20save_path%3D%22data%2Flinear_dml_hist.webp%22)%0A%0A%20%20%20%20mo.image(%22data%2Flinear_dml_hist.webp%22%2C%20height%3D500).center()%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Additionall
y%2C%20plotting%20the%20predicted%20values%20versus%20the%20true%20values%20we%20obtain%3A%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(linear_dml_cates%2C%20mo%2C%20plot_effect%2C%20true_cates)%3A%0A%20%20%20%20plot_effect(true_cates%2C%20linear_dml_cates%2C%20save_path%3D%22data%2Flinear_dml_line.webp%22)%0A%0A%20%20%20%20mo.image(%22data%2Flinear_dml_line.webp%22%2C%20height%3D500).center()%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Overall%2C%20we%20have%20pretty%20impressive%20performance!%20However%2C%20the%20primary%20limitation%20in%20this%20approach%20is%20that%20we%20must%20manually%20specify%20the%20functional%20form%20of%20the%20CATE%20function%2C%20thus%20if%20we%20are%20only%20including%20linear%20interaction%20terms%20we%20may%20not%20capture%20the%20true%20CATE%20function.%20In%20our%20example%2C%20we%20simulated%20the%20DGP%20to%20only%20have%20these%20linear%20interaction%20terms%20and%20thus%20the%20performance%20is%20strong%20by%20construction%2C%20but%20let%E2%80%99s%20see%20what%20happens%20when%20we%20tweak%20the%20DGP%20for%20the%20CATE%20to%20be%20arbitrarily%20non-linear%3A%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20Z%2C%0A%20%20%20%20age%2C%0A%20%20%20%20np%2C%0A%20%20%20%20num_social_media_profiles%2C%0A%20%20%20%20pd%2C%0A%20%20%20%20time_on_website%2C%0A%20%20%20%20yr_membership%2C%0A%20%20%20%20%CE%B52%2C%0A)%3A%0A%20%20%20%20%23%20Outcome%20(y%20%3D%20f(T%2CX%2CZ)%20%2B%20%CE%B52)%0A%20%20%20%20def%20y_fn_nonlinear(%0A%20%20%20%20%20%20%20%20time_on_website%2C%20age%2C%20num_social_media_profiles%2C%20yr_membership%2C%20Z%2C%20%CE%B52%0A%20%20%20%20)%3A%0A%20%20%20%20%20%20%20%20sales%20%3D%20np.maximum(%0A%20%20%20%20%20%20%20%20%20%20%20%2025%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%205%20*%20time_on_website%20%20%23%20Baseline%20Treatment%20Effect%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.2%20*%20time_on_website%20*%20
age%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.0005%20*%20time_on_website%20*%20age**2%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.8%20*%20time_on_website%20*%20num_social_media_profiles%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.001%20*%20time_on_website%20*%20num_social_media_profiles**2%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.8%20*%20time_on_website%20*%20yr_membership%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.001%20*%20time_on_website%20*%20yr_membership**2%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.005%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20time_on_website%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20yr_membership%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20num_social_media_profiles%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20age%20%20%23%20Heterogeneity%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.005%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20time_on_website%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20(yr_membership**3%20%2F%20(1%20%2B%20num_social_media_profiles**2))%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20np.log(age)%20**%202%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.1%20*%20age%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.001%20*%20age**2%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%208%20*%20num_social_media_profiles%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.1%20*%20num_social_media_profiles**2%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.01%20*%20(age%20*%20num_social_media_profiles)%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%202%20*%20yr_membership%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.1%20*%20yr_membership**2%0A%20%20%20%20%20%20%20%20%20%20%20%20-%200.01%20*%20(age%20*%20yr_membership)%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%203%20*%20(num_social_media_profiles%20*%20yr_membership)%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.1%0A%20%20%20%20%20%20%20%20%20%20%20%20*%20(num_social
_media_profiles%20*%20np.log(age)%20*%20age%20*%20yr_membership%20**%20(1%20%2F%202))%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%200.5%20*%20Z%0A%20%20%20%20%20%20%20%20%20%20%20%20%2B%20%CE%B52%2C%0A%20%20%20%20%20%20%20%20%20%20%20%200%2C%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20return%20sales%0A%0A%20%20%20%20sales_nonlinear%20%3D%20y_fn_nonlinear(%0A%20%20%20%20%20%20%20%20time_on_website%2C%20age%2C%20num_social_media_profiles%2C%20yr_membership%2C%20Z%2C%20%CE%B52%0A%20%20%20%20)%0A%0A%20%20%20%20df_nonlinear%20%3D%20pd.DataFrame(%0A%20%20%20%20%20%20%20%20np.array(%0A%20%20%20%20%20%20%20%20%20%20%20%20%5B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20sales_nonlinear%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20time_on_website%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20age%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20num_social_media_profiles%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20yr_membership%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20Z%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%5D%0A%20%20%20%20%20%20%20%20).T%2C%0A%20%20%20%20%20%20%20%20columns%3D%5B%0A%20%20%20%20%20%20%20%20%20%20%20%20%22sales%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22time_on_website%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22age%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22num_social_media_profiles%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22yr_membership%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Z%22%2C%0A%20%20%20%20%20%20%20%20%5D%2C%0A%20%20%20%20)%0A%20%20%20%20return%20df_nonlinear%2C%20sales_nonlinear%2C%20y_fn_nonlinear%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Fitting%20our%20models%3A%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(GradientBoostingRegressor%2C%20cross_val_predict%2C%20df_nonlinear%2C%20smf)%3A%0A%20%20%20%20%23%20DML%20Procedure%0A%20%20%20%20M_sales2%20%3D%20GradientBoostingRegressor()%0A%20%20%20%20M_time_on_website2%20%3D%20GradientBo
ostingRegressor()%0A%0A%20%20%20%20df_nonlinear%5B%22residualized_sales%22%5D%20%3D%20df_nonlinear%5B%22sales%22%5D%20-%20cross_val_predict(%0A%20%20%20%20%20%20%20%20M_sales2%2C%0A%20%20%20%20%20%20%20%20df_nonlinear%5B%5B%22age%22%2C%20%22num_social_media_profiles%22%2C%20%22yr_membership%22%5D%5D%2C%0A%20%20%20%20%20%20%20%20df_nonlinear%5B%22sales%22%5D%2C%0A%20%20%20%20%20%20%20%20cv%3D3%2C%0A%20%20%20%20)%0A%0A%20%20%20%20df_nonlinear%5B%22residualized_time_on_website%22%5D%20%3D%20df_nonlinear%5B%0A%20%20%20%20%20%20%20%20%22time_on_website%22%0A%20%20%20%20%5D%20-%20cross_val_predict(%0A%20%20%20%20%20%20%20%20M_time_on_website2%2C%0A%20%20%20%20%20%20%20%20df_nonlinear%5B%5B%22age%22%2C%20%22num_social_media_profiles%22%2C%20%22yr_membership%22%5D%5D%2C%0A%20%20%20%20%20%20%20%20df_nonlinear%5B%22time_on_website%22%5D%2C%0A%20%20%20%20%20%20%20%20cv%3D3%2C%0A%20%20%20%20)%0A%0A%20%20%20%20DML_model_nonlinear%20%3D%20smf.ols(%0A%20%20%20%20%20%20%20%20formula%3D%22residualized_sales%20~%201%20%2B%20residualized_time_on_website%20%2B%20residualized_time_on_website%3Aage%20%2B%20residualized_time_on_website%3Anum_social_media_profiles%20%2B%20residualized_time_on_website%3Ayr_membership%22%2C%0A%20%20%20%20%20%20%20%20data%3Ddf_nonlinear%2C%0A%20%20%20%20).fit()%0A%0A%20%20%20%20print(DML_model_nonlinear.summary())%0A%20%20%20%20return%20DML_model_nonlinear%2C%20M_sales2%2C%20M_time_on_website2%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22And%20then%20evaluating%20performance%3A%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20DML_model_nonlinear%2C%0A%20%20%20%20X%2C%0A%20%20%20%20df_nonlinear%2C%0A%20%20%20%20mean_absolute_error%2C%0A%20%20%20%20mean_squared_error%2C%0A%20%20%20%20r2_score%2C%0A%20%20%20%20time_on_website%2C%0A%20%20%20%20y_fn_nonlinear%2C%0A)%3A%0A%20%20%20%20%23%20Predict%20CATE%20of%201%20hour%20increase%0A%20%20%20%20linear_dml_cates_nonlinear%20%3D%20DML_model_nonlinear.predict(%0A%20%20%
20%20%20%20%20%20df_nonlinear.assign(%0A%20%20%20%20%20%20%20%20%20%20%20%20residualized_time_on_website%3Dlambda%20x%3A%20x.residualized_time_on_website%20%2B%201%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20)%20-%20DML_model_nonlinear.predict(df_nonlinear)%0A%0A%20%20%20%20%23%20True%20CATE%20of%201%20hour%20increase%0A%20%20%20%20true_cates_nonlinear%20%3D%20y_fn_nonlinear(time_on_website%20%2B%201%2C%20*X)%20-%20y_fn_nonlinear(%0A%20%20%20%20%20%20%20%20time_on_website%2C%20*X%0A%20%20%20%20)%0A%0A%20%20%20%20print(%0A%20%20%20%20%20%20%20%20f%22Mean%20Squared%20Error%3A%20%7Bmean_squared_error(true_cates_nonlinear%2C%20linear_dml_cates_nonlinear)%7D%22%0A%20%20%20%20)%0A%20%20%20%20print(%0A%20%20%20%20%20%20%20%20f%22Mean%20Absolute%20Error%3A%20%7Bmean_absolute_error(true_cates_nonlinear%2C%20linear_dml_cates_nonlinear)%7D%22%0A%20%20%20%20)%0A%20%20%20%20print(f%22R-Squared%3A%20%7Br2_score(true_cates_nonlinear%2C%20linear_dml_cates_nonlinear)%7D%22)%0A%20%20%20%20return%20linear_dml_cates_nonlinear%2C%20true_cates_nonlinear%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(hist_effect%2C%20linear_dml_cates_nonlinear%2C%20mo%2C%20true_cates_nonlinear)%3A%0A%20%20%20%20hist_effect(%0A%20%20%20%20%20%20%20%20true_cates_nonlinear%2C%0A%20%20%20%20%20%20%20%20linear_dml_cates_nonlinear%2C%0A%20%20%20%20%20%20%20%20save_path%3D%22data%2Flinear_dml_nonlinear_hist.webp%22%2C%0A%20%20%20%20)%0A%0A%20%20%20%20mo.image(%22data%2Flinear_dml_nonlinear_hist.webp%22%2C%20height%3D500).center()%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(linear_dml_cates_nonlinear%2C%20mo%2C%20plot_effect%2C%20true_cates_nonlinear)%3A%0A%20%20%20%20plot_effect(%0A%20%20%20%20%20%20%20%20true_cates_nonlinear%2C%0A%20%20%20%20%20%20%20%20linear_dml_cates_nonlinear%2C%0A%20%20%20%20%20%20%20%20save_path%3D%22data%2Flinear_dml_nonlinear_line.webp%22%2C%0A%20%20%20%20)%0A%0A%20%20%20%20mo.image(%22data%2Flinear_dml_nonlinear_line.webp%22%2C%20height%3D500).center()%0A%20%20%
20%20return%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20Here%20we%20see%20much%20degradation%20in%20performance.%20This%20non-linearity%20in%20the%20CATE%20function%20is%20precisely%20where%20Non-Parametric%20DML%20can%20shine!%0A%0A%20%20%20%20%20%20%20%20%23%23%20Non-Parametric%20DML%20for%20Estimating%20the%20CATE%0A%0A%20%20%20%20%20%20%20%20Non-Parametric%20DML%20goes%20one%20step%20further%20and%20allows%20for%20another%20flexible%20non-parametric%20ML%20model%20to%20be%20utilized%20for%20learning%20the%20CATE%20function!%20Let%E2%80%99s%20take%20a%20look%20at%20how%20we%20can%2C%20mathematically%2C%20do%20exactly%20this.%20Let%20%24%5Ctau(%5Cmathbf%7BX%7D)%24%20continue%20to%20denote%20our%20CATE%20function.%20Let%E2%80%99s%20start%20with%20defining%20our%20error%20term%20relative%20to%20eq.%203%20(note%20we%20drop%20the%20intercept%20%24%5Cbeta_0%24%20as%20this%20parameter%20is%20partialled%20out%20in%20residualization%20step%3B%20we%20could%20similarly%20drop%20this%20in%20the%20linear%20DML%20formulation%2C%20but%20for%20the%20sake%20of%20simplicity%20and%20consistency%20with%20part%201%2C%20we%20do%20not%20do%20this)%3A%0A%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Balign*%7D%0A%20%20%20%20%20%20%20%20y-%5Cmathcal%7BM%7D_y(%5Cmathbf%7BX%7D)%26%3D%5Ctau(%5Cmathbf%7BX%7D)(T-%5Cmathcal%7BM%7D_T(%5Cmathbf%7BX%7D))%2B%5Cepsilon%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Ctilde%7By%7D%20%26%3D%5Ctau(%5Cmathbf%7BX%7D)%5Ctilde%7BT%7D%2B%5Cepsilon%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Cepsilon%26%3D%5Ctilde%7By%7D-%5Ctau(%5Cmathbf%7BX%7D)%5Ctilde%7BT%7D%0A%20%20%20%20%20%20%20%20%5Cend%7Balign*%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20Then%20define%20the%20causal%20loss%20function%20as%20such%20(note%20this%20is%20just%20the%20MSE!)%3A%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Balign*%7D%0A%20%20%20%20%20%20%20%20
%5Cmathscr%7BL%7D(%5Ctau(%5Cmathbf%7BX%7D))%20%0A%20%20%20%20%20%20%20%20%26%3D%20%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5EN%5Cbigl(%5Ctilde%7By%7D_i%20-%20%5Ctau(%5Cmathbf%7BX%7D_i)%5Ctilde%7BT%7D_i%5Cbigr)%5E2%20%5C%5C%0A%20%20%20%20%20%20%20%20%26%3D%20%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5EN%5Ctilde%7BT%7D_i%5E2%5Cbiggl(%5Cfrac%7B%5Ctilde%7By%7D_i%7D%7B%5Ctilde%7BT%7D_i%7D%20-%20%5Ctau(%5Cmathbf%7BX%7D_i)%5Cbiggr)%5E2%0A%20%20%20%20%20%20%20%20%5Cend%7Balign*%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20What%20does%20this%20mean%3F%20We%20can%20directly%20learn%20%24%5Ctau(%5Cmathbf%7BX%7D)%24%20with%20any%20flexible%20ML%20model%20via%20minimizing%20our%20causal%20loss%20function!%20This%20amounts%20to%20a%20weighted%20regression%20problem%20with%20our%20target%20and%20weights%2C%20respectively%2C%20as%3A%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Balign*%7D%0A%20%20%20%20%20%20%20%20%5Ctext%7BTarget%7D%26%3D%5Cfrac%7B%5Ctilde%7By%7D_i%7D%7B%5Ctilde%7BT%7D_i%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Ctext%7BWeights%7D%26%3D%5Ctilde%7BT%7D_i%5E2%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Cend%7Balign*%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20_Take%20a%20moment%20and%20soak%20in%20the%20elegance%20of%20this%20result%E2%80%A6%20We%20can%20directly%20learn%20the%20CATE%20function%20%26%20predict%20an%20individual%E2%80%99s%20CATE%20given%20our%20residualized%20outcome%2C%20%24%5Ctilde%7By%7D%24%2C%20and%20treatment%2C%20%24%5Ctilde%7BT%7D%24!_%0A%0A%20%20%20%20%20%20%20%20Let%E2%80%99s%20take%20a%20look%20at%20this%20in%20action%20now.%20We%20will%20reuse%20the%20DGP%20for%20the%20non-linear%20CATE%20function%20that%20was%20utilized%20in%20the%20example%20where%20linear%20DML%20performs%20poorly%20above.%20To%20construct%20our%20Non-Parametric%20DML%20model%2C%20we%20can%20run%3A%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22Then%20define%20the%20causal%20loss%20function%20as%20such%
20(note%20this%20is%20just%20the%20MSE!)%3A%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(GradientBoostingRegressor%2C%20df_nonlinear)%3A%0A%20%20%20%20%23%20Define%20Target%20%26%20Weights%0A%20%20%20%20df_nonlinear%5B%22target%22%5D%20%3D%20(%0A%20%20%20%20%20%20%20%20df_nonlinear%5B%22residualized_sales%22%5D%0A%20%20%20%20%20%20%20%20%2F%20df_nonlinear%5B%22residualized_time_on_website%22%5D%0A%20%20%20%20)%0A%20%20%20%20df_nonlinear%5B%22weights%22%5D%20%3D%20df_nonlinear%5B%22residualized_time_on_website%22%5D%20**%202%0A%0A%20%20%20%20%23%20Non-Parametric%20CATE%20Model%0A%20%20%20%20CATE_model%20%3D%20GradientBoostingRegressor()%0A%20%20%20%20CATE_model.fit(%0A%20%20%20%20%20%20%20%20df_nonlinear%5B%5B%22age%22%2C%20%22num_social_media_profiles%22%2C%20%22yr_membership%22%5D%5D%2C%0A%20%20%20%20%20%20%20%20df_nonlinear%5B%22target%22%5D%2C%0A%20%20%20%20%20%20%20%20sample_weight%3Ddf_nonlinear%5B%22weights%22%5D%2C%0A%20%20%20%20)%0A%20%20%20%20return%20(CATE_model%2C)%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22And%20to%20make%20predictions%20%2B%20evaluate%20performance%3A%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20CATE_model%2C%0A%20%20%20%20df_nonlinear%2C%0A%20%20%20%20mean_absolute_error%2C%0A%20%20%20%20mean_squared_error%2C%0A%20%20%20%20r2_score%2C%0A%20%20%20%20true_cates_nonlinear%2C%0A)%3A%0A%20%20%20%20%23%20Predict%20CATE%20of%201%20hour%20increase%0A%20%20%20%20nonparam_dml_cates_nonlinear%20%3D%20CATE_model.predict(%0A%20%20%20%20%20%20%20%20df_nonlinear%5B%5B%22age%22%2C%20%22num_social_media_profiles%22%2C%20%22yr_membership%22%5D%5D%0A%20%20%20%20)%0A%0A%20%20%20%20print(%0A%20%20%20%20%20%20%20%20f%22Mean%20Squared%20Error%3A%20%7Bmean_squared_error(true_cates_nonlinear%2C%20nonparam_dml_cates_nonlinear)%7D%22%0A%20%20%20%20)%0A%20%20%20%20print(%0A%20%20%20%20%20%20%20%20f%22Mean%20Absolute%20Error%3A%20%7Bmean_absolute_error(true_cates_nonlinear%2C%20nonparam_dml_cat
es_nonlinear)%7D%22%0A%20%20%20%20)%0A%20%20%20%20print(f%22R-Squared%3A%20%7Br2_score(true_cates_nonlinear%2C%20nonparam_dml_cates_nonlinear)%7D%22)%0A%20%20%20%20return%20(nonparam_dml_cates_nonlinear%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(hist_effect%2C%20mo%2C%20nonparam_dml_cates_nonlinear%2C%20true_cates_nonlinear)%3A%0A%20%20%20%20hist_effect(%0A%20%20%20%20%20%20%20%20true_cates_nonlinear%2C%0A%20%20%20%20%20%20%20%20nonparam_dml_cates_nonlinear%2C%0A%20%20%20%20%20%20%20%20save_path%3D%22data%2Fnonparam_dml_nonlinear_hist.webp%22%2C%0A%20%20%20%20)%0A%0A%20%20%20%20mo.image(%22data%2Fnonparam_dml_nonlinear_hist.webp%22%2C%20height%3D500).center()%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo%2C%20nonparam_dml_cates_nonlinear%2C%20plot_effect%2C%20true_cates_nonlinear)%3A%0A%20%20%20%20plot_effect(%0A%20%20%20%20%20%20%20%20true_cates_nonlinear%2C%0A%20%20%20%20%20%20%20%20nonparam_dml_cates_nonlinear%2C%0A%20%20%20%20%20%20%20%20save_path%3D%22data%2Fnonparam_dml_nonlinear_line.webp%22%2C%0A%20%20%20%20)%0A%0A%20%20%20%20mo.image(%22data%2Fnonparam_dml_nonlinear_line.webp%22%2C%20height%3D500).center()%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20Here%20we%20can%20see%20that%2C%20although%20not%20perfect%2C%20the%20non-parametric%20DML%20approach%20was%20able%20to%20model%20the%20non-linearities%20in%20the%20CATE%20function%20much%20better%20than%20the%20linear%20DML%20approach.%20We%20can%20of%20course%20further%20improve%20the%20performance%20via%20tuning%20our%20model.%20Note%20that%20we%20can%20use%20explainable%20AI%20tools%2C%20such%20as%20%5BSHAP%20values%5D(https%3A%2F%2Fshap.readthedocs.io%2Fen%2Flatest%2Findex.html)%2C%20to%20understand%20the%20nature%20of%20our%20treatment%20effect%20heterogeneity!%0A%0A%20%20%20%20%20%20%20%20%23%23%20Conclusion%0A%0A%20%20%20%20%20%20%20%20And%20there%20you%20have%20it!%20Thank
%20you%20for%20taking%20the%20time%20to%20read%20through%20my%20article.%20I%20hope%20this%20article%20has%20taught%20you%20how%20to%20go%20beyond%20estimating%20only%20the%20ATE%20%26%20utilize%20DML%20to%20estimate%20the%20CATE%20to%20further%20understand%20heterogeneity%20in%20the%20treatment%20effects%20and%20drive%20more%20causal%20inference-%20%26%20data-driven%20targeting%20schemes.%0A%0A%20%20%20%20%20%20%20%20As%20always%2C%20I%20hope%20you%20have%20enjoyed%20reading%20this%20as%20much%20as%20I%20enjoyed%20writing%20it!%0A%0A%20%20%20%20%20%20%20%20%23%23%20References%0A%20%20%20%20%20%20%20%20%5B1%5D%20V.%20Chernozhukov%2C%20D.%20Chetverikov%2C%20M.%20Demirer%2C%20E.%20Duflo%2C%20C.%20Hansen%2C%20and%20W.%20Newey.%20Double%20Machine%20Learning%20for%20Treatment%20and%20Causal%20Parameters.%20ArXiv%20e-prints%2C%20July%202016.%0A%0A%20%20%20%20%20%20%20%20%3Cdiv%20style%3D%22text-align%3A%20center%3B%20font-size%3A%2024px%3B%22%3E%E2%9D%96%E2%9D%96%E2%9D%96%3C%2Fdiv%3E%0A%0A%20%20%20%20%20%20%20%20%3Ccenter%3E%0A%20%20%20%20%20%20%20%20Access%20all%20the%20code%20via%20this%20Marimo%20Notebook%20or%20my%20%5BGitHub%20Repo%5D(https%3A%2F%2Fgithub.com%2Fjakepenzak%2Fblog-posts)%0A%0A%20%20%20%20%20%20%20%20I%20appreciate%20you%20reading%20my%20post!%20My%20posts%20primarily%20explore%20real-world%20and%20theoretical%20applications%20of%20econometric%20and%20statistical%2Fmachine%20learning%20techniques%2C%20but%20also%20whatever%20I%20am%20currently%20interested%20in%20or%20learning%20%F0%9F%98%81.%20At%20the%20end%20of%20the%20day%2C%20I%20write%20to%20learn!%20I%20hope%20to%20make%20complex%20topics%20slightly%20more%20accessible%20to%20all.%0A%20%20%20%20%20%20%20%20%3C%2Fcenter%3E%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0Aif%20__name__%20%3D%3D%20%22__main__%22%3A%0A%20%20%20%20app.run()%0A