ATTgt
Basic Example
In [1]: from differences import load_data, ATTgt
In [2]: dataset = load_data.mpdta()
# compute group-time ATT
In [3]: att_gt = ATTgt(
...: data=dataset["data"],
...: cohort_name=dataset["cohort_name"],
...: )
...:
In [4]: att_gt.fit("lemp ~ lpop", est_method="reg")
Out[4]:
ATTgtResult ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
cohort base_period time ...
2004 2003 2004 -0.014911 0.022056 ... 0.028317
2005 -0.076996 0.028360 ... -0.021412 *
2006 -0.141080 0.034836 ... -0.072802 *
2007 -0.107544 0.032738 ... -0.043380 *
2006 2003 2004 -0.002066 0.022122 ... 0.041293
2004 2005 -0.006968 0.018346 ... 0.028989
2005 2006 0.000766 0.019196 ... 0.038389
2007 -0.041536 0.019717 ... -0.002891 *
2007 2003 2004 0.026366 0.014019 ... 0.053842
2004 2005 -0.004760 0.015670 ... 0.025953
2005 2006 -0.028502 0.018132 ... 0.007036
2006 2007 -0.028789 0.016168 ... 0.002899
[12 rows x 5 columns]
In [5]: att_gt.aggregate("event")
Out[5]:
EventAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
relative_period ...
-3 0.026366 0.014019 ... 0.053842
-2 -0.004130 0.012917 ... 0.021188
-1 -0.023465 0.014442 ... 0.004840
0 -0.021147 0.011481 ... 0.001356
1 -0.053356 0.016293 ... -0.021422 *
2 -0.141080 0.034836 ... -0.072802 *
3 -0.107544 0.032738 ... -0.043380 *
[7 rows x 5 columns]
In [6]: att_gt.aggregate("time")
Out[6]:
TimeAggregation
analytic pointwise conf. band
ATT std_error lower upper zero_not_in_cband
time
2004 -0.014911 0.022056 -0.058140 0.028317
2005 -0.076996 0.028360 -0.132580 -0.021412 *
2006 -0.046516 0.020998 -0.087672 -0.005360 *
2007 -0.039705 0.012866 -0.064922 -0.014489 *
In [7]: att_gt.aggregate("cohort")
Out[7]:
CohortAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
cohort ...
2004 -0.085133 0.024251 ... -0.037601 *
2006 -0.020385 0.017403 ... 0.013723
2007 -0.028789 0.016168 ... 0.002899
[3 rows x 5 columns]
In [8]: att_gt.aggregate("simple")
Out[8]:
SimpleAggregation
analytic pointwise conf. band
ATT std_error lower upper zero_not_in_cband
0 -0.041969 0.011445 -0.0644 -0.019537 *
In [9]: att_gt.fit("lemp ~ lpop", est_method="dr")
Out[9]:
ATTgtResult ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
cohort base_period time ...
2004 2003 2004 -0.014530 0.022129 ... 0.028843
2005 -0.076422 0.028671 ... -0.020227 *
2006 -0.140448 0.035378 ... -0.071108 *
2007 -0.106904 0.032886 ... -0.042448 *
2006 2003 2004 -0.000472 0.022223 ... 0.043085
2004 2005 -0.006203 0.018496 ... 0.030048
2005 2006 0.000961 0.019400 ... 0.038984
2007 -0.041294 0.019721 ... -0.002641 *
2007 2003 2004 0.026728 0.014066 ... 0.054296
2004 2005 -0.004577 0.015718 ... 0.026230
2005 2006 -0.028447 0.018181 ... 0.007186
2006 2007 -0.028781 0.016239 ... 0.003046
[12 rows x 5 columns]
In [10]: att_gt.aggregate("event")
Out[10]:
EventAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
relative_period ...
-3 0.026728 0.014066 ... 0.054296
-2 -0.003616 0.012928 ... 0.021723
-1 -0.023244 0.014485 ... 0.005146
0 -0.021060 0.011494 ... 0.001468
1 -0.053003 0.016346 ... -0.020965 *
2 -0.140448 0.035378 ... -0.071108 *
3 -0.106904 0.032886 ... -0.042448 *
[7 rows x 5 columns]
In [11]: att_gt.aggregate("time")
Out[11]:
TimeAggregation
analytic pointwise conf. band
ATT std_error lower upper zero_not_in_cband
time
2004 -0.014530 0.022129 -0.057902 0.028843
2005 -0.076422 0.028671 -0.132617 -0.020227 *
2006 -0.046176 0.021211 -0.087748 -0.004603 *
2007 -0.039582 0.012930 -0.064924 -0.014240 *
In [12]: att_gt.aggregate("cohort")
Out[12]:
CohortAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
cohort ...
2004 -0.084576 0.024565 ... -0.036430 *
2006 -0.020167 0.017470 ... 0.014073
2007 -0.028781 0.016239 ... 0.003046
[3 rows x 5 columns]
In [13]: att_gt.aggregate("simple")
Out[13]:
SimpleAggregation
analytic pointwise conf. band
ATT std_error lower upper zero_not_in_cband
0 -0.041752 0.011503 -0.064297 -0.019207 *
In [14]: att_gt.fit("lemp ~ lpop", est_method="std_ipw-mle")
Out[14]:
ATTgtResult ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
cohort base_period time ...
2004 2003 2004 -0.014548 0.022115 ... 0.028795
2005 -0.076450 0.028649 ... -0.020299 *
2006 -0.140465 0.035371 ... -0.071139 *
2007 -0.106933 0.032889 ... -0.042471 *
2006 2003 2004 -0.000869 0.022153 ... 0.042550
2004 2005 -0.006397 0.018457 ... 0.029778
2005 2006 0.001208 0.019488 ... 0.039404
2007 -0.041308 0.019721 ... -0.002655 *
2007 2003 2004 0.026556 0.014044 ... 0.054082
2004 2005 -0.004661 0.015669 ... 0.026050
2005 2006 -0.028340 0.018189 ... 0.007310
2006 2007 -0.028895 0.016246 ... 0.002948
[12 rows x 5 columns]
In [15]: att_gt.aggregate("event")
Out[15]:
EventAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
relative_period ...
-3 0.026556 0.014044 ... 0.054082
-2 -0.003774 0.012896 ... 0.021501
-1 -0.023207 0.014493 ... 0.005198
0 -0.021088 0.011498 ... 0.001448
1 -0.053022 0.016347 ... -0.020982 *
2 -0.140465 0.035371 ... -0.071139 *
3 -0.106933 0.032889 ... -0.042471 *
[7 rows x 5 columns]
In [16]: att_gt.aggregate("time")
Out[16]:
TimeAggregation
analytic pointwise conf. band
ATT std_error lower upper zero_not_in_cband
time
2004 -0.014548 0.022115 -0.057892 0.028795
2005 -0.076450 0.028649 -0.132601 -0.020299 *
2006 -0.046016 0.021273 -0.087711 -0.004321 *
2007 -0.039666 0.012914 -0.064978 -0.014354 *
In [17]: att_gt.aggregate("cohort")
Out[17]:
CohortAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
cohort ...
2004 -0.084599 0.024553 ... -0.036477 *
2006 -0.020050 0.017527 ... 0.014302
2007 -0.028895 0.016246 ... 0.002948
[3 rows x 5 columns]
In [18]: att_gt.aggregate("simple")
Out[18]:
SimpleAggregation
analytic pointwise conf. band
ATT std_error lower upper zero_not_in_cband
0 -0.041777 0.0115 -0.064316 -0.019238 *
Example with simulated data
In [19]: from differences import simulate_data, ATTgt
In [20]: panel_data = simulate_data() # generate data
In [21]: att_gt = ATTgt(data=panel_data, cohort_name='cohort')
In [22]: att_gt.fit(formula='y ~ x0')
Out[22]:
ATTgtResult ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
cohort base_period time ...
1901 1900 1901 -1.770823 1.416490 ... 1.005446
1902 9.547152 1.358363 ... 12.209495 *
1903 18.528777 1.310213 ... 21.096747 *
1904 29.063384 1.347204 ... 31.703855 *
1905 39.591632 1.409773 ... 42.354737 *
1906 49.652876 1.394390 ... 52.385831 *
1907 58.971455 1.392544 ... 61.700791 *
1905 1900 1901 -0.743443 1.395346 ... 1.991385
1901 1902 2.661357 1.308430 ... 5.225833 *
1902 1903 -1.060266 1.330040 ... 1.546565
1903 1904 0.795617 1.360870 ... 3.462873
1904 1905 -1.346158 1.359089 ... 1.317607
1906 0.545921 1.371593 ... 3.234194
1907 0.273274 1.293713 ... 2.808905
1907 1900 1901 -0.004217 1.353749 ... 2.649083
1901 1902 2.546809 1.326770 ... 5.147229
1902 1903 -0.674085 1.331711 ... 1.936022
1903 1904 -0.375036 1.360131 ... 2.290772
1904 1905 0.421206 1.371092 ... 3.108496
1905 1906 1.324373 1.315620 ... 3.902940
1906 1907 -1.584219 1.369530 ... 1.100010
[21 rows x 5 columns]
In [23]: att_gt.aggregate('time')
Out[23]:
TimeAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
time ...
1901 -1.770823 1.416490 ... 1.005446
1902 9.547152 1.358363 ... 12.209495 *
1903 18.528777 1.310213 ... 21.096747 *
1904 29.063384 1.347204 ... 31.703855 *
1905 19.199400 1.445303 ... 22.032141 *
1906 25.191359 1.543621 ... 28.216801 *
1907 19.319548 1.395955 ... 22.055571 *
[7 rows x 5 columns]
In [24]: att_gt.aggregate('event')
Out[24]:
EventAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
relative_period ...
-6 -0.004217 1.353749 ... 2.649083
-5 2.546809 1.326770 ... 5.147229
-4 -0.708764 0.937185 ... 1.128084
-3 1.143161 0.926032 ... 2.958151
-2 -0.319530 0.973605 ... 1.588700
-1 1.059995 0.992179 ... 3.004631
0 -1.567576 0.822166 ... 0.043839
1 5.063393 0.994313 ... 7.012210 *
2 9.435212 1.014343 ... 11.423288 *
3 29.063384 1.347204 ... 31.703855 *
4 39.591632 1.409773 ... 42.354737 *
5 49.652876 1.394390 ... 52.385831 *
6 58.971455 1.392544 ... 61.700791 *
[13 rows x 5 columns]
In [25]: att_gt.aggregate('cohort')
Out[25]:
CohortAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
cohort ...
1901 29.083493 1.065057 ... 31.170967 *
1905 -0.175654 1.096148 ... 1.972757
1907 -1.584219 1.369530 ... 1.100010
[3 rows x 5 columns]
In [26]: att_gt.aggregate('simple')
Out[26]:
SimpleAggregation
analytic pointwise conf. band
ATT std_error lower upper zero_not_in_cband
0 18.367027 0.937023 16.530496 20.203558 *
In [27]: att_gt.aggregate('event', overall=True)
Out[27]:
EventAggregationOverall ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
0 27.172911 0.863392 ... 28.865127 *
[1 rows x 5 columns]
Heterogeneity and triple difference
# heterogeneity
In [28]: panel_data = simulate_data(samples=3)
In [29]: att_gt = ATTgt(data=panel_data, cohort_name='cohort')
In [30]: att_gt.fit(formula='y', split_sample_by='samples')
Out[30]:
ATTgtResult ...
... pointwise conf. band
ATT ... zero_not_in_cband
sample_name cohort base_period time ...
samples = 2 1902 1900 1901 1.171448 ...
1901 1902 0.064281 ...
1903 9.179412 ... *
1904 18.806461 ... *
1905 26.598950 ... *
... ... ... ...
samples = 1 1907 1902 1903 0.331039 ...
1903 1904 0.403154 ...
1904 1905 2.759556 ...
1905 1906 -7.434259 ... *
1906 1907 3.421336 ...
[63 rows x 5 columns]
In [31]: att_gt.aggregate('event')
Out[31]:
EventAggregation ...
... pointwise conf. band
ATT ... zero_not_in_cband
sample_name relative_period ...
samples = 2 -6 -1.066179 ...
-5 2.503249 ...
-4 0.142707 ...
-3 -1.200561 ...
-2 0.869256 ...
-1 -0.337870 ...
0 0.195757 ...
1 4.241020 ... *
2 10.575644 ... *
3 15.906961 ... *
4 34.606926 ... *
5 42.950095 ... *
samples = 0 -6 -1.434893 ...
-5 -2.078107 ...
-4 -1.653544 ...
-3 0.766860 ...
-2 -2.575999 ...
-1 -0.302069 ...
0 -0.189330 ...
1 2.584816 ...
2 7.296462 ... *
3 10.455909 ... *
4 28.200953 ... *
5 33.619450 ... *
samples = 1 -6 2.762561 ...
-5 -4.445402 ...
-4 0.331039 ...
-3 2.957393 ...
-2 -2.787250 ...
-1 1.344287 ...
0 -1.954031 ...
1 4.221844 ... *
2 9.747289 ... *
3 16.364363 ... *
4 34.196971 ... *
5 45.415534 ... *
[36 rows x 5 columns]
In [32]: att_gt.aggregate('simple')
Out[32]:
SimpleAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
sample_name ...
samples = 2 12.667928 1.534763 ... 15.676009 *
samples = 0 9.138191 1.223046 ... 11.535318 *
samples = 1 12.527383 1.284117 ... 15.044206 *
[3 rows x 5 columns]
# triple difference
In [33]: att_gt.aggregate('time', difference=['samples = 1', 'samples = 2'])
Out[33]:
DifferenceTimeAggregation ...
... pointwise conf. band
ATT ... zero_not_in_cband
difference_between time ...
samples = 1 - samples = 2 1902 -6.179208 ... *
1903 -1.925446 ...
1904 -2.312572 ...
1905 1.113046 ...
1906 -0.326926 ...
1907 3.764902 ... *
[6 rows x 5 columns]
Multi-valued Treatment
# multi-valued treatment
In [34]: panel_data = simulate_data(intensity_by=2) # generate data
In [35]: att_gt = ATTgt(data=panel_data, cohort_name='cohort', strata_name='strata')
In [36]: att_gt.fit(formula='y', n_jobs=1)
Out[36]:
ATTgtResult ...
... pointwise conf. band
ATT ... zero_not_in_cband
stratum cohort base_period time ...
0 1902 1900 1901 1.603146 ...
1901 1902 -4.513971 ... *
1903 -0.584115 ...
1904 -2.806337 ...
1905 -1.258377 ...
1906 -0.743632 ...
1907 0.018110 ...
1903 1900 1901 0.426330 ...
1901 1902 -0.729027 ...
1902 1903 1.022224 ...
1904 0.564047 ...
1905 0.121820 ...
1906 1.480310 ...
1907 3.100692 ... *
1904 1900 1901 2.800354 ...
1901 1902 -3.346426 ... *
1902 1903 2.625392 ...
1903 1904 -1.567931 ...
1905 -2.198848 ...
1906 -1.156904 ...
1907 -1.268145 ...
1 1902 1900 1901 1.856299 ...
1901 1902 -0.895695 ...
1903 9.320287 ... *
1904 18.089749 ... *
1905 27.553460 ... *
1906 37.488657 ... *
1907 47.901531 ... *
1903 1900 1901 0.447893 ...
1901 1902 -2.032053 ...
1902 1903 1.147370 ...
1904 8.247226 ... *
1905 19.060428 ... *
1906 29.784651 ... *
1907 37.853282 ... *
1904 1900 1901 2.389180 ...
1901 1902 -3.169450 ... *
1902 1903 1.717456 ...
1903 1904 -3.536899 ... *
1905 6.193858 ... *
1906 19.129266 ... *
1907 28.598216 ... *
[42 rows x 5 columns]
In [37]: att_gt.aggregate('event')
Out[37]:
EventAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
stratum relative_period ...
0 -3 2.800354 1.624492 ... 5.984301
-2 -1.553698 1.001505 ... 0.409215
-1 1.205075 0.831243 ... 2.834281
0 -1.605665 0.756861 ... -0.122245 *
1 -0.794305 0.861145 ... 0.893509
2 -1.234759 0.873842 ... 0.477940
3 -0.355813 0.918228 ... 1.443880
4 1.260801 1.169319 ... 3.552625
5 0.018110 1.656627 ... 3.265040
1 -3 2.389180 1.696745 ... 5.714738
-2 -1.274651 0.991847 ... 0.669333
-1 0.514720 0.772439 ... 2.028674
0 -1.015340 0.795941 ... 0.544676
1 8.011893 0.836648 ... 9.651693 *
2 18.729930 0.892172 ... 20.478556 *
3 28.616467 0.900505 ... 30.381424 *
4 37.663677 1.110140 ... 39.839511 *
5 47.901531 1.509574 ... 50.860242 *
[18 rows x 5 columns]
In [38]: att_gt.aggregate('simple')
Out[38]:
SimpleAggregation ...
analytic ... pointwise conf. band
ATT std_error ... upper zero_not_in_cband
stratum ...
0 -0.644958 0.713971 ... 0.754399
1 19.355804 0.730277 ... 20.787120 *
[2 rows x 5 columns]
In [39]: att_gt.aggregate('event', difference=[0, 1], boot_iterations=5000)
Out[39]:
DifferenceEventAggregation ...
... simult. conf. band
ATT ... zero_not_in_cband
difference_between relative_period ...
0 - 1 -3 0.411174 ...
-2 -0.279047 ...
-1 0.690355 ...
0 -0.590324 ...
1 -8.806198 ... *
2 -19.964689 ... *
3 -28.972281 ... *
4 -36.402876 ... *
5 -47.883421 ... *
[9 rows x 5 columns]
Last update:
Dec 10, 2023