ATTgt

Basic Example

In [1]: from differences import load_data, ATTgt

In [2]: dataset = load_data.mpdta()

# set up the estimator, then compute the group-time ATTs
In [3]: att_gt = ATTgt(
   ...:     data=dataset["data"],
   ...:     cohort_name=dataset["cohort_name"],
   ...: )
   ...: 
In [4]: att_gt.fit("lemp ~ lpop", est_method="reg")
Out[4]: 
                        ATTgtResult            ...                                       
                                     analytic  ... pointwise conf. band                  
                                ATT std_error  ...                upper zero_not_in_cband
cohort base_period time                        ...                                       
2004   2003        2004   -0.014911  0.022056  ...             0.028317                  
                   2005   -0.076996  0.028360  ...            -0.021412                 *
                   2006   -0.141080  0.034836  ...            -0.072802                 *
                   2007   -0.107544  0.032738  ...            -0.043380                 *
2006   2003        2004   -0.002066  0.022122  ...             0.041293                  
       2004        2005   -0.006968  0.018346  ...             0.028989                  
       2005        2006    0.000766  0.019196  ...             0.038389                  
                   2007   -0.041536  0.019717  ...            -0.002891                 *
2007   2003        2004    0.026366  0.014019  ...             0.053842                  
       2004        2005   -0.004760  0.015670  ...             0.025953                  
       2005        2006   -0.028502  0.018132  ...             0.007036                  
       2006        2007   -0.028789  0.016168  ...             0.002899                  

[12 rows x 5 columns]

In [5]: att_gt.aggregate("event")
Out[5]: 
                EventAggregation            ...                                       
                                  analytic  ... pointwise conf. band                  
                             ATT std_error  ...                upper zero_not_in_cband
relative_period                             ...                                       
-3                      0.026366  0.014019  ...             0.053842                  
-2                     -0.004130  0.012917  ...             0.021188                  
-1                     -0.023465  0.014442  ...             0.004840                  
 0                     -0.021147  0.011481  ...             0.001356                  
 1                     -0.053356  0.016293  ...            -0.021422                 *
 2                     -0.141080  0.034836  ...            -0.072802                 *
 3                     -0.107544  0.032738  ...            -0.043380                 *

[7 rows x 5 columns]

In [6]: att_gt.aggregate("time")
Out[6]: 
     TimeAggregation                                                           
                      analytic pointwise conf. band                            
                 ATT std_error                lower     upper zero_not_in_cband
time                                                                           
2004       -0.014911  0.022056            -0.058140  0.028317                  
2005       -0.076996  0.028360            -0.132580 -0.021412                 *
2006       -0.046516  0.020998            -0.087672 -0.005360                 *
2007       -0.039705  0.012866            -0.064922 -0.014489                 *

In [7]: att_gt.aggregate("cohort")
Out[7]: 
       CohortAggregation            ...                                       
                          analytic  ... pointwise conf. band                  
                     ATT std_error  ...                upper zero_not_in_cband
cohort                              ...                                       
2004           -0.085133  0.024251  ...            -0.037601                 *
2006           -0.020385  0.017403  ...             0.013723                  
2007           -0.028789  0.016168  ...             0.002899                  

[3 rows x 5 columns]

In [8]: att_gt.aggregate("simple")
Out[8]: 
  SimpleAggregation                                                           
                     analytic pointwise conf. band                            
                ATT std_error                lower     upper zero_not_in_cband
0         -0.041969  0.011445              -0.0644 -0.019537                 *

# same model, re-estimated with the doubly robust estimator (est_method="dr")
In [9]: att_gt.fit("lemp ~ lpop", est_method="dr")
Out[9]: 
                        ATTgtResult            ...                                       
                                     analytic  ... pointwise conf. band                  
                                ATT std_error  ...                upper zero_not_in_cband
cohort base_period time                        ...                                       
2004   2003        2004   -0.014530  0.022129  ...             0.028843                  
                   2005   -0.076422  0.028671  ...            -0.020227                 *
                   2006   -0.140448  0.035378  ...            -0.071108                 *
                   2007   -0.106904  0.032886  ...            -0.042448                 *
2006   2003        2004   -0.000472  0.022223  ...             0.043085                  
       2004        2005   -0.006203  0.018496  ...             0.030048                  
       2005        2006    0.000961  0.019400  ...             0.038984                  
                   2007   -0.041294  0.019721  ...            -0.002641                 *
2007   2003        2004    0.026728  0.014066  ...             0.054296                  
       2004        2005   -0.004577  0.015718  ...             0.026230                  
       2005        2006   -0.028447  0.018181  ...             0.007186                  
       2006        2007   -0.028781  0.016239  ...             0.003046                  

[12 rows x 5 columns]

In [10]: att_gt.aggregate("event")
Out[10]: 
                EventAggregation            ...                                       
                                  analytic  ... pointwise conf. band                  
                             ATT std_error  ...                upper zero_not_in_cband
relative_period                             ...                                       
-3                      0.026728  0.014066  ...             0.054296                  
-2                     -0.003616  0.012928  ...             0.021723                  
-1                     -0.023244  0.014485  ...             0.005146                  
 0                     -0.021060  0.011494  ...             0.001468                  
 1                     -0.053003  0.016346  ...            -0.020965                 *
 2                     -0.140448  0.035378  ...            -0.071108                 *
 3                     -0.106904  0.032886  ...            -0.042448                 *

[7 rows x 5 columns]

In [11]: att_gt.aggregate("time")
Out[11]: 
     TimeAggregation                                                           
                      analytic pointwise conf. band                            
                 ATT std_error                lower     upper zero_not_in_cband
time                                                                           
2004       -0.014530  0.022129            -0.057902  0.028843                  
2005       -0.076422  0.028671            -0.132617 -0.020227                 *
2006       -0.046176  0.021211            -0.087748 -0.004603                 *
2007       -0.039582  0.012930            -0.064924 -0.014240                 *

In [12]: att_gt.aggregate("cohort")
Out[12]: 
       CohortAggregation            ...                                       
                          analytic  ... pointwise conf. band                  
                     ATT std_error  ...                upper zero_not_in_cband
cohort                              ...                                       
2004           -0.084576  0.024565  ...            -0.036430                 *
2006           -0.020167  0.017470  ...             0.014073                  
2007           -0.028781  0.016239  ...             0.003046                  

[3 rows x 5 columns]

In [13]: att_gt.aggregate("simple")
Out[13]: 
  SimpleAggregation                                                           
                     analytic pointwise conf. band                            
                ATT std_error                lower     upper zero_not_in_cband
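0         -0.041752  0.011503            -0.064297 -0.019207                 *

# same model, re-estimated with standardized inverse probability weighting (est_method="std_ipw-mle")
In [14]: att_gt.fit("lemp ~ lpop", est_method="std_ipw-mle")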
Out[14]: 
                        ATTgtResult            ...                                       
                                     analytic  ... pointwise conf. band                  
                                ATT std_error  ...                upper zero_not_in_cband
cohort base_period time                        ...                                       
2004   2003        2004   -0.014548  0.022115  ...             0.028795                  
                   2005   -0.076450  0.028649  ...            -0.020299                 *
                   2006   -0.140465  0.035371  ...            -0.071139                 *
                   2007   -0.106933  0.032889  ...            -0.042471                 *
2006   2003        2004   -0.000869  0.022153  ...             0.042550                  
       2004        2005   -0.006397  0.018457  ...             0.029778                  
       2005        2006    0.001208  0.019488  ...             0.039404                  
                   2007   -0.041308  0.019721  ...            -0.002655                 *
2007   2003        2004    0.026556  0.014044  ...             0.054082                  
       2004        2005   -0.004661  0.015669  ...             0.026050                  
       2005        2006   -0.028340  0.018189  ...             0.007310                  
       2006        2007   -0.028895  0.016246  ...             0.002948                  

[12 rows x 5 columns]

In [15]: att_gt.aggregate("event")
Out[15]: 
                EventAggregation            ...                                       
                                  analytic  ... pointwise conf. band                  
                             ATT std_error  ...                upper zero_not_in_cband
relative_period                             ...                                       
-3                      0.026556  0.014044  ...             0.054082                  
-2                     -0.003774  0.012896  ...             0.021501                  
-1                     -0.023207  0.014493  ...             0.005198                  
 0                     -0.021088  0.011498  ...             0.001448                  
 1                     -0.053022  0.016347  ...            -0.020982                 *
 2                     -0.140465  0.035371  ...            -0.071139                 *
 3                     -0.106933  0.032889  ...            -0.042471                 *

[7 rows x 5 columns]

In [16]: att_gt.aggregate("time")
Out[16]: 
     TimeAggregation                                                           
                      analytic pointwise conf. band                            
                 ATT std_error                lower     upper zero_not_in_cband
time                                                                           
2004       -0.014548  0.022115            -0.057892  0.028795                  
2005       -0.076450  0.028649            -0.132601 -0.020299                 *
2006       -0.046016  0.021273            -0.087711 -0.004321                 *
2007       -0.039666  0.012914            -0.064978 -0.014354                 *

In [17]: att_gt.aggregate("cohort")
Out[17]: 
       CohortAggregation            ...                                       
                          analytic  ... pointwise conf. band                  
                     ATT std_error  ...                upper zero_not_in_cband
cohort                              ...                                       
2004           -0.084599  0.024553  ...            -0.036477                 *
2006           -0.020050  0.017527  ...             0.014302                  
2007           -0.028895  0.016246  ...             0.002948                  

[3 rows x 5 columns]

In [18]: att_gt.aggregate("simple")
Out[18]: 
  SimpleAggregation                                                           
                     analytic pointwise conf. band                            
                ATT std_error                lower     upper zero_not_in_cband
0         -0.041777    0.0115            -0.064316 -0.019238                 *

Example with simulated data

In [19]: from differences import simulate_data, ATTgt

In [20]: panel_data = simulate_data()  # generate data

In [21]: att_gt = ATTgt(data=panel_data, cohort_name='cohort')

In [22]: att_gt.fit(formula='y ~ x0')
Out[22]: 
                        ATTgtResult            ...                                       
                                     analytic  ... pointwise conf. band                  
                                ATT std_error  ...                upper zero_not_in_cband
cohort base_period time                        ...                                       
1901   1900        1901   -1.770823  1.416490  ...             1.005446                  
                   1902    9.547152  1.358363  ...            12.209495                 *
                   1903   18.528777  1.310213  ...            21.096747                 *
                   1904   29.063384  1.347204  ...            31.703855                 *
                   1905   39.591632  1.409773  ...            42.354737                 *
                   1906   49.652876  1.394390  ...            52.385831                 *
                   1907   58.971455  1.392544  ...            61.700791                 *
1905   1900        1901   -0.743443  1.395346  ...             1.991385                  
       1901        1902    2.661357  1.308430  ...             5.225833                 *
       1902        1903   -1.060266  1.330040  ...             1.546565                  
       1903        1904    0.795617  1.360870  ...             3.462873                  
       1904        1905   -1.346158  1.359089  ...             1.317607                  
                   1906    0.545921  1.371593  ...             3.234194                  
                   1907    0.273274  1.293713  ...             2.808905                  
1907   1900        1901   -0.004217  1.353749  ...             2.649083                  
       1901        1902    2.546809  1.326770  ...             5.147229                  
       1902        1903   -0.674085  1.331711  ...             1.936022                  
       1903        1904   -0.375036  1.360131  ...             2.290772                  
       1904        1905    0.421206  1.371092  ...             3.108496                  
       1905        1906    1.324373  1.315620  ...             3.902940                  
       1906        1907   -1.584219  1.369530  ...             1.100010                  

[21 rows x 5 columns]

In [23]: att_gt.aggregate('time')
Out[23]: 
     TimeAggregation            ...                                       
                      analytic  ... pointwise conf. band                  
                 ATT std_error  ...                upper zero_not_in_cband
time                            ...                                       
1901       -1.770823  1.416490  ...             1.005446                  
1902        9.547152  1.358363  ...            12.209495                 *
1903       18.528777  1.310213  ...            21.096747                 *
1904       29.063384  1.347204  ...            31.703855                 *
1905       19.199400  1.445303  ...            22.032141                 *
1906       25.191359  1.543621  ...            28.216801                 *
1907       19.319548  1.395955  ...            22.055571                 *

[7 rows x 5 columns]

In [24]: att_gt.aggregate('event')
Out[24]: 
                EventAggregation            ...                                       
                                  analytic  ... pointwise conf. band                  
                             ATT std_error  ...                upper zero_not_in_cband
relative_period                             ...                                       
-6                     -0.004217  1.353749  ...             2.649083                  
-5                      2.546809  1.326770  ...             5.147229                  
-4                     -0.708764  0.937185  ...             1.128084                  
-3                      1.143161  0.926032  ...             2.958151                  
-2                     -0.319530  0.973605  ...             1.588700                  
-1                      1.059995  0.992179  ...             3.004631                  
 0                     -1.567576  0.822166  ...             0.043839                  
 1                      5.063393  0.994313  ...             7.012210                 *
 2                      9.435212  1.014343  ...            11.423288                 *
 3                     29.063384  1.347204  ...            31.703855                 *
 4                     39.591632  1.409773  ...            42.354737                 *
 5                     49.652876  1.394390  ...            52.385831                 *
 6                     58.971455  1.392544  ...            61.700791                 *

[13 rows x 5 columns]

In [25]: att_gt.aggregate('cohort')
Out[25]: 
       CohortAggregation            ...                                       
                          analytic  ... pointwise conf. band                  
                     ATT std_error  ...                upper zero_not_in_cband
cohort                              ...                                       
1901           29.083493  1.065057  ...            31.170967                 *
1905           -0.175654  1.096148  ...             1.972757                  
1907           -1.584219  1.369530  ...             1.100010                  

[3 rows x 5 columns]

In [26]: att_gt.aggregate('simple')
Out[26]: 
  SimpleAggregation                                                            
                     analytic pointwise conf. band                             
                ATT std_error                lower      upper zero_not_in_cband
0         18.367027  0.937023            16.530496  20.203558                 *

In [27]: att_gt.aggregate('event', overall=True)
Out[27]: 
  EventAggregationOverall            ...                                       
                           analytic  ... pointwise conf. band                  
                      ATT std_error  ...                upper zero_not_in_cband
0               27.172911  0.863392  ...            28.865127                 *

[1 rows x 5 columns]

Heterogeneity and triple difference

# heterogeneity
In [28]: panel_data = simulate_data(samples=3)

In [29]: att_gt = ATTgt(data=panel_data, cohort_name='cohort')

In [30]: att_gt.fit(formula='y', split_sample_by='samples')
Out[30]: 
                                    ATTgtResult  ...                     
                                                 ... pointwise conf. band
                                            ATT  ...    zero_not_in_cband
sample_name cohort base_period time              ...                     
samples = 2 1902   1900        1901    1.171448  ...                     
                   1901        1902    0.064281  ...                     
                               1903    9.179412  ...                    *
                               1904   18.806461  ...                    *
                               1905   26.598950  ...                    *
...                                         ...  ...                  ...
samples = 1 1907   1902        1903    0.331039  ...                     
                   1903        1904    0.403154  ...                     
                   1904        1905    2.759556  ...                     
                   1905        1906   -7.434259  ...                    *
                   1906        1907    3.421336  ...                     

[63 rows x 5 columns]

In [31]: att_gt.aggregate('event')
Out[31]: 
                            EventAggregation  ...                     
                                              ... pointwise conf. band
                                         ATT  ...    zero_not_in_cband
sample_name relative_period                   ...                     
samples = 2 -6                     -1.066179  ...                     
            -5                      2.503249  ...                     
            -4                      0.142707  ...                     
            -3                     -1.200561  ...                     
            -2                      0.869256  ...                     
            -1                     -0.337870  ...                     
             0                      0.195757  ...                     
             1                      4.241020  ...                    *
             2                     10.575644  ...                    *
             3                     15.906961  ...                    *
             4                     34.606926  ...                    *
             5                     42.950095  ...                    *
samples = 0 -6                     -1.434893  ...                     
            -5                     -2.078107  ...                     
            -4                     -1.653544  ...                     
            -3                      0.766860  ...                     
            -2                     -2.575999  ...                     
            -1                     -0.302069  ...                     
             0                     -0.189330  ...                     
             1                      2.584816  ...                     
             2                      7.296462  ...                    *
             3                     10.455909  ...                    *
             4                     28.200953  ...                    *
             5                     33.619450  ...                    *
samples = 1 -6                      2.762561  ...                     
            -5                     -4.445402  ...                     
            -4                      0.331039  ...                     
            -3                      2.957393  ...                     
            -2                     -2.787250  ...                     
            -1                      1.344287  ...                     
             0                     -1.954031  ...                     
             1                      4.221844  ...                    *
             2                      9.747289  ...                    *
             3                     16.364363  ...                    *
             4                     34.196971  ...                    *
             5                     45.415534  ...                    *

[36 rows x 5 columns]

In [32]: att_gt.aggregate('simple')
Out[32]: 
            SimpleAggregation            ...                                       
                               analytic  ... pointwise conf. band                  
                          ATT std_error  ...                upper zero_not_in_cband
sample_name                              ...                                       
samples = 2         12.667928  1.534763  ...            15.676009                 *
samples = 0          9.138191  1.223046  ...            11.535318                 *
samples = 1         12.527383  1.284117  ...            15.044206                 *

[3 rows x 5 columns]

# triple difference
In [33]: att_gt.aggregate('time', difference=['samples = 1', 'samples = 2'])
Out[33]: 
                               DifferenceTimeAggregation  ...                     
                                                          ... pointwise conf. band
                                                     ATT  ...    zero_not_in_cband
difference_between        time                            ...                     
samples = 1 - samples = 2 1902                 -6.179208  ...                    *
                          1903                 -1.925446  ...                     
                          1904                 -2.312572  ...                     
                          1905                  1.113046  ...                     
                          1906                 -0.326926  ...                     
                          1907                  3.764902  ...                    *

[6 rows x 5 columns]

Multi-valued Treatment

# multi-valued treatment
In [34]: panel_data = simulate_data(intensity_by=2)  # generate data

In [35]: att_gt = ATTgt(data=panel_data, cohort_name='cohort', strata_name='strata')

In [36]: att_gt.fit(formula='y', n_jobs=1)
Out[36]: 
                                ATTgtResult  ...                     
                                             ... pointwise conf. band
                                        ATT  ...    zero_not_in_cband
stratum cohort base_period time              ...                     
0       1902   1900        1901    1.603146  ...                     
               1901        1902   -4.513971  ...                    *
                           1903   -0.584115  ...                     
                           1904   -2.806337  ...                     
                           1905   -1.258377  ...                     
                           1906   -0.743632  ...                     
                           1907    0.018110  ...                     
        1903   1900        1901    0.426330  ...                     
               1901        1902   -0.729027  ...                     
               1902        1903    1.022224  ...                     
                           1904    0.564047  ...                     
                           1905    0.121820  ...                     
                           1906    1.480310  ...                     
                           1907    3.100692  ...                    *
        1904   1900        1901    2.800354  ...                     
               1901        1902   -3.346426  ...                    *
               1902        1903    2.625392  ...                     
               1903        1904   -1.567931  ...                     
                           1905   -2.198848  ...                     
                           1906   -1.156904  ...                     
                           1907   -1.268145  ...                     
1       1902   1900        1901    1.856299  ...                     
               1901        1902   -0.895695  ...                     
                           1903    9.320287  ...                    *
                           1904   18.089749  ...                    *
                           1905   27.553460  ...                    *
                           1906   37.488657  ...                    *
                           1907   47.901531  ...                    *
        1903   1900        1901    0.447893  ...                     
               1901        1902   -2.032053  ...                     
               1902        1903    1.147370  ...                     
                           1904    8.247226  ...                    *
                           1905   19.060428  ...                    *
                           1906   29.784651  ...                    *
                           1907   37.853282  ...                    *
        1904   1900        1901    2.389180  ...                     
               1901        1902   -3.169450  ...                    *
               1902        1903    1.717456  ...                     
               1903        1904   -3.536899  ...                    *
                           1905    6.193858  ...                    *
                           1906   19.129266  ...                    *
                           1907   28.598216  ...                    *

[42 rows x 5 columns]

In [37]: att_gt.aggregate('event')
Out[37]: 
                        EventAggregation            ...                                       
                                          analytic  ... pointwise conf. band                  
                                     ATT std_error  ...                upper zero_not_in_cband
stratum relative_period                             ...                                       
0       -3                      2.800354  1.624492  ...             5.984301                  
        -2                     -1.553698  1.001505  ...             0.409215                  
        -1                      1.205075  0.831243  ...             2.834281                  
         0                     -1.605665  0.756861  ...            -0.122245                 *
         1                     -0.794305  0.861145  ...             0.893509                  
         2                     -1.234759  0.873842  ...             0.477940                  
         3                     -0.355813  0.918228  ...             1.443880                  
         4                      1.260801  1.169319  ...             3.552625                  
         5                      0.018110  1.656627  ...             3.265040                  
1       -3                      2.389180  1.696745  ...             5.714738                  
        -2                     -1.274651  0.991847  ...             0.669333                  
        -1                      0.514720  0.772439  ...             2.028674                  
         0                     -1.015340  0.795941  ...             0.544676                  
         1                      8.011893  0.836648  ...             9.651693                 *
         2                     18.729930  0.892172  ...            20.478556                 *
         3                     28.616467  0.900505  ...            30.381424                 *
         4                     37.663677  1.110140  ...            39.839511                 *
         5                     47.901531  1.509574  ...            50.860242                 *

[18 rows x 5 columns]

In [38]: att_gt.aggregate('simple')
Out[38]: 
        SimpleAggregation            ...                                       
                           analytic  ... pointwise conf. band                  
                      ATT std_error  ...                upper zero_not_in_cband
stratum                              ...                                       
0               -0.644958  0.713971  ...             0.754399                  
1               19.355804  0.730277  ...            20.787120                 *

[2 rows x 5 columns]

In [39]: att_gt.aggregate('event', difference=[0, 1], boot_iterations=5000)
Out[39]: 
                                   DifferenceEventAggregation  ...                   
                                                               ... simult. conf. band
                                                          ATT  ...  zero_not_in_cband
difference_between relative_period                             ...                   
0 - 1              -3                                0.411174  ...                   
                   -2                               -0.279047  ...                   
                   -1                                0.690355  ...                   
                    0                               -0.590324  ...                   
                    1                               -8.806198  ...                  *
                    2                              -19.964689  ...                  *
                    3                              -28.972281  ...                  *
                    4                              -36.402876  ...                  *
                    5                              -47.883421  ...                  *

[9 rows x 5 columns]

Last update: Dec 10, 2023