An example of market-clearing assessment

This short tutorial gives an example of how one can statistically assess whether a market is in an equilibrium state. The tutorial assumes some familiarity with the concepts and the functionality of the package. The basic_usage vignette can be helpful in acquiring this familiarity.

Setup the environment

Load the required libraries.

library(diseq)
library(magrittr)

Prepare the data. Here, we simply simulate data using a data generating process for a market in equilibrium.

nobs <- 1000
tobs <- 5

alpha_d <- -3.9
beta_d0 <- 18.9
beta_d <- c(2.1, -0.7)
eta_d <- c(3.5, 6.25)

alpha_s <- 2.8
beta_s0 <- 3.2
beta_s <- c(2.65)
eta_s <- c(1.15, 4.2)

sigma_d <- 0.8
sigma_s <- 1.1
rho_ds <- 0.0

seed <- 42

eq_data <- simulate_data(
  "equilibrium_model", nobs, tobs,
  alpha_d, beta_d0, beta_d, eta_d,
  alpha_s, beta_s0, beta_s, eta_s,
  NA, NA, c(NA),
  sigma_d = sigma_d, sigma_s = sigma_s, rho_ds = rho_ds,
  seed = seed
)

Initialize the model

Prepare the basic parameters for model initialization.

key_columns <- c("id", "date")
time_column <- c("date")
quantity_column <- "Q"
price_column <- "P"
demand_specification <- paste0(price_column, " + Xd1 + Xd2 + X1 + X2")
supply_specification <- "Xs1 + X1 + X2"
price_specification <- "Xp1"
verbose <- 2
correlated_shocks <- TRUE

Using the above parameterization, construct the model objects. Here we construct two equilibrium models and four disequilibrium models. All the models are constructed using the simulated data from a model of market in equilibrium.

eqmdl <- new(
  "equilibrium_model",
  key_columns,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data[eq_data$date != 1, ],
  correlated_shocks = correlated_shocks, verbose = verbose
)
#> Info: This is 'Equilibrium with correlated shocks' model
#> Warning: Removing unobserved '1' level(s).
bsmdl <- new(
  "diseq_basic",
  key_columns,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data[eq_data$date != 1, ],
  correlated_shocks = correlated_shocks, verbose = verbose
)
#> Info: This is 'Basic with correlated shocks' model
#> Warning: Removing unobserved '1' level(s).
damdl <- new(
  "diseq_deterministic_adjustment",
  key_columns, time_column,
  quantity_column, price_column,
  demand_specification, paste0(price_column, " + ", supply_specification),
  eq_data,
  correlated_shocks = correlated_shocks, verbose = verbose
)
#> Info: This is 'Deterministic Adjustment with correlated shocks' model
#> Info: Dropping 1000 rows by generating 'LAGGED_P'.
#> Info: Sample separated with 1971 rows in excess supply and 2029 in excess demand regime.

Estimation

Set the estimation parameters.

optimization_method <- "BFGS"
optimization_controls <- list(maxit = 10000, reltol = 1e-8)

Estimate the models.

eqmdl_reg <- estimate(eqmdl, method = "2SLS")
eqmdl_est <- estimate(eqmdl,
  control = optimization_controls,
  method = optimization_method
)
bsmdl_est <- estimate(bsmdl,
  control = optimization_controls,
  method = optimization_method
)
damdl_est <- estimate(damdl,
  control = optimization_controls,
  method = optimization_method
)

Post estimation analysis

Summaries

All the models provide estimates for the simulated data. Even with simulated data, it is difficult to assess which model performs better by examining only the summaries in separation or collectively.

summary(eqmdl_reg$first_stage_model)
#> 
#> Call:
#> lm(formula = first_stage_formula, data = object@model_tibble)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.88388 -0.14215  0.00305  0.14088  0.81967 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  2.356218   0.036603   64.37   <2e-16 ***
#> Xd1          0.314532   0.006576   47.83   <2e-16 ***
#> Xd2         -0.102046   0.006549  -15.58   <2e-16 ***
#> X1           0.347118   0.006523   53.21   <2e-16 ***
#> X2           0.306744   0.006445   47.60   <2e-16 ***
#> Xs1         -0.401540   0.006568  -61.14   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.2087 on 3994 degrees of freedom
#> Multiple R-squared:  0.7409, Adjusted R-squared:  0.7406 
#> F-statistic:  2284 on 5 and 3994 DF,  p-value: < 2.2e-16
summary(eqmdl_reg$system_model)
#> 
#> systemfit results 
#> method: 2SLS 
#> 
#>           N   DF     SSR  detRCov   OLS-R2 McElroy-R2
#> system 8000 7989 7765.49 0.838051 0.895447    0.90887
#> 
#>           N   DF     SSR      MSE    RMSE       R2   Adj R2
#> demand 4000 3994 2579.28 0.645789 0.80361 0.930546 0.930459
#> supply 4000 3995 5186.20 1.298174 1.13937 0.860348 0.860208
#> 
#> The covariance matrix of the residuals
#>            demand     supply
#> demand  0.6457891 -0.0171846
#> supply -0.0171846  1.2981739
#> 
#> The correlations of the residuals
#>            demand     supply
#> demand  1.0000000 -0.0187684
#> supply -0.0187684  1.0000000
#> 
#> 
#> 2SLS estimates for 'demand' (equation 1)
#> Model Formula: Q ~ P + Xd1 + Xd2 + X1 + X2
#> <environment: 0x55aa3f27fcb0>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55aa3f27fcb0>
#> 
#>               Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept) 18.7291651  0.1529666 122.4395 < 2.22e-16 ***
#> P           -3.8534666  0.0629681 -61.1971 < 2.22e-16 ***
#> Xd1          2.1102794  0.0323478  65.2372 < 2.22e-16 ***
#> Xd2         -0.7156455  0.0261205 -27.3978 < 2.22e-16 ***
#> X1           3.5151555  0.0331078 106.1731 < 2.22e-16 ***
#> X2           6.2431905  0.0314974 198.2126 < 2.22e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.80361 on 3994 degrees of freedom
#> Number of observations: 4000 Degrees of Freedom: 3994 
#> SSR: 2579.281574 MSE: 0.645789 Root MSE: 0.80361 
#> Multiple R-Squared: 0.930546 Adjusted R-Squared: 0.930459 
#> 
#> 
#> 2SLS estimates for 'supply' (equation 2)
#> Model Formula: Q ~ P + Xs1 + X1 + X2
#> <environment: 0x55aa3f27fcb0>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55aa3f27fcb0>
#> 
#>              Estimate Std. Error  t value   Pr(>|t|)    
#> (Intercept) 2.7596055  0.3499559  7.88558 3.9968e-15 ***
#> P           2.8849781  0.1084928 26.59143 < 2.22e-16 ***
#> Xs1         2.7054083  0.0569035 47.54382 < 2.22e-16 ***
#> X1          1.1758638  0.0515231 22.82207 < 2.22e-16 ***
#> X2          4.1766642  0.0489108 85.39346 < 2.22e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 1.139374 on 3995 degrees of freedom
#> Number of observations: 4000 Degrees of Freedom: 3995 
#> SSR: 5186.204766 MSE: 1.298174 Root MSE: 1.139374 
#> Multiple R-Squared: 0.860348 Adjusted R-Squared: 0.860208
bbmle::summary(eqmdl_est)
#> Maximum likelihood estimation
#> 
#> Call:
#> `bbmle::mle2`(list(method = "BFGS", control = list(maxit = 10000, 
#>     reltol = 1e-08), start = c(D_P = -2.68706645853737, D_CONST = 17.1339113219814, 
#> D_Xd1 = 1.73722956126006, D_Xd2 = -0.588859897639326, D_X1 = 3.11549614992508, 
#> D_X2 = 5.88376213269379, S_P = 0.142585048267158, S_CONST = 10.692054625689, 
#> S_Xs1 = 1.5882047811332, S_X1 = 2.11726946967838, S_X2 = 5.03594363343709, 
#> D_VARIANCE = 1, S_VARIANCE = 1, RHO = 0), minuslogl = function (...) 
#> minus_log_likelihood(object, ...), gr = function (...) 
#> gradient(object, ...)))
#> 
#> Coefficients:
#>             Estimate Std. Error  z value     Pr(z)    
#> D_P        -3.853483   0.062929 -61.2353 < 2.2e-16 ***
#> D_CONST    18.730529   0.153363 122.1322 < 2.2e-16 ***
#> D_Xd1       2.110141   0.032270  65.3909 < 2.2e-16 ***
#> D_Xd2      -0.716020   0.026169 -27.3612 < 2.2e-16 ***
#> D_X1        3.515155   0.033086 106.2424 < 2.2e-16 ***
#> D_X2        6.243180   0.031473 198.3687 < 2.2e-16 ***
#> S_P         2.886243   0.108461  26.6109 < 2.2e-16 ***
#> S_CONST     2.755863   0.349846   7.8774 3.344e-15 ***
#> S_Xs1       2.705925   0.056883  47.5696 < 2.2e-16 ***
#> S_X1        1.175444   0.051504  22.8224 < 2.2e-16 ***
#> S_X2        4.176285   0.048892  85.4181 < 2.2e-16 ***
#> D_VARIANCE  0.644823   0.018994  33.9485 < 2.2e-16 ***
#> S_VARIANCE  1.297027   0.051289  25.2886 < 2.2e-16 ***
#> RHO        -0.018907   0.023533  -0.8034    0.4217    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> -2 log L: 6722.671
bbmle::summary(bsmdl_est)
#> Maximum likelihood estimation
#> 
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08), 
#>     method = "BFGS", skip.hessian = TRUE, start = c(D_P = -2.68706645853737, 
#>     D_CONST = 17.1339113219814, D_Xd1 = 1.73722956126006, D_Xd2 = -0.588859897639326, 
#>     D_X1 = 3.11549614992508, D_X2 = 5.88376213269379, S_P = 0.142585048267158, 
#>     S_CONST = 10.692054625689, S_Xs1 = 1.5882047811332, S_X1 = 2.11726946967838, 
#>     S_X2 = 5.03594363343709, D_VARIANCE = 1, S_VARIANCE = 1, 
#>     RHO = 0), minuslogl = function (...) 
#>     minus_log_likelihood(object, ...), gr = function (...) 
#>     gradient(object, ...)))
#> 
#> Coefficients:
#>             Estimate Std. Error  z value     Pr(z)    
#> D_P        -2.911913   0.072376 -40.2334 < 2.2e-16 ***
#> D_CONST    17.467852   0.217222  80.4149 < 2.2e-16 ***
#> D_Xd1       2.026359   0.048168  42.0682 < 2.2e-16 ***
#> D_Xd2      -0.711620   0.032789 -21.7029 < 2.2e-16 ***
#> D_X1        3.168023   0.047606  66.5467 < 2.2e-16 ***
#> D_X2        5.984037   0.046977 127.3816 < 2.2e-16 ***
#> S_P         0.924162   0.159424   5.7969 6.755e-09 ***
#> S_CONST     8.353210   0.563139  14.8333 < 2.2e-16 ***
#> S_Xs1       2.467525   0.115833  21.3025 < 2.2e-16 ***
#> S_X1        1.878510   0.096513  19.4638 < 2.2e-16 ***
#> S_X2        4.712034   0.100106  47.0706 < 2.2e-16 ***
#> D_VARIANCE  0.595982   0.023909  24.9274 < 2.2e-16 ***
#> S_VARIANCE  0.970348   0.073543  13.1943 < 2.2e-16 ***
#> RHO        -0.335457   0.052592  -6.3785 1.788e-10 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> -2 log L: 8182.64
bbmle::summary(damdl_est)
#> Maximum likelihood estimation
#> 
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08), 
#>     method = "BFGS", start = c(D_P = -2.68706645853737, D_CONST = 17.1339113219814, 
#>     D_Xd1 = 1.73722956126006, D_Xd2 = -0.588859897639326, D_X1 = 3.11549614992508, 
#>     D_X2 = 5.88376213269379, S_P = 0.142585048267158, S_CONST = 10.692054625689, 
#>     S_Xs1 = 1.5882047811332, S_X1 = 2.11726946967838, S_X2 = 5.03594363343709, 
#>     P_DIFF = 1, D_VARIANCE = 1, S_VARIANCE = 1, RHO = 0), minuslogl = function (...) 
#>     minus_log_likelihood(object, ...), gr = function (...) 
#>     gradient(object, ...)))
#> 
#> Coefficients:
#>             Estimate Std. Error  z value     Pr(z)    
#> D_P        -3.848967   0.066359 -58.0023 < 2.2e-16 ***
#> D_CONST    18.717272   0.165305 113.2287 < 2.2e-16 ***
#> D_Xd1       2.110049   0.032270  65.3876 < 2.2e-16 ***
#> D_Xd2      -0.716016   0.026168 -27.3624 < 2.2e-16 ***
#> D_X1        3.515018   0.033094 106.2122 < 2.2e-16 ***
#> D_X2        6.243126   0.031472 198.3693 < 2.2e-16 ***
#> S_P         2.882241   0.110065  26.1867 < 2.2e-16 ***
#> S_CONST     2.772435   0.358361   7.7364 1.022e-14 ***
#> S_Xs1       2.705982   0.056885  47.5691 < 2.2e-16 ***
#> S_X1        1.175343   0.051508  22.8185 < 2.2e-16 ***
#> S_X2        4.176110   0.048899  85.4028 < 2.2e-16 ***
#> P_DIFF      0.008358   0.039024   0.2142    0.8304    
#> D_VARIANCE  0.644759   0.018995  33.9428 < 2.2e-16 ***
#> S_VARIANCE  1.297088   0.051294  25.2875 < 2.2e-16 ***
#> RHO        -0.018885   0.023533  -0.8025    0.4223    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> -2 log L: 6722.625

Model selection

The deterministic adjustment model has price dynamics that are analogous to excess demand and estimates one extra parameter. The directional model estimates one parameter less as the model does not have enough equations to identify prices in both demand and supply equations. The estimated parameters are summarized as follows.

sim_coef <- c(
  alpha_d, beta_d0, beta_d, eta_d,
  alpha_s, beta_s0, beta_s, eta_s,
  NA,
  sigma_d, sigma_s,
  rho_ds
)
names(sim_coef) <- names(damdl_est@coef)

dm_inc <- eqmdl_reg$system_model$coefficients[
  grep(
    "demand",
    names(eqmdl_reg$system_model$coefficients)
  )
]
sp_inc <- eqmdl_reg$system_model$coefficients[
  grep(
    "supply",
    names(eqmdl_reg$system_model$coefficients)
  )
]
lm_coef <- c(
  dm_inc[2], dm_inc[-2], sp_inc[2], sp_inc[-2],
  NA,
  NA, NA,
  NA
)

eqmdl_coef <- append(
  eqmdl_est@coef, c(NA),
  after = which(names(eqmdl_est@coef) ==
    prefixed_variance_variable(eqmdl@system@demand)) - 1
)

bsmdl_coef <- append(
  bsmdl_est@coef, c(NA),
  after = which(names(bsmdl_est@coef) ==
    prefixed_variance_variable(bsmdl@system@demand)) - 1
)

damdl_coef <- damdl_est@coef

comp <- tibble::tibble(
  parameter = names(sim_coef),
  sim = sim_coef, lm = lm_coef, fi = eqmdl_coef,
  bm = bsmdl_coef, da = damdl_coef,
  lmerr = abs(lm_coef - sim_coef), fierr = abs(eqmdl_coef - sim_coef),
  bmerr = abs(bsmdl_coef - sim_coef), daerr = abs(damdl_coef - sim_coef)
)
comp
#> # A tibble: 15 x 10
#>    parameter    sim     lm      fi     bm       da    lmerr    fierr   bmerr
#>    <chr>      <dbl>  <dbl>   <dbl>  <dbl>    <dbl>    <dbl>    <dbl>   <dbl>
#>  1 D_P        -3.9  -3.85  -3.85   -2.91  -3.85     0.0465   0.0465   0.988 
#>  2 D_CONST    18.9  18.7   18.7    17.5   18.7      0.171    0.169    1.43  
#>  3 D_Xd1       2.1   2.11   2.11    2.03   2.11     0.0103   0.0101   0.0736
#>  4 D_Xd2      -0.7  -0.716 -0.716  -0.712 -0.716    0.0156   0.0160   0.0116
#>  5 D_X1        3.5   3.52   3.52    3.17   3.52     0.0152   0.0152   0.332 
#>  6 D_X2        6.25  6.24   6.24    5.98   6.24     0.00681  0.00682  0.266 
#>  7 S_P         2.8   2.88   2.89    0.924  2.88     0.0850   0.0862   1.88  
#>  8 S_CONST     3.2   2.76   2.76    8.35   2.77     0.440    0.444    5.15  
#>  9 S_Xs1       2.65  2.71   2.71    2.47   2.71     0.0554   0.0559   0.182 
#> 10 S_X1        1.15  1.18   1.18    1.88   1.18     0.0259   0.0254   0.729 
#> 11 S_X2        4.2   4.18   4.18    4.71   4.18     0.0233   0.0237   0.512 
#> 12 P_DIFF     NA    NA     NA      NA      0.00836 NA       NA       NA     
#> 13 D_VARIANCE  0.8  NA      0.645   0.596  0.645   NA        0.155    0.204 
#> 14 S_VARIANCE  1.1  NA      1.30    0.970  1.30    NA        0.197    0.130 
#> 15 RHO         0    NA     -0.0189 -0.335 -0.0189  NA        0.0189   0.335 
#> # … with 1 more variable: daerr <dbl>

Since we have used simulated data, we can calculate the average absolute error of the parameter estimation for each of the models. In practice, the population values are unknown and this calculation is impossible.

comp_means <- colMeans(comp[, grep("err", colnames(comp))], na.rm = TRUE)
comp_means
#>      lmerr      fierr      bmerr      daerr 
#> 0.08138534 0.09076426 0.87318787 0.09056802

Moreover, the average absolute error cannot provide an overall estimation assessment as the market models have different parameter spaces. To assess the overall model performance one can instead use an information criterion.

model_names <- c(
  eqmdl@model_type_string,
  bsmdl@model_type_string, damdl@model_type_string
)
model_obs <- c(
  number_of_observations(eqmdl),
  number_of_observations(bsmdl),
  number_of_observations(damdl)
)
model_errors <- c(
  comp_means["fierr"],
  comp_means["bmerr"],
  comp_means["daerr"]
)
seltbl <- AIC(eqmdl_est, bsmdl_est, damdl_est) %>%
  tibble::add_column(Model = model_names, .before = 1) %>%
  tibble::add_column(Obs. = model_obs, `Mean Error` = model_errors) %>%
  dplyr::rename(D.F. = df) %>%
  dplyr::arrange(AIC)
seltbl
#>                              Model D.F.      AIC Obs. Mean Error
#> eqmdl_est              Equilibrium   14 6750.671 4000 0.09076426
#> damdl_est Deterministic Adjustment   15 6752.625 4000 0.09056802
#> bsmdl_est                    Basic   14 8210.640 4000 0.87318787