# 簡易シミュレーション比較
set.seed (1234 )
n_sim <- 100
n_samples <- 60
p_vars <- 10
signal <- 0.4
power_res <- replicate (n_sim, {
# データ生成
X <- matrix (rnorm (n_samples * p_vars), n_samples, p_vars)
# 第1変数だけ真の効果がある
Y <- signal * X[, 1 ] + rnorm (n_samples)
# 1. 選択的推論 (全データ)
fit_all <- glmnet (X, Y, alpha = 1 , lambda = 0.1 )
# 簡易的な評価(実際はもっと複雑)
out_si <- tryCatch (fixedLassoInf (X, Y, beta = as.vector (coef (fit_all, s= 0.1 )[- 1 ]), lambda = 0.1 * n_samples), error= function (e) NULL )
p_si <- if (! is.null (out_si) && out_si$ vars[1 ] == 1 ) out_si$ pv[1 ] else 1
# 2. データ分割 (半分)
idx <- 1 : (n_samples/ 2 )
fit_split <- glmnet (X[idx,], Y[idx], alpha = 1 , lambda = 0.1 )
selected <- which (as.vector (coef (fit_split, s= 0.1 )[- 1 ]) != 0 )
p_split <- 1
if (1 %in% selected) {
m_test <- lm (Y[- idx] ~ X[- idx, 1 ])
p_split <- summary (m_test)$ coefficients[2 , 4 ]
}
c (SI = p_si < 0.05 , Split = p_split < 0.05 )
})
power_df <- data.frame (
Method = c ("選択的推論" , "データ分割" ),
Power = rowMeans (power_res, na.rm = TRUE )
)
ggplot (power_df, aes (x = Method, y = Power, fill = Method)) +
geom_bar (stat = "identity" ) +
theme_minimal (base_size = 16 ) +
scale_fill_manual (values = c ("#a8dadc" , "#457b9d" )) +
labs (title = "真の効果を検出できる確率 (Power)" , y = "検出力" ) +
ylim (0 , 1 )