as well as all signif vertices
wd="path/to/working/directory"
cd $wd
for scenario in 03_BWAS_uncorrected_allModa 09_BWAS_ICVagesex_allModa 05_BWAS_10globalPCs_allModa 06_BWAS_10specificPCs_allModa 07_BWAS_MOA_allModa 10_BWAS_MOA_multiORM_QC_allModa 15_BWAS_MOA_allModa_FE
do
for NNN in 10 100 1000
do
${wd}/qsubshcom " module load R/3.5.0 |;
cd ${wd} |;
R -e 'arg=commandArgs(TRUE) ; source(\"/"${wd}"/RR_5.1_BWASprediction.R\") ; makeOSCAscorefile(scenario=arg[1], iter=arg[2], NNN=arg[3])' --args "${scenario}" \${TASK_ID} "${NNN}" |;
"${wd}"/osca_200220 --befile "${wd}"/BodFiles/AllVertices.fwhm0.fsaverage.UKB15K --keep "${wd}"/ID_UKB_replication.txt --score "${wd}"/"${scenario}"_clusterFWER/BWAS_topVerticesCluster_NVertices"${NNN}"_Niter\${TASK_ID}.txt --out "${wd}"/"${scenario}"_clusterFWER/BWAS_predicted_topVerticesCluster_NVertices"${NNN}"_Niter\${TASK_ID} |;
"${wd}"/osca_200220 --befile "${wd}"/BodFiles/AllVertices.fwhm0.fsaverage.UKB15K --keep "${wd}"/ID_UKB_replication.txt --score "${wd}"/"${scenario}"_clusterFWER/BWAS_allSignifVertices_NVertices"${NNN}"_Niter\${TASK_ID}.txt --out "${wd}"/"${scenario}"_clusterFWER/BWAS_predicted_allSignifVertices_NVertices"${NNN}"_Niter\${TASK_ID} |;
" 1 4G BWAS_${NNN}_${scenario}_pred_simul 01:00:00 "-array=1-100 "
done
donebased on prediction accuracy we have assumed total the linear model, regressed out the covariates to perform BWAS
Hence standardised BRS from vertices and unstandardised from covariates
Dependencies - tables UKB etc…
signifT = 0.05/652283/5
library(xlsx)
for (pheno in c("Age_MRI", "body_mass_index_bmi_f21001_2_0",
"fluid_intelligence_score_f20016_2_0", "sexuses_datacoding_9_f31_0_0",
"smoking_status_f20116_2_0")) {
res = resall = NULL
for (scenario in c("03_BWAS_uncorrected_realPheno", "09_BWAS_ICVagesex_realPheno",
"04_BWAS_5globalPCs_realPheno", "05_BWAS_10globalPCs_realPheno",
"06_BWAS_10specificPCs_realPheno", "07_BWAS_MOA_realPheno",
"15_BWAS_MOA_allModa_FE_realPheno", "10_BWAS_MOA_multiORM_QC_realPheno")) {
if (scenario %in% c("03_BWAS_uncorrected_realPheno",
"07_BWAS_MOA_realPheno", "10_BWAS_moment2beta_realPheno",
"10_BWAS_MOA_multiORM_QC_realPheno")) {
TTrepli[, paste0("SCORE2_", pheno, "_", scenario)] = scale(TTrepli[,
paste0("SCORE_", pheno, "_", scenario)]) * sd(TTdisco[,
pheno], na.rm = T) + mean(TTdisco[, pheno], na.rm = T)
}
if (scenario == "04_BWAS_5globalPCs_realPheno") {
# Predictor from covariates
formu = paste0(pheno, " ~ ", paste0("PC", 1:5, collapse = "+"))
mCov = glm(as.formula(formu), data = TTdisco)
TTrepli[, paste0("SCORE_Covariates_", pheno, "_",
scenario)] = predict.glm(mCov, newdata = TTrepli)
# Scale BRS for MAE and combine PC and vertex
# based scores
TTrepli[, paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
if (scenario == "05_BWAS_10globalPCs_realPheno") {
# Predictor from covariates
formu = paste0(pheno, " ~ ", paste0("PC", 1:10, collapse = "+"))
mCov = glm(as.formula(formu), data = TTdisco)
TTrepli[, paste0("SCORE_Covariates_", pheno, "_",
scenario)] = predict.glm(mCov, newdata = TTrepli)
# Scale BRS for MAE and combine PC and vertex
# based scores
TTrepli[, paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
if (scenario == "06_BWAS_10specificPCs_realPheno") {
# Predictor from covariates
formu = paste0(pheno, " ~ ", paste0("PC_thickness_",
1:10, collapse = "+"), " + ", paste0("PC_area_",
1:10, collapse = "+"), " + ", paste0("PC_thick_",
1:10, collapse = "+"), " + ", paste0("PC_LogJacs_",
1:10, collapse = "+"))
mCov = glm(as.formula(formu), data = TTdisco)
TTrepli[, paste0("SCORE_Covariates_", pheno, "_",
scenario)] = predict.glm(mCov, newdata = TTrepli)
# Scale BRS for MAE and combine PC and vertex
# based scores
TTrepli[, paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
if (scenario %in% c("09_BWAS_ICVagesex_realPheno", "15_BWAS_MOA_allModa_FE_realPheno")) {
# Predictor from covariates
formu = paste0(pheno, " ~ Age_MRI + ICV + sexuses_datacoding_9_f31_0_0")
mCov = glm(as.formula(formu), data = TTdisco)
TTrepli[, paste0("SCORE_Covariates_", pheno, "_",
scenario)] = predict.glm(mCov, newdata = TTrepli)
summary(mCov)
# Scale BRS for MAE and combine PC and vertex
# based scores
TTrepli[, paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
# Models with no results
if (is.nan(TTrepli[1, paste0("SCORE2_", pheno, "_", scenario)])) {
TTrepli[, paste0("SCORE2_", pheno, "_", scenario)] = 0
}
if (is.nan(TTrepli[1, paste0("SCORE_", pheno, "_", scenario)])) {
TTrepli[, paste0("SCORE_", pheno, "_", scenario)] = 0
}
# Evaluate total prediction
formu = paste0(pheno, " ~ ", paste0("SCORE2_", pheno,
"_", scenario))
m0 = glm(as.formula(formu), data = TTrepli)
mm = summary(m0)
# Check R2
if (scenario %in% c("09_BWAS_ICVagesex_realPheno", "04_BWAS_5globalPCs_realPheno",
"05_BWAS_10globalPCs_realPheno", "06_BWAS_10specificPCs_realPheno")) {
formu = paste0(pheno, " ~ ", paste0("SCORE_", pheno,
"_", scenario), " +", paste0("SCORE_Covariates_",
pheno, "_", scenario))
} else {
formu = paste0(pheno, " ~ ", paste0("SCORE_", pheno,
"_", scenario))
}
m1 = glm(as.formula(formu), data = TTrepli)
mm1 = summary(m1)
# Store results
res = c(scenario, TTrepli[1, paste0("CNT_", pheno, "_",
scenario)], 1 - mm$deviance/mm$null.deviance, 1 -
mm1$deviance/mm1$null.deviance, mm$aic, length(which(mm$coefficients[-1,
"Pr(>|t|)"] < signifT)), cor(TTrepli[, pheno], TTrepli[,
paste0("SCORE2_", pheno, "_", scenario)], use = "p"),
cor.test(TTrepli[, pheno], TTrepli[, paste0("SCORE2_",
pheno, "_", scenario)], use = "p")$conf.int[1],
cor.test(TTrepli[, pheno], TTrepli[, paste0("SCORE2_",
pheno, "_", scenario)], use = "p")$conf.int[2],
mean(abs(TTrepli[, pheno] - TTrepli[, paste0("SCORE2_",
pheno, "_", scenario)]), na.rm = T), sd(abs(TTrepli[,
pheno] - TTrepli[, paste0("SCORE2_", pheno, "_",
scenario)]), na.rm = T))
resall = rbind(resall, res)
}
colnames(resall) = c("scenario", "NbFeatures", "R2", "R2-2scores",
"aic", "NbSignifVertices", "cor", "lb_CI95%", "ub_CI95%",
"mae", "sd_mae")
write.xlsx(resall, "../../prediction_BWAS_results_topVertexCluster_wCovariates_allPhenoModels_R1IEEE.xlsx",
row.names = F, sheetName = pheno, append = T)
}labelsVar = c("Age", "BMI", "Fluid IQ", "Sex", "Smoking status")
morphom = c(0.91, 0.59, 0.16, 0.99, 0.15)
library(viridis)
png(paste0("../../Prediction_plot_intoUKB_OASIS_topVertexCluster_wCovariates_allPhenoModels_Figure3_R1IEEE.png"),
width = 18, height = 10, units = "cm", res = 400)
par(mar = c(0.5, 4, 0.5, 0.5))
nplots = 5
layout(matrix(data = c(1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9,
10, 10), ncol = 5, byrow = F))
cols = c(viridis_pal(option = "C")(8))
nscenarios = 8
for (iii in c(2, 3, 1, 4, 5)) {
pre = read.xlsx("../../prediction_intoOASIS_topVertexCluster_wCovariates_N1006_allPhenoModels_R1IEEE.xlsx",
sheetIndex = iii, as.data.frame = T, colClasses = c("character",
rep("numeric", 10)))
pre = pre[1:nscenarios, ]
preUKB = read.xlsx("../../prediction_BWAS_results_topVertexCluster_wCovariates_allPhenoModels_R1IEEE.xlsx",
sheetIndex = iii, as.data.frame = T, colClasses = c("character",
rep("numeric", 10)))
preUKB = preUKB[1:nscenarios, ]
# Number signif clusters
par(mar = c(0.5, 2, 0.5, 0.5))
if (iii == 2) {
par(mar = c(0.5, 3, 0.5, 0.5))
}
plot(1:nscenarios, preUKB$NbFeatures, ylab = "", xaxt = "n",
xlab = "", pch = 19, lwd = 1, col = cols, main = labelsVar[iii],
cex.main = 0.7)
grid()
if (iii == 2) {
title(ylab = "Significant clusters", line = 2, cex.lab = 1)
}
# Correlation UKB
par(mar = c(8, 2, 0.5, 0.5))
if (iii == 2) {
par(mar = c(8, 3, 0.5, 0.5))
}
plot(1:nscenarios, preUKB$cor, ylim = c(0, 1), ylab = "",
xaxt = "n", xlab = "", pch = 20, lwd = 1, col = cols,
cex.main = 0.7)
arrows(x0 = 1:nscenarios, y0 = preUKB$lb_CI95., x1 = 1:nscenarios,
y1 = preUKB$ub_CI95., code = 3, length = 0.05, angle = 90,
col = "black", lwd = 1)
abline(h = sqrt(morphom[iii]), lty = 2, lwd = 2, col = "darkgrey")
points(1:nscenarios, pre$cor, ylim = c(0, 1), pch = 8, lwd = 1,
col = cols)
arrows(x0 = 1:nscenarios, y0 = pre$lb_CI95., x1 = 1:nscenarios,
y1 = pre$ub_CI95., code = 3, length = 0.05, angle = 90,
col = "black", lwd = 1)
grid()
# legend
if (iii == 1) {
labs = c("No covariates", "Sex, ICV reg.", "5 global PCs",
"10 global PCs", "10 modality spe. PCs", "LMM (global BRM)",
"LMM with covariates", "LMM (multi. BRM)")
} else if (iii == 4) {
labs = c("No covariates", "Age, ICV reg.", "5 global PCs",
"10 global PCs", "10 modality spe. PCs", "LMM (global BRM)",
"LMM with covariates", "LMM (multi. BRM)")
} else {
labs = c("No covariates", "Age, Sex, ICV reg.", "5 global PCs",
"10 global PCs", "10 modality spe. PCs", "LMM (global BRM)",
"LMM with covariates", "LMM (multi. BRM)")
}
text(cex = 1, x = c(1:nscenarios) + 0.5, y = -0.1, pos = 2,
labels = labs, xpd = TRUE, srt = 65)
if (iii == 2) {
title(ylab = "Prediction accuracy", line = 2, cex.lab = 1)
}
}
dev.off()A work by by [Baptiste Couvy-Duchesne] - 13 June 2022