as well as all signif vertices
wd="path/to/working/directory"
cd $wd
for scenario in 03_BWAS_uncorrected_allModa 09_BWAS_ICVagesex_allModa 05_BWAS_10globalPCs_allModa 06_BWAS_10specificPCs_allModa 07_BWAS_MOA_allModa 10_BWAS_MOA_multiORM_QC_allModa 15_BWAS_MOA_allModa_FE
do
for NNN in 10 100 1000
do
${wd}/qsubshcom " module load R/3.5.0 |;
cd ${wd} |;
R -e 'arg=commandArgs(TRUE) ; source(\"/"${wd}"/RR_5.1_BWASprediction.R\") ; makeOSCAscorefile(scenario=arg[1], iter=arg[2], NNN=arg[3])' --args "${scenario}" \${TASK_ID} "${NNN}" |;
"${wd}"/osca_200220 --befile "${wd}"/BodFiles/AllVertices.fwhm0.fsaverage.UKB15K --keep "${wd}"/ID_UKB_replication.txt --score "${wd}"/"${scenario}"_clusterFWER/BWAS_topVerticesCluster_NVertices"${NNN}"_Niter\${TASK_ID}.txt --out "${wd}"/"${scenario}"_clusterFWER/BWAS_predicted_topVerticesCluster_NVertices"${NNN}"_Niter\${TASK_ID} |;
"${wd}"/osca_200220 --befile "${wd}"/BodFiles/AllVertices.fwhm0.fsaverage.UKB15K --keep "${wd}"/ID_UKB_replication.txt --score "${wd}"/"${scenario}"_clusterFWER/BWAS_allSignifVertices_NVertices"${NNN}"_Niter\${TASK_ID}.txt --out "${wd}"/"${scenario}"_clusterFWER/BWAS_predicted_allSignifVertices_NVertices"${NNN}"_Niter\${TASK_ID} |;
" 1 4G BWAS_${NNN}_${scenario}_pred_simul 01:00:00 "-array=1-100 "
done
done
based on prediction accuracy we have assumed total the linear model, regressed out the covariates to perform BWAS
Hence standardised BRS from vertices and unstandardised from covariates
Dependencies - tables UKB etc…
= 0.05/652283/5
signifT library(xlsx)
for (pheno in c("Age_MRI", "body_mass_index_bmi_f21001_2_0",
"fluid_intelligence_score_f20016_2_0", "sexuses_datacoding_9_f31_0_0",
"smoking_status_f20116_2_0")) {
= resall = NULL
res
for (scenario in c("03_BWAS_uncorrected_realPheno", "09_BWAS_ICVagesex_realPheno",
"04_BWAS_5globalPCs_realPheno", "05_BWAS_10globalPCs_realPheno",
"06_BWAS_10specificPCs_realPheno", "07_BWAS_MOA_realPheno",
"15_BWAS_MOA_allModa_FE_realPheno", "10_BWAS_MOA_multiORM_QC_realPheno")) {
if (scenario %in% c("03_BWAS_uncorrected_realPheno",
"07_BWAS_MOA_realPheno", "10_BWAS_moment2beta_realPheno",
"10_BWAS_MOA_multiORM_QC_realPheno")) {
paste0("SCORE2_", pheno, "_", scenario)] = scale(TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)]) * sd(TTdisco[,
na.rm = T) + mean(TTdisco[, pheno], na.rm = T)
pheno],
}
if (scenario == "04_BWAS_5globalPCs_realPheno") {
# Predictor from covariates
= paste0(pheno, " ~ ", paste0("PC", 1:5, collapse = "+"))
formu = glm(as.formula(formu), data = TTdisco)
mCov paste0("SCORE_Covariates_", pheno, "_",
TTrepli[, = predict.glm(mCov, newdata = TTrepli)
scenario)]
# Scale BRS for MAE and combine PC and vertex
# based scores
paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
if (scenario == "05_BWAS_10globalPCs_realPheno") {
# Predictor from covariates
= paste0(pheno, " ~ ", paste0("PC", 1:10, collapse = "+"))
formu = glm(as.formula(formu), data = TTdisco)
mCov paste0("SCORE_Covariates_", pheno, "_",
TTrepli[, = predict.glm(mCov, newdata = TTrepli)
scenario)]
# Scale BRS for MAE and combine PC and vertex
# based scores
paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
if (scenario == "06_BWAS_10specificPCs_realPheno") {
# Predictor from covariates
= paste0(pheno, " ~ ", paste0("PC_thickness_",
formu 1:10, collapse = "+"), " + ", paste0("PC_area_",
1:10, collapse = "+"), " + ", paste0("PC_thick_",
1:10, collapse = "+"), " + ", paste0("PC_LogJacs_",
1:10, collapse = "+"))
= glm(as.formula(formu), data = TTdisco)
mCov paste0("SCORE_Covariates_", pheno, "_",
TTrepli[, = predict.glm(mCov, newdata = TTrepli)
scenario)]
# Scale BRS for MAE and combine PC and vertex
# based scores
paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
if (scenario %in% c("09_BWAS_ICVagesex_realPheno", "15_BWAS_MOA_allModa_FE_realPheno")) {
# Predictor from covariates
= paste0(pheno, " ~ Age_MRI + ICV + sexuses_datacoding_9_f31_0_0")
formu = glm(as.formula(formu), data = TTdisco)
mCov paste0("SCORE_Covariates_", pheno, "_",
TTrepli[, = predict.glm(mCov, newdata = TTrepli)
scenario)] summary(mCov)
# Scale BRS for MAE and combine PC and vertex
# based scores
paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
# Models with no results
if (is.nan(TTrepli[1, paste0("SCORE2_", pheno, "_", scenario)])) {
paste0("SCORE2_", pheno, "_", scenario)] = 0
TTrepli[,
}if (is.nan(TTrepli[1, paste0("SCORE_", pheno, "_", scenario)])) {
paste0("SCORE_", pheno, "_", scenario)] = 0
TTrepli[,
}# Evaluate total prediction
= paste0(pheno, " ~ ", paste0("SCORE2_", pheno,
formu "_", scenario))
= glm(as.formula(formu), data = TTrepli)
m0 = summary(m0)
mm
# Check R2
if (scenario %in% c("09_BWAS_ICVagesex_realPheno", "04_BWAS_5globalPCs_realPheno",
"05_BWAS_10globalPCs_realPheno", "06_BWAS_10specificPCs_realPheno")) {
= paste0(pheno, " ~ ", paste0("SCORE_", pheno,
formu "_", scenario), " +", paste0("SCORE_Covariates_",
"_", scenario))
pheno, else {
} = paste0(pheno, " ~ ", paste0("SCORE_", pheno,
formu "_", scenario))
}= glm(as.formula(formu), data = TTrepli)
m1 = summary(m1)
mm1
# Store results
= c(scenario, TTrepli[1, paste0("CNT_", pheno, "_",
res 1 - mm$deviance/mm$null.deviance, 1 -
scenario)], $deviance/mm1$null.deviance, mm$aic, length(which(mm$coefficients[-1,
mm1"Pr(>|t|)"] < signifT)), cor(TTrepli[, pheno], TTrepli[,
paste0("SCORE2_", pheno, "_", scenario)], use = "p"),
cor.test(TTrepli[, pheno], TTrepli[, paste0("SCORE2_",
"_", scenario)], use = "p")$conf.int[1],
pheno, cor.test(TTrepli[, pheno], TTrepli[, paste0("SCORE2_",
"_", scenario)], use = "p")$conf.int[2],
pheno, mean(abs(TTrepli[, pheno] - TTrepli[, paste0("SCORE2_",
"_", scenario)]), na.rm = T), sd(abs(TTrepli[,
pheno, - TTrepli[, paste0("SCORE2_", pheno, "_",
pheno] na.rm = T))
scenario)]), = rbind(resall, res)
resall
}
colnames(resall) = c("scenario", "NbFeatures", "R2", "R2-2scores",
"aic", "NbSignifVertices", "cor", "lb_CI95%", "ub_CI95%",
"mae", "sd_mae")
write.xlsx(resall, "../../prediction_BWAS_results_topVertexCluster_wCovariates_allPhenoModels_R1IEEE.xlsx",
row.names = F, sheetName = pheno, append = T)
}
= c("Age", "BMI", "Fluid IQ", "Sex", "Smoking status")
labelsVar = c(0.91, 0.59, 0.16, 0.99, 0.15)
morphom
library(viridis)
png(paste0("../../Prediction_plot_intoUKB_OASIS_topVertexCluster_wCovariates_allPhenoModels_Figure3_R1IEEE.png"),
width = 18, height = 10, units = "cm", res = 400)
par(mar = c(0.5, 4, 0.5, 0.5))
= 5
nplots layout(matrix(data = c(1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9,
10, 10), ncol = 5, byrow = F))
= c(viridis_pal(option = "C")(8))
cols = 8
nscenarios
for (iii in c(2, 3, 1, 4, 5)) {
= read.xlsx("../../prediction_intoOASIS_topVertexCluster_wCovariates_N1006_allPhenoModels_R1IEEE.xlsx",
pre sheetIndex = iii, as.data.frame = T, colClasses = c("character",
rep("numeric", 10)))
= pre[1:nscenarios, ]
pre
= read.xlsx("../../prediction_BWAS_results_topVertexCluster_wCovariates_allPhenoModels_R1IEEE.xlsx",
preUKB sheetIndex = iii, as.data.frame = T, colClasses = c("character",
rep("numeric", 10)))
= preUKB[1:nscenarios, ]
preUKB
# Number signif clusters
par(mar = c(0.5, 2, 0.5, 0.5))
if (iii == 2) {
par(mar = c(0.5, 3, 0.5, 0.5))
}
plot(1:nscenarios, preUKB$NbFeatures, ylab = "", xaxt = "n",
xlab = "", pch = 19, lwd = 1, col = cols, main = labelsVar[iii],
cex.main = 0.7)
grid()
if (iii == 2) {
title(ylab = "Significant clusters", line = 2, cex.lab = 1)
}
# Correlation UKB
par(mar = c(8, 2, 0.5, 0.5))
if (iii == 2) {
par(mar = c(8, 3, 0.5, 0.5))
}
plot(1:nscenarios, preUKB$cor, ylim = c(0, 1), ylab = "",
xaxt = "n", xlab = "", pch = 20, lwd = 1, col = cols,
cex.main = 0.7)
arrows(x0 = 1:nscenarios, y0 = preUKB$lb_CI95., x1 = 1:nscenarios,
y1 = preUKB$ub_CI95., code = 3, length = 0.05, angle = 90,
col = "black", lwd = 1)
abline(h = sqrt(morphom[iii]), lty = 2, lwd = 2, col = "darkgrey")
points(1:nscenarios, pre$cor, ylim = c(0, 1), pch = 8, lwd = 1,
col = cols)
arrows(x0 = 1:nscenarios, y0 = pre$lb_CI95., x1 = 1:nscenarios,
y1 = pre$ub_CI95., code = 3, length = 0.05, angle = 90,
col = "black", lwd = 1)
grid()
# legend
if (iii == 1) {
= c("No covariates", "Sex, ICV reg.", "5 global PCs",
labs "10 global PCs", "10 modality spe. PCs", "LMM (global BRM)",
"LMM with covariates", "LMM (multi. BRM)")
else if (iii == 4) {
} = c("No covariates", "Age, ICV reg.", "5 global PCs",
labs "10 global PCs", "10 modality spe. PCs", "LMM (global BRM)",
"LMM with covariates", "LMM (multi. BRM)")
else {
} = c("No covariates", "Age, Sex, ICV reg.", "5 global PCs",
labs "10 global PCs", "10 modality spe. PCs", "LMM (global BRM)",
"LMM with covariates", "LMM (multi. BRM)")
}text(cex = 1, x = c(1:nscenarios) + 0.5, y = -0.1, pos = 2,
labels = labs, xpd = TRUE, srt = 65)
if (iii == 2) {
title(ylab = "Prediction accuracy", line = 2, cex.lab = 1)
}
}dev.off()
A work by by [Baptiste Couvy-Duchesne] - 13 June 2022