library(brainMapR) # devtools::install.github('baptisteCD/brainMapR')
for (var in c("body_mass_index_bmi_f21001_2_0", "fluid_intelligence_score_f20016_2_0",
"sexuses_datacoding_9_f31_0_0", "smoking_status_f20116_2_0")) {
for (scenario in c("03_BWAS_uncorrected_realPheno", "04_BWAS_5globalPCs_realPheno",
"05_BWAS_10globalPCs_realPheno", "06_BWAS_10specificPCs_realPheno",
"07_BWAS_MOA_realPheno", "09_BWAS_ICVagesex_realPheno",
"15_BWAS_MOA_allModa_FE_realPheno", "10_BWAS_MOA_multiORM_QC_realPheno")) {
::identifyClustersBWAS(pathToTheBWASresults = paste0(scenario,
brainMapR"/"), outputFolder = paste0(scenario, "/"), bwasFile = paste0("BWAS_",
signifThreshold = 1.5e-08)
var),
} }
library(readr)
= 0.05/652283/5
signifT # signifT=1.5e-8
for (scenario in c("03_BWAS_uncorrected_realPheno", "04_BWAS_5globalPCs_realPheno",
"05_BWAS_10globalPCs_realPheno", "06_BWAS_10specificPCs_realPheno",
"07_BWAS_MOA_realPheno", "09_BWAS_ICVagesex_realPheno", "15_BWAS_MOA_allModa_FE_realPheno",
"10_BWAS_MOA_multiORM_QC_realPheno")) {
for (pheno in c("Age_MRI", "body_mass_index_bmi_f21001_2_0",
"fluid_intelligence_score_f20016_2_0", "sexuses_datacoding_9_f31_0_0",
"smoking_status_f20116_2_0")) {
= NULL
vert
for (moda in c("area", "thickness", "LogJacs", "thick")) {
= read.table(paste0(scenario, "/BWAS_NVertices",
bwas "_", moda, "_clustersAndCoordinates"),
pheno, stringsAsFactors = F)
= NULL
clusVars = colnames(bwas)[grep("cluster_", x = colnames(bwas))]
clusVars
if (length(clusVars) > 0) {
for (iii in 1:length(clusVars)) {
= bwas[which(bwas[, clusVars[iii]] ==
cluster 1), ]
= cluster[order(cluster$p), ]
cluster = rbind(vert, c(cluster$Probe[1], cluster$b[1]))
vert
}
}
}print(dim(vert)[1])
write.table(unique(vert), paste0("15_BWAS_prediction/BWAS_topVerticesCluster_",
"_", scenario, ".txt"), col.names = F, row.names = F,
pheno, quote = F)
} }
wd="/path/to/working/directory"
cd $wd/15_BWAS_prediction
for scenario in 03_BWAS_uncorrected_realPheno 04_BWAS_5globalPCs_realPheno 05_BWAS_10globalPCs_realPheno 06_BWAS_10specificPCs_realPheno 07_BWAS_MOA_realPheno 09_BWAS_ICVagesex_realPheno 10_BWAS_MOA_multiORM_QC_realPheno 15_BWAS_MOA_allModa_FE_realPheno
do
for pheno in Age_MRI body_mass_index_bmi_f21001_2_0 fluid_intelligence_score_f20016_2_0 sexuses_datacoding_9_f31_0_0 smoking_status_f20116_2_0
do
${wd}/qsubshcom "
"${wd}"/osca_200220 --befile "${wd}"/BodFiles/AllVertices.fwhm0.fsaverage.UKB15K --keep "${wd}"/ID_UKB_replication.txt --score "${wd}"/15_BWAS_prediction/BWAS_topVerticesCluster_"${pheno}"_"${scenario}".txt --out "${wd}"/15_BWAS_prediction/BWAS_predicted_topVerticesCluster_"${pheno}"_"${scenario}".signif |;
" 1 30G BWAS_pred_topVertices_${scenario}_${pheno} 10:00:00 " "
done
done
library(readr)
= read_csv("UKB_phenotypes_15K_allData_regVars_Jan2019Update.csv")
TT
# Identify QCed individuals
= read.table("00_BodFiles/BRM.fwhm0.fsaverage.QC.orm.id",
qcID stringsAsFactors = F)
# Add brain PCs - calculated in OSCA
= read.table("PC.10global.eigenvec")
pcs colnames(pcs) = c("eid", "fid", paste0("PC", 1:10))
= merge(dat, pcs, by = "eid", all.x = T)
dat
# Add specific PCs
for (moda in c("thickness", "area", "thick", "LogJacs")) {
= read.table(paste0("PC.10specific.", moda, ".eigenvec"))
pcs colnames(pcs) = c("eid", "fid", paste0("PC_", moda, "_",
1:10))
= merge(dat, pcs, by = "eid", all.x = T)
dat
}
# Discovery and replication sample
= read.table("ID_UKB_replication.txt", stringsAsFactors = F)
replicID $discoveryQCed = 0
dat$discoveryQCed[which(!dat$eid %in% replicID$V1 & dat$eid %in%
dat$V1)] = 1
qcID$replicationQCed = 0
dat$replicationQCed[which(dat$eid %in% replicID$V1 & dat$eid %in%
dat$V1)] = 1
qcIDtable(dat$discoveryQCed)
table(dat$replicationQCed, dat$discoveryQCed)
# Add predictors
= as.data.frame(dat[, c("eid", "Age_MRI")])
prall for (scenario in c("03_BWAS_uncorrected_realPheno", "04_BWAS_5globalPCs_realPheno",
"05_BWAS_10globalPCs_realPheno", "06_BWAS_10specificPCs_realPheno",
"07_BWAS_MOA_realPheno", "09_BWAS_ICVagesex_realPheno", "10_BWAS_MOA_multiORM_QC_realPheno",
"15_BWAS_MOA_allModa_FE_realPheno", "10_BWAS_moment2beta_realPheno")) {
for (pheno in c("Age_MRI", "body_mass_index_bmi_f21001_2_0",
"fluid_intelligence_score_f20016_2_0", "sexuses_datacoding_9_f31_0_0",
"smoking_status_f20116_2_0")) {
= read.table(paste0("15_BWAS_prediction/BWAS_predicted_topVerticesCluster_",
pr "_", scenario, ".signif.profile"), header = T)
pheno, colnames(pr) = c("eid", "FID", "PHENO", paste0("CNT",
"_", pheno, "_", scenario), paste0("SCORE", "_",
"_", scenario))
pheno, $PHENO = pr$FID = NULL
pr
= merge(prall, pr, by = "eid")
prall
}
}
# Merge
$Age_MRI = NULL
prall= merge(dat, prall, all.x = T, by = "eid")
dat
# Separate discovery from repliation samples
= dat[which(dat$discoveryQCed == 1), ]
TTdisco = dat[which(dat$replicationQCed == 1), ] TTrepli
= 0.05/652283/5
signifT library(xlsx)
for (pheno in c("Age_MRI", "body_mass_index_bmi_f21001_2_0",
"fluid_intelligence_score_f20016_2_0", "sexuses_datacoding_9_f31_0_0",
"smoking_status_f20116_2_0")) {
= resall = NULL
res
for (scenario in c("03_BWAS_uncorrected_realPheno", "09_BWAS_ICVagesex_realPheno",
"04_BWAS_5globalPCs_realPheno", "05_BWAS_10globalPCs_realPheno",
"06_BWAS_10specificPCs_realPheno", "07_BWAS_MOA_realPheno",
"15_BWAS_MOA_allModa_FE_realPheno", "10_BWAS_MOA_multiORM_QC_realPheno",
"10_BWAS_moment2beta_realPheno")) {
if (scenario %in% c("03_BWAS_uncorrected_realPheno",
"07_BWAS_MOA_realPheno", "10_BWAS_MOA_multiORM_QC_realPheno")) {
paste0("SCORE2_", pheno, "_", scenario)] = scale(TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)]) * sd(TTdisco[,
na.rm = T) + mean(TTdisco[, pheno], na.rm = T)
pheno],
}
if (scenario == "04_BWAS_5globalPCs_realPheno") {
# Predictor from covariates
= paste0(pheno, " ~ ", paste0("PC", 1:5, collapse = "+"))
formu = glm(as.formula(formu), data = TTdisco)
mCov paste0("SCORE_Covariates_", pheno, "_",
TTrepli[, = predict.glm(mCov, newdata = TTrepli)
scenario)]
# Scale BRS for MAE and combine PC and vertex
# based scores
paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
if (scenario == "05_BWAS_10globalPCs_realPheno") {
# Predictor from covariates
= paste0(pheno, " ~ ", paste0("PC", 1:10, collapse = "+"))
formu = glm(as.formula(formu), data = TTdisco)
mCov paste0("SCORE_Covariates_", pheno, "_",
TTrepli[, = predict.glm(mCov, newdata = TTrepli)
scenario)]
# Scale BRS for MAE and combine PC and vertex
# based scores
paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
if (scenario == "06_BWAS_10specificPCs_realPheno") {
# Predictor from covariates
= paste0(pheno, " ~ ", paste0("PC_thickness_",
formu 1:10, collapse = "+"), " + ", paste0("PC_area_",
1:10, collapse = "+"), " + ", paste0("PC_thick_",
1:10, collapse = "+"), " + ", paste0("PC_LogJacs_",
1:10, collapse = "+"))
= glm(as.formula(formu), data = TTdisco)
mCov paste0("SCORE_Covariates_", pheno, "_",
TTrepli[, = predict.glm(mCov, newdata = TTrepli)
scenario)]
# Scale BRS for MAE and combine PC and vertex
# based scores
paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
if (scenario %in% c("09_BWAS_ICVagesex_realPheno", "15_BWAS_MOA_allModa_FE_realPheno")) {
# Predictor from covariates
= paste0(pheno, " ~ Age_MRI + ICV + sexuses_datacoding_9_f31_0_0")
formu = glm(as.formula(formu), data = TTdisco)
mCov paste0("SCORE_Covariates_", pheno, "_",
TTrepli[, = predict.glm(mCov, newdata = TTrepli)
scenario)] summary(mCov)
# Scale BRS for MAE and combine PC and vertex
# based scores
paste0("SCORE2_", pheno, "_", scenario)] = TTrepli[,
TTrepli[, paste0("SCORE_", pheno, "_", scenario)] * TTrepli[1,
paste0("CNT_", pheno, "_", scenario)] + TTrepli[,
paste0("SCORE_Covariates_", pheno, "_", scenario)]
}
# Models with no results
if (is.nan(TTrepli[1, paste0("SCORE2_", pheno, "_", scenario)])) {
paste0("SCORE2_", pheno, "_", scenario)] = 0
TTrepli[,
}if (is.nan(TTrepli[1, paste0("SCORE_", pheno, "_", scenario)])) {
paste0("SCORE_", pheno, "_", scenario)] = 0
TTrepli[,
}# Evaluate total prediction
= paste0(pheno, " ~ ", paste0("SCORE2_", pheno,
formu "_", scenario))
= glm(as.formula(formu), data = TTrepli)
m0 = summary(m0)
mm
# Check R2
if (scenario %in% c("09_BWAS_ICVagesex_realPheno", "04_BWAS_5globalPCs_realPheno",
"05_BWAS_10globalPCs_realPheno", "06_BWAS_10specificPCs_realPheno")) {
= paste0(pheno, " ~ ", paste0("SCORE_", pheno,
formu "_", scenario), " +", paste0("SCORE_Covariates_",
"_", scenario))
pheno, else {
} = paste0(pheno, " ~ ", paste0("SCORE_", pheno,
formu "_", scenario))
}= glm(as.formula(formu), data = TTrepli)
m1 = summary(m1)
mm1
# Store results
= c(scenario, TTrepli[1, paste0("CNT_", pheno, "_",
res 1 - mm$deviance/mm$null.deviance, 1 -
scenario)], $deviance/mm1$null.deviance, mm$aic, length(which(mm$coefficients[-1,
mm1"Pr(>|t|)"] < signifT)), cor(TTrepli[, pheno], TTrepli[,
paste0("SCORE2_", pheno, "_", scenario)], use = "p"),
cor.test(TTrepli[, pheno], TTrepli[, paste0("SCORE2_",
"_", scenario)], use = "p")$conf.int[1],
pheno, cor.test(TTrepli[, pheno], TTrepli[, paste0("SCORE2_",
"_", scenario)], use = "p")$conf.int[2],
pheno, mean(abs(TTrepli[, pheno] - TTrepli[, paste0("SCORE2_",
"_", scenario)]), na.rm = T), sd(abs(TTrepli[,
pheno, - TTrepli[, paste0("SCORE2_", pheno, "_",
pheno] na.rm = T))
scenario)]), = rbind(resall, res)
resall
}
colnames(resall) = c("scenario", "NbFeatures", "R2", "R2-2scores",
"aic", "NbSignifVertices", "cor", "lb_CI95%", "ub_CI95%",
"mae", "sd_mae")
write.xlsx(resall, "prediction_BWAS_results_topVertexCluster_wCovariates_allPhenoModels_R1IEEE.xlsx",
row.names = F, sheetName = pheno, append = T)
}
= c("Age", "BMI", "Fluid IQ", "Sex", "Smoking status")
labelsVar = c(0.91, 0.59, 0.16, 0.99, 0.15)
morphom
library(viridis)
png(paste0("Prediction_plot_intoUKB_OASIS_topVertexCluster_wCovariates_allPhenoModels_Figure3_R1IEEE.png"),
width = 18, height = 10, units = "cm", res = 400)
par(mar = c(0.5, 4, 0.5, 0.5))
= 5
nplots layout(matrix(data = c(1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9,
10, 10), ncol = 5, byrow = F))
= c(viridis_pal(option = "C")(8))
cols = 8
nscenarios
# Loop on the different variables
for (iii in c(2, 3, 1, 4, 5)) {
= read.xlsx("prediction_intoOASIS_topVertexCluster_wCovariates_N1006_allPhenoModels_R1IEEE.xlsx",
pre sheetIndex = iii, as.data.frame = T, colClasses = c("character",
rep("numeric", 10)))
= pre[1:nscenarios, ]
pre
= read.xlsx("prediction_BWAS_results_topVertexCluster_wCovariates_allPhenoModels_R1IEEE.xlsx",
preUKB sheetIndex = iii, as.data.frame = T, colClasses = c("character",
rep("numeric", 10)))
= preUKB[1:nscenarios, ]
preUKB
# Number signif clusters
par(mar = c(0.5, 2, 0.5, 0.5))
if (iii == 2) {
par(mar = c(0.5, 3, 0.5, 0.5))
}
plot(1:nscenarios, preUKB$NbFeatures, ylab = "", xaxt = "n",
xlab = "", pch = 19, lwd = 1, col = cols, main = labelsVar[iii],
cex.main = 0.7)
grid()
if (iii == 2) {
title(ylab = "Significant clusters", line = 2, cex.lab = 1)
}
# Correlation UKB
par(mar = c(8, 2, 0.5, 0.5))
if (iii == 2) {
par(mar = c(8, 3, 0.5, 0.5))
}
plot(1:nscenarios, preUKB$cor, ylim = c(0, 1), ylab = "",
xaxt = "n", xlab = "", pch = 20, lwd = 1, col = cols,
cex.main = 0.7)
arrows(x0 = 1:nscenarios, y0 = preUKB$lb_CI95., x1 = 1:nscenarios,
y1 = preUKB$ub_CI95., code = 3, length = 0.05, angle = 90,
col = "black", lwd = 1)
abline(h = sqrt(morphom[iii]), lty = 2, lwd = 2, col = "darkgrey")
points(1:nscenarios, pre$cor, ylim = c(0, 1), pch = 8, lwd = 1,
col = cols)
arrows(x0 = 1:nscenarios, y0 = pre$lb_CI95., x1 = 1:nscenarios,
y1 = pre$ub_CI95., code = 3, length = 0.05, angle = 90,
col = "black", lwd = 1)
grid()
# legend
if (iii == 1) {
= c("No covariates", "Sex, ICV reg.", "5 global PCs",
labs "10 global PCs", "10 modality spe. PCs", "LMM (global BRM)",
"LMM with covariates", "LMM (multi. BRM)")
else if (iii == 4) {
} = c("No covariates", "Age, ICV reg.", "5 global PCs",
labs "10 global PCs", "10 modality spe. PCs", "LMM (global BRM)",
"LMM with covariates", "LMM (multi. BRM)")
else {
} = c("No covariates", "Age, Sex, ICV reg.", "5 global PCs",
labs "10 global PCs", "10 modality spe. PCs", "LMM (global BRM)",
"LMM with covariates", "LMM (multi. BRM)")
}text(cex = 1, x = c(1:nscenarios) + 0.5, y = -0.1, pos = 2,
labels = labs, xpd = TRUE, srt = 65)
if (iii == 2) {
title(ylab = "Prediction accuracy", line = 2, cex.lab = 1)
}
}dev.off()
library(knitr)
include_graphics(path = paste0("examplePlots/Prediction_plot_intoUKB_OASIS_topVertexCluster_wCovariates_allPhenoModels_Figure3_R1IEEE.png"))
A work by by [Baptiste Couvy-Duchesne] - 13 June 2022