logo

1 Open phenotype data

I use read.csv here, which is a slower than readr::read_csv(). This is because I found that using read_csv opens the table as a data frame (not a data table) which does not allow to call TT[,columnName]

2 Write all pairwise phenotypes

The aim of this code section is to write bivariate phenotype files that have the form eid eid pheno1 pheno2 which will be read in OSCA to perform bivariate analyses.

We restrict ourselves to phenotypes with significant morphometricity.

4 Extract rGM and phenotypic correlations

# Rename columns to make code more readable
colnames(phe)=c("old_var" ,"category", "variable_clean", "my_var")

# Initialise - rGM matrices
rB<-matrix(data = NA, ncol =length(phe$my_var), nrow = length(phe$my_var) )
colnames(rB)<-phe$variable_clean
rownames(rB)<-phe$variable_clean
rBpval<-rB
rBSE<-rB

# Initialise - Phenotypic correlations
rP<-rB
rPpval<-rB
rPSE<-rB

# We need to store B1 and B2 (variance components) to calculate the residual correlation
BVC=matrix(data = NA, nrow =length(phe$my_var), ncol = 1 )
rownames(BVC)<-phe$variable_clean

# Loop on variable pairs and store values of interest
for (iii in 1:(length(phe$my_var)-1) ){
  for (jjj in (iii+1):length(phe$my_var)){

     var1<-phe$my_var[iii]
     var2<-phe$my_var[jjj]
     # rGM
     if(file.exists(paste0("BREML_bivariate_reg_Bsln_bdsz/BivModel.",var1, "_", var2, ".outliers.UKB.hsq"))){
out<-read.table(paste0("BREML_bivariate_reg_Bsln_bdsz/BivModel.",var1, "_", var2, ".outliers.UKB.hsq"), fill=T, stringsAsFactors = F)
rB[phe$variable_clean[iii], phe$variable_clean[jjj]]<-out[12,2]
rBpval[phe$variable_clean[iii], phe$variable_clean[jjj]]<-out[19,2]
 rBSE[phe$variable_clean[iii], phe$variable_clean[jjj]]<-out[12,3]  
     } else {
      if(file.exists(paste0("BREML_bivariate_reg_Bsln_bdsz/BivModel.",var2, "_", var1, ".outliers.UKB.hsq"))){
  out<-read.table(paste0("BREML_bivariate_reg_Bsln_bdsz/BivModel.",var2, "_", var1, ".outliers.UKB.hsq"), fill=T, stringsAsFactors = F)
rB[phe$variable_clean[iii], phe$variable_clean[jjj]]<-out[12,2]
rBpval[phe$variable_clean[iii], phe$variable_clean[jjj]]<-out[19,2]
 rBSE[phe$variable_clean[iii], phe$variable_clean[jjj]]<-out[12,3]     
       } }
     
# Get phenotypic correlation from written phenotype files too
if(file.exists(paste0("UKB_phenotypes_bivariate_reg_Bsln_bdsz/",var1, "_", var2, ".txt"))){
     dat=read.table(paste0("UKB_phenotypes_bivariate_reg_Bsln_bdsz/",var1, "_", var2, ".txt"))
     ct=cor.test(dat$V3, dat$V4)
     rP[phe$variable_clean[iii], phe$variable_clean[jjj]]<-ct$estimate
  rPpval[phe$variable_clean[iii], phe$variable_clean[jjj]]<-ct$p.value
   rPSE[phe$variable_clean[iii], phe$variable_clean[jjj]]<-sqrt( (1-ct$estimate**2) / ct$parameter )
} else {
  if(file.exists(paste0("UKB_phenotypes_bivariate_reg_Bsln_bdsz/",var2, "_", var1, ".txt"))){
    dat=read.table(paste0("UKB_phenotypes_bivariate_reg_Bsln_bdsz/",var2, "_", var1, ".txt"))
     ct=cor.test(dat$V3, dat$V4)
  rP[phe$variable_clean[iii], phe$variable_clean[jjj]]<-ct$estimate
  rPpval[phe$variable_clean[iii], phe$variable_clean[jjj]]<-ct$p.value
  rPSE[phe$variable_clean[iii], phe$variable_clean[jjj]]<-sqrt( (1-ct$estimate**2) / ct$parameter )
}
  
}
  }
  BVC[phe$variable_clean[iii]]=as.numeric(out[10,2])
}

# Write matrices and initialise plot
write.table(rB, "rB_correlation_matrix_outliers_Bsln_bdsz.txt", col.names=T, row.names = F)
write.table(rBpval, "rB_pvalues_correlation_matrix_outliers_Bsln_bdsz.txt", col.names=T, row.names = F)
write.table(rBSE, "rB_SE_correlation_matrix_outliers_Bsln_bdsz.txt", col.names=T, row.names = F)

write.table(rP, "rP_correlation_matrix_outliers_Bsln_bdsz.txt", col.names=T, row.names = F)
write.table(rPpval, "rP_pvalues_correlation_matrix_outliers_Bsln_bdsz.txt", col.names=T, row.names = F)
write.table(rPSE, "rP_SE_correlation_matrix_outliers_Bsln_bdsz.txt", col.names=T, row.names = F)

5 Calculate residual correlations

findVCOVmatFromGCTAOutput() is a R function that extract the matrix of variance covariace of model estimates from qsubshcom log files.

6 Plot grey-matter and residual correlations

rB<-read.table("rB_correlation_matrix_outliers_Bsln_bdsz_15K.txt", header = T, colClasses = numeric())
rBpval<-read.table("rB_pvalues_correlation_matrix_outliers_Bsln_bdsz_15K.txt", header = T, colClasses = numeric())
rBSE<-read.table("rB_SE_correlation_matrix_outliers_Bsln_bdsz_15K.txt", header = T, colClasses = numeric())

rp=read.table("rE_correlation_matrix_outliers_Bsln_bdsz_15K.txt", header = T, colClasses = numeric())
rpp=read.table("rE_pvalues_correlation_matrix_outliers_Bsln_bdsz_15K.txt", header = T, colClasses = numeric())
rpSE=read.table("rE_SE_correlation_matrix_outliers_Bsln_bdsz_15K.txt", header = T, colClasses = numeric())

rp=t(rp)
rpp=t(rpp)
rpSE=t(rpSE)

# Combine the two triangular matrics
allr<-matrix(0, nrow = length(phe$variable_clean), ncol = length(phe$variable_clean) )
allr[upper.tri(allr)]<-rB[upper.tri(rB)]
allr[lower.tri(allr)]<-rp[lower.tri(rp)]
colnames(allr)<-phe$variable_clean
rownames(allr)<-phe$variable_clean

allp<-matrix(0, nrow = length(phe$variable_clean), ncol = length(phe$variable_clean) )
allp[upper.tri(allp)]<-rBpval[upper.tri(rBpval)]
allp[lower.tri(allp)]<-rpp[lower.tri(rpp)]
allr[which(is.na(allr), arr.ind = T)]<-0

allSE<-matrix(0, nrow = length(phe$variable_clean), ncol = length(phe$variable_clean) )
allSE[upper.tri(allSE)]<-rBSE[upper.tri(rBSE)]
allSE[lower.tri(allSE)]<-rpSE[lower.tri(rpSE)]
allSE[which(is.na(allSE), arr.ind = T)]<-0

# Create object to highlight categories
ncate=as.data.frame(table(phe$category))
ncate$Freq
diag(allr)=NA

# Bonferroni significance threshold
signifUKB_Biv=0.05/(53*52)

# Make plot with correct colours and stars
png("Corplot_rE_rB_allstars_ordered_Bsln_bdsz_15K_stars.png", width = 30, height = 30, units = "cm", res = 300)
corrplot(corr = allr, method = "color",addgrid.col = "grey", type="full",  tl.cex = 1 ,  tl.col = "black", insig = "blank", col=colorRampPalette(c("#0072B2","#56B4E9","white", "#F0E442","#D55E00"))(100), na.label="square", tl.srt=70, cl.cex = 1.3)
for (iii in 1:length(unique(ncate$Var1))){
  rect(xleft = 0.5 + sum(ncate$Freq[0:(iii-1)]), ytop = 0.5+sum(ncate$Freq)-sum(ncate$Freq[0:(iii-1)]), ybottom = 0.5+sum(ncate$Freq) - ncate$Freq[iii] -sum(ncate$Freq[0:(iii-1)]), xright = 0.5 + ncate$Freq[iii] +sum(ncate$Freq[0:(iii-1)]), lwd = 3 )
}
stars=which(allp<signifUKB_Biv, arr.ind = T)
for (iii in 1:length(stars[,1])){
  text( stars[iii,2], dim(allr)[1] +1 - stars[iii,1], labels = "*", cex=1.2)
}
dev.off()


# write results
allrp=allr
for (iii in 1:dim(allr)[1]){
  for (jjj in 1:dim(allr)[1]){
allrp[iii,jjj]=paste0( signif(allr[iii,jjj], 3)," (SE=",signif(allSE[iii,jjj], 2) , "; p=", signif(allp[iii,jjj], 2), ")")
}
}
diag(allrp)=NA
write.csv(allrp, "UKB_Fig3a_rGM_rE_Bsln_bdsz.csv")



 




A work by by [Baptiste Couvy-Duchesne] - 23 April 2020