logo

1 Open database

UKB_phenotypes_15K_allData_regVars_Jan2019Update.csv was created using the ukbtools package, which renames the columns to a more user-friendly format:
https://github.com/kenhanscombe/ukbtools

library(readr)
# Load the UK Biobank phenotype table (columns already renamed by ukbtools)
TT <- read_csv("UKB_phenotypes_15K_allData_regVars_Jan2019Update.csv")

# Tabulate the T1 and T2 FLAIR NIfTI image fields (instance 2), keeping NA
# as its own category (exclude = NULL).
# NOTE(review): presumably used to check imaging availability when defining
# the BWAS sample -- confirm.
table(TT$t1_structural_brain_images_nifti_f20252_2_0, exclude = NULL)
table(TT$t2_flair_structural_brain_images_nifti_f20253_2_0, exclude = NULL)

# Read the 10 global principal components and label the columns:
# two identifier columns followed by PC1..PC10
globPC <- read.table("PC.10global.eigenvec")
names(globPC) <- c("eid", "iid", paste0("PC", seq_len(10)))

# Left join on eid: every row of TT is kept, PCs are NA when no match exists
TT <- merge(x = TT, y = globPC, by = "eid", all.x = TRUE)

2 Write raw phenotypes and covariate files

We have created a text file, UKB_BWASvars.txt, which contains the list of variables we want to study. This file is used to loop over the phenotypes of interest.
Phenotypes are stored in individual text files (folder UKB_phenotypes15K) to facilitate reading them with OSCA. Each phenotype file contains 3 columns: eid, eid, phenotype.

# Set ICV values outside [1e6, 2e6] to missing, then standardise ICV and age.
# which() drops NA comparisons, so already-missing ICV values stay untouched.
icv_out_of_range <- which(TT$ICV < 1e+06 | TT$ICV > 2e+06)
TT$ICV[icv_out_of_range] <- NA

# scale() returns a one-column matrix carrying centre/scale attributes;
# as.numeric() keeps the same values but stores a plain numeric vector.
TT$ICVstd <- as.numeric(scale(TT$ICV))
TT$Agestd <- as.numeric(scale(TT$Age_MRI))

# Make sure the output folder exists before writing anything into it
# (no-op, and no warning, when it is already there).
dir.create("UKB_phenotypes15K", showWarnings = FALSE)

# Write phenotypic files (one per variable of interest).
# Each file has 3 columns and no header: eid, eid, phenotype.
phe <- read.table("UKB_BWASvars.txt")

# Fail early, before any file is written, if a listed variable is absent
# from TT (otherwise the loop stops half-way with "undefined columns").
missing_vars <- setdiff(as.character(phe$V1), colnames(TT))
if (length(missing_vars) > 0) {
    stop("Variables listed in UKB_BWASvars.txt but not found in TT: ",
        paste(missing_vars, collapse = ", "), call. = FALSE)
}

for (var in phe$V1) {
    write.table(TT[, c("eid", "eid", var)],
        file.path("UKB_phenotypes15K", paste0(var, ".phen")),
        col.names = FALSE, row.names = FALSE)
}

# Covariate files, each one leaving out the variable named in the file name.

# Without age: sex + standardised ICV
write.table(TT[, c("eid", "eid", "sexuses_datacoding_9_f31_0_0", "ICVstd")],
    file.path("UKB_phenotypes15K", "Covariates_no_Age_MRI.phen"),
    col.names = FALSE, row.names = FALSE)

# Without sex: standardised age + standardised ICV
write.table(TT[, c("eid", "eid", "Agestd", "ICVstd")],
    file.path("UKB_phenotypes15K",
        "Covariates_no_sexuses_datacoding_9_f31_0_0.phen"),
    col.names = FALSE, row.names = FALSE)

# Without ICV: age + sex
# NOTE(review): this file uses raw Age_MRI whereas the file above uses
# Agestd -- confirm whether the unstandardised age is intended here.
write.table(TT[, c("eid", "eid", "Age_MRI", "sexuses_datacoding_9_f31_0_0")],
    file.path("UKB_phenotypes15K", "Covariates_no_ICV.phen"),
    col.names = FALSE, row.names = FALSE)

3 Write PCs

# Export each of the 10 principal components to its own file
# (3 columns, no header: eid, eid, PC value)
for (pc_idx in seq_len(10)) {
    pc_col <- paste0("PC", pc_idx)
    pc_file <- paste0("UKB_phenotypes15K/PC", pc_idx, ".phen")
    write.table(TT[, c("eid", "eid", pc_col)], pc_file,
        col.names = FALSE, row.names = FALSE)
}



 




A work by [Baptiste Couvy-Duchesne] - 13 June 2022