### date 25-10-2022 # This file describes the code used to obtain the results described in the paper 'Disentangling Genetic Risks for Metabolic Syndrome' published in Diabetes in 2022 by van Walree et al. # See paper for URLS of summary statistics and tools used in this study. Check out the tools websites to see input file requirements. ### Step 1. Genetic correlations with MetS components # goal: estimate genetic correlation between MetS components module load Miniconda3 source activate ldsc #1a. munge sumstats with LDSC # FG ldsc/munge_sumstats.py \ --sumstats FG_wo_UKB.txt \ --N 46186 \ --out FG_wo_UKB \ --merge-alleles w_hm3.snplist # HDL ldsc/munge_sumstats.py \ --sumstats HDL_wo_UKB.txt \ --N 99000 \ --out HDL_wo_UKB \ --merge-alleles w_hm3.snplist # SBP ldsc/munge_sumstats.py \ --sumstats SBP_UKB.txt \ --N 361402 \ --out SBP_UKB \ --merge-alleles w_hm3.snplist # TG ldsc/munge_sumstats.py \ --sumstats TG_with_Effect.tbl \ --ignore GC.Zscore \ --N 96598 \ --out TG_wo_UKB \ --merge-alleles w_hm3.snplist # WC ldsc/munge_sumstats.py \ --sumstats WC_w_UKB.txt \ --N 385932 \ --out WC_w_UKB \ --merge-alleles w_hm3.snplist # 1b. Obtain genetic correlations # SBP ldsc/ldsc.py \ --rg SBP_UKB.sumstats.gz,WC_w_UKB.sumstats.gz,HDL_wo_UKB.sumstats.gz,TG_wo_UKB.sumstats.gz,FG_wo_UKB.sumstats.gz \ --ref-ld-chr eur_w_ld_chr/ \ --w-ld-chr eur_w_ld_chr/ \ --out /home/evawalre/Genomic_SEM/genetic_correlation/RG_SBP_MetS_components # WC ldsc/ldsc.py \ --rg WC_w_UKB.sumstats.gz,SBP_UKB.sumstats.gz,HDL_wo_UKB.sumstats.gz,TG_wo_UKB.sumstats.gz,FG_wo_UKB.sumstats.gz \ --ref-ld-chr eur_w_ld_chr/ \ --w-ld-chr eur_w_ld_chr/ \ --out /home/evawalre/Genomic_SEM/genetic_correlation/RG_WC_MetS_components # HDL ldsc/ldsc.py \ --rg HDL_wo_UKB.sumstats.gz,WC_w_UKB.sumstats.gz,SBP_UKB.sumstats.gz,TG_wo_UKB.sumstats.gz,FG_wo_UKB.sumstats.gz \ --ref-ld-chr eur_w_ld_chr/ \ --w-ld-chr eur_w_ld_chr/ \ --out /home/evawalre/Genomic_SEM/genetic_correlation/RG_HDL_MetS_components # TG ldsc/ldsc.py \ --rg TG_wo_UKB.sumstats.gz,HDL_wo_UKB.sumstats.gz,WC_w_UKB.sumstats.gz,SBP_UKB.sumstats.gz,FG_wo_UKB.sumstats.gz \ --ref-ld-chr eur_w_ld_chr/ \ --w-ld-chr eur_w_ld_chr/ \ --out /home/evawalre/Genomic_SEM/genetic_correlation/RG_TG_MetS_CF_Lind_input # FG ldsc/ldsc.py \ --rg FG_wo_UKB.sumstats.gz,TG_wo_UKB.sumstats.gz,HDL_wo_UKB.sumstats.gz,WC_w_UKB.sumstats.gz,SBP_UKB.sumstats.gz \ --ref-ld-chr eur_w_ld_chr/ \ --w-ld-chr eur_w_ld_chr/ \ --out RG_FG_MetS_components ### Step 2. EFA # 2a. Munge sumstats + QC for gSEM # goal: munge summary statistics by gSEM, which removes 1) SNPs with MAF 0.01 (when MAF field is available), 2) SNPs with imputation INFO score >0.1 (when INFO field is available) and 3) SNPs not in HapMap3 ##### in R ##### require(GenomicSEM) # Munge sumstats + QC munge(c("FG_wo_UKB.txt", "HDL_with_Effect.tbl", "TG_with_Effect.tbl", "SBP_UKB.txt", "WC_w_UKB.txt"), "w_hm3.noMHC.snplist",trait.names=c("FG_wo_UKB","HDL_wo_UKB","TG_wo_UKB", "SBP_UKB", "WC_w_UKB"), c(46186, 99000, 96598, 361402, 385932), info.filter = 0.9, maf.filter = 0.01) # 2b. EFA with Genomic SEM # goal: run EFA for a one and two factor model. Running with 3 factors results in an error. require(GenomicSEM) require(stats) # generate genetic covariance matrix S, and its parameter variance covariance matrix V traits<-c("FG_wo_UKB.sumstats.gz","HDL_wo_UKB.sumstats.gz","SBP_UKB.sumstats.gz","TG_wo_UKB.sumstats.gz","WC_w_UKB.sumstats.gz") sample.prev<-c(NA, NA, NA, NA, NA) population.prev<-c(NA, NA, NA, NA, NA) ld <-"eur_w_ld_chr/" wld="eur_w_ld_chr/" trait.names<-c("FG_wo_UKB", "HDL_wo_UKB", "SBP_UKB", "TG_wo_UKB", "WC_w_UKB") LDSCoutput <- ldsc(traits, sample.prev, population.prev, ld, wld, trait.names, stand=TRUE) #run EFA with promax rotation and 1 factor using the factanal function in the stats package. promax because that allows factors to be correlated, and is useful for large datasets. EFA_1<-factanal(covmat = LDSCoutput$S, factors = 1, rotation = "promax") # print the loadings print(EFA_1$loadings) EFA1 <- EFA_1$loadings #run EFA with promax rotation and 2 factors using the factanal function in the stats package. promax because that allows factors to be correlated, and is useful for large datasets. EFA_2<-factanal(covmat = LDSCoutput$S, factors = 2, rotation = "promax") # print the loadings print(EFA_2$loadings) EFA2 <- EFA_2$loadings save(EFA1, file="loadings_EFA1_final.txt") save(EFA2, file="loadings_EFA2_final.txt") # determine which number of factors to use library(nFactors) #get eigenvalues ev <- eigen(cor(LDSCoutput$S)) ap <- parallel(subject=nrow(LDSCoutput$S),var=ncol(LDSCoutput$S), rep=100,cent=.05) nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) pdf("EFA_factors_scree_plot_final.pdf") plotnScree(nS) dev.off() ### Step 3. CFA # goal: run confirmatory factor analysis on one factor 'MetS risk' model ##### in R ##### require(GenomicSEM) # generate genetic covariance matrix S, and its parameter variance covariance matrix V traits<-c("FG_wo_UKB.sumstats.gz","HDL_wo_UKB.sumstats.gz","SBP_UKB.sumstats.gz","TG_wo_UKB.sumstats.gz","WC_w_UKB.sumstats.gz") sample.prev<-c(NA, NA, NA, NA, NA) population.prev<-c(NA, NA, NA, NA, NA) ld <-"eur_w_ld_chr/" wld="eur_w_ld_chr/" trait.names<-c("FG_wo_UKB", "HDL_wo_UKB", "SBP_UKB", "TG_wo_UKB", "WC_w_UKB") LDSCoutput <- ldsc(traits, sample.prev, population.prev, ld, wld, trait.names, stand=TRUE) save(LDSCoutput, file="Risk_factors_for_MetS_largest_woDBP_SBPUKB.RData") #Specify the Genomic confirmatory factor model CFA <- 'F1 =~ WC_w_UKB + HDL_wo_UKB + TG_wo_UKB + SBP_UKB + FG_wo_UKB' #run the model MetS<-usermodel(LDSCoutput, estimation = "DWLS", model = CFA, CFIcalc = TRUE, std.lv = TRUE, imp_cov = FALSE) #print the results print(MetS) library(data.table) #save the results save(MetS, file="Riskfactors_for_MetS_largest_one_factor_CFA.txt") ### Step 5 (there is no step 4). Common factor GWAS with Genomic SEM # goal: run multivariate GWAS on metabolic risk latent variable ##### in R ##### require(GenomicSEM) # for both continuous GWAS and meta-analyses, se.logit=F, OLS=T, linprob=F (NULL for all), N=total sample size. MAF should be filtered based on the reference file. See https://github.com/GenomicSEM/GenomicSEM/wiki/4.-Common-Factor-GWAS for explanation of other values. # Note that the commonfactorGWAS function use unit loading identification, such that the loading of the first indicator is fixed to 1 so that the model is identified. In order for the model to converge, it will typically be best to order your files in the ldsc and sumstats function such that the phenotype with the largest loading on the common factor is listed first. # 5a. munge summary statistics munge(c("FG_wo_UKB.txt", "HDL_with_Effect.tbl", "WC_w_UKB.txt", "TG_with_Effect.tbl", "SBP_UKB.txt"), "w_hm3.noMHC.snplist",trait.names=c("FG_wo_UKB","HDL_wo_UKB","WC_w_UKB","TG_wo_UKB", "SBP_UKB"), c(461 86, 99000, 385932, 96598, 361402), info.filter = 0.9, maf.filter = 0.01) # 5b. generate genetic covariance matrix S, and its parameter variance covariance matrix V traits<-c("WC_w_UKB.sumstats.gz","HDL_wo_UKB.sumstats.gz", "TG_wo_UKB.sumstats.gz", "FG_wo_UKB.sumstat s.gz", "SBP_UKB.sumstats.gz") sample.prev<-c(NA, NA, NA, NA, NA, NA) population.prev<-c(NA, NA, NA, NA, NA, NA) ld <-"eur_w_ld_chr/" wld="eur_w_ld_chr/" trait.names<-c("WC_w_UKB", "HDL_wo_UKB", "TG_wo_UKB", "FG_wo_UKB", "SBP_UKB") LDSCoutput <- ldsc(traits, sample.prev, population.prev, ld, wld, trait.names, stand=TRUE) save(LDSCoutput, file="LDSCoutput.txt") # 5c. prepare the summary statistics for GWAS files=c("WC_w_UKB.txt", "HDL_with_Effect.tbl", "TG_with_Effect.tbl", "FG_wo_UKB.txt", "SBP_UKB.txt") ref= "reference.1000G.maf.0.005.txt" trait.names<-c("WC_w_UKB", "HDL_wo_UKB", "TG_wo_UKB", "FG_wo_UKB", "SBP_UKB") se.logit=c(F,F,F,F,F) info.filter=.6 maf.filter=0.01 OLS=c(T,T,T,T,T) N=c(385932, 99000, 96598, 46186, 361402) mets_sumstats <-sumstats(files=files,ref=ref,trait.names=trait.names,se.logit=se.logit,OLS=OLS,linprob =NULL,N=N,info.filter=info.filter,maf.filter=maf.filter,keep.indel=FALSE,parallel=TRUE,cores=NULL) # 5d. Chop up sumstats in 20 chunks because of memory issues # there are 2251442 SNPs left in the final multivariate summary statistics file mets_sumstats_chunk1 <- mets_sumstats[1:112384,] mets_sumstats_chunk2 <- mets_sumstats[112385:224768,] mets_sumstats_chunk3 <- mets_sumstats[224769:337152,] mets_sumstats_chunk4 <- mets_sumstats[337153:449536,] mets_sumstats_chunk5 <- mets_sumstats[449537:561920,] mets_sumstats_chunk6 <- mets_sumstats[561921:674304,] mets_sumstats_chunk7 <- mets_sumstats[674305:786688,] mets_sumstats_chunk8 <- mets_sumstats[786689:899072,] mets_sumstats_chunk9 <- mets_sumstats[899073:1011456,] mets_sumstats_chunk10 <- mets_sumstats[1011457:1123840,] mets_sumstats_chunk11 <- mets_sumstats[1123841:1236224,] mets_sumstats_chunk12 <- mets_sumstats[1236225:1348608,] mets_sumstats_chunk13 <- mets_sumstats[1348609:1460992,] mets_sumstats_chunk14 <- mets_sumstats[1460993:1573376,] mets_sumstats_chunk15 <- mets_sumstats[1573377:1685760,] mets_sumstats_chunk16 <- mets_sumstats[1685761:1798144,] mets_sumstats_chunk17 <- mets_sumstats[1798145:1910528,] mets_sumstats_chunk18 <- mets_sumstats[1910529:2022912,] mets_sumstats_chunk19 <- mets_sumstats[2022913:2135296,] mets_sumstats_chunk20 <- mets_sumstats[2135297:2251443,] save(mets_sumstats_chunk1, file="mets_sumstats_chunk1.txt") save(mets_sumstats_chunk2, file="mets_sumstats_chunk2.txt") save(mets_sumstats_chunk3, file="mets_sumstats_chunk3.txt") save(mets_sumstats_chunk4, file="mets_sumstats_chunk4.txt") save(mets_sumstats_chunk5, file="mets_sumstats_chunk5.txt") save(mets_sumstats_chunk6, file="mets_sumstats_chunk6.txt") save(mets_sumstats_chunk7, file="mets_sumstats_chunk7.txt") save(mets_sumstats_chunk8, file="mets_sumstats_chunk8.txt") save(mets_sumstats_chunk9, file="mets_sumstats_chunk9.txt") save(mets_sumstats_chunk10, file="mets_sumstats_chunk10.txt") save(mets_sumstats_chunk11, file="mets_sumstats_chunk11.txt") save(mets_sumstats_chunk12, file="mets_sumstats_chunk12.txt") save(mets_sumstats_chunk13, file="mets_sumstats_chunk13.txt") save(mets_sumstats_chunk14, file="mets_sumstats_chunk14.txt") save(mets_sumstats_chunk15, file="mets_sumstats_chunk15.txt") save(mets_sumstats_chunk16, file="mets_sumstats_chunk16.txt") save(mets_sumstats_chunk17, file="mets_sumstats_chunk17.txt") save(mets_sumstats_chunk18, file="mets_sumstats_chunk18.txt") save(mets_sumstats_chunk19, file="mets_sumstats_chunk19.txt") save(mets_sumstats_chunk20, file="mets_sumstats_chunk20.txt") # 5e. Combine the summary statistics and LDSC output and run the common factor GWAS # run the multivariate GWAS using parallel processing message("running the multivariate GWAS chunk 1") MetS_chunk1 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk1, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk1, file="commonfactorGWAS_MetS_largest_chunk1.Rdata") message("running the multivariate GWAS chunk 2") MetS_chunk2 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk2, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk2, file="commonfactorGWAS_MetS_largest_chunk2.Rdata") message("running the multivariate GWAS chunk 3") MetS_chunk3 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk3, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk3, file="commonfactorGWAS_MetS_largest_chunk3.Rdata") message("running the multivariate GWAS chunk 4") MetS_chunk4 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk4, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk4, file="commonfactorGWAS_MetS_largest_chunk4.Rdata") message("running the multivariate GWAS chunk 5") MetS_chunk5 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk5, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk5, file="commonfactorGWAS_MetS_largest_chunk5.Rdata") message("running the multivariate GWAS chunk 6") MetS_chunk6 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk6, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk6, file="commonfactorGWAS_MetS_largest_chunk6.Rdata") message("running the multivariate GWAS chunk 7") MetS_chunk7 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk7, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk7, file="commonfactorGWAS_MetS_largest_chunk7.Rdata") message("running the multivariate GWAS chunk 8") MetS_chunk8 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk8, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk8, file="commonfactorGWAS_MetS_largest_chunk8.Rdata") message("running the multivariate GWAS chunk 9") MetS_chunk9 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk9, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk9, file="commonfactorGWAS_MetS_largest_chunk9.Rdata") message("running the multivariate GWAS chunk 10") MetS_chunk10 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk10, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk10, file="commonfactorGWAS_MetS_largest_chunk10.Rdata") message("running the multivariate GWAS chunk 11") MetS_chunk11 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk11, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk11, file="commonfactorGWAS_MetS_largest_chunk11.Rdata") message("running the multivariate GWAS chunk 12") MetS_chunk12 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk12, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk12, file="commonfactorGWAS_MetS_largest_chunk12.Rdata") message("running the multivariate GWAS chunk 13") MetS_chunk13 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk13, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk13, file="commonfactorGWAS_MetS_largest_chunk13.Rdata") message("running the multivariate GWAS chunk 14") MetS_chunk14 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk14, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk14, file="commonfactorGWAS_MetS_largest_chunk14.Rdata") message("running the multivariate GWAS chunk 15") MetS_chunk15 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk15, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk15, file="commonfactorGWAS_MetS_largest_chunk15.Rdata") message("running the multivariate GWAS chunk 16") MetS_chunk16 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk16, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk16, file="commonfactorGWAS_MetS_largest_chunk16.Rdata") message("running the multivariate GWAS chunk 17") MetS_chunk17 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk17, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk17, file="commonfactorGWAS_MetS_largest_chunk17.Rdata") message("running the multivariate GWAS chunk 18") MetS_chunk18 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk18, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk18, file="commonfactorGWAS_MetS_largest_chunk18.Rdata") message("running the multivariate GWAS chunk 19") MetS_chunk19 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk19, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk19, file="commonfactorGWAS_MetS_largest_chunk19.Rdata") message("running the multivariate GWAS chunk 20") MetS_chunk20 <- commonfactorGWAS(covstruc = LDSCoutput, SNPs = mets_sumstats_chunk20, estimation = "DWLS", cores = NULL, toler = FALSE, SNPSE = FALSE, parallel = FALSE, GC="standard",MPI=FALSE) warnings() save(MetS_chunk20, file="commonfactorGWAS_MetS_largest_chunk20.Rdata") # 5f. Combine all chunks CF_MetS_results <- rbind(MetS_chunk1, MetS_chunk2, MetS_chunk3, MetS_chunk4, MetS_chunk5, MetS_chunk6, MetS_chunk7, MetS_chunk8, MetS_chunk9, MetS_chunk10, MetS_chunk11, MetS_chunk12, MetS_chunk13, MetS_chunk14, MetS_chunk15, MetS_chunk16, MetS_chunk17, MetS_chunk18, MetS_chunk19, MetS_chunk20) # remove SNPs with warning 'lavaan WARNING: some estimated ov variances are negative' (n=46) cf2 <- CF_MetS_results[which(CF_MetS_results$warning==0),] # calculate effective N #restrict to MAF of 40% and 10% cf3<-subset(cf2, cf2$MAF <= .4 & cf2$MAF >= .1) #calculate effective N Effective_N<-(mean(((cf3$Z_Estimate/cf3$est)^2)/(2*cf3$MAF*(1-cf3$MAF)))) # Effective_N = 461920.7 library(data.table) fwrite(cf2, "CF_MetS_results.txt", sep="\t", na="NA", quote=FALSE) # functional annotation of these results was performed online with FUMA, see https://fuma.ctglab.nl/ 'SNP2GENE' function ### Step 6. Calculate observed scale h2 (observed for comparative reasons) for MetS factor GWAS and MetS GWAS by Lind with LDSC source activate ldsc ldsc/munge_sumstats.py \ --sumstats CF_MetS_results_forldsc.txt \ --N 461920 \ --out Output/MetS_CF \ --merge-alleles w_hm3.snplist ldsc/munge_sumstats.py \ --sumstats MetS_Lind.txt \ --N 291107 \ --out Output/MetS_Lind \ --merge-alleles w_hm3.snplist ldsc/ldsc.py \ --rg MetS_CF.sumstats.gz,MetS_Lind.sumstats.gz \ --ref-ld-chr eur_w_ld_chr/ \ --w-ld-chr eur_w_ld_chr/ \ --out /home/evawalre/Genomic_SEM/genetic_correlation/RG_MetS_CF_Lind ### Step 7. Calculate genetic correlations with other diseases. # goal: estimate genetic correlation between the CF_MetS_GWAS and correlated phenotypes. Some of my sumstats were already munged, others weren't so they're munged below module load Miniconda3 source activate ldsc ldsc/munge_sumstats.py \ --sumstats AD_sumstats_Jansenetal_2019sept.txt \ --out AD \ --ignore Z \ --merge-alleles w_hm3.snplist ldsc/munge_sumstats.py \ --sumstats schizophrenia_clozuk_pgc.txt \ --N 480359 \ --out SCZ \ --merge-alleles w_hm3.snplist ldsc/munge_sumstats.py \ --sumstats COVID_very_severe_respiratory_eur.txt \ --out COVID \ --merge-alleles w_hm3.snplist ldsc/munge_sumstats.py \ --sumstats NAFLD.txt \ --N 19264 \ --out NAFLD \ --merge-alleles w_hm3.snplist ldsc/munge_sumstats.py \ --sumstats CKD.txt \ --N 117165 \ --out CKD \ --merge-alleles w_hm3.snplist ldsc/munge_sumstats.py \ --sumstats MDD2018_ex23andMe.txt \ --out MDD \ --merge-alleles w_hm3.snplist ldsc/munge_sumstats.py \ --sumstats Cholelithiasis.txt \ --out Cholelithiasis \ --merge-alleles w_hm3.snplist ldsc/munge_sumstats.py \ --sumstats CAD_w_UKB.txt \ --out CAD \ --merge-alleles w_hm3.snplist ldsc/munge_sumstats.py \ --sumstats T2D_w_UKB.txt \ --out T2D \ --N 898130 \ --merge-alleles w_hm3.snplist ldsc/ldsc.py \ --rg MetS_CF.sumstats.gz,AD.sumstats.gz,SCZ.sumstats.gz,COVID.sumstats.gz,NAFLD.sumstats.gz,CKD.sumstats.gz,MDD.sumstats.gz,Cholelithiasis.sumstats.gz,CAD.sumstats.gz,T2D.sumstats.gz,adhd_new_eur_n.sumstats.gz,age_fb.sumstats.gz,ano_new.sumstats.gz,anxcc.sumstats.gz,asd_new.sumstats.gz,ast.sumstats.gz,bp_new.sumstats.gz,cannabisdx_pgc.sumstats.gz,cigperday_gscan2019.sumstats.gz,drinksperweek_gscan2019.sumstats.gz,edu.sumstats.gz,EGG_bl.sumstats.gz,EGG_bmi.sumstats.gz,EGG_bw.sumstats.gz,HT2014.sumstats.gz,icv.sumstats.gz,insomnia_Jansen2018.sumstats.gz,longevity_parents.sumstats.gz,neuro_excl23andme.sumstats.gz,num_child.sumstats.gz,risktol_ssgac.sumstats.gz,smokinit_gscan2019.sumstats.gz \ --ref-ld-chr eur_w_ld_chr/ \ --w-ld-chr eur_w_ld_chr/ \ --out RG_MetS_CF_corr_diseases ### Step 8. Polygenic prediction # 8a. QC Framingham Heart Study (FHS) data (PRS target data) # goal: QC target data according to tutorial https://choishingwan.github.io/PRS-Tutorial/prsice/ (https://www-nature-com.vu-nl.idm.oclc.org/articles/s41596-020-0353-1#Sec3) # the genomic coordinates were lifted over from GrCH36 to GrCH37 with https://genome.ucsc.edu/cgi-bin/hgLiftOver # QC according to PRSice suggestions plink --bfile FHS_grch37 \ --maf 0.05 \ --mind 0.1 \ --geno 0.1 \ --hwe 1e-6 \ --write-snplist \ --make-just-fam \ --out FHS_grch37.QC_prsice # Find SNPs that have high heterozygosity # Prune plink \ --bfile FHS_grch37 \ --keep FHS_grch37.QC_prsice.fam \ --extract FHS_grch37.QC_prsice.snplist \ --indep-pairwise 200 50 0.25 \ --out FHS_grch37.QC_prsice # Calculate heterozygosity rates (with PLINK/1.9b_6.17-x86_64, PLINK/2 doesn't work) plink \ --bfile FHS_grch37 \ --extract FHS_grch37.QC_prsice.prune.in \ --keep FHS_grch37.QC_prsice.fam \ --het \ --out FHS_grch37.QC_prsice # In R: remove individuals with F coefficients that are more than 3 standard deviation (SD) units from the mean ###### R ######## library(data.table) dat <- fread("FHS_grch37.QC_prsice.het") # Read in the het file, specify it has header m <- mean(dat$F) # Calculate the mean s <- sd(dat$F) # Calculate the SD valid <- subset(dat, F <= m+3*s & F >= m-3*s) # Get any samples with F coefficient within 3 SD of the population mean write.table(valid[,c(1,2)], "FHS.valid.sample.prsice", quote=F, row.names=F) # print FID and IID for valid samples q() # exit R # Remove samples with discordant sex plink \ --bfile FHS_grch37 \ --extract FHS_grch37.QC_prsice.prune.in \ --keep FHS.valid.sample.prsice \ --check-sex \ --out FHS_grch37.QC_prsice ######### in R ######### library(data.table) # Read in file valid <- fread("FHS.valid.sample.prsice") dat <- fread("FHS_grch37.QC_prsice.sexcheck")[FID%in%valid$FID] fwrite(dat[STATUS=="OK",c("FID","IID")], "FHS_grch37.QC_prsice.valid", sep="\t") q() # exit R # Remove related individuals plink \ --bfile FHS_grch37 \ --extract FHS_grch37.QC_prsice.prune.in \ --keep FHS_grch37.QC_prsice.valid \ --rel-cutoff 0.125 \ --out FHS_grch37.QC_prsice # Generate final QC'd data file (don't perform SNP mismatching as PRSice does that for us) plink \ --bfile FHS_grch37 \ --make-bed \ --keep FHS_grch37.QC_prsice.rel.id \ --out FHS_QC_prsice \ --extract FHS_grch37.QC_prsice.snplist # Calculate PCs for selecting Europeans # compare sample to 1000G to select Europeans # merge # first, convert ss id to rsid to allow merge. use dbsnp 153. Match on chr:loc:allele:allele (depending on which one comes first alphabetically), --update-name plink --bfile FHS_QC_prsice --update-map SSid_to_rsid.txt --update-name --make-bed --out FHS_QC_prsice_rsid plink --bfile FHS_QC_prsice_rsid --bmerge $HOME/Volendam/PCA/PCA_scripts/PCA_files/1000G_DARWIN61WGS/1000G_qcrp_autosomal_pihat0.2 --geno 0.05 --maf 0.05 --make-bed --out FHS_1000G #prune and make mds plink --bfile FHS_1000G --indep 50 5 2 --out FHS_1000G plink --bfile FHS_1000G --extract FHS_1000G.prune.in --genome --cluster --mds-plot 20 --out FHS_1000G # in R: library(data.table) library(ggplot2) d <- fread("FHS_1000G.mds") # the first 2542 rows are FHS, then individuals with FID starting with mis_pop_afri_afr are africans, mis_pop_amer_amr americans, mis_pop_asia_asn asians, mis_pop_euro_eur europeans FHS <- d[1:2542,] FHS$ancestry <- "FHS" AFR <- d[d$FID %like% "mis_pop_afri_afr", ] AFR$ancestry <- "AFR" AMR <- d[d$FID %like% "mis_pop_amer_amr", ] AMR$ancestry <- "AMR" ASI <- d[d$FID %like% "mis_pop_asia_asn", ] ASI$ancestry <- "ASI" EUR <- d[d$FID %like% "mis_pop_euro_eur", ] EUR$ancestry <- "EUR" total <- rbind(FHS, AFR, AMR, ASI, EUR) pdf("pca-ancestry-FHS-1000G.plot.pdf") ggplot(total, aes(C1, C2, color=ancestry)) + geom_point() dev.off() # select FHS Europeans based on visual PCA plot FHS_EUR <- subset(FHS, C1>(-0.03) & C1<(-0.005) & C2>(-0.015) & C2 <0.025) FHS_EUR$ancestry <- "FHS_EUR" fwrite(FHS_EUR[,1:2], "FHS_EUR_sample.txt", sep="\t", quote=FALSE, na="NA") # plot again adding this group total <- rbind(FHS, AFR, AMR, ASI, EUR, FHS_EUR) pdf("pca-ancestry-FHS-EUR-1000G.plot.pdf") ggplot(total, aes(C1, C2, color=ancestry)) + geom_point() dev.off() # FHS Europeans now plot nicely with 1000G Europeans quit() n # back to plink. Keep only FHS Europeans. plink --bfile FHS_QC_prsice_rsid --keep FHS_EUR_sample.txt --make-bed --out FHS_QC_EUR # removed the prsice extension as this will be the file I'll be working with # rerun PC on only Europeans # 5a. Prune again on cleaned set to get PCs for covariates plink \ --bfile FHS_QC_EUR \ --indep-pairwise 200 50 0.25 \ --out FHS_QC_EUR plink \ --bfile FHS_QC_EUR \ --extract FHS_QC_EUR.prune.in \ --cluster --mds-plot 20 \ --out FHS_QC_EUR plink \ --bfile FHS_QC_EUR \ --extract FHS_QC_EUR.prune.in \ --pca \ --out FHS_QC_EUR # 8b. Run GWAS' for Fasting Glucose, HDL and triglycerides because of sample overlap of base and target data. As target data, FHS offspring and third generation individuals were used. For FG, individuals on glucose lowering medication were removed, for HDL and triglycerides individuals on cholesterol lowering medication plink2 --bfile FHS_QC_EUR --pheno FHS_offspring_3g_noglucmed_phenos.txt --ci 0.95 --pheno-name GLUC --covar /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen.txt --covar-variance-standardize --covar-name sex,age,age_sq --glm hide-covar no-x-sex --out FG_FHS_sumstats.txt plink2 --bfile FHS_QC_EUR --pheno FHS_offspring_3g_nocholmed_phenos.txt --ci 0.95 --pheno-name HDL --covar FHS_covs_offspring_thirdgen.txt --covar-variance-standardize --covar-name sex,age,age_sq,PC1-PC20 --glm hide-covar no-x-sex --out HDL_FHS_sumstats.txt plink2 --bfile FHS_QC_EUR --pheno FHS_offspring_3g_nocholmed_phenos.txt --ci 0.95 --pheno-name Triglycerides --covar FHS_covs_offspring_thirdgen.txt --covar-variance-standardize --covar-name sex,age,age_sq,PC1-PC20 --glm hide-covar no-x-sex --out TG_FHS_sumstats.txt # 8c. Run EraSOR to correct FG, HDL and TG sumstats for sample overlap with FHS module load Miniconda3/4.7.12.1 # the tool doesn't like FG_FHS_sumstats.txt.GLUC.glm.linear so change cp FG_FHS_sumstats.txt.GLUC.glm.linear FG_FHS_sumstats.txt cp HDL_FHS_sumstats.txt.HDL.glm.linear HDL_FHS_sumstats.txt cp TG_FHS_sumstats.txt.Triglycerides.glm.linear TG_FHS_sumstats.txt # FG python EraSOR/EraSOR.py \ --base FG_wo_UKB_N.txt \ --base-snp snp \ --base-signed-sumstats effect,0 \ --base-a1 A1 \ --base-a2 A2 \ --base-p pvalue \ --base-N-col N \ --target FG_FHS_sumstats.txt \ --target-snp SNP \ --target-signed-sumstats BETA,0 \ --target-a1 A1 \ --target-a2 A2 \ --target-p P \ --target-N-col OBS_CT \ --ref-ld-chr FHS_QC_EUR_ \ --w-ld-chr FHS_QC_EUR_ \ --out Output/EraSOR_FG_FHS_adjusted python EraSOR/EraSOR.py \ --base HDL_with_Effect.tbl \ --base-snp MarkerName \ --base-signed-sumstats Effect,0 \ --base-a1 Allele1 \ --base-a2 Allele2 \ --base-p P_value \ --base-N-col N \ --target HDL_FHS_sumstats.txt \ --target-snp SNP \ --target-signed-sumstats BETA,0 \ --target-a1 A1 \ --target-a2 A2 \ --target-p P \ --target-N-col OBS_CT \ --ref-ld-chr FHS_QC_EUR_ \ --w-ld-chr FHS_QC_EUR_ \ --out Output/EraSOR_HDL_FHS_adjusted python EraSOR/EraSOR.py \ --base TG_with_Effect.tbl \ --base-snp MarkerName \ --base-signed-sumstats Effect,0 \ --base-a1 Allele1 \ --base-a2 Allele2 \ --base-p Pvalue \ --base-N-col N \ --target TG_FHS_sumstats.txt \ --target-snp SNP \ --target-signed-sumstats BETA,0 \ --target-a1 A1 \ --target-a2 A2 \ --target-p P \ --target-N-col OBS_CT \ --ref-ld-chr FHS_QC_EUR_ \ --w-ld-chr FHS_QC_EUR_ \ --out Output/EraSOR_TG_FHS_adjusted #8d. Merge ErasOR adjusted snps with non adjusted # only the variants that were adjusted with EraSOR are in the output file, but not the non-adjusted (which I do need for prediction). Also need effect size ####### in R ####### library(data.table) setwd("/home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/") HDL_adj <- fread("EraSOR_HDL_FHS_adjusted.assoc.gz") FG_adj <- fread("EraSOR_FG_FHS_adjusted.assoc.gz") TG_adj <- fread("EraSOR_TG_FHS_adjusted.assoc.gz") HDL <- fread("/home/evawalre/Genomic_SEM/PRS/files/FHS/FHS_sumstats/HDL_with_Effect.tbl") FG <- fread("/home/evawalre/Genomic_SEM/PRS/files/FHS/FHS_sumstats/FG_wo_UKB.txt") TG <- fread("/home/evawalre/Genomic_SEM/PRS/files/FHS/FHS_sumstats/TG_with_Effect.tbl") HDL_freq <- fread("/home/evawalre/Genomic_SEM/sumstats/MR/HDL_wo_UKB_GSMR_AF1000G.txt") TG_freq <- fread("/home/evawalre/Genomic_SEM/sumstats/MR/TG_wo_UKB_GSMR_AF1000G.txt") # HDL HDL_freq <- HDL_freq[,1:4] # get only relevant columns from HDL HDL <- HDL[,c(1,2,3,4,5,6,9,10)] # adjusted sumstats # get 1000G freq HDL_adj_freq <- merge(HDL_adj, HDL_freq, by=c("SNP", "A1", "A2")) # get maf HDL_adj_freq$MAF <- ifelse(HDL_adj_freq$freq>0.5, 1-HDL_adj_freq$freq, HDL_adj_freq$freq) # add N (HDL Teslovic sumstats-FHS sumstats) HDL_adj_freq$N <- 97175 # calculate effect: Beta = Zscore / sqrt( 2 * MAF * ( 1 - MAF) * ( N + Zscore^2 ) ) # calculate SE: SE = 1 / sqrt( 2 * MAF * ( 1 - MAF ) * ( N + Zscore^2 ) ) HDL_adj_freq$BETA <- HDL_adj_freq$Z / sqrt( 2 * HDL_adj_freq$MAF * ( 1 - HDL_adj_freq$MAF) * ( HDL_adj_freq$N + HDL_adj_freq$Z^2 ) ) HDL_adj_freq$SE <- 1 / sqrt( 2 * HDL_adj_freq$MAF * ( 1 - HDL_adj_freq$MAF ) * ( HDL_adj_freq$N + HDL_adj_freq$Z^2 ) ) summary(HDL_adj_freq$BETA) #-0.12 to 0.14, 2046 NA's, mean 0 # SE 0.0045 to 0.032, mean 0.0059 colnames(HDL) <- c("SNP", "A1", "A2", "N", "Z", "P", "BETA", "SE") #remove freq and maf column from HDL adjusted file for bind HDL_adj_freq$MAF <- NULL HDL_adj_freq$freq <- NULL # to remove from HDL (SNPs present in adjusted HDL file) HDL_to_remove <- HDL_adj_freq[,1] # remove from HDL sumstats those SNPs that are in the adjusted file HDL2 <- HDL[!HDL$SNP %in% HDL_to_remove$SNP, , drop = FALSE] # reshuffle HDL3 <- HDL2[,c(1,2,3,5,6,4,7,8)] # rbind HDL_final <- rbind(HDL3, HDL_adj_freq) fwrite(HDL_final, "EraSOR_HDL_FHS_adjusted_incl_nonoverlapping_nonadjusted.txt", sep="\t", quote=FALSE, na="NA") # TG TG_freq <- TG_freq[,1:4] # get only relevant columns from TG TG <- TG[,c(1,2,3,4,5,6,9,10)] # adjusted sumstats # get 1000G freq TG_adj_freq <- merge(TG_adj, TG_freq, by=c("SNP", "A1", "A2")) # get maf TG_adj_freq$MAF <- ifelse(TG_adj_freq$freq>0.5, 1-TG_adj_freq$freq, TG_adj_freq$freq) # add N (TG Teslovic sumstats-FHS sumstats) TG_adj_freq$N <- 94786 # calculate effect: Beta = Zscore / sqrt( 2 * MAF * ( 1 - MAF) * ( N + Zscore^2 ) ) # calculate SE: SE = 1 / sqrt( 2 * MAF * ( 1 - MAF ) * ( N + Zscore^2 ) ) TG_adj_freq$BETA <- TG_adj_freq$Z / sqrt( 2 * TG_adj_freq$MAF * ( 1 - TG_adj_freq$MAF) * ( TG_adj_freq$N + TG_adj_freq$Z^2 ) ) TG_adj_freq$SE <- 1 / sqrt( 2 * TG_adj_freq$MAF * ( 1 - TG_adj_freq$MAF ) * ( TG_adj_freq$N + TG_adj_freq$Z^2 ) ) summary(TG_adj_freq$BETA) #-0.18 to 0.19, mean =0, NA's = 2045 summary(TG_adj_freq$SE) # 0.0046-0.0322, mean 0.0059 colnames(TG) <- c("SNP", "A1", "A2", "N", "Z", "P", "BETA", "SE") #remove freq and maf column from HDL adjusted file for bind TG_adj_freq$MAF <- NULL TG_adj_freq$freq <- NULL # to remove from HDL (SNPs present in adjusted HDL file) TG_to_remove <- TG_adj_freq[,1] # remove from HDL sumstats those SNPs that are in the adjusted file TG2 <- TG[!TG$SNP %in% TG_to_remove$SNP, , drop = FALSE] # reshuffle TG3 <- TG2[,c(1,2,3,5,6,4,7,8)] # rbind TG_final <- rbind(TG3, TG_adj_freq) fwrite(TG_final, "EraSOR_TG_FHS_adjusted_incl_nonoverlapping_nonadjusted.txt", sep="\t", quote=FALSE, na="NA") # FG # FG sumstats have freq FG_freq <- FG[,c(1,2,3,4)] colnames(FG_freq) <- c("SNP", "A1", "A2", "MAF") FG_adj$A1 <- tolower(FG_adj$A1) FG_adj$A2 <- tolower(FG_adj$A2) FG_adj_freq <- merge(FG_adj, FG_freq, by=c("SNP", "A1", "A2")) FG_adj_freq$N <- 44370 # calculate effect: Beta = Zscore / sqrt( 2 * MAF * ( 1 - MAF) * ( N + Zscore^2 ) ) # calculate SE: SE = 1 / sqrt( 2 * MAF * ( 1 - MAF ) * ( N + Zscore^2 ) ) FG_adj_freq$BETA <- FG_adj_freq$Z / sqrt( 2 * FG_adj_freq$MAF * ( 1 - FG_adj_freq$MAF) * ( FG_adj_freq$N + FG_adj_freq$Z^2 ) ) FG_adj_freq$SE <- 1 / sqrt( 2 * FG_adj_freq$MAF * ( 1 - FG_adj_freq$MAF ) * ( FG_adj_freq$N + FG_adj_freq$Z^2 ) ) summary(FG_adj_freq$BETA) #-1.18 to 0.12, mean 3.074e-05 summary(FG_adj_freq$SE) #0.007 to 0.030 colnames(FG) <- c("SNP", "A1", "A2", "MAF", "BETA", "SE", "P") #remove freq and maf column from HDL adjusted file for bind FG_adj_freq$Z <- NULL FG_adj_freq$N <- NULL # to remove from HDL (SNPs present in adjusted HDL file) FG_to_remove <- FG_adj_freq[,1] # remove from FG sumstats those SNPs that are in the adjusted file FG2 <- FG[!FG$SNP %in% FG_to_remove$SNP, , drop = FALSE] # reshuffle FG3 <- FG2[,c(1,2,3,7,4,5,6)] # rbind FG_final <- rbind(FG3, FG_adj_freq) fwrite(FG_final, "EraSOR_FG_FHS_adjusted_incl_nonoverlapping_nonadjusted.txt", sep="\t", quote=FALSE, na="NA") # the MetS common factor GWAS was rerun (see step 5 for code) using the sumstats generated above, which are now adjusted for sample overlap with FHS (output file = CF_MetS_results_FHS_adj.txt) #8e. Use PRSice to generate polygenic risk scores with the adjusted summary statistics for FG, HDL, TG and the MetS factor ###### in R ###### # 8e1. FG with FG sumstats adjusted for own FHS FG sumstats with EraSOR Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/EraSOR_FG_FHS_adjusted_incl_nonoverlapping_nonadjusted.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_noglucmed_phenos_PRSice.txt --pheno-col GLUC --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt --ignore-fid \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/FG_FHS_EraSOR_adjusted_FG_sumstats # 8e2. FG with MetS sumstats adjusted for own FHS sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/CF_MetS_results_FHS_adj.txt --A1 A1 --stat beta --pvalue Pval_Estimate --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_noglucmed_phenos_PRSice.txt --pheno-col GLUC --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/FG_FHS_EraSOR_adjusted_MetS_sumstats #8e3. HDL with HDL sumstats adjusted for own FHS HDL sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/EraSOR_HDL_FHS_adjusted_incl_nonoverlapping_nonadjusted.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_nocholmed_phenos_PRSice.txt --pheno-col HDL --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/HDL_FHS_EraSOR_adjusted_HDL_sumstats # 8e4. HDL with MetS sumstats adjusted for own FHS sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/CF_MetS_results_FHS_adj.txt --A1 A1 --stat beta --pvalue Pval_Estimate --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_nocholmed_phenos_PRSice.txt --pheno-col HDL --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/HDL_FHS_EraSOR_adjusted_MetS_sumstats # 8e5. TG with TG sumstats adjusted for own FHS TG sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/EraSOR_TG_FHS_adjusted_incl_nonoverlapping_nonadjusted.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_nocholmed_phenos_PRSice.txt --pheno-col Triglycerides --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/TG_FHS_EraSOR_adjusted_TG_sumstats # 8e6. TG with MetS sumstats adjusted for own FHS sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/CF_MetS_results_FHS_adj.txt --A1 A1 --stat beta --pvalue Pval_Estimate --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_nocholmed_phenos_PRSice.txt --pheno-col Triglycerides --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/TG_FHS_EraSOR_adjusted_MetS_sumstats # 8e7. SBP with SBP sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/sumstats/largest/SBP_UKB.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_nobpmed_phenos.txt --pheno-col Systolic_blood_pressure --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/SBP_SBP_sumstats # 8e8. SBP with MetS sumstats adjusted for own FHS sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/CF_MetS_results_FHS_adj.txt --A1 A1 --stat beta --pvalue Pval_Estimate --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_nobpmed_phenos.txt --pheno-col Systolic_blood_pressure --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/SBP_FHS_EraSOR_adjusted_MetS_sumstats # 8e9. WC with WC sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/sumstats/largest/WC_w_UKB.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_phenos.txt --pheno-col WAIST --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/WC_WC_sumstats # 8e10. WC with MetS sumstats adjusted for own FHS sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/CF_MetS_results_FHS_adj.txt --A1 A1 --stat beta --pvalue Pval_Estimate --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target F \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_phenos.txt --pheno-col WAIST --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/WC_FHS_EraSOR_adjusted_MetS_sumstats # 8e11. MetS with MetS sumstats adjusted for own FHS sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/CF_MetS_results_FHS_adj.txt --A1 A1 --stat beta --pvalue Pval_Estimate --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target T \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_mets_phenos.txt --pheno-col mets --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/MetS_FHS_EraSOR_adjusted_MetS_sumstats # 8e12. MetS with Lind sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/genetic_correlation/sumstats/MetS_Lind.txt --A1 effect_allele --stat beta --pvalue p_value --snp rsid --extract /home/evawalre/Genomic_SEM/PRS/PRSice_output/MetS_Lind_sumstats.valid \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target T \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_mets_phenos.txt --pheno-col mets --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/MetS_Lind_sumstats # 8e13. MetS with FG sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/EraSOR_FG_FHS_adjusted_incl_nonoverlapping_nonadjusted.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target T \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_mets_phenos.txt --pheno-col mets --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/MetS_FG_sumstats # 8e14. MetS with WC sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/sumstats/largest/WC_w_UKB.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target T \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_mets_phenos.txt --pheno-col mets --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/MetS_WC_sumstats # 8e15. MetS with SBP sumsts Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/sumstats/largest/SBP_UKB.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target T \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_mets_phenos.txt --pheno-col mets --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/MetS_SBP_sumstats # 8e16. MetS with TG sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/EraSOR_TG_FHS_adjusted_incl_nonoverlapping_nonadjusted.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target T \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_mets_phenos.txt --pheno-col mets --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/MetS_TG_sumstats # 8e17. MetS with HDL sumstats Rscript /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice.R \ --prsice /home/evawalre/Genomic_SEM/script/PRS/tools/PRSice_linux \ --base /home/evawalre/Genomic_SEM/PRS/files/FHS/adjusted_sumstats/EraSOR_HDL_FHS_adjusted_incl_nonoverlapping_nonadjusted.txt --A1 A1 --stat BETA --pvalue P --snp SNP \ --target /home/evawalre/Genomic_SEM/PRS/files/FHS/genotype/FHS_QC_EUR \ --binary-target T \ --pheno /home/evawalre/Genomic_SEM/PRS/files/FHS/phenos/FHS_offspring_3g_mets_phenos.txt --pheno-col mets --ignore-fid \ --cov /home/evawalre/Genomic_SEM/PRS/files/FHS/covariates/FHS_covs_offspring_thirdgen_PRSice.txt \ --base-maf MAF:0.01 \ --out /home/evawalre/Genomic_SEM/PRS/PRSice_output/MetS_HDL_sumstats ### Step 9. Drug gene set analysis python ~/drugsets/drugsets.py --geneassoc ~/code/phd-code/metS_code/metS.genes.raw --drugsets all --enrich all --id ensembl92 --out ~/code/phd-code/metS_code/metS_drugsets_results/metS