Hi,
It turnes out that when running the "full pipeline vignette" the bgzipped VCF file generated under ./RESULTS/GWAS/Nalls23andMe_2019/BST1/LD is empty.
I believe this is causing the "Segmentation fault (core dumped)" error, so that no finemapping tool works downstream since we are missing the locus specific LD structure.
r$> library(echolocatoR)
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
Bioconductor version 3.12 (BiocManager 1.30.12), ?BiocManager::install for help
Bioconductor version '3.12' is out-of-date; the current release version '3.14' is available with R version '4.1'; see https://bioconductor.org/install
data("Nalls_top_SNPs");
top_SNPs <- import_topSNPs(
# topSS = "~/Desktop/Fine_Mapping/Data/GWAS/Nalls23andMe_2019/Nalls2019_TableS2.xlsx",
topSS = Nalls_top_SNPs,
chrom_col = "CHR",
position_col = "BP",
snp_col="SNP",
pval_col="P, all studies",
effect_col="Beta, all studies",
gene_col="Nearest Gene",
locus_col = "Nearest Gene",
grouping_vars = c("Locus"),
remove_variants = "rs34637584")
#> [1] "+ Assigning gene_col and locus_col independently"
head(top_SNPs)
Locus Gene CHR POS SNP P Effect
1: ASXL3 ASXL3 18 31304318 rs1941685 1.69e-08 0.0531
2: BAG3 BAG3 10 121415685 rs72840788 1.57e-11 0.0763
3: BIN3 BIN3 8 22525980 rs2280104 1.16e-08 0.0556
4: BRIP1 BRIP1 17 59917366 rs61169879 9.28e-10 0.0820
5: BST1 BST1 4 15737348 rs4698412 2.06e-28 0.1035
6: C5orf24 C5orf24 5 134199105 rs11950533 7.16e-09 -0.0916
r$> fullSS_path <- example_fullSS(fullSS_path=file.path("/mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR","Nalls23andMe_2019.fullSS_subset.tsv"))
r$> fullSS_path
[1] "/mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/Nalls23andMe_2019.fullSS_subset.tsv"
top_SNPs = top_SNPs,
# It's best to give absolute paths
results_dir = file.path("/mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR","results"),
loci = c("BST1","MEX3C"),# top_SNPs$Locus,
dataset_name = "Nalls23andMe_2019",
dataset_type = "GWAS",
force_new_subset = T,
force_new_LD = T,
force_new_finemap = T,
remove_tmps = F,
# SUMMARY STATS ARGUMENTS
fullSS_path = fullSS_path,
query_by ="tabix",
chrom_col = "CHR", position_col = "POS", snp_col = "RSID",
pval_col = "p", effect_col = "beta", stderr_col = "se",
freq_col = "freq", MAF_col = "calculate",
A1_col = "A1",
A2_col = "A2",
# FILTERING ARGUMENTS
## It's often desirable to use a larger window size
## (e.g. 2Mb which is bp_distance=500000*2),
## but we use a small window here to speed up the process.
bp_distance = 10000,#500000*2,
min_MAF = 0.001,
trim_gene_limits = F,
# FINE-MAPPING ARGUMENTS
## General
finemap_methods = c("ABF","FINEMAP","SUSIE","POLYFUN_SUSIE"),
n_causal = 5,
PP_threshold = .95,
# LD ARGUMENTS
LD_reference = "1KGphase1",#"UKB",
superpopulation = "EUR",
download_method = "axel",
# PLOT ARGUMENTS
## general
plot.types=c("fancy"),
## Generate multiple plots of different window sizes;
### all SNPs, 4x zoomed-in, and a 50000bp window
plot.zoom = c("all","4x","10x"),
## XGR
# plot.XGR_libnames=c("ENCODE_TFBS_ClusteredV3_CellTypes"),
## Roadmap
plot.Roadmap = F,
plot.Roadmap_query = NULL,
# Nott et al. (2019)
plot.Nott_epigenome = T,
plot.Nott_show_placseq = T,
[1] "+ CONDA:: Activating conda env 'echoR'"
[1] "Checking for tabix installation..."
[1] "Checking for bcftools installation..."
) ) ) ))))))}}}}}}}} {{{{{{{{{(((((( ( ( (
BST1 (1 / 2)
) ) ) ))))))}}}}}}}} {{{{{{{{{(((((( ( ( (
[1] "+ CONDA:: Identified bgzip executable in echoR env."
[1] "Determining chrom type from file header"
[1] "LD:: Standardizing summary statistics subset."
[1] "++ Preparing Gene col"
[1] "++ Preparing A1,A1 cols"
[1] "++ Preparing MAF,Freq cols"
[1] "++ Inferring MAF from frequency column..."
[1] "++ Preparing N_cases,N_controls cols"
[1] "++ Preparing `proportion_cases` col"
[1] "++ Calculating `proportion_cases`."
[1] "++ Preparing N col"
[1] "+ Preparing sample_size (N) column"
[1] "++ Computing effective sample size."
[1] "++ Preparing t-stat col"
[1] "+ Calculating t-statistic from Effect and StdErr..."
[1] "++ Assigning lead SNP"
[1] "++ Ensuring Effect, StdErr, P are numeric"
[1] "++ Ensuring 1 SNP per row"
[1] "++ Removing extra whitespace"
[1] "++ Saving subset ==> /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/BST1/BST1_Nalls23andMe_2019_subset.tsv.gz"
[1] "LD:: Using 1000Genomes as LD reference panel."
[1] "LD Reference Panel = 1KGphase1"
[1] "+ LD:: Querying 1KG remote server."
[1] "+ CONDA:: Identified tabix executable in echoR env."
[1] "LD:: Querying VCF subset"
[1] "/home/acarrasco/.conda/envs/echoR/bin/tabix -fh -p vcf ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/release/20110521//ALL.chr4.phase1_release_v3.20101123.snps_indels_svs.genotypes.vcf.gz 4:15727389-15747197 > /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/BST1/LD/BST1.1KGphase1.vcf"
Segmentation fault (core dumped)
[1] "LD:BCFTOOLS:: Compressing vcf file..."
[1] "LD:TABIX:: Re-indexing vcf.gz..."
[1] "LD:BCFTOOLS:: Subsetting vcf to only include EUR individuals ( 378 / 1091 )."
Failed to read from /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/BST1/LD/BST1.1KGphase1.vcf.gz: unknown file type
[1] "LD:PLINK:: Converting vcf.gz to .bed/.bim/.fam"
PLINK v1.90b6.9 64-bit (4 Mar 2019) www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang GNU General Public License v3
Logging to /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/BST1/LD/plink.log.
Options in effect:
--out /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/BST1/LD/plink
--vcf /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/BST1/LD/BST1.1KGphase1.vcf.gz
257891 MB RAM detected; reserving 128945 MB for main workspace.
Error: File read failure.
[1] "LD:snpStats:: Computing LD (stats = R)"
Error in data.table::fread(bim_path, col.names = c("CHR", "SNP", "V3", :
File '/mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/BST1/LD/plink.bim' does not exist or is non-readable. getwd()=='/mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR'
Fine-mapping complete in:
Time difference of 11.7 secs
) ) ) ))))))}}}}}}}} {{{{{{{{{(((((( ( ( (
MEX3C (2 / 2)
) ) ) ))))))}}}}}}}} {{{{{{{{{(((((( ( ( (
[1] "Determining chrom type from file header"
[1] "LD:: Standardizing summary statistics subset."
[1] "++ Preparing Gene col"
[1] "++ Preparing A1,A1 cols"
[1] "++ Preparing MAF,Freq cols"
[1] "++ Inferring MAF from frequency column..."
[1] "++ Preparing N_cases,N_controls cols"
[1] "++ Preparing `proportion_cases` col"
[1] "++ Calculating `proportion_cases`."
[1] "++ Preparing N col"
[1] "+ Preparing sample_size (N) column"
[1] "++ Computing effective sample size."
[1] "++ Preparing t-stat col"
[1] "+ Calculating t-statistic from Effect and StdErr..."
[1] "++ Assigning lead SNP"
[1] "++ Ensuring Effect, StdErr, P are numeric"
[1] "++ Ensuring 1 SNP per row"
[1] "++ Removing extra whitespace"
[1] "++ Saving subset ==> /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/MEX3C/MEX3C_Nalls23andMe_2019_subset.tsv.gz"
[1] "LD:: Using 1000Genomes as LD reference panel."
[1] "LD Reference Panel = 1KGphase1"
[1] "+ LD:: Querying 1KG remote server."
[1] "+ CONDA:: Identified tabix executable in echoR env."
[1] "LD:: Querying VCF subset"
[1] "/home/acarrasco/.conda/envs/echoR/bin/tabix -fh -p vcf ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/release/20110521//ALL.chr18.phase1_release_v3.20101123.snps_indels_svs.genotypes.vcf.gz 18:48673708-48692728 > /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/MEX3C/LD/MEX3C.1KGphase1.vcf"
Segmentation fault (core dumped)
[1] "LD:BCFTOOLS:: Compressing vcf file..."
[1] "LD:TABIX:: Re-indexing vcf.gz..."
[1] "LD:BCFTOOLS:: Subsetting vcf to only include EUR individuals ( 378 / 1091 )."
Failed to read from /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/MEX3C/LD/MEX3C.1KGphase1.vcf.gz: unknown file type
[1] "LD:PLINK:: Converting vcf.gz to .bed/.bim/.fam"
PLINK v1.90b6.9 64-bit (4 Mar 2019) www.cog-genomics.org/plink/1.9/
(C) 2005-2019 Shaun Purcell, Christopher Chang GNU General Public License v3
Logging to /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/MEX3C/LD/plink.log.
Options in effect:
--out /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/MEX3C/LD/plink
--vcf /mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/MEX3C/LD/MEX3C.1KGphase1.vcf.gz
257891 MB RAM detected; reserving 128945 MB for main workspace.
Error: File read failure.
[1] "LD:snpStats:: Computing LD (stats = R)"
Error in data.table::fread(bim_path, col.names = c("CHR", "SNP", "V3", :
File '/mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/MEX3C/LD/plink.bim' does not exist or is non-readable. getwd()=='/mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR'
Fine-mapping complete in:
Time difference of 8.7 secs
Error in `[.data.table`(x, r, vars, with = FALSE) :
column(s) not found: SNP
Then, we see how the loci specific LD file is empty, leading to the segmentation fault error
acarrasco@kronos:/mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/BST1/LD$ gunzip BST1.1KGphase1.vcf.gz
acarrasco@kronos:/mnt/rreal/RDS/acarrasco/ANALYSES_WORKSPACE/EARLY_PD/POST_GWAS/ECHOLOCATOR/results/GWAS/Nalls23andMe_2019/BST1/LD$ cat BST1.1KGphase1.vcf
Same output when I tried to use the phase 3 from 1000G Panel
Conclussion
I believe there is an issue accessing the 1000Genome Ref Panel. When I changed the reference panel to UKB, the finemap_loci() function worked