diff --git a/src/intersect_SNP.R b/src/intersect_SNP.R index 6d2649fefe071f4f0426229c6b688a33dcf21bfc..6cd39fb2904864a5a62398fd7720b76f4854071c 100755 --- a/src/intersect_SNP.R +++ b/src/intersect_SNP.R @@ -1,5 +1,6 @@ #!/usr/bin/Rscript library("tidyverse") +library("seqinr") args <- commandArgs(trailingOnly = TRUE) snp_a <- read_delim(args[1], delim = "\t") %>% @@ -12,8 +13,30 @@ only_b <- snp_b %>% select(cords) %>% setdiff(snp_a %>% select(cords)) %>% pull(cords) +snp <- snp_b %>% + filter(cords %in% only_b) -snp_b %>% - filter(cords %in% only_b) %>% +fastafile <- read.fasta(file = args[3], + as.string = TRUE) + +snp$seq_list <- snp %>% + apply(1, FUN = function(x, fastafile, POS, CHROM){ + begin <- as.integer(x[ POS ]) - 10 + end <- as.integer(x[ POS ]) + 10 + chrom <- x[ CHROM ] + print(paste0(chrom, ":", begin, "-", end)) + seq_restric <- fastafile[[ chrom ]] %>% + c2s() %>% + substr(begin, end) %>% + s2c() + seq_restric[11] <- toupper(seq_restric[11]) + return(seq_restric %>% c2s()) + }, + fastafile = fastafile, + POS = which(colnames(snp) %in% "POS"), + CHROM = which(colnames(snp) %in% "CHROM") + ) + +snp %>% write_csv(paste0("only_", args[2]))