title: "Plotting coverage in sub windows"
```{r setup, echo=FALSE, message=FALSE}
```{r, echo=FALSE, eval=FALSE}
# Per-biosample metadata. Occurrences of "South East Asia - East" in `country`
# are replaced with "Cambodia".
metaByBioSample <- readr::read_tsv("../meta/metadata/metaByBioSample.tab.txt")
metaByBioSample <- metaByBioSample %>%
  mutate(country = gsub("South East Asia - East", "Cambodia", country))

# Subset of the metadata whose country is exactly "Ethiopia".
metaByBioSample_Ethiopia <- filter(metaByBioSample, country == "Ethiopia")
# MAD4HATTER panel: read the selected-cluster report, keep rows whose s_Sample
# starts with "X" and is listed among the Ethiopian samples, then pass the
# result to HaplotypeRainbows::prepForRainbow().
allSel <- readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/pfepipanels/Pf_Epi_Panels/data/MAD4HATTER/data/pf/reports/slim_allSelectedClustersInfo.tab.txt.gz")
allSel_xx <- filter(
  allSel,
  grepl("^X", s_Sample) & s_Sample %in% metaByBioSample_Ethiopia$sample
)
allSel_xx_prep <- HaplotypeRainbows::prepForRainbow(allSel_xx)

# PfSMART panel: same selection, with minPopSize = 1. This overwrites allSel,
# allSel_xx and allSel_xx_prep from the MAD4HATTER step above.
allSel <- readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/pfepipanels/Pf_Epi_Panels/data/PfSMART/data/pf/reports/slim_allSelectedClustersInfo.tab.txt.gz")
allSel_xx <- filter(
  allSel,
  grepl("^X", s_Sample) & s_Sample %in% metaByBioSample_Ethiopia$sample
)
allSel_xx_prep <- HaplotypeRainbows::prepForRainbow(allSel_xx, minPopSize = 1)
# heome1 panel: read the selected-cluster report and keep the rows whose
# s_Sample appears in previousDeletionCalls$BiologicalSample, then pass the
# result to HaplotypeRainbows::prepForRainbow() with minPopSize = 2.
# NOTE(review): previousDeletionCalls is only assigned further down in the
# visible code, so this step relies on it already existing in the session.
allSel <- readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/pfepipanels/Pf_Epi_Panels/data/heome1/data/pf/reports/slim_allSelectedClustersInfo.tab.txt.gz")
allSel_withDeletions <- allSel %>%
  filter(s_Sample %in% previousDeletionCalls$BiologicalSample)
allSel_withDeletions_prep <- HaplotypeRainbows::prepForRainbow(allSel_withDeletions, minPopSize = 2)

# Open the PDF device for the rainbow plot. `FALSE` is spelled out because `F`
# is an ordinary variable that can be reassigned.
# NOTE(review): no plotting call or dev.off() is visible after this line;
# confirm the device is closed once the plot has been drawn.
pdf("heome1_rainbow.pdf", width = 20, height = 25, useDingbats = FALSE)
# Three-column export (sample_id, locus, allele) built from the prepared
# heome1 table; columns are selected and renamed in a single step.
allSel_withDeletions_prep_outForMoire <- allSel_withDeletions_prep %>%
  select(
    sample_id = s_Sample,
    locus = p_name,
    allele = h_popUID
  )

write_tsv(allSel_withDeletions_prep_outForMoire, "~/Downloads/allSel_withDeletions_prep_outForMoire.tsv")
# Sample-level metadata; "South East Asia - East" in `country` is replaced
# with "Cambodia".
meta <- readr::read_tsv("../meta/metadata/meta.tab.txt")
meta <- mutate(meta, country = gsub("South East Asia - East", "Cambodia", country))

# Biosample-level metadata with the same country replacement.
metaByBioSample <- readr::read_tsv("../meta/metadata/metaByBioSample.tab.txt")
metaByBioSample <- mutate(metaByBioSample, country = gsub("South East Asia - East", "Cambodia", country))
# coiCalls = readr::read_tsv("MAD4HATTER_COI_calls.tab.txt")
# #coiCalls = readr::read_tsv("PfSMART_COI_calls.tab.txt")
# coiCalls = readr::read_tsv("heome1_COI_calls.tab.txt")
# coiCalls_poly = coiCalls %>%
# filter(COI > 1)
# Previously made HRP2/HRP3 deletion calls.
#  - rows whose sample name matches "SPT" AND that are flagged
#    possiblyChr11Deleted are dropped
#  - "South East Asia - East" in `country` is replaced with "Cambodia"
#  - hrpCall combines the two deletion flags into one label; rows where either
#    flag is NA match none of the first three conditions and therefore fall
#    through to "pfhrp2+/pfhrp3+"
previousDeletionCalls <- readr::read_tsv("allMeta_HRP2_HRP3_deletionCalls.tab.txt") %>%
  # filter(country %!in% c("Bangladesh", "Mauritania", "Myanmar", "The Gambia")) %>%
  filter(!(grepl("SPT", sample) & possiblyChr11Deleted)) %>%
  # filter(BiologicalSample %!in% coiCalls_poly$sample) %>%
  mutate(country = gsub("South East Asia - East", "Cambodia", country)) %>%
  mutate(hrpCall = case_when(
    possiblyHRP2Deleted & possiblyHRP3Deleted ~ "pfhrp2-/pfhrp3-",
    possiblyHRP2Deleted & !possiblyHRP3Deleted ~ "pfhrp2-/pfhrp3+",
    !possiblyHRP2Deleted & possiblyHRP3Deleted ~ "pfhrp2+/pfhrp3-",
    TRUE ~ "pfhrp2+/pfhrp3+"
  ))
# %>%
#   left_join(coiCalls %>%
#     rename(BiologicalSample = sample))
# One subset of the deletion calls per deletion flag.
previousDeletionCalls_hrp2_del <- filter(previousDeletionCalls, possiblyHRP2Deleted)
previousDeletionCalls_hrp3_del <- filter(previousDeletionCalls, possiblyHRP3Deleted)
previousDeletionCalls_chr11_del <- filter(previousDeletionCalls, possiblyChr11Deleted)
# Master table, restricted to rows that have a non-missing SRARuns value.
masterTable <- readr::read_tsv("../meta/metadata/masterTable.tab.txt")
masterTable <- filter(masterTable, !is.na(SRARuns))

# Coverage summary statistics.
cov <- readr::read_tsv("../meta/allCov_summaryStats.tab.txt.gz")
# cov = readr::read_tsv("../../../../allSRAData/reProcess_2021_11_19/coverage/data/allCov_summaryStats.tab.txt.gz")
# Attach the deletion calls to the metadata (natural join on the shared
# columns), then label each row: "Field" for field samples, otherwise the
# BiologicalSample value.
meta <- left_join(meta, previousDeletionCalls)
meta <- mutate(meta, isolate = ifelse(IsFieldSample, "Field", BiologicalSample))
# Load the per-window table, caching the parsed result as an .Rdata file.
# The table is re-read from the gzipped TSV when the cache is missing or when
# the input's ctime is newer than the cache's ctime; otherwise the cached
# object is loaded.
inputFnp <- "data/finalHRPII_HRPIII_windows_withTunedSubWindows/popClustering/reports/allBasicInfo.tab.txt.gz"
outputFnp <- "finalHRPII_HRPIII_windows_withTunedSubWindows_allBasicInfo.Rdata"
# `||` short-circuits, so the ctime comparison (NA when the cache does not
# exist) is only evaluated when the cache file is present.
if (!file.exists(outputFnp) ||
  file.info(inputFnp)$ctime > file.info(outputFnp)$ctime) {
  allBasicInfo <- readr::read_tsv(inputFnp) %>%
    # genomicID is "<chrom>-<start>-<end>"
    mutate(genomicID = paste0(`#chrom`, "-", start, "-", end))
  save(allBasicInfo, file = outputFnp)
} else {
  load(outputFnp)
}
# BED file for the region shared between chromosomes 11 and 13 (per the file
# path); read without a header row, so columns get default names.
homologousRegion <- readr::read_tsv(
  "../rRNA_segmental_duplications/sharedBetween11_and_13/investigatingChrom11Chrom13/Pf3D7_13_v3-2792021-2807295-for--Pf3D7_11_v3-1918028-1933288-for.bed",
  col_names = FALSE
)
# Window table restricted to samples present in previousDeletionCalls, with
# coverage normalised and rounded:
#  - inGene: TRUE unless extraField0 equals the literal "[extraField0=NA]"
#  - cov is joined on the shared columns
#  - medianCov / meanCov: median and mean of perBaseCoverage over all rows
#  - perBaseCoverageNorm: perBaseCoverage divided by the in-gene or
#    not-in-gene median, depending on inGene
#  - perBaseCoverageNormRounded: values in (0.10, 1] are set to 1 first, then
#    everything is rounded, so only values <= 0.10 end up as 0
allBasicInfo_filt <- allBasicInfo %>%
  filter(sample %fin% previousDeletionCalls$sample) %>%
  # filter(name %!in% Chrom13RegionsToRemove) %>%
  # mutate(inGene = !is.na(extraField0)) %>%
  mutate(inGene = extraField0 != "[extraField0=NA]") %>%
  left_join(cov) %>%
  mutate(
    medianCov = median(perBaseCoverage),
    meanCov = mean(perBaseCoverage),
    perBaseCoverageNorm = ifelse(
      inGene,
      perBaseCoverage / medianPerBaseCov_inGenes,
      perBaseCoverage / medianPerBaseCov_notInGenes
    ),
    perBaseCoverageNormRounded = ifelse(
      perBaseCoverageNorm > 0.10 & perBaseCoverageNorm <= 1,
      1,
      perBaseCoverageNorm
    ),
    perBaseCoverageNormRounded = round(perBaseCoverageNormRounded)
  )
# mutate(perBaseCoverageNormRounded = ifelse(perBaseCoverageNorm >0.10 & perBaseCoverageNorm < 0.5, 0.5, perBaseCoverageNorm)) %>%
# mutate(perBaseCoverageNormRounded = round(perBaseCoverageNormRounded/0.5)*0.5)
# One row per window: take the rows belonging to the first sample in the
# table, keep the first six columns plus extraField0, de-duplicate, and build
# genomicID as "<chrom>-<start>-<end>".
# The column is referenced as `#chrom` (no padding inside the backticks) so it
# matches the header name as read from the input table.
regions <- allBasicInfo_filt %>%
  filter(.$sample[1] == sample) %>%
  select(1:6, extraField0) %>%
  unique() %>%
  mutate(genomicID = paste0(`#chrom`, "-", start, "-", end))
# Annotate each window with gene type and its position relative to the region
# shared between chromosomes 11 and 13.
#
# All column look-ups use the piped data (extraField0, not regions$extraField0):
# the table is re-ordered by arrange(id) first, so indexing the outer `regions`
# object would pair each row with another row's annotation.
#
# NOTE(review): the "else" value of the two ifelse() calls for geneType and
# homologousRegion was absent; NA is assumed here, which leaves genomicRegion
# unchanged because NA conditions in case_when() fall through. Confirm.
regions <- regions %>%
  mutate(id = paste0(`#chrom`, "-", start, "-", end)) %>%
  arrange(id) %>%
  # mutate(inGene = !is.na(extraField0)) %>%
  mutate(inGene = extraField0 != "[extraField0=NA]") %>%
  # grepl() is a substring match, so "histidine-rich protein II" also matches
  # annotations containing "histidine-rich protein III".
  mutate(geneType = ifelse(
    grepl("histidine-rich protein II", extraField0),
    "hrp",
    NA_character_
  )) %>%
  mutate(geneType = ifelse(grepl("ribosomal RNA", extraField0), "rRNA", geneType)) %>%
  mutate(geneType = ifelse(grepl("332", extraField0), "Pf332", geneType)) %>%
  # Windows fully inside the chr11 or chr13 copy of the shared region.
  mutate(homologousRegion = ifelse(
    (`#chrom` == "Pf3D7_11_v3" & start >= 1918028 & end <= 1933288) |
      (`#chrom` == "Pf3D7_13_v3" & start >= 2792021 & end <= 2807295),
    "shared",
    NA_character_
  )) %>%
  # Windows starting at or beyond the end coordinate of the shared region.
  mutate(afterHomologousRegion =
    (`#chrom` == "Pf3D7_11_v3" & start >= 1933288) |
      (`#chrom` == "Pf3D7_13_v3" & start >= 2807295)) %>%
  # First matching rule wins: gene type takes priority over position.
  mutate(genomicRegion = case_when(
    "rRNA" == geneType ~ "rRNA",
    "hrp" == geneType ~ "hrp",
    "Pf332" == geneType ~ "Pf332",
    afterHomologousRegion ~ "After Duplicated Region",
    "shared" == homologousRegion ~ "Duplicated Region",
    TRUE ~ "other"
  )) %>%
  mutate(chrom = `#chrom`)
## further filtering
# Window names (values of the `name` column) on Pf3D7_08_v3 that are used
# below, via `name %in% endRegions_08`, to restrict the per-sample coverage
# and success counts. The vector length is also the denominator of noCovPerc.
endRegions_08 = c ("Pf3D7_08_v3-1375207-1375341__var-0" ,"Pf3D7_08_v3-1375210-1375410__var-0" ,"Pf3D7_08_v3-1375185-1375485__var-1" ,"Pf3D7_08_v3-1375557-1375750__subseq-0" ,
"Pf3D7_08_v3-1378006-1378662__var-0" ,"Pf3D7_08_v3-1378006-1378662__var-1" ,"Pf3D7_08_v3-1378006-1378662__var-2" ,"Pf3D7_08_v3-1378006-1378662__var-3" ,
"Pf3D7_08_v3-1378006-1378662__var-4" ,"Pf3D7_08_v3-1379154-1379915__var-0" ,"Pf3D7_08_v3-1379154-1379915__var-1" ,"Pf3D7_08_v3-1379154-1379915__var-2" ,
"Pf3D7_08_v3-1379154-1379915__var-3" ,"Pf3D7_08_v3-1379154-1379915__var-4" ,"Pf3D7_08_v3-1379154-1379915__var-5" ,"Pf3D7_08_v3-1379154-1379915__var-6" ,
"Pf3D7_08_v3-1379154-1379915__var-7" ,"Pf3D7_08_v3-1379154-1379915__var-8" ,"Pf3D7_08_v3-1380194-1380328__var-0" ,"Pf3D7_08_v3-1382255-1382505__var-0" ,
"Pf3D7_08_v3-1382255-1382505__var-1" ,"Pf3D7_08_v3-1382255-1382505__var-2" ,"Pf3D7_08_v3-1382680-1383155__var-0" ,"Pf3D7_08_v3-1382680-1383155__var-1" ,
"Pf3D7_08_v3-1382680-1383155__var-2" ,"Pf3D7_08_v3-1382680-1383155__var-3" ,"Pf3D7_08_v3-1382680-1383155__var-4" ,"Pf3D7_08_v3-1382680-1383155__var-5" ,
"Pf3D7_08_v3-1382680-1383155__var-6" ,"Pf3D7_08_v3-1382680-1383155__var-7" ,"Pf3D7_08_v3-1384030-1384251__var-0" ,"Pf3D7_08_v3-1384030-1384251__var-1" ,
"Pf3D7_08_v3-1384030-1384251__var-2" ,"Pf3D7_08_v3-1384316-1384663__var-0" ,"Pf3D7_08_v3-1384316-1384663__var-1" ,"Pf3D7_08_v3-1384316-1384663__var-2" ,
"Pf3D7_08_v3-1384316-1384663__var-3" ,"Pf3D7_08_v3-1384316-1384663__var-4" ,"Pf3D7_08_v3-1384837-1385370__var-0" ,"Pf3D7_08_v3-1384837-1385370__var-1" ,
"Pf3D7_08_v3-1384837-1385370__var-2" ,"Pf3D7_08_v3-1384837-1385370__var-3" ,"Pf3D7_08_v3-1384837-1385370__var-4" ,"Pf3D7_08_v3-1384837-1385370__var-5" ,
"Pf3D7_08_v3-1384837-1385370__var-6" ,"Pf3D7_08_v3-1385625-1385741__var-0" ,"Pf3D7_08_v3-1385951-1386323__var-0" ,"Pf3D7_08_v3-1385951-1386323__var-1" ,
"Pf3D7_08_v3-1385951-1386323__var-2" ,"Pf3D7_08_v3-1385951-1386323__var-3" ,"Pf3D7_08_v3-1385951-1386323__var-4" ,"Pf3D7_08_v3-1385951-1386323__var-5" ,
"Pf3D7_08_v3-1386518-1386680__var-0" ,"Pf3D7_08_v3-1386518-1386680__var-1" ,"Pf3D7_08_v3-1386518-1386680__var-2" ,"Pf3D7_08_v3-1386518-1386680__var-3" ,
"Pf3D7_08_v3-1386739-1387414__var-00" ,"Pf3D7_08_v3-1386739-1387414__var-01" ,"Pf3D7_08_v3-1386739-1387414__var-02" ,"Pf3D7_08_v3-1386739-1387414__var-03" ,
"Pf3D7_08_v3-1386739-1387414__var-04" ,"Pf3D7_08_v3-1386739-1387414__var-05" ,"Pf3D7_08_v3-1386739-1387414__var-06" ,"Pf3D7_08_v3-1386739-1387414__var-07" ,
"Pf3D7_08_v3-1386739-1387414__var-08" ,"Pf3D7_08_v3-1386739-1387414__var-09" ,"Pf3D7_08_v3-1386739-1387414__var-10" ,"Pf3D7_08_v3-1386739-1387414__var-11" ,
"Pf3D7_08_v3-1386739-1387414__var-12" ,"Pf3D7_08_v3-1386739-1387414__var-13" ,"Pf3D7_08_v3-1387782-1387982__var-0" ,"Pf3D7_08_v3-1387782-1387982__var-1" ,
"Pf3D7_08_v3-1387782-1387982__var-2" ,"Pf3D7_08_v3-1387782-1387982__var-3" )
# Window names (values of the `name` column) on Pf3D7_11_v3 that are used
# below, via `name %in% endRegions_11`, to restrict the per-sample coverage
# and success counts. The vector length is also the denominator of noCovPerc.
endRegions_11 = c ("Pf3D7_11_v3-1991347-1992851__var-00" ,"Pf3D7_11_v3-1991347-1992851__var-01" ,"Pf3D7_11_v3-1991347-1992851__var-02" ,"Pf3D7_11_v3-1991347-1992851__var-03" ,
"Pf3D7_11_v3-1991347-1992851__var-04" ,"Pf3D7_11_v3-1991347-1992851__var-05" ,"Pf3D7_11_v3-1991347-1992851__var-06" ,"Pf3D7_11_v3-1991347-1992851__var-07" ,
"Pf3D7_11_v3-1991347-1992851__var-08" ,"Pf3D7_11_v3-1991347-1992851__var-09" ,"Pf3D7_11_v3-1991347-1992851__var-10" ,"Pf3D7_11_v3-1991347-1992851__var-11" ,
"Pf3D7_11_v3-1991347-1992851__var-12" ,"Pf3D7_11_v3-1991347-1992851__var-13" ,"Pf3D7_11_v3-1991347-1992851__var-14" ,"Pf3D7_11_v3-1991347-1992851__var-15" ,
"Pf3D7_11_v3-1991347-1992851__var-16" ,"Pf3D7_11_v3-1991347-1992851__var-17" ,"Pf3D7_11_v3-1991347-1992851__var-18" ,"Pf3D7_11_v3-1992907-1993669__var-00" ,
"Pf3D7_11_v3-1992907-1993669__var-01" ,"Pf3D7_11_v3-1992907-1993669__var-02" ,"Pf3D7_11_v3-1992907-1993669__var-03" ,"Pf3D7_11_v3-1992907-1993669__var-04" ,
"Pf3D7_11_v3-1992907-1993669__var-05" ,"Pf3D7_11_v3-1992907-1993669__var-06" ,"Pf3D7_11_v3-1992907-1993669__var-07" ,"Pf3D7_11_v3-1992907-1993669__var-08" ,
"Pf3D7_11_v3-1992907-1993669__var-09" ,"Pf3D7_11_v3-1993833-1994061__var-0" ,"Pf3D7_11_v3-1993833-1994061__var-1" ,"Pf3D7_11_v3-1993833-1994061__var-2" ,
"Pf3D7_11_v3-1993833-1994061__var-3" ,"Pf3D7_11_v3-1994139-1994363__var-0" ,"Pf3D7_11_v3-1994139-1994363__var-1" ,"Pf3D7_11_v3-1994139-1994363__var-2" ,
"Pf3D7_11_v3-1994139-1994363__var-3" ,"Pf3D7_11_v3-1995234-1995394__var-0" ,"Pf3D7_11_v3-1995234-1995394__var-1" ,"Pf3D7_11_v3-1995459-1995614__var-0" ,
"Pf3D7_11_v3-1995459-1995614__var-1" ,"Pf3D7_11_v3-1995459-1995614__var-2" ,"Pf3D7_11_v3-1995678-1996112__var-00" ,"Pf3D7_11_v3-1995678-1996112__var-01" ,
"Pf3D7_11_v3-1995678-1996112__var-02" ,"Pf3D7_11_v3-1995678-1996112__var-03" ,"Pf3D7_11_v3-1995678-1996112__var-04" ,"Pf3D7_11_v3-1995678-1996112__var-05" ,
"Pf3D7_11_v3-1995678-1996112__var-06" ,"Pf3D7_11_v3-1995678-1996112__var-07" ,"Pf3D7_11_v3-1995678-1996112__var-08" ,"Pf3D7_11_v3-1995678-1996112__var-09" ,
"Pf3D7_11_v3-1996349-1996650__var-0" ,"Pf3D7_11_v3-1996349-1996650__var-1" ,"Pf3D7_11_v3-1996745-1997019__var-0" ,"Pf3D7_11_v3-1996745-1997019__var-1" ,
"Pf3D7_11_v3-1996745-1997019__var-2" ,"Pf3D7_11_v3-1996745-1997019__var-3" ,"Pf3D7_11_v3-1996745-1997019__var-4" ,"Pf3D7_11_v3-1997095-1997221__var-0" ,
"Pf3D7_11_v3-1997095-1997221__var-1" ,"Pf3D7_11_v3-1997095-1997221__var-2" ,"Pf3D7_11_v3-1997095-1997221__var-3" ,"Pf3D7_11_v3-1997276-1997425__var-0" ,
"Pf3D7_11_v3-1997276-1997425__var-1" ,"Pf3D7_11_v3-1997276-1997425__var-2" ,"Pf3D7_11_v3-1997276-1997425__var-3" ,"Pf3D7_11_v3-1997674-1997884__var-0" ,
"Pf3D7_11_v3-1997674-1997884__var-1" ,"Pf3D7_11_v3-1997944-1998371__var-0" ,"Pf3D7_11_v3-1997944-1998371__var-1" ,"Pf3D7_11_v3-1997944-1998371__var-2" ,
"Pf3D7_11_v3-1997944-1998371__var-3" ,"Pf3D7_11_v3-1997944-1998371__var-4" ,"Pf3D7_11_v3-1997944-1998371__var-5" ,"Pf3D7_11_v3-1997944-1998371__var-6" ,
"Pf3D7_11_v3-1997944-1998371__var-7" ,"Pf3D7_11_v3-1998638-1998759__var-0" ,"Pf3D7_11_v3-1998833-1999151__var-0" ,"Pf3D7_11_v3-1998833-1999151__var-1" ,
"Pf3D7_11_v3-1998833-1999151__var-2" ,"Pf3D7_11_v3-1998833-1999151__var-3" ,"Pf3D7_11_v3-1999245-1999390__var-0" ,"Pf3D7_11_v3-1999600-1999713__var-0" ,
"Pf3D7_11_v3-1999600-1999713__var-1" ,"Pf3D7_11_v3-2000687-2000889__var-0" ,"Pf3D7_11_v3-2000687-2000889__var-1" ,"Pf3D7_11_v3-2000687-2000889__var-2" ,
"Pf3D7_11_v3-2000687-2000889__var-3" ,"Pf3D7_11_v3-2000687-2000889__var-4" ,"Pf3D7_11_v3-2000687-2000889__var-5" ,"Pf3D7_11_v3-2000941-2001239__var-0" ,
"Pf3D7_11_v3-2000941-2001239__var-1" ,"Pf3D7_11_v3-2000941-2001239__var-2" ,"Pf3D7_11_v3-2000941-2001239__var-3" ,"Pf3D7_11_v3-2000941-2001239__var-4" ,
"Pf3D7_11_v3-2000941-2001239__var-5" ,"Pf3D7_11_v3-2001300-2003328__var-00" ,"Pf3D7_11_v3-2001300-2003328__var-01" ,"Pf3D7_11_v3-2001300-2003328__var-02" ,
"Pf3D7_11_v3-2001300-2003328__var-03" ,"Pf3D7_11_v3-2001300-2003328__var-04" ,"Pf3D7_11_v3-2001300-2003328__var-05" ,"Pf3D7_11_v3-2001300-2003328__var-06" ,
"Pf3D7_11_v3-2001300-2003328__var-07" ,"Pf3D7_11_v3-2001300-2003328__var-08" ,"Pf3D7_11_v3-2001300-2003328__var-09" ,"Pf3D7_11_v3-2001300-2003328__var-10" ,
"Pf3D7_11_v3-2001300-2003328__var-11" ,"Pf3D7_11_v3-2001300-2003328__var-12" ,"Pf3D7_11_v3-2001300-2003328__var-13" ,"Pf3D7_11_v3-2001300-2003328__var-14" ,
"Pf3D7_11_v3-2001300-2003328__var-15" ,"Pf3D7_11_v3-2001300-2003328__var-16" ,"Pf3D7_11_v3-2001300-2003328__var-17" ,"Pf3D7_11_v3-2001300-2003328__var-18" ,
"Pf3D7_11_v3-2001300-2003328__var-19" ,"Pf3D7_11_v3-2001300-2003328__var-20" ,"Pf3D7_11_v3-2001300-2003328__var-21" ,"Pf3D7_11_v3-2001300-2003328__var-22" ,
"Pf3D7_11_v3-2001300-2003328__var-23" ,"Pf3D7_11_v3-2001300-2003328__var-24" ,"Pf3D7_11_v3-2001300-2003328__var-25" ,"Pf3D7_11_v3-2001300-2003328__var-26" )
# Window names (values of the `name` column) on Pf3D7_13_v3 that are used
# below, via `name %in% endRegions_13`, to restrict the per-sample coverage
# and success counts. The vector length is also the denominator of noCovPerc.
endRegions_13 = c ("Pf3D7_13_v3-2841776-2841926__subseq-0" ,"Pf3D7_13_v3-2842001-2842301__var-0" ,"Pf3D7_13_v3-2842076-2842151__var-0" ,"Pf3D7_13_v3-2842076-2842176__var-0" ,
"Pf3D7_13_v3-2842101-2842251__var-0" ,"Pf3D7_13_v3-2842001-2842301__var-1" ,"Pf3D7_13_v3-2842151-2842226__var-0" ,"Pf3D7_13_v3-2842226-2842301__var-0" ,
"Pf3D7_13_v3-2842001-2842301__var-2" ,"Pf3D7_13_v3-2842251-2842351__var-0" ,"Pf3D7_13_v3-2842301-2842376__var-0" ,"Pf3D7_13_v3-2842251-2842401__var-0" ,
"Pf3D7_13_v3-2842251-2842451__var-0" ,"Pf3D7_13_v3-2842426-2842501__var-0" ,"Pf3D7_13_v3-2842476-2842688__var-0" ,"Pf3D7_13_v3-2842476-2842688__var-1" ,
"Pf3D7_13_v3-2842476-2842688__var-2" ,"Pf3D7_13_v3-2842515-2842804__var-2" ,"Pf3D7_13_v3-2843108-2843446__var-0" ,"Pf3D7_13_v3-2843108-2843446__var-1" ,
"Pf3D7_13_v3-2843108-2843446__var-2" ,"Pf3D7_13_v3-2843108-2843446__var-3" ,"Pf3D7_13_v3-2843108-2843446__var-4" ,"Pf3D7_13_v3-2843108-2843446__var-5" ,
"Pf3D7_13_v3-2843641-2843863__var-0" ,"Pf3D7_13_v3-2843641-2843863__var-1" ,"Pf3D7_13_v3-2843930-2844101__var-0" ,"Pf3D7_13_v3-2843930-2844101__var-1" ,
"Pf3D7_13_v3-2844247-2844785__var-0" ,"Pf3D7_13_v3-2844247-2844785__var-1" ,"Pf3D7_13_v3-2844247-2844785__var-2" ,"Pf3D7_13_v3-2844247-2844785__var-3" ,
"Pf3D7_13_v3-2844247-2844785__var-4" ,"Pf3D7_13_v3-2844247-2844785__var-5" )
# Copy of the deletion-call table read from an absolute, machine-specific path.
# NOTE(review): `hold` is not referenced again in the visible code; confirm it
# is still needed.
hold <- readr::read_tsv(
  "/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/hrps/windowSelectionSurroundingHRPs/Windows_Surrounding_HRP2_3_deletions/windowAnalysis/allMeta_HRP2_HRP3_deletionCalls.tab.txt"
)
#### filter ends chr08 -begin
# For samples called HRP2-deleted, count per sample how many chromosome 8 end
# windows (endRegions_08) have a rounded normalised coverage of 0.
# noCovPerc divides by the full length of endRegions_08, not by the number of
# windows actually present for the sample.
# The marker is computed row by row, so no grouping is needed before it.
allBasicInfo_filt_endsNoCovSel_08 <- allBasicInfo_filt %>%
  filter(`#chrom` == "Pf3D7_08_v3") %>%
  filter(sample %in% previousDeletionCalls_hrp2_del$sample) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(name %in% endRegions_08) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker)) %>%
  mutate(noCovPerc = noCov / length(endRegions_08))

# Samples where at least 95% of the end windows have no coverage.
allBasicInfo_filt_endsNoCovSel_08_filt <- allBasicInfo_filt_endsNoCovSel_08 %>%
  # filter(noCov == length(endRegions_08))
  filter(noCovPerc >= 0.95)

# Same count, but a window is marked (1) unless it has readTotal > 10 and
# success is TRUE. The summary columns keep the noCov / noCovPerc names.
allBasicInfo_filt_endsNoSuccessSel_08 <- allBasicInfo_filt %>%
  filter(`#chrom` == "Pf3D7_08_v3") %>%
  filter(sample %in% previousDeletionCalls_hrp2_del$sample) %>%
  mutate(marker = ifelse(readTotal > 10 & success, 0, 1)) %>%
  filter(name %in% endRegions_08) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker)) %>%
  mutate(noCovPerc = noCov / length(endRegions_08))

allBasicInfo_filt_endsNoSuccessSel_08_filt <- allBasicInfo_filt_endsNoSuccessSel_08 %>%
  # filter(noCov == length(endRegions_08))
  filter(noCovPerc >= 0.95)

# Samples that pass both the no-coverage and the no-success criterion.
samples_chr08_noCovNoSucEnds <- intersect(
  sort(allBasicInfo_filt_endsNoCovSel_08_filt$sample),
  sort(allBasicInfo_filt_endsNoSuccessSel_08_filt$sample)
)
#### filter ends chr08 -end
#### filter ends chr13 -begin
# For samples called HRP3-deleted, count per sample how many chromosome 13 end
# windows (endRegions_13) have a rounded normalised coverage of 0.
# noCovPerc divides by the full length of endRegions_13, not by the number of
# windows actually present for the sample.
# The marker is computed row by row, so no grouping is needed before it.
allBasicInfo_filt_endsNoCovSel_13 <- allBasicInfo_filt %>%
  filter(`#chrom` == "Pf3D7_13_v3") %>%
  filter(sample %in% previousDeletionCalls_hrp3_del$sample) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(name %in% endRegions_13) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker)) %>%
  mutate(noCovPerc = noCov / length(endRegions_13))

# Samples where at least 95% of the end windows have no coverage.
allBasicInfo_filt_endsNoCovSel_13_filt <- allBasicInfo_filt_endsNoCovSel_13 %>%
  # filter(noCov == length(endRegions_13))
  filter(noCovPerc >= 0.95)

# Same count, but a window is marked (1) unless it has readTotal > 10 and
# success is TRUE. The summary columns keep the noCov / noCovPerc names.
allBasicInfo_filt_endsNoSuccessSel_13 <- allBasicInfo_filt %>%
  filter(`#chrom` == "Pf3D7_13_v3") %>%
  filter(sample %in% previousDeletionCalls_hrp3_del$sample) %>%
  mutate(marker = ifelse(readTotal > 10 & success, 0, 1)) %>%
  filter(name %in% endRegions_13) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker)) %>%
  mutate(noCovPerc = noCov / length(endRegions_13))

allBasicInfo_filt_endsNoSuccessSel_13_filt <- allBasicInfo_filt_endsNoSuccessSel_13 %>%
  # filter(noCov == length(endRegions_13))
  filter(noCovPerc >= 0.95)

# Samples that pass both the no-coverage and the no-success criterion.
samples_chr13_noCovNoSucEnds <- intersect(
  sort(allBasicInfo_filt_endsNoCovSel_13_filt$sample),
  sort(allBasicInfo_filt_endsNoSuccessSel_13_filt$sample)
)
#### filter ends chr13 -end
#### filter ends chr11 -begin
# For samples flagged possiblyChr11Deleted, count per sample how many
# chromosome 11 end windows (endRegions_11) have a rounded normalised coverage
# of 0. noCovPerc divides by the full length of endRegions_11, not by the
# number of windows actually present for the sample.
# The marker is computed row by row, so no grouping is needed before it.
allBasicInfo_filt_endsNoCovSel_11 <- allBasicInfo_filt %>%
  filter(`#chrom` == "Pf3D7_11_v3") %>%
  filter(sample %in% previousDeletionCalls_chr11_del$sample) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(name %in% endRegions_11) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker)) %>%
  mutate(noCovPerc = noCov / length(endRegions_11))

# Samples where at least 95% of the end windows have no coverage.
allBasicInfo_filt_endsNoCovSel_11_filt <- allBasicInfo_filt_endsNoCovSel_11 %>%
  # filter(noCov == length(endRegions_11))
  filter(noCovPerc >= 0.95)

# Same count, but a window is marked (1) unless it has readTotal > 10 and
# success is TRUE. The summary columns keep the noCov / noCovPerc names.
allBasicInfo_filt_endsNoSuccessSel_11 <- allBasicInfo_filt %>%
  filter(`#chrom` == "Pf3D7_11_v3") %>%
  filter(sample %in% previousDeletionCalls_chr11_del$sample) %>%
  mutate(marker = ifelse(readTotal > 10 & success, 0, 1)) %>%
  filter(name %in% endRegions_11) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker)) %>%
  mutate(noCovPerc = noCov / length(endRegions_11))

allBasicInfo_filt_endsNoSuccessSel_11_filt <- allBasicInfo_filt_endsNoSuccessSel_11 %>%
  # filter(noCov == length(endRegions_11))
  filter(noCovPerc >= 0.95)

# Samples that pass both the no-coverage and the no-success criterion.
samples_chr11_noCovNoSucEnds <- intersect(
  sort(allBasicInfo_filt_endsNoCovSel_11_filt$sample),
  sort(allBasicInfo_filt_endsNoSuccessSel_11_filt$sample)
)
#### filter ends chr11 -end
# Final set of samples to retain (de-duplicated), and its complement among the
# samples present in allBasicInfo_filt.
# NOTE(review): the argument list of c() is empty as written, which makes
# samplesToKeep NULL; every sample would then land in samplesToRemove and the
# later `sample %in% samplesToKeep` filters would keep nothing. The intended
# contents are not recoverable from this file (presumably the
# samples_chr*_noCovNoSucEnds vectors plus the non-deleted samples) - restore
# them before running.
samplesToKeep = c (
) %>%
unique ()
# Samples in allBasicInfo_filt that are not in samplesToKeep (one-column table).
samplesToRemove = allBasicInfo_filt %>%
select (sample) %>%
unique () %>%
filter (sample %!in% samplesToKeep)
# Rebuild the deletion calls from the initial call table, restricted to
# samplesToKeep, and write them out.
#  - rows whose sample name matches "SPT" AND that are flagged
#    possiblyChr11Deleted are dropped
#  - "South East Asia - East" in `country` is replaced with "Cambodia"
#  - hrpCall combines the two deletion flags; rows where either flag is NA
#    fall through to "pfhrp2+/pfhrp3+"
# The output file name is the same one read at the top of this script to build
# the first version of previousDeletionCalls.
previousDeletionCalls <- readr::read_tsv("initial_allMeta_HRP2_HRP3_deletionCalls.tab.txt") %>%
  filter(sample %in% samplesToKeep) %>%
  # filter(country %!in% c("Bangladesh", "Mauritania", "Myanmar", "The Gambia")) %>%
  filter(!(grepl("SPT", sample) & possiblyChr11Deleted)) %>%
  # filter(BiologicalSample %!in% coiCalls_poly$sample) %>%
  mutate(country = gsub("South East Asia - East", "Cambodia", country)) %>%
  mutate(hrpCall = case_when(
    possiblyHRP2Deleted & possiblyHRP3Deleted ~ "pfhrp2-/pfhrp3-",
    possiblyHRP2Deleted & !possiblyHRP3Deleted ~ "pfhrp2-/pfhrp3+",
    !possiblyHRP2Deleted & possiblyHRP3Deleted ~ "pfhrp2+/pfhrp3-",
    TRUE ~ "pfhrp2+/pfhrp3+"
  ))
write_tsv(previousDeletionCalls, "allMeta_HRP2_HRP3_deletionCalls.tab.txt")
# Refresh the per-flag subsets from the rebuilt deletion calls.
previousDeletionCalls_hrp2_del <- filter(previousDeletionCalls, possiblyHRP2Deleted)
previousDeletionCalls_hrp3_del <- filter(previousDeletionCalls, possiblyHRP3Deleted)
previousDeletionCalls_chr11_del <- filter(previousDeletionCalls, possiblyChr11Deleted)
# Restrict the window table to the retained samples.
allBasicInfo_filt <- filter(allBasicInfo_filt, sample %in% samplesToKeep)

# Wide table: one row per sample, one column per genomicID, cells holding the
# rounded normalised coverage. group_by() with no arguments clears any
# existing grouping before reshaping.
allBasicInfo_filt_sp <- allBasicInfo_filt %>%
  group_by() %>%
  select(sample, genomicID, perBaseCoverageNormRounded) %>%
  spread(genomicID, perBaseCoverageNormRounded)

# Numeric matrix of everything except the first (sample) column, with sample
# names as row names and values above 2 capped at 2.
allBasicInfo_filt_sp_mat <- as.matrix(
  allBasicInfo_filt_sp[, 2:ncol(allBasicInfo_filt_sp)]
)
rownames(allBasicInfo_filt_sp_mat) <- allBasicInfo_filt_sp$sample
allBasicInfo_filt_sp_mat[allBasicInfo_filt_sp_mat > 2] <- 2

# Re-order the region annotation to follow the matrix columns and the
# metadata to follow the matrix rows.
regions <- regions[match(colnames(allBasicInfo_filt_sp_mat), regions$genomicID), ]
metaSelected <- meta[match(allBasicInfo_filt_sp$sample, meta$sample), ]
# Metadata subsets by deletion flag; the last one requires both flags.
metaSelected_hrp2_deleted <- filter(metaSelected, possiblyHRP2Deleted)
metaSelected_hrp3_deleted <- filter(metaSelected, possiblyHRP3Deleted)
metaSelected_hrp2_and_hrp3_deleted <- filter(
  metaSelected,
  possiblyHRP2Deleted,
  possiblyHRP3Deleted
)

# Lookup between window name and genomicID.
regions_key <- select(regions, name, genomicID)
## Writing out deletion calls
Processing the metadata for the deletion calls.
# Attach the region annotation to every window row (natural join on the
# shared columns).
allBasicInfo_filt_counting <- allBasicInfo_filt %>%
  left_join(regions)

# Per sample and chromosome, for the windows located after the shared region:
# markerSum = number of windows with rounded normalised coverage > 0,
# n = number of windows, markerFrac = markerSum / n.
allBasicInfo_filt_counting_sum <- allBasicInfo_filt_counting %>%
  group_by(sample, `#chrom`, afterHomologousRegion) %>%
  mutate(marker = perBaseCoverageNormRounded > 0) %>%
  summarise(
    markerSum = sum(marker),
    n = n()
  ) %>%
  mutate(markerFrac = markerSum / n) %>%
  filter(afterHomologousRegion)

# Wide form: one row per sample, one markerFrac column per chromosome.
# group_by() with no arguments clears the grouping left by summarise().
allBasicInfo_filt_counting_sum_sp <- allBasicInfo_filt_counting_sum %>%
  group_by() %>%
  select(sample, `#chrom`, markerFrac) %>%
  spread(`#chrom`, markerFrac)
# Among HRP3-deleted samples, label the deletion pattern from the fraction of
# covered windows after the shared region on chromosome 13:
# below 0.10 -> "Pattern 1", otherwise "Pattern 2".
allBasicInfo_filt_counting_sum_sp_hrp3Deleted <- allBasicInfo_filt_counting_sum_sp %>%
  filter(sample %in% metaSelected_hrp3_deleted$sample) %>%
  mutate(
    HRP3_deletionPattern = ifelse(Pf3D7_13_v3 < 0.10, "Pattern 1", "Pattern 2")
  )

# Add the pattern label to the deletion calls (natural join on `sample`).
previousDeletionCalls <- left_join(
  previousDeletionCalls,
  allBasicInfo_filt_counting_sum_sp_hrp3Deleted %>%
    group_by() %>%
    select(HRP3_deletionPattern, sample)
)
# Export all deletion calls with their SRA run accessions attached.
previousDeletionCalls %>%
  # filter(hrpCall != "pfhrp2+/pfhrp3+") %>%
  left_join(select(masterTable, sample, SRARuns)) %>%
  write_tsv(file = "allMetaDeletionCalls.tab.txt")

# Same export restricted to the HRP3 "Pattern 2" samples.
previousDeletionCalls %>%
  # filter(hrpCall != "pfhrp2+/pfhrp3+") %>%
  left_join(select(masterTable, sample, SRARuns)) %>%
  filter(HRP3_deletionPattern == "Pattern 2") %>%
  write_tsv(file = "allMetaDeletionCalls_Hrp3pattern2.tab.txt")
# counts per country
# Field samples with at least one deletion call, counted per
# country / region / secondaryRegion / hrpCall.
previousDeletionCalls_countryHRPCallSum <- previousDeletionCalls %>%
  filter(hrpCall != "pfhrp2+/pfhrp3+", IsFieldSample) %>%
  group_by(country, region, secondaryRegion, hrpCall) %>%
  count()

# Field samples with an HRP3 deletion pattern, counted per
# country / region / secondaryRegion / pattern.
previousDeletionCalls_countryHRP3CallSum <- previousDeletionCalls %>%
  filter(!is.na(HRP3_deletionPattern), IsFieldSample) %>%
  group_by(country, region, secondaryRegion, HRP3_deletionPattern) %>%
  count()

write_tsv(previousDeletionCalls_countryHRPCallSum, "HRP_deletion_calls_by_country.tab.txt")
write_tsv(previousDeletionCalls_countryHRP3CallSum, "HRP3_deletionPattern_calls_by_country.tab.txt")
# counts per region
# Field samples with at least one deletion call, counted per
# region / secondaryRegion / hrpCall.
previousDeletionCalls_regionHRPCallSum <- previousDeletionCalls %>%
  filter(hrpCall != "pfhrp2+/pfhrp3+", IsFieldSample) %>%
  group_by(region, secondaryRegion, hrpCall) %>%
  count()

# Field samples with an HRP3 deletion pattern, counted per
# region / secondaryRegion / pattern.
previousDeletionCalls_regionHRP3CallSum <- previousDeletionCalls %>%
  filter(!is.na(HRP3_deletionPattern), IsFieldSample) %>%
  group_by(region, secondaryRegion, HRP3_deletionPattern) %>%
  count()

write_tsv(previousDeletionCalls_regionHRPCallSum, "HRP_deletion_calls_by_region.tab.txt")
write_tsv(previousDeletionCalls_regionHRP3CallSum, "HRP3_deletionPattern_calls_by_region.tab.txt")
# counts per continent
# Field samples with at least one deletion call, counted per
# secondaryRegion / hrpCall.
previousDeletionCalls_continentHRPCallSum <- previousDeletionCalls %>%
  filter(hrpCall != "pfhrp2+/pfhrp3+", IsFieldSample) %>%
  group_by(secondaryRegion, hrpCall) %>%
  count()

# Field samples with an HRP3 deletion pattern, counted per
# secondaryRegion / pattern.
previousDeletionCalls_continentHRP3CallSum <- previousDeletionCalls %>%
  filter(!is.na(HRP3_deletionPattern), IsFieldSample) %>%
  group_by(secondaryRegion, HRP3_deletionPattern) %>%
  count()

write_tsv(previousDeletionCalls_continentHRPCallSum, "HRP_deletion_calls_by_continent.tab.txt")
write_tsv(previousDeletionCalls_continentHRP3CallSum, "HRP3_deletionPattern_calls_by_continent.tab.txt")
# total
# Field samples with at least one deletion call, counted per hrpCall.
previousDeletionCalls_HRPCallSum <- previousDeletionCalls %>%
  filter(hrpCall != "pfhrp2+/pfhrp3+", IsFieldSample) %>%
  group_by(hrpCall) %>%
  count()

# Field samples with an HRP3 deletion pattern, counted per pattern, plus the
# overall total across patterns in every row.
previousDeletionCalls_HRP3CallSum <- previousDeletionCalls %>%
  filter(!is.na(HRP3_deletionPattern), IsFieldSample) %>%
  group_by(HRP3_deletionPattern) %>%
  count() %>%
  ungroup() %>%
  mutate(total = sum(n))

write_tsv(previousDeletionCalls_HRPCallSum, "HRP_deletion_calls_total.tab.txt")
write_tsv(previousDeletionCalls_HRP3CallSum, "HRP3_deletionPattern_calls_total.tab.txt")
