Analysis Surrounding HRP2 and HRP3 deletions
  • Home
  • Final Manuscript
  • Window Analysis
    • Windows
    • Running Haplotype Reconstruction on Windows
    • Genomic Locations Of Final Windows

    • Window analysis by coverage
    • Processing Coverage Initial Windows
    • Processing Coverage on Sub Windows

    • Window analysis of deletion patterns
    • Telomere healing
    • Processing Samples with HRP2 Deletions TARE1
    • Processing Samples with Chr11 Deletions TARE1
    • Processing Samples for chr13 TARE1 presence
    • pfmdr1 duplication
    • Processing Samples with pfhrp3 13-5++ deletion pattern

    • Final Coverage Windows
    • Processing Coverage on Sub Windows - final

    • Window analysis by sequence/variation
    • Plotting haplotype variation within regions

    • Analysis by SNP variant analysis
    • Calling variants and Estimating COI
    • Plotting BiAllelic Variant Plots
  • HB3/SD01 Longreads analysis
    • Set up
    • Creating Hybrid genomes

    • Spanning Raw Reads analysis
    • Processing Spanning Reads
    • SD01 spanning specific

    • HB3
    • Processing chr11 and chr13
    • Final Process Assembly

    • SD01
    • Running SD01 assemblies
    • Processing SD01 assemblies

    • Both
    • Illumina against HB3/SD01 Assemblies
    • Comparison To 3D7 Simplified View
  • rRNA Segmental Duplications

    • Chr11/13 Duplicated Region
    • Characterizing Duplicated Region
  • Related Genomic Regions Vis
    • Analysis
    • Finding shared regions genome wide
    • Mapping out surrounding Genes on Assembled Strains

    • Misc
    • Plotting HRPs Tandem Repeats
    • Info on all rRNA
  • Comparing to related Plasmodiums
    • Comparing to all 6 Plasmodium Laverania
    • Comparing to all 6 Plasmodium Laverania Gene Arrangements chr05,07,08,11,13
    • Comparing to HRP2/3 falciparum sequences
  • References
    • Getting Raw Data References
    • References
    • R Session and Commandline tools info

Contents

  • Read in meta and genome whole coverage and Finding low coverage in HRP gene areas
  • Heatmap of coverage
  • by chrom
    • All
    • hrp2 deleted
    • hrp3 deleted
    • chr11 deleted
  • meta data counts

Processing Coverage

  • Show All Code
  • Hide All Code

  • View Source

Read in meta and genome whole coverage and Finding low coverage in HRP gene areas

Processing coverage of the initial windows to determine samples with possible deletions of either HRP2, HRP3 or the end of chr11.

Code
allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt")
allMeta_filt  = allMeta %>% 
  filter(is.na(site) | site != "LabContaminated", 
        PreferredSample)

inputFnp = "data/finalHRPII_HRPIII_windows_withTuned/popClustering/reports/allBasicInfo.tab.txt.gz"
outputFnp = "initialCoverageDataObjects.Rdata"
#file.remove(outputFnp)
if(!file.exists(outputFnp) | file.info(inputFnp)$ctime > file.info(outputFnp)$ctime){
  
  cov = readr::read_tsv("../meta/allCov_summaryStats.tab.txt.gz")
  allBasicInfo = readr::read_tsv(inputFnp) %>% 
    filter(sample %fin% allMeta_filt$sample)
  
  allBasicInfo = allBasicInfo %>% 
    left_join(cov)
  
  allBasicInfo_sampleCoverageStats = allBasicInfo %>% 
    group_by(sample) %>% 
    summarise(medianCov = median(perBaseCoverage), 
              meanCov = mean(perBaseCoverage)) 
  
  allBasicInfo_sampleCoverageStats_highCov = allBasicInfo_sampleCoverageStats %>% 
    filter(medianCov > 5)
  
  allBasicInfo_higCov = allBasicInfo %>% 
    filter(sample %in% allBasicInfo_sampleCoverageStats_highCov$sample)
  
  
  allBasicInfo_higCov_covLessThan1Sum = allBasicInfo_higCov %>% 
    mutate(marker = ifelse(perBaseCoverage < 1, 1, 0)) %>% 
    group_by(sample, `#chrom`) %>% 
    summarise(lowCov = sum(marker))
  
  #these below regions have variable cross mapping in other strains  
  chrom13FilterRegions = c("Pf3D7_13_v3-2852656-2852831", "Pf3D7_13_v3-2853081-2853381", "Pf3D7_13_v3-2853978-2854303", "Pf3D7_13_v3-2855203-2855353")
  
  # filter coverage data to specific regions of interest  
  allBasicInfo_higCov_areasOfInterest = allBasicInfo_higCov %>% 
    filter((`#chrom` == "Pf3D7_13_v3" & start > 2840426 & start < 2842301) |
           (`#chrom` == "Pf3D7_11_v3" & start > 1982395) |
           (`#chrom` == "Pf3D7_08_v3" & start >= 1374235 & start < 1375485))
  
  cov_highMedCov = cov %>% 
    filter(medianPerBaseCov_inGenes > 100)
  
  # now process to determine which samples potentially have deleted regions of interest base on coverage 
  allBasicInfo_higCov_areasOfInterest = allBasicInfo_higCov_areasOfInterest %>% 
    mutate(perBaseCoverage = ifelse(readTotal <= ifelse(sample %in% cov_highMedCov$sample, 20, 10) & readTotalUsed == 0 & fullySpanningReads <=1, 0, perBaseCoverage))
  allBasicInfo_higCov_areasOfInterest$processedMarker = 0
  allBasicInfo_higCov_areasOfInterest$processedSuccessMarker = 0
  for(row in 2:(nrow(allBasicInfo_higCov_areasOfInterest) - 1 )){
    #print(row)
    if(allBasicInfo_higCov_areasOfInterest$sample[row] == allBasicInfo_higCov_areasOfInterest$sample[row + 1]
       & allBasicInfo_higCov_areasOfInterest$`#chrom`[row] == allBasicInfo_higCov_areasOfInterest$`#chrom`[row + 1]
       & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row - 1] == 0
       & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row] == 0
       & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row + 1] == 0
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row - 1] < 0.1
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row] < 0.1
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row + 1] <0.1
       ){
      allBasicInfo_higCov_areasOfInterest$processedMarker[row] = 1
    }  
      if(allBasicInfo_higCov_areasOfInterest$sample[row] == allBasicInfo_higCov_areasOfInterest$sample[row + 1]
       & allBasicInfo_higCov_areasOfInterest$`#chrom`[row] == allBasicInfo_higCov_areasOfInterest$`#chrom`[row + 1]
       & allBasicInfo_higCov_areasOfInterest$success[row - 1] == F
       & allBasicInfo_higCov_areasOfInterest$success[row] == F
       & allBasicInfo_higCov_areasOfInterest$success[row + 1] == F
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row - 1] < 0.1
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row] < 0.1
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row + 1] <0.1
       ){
      allBasicInfo_higCov_areasOfInterest$processedSuccessMarker[row] = 1
       }  
  }
  save(cov,
       allBasicInfo,
       allBasicInfo_higCov_areasOfInterest,
       file = outputFnp)
} else {
  load(outputFnp)
}


allBasicInfo_higCov_areasOfInterest_covLessThan1Sum = allBasicInfo_higCov_areasOfInterest %>%
  #filter(!is.na(extraField0)) %>%
  # mutate(marker = ifelse(perBaseCoverage < 0.1, 1, 0)) %>%
  mutate(marker = ifelse(perBaseCoverage == 0, 1, 0)) %>%
  mutate(successMarker = ifelse(!success, 1, 0)) %>%
  group_by(sample, `#chrom`) %>%
  summarise(
    lowCov = sum(marker),
    lowCovStrict = sum(processedMarker),
    lowSuccess = sum(successMarker),
    lowSuccessStrict = sum(processedSuccessMarker),
    total = n()
    
  ) %>%
  mutate(
    lowCovPerc = lowCov / total,
    lowCovStrictPerc = lowCovStrict / (total - 2),
    lowSuccessPerc = lowSuccess / total,
    lowSuccessStrictPerc = lowSuccessStrict / (total - 2)
  )
  
# there are several samples which due to high variability in coverage, all of which are sWGA samples or in other artifacts introduced by WGS processes that can't be easily filtered out programatically so
# they are removed based on analysis by hand 

handRemovedSamples = c(
  # chr08 hand investigations 
  "P178-D30", "PD1192-C", "IGS-BRA-018sA", "PA0331-CW", "T367", "RCN09021","RCN13559", "RCN09011", "RCN11871", "RCN02972", "SPT35541", "T508",
                       
  # chr08 hand investigations on finer windows 
  "SPT24525", "SPT24643", "SPT24651", "SPT24541", "SPT24646", "SPT24530", "SPT24607", "SPT24650",
  
  # chr13 hand investigations 
   "SPT22213", "RCN08027", "RCN12032", "SPT35322", "SPT35228", "SPT35354","RCN02907", "RCN02953", "RCN08023", 
  "PF1157-Cx6", "IGS-BRA-016sA", "IGS-BRA-018sA", 
  "NHP4374", "PA0809-CW", "SPT21769", "SPT35543", "T498", "T395", 
  "RCN02816", "RCN02821", "RCN02893", "RCN03336", "RCN03536", "RCN02547", "SPT21675", "PA0331-CW", 
  "RCN14050","SPT16994","SPT38383","SPT40863","SPT38318","SPT15609","SPT38344","SPT34334","SPT38314","SPT34321","SPT17972","SPT19692","SPT12080",
  
  # chr13 hand investigations on finer windows 
  "RCN13051", "PR0262-C", "SPT00902", "SPT00903", "SPT00898", "SPT26258", "SPT26293", "SPT26254",
  "PW0099-C",
  
  "SPT24499", "RCN13387", "SPT21927", "RCN08119",
  
  "RCN13415", "RCN13414", "RCN13413",
  
  "RCN13391", "RCN13388", "RCN11770", "RCN11771", "RCN13416", "SPT21927", "RCN09061", "RCN11766", "RCN13490", "SPT25144", "RCN09058", "RCN09027", "RCN09064",
  
  
  "RCN12064", "RCN11497", "RCN02091", "SPT18468", "RCN13544", "RCN11601", "SPT24457", "SPT43231", "RCN02894", "RCN02146", "RCN02365", "RCN02171",
  "SPT15757", "SPT35260",
  
  # chr11 hand investigations 
  "SPT43093", "RCN13306","RCN13447","RCN11811","SPT43196","RCN11158","RCN11805","RCN13308","RCN13309","RCN13269","RCN13285","RCN13267","SPT43280","RCN02046","RCN13296",
  "RCN13323","RCN13310","RCN09041","RCN09043","RCN09042","RCN09071","RCN11693","RCN11696","RCN11687","RCN11644","RCN09157","RCN10396","RCN11167","RCN11223",
  "RCN10306","RCN00127","RCN00266","RCN09029","RCN00129","RCN02982","RCN03040","PM0744-CW","RCN08298","RCN11172","RCN11234","RCN10247","RCN10297","PA0295-CW",
  "PR0006-CW","NHP4224","NHP4850","RCN11636","RCN13441","RCN13324","RCN09074","RCN13437","RCN09073","RCN11494","RCN11611","RCN13515","RCN13523","RCN11787",
  "RCN13532","RCN02848","SPT35212","RCN11616","RCN13336","RCN13421","SPT42050","RCN13420","RCN13481","SPT36128","SPT34434","SPT36696","SPT38311","RCN13417",
  "SPT34254","RCN02341","RCN13442","RCN13911","SPT19293","SPT34315","SPT34330","SPT34327","SPT34335"
  )

# further removing SWGA samples and control mixtures 
allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum %>% 
  filter(!grepl("^PG", sample)) %>% 
  filter(!grepl("^[0-9]S[0-9]-[0-9]C[0-9]+", sample)) %>% 
  filter(!grepl("^SenT", sample)) %>%
  filter(!grepl("^80[0-9]+", sample)) %>%
  filter(!grepl("^OHP[0-9]+", sample)) %>%
  filter(!grepl("^ERR", sample)) %>%
  filter(!(sample %in% handRemovedSamples)) %>%
  filter(lowCovPerc >= .666 & lowCovStrictPerc >= 0.33) 
# %>% 
#   filter((lowCovPerc >= .666 & lowCovStrictPerc >= 0.50) | lowSuccessPerc > 0.95)
# 
allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_opposite = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum %>% 
  filter(!grepl("^PG", sample)) %>% 
  filter(!grepl("^[0-9]S[0-9]-[0-9]C[0-9]+", sample)) %>% 
  filter(!grepl("^SenT", sample)) %>%
  filter(!grepl("^80[0-9]+", sample)) %>%
  filter(!grepl("^OHP[0-9]+", sample)) %>%
  filter(!grepl("^ERR", sample)) %>%
  filter(!(sample %in% handRemovedSamples))  %>%
  filter(!(lowCovPerc >= .666 & lowCovStrictPerc >= 0.33) )
# %>% 
#   filter(!((lowCovPerc >= .666 & lowCovStrictPerc >= 0.50) | lowSuccessPerc > 0.95) )
# 
cat(unique(c(allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_opposite$sample, 
             allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov$sample)), file = "samplesCovered.txt", sep = "\n")

allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom08 = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov %>% 
  filter(`#chrom` == "Pf3D7_08_v3")
allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom13 = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov %>% 
  filter(`#chrom` == "Pf3D7_13_v3")
allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom11 = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov %>% 
  filter(`#chrom` == "Pf3D7_11_v3")


allBasicInfo_highCov_butLowInAreasofInterest = allBasicInfo %>%
  filter(sample %fin% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov$sample) %>%
  group_by(sample) %>%
  mutate(perBaseCoverageNorm = perBaseCoverage/medianPerBaseCov_inGenes)%>%
  mutate(perBaseCoverageNormRounded = round(perBaseCoverageNorm/ 0.5)*0.5) %>%
  mutate(id = paste0(`#chrom`, "-", start, "-", end)) 

endRegions = c(
  "Pf3D7_08_v3-1382255-1382505","Pf3D7_08_v3-1382680-1383155","Pf3D7_08_v3-1384030-1384251","Pf3D7_08_v3-1384316-1384663","Pf3D7_08_v3-1384837-1385370",
  #"Pf3D7_08_v3-1385625-1385741",
  "Pf3D7_08_v3-1385951-1386323","Pf3D7_08_v3-1386518-1386680","Pf3D7_08_v3-1386739-1387414","Pf3D7_08_v3-1387782-1387982", 
  
  "Pf3D7_11_v3-1997276-1997425","Pf3D7_11_v3-1997674-1997884","Pf3D7_11_v3-1997944-1998371","Pf3D7_11_v3-1998638-1998759","Pf3D7_11_v3-1998833-1999151",
  "Pf3D7_11_v3-1999245-1999390","Pf3D7_11_v3-1999600-1999713","Pf3D7_11_v3-2000687-2000889","Pf3D7_11_v3-2000941-2001239","Pf3D7_11_v3-2001300-2003328", 
  
  "Pf3D7_13_v3-2842251-2842401","Pf3D7_13_v3-2842251-2842451","Pf3D7_13_v3-2842301-2842601","Pf3D7_13_v3-2842476-2842688","Pf3D7_13_v3-2842501-2842651",
  "Pf3D7_13_v3-2842515-2842804","Pf3D7_13_v3-2843108-2843446","Pf3D7_13_v3-2843641-2843863","Pf3D7_13_v3-2843930-2844101","Pf3D7_13_v3-2844247-2844785"
  
  # "Pf3D7_13_v3-2842501-2842576","Pf3D7_13_v3-2842501-2842601","Pf3D7_13_v3-2842501-2842651","Pf3D7_13_v3-2842515-2842804","Pf3D7_13_v3-2842576-2842651",
  # "Pf3D7_13_v3-2842601-2842688","Pf3D7_13_v3-2843108-2843446","Pf3D7_13_v3-2843641-2843863","Pf3D7_13_v3-2843930-2844101","Pf3D7_13_v3-2844247-2844785"
  # 
  )

##
## after careful inspection of the samples, all potential true deletion appear to include the whole chromosome from the point of interest so in order to clean up the samples will force samples 
## to have no coverage from the point of interest on to get high quality examples of chromosomal deletions of areas of interest 

endRegions_08 = c("Pf3D7_08_v3-1375557-1375750","Pf3D7_08_v3-1378006-1378662","Pf3D7_08_v3-1379154-1379915","Pf3D7_08_v3-1380194-1380328","Pf3D7_08_v3-1382255-1382505",
                  "Pf3D7_08_v3-1382680-1383155","Pf3D7_08_v3-1384030-1384251","Pf3D7_08_v3-1384316-1384663","Pf3D7_08_v3-1384837-1385370",
                  #"Pf3D7_08_v3-1385625-1385741",
                  "Pf3D7_08_v3-1385951-1386323","Pf3D7_08_v3-1386518-1386680","Pf3D7_08_v3-1386739-1387414","Pf3D7_08_v3-1387782-1387982")

endRegions_11 = c("Pf3D7_11_v3-1991347-1992851","Pf3D7_11_v3-1992907-1993669","Pf3D7_11_v3-1993833-1994061","Pf3D7_11_v3-1994139-1994363","Pf3D7_11_v3-1995234-1995394",
                  "Pf3D7_11_v3-1995459-1995614","Pf3D7_11_v3-1995678-1996112","Pf3D7_11_v3-1996349-1996650","Pf3D7_11_v3-1996745-1997019","Pf3D7_11_v3-1997095-1997221",
                  "Pf3D7_11_v3-1997276-1997425","Pf3D7_11_v3-1997674-1997884","Pf3D7_11_v3-1997944-1998371","Pf3D7_11_v3-1998638-1998759","Pf3D7_11_v3-1998833-1999151",
                  "Pf3D7_11_v3-1999245-1999390","Pf3D7_11_v3-1999600-1999713","Pf3D7_11_v3-2000687-2000889","Pf3D7_11_v3-2000941-2001239","Pf3D7_11_v3-2001300-2003328")

endRegions_13 = c("Pf3D7_13_v3-2841776-2841926","Pf3D7_13_v3-2842001-2842301","Pf3D7_13_v3-2842026-2842226","Pf3D7_13_v3-2842038-2842461","Pf3D7_13_v3-2842101-2842251",
                  "Pf3D7_13_v3-2842251-2842401","Pf3D7_13_v3-2842251-2842451","Pf3D7_13_v3-2842301-2842601","Pf3D7_13_v3-2842476-2842688","Pf3D7_13_v3-2842501-2842651",
                  "Pf3D7_13_v3-2842515-2842804","Pf3D7_13_v3-2843108-2843446","Pf3D7_13_v3-2843641-2843863","Pf3D7_13_v3-2843930-2844101","Pf3D7_13_v3-2844247-2844785")



#### filter ends chr08 -begin
allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_08 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_08_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom08$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(id %in% endRegions_08) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_08_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_08 %>%
  filter(noCov == length(endRegions_08))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_08 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_08_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom08$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(success, 0, 1)) %>%
  filter(id %in% endRegions_08) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_08_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_08 %>%
  filter(noCov == length(endRegions_08))

samples_chr08_noCovNoSucEnds = intersect(
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_08_filt$sample
  ),
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_08_filt$sample
  )
)
#### filter ends chr08 -end

#### filter ends chr11 -begin
allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_11 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_11_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom11$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(id %in% endRegions_11) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_11_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_11 %>%
  filter(noCov == length(endRegions_11))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_11 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_11_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom11$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(success, 0, 1)) %>%
  filter(id %in% endRegions_11) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_11_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_11 %>%
  filter(noCov == length(endRegions_11))

samples_chr11_noCovNoSucEnds = intersect(
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_11_filt$sample
  ),
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_11_filt$sample
  )
)
#### filter ends chr11 -end

#### filter ends chr13 -begin
allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_13 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_13_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom13$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(id %in% endRegions_13) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_13_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_13 %>%
  filter(noCov == length(endRegions_13))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_13 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_13_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom13$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(success, 0, 1)) %>%
  filter(id %in% endRegions_13) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_13_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_13 %>%
  filter(noCov == length(endRegions_13))

samples_chr13_noCovNoSucEnds = intersect(
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_13_filt$sample
  ),
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_13_filt$sample
  )
)
#### filter ends chr13 -end

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel = allBasicInfo_highCov_butLowInAreasofInterest %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(id %in% endRegions) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel %>%
  filter(noCov == 10)

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel = allBasicInfo_highCov_butLowInAreasofInterest %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(success, 0, 1)) %>%
  filter(id %in% endRegions) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel %>%
  filter(noCov == 10)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp = allBasicInfo_highCov_butLowInAreasofInterest %>%
  # filter(sample %in% allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_filt$sample &
  #          sample %in% allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_filt$sample) %>%
  filter(
    sample %in% c(
      samples_chr08_noCovNoSucEnds,
      samples_chr11_noCovNoSucEnds,
      samples_chr13_noCovNoSucEnds
    )
  ) %>%
  select(sample, id, perBaseCoverageNorm) %>%
  spread(id, perBaseCoverageNorm)


# hold = readr::read_tsv(
#   "/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/hrps/windowSelectionSurroundingHRPs/Windows_Surrounding_HRP2_3_deletions/windowAnalysis/allMeta_HRP2_HRP3_deletionCalls.tab.txt"
# )

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat = as.matrix(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp[, 2:ncol(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp)])
rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat) = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp$sample

#cap it at 2 
allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat[allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat>2] =2

chroms = gsub("-.*", "", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat))

# get the regions 
regions = allBasicInfo %>% 
  filter(.$sample[1] == sample) %>% 
  select(1:6, extraField0) %>% 
  unique()%>% 
  mutate(genomicID = paste0(`#chrom`, "-", start, "-", end)) 
# rename them based on importance 
regions = regions%>%
  mutate(id = paste0(`#chrom`, "-", start, "-", end)) %>% 
  arrange(id) %>% 
  mutate(inGene = !is.na(extraField0)) %>% 
  mutate(geneType = ifelse(grepl("histidine-rich protein II", extraField0), "HRP", "other" ) )%>% 
  mutate(geneType = ifelse(grepl("ribosomal RNA", extraField0), "rRNA", geneType ) )%>% 
  mutate(geneType = ifelse(grepl("332", extraField0), "Pf332", geneType ) ) %>%
  mutate(sharedRegion = ifelse((`#chrom` == "Pf3D7_11_v3" &
                                  start >= 1918028 &
                                  end <= 1933288) |
                                 `#chrom` == "Pf3D7_13_v3" &
                                 start >= 2792021 &
                                 end <= 2807295,
                               "shared",
                               "other"
  )) %>% 
  mutate(afterSharedRegion = (`#chrom` == "Pf3D7_11_v3" &
                                  start >= 1933288) |
                                 (`#chrom` == "Pf3D7_13_v3" &
                                 start >= 2807295)) %>%
  mutate(chrom = `#chrom`)%>% 
  mutate(description = gsub(".*description=", "", extraField0)) %>% 
  mutate(description = ifelse("[extraField0=NA]" == extraField0, NA, description)) %>% 
  mutate(description = gsub("]", "", fixed = T, description))


regions = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat), regions$id),]
allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round = round(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat/ 0.5)*0.5
allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt")
allMeta_filt = allMeta %>% 
  filter( sample  %in% samples_chr13_noCovNoSucEnds | 
           sample %in% samples_chr08_noCovNoSucEnds | 
           sample %in% samples_chr11_noCovNoSucEnds ) 

allMeta_filt$possiblyHRP2Deleted = allMeta_filt$sample %in% samples_chr08_noCovNoSucEnds
allMeta_filt$possiblyHRP3Deleted = allMeta_filt$sample %in% samples_chr13_noCovNoSucEnds
allMeta_filt$possiblyChr11Deleted = allMeta_filt$sample %in% samples_chr11_noCovNoSucEnds


metaSelected = allMeta_filt[match(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp$sample, allMeta_filt$sample), ]

Heatmap of coverage

Code
annotationTextSize = 10

library(circlize)
col_fun = colorRamp2(c(0, 1, 2), c(heat.colors(3)))
allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_noLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round
rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_noLabs) = NULL
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_noLabs) = NULL
RowLabs = metaSelected$BiologicalSample
RowLabs[metaSelected$site != "LabIsolate" | is.na(metaSelected$site)] = ""
rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_noLabs) = RowLabs
regionNames = sort(unique(c(metaSelected$country, metaSelected$region, metaSelected$secondaryRegion)))
regionColors = scheme$hex(length(regionNames))
names(regionColors) = regionNames

topAnnoDf  = regions[,c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom")] %>% as.data.frame()
topAnnoColors = createColorListFromDf(topAnnoDf) 

topAnnoColors[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
topAnnoColors[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
topAnnoColors[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))

rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

# rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
#                                 "ASIA" =      tableau_color_pal()(8)[7], 
#                                 "OCEANIA"  =  tableau_color_pal()(8)[2], 
#                                 "S_AMERICA" = tableau_color_pal()(8)[3])
rowAnnoColors[["continent"]] = c("AFRICA"  =   "#E69F00", 
                                 "ASIA" =      "#F0E442", 
                                 "OCEANIA"  =  "#F748A5", 
                                 "S_AMERICA" = "#0072B2")
sideAnno = rowAnnotation(df = rowAnnoDf, 
                         col = rowAnnoColors)

topAnno = HeatmapAnnotation(df = topAnnoDf, 
                            col = topAnnoColors)
sideAnno = rowAnnotation(df = rowAnnoDf, 
                         col = rowAnnoColors)
allCoverageHeatMap = Heatmap(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round, cluster_columns = F, 
        col = col_fun,
        name = "coverage",
        top_annotation = topAnno, 
        right_annotation = sideAnno)

Below is a heatmap of the coverage of these initial windows, meta data for each sample is added to the right for country of origin etc. The top has meta data on the regions covered including if they fall within genes, and specific genes of interest like HRP2/3, pf332 and rRNA regions are marked out.

Coverage is normalized to the genomic coverage in the rest of the genome. 1x coverage is normal coverage as compared to the rest of the gnome. For easier interpretation normalized coverage is capped at 2x coverage to easier see where possible duplication has happened. Coverage is rounded to nearest 0.5. Orange == 1x coverage, yellow == 2x coverage, red == no coverage

Code
print(allCoverageHeatMap)

Code
pdf("test_allHeatmap_initial_windows.pdf", useDingbats = F, width = 20, height = 50)
print(allCoverageHeatMap)
dev.off()
quartz_off_screen 
                2 

by chrom

All

Code
allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt")

allMeta$possiblyHRP2Deleted = allMeta$sample %in% samples_chr08_noCovNoSucEnds
allMeta$possiblyHRP3Deleted = allMeta$sample %in% samples_chr13_noCovNoSucEnds
allMeta$possiblyChr11Deleted = allMeta$sample %in% samples_chr11_noCovNoSucEnds

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round

library(dendsort)
row_dend = dendsort(hclust(dist(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)))

metaSelectedForSorting = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]
metaSelectedForSorting_sel = metaSelectedForSorting %>% 
  select(sample, possiblyHRP2Deleted, possiblyHRP3Deleted, possiblyChr11Deleted)
metaSelectedForSorting_sel_mat = as.matrix(metaSelectedForSorting_sel[,2:ncol(metaSelectedForSorting_sel)])
rownames(metaSelectedForSorting_sel_mat) = metaSelectedForSorting_sel$sample
metaSelectedForSorting_sel_mat_hclust = hclust(dist(metaSelectedForSorting_sel_mat))
metaSelectedForSorting = metaSelectedForSorting %>% 
  left_join(
    tibble(sample = rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)[row_dend$order], 
           coverageOrder = 1:length(row_dend$order)) ) %>% 
  left_join(
    tibble(sample = rownames(metaSelectedForSorting_sel_mat)[hclust(dist(metaSelectedForSorting_sel_mat))$order], 
           deletionOrder = 1:length(hclust(dist(metaSelectedForSorting_sel_mat))$order)) )

metaSelectedForSorting = metaSelectedForSorting %>% 
  arrange(desc(possiblyHRP2Deleted), desc(possiblyHRP3Deleted), desc(possiblyChr11Deleted), coverageOrder)

metaSelectedForSorting = metaSelectedForSorting %>% 
  group_by(sample) %>% 
  mutate(forSortingOld = paste0(possiblyHRP2Deleted, "-", possiblyHRP3Deleted, "-", possiblyChr11Deleted, "-", stringr::str_pad(coverageOrder, side = "left", pad = "0", width = nchar(max(metaSelectedForSorting$coverageOrder))))) %>% 
  mutate(forSorting = paste0(possiblyHRP2Deleted, "-", possiblyHRP3Deleted, "-", possiblyChr11Deleted)) %>% 
  ungroup() %>% 
  mutate(forSorting = factor(forSorting, levels = c("TRUE-FALSE-FALSE","TRUE-TRUE-FALSE", "FALSE-TRUE-FALSE", "FALSE-TRUE-TRUE", "FALSE-FALSE-TRUE"))) %>% 
  arrange(forSorting, coverageOrder)


allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[match(metaSelectedForSorting$sample,rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)),]
metaSelected = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_08_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_11_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_13_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]


rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
                                "ASIA" =      tableau_color_pal()(8)[7], 
                                "OCEANIA"  =  tableau_color_pal()(8)[2], 
                                "S_AMERICA" = tableau_color_pal()(8)[3])

sideAnno = rowAnnotation(
  df = rowAnnoDf,
  col = rowAnnoColors,
  gp = gpar(col = "grey10"),
    simple_anno_size = unit(1.5, "cm"),
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ), 
  gap = unit(0.1, "cm")
)

regions_cols = createColorListFromDf(regions %>% 
                                        select(c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom", "description"))) 

regions_cols[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
regions_cols[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
regions_cols[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))


regions_chr08 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08), regions$genomicID),]
regions_chr11 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11), regions$genomicID),]
regions_chr13 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13), regions$genomicID),]


regions_chr08_df = regions_chr08 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()

topAnno_chr08 = HeatmapAnnotation(
  df = regions_chr08_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = T,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr11_df = regions_chr11 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr11 = HeatmapAnnotation(
  df = regions_chr11_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr13_df = regions_chr13 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr13 = HeatmapAnnotation(
  df = regions_chr13_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)








allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs) = NULL
heatmap_08 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs,
  cluster_columns = F,
  # cluster_rows = row_dend,
  cluster_rows = F, 
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr08,
  left_annotation = sideAnno,
  column_title = "Chr08",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs) = NULL
heatmap_11 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs,
  cluster_columns = F,
  # cluster_rows = row_dend,
  cluster_rows = F, 
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr11,
  #left_annotation = sideAnno,
  column_title = "Chr11",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs) = NULL
heatmap_13 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs,
  cluster_columns = F,
  # cluster_rows = row_dend,
  cluster_rows = F, 
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr13,
  #left_annotation = sideAnno,
  column_title = "Chr13",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)
Code
heatmaps_by_chrom = heatmap_08 + heatmap_11 + heatmap_13
Code
cairo_pdf("test_allHeatmap_initial_windows_byChrom.pdf", width = 20, height = 50)
draw(heatmaps_by_chrom, background = "transparent", merge_legend = TRUE, heatmap_legend_side = "bottom", annotation_legend_side = "bottom", ht_gap = unit(2, "cm"))
dev.off()
quartz_off_screen 
                2 

hrp2 deleted

Code
pf7meta = readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/allSRAData/update_2023_01/Pf7_samples.txt")


allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt") %>% 
  left_join(pf7meta %>% 
              select(Sample, `Sample type`) %>% 
              rename(sample = Sample, 
                     seqType = `Sample type`))

allMeta$possiblyHRP2Deleted = allMeta$sample %in% samples_chr08_noCovNoSucEnds
allMeta$possiblyHRP3Deleted = allMeta$sample %in% samples_chr13_noCovNoSucEnds
allMeta$possiblyChr11Deleted = allMeta$sample %in% samples_chr11_noCovNoSucEnds

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round

selectedSamples = match(samples_chr08_noCovNoSucEnds, 
                                                                                                                                            rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[selectedSamples[!is.na(selectedSamples)], ]

metaSelected = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_08_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_11_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_13_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]


rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted", "seqType")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
                                "ASIA" =      tableau_color_pal()(8)[7], 
                                "OCEANIA"  =  tableau_color_pal()(8)[2], 
                                "S_AMERICA" = tableau_color_pal()(8)[3])

sideAnno = rowAnnotation(
  df = rowAnnoDf,
  col = rowAnnoColors,
  gp = gpar(col = "grey10"),
    simple_anno_size = unit(1.5, "cm"),
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ), 
  gap = unit(0.1, "cm")
)

regions_cols = createColorListFromDf(regions %>% 
                                        select(c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom", "description"))) 

regions_cols[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
regions_cols[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
regions_cols[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))


regions_chr08 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08), regions$genomicID),]
regions_chr11 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11), regions$genomicID),]
regions_chr13 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13), regions$genomicID),]


regions_chr08_df = regions_chr08 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()

topAnno_chr08 = HeatmapAnnotation(
  df = regions_chr08_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = T,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr11_df = regions_chr11 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr11 = HeatmapAnnotation(
  df = regions_chr11_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr13_df = regions_chr13 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr13 = HeatmapAnnotation(
  df = regions_chr13_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)


library(dendsort)
row_dend = dendsort(hclust(dist(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs) = NULL
heatmap_08 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr08,
  left_annotation = sideAnno,
  column_title = "Chr08",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs) = NULL
heatmap_11 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr11,
  #left_annotation = sideAnno,
  column_title = "Chr11",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs) = NULL
heatmap_13 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr13,
  #left_annotation = sideAnno,
  column_title = "Chr13",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)
Code
heatmaps_by_chrom = heatmap_08 + heatmap_11 + heatmap_13
Code
cairo_pdf("test_allHeatmap_initial_windows_byChrom_hrp2_del.pdf", width = 20, height = 50)
draw(heatmaps_by_chrom, background = "transparent", merge_legend = TRUE, heatmap_legend_side = "bottom", annotation_legend_side = "bottom", ht_gap = unit(2, "cm"))
dev.off()
quartz_off_screen 
                2 

hrp3 deleted

Code
pf7meta = readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/allSRAData/update_2023_01/Pf7_samples.txt")


allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt") %>% 
  left_join(pf7meta %>% 
              select(Sample, `Sample type`) %>% 
              rename(sample = Sample, 
                     seqType = `Sample type`))

allMeta$possiblyHRP2Deleted = allMeta$sample %in% samples_chr08_noCovNoSucEnds
allMeta$possiblyHRP3Deleted = allMeta$sample %in% samples_chr13_noCovNoSucEnds
allMeta$possiblyChr11Deleted = allMeta$sample %in% samples_chr11_noCovNoSucEnds

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round

selectedSamples = match(samples_chr13_noCovNoSucEnds, 
                                                                                                                                            rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[selectedSamples[!is.na(selectedSamples)], ]

metaSelected = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_08_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_11_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_13_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]


rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted", "seqType")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
                                "ASIA" =      tableau_color_pal()(8)[7], 
                                "OCEANIA"  =  tableau_color_pal()(8)[2], 
                                "S_AMERICA" = tableau_color_pal()(8)[3])

sideAnno = rowAnnotation(
  df = rowAnnoDf,
  col = rowAnnoColors,
  gp = gpar(col = "grey10"),
    simple_anno_size = unit(1.5, "cm"),
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ), 
  gap = unit(0.1, "cm")
)

regions_cols = createColorListFromDf(regions %>% 
                                        select(c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom", "description"))) 

regions_cols[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
regions_cols[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
regions_cols[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))


regions_chr08 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08), regions$genomicID),]
regions_chr11 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11), regions$genomicID),]
regions_chr13 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13), regions$genomicID),]


regions_chr08_df = regions_chr08 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()

topAnno_chr08 = HeatmapAnnotation(
  df = regions_chr08_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = T,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr11_df = regions_chr11 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr11 = HeatmapAnnotation(
  df = regions_chr11_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr13_df = regions_chr13 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr13 = HeatmapAnnotation(
  df = regions_chr13_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)


library(dendsort)
row_dend = dendsort(hclust(dist(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs) = NULL
heatmap_08 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr08,
  left_annotation = sideAnno,
  column_title = "Chr08",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs) = NULL
heatmap_11 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr11,
  #left_annotation = sideAnno,
  column_title = "Chr11",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs) = NULL
heatmap_13 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr13,
  #left_annotation = sideAnno,
  column_title = "Chr13",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)
Code
heatmaps_by_chrom = heatmap_08 + heatmap_11 + heatmap_13
Code
cairo_pdf("test_allHeatmap_initial_windows_byChrom_hrp3_del.pdf", width = 20, height = 50)
draw(heatmaps_by_chrom, background = "transparent", merge_legend = TRUE, heatmap_legend_side = "bottom", annotation_legend_side = "bottom", ht_gap = unit(2, "cm"))
dev.off()
quartz_off_screen 
                2 

chr11 deleted

Code
pf7meta = readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/allSRAData/update_2023_01/Pf7_samples.txt")


allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt") %>% 
  left_join(pf7meta %>% 
              select(Sample, `Sample type`) %>% 
              rename(sample = Sample, 
                     seqType = `Sample type`))

allMeta$possiblyHRP2Deleted = allMeta$sample %in% samples_chr08_noCovNoSucEnds
allMeta$possiblyHRP3Deleted = allMeta$sample %in% samples_chr13_noCovNoSucEnds
allMeta$possiblyChr11Deleted = allMeta$sample %in% samples_chr11_noCovNoSucEnds

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round

selectedSamples = match(
  samples_chr11_noCovNoSucEnds,
  rownames(
    allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod
  )
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[selectedSamples[!is.na(selectedSamples)], ]

metaSelected = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_08_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_11_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_13_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]


rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted", "seqType")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
                                "ASIA" =      tableau_color_pal()(8)[7], 
                                "OCEANIA"  =  tableau_color_pal()(8)[2], 
                                "S_AMERICA" = tableau_color_pal()(8)[3])

sideAnno = rowAnnotation(
  df = rowAnnoDf,
  col = rowAnnoColors,
  gp = gpar(col = "grey10"),
    simple_anno_size = unit(1.5, "cm"),
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ), 
  gap = unit(0.1, "cm")
)

regions_cols = createColorListFromDf(regions %>% 
                                        select(c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom", "description"))) 

regions_cols[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
regions_cols[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
regions_cols[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))


regions_chr08 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08), regions$genomicID),]
regions_chr11 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11), regions$genomicID),]
regions_chr13 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13), regions$genomicID),]


regions_chr08_df = regions_chr08 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()

topAnno_chr08 = HeatmapAnnotation(
  df = regions_chr08_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = T,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr11_df = regions_chr11 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr11 = HeatmapAnnotation(
  df = regions_chr11_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr13_df = regions_chr13 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr13 = HeatmapAnnotation(
  df = regions_chr13_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)


library(dendsort)
row_dend = dendsort(hclust(dist(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs) = NULL
heatmap_08 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr08,
  left_annotation = sideAnno,
  column_title = "Chr08",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs) = NULL
heatmap_11 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr11,
  #left_annotation = sideAnno,
  column_title = "Chr11",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs) = NULL
heatmap_13 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr13,
  #left_annotation = sideAnno,
  column_title = "Chr13",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)
Code
heatmaps_by_chrom = heatmap_08 + heatmap_11 + heatmap_13
Code
cairo_pdf("test_allHeatmap_initial_windows_byChrom_chr11_del.pdf", width = 20, height = 20)
draw(heatmaps_by_chrom, background = "transparent", merge_legend = TRUE, heatmap_legend_side = "bottom", annotation_legend_side = "bottom", ht_gap = unit(2, "cm"))
dev.off()
quartz_off_screen 
                2 

meta data counts

Counts for the calls of deletions

Code
pf7meta_swga = pf7meta %>% 
  filter(`Sample type` == "sWGA")
allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt")
allMeta_filt = allMeta %>% 
  filter( sample  %in% samples_chr13_noCovNoSucEnds | 
           sample %in% samples_chr08_noCovNoSucEnds | 
           sample %in% samples_chr11_noCovNoSucEnds ) 

allMeta_filt$possiblyHRP2Deleted = allMeta_filt$sample %in% samples_chr08_noCovNoSucEnds
allMeta_filt$possiblyHRP3Deleted = allMeta_filt$sample %in% samples_chr13_noCovNoSucEnds
allMeta_filt$possiblyChr11Deleted = allMeta_filt$sample %in% samples_chr11_noCovNoSucEnds

create_dt(allMeta_filt)
Code
write_tsv(allMeta_filt,"initial_allMeta_HRP2_HRP3_deletionCalls.tab.txt")


allMeta_filt_sum = allMeta_filt %>% 
  group_by(country, possiblyHRP2Deleted, possiblyHRP3Deleted) %>% 
  count()

write_tsv(allMeta_filt_sum,"initial_allMeta_HRP2_HRP3_metaSum_deletionCalls.tab.txt")

create_dt(allMeta_filt_sum)
Source Code
---
title: "Processing Coverage"
---


```{r setup, echo=FALSE, message=FALSE}
source("../common.R")
```

## Read in meta and genome whole coverage and Finding low coverage in HRP gene areas  

Processing coverage of the initial windows to determine samples with possible deletions of either HRP2, HRP3 or the end of chr11.  

```{r}
allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt")
allMeta_filt  = allMeta %>% 
  filter(is.na(site) | site != "LabContaminated", 
        PreferredSample)

inputFnp = "data/finalHRPII_HRPIII_windows_withTuned/popClustering/reports/allBasicInfo.tab.txt.gz"
outputFnp = "initialCoverageDataObjects.Rdata"
#file.remove(outputFnp)
if(!file.exists(outputFnp) | file.info(inputFnp)$ctime > file.info(outputFnp)$ctime){
  
  cov = readr::read_tsv("../meta/allCov_summaryStats.tab.txt.gz")
  allBasicInfo = readr::read_tsv(inputFnp) %>% 
    filter(sample %fin% allMeta_filt$sample)
  
  allBasicInfo = allBasicInfo %>% 
    left_join(cov)
  
  allBasicInfo_sampleCoverageStats = allBasicInfo %>% 
    group_by(sample) %>% 
    summarise(medianCov = median(perBaseCoverage), 
              meanCov = mean(perBaseCoverage)) 
  
  allBasicInfo_sampleCoverageStats_highCov = allBasicInfo_sampleCoverageStats %>% 
    filter(medianCov > 5)
  
  allBasicInfo_higCov = allBasicInfo %>% 
    filter(sample %in% allBasicInfo_sampleCoverageStats_highCov$sample)
  
  
  allBasicInfo_higCov_covLessThan1Sum = allBasicInfo_higCov %>% 
    mutate(marker = ifelse(perBaseCoverage < 1, 1, 0)) %>% 
    group_by(sample, `#chrom`) %>% 
    summarise(lowCov = sum(marker))
  
  #these below regions have variable cross mapping in other strains  
  chrom13FilterRegions = c("Pf3D7_13_v3-2852656-2852831", "Pf3D7_13_v3-2853081-2853381", "Pf3D7_13_v3-2853978-2854303", "Pf3D7_13_v3-2855203-2855353")
  
  # filter coverage data to specific regions of interest  
  allBasicInfo_higCov_areasOfInterest = allBasicInfo_higCov %>% 
    filter((`#chrom` == "Pf3D7_13_v3" & start > 2840426 & start < 2842301) |
           (`#chrom` == "Pf3D7_11_v3" & start > 1982395) |
           (`#chrom` == "Pf3D7_08_v3" & start >= 1374235 & start < 1375485))
  
  cov_highMedCov = cov %>% 
    filter(medianPerBaseCov_inGenes > 100)
  
  # now process to determine which samples potentially have deleted regions of interest base on coverage 
  allBasicInfo_higCov_areasOfInterest = allBasicInfo_higCov_areasOfInterest %>% 
    mutate(perBaseCoverage = ifelse(readTotal <= ifelse(sample %in% cov_highMedCov$sample, 20, 10) & readTotalUsed == 0 & fullySpanningReads <=1, 0, perBaseCoverage))
  allBasicInfo_higCov_areasOfInterest$processedMarker = 0
  allBasicInfo_higCov_areasOfInterest$processedSuccessMarker = 0
  for(row in 2:(nrow(allBasicInfo_higCov_areasOfInterest) - 1 )){
    #print(row)
    if(allBasicInfo_higCov_areasOfInterest$sample[row] == allBasicInfo_higCov_areasOfInterest$sample[row + 1]
       & allBasicInfo_higCov_areasOfInterest$`#chrom`[row] == allBasicInfo_higCov_areasOfInterest$`#chrom`[row + 1]
       & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row - 1] == 0
       & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row] == 0
       & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row + 1] == 0
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row - 1] < 0.1
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row] < 0.1
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row + 1] <0.1
       ){
      allBasicInfo_higCov_areasOfInterest$processedMarker[row] = 1
    }  
      if(allBasicInfo_higCov_areasOfInterest$sample[row] == allBasicInfo_higCov_areasOfInterest$sample[row + 1]
       & allBasicInfo_higCov_areasOfInterest$`#chrom`[row] == allBasicInfo_higCov_areasOfInterest$`#chrom`[row + 1]
       & allBasicInfo_higCov_areasOfInterest$success[row - 1] == F
       & allBasicInfo_higCov_areasOfInterest$success[row] == F
       & allBasicInfo_higCov_areasOfInterest$success[row + 1] == F
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row - 1] < 0.1
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row] < 0.1
       # & allBasicInfo_higCov_areasOfInterest$perBaseCoverage[row + 1] <0.1
       ){
      allBasicInfo_higCov_areasOfInterest$processedSuccessMarker[row] = 1
       }  
  }
  save(cov,
       allBasicInfo,
       allBasicInfo_higCov_areasOfInterest,
       file = outputFnp)
} else {
  load(outputFnp)
}


allBasicInfo_higCov_areasOfInterest_covLessThan1Sum = allBasicInfo_higCov_areasOfInterest %>%
  #filter(!is.na(extraField0)) %>%
  # mutate(marker = ifelse(perBaseCoverage < 0.1, 1, 0)) %>%
  mutate(marker = ifelse(perBaseCoverage == 0, 1, 0)) %>%
  mutate(successMarker = ifelse(!success, 1, 0)) %>%
  group_by(sample, `#chrom`) %>%
  summarise(
    lowCov = sum(marker),
    lowCovStrict = sum(processedMarker),
    lowSuccess = sum(successMarker),
    lowSuccessStrict = sum(processedSuccessMarker),
    total = n()
    
  ) %>%
  mutate(
    lowCovPerc = lowCov / total,
    lowCovStrictPerc = lowCovStrict / (total - 2),
    lowSuccessPerc = lowSuccess / total,
    lowSuccessStrictPerc = lowSuccessStrict / (total - 2)
  )
  
# there are several samples which due to high variability in coverage, all of which are sWGA samples or in other artifacts introduced by WGS processes that can't be easily filtered out programatically so
# they are removed based on analysis by hand 

handRemovedSamples = c(
  # chr08 hand investigations 
  "P178-D30", "PD1192-C", "IGS-BRA-018sA", "PA0331-CW", "T367", "RCN09021","RCN13559", "RCN09011", "RCN11871", "RCN02972", "SPT35541", "T508",
                       
  # chr08 hand investigations on finer windows 
  "SPT24525", "SPT24643", "SPT24651", "SPT24541", "SPT24646", "SPT24530", "SPT24607", "SPT24650",
  
  # chr13 hand investigations 
   "SPT22213", "RCN08027", "RCN12032", "SPT35322", "SPT35228", "SPT35354","RCN02907", "RCN02953", "RCN08023", 
  "PF1157-Cx6", "IGS-BRA-016sA", "IGS-BRA-018sA", 
  "NHP4374", "PA0809-CW", "SPT21769", "SPT35543", "T498", "T395", 
  "RCN02816", "RCN02821", "RCN02893", "RCN03336", "RCN03536", "RCN02547", "SPT21675", "PA0331-CW", 
  "RCN14050","SPT16994","SPT38383","SPT40863","SPT38318","SPT15609","SPT38344","SPT34334","SPT38314","SPT34321","SPT17972","SPT19692","SPT12080",
  
  # chr13 hand investigations on finer windows 
  "RCN13051", "PR0262-C", "SPT00902", "SPT00903", "SPT00898", "SPT26258", "SPT26293", "SPT26254",
  "PW0099-C",
  
  "SPT24499", "RCN13387", "SPT21927", "RCN08119",
  
  "RCN13415", "RCN13414", "RCN13413",
  
  "RCN13391", "RCN13388", "RCN11770", "RCN11771", "RCN13416", "SPT21927", "RCN09061", "RCN11766", "RCN13490", "SPT25144", "RCN09058", "RCN09027", "RCN09064",
  
  
  "RCN12064", "RCN11497", "RCN02091", "SPT18468", "RCN13544", "RCN11601", "SPT24457", "SPT43231", "RCN02894", "RCN02146", "RCN02365", "RCN02171",
  "SPT15757", "SPT35260",
  
  # chr11 hand investigations 
  "SPT43093", "RCN13306","RCN13447","RCN11811","SPT43196","RCN11158","RCN11805","RCN13308","RCN13309","RCN13269","RCN13285","RCN13267","SPT43280","RCN02046","RCN13296",
  "RCN13323","RCN13310","RCN09041","RCN09043","RCN09042","RCN09071","RCN11693","RCN11696","RCN11687","RCN11644","RCN09157","RCN10396","RCN11167","RCN11223",
  "RCN10306","RCN00127","RCN00266","RCN09029","RCN00129","RCN02982","RCN03040","PM0744-CW","RCN08298","RCN11172","RCN11234","RCN10247","RCN10297","PA0295-CW",
  "PR0006-CW","NHP4224","NHP4850","RCN11636","RCN13441","RCN13324","RCN09074","RCN13437","RCN09073","RCN11494","RCN11611","RCN13515","RCN13523","RCN11787",
  "RCN13532","RCN02848","SPT35212","RCN11616","RCN13336","RCN13421","SPT42050","RCN13420","RCN13481","SPT36128","SPT34434","SPT36696","SPT38311","RCN13417",
  "SPT34254","RCN02341","RCN13442","RCN13911","SPT19293","SPT34315","SPT34330","SPT34327","SPT34335"
  )

# further removing SWGA samples and control mixtures 
allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum %>% 
  filter(!grepl("^PG", sample)) %>% 
  filter(!grepl("^[0-9]S[0-9]-[0-9]C[0-9]+", sample)) %>% 
  filter(!grepl("^SenT", sample)) %>%
  filter(!grepl("^80[0-9]+", sample)) %>%
  filter(!grepl("^OHP[0-9]+", sample)) %>%
  filter(!grepl("^ERR", sample)) %>%
  filter(!(sample %in% handRemovedSamples)) %>%
  filter(lowCovPerc >= .666 & lowCovStrictPerc >= 0.33) 
# %>% 
#   filter((lowCovPerc >= .666 & lowCovStrictPerc >= 0.50) | lowSuccessPerc > 0.95)
# 
allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_opposite = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum %>% 
  filter(!grepl("^PG", sample)) %>% 
  filter(!grepl("^[0-9]S[0-9]-[0-9]C[0-9]+", sample)) %>% 
  filter(!grepl("^SenT", sample)) %>%
  filter(!grepl("^80[0-9]+", sample)) %>%
  filter(!grepl("^OHP[0-9]+", sample)) %>%
  filter(!grepl("^ERR", sample)) %>%
  filter(!(sample %in% handRemovedSamples))  %>%
  filter(!(lowCovPerc >= .666 & lowCovStrictPerc >= 0.33) )
# %>% 
#   filter(!((lowCovPerc >= .666 & lowCovStrictPerc >= 0.50) | lowSuccessPerc > 0.95) )
# 
cat(unique(c(allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_opposite$sample, 
             allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov$sample)), file = "samplesCovered.txt", sep = "\n")

allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom08 = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov %>% 
  filter(`#chrom` == "Pf3D7_08_v3")
allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom13 = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov %>% 
  filter(`#chrom` == "Pf3D7_13_v3")
allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom11 = allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov %>% 
  filter(`#chrom` == "Pf3D7_11_v3")


allBasicInfo_highCov_butLowInAreasofInterest = allBasicInfo %>%
  filter(sample %fin% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov$sample) %>%
  group_by(sample) %>%
  mutate(perBaseCoverageNorm = perBaseCoverage/medianPerBaseCov_inGenes)%>%
  mutate(perBaseCoverageNormRounded = round(perBaseCoverageNorm/ 0.5)*0.5) %>%
  mutate(id = paste0(`#chrom`, "-", start, "-", end)) 

endRegions = c(
  "Pf3D7_08_v3-1382255-1382505","Pf3D7_08_v3-1382680-1383155","Pf3D7_08_v3-1384030-1384251","Pf3D7_08_v3-1384316-1384663","Pf3D7_08_v3-1384837-1385370",
  #"Pf3D7_08_v3-1385625-1385741",
  "Pf3D7_08_v3-1385951-1386323","Pf3D7_08_v3-1386518-1386680","Pf3D7_08_v3-1386739-1387414","Pf3D7_08_v3-1387782-1387982", 
  
  "Pf3D7_11_v3-1997276-1997425","Pf3D7_11_v3-1997674-1997884","Pf3D7_11_v3-1997944-1998371","Pf3D7_11_v3-1998638-1998759","Pf3D7_11_v3-1998833-1999151",
  "Pf3D7_11_v3-1999245-1999390","Pf3D7_11_v3-1999600-1999713","Pf3D7_11_v3-2000687-2000889","Pf3D7_11_v3-2000941-2001239","Pf3D7_11_v3-2001300-2003328", 
  
  "Pf3D7_13_v3-2842251-2842401","Pf3D7_13_v3-2842251-2842451","Pf3D7_13_v3-2842301-2842601","Pf3D7_13_v3-2842476-2842688","Pf3D7_13_v3-2842501-2842651",
  "Pf3D7_13_v3-2842515-2842804","Pf3D7_13_v3-2843108-2843446","Pf3D7_13_v3-2843641-2843863","Pf3D7_13_v3-2843930-2844101","Pf3D7_13_v3-2844247-2844785"
  
  # "Pf3D7_13_v3-2842501-2842576","Pf3D7_13_v3-2842501-2842601","Pf3D7_13_v3-2842501-2842651","Pf3D7_13_v3-2842515-2842804","Pf3D7_13_v3-2842576-2842651",
  # "Pf3D7_13_v3-2842601-2842688","Pf3D7_13_v3-2843108-2843446","Pf3D7_13_v3-2843641-2843863","Pf3D7_13_v3-2843930-2844101","Pf3D7_13_v3-2844247-2844785"
  # 
  )

##
## after careful inspection of the samples, all potential true deletion appear to include the whole chromosome from the point of interest so in order to clean up the samples will force samples 
## to have no coverage from the point of interest on to get high quality examples of chromosomal deletions of areas of interest 

endRegions_08 = c("Pf3D7_08_v3-1375557-1375750","Pf3D7_08_v3-1378006-1378662","Pf3D7_08_v3-1379154-1379915","Pf3D7_08_v3-1380194-1380328","Pf3D7_08_v3-1382255-1382505",
                  "Pf3D7_08_v3-1382680-1383155","Pf3D7_08_v3-1384030-1384251","Pf3D7_08_v3-1384316-1384663","Pf3D7_08_v3-1384837-1385370",
                  #"Pf3D7_08_v3-1385625-1385741",
                  "Pf3D7_08_v3-1385951-1386323","Pf3D7_08_v3-1386518-1386680","Pf3D7_08_v3-1386739-1387414","Pf3D7_08_v3-1387782-1387982")

endRegions_11 = c("Pf3D7_11_v3-1991347-1992851","Pf3D7_11_v3-1992907-1993669","Pf3D7_11_v3-1993833-1994061","Pf3D7_11_v3-1994139-1994363","Pf3D7_11_v3-1995234-1995394",
                  "Pf3D7_11_v3-1995459-1995614","Pf3D7_11_v3-1995678-1996112","Pf3D7_11_v3-1996349-1996650","Pf3D7_11_v3-1996745-1997019","Pf3D7_11_v3-1997095-1997221",
                  "Pf3D7_11_v3-1997276-1997425","Pf3D7_11_v3-1997674-1997884","Pf3D7_11_v3-1997944-1998371","Pf3D7_11_v3-1998638-1998759","Pf3D7_11_v3-1998833-1999151",
                  "Pf3D7_11_v3-1999245-1999390","Pf3D7_11_v3-1999600-1999713","Pf3D7_11_v3-2000687-2000889","Pf3D7_11_v3-2000941-2001239","Pf3D7_11_v3-2001300-2003328")

endRegions_13 = c("Pf3D7_13_v3-2841776-2841926","Pf3D7_13_v3-2842001-2842301","Pf3D7_13_v3-2842026-2842226","Pf3D7_13_v3-2842038-2842461","Pf3D7_13_v3-2842101-2842251",
                  "Pf3D7_13_v3-2842251-2842401","Pf3D7_13_v3-2842251-2842451","Pf3D7_13_v3-2842301-2842601","Pf3D7_13_v3-2842476-2842688","Pf3D7_13_v3-2842501-2842651",
                  "Pf3D7_13_v3-2842515-2842804","Pf3D7_13_v3-2843108-2843446","Pf3D7_13_v3-2843641-2843863","Pf3D7_13_v3-2843930-2844101","Pf3D7_13_v3-2844247-2844785")



#### filter ends chr08 -begin
allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_08 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_08_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom08$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(id %in% endRegions_08) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_08_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_08 %>%
  filter(noCov == length(endRegions_08))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_08 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_08_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom08$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(success, 0, 1)) %>%
  filter(id %in% endRegions_08) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_08_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_08 %>%
  filter(noCov == length(endRegions_08))

samples_chr08_noCovNoSucEnds = intersect(
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_08_filt$sample
  ),
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_08_filt$sample
  )
)
#### filter ends chr08 -end

#### filter ends chr11 -begin
allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_11 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_11_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom11$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(id %in% endRegions_11) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_11_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_11 %>%
  filter(noCov == length(endRegions_11))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_11 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_11_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom11$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(success, 0, 1)) %>%
  filter(id %in% endRegions_11) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_11_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_11 %>%
  filter(noCov == length(endRegions_11))

samples_chr11_noCovNoSucEnds = intersect(
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_11_filt$sample
  ),
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_11_filt$sample
  )
)
#### filter ends chr11 -end

#### filter ends chr13 -begin
allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_13 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_13_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom13$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(id %in% endRegions_13) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_13_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_13 %>%
  filter(noCov == length(endRegions_13))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_13 = allBasicInfo_highCov_butLowInAreasofInterest %>%
  filter(`#chrom` == "Pf3D7_13_v3") %>%
  filter(
    sample %in% allBasicInfo_higCov_areasOfInterest_covLessThan1Sum_highLowCov_chrom13$sample
  ) %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(success, 0, 1)) %>%
  filter(id %in% endRegions_13) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_13_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_13 %>%
  filter(noCov == length(endRegions_13))

samples_chr13_noCovNoSucEnds = intersect(
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_13_filt$sample
  ),
  sort(
    allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_13_filt$sample
  )
)
#### filter ends chr13 -end

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel = allBasicInfo_highCov_butLowInAreasofInterest %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(perBaseCoverageNormRounded == 0, 1, 0)) %>%
  filter(id %in% endRegions) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel %>%
  filter(noCov == 10)

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel = allBasicInfo_highCov_butLowInAreasofInterest %>%
  group_by(sample, id) %>%
  mutate(marker = ifelse(success, 0, 1)) %>%
  filter(id %in% endRegions) %>%
  group_by(sample, `#chrom`) %>%
  summarise(noCov = sum(marker))

allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_filt = allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel %>%
  filter(noCov == 10)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp = allBasicInfo_highCov_butLowInAreasofInterest %>%
  # filter(sample %in% allBasicInfo_highCov_butLowInAreasofInterest_endsNoSuccessSel_filt$sample &
  #          sample %in% allBasicInfo_highCov_butLowInAreasofInterest_endsNoCovSel_filt$sample) %>%
  filter(
    sample %in% c(
      samples_chr08_noCovNoSucEnds,
      samples_chr11_noCovNoSucEnds,
      samples_chr13_noCovNoSucEnds
    )
  ) %>%
  select(sample, id, perBaseCoverageNorm) %>%
  spread(id, perBaseCoverageNorm)


# hold = readr::read_tsv(
#   "/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/hrps/windowSelectionSurroundingHRPs/Windows_Surrounding_HRP2_3_deletions/windowAnalysis/allMeta_HRP2_HRP3_deletionCalls.tab.txt"
# )

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat = as.matrix(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp[, 2:ncol(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp)])
rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat) = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp$sample

#cap it at 2 
allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat[allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat>2] =2

chroms = gsub("-.*", "", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat))

# get the regions 
regions = allBasicInfo %>% 
  filter(.$sample[1] == sample) %>% 
  select(1:6, extraField0) %>% 
  unique()%>% 
  mutate(genomicID = paste0(`#chrom`, "-", start, "-", end)) 
# rename them based on importance 
regions = regions%>%
  mutate(id = paste0(`#chrom`, "-", start, "-", end)) %>% 
  arrange(id) %>% 
  mutate(inGene = !is.na(extraField0)) %>% 
  mutate(geneType = ifelse(grepl("histidine-rich protein II", extraField0), "HRP", "other" ) )%>% 
  mutate(geneType = ifelse(grepl("ribosomal RNA", extraField0), "rRNA", geneType ) )%>% 
  mutate(geneType = ifelse(grepl("332", extraField0), "Pf332", geneType ) ) %>%
  mutate(sharedRegion = ifelse((`#chrom` == "Pf3D7_11_v3" &
                                  start >= 1918028 &
                                  end <= 1933288) |
                                 `#chrom` == "Pf3D7_13_v3" &
                                 start >= 2792021 &
                                 end <= 2807295,
                               "shared",
                               "other"
  )) %>% 
  mutate(afterSharedRegion = (`#chrom` == "Pf3D7_11_v3" &
                                  start >= 1933288) |
                                 (`#chrom` == "Pf3D7_13_v3" &
                                 start >= 2807295)) %>%
  mutate(chrom = `#chrom`)%>% 
  mutate(description = gsub(".*description=", "", extraField0)) %>% 
  mutate(description = ifelse("[extraField0=NA]" == extraField0, NA, description)) %>% 
  mutate(description = gsub("]", "", fixed = T, description))


regions = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat), regions$id),]
allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round = round(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat/ 0.5)*0.5
allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt")
allMeta_filt = allMeta %>% 
  filter( sample  %in% samples_chr13_noCovNoSucEnds | 
           sample %in% samples_chr08_noCovNoSucEnds | 
           sample %in% samples_chr11_noCovNoSucEnds ) 

allMeta_filt$possiblyHRP2Deleted = allMeta_filt$sample %in% samples_chr08_noCovNoSucEnds
allMeta_filt$possiblyHRP3Deleted = allMeta_filt$sample %in% samples_chr13_noCovNoSucEnds
allMeta_filt$possiblyChr11Deleted = allMeta_filt$sample %in% samples_chr11_noCovNoSucEnds


metaSelected = allMeta_filt[match(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp$sample, allMeta_filt$sample), ]
```

## Heatmap of coverage  

```{r}
annotationTextSize = 10

library(circlize)
col_fun = colorRamp2(c(0, 1, 2), c(heat.colors(3)))
allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_noLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round
rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_noLabs) = NULL
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_noLabs) = NULL
RowLabs = metaSelected$BiologicalSample
RowLabs[metaSelected$site != "LabIsolate" | is.na(metaSelected$site)] = ""
rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_noLabs) = RowLabs
regionNames = sort(unique(c(metaSelected$country, metaSelected$region, metaSelected$secondaryRegion)))
regionColors = scheme$hex(length(regionNames))
names(regionColors) = regionNames

topAnnoDf  = regions[,c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom")] %>% as.data.frame()
topAnnoColors = createColorListFromDf(topAnnoDf) 

topAnnoColors[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
topAnnoColors[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
topAnnoColors[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))

rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

# rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
#                                 "ASIA" =      tableau_color_pal()(8)[7], 
#                                 "OCEANIA"  =  tableau_color_pal()(8)[2], 
#                                 "S_AMERICA" = tableau_color_pal()(8)[3])
rowAnnoColors[["continent"]] = c("AFRICA"  =   "#E69F00", 
                                 "ASIA" =      "#F0E442", 
                                 "OCEANIA"  =  "#F748A5", 
                                 "S_AMERICA" = "#0072B2")
sideAnno = rowAnnotation(df = rowAnnoDf, 
                         col = rowAnnoColors)

topAnno = HeatmapAnnotation(df = topAnnoDf, 
                            col = topAnnoColors)
sideAnno = rowAnnotation(df = rowAnnoDf, 
                         col = rowAnnoColors)
allCoverageHeatMap = Heatmap(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round, cluster_columns = F, 
        col = col_fun,
        name = "coverage",
        top_annotation = topAnno, 
        right_annotation = sideAnno)

```

Below is a heatmap of the coverage of these initial windows, meta data for each sample is added to the right for country of origin etc. The top has meta data on the regions covered including if they fall within genes, and specific genes of interest like HRP2/3, pf332 and rRNA regions are marked out. 

Coverage is normalized to the genomic coverage in the rest of the genome. 1x coverage is normal coverage as compared to the rest of the gnome. For easier interpretation normalized coverage is capped at 2x coverage to easier see where possible duplication has happened. Coverage is rounded to nearest 0.5. Orange == 1x coverage, yellow == 2x coverage, red == no coverage  

```{r}
#| fig-column: screen
#| fig-height: 15
#| fig-width: 20


print(allCoverageHeatMap)
```

```{r}
pdf("test_allHeatmap_initial_windows.pdf", useDingbats = F, width = 20, height = 50)
print(allCoverageHeatMap)
dev.off()
```


## by chrom 

### All 
```{r}
allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt")

allMeta$possiblyHRP2Deleted = allMeta$sample %in% samples_chr08_noCovNoSucEnds
allMeta$possiblyHRP3Deleted = allMeta$sample %in% samples_chr13_noCovNoSucEnds
allMeta$possiblyChr11Deleted = allMeta$sample %in% samples_chr11_noCovNoSucEnds

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round

library(dendsort)
row_dend = dendsort(hclust(dist(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)))

metaSelectedForSorting = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]
metaSelectedForSorting_sel = metaSelectedForSorting %>% 
  select(sample, possiblyHRP2Deleted, possiblyHRP3Deleted, possiblyChr11Deleted)
metaSelectedForSorting_sel_mat = as.matrix(metaSelectedForSorting_sel[,2:ncol(metaSelectedForSorting_sel)])
rownames(metaSelectedForSorting_sel_mat) = metaSelectedForSorting_sel$sample
metaSelectedForSorting_sel_mat_hclust = hclust(dist(metaSelectedForSorting_sel_mat))
metaSelectedForSorting = metaSelectedForSorting %>% 
  left_join(
    tibble(sample = rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)[row_dend$order], 
           coverageOrder = 1:length(row_dend$order)) ) %>% 
  left_join(
    tibble(sample = rownames(metaSelectedForSorting_sel_mat)[hclust(dist(metaSelectedForSorting_sel_mat))$order], 
           deletionOrder = 1:length(hclust(dist(metaSelectedForSorting_sel_mat))$order)) )

metaSelectedForSorting = metaSelectedForSorting %>% 
  arrange(desc(possiblyHRP2Deleted), desc(possiblyHRP3Deleted), desc(possiblyChr11Deleted), coverageOrder)

metaSelectedForSorting = metaSelectedForSorting %>% 
  group_by(sample) %>% 
  mutate(forSortingOld = paste0(possiblyHRP2Deleted, "-", possiblyHRP3Deleted, "-", possiblyChr11Deleted, "-", stringr::str_pad(coverageOrder, side = "left", pad = "0", width = nchar(max(metaSelectedForSorting$coverageOrder))))) %>% 
  mutate(forSorting = paste0(possiblyHRP2Deleted, "-", possiblyHRP3Deleted, "-", possiblyChr11Deleted)) %>% 
  ungroup() %>% 
  mutate(forSorting = factor(forSorting, levels = c("TRUE-FALSE-FALSE","TRUE-TRUE-FALSE", "FALSE-TRUE-FALSE", "FALSE-TRUE-TRUE", "FALSE-FALSE-TRUE"))) %>% 
  arrange(forSorting, coverageOrder)


allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[match(metaSelectedForSorting$sample,rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)),]
metaSelected = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_08_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_11_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_13_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]


rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
                                "ASIA" =      tableau_color_pal()(8)[7], 
                                "OCEANIA"  =  tableau_color_pal()(8)[2], 
                                "S_AMERICA" = tableau_color_pal()(8)[3])

sideAnno = rowAnnotation(
  df = rowAnnoDf,
  col = rowAnnoColors,
  gp = gpar(col = "grey10"),
    simple_anno_size = unit(1.5, "cm"),
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ), 
  gap = unit(0.1, "cm")
)

regions_cols = createColorListFromDf(regions %>% 
                                        select(c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom", "description"))) 

regions_cols[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
regions_cols[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
regions_cols[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))


regions_chr08 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08), regions$genomicID),]
regions_chr11 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11), regions$genomicID),]
regions_chr13 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13), regions$genomicID),]


regions_chr08_df = regions_chr08 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()

topAnno_chr08 = HeatmapAnnotation(
  df = regions_chr08_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = T,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr11_df = regions_chr11 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr11 = HeatmapAnnotation(
  df = regions_chr11_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr13_df = regions_chr13 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr13 = HeatmapAnnotation(
  df = regions_chr13_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)








allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs) = NULL
heatmap_08 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs,
  cluster_columns = F,
  # cluster_rows = row_dend,
  cluster_rows = F, 
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr08,
  left_annotation = sideAnno,
  column_title = "Chr08",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs) = NULL
heatmap_11 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs,
  cluster_columns = F,
  # cluster_rows = row_dend,
  cluster_rows = F, 
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr11,
  #left_annotation = sideAnno,
  column_title = "Chr11",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs) = NULL
heatmap_13 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs,
  cluster_columns = F,
  # cluster_rows = row_dend,
  cluster_rows = F, 
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr13,
  #left_annotation = sideAnno,
  column_title = "Chr13",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

```

```{r}
heatmaps_by_chrom = heatmap_08 + heatmap_11 + heatmap_13
```

```{r}
cairo_pdf("test_allHeatmap_initial_windows_byChrom.pdf", width = 20, height = 50)
draw(heatmaps_by_chrom, background = "transparent", merge_legend = TRUE, heatmap_legend_side = "bottom", annotation_legend_side = "bottom", ht_gap = unit(2, "cm"))
dev.off()
```

### hrp2 deleted 
```{r}
pf7meta = readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/allSRAData/update_2023_01/Pf7_samples.txt")


allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt") %>% 
  left_join(pf7meta %>% 
              select(Sample, `Sample type`) %>% 
              rename(sample = Sample, 
                     seqType = `Sample type`))

allMeta$possiblyHRP2Deleted = allMeta$sample %in% samples_chr08_noCovNoSucEnds
allMeta$possiblyHRP3Deleted = allMeta$sample %in% samples_chr13_noCovNoSucEnds
allMeta$possiblyChr11Deleted = allMeta$sample %in% samples_chr11_noCovNoSucEnds

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round

selectedSamples = match(samples_chr08_noCovNoSucEnds, 
                                                                                                                                            rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[selectedSamples[!is.na(selectedSamples)], ]

metaSelected = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_08_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_11_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_13_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]


rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted", "seqType")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
                                "ASIA" =      tableau_color_pal()(8)[7], 
                                "OCEANIA"  =  tableau_color_pal()(8)[2], 
                                "S_AMERICA" = tableau_color_pal()(8)[3])

sideAnno = rowAnnotation(
  df = rowAnnoDf,
  col = rowAnnoColors,
  gp = gpar(col = "grey10"),
    simple_anno_size = unit(1.5, "cm"),
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ), 
  gap = unit(0.1, "cm")
)

regions_cols = createColorListFromDf(regions %>% 
                                        select(c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom", "description"))) 

regions_cols[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
regions_cols[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
regions_cols[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))


regions_chr08 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08), regions$genomicID),]
regions_chr11 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11), regions$genomicID),]
regions_chr13 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13), regions$genomicID),]


regions_chr08_df = regions_chr08 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()

topAnno_chr08 = HeatmapAnnotation(
  df = regions_chr08_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = T,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr11_df = regions_chr11 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr11 = HeatmapAnnotation(
  df = regions_chr11_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr13_df = regions_chr13 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr13 = HeatmapAnnotation(
  df = regions_chr13_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)


library(dendsort)
row_dend = dendsort(hclust(dist(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs) = NULL
heatmap_08 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr08,
  left_annotation = sideAnno,
  column_title = "Chr08",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs) = NULL
heatmap_11 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr11,
  #left_annotation = sideAnno,
  column_title = "Chr11",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs) = NULL
heatmap_13 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr13,
  #left_annotation = sideAnno,
  column_title = "Chr13",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

```

```{r}
heatmaps_by_chrom = heatmap_08 + heatmap_11 + heatmap_13
```

```{r}
cairo_pdf("test_allHeatmap_initial_windows_byChrom_hrp2_del.pdf", width = 20, height = 50)
draw(heatmaps_by_chrom, background = "transparent", merge_legend = TRUE, heatmap_legend_side = "bottom", annotation_legend_side = "bottom", ht_gap = unit(2, "cm"))
dev.off()
```

### hrp3 deleted 
```{r}
pf7meta = readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/allSRAData/update_2023_01/Pf7_samples.txt")


allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt") %>% 
  left_join(pf7meta %>% 
              select(Sample, `Sample type`) %>% 
              rename(sample = Sample, 
                     seqType = `Sample type`))

allMeta$possiblyHRP2Deleted = allMeta$sample %in% samples_chr08_noCovNoSucEnds
allMeta$possiblyHRP3Deleted = allMeta$sample %in% samples_chr13_noCovNoSucEnds
allMeta$possiblyChr11Deleted = allMeta$sample %in% samples_chr11_noCovNoSucEnds

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round

selectedSamples = match(samples_chr13_noCovNoSucEnds, 
                                                                                                                                            rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[selectedSamples[!is.na(selectedSamples)], ]

metaSelected = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_08_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_11_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_13_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]


rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted", "seqType")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
                                "ASIA" =      tableau_color_pal()(8)[7], 
                                "OCEANIA"  =  tableau_color_pal()(8)[2], 
                                "S_AMERICA" = tableau_color_pal()(8)[3])

sideAnno = rowAnnotation(
  df = rowAnnoDf,
  col = rowAnnoColors,
  gp = gpar(col = "grey10"),
    simple_anno_size = unit(1.5, "cm"),
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ), 
  gap = unit(0.1, "cm")
)

regions_cols = createColorListFromDf(regions %>% 
                                        select(c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom", "description"))) 

regions_cols[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
regions_cols[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
regions_cols[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))


regions_chr08 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08), regions$genomicID),]
regions_chr11 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11), regions$genomicID),]
regions_chr13 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13), regions$genomicID),]


regions_chr08_df = regions_chr08 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()

topAnno_chr08 = HeatmapAnnotation(
  df = regions_chr08_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = T,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr11_df = regions_chr11 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr11 = HeatmapAnnotation(
  df = regions_chr11_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr13_df = regions_chr13 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr13 = HeatmapAnnotation(
  df = regions_chr13_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)


library(dendsort)
row_dend = dendsort(hclust(dist(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs) = NULL
heatmap_08 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr08,
  left_annotation = sideAnno,
  column_title = "Chr08",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs) = NULL
heatmap_11 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr11,
  #left_annotation = sideAnno,
  column_title = "Chr11",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs) = NULL
heatmap_13 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr13,
  #left_annotation = sideAnno,
  column_title = "Chr13",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

```

```{r}
heatmaps_by_chrom = heatmap_08 + heatmap_11 + heatmap_13
```

```{r}
cairo_pdf("test_allHeatmap_initial_windows_byChrom_hrp3_del.pdf", width = 20, height = 50)
draw(heatmaps_by_chrom, background = "transparent", merge_legend = TRUE, heatmap_legend_side = "bottom", annotation_legend_side = "bottom", ht_gap = unit(2, "cm"))
dev.off()
```

### chr11 deleted 
```{r}
pf7meta = readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/allSRAData/update_2023_01/Pf7_samples.txt")


allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt") %>% 
  left_join(pf7meta %>% 
              select(Sample, `Sample type`) %>% 
              rename(sample = Sample, 
                     seqType = `Sample type`))

allMeta$possiblyHRP2Deleted = allMeta$sample %in% samples_chr08_noCovNoSucEnds
allMeta$possiblyHRP3Deleted = allMeta$sample %in% samples_chr13_noCovNoSucEnds
allMeta$possiblyChr11Deleted = allMeta$sample %in% samples_chr11_noCovNoSucEnds

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round

selectedSamples = match(
  samples_chr11_noCovNoSucEnds,
  rownames(
    allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod
  )
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[selectedSamples[!is.na(selectedSamples)], ]

metaSelected = allMeta[match(rownames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod), allMeta$sample), ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_08_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_11_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13 = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod[,grepl("_13_", colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)) ]


rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion",
                             "possiblyHRP2Deleted", "possiblyHRP3Deleted", 
                             "possiblyChr11Deleted", "seqType")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
                                "ASIA" =      tableau_color_pal()(8)[7], 
                                "OCEANIA"  =  tableau_color_pal()(8)[2], 
                                "S_AMERICA" = tableau_color_pal()(8)[3])

sideAnno = rowAnnotation(
  df = rowAnnoDf,
  col = rowAnnoColors,
  gp = gpar(col = "grey10"),
    simple_anno_size = unit(1.5, "cm"),
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ), 
  gap = unit(0.1, "cm")
)

regions_cols = createColorListFromDf(regions %>% 
                                        select(c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom", "description"))) 

regions_cols[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
regions_cols[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
regions_cols[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))


regions_chr08 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08), regions$genomicID),]
regions_chr11 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11), regions$genomicID),]
regions_chr13 = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13), regions$genomicID),]


regions_chr08_df = regions_chr08 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()

topAnno_chr08 = HeatmapAnnotation(
  df = regions_chr08_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = T,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr11_df = regions_chr11 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr11 = HeatmapAnnotation(
  df = regions_chr11_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)

regions_chr13_df = regions_chr13 %>% 
  select(inGene, geneType, sharedRegion, afterSharedRegion, chrom, description) %>% 
  as.data.frame()
topAnno_chr13 = HeatmapAnnotation(
  df = regions_chr13_df,
  col = regions_cols,
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  ),
  annotation_name_side = "left",
  show_annotation_name = F,
  na_col = "#FFFFFF00",
  gap = unit(0.1, "cm")
)


library(dendsort)
row_dend = dendsort(hclust(dist(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_mod)))

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs) = NULL
heatmap_08 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr08_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr08,
  left_annotation = sideAnno,
  column_title = "Chr08",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs) = NULL
heatmap_11 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr11_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr11,
  #left_annotation = sideAnno,
  column_title = "Chr11",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs = allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13
colnames(allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs) = NULL
heatmap_13 = Heatmap(
  allBasicInfo_highCov_butLowInAreasofInterest_cov_sp_mat_round_chr13_noColLabs,
  cluster_columns = F,
  cluster_rows = row_dend,
  col = col_fun,
  name = "coverage",
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  heatmap_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(
      fontsize = annotationTextSize,
      fontface = "bold",
      title = "coverage",
      at = c(0, 0.5, 1, 1.5, 2),
      labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x")
    )
  ),
  top_annotation = topAnno_chr13,
  #left_annotation = sideAnno,
  column_title = "Chr13",
  column_title_gp = gpar(fontsize = 30, fontface = "bold")
)

```

```{r}
heatmaps_by_chrom = heatmap_08 + heatmap_11 + heatmap_13
```

```{r}
cairo_pdf("test_allHeatmap_initial_windows_byChrom_chr11_del.pdf", width = 20, height = 20)
draw(heatmaps_by_chrom, background = "transparent", merge_legend = TRUE, heatmap_legend_side = "bottom", annotation_legend_side = "bottom", ht_gap = unit(2, "cm"))
dev.off()
```


# meta data counts

Counts for the calls of deletions  

```{r}
pf7meta_swga = pf7meta %>% 
  filter(`Sample type` == "sWGA")
allMeta = readr::read_tsv("../meta/metadata/meta.tab.txt")
allMeta_filt = allMeta %>% 
  filter( sample  %in% samples_chr13_noCovNoSucEnds | 
           sample %in% samples_chr08_noCovNoSucEnds | 
           sample %in% samples_chr11_noCovNoSucEnds ) 

allMeta_filt$possiblyHRP2Deleted = allMeta_filt$sample %in% samples_chr08_noCovNoSucEnds
allMeta_filt$possiblyHRP3Deleted = allMeta_filt$sample %in% samples_chr13_noCovNoSucEnds
allMeta_filt$possiblyChr11Deleted = allMeta_filt$sample %in% samples_chr11_noCovNoSucEnds

create_dt(allMeta_filt)

write_tsv(allMeta_filt,"initial_allMeta_HRP2_HRP3_deletionCalls.tab.txt")


allMeta_filt_sum = allMeta_filt %>% 
  group_by(country, possiblyHRP2Deleted, possiblyHRP3Deleted) %>% 
  count()

write_tsv(allMeta_filt_sum,"initial_allMeta_HRP2_HRP3_metaSum_deletionCalls.tab.txt")

create_dt(allMeta_filt_sum)
```

```{r, echo = FALSE, message=FALSE, warning=F, eval=FALSE}
#| results: hide

pf7meta = readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/allSRAData/update_2023_01/Pf7_samples.txt")%>% 
  filter(!is.na(ENA))

pf7meta_calls = readr::read_tsv("/Users/nick/Dropbox (Personal)/ownCloud/documents/plasmodium/falciparum/pf7/Pf7_inferred_resistance_status_classification_allMeta.tsv") %>% 
  filter(!is.na(ENA))


pf7meta_calls_hrp2_deleted = pf7meta_calls %>% 
  filter(HRP2 == "del")

pf7meta_calls_hrp3_deleted = pf7meta_calls %>% 
  filter(HRP3 == "del")

pf7meta_calls_hrp2_deleted_filt = pf7meta_calls_hrp2_deleted %>% 
  filter(Sample %!in% samples_chr08_noCovNoSucEnds)

pf7meta_calls_hrp3_deleted_filt = pf7meta_calls_hrp3_deleted %>% 
  filter(Sample %!in% samples_chr13_noCovNoSucEnds)

temp = pf7meta %>% 
  filter(Sample %in% pf7meta_calls_hrp2_deleted_filt$Sample | 
           Sample %in% pf7meta_calls_hrp3_deleted_filt$Sample)

currentSamplesToInvestigate = temp$Sample
currentSamplesToInvestigate = (hold %>% filter(sample %!in% allMeta_filt$sample))$sample
allBasicInfo_highCov_butLowInAreasofInterest_mod = allBasicInfo %>%
  filter(
    sample %fin% currentSamplesToInvestigate
  ) %>%
  group_by(sample) %>%
  mutate(perBaseCoverageNorm = perBaseCoverage / medianPerBaseCov_inGenes) %>%
  mutate(perBaseCoverageNormRounded = round(perBaseCoverageNorm / 0.5) *
           0.5) %>%
  mutate(id = paste0(`#chrom`, "-", start, "-", end))


allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp = allBasicInfo_highCov_butLowInAreasofInterest_mod %>%
  select(sample, id, perBaseCoverageNorm) %>%
  spread(id, perBaseCoverageNorm)


allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat = as.matrix(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp[,2:ncol(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp)])
rownames(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat) = allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp$sample

#cap it at 2 
allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat[allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat>2] =2

chroms = gsub("-.*", "", colnames(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat))

# get the regions 
regions = allBasicInfo %>% 
  filter(.$sample[1] == sample) %>% 
  select(1:6, extraField0) %>% 
  unique()
# rename them based on importance 
regions = regions%>%
  mutate(id = paste0(`#chrom`, "-", start, "-", end)) %>% 
  arrange(id) %>% 
  mutate(inGene = !is.na(extraField0)) %>% 
  mutate(geneType = ifelse(grepl("histidine-rich protein II", extraField0), "HRP", "other" ) )%>% 
  mutate(geneType = ifelse(grepl("ribosomal RNA", extraField0), "rRNA", geneType ) )%>% 
  mutate(geneType = ifelse(grepl("332", extraField0), "Pf332", geneType ) ) %>%
  mutate(sharedRegion = ifelse((`#chrom` == "Pf3D7_11_v3" &
                                  start >= 1918028 &
                                  end <= 1933288) |
                                 `#chrom` == "Pf3D7_13_v3" &
                                 start >= 2792021 &
                                 end <= 2807295,
                               "shared",
                               "other"
  )) %>% 
  mutate(afterSharedRegion = (`#chrom` == "Pf3D7_11_v3" &
                                  start >= 1933288) |
                                 (`#chrom` == "Pf3D7_13_v3" &
                                 start >= 2807295)) %>%
  mutate(chrom = `#chrom`)


regions = regions[match(colnames(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat), regions$id),]
allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat_round = round(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat/ 0.5)*0.5
metaSelected = allMeta[match(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp$sample, allMeta$sample), ]

library(circlize)
col_fun = colorRamp2(c(0, 1, 2), c(heat.colors(3)))
allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat_round_noLabs = allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat_round
rownames(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat_round_noLabs) = NULL
colnames(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat_round_noLabs) = NULL
RowLabs = metaSelected$BiologicalSample
RowLabs[metaSelected$site != "LabIsolate" | is.na(metaSelected$site)] = ""
rownames(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat_round_noLabs) = RowLabs
regionNames = sort(unique(c(metaSelected$country, metaSelected$region, metaSelected$secondaryRegion)))
regionColors = scheme$hex(length(regionNames))
names(regionColors) = regionNames

topAnnoDf  = regions[,c("inGene", "geneType", "sharedRegion", "afterSharedRegion", "chrom")] %>% as.data.frame()
topAnnoColors = createColorListFromDf(topAnnoDf) 

topAnnoColors[["inGene"]] = c("TRUE" = tableau_color_pal()(8)[5], "FALSE" = paste0(tableau_color_pal()(8)[3],"FF"))
topAnnoColors[["sharedRegion"]] = c("other" = paste0(tableau_color_pal()(8)[7], "FF"), "shared" = tableau_color_pal()(8)[8])
topAnnoColors[["afterSharedRegion"]] = c("TRUE" = tableau_color_pal()(8)[6], "FALSE" = paste0(tableau_color_pal()(8)[5],"FF"))

rowAnnoDf  = metaSelected[,c("country", "region", "secondaryRegion")] %>% rename(continent = secondaryRegion) %>% as.data.frame()
rowAnnoColors = createColorListFromDf(rowAnnoDf) 

rowAnnoColors[["continent"]] = c("AFRICA"  =   tableau_color_pal()(8)[1], 
                                "ASIA" =      tableau_color_pal()(8)[7], 
                                "OCEANIA"  =  tableau_color_pal()(8)[2], 
                                "S_AMERICA" = tableau_color_pal()(8)[3])


topAnno = HeatmapAnnotation(df = topAnnoDf, 
                            col = topAnnoColors)
sideAnno = rowAnnotation(df = rowAnnoDf, 
                         col = rowAnnoColors)
allCoverageHeatMap = Heatmap(allBasicInfo_highCov_butLowInAreasofInterest_mod_cov_sp_mat_round, cluster_columns = F, 
        col = col_fun,
        name = "coverage",
        top_annotation = topAnno, 
        right_annotation = sideAnno)


cairo_pdf("investigate_allHeatmap_initial_windows.pdf", width = 20, height = 20)
print(allCoverageHeatMap)
dev.off()

```