---
title: "Processing Samples with Chr11 Deletions TARE1"
---
```{r setup, echo=FALSE, message=FALSE}
source("../../common.R")
```
```{r}
# Metadata tables read from the shared meta directory.
allMetaSamplesByBio <- readr::read_tsv("../../meta/metadata/metaByBioSample.tab.txt")
allMetaSamples <- readr::read_tsv("../../meta/metadata/meta.tab.txt")

# Deletion calls, plus the subsets selected by the two logical flag columns.
allMetaDeletionCalls <- readr::read_tsv("../metaSelected.tab.txt")
allMetaDeletionCalls_possiblyHRP2Deleted <- filter(allMetaDeletionCalls, possiblyHRP2Deleted)
allMetaDeletionCalls_possiblyChr11Deleted <- filter(allMetaDeletionCalls, possiblyChr11Deleted)
allMetaDeletionCalls_Hrp3pattern2 <- readr::read_tsv("../allMetaDeletionCalls_Hrp3pattern2.tab.txt")

# COI calls; keep rows where at least one of the two medians differs from 1
# and attach the BiologicalSample column from the sample metadata.
realmccoilCoiCalls <- readr::read_tsv("../wgs_variants/THEREALMcCOIL/categorical_method/real_mccoil_COI_calls.tsv")
realmccoilCoiCalls_poly <- realmccoilCoiCalls %>%
  filter(!(random_median == 1 & topHE_median == 1)) %>%
  left_join(select(allMetaSamples, sample, BiologicalSample))
```
## chr11
```{r}
# Read the headerless window BED (columns arrive as X1, X2, ...) and add a
# "chrom-start-end" id, a chrom copy and the window length.
finalHRPII_HRPIII_windows <- readr::read_tsv(
  "../../windowAnalysis/windows/finalHRPII_HRPIII_windows_withTuned.bed",
  col_names = FALSE
) %>%
  mutate(
    genomicID = paste0(X1, "-", X2, "-", X3),
    chrom = X1,
    len = X3 - X2
  ) %>%
  rename(name = X4)

# Windows on Pf3D7_11_v3 that lie fully inside 1950157-1986814.
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full <- finalHRPII_HRPIII_windows %>%
  filter(X1 == "Pf3D7_11_v3", X2 >= 1950157, X3 <= 1986814)

# Collapse those windows into one spanning region per chromosome, laid out as
# chrom, start, end, name, len, strand.
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out <-
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full %>%
  group_by(X1) %>%
  summarise(
    X2 = min(X2),
    X3 = max(X3)
  ) %>%
  mutate(
    name = paste0(X1, "-", X2, "-", X3),
    len = X3 - X2,
    strand = "+"
  )

# Write the first six columns of both tables as headerless BED files.
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out %>%
  select(1:6) %>%
  write_tsv(
    "finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out.bed",
    col_names = FALSE
  )
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full %>%
  select(1:6) %>%
  write_tsv(
    "finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select.bed",
    col_names = FALSE
  )
```
```{bash}
# Tile the selected chr11 windows (--windowSize 100, --step 50, --minLen 50),
# pipe them through the gene-intersection step against the Pf3D7 GFF, and
# write the annotated windows to a new bed file.
elucidator createWindowsInRegions \
  --bed finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select.bed \
  --step 50 \
  --windowSize 100 \
  --minLen 50 |
  elucidator bedGetIntersectingGenesInGff \
    --gff /tank/data/genomes/plasmodium/genomes/pf/info/gff/Pf3D7.gff \
    --extraAttributes description \
    --bed STDIN \
    --overWrite \
    --out finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select_step50_windowSize100.bed

# Copy the resulting bed file to the remote bed directory.
rsync -raPh \
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select_step50_windowSize100.bed \
  hathawan@calderon.barrel-of-knowledge.info:/tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22
```
```{bash, eval = F}
# Launch the commands in runRegionCmds.txt in the background, with the
# {DIRNAME}/{SAMPLE}/{BEDFNP}/{NCPUS} placeholders filled from --additionalFields.
nohup elucidator runMultipleCommands \
  --cmdFile runRegionCmds.txt \
  --additionalFields "{DIRNAME}:finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select_step50_windowSize100_NoTrim;{SAMPLE}:/tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/allMetaDeletionCalls_possiblyChr11Deleted_samples.txt;{BEDFNP}:\"/tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select_step50_windowSize100.bed\";{NCPUS}:4 --keepTemporaryFiles --qualCheck 5 --keepImproperMates" \
  --replaceFields \
  --numThreads 11 &

# The remaining steps run from inside the output directory named above.
cd finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select_step50_windowSize100_NoTrim

# Population clustering over the per-sample results; output goes to popClustering.
PathWeaver runProcessClustersOnRecon \
  --overWriteDir \
  --dout popClustering \
  --pat _finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select_step50_windowSize100_NoTrim \
  --numThreads 5 \
  --groupingsFile /tank/data/plasmodium/falciparum/pfdata/metadata/meta.tab.txt \
  --samples /tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/allMetaDeletionCalls_possiblyChr11Deleted_samples.txt

# Count extracted reads matching the two --seqPat repeat patterns per window.
elucidator countPWExtractedReadsWithPattern \
  --seqPat "TT[CT]AGGGTT[CT]AGGG","CCCT[AG]AACCCT[AG]AA" \
  --pat _finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select_step50_windowSize100_NoTrim \
  --bedFnp /tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select_step50_windowSize100.bed \
  --minReadCounts 1 \
  --numThreads 10 \
  --overWrite \
  --out finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_select_step50_windowSize100_countTares.txt \
  --samples /tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/allMetaDeletionCalls_possiblyChr11Deleted_samples.txt
```
```{bash}
# Tile the full chr11 region (--windowSize 100, --step 50, --minLen 50), pipe
# the windows through the gene-intersection step against the Pf3D7 GFF, and
# write the annotated windows to a new bed file.
elucidator createWindowsInRegions \
  --bed finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out.bed \
  --step 50 \
  --windowSize 100 \
  --minLen 50 |
  elucidator bedGetIntersectingGenesInGff \
    --gff /tank/data/genomes/plasmodium/genomes/pf/info/gff/Pf3D7.gff \
    --extraAttributes description \
    --bed STDIN \
    --overWrite \
    --out finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100.bed

# Copy the resulting bed file to the remote bed directory.
rsync -raPh \
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100.bed \
  hathawan@calderon.barrel-of-knowledge.info:/tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22
```
```{bash, eval = F}
# Launch the commands in runRegionCmds.txt in the background, with the
# {DIRNAME}/{SAMPLE}/{BEDFNP}/{NCPUS} placeholders filled from --additionalFields.
nohup elucidator runMultipleCommands \
  --cmdFile runRegionCmds.txt \
  --additionalFields "{DIRNAME}:finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim;{SAMPLE}:/tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/allMetaDeletionCalls_possiblyChr11Deleted_samples.txt;{BEDFNP}:\"/tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100.bed\";{NCPUS}:4 --keepTemporaryFiles --qualCheck 5 --keepImproperMates" \
  --replaceFields \
  --numThreads 11 &

# The remaining steps run from inside the output directory named above.
cd finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim

# Population clustering over the per-sample results; output goes to popClustering.
PathWeaver runProcessClustersOnRecon \
  --overWriteDir \
  --dout popClustering \
  --pat _finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim \
  --numThreads 5 \
  --groupingsFile /tank/data/plasmodium/falciparum/pfdata/metadata/meta.tab.txt \
  --samples /tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/allMetaDeletionCalls_possiblyChr11Deleted_samples.txt

# Count extracted reads matching the two --seqPat repeat patterns per window.
elucidator countPWExtractedReadsWithPattern \
  --seqPat "TT[CT]AGGGTT[CT]AGGG","CCCT[AG]AACCCT[AG]AA" \
  --pat _finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim \
  --bedFnp /tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100.bed \
  --minReadCounts 1 \
  --numThreads 10 \
  --overWrite \
  --out finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_countTares.txt \
  --samples /tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/allMetaDeletionCalls_possiblyChr11Deleted_samples.txt
```
```{r}
# Earlier variant that read a sample/region file directly (kept for reference):
# finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion = readr::read_tsv("finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim/finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_tare1_sampleRegion.txt", col_names = c("sample", "region", "tare1Direction")) %>%
# select(sample, region) %>%
# unique() %>%
# separate(region, into = c("chrom", "start", "end", "strand"), sep = "-", convert = T, remove = F)

# Read the pattern counts, sum them per sample/region/pattern, then reduce to
# the distinct sample/region pairs and split the region id on "-" into
# chrom/start/end/strand (the original region column is kept).
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_countTares <-
  readr::read_tsv(
    "finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim/finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_countTares.txt"
  ) %>%
  group_by(sample, region, seqPat) %>%
  summarise(count = sum(count)) %>%
  select(sample, region) %>%
  distinct() %>%
  separate(
    region,
    into = c("chrom", "start", "end", "strand"),
    sep = "-",
    convert = TRUE,
    remove = FALSE
  )
```
```{r}
# Per-sample coverage summary statistics.
cov <- readr::read_tsv("../../meta/allCov_summaryStats.tab.txt.gz")

# Per-window report for every sample. Coverage is normalised by the sample's
# median coverage inside or outside genes (depending on whether the window has
# a gene annotation), values in (0.10, 1] are lifted to 1, and the result is
# rounded. Samples listed in realmccoilCoiCalls_poly are then removed.
allReports <- readr::read_tsv(
  "finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim/popClustering/reports/allBasicInfo.tab.txt.gz"
) %>%
  mutate(inGene = !is.na(extraField0)) %>%
  left_join(cov) %>%
  mutate(
    medianCov = median(perBaseCoverage),
    meanCov = mean(perBaseCoverage),
    perBaseCoverageNorm = ifelse(
      inGene,
      perBaseCoverage / medianPerBaseCov_inGenes,
      perBaseCoverage / medianPerBaseCov_notInGenes
    ),
    perBaseCoverageNormRounded = round(
      ifelse(
        perBaseCoverageNorm > 0.10 & perBaseCoverageNorm <= 1,
        1,
        perBaseCoverageNorm
      )
    )
  ) %>%
  filter(sample %!in% realmccoilCoiCalls_poly$sample)

# Wide table: one row per sample, one column per window name.
allReports_sp <- allReports %>%
  select(sample, name, perBaseCoverageNormRounded) %>%
  spread(name, perBaseCoverageNormRounded)

# Numeric matrix of the window columns, rows labelled by sample, capped at 2.
allReports_sp_mat <- as.matrix(allReports_sp[, -1])
rownames(allReports_sp_mat) <- allReports_sp$sample
allReports_sp_mat[allReports_sp_mat > 2] <- 2
# Font size used for annotation and legend text in the heatmaps.
annotationTextSize <- 20

# Column (window) annotation table: one row per window, taken from the rows of
# the first sample in allReports, with the gene description extracted from
# extraField0.
topAnno <- allReports %>%
  filter(sample == .$sample[1]) %>%
  select(name, extraField0) %>%
  rename(` Gene Description ` = extraField0) %>%
  # Drop everything up to and including "description=".
  mutate(` Gene Description ` = gsub(".*description=", "", ` Gene Description `)) %>%
  # Drop the closing "]" and whatever follows it. The pattern previously had
  # stray spaces around the escaped bracket (" \\ ].*"), so it only matched
  # when whitespace preceded "]" and the trailing text was left in place.
  mutate(` Gene Description ` = gsub("\\].*", "", ` Gene Description `)) %>%
  # For PHIST genes keep only the text from "PHIST" up to the closing ")".
  # Same stray-space fix as above for the ")" pattern.
  mutate(
    ` Gene Description ` = ifelse(
      grepl("PHIST", ` Gene Description `),
      gsub("\\).*", "", gsub(".*PHIST", "PHIST", ` Gene Description `)),
      ` Gene Description `
    )
  )

# For each sample keep the region(s) with the largest start from the countTares
# table, then count how many samples end on each region.
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_count <-
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_countTares %>%
  group_by(sample) %>%
  slice_max(order_by = start) %>%
  group_by(region) %>%
  count(name = "TARE1 Present Chr11 Count")

# Attach the per-region counts to the window annotation. `name` is the only
# column the two tables share, so the key is spelled out explicitly.
topAnno <- topAnno %>%
  left_join(
    finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_count %>%
      rename(name = region),
    by = "name"
  )

# Plain data frame (without the window name) plus colours for the annotation.
topAnnoDf <- topAnno %>%
  select(-name) %>%
  as.data.frame()
topAnnoColors <- createColorListFromDf(topAnnoDf)

# Metadata rows reordered to follow the row order of the coverage matrix.
metaReSelected <- allMetaDeletionCalls[match(rownames(allReports_sp_mat), allMetaDeletionCalls$sample), ]

# Row (sample) annotation: country plus a flag for samples that appear in the
# countTares table. The table is also written to disk.
rowAnno <- tibble(metaReSelected[, c("sample", "country")]) %>%
  mutate(` TARE1 On Chr11 ` = sample %in% finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_countTares$sample)
write_tsv(rowAnno, "finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_rowAnno.tsv")
# %>%
# mutate(tare1Present = sample %in% hrp3Pat2_samplesWithTare1$sample,
# TranspositionWith5 = sample %in% samplesWithTransitionToChr5$sample)

# Plain data frame (without the sample id) plus colours for the annotation.
rowAnnoDf <- rowAnno %>%
  select(-sample) %>%
  as.data.frame()
rowAnnoColors <- createColorListFromDf(rowAnnoDf)
# Row annotation drawn beside the heatmap, built from the sample annotation
# data frame and its colour list.
sideAnno <- rowAnnotation(
  df = rowAnnoDf,
  col = rowAnnoColors,
  gp = gpar(col = "grey10"),
  gap = unit(0.1, "cm"),
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  )
)

# Column annotation drawn above the heatmap. Note that this replaces the
# topAnno table with the annotation object of the same name.
topAnno <- HeatmapAnnotation(
  df = topAnnoDf,
  col = topAnnoColors,
  annotation_name_side = "left",
  annotation_name_gp = gpar(fontsize = annotationTextSize),
  annotation_legend_param = list(
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  )
)
library(circlize)

# Colour scale for coverage values 0, 1 and 2.
col_fun <- colorRamp2(c(0, 1, 2), heat.colors(3))

# Copy of the coverage matrix without column labels.
allReports_sp_mat_nolab <- allReports_sp_mat
colnames(allReports_sp_mat_nolab) <- NULL

# Coverage heatmap with columns kept in input order. The default cell
# rectangles are disabled and each cell is drawn by cell_fun at 90% of its
# height.
allReports_sp_mat_hm <- Heatmap(
  allReports_sp_mat_nolab,
  name = "coverage",
  col = col_fun,
  cluster_columns = FALSE,
  top_annotation = topAnno,
  right_annotation = sideAnno,
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  rect_gp = gpar(type = "none"),
  cell_fun = function(j, i, x, y, width, height, fill) {
    grid.rect(
      x = x,
      y = y,
      width = width,
      height = height * 0.9,
      gp = gpar(fill = fill, col = NA)
    )
  },
  heatmap_legend_param = list(
    at = c(0, 0.5, 1, 1.5, 2),
    labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x"),
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  )
)
```
```{r}
#| fig-column: screen-inset-shaded
#| fig-height: 10
#| fig-width: 20
# Render the clustered coverage heatmap with all legends merged at the bottom.
draw(
  allReports_sp_mat_hm,
  background = "transparent",
  merge_legend = TRUE,
  heatmap_legend_side = "bottom",
  annotation_legend_side = "bottom"
)
```
```{r}
# Write the clustered coverage heatmap to a PDF.
pdf(
  "finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim.pdf",
  width = 22.5,
  height = 12.5,
  useDingbats = FALSE
)
draw(
  allReports_sp_mat_hm,
  background = "transparent",
  merge_legend = TRUE,
  heatmap_legend_side = "bottom",
  annotation_legend_side = "bottom",
  padding = unit(c(20, 2, 2, 2), "mm")
)
dev.off()
```
```{r}
# Zero-filled grid of every sample x window combination in the coverage matrix.
fillterDf <- expand_grid(
  sample = rownames(allReports_sp_mat),
  region = colnames(allReports_sp_mat),
  marker = 0
)

# Mark each sample/region pair from the countTares table with 1, stack it on
# the zero grid and sum, so pairs missing from countTares stay at 0.
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded <-
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_countTares %>%
  select(sample, region) %>%
  mutate(marker = 1) %>%
  bind_rows(fillterDf) %>%
  group_by(sample, region) %>%
  summarise(marker = sum(marker))

# Wide table: one row per sample, one column per region.
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp <-
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded %>%
  spread(region, marker)

# Matrix of the region columns, rows labelled by sample ...
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat <-
  as.matrix(
    finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp[, -1]
  )
rownames(finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat) <-
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp$sample

# ... reordered so its rows follow the row order of the coverage matrix.
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat <-
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat[
    match(
      rownames(allReports_sp_mat),
      rownames(finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat)
    ),
  ]

# Copy without column labels (row labels are kept).
finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat_nolab <-
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat
colnames(finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat_nolab) <- NULL
#rownames(finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat_nolab) = NULL
# Heatmap of the 0/1 marker matrix: 0 is fully transparent white, 1 is green,
# with no row or column clustering.
# NOTE(review): the heatmap name and legend breaks are the same as in the
# coverage heatmaps; presumably this keeps the page layout identical so the two
# plots can be overlaid. Confirm before relabelling.
allReports_sp_mat_hm_tare1 <- Heatmap(
  finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_sampleRegion_expanded_sp_mat_nolab,
  name = "coverage",
  col = colorRamp2(c(0, 1), c("#FFFFFF00", "green")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  top_annotation = topAnno,
  right_annotation = sideAnno,
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  rect_gp = gpar(type = "none"),
  cell_fun = function(j, i, x, y, width, height, fill) {
    grid.rect(
      x = x,
      y = y,
      width = width,
      height = height * 0.9,
      gp = gpar(fill = fill, col = NA)
    )
  },
  heatmap_legend_param = list(
    at = c(0, 0.5, 1, 1.5, 2),
    labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x"),
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  )
)
# Coverage heatmap with rows and columns both kept in input order.
allReports_sp_mat_hm_noCluster <- Heatmap(
  allReports_sp_mat_nolab,
  name = "coverage",
  col = col_fun,
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  top_annotation = topAnno,
  right_annotation = sideAnno,
  row_dend_width = unit(5, "cm"),
  column_dend_height = unit(5, "cm"),
  rect_gp = gpar(type = "none"),
  cell_fun = function(j, i, x, y, width, height, fill) {
    grid.rect(
      x = x,
      y = y,
      width = width,
      height = height * 0.9,
      gp = gpar(fill = fill, col = NA)
    )
  },
  heatmap_legend_param = list(
    at = c(0, 0.5, 1, 1.5, 2),
    labels = c("0", "0.5x", "1.0x", "1.5x", ">=2x"),
    labels_gp = gpar(fontsize = annotationTextSize),
    title_gp = gpar(fontsize = annotationTextSize, fontface = "bold")
  )
)
```
```{r}
# Two-page PDF: the non-clustered coverage heatmap, then the marker heatmap,
# both drawn with the same legend placement and padding.
pdf(
  "finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim_noCluster.pdf",
  width = 22.5,
  height = 12.5,
  useDingbats = FALSE
)
draw(
  allReports_sp_mat_hm_noCluster,
  background = "transparent",
  merge_legend = TRUE,
  heatmap_legend_side = "bottom",
  annotation_legend_side = "bottom",
  padding = unit(c(20, 2, 2, 2), "mm")
)
draw(
  allReports_sp_mat_hm_tare1,
  background = "transparent",
  merge_legend = TRUE,
  heatmap_legend_side = "bottom",
  annotation_legend_side = "bottom",
  padding = unit(c(20, 2, 2, 2), "mm")
)
dev.off()
```
```{bash, eval = F}
export PATH="/home/hathawan/tempHathaway/bin:$PATH"

# Keep the window line matching 1968375 plus the 50 lines after it.
grep -E -A50 1968375 \
  /tank/data/plasmodium/falciparum/beds/hrps/redesign_2020_11_22/finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100.bed \
  > finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_aroundEndOfPf332.bed

# For each sample: take columns 16-21 of the mate-location output, drop the
# header line and any line containing "*", then sort, merge and convert to
# six-column bed on stdout.
for sample in PW0069-C QE0439-C; do
  elucidator getMateMapLocationForRegion \
    --bam /tank/data/plasmodium/falciparum/pfdata/bams/${sample}.sorted.bam \
    --bed finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_aroundEndOfPf332.bed |
    cut -f16-21 |
    tail -n +2 |
    grep -Ev "\*" |
    bedtools sort |
    bedtools merge |
    elucidator bed3ToBed6 --bed STDIN
done
```
```{bash, eval = F}
# Launch the commands in runRegionCmds.txt in the background for sample
# PW0069-C, using the merged aroundEndOfPf332 bed named in --additionalFields.
nohup elucidator runMultipleCommands \
  --cmdFile runRegionCmds.txt \
  --additionalFields "{DIRNAME}:merged_finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_aroundEndOfPf332;{SAMPLE}:PW0069-C;{BEDFNP}:\"/tank/data/plasmodium/falciparum/pfdata/finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_NoTrim/merged_finalHRPII_HRPIII_windows_investTelemeraseHealing_chr11_full_out_step50_windowSize100_aroundEndOfPf332.bed\";{NCPUS}:4 --keepTemporaryFiles --qualCheck 5 --keepImproperMates" \
  --replaceFields \
  --numThreads 11 &
```