In this tutorial, we will use the cancerSimCraft package
to simulate a real tumor evolution scenario based on the CHISEL paper
(Figure 3d of Zaccaria and Raphael, Nat Biotechnol 2021) and generate
visualizations of simulation blueprint (ground truth), including clonal
event tree, haplotype copy number profiles and a total copy number
profile.
First, load the necessary libraries and data.
# Load libraries
library(dplyr)
library(ggraph)
library(igraph)
library(cancerSimCraft)
# Read event table, sub_seg_list
# sub_seg_list: A list of sub-segments for each chromosome.
sub_seg_list <- readRDS(file = "./tutorials_data/sub_seg_list.rds")
# hg19_chr_arm_table: A table containing chromosome arm information for the hg19 genome.
hg19_chr_arm_table <- readRDS(file = "./tutorials_data/hg19_chr_arm_table.rds")
# filter the hg19_chr_arm_table to include only autosomes (chromosomes 1–22).
chr_arm_table <- hg19_chr_arm_table %>%
filter(chrom %in% paste0("chr", 1:22))
Generate an initial segment list using
the create_initial_seg_list function. This function creates
baseline segments for maternal and paternal haplotypes.
# Create initial segment list
initial_seg_list <- create_initial_seg_list(chr_arm_table = chr_arm_table)
Adjust the sub_seg_list to ensure 1-based
coordinates.
# Adjust sub-segment list for 1-based coordinates
initial_sub_seg_list <- sub_seg_list
initial_sub_seg_list <- lapply(initial_sub_seg_list, function(df) {
df$ref_start <- df$ref_start + 1
df$ori_start <- df$ori_start + 1
df$start <- df$start + 1
return(df)
})
Load the event table, which contains CNV events for each clone.
# read event table
initial_event_table <- read.table(file = "./tutorials_data/chisel_event_table.tsv", sep = "\t", header = TRUE)
Create a tumor clone tree.
# Set up tumor clone tree
chisel_tree <- igraph::graph_from_literal(I-+MRCA-+II, MRCA-+IV-+V-+VI, V-+III)
Generate an edge event table using
the create_edge_event_table function. This table summarizes
events associated with each edge in the tree.
# Create edge event table
edge_event_table <- create_edge_event_table(event_table = initial_event_table,
tree = chisel_tree,
anno_cols = c("haplotype_abbr", "region_name", "CN_change"))
# View the edge event table
print(edge_event_table)
## # A tibble: 6 × 3
## edge_name edge_label n_events
## <chr> <chr> <int>
## 1 I_MRCA "M:sub_maternal_chr6_1:-1\nM:sub_maternal_chr6_2:-1\nP:sub… 12
## 2 MRCA_II "M:chr6p:-1\nP:chr2:-1\nP:chr3:-1\nP:chr10p:-1" 4
## 3 MRCA_IV "M:chr4:-1\nP:chr8:-1" 2
## 4 IV_V "M:chr2:-1" 1
## 5 V_VI "M:chr3:-1" 1
## 6 V_III "P:chr2:1" 1
Visualize the event tree using
the plot_event_tree function.
# Derive clonal event tree
plot_event_tree_ob <- plot_event_tree(edge_event_table = edge_event_table, tree = chisel_tree)
# Visualize clonal event tree
plot_event_tree_ob$tree_plot
Generate chromosome boundary data using
the generate_chr_boundary_data function. This function
combines chromosome arm information with sub-segment data to create
genomic windows.
# Generate chromosome boundary data
chr_boundary_dat <- generate_chr_boundary_data(chr_arm_table = chr_arm_table,
sub_seg_list = sub_seg_list,
chr_names = paste0("chr", 1:22))
For each clone, identify its ancestors in the tree using
the get_clone_ancestors function.
# Get clone ancestors
clone_names <- names(V(chisel_tree))
clone_ancestors <- lapply(clone_names, get_clone_ancestors, tree = chisel_tree)
names(clone_ancestors) <- clone_names
Extract events for each clone and its ancestors.
# Initialize a list to store events for each clone
clone_events <- list()
# Iterate through each clone and extract events
for (clone in names(clone_ancestors)) {
ancestors <- clone_ancestors[[clone]]
events_for_clone <- initial_event_table[initial_event_table$child %in% ancestors, ]
clone_events[[clone]] <- events_for_clone
}
Generate ground truth copy number profiles for maternal and paternal
haplotypes using
the generate_blueprint_cn_profiles function.
# Generate ground truth CN profiles
ground_truth_cn <- generate_blueprint_cn_profiles(clone_events = clone_events,
genome_window_vector = chr_boundary_dat$genome_window_vector,
segment_info = do.call(rbind, chr_boundary_dat$segment_info))
Extend the copy number profiles to equal-sized bins using
the extend_blueprint_cn_for_equal_binfunction.
# Specify bin size
bin_unit <- 5000000 # 5 Mb bins
# Calculate window sizes
window_sizes <- calculate_window_sizes(chr_boundary_dat$genome_window_vector)
# Extend CN profiles
extended_ground_truth_cn <- lapply(ground_truth_cn, extend_blueprint_cn_for_equal_bin, window = window_sizes, bin_unit = bin_unit)
Sort the clones and visualize the copy number profiles using
the plot_truth_heatmap function.
# Sort clones
desired_clone_order <- c("III", "IV", "V", "VI", "I", "II", "MRCA")
sorted_extended_ground_truth_cn <- list()
sorted_extended_ground_truth_cn$paternal <- extended_ground_truth_cn$paternal[desired_clone_order,]
sorted_extended_ground_truth_cn$maternal <- extended_ground_truth_cn$maternal[desired_clone_order,]
# Generate mock window data for visualization
mock_window_data <- generate_mock_window_data(colnames(extended_ground_truth_cn$paternal))
chr_bar <- draw_chr_bar(mock_window_data)
# Define colors for clones
goodColor <- c("pink", "gold", "darkseagreen", "firebrick4", "lightskyblue", "orange1", "lightyellow", "royalblue", "moccasin","violet", "olivedrab3", "blueviolet", "yellow2", "darkgreen", "royalblue4", "lightsalmon3", "mediumaquamarine", "coral1", "darkolivegreen1", "cyan4", "tan4", "grey80")
clone_color_anno <- goodColor[1:length(desired_clone_order)]
names(clone_color_anno) <- desired_clone_order
# Plot maternal and paternal CN profiles
max_int <- 7
for(haplotype in c("maternal", "paternal")){
plot_truth_heatmap(seg_dat = as.matrix(sorted_extended_ground_truth_cn[[haplotype]]),
title = paste0("Chisel Sim Blueprint ", haplotype, " Copy Number"),
chr_bar = chr_bar,
integer_col = TRUE,
max_int = max_int,
clone_identity_vector = desired_clone_order,
clone_color_anno = clone_color_anno)
}
# Plot total CN profile
ground_truth_total_cn <- as.matrix(sorted_extended_ground_truth_cn[['maternal']] + sorted_extended_ground_truth_cn[['paternal']])
plot_truth_heatmap(seg_dat = ground_truth_total_cn,
title = paste0("Chisel Sim Blueprint Total Copy Number"),
chr_bar = chr_bar,
integer_col = TRUE,
max_int = max_int,
clone_identity_vector = desired_clone_order,
clone_color_anno = clone_color_anno)
Process CNV events and calculate chromosome lengths for maternal and paternal haplotypes.
# Process CNV events
transformed_event_table <- process_cnv_events(initial_event_table)
# Calculate chromosome lengths
chr_lengths <- chr_arm_table %>%
group_by(chrom) %>%
summarise(length = max(end), .groups = 'drop') %>%
# Arrange the data frame by numeric chromosome order
arrange(as.numeric(gsub("chr", "", chrom))) %>%
# Convert to a named vector
with(setNames(length, chrom))
# Store chromosome lengths for maternal and paternal haplotypes
initial_chr_lengths <- list("maternal" = chr_lengths,
"paternal" = chr_lengths)
# Save complete environment for full reproducibility
save.image(file = "./tutorials_data/advanced_4_simulation_setting.RData")