Simulation Setup and Visualization

In this tutorial, we will use the cancerSimCraft package to simulate a real tumor evolution scenario based on the CHISEL paper (Figure 3d of Zaccaria and Raphael, Nat Biotechnol 2021) and generate visualizations of simulation blueprint (ground truth), including clonal event tree, haplotype copy number profiles and a total copy number profile.

Load Required Libraries and Data

First, load the necessary libraries and data.

# Load libraries 
library(dplyr)
library(ggraph)
library(igraph)
library(cancerSimCraft)

# Read event table, sub_seg_list 
# sub_seg_list: A list of sub-segments for each chromosome.
sub_seg_list <- readRDS(file = "./tutorials_data/sub_seg_list.rds")
# hg19_chr_arm_table: A table containing chromosome arm information for the hg19 genome.
hg19_chr_arm_table <- readRDS(file = "./tutorials_data/hg19_chr_arm_table.rds")

# filter the hg19_chr_arm_table to include only autosomes (chromosomes 1–22).
chr_arm_table <- hg19_chr_arm_table %>%
  filter(chrom %in% paste0("chr", 1:22))

Create Initial Segment List

Generate an initial segment list using the create_initial_seg_list function. This function creates baseline segments for maternal and paternal haplotypes.

# Create initial segment list
initial_seg_list <- create_initial_seg_list(chr_arm_table = chr_arm_table)

Adjust Sub-Segment List

Adjust the sub_seg_list to ensure 1-based coordinates.

# Adjust sub-segment list for 1-based coordinates
initial_sub_seg_list <- sub_seg_list
initial_sub_seg_list <- lapply(initial_sub_seg_list, function(df) {
  df$ref_start <- df$ref_start + 1
  df$ori_start <- df$ori_start + 1
  df$start <- df$start + 1
  return(df)
})

Load Event Table

Load the event table, which contains CNV events for each clone.

# read event table 
initial_event_table <- read.table(file = "./tutorials_data/chisel_event_table.tsv", sep = "\t", header = TRUE)

Set Up Tumor Clone Tree

Create a tumor clone tree.

# Set up tumor clone tree
chisel_tree <- igraph::graph_from_literal(I-+MRCA-+II, MRCA-+IV-+V-+VI, V-+III)

Create Edge Event Table

Generate an edge event table using the create_edge_event_table function. This table summarizes events associated with each edge in the tree.

# Create edge event table
edge_event_table <- create_edge_event_table(event_table = initial_event_table,
                                            tree = chisel_tree,
                                            anno_cols = c("haplotype_abbr", "region_name", "CN_change"))

# View the edge event table
print(edge_event_table)
## # A tibble: 6 × 3
##   edge_name edge_label                                                  n_events
##   <chr>     <chr>                                                          <int>
## 1 I_MRCA    "M:sub_maternal_chr6_1:-1\nM:sub_maternal_chr6_2:-1\nP:sub…       12
## 2 MRCA_II   "M:chr6p:-1\nP:chr2:-1\nP:chr3:-1\nP:chr10p:-1"                    4
## 3 MRCA_IV   "M:chr4:-1\nP:chr8:-1"                                             2
## 4 IV_V      "M:chr2:-1"                                                        1
## 5 V_VI      "M:chr3:-1"                                                        1
## 6 V_III     "P:chr2:1"                                                         1

Visualize the Clonal Event Tree

Visualize the event tree using the plot_event_tree function.

# Derive clonal event tree
plot_event_tree_ob <- plot_event_tree(edge_event_table = edge_event_table, tree = chisel_tree)
# Visualize clonal event tree 
plot_event_tree_ob$tree_plot

Generate Chromosome Boundary Data

Generate chromosome boundary data using the generate_chr_boundary_data function. This function combines chromosome arm information with sub-segment data to create genomic windows.

# Generate chromosome boundary data
chr_boundary_dat <- generate_chr_boundary_data(chr_arm_table = chr_arm_table,
                                               sub_seg_list = sub_seg_list, 
                                               chr_names = paste0("chr", 1:22))

Identify Clone Ancestors

For each clone, identify its ancestors in the tree using the get_clone_ancestors function.

# Get clone ancestors 
clone_names <- names(V(chisel_tree))
clone_ancestors <- lapply(clone_names, get_clone_ancestors, tree = chisel_tree)
names(clone_ancestors) <- clone_names

Extract Clone Events

Extract events for each clone and its ancestors.

# Initialize a list to store events for each clone
clone_events <- list()

# Iterate through each clone and extract events
for (clone in names(clone_ancestors)) {
  ancestors <- clone_ancestors[[clone]]
  events_for_clone <- initial_event_table[initial_event_table$child %in% ancestors, ]
  clone_events[[clone]] <- events_for_clone
}

Generate Ground Truth Copy Number Profiles

Generate ground truth copy number profiles for maternal and paternal haplotypes using the generate_blueprint_cn_profiles function.

# Generate ground truth CN profiles
ground_truth_cn <- generate_blueprint_cn_profiles(clone_events = clone_events,
                                                 genome_window_vector = chr_boundary_dat$genome_window_vector, 
                                                 segment_info = do.call(rbind, chr_boundary_dat$segment_info))

Extend Copy Number Profiles for Equal-Sized Bins

Extend the copy number profiles to equal-sized bins using the extend_blueprint_cn_for_equal_binfunction.

# Specify bin size
bin_unit <- 5000000  # 5 Mb bins

# Calculate window sizes
window_sizes <- calculate_window_sizes(chr_boundary_dat$genome_window_vector)

# Extend CN profiles
extended_ground_truth_cn <- lapply(ground_truth_cn, extend_blueprint_cn_for_equal_bin, window = window_sizes, bin_unit = bin_unit)

Sort Clones and Visualize Copy Number Profiles

Sort the clones and visualize the copy number profiles using the plot_truth_heatmap function.

# Sort clones
desired_clone_order <- c("III", "IV", "V", "VI", "I", "II", "MRCA")
sorted_extended_ground_truth_cn <- list() 
sorted_extended_ground_truth_cn$paternal <- extended_ground_truth_cn$paternal[desired_clone_order,]
sorted_extended_ground_truth_cn$maternal <- extended_ground_truth_cn$maternal[desired_clone_order,]

# Generate mock window data for visualization
mock_window_data <- generate_mock_window_data(colnames(extended_ground_truth_cn$paternal))
chr_bar <- draw_chr_bar(mock_window_data)

# Define colors for clones
goodColor <- c("pink", "gold", "darkseagreen", "firebrick4", "lightskyblue", "orange1", "lightyellow", "royalblue", "moccasin","violet", "olivedrab3", "blueviolet", "yellow2", "darkgreen", "royalblue4", "lightsalmon3", "mediumaquamarine", "coral1",  "darkolivegreen1", "cyan4", "tan4", "grey80")
clone_color_anno <- goodColor[1:length(desired_clone_order)]
names(clone_color_anno) <- desired_clone_order

# Plot maternal and paternal CN profiles
max_int <- 7
for(haplotype in c("maternal", "paternal")){
  plot_truth_heatmap(seg_dat = as.matrix(sorted_extended_ground_truth_cn[[haplotype]]),
                     title = paste0("Chisel Sim Blueprint ", haplotype, " Copy Number"),
                     chr_bar = chr_bar,
                     integer_col = TRUE, 
                     max_int = max_int, 
                     clone_identity_vector = desired_clone_order,
                     clone_color_anno = clone_color_anno)
}

# Plot total CN profile
ground_truth_total_cn <- as.matrix(sorted_extended_ground_truth_cn[['maternal']] + sorted_extended_ground_truth_cn[['paternal']])
plot_truth_heatmap(seg_dat = ground_truth_total_cn,
                   title = paste0("Chisel Sim Blueprint Total Copy Number"),
                   chr_bar = chr_bar,
                   integer_col = TRUE, 
                   max_int = max_int, 
                   clone_identity_vector = desired_clone_order,
                   clone_color_anno = clone_color_anno)

Process CNV Events and Chromosome Lengths

Process CNV events and calculate chromosome lengths for maternal and paternal haplotypes.

# Process CNV events
transformed_event_table <- process_cnv_events(initial_event_table)

# Calculate chromosome lengths
chr_lengths <- chr_arm_table %>%
  group_by(chrom) %>%
  summarise(length = max(end), .groups = 'drop') %>%
  # Arrange the data frame by numeric chromosome order
  arrange(as.numeric(gsub("chr", "", chrom))) %>%
  # Convert to a named vector
  with(setNames(length, chrom))

# Store chromosome lengths for maternal and paternal haplotypes
initial_chr_lengths <- list("maternal" = chr_lengths,
                            "paternal" = chr_lengths)

# Save complete environment for full reproducibility
save.image(file = "./tutorials_data/advanced_4_simulation_setting.RData")