Have you read Visualizing Algorithms by Mike Bostock? It’s a pure gold post. In that post Mike show a static representation of a sort algorith and obvious it will fun to replicate that image with ggplot. So here we go.

library(dplyr)
library(tidyr)
library(ggplot2)
library(ggthemes)
library(viridis)
theme_set(theme_map())

We need some sorts algorihms. In this link you can see some algorithms.

We start with Insertion Sort:

insertion_sort_steps <- function(x  = sample(1:15)){
  
  msteps <- matrix(data = x, ncol = length(x))
  
  for (i in 2:length(x)) {
    
    j <- i
    
    while ((j > 1) && (x[j] < x[j - 1])) {
      
      temp <- x[j]
      x[j] <- x[j - 1]
      x[j - 1] <- temp
      j <- j - 1
      
      msteps <- rbind(msteps, as.vector(x))
      
    }
  }
  
  msteps
  
}

Now to test it and see what the function do:

set.seed(12345)

x <- sample(seq(4))

x
## [1] 3 4 2 1

msteps <- insertion_sort_steps(x)


as.data.frame(msteps)
##   V1 V2 V3 V4
## 1  3  4  2  1
## 2  3  2  4  1
## 3  2  3  4  1
## 4  2  3  1  4
## 5  2  1  3  4
## 6  1  2  3  4

Every row is a step in sort the algorithm (a partial sort). This matrix is a hard to plot so we need a nicer structure. We can transform the matrix to a data_frame with the information of every position of every element in each step.

sort_matix_to_df <- function(msteps){
  
  df <- as.data.frame(msteps, row.names = NULL)
  
  names(df) <- seq(ncol(msteps))
  
  df_steps <- df %>%
    tbl_df() %>% 
    mutate(step = seq(nrow(.))) %>% 
    gather(position, element, -step) %>%
    arrange(step)
  
  df_steps
  
}

And we apply this function to the previous steps matrix.

df_steps <- sort_matix_to_df(msteps)

head(df_steps, 10)
## # A tibble: 10 x 3
##     step position element
##    <int> <chr>      <int>
##  1     1 1              3
##  2     1 2              4
##  3     1 3              2
##  4     1 4              1
##  5     2 1              3
##  6     2 2              2
##  7     2 3              4
##  8     2 4              1
##  9     3 1              2
## 10     3 2              3

The next step will be plot the data frame.

plot_sort <- function(df_steps, size = 5, color.low = "#D1F0E1", color.high = "#524BB4"){
  
  ggplot(df_steps,
         aes(step, position, group = element, color = element, label = element)) +  
    geom_path(size = size, alpha = 1, lineend = "round") +
    scale_colour_gradient(low = color.low, high = color.high) +
    coord_flip() + 
    scale_x_reverse() + 
    theme(legend.position = "none")
  
}

Now compare this:

as.data.frame(msteps)
##   V1 V2 V3 V4
## 1  3  4  2  1
## 2  3  2  4  1
## 3  2  3  4  1
## 4  2  3  1  4
## 5  2  1  3  4
## 6  1  2  3  4

With:

plot_sort(df_steps, size = 25) + geom_text(color = "white", size = 6)

It works, so we can now scroll!

sample(seq(50)) %>% 
  insertion_sort_steps() %>% 
  sort_matix_to_df() %>% 
  plot_sort(size = 2.0)

Now try with other sort algorithms:

Bubble sort:

bubble_sort_steps <- function(x = sample(1:15)){
  
  msteps <- matrix(data = x, ncol = length(x))
  
  for (i in 1:(length(x) - 1)) {
    
    for (j in 1:(length(x) - 1)) {
      
      if (x[j] > x[j + 1]) {
        temp <- x[j]
        x[j] <- x[j + 1]
        x[j + 1] <- temp
      }
      
      msteps <- rbind(msteps, as.vector(x))
      
    }
  }
  
  msteps
  
}

Selection sort:

selection_sort_steps <- function(x = sample(1:15)){
  
  msteps <- matrix(data = x, ncol = length(x))
  
  for (i in 1:(length(x) - 1)) {
    
    smallsub <- i
    
    for (j in (i + 1):(length(x) - 0)) { # Is not '- 1' like website
      
      if (x[j] < x[smallsub]) {
        smallsub <- j
      }
    }
    
    temp <- x[i]
    x[i] <- x[smallsub]
    x[smallsub] <- temp
    
    msteps <- rbind(msteps, as.vector(x))
    
  }
  
  msteps
  
}

And test with a longer vector:

n <- 50
x <- sample(seq(n))

big_df <- rbind(
  x %>% selection_sort_steps() %>% sort_matix_to_df() %>% mutate(sort = "Selection Sort"),  
  x %>% insertion_sort_steps() %>% sort_matix_to_df() %>% mutate(sort = "Insertion Sort"),
  x %>% bubble_sort_steps() %>% sort_matix_to_df() %>% mutate(sort = "Bubble Sort")
)

head(big_df)
## # A tibble: 6 x 4
##    step position element sort          
##   <int> <chr>      <int> <chr>         
## 1     1 1             37 Selection Sort
## 2     1 2             25 Selection Sort
## 3     1 3             36 Selection Sort
## 4     1 4              4 Selection Sort
## 5     1 5             21 Selection Sort
## 6     1 6             11 Selection Sort

big_df %>%
  group_by(sort) %>% 
  summarise(steps = n())
## # A tibble: 3 x 2
##   sort            steps
##   <chr>           <int>
## 1 Bubble Sort    120100
## 2 Insertion Sort  31500
## 3 Selection Sort   2500
ggplot(big_df) +  
  geom_path(
    aes(step, position, group = element, color = element, label = element),
    size = 0.8, alpha = 1, lineend = "round"
    ) +
  scale_colour_gradient(low = "#c21500", high = "#ffc500") + # http://uigradients.com/#Kyoto
  facet_wrap(~sort, scales = "free_x", ncol = 1) +
  theme(
    legend.position = "none",
    strip.background = element_rect(fill = "transparent", linetype = 0),
    strip.text = element_text(size = 8),
    panel.spacing = unit(5, "lines")
    )

Or we can plot vertically using the viridis palette from the viridis package :

ggplot(big_df, aes(position, step, group = element, color = element, label = element)) +  
  geom_path(size = 1, alpha = 1, lineend = "round") +
  scale_colour_gradientn(colours = viridis_pal()(n)) +
  facet_wrap(~sort, scales = "free_y", nrow = 1) +
  scale_y_reverse() +
  theme(
    legend.position = "none",
    strip.background = element_rect(fill = "transparent", linetype = 0),
    strip.text = element_text(size = 8),
    panel.spacing = unit(5, "lines")
    )

And that’s it. If you write/implement another sort algorithm in this way let me know to view it ;).

Some bonus content :D.

References:

  1. http://bost.ocks.org/mike/algorithms/
  2. http://faculty.cs.niu.edu/~hutchins/csci230/sorting.htm
  3. http://corte.si/posts/code/visualisingsorting/
  4. http://uigradients.com/#Kyoto
  5. http://algs4.cs.princeton.edu/21elementary/
df <- data_frame(x = seq(1:20))
ggplot(df) + 
  geom_bar(aes(x = x, y = x, fill = factor(x)),
           stat = "identity",width = 0.5) +
  scale_fill_manual(values = viridis_pal()(20)) +
  theme(legend.position = "none",
        plot.background = element_rect(fill = "black", colour = "black"),
        strip.text = element_text(size = 7))

comments powered by Disqus