Visualizing Sort Algorithms With ggplot

by Joshua Kunst

Have you read Visualizing Algorithms by Mike Bostock? It’s a pure gold post. In that post Mike show a static representation of a sort algorith and obvious it will fun to replicate that image with ggplot. So here we go.

library(dplyr)
library(tidyr)
library(ggplot2)
library(ggthemes)
library(viridis)
theme_set(theme_map())

We need some sorts algorihms. In this link you can see some algorithms.

We start with Insertion Sort:

insertion_sort_steps <- function(x  = sample(1:15)){
  
  msteps <- matrix(data = x, ncol = length(x))
  
  for (i in 2:length(x)) {
    
    j <- i
    
    while ((j > 1) && (x[j] < x[j - 1])) {
      
      temp <- x[j]
      x[j] <- x[j - 1]
      x[j - 1] <- temp
      j <- j - 1
      
      msteps <- rbind(msteps, as.vector(x))
      
    }
  }
  
  msteps
  
}

Now to test it and see what the function do:

set.seed(12345)

x <- sample(seq(4))

x
## [1] 3 4 2 1

msteps <- insertion_sort_steps(x)


as.data.frame(msteps)
##   V1 V2 V3 V4
## 1  3  4  2  1
## 2  3  2  4  1
## 3  2  3  4  1
## 4  2  3  1  4
## 5  2  1  3  4
## 6  1  2  3  4

Every row is a step in sort the algorithm (a partial sort). This matrix is a hard to plot so we need a nicer structure. We can transform the matrix to a data_frame with the information of every position of every element in each step.

sort_matix_to_df <- function(msteps){
  
  df <- as.data.frame(msteps, row.names = NULL)
  
  names(df) <- seq(ncol(msteps))
  
  df_steps <- df %>%
    tbl_df() %>% 
    mutate(step = seq(nrow(.))) %>% 
    gather(position, element, -step) %>%
    arrange(step)
  
  df_steps
  
}

And we apply this function to the previous steps matrix.

df_steps <- sort_matix_to_df(msteps)

head(df_steps, 10)
## # A tibble: 10 x 3
##     step position element
##    <int> <chr>      <int>
##  1     1 1              3
##  2     1 2              4
##  3     1 3              2
##  4     1 4              1
##  5     2 1              3
##  6     2 2              2
##  7     2 3              4
##  8     2 4              1
##  9     3 1              2
## 10     3 2              3

The next step will be plot the data frame.

plot_sort <- function(df_steps, size = 5, color.low = "#D1F0E1", color.high = "#524BB4"){
  
  ggplot(df_steps,
         aes(step, position, group = element, color = element, label = element)) +  
    geom_path(size = size, alpha = 1, lineend = "round") +
    scale_colour_gradient(low = color.low, high = color.high) +
    coord_flip() + 
    scale_x_reverse() + 
    theme(legend.position = "none")
  
}

Now compare this:

as.data.frame(msteps)
##   V1 V2 V3 V4
## 1  3  4  2  1
## 2  3  2  4  1
## 3  2  3  4  1
## 4  2  3  1  4
## 5  2  1  3  4
## 6  1  2  3  4

With:

plot_sort(df_steps, size = 25) + geom_text(color = "white", size = 6)

It works, so we can now scroll!

sample(seq(50)) %>% 
  insertion_sort_steps() %>% 
  sort_matix_to_df() %>% 
  plot_sort(size = 2.0)

Now try with other sort algorithms:

Bubble sort:

bubble_sort_steps <- function(x = sample(1:15)){
  
  msteps <- matrix(data = x, ncol = length(x))
  
  for (i in 1:(length(x) - 1)) {
    
    for (j in 1:(length(x) - 1)) {
      
      if (x[j] > x[j + 1]) {
        temp <- x[j]
        x[j] <- x[j + 1]
        x[j + 1] <- temp
      }
      
      msteps <- rbind(msteps, as.vector(x))
      
    }
  }
  
  msteps
  
}

Selection sort:

selection_sort_steps <- function(x = sample(1:15)){
  
  msteps <- matrix(data = x, ncol = length(x))
  
  for (i in 1:(length(x) - 1)) {
    
    smallsub <- i
    
    for (j in (i + 1):(length(x) - 0)) { # Is not '- 1' like website
      
      if (x[j] < x[smallsub]) {
        smallsub <- j
      }
    }
    
    temp <- x[i]
    x[i] <- x[smallsub]
    x[smallsub] <- temp
    
    msteps <- rbind(msteps, as.vector(x))
    
  }
  
  msteps
  
}

And test with a longer vector:

n <- 50
x <- sample(seq(n))

big_df <- rbind(
  x %>% selection_sort_steps() %>% sort_matix_to_df() %>% mutate(sort = "Selection Sort"),  
  x %>% insertion_sort_steps() %>% sort_matix_to_df() %>% mutate(sort = "Insertion Sort"),
  x %>% bubble_sort_steps() %>% sort_matix_to_df() %>% mutate(sort = "Bubble Sort")
)

head(big_df)
## # A tibble: 6 x 4
##    step position element sort          
##   <int> <chr>      <int> <chr>         
## 1     1 1             37 Selection Sort
## 2     1 2             25 Selection Sort
## 3     1 3             36 Selection Sort
## 4     1 4              4 Selection Sort
## 5     1 5             21 Selection Sort
## 6     1 6             11 Selection Sort

big_df %>%
  group_by(sort) %>% 
  summarise(steps = n())
## # A tibble: 3 x 2
##   sort            steps
##   <chr>           <int>
## 1 Bubble Sort    120100
## 2 Insertion Sort  31500
## 3 Selection Sort   2500
ggplot(big_df) +  
  geom_path(
    aes(step, position, group = element, color = element, label = element),
    size = 0.8, alpha = 1, lineend = "round"
    ) +
  scale_colour_gradient(low = "#c21500", high = "#ffc500") + # http://uigradients.com/#Kyoto
  facet_wrap(~sort, scales = "free_x", ncol = 1) +
  theme(
    legend.position = "none",
    strip.background = element_rect(fill = "transparent", linetype = 0),
    strip.text = element_text(size = 8),
    panel.spacing = unit(5, "lines")
    )

Or we can plot vertically using the viridis palette from the viridis package :

ggplot(big_df, aes(position, step, group = element, color = element, label = element)) +  
  geom_path(size = 1, alpha = 1, lineend = "round") +
  scale_colour_gradientn(colours = viridis_pal()(n)) +
  facet_wrap(~sort, scales = "free_y", nrow = 1) +
  scale_y_reverse() +
  theme(
    legend.position = "none",
    strip.background = element_rect(fill = "transparent", linetype = 0),
    strip.text = element_text(size = 8),
    panel.spacing = unit(5, "lines")
    )

And that’s it. If you write/implement another sort algorithm in this way let me know to view it ;).

Some bonus content :D.

References:

  1. http://bost.ocks.org/mike/algorithms/
  2. http://faculty.cs.niu.edu/~hutchins/csci230/sorting.htm
  3. http://corte.si/posts/code/visualisingsorting/
  4. http://uigradients.com/#Kyoto
  5. http://algs4.cs.princeton.edu/21elementary/
df <- data_frame(x = seq(1:20))
ggplot(df) + 
  geom_bar(aes(x = x, y = x, fill = factor(x)),
           stat = "identity",width = 0.5) +
  scale_fill_manual(values = viridis_pal()(20)) +
  theme(legend.position = "none",
        plot.background = element_rect(fill = "black", colour = "black"),
        strip.text = element_text(size = 7))

Hello R Markdown

by Frida Gomam

Reading Data

In blogdown the Rmd files are in the content/post folder so

data <- readRDS("../../data/mtcars.rds")
knitr::kable(head(data))
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1

HTML Widgets

library(forecast)
library(highcharter)

hchart(forecast(AirPassengers, level = 90)) %>% 
  hc_add_theme(hc_theme_smpl())


hchart(rnorm(1000)) %>% 
  hc_add_theme(hc_theme_ft())

Tables

This is a example.

rowname mpg cyl disp hp drat
Mazda RX4 21.0 6 160.0 110 3.90
Mazda RX4 Wag 21.0 6 160.0 110 3.90
Datsun 710 22.8 4 108.0 93 3.85
Hornet 4 Drive 21.4 6 258.0 110 3.08
Hornet Sportabout 18.7 8 360.0 175 3.15
Valiant 18.1 6 225.0 105 2.76
Duster 360 14.3 8 360.0 245 3.21
Merc 240D 24.4 4 146.7 62 3.69
Merc 230 22.8 4 140.8 95 3.92
Merc 280 19.2 6 167.6 123 3.92

Yeah!

Quotes

This could be a greate quote:

Blockquotes are very handy in email to emulate reply text. This line is part of the same quote.

Quote break.

This is a very long line that will still be quoted properly when it wraps. Oh boy let’s keep writing to make sure this is long enough to actually wrap for everyone. Oh, you can put Markdown into a blockquote.

End.

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
fit <- lm(dist ~ speed, data = cars)
fit
## 
## Call:
## lm(formula = dist ~ speed, data = cars)
## 
## Coefficients:
## (Intercept)        speed  
##     -17.579        3.932

Including Plots

You can also embed plots. See Figure 1 for example:

par(mar = c(0, 1, 0, 1))
pie(
  c(280, 60, 20),
  c('Sky', 'Sunny side of pyramid', 'Shady side of pyramid'),
  col = c('#0292D8', '#F7EA39', '#C4B632'),
  init.angle = -50, border = NA
)
A fancy pie chart.

Figure 1: A fancy pie chart.

List

Listing things.

Sometimes you want numbered lists:

  1. One
  2. Two
  3. Three

Sometimes you want bullet points:

  • Start a line with a star
  • Profit!

Alternatively,

  • Dashes work just as well
  • And if you have sub points, put two spaces before the dash or star:
    • Like this
    • And this

ggplot with a highcharts taste

by Frida Gomam

Update 2015-11-07: This is modification from an old post. Finnaly I made a pull request and was accepted by Jeffrey Arnold

At work I use ggplot2 almost for everything. I really like the mid term between high level (highcharts) and low-level (like d3 for example). The deafult theme for ggplot it’s good, and really good if you compare with the old looking R base graphics, and there is more: the ggthemes package which have some themes for ggplot objects. However, I miss the elegant and modern touch, for example in highcharts.

So I decide to play around the theme function to replicate the look and feel of highcharts. The main tasks were:

  • Change the font to a more modern one.
  • Remove grid lines (minor ones).
  • Use a more plain color palette.
  • Reduce bar’s width.
  • Put a white background.
  • Put the lenged at bottom.

The chosen font was Open Sans (an ultra used font nowadays) which you can use in R with showtext package.

library(showtext)
library(ggplot2)

font.add.google("Open Sans", "myfont")
showtext.auto()

data(diamonds)

data <- subset(diamonds, color %in% c("E", "F", "G") & cut %in% c("Ideal", "Premium", "Good"))

data$indicator <- ifelse(data$color %in% c("G" ), 1, 0)

colors_hc <- c("#7CB5EC", "#313131", "#F7A35C",
               "#90EE7E", "#7798BF", "#AAEEEE",
               "#FF0066", "#EEAAEE", "#55BF3B",
               "#DF5353", "#7798BF", "#AAEEEE")


theme_hc <- function(){
  theme(
    text                = element_text(family = "myfont", size = 12),
    title               = element_text(hjust = 0), 
    axis.title.x        = element_text(hjust = .5),
    axis.title.y        = element_text(hjust = .5),
    panel.grid.major.y  = element_line(color = 'gray', size = .3),
    panel.grid.minor.y  = element_blank(),
    panel.grid.major.x  = element_blank(),
    panel.grid.minor.x  = element_blank(),
    panel.border        = element_blank(),
    panel.background    = element_blank(),
    legend.position     = "bottom",
    legend.title        = element_blank()
  )
}

The theme is ready. Now to plot.

ggplot(data) +
  geom_bar(aes(cut), width = .4, fill = colors_hc[1]) +
  ggtitle("An interesting title for a bar plot") +
  xlab("Cut") + ylab("Amount") +
  scale_y_continuous(labels = scales::comma) +
  theme_hc()

As you can see, the plot look more clean without the gridlines and the background. This cause less confusion (and maybe less detail) because generate more space.

ggplot(data) +
  geom_bar(aes(color, fill = cut), position = "dodge", width = .4) +
  ggtitle("Another interesting title") +
  xlab("Cut") + ylab("Amount") +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(values = colors_hc) +
  theme_hc()

Finally,

ggplot(data) +
  geom_density(aes(x, fill = cut, color = cut), alpha = I(0.5)) +
  ggtitle("Density plot") +  xlab("x") + ylab("Density") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = colors_hc) +
  xlim(4, 8) +
  theme_hc()

In my humble option, it look great. What do you think?

R, D3js and SNA Course

by Frida Gomam

Update 2015-11-09: This is migration from an old post.

I took the SNA course by Lada Adamic in coursera. It’s a super interesting course. In fact, I was using the networks only how a visualization tool, and that is what it make me little bit embarrassing because there are more, a lot of more. You can detect communities, know more centric nodes and a lot of other information. So, there are a lot of reasons to look the course

By other hand I like the d3 javascript library. Recently I was learning javascript, so I decided make a very little app to keep learning this library and show differents measures of centrality for each node in a set of 4 toy networks and see these measures by size, color or a label

Now, the R code to make the data:

##### Load Packages ####
library(sna) 
## Loading required package: statnet.common
## 
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
## 
##     order
## Loading required package: network
## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##                     Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Martina Morris, University of Washington
##                     Skye Bender-deMoll, University of Washington
##  For citation information, type citation("network").
##  Type help("network-package") to get started.
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.
# for build a block diagonal matrix
library(Matrix)   
# (https://stat.ethz.ch/pipermail/r-help/2007-June/133875.html)
library(reldist)  
## Warning: package 'reldist' was built under R version 3.4.4
## reldist: Relative Distribution Methods
## Version 1.6-6 created on 2016-10-07.
## copyright (c) 2003, Mark S. Handcock, University of California-Los Angeles
##  For citation information, type citation("reldist").
##  Type help(package="reldist") to get started.
library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:network':
## 
##     is.discrete
library(rjson)

##### Functions ####
degree_sna <- function(net, norm = TRUE, ...){
  degree(net, ...)/2/(if (norm) ncol(net) - 1 else 1)
}

betweenness_sna <- function(net, norm = FALSE, ...){
  n <- ncol(net)
  betweenness(net, ...)/2/(if (norm) (n - 1)*(n - 2)/2 else 1)
}

##### Networks ####
net.butterfly <- matrix(c(0,1,1,0,0,0,0,
                          1,0,1,0,0,0,0,
                          1,1,0,1,0,0,0,
                          0,0,1,0,1,0,0,
                          0,0,0,1,0,1,1,
                          0,0,0,0,1,0,1,
                          0,0,0,0,1,1,0),
                        byrow = TRUE, nrow = 7)

net.star <- matrix(c(0,1,1,1,1,1,
                     1,0,0,0,0,0,
                     1,0,0,0,0,0,
                     1,0,0,0,0,0,
                     1,0,0,0,0,0,
                     1,0,0,0,0,0),
                   byrow = TRUE, nrow = 6)

net.line <- matrix(c(0,1,0,0,0,
                     1,0,1,0,0,
                     0,1,0,1,0,
                     0,0,1,0,1,
                     0,0,0,1,0),
                   byrow = TRUE, nrow = 5)

net.circular <- matrix(c(0,1,0,0,1,
                         1,0,1,0,0,
                         0,1,0,1,0,
                         0,0,1,0,1,
                         1,0,0,1,0),
                       byrow = TRUE, nrow = 5)

nets <- list(net.butterfly, net.star, net.line, net.circular)
net.all <- as.matrix(bdiag(net.butterfly, net.star, net.line, net.circular))


##### Plots ####
gplot(net.butterfly, displaylabels = TRUE, usearrows = FALSE)

gplot(net.star, displaylabels = TRUE, usearrows = FALSE)

gplot(net.line, displaylabels = TRUE, usearrows = FALSE)

gplot(net.circular, displaylabels = TRUE, usearrows = FALSE)

gplot(net.all, usearrows = FALSE,
      label = unlist(llply(nets, degree_sna, norm = FALSE)))

#### Indicators ####
# Degrees for each node of each network
llply(nets, degree_sna)
## [[1]]
## [1] 0.3333333 0.3333333 0.5000000 0.3333333 0.5000000 0.3333333 0.3333333
## 
## [[2]]
## [1] 1.0 0.2 0.2 0.2 0.2 0.2
## 
## [[3]]
## [1] 0.25 0.50 0.50 0.50 0.25
## 
## [[4]]
## [1] 0.5 0.5 0.5 0.5 0.5
llply(nets, degree_sna, norm = FALSE)
## [[1]]
## [1] 2 2 3 2 3 2 2
## 
## [[2]]
## [1] 5 1 1 1 1 1
## 
## [[3]]
## [1] 1 2 2 2 1
## 
## [[4]]
## [1] 2 2 2 2 2
# Differences beetween degree for nodes in each network
laply(nets, function(net){ gini(degree_sna(net)) })
## [1] 0.08928571 0.33333333 0.15000000 0.00000000
laply(nets, function(net){   sd(degree_sna(net)) })
## [1] 0.08132501 0.32659863 0.13693064 0.00000000
# Centralization coefficient $C_D$
laply(nets, centralization, degree)
## [1] 0.1666667 1.0000000 0.1666667 0.0000000
# Betweenness
llply(nets, betweenness_sna)
## [[1]]
## [1] 0 0 8 9 8 0 0
## 
## [[2]]
## [1] 10  0  0  0  0  0
## 
## [[3]]
## [1] 0 3 4 3 0
## 
## [[4]]
## [1] 1 1 1 1 1
llply(nets, betweenness_sna, norm = TRUE)
## [[1]]
## [1] 0.0000000 0.0000000 0.5333333 0.6000000 0.5333333 0.0000000 0.0000000
## 
## [[2]]
## [1] 1 0 0 0 0 0
## 
## [[3]]
## [1] 0.0000000 0.5000000 0.6666667 0.5000000 0.0000000
## 
## [[4]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
# Closeness
llply(nets, closeness)
## [[1]]
## [1] 0.4000000 0.4000000 0.5454545 0.6000000 0.5454545 0.4000000 0.4000000
## 
## [[2]]
## [1] 1.0000000 0.5555556 0.5555556 0.5555556 0.5555556 0.5555556
## 
## [[3]]
## [1] 0.4000000 0.5714286 0.6666667 0.5714286 0.4000000
## 
## [[4]]
## [1] 0.6666667 0.6666667 0.6666667 0.6666667 0.6666667
# Eigenvector Centrality
llply(nets, evcent)
## Warning in FUN(X[[i]], ...): Maximum iterations exceeded in evcent_R
## without convergence. This matrix may be pathological - increase maxiter or
## try eigen().
## Warning in FUN(X[[i]], ...): Maximum iterations exceeded in evcent_R
## without convergence. This matrix may be pathological - increase maxiter or
## try eigen().
## [[1]]
## [1] 0.3348053 0.3348053 0.4496177 0.3838092 0.4496177 0.3348053 0.3348053
## 
## [[2]]
## [1] 0.4082483 0.4082483 0.4082483 0.4082483 0.4082483 0.4082483
## 
## [[3]]
## [1] 0.3086067 0.4629100 0.6172134 0.4629100 0.3086067
## 
## [[4]]
## [1] 0.4472136 0.4472136 0.4472136 0.4472136 0.4472136
#### Consolidate Data ####
names <- paste(rep(1:length(nets), laply(nets, ncol)),
               unlist(llply(nets, function(x) 1:ncol(x))), sep = "_")

colnames(net.all) <- names
rownames(net.all) <- names

links <- ldply(names, function(name){
  # name <- sample(names, size = 1)
  # name <- names[1]
  data.frame(source = which(names == name) - 1, 
             target = which(net.all[name,] == 1) - 1)
})

nodes <- data.frame(name = names)
nodes$degree_norm <- unlist(llply(nets, degree_sna))
nodes$degree <- unlist(llply(nets, degree_sna, norm = FALSE))
nodes$betweenness <- unlist(llply(nets, betweenness_sna))
nodes$betweenness_norm <- unlist(llply(nets, betweenness_sna, norm = TRUE))
nodes$closeness <- unlist(llply(nets, closeness))
nodes$eigen_vector_centrality <- unlist(llply(nets, evcent))
## Warning in FUN(X[[i]], ...): Maximum iterations exceeded in evcent_R
## without convergence. This matrix may be pathological - increase maxiter or
## try eigen().

## Warning in FUN(X[[i]], ...): Maximum iterations exceeded in evcent_R
## without convergence. This matrix may be pathological - increase maxiter or
## try eigen().
#### Exporting Data ####
nodes_json <- adply(nodes, 1, toJSON )$V1
nodes_json <- paste(" \"nodes\" : [", paste("\n", nodes_json, collapse = ", "), "\n]")

links_json <- adply(links, 1, toJSON)$V1
links_json <- paste(" \"links\" : [", paste("\n", links_json, collapse = ", "), "\n]")

data_json <- paste("{\n", nodes_json, "\n,\n", links_json, "}")
# write(data_json, "data.json")

You can fork the repo from here.