R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Bubble Chart 1

df <- structure(list(x = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c("1", "2", "3", "4", "5"), class = "factor"), y = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L), .Label = c("1", "2", "3", "4", "5"), class = "factor")), .Names = c("x", "y"), row.names = c(NA, 79L), class = "data.frame")

# Required packages
library(plyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
# Get the frequency counts
dfc <- ddply(df, c("x", "y"), "nrow", .drop = FALSE)
#dfc

# The plot
ggplot(data = dfc, aes(x = x, y = y, size = factor(nrow))) + 
  geom_point() + 
  scale_size_discrete(range = c(1, 10))
## Warning: Using size for a discrete variable is not advised.

ggplot(data = df, aes(x = x, y = y)) +
  stat_sum(aes(size = factor(..n..)), geom = "point") +
  scale_size_discrete(range = c(1, 10))
## Warning: Using size for a discrete variable is not advised.

Bubble Chart 2

Bubble Chart with bubbles aligned along their bottom edges 1

cat<-c("A", "A", "B", "B", "C", "C")
chara<-c("1", "0", "1", "0", "1", "0")
percent<-c(80, 20, 60, 40, 90,10)
xcoord<-c(10,10,11,11,12,12)
ycoord<-c(10,10,10,10,10,10)

DF<-data.frame(cat,chara, percent, xcoord, ycoord)

NewBubbleChart <- ggplot(DF, aes(x = cat, y = "", size = percent, label = NULL, fill = chara), legend = FALSE) +
  geom_point(color = "grey50", shape = 21, alpha = 0.99) +  
  #geom_text(size=4) +
  theme_bw() +
  scale_size(range = c(5, 20))

NewBubbleChart <- NewBubbleChart +
  scale_fill_manual(name = "Type",
                    values = c("darkblue", "lightblue"),
                    labels = c("0" = "Type 0", "1" = "Type 1"))

NewBubbleChart

Bubble Chart 3

Bubble Chart: bubble-in-bubble

library(ggplot2)
# function to calculate coords of a circle
circle <- function(center,radius) {
  th <- seq(0,2*pi,len=200)
  data.frame(x=center[1]+radius*cos(th),y=center[2]+radius*sin(th))
}
# example dataset, similar to graphic
df <- data.frame(bank=paste("Bank",LETTERS[1:5]),start=1000*(5:1),end=500*(5:1))    
max <- max(df$start)
n.bubbles <- nrow(df)
scale <- 0.4/sum(sqrt(df$start))
# calculate scaled centers and radii of bubbles
radii <- scale*sqrt(df$start)
ctr.x <- cumsum(c(radii[1],head(radii,-1)+tail(radii,-1)+.01))
# starting (larger) bubbles
gg.1  <- do.call(rbind,lapply(1:n.bubbles,function(i)cbind(group=i,circle(c(ctr.x[i],radii[i]),radii[i]))))
text.1 <- data.frame(x=ctr.x,y=-0.05,label=paste(df$bank,df$start,sep="\n"))
# ending (smaller) bubbles
radii <- scale*sqrt(df$end)
gg.2  <- do.call(rbind,lapply(1:n.bubbles,function(i)cbind(group=i,circle(c(ctr.x[i],radii[i]),radii[i]))))
text.2 <- data.frame(x=ctr.x,y=2*radii+0.02,label=df$end)
# make the plot
ggplot()+
  geom_polygon(data=gg.1,aes(x,y,group=group),fill="dodgerblue")+
  geom_path(data=gg.1,aes(x,y,group=group),color="grey50")+
  geom_text(data=text.1,aes(x,y,label=label))+
  geom_polygon(data=gg.2,aes(x,y,group=group),fill="green2")+
  geom_path(data=gg.2,aes(x,y,group=group),color="grey50")+
  geom_text(data=text.2,aes(x,y,label=label), color="white")+
  labs(x="",y="")+scale_y_continuous(limits=c(-0.1,2.5*scale*sqrt(max(df$start))))+
  coord_fixed()+
  theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid=element_blank())

Bubble Chart 4

googleVis - Bubble Chart bubble-in-bubble

library(googleVis)
## Warning: package 'googleVis' was built under R version 3.3.2
## Creating a generic function for 'toJSON' from package 'jsonlite' in package 'googleVis'
## 
## Welcome to googleVis version 0.6.2
## 
## Please read Google's Terms of Use
## before you start using the package:
## https://developers.google.com/terms/
## 
## Note, the plot method of googleVis will by default use
## the standard browser to display its output.
## 
## See the googleVis package vignettes for more details,
## or visit http://github.com/mages/googleVis.
## 
## To suppress this message use:
## suppressPackageStartupMessages(library(googleVis))
##
DF <- cbind(
  DF,
  ID=paste0(DF$cat,DF$chara)
)
bChart <- gvisBubbleChart(
  data=DF,
  idvar="ID",
  xvar="xcoord",
  yvar="chara",
  colorvar="cat",
  sizevar="percent",
  options=list(vAxis='{minValue:.8, maxValue:3}')
)

plot(bChart)
## starting httpd help server ...
##  done

Bubble Chart 5

countries = c('IND', 'AUS', 'CHI', 'JAP', 'BAT', 'SING')
frequencies = matrix(sample(1:100, 36), 6, 6, dimnames = list(countries, countries))
diag(frequencies) = 0

library(reshape2)
frequencies_df = melt(frequencies)
names(frequencies_df) = c('origin', 'destination', 'frequency')

library(ggplot2)
ggplot(frequencies_df, aes(x = origin, y = destination, size = frequency)) + geom_point()

Circlize - Bubble Chart 6

library(circlize)
library(migest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
m <- data.frame(order = 1:6,
                country = c("Ausralia", "India", "China", "Japan", "Thailand", "Malaysia"),
                V3 = c(1, 150000, 90000, 180000, 15000, 10000),
                V4 = c(35000, 1, 10000, 12000, 25000, 8000),
                V5 = c(10000, 7000, 1, 40000, 5000, 4000),
                V6 = c(7000, 8000, 175000, 1, 11000, 18000),
                V7 = c(70000, 30000, 22000, 120000, 1, 40000),
                V8 = c(60000, 90000, 110000, 14000, 30000, 1),
                r = c(255,255,255,153,51,51),
                g = c(51, 153, 255, 255, 255, 255),
                b = c(51, 51, 51, 51, 51, 153),
                stringsAsFactors = FALSE)

### Create a data frame
df1 <- m[, c(1,2, 9:11)]

### Create a matrix
m <- m[,-(1:2)]/1e04
m <- as.matrix(m[,c(1:6)])
dimnames(m) <- list(orig = df1$country, dest = df1$country)

### Sort order of data.frame and matrix for plotting in circos
df1 <- arrange(df1, order)
df1$country <- factor(df1$country, levels = df1$country)
m <- m[levels(df1$country),levels(df1$country)]

### Define ranges of circos sectors and their colors (both of the sectors and the links)
df1$xmin <- 0
df1$xmax <- rowSums(m) + colSums(m)
n <- nrow(df1)
df1$rcol<-rgb(df1$r, df1$g, df1$b, max = 255)
df1$lcol<-rgb(df1$r, df1$g, df1$b, alpha=200, max = 255)

## Plot sectors (outer part)
par(mar=rep(0,4))
circos.clear()

### Basic circos graphic parameters
circos.par(cell.padding=c(0,0,0,0), track.margin=c(0,0.15), start.degree = 90, gap.degree =4)

### Sector details
circos.initialize(factors = df1$country, xlim = cbind(df1$xmin, df1$xmax))

### Plot sectors
circos.trackPlotRegion(ylim = c(0, 1), factors = df1$country, track.height=0.1,
                       #panel.fun for each sector
                       panel.fun = function(x, y) {
                       #select details of current sector
                       name = get.cell.meta.data("sector.index")
                       i = get.cell.meta.data("sector.numeric.index")
                       xlim = get.cell.meta.data("xlim")
                       ylim = get.cell.meta.data("ylim")
                       #text direction (dd) and adjusmtents (aa)
                       theta = circlize(mean(xlim), 1.3)[1, 1] %% 360
                       dd <- ifelse(theta < 90 || theta > 270, "clockwise", "reverse.clockwise")
                       aa = c(1, 0.5)
                       if(theta < 90 || theta > 270)  aa = c(0, 0.5)
                       #plot country labels
                       circos.text(x=mean(xlim), y=1.7, labels=name, facing = dd, cex=0.6,  adj = aa)
                       #plot main sector
                       circos.rect(xleft=xlim[1], ybottom=ylim[1], xright=xlim[2], ytop=ylim[2], col = df1$rcol[i], border=df1$rcol[i])
                       #blank in part of main sector
                       circos.rect(xleft=xlim[1], ybottom=ylim[1], xright=xlim[2]-rowSums(m)[i], ytop=ylim[1]+0.3, col = "white", border = "white")
                       #white line all the way around
                       circos.rect(xleft=xlim[1], ybottom=0.3, xright=xlim[2], ytop=0.32, col = "white", border = "white")
                       #plot axis
                       circos.axis(labels.cex=0.6, direction = "outside", major.at=seq(from=0,to=floor(df1$xmax)[i],by=5), minor.ticks=1, labels.away.percentage = 0.15)
                       })
## Note: 1 point is out of plotting region in sector 'Ausralia', track '1'.
## Note: 1 point is out of plotting region in sector 'India', track '1'.
## Note: 1 point is out of plotting region in sector 'China', track '1'.
## Note: 1 point is out of plotting region in sector 'Japan', track '1'.
## Note: 1 point is out of plotting region in sector 'Thailand', track '1'.
## Note: 1 point is out of plotting region in sector 'Malaysia', track '1'.
## Plot links (inner part)
### Add sum values to df1, marking the x-position of the first links
### out (sum1) and in (sum2). Updated for further links in loop below.

df1$sum1 <- colSums(m)
df1$sum2 <- numeric(n)

### Create a data.frame of the flow matrix sorted by flow size, to allow largest flow plotted first
df2 <- cbind(as.data.frame(m),orig=rownames(m),  stringsAsFactors=FALSE)
df2 <- reshape(df2, idvar="orig", varying=list(1:n), direction="long",
               timevar="dest", time=rownames(m),  v.names = "m")
df2 <- arrange(df2,desc(m))

### Keep only the largest flows to avoid clutter
df2 <- subset(df2, m > quantile(m,0.6))

### Plot links
for(k in 1:nrow(df2)){
  #i,j reference of flow matrix
  i<-match(df2$orig[k],df1$country)
  j<-match(df2$dest[k],df1$country)
  #plot link
  circos.link(sector.index1=df1$country[i], point1=c(df1$sum1[i], df1$sum1[i] + abs(m[i, j])),
              sector.index2=df1$country[j], point2=c(df1$sum2[j], df1$sum2[j] + abs(m[i, j])),
              col = df1$lcol[i])
  #update sum1 and sum2 for use when plotting the next link
  df1$sum1[i] = df1$sum1[i] + abs(m[i, j])
  df1$sum2[j] = df1$sum2[j] + abs(m[i, j])
}

Bubble Chart 7

age = rep(c(20, 30, 40, 50, 60), 20)
income = c(rep(">50k", 80), rep("<50k", 20))
df1 = data.frame(age=age, income=income)

library(plyr)
df1_summary = ddply(
  df1,
  .(age, income),
  summarize,
  count=length(income)
)

ggplot(df1_summary, aes(age, income, size=count)) +
  geom_point()

test <- with(df1,table(age,income))
test <- as.matrix(as.data.frame.matrix(test))
plot(
  row(test),
  col(test),
  cex=test/3,pch=20,
  xlim=c(0.5,nrow(test)+0.5),
  ylim=c(0.5,ncol(test)+0.5),
  axes=FALSE,
  ann=FALSE
)
axis(1,at=1:nrow(test),labels=rownames(test),cex.axis=0.8)
axis(2,at=1:ncol(test),labels=colnames(test),cex.axis=0.8)
title(xlab="Age Group",ylab="Income")
box()

Bubble Chart 8

http://stackoverflow.com/questions/15840926/categorical-bubble-plot-for-mapping-studies Based on http://flowingdata.com/2010/11/23/how-to-make-bubble-charts/

Bubble Chart 9

hdp <- read.csv("http://www.ats.ucla.edu/stat/data/hdp.csv")
hdp <- within(hdp, {
  Married <- factor(Married, levels = 0:1, labels = c("no", "yes"))
  DID <- factor(DID)
  HID <- factor(HID)
})

ggplot(hdp, aes(x = CancerStage, y = LengthofStay)) +
  stat_sum(aes(size = ..n.., group = 1)) +
  scale_size_area(max_size=10)

Bubble Chart 10

library(ggplot2)                           
# Set up the vectors                           
days <- c("Mon","Tues","Wed","Thurs","Fri")
slots <- c("Coffee/Breakfast","Lunch","Happy Hour","Dinner")

# Create the data frame
df <- expand.grid(days, slots)
df$value <- c(1,1,1,1,2,1,1,NA,NA,1,4,4,7,4,1,5,6,14,5,1)    

#Plot the Data
g <- ggplot(df, aes(Var1, Var2)) + geom_point(aes(size = value), colour = "green") + theme_bw() + xlab("") + ylab("")
g + scale_size_continuous(range=c(10,30)) + geom_text(aes(label = value))
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_text).

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.