This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
df <- structure(list(x = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c("1", "2", "3", "4", "5"), class = "factor"), y = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L), .Label = c("1", "2", "3", "4", "5"), class = "factor")), .Names = c("x", "y"), row.names = c(NA, 79L), class = "data.frame")
# Required packages
library(plyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
# Get the frequency counts
dfc <- ddply(df, c("x", "y"), "nrow", .drop = FALSE)
#dfc
# The plot
ggplot(data = dfc, aes(x = x, y = y, size = factor(nrow))) +
geom_point() +
scale_size_discrete(range = c(1, 10))
## Warning: Using size for a discrete variable is not advised.
ggplot(data = df, aes(x = x, y = y)) +
stat_sum(aes(size = factor(..n..)), geom = "point") +
scale_size_discrete(range = c(1, 10))
## Warning: Using size for a discrete variable is not advised.
Bubble Chart with bubbles aligned along their bottom edges 1
cat<-c("A", "A", "B", "B", "C", "C")
chara<-c("1", "0", "1", "0", "1", "0")
percent<-c(80, 20, 60, 40, 90,10)
xcoord<-c(10,10,11,11,12,12)
ycoord<-c(10,10,10,10,10,10)
DF<-data.frame(cat,chara, percent, xcoord, ycoord)
NewBubbleChart <- ggplot(DF, aes(x = cat, y = "", size = percent, label = NULL, fill = chara), legend = FALSE) +
geom_point(color = "grey50", shape = 21, alpha = 0.99) +
#geom_text(size=4) +
theme_bw() +
scale_size(range = c(5, 20))
NewBubbleChart <- NewBubbleChart +
scale_fill_manual(name = "Type",
values = c("darkblue", "lightblue"),
labels = c("0" = "Type 0", "1" = "Type 1"))
NewBubbleChart
Bubble Chart: bubble-in-bubble
library(ggplot2)
# function to calculate coords of a circle
circle <- function(center,radius) {
th <- seq(0,2*pi,len=200)
data.frame(x=center[1]+radius*cos(th),y=center[2]+radius*sin(th))
}
# example dataset, similar to graphic
df <- data.frame(bank=paste("Bank",LETTERS[1:5]),start=1000*(5:1),end=500*(5:1))
max <- max(df$start)
n.bubbles <- nrow(df)
scale <- 0.4/sum(sqrt(df$start))
# calculate scaled centers and radii of bubbles
radii <- scale*sqrt(df$start)
ctr.x <- cumsum(c(radii[1],head(radii,-1)+tail(radii,-1)+.01))
# starting (larger) bubbles
gg.1 <- do.call(rbind,lapply(1:n.bubbles,function(i)cbind(group=i,circle(c(ctr.x[i],radii[i]),radii[i]))))
text.1 <- data.frame(x=ctr.x,y=-0.05,label=paste(df$bank,df$start,sep="\n"))
# ending (smaller) bubbles
radii <- scale*sqrt(df$end)
gg.2 <- do.call(rbind,lapply(1:n.bubbles,function(i)cbind(group=i,circle(c(ctr.x[i],radii[i]),radii[i]))))
text.2 <- data.frame(x=ctr.x,y=2*radii+0.02,label=df$end)
# make the plot
ggplot()+
geom_polygon(data=gg.1,aes(x,y,group=group),fill="dodgerblue")+
geom_path(data=gg.1,aes(x,y,group=group),color="grey50")+
geom_text(data=text.1,aes(x,y,label=label))+
geom_polygon(data=gg.2,aes(x,y,group=group),fill="green2")+
geom_path(data=gg.2,aes(x,y,group=group),color="grey50")+
geom_text(data=text.2,aes(x,y,label=label), color="white")+
labs(x="",y="")+scale_y_continuous(limits=c(-0.1,2.5*scale*sqrt(max(df$start))))+
coord_fixed()+
theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid=element_blank())
googleVis - Bubble Chart bubble-in-bubble
library(googleVis)
## Warning: package 'googleVis' was built under R version 3.3.2
## Creating a generic function for 'toJSON' from package 'jsonlite' in package 'googleVis'
##
## Welcome to googleVis version 0.6.2
##
## Please read Google's Terms of Use
## before you start using the package:
## https://developers.google.com/terms/
##
## Note, the plot method of googleVis will by default use
## the standard browser to display its output.
##
## See the googleVis package vignettes for more details,
## or visit http://github.com/mages/googleVis.
##
## To suppress this message use:
## suppressPackageStartupMessages(library(googleVis))
##
DF <- cbind(
DF,
ID=paste0(DF$cat,DF$chara)
)
bChart <- gvisBubbleChart(
data=DF,
idvar="ID",
xvar="xcoord",
yvar="chara",
colorvar="cat",
sizevar="percent",
options=list(vAxis='{minValue:.8, maxValue:3}')
)
plot(bChart)
## starting httpd help server ...
## done
countries = c('IND', 'AUS', 'CHI', 'JAP', 'BAT', 'SING')
frequencies = matrix(sample(1:100, 36), 6, 6, dimnames = list(countries, countries))
diag(frequencies) = 0
library(reshape2)
frequencies_df = melt(frequencies)
names(frequencies_df) = c('origin', 'destination', 'frequency')
library(ggplot2)
ggplot(frequencies_df, aes(x = origin, y = destination, size = frequency)) + geom_point()
library(circlize)
library(migest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
m <- data.frame(order = 1:6,
country = c("Ausralia", "India", "China", "Japan", "Thailand", "Malaysia"),
V3 = c(1, 150000, 90000, 180000, 15000, 10000),
V4 = c(35000, 1, 10000, 12000, 25000, 8000),
V5 = c(10000, 7000, 1, 40000, 5000, 4000),
V6 = c(7000, 8000, 175000, 1, 11000, 18000),
V7 = c(70000, 30000, 22000, 120000, 1, 40000),
V8 = c(60000, 90000, 110000, 14000, 30000, 1),
r = c(255,255,255,153,51,51),
g = c(51, 153, 255, 255, 255, 255),
b = c(51, 51, 51, 51, 51, 153),
stringsAsFactors = FALSE)
### Create a data frame
df1 <- m[, c(1,2, 9:11)]
### Create a matrix
m <- m[,-(1:2)]/1e04
m <- as.matrix(m[,c(1:6)])
dimnames(m) <- list(orig = df1$country, dest = df1$country)
### Sort order of data.frame and matrix for plotting in circos
df1 <- arrange(df1, order)
df1$country <- factor(df1$country, levels = df1$country)
m <- m[levels(df1$country),levels(df1$country)]
### Define ranges of circos sectors and their colors (both of the sectors and the links)
df1$xmin <- 0
df1$xmax <- rowSums(m) + colSums(m)
n <- nrow(df1)
df1$rcol<-rgb(df1$r, df1$g, df1$b, max = 255)
df1$lcol<-rgb(df1$r, df1$g, df1$b, alpha=200, max = 255)
## Plot sectors (outer part)
par(mar=rep(0,4))
circos.clear()
### Basic circos graphic parameters
circos.par(cell.padding=c(0,0,0,0), track.margin=c(0,0.15), start.degree = 90, gap.degree =4)
### Sector details
circos.initialize(factors = df1$country, xlim = cbind(df1$xmin, df1$xmax))
### Plot sectors
circos.trackPlotRegion(ylim = c(0, 1), factors = df1$country, track.height=0.1,
#panel.fun for each sector
panel.fun = function(x, y) {
#select details of current sector
name = get.cell.meta.data("sector.index")
i = get.cell.meta.data("sector.numeric.index")
xlim = get.cell.meta.data("xlim")
ylim = get.cell.meta.data("ylim")
#text direction (dd) and adjusmtents (aa)
theta = circlize(mean(xlim), 1.3)[1, 1] %% 360
dd <- ifelse(theta < 90 || theta > 270, "clockwise", "reverse.clockwise")
aa = c(1, 0.5)
if(theta < 90 || theta > 270) aa = c(0, 0.5)
#plot country labels
circos.text(x=mean(xlim), y=1.7, labels=name, facing = dd, cex=0.6, adj = aa)
#plot main sector
circos.rect(xleft=xlim[1], ybottom=ylim[1], xright=xlim[2], ytop=ylim[2], col = df1$rcol[i], border=df1$rcol[i])
#blank in part of main sector
circos.rect(xleft=xlim[1], ybottom=ylim[1], xright=xlim[2]-rowSums(m)[i], ytop=ylim[1]+0.3, col = "white", border = "white")
#white line all the way around
circos.rect(xleft=xlim[1], ybottom=0.3, xright=xlim[2], ytop=0.32, col = "white", border = "white")
#plot axis
circos.axis(labels.cex=0.6, direction = "outside", major.at=seq(from=0,to=floor(df1$xmax)[i],by=5), minor.ticks=1, labels.away.percentage = 0.15)
})
## Note: 1 point is out of plotting region in sector 'Ausralia', track '1'.
## Note: 1 point is out of plotting region in sector 'India', track '1'.
## Note: 1 point is out of plotting region in sector 'China', track '1'.
## Note: 1 point is out of plotting region in sector 'Japan', track '1'.
## Note: 1 point is out of plotting region in sector 'Thailand', track '1'.
## Note: 1 point is out of plotting region in sector 'Malaysia', track '1'.
## Plot links (inner part)
### Add sum values to df1, marking the x-position of the first links
### out (sum1) and in (sum2). Updated for further links in loop below.
df1$sum1 <- colSums(m)
df1$sum2 <- numeric(n)
### Create a data.frame of the flow matrix sorted by flow size, to allow largest flow plotted first
df2 <- cbind(as.data.frame(m),orig=rownames(m), stringsAsFactors=FALSE)
df2 <- reshape(df2, idvar="orig", varying=list(1:n), direction="long",
timevar="dest", time=rownames(m), v.names = "m")
df2 <- arrange(df2,desc(m))
### Keep only the largest flows to avoid clutter
df2 <- subset(df2, m > quantile(m,0.6))
### Plot links
for(k in 1:nrow(df2)){
#i,j reference of flow matrix
i<-match(df2$orig[k],df1$country)
j<-match(df2$dest[k],df1$country)
#plot link
circos.link(sector.index1=df1$country[i], point1=c(df1$sum1[i], df1$sum1[i] + abs(m[i, j])),
sector.index2=df1$country[j], point2=c(df1$sum2[j], df1$sum2[j] + abs(m[i, j])),
col = df1$lcol[i])
#update sum1 and sum2 for use when plotting the next link
df1$sum1[i] = df1$sum1[i] + abs(m[i, j])
df1$sum2[j] = df1$sum2[j] + abs(m[i, j])
}
age = rep(c(20, 30, 40, 50, 60), 20)
income = c(rep(">50k", 80), rep("<50k", 20))
df1 = data.frame(age=age, income=income)
library(plyr)
df1_summary = ddply(
df1,
.(age, income),
summarize,
count=length(income)
)
ggplot(df1_summary, aes(age, income, size=count)) +
geom_point()
test <- with(df1,table(age,income))
test <- as.matrix(as.data.frame.matrix(test))
plot(
row(test),
col(test),
cex=test/3,pch=20,
xlim=c(0.5,nrow(test)+0.5),
ylim=c(0.5,ncol(test)+0.5),
axes=FALSE,
ann=FALSE
)
axis(1,at=1:nrow(test),labels=rownames(test),cex.axis=0.8)
axis(2,at=1:ncol(test),labels=colnames(test),cex.axis=0.8)
title(xlab="Age Group",ylab="Income")
box()
http://stackoverflow.com/questions/15840926/categorical-bubble-plot-for-mapping-studies Based on http://flowingdata.com/2010/11/23/how-to-make-bubble-charts/
hdp <- read.csv("http://www.ats.ucla.edu/stat/data/hdp.csv")
hdp <- within(hdp, {
Married <- factor(Married, levels = 0:1, labels = c("no", "yes"))
DID <- factor(DID)
HID <- factor(HID)
})
ggplot(hdp, aes(x = CancerStage, y = LengthofStay)) +
stat_sum(aes(size = ..n.., group = 1)) +
scale_size_area(max_size=10)
library(ggplot2)
# Set up the vectors
days <- c("Mon","Tues","Wed","Thurs","Fri")
slots <- c("Coffee/Breakfast","Lunch","Happy Hour","Dinner")
# Create the data frame
df <- expand.grid(days, slots)
df$value <- c(1,1,1,1,2,1,1,NA,NA,1,4,4,7,4,1,5,6,14,5,1)
#Plot the Data
g <- ggplot(df, aes(Var1, Var2)) + geom_point(aes(size = value), colour = "green") + theme_bw() + xlab("") + ylab("")
g + scale_size_continuous(range=c(10,30)) + geom_text(aes(label = value))
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_text).
Note that the echo = FALSE
parameter was added to the code chunk to prevent printing of the R code that generated the plot.