桑基图(Sankey diagram),即桑基能量分流图,也叫桑基能量平衡图。它是一种特定类型的流程图,图中延伸的分支的宽度对应数据流量的大小,通常应用于能源、材料成分、金融等数据的可视化分析。
输入数据共三列:第一列为上游节点(Src),第二列为下游节点(Target),第三列为两者之间的流量值(Value),值越大连线越粗。如果您有自己的数据,只需替换输入部分即可,后面的数据格式转换代码是通用的。
例1:
library(networkD3)
# Example input: one link per line, written as "upstream;downstream;value".
# Replace this text with your own data; the conversion code below is generic.
network <-
"Src;Target;Value
Bioinfo;Biology;20
Bioinfo;Math;20
Bioinfo;Program;20
Bioinfo;NGS;20
Program;Linux;8
Program;Python;8
Program;R;6
NGS;RNAseq;1
NGS;ChIPseq;1
NGS;m16Sseq;1
NGS;Metagenome;1
NGS;SingeCellSeq;1
NGS;DNAmethylseq;1
NGS;lncRNA;1
NGS;Exomeseq;1
NGS;TCGA;1
"
# Parse the semicolon-separated text into a data frame.
# stringsAsFactors is set explicitly so the node columns are plain character
# vectors on every R version (the default changed to FALSE in R 4.0.0).
network <- read.table(text = network, sep = ";", header = TRUE,
                      row.names = NULL, quote = "", comment.char = "",
                      stringsAsFactors = FALSE)
# Keep only the first three columns and give them fixed names.
network <- network[, 1:3]
colnames(network) <- c("Src", "Target", "Value")
# Node names as character, so the lookup below does not depend on whether
# read.table guessed a character or a numeric column type.
src_names <- as.character(network$Src)
target_names <- as.character(network$Target)
# Sorted list of every distinct node appearing as a source or a target.
factor_list <- sort(unique(c(src_names, target_names)))
# Zero-based index for each node, in the same order as factor_list.
num_list <- seq_along(factor_list) - 1L
# Replace node names by their zero-based position in factor_list.
# match() works directly on the values, so no factor levels are required.
network$Src <- as.numeric(num_list[match(src_names, factor_list)])
network$Target <- as.numeric(num_list[match(target_names, factor_list)])
# Node table; row i (zero-based) is the node that index i refers to.
attribute <- data.frame(name = factor_list, stringsAsFactors = FALSE)
# Draw the Sankey diagram from the link table (network) and the node
# table (attribute). Source/Target/Value name the columns of Links;
# NodeID names the column of Nodes that holds the node labels.
sankeyNetwork(
  Links = network,
  Nodes = attribute,
  Source = "Src",
  Target = "Target",
  Value = "Value",
  NodeID = "name",
  fontSize = 12,
  nodeWidth = 30
)
例2:
library(networkD3)
# Node table: the row order defines the zero-based index used by the links
# ("Node A" is node 0, "Node B" is node 1, "Node C" is node 2, "Node D" is 3).
nodes <- data.frame(
  name = c("Node A", "Node B", "Node C", "Node D")
)
# Link table, one row per link:
#   source - index of the node the link starts from
#   target - index of the node the link goes to
#   value  - value of the link (e.g. the link between node 0 and node 1 has
#            width 10, the link between node 1 and node 3 has width 30)
links <- data.frame(
  source = c(0, 0, 1, 2),
  target = c(1, 2, 3, 3),
  value = c(10, 20, 30, 40)
)
# Draw the Sankey diagram from the link table (links) and the node
# table (nodes). Source/Target/Value name the columns of Links;
# NodeID names the column of Nodes that holds the node labels.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  fontSize = 12,
  nodeWidth = 30
)