I am trying to create a Sankey diagram using the `networkD3` package in R, particularly the `sankeyNetwork` function. I am trying to replicate the Sankey diagram from Lawrence Livermore National Lab:
So far I have been able to use the following code to get close:
Data:
# Example data for the Sankey diagram, written as nested structure() calls.
# `energy` is a list of two data frames:
#   nodes - 17 rows, a single factor column `name` holding the node labels.
#   links - 40 rows: integer `source` and `target` (values 0-16, which look
#           like zero-based row positions in `nodes` -- confirm), numeric
#           `value`, and factor `energy_type` labelling each link.
# Note: the `energy_type` levels contain "Natural", whereas the
# corresponding node label is "Natural Gas".
energy <- structure(list(nodes = structure(list(name = structure(c(15L,
11L, 8L, 17L, 7L, 10L, 2L, 1L, 12L, 4L, 14L, 3L, 9L, 16L, 13L,
6L, 5L), .Label = c("Biomass", "Coal", "Commericial", "Electricity",
"Energy Services", "Exports", "Geothermal", "Hydro", "Industrial",
"Natural Gas", "Nuclear", "Petroleum", "Rejected Energy", "Residential",
"Solar", "Transportation", "Wind"), class = "factor")), .Names = "name", class = "data.frame", row.names = c(NA,
-17L)), links = structure(list(source = c(0L, 0L, 1L, 2L, 2L,
3L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 11L, 11L,
12L, 12L, 13L, 13L), target = c(9L, 10L, 9L, 9L, 12L, 9L, 9L,
10L, 11L, 9L, 10L, 11L, 12L, 13L, 9L, 12L, 9L, 10L, 11L, 12L,
13L, 9L, 10L, 11L, 12L, 13L, 10L, 11L, 12L, 13L, 14L, 15L, 14L,
16L, 14L, 16L, 14L, 16L, 14L, 16L), value = c(0.25, 0.28, 8.34,
2.38, 0.01, 1.81, 0.16, 0.04, 0.02, 9.99, 4.75, 3.3, 9.36, 0.92,
14.3, 1.41, 0.52, 0.45, 0.13, 2.28, 1.35, 0.28, 0.98, 0.56, 8.2,
25.4, 4.78, 4.63, 3.27, 0.03, 25.4, 0.08, 3.95, 7.33, 3.05, 5.66,
4.91, 19.6, 21.9, 5.81), energy_type = structure(c(12L, 12L,
9L, 7L, 7L, 13L, 6L, 6L, 6L, 8L, 8L, 8L, 8L, 8L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 3L, 3L, 3L, 3L, 11L,
5L, 11L, 4L, 11L, 4L, 11L, 4L, 11L, 4L), .Label = c("Biomass",
"Coal", "Electricity", "Energy Services", "Exports", "Geothermal",
"Hydro", "Natural", "Nuclear", "Petroleum", "Rejected Energy",
"Solar", "Wind"), class = "factor")), .Names = c("source", "target",
"value", "energy_type"), class = "data.frame", row.names = c(NA,
-40L))), .Names = c("nodes", "links"))
And the actual code is:
# networkD3 provides sankeyNetwork() and JS().
library(networkD3)

# Draw the Sankey diagram from the `energy` list. `LinkGroup` names the
# links column passed as the link grouping, and `colourScale` supplies a
# custom D3 ordinal scale pairing each `.domain()` label with the colour at
# the same position in `.range()`.
# NOTE(review): `d3.scale.ordinal()` is the D3 v3 spelling; a later snippet
# in this file uses `d3.scaleOrdinal()` -- confirm which one the installed
# networkD3 expects.
sankeyNetwork(
  Links = energy$links,
  Nodes = energy$nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  units = "Quads",
  LinkGroup = "energy_type",
  colourScale = JS(
    'd3.scale.ordinal()
.domain(["Solar","Nuclear","Hydro","Wind","Geothermal","Natural Gas","Coal","Biomass","Petroleum","Electricity","Residential","Commericial","Industrial","Transportation","Rejected Energy","Exports","Energy Services"])
.range(["#FFFF00","#FF0000","#0000FF","#800080","#A52A2A","#00FFFF","#000000","#00FF00","#008000","#FFA500","#FAAFBE","#FAAFBE","#FAAFBE","#FAAFBE","#C0C0C0","#FFA500","#808080"])'
  ),
  fontSize = 12,
  nodeWidth = 75,
  iterations = 100
)
My problem is that I cannot get the colors to match up right. I want to specify my own colors. I am a novice at D3, so that might be my problem. I think that my problem is in the `d3.scale.ordinal()` part of the code, so I have broken it out to hopefully make it easier to find my error. I think I am telling the function to color the "Natural Gas" box cyan (#00FFFF), but it is showing up yellow. The "Rejected Energy" and "Energy Services" colors are also off.
I am trying to create a Sankey diagram using the `networkD3` package in R, particularly the `sankeyNetwork` function. I am trying to replicate the Sankey diagram from Lawrence Livermore National Lab:
So far I have been able to use the following code to get close:
Data:
# Example data for the Sankey diagram, written as nested structure() calls.
# `energy` is a list of two data frames:
#   nodes - 17 rows, a single factor column `name` holding the node labels.
#   links - 40 rows: integer `source` and `target` (values 0-16, which look
#           like zero-based row positions in `nodes` -- confirm), numeric
#           `value`, and factor `energy_type` labelling each link.
# Note: the `energy_type` levels contain "Natural", whereas the
# corresponding node label is "Natural Gas".
energy <- structure(list(nodes = structure(list(name = structure(c(15L,
11L, 8L, 17L, 7L, 10L, 2L, 1L, 12L, 4L, 14L, 3L, 9L, 16L, 13L,
6L, 5L), .Label = c("Biomass", "Coal", "Commericial", "Electricity",
"Energy Services", "Exports", "Geothermal", "Hydro", "Industrial",
"Natural Gas", "Nuclear", "Petroleum", "Rejected Energy", "Residential",
"Solar", "Transportation", "Wind"), class = "factor")), .Names = "name", class = "data.frame", row.names = c(NA,
-17L)), links = structure(list(source = c(0L, 0L, 1L, 2L, 2L,
3L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 11L, 11L,
12L, 12L, 13L, 13L), target = c(9L, 10L, 9L, 9L, 12L, 9L, 9L,
10L, 11L, 9L, 10L, 11L, 12L, 13L, 9L, 12L, 9L, 10L, 11L, 12L,
13L, 9L, 10L, 11L, 12L, 13L, 10L, 11L, 12L, 13L, 14L, 15L, 14L,
16L, 14L, 16L, 14L, 16L, 14L, 16L), value = c(0.25, 0.28, 8.34,
2.38, 0.01, 1.81, 0.16, 0.04, 0.02, 9.99, 4.75, 3.3, 9.36, 0.92,
14.3, 1.41, 0.52, 0.45, 0.13, 2.28, 1.35, 0.28, 0.98, 0.56, 8.2,
25.4, 4.78, 4.63, 3.27, 0.03, 25.4, 0.08, 3.95, 7.33, 3.05, 5.66,
4.91, 19.6, 21.9, 5.81), energy_type = structure(c(12L, 12L,
9L, 7L, 7L, 13L, 6L, 6L, 6L, 8L, 8L, 8L, 8L, 8L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 3L, 3L, 3L, 3L, 11L,
5L, 11L, 4L, 11L, 4L, 11L, 4L, 11L, 4L), .Label = c("Biomass",
"Coal", "Electricity", "Energy Services", "Exports", "Geothermal",
"Hydro", "Natural", "Nuclear", "Petroleum", "Rejected Energy",
"Solar", "Wind"), class = "factor")), .Names = c("source", "target",
"value", "energy_type"), class = "data.frame", row.names = c(NA,
-40L))), .Names = c("nodes", "links"))
And the actual code is:
# networkD3 provides sankeyNetwork() and JS().
library(networkD3)

# Draw the Sankey diagram from the `energy` list. `LinkGroup` names the
# links column passed as the link grouping, and `colourScale` supplies a
# custom D3 ordinal scale pairing each `.domain()` label with the colour at
# the same position in `.range()`.
# NOTE(review): `d3.scale.ordinal()` is the D3 v3 spelling; a later snippet
# in this file uses `d3.scaleOrdinal()` -- confirm which one the installed
# networkD3 expects.
sankeyNetwork(
  Links = energy$links,
  Nodes = energy$nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  units = "Quads",
  LinkGroup = "energy_type",
  colourScale = JS(
    'd3.scale.ordinal()
.domain(["Solar","Nuclear","Hydro","Wind","Geothermal","Natural Gas","Coal","Biomass","Petroleum","Electricity","Residential","Commericial","Industrial","Transportation","Rejected Energy","Exports","Energy Services"])
.range(["#FFFF00","#FF0000","#0000FF","#800080","#A52A2A","#00FFFF","#000000","#00FF00","#008000","#FFA500","#FAAFBE","#FAAFBE","#FAAFBE","#FAAFBE","#C0C0C0","#FFA500","#808080"])'
  ),
  fontSize = 12,
  nodeWidth = 75,
  iterations = 100
)
My problem is that I cannot get the colors to match up right. I want to specify my own colors. I am a novice at D3, so that might be my problem. I think that my problem is in the `d3.scale.ordinal()` part of the code, so I have broken it out to hopefully make it easier to find my error. I think I am telling the function to color the "Natural Gas" box cyan (#00FFFF), but it is showing up yellow. The "Rejected Energy" and "Energy Services" colors are also off.
-
1
Your `energy$nodes` names do not match those in `energy$links$energy_type`. Try running `setdiff(energy$links$energy_type, energy$nodes$name)`.
– chinsoon12 Commented Sep 23, 2016 at 1:44
3 Answers
Reset to default
3
Maybe this helps. I will try to comment inline. As chinsoon12 suggests, it appears your nodes don't quite match. Running his code, we see:
> setdiff(energy$links$energy_type, energy$nodes$name)
[1] "Natural"
This tells us that `"Natural"` was used in your data for links while `"Natural Gas"` was used in your nodes. I often find it helpful to use a `data.frame` to help me identify problems in my color scale.
# Example data for the Sankey diagram, written as nested structure() calls.
# `energy` is a list of two data frames:
#   nodes - 17 rows, a single factor column `name` holding the node labels.
#   links - 40 rows: integer `source` and `target` (values 0-16, which look
#           like zero-based row positions in `nodes` -- confirm), numeric
#           `value`, and factor `energy_type` labelling each link.
# Note: the `energy_type` levels contain "Natural", whereas the
# corresponding node label is "Natural Gas".
energy <- structure(list(nodes = structure(list(name = structure(c(15L,
11L, 8L, 17L, 7L, 10L, 2L, 1L, 12L, 4L, 14L, 3L, 9L, 16L, 13L,
6L, 5L), .Label = c("Biomass", "Coal", "Commericial", "Electricity",
"Energy Services", "Exports", "Geothermal", "Hydro", "Industrial",
"Natural Gas", "Nuclear", "Petroleum", "Rejected Energy", "Residential",
"Solar", "Transportation", "Wind"), class = "factor")), .Names = "name", class = "data.frame", row.names = c(NA,
-17L)), links = structure(list(source = c(0L, 0L, 1L, 2L, 2L,
3L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 11L, 11L,
12L, 12L, 13L, 13L), target = c(9L, 10L, 9L, 9L, 12L, 9L, 9L,
10L, 11L, 9L, 10L, 11L, 12L, 13L, 9L, 12L, 9L, 10L, 11L, 12L,
13L, 9L, 10L, 11L, 12L, 13L, 10L, 11L, 12L, 13L, 14L, 15L, 14L,
16L, 14L, 16L, 14L, 16L, 14L, 16L), value = c(0.25, 0.28, 8.34,
2.38, 0.01, 1.81, 0.16, 0.04, 0.02, 9.99, 4.75, 3.3, 9.36, 0.92,
14.3, 1.41, 0.52, 0.45, 0.13, 2.28, 1.35, 0.28, 0.98, 0.56, 8.2,
25.4, 4.78, 4.63, 3.27, 0.03, 25.4, 0.08, 3.95, 7.33, 3.05, 5.66,
4.91, 19.6, 21.9, 5.81), energy_type = structure(c(12L, 12L,
9L, 7L, 7L, 13L, 6L, 6L, 6L, 8L, 8L, 8L, 8L, 8L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 3L, 3L, 3L, 3L, 11L,
5L, 11L, 4L, 11L, 4L, 11L, 4L, 11L, 4L), .Label = c("Biomass",
"Coal", "Electricity", "Energy Services", "Exports", "Geothermal",
"Hydro", "Natural", "Nuclear", "Petroleum", "Rejected Energy",
"Solar", "Wind"), class = "factor")), .Names = c("source", "target",
"value", "energy_type"), class = "data.frame", row.names = c(NA,
-40L))), .Names = c("nodes", "links"))
# networkD3 provides sankeyNetwork() and JS().
library(networkD3)

# Same plot as in the question, but with the scale created through
# `d3.scaleOrdinal()`. The `.domain()` labels and `.range()` colours are
# paired by position; the scale itself is otherwise unchanged.
sankeyNetwork(
  Links = energy$links,
  Nodes = energy$nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  units = "Quads",
  LinkGroup = "energy_type",
  colourScale = JS(
    'd3.scaleOrdinal()
.domain(["Solar","Nuclear","Hydro","Wind","Geothermal","Natural Gas","Coal","Biomass","Petroleum","Electricity","Residential","Commericial","Industrial","Transportation","Rejected Energy","Exports","Energy Services"])
.range(["#FFFF00","#FF0000","#0000FF","#800080","#A52A2A","#00FFFF","#000000","#00FF00","#008000","#FFA500","#FAAFBE","#FAAFBE","#FAAFBE","#FAAFBE","#C0C0C0","#FFA500","#808080"])'
  ),
  fontSize = 12,
  nodeWidth = 75,
  iterations = 100
)
# Putting the scale in a data.frame lines each colour up with its domain
# label and with the node name at the same row, which makes mismatches
# between the scale and the data easy to spot.
color_scale <- data.frame(
  range = c("#FFFF00","#FF0000","#0000FF","#800080","#A52A2A","#00FFFF","#000000","#00FF00","#008000","#FFA500","#FAAFBE","#FAAFBE","#FAAFBE","#FAAFBE","#C0C0C0","#FFA500","#808080"),
  domain = c("Solar","Nuclear","Hydro","Wind","Geothermal","Natural Gas","Coal","Biomass","Petroleum","Electricity","Residential","Commericial","Industrial","Transportation","Rejected Energy","Exports","Energy Services"),
  nodes = energy$nodes,
  stringsAsFactors = FALSE
)

# Correct the scale BEFORE it is used, so the plot below actually reflects
# the fix. The links are labelled "Natural" while the scale says
# "Natural Gas", so cyan is never matched. The question also reports
# "Rejected Energy" and "Energy Services" as off, and all three labels
# contain a space.
# NOTE(review): this assumes sankeyNetwork looks colours up by the first
# word of a group name -- confirm against the networkD3 version in use.
# Truncating every domain label at its first space handles all three cases
# (and turns "Natural Gas" into "Natural") without relying on row/column
# positions in `color_scale`.
color_scale$domain <- sub(" .*", "", color_scale$domain)

# The corrected color_scale is serialised to JSON arrays and spliced into
# the D3 scale definition.
sankeyNetwork(
  Links = energy$links, Nodes = energy$nodes, Source = "source",
  Target = "target", Value = "value", NodeID = "name",
  units = "Quads", LinkGroup = "energy_type", colourScale = JS(
    sprintf(
      'd3.scaleOrdinal()
.domain(%s)
.range(%s)
',
      jsonlite::toJSON(color_scale$domain),
      jsonlite::toJSON(color_scale$range)
    )
  ),
  fontSize = 12, nodeWidth = 75, iterations = 100
)
I also had problems with spaces in the names of nodes. Here is an example:
# networkD3 provides sankeyNetwork() and JS().
library(networkD3)

# Class labels; they are used as node names, link groups and scale domain.
Class <- c("Fristående", "Nästan_öppet", "Halvöppet", "Slutet")
# Variant with a space in the second label, reported to break the colouring:
# Class <- c("Fristående", "Nästan öppet", "Halvöppet", "Slutet")
nClass <- length(Class)

# Square matrix of flow values; rows and columns are both named by Class.
UtMat <- matrix(
  c(
    6, 9, 8, 0,
    0, 9, 18, 3,
    0, 1, 18, 6,
    0, 0, 3, 10
  ),
  nrow = nClass,
  ncol = nClass,
  byrow = TRUE,
  dimnames = list(Class, Class)
)

# Nodes: every class appears twice (indices 0-3 and 4-7).
# Links: one row per matrix cell, read row by row, going from node
# `source` to node `target`; `group` repeats the class label four times
# per class, in step with `source`.
ClassF <- list(
  nodes = data.frame(
    name = factor(rep(Class, times = 2)),
    label = rep(Class, times = 2)
  ),
  links = data.frame(
    source = rep(0:3, each = 4), # from
    target = rep(4:7, times = 4), # to
    value = as.vector(t(UtMat)),
    group = factor(rep(Class, each = 4))
  )
)

# Colour the links by `group` with a light-to-dark green scale.
sankeyNetwork(
  Links = ClassF$links,
  Nodes = ClassF$nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  units = "",
  LinkGroup = "group",
  colourScale = JS(
    'd3.scaleOrdinal()
.domain(["Fristående","Nästan_öppet","Halvöppet","Slutet"])
.range(["#EDF8E9","#BAE4B3","#74C476","#238B45"])'
  ),
  fontFamily = "Arial",
  fontSize = 12,
  nodeWidth = 75
)
In this example, the classes should be colored from light green to dark green, but if I use the second `Class` vector (the one with a space in a name), the colors get mixed up.
You can avoid the problem related to spaces in node names when you want to assign colors manually by not specifying `.domain` in `d3.scaleOrdinal()`. You only have to list your colors in `.range` in the same order as your nodes.
For instance, assuming this is the order of your nodes: Fristående, Nästan_öppet, Halvöppet, Slutet, you only need to specify this:
'd3.scaleOrdinal()
.range(["#EDF8E9","#BAE4B3","#74C476","#238B45"])'