I am working on a dumbbell plot in R inspired by this post, and have two problems:
- Ordering the dumbbell plot (I've tried a strategy provided in this post)
- Presenting the value labels in the plot in an aesthetically pleasing way.
My data set is in wide format, with 18 units (one row per county), and has the following structure:
> head(ADHD_med_2010_2018_wide, 18)
# A tibble: 18 x 9
         age           gender                county adhd_pr_1000_2010 adhd_pr_1000_2018 county_label     adhd_2010 adhd_2018   diff
   <dbl+lbl>        <dbl+lbl>             <dbl+lbl>             <dbl>             <dbl> <fct>                <dbl>     <dbl>  <dbl>
 1 2 [10-14] 1 [Both genders]  1 [Østfold]                       32.1              24.3 Østfold               32.1      24.3  -7.80
 2 2 [10-14] 1 [Both genders]  2 [Akershus]                      20.6              23.0 Akershus              20.6      23     2.40
 3 2 [10-14] 1 [Both genders]  3 [Oslo]                          17.2              33.9 Oslo                  17.2      33.9  16.7 
 4 2 [10-14] 1 [Both genders]  4 [Hedmark]                       41.7              30.9 Hedmark               41.7      30.9 -10.8 
 5 2 [10-14] 1 [Both genders]  5 [Oppland]                       24.9              39.0 Oppland               24.9      39    14.1 
 6 2 [10-14] 1 [Both genders]  6 [Buskerud]                      26.7              36.8 Buskerud              26.7      36.8  10.1 
 7 2 [10-14] 1 [Both genders]  7 [Vestfold]                      28.1              27.1 Vestfold              28.1      27    -1.10
 8 2 [10-14] 1 [Both genders]  8 [Telemark]                      29.2              24.7 Telemark              29.2      24.7  -4.5 
 9 2 [10-14] 1 [Both genders]  9 [Aust-Agder]                    34.9              39.2 Aust-Agder            34.9      39.2   4.30
10 2 [10-14] 1 [Both genders] 10 [Vest-Agder]                    17.4              23.8 Vest-Agder            17.4      23.8   6.40
11 2 [10-14] 1 [Both genders] 11 [Rogaland]                      29.5              13.8 Rogaland              29.5      13.8 -15.7 
12 2 [10-14] 1 [Both genders] 12 [Hordaland]                     21.3              14.4 Hordaland             21.3      14.4  -6.90
13 2 [10-14] 1 [Both genders] 14 [Sogn og Fjordane]              21.3              39.7 Sogn og Fjordane      21.3      39.7  18.4 
14 2 [10-14] 1 [Both genders] 15 [Møre og Romsdal]               27.0              18.6 Møre og Romsdal       27        18.6  -8.40
15 2 [10-14] 1 [Both genders] 18 [Nordland]                      40.1              30.0 Nordland              40.1      30   -10.1 
16 2 [10-14] 1 [Both genders] 19 [Troms]                         25.8              33.2 Troms                 25.8      33.2   7.40
17 2 [10-14] 1 [Both genders] 20 [Finnmark]                      19.1              21.3 Finnmark              19.1      21.3   2.20
18 2 [10-14] 1 [Both genders] 50 [Trøndelag]                     25.0              36.9 Trøndelag             25        37    12   
I've tried two strategies for problem 1:
library("tidyverse")
library("ggalt")
# Dumbbell chart of the 2010 vs 2018 rates, one row per county.
# The plot-level mapping runs each county from its 2010 value (x) to its
# 2018 value (xend).
fig2 <- ggplot(
  ADHD_med_2010_2018_wide,
  aes(
    x = adhd_2010,
    xend = adhd_2018,
    y = county_label,
    group = county_label
  )
) +
  # Thick grey connector used instead of the default line provided by
  # geom_dumbbell. x, xend and y are inherited from the plot-level mapping,
  # so only yend has to be supplied here.
  geom_segment(
    aes(yend = county_label),
    size = 1.5,
    color = "#b2b2b2"
  ) +
  # Note: the end-point arguments use the UK spelling (colour_x,
  # colour_xend); unlike the standard ggplot2 geoms, geom_dumbbell has no
  # US "color_*" spelling for them.
  geom_dumbbell(
    colour_x = "forestgreen", # green = 2010
    colour_xend = "red", # red = 2018
    size_x = 3.5,
    size_xend = 3.5,
    color = "light blue"
  ) +
  labs(
    title = "Dumbbell Chart",
    subtitle = "Change in prescription rate: 2010 vs 2018",
    x = NULL,
    y = NULL
  ) +
  # Value labels: a negative hjust pushes the 2010 label to the right of its
  # point, an hjust above 1 pushes the 2018 label to the left of its point.
  geom_text(
    aes(x = adhd_2010, label = adhd_2010),
    hjust = -0.5,
    size = 2,
    color = "black"
  ) +
  geom_text(
    aes(x = adhd_2018, label = adhd_2018),
    hjust = 1.5,
    size = 2,
    color = "black"
  )
fig2
This gives a plot that is not ordered and whose value labels are not presented in a good way:
To correct the ordering, I tried following the strategy provided in the post linked above:
library(dplyr)
# Same dumbbell chart, with the counties on the y axis sorted by the
# absolute change between 2010 and 2018.
ADHD_med_2010_2018_wide %>%
  # Absolute difference between the two years.
  mutate(difference = abs(adhd_2018 - adhd_2010)) %>%
  # Keep the 18 rows with the largest difference.
  top_n(18, wt = difference) %>%
  ggplot(
    aes(
      x = adhd_2010,
      xend = adhd_2018,
      # reorder() sorts the county levels by their difference value.
      y = reorder(county_label, difference),
      group = county_label
    )
  ) +
  # Note: the end-point arguments use the UK spelling (colour_x,
  # colour_xend); unlike the standard ggplot2 geoms, geom_dumbbell has no
  # US "color_*" spelling for them.
  geom_dumbbell(
    colour_x = "forestgreen", # green = 2010
    colour_xend = "red", # red = 2018
    size_x = 3.5,
    size_xend = 3.5,
    color = "light blue"
  ) +
  labs(
    title = "Dumbbell Chart",
    subtitle = "Change in prescription rate: 2010 vs 2018",
    x = NULL,
    y = NULL
  ) +
  # Value labels: a negative hjust pushes the 2010 label to the right of its
  # point, an hjust above 1 pushes the 2018 label to the left of its point.
  geom_text(
    aes(x = adhd_2010, label = adhd_2010),
    hjust = -0.5,
    size = 2,
    color = "black"
  ) +
  geom_text(
    aes(x = adhd_2018, label = adhd_2018),
    hjust = 1.5,
    size = 2,
    color = "black"
  )
This still does not give a plot with a nice ordering, although it does seem to order the counties by the difference (and there is still the issue with the value labels):
 
 
Hopefully some of you may have input on these issues.
Data to copy:
> dput(head(ADHD_med_2010_2018_wide, 18))
structure(list(age = structure(c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2), label = "Age groups", labels = c(`5-9` = 1, 
`10-14` = 2, `15-19` = 3, `20-24` = 4, `25-29` = 5, `30-34` = 6, 
`All ages` = 7), class = "haven_labelled"), gender = structure(c(1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), label = "Gender", labels = c(`Both genders` = 1, 
Female = 2, Male = 3), class = "haven_labelled"), county = structure(c(1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 18, 19, 20, 50), labels = c(Østfold = 1, 
Akershus = 2, Oslo = 3, Hedmark = 4, Oppland = 5, Buskerud = 6, 
Vestfold = 7, Telemark = 8, `Aust-Agder` = 9, `Vest-Agder` = 10, 
Rogaland = 11, Hordaland = 12, `Sogn og Fjordane` = 14, `Møre og Romsdal` = 15, 
Nordland = 18, Troms = 19, Finnmark = 20, Trøndelag = 50, `Hele landet` = 99
), class = "haven_labelled"), adhd_pr_1000_2010 = c(32.1488990783691, 
20.5894756317139, 17.2119483947754, 41.6982574462891, 24.8543014526367, 
26.7194156646729, 28.1328239440918, 29.2480430603027, 34.8775291442871, 
17.3759765625, 29.4698066711426, 21.340311050415, 21.3308296203613, 
27.0334072113037, 40.1140670776367, 25.7862873077393, 19.1311283111572, 
25.0325565338135), adhd_pr_1000_2018 = c(24.2834396362305, 23.0037822723389, 
33.9068183898926, 30.8641967773438, 39.0195579528809, 36.7909698486328, 
27.0642204284668, 24.6901988983154, 39.1978950500488, 23.8095245361328, 
13.8218154907227, 14.4400091171265, 39.7175636291504, 18.5994052886963, 
29.9642810821533, 33.1638412475586, 21.2596340179443, 36.9249382019043
), county_label = structure(18:1, .Label = c("Trøndelag", "Finnmark", 
"Troms", "Nordland", "Møre og Romsdal", "Sogn og Fjordane", "Hordaland", 
"Rogaland", "Vest-Agder", "Aust-Agder", "Telemark", "Vestfold", 
"Buskerud", "Oppland", "Hedmark", "Oslo", "Akershus", "Østfold"
), class = "factor"), adhd_2010 = c(32.0999984741211, 20.6000003814697, 
17.2000007629395, 41.7000007629395, 24.8999996185303, 26.7000007629395, 
28.1000003814697, 29.2000007629395, 34.9000015258789, 17.3999996185303, 
29.5, 21.2999992370605, 21.2999992370605, 27, 40.0999984741211, 
25.7999992370605, 19.1000003814697, 25), adhd_2018 = c(24.2999992370605, 
23, 33.9000015258789, 30.8999996185303, 39, 36.7999992370605, 
27, 24.7000007629395, 39.2000007629395, 23.7999992370605, 13.8000001907349, 
14.3999996185303, 39.7000007629395, 18.6000003814697, 30, 33.2000007629395, 
21.2999992370605, 37), diff = c(-7.79999923706055, 2.39999961853027, 
16.7000007629395, -10.8000011444092, 14.1000003814697, 10.0999984741211, 
-1.10000038146973, -4.5, 4.29999923706055, 6.39999961853027, 
-15.6999998092651, -6.89999961853027, 18.4000015258789, -8.39999961853027, 
-10.0999984741211, 7.40000152587891, 2.19999885559082, 12)), row.names = c(NA, 
-18L), class = c("tbl_df", "tbl", "data.frame"))

 
    
 
    
