I have large numbers, e.g. currency or dollar:
1 6,000,000
2 75,000,400
3 743,450,000
4 340,000
5 4,300,000
I want to format them using suffixes, like M (million) and B (billion):
1 6.0 M
2 75.0 M
3 743.5 M
4 0.3 M
5 4.3 M
I have large numbers, e.g. currency or dollar:
1 6,000,000
2 75,000,400
3 743,450,000
4 340,000
5 4,300,000
I want to format them using suffixes, like M (million) and B (billion):
1 6.0 M
2 75.0 M
3 743.5 M
4 0.3 M
5 4.3 M
If you begin with this numeric vector x,
x <- c(6e+06, 75000400, 743450000, 340000, 4300000)
you could do the following.
paste(format(round(x / 1e6, 1), trim = TRUE), "M")
# [1] "6.0 M" "75.0 M" "743.5 M" "0.3 M" "4.3 M"
And if you're not concerned about trailing zeros, just remove the format() call.
paste(round(x / 1e6, 1), "M")
# [1] "6 M" "75 M" "743.5 M" "0.3 M" "4.3 M"
Alternatively, you could assign an S3 class with a print method and keep x as numeric underneath. Here I use paste0() to make the result a bit more legible.
# Print method for numeric vectors of class "million".
#
# Shows each value divided by one million, rounded to one decimal place and
# suffixed with "M". Only the printed representation changes; the underlying
# numbers are left untouched.
#
# Args:
#   x:     numeric vector carrying the "million" class.
#   quote: passed on to print(); FALSE (the default) prints without quotes.
#   ...:   further arguments forwarded to print() for the character labels.
#
# Returns: `x`, invisibly, as is conventional for print methods.
print.million <- function(x, quote = FALSE, ...) {
  # Drop the class so the arithmetic runs on a plain numeric vector.
  labels <- paste0(round(unclass(x) / 1e6, 1), "M")
  # NextMethod()'s first argument is the generic's name, not the object to
  # print, so the labels are printed with an explicit print() call instead.
  print(labels, quote = quote, ...)
  invisible(x)
}
## assign the 'million' class to 'x'
class(x) <- "million"
x
# [1] 6M 75M 743.5M 0.3M 4.3M
x[]
# [1] 6000000 75000400 743450000 340000 4300000
You could do the same for billions and trillions as well. For information on how to put this into a data frame, see this answer, as you'll need both a format() and an as.data.frame() method.
Obviously you first need to get rid of the commas in the formatted numbers, and gsub("\\,", ...) is the way to go. This uses findInterval to select the appropriate suffix for labeling and determine the denominator for a more compact display. Can be easily extended in either direction if one wanted to go below 1.0 or above 1 trillion:
# Abbreviate large numbers with K / M / B / T suffixes.
#
# Args:
#   tx: numbers to abbreviate; numeric, or character/factor values that may
#       use "," as a thousands separator (e.g. "6,000,000").
#
# Returns: character vector the same length as `tx`, e.g. "743.45 M". Values
#   whose magnitude is below 1000 get an empty suffix and therefore keep a
#   trailing space ("999 ").
comprss <- function(tx) {
  # Strip thousands separators once and reuse the parsed numbers.
  num <- as.numeric(gsub("\\,", "", tx))
  breaks <- c(0, 1e3, 1e6, 1e9, 1e12)
  suffixes <- c("", "K", "M", "B", "T")
  # Bucket by magnitude so negative values get the same suffix as their
  # positive counterparts. A raw negative would land in bucket 0, and a zero
  # index silently drops elements, misaligning numbers and suffixes.
  div <- findInterval(abs(num), breaks)
  paste(round(num / 10^(3 * (div - 1)), 2), suffixes[div])
}
You don't need to remove the as.numeric or gsub if the input is numeric. It's admittedly superfluous, but would succeed. This is the result with Gregor's example:
> comprss (big_x)
[1] "123 " "500 " "999 " "1.05 K" "9 K"
[6] "49 K" "105.4 K" "998 K" "1.5 M" "20 M"
[11] "313.4 M" "453.12 B"
And with the original input (which was probably a factor variable if entered with read.table, read.csv or created with data.frame.)
comprss (dat$V2)
[1] "6 M" "75 M" "743.45 M" "340 K" "4.3 M"
And of course these can be printed without the quotes, either with an explicit print call using quote = FALSE or by using cat.
Recent versions (>= v1.0.0 released 2018) of the scales package include functionality to print readable labels. If you're using ggplot or tidyverse, scales is probably already installed.
In this case, label_number can be used:
> inp <- c(6000000, 75000400, 743450000, 340000, 4300000)
> scales::label_number(accuracy=0.1, scale_cut=scales::cut_short_scale())(inp)
[1] "6.0M" "75.0M" "743.4M" "340.0K" "4.3M"
scales::cut_short_scale() was introduced only in scales v1.2.0 (released 2022) and in earlier versions label_number_si must be used instead (but has since been deprecated):
> inp <- c(6000000, 75000400, 743450000, 340000, 4300000)
> scales::label_number_si(accuracy=0.1)(inp)
[1] "6.0M" "75.0M" "743.4M" "340.0K" "4.3M"
Another option starts with numeric (rather than character) numbers and works for both millions and billions (and below). You could pass more arguments to formatC to customize the output, and extend it to trillions if need be.
# Format numbers with "M" (millions) or "B" (billions) suffixes.
#
# Values of at least 1e9 are shown in billions, and values from 1e5 up to
# (but not including) 1e9 in millions, both with one decimal place. All other
# values are printed as whole numbers with "," as the thousands separator.
#
# Args:
#   x: numeric vector.
#
# Returns: character vector the same length as `x`.
m_b_format <- function(x) {
  # which() discards NA comparisons; a logical subscript containing NA would
  # make the subscripted assignments below fail.
  b_index <- which(x >= 1e9)
  m_index <- which(x >= 1e5 & x < 1e9)
  output <- formatC(x, format = "d", big.mark = ",")
  output[b_index] <- paste(
    formatC(x[b_index] / 1e9, digits = 1, format = "f"), "B"
  )
  output[m_index] <- paste(
    formatC(x[m_index] / 1e6, digits = 1, format = "f"), "M"
  )
  output
}
your_x = c(6e6, 75e6 + 400, 743450000, 340000, 43e6)
> m_b_format(your_x)
[1] "6.0 M" "75.0 M" "743.5 M" "0.3 M" "43.0 M"
big_x = c(123, 500, 999, 1050, 9000, 49000, 105400, 998000,
1.5e6, 2e7, 313402182, 453123634432)
> m_b_format(big_x)
[1] "123" "500" "999" "1,050" "9,000" "49,000"
[7] "0.1 M" "1.0 M" "1.5 M" "20.0 M" "313.4 M" "453.1 B"
dplyr's case_when now offers a more friendly solution to this - e.g:
# Abbreviate large numbers with K / M / Bn / Tn suffixes.
#
# Each value is divided by the largest matching power of 1000, rounded to a
# whole number, and followed by a space and the suffix. Values below 1000
# (including negatives) are converted with as.character() unchanged.
#
# Args:
#   n: numeric vector.
#
# Returns: character vector the same length as `n`.
format_bignum <- function(n) {
  # Namespace-qualified so the function does not depend on the caller having
  # attached dplyr.
  dplyr::case_when(
    n >= 1e12 ~ paste(round(n / 1e12), "Tn"),
    n >= 1e9 ~ paste(round(n / 1e9), "Bn"),
    n >= 1e6 ~ paste(round(n / 1e6), "M"),
    n >= 1e3 ~ paste(round(n / 1e3), "K"),
    TRUE ~ as.character(n)
  )
}
Alternatively you could embed the case_when bit inside a mutate call.
Borrowing from other answers and adding to them with the main intent of producing pretty labels for ggplot2 axes. And yes, only positive values (negative will be left as is) since usually I want those suffixes only for positive quantities. Easy to extend to negative numbers.
# Vectorized label formatter: appends K, M, B or T to values of at least
# 1000, with optional control over the number of decimals.
# Written mainly to produce readable axis labels in ggplot2 plots.
#
# Usage in ggplot2: scale_x_continuous(labels = suffix_formatter)
#
# Args:
#   x:      numeric vector of values to label.
#   digits: number of decimals handed to formatC() for suffixed values;
#           NULL leaves formatC()'s default in place.
#
# Returns (invisibly): character vector the same length as `x`.
suffix_formatter <- function(x, digits = NULL) {
  thresholds <- c(1e3, 1e6, 1e9, 1e12)
  unit_labels <- c("K", "M", "B", "T")

  # Index of the largest threshold each value reaches (0 = below 1000)
  bucket <- findInterval(x, thresholds)

  # ggplot2 may pass NA among the labels, so NA must never be selected here
  needs_suffix <- !is.na(x) & bucket > 0

  out <- character(length(x))

  # Values without a suffix (small, negative or NA) are passed through as is
  out[!needs_suffix] <- as.character(x[!needs_suffix])

  # Everything else is scaled by its threshold and gets the matching suffix
  hit <- bucket[needs_suffix]
  out[needs_suffix] <- paste0(
    formatC(x[needs_suffix] / thresholds[hit], format = "f", digits = digits),
    unit_labels[hit]
  )

  invisible(out)
}
An example of usage:
# Demo data: y grows by a factor of ten at every step of x.
x <- seq_len(10)
d <- data.frame(x = x, y = 10^x)

# Log-scale plot with the default axis labels ...
ggplot(data = d, mapping = aes(x = x, y = y)) +
  geom_line() +
  scale_y_log10()

# ... and the same plot with labels produced by suffix_formatter.
ggplot(data = d, mapping = aes(x = x, y = y)) +
  geom_line() +
  scale_y_log10(labels = suffix_formatter)
I rewrote @42-'s function to also accommodate fractional values, which are shown as percentages (%), like this:
# Abbreviate numbers with K / M / B / T suffixes and show fractions as
# percentages.
#
# Values with a magnitude below 1 are multiplied by 100 and labelled "%".
# Magnitudes from 1 up to 1000 get an empty suffix, and larger values are
# divided by the matching power of 1000. Results are rounded to two decimals.
#
# Args:
#   tx: numbers to abbreviate; numeric, or character values that may use ","
#       as a thousands separator.
#
# Returns: character vector the same length as `tx`, e.g. "350.47 K" or
#   "66.48 %".
compress <- function(tx) {
  tx <- as.numeric(gsub("\\,", "", tx))
  int <- c(1e-2, 1, 1e3, 1e6, 1e9, 1e12)
  suffixes <- c("%", "", "K", "M", "B", "T")
  # Bucket by magnitude and clamp to the first bucket. Zero, negative and
  # tiny (< 0.01) values would otherwise yield index 0, which silently drops
  # elements from int[div] and pairs numbers with the wrong divisor/suffix.
  div <- pmax(findInterval(abs(tx), int), 1L)
  paste(round(tx / int[div], 2), suffixes[div])
}
>tx
total_reads total_bases q20_rate q30_rate gc_content
3.504660e+05 1.051398e+08 6.648160e-01 4.810370e-01 5.111660e-01
> compress(tx)
[1] "350.47 K" "105.14 M" "66.48 %" "48.1 %" "51.12 %"
This might be useful for similar problems.
Similar to @Alex Poklonskiy, I needed a formatter for charts. But I needed a version that supports negative numbers as well. This is his adjusted function (I'm not an expert in R programming though):
# Label formatter for charts: appends " K", " M", " B" or " T" to values
# whose magnitude is at least 1000, for positive and negative numbers alike.
#
# Args:
#   x:      numeric vector of values to label.
#   digits: number of decimals handed to formatC(), both for suffixed values
#           and for values left without a suffix; NULL keeps formatC()'s
#           default.
#
# Returns (invisibly): character vector the same length as `x`.
number_format <- function(x, digits = NULL) {
  thresholds <- c(1e3, 1e6, 1e9, 1e12)
  unit_labels <- c(" K", " M", " B", " T")

  # Bucketing on the magnitude handles both signs in a single pass
  bucket <- findInterval(abs(x), thresholds)

  # ggplot2 may pass NA among the labels, so NA must never be selected here
  has_suffix <- !is.na(x) & bucket > 0

  out <- character(length(x))

  # Values that get no suffix are only rounded
  out[!has_suffix] <- as.character(
    formatC(x[!has_suffix], format = "f", digits = digits)
  )

  # Remaining values are scaled by their threshold; the sign is kept
  hit <- bucket[has_suffix]
  out[has_suffix] <- paste0(
    formatC(x[has_suffix] / thresholds[hit], format = "f", digits = digits),
    unit_labels[hit]
  )

  invisible(out)
}
I also changed it so that the digits argument is used to round values that do not get a suffix (e.g. 1.23434546).
Example usage:
> print( number_format(c(1.2325353, 500, 132364584563, 5.67e+9, -2.45e+7, -1.2333, -55)) )
[1] "1.2325" "500.0000" "132.3646 B" "5.6700 B" "-24.5000 M" "-1.2333" "-55.0000"
> print( number_format(c(1.2325353, 500, 132364584563, 5.67e+9, -2.45e+7, -1.2333, -55), digits = 2) )
[1] "1.23" "500.00" "132.36 B" "5.67 B" "-24.50 M" "-1.23" "-55.00"
Since the release of scales 1.2.0, label_number_si is now deprecated. You can use cut_short_scale instead:
library(scales)
inp <- c(6000000, 75000400, 743450000, 340000, 4300000)
label_number(scale_cut = cut_short_scale())(inp)
#[1] "6.0M" "75.0M" "743.4M" "340K" "4.3M"
Use space = TRUE to add space between the number and the unit:
label_number(scale_cut = cut_short_scale(space = TRUE))(inp)
#[1] "6.0 M" "75.0 M" "743.4 M" "340 K" "4.3 M"
Another option with scales package would be to use unit_format:
inp <- c(6000000, 75000400, 743450000, 340000, 4300000)
scales::unit_format(unit = 'M', scale = 1e-6)(inp)
# "6.0 M" "75.0 M" "743.4 M" "0.3 M" "4.3 M"