I have a data frame that looks something like this
user booking_date origin destination  price  sale_channel
1 user5   2018-11-01    MAD         PMI  58.20        online
2 user7   2018-11-01    DUB         MAD 147.50        online
3 user4   2018-11-02    TFS         MAD  24.05        online
4 user7   2018-11-01    LPA         MAD  37.30   call center
5 user1   2018-11-01    AMS         MAD 149.74 travel agency
6 user1   2018-11-01    MAD         PMI  19.95        online
Now I want to:
- Filter by a specific date (2018-11-02) which I've done by
df <- df[df$booking_date == '2018-11-02',]
- Group by the user and sum u their total spending on tickets.
I've tried a couple of approaches using group_by or aggregate but all I managed to get was an additional column but not grouping or aggregating the value per user.
- My final output would idealy by something enabling me to extract the 10 users that spend the most on tickets like this for example:
'user1' 'user10' 'user 7' etc.
Example Data
structure(list(user = c("user4", "user5", "user3", "user10", 
"user1", "user2", "user7", "user6", "user5", "user6", "user6", 
"user7", "user1", "user7", "user4", "user4", "user1", "user7", 
"user7", "user8", "user4", "user10", "user4", "user8", "user3", 
"user9", "user5", "user2", "user5", "user3", "user3", "user9", 
"user6", "user10", "user9", "user5", "user3", "user5", "user7", 
"user9", "user2", "user2", "user7", "user10", "user7", "user3", 
"user1", "user2", "user8", "user6", "user6", "user10", "user4", 
"user7", "user4", "user1", "user4", "user2", "user1", "user7", 
"user5", "user4", "user4", "user7", "user10"), booking_date = structure(c(17837, 
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 
17837), class = "Date"), origin = c("TFS", "MAD", "TFN", "MAD", 
"LYS", "LPA", "MAD", "MAD", "AMS", "MAD", "MAD", "MAD", "MAD", 
"LPA", "MAD", "MAD", "MAD", "TXL", "MAD", "MAD", "TXL", "TXL", 
"TFS", "MAD", "NAP", "TFS", "PMI", "TLS", "TFS", "NTE", "AMS", 
"FUE", "TFN", "CPH", "TFN", "MAD", "SVQ", "SCQ", "SVQ", "MAD", 
"PMI", "MAD", "PMI", "MAD", "MAD", "MAD", "MAD", "MAD", "SVQ", 
"NCE", "CDG", "MAD", "MAD", "MAD", "MAD", "MAD", "TFN", "LGW", 
"LGW", "MAD", "TFN", "MAD", "RNS", "AGP", "CDG"), destination = c("MAD", 
"DUB", "MAD", "TFS", "MAD", "MAD", "NAP", "TLS", "MAD", "SCQ", 
"LPA", "TFN", "TXL", "MAD", "TFN", "SVQ", "ACE", "MAD", "TLS", 
"SCQ", "MAD", "MAD", "MAD", "LPA", "MAD", "MAD", "MAD", "MAD", 
"MAD", "MAD", "MAD", "MAD", "MAD", "MAD", "MAD", "CAG", "MAD", 
"MAD", "MAD", "LPA", "MAD", "CDG", "MAD", "LPA", "TFS", "TFN", 
"PMI", "NAP", "MAD", "MAD", "MAD", "LPA", "LGW", "LPA", "CDG", 
"SPC", "MAD", "MAD", "MAD", "SCQ", "MAD", "SVQ", "MAD", "MAD", 
"MAD"), price = c(24.0499992371, 41.5400009155, 251.199996948, 
15.6000003815, 44.0099983215, 73.8499984741, 115.470001221, 69.4400024414, 
81.3899993896, 15.2399997711, 41.1199989319, 274.559997559, 150, 
29.3199996948, 332.440002441, 94.9100036621, 97.9800033569, 55.1199989319, 
81.7399978638, 4.86000013351, 39.0299987793, 53.6300010681, 39.3199996948, 
114.559997559, 65.4000015259, 96.2900009155, 41.75, 28.9099998474, 
25.1900005341, 14, 50.3100013733, 47.2999992371, 53.8199996948, 
91.3199996948, 77.6800003052, 17.8099994659, 96.5400009155, 27.6900005341, 
34.1399993896, 34.3300018311, 15.5600004196, 158.449996948, 45.2999992371, 
36.8100013733, 50.6800003052, 62.9000015259, 13.7399997711, 49.4399986267, 
66.1100006104, 95.4400024414, 41.8400001526, 69.8300018311, 60.6599998474, 
34.3300018311, 97.1399993896, 84.3099975586, 25.8099994659, 185.899993896, 
34.8899993896, 304.380004883, 15.1300001144, 29.1399993896, 133.529998779, 
208.910003662, 152.960006714), sale_channel = c("online", "online", 
"travel agency", "online", "online", "online", "travel agency", 
"online", "travel agency", "travel agency", "travel agency", 
"online", "travel agency", "online", "online", "travel agency", 
"online", "travel agency", "travel agency", "call center", "online", 
"online", "online", "travel agency", "travel agency", "travel agency", 
"online", "online", "online", "online", "online", "travel agency", 
"travel agency", "online", "travel agency", "call center", "online", 
"travel agency", "online", "online", "travel agency", "travel agency", 
"online", "travel agency", "online", "online", "online", "travel agency", 
"online", "travel agency", "travel agency", "online", "online", 
"online", "online", "online", "online", "online", "online", "travel agency", 
"travel agency", "travel agency", "online", "travel agency", 
"online"), total = c(876.0300006858, 250.5000009536, 540.3500022886, 
420.1500110628, 424.9299983977, 512.1099882118, 1233.9500045785, 
316.9000034332, 250.5000009536, 316.9000034332, 316.9000034332, 
1233.9500045785, 424.9299983977, 1233.9500045785, 876.0300006858, 
876.0300006858, 424.9299983977, 1233.9500045785, 1233.9500045785, 
185.52999830291, 876.0300006858, 420.1500110628, 876.0300006858, 
185.52999830291, 540.3500022886, 255.6000022889, 250.5000009536, 
512.1099882118, 250.5000009536, 540.3500022886, 540.3500022886, 
255.6000022889, 316.9000034332, 420.1500110628, 255.6000022889, 
250.5000009536, 540.3500022886, 250.5000009536, 1233.9500045785, 
255.6000022889, 512.1099882118, 512.1099882118, 1233.9500045785, 
420.1500110628, 1233.9500045785, 540.3500022886, 424.9299983977, 
512.1099882118, 185.52999830291, 316.9000034332, 316.9000034332, 
420.1500110628, 876.0300006858, 1233.9500045785, 876.0300006858, 
424.9299983977, 876.0300006858, 512.1099882118, 424.9299983977, 
1233.9500045785, 250.5000009536, 876.0300006858, 876.0300006858, 
1233.9500045785, 420.1500110628)), row.names = c(3L, 
26L, 37L, 42L, 48L, 82L, 89L, 100L, 112L, 124L, 133L, 144L, 148L, 
150L, 166L, 167L, 173L, 182L, 217L, 243L, 259L, 285L, 300L, 304L, 
306L, 336L, 341L, 366L, 388L, 397L, 413L, 417L, 423L, 452L, 457L, 
473L, 474L, 478L, 482L, 483L, 486L, 496L, 499L, 504L, 510L, 513L, 
529L, 531L, 558L, 605L, 615L, 628L, 629L, 664L, 669L, 672L, 684L, 
722L, 730L, 752L, 766L, 767L, 779L, 804L, 819L), class = c("data.table", 
"data.frame"))
Any recommendations on how to approach this?
Edit: Final Solution
df <- df[df$booking_date == '2018-11-02',]
total <- aggregate(price ~ user, df, sum)
top_10 <- total[order(total$price, decreasing = T), ]
top_10[1:5, 1]
That gives me
"user7" "user4" "user3" "user2" "user1"
 
    