I checked the linked post and revised the idea a bit. I thought using apply() may not be a good idea for a large data set. So I rather used a data.table-related approach. First, I converted my sample data to a SpatialPointsDataFrame. Then, I split the data by a group variable (i.e.,group). As Eddie suggested, I utilized lapply() with data.table functions. When you use gDistance(), you have a two-dimensional vector as an output. I converted that to a data.table object so that following data process potentially goes faster. I reshaped the dt object with melt() and remove all data points with distance = 0. Finally, I took the first row for each Var1. Please note that Var1 here represents each row of the sample data, mydf. The last job was to add the new distance vector to the original data frame. I hope this will help you.
DATA
group user_id latitude longitude
1 B23 85553 -34.44011 172.6954
2 B23 85553 -34.43929 172.6939
3 B23 85553 -34.43929 172.6939
4 B23 85553 -34.43851 172.6924
5 B23 57357 -34.42747 172.6778
6 B23 57357 -34.42747 172.6778
7 B23 57357 -34.42747 172.6778
8 B23 98418 -34.43119 172.7014
9 B23 98418 -34.43225 172.7023
10 B23 98418 -34.43224 172.7023
11 B23 98418 -34.43224 172.7023
12 B24 57357 -34.43647 172.7141
13 B24 57357 -34.43647 172.7141
14 B24 57357 -34.43647 172.7141
15 B24 98418 -34.43904 172.7172
16 B24 98418 -34.43904 172.7172
17 B24 98418 -34.43904 172.7172
18 B24 98418 -34.43925 172.7168
19 B24 98418 -34.43915 172.7169
20 B24 98418 -34.43915 172.7169
21 B24 98418 -34.43915 172.7169
22 B24 98418 -34.43915 172.7169
CODE
library(sp)
library(rgeos)
library(data.table)
# Copy the original
temp <- mydf
#DF to SPDF
coordinates(temp) <- ~longitude+latitude
# Split the data by a group variable
mylist <- split(temp, f = temp$group)
#For each element in mylist, apply gDistance, reshape the output of
# gDistance and create a data.table. Then, reshape the data, remove
# rows with distance = 0. Finally, choose the first row for each
# variable. levels in variable represents rows in mydf.
out <- rbindlist(
lapply(mylist, function(x){
d <- setDT(melt(gDistance(x, byid = TRUE)))
setorder(d, Var1, value)
d <- d[value > 0]
d <- d[, .SD[1], by = Var1]
d
})
)
out <- cbind(mydf, distance = out$value)
# group user_id latitude longitude distance
#1 B23 85553 -34.44011 172.6954 1.743945e-03
#2 B23 85553 -34.43929 172.6939 1.661118e-03
#3 B23 85553 -34.43929 172.6939 1.661118e-03
#4 B23 85553 -34.43851 172.6924 1.661118e-03
#5 B23 57357 -34.42747 172.6778 1.836642e-02
#6 B23 57357 -34.42747 172.6778 1.836642e-02
#7 B23 57357 -34.42747 172.6778 1.836642e-02
#8 B23 98418 -34.43119 172.7014 1.369100e-03
#9 B23 98418 -34.43225 172.7023 1.456022e-05
#10 B23 98418 -34.43224 172.7023 1.456022e-05
#11 B23 98418 -34.43224 172.7023 1.456022e-05
#12 B24 57357 -34.43647 172.7141 3.862696e-03
#13 B24 57357 -34.43647 172.7141 3.862696e-03
#14 B24 57357 -34.43647 172.7141 3.862696e-03
#15 B24 98418 -34.43904 172.7172 3.245705e-04
#16 B24 98418 -34.43904 172.7172 3.245705e-04
#17 B24 98418 -34.43904 172.7172 3.245705e-04
#18 B24 98418 -34.43925 172.7168 1.393162e-04
#19 B24 98418 -34.43915 172.7169 1.393162e-04
#20 B24 98418 -34.43915 172.7169 1.393162e-04
#21 B24 98418 -34.43915 172.7169 1.393162e-04
#22 B24 98418 -34.43915 172.7169 1.393162e-04
DATA in dput()
mydf <- structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("B23",
"B24"), class = "factor"), user_id = c(85553L, 85553L, 85553L,
85553L, 57357L, 57357L, 57357L, 98418L, 98418L, 98418L, 98418L,
57357L, 57357L, 57357L, 98418L, 98418L, 98418L, 98418L, 98418L,
98418L, 98418L, 98418L), latitude = c(-34.440114, -34.43929,
-34.43929, -34.438507, -34.427467, -34.427467, -34.427467, -34.431187,
-34.432254, -34.43224, -34.43224, -34.436472, -34.436472, -34.436472,
-34.439038, -34.439038, -34.439038, -34.439246, -34.439149, -34.439149,
-34.439149, -34.439149), longitude = c(172.695443, 172.693906,
172.693906, 172.692441, 172.677763, 172.677763, 172.677763, 172.701413,
172.702284, 172.702288, 172.702288, 172.71411, 172.71411, 172.71411,
172.717203, 172.717203, 172.717203, 172.716798, 172.716898, 172.716898,
172.716898, 172.716898)), .Names = c("group", "user_id", "latitude",
"longitude"), row.names = c(NA, -22L), class = "data.frame")