I'm still very new at stack overflow so please let me know if there is a better way to include data or other formatting issues with my question. Thanks!
I have 2 data frames. One contains a single row of data that i need which has a unique reference number.
I need to merge the Ph and Dissolved02 from the first data frame into the one with latitude and longitude. But I only want to the values from the last row of each unique reference number, or in other words the deepest pH and Dissolved02 values. The final data frame will only have one occurrence of each reference number. A sample of each data frame can be created with the following code (maybe a much easier way to input data into stack overflow?)...
sample.df <- readLines(textConnection("BBM2008050101    0.2 B   24.8    52.1    8.2 34.3    6.1 
BBM2008050101   1.0 B   24.8    52.4    8.2 34.5    6.1 
BBM2008050101   1.4 B   24.8    52.4    8.2 34.5    6.1 
BBM2008050102   0.2 B   24.5    53.0    8.1 35.0    6.3 
BBM2008050102   1.0 B   24.5    53.0    8.1 34.9    6.0 
BBM2008050102   1.6 B   24.5    53.0    8.1 35.0    5.9 
BBM2008050103   0.2 B   24.9    51.1    8.2 33.5    6.1 
BBM2008050103   1.0 B   24.9    51.1    8.2 33.5    6.1 
BBM2008050103   1.6 B   24.9    51.1    8.2 33.5    6.1 
BBM2008050104   0.2 B   25.1    51.4    8.2 33.8    6.7 
BBM2008050104   1.0 B   25.1    51.4    8.2 33.8    6.5 
BBM2008050104   1.6 B   25.1    51.4    8.2 33.8    6.5 
BBM2008050105   0.2 B   24.9    51.9    8.1 34.1    7.7 
BBM2008050105   1.0 B   24.9    51.9    8.2 34.1    7.9 
BBM2008050106   0.2 B   25.4    51.1    8.3 33.5    7.0 
BBM2008050106   1.0 B   25.4    51.1    8.3 33.5    6.5 
BBM2008050106   2.0 B   25.4    51.1    8.3 33.5    6.5 
BBM2008050106   2.3 B   25.4    51.1    8.3 33.5    6.4 "))
sample.df <- strsplit(sample.df,"[[:space:]]+")
max.len <- max(sapply(sample.df, length))
corrected.list <- lapply(sample.df, function(x) {c(x, rep(NA, max.len - length(x)))})
df <- do.call(rbind, corrected.list)
colnames(df) <- c("Reference",  "Depth",    "Beg_end",  "Temperature",  "Conductivity", "pH",   "Salinity", "DissolvedO2")
df <- as.data.frame(df)
sample.df2 <- readLines(textConnection("BBM2008050101   301 -83.44165   29.637633   1.6 D
BBM2008050102   301 -83.439717  29.630233   1.8 D
BBM2008050103   301 -83.434017  29.605567   1.8 D
BBM2008050104   301 -83.440067  29.596267   1.8 D
BBM2008050105   301 -83.4346    29.592667   1.2 D
BBM2008050106   300 -83.44555   29.596917   2.5 D"))
sample.df2 <- strsplit(sample.df2,"[[:space:]]+")
max.len2 <- max(sapply(sample.df2, length))
corrected.list2 <- lapply(sample.df2, function(x) {c(x, rep(NA, max.len2 - length(x)))})
df2 <- do.call(rbind, corrected.list2)
colnames(df2) <- c("Reference", "Gear", "Longitude",    "Latitude", "StartDepth",   "Zone")
df2 <- as.data.frame(df2)
Output would be sample.df3 with the deepest Ph and Dissolved02 columns added. Like below but obviously my data frame is much larger and I cannot do this manually.
sample.df3 <- readLines(textConnection("BBM2008050101   301 -83.44165   29.637633   1.6 D  8.2  6.1
BBM2008050102   301 -83.439717  29.630233   1.8 D  8.1  5.9
BBM2008050103   301 -83.434017  29.605567   1.8 D  8.2  6.1
BBM2008050104   301 -83.440067  29.596267   1.8 D  8.2  6.5
BBM2008050105   301 -83.4346    29.592667   1.2 D  8.2  7.9
BBM2008050106   300 -83.44555   29.596917   2.5 D  8.3  6.4"))
sample.df3 <- strsplit(sample.df3,"[[:space:]]+")
max.len3 <- max(sapply(sample.df3, length))
corrected.list3 <- lapply(sample.df3, function(x) {c(x, rep(NA, max.len3 - length(x)))})
df3 <- do.call(rbind, corrected.list3)
colnames(df3) <- c("Reference", "Gear", "Longitude",    "Latitude", "StartDepth",   "Zone", "pH", "Dissolved02")
df3 <- as.data.frame(df3)
 
     
    