I know the question specified dplyr
. But, since others already posted solutions using other packages, I decided to have a go using other packages too:
Base package:
df <- df[with(df, order(id, stopSequence, stopId)), ]
merge(df[!duplicated(df$id), ],
df[!duplicated(df$id, fromLast = TRUE), ],
all = TRUE)
data.table:
df <- setDT(df)
df[order(id, stopSequence)][, .SD[c(1,.N)], by=id]
sqldf:
library(sqldf)
min <- sqldf("SELECT id, stopId, min(stopSequence) AS StopSequence
FROM df GROUP BY id
ORDER BY id, StopSequence, stopId")
max <- sqldf("SELECT id, stopId, max(stopSequence) AS StopSequence
FROM df GROUP BY id
ORDER BY id, StopSequence, stopId")
sqldf("SELECT * FROM min
UNION
SELECT * FROM max")
In one query:
sqldf("SELECT *
FROM (SELECT id, stopId, min(stopSequence) AS StopSequence
FROM df GROUP BY id
ORDER BY id, StopSequence, stopId)
UNION
SELECT *
FROM (SELECT id, stopId, max(stopSequence) AS StopSequence
FROM df GROUP BY id
ORDER BY id, StopSequence, stopId)")
Output:
id stopId StopSequence
1 1 a 1
2 1 c 3
3 2 b 1
4 2 c 4
5 3 a 3
6 3 b 1