Dplyr aims to provide a function for each basic verb of data manipulation:
Interface for operations on:
Data:
timestamp | metrics | value | |
---|---|---|---|
2017-07-03 20:08:02 | Consumer Record | 0.0e+00 | |
2017-07-03 20:08:02 | Consumer Metrics | 4.0e+04 | |
2017-07-03 20:08:02 | Buffer Ratio | 0.0e+00 | |
2017-07-03 20:08:07 | Buffer Size | 3.0e+00 | |
2017-07-03 20:08:07 | Buffer MaxSize | 4.0e+04 | |
2017-07-03 20:08:07 | Buffer Ratio | 7.5e-03 | |
.. | .. | .. |
# Collapse every metric matching "Consumer(.*)Records" into a single
# "Consumer Total" series, summing the values reported at each timestamp.
# NOTE(review): %like% is data.table's regex-match operator, so data.table
# must be attached before this runs. None of the sample rows shown above has a
# metric ending in "Records" -- confirm the pattern against the full data.
loaderMetrics %>%
  filter(metrics %like% "Consumer(.*)Records") %>%
  # Only `metrics` is rewritten; timestamp and value pass through unchanged.
  mutate(metrics = "Consumer Total") %>%
  group_by(timestamp, metrics) %>%
  summarize(value = sum(value)) %>%
  # summarize() drops only the last grouping level, so ungroup explicitly to
  # avoid handing a still-grouped table to later steps.
  ungroup()
mutate() adds new variables and preserves existing ones.
# Add a column computed from an existing one; all other columns are kept.
mutate(dataFrame, NewColumnName = ColumnName / 1024 / 1024)
# Conditionally overwrite an existing column: where `boolean` is TRUE take
# `updateValue`, otherwise keep the current value. The target is written as a
# bare column name (`value`, not `dataFrame$value`) because an argument name
# cannot be a `$` expression, and the data frame goes in as the first argument.
mutate(dataFrame, value = ifelse(boolean, updateValue, value))
transmute() adds new variables and drops existing ones.
# Count the rows in each (column1, column2) combination.
# `dataFrame` is a placeholder for the input table; `data.frame` itself is the
# base constructor function and cannot be grouped.
by <- group_by(dataFrame, column1, column2)
summarise(
  by,
  count = n()
)
# AND: keep rows where both conditions hold
filter((colName == "foo") & (colName2 == "bar"))
# OR: keep rows where at least one condition holds
filter((colName == "foo") | (colName2 == "bar"))
# Not equal
filter(colName != "foo")
# Membership: value is one of the listed options
filter(colName %in% c("foo", "bar"))
# Negated membership: value is none of the listed options
filter(!(colName %in% c("foo", "bar")))
# Missing values: keep rows where the column is NA
filter(is.na(colName))
# Pattern match using base grepl()
filter(grepl("foo", colName))
# Pattern match using %like% from the data.table package
library(data.table)
filter(colName %like% "foo")
# Row count: with no grouping columns given, count() tallies the rows of the
# table (per group if the table is already grouped).
dataframe %>%
  count()
# Sort rows by colName, largest values first.
dataframe %>%
  arrange(desc(colName))