dplyr

加载示例数据

library(dplyr)
library(nycflights13)
# Preview the first few rows of the flights table.
flights %>% head()

数据筛选

0.1.1 筛选出 1 月 1 日的数据

# Keep only the rows where both month and day equal 1.
flights %>%
  filter(month == 1, day == 1)

0.1.2 筛选出大于 6 月的数据

# Keep only the rows where month is greater than 6.
flights %>%
  filter(month > 6)

0.1.3 筛选出 1 月或 2 月的数据

# Keep only the rows where month is 1 or 2.
flights %>%
  filter(month %in% c(1, 2))

数据排序

0.1.1 按照年、月、日的优先级进行升序重排

# Sort rows by year, then month, then day (ascending, arrange()'s default).
flights %>%
  arrange(year, month, day)

0.1.2 按照 arr_delay 字段进行降序重排

# Sort rows by arr_delay, largest values first.
flights %>%
  arrange(desc(arr_delay))

数据筛选并剔除

0.1.1 按列名选取指定的列

# Pick the year, month and day columns by name.
flights %>%
  select(year, month, day)

# Pick every column positioned from year through day, written as a range.
flights %>%
  select(year:day)

0.1.2 按照所选列去除重复行

# Keep only the origin and dest columns, then drop duplicated rows.
flights %>%
  select(origin, dest) %>%
  distinct()

数据变形

0.1.1 生成了新的列变量

# Append two derived columns, gain and speed, to the existing columns.
flights %>%
  mutate(
    gain = arr_delay - dep_delay,
    speed = distance / air_time * 60
  )

0.1.2 使用 transform 生成变量（不能引用同一次调用中刚生成的变量）

# base::transform() evaluates every expression against the columns of the
# input data, so gain_per_hour below is computed from the original arr_delay,
# not from the arr_delay value assigned on the line before it.
# NOTE(review): overwriting arr_delay with month - day looks like a placeholder
# example -- confirm the intended expressions.
base::transform(
  flights,
  arr_delay = month - day,
  gain_per_hour = arr_delay / (month / 60)
)

0.1.3 只保留新生成的列

# Compute gain and gain_per_hour and keep only those two new columns;
# gain_per_hour is built from the gain column defined just before it.
flights %>%
  transmute(
    gain = arr_delay - dep_delay,
    gain_per_hour = gain / (air_time / 60)
  )

数据汇总

# Collapse the table to a single row holding the mean dep_delay,
# ignoring missing values.
flights %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))

数据分组

# Group the flights by tailnum.
by_tailnum <- flights %>% group_by(tailnum)

# Per tailnum: number of rows, mean distance and mean arr_delay (missing
# values ignored). Then keep only the groups with more than 20 rows and a
# mean distance below 2000.
delay <- by_tailnum %>%
  summarise(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  filter(count > 20, dist < 2000)

其他

# Group the flights by tailnum.
by_tailnum <- flights %>% group_by(tailnum)

# One summary row per tailnum: row count, mean distance, mean arr_delay
# (missing values are dropped before averaging).
delay <- summarise(
  by_tailnum,
  count = n(),
  dist = mean(distance, na.rm = TRUE),
  delay = mean(arr_delay, na.rm = TRUE)
)

# Keep only the groups with more than 20 rows and a mean distance below 2000.
delay <- delay %>% filter(count > 20, dist < 2000)
Posted in R

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注