TidyR is commonly presented using the operator %>%, which comes from an earlier package, magrittr
apply(data,1,function)
data %>% apply(1,function)
The spread function converts from long data to wide data.
The form of the spread function is:
spread(data,key,value)
# Load the DSR example tables and keep handles on the ones used below.
# NOTE(review): the table names are resolved here, before tidyr is attached;
# tidyr may provide objects with the same names, so keep this order -- confirm.
library(DSR)
long <- table2
extra_wide_cases <- table4
combined <- table5
print(table2)
library(tidyr)
# Convert the long table to wide form and show it as a plain data frame.
print(long %>% spread(key, value) %>% as.data.frame())
gather(data, COLUMN_NAME1, COLUMN_NAME2, cols_to_gather)
# print(extra_wide_cases)
# Gather columns 2 and 3 into a "Year" / "Cases" pair of columns.
gathered_cases <- gather(extra_wide_cases, "Year", "Cases", 2:3)
print(gathered_cases)
separate(data,col_to_separate,new_columns)
unite(data,col_to_add, from_columns)
print(table5)
# First merge "century" and "year" into one "year" column (no separator),
# then split "rate" at the slash into "cases" and "population".
all_good <- separate(
  unite(table5, "year", c("century", "year"), sep = ""),
  "rate",
  c("cases", "population"),
  sep = "/"
)
print(all_good)
plyr, which allowed easy manipulation of data
library(dplyr)
# Use a plain data frame and label each row with the character's name.
starwars <- as.data.frame(starwars)
row.names(starwars) <- starwars[["name"]]
head(starwars)
## Plain select: name each column to keep
select(starwars, hair_color, skin_color, eye_color)
## Piped select using the ends_with() helper
starwars %>%
  select(ends_with("color"))
## Drop the name column
starwars %>%
  select(-name)
## Keep rows whose species is not "Human"
starwars %>%
  filter(species != "Human")
## Keep rows whose species is one of the listed values
starwars %>%
  filter(species %in% c("Wookiee", "Ewok"))
The mutate and transmute functions are used to add new variables as well as update existing ones
mutate does not drop old variables
transmute drops everything except those in the function call
starwars %>% mutate( height_inches = height * 0.393701)
# Compute height_inches from height with transmute().
transmute(starwars, height_inches = height * 0.393701)
# Overwrite height (same conversion factor) for the two listed species only.
starwars %>%
  filter(species %in% c("Wookiee", "Ewok")) %>%
  mutate(height = height * 0.393701)
summarize takes in as its parameters other functions that do the calculations
summarize
count function and pass a column to count
print(starwars %>% summarize(n_distinct(species)))
# Tally rows per species.
species_counts <- count(starwars, species)
print(as.data.frame(species_counts))
# Same tally, sorted by count.
species_counts <- count(starwars, species, sort = TRUE)
print(as.data.frame(species_counts))
# Tally each species/homeworld combination, sorted by count.
species_counts <- count(starwars, species, homeworld, sort = TRUE)
print(as.data.frame(species_counts))
The group_by function allows rows to be grouped based on their values in the given column or columns
group_by(data,LIST_OF_COLUMNS)
# Average height for every species/homeworld combination.
# na.rm = TRUE: a single missing height would otherwise make the whole
# group's result NA.
print(
  starwars %>%
    group_by(species, homeworld) %>%
    summarize(avg_height = mean(height, na.rm = TRUE))
)
# Same grouping, reporting both the average and the minimum height.
# NOTE(review): a group whose heights are all missing yields NaN / Inf (with a
# warning from min()) instead of NA -- confirm that is acceptable here.
print(
  starwars %>%
    group_by(species, homeworld) %>%
    summarize(
      avg_height = mean(height, na.rm = TRUE),
      min_height = min(height, na.rm = TRUE)
    )
)
The join functions offer database-like functionality
bind_rows and bind_cols offer a simpler concatenation-style combination
print(band_members)
print(band_instruments)
# The four joins below give no `by`, so the key is left for dplyr to pick.
print(band_members %>% full_join(band_instruments))
print(band_members %>% inner_join(band_instruments))
print(band_members %>% left_join(band_instruments))
print(band_members %>% right_join(band_instruments))
print(band_instruments2)
# Here the key is "name" in the left table and "artist" in the right one.
print(
  band_members %>%
    full_join(band_instruments2, by = c("name" = "artist"))
)
# Column-wise and row-wise concatenation.
print(bind_cols(band_members, band_members))
print(bind_rows(band_members, band_instruments))
ggplot2 is a widely used package that standardizes how graphs are created
The ggplot function
The ggplot function sets up the basics for our graph, including which data frame to use, and how to use it
ggplot(data_frame,aes(AESTHETICS))
library(ggplot2)
# Plot object with data and axis mappings only; no geom layer is added here.
ggplot(data = starwars, mapping = aes(x = height, y = mass))
The ggplot function sets up the graph and creates a ggplot object, but doesn't produce anything visually
ggplot(starwars,aes(x=height,y=mass)) + geom_point()
# A histogram bins one variable and computes the bar heights itself, so only x
# is mapped here; also mapping y = mass makes the histogram stat fail when the
# plot is drawn.
ggplot(starwars, aes(x = height)) + geom_histogram()
# Two histogram layers on one plot, each with its own x mapping.
ggplot(starwars) +
  geom_histogram(mapping = aes(x = height)) +
  geom_histogram(mapping = aes(x = mass))
# Two density layers; the height layer gets a translucent blue fill.
ggplot(starwars) +
  geom_density(mapping = aes(x = height), fill = "blue", alpha = 0.3) +
  geom_density(mapping = aes(x = mass))
# Scatter plot of mass against height, coloured by species.
ggplot(starwars, aes(x = height, y = mass, color = species)) +
  geom_point()
# Species (ignoring rows with a missing species) that occur more than twice.
interesting <- starwars %>%
  filter(!is.na(species)) %>%
  group_by(species) %>%
  summarize(count = n()) %>%
  filter(count > 2) %>%
  pull(species)
print(interesting)
# Restrict the data to those species and draw one violin of height per species.
to_vis <- filter(starwars, species %in% interesting)
base_plot <- ggplot(to_vis, aes(x = species, fill = species, y = height))
base_plot + geom_violin()
ggplot has a function for almost every aspect of a graph's appearance
scale_ functions
base_plot2 <- ggplot(to_vis,aes(x=mass,y=height,color=species))
# Base scatter plot reused by every variation below.
scatter <- base_plot2 + geom_point()
plot(scatter)

# Title only.
scatter +
  ggtitle("Height vs Mass of Starwars Characters")

# Title plus axis labels.
scatter +
  labs(
    title = "Height vs Mass of Starwars Characters",
    x = "Mass (kg)",
    y = "Height (cm)"
  )

# Labels plus explicit limits on both axes.
scatter +
  labs(
    title = "Height vs Mass of Starwars Characters",
    x = "Mass (kg)",
    y = "Height (cm)"
  ) +
  xlim(0, 175) +
  ylim(0, 240)

# Labels, x limit, and a retitled colour legend.
scatter +
  labs(
    title = "Height vs Mass of Starwars Characters",
    x = "Mass (kg)",
    y = "Height (cm)"
  ) +
  xlim(0, 175) +
  guides(color = guide_legend(title = "Species"))

# As above, with the "Set1" brewer palette for the colour scale.
scatter +
  labs(
    title = "Height vs Mass of Starwars Characters",
    x = "Mass (kg)",
    y = "Height (cm)"
  ) +
  xlim(0, 175) +
  guides(color = guide_legend(title = "Species")) +
  scale_color_brewer(palette = "Set1")
theme function and passing the appropriate parameters
+ theme_NAME
library(ggthemes)
# Labelled scatter plot with an x limit and a retitled legend; each line
# further down adds a different ggthemes theme to it.
almost_finished <- scatter +
  labs(
    title = "Height vs Mass of Starwars Characters",
    x = "Mass (kg)",
    y = "Height (cm)"
  ) +
  xlim(0, 175) +
  guides(color = guide_legend(title = "Species"))

almost_finished + theme_fivethirtyeight()
almost_finished + theme_wsj()
almost_finished + theme_economist()
almost_finished + theme_tufte()
Facet Grids allow us to create "mini" plots, per categorical variable
After setting up your plot as you normally would, you add in the facet_grid()
facet_grid(ROWS ~ COLUMNS)
# eye_color on the column side of the formula
almost_finished + facet_grid(. ~ eye_color)
# hair_color on the row side of the formula
almost_finished + facet_grid(hair_color ~ .)
# hair_color on the row side, eye_color on the column side
almost_finished + facet_grid(hair_color ~ eye_color)
The ggsave function by default will save the last plot to a given file location
ggsave(file_name, plot = plot_var)
# Build the final themed plot and write it to a PDF.
my_final_plot <- almost_finished + theme_fivethirtyeight()
# Pass the plot explicitly: without `plot =`, ggsave() writes the last plot
# that was displayed, and my_final_plot is only assigned here, never displayed.
ggsave("final_plot.pdf", plot = my_final_plot, dpi = 600, width = 10)