# R supports three different types of objects, all declared and used in different ways.
# R's object systems trace back to the S language.
# An object can be created by calling the `structure` function and assigning the result to a variable,
# or by using the `class` function to give an existing variable a class attribute.
my_first_instance <- structure(1:5,class="specialVector")
# Show the classed vector: print() displays the values together with the
# "class" attribute, str() gives a compact structural summary.
print(my_first_instance)
# str() writes its summary itself and returns NULL invisibly; wrapping it in
# print() would emit a stray "NULL" line, so it is called directly.
str(my_first_instance)

# A class can also be attached to an existing object after the fact.
my_second_instance <- list(a_member = 2, another = "A String")
print(my_second_instance)
class(my_second_instance) <- "listClass"
str(my_second_instance)
# Objects are typically built by calling `structure` or `class` inside of a
# function (a constructor).
# Template: wrap the argument in a list and tag the list with the class name.
class_name <- function(parameters) {
  fields <- list(parameters)
  structure(fields, class = "class_name")
}
# Constructor for the "vehicle" S3 class.
#   n_wheels: stored in the m_n_wheels field
#   color:    stored in the m_color field
# Returns a list with those two fields carrying class "vehicle".
vehicle <- function(n_wheels, color) {
  fields <- list(m_n_wheels = n_wheels, m_color = color)
  class(fields) <- "vehicle"
  fields
}
# Build a four-wheeled black vehicle and show its class vector.
myCar <- vehicle(n_wheels = 4, color = 'black')
print(class(myCar))
# Inheritance template: build the parent object, then put the child's class
# name in front of the inherited class vector so child methods are tried first.
child_class <- function(parameters) {
  base_obj <- parent_class(parameters)
  inherited <- class(base_obj)
  class(base_obj) <- c("child_class", inherited)
  base_obj
}
# Constructor for the "car" subclass: a vehicle that always has four wheels.
#   color: passed through to vehicle()
# Returns the vehicle object with "car" placed ahead of the inherited classes.
car <- function(color) {
  base_vehicle <- vehicle(4, color)
  class(base_vehicle) <- c("car", class(base_vehicle))
  base_vehicle
}
# Create a black car and show its class vector.
my_new_car <- car(color = 'black')
print(class(my_new_car))
# R uses a style of OOP known as generics.
# For example: t(df) # actually t.data.frame(df)
# Build a 5 x 4 data frame and transpose it twice: once through the generic
# t() and once by calling the data-frame method directly.
mm <- as.data.frame(matrix(data = 1:20, ncol = 4))
print(t(mm))
print(t.data.frame(mm))

# Printing the functions themselves shows their definitions.
print(t)
print(t.data.frame)
# The `UseMethod` function denotes that this function should actually dispatch
# to a more appropriate function, based on the object that was passed in.
# `t` might look like:
t <- function(obj) UseMethod("t")
# Methods are named `function_name.class_name`.
# `function_name.default` can be defined to be run in the event no match is found.
print(my_new_car)
# print method for "vehicle" objects.
#   x:   a list carrying m_color and m_n_wheels fields
#   ...: accepted for compatibility with the print() generic; ignored
# Writes a one-sentence description to the console and returns x invisibly,
# as print methods conventionally do.
print.vehicle <- function(x, ...) {
  # The pieces used to be chained with a `% %` operator that is never defined,
  # so the method failed when called; paste() joins them with single spaces.
  description <- paste(
    "My vehicle is", x[["m_color"]],
    "in color and has", x[["m_n_wheels"]], "wheels."
  )
  cat(description, "\n", sep = "")
  invisible(x)
}
# Print the car while a print.vehicle method is defined.
print(my_new_car)
# print.vehicle <- print.default
# Remove the method again, then print once more without it.
rm(list = "print.vehicle")
print(my_new_car)
# Generic: reports the class vector of its argument, then dispatches on it.
makeNoise <- function(x) {
  print(class(x))
  UseMethod("makeNoise")
}

# Method for "vehicle" objects.
makeNoise.vehicle <- function(x) "Generic Vehicle Noise"

# Method for "car" objects.
makeNoise.car <- function(x) "BEEP BEEP"

# Fallback used when no class-specific method matches.
makeNoise.default <- function(x) "You can't make a noise"
# Try the generic on a plain vehicle, a car, and an unrelated string.
for (thing in list(myCar, my_new_car, "Random String")) {
  print(makeNoise(thing))
}
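# S4 objects are created with the `new` function.
# Their fields are called `slots` (in R).
# Inheritance is declared with the `contains` keyword.
#
# Tabulation in R is done using the various `table` functions.
# `table` returns a `table` object, which may be converted to a data frame for easier querying.
# `ftable` produces a flat contingency table.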
# Count how often each distinct answer occurs.
strings <- c("Yes", "Yes", "No", "Maybe", "OK", "Yes")
print(table(strings))

# NOTE(review): Bundesliga is not defined in this file; presumably it is a
# data set shipped with vcd - confirm.
library(vcd)
head(Bundesliga)

# Home games per team, then the teams with the most home games first.
homeGames <- table(Bundesliga$HomeTeam)
print(homeGames)
print(head(homeGames[order(-homeGames)]))

## How do we get the total number of games played?
away_games <- table(Bundesliga$AwayTeam)
all_games <- away_games + homeGames
print(head(all_games[order(-all_games)]))

# Two-way table: home team by away team.
print(head(table(Bundesliga$HomeTeam, Bundesliga$AwayTeam)))
# The `table` function can only count things, it won't bin numbers for us.
# The `cut` function converts numeric data into factors.
# `right` controls which side is open and which is closed.
print(max(Bundesliga$HomeGoals))
# Bin home goals into three intervals, closed on the left (right = FALSE),
# so that table() can count them.
FactorGoals <- cut(Bundesliga$HomeGoals, 3, right = FALSE)
print(table(FactorGoals))
print(head(table(Bundesliga$HomeTeam, FactorGoals)))

# as.data.frame() on a table gives the long form: one row per team/bin pair.
goalsByTeam <- as.data.frame(table(Bundesliga$HomeTeam, FactorGoals))
print(head(goalsByTeam))

# as.data.frame.matrix() keeps the wide layout: one row per team, one column
# per bin.
goalsByTeam <- as.data.frame.matrix(table(Bundesliga$HomeTeam, FactorGoals))
print(head(goalsByTeam))

# Rank teams by the third bin. The column is extracted with [[ ]] so order()
# receives a plain numeric vector; goalsByTeam[3] is a one-column data frame,
# which order() is not meant to sort (recent R versions complain with
# "cannot xtfrm data frames").
by_top_bin <- order(-goalsByTeam[[3]])
print(by_top_bin)
print(head(goalsByTeam[by_top_bin, ]))
# R provides the usual summary statistics:
# `mean`
# `median`
# `sd` - Standard Deviation
# `max`
# `min`
# Range of years covered by the data.
first_year <- min(Bundesliga$Year)
last_year <- max(Bundesliga$Year)
print(paste(
  "Our dataset includes the years from", first_year, "to", last_year
))

# Mean and standard deviation of goals, away side first, then home side.
print(mean(Bundesliga$AwayGoals))
print(mean(Bundesliga$HomeGoals))
print(sd(Bundesliga$AwayGoals))
print(sd(Bundesliga$HomeGoals))

# Show the class of what summary() returns before the values themselves.
sumAway <- summary(Bundesliga$AwayGoals)
print(class(sumAway))
print(sumAway)
print(summary(Bundesliga$HomeGoals))
# `rowMeans` and `colMeans` compute the means of rows and columns.
# More general is the `apply` function, which applies a function over an object across a given margin (sometimes called an axis):
# apply(OBJECT,AXIS,FUNCTION)
# NOTE(review): iqitems is not defined in this file; presumably it is a data
# set made available by psych - confirm.
library(psych)
#print(dim(iqitems))
#print(head(iqitems))
# Replace missing answers with 0 before averaging.
iqitems[is.na(iqitems)] <- 0
# Grand mean over every cell, then one mean per column (margin 2).
print(mean(as.matrix(iqitems)))
print(apply(iqitems, MARGIN = 2, FUN = mean))
# Correlations can be computed in R using the `cor` method, and passing the corresponding string to the `method` parameter.
print(cor(Bundesliga$HomeGoals, Bundesliga$AwayGoals,method="spearman"))
## Not really useful because it's comparing ranks, but this is how it is called
kendall_cor <- cor(Bundesliga$HomeGoals, Bundesliga$AwayGoals,
                   method = "kendall")
print(kendall_cor)
# R also comes built in with numerous exploratory data techniques.
# `prcomp` performs principal component analysis.
# Run prcomp on the item responses and print the `x` component of the result.
pca <- prcomp(iqitems)
print(pca[["x"]])
# The `kmeans` function produces k-clusters by using attributes of data.
# A `kmeans` object is returned.
clusters <- kmeans(iqitems,10)
# Inspect the fitted clustering.
print(clusters)
# str() prints the structure itself and returns NULL invisibly; wrapping it in
# print() would add a stray "NULL" line, so it is called directly.
str(clusters)
print(clusters$cluster)
#clusters$cluster[clusters$cluster==2]
# Rows assigned to cluster 2, looked up through the names of the cluster vector.
in_cluster_two <- clusters$cluster == 2
head(iqitems[names(clusters$cluster[in_cluster_two]), ])
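# Linear regression in R is performed using the `lm` function.
# `lm` is the first function we are looking at that takes as an argument a formula:
# lm(formula, data = DATAFRAME)
# A formula has the form: dependent_var ~ independent_vars
# `+` adds a term to the model.
# `*` adds the terms together with their interaction.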
head(iris)

# Two predictors combined with `+`.
model1 <- lm(
  Sepal.Length ~ Sepal.Width + Petal.Length,
  data = iris
)
summary(model1)

# The same two predictors combined with `*`.
model2 <- lm(
  Sepal.Length ~ Sepal.Width * Petal.Length,
  data = iris
)
summary(model2)

# Species added as a third `*` factor.
model3 <- lm(
  Sepal.Length ~ Sepal.Width * Petal.Length * Species,
  data = iris
)
summary(model3)
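# `aov` fits an analysis-of-variance model.
# `anova` computes an analysis-of-variance table for a fitted model.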
# Fit the three-way formula with aov() and print its summary, then print the
# anova() table of model3.
model4 <- aov(
  Sepal.Length ~ Sepal.Width * Petal.Length * Species,
  data = iris
)
print(summary(model4))
print(anova(model3))
# R has a very robust package ecosystem.
# To install a package in R, use the `install.packages` function, and pass the name of the package you want to install.
# Load an installed package with: library(PACKAGE_NAME) #No QUOTES
# Help is available through the `?` operator. How do we use `install.packages`?
# TidyR is commonly presented using the operator `%>%`, which comes from an earlier package, magrittr.
# The two calls below are equivalent:
# apply(data,1,function)
# data %>% apply(1,function)
# The `spread` function converts from long data to wide data.
# The signature of the `spread` function is: spread(data,key,value)
# Load the example tables. NOTE(review): table2, table4 and table5 are not
# defined in this file; presumably they come from the DSR package - confirm.
library(DSR)
long <- table2
extra_wide_cases <- table4
combined <- table5
library(tidyr)
# Exercise: the two `?` placeholders stand for the key and value arguments of
# spread() and must be filled in before this line can be parsed and run.
print(as.data.frame(spread(long,?,?)))
# Usage: gather(data, COLUMN_NAME1, COLUMN_NAME2, cols_to_gather)
# Gather columns 2 and 3 into a "Year" / "Cases" pair of columns.
gathered_cases <- gather(extra_wide_cases, "Year", "Cases", 2:3)
print(gathered_cases)
# Usage: separate(data,col_to_separate,new_columns)
# Usage: unite(data,col_to_add, from_columns)
print(combined)
# Exercise: each `?` is a blank to fill in - the columns to unite into "year",
# then the column to split and the new column names for separate(). The line
# cannot be parsed until the placeholders are replaced.
all_good <- combined %>% unite("year",?) %>% separate(?,?)
print(all_good)