R
#!/usr/bin/Rscript
# R supports two assignment operators: `<-` and `=`.
#   * `<-` is preferred.
#   * `<-` can be reversed to be written as `->`, but this is not normally done.
a <- 1
b = 1   # `=` is used on purpose here to demonstrate the alternative operator
1 -> c  # right-assignment, used on purpose for the same reason
# All three forms bind the same value, so both comparisons evaluate to TRUE.
a == b
b == c
# In R, periods are often found in variable names instead of underscores:
a.long.name <- "String"
# c, q, s, t, C, D, F, I, T
# NOTE(review): the source lists these single-letter names without their
# lead-in text; presumably they are names to avoid because R already defines
# most of them -- confirm.
# camelCase, snake_case and dotted names are three distinct variables.
aLongName <- 0
a_long_name <- 0
a.long.name <- 0
print(aLongName)
print(a_long_name)
print(a.long.name)
# R has data types, and they are important, but they take a back seat to the
# data structures.
# NOTE(review): the following sentence is truncated in the source; only
# "R ... vectors" survives.
# Each value below is stored as a vector of length one.
num <- 1
print(num)
string <- "String"
print(string)
bool <- TRUE
print(bool)
# The data types in R are:
# NOTE(review): lead-in reconstructed from a truncated sentence ("R ... are:").

# Integers must be denoted by appending "L" to the number.
# Otherwise they will be interpreted as a double by default.
int <- 1L
# The typeof() function returns the type as a string.
print(typeof(1L))
print(typeof(1))
# A number without the "L" suffix is a double, with or without a fraction.
float.a <- 1
float.b <- 1.01
print(typeof(float.a))
print(typeof(float.b))
# Infinity and Not-a-Number are both represented as doubles.
float.c <- NaN
float.d <- Inf
float.e <- -Inf
print(typeof(float.c))
print(typeof(float.d))
print(typeof(float.e))
# Complex numbers: the imaginary part carries an `i` suffix. The type stays
# complex even when the imaginary part is zero.
imaginary.a <- 1 + 1i
imaginary.b <- 1 + 0i
print(typeof(imaginary.a))
print(typeof(imaginary.b))

# Character strings: double and single quotes both work.
string.example.1 <- "String"
string.example.2 <- 'String'
print(typeof(string.example.1))
print(typeof(string.example.2))
# Rebinding a name to a value of another type changes what typeof() reports.
string.example.2 <- 1
print(typeof(string.example.2))

# Logical values are typed in all uppercase letters.
logic.t <- TRUE
logic.f <- FALSE
print(typeof(logic.t))
print(typeof(logic.f))
# R has numerous predicate functions relating to data types, of the form
#   is.DATA_TYPE_NAME(x)
# for example:
#   is.integer(x)
#   is.numeric(x)
# `int` is not assigned in this section; it is expected to exist already.
print(int)
print(is.integer(int))
print(is.double(int))
print(is.numeric(int))
# A string that merely looks like a number is tested as well.
print(is.numeric("1"))
# Conversion between data types uses the `as` family of functions:
#   as.DATA_TYPE_NAME(x)
# for example:
as.integer(1.003)
# NOTE(review): the next sentence is truncated in the source; what survives is
# "R ..., not just with primitive data types".
print(as.character(1L))
print(as.integer(1.0004))
# Inf has no integer representation.
print(as.integer(Inf))
print(as.double(1L))
print(as.complex(1))
print(as.numeric(TRUE))
# Data structures in R can be described by the number of dimensions supported,
# and the data types allowed:
#
#     | Homogeneous | Heterogeneous |
# ----|-------------|---------------|
# 1-D | Vector      | List          |
# 2-D | Matrix      | DataFrame     |
# N-D | Array       |               |
# Vectors are created with the `c` function:
#   a.vector <- c(1,2,3,4)
# If values of different types are passed to the `c` function, they will be
# coerced to a single common type.
a.vector <- c(1, 2, 3, 4)
print(a.vector)
a.vector <- c(1.001, 2, 3, 4)
print(a.vector)
# Mixing a logical with numbers: TRUE becomes 1.
a.vector <- c(1.01, TRUE, 3, 4)
print(a.vector)
# Mixing in a string: every element becomes a character string.
a.vector <- c(TRUE, "a", 3, 4)
print(a.vector)
# Factors are created with the `factor` function, or by converting an existing
# vector by using `as.factor`.
# Every value is distinct here, so each one becomes its own level.
factor.1 <- factor(c("UMBC", "UMCP", "UMUC", "UMB", "UB"))
print(factor.1)
cat("\n")
# Repeated values share a level.
factor.2 <- factor(
  c("Senior", "Junior", "Senior", "Junior", "Sophmore")
)
print(factor.2)
# Can use the levels keyword to specify all possible values
factor.3 <- factor(
  c("Senior", "Junior", "Senior", "Junior", "Sophmore"),
  levels = c("Senior", "Junior", "Sophmore", "Freshman")
)
print(factor.3)
cat("\n")
# as.factor converts an existing vector into a factor.
factor.4 <- as.factor(
  c("Senior", "Junior", "Senior", "Junior", "Sophmore")
)
print(factor.4)
# Lists are created with the `list` function:
#   a.list <- list("a",2,3.14,FALSE)
a.list <- list("a", 2, 3.14, FALSE)
# The str function will show the structure of a variable.
# str DOES NOT stand for string, it stands for structure.
str(a.list)
print(a.list)
# A list may itself contain a list.
recursive.list <- list("a", 2, 3.14, list("re", "cursive"))
str(recursive.list)
# If you try to use c recursively, there is no error.
# Everything is just flattened.
a.vector <- c(1, 2, 3, c(4, 5))
str(a.vector)
# Applying c to arguments that include at least one list
# coerces the entire structure to a list.
coerced.list <- c(1, 2, 3, list(4, 5), list(6, 7))
str(coerced.list)
# An attribute is set on an object with:
#   attr(OBJECT, "ATTRIBUTE_NAME") <- ATTRIBUTE_VALUE
obj <- c(3, 4, 5, 6)
# Reading an attribute that has not been set yet gives NULL.
print(attr(obj, "time_created"))
attr(obj, "time_created") <- date()
print(attr(obj, "time_created"))
cat("\n")
# attributes() returns every attribute of the object as a named list.
print(attributes(obj))
# NOTE(review): the lead-in sentence is truncated in the source; it ends with
# "... `attr` function".
# Names can be assigned to an existing object, or given when it is created:
#   names(OBJECT) <- c(SERIES OF CHARACTERS)
#   VARIABLE <- c(a = 1, b = 2)
scores <- c(80, 75, 80, 100, 95, 85)
names(scores) <- c(
  "Regex HW", "Regex Quiz",
  "Shell HW", "Shell Quiz",
  "R HW", "R Quiz"
)
print(scores)
# Matrices are created with the `matrix` function, or with the `cbind` or
# `rbind` functions.
# NOTE(review): sentence reconstructed from fragments in the source.

# Using the Matrix Function
m <- matrix(
  c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12),
  nrow = 3, ncol = 4
)
print(m)
cat("\n")
# Only ncol is given here; the row count follows from the 12 values.
m2 <- matrix(1:12, ncol = 4)
print(m2)
# Creating a matrix of zeros: the single value 0 fills every cell.
zeros <- matrix(0, nrow = 3, ncol = 4)
print(zeros)
cat("\n")
# dim() reports the number of rows, then the number of columns.
print(dim(zeros))
# Adding dimensions to an existing vector
vec <- 1:12
print(vec)
# A plain vector has no dim attribute, so this prints NULL.
print(dim(vec))
cat("\n")
# Assigning dim turns the same 12 values into a 3 x 4 matrix.
dim(vec) <- c(3, 4)
print(vec)
# Using cbind: each vector becomes one column.
m3 <- cbind(c(1, 2, 3), c(4, 5, 6), c(7, 8, 9), c(10, 11, 12))
print(m3)
cat("\n")
# Using rbind: each vector becomes one row.
m4 <- rbind(c(1, 4, 7, 10), c(2, 5, 8, 11), c(3, 6, 9, 12))
print(m4)
# A data frame is built from one vector of values per column:
#   df <- data.frame(COL1 = c(VALUES FOR COL 1),
#                    COL2 = c(VALUES FOR COL 2), ...,
#                    COL_N = c(VALUES FOR COL_N))
df <- data.frame(
  name = c("UMBC", "UMCP", "Towson"),
  zipcode = c(21250, 20742, 21252),
  undergrad = c(11142, 28472, 19596),
  graduate = c(2498, 10611, 3109)
)
print(df)
# `nrow` returns the number of rows in the data frame.
# `ncol` and `length` both return the number of columns.
# Row names are set with the `row.names` function.
# NOTE(review): the last sentence is reconstructed from the fragment
# "row.names function" in the source.
# `df` is not created in this section; it is expected to exist already.
print(nrow(df))
print(ncol(df))
row.names(df) <- c("A", "B", "C")
print(df)
# R has many built in functions to read data files into data frames:
#   - read.table reads a space separated file by default, and is the base to
#     many other functions
#   - read.csv reads a comma separated values file, and is actually just a
#     call to read.table
# R supports many other formats through various libraries, such as foreign,
# which reads in data from many similar languages to R.
# Tab separated file whose first line holds the column names.
usm <- read.table("data/usm.tsv", sep = "\t", header = TRUE)
print(usm)
# Comma separated file; the first column supplies the row names.
usm2 <- read.csv("data/usm.csv", row.names = 1)
print(usm2)
# R similarly supports many different formats in which to write data to a
# file, for example with write.table and write.csv.
# NOTE(review): the next sentence is truncated in the source; it ends with
# "... col.names or row.names to FALSE".
# `usm2` is not created in this section; it is expected to exist already.
write.csv(usm2, "data/usm2.csv")
# write.csv() ignores `append` and `col.names` (with a warning), so this call
# rewrites the file rather than appending to it.
write.csv(usm2, "data/usm2.csv", append = TRUE, col.names = FALSE)
# write.table() honours both arguments: rows are appended without a header.
write.table(usm2, "data/usm2.csv",
  sep = ",",
  append = TRUE, col.names = FALSE
)
# Addition (integer-valued, double and complex operands can be mixed)
print(1 + 1)
print(1 + 1.0)
print(1 + 1i + 2)
print(2 + 1 + 3i)
print(2 + 3i + 4 + 5i)
# Subtraction
print(3 - 2)
print(0 - 3)
# Multiplication
print(3 * 4)
print(3 * 0.12)
# Division
print(3 / 4)
print(0 / 4)
# Dividing by zero is not an error: 0/0 is NaN, and a non-zero numerator
# gives Inf or -Inf.
print(0 / 0)
print(3 / 0)
print(-3 / 0)
# Integer division
print(3 %/% 4)
print(12 %/% 5)
# With a zero divisor the result is Inf (or NaN for 0 %/% 0), not an error.
print(3 %/% 0)
print(0 %/% 0)
# Modulus
print(3 %% 3)
print(10 %% 3)
# Modulus by zero gives NaN.
print(0 %% 0)
print(3 %% 0)
# Exponentiation, including fractional and negative exponents
print(3^3)
print(9^0.5)
print(10^-2)
# NOTE(review): the lead-in prose is truncated in the source; only
# "R ... `t` function" survives.
# The arithmetic operators below work on whole vectors at once.

# Vector / Scalar Math
vec <- 1:5
print(vec * 2)
print(vec / 10)
print(vec + 1)
# Vector addition
# Lengths 5 and 6 do not match: the shorter vector is recycled and R warns.
vec2 <- 10:15
print(vec + vec2)
# Equal lengths: plain element-wise addition.
vec2 <- 11:15
print(vec + vec2)
# Element-wise multiplication
print(vec * vec2)
cat("\n")
# Dot Product (%*% returns a 1 x 1 matrix)
print(vec %*% vec2)
#print(cvec,vec2))
# Matrix / Vector Operations
# `vec` is not assigned in this section; it is expected to exist already.
# 20 values arranged in 5 rows.
mat <- matrix(1:20, nrow = 5)
print(mat)
print(mat / vec)
# Matrix / Vector Operations
# The same 20 values arranged in 4 rows instead.
mat2 <- matrix(1:20, nrow = 4)
print(mat2)
print(mat2 / vec)
# DataFrame Operations
# `usm` and `mat` are not assigned in this section; both are expected to
# exist already.
print(usm)
cat("\n")
print(usm * 2)
# Transposition
print(t(mat))
cat("\n")
# What is the datastructure returned by this function?
print(t(usm))
# Wrapping the result in as.data.frame() gives a data frame.
print(as.data.frame(t(usm)))
# R supports the standard boolean operators of <, >, <=, >=, == and !=.
# "And" and "or" are written as & and | respectively.
# NOTE(review): the second sentence is reconstructed from fragments.

## Standard Scalar Comparison
print(3 == 4)
print(3 < 4)
print(3 < 4 & 5 < 10)
print(3 == 4 | 4 != 4)
## Comparing Data Structures
# `vec`, `vec2` and `mat` are not assigned in this section; they are expected
# to exist already.
print(vec)
print(vec2)
cat("\n")
# Comparisons are applied element by element.
print(vec == vec2)
print(vec < vec2)
# Vector and Matrix Comparison
print(vec)
print(mat)
cat("\n")
print(vec == mat)
# Positive indices select elements (R indexes from 1).
# `vec` is not assigned in this section; it is expected to exist already.
print(vec)
print(vec[1])
print(vec[2:3])
print(vec[c(1, 5)])
# Can repeat indices
print(vec[c(2, 2)])
# Negative indices drop the elements at those positions.
print(vec)
print(vec[-1])
print(vec[-2:-3])
print(vec[c(-1, -5)])
# Explicit Boolean Subsetting
print(vec)
print(vec[c(TRUE, FALSE, TRUE, FALSE, TRUE)])
cat("\n")
# Using an expression: keeps the elements for which the test is TRUE
print(vec[vec %% 2 == 0])
# A named list used to compare the extraction operators.
li <- list(a = 1, b = 2, c = 3, d = 4, e = 5)
# Single brackets: returns a list
print(li[2])
# Double brackets and `$` return the element itself.
print(li[[2]])
print(li[["b"]])
print(li$b)
idx <- "b"
cat("\n")
# `[[` uses the value stored in `idx`; `$` looks for an element literally
# named "idx", which this list does not have.
print(li[[idx]])
print(li$idx)
# `mat` is not assigned in this section; it is expected to exist already.
print(mat)
cat("\n")
# A single index treats the matrix as one long vector.
print(mat[5])
# [row, ] selects a whole row; [, column] selects a whole column.
print(mat[5, ])
print(mat[, 4])
# [row, column] selects one cell.
print(mat[5, 4])
# Several rows at once, in the order requested.
print(mat[c(5, 4), ])
# `usm` is not assigned in this section; it is expected to exist already.
# Single brackets with a position or a name select a column.
print(usm[1])
cat("\n")
print(usm["Name"])
# This is a vector rather than a one column DF
print(usm$Name)
# Keep only the rows where the enrollment test is TRUE.
print(usm[usm["Undergraduate.Enrollment"] > 10000, ])
cat("\n")
# Column names are case-sensitive, so the column is selected as "Name",
# matching the two accesses above (the lowercase spelling is not a column
# used anywhere else in this section).
print(usm[usm["Undergraduate.Enrollment"] > 10000, "Name"])
# Add a new column holding the sum of the third and fourth columns.
usm["total"] <- usm[3] + usm[4]
print(usm)
# R has excellent built in help capabilities:
#   - ?FUNCTION_NAME shows the help for a function
#   - the ?? function searches the help system
?read.table
read.table