path2data <- "../data/"
getwd() #get the current working directory
[1] "/Users/rick/github/psu-psychology/r-bootcamp/talks"
/Users/rick/github/psu-psychology/r-bootcamp/talks
#setwd("~/Dropbox/James Work Files/R Workshop/2017") #change the working directory
Since ~/Dropbox/James Work Files/R Workshop/2017
is specific to James’ computer, it won’t work for others. When using an RStudio project, I don’t change my working directory. Instead, I just make sure I give relevant functions information about the directories where other resources can be found.
# Can install by evaluating chunk, but not by "knitting".
# Install only when the package is missing, so re-running this chunk does not
# download it again.
if (!requireNamespace("multilevel", quietly = TRUE)) {
  install.packages("multilevel") # downloads the package to this computer
}
# Load (attach) the package so its functions can be called by name
library("multilevel")
search() # "package:multilevel" now appears on the search path
# Remove the package from the search path again
detach(package:multilevel)
search() # "package:multilevel" is gone
#You may inquire about a function using any of the following:
##If you know the exact name:
?search
help(search)
##If you want to search by part of the name
apropos("searc")
[1] "help.search" "hsearch_db" "hsearch_db_concepts"
[4] "hsearch_db_keywords" "RSiteSearch" "search"
[7] "searchpaths"
help.search
hsearch_db
hsearch_db_concepts
hsearch_db_keywords
RSiteSearch
search
searchpaths
??sear
Another good source of help is StackOverflow.
x <- 2
x
[1] 2
y = c(1:3); y
[1] 1 2 3
z = c("Porsche 911", "Porsche 944", "Porsche 911", "BMW 335xi")
z
[1] "Porsche 911" "Porsche 944" "Porsche 911" "BMW 335xi"
Porsche 911
Porsche 944
Porsche 911
BMW 335xi
g=sqrt(x); g
[1] 1.414214
is.numeric(x)
[1] TRUE
is.numeric(z)
[1] FALSE
#String Data as character:
z
[1] "Porsche 911" "Porsche 944" "Porsche 911" "BMW 335xi"
Porsche 911
Porsche 944
Porsche 911
BMW 335xi
#String Data as factor:
z2=factor(z)
z2
[1] Porsche 911 Porsche 944 Porsche 911 BMW 335xi
Levels: BMW 335xi Porsche 911 Porsche 944
#Compute the Length of a String (or Numeric) Variable:
nchar(x)
[1] 1
nchar(y)
[1] 1 1 1
nchar(y)
[1] 1 1 1
nchar(z)
[1] 11 11 11 9
#nchar(z2) Throws error during rendering
##Assumes values of TRUE or FALSE
###TRUE is considered equal to 1
###FALSE is considered equal to 0
TRUE*5
[1] 5
sqrt(TRUE)
[1] 1
t=TRUE
# you can test if a variable type is logical using:
is.logical(x)
[1] FALSE
is.logical(t)
[1] TRUE
# Logical data types also used as input to functions (see Day 2 examples)
2==2
[1] TRUE
2==3
[1] FALSE
#Vectors - 1 dimensional collections of same type data
v1=1:5; v1 #creating vector of numbers
[1] 1 2 3 4 5
v2=c(1,2,3,4,5); v2
[1] 1 2 3 4 5
v3=c("Porsche 911", "Ford Mustang GT", "Plymouth Baracuda", "Chevrolet Camaro", "Honda Pilot LX")
v1; v2; v3
[1] 1 2 3 4 5
[1] 1 2 3 4 5
[1] "Porsche 911" "Ford Mustang GT" "Plymouth Baracuda"
[4] "Chevrolet Camaro" "Honda Pilot LX"
Porsche 911
Ford Mustang GT
Plymouth Baracuda
Chevrolet Camaro
Honda Pilot LX
#Matrices - 2 dimensional collections of same type data
m=matrix(1:20, nrow=5); m
[,1] [,2] [,3] [,4]
[1,] 1 6 11 16
[2,] 2 7 12 17
[3,] 3 8 13 18
[4,] 4 9 14 19
[5,] 5 10 15 20
#Arrays - multidimensional collection of same type data
#example of 3D array
a=array(1:20, dim=c(2,5,2)); a
, , 1
[,1] [,2] [,3] [,4] [,5]
[1,] 1 3 5 7 9
[2,] 2 4 6 8 10
, , 2
[,1] [,2] [,3] [,4] [,5]
[1,] 11 13 15 17 19
[2,] 12 14 16 18 20
#Creating a data frame from vectors
eng=c("Flat-6", "V-8", "V-8", "V-8", "V-6")
doors=c(2,2,2,2,4)
data1=data.frame(v2, v3, eng, doors)
# Viewing content of data frames
# Look at the "Environment" tab in the upper left panel
# Click on one of the data frames listed under Data (e.g., "data1")
# Or, simply type:
data1
# Obtain a list of the variable names in a data frame
names(data1)
[1] "v2" "v3" "eng" "doors"
v2
v3
eng
doors
# Change the names of the variables in a data frame
# Option 1: build a new data frame, naming each column as it is added
data2 <- data.frame(id = v2, model = v3, eng = eng, doors = doors)
data1
data2
# Option 2: copy the original data frame, then rename selected columns
data3 <- data1
# Install plyr only when it is missing, so re-running does not re-download it
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)
# In `replace`, the old name is on the left of "=" and the new name on the right.
# plyr:: is spelled out so another attached package's rename() cannot be picked up.
data3 <- plyr::rename(data3, replace = c("v2" = "id", "v3" = "model"))
data3
# Option 3: overwrite all column names of the existing data frame at once
names(data1) <- c("id", "model", "eng", "doors")
data1
##List of available data sets
data()
library(multilevel)
Loading required package: nlme
Loading required package: MASS
#List data in the multilevel package
data(package="multilevel")
#load the univ data frame into R environment
data(univbct, package="multilevel")
d=univbct
#Confirm it is loaded as a data frame
class(d)
[1] "data.frame"
data.frame
# Saving a data frame as a .csv file (to be read into SPSS, Excel, Text Editor, etc.)
# The same data are written twice: once naming the `file` argument, once
# passing the path by position. row.names = FALSE omits the row-name column.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
write.table(d, file = paste0(path2data, "d2.csv"), sep = ",", row.names = FALSE)
write.table(d, paste0(path2data, "d1.csv"), sep = ",", row.names = FALSE)
# Save the data as a text file plus an SPSS syntax (.sps) file that reads it
# Install foreign only when it is missing, so re-running does not re-download it
if (!requireNamespace("foreign", quietly = TRUE)) {
  install.packages("foreign")
}
library("foreign")
write.foreign(univbct,
              datafile = paste0(path2data, "univbct.csv"),
              codefile = paste0(path2data, "univbct.sps"),
              package = "SPSS")
# Open both generated files to inspect them
file.show(paste0(path2data, "univbct.csv"))
file.show(paste0(path2data, "univbct.sps"))
library("foreign")
demo1=read.spss(file=paste0(path2data, "demo1.sav"),
use.value.labels=TRUE,
to.data.frame=TRUE,
use.missings=TRUE)
summary(demo1)
SUBNUM TIME BTN COMPANY
Min. : 1.00 Min. :0 Min. : 4.0 A :246
1st Qu.: 75.75 1st Qu.:0 1st Qu.: 377.8 HHC :210
Median :150.50 Median :1 Median :1022.0 B :207
Mean :150.50 Mean :1 Mean :1860.3 D :114
3rd Qu.:225.25 3rd Qu.:2 3rd Qu.:3066.0 C : 84
Max. :300.00 Max. :2 Max. :4042.0 SVC : 24
(Other): 15
MARITAL GENDER HOWLONG RANK
Min. :1.000 Min. :1.000 Min. :0.000 Min. :11.00
1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:13.00
Median :2.000 Median :1.000 Median :2.000 Median :14.00
Mean :1.711 Mean :1.039 Mean :2.371 Mean :15.26
3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:4.000 3rd Qu.:16.00
Max. :5.000 Max. :2.000 Max. :5.000 Max. :32.00
NA's :6 NA's :51 NA's :18 NA's :48
EDUCATE AGE
Min. :1.000 Min. :18.00
1st Qu.:2.000 1st Qu.:20.00
Median :2.000 Median :24.00
Mean :2.663 Mean :25.75
3rd Qu.:3.000 3rd Qu.:30.00
Max. :6.000 Max. :44.00
NA's :9 NA's :9
# Read demo2 with use.missings = FALSE, i.e. WITHOUT converting SPSS
# user-defined missing values to NA.
# TRUE is spelled out (not T) because T is an ordinary variable that can be
# reassigned.
demo2 <- read.spss(file = paste0(path2data, "demo2.sav"),
                   use.value.labels = TRUE,
                   to.data.frame = TRUE,
                   use.missings = FALSE)
summary(demo2) # oops, GENDER = 999 was a missing values code
SUBNUM TIME BTN COMPANY MARITAL
Min. :301 Min. :0 Min. : 4 A :156 Min. :1.000
1st Qu.:349 1st Qu.:0 1st Qu.: 404 HHC :144 1st Qu.:1.000
Median :398 Median :1 Median :1022 B :141 Median :2.000
Mean :398 Mean :1 Mean :1755 D : 69 Mean :1.756
3rd Qu.:447 3rd Qu.:2 3rd Qu.:3066 C : 42 3rd Qu.:2.000
Max. :495 Max. :2 Max. :4042 SVC : 15 Max. :5.000
(Other): 18 NA's :6
GENDER HOWLONG RANK EDUCATE
Min. : 1.00 Min. :0.000 Min. :11.0 Min. :1.00
1st Qu.: 1.00 1st Qu.:2.000 1st Qu.:13.0 1st Qu.:2.00
Median : 1.00 Median :2.000 Median :14.0 Median :2.00
Mean : 88.03 Mean :2.446 Mean :14.7 Mean :2.49
3rd Qu.: 1.00 3rd Qu.:3.000 3rd Qu.:15.0 3rd Qu.:2.00
Max. :999.00 Max. :5.000 Max. :31.0 Max. :6.00
NA's :6 NA's :27 NA's :3
AGE
Min. :18.00
1st Qu.:21.00
Median :24.00
Mean :25.68
3rd Qu.:29.00
Max. :46.00
NA's :3
# Re-read demo2, this time with use.missings = TRUE so that SPSS user-defined
# missing values are converted to NA.
demo2 <- read.spss(file = paste0(path2data, "demo2.sav"),
                   use.value.labels = TRUE,
                   to.data.frame = TRUE,
                   use.missings = TRUE)
names(demo1); names(demo2)
[1] "SUBNUM" "TIME" "BTN" "COMPANY" "MARITAL" "GENDER" "HOWLONG"
[8] "RANK" "EDUCATE" "AGE"
SUBNUM
TIME
BTN
COMPANY
MARITAL
GENDER
HOWLONG
RANK
EDUCATE
AGE
[1] "SUBNUM" "TIME" "BTN" "COMPANY" "MARITAL" "GENDER" "HOWLONG"
[8] "RANK" "EDUCATE" "AGE"
SUBNUM
TIME
BTN
COMPANY
MARITAL
GENDER
HOWLONG
RANK
EDUCATE
AGE
# Reading data (csv)
# header = TRUE (already read.csv's default) treats the first line as column names
data1 <- read.csv(paste0(path2data, "data1.csv"), header = TRUE)
data2 <- read.csv(paste0(path2data, "data2.csv"))
# Now click on "Environment" tab and the "data1" dataframe
# NA (not available) is automatically inserted by R for any missing data
head(data1) # display first 6 cases
tail(data1) # display last 6 cases
summary(data1) # display summary
SUBNUM TIME JOBSAT1 COMMIT1
Min. : 1.00 Min. :0 Min. : 1.000 Min. : 1.000
1st Qu.: 75.75 1st Qu.:0 1st Qu.: 2.667 1st Qu.: 3.333
Median :150.50 Median :1 Median : 3.667 Median : 3.667
Mean :150.50 Mean :1 Mean : 49.763 Mean : 46.794
3rd Qu.:225.25 3rd Qu.:2 3rd Qu.: 4.000 3rd Qu.: 4.333
Max. :300.00 Max. :2 Max. :999.000 Max. :999.000
READY1 JOBSAT2 COMMIT2 READY2
Min. : 1.00 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.: 2.75 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.750
Median : 3.25 Median :3.333 Median :3.667 Median :3.250
Mean : 56.18 Mean :3.272 Mean :3.498 Mean :3.176
3rd Qu.: 3.75 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750
Max. :999.00 Max. :5.000 Max. :5.000 Max. :5.000
NA's :66 NA's :48 NA's :54
JOBSAT3 COMMIT3 READY3 JSAT
Min. :1.000 Min. :1.333 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667
Median :3.333 Median :3.667 Median :3.250 Median :3.333
Mean :3.355 Mean :3.556 Mean :3.241 Mean :3.308
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :51 NA's :48 NA's :48 NA's :53
COMMIT READY
Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750
Median :3.667 Median :3.250
Mean :3.573 Mean :3.161
3rd Qu.:4.000 3rd Qu.:3.750
Max. :5.000 Max. :5.000
NA's :45 NA's :50
summary(data2)
SUBNUM TIME JOBSAT1 COMMIT1 READY1
Min. :301 Min. :0 Min. :1.000 Min. :1.000 Min. :1.00
1st Qu.:349 1st Qu.:0 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.25
Median :398 Median :1 Median :3.333 Median :3.667 Median :3.00
Mean :398 Mean :1 Mean :3.137 Mean :3.543 Mean :2.92
3rd Qu.:447 3rd Qu.:2 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.50
Max. :495 Max. :2 Max. :5.000 Max. :5.000 Max. :4.75
NA's :39 NA's :45 NA's :48
JOBSAT2 COMMIT2 READY2 JOBSAT3
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.500 1st Qu.:3.000
Median :3.333 Median :3.667 Median :3.000 Median :3.333
Mean :3.207 Mean :3.422 Mean :3.007 Mean :3.313
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :24 NA's :21 NA's :33 NA's :45
COMMIT3 READY3 JSAT COMMIT
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667 1st Qu.:3.000
Median :3.667 Median :3.250 Median :3.333 Median :3.667
Mean :3.508 Mean :3.165 Mean :3.219 Mean :3.490
3rd Qu.:4.000 3rd Qu.:3.750 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :36 NA's :57 NA's :36 NA's :34
READY
Min. :1.00
1st Qu.:2.50
Median :3.25
Mean :3.03
3rd Qu.:3.75
Max. :5.00
NA's :46
#Note: I used 999 to represent missing data for JOBSAT1 COMMIT1 and READY1
#R needs to be told that 999 is not a legitimate value, but is user-defined missing value
# How to read the next line, from the inside out:
#   data1$JOBSAT1 == 999  -> one TRUE/FALSE per row: "is this value 999?"
#   data1$JOBSAT1[ ... ]  -> selects only the values where that test is TRUE
#   ... <- NA             -> overwrites the selected values with NA
data1$JOBSAT1[data1$JOBSAT1 == 999] <- NA
data1$COMMIT1[data1$COMMIT1 == 999] <- NA
data1$READY1[data1$READY1 == 999] <- NA
summary(data1)
SUBNUM TIME JOBSAT1 COMMIT1
Min. : 1.00 Min. :0 Min. :1.000 Min. :1.000
1st Qu.: 75.75 1st Qu.:0 1st Qu.:2.667 1st Qu.:3.000
Median :150.50 Median :1 Median :3.333 Median :3.667
Mean :150.50 Mean :1 Mean :3.297 Mean :3.663
3rd Qu.:225.25 3rd Qu.:2 3rd Qu.:4.000 3rd Qu.:4.000
Max. :300.00 Max. :2 Max. :5.000 Max. :5.000
NA's :42 NA's :39
READY1 JOBSAT2 COMMIT2 READY2
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:2.500 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.750
Median :3.000 Median :3.333 Median :3.667 Median :3.250
Mean :3.066 Mean :3.272 Mean :3.498 Mean :3.176
3rd Qu.:3.750 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :48 NA's :66 NA's :48 NA's :54
JOBSAT3 COMMIT3 READY3 JSAT
Min. :1.000 Min. :1.333 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667
Median :3.333 Median :3.667 Median :3.250 Median :3.333
Mean :3.355 Mean :3.556 Mean :3.241 Mean :3.308
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :51 NA's :48 NA's :48 NA's :53
COMMIT READY
Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750
Median :3.667 Median :3.250
Mean :3.573 Mean :3.161
3rd Qu.:4.000 3rd Qu.:3.750
Max. :5.000 Max. :5.000
NA's :45 NA's :50
summary(data2)
SUBNUM TIME JOBSAT1 COMMIT1 READY1
Min. :301 Min. :0 Min. :1.000 Min. :1.000 Min. :1.00
1st Qu.:349 1st Qu.:0 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.25
Median :398 Median :1 Median :3.333 Median :3.667 Median :3.00
Mean :398 Mean :1 Mean :3.137 Mean :3.543 Mean :2.92
3rd Qu.:447 3rd Qu.:2 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.50
Max. :495 Max. :2 Max. :5.000 Max. :5.000 Max. :4.75
NA's :39 NA's :45 NA's :48
JOBSAT2 COMMIT2 READY2 JOBSAT3
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.500 1st Qu.:3.000
Median :3.333 Median :3.667 Median :3.000 Median :3.333
Mean :3.207 Mean :3.422 Mean :3.007 Mean :3.313
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :24 NA's :21 NA's :33 NA's :45
COMMIT3 READY3 JSAT COMMIT
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667 1st Qu.:3.000
Median :3.667 Median :3.250 Median :3.333 Median :3.667
Mean :3.508 Mean :3.165 Mean :3.219 Mean :3.490
3rd Qu.:4.000 3rd Qu.:3.750 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :36 NA's :57 NA's :36 NA's :34
READY
Min. :1.00
1st Qu.:2.50
Median :3.25
Mean :3.03
3rd Qu.:3.75
Max. :5.00
NA's :46
# Recoding each variable one at a time (as above) gets tedious when there are
# many variables, even if you copy & paste the code.
# Alternative: if 999 doesn't hold any meaning for ANY of the variables, declare
# the missing-value codes once, while the file is being read.
# NOTE(review): na.strings applies to every column, so "9" also blanks genuine
# values of 9 -- the summary below reports 3 NA's for SUBNUM, presumably the
# rows for subject 9. Confirm each code listed is never a legitimate value.
data1=read.csv(paste0(path2data, "data1.csv"), na.strings=c(".", "999","9","-9"))
summary(data1)
SUBNUM TIME JOBSAT1 COMMIT1 READY1
Min. : 1 Min. :0 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.: 76 1st Qu.:0 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.500
Median :151 Median :1 Median :3.333 Median :3.667 Median :3.000
Mean :151 Mean :1 Mean :3.297 Mean :3.663 Mean :3.066
3rd Qu.:226 3rd Qu.:2 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750
Max. :300 Max. :2 Max. :5.000 Max. :5.000 Max. :5.000
NA's :3 NA's :42 NA's :39 NA's :48
JOBSAT2 COMMIT2 READY2 JOBSAT3
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.750 1st Qu.:3.000
Median :3.333 Median :3.667 Median :3.250 Median :3.333
Mean :3.272 Mean :3.498 Mean :3.176 Mean :3.355
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :66 NA's :48 NA's :54 NA's :51
COMMIT3 READY3 JSAT COMMIT
Min. :1.333 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667 1st Qu.:3.000
Median :3.667 Median :3.250 Median :3.333 Median :3.667
Mean :3.556 Mean :3.241 Mean :3.308 Mean :3.573
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :48 NA's :48 NA's :53 NA's :45
READY
Min. :1.000
1st Qu.:2.750
Median :3.250
Mean :3.161
3rd Qu.:3.750
Max. :5.000
NA's :50
#OR, you could write a function
# Recode user-defined missing-value codes as NA.
#   x     : a vector (e.g. one column of a data frame) or a whole data frame
#   codes : value(s) that stand for "missing"; defaults to 999
# Returns x with every element equal to one of `codes` replaced by NA.
# Elements that are already NA stay NA.
my999isNA <- function(x, codes = 999) {
  if (is.data.frame(x)) {
    # Recode column by column; x[] keeps the data-frame structure intact
    x[] <- lapply(x, my999isNA, codes = codes)
    return(x)
  }
  # %in% returns only TRUE/FALSE (never NA), even when x contains NA
  x[x %in% codes] <- NA
  x
}
#Now we will apply this missing data function to the proper variables in data1
#To do this, we use the "lapply" function, which applies the same function to each element of a list (a data frame is a list of columns)
data1=read.csv(paste0(path2data, "data1.csv")) #reread data1 as a data.frame with missing data
names(data1)
[1] "SUBNUM" "TIME" "JOBSAT1" "COMMIT1" "READY1" "JOBSAT2" "COMMIT2"
[8] "READY2" "JOBSAT3" "COMMIT3" "READY3" "JSAT" "COMMIT" "READY"
SUBNUM
TIME
JOBSAT1
COMMIT1
READY1
JOBSAT2
COMMIT2
READY2
JOBSAT3
COMMIT3
READY3
JSAT
COMMIT
READY
summary(data1)
SUBNUM TIME JOBSAT1 COMMIT1
Min. : 1.00 Min. :0 Min. : 1.000 Min. : 1.000
1st Qu.: 75.75 1st Qu.:0 1st Qu.: 2.667 1st Qu.: 3.333
Median :150.50 Median :1 Median : 3.667 Median : 3.667
Mean :150.50 Mean :1 Mean : 49.763 Mean : 46.794
3rd Qu.:225.25 3rd Qu.:2 3rd Qu.: 4.000 3rd Qu.: 4.333
Max. :300.00 Max. :2 Max. :999.000 Max. :999.000
READY1 JOBSAT2 COMMIT2 READY2
Min. : 1.00 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.: 2.75 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.750
Median : 3.25 Median :3.333 Median :3.667 Median :3.250
Mean : 56.18 Mean :3.272 Mean :3.498 Mean :3.176
3rd Qu.: 3.75 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750
Max. :999.00 Max. :5.000 Max. :5.000 Max. :5.000
NA's :66 NA's :48 NA's :54
JOBSAT3 COMMIT3 READY3 JSAT
Min. :1.000 Min. :1.333 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667
Median :3.333 Median :3.667 Median :3.250 Median :3.333
Mean :3.355 Mean :3.556 Mean :3.241 Mean :3.308
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :51 NA's :48 NA's :48 NA's :53
COMMIT READY
Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750
Median :3.667 Median :3.250
Mean :3.573 Mean :3.161
3rd Qu.:4.000 3rd Qu.:3.750
Max. :5.000 Max. :5.000
NA's :45 NA's :50
# Apply my999isNA to the three columns that use 999 as a missing-value code.
# Selecting them by name (rather than by position 3:5) keeps this correct even
# if the column order of the file changes.
vars999 <- c("JOBSAT1", "COMMIT1", "READY1")
data1[vars999] <- lapply(data1[vars999], my999isNA)
summary(data1)
SUBNUM TIME JOBSAT1 COMMIT1
Min. : 1.00 Min. :0 Min. :1.000 Min. :1.000
1st Qu.: 75.75 1st Qu.:0 1st Qu.:2.667 1st Qu.:3.000
Median :150.50 Median :1 Median :3.333 Median :3.667
Mean :150.50 Mean :1 Mean :3.297 Mean :3.663
3rd Qu.:225.25 3rd Qu.:2 3rd Qu.:4.000 3rd Qu.:4.000
Max. :300.00 Max. :2 Max. :5.000 Max. :5.000
NA's :42 NA's :39
READY1 JOBSAT2 COMMIT2 READY2
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:2.500 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.750
Median :3.000 Median :3.333 Median :3.667 Median :3.250
Mean :3.066 Mean :3.272 Mean :3.498 Mean :3.176
3rd Qu.:3.750 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :48 NA's :66 NA's :48 NA's :54
JOBSAT3 COMMIT3 READY3 JSAT
Min. :1.000 Min. :1.333 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667
Median :3.333 Median :3.667 Median :3.250 Median :3.333
Mean :3.355 Mean :3.556 Mean :3.241 Mean :3.308
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :51 NA's :48 NA's :48 NA's :53
COMMIT READY
Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750
Median :3.667 Median :3.250
Mean :3.573 Mean :3.161
3rd Qu.:4.000 3rd Qu.:3.750
Max. :5.000 Max. :5.000
NA's :45 NA's :50
# Merging data by adding variables (e.g., two data.frames, demo1 + data1)
# First call: match on SUBNUM only. Its result is overwritten by the next call.
dd1 <- merge(demo1, data1, by = "SUBNUM")
# Match on SUBNUM and TIME together; all = TRUE keeps unmatched rows from both sides
merge_keys <- c("SUBNUM", "TIME")
dd1 <- merge(demo1, data1, by = merge_keys, all = TRUE)
dd2 <- merge(demo2, data2, by = merge_keys, all = TRUE)
summary(dd1)
SUBNUM TIME BTN COMPANY
Min. : 1.00 Min. :0 Min. : 4.0 A :246
1st Qu.: 75.75 1st Qu.:0 1st Qu.: 377.8 HHC :210
Median :150.50 Median :1 Median :1022.0 B :207
Mean :150.50 Mean :1 Mean :1860.3 D :114
3rd Qu.:225.25 3rd Qu.:2 3rd Qu.:3066.0 C : 84
Max. :300.00 Max. :2 Max. :4042.0 SVC : 24
(Other): 15
MARITAL GENDER HOWLONG RANK
Min. :1.000 Min. :1.000 Min. :0.000 Min. :11.00
1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:13.00
Median :2.000 Median :1.000 Median :2.000 Median :14.00
Mean :1.711 Mean :1.039 Mean :2.371 Mean :15.26
3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:4.000 3rd Qu.:16.00
Max. :5.000 Max. :2.000 Max. :5.000 Max. :32.00
NA's :6 NA's :51 NA's :18 NA's :48
EDUCATE AGE JOBSAT1 COMMIT1
Min. :1.000 Min. :18.00 Min. :1.000 Min. :1.000
1st Qu.:2.000 1st Qu.:20.00 1st Qu.:2.667 1st Qu.:3.000
Median :2.000 Median :24.00 Median :3.333 Median :3.667
Mean :2.663 Mean :25.75 Mean :3.297 Mean :3.663
3rd Qu.:3.000 3rd Qu.:30.00 3rd Qu.:4.000 3rd Qu.:4.000
Max. :6.000 Max. :44.00 Max. :5.000 Max. :5.000
NA's :9 NA's :9 NA's :42 NA's :39
READY1 JOBSAT2 COMMIT2 READY2
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:2.500 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.750
Median :3.000 Median :3.333 Median :3.667 Median :3.250
Mean :3.066 Mean :3.272 Mean :3.498 Mean :3.176
3rd Qu.:3.750 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :48 NA's :66 NA's :48 NA's :54
JOBSAT3 COMMIT3 READY3 JSAT
Min. :1.000 Min. :1.333 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667
Median :3.333 Median :3.667 Median :3.250 Median :3.333
Mean :3.355 Mean :3.556 Mean :3.241 Mean :3.308
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :51 NA's :48 NA's :48 NA's :53
COMMIT READY
Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750
Median :3.667 Median :3.250
Mean :3.573 Mean :3.161
3rd Qu.:4.000 3rd Qu.:3.750
Max. :5.000 Max. :5.000
NA's :45 NA's :50
summary(dd2)
SUBNUM TIME BTN COMPANY MARITAL
Min. :301 Min. :0 Min. : 4 A :156 Min. :1.000
1st Qu.:349 1st Qu.:0 1st Qu.: 404 HHC :144 1st Qu.:1.000
Median :398 Median :1 Median :1022 B :141 Median :2.000
Mean :398 Mean :1 Mean :1755 D : 69 Mean :1.756
3rd Qu.:447 3rd Qu.:2 3rd Qu.:3066 C : 42 3rd Qu.:2.000
Max. :495 Max. :2 Max. :4042 SVC : 15 Max. :5.000
(Other): 18 NA's :6
GENDER HOWLONG RANK EDUCATE
Min. :1.000 Min. :0.000 Min. :11.0 Min. :1.00
1st Qu.:1.000 1st Qu.:2.000 1st Qu.:13.0 1st Qu.:2.00
Median :1.000 Median :2.000 Median :14.0 Median :2.00
Mean :1.022 Mean :2.446 Mean :14.7 Mean :2.49
3rd Qu.:1.000 3rd Qu.:3.000 3rd Qu.:15.0 3rd Qu.:2.00
Max. :2.000 Max. :5.000 Max. :31.0 Max. :6.00
NA's :51 NA's :6 NA's :27 NA's :3
AGE JOBSAT1 COMMIT1 READY1
Min. :18.00 Min. :1.000 Min. :1.000 Min. :1.00
1st Qu.:21.00 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.25
Median :24.00 Median :3.333 Median :3.667 Median :3.00
Mean :25.68 Mean :3.137 Mean :3.543 Mean :2.92
3rd Qu.:29.00 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.50
Max. :46.00 Max. :5.000 Max. :5.000 Max. :4.75
NA's :3 NA's :39 NA's :45 NA's :48
JOBSAT2 COMMIT2 READY2 JOBSAT3
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.500 1st Qu.:3.000
Median :3.333 Median :3.667 Median :3.000 Median :3.333
Mean :3.207 Mean :3.422 Mean :3.007 Mean :3.313
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :24 NA's :21 NA's :33 NA's :45
COMMIT3 READY3 JSAT COMMIT
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667 1st Qu.:3.000
Median :3.667 Median :3.250 Median :3.333 Median :3.667
Mean :3.508 Mean :3.165 Mean :3.219 Mean :3.490
3rd Qu.:4.000 3rd Qu.:3.750 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :36 NA's :57 NA's :36 NA's :34
READY
Min. :1.00
1st Qu.:2.50
Median :3.25
Mean :3.03
3rd Qu.:3.75
Max. :5.00
NA's :46
#let's combine dd1 with dd2
#when you have IDENTICAL columns in both data sets you may use rbind
names(dd1); names(dd2)
[1] "SUBNUM" "TIME" "BTN" "COMPANY" "MARITAL" "GENDER" "HOWLONG"
[8] "RANK" "EDUCATE" "AGE" "JOBSAT1" "COMMIT1" "READY1" "JOBSAT2"
[15] "COMMIT2" "READY2" "JOBSAT3" "COMMIT3" "READY3" "JSAT" "COMMIT"
[22] "READY"
SUBNUM
TIME
BTN
COMPANY
MARITAL
GENDER
HOWLONG
RANK
EDUCATE
AGE
JOBSAT1
COMMIT1
READY1
JOBSAT2
COMMIT2
READY2
JOBSAT3
COMMIT3
READY3
JSAT
COMMIT
READY
[1] "SUBNUM" "TIME" "BTN" "COMPANY" "MARITAL" "GENDER" "HOWLONG"
[8] "RANK" "EDUCATE" "AGE" "JOBSAT1" "COMMIT1" "READY1" "JOBSAT2"
[15] "COMMIT2" "READY2" "JOBSAT3" "COMMIT3" "READY3" "JSAT" "COMMIT"
[22] "READY"
SUBNUM
TIME
BTN
COMPANY
MARITAL
GENDER
HOWLONG
RANK
EDUCATE
AGE
JOBSAT1
COMMIT1
READY1
JOBSAT2
COMMIT2
READY2
JOBSAT3
COMMIT3
READY3
JSAT
COMMIT
READY
dd3=rbind(dd1,dd2)
summary(dd3)
SUBNUM TIME BTN COMPANY MARITAL
Min. : 1 Min. :0 Min. : 4 A :402 Min. :1.000
1st Qu.:124 1st Qu.:0 1st Qu.: 404 HHC :354 1st Qu.:1.000
Median :248 Median :1 Median :1022 B :348 Median :2.000
Mean :248 Mean :1 Mean :1819 D :183 Mean :1.729
3rd Qu.:372 3rd Qu.:2 3rd Qu.:3066 C :126 3rd Qu.:2.000
Max. :495 Max. :2 Max. :4042 SVC : 39 Max. :5.000
(Other): 33 NA's :12
GENDER HOWLONG RANK EDUCATE
Min. :1.000 Min. :0.0 Min. :11.00 Min. :1.000
1st Qu.:1.000 1st Qu.:1.0 1st Qu.:13.00 1st Qu.:2.000
Median :1.000 Median :2.0 Median :14.00 Median :2.000
Mean :1.033 Mean :2.4 Mean :15.04 Mean :2.595
3rd Qu.:1.000 3rd Qu.:4.0 3rd Qu.:16.00 3rd Qu.:3.000
Max. :2.000 Max. :5.0 Max. :32.00 Max. :6.000
NA's :102 NA's :24 NA's :75 NA's :12
AGE JOBSAT1 COMMIT1 READY1
Min. :18.00 Min. :1.000 Min. :1.000 Min. :1.00
1st Qu.:21.00 1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.50
Median :24.00 Median :3.333 Median :3.667 Median :3.00
Mean :25.72 Mean :3.235 Mean :3.617 Mean :3.01
3rd Qu.:30.00 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.75
Max. :46.00 Max. :5.000 Max. :5.000 Max. :5.00
NA's :12 NA's :81 NA's :84 NA's :96
JOBSAT2 COMMIT2 READY2 JOBSAT3
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:2.667 1st Qu.:3.000 1st Qu.:2.500 1st Qu.:3.000
Median :3.333 Median :3.667 Median :3.250 Median :3.333
Mean :3.246 Mean :3.468 Mean :3.109 Mean :3.338
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.750 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :90 NA's :69 NA's :87 NA's :96
COMMIT3 READY3 JSAT COMMIT
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750 1st Qu.:2.667 1st Qu.:3.000
Median :3.667 Median :3.250 Median :3.333 Median :3.667
Mean :3.537 Mean :3.212 Mean :3.273 Mean :3.540
3rd Qu.:4.000 3rd Qu.:3.750 3rd Qu.:4.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
NA's :84 NA's :105 NA's :89 NA's :79
READY
Min. :1.00
1st Qu.:2.50
Median :3.25
Mean :3.11
3rd Qu.:3.75
Max. :5.00
NA's :96
# When the data frames have different columns, you can use plyr::rbind.fill()
# First, compute an extra variable and add it to dd1 only
# (computing a new variable in an existing data.frame)
dd1$STAY <- dd1$JSAT + dd1$COMMIT
# dd3 <- rbind(dd1, dd2) doesn't work now because the columns differ
# Install plyr only when it is missing, so re-running does not re-download it
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)
# Naming the package lets the help lookup succeed whether or not plyr is attached
help("rbind.fill", package = "plyr")
# rbind.fill() fills a column that is missing from one input (STAY in dd2) with NA
dd3 <- plyr::rbind.fill(dd1, dd2)
head(dd3)
tail(dd3)
#let's delete STAY from the previous dd3 data.frame
names(dd3)
[1] "SUBNUM" "TIME" "BTN" "COMPANY" "MARITAL" "GENDER" "HOWLONG"
[8] "RANK" "EDUCATE" "AGE" "JOBSAT1" "COMMIT1" "READY1" "JOBSAT2"
[15] "COMMIT2" "READY2" "JOBSAT3" "COMMIT3" "READY3" "JSAT" "COMMIT"
[22] "READY"
SUBNUM
TIME
BTN
COMPANY
MARITAL
GENDER
HOWLONG
RANK
EDUCATE
AGE
JOBSAT1
COMMIT1
READY1
JOBSAT2
COMMIT2
READY2
JOBSAT3
COMMIT3
READY3
JSAT
COMMIT
READY
# Keep every column except STAY. Dropping by name (instead of keeping positions
# 1 to 22) still works if columns are added or reordered, and changes nothing
# when STAY is not present.
dd4 <- dd3[setdiff(names(dd3), "STAY")]
names(dd4)
[1] "SUBNUM" "TIME" "BTN" "COMPANY" "MARITAL" "GENDER" "HOWLONG"
[8] "RANK" "EDUCATE" "AGE" "JOBSAT1" "COMMIT1" "READY1" "JOBSAT2"
[15] "COMMIT2" "READY2" "JOBSAT3" "COMMIT3" "READY3" "JSAT" "COMMIT"
[22] "READY"
SUBNUM
TIME
BTN
COMPANY
MARITAL
GENDER
HOWLONG
RANK
EDUCATE
AGE
JOBSAT1
COMMIT1
READY1
JOBSAT2
COMMIT2
READY2
JOBSAT3
COMMIT3
READY3
JSAT
COMMIT
READY
#Renaming a variable in a data.frame
#let's rename HOWLONG to TENURE and MARITAL to STATUS
dd4=plyr::rename(dd4, c(HOWLONG="TENURE", MARITAL="STATUS"))
names(dd4)
[1] "SUBNUM" "TIME" "BTN" "COMPANY" "STATUS" "GENDER" "TENURE"
[8] "RANK" "EDUCATE" "AGE" "JOBSAT1" "COMMIT1" "READY1" "JOBSAT2"
[15] "COMMIT2" "READY2" "JOBSAT3" "COMMIT3" "READY3" "JSAT" "COMMIT"
[22] "READY"
SUBNUM
TIME
BTN
COMPANY
STATUS
GENDER
TENURE
RANK
EDUCATE
AGE
JOBSAT1
COMMIT1
READY1
JOBSAT2
COMMIT2
READY2
JOBSAT3
COMMIT3
READY3
JSAT
COMMIT
READY
# Categorical variables: recode GENDER into a labelled factor and a shifted numeric copy
# Only "factor" type variables are assigned value labels, so convert first
gender_labels <- c("1" = "male", "2" = "female")
dd4$GENDER2 <- plyr::revalue(as.factor(dd4$GENDER), gender_labels)
# Subtract 1 so that a code of 1 becomes 0 and a code of 2 becomes 1
dd4$GENDER3 <- dd4$GENDER - 1
class(dd4$GENDER)
[1] "numeric"
numeric
class(dd4$GENDER2)
[1] "factor"
factor
class(dd4$GENDER3)
[1] "numeric"
numeric
#recode Likert-type items/scales
###let's reverse the overall score on COMMIT so that high scores = more likely to leave
dd4$LEAVE=6-dd4$COMMIT
mean(dd3$JSAT); median(dd3$JSAT)
[1] NA
[1] NA
mean(dd3$JSAT,na.rm=TRUE); median(dd3$JSAT,na.rm=TRUE)
[1] 3.272923
[1] 3.333333
#Dispersion
var(dd3$JSAT,na.rm=T)
[1] 0.8622181
sd(dd3$JSAT,na.rm=T)
[1] 0.928557
min(dd3$JSAT, na.rm=T)
[1] 1
max(dd3$JSAT,na.rm=T)
[1] 5
# summary() reports the NA count itself and has no na.rm argument (one passed
# in is silently ignored), so none is given here.
summary(dd3$JSAT)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1.000 2.667 3.333 3.273 4.000 5.000 89
quantile(dd3$JSAT,probs=c(.1,.2,.3,.4,.5,.6,.7,.8,.9),na.rm=T)
10% 20% 30% 40% 50% 60% 70% 80%
2.000000 2.333333 3.000000 3.000000 3.333333 3.666667 4.000000 4.000000
90%
4.333333
# Install Hmisc only when it is missing, so re-running does not re-download it
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
library("Hmisc")
# Per-variable descriptive summary of every column in dd4
Hmisc::describe(dd4)
dd4
25 Variables 1485 Observations
---------------------------------------------------------------------------
SUBNUM
n missing distinct Info Mean Gmd .05 .10
1485 0 495 1 248 165.1 25.2 50.0
.25 .50 .75 .90 .95
124.0 248.0 372.0 446.0 470.8
lowest : 1 2 3 4 5, highest: 491 492 493 494 495
---------------------------------------------------------------------------
TIME
n missing distinct Info Mean Gmd
1485 0 3 0.889 1 0.8895
Value 0 1 2
Frequency 495 495 495
Proportion 0.333 0.333 0.333
---------------------------------------------------------------------------
BTN
n missing distinct Info Mean Gmd .05 .10
1485 0 16 0.965 1819 1566 4 104
.25 .50 .75 .90 .95
404 1022 3066 4042 4042
Value 0 100 120 140 300 400 700 1000 1010 1020
Frequency 141 15 42 30 123 48 6 66 21 288
Proportion 0.095 0.010 0.028 0.020 0.083 0.032 0.004 0.044 0.014 0.194
Value 2000 2010 3070 4000 4040
Frequency 36 51 435 18 165
Proportion 0.024 0.034 0.293 0.012 0.111
---------------------------------------------------------------------------
COMPANY
n missing distinct
1485 0 8
Value A B C D F HHC REC SVC
Frequency 402 348 126 183 15 354 18 39
Proportion 0.271 0.234 0.085 0.123 0.010 0.238 0.012 0.026
---------------------------------------------------------------------------
STATUS
n missing distinct Info Mean Gmd
1473 12 5 0.79 1.729 0.745
Value 1 2 3 4 5
Frequency 603 768 21 60 21
Proportion 0.409 0.521 0.014 0.041 0.014
---------------------------------------------------------------------------
GENDER
n missing distinct Info Mean Gmd
1383 102 2 0.094 1.033 0.063
Value 1 2
Frequency 1338 45
Proportion 0.967 0.033
---------------------------------------------------------------------------
TENURE
n missing distinct Info Mean Gmd
1461 24 6 0.949 2.4 1.747
Value 0 1 2 3 4 5
Frequency 216 159 495 225 147 219
Proportion 0.148 0.109 0.339 0.154 0.101 0.150
---------------------------------------------------------------------------
RANK
n missing distinct Info Mean Gmd .05 .10
1410 75 15 0.972 15.04 2.979 12 12
.25 .50 .75 .90 .95
13 14 16 21 22
Value 11 12 13 14 15 16 17 18 19 21
Frequency 21 147 324 264 279 114 84 18 3 54
Proportion 0.015 0.104 0.230 0.187 0.198 0.081 0.060 0.013 0.002 0.038
Value 22 23 24 31 32
Frequency 51 42 3 3 3
Proportion 0.036 0.030 0.002 0.002 0.002
---------------------------------------------------------------------------
EDUCATE
n missing distinct Info Mean Gmd
1473 12 6 0.617 2.595 0.9586
Value 1 2 3 4 5 6
Frequency 9 1068 99 117 168 12
Proportion 0.006 0.725 0.067 0.079 0.114 0.008
---------------------------------------------------------------------------
AGE
n missing distinct Info Mean Gmd .05 .10
1473 12 29 0.994 25.72 6.715 19 19
.25 .50 .75 .90 .95
21 24 30 35 37
lowest : 18 19 20 21 22, highest: 42 43 44 45 46
---------------------------------------------------------------------------
JOBSAT1
n missing distinct Info Mean Gmd .05 .10
1404 81 13 0.983 3.235 1.104 1.333 1.667
.25 .50 .75 .90 .95
2.667 3.333 4.000 4.333 4.667
Value 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000
Frequency 48 39 63 96 78 102 180
Proportion 0.034 0.028 0.045 0.068 0.056 0.073 0.128
Value 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000
Frequency 156 141 315 87 54 45
Proportion 0.111 0.100 0.224 0.062 0.038 0.032
---------------------------------------------------------------------------
COMMIT1
n missing distinct Info Mean Gmd .05 .10
1401 84 13 0.982 3.617 0.9408 2.000 2.333
.25 .50 .75 .90 .95
3.000 3.667 4.000 4.667 5.000
Value 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000
Frequency 12 9 21 45 57 75 165
Proportion 0.009 0.006 0.015 0.032 0.041 0.054 0.118
Value 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000
Frequency 189 222 282 102 102 120
Proportion 0.135 0.158 0.201 0.073 0.073 0.086
---------------------------------------------------------------------------
READY1
n missing distinct Info Mean Gmd .05 .10
1389 96 17 0.99 3.01 0.9286 1.50 1.75
.25 .50 .75 .90 .95
2.50 3.00 3.75 4.00 4.00
Value 1.00 1.25 1.50 1.75 2.00 2.25 2.50 2.75 3.00 3.25
Frequency 36 33 33 45 66 78 108 141 177 204
Proportion 0.026 0.024 0.024 0.032 0.048 0.056 0.078 0.102 0.127 0.147
Value 3.50 3.75 4.00 4.25 4.50 4.75 5.00
Frequency 105 117 183 36 18 6 3
Proportion 0.076 0.084 0.132 0.026 0.013 0.004 0.002
---------------------------------------------------------------------------
JOBSAT2
n missing distinct Info Mean Gmd .05 .10
1395 90 13 0.978 3.246 1.041 1.333 2.000
.25 .50 .75 .90 .95
2.667 3.333 4.000 4.000 4.667
Value 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000
Frequency 51 30 54 75 99 84 174
Proportion 0.037 0.022 0.039 0.054 0.071 0.060 0.125
Value 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000
Frequency 177 168 348 63 33 39
Proportion 0.127 0.120 0.249 0.045 0.024 0.028
---------------------------------------------------------------------------
COMMIT2
n missing distinct Info Mean Gmd .05 .10
1416 69 13 0.981 3.468 0.9529 1.667 2.333
.25 .50 .75 .90 .95
3.000 3.667 4.000 4.667 5.000
Value 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000
Frequency 39 18 30 18 57 93 207
Proportion 0.028 0.013 0.021 0.013 0.040 0.066 0.146
Value 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000
Frequency 213 207 291 96 75 72
Proportion 0.150 0.146 0.206 0.068 0.053 0.051
---------------------------------------------------------------------------
READY2
n missing distinct Info Mean Gmd .05 .10
1398 87 17 0.989 3.109 0.9311 1.50 2.00
.25 .50 .75 .90 .95
2.50 3.25 3.75 4.00 4.25
Value 1.00 1.25 1.50 1.75 2.00 2.25 2.50 2.75 3.00 3.25
Frequency 24 30 30 39 69 75 105 75 216 162
Proportion 0.017 0.021 0.021 0.028 0.049 0.054 0.075 0.054 0.155 0.116
Value 3.50 3.75 4.00 4.25 4.50 4.75 5.00
Frequency 162 162 156 39 18 15 21
Proportion 0.116 0.116 0.112 0.028 0.013 0.011 0.015
---------------------------------------------------------------------------
JOBSAT3
n missing distinct Info Mean Gmd .05 .10
1389 96 13 0.972 3.338 0.941 1.667 2.000
.25 .50 .75 .90 .95
3.000 3.333 4.000 4.333 4.667
Value 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000
Frequency 24 33 21 72 72 69 279
Proportion 0.017 0.024 0.015 0.052 0.052 0.050 0.201
Value 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000
Frequency 183 138 351 60 42 45
Proportion 0.132 0.099 0.253 0.043 0.030 0.032
---------------------------------------------------------------------------
COMMIT3
n missing distinct Info Mean Gmd .05 .10
1401 84 13 0.974 3.537 0.8182 2.000 2.667
.25 .50 .75 .90 .95
3.000 3.667 4.000 4.333 4.667
Value 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000
Frequency 9 9 21 33 42 54 261
Proportion 0.006 0.006 0.015 0.024 0.030 0.039 0.186
Value 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000
Frequency 204 234 315 102 48 69
Proportion 0.146 0.167 0.225 0.073 0.034 0.049
---------------------------------------------------------------------------
READY3
n missing distinct Info Mean Gmd .05 .10
1380 105 17 0.986 3.212 0.8964 1.50 2.00
.25 .50 .75 .90 .95
2.75 3.25 3.75 4.00 4.25
Value 1.00 1.25 1.50 1.75 2.00 2.25 2.50 2.75 3.00 3.25
Frequency 12 24 42 36 36 39 87 102 237 144
Proportion 0.009 0.017 0.030 0.026 0.026 0.028 0.063 0.074 0.172 0.104
Value 3.50 3.75 4.00 4.25 4.50 4.75 5.00
Frequency 168 114 231 48 21 21 18
Proportion 0.122 0.083 0.167 0.035 0.015 0.015 0.013
---------------------------------------------------------------------------
JSAT
n missing distinct Info Mean Gmd .05 .10
1396 89 13 0.978 3.273 1.032 1.333 2.000
.25 .50 .75 .90 .95
2.667 3.333 4.000 4.333 4.667
Value 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000
Frequency 41 34 46 81 83 85 211
Proportion 0.029 0.024 0.033 0.058 0.059 0.061 0.151
Value 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000
Frequency 172 149 338 70 43 43
Proportion 0.123 0.107 0.242 0.050 0.031 0.031
---------------------------------------------------------------------------
COMMIT
n missing distinct Info Mean Gmd .05 .10
1406 79 13 0.979 3.54 0.9079 2.000 2.667
.25 .50 .75 .90 .95
3.000 3.667 4.000 4.667 5.000
Value 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000
Frequency 20 12 24 32 52 74 211
Proportion 0.014 0.009 0.017 0.023 0.037 0.053 0.150
Value 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000
Frequency 202 221 296 100 75 87
Proportion 0.144 0.157 0.211 0.071 0.053 0.062
---------------------------------------------------------------------------
READY
n missing distinct Info Mean Gmd .05 .10
1389 96 17 0.989 3.11 0.924 1.50 2.00
.25 .50 .75 .90 .95
2.50 3.25 3.75 4.00 4.25
Value 1.00 1.25 1.50 1.75 2.00 2.25 2.50 2.75 3.00 3.25
Frequency 24 29 35 40 57 64 100 106 210 170
Proportion 0.017 0.021 0.025 0.029 0.041 0.046 0.072 0.076 0.151 0.122
Value 3.50 3.75 4.00 4.25 4.50 4.75 5.00
Frequency 145 131 190 41 19 14 14
Proportion 0.104 0.094 0.137 0.030 0.014 0.010 0.010
---------------------------------------------------------------------------
GENDER2
n missing distinct
1383 102 2
Value male female
Frequency 1338 45
Proportion 0.967 0.033
---------------------------------------------------------------------------
GENDER3
n missing distinct Info Sum Mean Gmd
1383 102 2 0.094 45 0.03254 0.063
---------------------------------------------------------------------------
LEAVE
n missing distinct Info Mean Gmd .05 .10
1406 79 13 0.979 2.46 0.9079 1.000 1.333
.25 .50 .75 .90 .95
2.000 2.333 3.000 3.333 4.000
Value 1.000000 1.333333 1.666667 2.000000 2.333333 2.666667 3.000000
Frequency 87 75 100 296 221 202 211
Proportion 0.062 0.053 0.071 0.211 0.157 0.144 0.150
Value 3.333333 3.666667 4.000000 4.333333 4.666667 5.000000
Frequency 74 52 32 24 12 20
Proportion 0.053 0.037 0.023 0.017 0.009 0.014
---------------------------------------------------------------------------
# Detach Hmisc before switching to the psych package's describe()
detach("package:Hmisc")
# Install psych only when it is missing: an unconditional install.packages()
# re-downloads the package on every run and, per the note at the top of this
# file, does not work when knitting
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned
psych::describe(dd4, na.rm = TRUE)
# See ?psych::describe for how na.rm = FALSE treats cases with missing data
psych::describe(dd4, na.rm = FALSE)
# Rows containing any NA are dropped up front by na.omit()
psych::describe(na.omit(dd4))
# Frequency counts: how many times each COMPANY value occurs
table(dd4[["COMPANY"]])
A B C D F HHC REC SVC
402 348 126 183 15 354 18 39
# Proportions: each COMPANY count as a share of the table total
prop.table(table(dd4[["COMPANY"]]))
A B C D F HHC
0.27070707 0.23434343 0.08484848 0.12323232 0.01010101 0.23838384
REC SVC
0.01212121 0.02626263
# Same proportions, rounded to 3 decimal places
round(prop.table(table(dd4[["COMPANY"]])), digits = 3)
A B C D F HHC REC SVC
0.271 0.234 0.085 0.123 0.010 0.238 0.012 0.026
# Percentages: the proportions scaled by 100
prop.table(table(dd4[["COMPANY"]])) * 100
A B C D F HHC REC
27.070707 23.434343 8.484848 12.323232 1.010101 23.838384 1.212121
SVC
2.626263
#Cross Tabs & Simple Tables
#One-time setup: uncomment the next line if gmodels is not installed yet
#install.packages("gmodels")
library(gmodels)
#GENDER (rows) by COMPANY (columns); chisq=TRUE adds a Pearson chi-squared test.
#The row/column labels in the printed table are the argument expressions as
#written (dd4$GENDER, dd4$COMPANY).
CrossTable(dd4$GENDER,dd4$COMPANY,chisq=TRUE,format="SPSS")
Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation
may be incorrect
Cell Contents
|-------------------------|
| Count |
| Chi-square contribution |
| Row Percent |
| Column Percent |
| Total Percent |
|-------------------------|
Total Observations in Table: 1383
| dd4$COMPANY
dd4$GENDER | A | B | C | D | F | HHC | REC | SVC | Row Total |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
1 | 357 | 321 | 111 | 165 | 9 | 321 | 18 | 36 | 1338 |
| 0.023 | 0.181 | 0.042 | 0.037 | 0.010 | 0.148 | 0.020 | 0.039 | |
| 26.682% | 23.991% | 8.296% | 12.332% | 0.673% | 23.991% | 1.345% | 2.691% | 96.746% |
| 95.968% | 99.074% | 94.872% | 98.214% | 100.000% | 94.690% | 100.000% | 100.000% | |
| 25.813% | 23.210% | 8.026% | 11.931% | 0.651% | 23.210% | 1.302% | 2.603% | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
2 | 15 | 3 | 6 | 3 | 0 | 18 | 0 | 0 | 45 |
| 0.693 | 5.396 | 1.263 | 1.113 | 0.293 | 4.404 | 0.586 | 1.171 | |
| 33.333% | 6.667% | 13.333% | 6.667% | 0.000% | 40.000% | 0.000% | 0.000% | 3.254% |
| 4.032% | 0.926% | 5.128% | 1.786% | 0.000% | 5.310% | 0.000% | 0.000% | |
| 1.085% | 0.217% | 0.434% | 0.217% | 0.000% | 1.302% | 0.000% | 0.000% | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
Column Total | 372 | 324 | 117 | 168 | 9 | 339 | 18 | 36 | 1383 |
| 26.898% | 23.427% | 8.460% | 12.148% | 0.651% | 24.512% | 1.302% | 2.603% | |
-------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
Statistics for All Table Factors
Pearson's Chi-squared test
------------------------------------------------------------
Chi^2 = 15.42045 d.f. = 7 p = 0.03097201
Minimum expected frequency: 0.2928416
Cells with Expected Frequency < 5: 4 of 16 (25%)
# Plain two-way count table: GENDER in rows, COMPANY in columns
table(dd4[["GENDER"]], dd4[["COMPANY"]])
A B C D F HHC REC SVC
1 357 321 111 165 9 321 18 36
2 15 3 6 3 0 18 0 0
# Cell proportions: each GENDER x COMPANY count divided by the grand total
prop.table(table(dd4[["GENDER"]], dd4[["COMPANY"]]))
A B C D F
1 0.258134490 0.232104121 0.080260304 0.119305857 0.006507592
2 0.010845987 0.002169197 0.004338395 0.002169197 0.000000000
HHC REC SVC
1 0.232104121 0.013015184 0.026030369
2 0.013015184 0.000000000 0.000000000
# Histograms
# Quick look using the default title and axis label
hist(dd4$JSAT)
# Same histogram with a readable title and x-axis label
hist(dd4$JSAT, main = "Job Satisfaction Histogram", xlab = "Job Satisfaction")
# Correlations
# Select the three scales by name rather than by position (columns 20:22), so
# the call keeps working if columns are added to or reordered in dd4
cor(dd4[, c("JSAT", "COMMIT", "READY")], use = "complete.obs")
JSAT COMMIT READY
JSAT 1.0000000 0.5373179 0.5093204
COMMIT 0.5373179 1.0000000 0.4610560
READY 0.5093204 0.4610560 1.0000000
# Hmisc was detached earlier, so attach it again. Install it only when it is
# missing: an unconditional install.packages() re-downloads on every run.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
library(Hmisc)
# rcorr() takes a matrix; the columns are picked by name instead of by the
# positional index 20:22 so the call survives column reordering in dd4
Hmisc::rcorr(as.matrix(dd4[, c("JSAT", "COMMIT", "READY")]))
JSAT COMMIT READY
JSAT 1.00 0.54 0.51
COMMIT 0.54 1.00 0.46
READY 0.51 0.46 1.00
n
JSAT COMMIT READY
JSAT 1396 1385 1369
COMMIT 1385 1406 1375
READY 1369 1375 1389
P
JSAT COMMIT READY
JSAT 0 0
COMMIT 0 0
READY 0 0