Stats courses taken by PSY grads

2023-10-02

Background

In Fall 2023, Sherri Gilliland and Irene Grassi conducted a survey of Psychology Department graduate students via Qualtrics.

This document describes those data.

Note

The data/ directory is not synched to GitHub.

Setup

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Gathering

The raw data were reported in an MS Excel (.xlsx format) spreadsheet.

# Load the raw survey export (the data/ directory is not synced to GitHub).
survey_xlsx <- "data/xlsx/STATISTICS course question.xlsx"
psych_stats <- readxl::read_xlsx(path = survey_xlsx)

The questions asked were as follows:

# List the survey questions as they appear in the spreadsheet header.
colnames(psych_stats)
 [1] "ID"                                        
 [2] "Start time"                                
 [3] "Completion time"                           
 [4] "Email"                                     
 [5] "Name"                                      
 [6] "Last modified time"                        
 [7] "What year did you start in Psychology?"    
 [8] "How many Statistics courses have you taken"
 [9] "Statistic course #1"                       
[10] "Statistic course #2 (if any)"              
[11] "Statistic course #3 (if any)"              
[12] "Statistic course #4 (if any)"              

Cleaning

We’ll drop the personal identifiers and the last modified time.

# Remove the personal identifiers and the last-modified timestamp.
drop_cols <- c("Email", "Name", "Last modified time")
psych_stats <- dplyr::select(psych_stats, -dplyr::all_of(drop_cols))

Then, we clean up some of the variable names.

# Replace the verbose survey-question headers with short snake_case names
# (new name on the left, original header on the right).
psych_stats <- dplyr::rename(
  psych_stats,
  start_time = "Start time",
  complete_time = "Completion time",
  start_year = "What year did you start in Psychology?",
  n_stats_courses = "How many Statistics courses have you taken",
  course_1 = "Statistic course #1",
  course_2 = "Statistic course #2 (if any)",
  course_3 = "Statistic course #3 (if any)",
  course_4 = "Statistic course #4 (if any)"
)

Next, we clean up individual variables.

# Recode the free-text answers to "how many courses" as plain digits, then
# convert the column to numeric. The replacements are applied in the order
# listed; each one rewrites only the first match within a response.
psych_stats <- psych_stats |>
  dplyr::mutate(
    n_stats_courses = n_stats_courses |>
      stringr::str_replace("Two", "2") |>
      stringr::str_replace("1 in progress", "1") |>
      stringr::str_replace(
        "7 including Psych 507/508 and 2x 1-credit HDFS 597 this semester",
        "7"
      ) |>
      stringr::str_replace("1 \\(At PSU\\)", "1") |>
      stringr::str_replace("1 \\(only counting Penn State courses\\)", "1") |>
      stringr::str_replace("507 this semester", "1") |>
      as.numeric()
  )
# Row-wise test of whether `pattern` (a regular expression) is found in at
# least one of the course columns supplied via `...`.
# The per-column results are combined with `|`, exactly as a hand-written
# `a | b | c` chain would be: a row is TRUE if any column matches, and NA if
# no column matches but at least one column is NA.
matches_any_course <- function(pattern, ...) {
  Reduce(`|`, lapply(list(...), stringr::str_detect, pattern = pattern))
}

# Derive logical flags for the courses / departments that each respondent
# mentioned in the free-text course fields.
psych_stats <- psych_stats |>
  dplyr::mutate(
    # `[1I]` replaces the original `[1|I]`: inside a character class `|` is a
    # literal pipe character, not alternation.
    # NOTE(review): the course_2 pattern marks PSY_507 as TRUE for anyone whose
    # second course mentions "508" or "PSY 501" -- confirm this is intended.
    PSY_507 = stringr::str_detect(course_1, "507|Data Analysis [1I]") |
      stringr::str_detect(course_2, "508|PSY 501"),
    # NOTE(review): a bare "508" also matches non-PSY listings that contain
    # that number; left unchanged so the reported counts stay reproducible.
    PSY_508 = stringr::str_detect(course_1, "508") |
      stringr::str_detect(course_2, "508|Data Analysis 2"),
    PSY_531 = matches_any_course("531", course_2, course_3, course_4),
    PSY_509 = matches_any_course("509", course_2, course_3, course_4),
    # NOTE(review): course_3 is not searched for PSY 597 -- confirm.
    PSY_597 = stringr::str_detect(course_2, "PSY 597") |
      stringr::str_detect(course_4, "Psychometrics \\(LeBreton"),
    # Department-level flags: searched across all four course fields.
    hdfs = matches_any_course(
      "[hH][dD][fF][sS]", course_1, course_2, course_3, course_4
    ),
    ed_psy = matches_any_course(
      "ED[ ]?PSY", course_1, course_2, course_3, course_4
    ),
    ist = matches_any_course("IST", course_1, course_2, course_3, course_4),
    # NOTE(review): "stat|STAT" matches any text containing those letters
    # (e.g. the word "stats"), not only STAT-department courses -- confirm.
    stat = matches_any_course(
      "stat|STAT", course_1, course_2, course_3, course_4
    ),
    soda = matches_any_course("S[oO]DA", course_1, course_2, course_3, course_4)
  )

Visualizations

Responses by cohort year.

# Bar chart: number of respondents per starting year.
ggplot(data = psych_stats, mapping = aes(x = start_year)) +
  geom_bar() +
  theme_classic()
Figure 5.1: Survey respondents by starting year in the program
# Bar chart of the number of stats courses taken, one facet row per cohort.
ggplot(data = psych_stats, mapping = aes(x = n_stats_courses)) +
  geom_bar() +
  facet_grid(start_year ~ .) +
  theme_classic()
Figure 5.2: Number of stats courses taken by respondent cohort year

PSY 507 and PSY 508

Table 5.1
# Cross-tabulate the PSY 507 flag (rows) against the PSY 508 flag (columns).
xtabs(~ PSY_507 + PSY_508, data = psych_stats)
       PSY_508
PSY_507 FALSE TRUE
  FALSE     9    0
  TRUE     18   67

Other PSY

# TRUE when the respondent listed any of the other flagged PSY courses
# (PSY 509, PSY 531, or PSY 597).
# The original expression OR-ed PSY_597 twice; the repeated operand had no
# effect on the result and has been removed.
# NOTE(review): if the duplicate was a typo for a different course flag, add
# that flag here.
psych_stats <- psych_stats |>
  dplyr::mutate(PSY_beyond_508 = PSY_509 | PSY_531 | PSY_597)

# Count pre-2023 respondents with a known (non-NA) flag who took a PSY
# course beyond 508.
psych_stats |>
  dplyr::filter(start_year != 2023, !is.na(PSY_beyond_508)) |>
  dplyr::summarise(n = sum(as.numeric(PSY_beyond_508)))
# A tibble: 1 × 1
      n
  <dbl>
1    21
# Keep every cohort except the one that started in 2023.
psych_stats_not_2023 <- dplyr::filter(psych_stats, start_year != 2023)

# Rows (respondents) and columns in the subset.
dim(psych_stats_not_2023)
[1] 78 20
# PSY courses beyond 508 (rows) by cohort starting year (columns).
xtabs(~ PSY_beyond_508 + start_year, data = psych_stats)
              start_year
PSY_beyond_508 2018 2019 2020 2021 2022 2023
         FALSE   10   12   17    7   10   16
         TRUE     2    4    2    8    5    0

HDFS or ED PSY or IST or SoDA or STAT

# TRUE when the respondent listed a course matched by any of the
# non-PSY flags (HDFS, ED PSY, IST, SoDA, STAT).
psych_stats <- dplyr::mutate(
  psych_stats,
  outside_psy = hdfs | ed_psy | ist | soda | stat
)

# Count pre-2023 respondents with a known (non-NA) flag who listed a course
# outside PSY.
psych_stats |>
  dplyr::filter(!is.na(outside_psy), start_year != 2023) |>
  dplyr::summarise(n = sum(as.numeric(outside_psy)))
# A tibble: 1 × 1
      n
  <dbl>
1    34
# Courses outside PSY (rows) by cohort starting year (columns).
xtabs(~ outside_psy + start_year, data = psych_stats)
           start_year
outside_psy 2018 2019 2020 2021 2022 2023
      FALSE    3    9   11   10   10   16
      TRUE     9    7    8    5    5    0

Beyond PSY 507 & 508

# TRUE when the respondent took anything beyond PSY 507/508, whether inside
# or outside the department.
psych_stats <- dplyr::mutate(
  psych_stats,
  all_beyond_508 = PSY_beyond_508 | outside_psy
)

# Count pre-2023 respondents with a known (non-NA) flag who took any course
# beyond PSY 507/508.
psych_stats |>
  dplyr::filter(!is.na(all_beyond_508), start_year != 2023) |>
  dplyr::summarise(n = sum(as.numeric(all_beyond_508)))
# A tibble: 1 × 1
      n
  <dbl>
1    49
# Any course beyond 507/508 (rows) by cohort starting year (columns).
xtabs(~ all_beyond_508 + start_year, data = psych_stats)
              start_year
all_beyond_508 2018 2019 2020 2021 2022 2023
         FALSE    2    6   10    3    7   16
         TRUE    10   10    9   12    8    0
# Number of respondents in each cohort, for comparison with the table above.
xtabs(formula = ~ start_year, data = psych_stats)
start_year
2018 2019 2020 2021 2022 2023 
  12   16   19   15   16   16 

So, in most cohorts, 50% or more of the respondents are taking courses beyond the requirements.

Courses

This gives an illustration of what cleaning the course data looks like. This is why the “quick and dirty” approach is sorta dirty.

# Distinct free-text answers given for the first course.
unique(psych_stats$course_1)
 [1] "507"                                                                                                 
 [2] "PSY 507"                                                                                             
 [3] "Psych 507"                                                                                           
 [4] "ADVANCED PSYCHOLOGICAL STATISTICS"                                                                   
 [5] "PSY507 - Data Analysis I"                                                                            
 [6] "PSY507 (1st year stats fall)"                                                                        
 [7] "Principles of Statistical Analysis (2016 Fall, BA)/ Elementary Business Statistics (2017 Spring, BA)"
 [8] "507: Data Analysis I"                                                                                
 [9] "PSY 507 Intro to Data Analysis I"                                                                    
[10] "PSY507 data analysis I"                                                                              
[11] "PSY507&508"                                                                                          
[12] "PSY507"                                                                                              
[13] "PSY 507: Analysis of Psychological Data I"                                                           
[14] "Lecture Data Analysis"                                                                               
[15] "PSYC 507: Analysis of Psychological Data 1"                                                          
[16] "HDFS 530 - Longitudinal Structural Equation Modeling"                                                
[17] "Psy 507"                                                                                             
[18] "PSY 507/508"                                                                                         
[19] "Data Analysis 1 (in psych)"                                                                          
[20] "SEM"                                                                                                 
[21] "psy507"                                                                                              
[22] "PSY507/508: Data Analysis"                                                                           
[23] "PSY 507 - Analysis of Psychological Data I"                                                          
[24] "Psychological Statistics (undergraduate)"                                                            
[25] "PYCH 507"                                                                                            
[26] "Data Analysis I & 2"                                                                                 
[27] "PSY 507: Analysis of Psych Data I"                                                                   
[28] "psych 507"                                                                                           
[29] "Intro to Stats with J. Cook"                                                                         
[30] "PSYCH 507"                                                                                           
[31] "EDPSY 558"                                                                                           
[32] "PSY 507 Data analysis I"                                                                             
[33] "PSY 507 - 3 credits"                                                                                 
[34] "PSY 507:Data Analysis I"                                                                             
[35] "Statistics for Psychology 1"                                                                         
[36] "PSY 501 Basic Statistics"                                                                            
[37] "PSYC 507"                                                                                            
[38] "Psychology 507: Analysis of Psychological Data I"                                                    
[39] "PSY 507 Data Analysis 1"                                                                             
# Distinct free-text answers given for the second course.
unique(psych_stats$course_2)
 [1] "508"                                                  
 [2] "PSY 508"                                              
 [3] "Psych 508"                                            
 [4] "X"                                                    
 [5] "PSY508 - Data Analysis II"                            
 [6] "PSY508 (1st year stats spring)"                       
 [7] "Business Statistics and Application (2019 Spring, BA)"
 [8] "508: Data Analysis II"                                
 [9] "PSY 508 Intro to Data Analysis II"                    
[10] "PSY508 Data analysis II"                              
[11] "PSY531 multilevel modelling"                          
[12] "NA"                                                   
[13] "PSY 508: Analysis of Psychological Data II"           
[14] "HDFS 597 (Time Varying Effect Analysis)"              
[15] "N/A"                                                  
[16] "PSYC 508: Analysis of Psychological Data 2"           
[17] "BBH 597 - Multilevel Modeling"                        
[18] "Psy 508"                                              
[19] "0"                                                    
[20] "-"                                                    
[21] "HDFS, Multilevel Modeling"                            
[22] "n/a"                                                  
[23] "Data Analysis 2 (in psych)"                           
[24] "."                                                    
[25] "none"                                                 
[26] "BBH597: Multilevel Modeling"                          
[27] "PSY 508 - Analysis of Psychological Data II"          
[28] "PSYCH 508"                                            
[29] "SEM"                                                  
[30] "PSY 508: Analysis of Psych Data II"                   
[31] "psych 508"                                            
[32] "HDFS 523"                                             
[33] "PSY508"                                               
[34] "PSY 508 Data analysis II"                             
[35] "PSY 508 - 3 credits"                                  
[36] "STAT 557: Data Mining I"                              
[37] "Statistics for Psychology 2"                          
[38] "HDFS 508 Dynamical Systems Modelin"                   
[39] "PSY 507"                                              
[40] "Na"                                                   
[41] "PSYC 508"                                             
[42] "x"                                                    
[43] "PSY 508 Data Analysis II"                             
# Distinct free-text answers given for the third course.
unique(psych_stats$course_3)
 [1] "531 Multilevel Modeling"                                                        
 [2] "NA"                                                                             
 [3] "n/a"                                                                            
 [4] "N/A"                                                                            
 [5] "Multivariate"                                                                   
 [6] "X"                                                                              
 [7] "HDFS 530 - Longitudinal SEM"                                                    
 [8] "BBH 597 - Multilevel"                                                           
 [9] "HDFS517 - Multilevel Analysis"                                                  
[10] "PSY526 (psychometrics)"                                                         
[11] "PSY 531"                                                                        
[12] "HDFS 526"                                                                       
[13] "Regression and Factor Analysis (2019 Fall, Master)"                             
[14] "531: Multilevel Research"                                                       
[15] "PSY 509 Multivariate Statistics"                                                
[16] "PSY531 Multilevel research"                                                     
[17] "HDFS597 Bayesian data analysis"                                                 
[18] "PSY 531: Multilevel Theory, Measurement, & Analysis"                            
[19] "HDFS 523"                                                                       
[20] "HDFS 597 (Survival Analysis)"                                                   
[21] "PSYC 531: Multilevel Theory, Measurement, & Analysis"                           
[22] "STAT 508 - Machine Learning"                                                    
[23] "Multilevel (LeBreton)"                                                          
[24] "None"                                                                           
[25] "-"                                                                              
[26] "HDFS 517"                                                                       
[27] "Multilevel"                                                                     
[28] "0"                                                                              
[29] "Data Mining 1 (in IST)"                                                         
[30] "."                                                                              
[31] "none"                                                                           
[32] "N/a"                                                                            
[33] "BBH 597"                                                                        
[34] "HDFS530: Longitudinal Structural Equation Modeling"                             
[35] "HDFS 517 - The General Linear Mixed Model (Applications to Multilevel Modeling)"
[36] "509"                                                                            
[37] NA                                                                               
[38] "531"                                                                            
[39] "PSYCH 531"                                                                      
[40] "Multilevel Methods"                                                             
[41] "HDFS 597"                                                                       
[42] "This fall: HDFS 597"                                                            
[43] "HDFS 517: Multilevel Modeling"                                                  
[44] "psych 509"                                                                      
[45] "PSY 597 (Psychometrics with James Lebreton)"                                    
[46] "BBH 597 Multilevel Modeling"                                                    
[47] "HDFS 597 - Special Topics (Intro to Time-Varying Effect Modeling) -1 credit"    
[48] "Multi-Level-Modeling"                                                           
[49] "HDFS Multilevel Modeling"                                                       
[50] "PSY509 - Multivariate"                                                          
[51] "Multilevel - in progress"                                                       
[52] "EDPSY 507: Multivariate"                                                        
[53] "Na"                                                                             
[54] "x"                                                                              
[55] "HDFS 517 Multivar Change"                                                       
[56] "HDFS 517 - Multilevel Modeling"                                                 
# Distinct free-text answers given for the fourth course.
unique(psych_stats$course_4)
 [1] "None"                                                                                                                       
 [2] "NA"                                                                                                                         
 [3] "n/a"                                                                                                                        
 [4] "N/A"                                                                                                                        
 [5] "Multilevel (and psychometrics but there isn’t a #5 slot)"                                                                   
 [6] "X"                                                                                                                          
 [7] "BBH 597 - Multilevel Modeling"                                                                                              
 [8] "SODA 501 - Big Social Data"                                                                                                 
 [9] "HDFS523 - Data Analysis in Development"                                                                                     
[10] "HDFS530 (longitudinal SEM)"                                                                                                 
[11] "Advanced statistical analysis in psychology (2020 Spring, Master)"                                                          
[12] "597: TVEM"                                                                                                                  
[13] "PSY 531 Multilevel Modeling"                                                                                                
[14] "HDFS597 Special topics - Time varying effect modeling"                                                                      
[15] "Will be taking HDFS597 Dynamical systems"                                                                                   
[16] "HDFS 597: Special Topics - Introduction to Bayesian Data Analysis"                                                          
[17] "HDFS 597: Special topics - Introduction to Bayesian Data Analysis"                                                          
[18] "HDFS 597 - Bayesian Modeling"                                                                                               
[19] "Psychometrics (LeBreton)"                                                                                                   
[20] "-"                                                                                                                          
[21] "HDFS 526"                                                                                                                   
[22] "Multivariate"                                                                                                               
[23] "0"                                                                                                                          
[24] "Principles of causal inference (in IST)"                                                                                    
[25] "."                                                                                                                          
[26] "none"                                                                                                                       
[27] "N/a"                                                                                                                        
[28] "HDFS 530"                                                                                                                   
[29] "PSY509: Quantitative Methods (Classical & Modern Test Theory)"                                                              
[30] "HDFS 523 - Strategies for Data Analysis in Developmental Research"                                                          
[31] NA                                                                                                                           
[32] "Dynamical Systems"                                                                                                          
[33] "PSY 526"                                                                                                                    
[34] "531"                                                                                                                        
[35] "HDFS 523: Longitudinal/Multivariate Analysis"                                                                               
[36] "psych 531"                                                                                                                  
[37] "EDPSY 556"                                                                                                                  
[38] "HDFS 596"                                                                                                                   
[39] "HDFS 597 Time Varying Effect Modeling"                                                                                      
[40] "HDFS 597 - Special Topics (Advanced contingency table analysis Status) -1 credit"                                           
[41] "N/A (Additional bootcamps were taken, not necessarily part of PSU)"                                                         
[42] "PSY531"                                                                                                                     
[43] "ED PSY556: Foundations & Applications of IRT, EDPSY 597: Foundations of Meta-Analysis, CAS597: Structural Equation Modeling"
[44] "Na"                                                                                                                         
[45] "HDFS 508"                                                                                                                   
[46] "x"                                                                                                                          
[47] "PSY 527 Measures in Human Development"