Execute the checks against a dataset to return a nested list.

execute_checks(ds, checks, origin)

Arguments

ds

The data.frame to be checked. Required.

checks

The list describing the checks. Is the output of load_checks(). Required.

origin

The origin of the dataset. Currently supports "csv" and "REDCap". Required.

Examples

# Step 0: define paths.
#   So this package example executes on every machine, files bundled with the package are used.

# Replace the two paths for your specific project
path_data    <- system.file("datasets/pt-event-biochemical.rds", package = "trawler")
path_checks  <- system.file("checks/checks-biochemical.yml", package = "trawler")

# Step 1: load the check definitions and the dataset to test
ds_pt_event  <- readr::read_rds(path_data)
checks       <- load_checks(path_checks, origin = "REDCap")

# Step 2: execute the checks (saving the result to an rds file is shown at the end of this example)
ds_pt_event |>
  execute_checks(checks, origin = "REDCap")
#> $github_file_prefix
#> [1] "https://github.com/OuhscBbmc/validator-1/tree/main"
#> 
#> $record_id_name
#> [1] "record_id"
#> 
#> $record_id_link
#> [1] "<a href=\"https://bbmc.ouhsc.edu/redcap/redcap_v%s/DataEntry/index.php?pid=%s&arm=%s&id=%s&page=%s\" target=\"_blank\">%s</a>"
#> 
#> $baseline_date_name
#> [1] "date_enrolled"
#> 
#> $redcap_project_id
#> [1] 1612
#> 
#> $redcap_version
#> [1] "10.5.1"
#> 
#> $redcap_default_arm
#> [1] 1
#> 
#> $redcap_codebook
#> [1] "https://bbmc.ouhsc.edu/redcap/redcap_v10.5.1/Design/data_dictionary_codebook.php?pid=1612"
#> 
#> $smells
#> # A tibble: 11 × 12
#>    check_name           pass  description priority debug bound_lower bound_upper
#>    <chr>                <lgl> <chr>          <int> <lgl>       <dbl>       <dbl>
#>  1 females              TRUE  Proportion…        2 FALSE        0.4         0.6 
#>  2 males                TRUE  Proportion…        2 FALSE        0.4         0.6 
#>  3 age                  TRUE  Mean parti…        2 FALSE       20          60   
#>  4 serum_prealbumin_le… FALSE Mean serum…        2 FALSE       32          39   
#>  5 serum_creatinine_le… FALSE Mean serum…        2 FALSE        3          15   
#>  6 bmi_at_baseline      FALSE Mean BMI i…        2 FALSE       18          24   
#>  7 serum_cholesterol_l… TRUE  Mean chole…        1 FALSE      100         140   
#>  8 dialysis_adequacy    TRUE  Mean Kt/V …        1 FALSE        1.2         5   
#>  9 nutritional_counsel… TRUE  Most patie…        2 FALSE        0.85        0.99
#> 10 definitive_diagnosis TRUE  The propor…        1 FALSE        1           1   
#> 11 normalized_protein_… TRUE  Mean Norma…        1 FALSE        0           1.2 
#> # ℹ 5 more variables: bounds_template <chr>, value_template <chr>,
#> #   equation <chr>, boundaries <chr>, value <dbl>
#> 
#> $smell_status
#> [1] "11 smells have been sniffed.  3 violation(s) were found."
#> 
#> $smells_inactive
#> # A tibble: 1 × 9
#>   check_name  description priority debug bound_lower bound_upper bounds_template
#>   <chr>       <chr>          <int> <lgl>       <dbl>       <dbl> <chr>          
#> 1 average_se… ??Units co…        1 FALSE         501        1200 [%.0f, %.0f]   
#> # ℹ 2 more variables: value_template <chr>, equation <chr>
#> 
#> $rules
#> # A tibble: 14 × 8
#>    check_name     violation_count error_message priority debug redcap_instrument
#>    <chr>                    <int> <chr>            <int> <lgl> <chr>            
#>  1 baseline_prea…              10 Serum pre-al…        1 FALSE baseline_data    
#>  2 missing_serum…               3 Relevant nut…        1 FALSE baseline_data    
#>  3 serum_prealbu…              15 Baseline pre…        2 FALSE baseline_data, v…
#>  4 serum_prealbu…               0 Baseline pre…        2 FALSE baseline_data, v…
#>  5 serum_prealbu…               0 Baseline pre…        2 FALSE baseline_data, v…
#>  6 serum_prealbu…               0 serum prealb…        3 FALSE baseline_data, v…
#>  7 baseline_firs…               0 Serum prealb…        3 FALSE baseline_data, v…
#>  8 daily_first_v…               0 In-addition …        3 FALSE baseline_data, p…
#>  9 daily_protein…               0 npcr levels …        3 FALSE baseline_data, p…
#> 10 hospitalizati…               2 Patient was …        1 FALSE completion_proje…
#> 11 optimal_daily…               7 Daily protei…        2 FALSE completion_proje…
#> 12 recommended_n…              10 NPCR values …        2 FALSE completion_data  
#> 13 npcr                         1 NPCR at comp…        2 FALSE completion_data  
#> 14 npcr_comparis…               2 NPCR at comp…        3 FALSE completion_data  
#> # ℹ 2 more variables: passing_test <chr>, results <list>
#> 
#> $rule_status
#> [1] "14 rules were examined. 8 rule(s) had at least 1 violation. 50 total violation(s) were found."
#> 
#> $rules_inactive
#> # A tibble: 1 × 6
#>   check_name         error_message priority debug redcap_instrument passing_test
#>   <chr>              <chr>            <int> <lgl> <chr>             <chr>       
#> 1 pre_albumin_levels pre_albumin …        3 FALSE visit_lab_date    "function (…
#> 
#> attr(,"class")
#> [1] "trawler_checks"

# Save to disk if needed
# ds_pt_event |>
#   execute_checks(checks, origin = "REDCap") |>
#   readr::write_rds("inst/derived/biochemical.rds")