These functions help to compare two metadata frames and assess if new rows should be added.

data_frame_stack_new(
  d_original,
  d_current,
  keys,
  datestamp_update = FALSE,
  datestamp_value = Sys.Date(),
  stat_columns = character(0)
)

metadata_update_file(
  path,
  d_current,
  keys,
  datestamp_update = FALSE,
  datestamp_value = Sys.Date(),
  stat_columns = character(0)
)

Arguments

d_original

A data.frame that serves as the existing metadata file that potentially needs to be updated. Required.

d_current

A data.frame that contains records potentially missing from d_original. Required.

keys

Column names whose combined values uniquely identify a row. Required character vector.

datestamp_update

A logical value indicating whether to assign datestamp_value to the datestamp column of the records in d_current that are not present in d_original. Defaults to FALSE.

datestamp_value

A Date value assigned to the datestamp column for the records in d_current not present in d_original when datestamp_update is TRUE. Defaults to today.

stat_columns

The name(s) of columns containing values to update. These values in d_current will overwrite the values in d_original.

path

Location of the metadata file that potentially needs to be updated. Required character vector.

Value

A tibble::tibble that combines d_original with the new records from d_current.

Note

Each dataset is verified to not have more than one row with the same values in the combination of keys.

The stat_columns typically contain metrics like 'count' or 'mean' which may become obsolete in d_original. These values are dropped from d_original and replaced by the columns in d_current, after joining on the keys column(s).

Author

Will Beasley

Examples

library("magrittr")
ds_original <- tibble::tibble(
  x1         = c(1, 3, 4),
  x2         = letters[c(1, 3, 4)],
  x3         = c(11, 13, 14),
  x4         = c(111, 113, 114),
  x5         = c(-11, -13, -14),
 datestamp  = as.Date("2020-01-07")
)

ds_current <- tibble::tibble(
  x1   = c(1:5, 1, 5),
  x2   = c(letters[1:5], "x", "y"),
  x3   = c(11, 12, 13, 14, 15, 11, 15),
  x4   = c(211, 212, 213, 214, 215, 211, 215),
  x5   = c(311, 312, 313, 314, 315, 311, 315),
  datestamp = as.Date(NA)
)

# Basic: append the new records.
data_frame_stack_new(
  d_original       = ds_original,
  d_current        = ds_current,
  keys             = c("x1", "x2")
)
#> # A tibble: 7 × 6
#>      x1 x2       x3    x4    x5 datestamp 
#>   <dbl> <chr> <dbl> <dbl> <dbl> <date>    
#> 1     1 a        11   111   -11 2020-01-07
#> 2     3 c        13   113   -13 2020-01-07
#> 3     4 d        14   114   -14 2020-01-07
#> 4     2 b        12   212   312 NA        
#> 5     5 e        15   215   315 NA        
#> 6     1 x        11   211   311 NA        
#> 7     5 y        15   215   315 NA        

# Wrinkle 1: datestamp the new records.
data_frame_stack_new(
  d_original       = ds_original,
  d_current        = ds_current,
  keys             = c("x1", "x2"),
  datestamp_update = TRUE
)
#> # A tibble: 7 × 6
#>      x1 x2       x3    x4    x5 datestamp 
#>   <dbl> <chr> <dbl> <dbl> <dbl> <date>    
#> 1     1 a        11   111   -11 2020-01-07
#> 2     3 c        13   113   -13 2020-01-07
#> 3     4 d        14   114   -14 2020-01-07
#> 4     2 b        12   212   312 2024-02-26
#> 5     5 e        15   215   315 2024-02-26
#> 6     1 x        11   211   311 2024-02-26
#> 7     5 y        15   215   315 2024-02-26

# Wrinkle 2a: datestamp the new records; update x4.
data_frame_stack_new(
  d_original       = ds_original,
  d_current        = ds_current,
  keys             = c("x1", "x2"),
  datestamp_update = TRUE,
  stat_columns     = c("x4")
)
#> # A tibble: 7 × 6
#>      x1 x2       x3    x5 datestamp     x4
#>   <dbl> <chr> <dbl> <dbl> <date>     <dbl>
#> 1     1 a        11   -11 2020-01-07   211
#> 2     3 c        13   -13 2020-01-07   213
#> 3     4 d        14   -14 2020-01-07   214
#> 4     2 b        12   312 2024-02-26   212
#> 5     5 e        15   315 2024-02-26   215
#> 6     1 x        11   311 2024-02-26   211
#> 7     5 y        15   315 2024-02-26   215

# Wrinkle 2b: datestamp the new records; update x4 & x5.
data_frame_stack_new(
  d_original       = ds_original,
  d_current        = ds_current,
  keys             = c("x1", "x2"),
  datestamp_update = TRUE,
  stat_columns     = c("x4", "x5")
)
#> # A tibble: 7 × 6
#>      x1 x2       x3 datestamp     x4    x5
#>   <dbl> <chr> <dbl> <date>     <dbl> <dbl>
#> 1     1 a        11 2020-01-07   211   311
#> 2     3 c        13 2020-01-07   213   313
#> 3     4 d        14 2020-01-07   214   314
#> 4     2 b        12 2024-02-26   212   312
#> 5     5 e        15 2024-02-26   215   315
#> 6     1 x        11 2024-02-26   211   311
#> 7     5 y        15 2024-02-26   215   315

ds_current %>%
  dplyr::anti_join(ds_original, by = c("x1", "x2"))
#> # A tibble: 4 × 6
#>      x1 x2       x3    x4    x5 datestamp
#>   <dbl> <chr> <dbl> <dbl> <dbl> <date>   
#> 1     2 b        12   212   312 NA       
#> 2     5 e        15   215   315 NA       
#> 3     1 x        11   211   311 NA       
#> 4     5 y        15   215   315 NA       

# Update a file
if (FALSE) {
{
  path_temp <- tempfile(fileext = ".csv")
  on.exit(unlink(path_temp))
  file.copy(
    system.file("test-data/metadata-original.csv", package = "OuhscMunge"),
    path_temp
  )
}

# Displays 3 rows.
readr::read_csv(path_temp)

metadata_update_file(
  path_temp,
  dplyr::mutate(ds_current, x1 = as.character(x1), x3 = as.character(x3)),
  c("x1", "x2")
)

# Displays 7 rows.
readr::read_csv(path_temp)
}