These functions help to compare two metadata frames and assess if new rows should be added.
A data.frame
that serves as the existing metadata file
that potentially needs to be updated. Required.
A data.frame
that contains records potentially missing
from d_original
. Required.
Column names that, in combination, uniquely identify a row.
character
vector. Optional.
A logical
value indicating whether to update a
column called datestamp
. Defaults to FALSE
.
A Date
value assigned to the datestamp
column
for the records in d_current
not present in d_original
when
datestamp_update
is TRUE
.
Defaults to today.
The name(s) of columns containing values to update.
These values in d_current
will overwrite the values in d_original
.
Location of the metadata file to potentially be updated.
Required character
vector.
A tibble::tibble
that combines d_original
with the new records
from d_current
.
Each dataset is verified to not have more than one
row with the same values in the combination of keys.
The stat_columns
typically contain metrics like 'count' or 'mean'
which may become obsolete in d_original
. These values are dropped
from d_original
and replaced by the columns in d_current
, after
joining on the keys
column(s).
library("magrittr")
ds_original <- tibble::tibble(
x1 = c(1, 3, 4),
x2 = letters[c(1, 3, 4)],
x3 = c(11, 13, 14),
x4 = c(111, 113, 114),
x5 = c(-11, -13, -14),
datestamp = as.Date("2020-01-07")
)
ds_current <- tibble::tibble(
x1 = c(1:5, 1, 5),
x2 = c(letters[1:5], "x", "y"),
x3 = c(11, 12, 13, 14, 15, 11, 15),
x4 = c(211, 212, 213, 214, 215, 211, 215),
x5 = c(311, 312, 313, 314, 315, 311, 315),
datestamp = as.Date(NA)
)
# Basic: append the new records.
data_frame_stack_new(
d_original = ds_original,
d_current = ds_current,
keys = c("x1", "x2")
)
#> # A tibble: 7 × 6
#> x1 x2 x3 x4 x5 datestamp
#> <dbl> <chr> <dbl> <dbl> <dbl> <date>
#> 1 1 a 11 111 -11 2020-01-07
#> 2 3 c 13 113 -13 2020-01-07
#> 3 4 d 14 114 -14 2020-01-07
#> 4 2 b 12 212 312 NA
#> 5 5 e 15 215 315 NA
#> 6 1 x 11 211 311 NA
#> 7 5 y 15 215 315 NA
# Wrinkle 1: datestamp the new records.
data_frame_stack_new(
d_original = ds_original,
d_current = ds_current,
keys = c("x1", "x2"),
datestamp_update = TRUE
)
#> # A tibble: 7 × 6
#> x1 x2 x3 x4 x5 datestamp
#> <dbl> <chr> <dbl> <dbl> <dbl> <date>
#> 1 1 a 11 111 -11 2020-01-07
#> 2 3 c 13 113 -13 2020-01-07
#> 3 4 d 14 114 -14 2020-01-07
#> 4 2 b 12 212 312 2024-12-03
#> 5 5 e 15 215 315 2024-12-03
#> 6 1 x 11 211 311 2024-12-03
#> 7 5 y 15 215 315 2024-12-03
# Wrinkle 2a: datestamp the new records; update x4.
data_frame_stack_new(
d_original = ds_original,
d_current = ds_current,
keys = c("x1", "x2"),
datestamp_update = TRUE,
stat_columns = c("x4")
)
#> # A tibble: 7 × 6
#> x1 x2 x3 x5 datestamp x4
#> <dbl> <chr> <dbl> <dbl> <date> <dbl>
#> 1 1 a 11 -11 2020-01-07 211
#> 2 3 c 13 -13 2020-01-07 213
#> 3 4 d 14 -14 2020-01-07 214
#> 4 2 b 12 312 2024-12-03 212
#> 5 5 e 15 315 2024-12-03 215
#> 6 1 x 11 311 2024-12-03 211
#> 7 5 y 15 315 2024-12-03 215
# Wrinkle 2b: datestamp the new records; update x4 & x5.
data_frame_stack_new(
d_original = ds_original,
d_current = ds_current,
keys = c("x1", "x2"),
datestamp_update = TRUE,
stat_columns = c("x4", "x5")
)
#> # A tibble: 7 × 6
#> x1 x2 x3 datestamp x4 x5
#> <dbl> <chr> <dbl> <date> <dbl> <dbl>
#> 1 1 a 11 2020-01-07 211 311
#> 2 3 c 13 2020-01-07 213 313
#> 3 4 d 14 2020-01-07 214 314
#> 4 2 b 12 2024-12-03 212 312
#> 5 5 e 15 2024-12-03 215 315
#> 6 1 x 11 2024-12-03 211 311
#> 7 5 y 15 2024-12-03 215 315
ds_current %>%
dplyr::anti_join(ds_original, by = c("x1", "x2"))
#> # A tibble: 4 × 6
#> x1 x2 x3 x4 x5 datestamp
#> <dbl> <chr> <dbl> <dbl> <dbl> <date>
#> 1 2 b 12 212 312 NA
#> 2 5 e 15 215 315 NA
#> 3 1 x 11 211 311 NA
#> 4 5 y 15 215 315 NA
# Update a file
if (FALSE) { # \dontrun{
{
path_temp <- tempfile(fileext = ".csv")
on.exit(unlink(path_temp))
file.copy(
system.file("test-data/metadata-original.csv", package = "OuhscMunge"),
path_temp
)
}
# Displays 3 rows.
readr::read_csv(path_temp)
metadata_update_file(
path_temp,
dplyr::mutate(ds_current, x1 = as.character(x1), x3 = as.character(x3)),
c("x1", "x2")
)
# Displays 7 rows.
readr::read_csv(path_temp)
} # }