set_csfmt_rts_data_v2
converts a data.table
to csfmt_rts_data_v2
by reference.
csfmt_rts_data_v2
creates a new csfmt_rts_data_v2
(not by reference) from either a data.table
or data.frame
.
Usage
set_csfmt_rts_data_v2(x, create_unified_columns = TRUE, heal = TRUE)
csfmt_rts_data_v2(x, create_unified_columns = TRUE, heal = TRUE)
Arguments
- x
The data.table to be converted to csfmt_rts_data_v2
- create_unified_columns
Do you want it to create unified columns?
- heal
Do you want to impute missing values on creation?
Value
An extended data.table
, which has been modified by reference and returned (invisibly).
No return value, called for side effect of replacing the current data.table with a csfmt_rts_data_v2 in place.
Returns a duplicated csfmt_rts_data_v2.
Details
For more details see the vignette:
vignette("csfmt_rts_data_v2", package = "cstidy")
Smart assignment
csfmt_rts_data_v2
contains the smart assignment feature for time and geography.
When the variables in bold are assigned using :=
, the listed variables will be automatically imputed.
location_code:
granularity_geo
country_iso3
isoyear:
granularity_time
isoweek
isoyearweek
isoquarter
isoyearquarter
season
seasonweek
calyear
calmonth
calyearmonth
date
isoyearweek:
granularity_time
isoyear
isoweek
isoquarter
isoyearquarter
season
seasonweek
calyear
calmonth
calyearmonth
date
date:
granularity_time
isoyear
isoweek
isoyearweek
isoquarter
isoyearquarter
season
seasonweek
calyear
calmonth
calyearmonth
Unified columns
csfmt_rts_data_v2
contains 16 unified columns:
granularity_time
granularity_geo
country_iso3
location_code
border
age
sex
isoyear
isoweek
isoyearweek
isoquarter
isoyearquarter
season
seasonweek
calyear
calmonth
calyearmonth
date
See also
Other csfmt_rts_data:
expand_time_to()
,
identify_data_structure()
,
remove_class_csfmt_rts_data()
,
set_csfmt_rts_data_v1()
,
unique_time_series()
Examples
# Create some fake data as data.table
d <- cstidy::generate_test_data(fmt = "csfmt_rts_data_v2")
d <- d[1:5]
# convert to csfmt_rts_data_v2 by reference
cstidy::set_csfmt_rts_data_v2(d, create_unified_columns = TRUE)
#
d[1, isoyearweek := "2021-01"]
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyearweek county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 3: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 4: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2022-01-23 6
#> 3: NA NA <NA> 2022-01-23 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyearweek county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 3: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 4: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2022-01-23 6
#> 3: NA NA <NA> 2022-01-23 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
d[2, isoyear := 2019]
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2019 52 2019-52 1 2022-Q1 <NA> NA
#> 3: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 4: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2019-12-29 6
#> 3: NA NA <NA> 2022-01-23 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2019 52 2019-52 1 2022-Q1 <NA> NA
#> 3: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 4: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2019-12-29 6
#> 3: NA NA <NA> 2022-01-23 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
d[3, date := as.Date("2020-01-01")]
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: date county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2019 52 2019-52 1 2022-Q1 <NA> NA
#> 3: 2020 1 2020-01 1 2020-Q1 2019/2020 24
#> 4: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2019-12-29 6
#> 3: 2020 1 2020-M01 2020-01-01 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: date county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2019 52 2019-52 1 2022-Q1 <NA> NA
#> 3: 2020 1 2020-01 1 2020-Q1 2019/2020 24
#> 4: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2019-12-29 6
#> 3: 2020 1 2020-M01 2020-01-01 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
d[4, c("isoyear", "isoyearweek") := .(2021, "2021-01")]
#> Warning: Multiple time variables specified. Smart-assignment disabled.
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: date county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2019 52 2019-52 1 2022-Q1 <NA> NA
#> 3: 2020 1 2020-01 1 2020-Q1 2019/2020 24
#> 4: 2021 3 2021-01 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2019-12-29 6
#> 3: 2020 1 2020-M01 2020-01-01 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: date county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2019 52 2019-52 1 2022-Q1 <NA> NA
#> 3: 2020 1 2020-01 1 2020-Q1 2019/2020 24
#> 4: 2021 3 2021-01 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2019-12-29 6
#> 3: 2020 1 2020-M01 2020-01-01 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
d[5, c("location_code") := .("norge")]
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: date county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek nation nor norge NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2019 52 2019-52 1 2022-Q1 <NA> NA
#> 3: 2020 1 2020-01 1 2020-Q1 2019/2020 24
#> 4: 2021 3 2021-01 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2019-12-29 6
#> 3: 2020 1 2020-M01 2020-01-01 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: date county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek nation nor norge NA <NA> <NA>
#> isoyear isoweek isoyearweek isoquarter isoyearquarter season seasonweek
#> 1: 2021 1 2021-01 1 2021-Q1 2020/2021 24
#> 2: 2019 52 2019-52 1 2022-Q1 <NA> NA
#> 3: 2020 1 2020-01 1 2020-Q1 2019/2020 24
#> 4: 2021 3 2021-01 1 2022-Q1 2021/2022 26
#> 5: 2022 3 2022-03 1 2022-Q1 2021/2022 26
#> calyear calmonth calyearmonth date deaths_n
#> 1: NA NA <NA> 2021-01-10 0
#> 2: NA NA <NA> 2019-12-29 6
#> 3: 2020 1 2020-M01 2020-01-01 3
#> 4: NA NA <NA> 2022-01-23 4
#> 5: NA NA <NA> 2022-01-23 5
# Investigating the data structure of one column inside a dataset
cstidy::generate_test_data() %>%
cstidy::set_csfmt_rts_data_v2() %>%
cstidy::identify_data_structure("deaths_n") %>%
plot()
# Investigating the data structure via summary
cstidy::generate_test_data() %>%
cstidy::set_csfmt_rts_data_v2() %>%
summary()
#>
#> granularity_time
#> ✅ No errors
#>
#> granularity_geo
#> ✅ No errors
#>
#> country_iso3
#> ✅ No errors
#>
#> location_code
#> ✅ No errors
#>
#> border
#> ❌ Errors:
#> - NA exists (not allowed)
#>
#> age
#> ✅ No errors
#>
#> sex
#> ✅ No errors
#>
#> isoyear
#> ✅ No errors
#>
#> isoweek
#> ✅ No errors
#>
#> isoyearweek
#> ✅ No errors
#>
#> isoquarter
#> ✅ No errors
#>
#> isoyearquarter
#> ✅ No errors
#>
#> season
#> ✅ No errors
#>
#> seasonweek
#> ✅ No errors
#>
#> calyear
#> ✅ No errors
#>
#> calmonth
#> ✅ No errors
#>
#> calyearmonth
#> ✅ No errors
#>
#> date
#> ✅ No errors
#> granularity_time (character):
#> - isoyearweek (n = 45)
#> granularity_geo (character):
#> - county (n = 45)
#> country_iso3 (character):
#> - nor (n = 45)
#> location_code (character)
#> border (integer):
#> - <NA> (n = 45)
#> age (character):
#> - 000_005 (n = 15)
#> - <NA> (n = 15)
#> - total (n = 15)
#> sex (character):
#> - <NA> (n = 15)
#> - total (n = 30)
#> isoyear (integer):
#> - 2022 (n = 45)
#> isoweek (integer)
#> isoyearweek (character)
#> isoquarter (integer)
#> isoyearquarter (character)
#> season (character):
#> - 2021/2022 (n = 45)
#> seasonweek (numeric)
#> calyear (integer)
#> calmonth (integer)
#> calyearmonth (character)
#> date (Date)
#> deaths_n (integer)
#>