Chapter 4 Appendix: Importing the original datasets

4.1 Software

I followed the standards and conventions from the Tidyverse, and I converted all of the original datasets, which are provided in Stata format, with this software:

R version 4.0.5 (2021-03-31)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_CA.UTF-8, LC_NUMERIC=C, LC_TIME=en_CA.UTF-8, LC_COLLATE=en_CA.UTF-8, LC_MONETARY=en_CA.UTF-8, LC_MESSAGES=en_CA.UTF-8, LC_PAPER=en_CA.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_CA.UTF-8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: ggplot2(v.3.3.3), msm(v.1.6.8), broom(v.0.7.6), lmtest(v.0.9-38), zoo(v.1.8-9), sandwich(v.3.0-0), tidyr(v.1.1.3), dplyr(v.1.0.5), yotover(v.0.3.5), devtools(v.2.4.0), testthat(v.3.0.2) and usethis(v.2.0.1)

loaded via a namespace (and not attached): Rcpp(v.1.0.6), mvtnorm(v.1.1-1), lattice(v.0.20-41), prettyunits(v.1.1.1), ps(v.1.6.0), assertthat(v.0.2.1), rprojroot(v.2.0.2), digest(v.0.6.27), utf8(v.1.2.1), duckdb(v.0.2.5), R6(v.2.5.0), backports(v.1.2.1), evaluate(v.0.14), highr(v.0.9), httr(v.1.4.2), pillar(v.1.6.0), rlang(v.0.4.10), rstudioapi(v.0.13), callr(v.3.7.0), jquerylib(v.0.1.3), Matrix(v.1.3-2), rmarkdown(v.2.7), labeling(v.0.4.2), desc(v.1.3.0), splines(v.4.0.5), pander(v.0.6.3), stringr(v.1.4.0), munsell(v.0.5.0), compiler(v.4.0.5), xfun(v.0.22), pkgconfig(v.2.0.3), pkgbuild(v.1.2.0), htmltools(v.0.5.1.1), tidyselect(v.1.1.0), tibble(v.3.1.1), expm(v.0.999-6), bookdown(v.0.22), codetools(v.0.2-18), fansi(v.0.4.2), crayon(v.1.4.1), withr(v.2.4.2), grid(v.4.0.5), gtable(v.0.3.0), jsonlite(v.1.7.2), lifecycle(v.1.0.0), DBI(v.1.1.1), magrittr(v.2.0.1), scales(v.1.1.1), cli(v.2.4.0), stringi(v.1.5.3), cachem(v.1.0.4), farver(v.2.1.0), fs(v.1.5.0), remotes(v.2.3.0), bslib(v.0.2.4), ellipsis(v.0.3.1), generics(v.0.1.0), vctrs(v.0.3.7), tools(v.4.0.5), glue(v.1.4.2), purrr(v.0.3.4), processx(v.3.5.1), pkgload(v.1.2.1), fastmap(v.1.1.0), survival(v.3.2-10), yaml(v.2.2.1), colorspace(v.2.0-0), sessioninfo(v.1.1.1), memoise(v.2.0.0), knitr(v.1.32) and sass(v.0.3.1)

4.2 Downloading the original datasets

# Location of the original application files from the UNCTAD/WTO
# "Advanced Guide to Trade Policy Analysis".
appfiles_url <- "https://vi.unctad.org/tpa/web/zips/vol2/Advanced%20Guide%20to%20TPA.zip"
appfiles_zip <- "00-application-files.zip"
appfiles_dir <- "00-application-files"

# Download once. mode = "wb" writes the zip as a binary file (without it,
# the default download method on Windows corrupts the archive). Using the
# default method instead of method = "curl" avoids requiring an external
# curl binary to be installed.
if (!file.exists(appfiles_zip)) {
  download.file(appfiles_url, appfiles_zip, mode = "wb")
}

# Extract once and rename the top-level folder to a shell-friendly name.
if (!dir.exists(appfiles_dir)) {
  unzip(appfiles_zip)
  file.rename("Advanced Guide to TPA", appfiles_dir)
}

4.3 Converting the original datasets

The next code chunk can be a bit obscure. It is shown only to make all of my steps transparent and reproducible.

# these packages are only used to import the data
library(haven)
library(stringr)
library(janitor)
library(purrr)

# Create the output folder for the converted datasets. With
# showWarnings = FALSE this is a silent no-op when the folder already
# exists, so the try() wrapper in the original was redundant; also spell
# out FALSE rather than the reassignable shorthand F.
dir.create("data-tsv", showWarnings = FALSE)

# All Stata datasets shipped with the application files. Anchoring the
# pattern to the ".dta" extension (instead of the bare substring "dta")
# avoids picking up any file that merely contains "dta" in its name.
dta_files <- list.files("00-application-files",
                        pattern = "\\.dta$",
                        full.names = TRUE,
                        recursive = TRUE)

# Convert one Stata file to a tab-separated file under data-tsv/.
#
# The output file name is derived from the input path: the top-level
# folder prefix is dropped, "Chapter" is abbreviated to "ch", directory
# separators become underscores, and the result is normalized with
# janitor::make_clean_names() before redundant path fragments are
# stripped. Existing outputs are not overwritten, so the conversion can
# be re-run cheaply.
read_and_clean <- function(finp) {
  message(finp)

  # Redundant path fragments to drop from the generated file name.
  redundant_parts <- c(
    "datasets_",
    "applications_",
    "exercises_",
    "1_trade_without_border_results_1",
    "2_rt_as_effects_results_2_"
  )

  out_name <- finp %>%
    str_replace(appfiles_dir, "") %>%
    str_replace("Chapter", "ch") %>%
    str_replace_all("Chapter[0-9]|\\.dta", "") %>%
    str_replace_all("(/)", "_") %>%
    make_clean_names() %>%
    str_replace_all(paste(redundant_parts, collapse = "|"), "") %>%
    str_replace("_([0-9])_|__", "_")

  out_path <- sprintf("data-tsv/%s.tsv", out_name)

  # Skip files that were already converted in a previous run.
  if (!file.exists(out_path)) {
    converted <- read_dta(finp) %>%
      clean_names()

    data.table::fwrite(converted, out_path, sep = "\t")
  }
}

# walk() runs read_and_clean() purely for its side effect (writing the
# .tsv files) and returns its input invisibly; map() would build — and,
# in a knitted document, print — a useless list of NULLs.
walk(dta_files, read_and_clean)