Skip to contents
library(qzwslrm)

Disclaimer

The process of generating test data used in the package qzwslrm is documented. The process of generating the data file and the pedigree is done only once and these files are then stored under the extdata directory of this package. Hence most of the following R-code chunks are not evaluated, but they are visible for documentation.

Data Simulation

The functionality implemented in qzwslrm is tested with a dataset that is generated using the simulation program QMSim. The parameter input for QMSim was taken from Macedo et al (2020) and was adapted. The main difference to the original input parameter is the reduction of the number of male and female animals per generation. To get smaller datafiles, the number of animals per generation was reduced by a factor of \(100\). With the input parameters the simulation was started as shown below

s_prm_file <- "qzwslrm_test.prm"
s_sim_param_path <- system.file("extdata", "qmsim", s_prm_file, package = "qzwslrm")
qmsim_cmd <- paste("/qualstorzws01/data_projekte/linuxBin/QMSim", s_sim_param_path)
system(command = qmsim_cmd)

Reproducibility of the data simulation is provided by moving the seed file to the QMSim data directory.

s_seed_path <- file.path(here::here(), "vignettes", "seed.prv")
if (fs::file_exists(s_seed_path))
  fs::file_move(s_seed_path, file.path(here::here(), "inst", "extdata", "qmsim"))

Phenotypic Data

QMSim produces a number of output files. For the development and the tests of this package, we mainly require the phenotypic data store in a file called p1_data_001.txt. This file is copied to the system files of this package

qmsim_out_dir <- file.path(here::here(), "vignettes", paste0("r_", fs::path_ext_remove(s_prm_file)))
s_phen_path <- file.path(qmsim_out_dir, s_qmsim_phen_file)
qmsim_ext_dir <- file.path(here::here(), "inst", "extdata", "qmsim")
fs::file_move(path = s_phen_path, new_path = qmsim_ext_dir)
# remove old qmsim output directory
fs::dir_delete(path = qmsim_out_dir)

Prediction of Breeding Values

The file with the phenotypic data is used to predict breeding values using mix99. Before we can run the analysis with mix99, the data must be prepared and re-formatted.

s_qsim_data_path <- system.file("extdata", "qmsim", s_qmsim_phen_file, package = "qzwslrm")
# count number of lines
n_nr_lines <- as.integer(system(command = paste0("wc -l ", s_qsim_data_path, " | cut -d ' ' -f1", collapse = ""), intern = TRUE))
# read qmsim data
tbl_qm_data <- readr::read_table(file = s_qsim_data_path, na = '---', guess_max = n_nr_lines)
dim(tbl_qm_data)
#> [1] 4995   14

Generate Data for MiX99

To create a phenotype input file for mix99, the data is filtered and a few columns are selected.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
tbl_mix99_data <- tbl_qm_data %>%
  filter(!is.na(Phen)) %>%
  mutate(Sex = recode(Sex, `M` = 1, `F` = 2)) %>%
  select(Progeny, Sex, Phen)

As checks, the number of records are reported here

dim(tbl_mix99_data)
#> [1] 2679    3

The first few lines of the selected data is shown

head(tbl_mix99_data)
#> # A tibble: 6 × 3
#>   Progeny   Sex   Phen
#>     <dbl> <dbl>  <dbl>
#> 1      46     2 -0.351
#> 2      47     2 -0.754
#> 3      48     2  0.603
#> 4      49     2  1.30 
#> 5      50     2  2.44 
#> 6      51     2  0.941

The phenotypic data is written to a file

s_mix99_data_dir <- file.path(here::here(), "inst", "extdata", "mix99")
if (!fs::dir_exists(s_mix99_data_dir)) fs::dir_create(path = s_mix99_data_dir)
s_mix99_data_path <- file.path(s_mix99_data_dir, "p1_data_001.dat")
readr::write_delim(tbl_mix99_data, file = s_mix99_data_path, delim = " ", col_names = FALSE)

Generate Pedigree for MiX99

From the QMSim data the pedigree is generated.

tbl_mix99_ped <- tbl_qm_data %>% 
  select(Progeny, Sire, Dam)

Check the generated tibble

dim(tbl_mix99_ped)
#> [1] 4995    3

The first lines

head(tbl_mix99_ped)
#> # A tibble: 6 × 3
#>   Progeny  Sire   Dam
#>     <dbl> <dbl> <dbl>
#> 1       1     0     0
#> 2       2     0     0
#> 3       3     0     0
#> 4       4     0     0
#> 5       5     0     0
#> 6       6     0     0

Write the pedigree to a file

s_mix99_ped_path <- file.path(s_mix99_data_dir, "p1_data_001.ped")
readr::write_delim(tbl_mix99_ped, file = s_mix99_ped_path, delim = " ", col_names = FALSE)

Create CLIM File

The CLIM file is the instruction file that is used by MiX99. In this file the phenotypes, the pedigree, the parameters and the model are specified.

s_mix99_data_dir <- file.path(here::here(), "inst", "extdata", "mix99")
s_mix99_clm_path <- file.path(s_mix99_data_dir, 'p1_data_001.clm')
cat('#------------------------------------------------------------------\n', file = s_mix99_clm_path)
cat('#                              CLIM\n', file = s_mix99_clm_path, append = TRUE)
cat('#           Command file for a simple animal model analysis.\n', file = s_mix99_clm_path, append = TRUE)
cat('#             MODEL:  phen = sex + animal\n', file = s_mix99_clm_path, append = TRUE)
cat('#------------------------------------------------------------------\n', file = s_mix99_clm_path, append = TRUE)

cat('DATAFILE  p1_data_001.dat        # Data file\n', file = s_mix99_clm_path, append = TRUE)
cat('MISSING -99999.0        # Number for missing real number\n', file = s_mix99_clm_path, append = TRUE)

cat('INTEGER   animal sex    # Integer column names\n', file = s_mix99_clm_path, append = TRUE)
cat('REAL      phen          # Real column names\n', file = s_mix99_clm_path, append = TRUE)

cat('PEDFILE   p1_data_001.ped        # Pedigree file\n', file = s_mix99_clm_path, append = TRUE)
cat('PEDIGREE  animal am     # Genetics associated with animal code: am=animal model \n', file = s_mix99_clm_path, append = TRUE)

cat('PARFILE   p1_data_001.var        # Variance component file\n', file = s_mix99_clm_path, append = TRUE)

cat('PRECON b f              # Preconditioner: b=block\n', file = s_mix99_clm_path, append = TRUE)

cat('MODEL\n', file = s_mix99_clm_path, append = TRUE)
cat('  phen = sex animal # The model\n', file = s_mix99_clm_path, append = TRUE)

Writing Parameter File

The parameter file is the input file that contains the variance-covariance components.

s_mix99_data_dir <- file.path(here::here(), "inst", "extdata", "mix99")
s_mix99_var_path <- file.path(s_mix99_data_dir, 'p1_data_001.var')
cat('1  1  1  0.1\n', file = s_mix99_var_path)
cat('2  1  1  0.9\n', file = s_mix99_var_path, append = TRUE)

With all the above components, it should be possible to run predictions of breeding values using MiX99.