Generate Test Data for qzwslrm
generate_qzwslrm_test_data.Rmd
library(qzwslrm)
Disclaimer
The process of generating test data used in the package
qzwslrm
is documented. The process of generating the data
file and the pedigree is done only once and these files are then stored
under the extdata
directory of this package. Hence most of
the following R-code chunks are not evaluated, but they are visible for
documentation.
Data Simulation
The functionality implemented in qzwslrm
is tested with
a dataset that is generated using the simulation program
QMSim
. The parameter input for QMSim
was taken
from Macedo et al
(2020) and was adapted. The main difference to the original input
parameter is the reduction of the number of male and female animals per
generation. To get smaller datafiles, the number of animals per
generation was reduced by a factor of \(100\). With the input parameters the
simulation was started as shown below
s_prm_file <- "qzwslrm_test.prm"
s_sim_param_path <- system.file("extdata", "qmsim", s_prm_file, package = "qzwslrm")
qmsim_cmd <- paste("/qualstorzws01/data_projekte/linuxBin/QMSim", s_sim_param_path)
system(command = qmsim_cmd)
Reproducibility of the data simulation is provided by moving the seed file to the QMSim data directory.
Phenotypic Data
QMSim
produces a number of output files. For the
development and the tests of this package, we mainly require the
phenotypic data store in a file called p1_data_001.txt. This file is
copied to the system files of this package
qmsim_out_dir <- file.path(here::here(), "vignettes", paste0("r_", fs::path_ext_remove(s_prm_file)))
s_phen_path <- file.path(qmsim_out_dir, s_qmsim_phen_file)
qmsim_ext_dir <- file.path(here::here(), "inst", "extdata", "qmsim")
fs::file_move(path = s_phen_path, new_path = qmsim_ext_dir)
# remove old qmsim output directory
fs::dir_delete(path = qmsim_out_dir)
Prediction of Breeding Values
The file with the phenotypic data is used to predict breeding values
using mix99
. Before we can run the analysis with
mix99
, the data must be prepared and re-formatted.
s_qsim_data_path <- system.file("extdata", "qmsim", s_qmsim_phen_file, package = "qzwslrm")
# count number of lines
n_nr_lines <- as.integer(system(command = paste0("wc -l ", s_qsim_data_path, " | cut -d ' ' -f1", collapse = ""), intern = TRUE))
# read qmsim data
tbl_qm_data <- readr::read_table(file = s_qsim_data_path, na = '---', guess_max = n_nr_lines)
dim(tbl_qm_data)
#> [1] 4995 14
Generate Data for MiX99
To create a phenotype input file for mix99
, the data is
filtered and a few columns are selected.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
tbl_mix99_data <- tbl_qm_data %>%
filter(!is.na(Phen)) %>%
mutate(Sex = recode(Sex, `M` = 1, `F` = 2)) %>%
select(Progeny, Sex, Phen)
As checks, the number of records are reported here
dim(tbl_mix99_data)
#> [1] 2679 3
The first few lines of the selected data is shown
head(tbl_mix99_data)
#> # A tibble: 6 × 3
#> Progeny Sex Phen
#> <dbl> <dbl> <dbl>
#> 1 46 2 -0.351
#> 2 47 2 -0.754
#> 3 48 2 0.603
#> 4 49 2 1.30
#> 5 50 2 2.44
#> 6 51 2 0.941
The phenotypic data is written to a file
s_mix99_data_dir <- file.path(here::here(), "inst", "extdata", "mix99")
if (!fs::dir_exists(s_mix99_data_dir)) fs::dir_create(path = s_mix99_data_dir)
s_mix99_data_path <- file.path(s_mix99_data_dir, "p1_data_001.dat")
readr::write_delim(tbl_mix99_data, file = s_mix99_data_path, delim = " ", col_names = FALSE)
Generate Pedigree for MiX99
From the QMSim data the pedigree is generated.
Check the generated tibble
dim(tbl_mix99_ped)
#> [1] 4995 3
The first lines
head(tbl_mix99_ped)
#> # A tibble: 6 × 3
#> Progeny Sire Dam
#> <dbl> <dbl> <dbl>
#> 1 1 0 0
#> 2 2 0 0
#> 3 3 0 0
#> 4 4 0 0
#> 5 5 0 0
#> 6 6 0 0
Write the pedigree to a file
s_mix99_ped_path <- file.path(s_mix99_data_dir, "p1_data_001.ped")
readr::write_delim(tbl_mix99_ped, file = s_mix99_ped_path, delim = " ", col_names = FALSE)
Create CLIM File
The CLIM file is the instruction file that is used by MiX99. In this file the phenotypes, the pedigree, the parameters and the model are specified.
s_mix99_data_dir <- file.path(here::here(), "inst", "extdata", "mix99")
s_mix99_clm_path <- file.path(s_mix99_data_dir, 'p1_data_001.clm')
cat('#------------------------------------------------------------------\n', file = s_mix99_clm_path)
cat('# CLIM\n', file = s_mix99_clm_path, append = TRUE)
cat('# Command file for a simple animal model analysis.\n', file = s_mix99_clm_path, append = TRUE)
cat('# MODEL: phen = sex + animal\n', file = s_mix99_clm_path, append = TRUE)
cat('#------------------------------------------------------------------\n', file = s_mix99_clm_path, append = TRUE)
cat('DATAFILE p1_data_001.dat # Data file\n', file = s_mix99_clm_path, append = TRUE)
cat('MISSING -99999.0 # Number for missing real number\n', file = s_mix99_clm_path, append = TRUE)
cat('INTEGER animal sex # Integer column names\n', file = s_mix99_clm_path, append = TRUE)
cat('REAL phen # Real column names\n', file = s_mix99_clm_path, append = TRUE)
cat('PEDFILE p1_data_001.ped # Pedigree file\n', file = s_mix99_clm_path, append = TRUE)
cat('PEDIGREE animal am # Genetics associated with animal code: am=animal model \n', file = s_mix99_clm_path, append = TRUE)
cat('PARFILE p1_data_001.var # Variance component file\n', file = s_mix99_clm_path, append = TRUE)
cat('PRECON b f # Preconditioner: b=block\n', file = s_mix99_clm_path, append = TRUE)
cat('MODEL\n', file = s_mix99_clm_path, append = TRUE)
cat(' phen = sex animal # The model\n', file = s_mix99_clm_path, append = TRUE)
Writing Parameter File
The parameter file is the input file that contains the variance-covariance components.
s_mix99_data_dir <- file.path(here::here(), "inst", "extdata", "mix99")
s_mix99_var_path <- file.path(s_mix99_data_dir, 'p1_data_001.var')
cat('1 1 1 0.1\n', file = s_mix99_var_path)
cat('2 1 1 0.9\n', file = s_mix99_var_path, append = TRUE)
With all the above components, it should be possible to run predictions of breeding values using MiX99.