# Triple Category Regression (TCR)
data = read.table("M1D.txt", h = T)  #data file of training population
M1 = as.matrix(data[, -(1:4)])  #file of markers of training population
# phenotypes vector of training population
phen = cbind(1:1000, data[, 4])
colnames(phen) = c("id", "phen")
y = phen[, 2]
# phenotypes vector of the training population in descending order
order = phen[order(phen[, 2], decreasing = TRUE), ]
# Subpopulation 1 with the highest phenotypes
g1 = order[1:(0.5 * nrow(data)), ]  #phenotypes
M1_new = M1[g1[, 1], ]  #genotypes
# Subpopulation 2 with the smallest phenotypes
g2 = order[(0.5 * nrow(data) + 1):nrow(data), ]  #phenotypes
M2_new = M1[g2[, 1], ]  #genotypes
# Allele frequencies of the total population, subpopulation 1 and subpopulation 2
p10 = matrix(0, ncol(M1_new), 1)
p20 = matrix(0, ncol(M1_new), 1)
p0 = matrix(0, ncol(M1_new), 1)
for (i in 1:ncol(M1_new)) {
    p10[i, ] = (length(which(M1_new[, i] == 1)) + 2 * length(which(M1_new[, i] == 
        2)))/(2 * nrow(M1_new))
    p20[i, ] = (length(which(M2_new[, i] == 1)) + 2 * length(which(M2_new[, i] == 
        2)))/(2 * nrow(M2_new))
    p0[i, ] = (length(which(M1[, i] == 1)) + 2 * length(which(M1[, i] == 2)))/(2 * 
        nrow(M1))
}
# Exclusion of markers by MAF
MAF = 0.05
maf = NULL
count = NULL
for (i in 1:length(p0)) {
    maf[i] = min(p0[i, ], 1 - p0[i, ])
    if (maf[i] > MAF) {
        count[i] = i
    }
}
# Deletion of markers with deltap0 = 0
deltap0 = (p10 - p20)  #difference between frequencies
count1 = NULL
for (i in 1:length(p0)) {
    if (deltap0[i] != 0) {
        count1[i] = i
    }
}
# Frequencies of markers that were not excluded by the control
int_snp = intersect(as.vector(na.omit(count)), as.vector(na.omit(count1)))
Mp = as.matrix(M1[, int_snp])
p1 = as.matrix(p10[int_snp, ])
p2 = as.matrix(p20[int_snp, ])
p = as.matrix(p0[int_snp, ])
deltap = (p1 - p2)  # difference between frequencies
# change zero by 2 and 2 by zero in each marker column with a negative deltap
# signal
M = Mp
for (i in 1:nrow(M)) {
    for (j in 1:ncol(M)) {
        if (deltap[j] < 0) {
            if (Mp[i, j] == 2) {
                M[i, j] = 0
            }
            if (Mp[i, j] == 0) {
                M[i, j] = 2
            }
        }
    }
}
# Count of 0 (bb), 1 (Bb) and 2 (BB) by individuals
pqn1 = NULL
pqn2 = NULL
pqn0 = NULL
for (i in 1:nrow(M)) {
    pqn1[i] = length(which(M[i, ] == 1))
    pqn2[i] = length(which(M[i, ] == 2))
    pqn0[i] = length(which(M[i, ] == 0))
}
# Regression coefficient in each class
regBB = cov(y, pqn2)/var(pqn2)
regbb = cov(y, pqn0)/var(pqn0)
regBb = cov(y, pqn1)/var(pqn1)
# Genetic values by genotype class
vgBB = regBB * pqn2
vgbb = regbb * pqn0
vgBb = regBb * pqn1
########### Additive Effects Additive genomic values
a = (vgBB/2 + vgbb/2)
# Accuracy - correlation between the estimated value and the simulated value
cora = cor(a, data[, 2])
# Bias - coefficient of the regression between the estimated value and the
# phenotypic value
ba = cov(a, y)/var(a)
# Additive variance
va = var((1/sqrt(mean(2 * p * (1 - p)))) * a)
# Additive molecular heritability
h2a = va/var(y)
########### Dominance Effects Genomic values due to dominance
d = 0.5 * (vgBb - vgBB + vgbb)
# Accuracy - correlation between the estimated value and the simulated value
cord = cor(d, data[, 3])
# Bias - coefficient of the regression between the estimated value and the
# phenotypic value
bd = cov(d, y)/var(d)
# Variance due to dominance
vd = var((1/mean(2 * p * (1 - p))) * d)
# Dominance heritability
h2d = vd/var(y)