# Triple Category Regression (TCR) data = read.table("M1D.txt", h = T) #data file of training population M1 = as.matrix(data[, -(1:4)]) #file of markers of training population # phenotypes vector of training population phen = cbind(1:1000, data[, 4]) colnames(phen) = c("id", "phen") y = phen[, 2] # phenotypes vector of the training population in descending order order = phen[order(phen[, 2], decreasing = TRUE), ] # Subpopulation 1 with the highest phenotypes g1 = order[1:(0.5 * nrow(data)), ] #phenotypes M1_new = M1[g1[, 1], ] #genotypes # Subpopulation 2 with the smallest phenotypes g2 = order[(0.5 * nrow(data) + 1):nrow(data), ] #phenotypes M2_new = M1[g2[, 1], ] #genotypes # Allele frequencies of the total population, subpopulation 1 and subpopulation 2 p10 = matrix(0, ncol(M1_new), 1) p20 = matrix(0, ncol(M1_new), 1) p0 = matrix(0, ncol(M1_new), 1) for (i in 1:ncol(M1_new)) { p10[i, ] = (length(which(M1_new[, i] == 1)) + 2 * length(which(M1_new[, i] == 2)))/(2 * nrow(M1_new)) p20[i, ] = (length(which(M2_new[, i] == 1)) + 2 * length(which(M2_new[, i] == 2)))/(2 * nrow(M2_new)) p0[i, ] = (length(which(M1[, i] == 1)) + 2 * length(which(M1[, i] == 2)))/(2 * nrow(M1)) } # Exclusion of markers by MAF MAF = 0.05 maf = NULL count = NULL for (i in 1:length(p0)) { maf[i] = min(p0[i, ], 1 - p0[i, ]) if (maf[i] > MAF) { count[i] = i } } # Deletion of markers with deltap0 = 0 deltap0 = (p10 - p20) #difference between frequencies count1 = NULL for (i in 1:length(p0)) { if (deltap0[i] != 0) { count1[i] = i } } # Frequencies of markers that were not excluded by the control int_snp = intersect(as.vector(na.omit(count)), as.vector(na.omit(count1))) Mp = as.matrix(M1[, int_snp]) p1 = as.matrix(p10[int_snp, ]) p2 = as.matrix(p20[int_snp, ]) p = as.matrix(p0[int_snp, ]) deltap = (p1 - p2) # difference between frequencies # change zero by 2 and 2 by zero in each marker column with a negative deltap # signal M = Mp for (i in 1:nrow(M)) { for (j in 1:ncol(M)) { if (deltap[j] < 0) { if (Mp[i, j] == 2) { M[i, j] = 0 } if (Mp[i, j] == 0) { M[i, j] = 2 } } } } # Count of 0 (bb), 1 (Bb) and 2 (BB) by individuals pqn1 = NULL pqn2 = NULL pqn0 = NULL for (i in 1:nrow(M)) { pqn1[i] = length(which(M[i, ] == 1)) pqn2[i] = length(which(M[i, ] == 2)) pqn0[i] = length(which(M[i, ] == 0)) } # Regression coefficient in each class regBB = cov(y, pqn2)/var(pqn2) regbb = cov(y, pqn0)/var(pqn0) regBb = cov(y, pqn1)/var(pqn1) # Genetic values by genotype class vgBB = regBB * pqn2 vgbb = regbb * pqn0 vgBb = regBb * pqn1 ########### Additive Effects Additive genomic values a = (vgBB/2 + vgbb/2) # Accuracy - correlation between the estimated value and the simulated value cora = cor(a, data[, 2]) # Bias - coefficient of the regression between the estimated value and the # phenotypic value ba = cov(a, y)/var(a) # Additive variance va = var((1/sqrt(mean(2 * p * (1 - p)))) * a) # Additive molecular heritability h2a = va/var(y) ########### Dominance Effects Genomic values due to dominance d = 0.5 * (vgBb - vgBB + vgbb) # Accuracy - correlation between the estimated value and the simulated value cord = cor(d, data[, 3]) # Bias - coefficient of the regression between the estimated value and the # phenotypic value bd = cov(d, y)/var(d) # Variance due to dominance vd = var((1/mean(2 * p * (1 - p))) * d) # Dominance heritability h2d = vd/var(y)