function igeno = impute(geno) %will impute missing genotype data %for an F2 genotype array encoded as 0-1-2 %geno is assumed to be a single chromosome % so if there are many chromosomes, % loop through them one at a time %NOTE %this is a fast-impute method that % correctly imputes the marginal distribution for each missing element in genotype array % it is NOT THE CORRECT JOINT IMPUTATION and thus % should be used only for single marker methods % should not be used in conjunction with interval mapping methods % %Copyright notice %this code was written by Gary Churchill who retains all rights. %If this code to generate results in publications please cite %Sugiyama F, Churchill GA, Higgins DC, Johns C, Makaritsis KP, Gavras H, Paigen B (1999) %Quantitative trait loci analysis of blood pressure in mice on a high salt regimen: %Concordance with rat and human loci. Submitted to Nature Genetics. % %set up F2 indicator if max(geno(:))>1 F2 = 1; else F2 = 0; end [n,m] = size(geno); %size of array n mice and m markers r = est_rec_fracs(geno); % m x m array of pairwise rec fracs igeno = geno; for i = 1:n %loop through rows of geno array if sum(isnan(geno(i,:))) == m %empty row for j = 1:m if F2 igeno(i,j) = frand([0.25, 0.5, 0.25]); else igeno(i,j) = frand([0.5, 0.5]); end end elseif sum(isnan(geno(i,:))) > 0 %at least one missing for j = find(isnan(geno(i,:))) %loop through missing value %find nearest non-missing to left left = j-1; while left > 0 if isnan(geno(i,left)) left = left - 1; else break; end end %find nearest non-missing to right right = j+1; while right < m+1 if isnan(geno(i,right)) right = right + 1; else break; end end % determine case and impute missing value if left == 0 %left edge igeno(i,j) = impute1(geno(i,right),r(j,right),F2); elseif right == m+1 %right edge igeno(i,j) = impute1(geno(i,left),r(left,j),F2); else % middle igeno(i,j) = impute2(geno(i,left), geno(i,right), r(left,j), r(j,right),F2); end end else %no missing data end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function y = impute1(x, r,F2) if F2 switch x case 0 p = [(1-r)^2, 2*r*(1-r), r^2]; case 1 p = [r*(1-r), (1-r)^2+r^2, r*(1-r)]; case 2 p = [r^2, 2*r*(1-r), (1-r)^2]; otherwise error('invalid genotype'); end else switch x case 0 p = [(1-r), r]; case 1 p = [r, (1-r)]; otherwise error('invalid genotype'); end end y = frand(p); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function y = impute2(x1, x2, r1, r2, F2) p = zeros(3,1); if F2 switch x1 case 0 switch x2 case 0 p(1) = (1-r1)^2*(1-r2)^2; p(2) = 4*r1*(1-r1)*r2*(1-r2); p(3) = r1^2*r2^2; case 1 p(1) = (1-r1)^2*2*r2*(1-r2); p(2) = 2*r1*(1-r1)*((1-r2)^2+r2^2); p(3) = r1^2*2*r2*(1-r2); case 2 p(1) = (1-r1)^2*r2^2; p(2) = 4*r1*(1-r1)*r2*(1-r2); p(3) = r1^2*(1-r2)^2; otherwise error('invalid genotype'); end case 1 switch x2 case 0 p(1) = 2*r1*(1-r1) * (1-r2)^2; p(2) = ((1-r1)^2+r2^2)*2*r2*(1-r2); p(3) = 2*r1*(1-r1)*r2^2; case 1 p(1) = 4*r1*(1-r1)*r2*(1-r2); p(2) = ((1-r1)^2 + r1^2)*((1-r2)^2 + r2^2); p(3) = 4*r1*(1-r1)*r2*(1-r2); case 2 p(1) = 2*r1*(1-r1)*r2^2; p(2) = ((1-r1)^2 + r1^2)*2*r2*(1-r2); p(3) = 2*r1*(1-r1)*(1-r2)^2; otherwise error('invalid genotype'); end case 2 switch x2 case 0 p(1) = r1^2 * (1-r2)^2; p(2) = 4*r1*(1-r1)*r2*(1-r2); p(3) = (1-r1)^2 * r2^2; case 1 p(1) = r1^2 * 2*r2*(1-r2); p(2) = 2*r1*(1-r1)*((1-r2)^2 + r2^2); p(3) = (1-r1)^2 * 2*r2*(1-r2); case 2 p(1) = r1^2 * r2^2; p(2) = 4*r1*(1-r1)*r2*(1-r2); p(3) = (1-r1)^2*(1-r2)^2; otherwise error('invalid genotype'); end otherwise error('invalid genotype'); end else switch x1 case 0 switch x2 case 0 p(1) = (1-r1)*(1-r2); p(2) = r1*r2; case 1 p(1) = (1-r1)*r2; p(2) = r1*(1-r2); otherwise error('invalid genotype'); end case 1 switch x2 case 0 p(1) = r1*(1-r2); p(2) = ((1-r1)^2+r2^2)*2*r2*(1-r2); case 1 p(1) = 4*r1*(1-r1)*r2*(1-r2); p(2) = ((1-r1)^2 + r1^2)*((1-r2)^2 + r2^2); otherwise error('invalid genotype'); end otherwise error('invalid genotype'); end end p = p / sum(p); y = frand(p); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function y = frand(p) %returns a random integer 0,...,n-1 according to the distribution in p y = sum(cumsum(p) < rand(1));