# $Id: combinations.R 1083 2007-03-23 22:53:00Z warnes $ # ## ## From email by Brian D Ripley to r-help ## dated Tue, 14 Dec 1999 11:14:04 +0000 (GMT) in response to ## Alex Ahgarin . Original version was ## named "subsets" and was Written by Bill Venables. ## combinations <- function(n, r, v = 1:n, set = TRUE, repeats.allowed=FALSE) { if(mode(n) != "numeric" || length(n) != 1 || n < 1 || (n %% 1) != 0) stop("bad value of n") if(mode(r) != "numeric" || length(r) != 1 || r < 1 || (r %% 1) != 0) stop("bad value of r") if(!is.atomic(v) || length(v) < n) stop("v is either non-atomic or too short") if( (r > n) & repeats.allowed==FALSE) stop("r > n and repeats.allowed=FALSE") if(set) { v <- unique(sort(v)) if (length(v) < n) stop("too few different elements") } v0 <- vector(mode(v), 0) ## Inner workhorse if(repeats.allowed) sub <- function(n, r, v) { if(r == 0) v0 else if(r == 1) matrix(v, n, 1) else if(n == 1) matrix(v, 1, r) else rbind( cbind(v[1], Recall(n, r-1, v)), Recall(n-1, r, v[-1])) } else sub <- function(n, r, v) { if(r == 0) v0 else if(r == 1) matrix(v, n, 1) else if(r == n) matrix(v, 1, n) else rbind(cbind(v[1], Recall(n-1, r-1, v[-1])), Recall(n-1, r, v[-1])) } sub(n, r, v[1:n]) } ## ## Original version by Bill Venables and cited by by Matthew ## Wiener (mcw@ln.nimh.nih.gov) in an email to R-help dated ## Tue, 14 Dec 1999 09:11:32 -0500 (EST) in response to ## Alex Ahgarin ## ## permutations <- function(n, r, v = 1:n, set = TRUE, repeats.allowed=FALSE) { if(mode(n) != "numeric" || length(n) != 1 || n < 1 || (n %% 1) != 0) stop("bad value of n") if(mode(r) != "numeric" || length(r) != 1 || r < 1 || (r %% 1) != 0) stop("bad value of r") if(!is.atomic(v) || length(v) < n) stop("v is either non-atomic or too short") if( (r > n) & repeats.allowed==FALSE) stop("r > n and repeats.allowed=FALSE") if(set) { v <- unique(sort(v)) if (length(v) < n) stop("too few different elements") } v0 <- vector(mode(v), 0) ## Inner workhorse if(repeats.allowed) sub <- function(n, r, v) { if(r==1) matrix(v,n,1) else if(n==1) matrix(v,1,r) else { inner <- Recall(n, r-1, v) cbind( rep( v, rep(nrow(inner),n) ), matrix( t(inner), ncol=ncol(inner), nrow=nrow(inner) * n , byrow=TRUE ) ) } } else sub <- function(n, r, v) { if(r==1) matrix(v,n,1) else if(n==1) matrix(v,1,r) else { X <- NULL for(i in 1:n) X <- rbind( X, cbind( v[i], Recall(n-1, r - 1, v[-i]))) X } } sub(n, r, v[1:n]) } # Clear the workspace of all objects whose names don't start with a full stop clear <- function() { env <- as.environment(1) to.go <- ls(env, all.names=FALSE) continue <- TRUE while (continue) { nxt <- search()[[2]] # bit of a botch if (substr(nxt, 1, 8)!="package:") detach() else continue <- FALSE } remove(list=to.go, envir=env) } # Simple logistic regression wrapper logistic <- function(formula, weights) { if (missing(weights)) res <- glm(formula, family=binomial) else { wt <- weights res <- glm(formula, family=binomial, weights=wt) } class(res) <- c("logistic", "glm", "lm") res$call$formula <- formula res } print.logistic <- function(x) { sx <- summary(x) beta <- sx$coefficients intcp <- match("(Intercept)", rownames(beta)) if (!is.na(intcp)) { beta <- beta[-intcp,,drop=FALSE] } or <- matrix(nrow=nrow(beta), ncol=5) rownames(or) <- rownames(beta) colnames(or) <- c("OR", "Lower", "Upper", "z-test", "P-value") or[,1] <- exp(beta[,1]) or[,2] <- exp(beta[,1]-1.96*beta[,2]) or[,3] <- exp(beta[,1]+1.96*beta[,2]) or[,4] <- beta[,3] or[,5] <- beta[,4] cat("Logistic regression: ", deparse(sx$call$formula), "\n\n") cat("Odds ratios (1 unit change), lower and upper confidence limits, and tests:\n\n") print(or) } # Find most representative member of each cluster in a hierarchical clustering # clusters: object calculated either by rect.hclust or by cutree # In case of ties, select member with highest value for tie.break representative <- function(clusters, distance, minimize="mean", tie.break){ cri <- match(minimize, c("mean", "maximum")) if (is.na(cri)) stop("minimize argument should be mean or minimum") if (class(distance)=="dist") { distance <- as.matrix(distance) } else if (!is.matrix(distance)) stop("distance argument must be a distance object or a matrix") N <- nrow(distance) if (!is.list(clusters)) { if (length(clusters)!=N) stop("Incorrect length for clusters argument") ncl <- max(clusters) nms <- names(clusters) new <- vector("list", ncl) ord <- 1:N for (i in 1:ncl) { cli <- ord[clusters==i] names(cli) <- nms[clusters==i] new[[i]] <- cli } clusters <- new } ncl <- length(clusters) if (missing(tie.break)) tie.break <- runif(N) else if (length(tie.break)!=N) stop("tie.break argument has the wrong length") diag(distance) <- NA reps <- numeric(ncl) nreps <- character(ncl) dreps <- numeric(ncl) for (i in 1:ncl) { cli <- clusters[[i]] nmi <- names(cli) size <- length(cli) if (size==1) { reps[i] <- cli[1] nreps[i] <- nmi[1] dreps[i] <- 0 } else { di <- distance[cli, cli] pri <- tie.break[cli] if (cri==1) ds <- apply(di, 1, mean, na.rm=TRUE) else ds <- apply(di, 1, max, na.rm=TRUE) best <- min(ds) cand <- best==ds if (sum(cand)==1) { reps[i] <- cli[cand] nreps[i] <- nmi[cand] } else { maxpr <- max(pri[cand]) ch <- (1:size)[cand] which <- ch[match(maxpr, pri[cand])] reps[i] <- cli[which] nreps[i] <- nmi[which] } dreps[i] <- best } } names(reps) <- nreps names(dreps) <- nreps isr <- rep(FALSE, N) isr[reps] <- TRUE list(rep=reps, is.rep=isr, dist=dreps) } # Weighted table wtable <- function(..., weights) { by <- list(...) if (missing(weights)) table(by) else { tw <- tapply(weights, by, sum, na.rm=TRUE) tw[is.na(tw)] <- 0 tw } } # Weighted means wmean <- function(x, weights, na.rm=TRUE) { if (missing(weights)) mean(x, na.rm=na.rm) else sum(x*weights, na.rm=na.rm)/sum(weights, na.rm=na.rm) } # Table of means mean.table <- function(x, ..., weights) { by <- list(...) if (missing(weights)) { num <- tapply(x, by, sum, na.rm=TRUE) den <- table(by) tab <- num/den class(tab) <- "table" tab } else { num <- tapply(x*weights, by, sum, na.rm=TRUE) den <- tapply(weights, by, sum, na.rm=TRUE) num/den } } # Cochran-Armitage test - with Mantel extension # See Clayton & Hills 20.3 pp 201-203 Cochran.Armitage.test <- function(exposure, cc, stratum=rep(1,length(cc))) { N <- length(cc) if (is.factor(exposure)) { if (is.ordered(exposure)) exposure <- as.numeric(exposure) else stop("exposure argument must be numeric or an ordered factor") } cl <- match.call() narg <- length(cl) - 1 arguments <- character(narg) for (i in 1:narg) { argi <- as.character(cl[[i+1]]) arguments[i] <- paste(argi, collapse=".") } use <- !(is.na(cc) | is.na(exposure) | is.na(stratum)) if (any(!use)) { cc <- cc[use] exposure <- exposure[use] stratum <- stratum[use] } dh <- table(stratum, cc) if (ncol(dh)!=2) stop("cc argument must have two levels") nt <- table(stratum) zm <- tapply(exposure, list(stratum, cc), mean) ut <- dh[,1]*dh[,2]*(zm[,1] - zm[,2])/nt zv <- tapply(exposure, stratum, var) vt <- dh[,1]*dh[,2]*zv/nt x2 <- sum(ut, na.rm=TRUE)^2/sum(vt, na.rm=TRUE) names(x2) <- "Chi-squared" df <- 1 names(df) <- "df" res <- list(statistic=x2, parameter=df, p.value=pchisq(x2, 1, lower.tail=FALSE), method="Cochran-Armitage test with Mantel's extension", data.name=paste(arguments, paste=" "), score=ut, score.variance=vt) class(res) <- "htest" res } # expectedGenotypes.R #-------------------------------------------------------------------------- # What: Construct expected genotypes according to known allele variants # Time-stamp: <2007-07-19 12:06:26 ggorjan> #-------------------------------------------------------------------------- expectedGenotypes <- function(x, alleles=allele.names(x), ploidy=2, sort=TRUE, haplotype=FALSE) { ## Checks if (missing(x) && missing(alleles)) stop("at least one of 'x' or 'alleles' must be given") if (!(missing(x) && !missing(alleles))) { if (!is.genotype(x)) stop("x must be of class 'genotype'") } nA <- length(alleles) if(nA==0) stop("Zero length alleles vector.") ## Add numbers to allele names, so we get result sorted by ## given allele names if (sort) { allelesOrig <- alleles alleles <- paste(1:nA, alleles, sep="") } ## Find possible genotypes according to allele variants if(!haplotype) { comb <- combinations(n=nA, r=ploidy, v=alleles, repeats.allowed=T) } else { comb <- permutations(n=nA, r=ploidy, v=alleles, repeats.allowed=T) } if (sort) { for (i in 1:nA) comb[comb == alleles[i]] <- allelesOrig[i] } ## Create a nice character vector of expected genotypes nC <- dim(comb) ret <- vector(mode="character", length=nC[1]) ret <- paste(comb[, 1], comb[, 2], sep="/") if (ploidy > 2) { for (i in 3:nC[2]) { ret <- paste(ret, comb[, i], sep="/") } } return(ret) } expectedHaplotypes <- function(x, alleles=allele.names(x), ploidy=2, sort=TRUE, haplotype=TRUE) { expectedGenotypes(x=x, alleles=alleles, ploidy=ploidy, sort=sort, haplotype=haplotype) } #-------------------------------------------------------------------------- # expectedGenotypes.R ends here # $Id: genotype.R 1329 2007-11-20 20:20:51Z warnes $ genotype <- function(a1, a2=NULL, alleles=NULL, sep="/", remove.spaces=TRUE, reorder=c("yes", "no", "default", "ascii", "freq"), allow.partial.missing=FALSE, locus=NULL, genotypeOrder=NULL) { if(missing(reorder)) reorder <- "freq" else reorder <- match.arg(reorder) if(is.genotype(a1)){ a1 <- as.character(a1) ## ignore a2 a2 <- NULL } else { a1.d <- dim(a1) a1 <- as.character(a1) dim(a1) <- a1.d a1[is.na(a1)] <- "" # necessary because of bug in grep & friends, # will be fixed in 1.7.1 } if(!is.null(a2)) { a2.d <- dim(a2) a2 <- as.character(a2) dim(a2) <- a2.d a1[is.na(a1)] <- "" # necessary because of bugs in grep & friends # will be fixed in 1.7.1 } if(remove.spaces) { a1dim <- dim(a1) a1 <- gsub("[ \t]", "", a1) dim(a1) <- a1dim if(!is.null(a2)) a2 <- gsub("[ \t]", "", a2) } if(!is.null(dim(a1)) && ncol(a1) > 1) parts <- a1[,1:2] else if(!is.null(a2)) parts <- cbind(a1,a2) else { # if sep is empty, assume allele names are single characters # pasted together if(sep=="") sep <- 1 # Based on the value of sep, reformat into our standard # name-slash-name format if (is.character(sep) ) { part.list <- strsplit(a1,sep) part.list[ sapply(part.list, length)==0] <- NA ## Handle missing / empty values correctly. ## Without this, empty elements are silently dropped ## and/or cause errors # only first field was given half.empties <- lapply(part.list, length)==1 part.list[half.empties] <- lapply(part.list[half.empties],c,NA) # neither field was given empties <- is.na(a1) | lapply(part.list, length)==0 part.list[empties] <- list(c(NA,NA)) parts <- matrix(unlist(part.list),ncol=2,byrow=TRUE) } else if (is.numeric(sep)) parts <- cbind( substring(a1,1,sep), substring(a1,sep+1,9999)) else stop(paste("I don't know how to handle sep=",sep)) } mode(parts) <- "character" # needed for bare NA's o # convert entirely whitespace alleles to NAs temp <- grep("^[ \t]*$", parts) parts[temp] <- NA #parts[parts=="NA"] <- NA if(missing(alleles) || is.null(alleles)) alleles <- unique(c(na.omit(parts))) else { which.alleles <- !(parts %in% alleles) ## Skipping NA's which.alleles <- which.alleles & !is.na(parts) if(any(which.alleles)) { warning("Found data values not matching specified alleles. ", "Converting to NA.") parts[which.alleles] <- NA } } if(!allow.partial.missing) parts[is.na(parts[,1]) | is.na(parts[,2]),] <- c(NA,NA) if(reorder!="no") { if(reorder=="ascii") { alleles <- sort(alleles) } else if(reorder=="freq") { ## get reordering of alleles by frequency tmp <- names(rev(sort(table(parts)))) alleles <- unique(c(tmp,alleles)) } reorder <- function( x, alleles) { tmp <- match( x, alleles ) x[order(tmp)] } parts <- t(apply(parts,1, reorder, alleles)) } tmp <- ifelse( is.na(parts[,1]) & is.na(parts[,2]), NA, apply(parts,1,paste,collapse="/") ) object <- factor( tmp ) # force "NA" not to be a factor level ll <- levels(object) <- na.omit(levels(object)) class(object) <- c("genotype","factor") attr(object,"allele.names") <- alleles attr(object,"allele.map") <- do.call("rbind", strsplit(ll, "/")) genotypeOrder(object) <- genotypeOrder if(is.null(locus) || is.locus(locus) ) attr(object,"locus") <- locus else stop("parameter locus must be of class locus") return(object) } is.genotype <- function(x) inherits(x, "genotype") is.haplotype <- function(x) inherits(x, "haplotype") ### ### Haplotype -- differs only in that order of a1,a2 is considered siginificant ### haplotype <- function (a1, a2 = NULL, alleles = NULL, sep = "/", remove.spaces = TRUE, reorder = "no", allow.partial.missing = FALSE, locus = NULL, genotypeOrder=NULL) { retval <- genotype(a1 = a1, a2 = a2, alleles = alleles, sep = sep, remove.spaces = remove.spaces, reorder = reorder, allow.partial.missing = allow.partial.missing, locus = locus, genotypeOrder=genotypeOrder) class(retval) <- c("haplotype", "genotype", "factor") retval } as.haplotype <- function(x,...) { retval <- as.genotype(x,...,reorder="no") class(retval) <- c("haplotype","genotype","factor") retval } ### ### Display by giving values plus list of alleles ### print.genotype <- function(x,...) { if(!is.null(attr(x,"locus"))) print(attr(x,"locus")) print(as.character(x)) cat("Alleles:", allele.names(x), "\n" ) invisible(x) } ### ### Conversion Functions ### as.genotype <- function (x,...) UseMethod("as.genotype") # Do we want to do this? as.genotype.default <- function(x,...) genotype(x,...) # stop("No method to convert this object to a genotype") # for characters, and factors, just do the standard thing (factors get # implicitly converted to characters so both have the same effect. as.genotype.character <- function(x,...) genotype(x,...) as.genotype.factor <- function(x,...) genotype(as.character(x),...) as.genotype.genotype <- function(x,...) return(x) as.genotype.haplotype <- function(x,...) return(x) ## genotype.allele.counts give the count of each allele type as a ## matrix. Collapse back into the form we need as.genotype.allele.count <- function(x, alleles=c("A","B"), ...) { if(!is.matrix(x) & !is.data.frame(x) ) { x <- cbind(x, 2-x) colnames(x) <- alleles } if(any(x > 2, na.rm=TRUE) || any( x < 0, na.rm=TRUE ) ) stop("Allele counts must be in {0,1,2}") allele.names <- colnames(x) tmp <- apply(x, 1, function(y) rep( colnames(x), ifelse(is.na(y), 0, y) )) if(!is.matrix(tmp)) retval <- genotype(sapply(tmp,paste,collapse="/"), alleles=alleles, ...) else retval <- genotype(a1=tmp[1,], a2=tmp[2,], ... ) return(retval) } allele.count.2.genotype <- function(...) as.genotype.allele.count(...) as.genotype.table <- function(x, alleles, ...) { #if(missing(alleles)) alleles <- unique(unlist(dimnames(x))) tmp <- outer( rownames(x), colnames(x), paste, sep="/") retval <- genotype( rep(tmp,x), alleles=alleles ) retval } ### ### Equality test for genotype, assumes allele order is _not_ significant ### "==.genotype" <- function(x,y) { if(!is.genotype(y)) y <- as.genotype(y) x.a1 <- allele(x,1) x.a2 <- allele(x,2) y.a1 <- allele(y,1) y.a2 <- allele(y,2) return( (x.a1==y.a1 & x.a2==y.a2) | (x.a1==y.a2 & x.a2==y.a1) ) } ### ### Equality test for haplotype, assumes allele order _is_ significant ### "==.haplotype" <- function(x,y) { if(!is.genotype(y)) y <- as.haplotype(y) x.a1 <- allele(x,1) x.a2 <- allele(x,2) y.a1 <- allele(y,1) y.a2 <- allele(y,2) return( x.a1==y.a1 & x.a2==y.a2 ) } ### ### is.element i.e. %in% ### "%in%" <- function(x, table) UseMethod("%in%") ## Get default method for %in% from base package "%in%.default" <- get("%in%", pos="package:base") "%in%.genotype" <- function(x, table) { xA1 <- allele(x, 1) xA2 <- allele(x, 2) x1 <- paste(xA1, xA2, sep="/") x2 <- paste(xA2, xA1, sep="/") ## Return ((x1 %in% table) | (x2 %in% table)) } "%in%.haplotype" <- function(x, table) as.character(x) %in% as.character(table) ### ### Extract the first and/or second allele. ### ### By default, return a 2 column matrix containing both alleles ### #allele <- function (x,...) # UseMethod("allele") #allele.genotype <- function(x, which=c(1,2) ) allele <- function(x, which=c(1,2) ) { alleles.x <- attr(x,"allele.map") retval <- alleles.x[as.integer(x),which] attr(retval,"locus") <- attr(x,"locus") attr(retval,"which") <- which attr(retval,"allele.names") <- allele.names(x) #class(retval) <- c("allele.genotype", class(retval)) return( retval) } as.factor <- function(x, ...) UseMethod("as.factor") as.factor.default <- get("as.factor",pos="package:base") formals(as.factor.default) <- c(formals(as.factor.default),alist(...= )) as.factor.genotype <- function(x, ...) { attr(x,"class") <- "factor" attr(x,"allele.names") <- NULL attr(x,"allele.map") <- NULL attr(x,"locus") <- NULL attr(x,"genotypeOrder") <- NULL x } as.factor.allele.genotype <- function(x,...) factor(x,levels=allele.names(x)) print.allele.genotype <- function(x,...) { if(!is.null(attr(x,"locus"))) print(attr(x,"locus")) cat("Allele(s):", attr(x,"which"), "\n") attr(x, "which") <- attr(x, "class") <- attr(x,"locus") <- attr(x,"allele.names") <- NULL NextMethod("print",x) } ### ### Obtain the count of the number of copies of alleles for each individual ### ### By default, return a matrix containing the counts for all possible allele values. ### #allele.count <- function (x,...) # UseMethod("allele.count") #allele.count.default <- function (x, ... ) # { # x <- as.genotype(x) # allele.count(x, ...) # } #allele.count.genotype <- function(x, allele.name=allele.names(x), allele.count <- function(x, allele.name=allele.names(x), any=!missing(allele.name), na.rm=FALSE) { if(!missing(allele.name) && length(allele.name)==1) { a.1 <- allele(x,1) a.2 <- allele(x,2) retval <- ifelse(is.na(a.1) | is.na(a.2), ifelse(na.rm, 0, NA), (a.1==allele.name) + (a.2==allele.name) ) # class(retval) <- "allele.count" attr(retval,"allele") <- allele.name attr(retval,"locus") <- attr(x,"locus") return(retval) } else { retval <- sapply( allele.name, function(y) allele.count(x,y)) if(any==TRUE && is.matrix(retval) ) retval <- apply(retval,1,sum,na.rm=na.rm) if(na.rm) retval[is.na(retval)] <- 0 # class(retval) <- "allele.count" attr(retval,"locus") <- attr(x,"locus") return(retval) } } #print.allele.count <- function(x,...) # { # if(!is.null(attr(x,"locus"))) # print(attr(x,"locus")) # # if(is.null(attr(x,"allele"))) # cat("Allele Counts:\n") # else # cat("Allele Count (", attr(x,"allele"), " allele):\n", sep="") # val <- x # attr(val,"class") <- NULL # attr(val,"allele") <- NULL # print(val) # invisible(x) # } ### ### Check for the presence of alleles for each individual ### ### By default, return a matrix containing indicators for all possible ### allele values except the last. ### # #allele.ind <- function(x,allele) # { ## if(missing(allele)) ## stop("Alleles to test must be specified") ## if(length(allele)==1) # retval <- allele.count(x,allele) > 0 ## else ## retval <- apply(allele.count(x,allele) ,1,sum) > 0 # # if(missing(allele)) # allele <- colnames(retval) # attr(retval,"allele") <- allele # attr(retval,"locus") <- attr(x,"locus") # class(retval) <- "allele.ind" # return(retval) # } #print.allele.ind <- function(x,...) # { # if(!is.null(attr(x,"locus"))) # print(attr(x,"locus")) # # cat("Indicator(s) for allele(s):", attr(x,"allele"), "\n") # attr(x,"locus") <- attr(x,"class") <- attr(x,"allele") <- NULL # NextMethod("print",x) # } ### ### Methods for creating subsets based on a genotype ### homozygote <- function (x,allele.name,...) UseMethod("homozygote") homozygote.genotype <- function(x,allele.name,...) { a1 <- allele(x,1) a2 <- allele(x,2) if(missing(allele.name)) retval <- ifelse( is.na(a1) | is.na(a2), NA, a1==a2 ) else retval <- ifelse( is.na(a1) | is.na(a2), NA, a1==allele.name & a2==allele.name ) attr(retval,"locus") <- attr(x,"locus") # class(retval) <- "homozygote" return(retval) } #print.homozygote <- function(x,...) # { # if(!is.null(attr(x,"locus"))) # print(attr(x,"locus")) # # cat("Homozygote Indicators:\n") # attr(x,"locus") <- attr(x,"class") <- attr(x,"allele") <- NULL # NextMethod("print",x) # } heterozygote <- function (x,allele.name,...) UseMethod("heterozygote") heterozygote.genotype <- function(x,allele.name,...) { { a1 <- allele(x,1) a2 <- allele(x,2) if(missing(allele.name)) retval <- ifelse( is.na(a1) | is.na(a2), NA, !a1==a2 ) else retval <- ((a1 %in% allele.name) | (a2 %in% allele.name)) & (a1 != a2) attr(retval,"locus") <- attr(x,"locus") # class(retval) <- "homozygote" return(retval) } } #print.heterozygote <- function(x,...) # { # if(!is.null(attr(x,"locus"))) # print(attr(x,"locus")) # # cat("Heterozygote Indicators:\n") # attr(x,"locus") <- attr(x,"class") <- attr(x,"allele") <- NULL # NextMethod("print",x) # } carrier <- function (x,allele.name,...) UseMethod("carrier") carrier.genotype <- function(x, allele.name=allele.names(x), any=!missing(allele.name), na.rm=FALSE, ...) { retval <- allele.count(x,allele.name=allele.name,any=any,na.rm=na.rm) > 0 attr(retval,"allele") <- retval$allele attr(retval,"locus") <- attr(x,"locus") # class(retval) <- "carrier" return(retval) } #print.carrier <- function(x,...) # { # if(!is.null(attr(x,"locus"))) # print(attr(x,"locus")) # # cat("Carrier Indicator(s) for allele(s):", attr(x,"allele"), "\n") # attr(x,"locus") <- attr(x,"class") <- attr(x,"allele") <- NULL # NextMethod("print",unclass(x)) # } ### ### ### allele.names<- function(x) { retval <- attr(x,"allele.names") if(is.null(retval)) retval <- x$allele.names return(retval) } ### ### Subset method ### "[.genotype" <- function(x, i, drop=FALSE) { allelesOld <- attr(x, "allele.names") retval <- NextMethod("[") # force "NA" not to be a factor level ll <- levels(retval) <- na.omit(levels(retval)) class(retval) <- c("genotype","factor") if(drop) { alleles <- unique( unlist(strsplit(ll, "/") ) ) } else { alleles <- attr(x, "allele.names") } attr(retval,"allele.names") <- alleles attr(retval,"allele.map") <- do.call("rbind", strsplit(ll, "/")) attr(retval,"locus") <- attr(x,"locus") attr(retval,"label") <- attr(x,"label") goCur <- attr(x, "genotypeOrder") if(drop) { ## Removing genotype names having dropped alleles allelesOld <- allelesOld[!(allelesOld %in% alleles)] tmp <- allele(as.haplotype(goCur)) test <- tmp %in% allelesOld test <- rowSums(matrix(test, ncol=2)) > 0 attr(retval, "genotypeOrder") <- goCur[!test] } else { attr(retval, "genotypeOrder") <- goCur } return(retval) } "[.haplotype" <- function(x, i, drop=FALSE) { retval <- NextMethod("[") class(retval) <- c("haplotype","genotype","factor") retval } ### ### Subset Assigment method ### "[<-.genotype" <- function(x, i, value) { ## Special case for insertion of NA and "" values if(all( is.na(value) | as.character(value)<="" ) ) { x.class <- class(x) x <- unclass(x) x[i] <- NA class(x) <- x.class return(x) } if(!is.genotype(value)) { value <- genotype(value) } lx <- levels(x) lv <- levels(value) ax <- allele.names(x) av <- allele.names(value) m <- is.na(match(av,ax) ) if( any( m ) ) warning(paste("Adding new allele name:", av[m], "\n")) la <- unique(c(lx,lv)) aa <- unique(c(ax,av)) cx <- class(x) nas <- is.na(x) data <- match(levels(value)[value],la) class(x) <- NULL x[i] <- data attr(x, "levels") <- la map <- attr(x, "allele.map") <- do.call("rbind", strsplit(la, "/")) attr(x, "allele.names") <- aa goCur <- attr(x, "genotypeOrder") goAll <- expectedGenotypes(alleles=aa, haplotype=TRUE) attr(x, "genotypeOrder") <- c(goCur, goAll[!(goAll %in% goCur)]) class(x) <- cx x } "[<-.haplotype" <- function(x, i, value) { if(!is.haplotype(value)) stop("Assigned value must be of class haplotype.") NextMethod("[<-") } nallele <- function(x) length(allele.names(x)) genotypeOrder <- function(x) attr(x, "genotypeOrder") "genotypeOrder<-" <- function(x, value) { if(!is.genotype(x)) stop("'x' must be of a genotype class") alleles <- allele.names(x) goAll <- expectedGenotypes(alleles=alleles, haplotype=TRUE) goDef <- unique(sort(as.character(x))) if(is.null(value)) { attr(x, "genotypeOrder") <- goAll } else { value <- unique(value) ## Stop msg says all parts <- strsplit(x=value, split="/") parts <- sapply(parts, c) test <- !(parts %in% alleles) if(any(test)) stop("adding genotype names with alleles that are not in the data") ## Any genotypes in the data that are not in value? test <- !(goDef %in% value) if(any(test)) { ## These values are in all possible genotypes/haplotypes testDefinAll <- goDef[test] %in% goAll ## but not in value testDefinAllnotVa <- !(goDef[testDefinAll] %in% value) goPos <- goDef[testDefinAllnotVa] ## We could simply add goPos to value now. However, A/B in goPos ## should also match B/A, since genotype() allows reordering of ## original data and additionally we want this to work also for ## haplotype. ## Extend value first. We do not do this before, since one ## might not necessarily like to have B/A together with A/B in ## first place. value <- genetics:::.genotype2Haplotype(x=value) ## Remove heterozygos matches in goPos test <- !(goPos %in% value) goPos <- goPos[test] ## Add goPos to the end of value if(any(test)) value <- c(value, goPos) ## If there are still some values in all, but not in value ## now, we just add them at the end testGOnotAll <- !(goAll %in% value) if(any(testGOnotAll)) value <- c(value, goAll[testGOnotAll]) } else { value <- genetics:::.genotype2Haplotype(x=value) } attr(x, "genotypeOrder") <- value } x } .genotype2Haplotype <- function(x) { ## Internal function ## ## Returns a character vector of possible haplotypes for given genotypes ## in such a way that for say c("A/A", "A/B", "B/B") you get c("A/A", ## "A/B", "B/A", "B/B") i.e. "B/A" comes directly after "A/B"! ## ## x - character, vector of genotype values in form allele1/allele2 ## ## Details ## Unique values of x are taken i.e. first occurrence prevails ## ## Example ## genetics:::.genotype2Haplotype(c("A/A", "A/B", "B/B")) ## "A/A" "A/B" "B/A" "B/B" ## genetics:::.genotype2Haplotype(c("B/B", "A/B", "A/A")) ## "B/B" "A/B" "B/A" "A/A" x <- unique(x) N <- length(x) parts <- genetics:::.genotype2Allele(x=x) parts <- rbind(parts, parts[, 2:1]) ind <- rep(1:N, each=2) + c(0, N) parts <- parts[ind, ] parts <- unique(paste(parts[, 1], parts[, 2], sep="/")) parts } .genotype2Allele <- function(x) { ## Internal function ## ## Returns a matrix of alleles from a character vector of genotype names ## ## x - character, vector of genotype values in form allele1/allele2 ## ## Details: ## Coercing to character is done for x. ## ## Example ## genetics:::.genotype2Allele(c("A/A", "A/B", "B/B")) ## [,1] [,2] ## [1,] "A" "A" ## [2,] "A" "B" ## [3,] "B" "B" parts <- strsplit(x=as.character(x), split="/") parts <- t(sapply(parts, c)) parts } is.qna <- function(x) {is.na(x) | x=="NA"} allele.pairs <- function(names, postfixes = c(".1", ".2")) { if (!is.character(names)) stop("illegal names argument") if (!is.character(postfixes) || length(postfixes)!=2) stop("illegal postfixes argument") N <- length(names) s1 <- sub(paste(postfixes[1],"$",sep=""), "", names) s2 <- sub(paste(postfixes[2],"$",sep=""), "", names) res <- list() nres <- NULL for (i in 1:N) { if (s1[i]!=names[i]) { for (j in 1:N) { if (j!=i && s2[i]!=names[j] && s2[j]==s1[i]) { new <- list(c(names[i], names[j])) names(new) <- s1[i] res <- c(res, new) } } } } res } # This is very basic at present and only loads marker position "annotation<-" <- function(data, value, reorder=FALSE, ...) { if (!(is.data.frame(data) && is.data.frame(value))) stop("Arguments must be dataframes") varnames <- names(data) nv <- length(varnames) n.an <- nrow(value) gtypes <- NULL positions <- NULL others <- NULL for (i in 1:nv) { var <- data[[i]] if (is.genotype(var)) { lname <- varnames[i] m <- (value$name == lname) sm <- sum(m) if (sm==0) warning(paste("No annotation found for", lname)) else { if (sm>1) { warning(paste("Duplicate annotations found for", lname, "- using first")) } line <- min((1:n.an)[m]) type <- value$type[line] if (is.null(type)) type <- "SNP" bps <- value$bp.start[line] bpe <- value$bp.end[line] lg <- "" mark <- marker(lname, type, lg, bps, bpe) attr(data[[i]], "locus") <- mark gtypes <- c(gtypes, i) positions <- c(positions, bps) } } else others <- c(others, i) } if (reorder) { reord <- c(others, gtypes[order(positions)]) out <- vector("list", nv) for (i in 1:nv) { out[[i]] <- data[[reord[i]]] } names(out) <- varnames[reord] return(as.data.frame(out)) } else { return(data) } } # Contrast matrix for additive, dominance, and parent-of-origin, # genotype, and haplotype effects genetic.contrasts <- function(g, mode="additive", base) { if (!is.genotype(g)) { stop("Argument must be of class genotype") } if (!is.character(mode)) stop("Illegal contrast specification") modes <- c("genotype", "haplotype", "additive", "dominance", "additive.origin", "dominance.origin", "parent.of.origin") # For backward compatibility mode <- match(mode, modes) if (is.na(mode)) stop("Illegal contrast specification") if (mode>4 && !is.haplotype(g)) stop("Parent-of-origin contrasts only legal for haplotype variables") if (mode==2 && !is.haplotype(g)) stop("Haplotype contrasts only legal for haplotype variables") if (mode==7) mode <- 6 alleles <- attr(g, "allele.names") na <- length(alleles) nd <- (na*(na-1))/2 cnames <- character(nd) levs <- attr(g,"levels") if (length(levs)<2) { stop("Genotype is not polymorphic") } if (mode > 2) { if (missing(base)) { af <- allele.table(g) base <- order(alleles)[match(max(af), af)] } if (is.numeric(base)) { if (base<0 || base>na) { stop("base out of range") } } else if (is.character(base)) { bi <- match(base, alleles) if (is.na(bi)) stop("Base allele not found") base <- bi } else { stop("Illegal base argument") } if (base!=0) alladd <- alleles[-base] else alladd <- alleles add <- matrix(nrow=length(levs), ncol=length(alladd), dimnames=list(levs, alladd)) storage.mode(add) <- "double" map <- attr(g, "allele.map") for (all in alladd) add[, all] <- ((map[,1]==all) + (map[,2]==all)) prefix <- ":a" colnames(add) <- paste(prefix, alladd, sep=":") cmat <- add if (mode==4 || mode==6){ dom <- matrix(nrow = length(levs), ncol=nd) storage.mode(dom) <- "double" prefix <- ":d" ij <- 0 for (i in 1:(na-1)) { ai <- alleles[i] for (j in (i+1):na) { aj <- alleles[j] ij <- ij+1 cnames[ij] <- paste(prefix, ai, aj, sep=":") dom[,ij] <- ((map[,1]==ai & map[,2]==aj)|(map[,1]==aj & map[,2]==ai)) } } rownames(dom) <- levs colnames(dom) <- cnames cmat <- cbind(cmat, dom) } if (mode==5 || mode==6) { poo <- matrix(nrow = length(levs), ncol=nd) storage.mode(poo) <- "double" prefix <- ":p" ij <- 0 for (i in 1:(na-1)) { ai <- alleles[i] for (j in (i+1):na) { aj <- alleles[j] ij <- ij+1 cnames[ij] <- paste(prefix, ai, aj, sep=":") poo[,ij] <- ((map[,1]==ai & map[,2]==aj) - (map[,1]==aj & map[,2]==ai))/2 } } rownames(poo) <- levs colnames(poo) <- cnames cmat <- cbind(cmat, poo) } } else { if (missing(base)) { gf <- table(g) base <- order(levs)[match(max(gf), gf)] } if (is.character(base)) base <- match(base, levels(g)) if (!is.haplotype(g) || mode==2) { cmat <- contr.treatment(levels(g), base=base) } else { nl <- length(levs) ng <- (na*(na+1))/2 cmat <- matrix(nrow=nl, ncol=ng) cnames <- character(ng) am <- attr(g, "allele.map") ig <- 0 for (i in 1:na) { for (j in 1:i) { ig <- ig+1 cnames[ig] <- paste(alleles[j], alleles[i], sep="/") cmat[,ig] <- (am[,1]==alleles[i] & am[,2]==alleles[j]) | (am[,2]==alleles[i] & am[,1]==alleles[j]) } } keep <- !cmat[base,] cmat <- matrix(as.numeric(cmat[,keep]), nrow=nl) dimnames(cmat) <- list(levs, cnames[keep]) } } attr(cmat, "mode") = modes[mode] cmat } # Functions to use in model formula additive <- function(g, base) { attr(g, "contrasts") <- genetic.contrasts(g, "additive", base) g } dominance <- function(g, base) { attr(g, "contrasts") <- genetic.contrasts(g, "dominance", base) g } additive.origin <- function(g, base) { attr(g, "contrasts") <- genetic.contrasts(g, "additive.origin", base) g } dominance.origin <- function(g, base) { attr(g, "contrasts") <- genetic.contrasts(g, "dominance.origin", base) g } porigin <- function(g, base) { attr(g, "contrasts") <- genetic.contrasts(g, "parent.of.origin", base) g } # Interrogate genotype contrasts gcontrasts <- function(g) { contrasts(g) } # Set mode of a genotype "gcontrasts<-" <- function(g, base, value) { if (!is.genotype(g)) stop("argument must be a genotype") modes <- c("genotype", "haplotype", "additive", "dominance", "additive.origin", "dominance.origin", "parent.of.origin") # For backward compatibility if (is.character(value)) { if (is.na(match(value, modes))) stop("Unrecognized contrast spec") cntr <- genetic.contrasts(g, mode=value, base=base) } else if (is.matrix(value)) { if (any(sort(levels(g))!= sort(rownames(value)))) stop("Row names of contrast matrix don't match genotypes/haplotypes") cntr <- value } else stop("Illegal contrast spec") how.many <- ncol(cntr) contrasts(g, how.many) <- cntr g } # And of all genotypes in a dataframe "default.mode<-" <- function(data, value) { if (!is.data.frame(data)) stop("argument must be a dataframe") if (!is.character(value)) stop("Illegal mode") m <- match(value, c("additive", "dominance", "parent.origin", "genotype", "haplotype")) if (is.na(m)) stop("Illegal mode") nv <- length(data) for (i in 1:nv) { if (is.genotype(data[[i]])) { if (!is.haplotype(data[[i]])) { if (m==3) m <- 2 if (m==5) m <- 4 } gcontrasts(data[[i]]) <- c("additive", "dominance", "parent.origin", "genotype", "haplotype")[m] } } data } # Interrogate contrasts for mode inheritance.mode <- function(g) { cntr <- attr(g, "contrasts") if (is.null(cntr)) NULL else { pre <- substr(colnames(cntr), 1, 3) comp <- match(pre, c(":a:", ":d:", ":p:")) if (any(is.na(comp))) NULL else c("additive", "dominance", "parent.of.origin")[max(comp)] } } # Merge minor alleles if MAF less than a threshold. # Can be applied to a single genotype (haplotype) variable or a dataframe # Variable is dropped if merging can't solve the problem mamerge <- function(object, maf=0.05, others="*") { df <- is.data.frame(object) if (!df) { if (is.genotype(object)) object <- list(genotype=object) else stop("argument is not a dataframe or a genotype") } if (!is.character(others) || nchar(others)!=1) stop("others argument must be a single character") obj.names <- names(object) nobj <- length(object) status <- rep(0, nobj) for (i in 1:nobj) { ob <- object[[i]] if (is.genotype(ob)) { af <- apply(allele.count(ob), 2, mean, na.rm=TRUE)/2 if (min(af)2) { asc <- (1:na)[order(af)] one <- asc[1] two <- asc[2] name.one <- an[one] name.two <- an[two] af[two] <- af[two]+af[one] if (substr(name.one, 1, 1)=="others") an[two] <- name.one else if(substr(name.two, 1, 1)!="others") { an[two] <- lab lab <- paste(lab, others, sep="") } af <- af[-one] an <- an[-one] map <- ifelse(map==name.one | map==name.two, others, map) na <- na - 1 } if (min(af) 1) { count <- matrix(nrow=N, ncol=ng) for (i in 1:ng) { ig <- gts[i] gi <- data[[ig]] count[,i] <- allele.count(gi, allele.names(gi)[1]) } for (i in 1:(ng-1)) { ci <- count[,i] for (j in (i+1):ng) { cj <- count[,j] identical <- all(ci==cj, na.rm=TRUE) reversed <- all((ci+cj)==2, na.rm=TRUE) if (identical || reversed) { change <- TRUE if (amalgamate) { status[i] <- -1 # delete if (any((is.na(ci)&!is.na(cj))|(is.na(cj)&!is.na(ci)))) { status[j] <- 1 # to be changed if (reversed) ci <- 2-ci cj <- ifelse(is.na(cj), ci, cj) count[,j] <- cj } warning(paste(gnames[i], "is genetically identical to", gnames[j], "and has been omitted")) } else { if (sum(is.na(cj))>sum(is.na(ci))){ c[,j] <- ci status[j] <- -1 # delete warning(paste(gnames[j], "is genetically identical to", gnames[i], "and has been omitted")) gnames[j] <- gnames[i] } else { status[i] <- -1 # delete warning(paste(gnames[i], "is genetically identical to", gnames[j], "and has been omitted")) } } break } } } } if (change) { for (i in 1:ng){ if (status[i]==1) { ci <- count[,i] att <- attributes(data[[gts[i]]]) anames <- att$allele.names contr <- att$contrasts wopt <- options()$warn options(warn=-1) new <- as.genotype.allele.count(ci, anames) if (!is.null(contr)) attr(new, "contrasts") <- contr options(warn=wopt) data[[gts[i]]] <- new } } for (i in ng:1) { if (status[i]==-1) { data[[gts[i]]] <- NULL } } } data } # Minor allele frequency, data frame or single genotype # When applied to a data frame, returns values only for genotype variables # if simplify=T; otherwise returns NA's for non-genotype variables MAF <- function(g, simplify=TRUE) { if (is.data.frame(g)) { nv <- length(g) gt <- logical(nv) mf <- as.numeric(rep(NA, nv)) names(mf) <- names(g) for (i in 1:nv) { gi <- g[[i]] if (is.genotype(gi)) { gt[i] <- TRUE fr <- table(allele(gi)) mf[i] <- min(fr)/sum(fr) } else gt[i] <- FALSE } if (simplify) mf[gt] else mf } else if (is.genotype(g)) { fr <- table(allele(g)) min(fr)/sum(fr) } } # Partial correlations (with sign reversed) partial.LD <- function(object) { if (class(object)[1]=="LD.data.frame") R <- object$r else if (is.data.frame(object)) R <- LD(object)$r else stop("Argument must be a dataframe or an LD dataframe") dn <- dimnames(R) p <- nrow(R) R <- ifelse(is.na(R), t(R), R) diag(R) <- 1 if (any(is.na(R))) stop("NAs in correlation matrix") Ri <- solve(R) if (any(diag(Ri)<=0)) stop("Matrix not positive semidefinite") D <- diag(1/sqrt(diag(Ri))) Ri <- D %*% Ri %*% D dimnames(Ri) <- dn Ri } # # Identify trios of affected offspring and both parents # trios <- function(pedigree, id, id.father, id.mother, sex, affected, codes=2, first=FALSE, data = sys.frame(sys.parent())) { cl <- match.call() if (missing(pedigree)) pedigree <- eval(as.symbol("pedigree"), data) else pedigree <- eval(cl$pedigree, data) N <- length(pedigree) if (missing(id)) id <- eval(as.symbol("id"), data) else id <- eval(cl$id, data) if (missing(id.father)) id.father <- eval(as.symbol("id.father"), data) else id.father <- eval(cl$id.father, data) if (missing(id.mother)) id.mother <- eval(as.symbol("id.mother"), data) else id.mother <- eval(cl$id.mother, data) if (missing(sex)) sex <- eval(as.symbol("sex"), data) else sex <- eval(cl$sex, data) if (missing(affected)) affected <- eval(as.symbol("affected"), data) else affected <- eval(cl$affected, data) if (N==0 || length(id)==0 || length(id.father)==0 || length(id.mother)==0 || length(sex)==0 || length(affected)==0) stop("missing or inconsistent arguments") index <- (1:N) if (is.factor(affected)) { if (is.numeric(codes)) is.affected <- !is.na(match(as.numeric(affected), codes)) else is.affected <- !is.na(match(affected, codes)) } else is.affected <- !is.na(match(affected, codes)) use <- index[is.affected & !is.na(id.father) & !is.na(id.mother)] if (first) { nuc <- paste(pedigree,id.mother,id.father,sep=":")[use] fst <- match(unique(nuc), nuc) use <- use[fst] } lid <- paste(pedigree, id, sep=":") ma <- match(paste(pedigree, id.mother, sep=":")[use], lid) pa <- match(paste(pedigree, id.father, sep=":")[use], lid) comp <- !(is.na(id.mother[use]) | is.na(ma) | is.na(id.father[use]) | is.na(pa)) if (any(!comp)) { use <- use[comp] ma <- ma[comp] pa <- pa[comp] } wrong <- sum(as.numeric(sex[ma])==1, na.rm=TRUE) + sum(as.numeric(sex[pa])==2, na.rm=TRUE) if (wrong>0) warning(paste("sex of mothers and/or fathers incorrectly coded", wrong, "times")) data.frame(case=use, father=pa, mother=ma, pedigree=pedigree[use], id=id[use], id.father=id.father[use], id.mother=id.mother[use]) } # Resolve a single genotype for trios into transmitted and untransmitted # haplotypes # If one.parent true, allows partial resolution when one parent is missing # If one.parent.all true, only resolves these if resolution possible for # either transmission from other parent # If allow.ambiguous, resolutions which are ambiguous for parent of origin # are allowed phase.resolve <- function(offspring, mother, father, allow.ambiguous=FALSE, one.parent=FALSE, one.parent.all=FALSE, as.allele.pair=FALSE) { if(!(is.genotype(offspring) && is.genotype(mother) && is.genotype(father))) stop("Arguments must be genotype variables") alleles <- allele.names(offspring) a.o <- allele(offspring) a.m <- allele(mother) a.f <- allele(father) full <- !(is.na(offspring) | is.na(mother) | is.na(father)) asis <- is.na(offspring) | ((is.na(mother) | a.o[,1]==a.m[,1] | a.o[,1]==a.m[,2]) & (is.na(father) | a.o[,2]==a.f[,1] | a.o[,2]==a.f[,2])) swap <- is.na(offspring) | ((is.na(father) | a.o[,1]==a.f[,1] | a.o[,1]==a.f[,2]) & (is.na(mother) | a.o[,2]==a.m[,1] | a.o[,2]==a.m[,2])) asis <- ifelse(is.na(asis), TRUE, asis) swap <- ifelse(is.na(swap), TRUE, swap) misinherits <- (!asis) & (!swap) if (any(misinherits)) warning(sum(misinherits), " misinheritances") homo <- homozygote(offspring) ambiguous <- asis & swap & !homo mat.t <- ifelse(swap, a.o[,2], a.o[,1]) pat.t <- ifelse(swap, a.o[,1], a.o[,2]) mat.u <- ifelse(mat.t==a.m[,1], a.m[,2], a.m[,1]) pat.u <- ifelse(pat.t==a.f[,1], a.f[,2], a.f[,1]) resolved <- full & (allow.ambiguous | !ambiguous) # Partial resolution if only one parent typed # (only carried out if possible for either transmission) if (one.parent) { m.only <- is.na(father) & !(is.na(offspring) | is.na(mother)) f.only <- is.na(mother) & !(is.na(offspring) | is.na(father)) if (one.parent.all) resolved <- resolved | ((m.only | f.only) & !ambiguous) else resolved <- resolved | ((m.only | f.only) & !ambiguous & !homo) } resolved <- resolved & !misinherits & (allow.ambiguous | !ambiguous) mat.t[!resolved] <- NA pat.t[!resolved] <- NA mat.u[!resolved] <- NA pat.u[!resolved] <- NA mlist <- (1:length(misinherits))[misinherits] if (as.allele.pair) { tr <- cbind(mat.t, pat.t) un <- cbind(mat.u, pat.u) if (allow.ambiguous) { colnames(tr) <- NULL colnames(un) <- NULL list(trans=tr, untrans=un, misinherits=mlist, ambiguous=ambiguous) } else { colnames(tr) <- c("Maternal", "Paternal") colnames(un) <- c("Maternal", "Paternal") list(trans=tr, untrans=un, misinherits=mlist) } } else { if (allow.ambiguous) list( trans=haplotype(mat.t, pat.t, allow.partial.missing=one.parent), untrans=haplotype(mat.u, pat.u, allow.partial.missing=one.parent), misinherits= mlist, ambiguous=ambiguous ) else list( trans=haplotype(mat.t, pat.t, allow.partial.missing=one.parent), untrans=haplotype(mat.u, pat.u, allow.partial.missing=one.parent), misinherits= mlist ) } } tdt <- function(genotype, data = sys.frame(sys.parent()), pedigree, id, id.father, id.mother, sex, affected, codes = 2, parent="both", robust=FALSE, first=FALSE) { if (is.na(match(parent, c("both", "mother", "father")))) stop("Unrecognized parent= argument") if (missing(genotype)) stop("Missing genotype argument with no default") cl <- match.call() g <- eval(cl$genotype, data) gname <- deparse(cl$genotype) if (missing(pedigree)) ped <- evalq(pedigree, data) else ped <- eval(cl$pedigree, data) anames <- allele.names(g) nall <- length(anames) tr <- trios(pedigree, id, id.father, id.mother, sex, affected, codes=codes, data=data, first=first) g.cs <- g[tr$case] g.fr <- g[tr$father] g.mr <- g[tr$mother] ped.cs <- ped[tr$case] res <- phase.resolve(g.cs, g.mr, g.fr, as.allele.pair = TRUE, allow.ambiguous=(parent=="both"), one.parent=TRUE) if (parent=="both") { tr <- rbind(res$trans[,1], res$trans[,2]) un <- rbind(res$untrans[,1], res$untrans[,2]) pe <- rbind(ped.cs, ped.cs) } else if (parent=="mother") { tr <- res$trans[,1] un <- res$untrans[,1] pe <- ped.cs } else if (parent=="father") { tr <- res$trans[,2] un <- res$untrans[,2] pe <- ped.cs } af <- table(un, dnn="Allele frequency") af <- af/sum(af) infmtv <- !is.na(tr) & !is.na(un) & (tr!=un) ninf <- sum(infmtv) tr <- tr[infmtv] un <- un[infmtv] pe <- pe[infmtv] trtab <- table(c(tr, un), rep(c(0,1), c(ninf, ninf))) anames.old <- anames anames <- rownames(trtab) anames.omitted <- setdiff(anames.old, anames) if (length(anames.omitted)>0) { warning("No informative transmissions for alleles: ", paste(anames.omitted, collapse=",")) } nall <- length(anames) dimnames(trtab) <- list(anames, c("Transmitted", "Untransmitted")) Ia <- diag(nall) rownames(Ia) <- anames U <- (Ia[tr,] - Ia[un,]) if (robust) { peds <- unique(pe) if (length(peds)1) { cat("\nGlobal chi-squared test =", x$statistic, "on", x$parameter, "df. Asymptotic P-value =", format.pval(x$p.value[1], digits=3),"\n") } } # long.phase.resolve resolves phase of transmitted and untransmitted # haplotypes # falk.rubinstein simply computes (multilocus) transmitted and untransmitted # genotypes # # offspring, mother, father are lists of genotype variables long.phase.resolve <- function(offspring, mother, father, parent.of.origin=FALSE, as.allele.pair=FALSE) { if (!(is.list(offspring) && is.list(mother) && is.list(father))) stop("Arguments should be lists of genotypes") nloci <- length(offspring) if (!(length(mother)==nloci && length(father)==nloci)) stop("Arguments should be lists of same length") tr <- vector("list", nloci) un <- vector("list", nloci) locnames <- names(offspring) names(tr) <- locnames names(un) <- locnames ntr <- length(offspring[[1]]) resolved <- rep(TRUE, ntr) if (!parent.of.origin) { n.ambig<- rep(0, ntr) n.het.tr <- rep(0, ntr) n.het.un <- rep(0, ntr) } for (i in 1:nloci) { phi <- phase.resolve(offspring[[i]], mother[[i]], father[[i]], allow.ambiguous = !parent.of.origin, as.allele.pair = as.allele.pair) tr[[i]] <- phi$trans un[[i]] <- phi$untrans if (as.allele.pair) { resolved <- resolved & !is.na(phi$trans[,1]) & !is.na(phi$untrans[,1]) & !is.na(phi$trans[,2]) & !is.na(phi$untrans[,2]) if (!parent.of.origin) { resolved <- resolved & !phi$ambiguous n.ambig <- n.ambig + phi$ambiguous n.het.tr <- n.het.tr + (phi$trans[,1]!= phi$trans[,2]) n.het.un <- n.het.un + (phi$untrans[,1]!= phi$untrans[,2]) } } else { resolved <- resolved & !is.na(phi$trans) & !is.na(phi$untrans) if (!parent.of.origin) { resolved <- resolved & !phi$ambiguous n.ambig <- n.ambig + phi$ambiguous n.het.tr <- n.het.tr + heterozygote(phi$trans) n.het.un <- n.het.un + heterozygote(phi$untrans) } } } if (!parent.of.origin) resolved <- resolved | n.ambig==0 | (n.ambig==1 & !(n.het.tr>1 | n.het.un>1)) resolved[is.na(resolved)] <- FALSE for (i in 1:nloci) { if (as.allele.pair) { tr[[i]][!resolved,] <- NA un[[i]][!resolved,] <- NA } else { tr[[i]][!resolved] <- as.haplotype(NA) un[[i]][!resolved] <- as.haplotype(NA) } } list(trans=tr, untrans=un, resolved=resolved) } falk.rubinstein <- function(offspring, mother, father, as.allele.pair=FALSE, allow.partial.missing=TRUE) { if (!(is.list(offspring) && is.list(mother) && is.list(father))) stop("Arguments should be lists of genotypes") nloci <- length(offspring) if (!(length(mother)==nloci && length(father)==nloci)) stop("Arguments should be lists of same length") tr <- vector("list", nloci) un <- vector("list", nloci) locnames <- names(offspring) names(tr) <- locnames names(un) <- locnames ntr <- length(offspring[[1]]) misinherits <- rep(FALSE, ntr) any.missing <- FALSE for (i in 1:nloci) { a.m <- allele(mother[[i]]) a.f <- allele(father[[i]]) a.o <- allele(offspring[[i]]) asis <- is.na(offspring[[i]]) | ((is.na(mother[[i]]) | a.o[,1]==a.m[,1] | a.o[,1]==a.m[,2]) & (is.na(father[[i]]) | a.o[,2]==a.f[,1] | a.o[,2]==a.f[,2])) swap <- is.na(offspring[[i]]) | ((is.na(father[[i]]) | a.o[,1]==a.f[,1] | a.o[,1]==a.f[,2]) & (is.na(mother[[i]]) | a.o[,2]==a.m[,1] | a.o[,2]==a.m[,2])) asis <- ifelse(is.na(asis), TRUE, asis) swap <- ifelse(is.na(swap), TRUE, swap) cant <- (!asis) & (!swap) misinherits <- misinherits | cant uni <- cbind(ifelse(asis, ifelse(a.o[,1]==a.m[,1], a.m[,2], a.m[,1]), ifelse(a.o[,1]==a.f[,1], a.f[,2], a.f[,1])), ifelse(asis, ifelse(a.o[,2]==a.f[,1], a.f[,2], a.f[,1]), ifelse(a.o[,2]==a.m[,1], a.m[,2], a.m[,1])) ) any.missing <- any.missing | is.na(uni[,1]) | is.na(uni[,2]) | is.na(a.o[,1]) | is.na(a.o[,2]) un[[i]] <- uni tr[[i]] <- a.o } if (any(misinherits)) warning(sum(misinherits), " misinheritances") if (!allow.partial.missing) misinherits <- misinherits | any.missing if (any(misinherits)) { for (i in 1:nloci) { un[[i]][misinherits,1] <- NA un[[i]][misinherits,2] <- NA tr[[i]][misinherits,1] <- NA tr[[i]][misinherits,2] <- NA } } if (!as.allele.pair) { for (i in 1:nloci) { un[[i]] <- genotype(un[[i]], allow.partial.missing=allow.partial.missing) tr[[i]] <- genotype(tr[[i]], allow.partial.missing=allow.partial.missing) } } list(trans=tr, untrans=un) } # Bind multiple haplotype variables together hbind <- function(..., sep=":", as.allele.pair=FALSE) { args <- list(...) if (length(args)==1 && is.list(args[[1]])) args <- args[[1]] nl <- length(args) if (nl<=1) stop("Too few arguments or list elements") first <- args[[1]] if (is.haplotype(first)) { as.allele.pair.in <- FALSE hap <- allele(first) } else if (is.character(first) && is.matrix(first) && ncol(first)==2) { as.allele.pair.in <- TRUE hap <- first } else stop("Illegal haplotype argument or list element") for (i in 2:nl) { ht <- args[[i]] if (as.allele.pair.in) { if (!(is.character(ht) && is.matrix(ht) && ncol(ht)==2)) stop("Illegal haplotype argument or list element") } else { if (!is.haplotype(ht)) stop("Illegal haplotype argument or list element") ht <- allele(ht) } hap[,1] <- ifelse(is.na(hap[,1])|is.na(ht[,1]), NA, paste(hap[,1], ht[,1], sep=sep)) hap[,2] <- ifelse(is.na(hap[,2])|is.na(ht[,2]), NA, paste(hap[,2], ht[,2], sep=sep)) } if (as.allele.pair) hap else as.haplotype(hap) } # Make dataframe of cases and matched pseudo-controls pseudocc <- function(..., data = sys.frame(sys.parent()), pedigree, id, id.father, id.mother, sex, affected, codes = 2, phase=TRUE, first=FALSE, parent.of.origin=exchangeable, exchangeable = FALSE, allow.partial.missing=FALSE) { if (exchangeable && !parent.of.origin) stop("Incompatible exchangeable and parent.of.origin options") if (parent.of.origin && !phase) stop("Incompatible phase and parent.of.origin options") if (allow.partial.missing && phase) stop("Incompatible allow.partial.missing and phase options") cl <- match.call() arg.names <- names(cl) narg <- length(arg.names) ng <- sum(arg.names=="")-1 gnames <- character(ng) gts <- vector("list", ng) for (i in 2:narg) { if (arg.names[i]=="") { gnames[i-1] <- deparse(cl[[i]]) gtsi<- eval(cl[[i]], data) if (!is.genotype(gtsi)) stop(gnames[i-1], " is not a genotype variable") gts[[i-1]] <- gtsi } } names(gts) <- gnames if (missing(pedigree)) pedigree <- evalq(pedigree, data) else pedigree <- eval(cl$pedigree, data) if (missing(id)) id <- evalq(id, data) else id <- eval(cl$id, data) if (missing(id.father)) id.father <- evalq(id.father, data) else id.father <- eval(cl$id.father, data) if (missing(id.mother)) id.mother <- evalq(id.mother, data) else id.mother <- eval(cl$id.mother, data) tr <- trios(pedigree, id, id.father, id.mother, sex, affected, codes, data=data, first=first) pedigree <- pedigree[tr$case] id <- id[tr$case] id.father <- id.father[tr$case] id.mother <- id.mother[tr$case] father <- mother <- offspring <- vector("list", ng) for (i in 1:ng) { offspring[[i]] <- gts[[i]][tr$case] father[[i]] <- gts[[i]][tr$father] mother[[i]] <- gts[[i]][tr$mother] } if (phase && ng>1) { extra <- 7 out <- vector("list", 3*ng+7) hapname <- paste(gnames, sep="", collapse=":") if (nchar(hapname)>10) hapname <- "haplotype" names(out) <- c("set", "cc", "pedigree", "id", "id.father", "id.mother", hapname, gnames, paste(gnames,"mother", sep=":"), paste(gnames, "father", sep=":")) } else { extra <- 6 out <- vector("list", 3*ng+6) names(out) <- c("set", "cc", "pedigree", "id", "id.father", "id.mother", gnames, paste(gnames,"mother", sep=":"), paste(gnames, "father", sep=":")) } if (phase) { res <- long.phase.resolve(offspring, mother, father, parent.of.origin=parent.of.origin, as.allele.pair=TRUE) case <- res$trans untrans <- res$untrans res.case <- res$resolved off <- vector("list", ng) for (i in 1:ng) off[[i]] <- genotype(untrans[[i]][,1], case[[i]][,2]) res <- long.phase.resolve(off, mother, father, parent.of.origin=parent.of.origin, as.allele.pair=TRUE) control1 <- res$trans res.control1 <- res$resolved for (i in 1:ng) off[[i]] <- genotype(case[[i]][,1], untrans[[i]][,2]) res <- long.phase.resolve(off, mother, father, parent.of.origin=parent.of.origin, as.allele.pair=TRUE) control2 <- res$trans res.control2 <- res$resolved for (i in 1:ng) off[[i]] <- genotype(untrans[[i]][,1], untrans[[i]][,2]) res <- long.phase.resolve(off, mother, father, parent.of.origin=parent.of.origin, as.allele.pair=TRUE) control3 <- res$trans res.control3 <- res$resolved res.case <- res.case & (res.control1 | res.control2 | res.control3) set <- 1:length(res.case) n.case <- sum(res.case) n.control <- sum(res.control1) + sum(res.control2) + sum(res.control3) set <- c(set[res.case], set[res.control1], set[res.control2], set[res.control3]) cc <- c(rep(1, n.case), rep(0, n.control)) ped <- c(pedigree[res.case], pedigree[res.control1], pedigree[res.control2], pedigree[res.control3]) id <- c(id[res.case], id[res.control1], id[res.control2], id[res.control3]) fid <- c(id.father[res.case], id.father[res.control1], id.father[res.control2], id.father[res.control3]) mid <- c(id.mother[res.case], id.mother[res.control1], id.mother[res.control2], id.mother[res.control3]) if (exchangeable) { set1 <- set set <- rep(set, 2) cc <- c(cc, rep(0, length(cc))) ped <- rep(ped, 2) id <- rep(id, 2) fid <- c(fid, mid) mid <- c(mid, fid) } setord <- order(set) out$set <- set[setord] out$cc <- cc[setord] out$pedigree <- ped[setord] out$id <- id[setord] out$id.father <- fid[setord] out$id.mother <- mid[setord] for (i in 1:ng) { hapi <- rbind(case[[i]][res.case,], control1[[i]][res.control1,], control2[[i]][res.control2,], control3[[i]][res.control3,]) if (exchangeable) hapi <- rbind(hapi, cbind(hapi[,2], hapi[,1])) if (ng==1 && !parent.of.origin) out[[i+extra]] <- genotype(hapi[setord,]) else out[[i+extra]] <- haplotype(hapi[setord,]) } if (phase && ng>1) { if (parent.of.origin) out[[extra]] <- hbind(out[extra +(1:ng)]) else out[[extra]] <- as.genotype(hbind(out[extra +(1:ng)])) } for (i in 1:ng) { if (exchangeable) pg <- genotype(rbind(allele(mother[[i]][set1]), allele(father[[i]][set1]))) else pg <- mother[[i]][set] out[[i+ng+extra]] <- pg[setord] } for (i in 1:ng) { if (exchangeable) pg <- genotype(rbind(allele(father[[i]][set1]), allele(mother[[i]][set1]))) else pg <- father[[i]][set] out[[i+2*ng+extra]] <- pg[setord] } } else { res <- falk.rubinstein(offspring, mother, father, as.allele.pair=TRUE, allow.partial.missing=allow.partial.missing) n.case <- nrow(res$trans[[1]]) set <- rep(1:n.case, rep(2, n.case)) out$set <- set out$cc <- rep(c(1,0), n.case) out$pedigree <- pedigree[set] out$id <- id[set] out$id.father <- id.father[set] out$id.mother <- id.mother[set] setord <- order(rep(1:n.case,2)) for (i in 1:ng) out[[i+extra]] <- genotype( rbind(res$trans[[i]], res$untrans[[i]])[setord,], allow.partial.missing = allow.partial.missing) for (i in 1:ng) out[[i+ng+extra]] <- mother[[i]][set] for (i in 1:ng) out[[i+2*ng+extra]] <- father[[i]][set] } as.data.frame(out) } # Trio types for parent-of-origin analyses trio.types <- function(genotype, data = sys.frame(sys.parent()), pedigree, id, id.father, id.mother, sex, affected, codes = 2, all=FALSE, first=FALSE, parent.of.origin=FALSE) { if (missing(genotype)) stop("Missing genotype argument with no default") cl <- match.call() g <- eval(cl$genotype, data) gname <- deparse(cl$genotype) tr <- trios(pedigree, id, id.father, id.mother, sex, affected, codes=codes, data=data, first=first) g.cs <- g[tr$case] g.fr <- g[tr$father] g.mr <- g[tr$mother] df <- as.data.frame(table(g.cs, g.mr, g.fr)) df.swap <- as.data.frame(table(g.cs, g.fr, g.mr)) for (i in 1:3) df[[i]] <- as.genotype(df[[i]]) wl <- options()$warn options(warn=-1) pr <- phase.resolve(df[[1]], df[[2]], df[[3]], allow.ambiguous=TRUE) options(warn=wl) mendelian <- rep(TRUE, length(df[[1]])) mendelian[pr$misinherits] <- FALSE if (!all & any(!mendelian & (df[[4]]!=0))) warning("Non-Mendelian inheritances were observed") i.cs <- as.numeric(df[[1]]) i.fr <- as.numeric(df[[2]]) i.mr <- as.numeric(df[[3]]) i.p1 <- ifelse(i.fr0) { df1 <- data.frame(x=all[,1], rst) names(df1) <- fn df2 <- data.frame(x=all[,2], rst) names(df2) <- fn table(rbind(df1, df2)) } else table(c(all[,1], all[,2])) } origin <- function(genotype, data = sys.frame(sys.parent()), pedigree, id, id.father, id.mother, sex, affected, codes = 2, first=TRUE, maternal.effect=FALSE, base=1) { cl <- match.call() g <- eval(cl$genotype, data) if (missing(pedigree)) pedigree <- eval(as.symbol("pedigree"), data) else pedigree <- eval(cl$pedigree, data) N <- length(pedigree) if (missing(id)) id <- eval(as.symbol("id"), data) else id <- eval(cl$id, data) if (missing(id.father)) id.father <- eval(as.symbol("id.father"), data) else id.father <- eval(cl$id.father, data) if (missing(id.mother)) id.mother <- eval(as.symbol("id.mother"), data) else id.mother <- eval(cl$id.mother, data) if (missing(sex)) sex <- eval(as.symbol("sex"), data) else sex <- eval(cl$sex, data) if (missing(affected)) affected <- eval(as.symbol("affected"), data) else affected <- eval(cl$affected, data) tr <- trio.types(g, codes=codes, first=first, parent.of.origin=TRUE) n.pairs <- length(tr$affected.offspring)/2 one <- rep(c(TRUE, FALSE), n.pairs) r <- tr$frequency[one] n <- r + tr$frequency[!one] p <- r/n ac <- allele.count(tr$affected.offspring) mc <- allele.count(tr$mother) fc <- allele.count(tr$father) maternal.origin <- matrix(as.numeric((ac==2) | ((ac==1) & (mc>fc))), ncol=2) mother.1 <- matrix(as.numeric(mc==1), ncol=2) mother.2 <- matrix(as.numeric(mc==2), ncol=2) colnames(maternal.origin) <- allele.names(tr$affected.offspring) colnames(mother.1) <- allele.names(tr$affected.offspring) colnames(mother.2) <- allele.names(tr$affected.offspring) maternal.origin <- maternal.origin[, -base, drop=FALSE] mother.1 <- mother.1[, -base, drop=FALSE] mother.2 <- mother.2[, -base, drop=FALSE] mother <- cbind(mother.1, mother.2) maternal.origin <- maternal.origin[one,] - maternal.origin[!one,] mother <- mother[one,] - mother[!one,] if (!maternal.effect) { g <- glm(p ~ -1 + maternal.origin, family=binomial, weights=n) } else { g <- glm(p ~ -1 + mother + maternal.origin, family=binomial, weights=n) } anova(g) }