# Common R macros for Math 300 and 400 level courses, originally written spring 2005. # Math 300 trunc.hist <- function(x, xmin=NULL, xmax=NULL, trim=0.025, ...) { # Produces a truncated histogram. # `x' is the vector of observations. # `trim' is the fraction (0 to 0.5) of observations to be trimmed from # each end of `x' before the histogram is constructed; # `trim' is used only when `xmin' and `xmax' are NULL. # When `xmin' and `xmax' are not NULL, then `trim' is ignored, # and the histogram excludes values outside the domain from # `xmin' to `xmax'. # ...: optional arguments to `hist'. if ( (is.null(xmin)& !is.null(xmax)) || (!is.null(xmin)&is.null(xmax))) stop("`xmin' and `xmax' must both be NULL or must both be nonNULL.") if (is.null(xmin)&is.null(xmax)) y <- sort(x)[(length(x)*trim): (length(x)*(1-trim))] if (!is.null(xmin)&!is.null(xmax)) { if (xmin >= xmax) stop("`xmin' cannot be as large as or larger than `xmax'.") y <- x[ x>xmin & x shade.dist( 6, 'df.nc', c(7,9,2.5), NULL, F, 0, 8 ) # To avoid ambiguity with discrete distributions, avoid boarder values with `xshade'. # For example, if X ~ Binomial(n=10, p=0.4), then to graph P(X>2) type: # > shade.dist( 2.5, 'dbinom', 10, 0.4, F ) # `xmin' and `xmax' represent the limiting values on the x-axis. # Reasonable values of `xmin' and `xmax' are generated when originally set to NULL. # `xlab' is the label on the x-axis. If NULL, `xlab' may be automatically set to `X', # `Z', `T', `F' or `X^2', according to `ddist'. # `digits.prob' is the number of significant digits listed in the probability. # `digits.xtic' is the number of significant digits listed on the x-axis. # If `xtic' is TRUE, then the numbers listed on the x-axis include the median and `xshade'. # If `xtic' is FALSE, then the default numbers are listed on the x-axis. # If `xtic' is a vector, then the numbers listed on the x-axis consist of `xtic'. # `is.discrete' is logical, indicating whether or not the distribution is discrete. # If NULL, then the macro attempts to assign the correct logical value. # `additional.x.range' is a vector of TWO additional `x' values for evaluating the function. # This is useful when the plot is too sparse. # `main' is the main title. If NULL, the probability is displayed. # If no title is desired, set `main' to an empty string ''. num.grid.points <- 10001; col <- c("black", "red"); ylimchisq1 <- 1.2 options(warn = -1); if (substring(ddist,1,1)!="d") stop("`ddist' must be a pdf.") all.discrete = c("dbinom", "dgeom", "dhyper", "dnbinom", "dpois") if (!is.null(is.discrete)) discrete.pdf=is.discrete if (is.null(is.discrete)) discrete.pdf=(ddist %in% all.discrete) if (is.null(xlab)) { xlab="X" if (ddist=="dnorm") { if (is.null(c(parm1,parm2))) xlab="Z" if (!is.null(parm1) & is.null(parm2)) { if (parm1==0) xlab="Z" } if (!is.null(parm1) & !is.null(parm2)) { if (parm1==0 & parm2==1) xlab="Z" } } if (xlab=="Z") {parm1=0; parm2=1} if (ddist=="dnorm" & is.null(parm2)) parm2==1 temp <- c(parm1, parm2) if (ddist=="dt" & length(temp)==1) xlab=paste(c("T_",temp[1]),sep="",collapse="") if (ddist=="dt.nc" & length(temp)==2) xlab=paste(c("T_",temp[1],",",temp[2]),sep="",collapse="") if (ddist=="dchisq" & length(temp)==1) xlab=paste(c("CHI-SQUARE_",temp[1]),sep="",collapse="") if (ddist=="dchisq" & length(temp)==2) xlab=paste(c("CHI-SQUARE_",temp[1],",",temp[2]),sep="",collapse="") if (ddist=="df" & length(temp)==2) xlab=paste(c("F_",temp[1],",",temp[2]),sep="",collapse="") if (ddist=="df.nc" & length(temp)==3) xlab=paste(c("F_",temp[1],",",temp[2],",",temp[3]),sep="",collapse="") } parm1 <- c(parm1, parm2); temp.vec <- get.min.max(xmin, xmax, ddist, parm1) if (is.null(xshade)) {xshade <- temp.vec$medianA} if (length(xshade)!=1 & length(xshade)!=2) stop("xshade must have length 1 or 2.") if (length(xshade)==2) {if (xshade[1]>xshade[2]) {temp <- xshade[1]; xshade[1] <- xshade[2]; xshade[2] <- temp}} if (is.null(xmin) || is.null(xmax)) { xmin <- temp.vec$xmin ; xmax <- temp.vec$xmax xmin.temp <- min( xmin, xshade[1]-(xmax-xmin)*0.1 ) xmax.temp <- max( xmax, xshade[length(xshade)]+(xmax-xmin)*0.1 ) xmin <- xmin.temp ; xmax <- xmax.temp } else { if (xmin>xmax) { temp <- xmin; xmin <- xmax; xmax <- temp } } pdist <- paste("p",substring(ddist,2),sep="") qdist <- paste("q",substring(ddist,2),sep="") if (pdist=="pf.nc") pdist <- "pf" ; if (pdist=="pt.nc") pdist <- "pt" f <- function(z) { if (length(parm1)==0) {return(match.fun(ddist)(z))} if (length(parm1)==1) {return(match.fun(ddist)(z, parm1[1]))} if (length(parm1)==2) {return(match.fun(ddist)(z, parm1[1], parm1[2]))} if (length(parm1)==3) {return(match.fun(ddist)(z, parm1[1], parm1[2], parm1[3]))} if (length(parm1)==4) {return(match.fun(ddist)(z, parm1[1], parm1[2], parm1[3], parm1[4]))} if (length(parm1)==5) {return(match.fun(ddist)(z, parm1[1], parm1[2], parm1[3], parm1[4], parm1[5]))} } g <- function(z, lower.tail=TRUE) { if (length(parm1)==0) {return(match.fun(pdist)(z, lower.tail=lower.tail))} if (length(parm1)==1) {return(match.fun(pdist)(z, parm1[1], lower.tail=lower.tail))} if (length(parm1)==2) {return(match.fun(pdist)(z, parm1[1], parm1[2], lower.tail=lower.tail))} if (length(parm1)==3) {return(match.fun(pdist)(z, parm1[1], parm1[2], parm1[3], lower.tail=lower.tail))} if (length(parm1)==4) {return(match.fun(pdist)(z, parm1[1], parm1[2], parm1[3], parm1[4], lower.tail=lower.tail))} if (length(parm1)==5) {return(match.fun(pdist)(z, parm1[1], parm1[2], parm1[3], parm1[4], parm1[5], lower.tail=lower.tail))} } h <- function(z) { if (length(parm1)==0) {return(match.fun(qdist)(z))} if (length(parm1)==1) {return(match.fun(qdist)(z, parm1[1]))} if (length(parm1)==2) {return(match.fun(qdist)(z, parm1[1], parm1[2]))} if (length(parm1)==3) {return(match.fun(qdist)(z, parm1[1], parm1[2], parm1[3]))} if (length(parm1)==4) {return(match.fun(qdist)(z, parm1[1], parm1[2], parm1[3], parm1[4]))} if (length(parm1)==5) {return(match.fun(qdist)(z, parm1[1], parm1[2], parm1[3], parm1[4], parm1[5]))} } x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points; options(warn = -1) options(warn = 0) if (discrete.pdf) {xmin <- ceiling(xmin); xmax<-floor(xmax); x <- xmin:xmax } if (length(xshade)==2 & xshade[1]>xshade[2]) { temp <- xshade[1]; xshade[1] <- xshade[2]; xshade[2] <- temp } if (xshade[1]xmax) stop("xshade must be between xmin and xmax.") if (lower.tail & length(xshade)==1) {the.prob <- g(xshade); x1 <- x[ x<=xshade ] } if (!lower.tail & length(xshade)==1) {the.prob <- g(xshade, lower.tail=FALSE); x1 <- x[ x>xshade ] } if (!lower.tail & length(xshade)==2) {the.prob <- g(xshade[2])-g(xshade[1], lower.tail=TRUE) x1 <- x[ xshade[1]xshade[2] ] } maxA <- ifelse( ((ddist %in% c("dchisq", "df")) && parm1[1]==1), min(ylimchisq1, max(f(x),na.rm=TRUE)), max(f(x)) ) ylim1 <- c(min(f(x),na.rm=TRUE), maxA) if (is.null(main)) main= paste(c("Probability is", prettyNum(the.prob,digits=digits.prob)), collapse=" ") if (discrete.pdf) { plot(x, f(x), xlim=c(xmin, xmax), ylim=ylim1, type="h", xlab=xlab, ylab="probability density function", xaxt="n", main=main, col=col[1]) curve(f, min(x1), max(x1), (max(x1)-min(x1)+1), add=TRUE, type="h", xaxt="n", col=col[2]) if (lower.tail==TRUE & length(xshade)==2) { curve(f, min(x2), max(x2), (max(x2)-min(x2)+1), add=TRUE, type="h", xaxt="n", col=col[2]) } } else { plot(x1, f(x1), xlim=c(xmin,xmax), ylim=ylim1, type="h", xlab=xlab, ylab="probability density function", xaxt="n", main=main, col=col[2]) if (lower.tail==TRUE & length(xshade)==2) { curve(f, min(x2), max(x2), num.grid.points, add=TRUE, type="h", xaxt="n", col=col[2]) } curve(f, xmin, xmax, num.grid.points, add=TRUE, type="p", xaxt="n", pch=20, cex=0.05, col=col[1]) if (length(additional.x.range)>=2) curve(f, min(additional.x.range), max(additional.x.range), num.grid.points, add=TRUE, type="p", xaxt="n", pch=20, cex=0.05, col=col[1]) } if (!is.logical(xtic)) axis(1, at=xtic, labels=xtic) if (is.logical(xtic)) { if (!xtic) axis(1); if (xtic) { if (length(xshade)==1) xtic0=prettyNum(c(h(0.5), 2*h(0.5)-xshade, xshade),digits=digits.xtic) if (length(xshade)==2) xtic0=prettyNum(c(h(0.5), xshade),digits=digits.xtic) if (ddist %in% c("dchisq","df")) xtic0=prettyNum(c(h(0.5), xshade, 0),digits=digits.xtic) axis(1, at=xtic0, labels=xtic0) } } } # Garren, 2006 # Math 300 shade.phat <- function(xshade=NULL, size=1, prob=0.5, lower.tail=TRUE, xmin=0, xmax=1, xlab=expression(hat(p)), xtic=TRUE, digits.prob=4, digits.xtic=3, main=NULL) { # Plots a continuous probability density function (pdf) and shades in a region. # `xshade' can be a scalar or a vector of size 2. # When `xshade' is a scalar and lower.tail is TRUE, # the area from 0 to xshade is shaded. # When `xshade' is a scalar and lower.tail is FALSE, # the area from xshade to 1 is shaded. # When `xshade' is a vector and lower.tail is TRUE, # the tail probabilities are shaded. # When `xshade' is a vector and lower.tail is FALSE, # the area from xshade[1] to xshade[2] is shaded. # `size' and `prob' are scalars, corresponding to the parameters in `dbinom'. # To avoid ambiguity with discrete distributions, avoid boarder values with `xshade'. # `xmin' and `xmax' represent the limiting values on the x-axis. # `xlab' is the label on the x-axis. # `digits.prob' is the number of significant digits listed in the probability. # `digits.xtic' is the number of significant digits listed on the x-axis. # If `xtic' is TRUE, then the numbers listed on the x-axis consist of `p' and `xshade'. # If `xtic' is FALSE, then the default numbers are listed on the x-axis. # If `xtic' is a vector, then the numbers listed on the x-axis consist of `xtic'. # `main' is the main title. If NULL, the probability is displayed. # If no title is desired, set `main' to an empty string ''. num.grid.points <- 10001; col <- c("black", "red") options(warn = -1); n=size; p=prob if (is.null(xshade)) xshade <- p if (length(xshade)!=1 & length(xshade)!=2) stop("xshade must have length 1 or 2.") if (length(xshade)==2) {if (xshade[1]>xshade[2]) {temp <- xshade[1]; xshade[1] <- xshade[2]; xshade[2] <- temp}} if (xmin>xmax) { temp <- xmin; xmin <- xmax; xmax <- temp } x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points; options(warn = -1) options(warn = 0) f=function(x){dbinom(x,n,p)} ; g=function(prop){dbinom(round(prop*n),n,p)} xmin1 <- ceiling(xmin*n); xmax1<-floor(xmax*n); x <- xmin1:xmax1 if (xshade[1]xmax) stop("xshade must be between xmin and xmax.") if (lower.tail & length(xshade)==1) {the.prob <- pbinom(xshade*n,n,p); x1 <- x[ x<=xshade*n ] } if (!lower.tail & length(xshade)==1) {the.prob <- 1-pbinom(xshade*n,n,p); x1 <- x[ x>xshade*n ] } if (!lower.tail & length(xshade)==2) {the.prob <- pbinom(xshade[2]*n,n,p)-pbinom(xshade[1]*n,n,p) x1 <- x[ xshade[1]*nxshade[2]*n ] } ylim1 <- c(0, max(f(x),na.rm=TRUE)) if (is.null(main)) main= paste(c("Probability is", prettyNum(the.prob,digits=digits.prob)), collapse=" ") plot(x/n, f(x), xlim=c(xmin, xmax), ylim=ylim1, type="h", xlab=xlab, ylab="probability density function", xaxt="n", main=main, col=col[1]) curve(g, min(x1/n), max(x1/n), max(x1)-min(x1)+1, add=TRUE, type="h", xaxt="n", col=col[2]) if (lower.tail & length(xshade)==2) { curve(g, min(x2/n), max(x2/n), max(x2)-min(x2)+1, add=TRUE, type="h", xaxt="n", col=col[2]) } if (is.logical(xtic)) { if (!xtic) axis(1); if (xtic) { if (length(xshade)==1) xtic=prettyNum(c(xshade, 2*p-xshade, p, xmin, xmax),digits=digits.xtic) if (length(xshade)==2) xtic=prettyNum(c(xshade, p, xmin, xmax),digits=digits.xtic) axis(1, at=xtic, labels=xtic) } } if (!is.logical(xtic)) axis(1, at=xtic, labels=xtic) } # Garren, 2006 # Math 300 plot.dist <- function(distA="dnorm", parmA1=NULL, parmA2=NULL, distB=NULL, parmB1=NULL, parmB2=NULL, distC=NULL, parmC1=NULL, parmC2=NULL, xmin=NULL, xmax=NULL, col=c("black","red","darkgreen"), xlab=NULL, is.discrete=NULL, additional.x.range=NULL, main=NULL) { # Plots one, two, or three functions. # When plotting the pdf, some choices for `distA', `distB' and `distC' are # dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif, dbinom, # dgeom, dpois, dhyper, dnbinom. # When plotting the cdf, some choices for `distA', `distB' and `distC' are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, # pgeo, ppois, phyper, pnbinom. # The values of `distA', `distB' and `distC' must be in quotes if `xmin' or `xmax' is NULL. # `parmA1' and `parmA2' are numeric scalars or vectors of the parameters of `distA' # (excluding the first argument), where NULL is the default value; # likewise for the other two functions. # `col' may be a scalar or vector, and specifies the colors of the plotted functions. # Type `colors()' for selections. # If only one function is to be plotted, then use `distA'. # If a distribution, say `distA', needs more than 2 parameters, then assign `parmA1' # to be a vector of all (as many as 5) parameters, and keep `parmA2' equal to NULL. # For example, to plot a noncentral F pdf with 7 and 9 df and ncp=2.5, type: # > plot2.dist( df.nc, c(7,9,2.5) ) # `xmin' and `xmax' represent the limiting values on the x-axis. # Reasonable values of `xmin' and `xmax' are attempted when originally set to NULL. # `xlab' is the label on the x-axis. If NULL, `xlab' may be automatically set to `X', # `Z', `T', `F' or `X^2', according to `distA', `distB' and `distC'. # `is.discrete' is a vector with 1, 2, or 3 logical values, indicating whether or not # `distA', `distB', and `distC' are discrete. # If NULL, then the macro attempts to assign the correct logical value(s). # `additional.x.range' is a vector of TWO additional `x' values for evaluating the function. # This is useful when the plot is too sparse. # `main' is the main title. If NULL, no main title is displayed. num.grid.points <- 10001; options(warn= -1); ylimchisq1 <- 1.2 all.discrete = c("dbinom", "dgeom", "dhyper", "dpois", "dnbinom", "pbinom", "pgeom", "phyper", "ppois", "pnbinom") if (!is.null(is.discrete)) discrete.pdfA=is.discrete[1] discrete.pdfB=FALSE; discrete.pdfC=FALSE if (is.null(is.discrete)) discrete.pdfA=(distA %in% all.discrete) if (!is.null(distB)) { if (!is.null(is.discrete)) discrete.pdfB=is.discrete[2] if (is.null(is.discrete)) discrete.pdfB=(distB %in% all.discrete) } if (!is.null(distC)) { if (!is.null(is.discrete)) discrete.pdfC=is.discrete[3] if (is.null(is.discrete)) discrete.pdfC=(distC %in% all.discrete) } if (is.null(xlab)) { xlab="X" if ((distA=="dnorm" || distA=="pnorm") & is.null(distB) & is.null(distC)) { if (is.null(c(parmA1,parmA2))) xlab="Z" if (!is.null(parmA1) & is.null(parmA2)) { if (parmA1==0) xlab="Z" } if (!is.null(parmA1) & !is.null(parmA2)) { if (parmA1==0 & parmA2==1) xlab="Z" } } temp = c(parmA1, parmA2) if (distA=="dt" || (distA=="pt" & length(temp)==1)) { xlab=paste(c("T_",temp[1]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="T" } } if (distA=="dt.nc" || (distA=="pt" & length(temp)==2)) { xlab=paste(c("T_",temp[1],",",temp[2]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="T" } } if (distA=="dchisq" || distA=="pchisq") { xlab=paste(c("CHI-SQUARE_",parmA1[1]),sep="",collapse="") if (length(temp)==2) xlab=paste(c("CHI-SQUARE_",temp[1],",",temp[2]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="CHI-SQUARE" } } if (distA=="df" || (distA=="pf" & length(temp)==2)) { xlab=paste(c("F_",temp[1],",",temp[2]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="F" } } if (distA=="df.nc" || (distA=="pf" & length(temp)==3)) { xlab=paste(c("F_",temp[1],",",temp[2],",",temp[3]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="F" } } } parmA1 <- c(parmA1, parmA2); parmB1 <- c(parmB1, parmB2); parmC1 <- c(parmC1, parmC2) temp.vec <- get.min.max(xmin, xmax, distA, parmA1, NULL, distB, parmB1, NULL, distC, parmC1, NULL ) xmin <- temp.vec$xmin ; xmax <- temp.vec$xmax if (xmin>xmax) {temp <- xmin; xmin <- xmax; xmax <- temp} x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points x0 <- ceiling(xmin):floor(xmax) if (length(col)==1) {col <- rep(col, 3)}; if (length(col)==2) {col <- c(col, col[1])} fA <- function(z) { if (length(parmA1)==0) {return(match.fun(distA)(z))} if (length(parmA1)==1) {return(match.fun(distA)(z, parmA1[1]))} if (length(parmA1)==2) {return(match.fun(distA)(z, parmA1[1], parmA1[2]))} if (length(parmA1)==3) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3]))} if (length(parmA1)==4) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4]))} if (length(parmA1)==5) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4], parmA1[5]))} } ylim1 <- NULL if (!is.null(distB)) { fB <- function(z) { if (length(parmB1)==0) {return(match.fun(distB)(z))} if (length(parmB1)==1) {return(match.fun(distB)(z, parmB1[1]))} if (length(parmB1)==2) {return(match.fun(distB)(z, parmB1[1], parmB1[2]))} if (length(parmB1)==3) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3]))} if (length(parmB1)==4) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4]))} if (length(parmB1)==5) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4], parmB1[5]))} } } if (!is.null(distC)) { fC <- function(z) { if (length(parmC1)==0) {return(match.fun(distC)(z))} if (length(parmC1)==1) {return(match.fun(distC)(z, parmC1[1]))} if (length(parmC1)==2) {return(match.fun(distC)(z, parmC1[1], parmC1[2]))} if (length(parmC1)==3) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3]))} if (length(parmC1)==4) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4]))} if (length(parmC1)==5) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4], parmC1[5]))} } } maxA <- ifelse( ((distA %in% c("dchisq", "df")) && parmA1[1]==1), min(ylimchisq1, max(c(fA(x),fA(x0)),na.rm=TRUE)), max(c(fA(x),fA(x0))) ) ylim1 <- c(min(c(fA(x),fA(x0)),na.rm=TRUE), maxA) if (!is.null(distB) & is.null(distC)) { maxB <- ifelse( ((distB %in% c("dchisq", "df")) && parmB1[1]==1), min(ylimchisq1, max(c(fB(x),fB(x0)),na.rm=TRUE)), max(c(fB(x),fB(x0))) ) ylim1 <- c( min(c(fA(x),fB(x),fA(x0),fB(x0)),na.rm=TRUE), max(maxA, maxB) ) } if (is.null(distB) & !is.null(distC)) { maxC <- ifelse( ((distC %in% c("dchisq", "df")) && parmC1[1]==1), min(ylimchisq1, max(c(fC(x),fC(x0)),na.rm=TRUE)), max(c(fC(x),fC(x0))) ) ylim1 <- c( min(c(fA(x),fC(x),fA(x0),fC(x0)),na.rm=TRUE), max(maxA, maxC) ) } if (!is.null(distB) & !is.null(distC)) { maxB <- ifelse( ((distB %in% c("dchisq", "df")) && parmB1[1]==1), min(ylimchisq1, max(c(fB(x),fB(x0)),na.rm=TRUE)), max(c(fB(x),fB(x0))) ) maxC <- ifelse( ((distC %in% c("dchisq", "df")) && parmC1[1]==1), min(ylimchisq1, max(c(fC(x),fC(x0)),na.rm=TRUE)), max(c(fC(x),fC(x0))) ) ylim1 <- c( min(c(fA(x),fB(x),fC(x),fA(x0),fB(x0),fC(x0)),na.rm=TRUE), max(maxA, maxB, maxC) ) } ylab=""; pdfA <- substring(distA, 1, 1)=="d" pdfB <- ifelse(is.null(distB), T, (substring(distB, 1, 1)=="d")) pdfC <- ifelse(is.null(distC), T, (substring(distC, 1, 1)=="d")) if (pdfA & is.null(distB) & is.null(distC)) {ylab <- "probability density function"} else { if (pdfA & pdfB & pdfB) ylab <- "probability density functions" } cdfA <- substring(distA, 1, 1)=="p" cdfB <- ifelse(is.null(distB), T, (substring(distB, 1, 1)=="p")) cdfC <- ifelse(is.null(distC), T, (substring(distB, 1, 1)=="p")) if (cdfA & is.null(distB) & is.null(distC)) {ylab <- "cumulative distribution function"} else { if (cdfA & cdfB & cdfB) ylab <- "cumulative distribution functions" } if (discrete.pdfA) { plot(x0, fA(x0), ylim=ylim1, type="h", xlab=xlab, ylab=ylab, col=col[1], main=main) } else { plot(x, fA(x), ylim=ylim1, type="p", pch=20, cex=0.05, xlab=xlab, ylab=ylab, col=col[1], main=main) } if (discrete.pdfB) { curve(fB, x0[1], x0[length(x0)], length(x0), add=TRUE, type="h", col=col[2]) } if (!is.null(distB) & !discrete.pdfB) { curve(fB, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[2]) } if (discrete.pdfC) { curve(fC, x0[1], x0[length(x0)], length(x0), add=TRUE, type="h", col=col[3]) } if (!is.null(distC) & !discrete.pdfC) { curve(fC, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[3]) } if (discrete.pdfA & discrete.pdfB & !discrete.pdfC) { x1 <- x0[ fA(x0) > fB(x0) ]; x2 <- x0[ fA(x0) <= fB(x0) ] curve(fA, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[1]) curve(fB, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[2]) curve(fB, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[2]) curve(fA, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[1]) } if (discrete.pdfA & !discrete.pdfB & discrete.pdfC) { x1 <- x0[ fA(x0) > fC(x0) ]; x2 <- x0[ fA(x0) <= fC(x0) ] curve(fA, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[1]) curve(fC, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[3]) curve(fC, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[3]) curve(fA, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[1]) } if (!discrete.pdfA & discrete.pdfB & discrete.pdfC) { x1 <- x0[ fB(x0) > fC(x0) ]; x2 <- x0[ fB(x0) <= fC(x0) ] curve(fB, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[2]) curve(fC, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[3]) curve(fC, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[3]) curve(fB, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[2]) } if (discrete.pdfA & discrete.pdfB & discrete.pdfC) { x1 <- x0[ fA(x0) >= fB(x0) & fB(x0) >= fC(x0) ] x2 <- x0[ fA(x0) >= fC(x0) & fC(x0) >= fB(x0) ] x3 <- x0[ fB(x0) >= fA(x0) & fA(x0) >= fC(x0) ] x4 <- x0[ fB(x0) >= fC(x0) & fC(x0) >= fA(x0) ] x5 <- x0[ fC(x0) >= fA(x0) & fA(x0) >= fB(x0) ] x6 <- x0[ fC(x0) >= fB(x0) & fB(x0) >= fA(x0) ] curve(fA, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[1]) curve(fB, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[2]) curve(fC, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[3]) curve(fA, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[1]) curve(fC, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[3]) curve(fB, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[2]) curve(fB, x3[1], x3[length(x3)], length(x3), add=TRUE, type="h", col=col[2]) curve(fA, x3[1], x3[length(x3)], length(x3), add=TRUE, type="h", col=col[1]) curve(fC, x3[1], x3[length(x3)], length(x3), add=TRUE, type="h", col=col[3]) curve(fB, x4[1], x4[length(x4)], length(x4), add=TRUE, type="h", col=col[2]) curve(fC, x4[1], x4[length(x4)], length(x4), add=TRUE, type="h", col=col[3]) curve(fA, x4[1], x4[length(x4)], length(x4), add=TRUE, type="h", col=col[1]) curve(fC, x5[1], x5[length(x5)], length(x5), add=TRUE, type="h", col=col[3]) curve(fA, x5[1], x5[length(x5)], length(x5), add=TRUE, type="h", col=col[1]) curve(fB, x5[1], x5[length(x5)], length(x5), add=TRUE, type="h", col=col[2]) curve(fC, x6[1], x6[length(x6)], length(x6), add=TRUE, type="h", col=col[3]) curve(fB, x6[1], x6[length(x6)], length(x6), add=TRUE, type="h", col=col[2]) curve(fA, x6[1], x6[length(x6)], length(x6), add=TRUE, type="h", col=col[1]) } if (!is.null(distC) & !discrete.pdfC) { curve(fC, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[3]) if (length(additional.x.range)>=2) curve(fC, min(additional.x.range), max(additional.x.range), num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[3]) } if (!is.null(distB) & !discrete.pdfB) { curve(fB, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[2]) if (length(additional.x.range)>=2) curve(fB, min(additional.x.range), max(additional.x.range), num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[2]) } if (!is.null(distA) & !discrete.pdfA) { curve(fA, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[1]) if (length(additional.x.range)>=2) curve(fA, min(additional.x.range), max(additional.x.range), num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[1]) } } # Garren, 2006 # Math 300 get.min.max <- function(xmin=NULL, xmax=NULL, distA, parmA1=NULL, parmA2=NULL, distB=NULL, parmB1=NULL, parmB2=NULL, distC=NULL, parmC1=NULL, parmC2=NULL) { # Outputs reasonable minimum and maximum boundaries for plotting functions. # Also, outputs the medians. # `xmin' and `xmax' represent the limiting values on the x-axis. # Reasonable values of `xmin' and `xmax' are attempted when originally set to NULL. # When plotting the pdf, some choices for `distA', `distB' and `distC' are # dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif, dbinom, # dgeom, dhyper, dpois, dnbinom. # When plotting the cdf, some choices for `distA', `distB' and `distC' are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, # pgeom, phyper, ppois, pnbinom. # The values of `distA', `distB' and `distC' must be in quotes if `xmin' or `xmax' is NULL. # `parmA1' and `parmA2' are numeric scalars or vectors of the parameters of `distA' # (excluding the first argument), where NULL is the default value; # likewise for the other two functions. # If only one function is to be plotted, then use `distA'. # If a distribution, say `distA', needs more than 2 parameters, then assign `parmA1' # to be a vector of all (as many as 5) parameters, and keep `parmA2' equal to NULL. # For example, to plot a noncentral F pdf with 7 and 9 df and ncp=2.5, type: # > plot2.dist( 0, 8, "df.nc", c(7,9,2.5) ) pmin <- 0.125 ; amountover <- 1 ; buffer <- 0.15 return.now <- FALSE ; medianA <- medianB <- medianC <- NULL parmA1 <- c(parmA1, parmA2); parmB1 <- c(parmB1, parmB2); parmC1 <- c(parmC1, parmC2) if (!is.null(xmin) & !is.null(xmax)) medianA <- medianB <- medianC <- mean(xmin,xmax) if ("dt.nc" %in% c(distA, distB, distC)) return.now <- TRUE if ("df.nc" %in% c(distA, distB, distC)) return.now <- TRUE if (distA=="pt" & length(parmA1)>1) return.now <- TRUE if (!is.null(distB)) {if (distB=="pt" & length(parmB1)>1) return.now <- TRUE} if (!is.null(distC)) {if (distC=="pt" & length(parmC1)>1) return.now <- TRUE} if (distA=="pf" & length(parmA1)>2) return.now <- TRUE if (!is.null(distB)) {if (distB=="pf" & length(parmB1)>2) return.now <- TRUE} if (!is.null(distC)) {if (distC=="pf" & length(parmC1)>2) return.now <- TRUE} if (return.now & (is.null(xmin) || is.null(xmax))) stop("Neither `xmin' nor `xmax' can be NULL for this distribution.") if (!return.now) { if (!is.null(distA)) qdistA <- paste("q",substring(distA,2),sep="") if (!is.null(distB)) qdistB <- paste("q",substring(distB,2),sep="") if (!is.null(distC)) qdistC <- paste("q",substring(distC,2),sep="") if (!is.null(distA)) { qfA <- function(z) { if (length(parmA1)==0) {return(match.fun(qdistA)(z))} if (length(parmA1)==1) {return(match.fun(qdistA)(z, parmA1[1]))} if (length(parmA1)==2) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2]))} if (length(parmA1)==3) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2], parmA1[3]))} if (length(parmA1)==4) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4]))} if (length(parmA1)==5) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4], parmA1[5]))} } } if (!is.null(distB)) { qfB <- function(z) { if (length(parmB1)==0) {return(match.fun(qdistB)(z))} if (length(parmB1)==1) {return(match.fun(qdistB)(z, parmB1[1]))} if (length(parmB1)==2) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2]))} if (length(parmB1)==3) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2], parmB1[3]))} if (length(parmB1)==4) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4]))} if (length(parmB1)==5) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4], parmB1[5]))} } } if (!is.null(distC)) { qfC <- function(z) { if (length(parmC1)==0) {return(match.fun(qdistC)(z))} if (length(parmC1)==1) {return(match.fun(qdistC)(z, parmC1[1]))} if (length(parmC1)==2) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2]))} if (length(parmC1)==3) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2], parmC1[3]))} if (length(parmC1)==4) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4]))} if (length(parmC1)==5) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4], parmC1[5]))} } } xminA.temp <- NULL; xmaxA.temp <- NULL; xminB.temp <- NULL; xmaxB.temp <- NULL xminC.temp <- NULL; xmaxC.temp <- NULL xmin2A.temp <- NULL; xmax2A.temp <- NULL; xmin2B.temp <- NULL; xmax2B.temp <- NULL xmin2C.temp <- NULL; xmax2C.temp <- NULL if (is.null(xmin)) { if (!is.null(distA)) xminA.temp <- qfA(pmin) if (!is.null(distB)) xminB.temp <- qfB(pmin) if (!is.null(distC)) xminC.temp <- qfC(pmin) } else { xminA.temp <- xmin; xminB.temp <- xmin; xminC.temp <- xmin } if (is.null(xmax)) { if (!is.null(distA)) xmaxA.temp <- qfA(1-pmin) if (!is.null(distB)) xmaxB.temp <- qfB(1-pmin) if (!is.null(distC)) xmaxC.temp <- qfC(1-pmin) } else { xmaxA.temp <- xmax; xmaxB.temp <- xmax; xmaxC.temp <- xmax } if (!is.null(distA)) { xmin2A.temp <- xminA.temp - (xmaxA.temp-xminA.temp)*amountover xmax2A.temp <- xmaxA.temp + (xmaxA.temp-xminA.temp)*amountover qfA0 <- ifelse( is.na(qfA(0)), -Inf, qfA(0) ) qfA1 <- ifelse( is.na(qfA(1)), Inf, qfA(1) ) left.bounded <- ( xmin2A.temp < qfA0 ) right.bounded <- ( xmax2A.temp > qfA1 ) xmin2A.temp <- max( xmin2A.temp, qfA0 ) xmax2A.temp <- min( xmax2A.temp, qfA1 ) if (left.bounded) xmin3A.temp <- xmin2A.temp - (xmax2A.temp - xmin2A.temp) * buffer if (right.bounded) xmax3A.temp <- xmax2A.temp + (xmax2A.temp - xmin2A.temp) * buffer if (left.bounded) xmin2A.temp <- xmin3A.temp if (right.bounded) xmax2A.temp <- xmax3A.temp } if (!is.null(distB)) { xmin2B.temp <- xminB.temp - (xmaxB.temp-xminB.temp)*amountover xmax2B.temp <- xmaxB.temp + (xmaxB.temp-xminB.temp)*amountover qfB0 <- ifelse( is.na(qfB(0)), -Inf, qfB(0) ) qfB1 <- ifelse( is.na(qfB(1)), Inf, qfB(1) ) left.bounded <- ( xmin2B.temp < qfB0 ) right.bounded <- ( xmax2B.temp > qfB1 ) xmin2B.temp <- max( xmin2B.temp, qfB0 ) xmax2B.temp <- min( xmax2B.temp, qfB1 ) if (left.bounded) xmin3B.temp <- xmin2B.temp - (xmax2B.temp - xmin2B.temp) * buffer if (right.bounded) xmax3B.temp <- xmax2B.temp + (xmax2B.temp - xmin2B.temp) * buffer if (left.bounded) xmin2B.temp <- xmin3B.temp if (right.bounded) xmax2B.temp <- xmax3B.temp } if (!is.null(distC)) { xmin2C.temp <- xminC.temp - (xmaxC.temp-xminC.temp)*amountover xmax2C.temp <- xmaxC.temp + (xmaxC.temp-xminC.temp)*amountover qfC0 <- ifelse( is.na(qfC(0)), -Inf, qfC(0) ) qfC1 <- ifelse( is.na(qfC(1)), Inf, qfC(1) ) left.bounded <- ( xmin2C.temp < qfC0 ) right.bounded <- ( xmax2C.temp > qfC1 ) xmin2C.temp <- max( xmin2C.temp, qfC0 ) xmax2C.temp <- min( xmax2C.temp, qfC1 ) if (left.bounded) xmin3C.temp <- xmin2C.temp - (xmax2C.temp - xmin2C.temp) * buffer if (right.bounded) xmax3C.temp <- xmax2C.temp + (xmax2C.temp - xmin2C.temp) * buffer if (left.bounded) xmin2C.temp <- xmin3C.temp if (right.bounded) xmax2C.temp <- xmax3C.temp } if (is.null(xmin)) xmin <- min(xmin2A.temp, xmin2B.temp, xmin2C.temp) if (is.null(xmax)) xmax <- max(xmax2A.temp, xmax2B.temp, xmax2C.temp) if (xmin>xmax) {temp <- xmin; xmin <- xmax; xmax <- temp} medianA <- ifelse( is.null(distA), NA, qfA(0.5) ) medianB <- ifelse( is.null(distB), NA, qfB(0.5) ) medianC <- ifelse( is.null(distC), NA, qfC(0.5) ) } # End of `if (!return.now)' structure(list(xmin=xmin, xmax=xmax, medianA=medianA, medianB=medianB, medianC=medianC)) } # Garren, 2006 # Math 300 empirical.cdf <- function(x, y=NULL, col=c("black","red","darkgreen")) { # Graphs one or two empirical cumulative distribution functions. # `x' is the vector of observations whose empirical cdf is to be graphed. # `y' is an optional vector of observations whose empirical cdf is to be graphed. # `col' may be a scalar or vector of length 3, and specifies the colors of the plotted functions. # Preferably, all three colors should be different. col[1] and col[2] # correspond to `x' and `y', respectively. col[3] is used where # the two empirical cdfs overlap. Type `colors()' for selections. num.grid.points <- 10001; delta <- 0.1 xmin <- min(x,y)-(max(x,y)-min(x,y))*delta; xmax <- max(x,y)+(max(x,y)-min(x,y))*delta x0 <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points y0 <- rep(NA, num.grid.points+1) for (i in 0:num.grid.points) { y0[i] <- mean(x<=x0[i]) } plot(x0, y0, pch=20, cex=0.05, xlab=ifelse(is.null(y),"X","X, Y"), ylab=ifelse(is.null(y),"Cumulative Proportion","Cumulative Proportions"), main=ifelse(is.null(y),"Empirical Cumulative Distribution Function", "Empirical Cumulative Distribution Functions"), col=col[1]) if (!is.null(y)) { if (length(col)==1) { col <- rep(col, 3) } if (length(col)==2) { col <- c(col, col[2]) } f2 <- function(u) { f <- rep(NA, length(u)) for (i in 1:length(u)) { f[i] <- mean(y<=u[i]) } ; return(f) } color.index <- ( y0==f2(x0) ) + 2 curve(f2, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[color.index]) } } # Garren, 2006 # Math 300 plot.line <- function(x, y, xlab="X", ylab="Y", pch=19, col=c("black", "red"), ...) { # Constructs a scatterplot with the fitted regression line. # `x' is the vector of independent observations. # `y' is the vector of dependent observations. # `xlab' is the label for the x-axis. # `ylab' is the label for the y-axis. # `pch' is the plotting character, usually 19 to 25; 21 is a hollow circle. # Type `?points' for more details. # `col' may be a scalar or vector of length 2, and specifies the color. # col[1] is the color of the points, and col[2] is the color of the plotted line. # ...: optional arguments to `plot'. if (length(col)==1) col <- rep(col,2) z <- lm(y~x)$coefficients; intercept <- as.numeric(z[1]); slope <- as.numeric(z[2]) f <- function(q) {intercept+q*slope} main1 <- c("Simple Linear Regression Line", paste( ylab, "=", prettyNum(intercept), ifelse(slope>=0,"+","-"), prettyNum(abs(slope)), xlab ) ) ylim <- c( min(c(y,f(min(x)),f(max(x)))), max(c(y,f(min(x)),f(max(x)))) ) plot(x, y, ylim=ylim, type="p", xlab=xlab, ylab=ylab, main=main1, col=col[1], pch=pch, ...) lines( c(min(x),max(x)), c(f(min(x)), f(max(x))), col=col[2] ) } # Garren, 2005 # Math 300 plot.line2 <- function(x, y, xlab="X", ylab="Y", header=TRUE, col=c("black", "blue"), ...) { # Constructs a scatterplot with the fitted regression line. # `x' is the vector of independent observations. # `y' is the vector of dependent observations. # `xlab' is the label for the x-axis. # `ylab' is the label for the y-axis. # `header' indicates whether or not the least squares equation should be printed. # `col' may be a scalar or vector of length 2, and specifies the color. # col[1] is the color of the points, and col[2] is the color of the plotted line. # ...: optional arguments to `plot'. if (length(col)==1) col <- rep(col,2) z <- lm(y~x)$coefficients; intercept <- as.numeric(z[1]); slope <- as.numeric(z[2]) f <- function(q) {intercept+q*slope} main1 <- c("Simple Linear Regression Line", paste( ylab, "=", prettyNum(intercept), ifelse(slope>=0,"+","-"), prettyNum(abs(slope)), xlab ) ) if (!header) main1 <- NULL ylim <- c( min(c(y,f(min(x)),f(max(x)))), max(c(y,f(min(x)),f(max(x)))) ) plot(x, y, ylim=ylim, type="p", xlab=xlab, ylab=ylab, main=main1, col=col[1], pch=20, cex=2, ...) lines( c(min(x),max(x)), c(f(min(x)), f(max(x))), col=col[2], lwd=3 ) } # Garren, 2005 # Math 300 linegraph <- function(x, freq=TRUE, prob=NULL, col="red", ...) { # Constructs a line graph. # `x' is the vector of data. # If `freq' is TRUE and `prob' does not sum to 1, then frequencies (counts) are plotted. # If `freq' is FALSE or `prob' sums to 1, then relative frequencies are plotted. # `prob' is a vector of the probabilities or weights on `x', # and does not need to sum to one. If `prob' is NULL, # then all `x' values are equally weighted. # `col' is the color of the plotted lines. # ...: optional arguments to `plot'. x.old <- x; x <- union(x.old,x.old); if (is.null(prob)) { for (i in 1:length(x)) prob <- c( prob, sum(x.old==x[i]) ) } if (!freq) prob <- prob/sum(prob) if (sum(prob)==1 || !freq) {plot( x, prob/sum(prob), ylim=c(0,max(prob)/sum(prob)), type="h", ylab="RELATIVE FREQUENCY", col=col, ... )} else {plot( x, prob, ylim=c(0,max(prob)), type="h", ylab="FREQUENCY", col=col, ... )} } # Garren, 2005 # Math 300 df.nc <- function(x, df1, df2, ncp=0) { # Approximate probability density function (pdf) for the noncentral F-distribution. # `x' is the vector of quantiles. # `df1' and `df2' are the degrees of freedom. # `ncp' is the noncentrality parameter. delta <- 1e-2 # `delta' should not be too small or too large; 0.01 works well. if (ncp==0) {return(df(x,df1, df2))} else {return( ( pf( (x+delta/2), df1, df2, ncp ) - pf( (x-delta/2), df1, df2, ncp ) ) / delta )} } # Garren, 2006 # Math 300 dt.nc <- function(x, df, ncp=0) { # Approximate probability density function (pdf) for the noncentral t-distribution. # `x' is the vector of quantiles. # `df' is the degrees of freedom. # `ncp' is the noncentrality parameter. delta <- 1e-2 # `delta' should not be too small or too large; 0.01 works well. # When delta is 0.01, the maximum absolute error is 1.7e-6, # which may be compared to dnorm(0) whose value is 0.3989. if (ncp==0) {return(dt(x,df))} else {return( ( pt( (x+delta/2), df, ncp ) - pt( (x-delta/2), df, ncp ) ) / delta )} } # Garren, 2006 # Math 300 dlaplace <- function(x, mean=0, sd=1) { # Probability density function (pdf) for the Laplace (double exponential) distribution. # `x' is the vector of quantiles. # `mean' is the population mean. # `sd' is the population standard deviation. exp(-abs(x-mean)*sqrt(2)/sd)/(sd*sqrt(2)) } # Garren, 2005 # Math 300 plaplace <- function(q, mean=0, sd=1, lower.tail=TRUE) { # Cumulative distribution function (cdf) for the Laplace # (double exponential) distribution. # `q' is the vector of quantiles. # `mean' is the population mean. # `sd' is the population standard deviation. # `lower.tail' is logical; if TRUE, then probabilities are P[X <= x]; # otherwise, P[X > x]. p=(q>mean) - ((q>mean)*2-1) * exp(-abs(q-mean)*sqrt(2)/sd)/2 if (!lower.tail) p=1-p; return(p) } # Garren, 2005 # Math 300 qlaplace <- function(p, mean=0, sd=1) { # Quantile function for the Laplace (double exponential) distribution. # `p' is the vector of probabilities. # `mean' is the population mean. # `sd' is the population standard deviation. mean + ( 2*(p<=0.5)-1 ) * sd / sqrt(2) * log( 2 * (p*(p<=0.5)+(1-p)*(p>0.5)) ) } # Garren, 2005 # Math 300 rlaplace <- function(n, mean=0, sd=1) { # Random generation for the Laplace (double exponential) distribution. # `n' is the number of observations. # `mean' is the population mean. # `sd' is the population standard deviation. rexp(n, (1/sd)) * sample(c(-1, 1), n, replace=TRUE) / sqrt(2) + mean } # Garren, 2005 dtriang <- function(x, min=0, max=1) { # Probability density function (pdf) for the symmetric triangular distribution # with endpoints `min' and `max'. # `x' is the vector of quantiles. if (min==max) stop("Endpoints cannot be equal.") if (min>max) {temp=min; min=max; max=temp} ; c=(min+max)/2 2*(x-min)/(max-min)/(c-min)*(min<=x & x<=c) + 2*(max-x)/(max-min)/(max-c)*(c x]. if (min==max) stop("Endpoints cannot be equal.") if (min>max) {temp=min; min=max; max=temp} ; c=(min+max)/2 p=(q-min)^2/(max-min)/(c-min)*(min<=q&q<=c)+(1-(max-q)^2/(max-min)/(max-c))*(cmax) if (!lower.tail) p=1-p; return(p) } # Garren, 2006 qtriang <- function(p, min=0, max=1) { # Quantile function for the symmetric triangular distribution with endpoints `min' and `max'. # `p' is the vector of probabilities. if (min==max) stop("Endpoints cannot be equal.") if (min(p)<0 || max(p)>1) stop("Values of `p' must be between zero and one.") if (min>max) {temp=min; min=max; max=temp} min + (max-min)*( (0<=p&p<=0.5)*(0.5*sqrt(2*p))+(0.5max) {temp=min; min=max; max=temp}; ( runif(n,min,max) + runif(n,min,max) ) /2 } # Garren, 2006 # Math 300 abbreviation <- function(x, choices) { # Returns a value in `choices' specified by `x', which may be an abbreviation. # `x' is a character string, and consists of some or all letters in a # value in `choices' or may equal `choices'. # `choices' is a vector of character strings. answer <- x[1] if (length(x)==1) { for (i in 1:length(choices)) { if (!is.na(charmatch(x, choices[i]))) answer <- choices[i] } } return(answer) } # Garren, 2005 # Math 300 # G <- function(graph.name="Rplots.ps") { # # Plot the current graph, but only when using Linux. # dev.off(); system(paste("gs", graph.name)) # } # Garren, 2005 # Math 300 G <- function() { # Plot the current graph, but only when using Linux. dev.off(); system("cp Rplots.ps /home2/junk9.dir/junk9471.ps") } # Garren, 2007 # Math 300 ci.t.test <- function(x, conf.level=0.95, fpc=1) { # Generates a one-sample two-sided confidence interval on the population mean # based on the t-test. # `x' is a numeric vector of data values. # `conf.level' is the confidence level of the interval. # `fpc' is the finite population correction, and is used when sampling without replacement. if (fpc==1) { y <- as.vector(t.test(x, conf.level=conf.level)$conf.int) } else { y <- rep(NA, 2) y[1] <- mean(x) + qt( ((1-conf.level)/2), (length(x)-1) ) * sqrt( var(x)*fpc/length(x) ) y[2] <- mean(x) - qt( ((1-conf.level)/2), (length(x)-1) ) * sqrt( var(x)*fpc/length(x) ) } return(y) } # Garren, 2005 # Math 300 rstat <- function(num.stat=1, stat, rdist, n=1, ..., replace=FALSE, prob=NULL, conf.level=0.95) { # Random generation for a statistic of a distribution. # num.stat is the number of statistics `stat' to be generated. # Some choices for `stat': mean, median, sum, sd, var, min, max, ci.t.test, # where `ci.t.test' is a t-confidence interval. # `rdist' may be a vector of observations to be (sub)sampled, or may be a function. # Some choices for `rdist' when `rdist' is a function: # rnorm, rexp, rcauchy, rlaplace, rt, rchisq, rf, runif, rbinom, rgeo, rpois. # `n' is the number of observations used to compute each `stat'. # ...: optional arguments to `rdist', EXCLUDING the first argument, when `rdist' is # a function not a vector. # `replace': Should sampling be with replacement; # used only when `rdist' is a vector of observations. # `prob' is a vector of probability weights for obtaining the elements of the vector # being sampled, and is used only when `rdist' is a vector of observations. # For equal weights, keep `prob' equal to NULL. # `conf.level' is the confidence level of the interval, and is used only when # `stat' represents a confidence interval such as `ci.t.test'. y <- NULL is.ci <- length(match.fun(stat)(1:10))==2 if (is.ci) { if (is.numeric(rdist)) { fpc <- ifelse( replace, 1, 1-n/length(rdist) ) for (i in 1:num.stat) { y <- rbind(y, match.fun(stat)( sample(rdist,n,replace,prob), conf.level=conf.level, fpc=fpc)) } } else { for (i in 1:num.stat) { y <- rbind(y, match.fun(stat)(match.fun(rdist)(n, ...), conf.level=conf.level)) } } } else { if (is.numeric(rdist)) { for (i in 1:num.stat) { y <- c(y, match.fun(stat)( sample(rdist,n,replace,prob)) ) } } else { for (i in 1:num.stat) { y <- c(y, match.fun(stat)(match.fun(rdist)(n, ...))) } } } return(y) } # Garren, 2005 # Math 300 plot.ci <- function(ci, mu=NULL, plot.midpoints=TRUE, col=c("black", "red", "darkgreen", "purple")) { # Plots confidence intervals. # `ci' is an N by 2 matrix consisting of N two-sided confidence intervals. # `mu' is the value of the estimated parameter, # and is the population mean for t-confidence intervals. # `plot.midtpoints' is logical, to state whether or not the midpoints # of the intervals should be plotted in col[4]. # `col' may be a scalar or vector, and specifies the colors of the # plotted confidence intervals, the vertical line going through `mu', and the midpoints. if (length(col)==1) {col <- rep(col, 4)}; if (length(col)==2) {col <- c(col, col)} if (length(col)==3) {col <- c(col, col[1])} if (is.vector(ci)) { ci <- t(matrix(ci)) } if (dim(ci)[1]==2 & dim(ci)[2]!=2) ci <- t(ci) if (dim(ci)[2]!=2) stop("`ci' is not in the correct format.") main <- ifelse( is.null(mu), "", paste(c( prettyNum(100*mean( (ci[,1]<=mu)*(mu <=ci[,2]) )), "% of these confidence intervals contain ",prettyNum(mu),"." ), collapse =" ") ) plot(NA, NA, xlim=c(min(ci[,1],mu),max(ci[,2],mu)), ylim=c(0.5,dim(ci)[1]+0.5) , type="s", main=main, xlab=paste(c("CONFIDENCE INTERVAL",ifelse(dim(ci)[1]==1,"","S")),collapse =""), ylab="label") if (is.null(mu)) {col1 <- rep(col[3], dim(ci)[1])} else {col1 <- ifelse(ci[,1]<=mu & mu<=ci[,2], col[3], col[2])} for (i in 1:dim(ci)[1]) {lines( c(ci[i,1], ci[i,2]), c(i,i), col=col1[i])} midpoint <- (ci[,1]+ci[,2])/2 ; delta <- (max(ci[,2],mu)-min(ci[,1],mu))*0.002 if (!is.null(mu)) { lines( c(mu,mu), c(0.5,dim(ci)[1]+0.5), col=col[1]) } if (plot.midpoints) { for (i in 1:dim(ci)[1]) {lines( c(midpoint[i]-delta,midpoint[i]+delta), c(i,i), lwd=4, col=col[4])} } } # Garren, 2005 source2 <- function(course.num=300) { # Reads from the web the Rmacros for the MATH course. # `course.num' is the course number and may be numeric or character. source(paste("http://www.math.jmu.edu/~garrenst/math", course.num, ".dir/Rmacros", sep="")) } # Garren, 2005