R软件做判别分析
- 1、下载文档前请自行甄别文档内容的完整性,平台不提供额外的编辑、内容补充、找答案等附加服务。
- 2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
- 3、如文档侵犯您的权益,请联系客服反馈,我们会尽快为您处理(人工客服工作时间:9:00-18:30)。
R 软件做判别分析:
1.距离判别
(1)两样本
discriminiant.distance <- function(TrnX1, TrnX2, TstX = NULL, var.equal = FALSE)
{
if (is.null(TstX) == TRUE) TstX <- rbind(TrnX1,TrnX2)
if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
else if (is.matrix(TstX) != TRUE)
TstX <- as.matrix(TstX)
if (is.matrix(TrnX1) != TRUE) TrnX1 <- as.matrix(TrnX1)
if (is.matrix(TrnX2) != TRUE) TrnX2 <- as.matrix(TrnX2)
nx <- nrow(TstX)
blong <- matrix(rep(0, nx), nrow=1, byrow=TRUE,
dimnames=list("blong", 1:nx))
mu1 <- colMeans(TrnX1); mu2 <- colMeans(TrnX2)
if (var.equal == TRUE || var.equal == T){
S <- var(rbind(TrnX1,TrnX2))
w <- mahalanobis(TstX, mu2, S)- mahalanobis(TstX, mu1, S)
}
else{
S1 <-var(TrnX1); S2 <- var(TrnX2)
w <- mahalanobis(TstX, mu2, S2)- mahalanobis(TstX, mu1, S1) }
for (i in 1:nx){
if (w[i] > 0) blong[i] <- 1
else
blong[i] <- 2
}
blong
}
例1: 数据
classX1<-data.frame(
x1=c(6.60, 6.60, 6.10, 6.10, 8.40, 7.2, 8.40, 7.50,
7.50, 8.30, 7.80, 7.80),
x2=c(39.00,39.00, 47.00, 47.00, 32.00, 6.0, 113.00, 52.00, 52.00,113.00,172.00,172.00),
x3=c(1.00, 1.00, 1.00, 1.00, 2.00, 1.0, 3.50, 1.00,
3.50, 0.00, 1.00, 1.50),
x4=c(6.00, 6.00, 6.00, 6.00, 7.50, 7.0, 6.00, 6.00,
7.50, 7.50, 3.50, 3.00),
x5=c(6.00, 12.00, 6.00, 12.00, 19.00, 28.0, 18.00, 12.00,
6.00, 35.00, 14.00, 15.00),
x6=c(0.12, 0.12, 0.08, 0.08, 0.35, 0.3, 0.15, 0.16,
0.16, 0.12, 0.21, 0.21),
x7=c(20.00,20.00, 12.00, 12.00, 75.00, 30.0, 75.00, 40.00, 40.00,180.00, 45.00, 45.00)
)
classX2<-data.frame(
x1=c(8.40, 8.40, 8.40, 6.3, 7.00, 7.00, 7.00, 8.30,8.30, 7.2, 7.2, 7.2, 5.50, 8.40, 8.40, 7.50,7.50, 8.30, 8.30, 8.30, 8.30, 7.80, 7.80),
x2=c(32.0 ,32.00, 32.00, 11.0, 8.00, 8.00, 8.00,161.00,161.0, 6.0, 6.0, 6.0, 6.00,113.00,113.00, 52.00,52.00, 97.00,
97.00,89.00,56.00,172.00,283.00),
x3=c(1.00, 2.00, 2.50, 4.5, 4.50, 6.00, 1.50, 1.50,0.50, 3.5, 1.0, 1.0, 2.50, 3.50, 3.50, 1.00,1.00, 0.00, 2.50, 0.00, 1.50, 1.00, 1.00),
x4=c(5.00, 9.00, 4.00, 7.5, 4.50, 7.50, 6.00, 4.00,2.50, 4.0, 3.0, 6.0, 3.00, 4.50, 4.50, 6.00,7.50, 6.00, 6.00, 6.00, 6.00, 3.50, 4.50),
x5=c(4.00, 10.00, 10.00, 3.0, 9.00, 4.00, 1.00, 4.00,1.00, 12.0, 3.0, 5.0, 7.00, 6.00, 8.00, 6.00,8.00, 5.00, 5.00,10.00,13.00, 6.00, 6.00),
x6=c(0.35, 0.35, 0.35, 0.2, 0.25, 0.25, 0.25, 0.08,0.08, 0.30, 0.3, 0.3, 0.18, 0.15, 0.15, 0.16,0.16, 0.15, 0.15, 0.16, 0.25, 0.21, 0.18),
x7=c(75.00,75.00, 75.00, 15.0,30.00, 30.00, 30.00, 70.00,70.00, 30.0, 30.0, 30.0,18.00, 75.00, 75.00,
40.00,40.00,180.00,180.00,180.00,180.00,45.00,45.00)
)
source("discriminiant.distance.R")
discriminiant.distance(classX1, classX2, var.equal=TRUE) discriminiant.distance(classX1, classX2)
TrnX1<-data.frame(
X1=c(13.85, 22.31, 28.82,15.29, 28.79),
X2=c(2.79, 4.67, 4.63, 3.54, 4.90),
X3= c(7.80, 12.31, 16.18, 7.50, 16.12),
X4=c(49.60, 47.80, 62.15, 43.20, 58.10)
)
TrnX2<-data.frame(
X1=c(2.18, 3.85, 11.40, 3.66, 12.10),
X2=c(1.06, 0.80, 0.00, 2.42, 0.00),
X3=c(1.22, 4.06, 3.50, 2.14,5.68),
X4=c(20.60, 47.10, 0.00, 15.10, 0.00)
)
TrnX<-data.frame(
X1=c(8.85, 28.60),
X2=c(3.38, 2.40),
X3=c(5.17, 1.20),
X4=c(26.10, 127.0)
)
discriminiant.distance(TrnX1, TrnX2)
(2)多样本
distinguish.distance <- function(TrnX, TrnG, TstX = NULL, var.equal = FALSE){
if ( is.factor(TrnG) == FALSE){
mx <- nrow(TrnX); mg <- nrow(TrnG)
TrnX <- rbind(TrnX, TrnG)
TrnG <- factor(rep(1:2, c(mx, mg)))
}
if (is.null(TstX) == TRUE) TstX <- TrnX
if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
else if (is.matrix(TstX)!= TRUE)
TstX <- as.matrix(TstX)
if (is.matrix(TrnX)!= TRUE) TrnX <- as.matrix(TrnX)
nx <- nrow(TstX)
blong <- matrix(rep(0, nx), nrow=1,
dimnames=list("blong", 1:nx))
g <- length(levels(TrnG))
mu <- matrix(0, nrow=g, ncol=ncol(TrnX))
for (i in 1:g)
mu[i,] <- colMeans(TrnX[TrnG==i,])
D <-matrix(0, nrow=g, ncol=nx)
if (var.equal == TRUE || var.equal == T){
for (i in 1:g)
D[i,] <- mahalanobis(TstX, mu[i,], var(TrnX))
}
else{
for (i in 1:g)
D[i,] <- mahalanobis(TstX, mu[i,], var(TrnX[TrnG==i,]))
}
for (j in 1:nx){
dmin <- Inf
for (i in 1:g)
if (D[i,j] < dmin){
dmin <- D[i,j]; blong[j] <- i
}
}
blong
}
多总体距离判别:(要求数据矩阵或数据框)
X<- matrix(c(34.16, 7.44, 1.12, 7.87, 95.19, 69.30, 33.06, 6.34, 1.08, 6.77, 94.08, 69.70, 36.26, 9.24, 1.04, 8.97, 97.30, 68.80, 40.17, 13.45, 1.43, 13.88,
101.2, 66.2, 50.06, 23.03, 2.83, 23.74, 112.52, 63.3, 33.24, 6.24, 1.18, 22.9, 160.01, 65.4, 32.22, 4.22, 1.06, 20.7, 124.7, 68.7, 41.15, 10.08, 2.32, 32.84,
172.06, 65.85, 53.04, 25.74, 4.06, 34.87, 152.03, 63.5, 38.03, 11.2, 6.07, 27.84, 146.32, 66.8, 34.03, 5.41, 0.07, 5.2, 90.10, 69.5, 32.11, 3.02, 0.09, 3.14,85.15, 70.8, 44.12, 15.12, 1.08, 15.15, 103.12, 64.8, 54.17, 25.03, 2.11, 25.15, 110.14, 63.7, 28.07, 2.01, 0.07, 3.02, 81.22, 68.3), nrow=15, ncol=6, byrow=TRUE)
G<-gl(3,5)
Y<- matrix(c(50.22, 6.66, 1.08, 22.54,170.6, 65.2, 34.64, 7.33, 1.11, 7.78, 95.16, 69.3, 33.42, 6.22, 1.12, 22.95, 160.31, 68.3, 44.02, 15.36, 1.07, 16.45, 105.3, 64.2), nrow=4, ncol=6, byrow=TRUE)
distinguish.distance(X, G, Y,var.equal=TRUE)
Bayes判别:
(1)两样本
discriminiant.bayes <- function
(TrnX1, TrnX2, rate = 1, TstX = NULL, var.equal = FALSE){
if (is.null(TstX) == TRUE) TstX<-rbind(TrnX1,TrnX2)
if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
else if (is.matrix(TstX) != TRUE)
TstX <- as.matrix(TstX)
if (is.matrix(TrnX1)!= TRUE) TrnX1 <- as.matrix(TrnX1)
if (is.matrix(TrnX2)!= TRUE) TrnX2 <- as.matrix(TrnX2)
nx <- nrow(TstX)
blong <- matrix(rep(0, nx), nrow=1, byrow=TRUE,
dimnames=list("blong", 1:nx))
mu1 <- colMeans(TrnX1); mu2 <- colMeans(TrnX2)
if (var.equal == TRUE || var.equal == T){
S <- var(rbind(TrnX1,TrnX2)); beta <- 2*log(rate)
w <- mahalanobis(TstX, mu2, S)- mahalanobis(TstX, mu1, S) }
else{
S1 <- var(TrnX1); S2 <- var(TrnX2)
beta <- 2*log(rate) + log(det(S1)/det(S2))
w <- mahalanobis(TstX, mu2, S2)- mahalanobis(TstX, mu1, S2) }
for (i in 1:nx){
if (w[i] > beta)
blong[i] <- 1
else
blong[i] <- 2
}
blong
}
例2: 两样本
TrnX1<-matrix(
c(24.8, 24.1, 26.6, 23.5, 25.5, 27.4,
-2.0, -2.4, -3.0, -1.9, -2.1, -3.1),
ncol=2)
TrnX2<-matrix(
c(22.1, 21.6, 22.0, 22.8, 22.7, 21.5, 22.1, 21.4,
-0.7, -1.4, -0.8, -1.6, -1.5, -1.0, -1.2, -1.3),
ncol=2)
source("discriminiant.bayes.R")
discriminiant.bayes(TrnX1, TrnX2, rate=8/6, var.equal=TRUE)
(2)多样本
distinguish.bayes <- function(TrnX, TrnG, p = rep(1,
length(levels(TrnG))),TstX =NULL, var.equal = FALSE){
if ( is.factor(TrnG) == FALSE){
mx <- nrow(TrnX); mg <- nrow(TrnG)
TrnX <- rbind(TrnX, TrnG)
TrnG <- factor(rep(1:2, c(mx, mg)))
}
if (is.null(TstX) == TRUE) TstX <- TrnX
if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
else if (is.matrix(TstX) != TRUE)
TstX <- as.matrix(TstX)
if (is.matrix(TrnX) != TRUE) TrnX <- as.matrix(TrnX)
nx <- nrow(TstX)
blong <- matrix(rep(0, nx), nrow=1,
dimnames=list("blong", 1:nx))
g <- length(levels(TrnG))
mu <- matrix(0, nrow=g, ncol=ncol(TrnX))
for (i in 1:g)
mu[i,] <- colMeans(TrnX[TrnG==i,])
D <- matrix(0, nrow=g, ncol=nx)
if (var.equal == TRUE || var.equal == T){
for (i in 1:g){
d2 <- mahalanobis(TstX, mu[i,], var(TrnX))
D[i,] <- d2 - 2*log(p[i])
}
}
else{
for (i in 1:g){
S <- var(TrnX[TrnG==i,])
d2 <- mahalanobis(TstX, mu[i,], S)
D[i,] <- d2 - 2*log(p[i])-log(det(S))
}
}
for (j in 1:nx){
dmin <- Inf
for (i in 1:g)
if (D[i,j] < dmin){
dmin <- D[i,j]; blong[j] <- i
}
}
blong
}
X<- matrix(c(34.16, 7.44, 1.12, 7.87, 95.19, 69.30, 33.06, 6.34, 1.08, 6.77, 94.08, 69.70, 36.26, 9.24, 1.04, 8.97, 97.30, 68.80,
40.17, 13.45, 1.43, 13.88,
101.2, 66.2, 50.06, 23.03, 2.83, 23.74, 112.52, 63.3, 33.24, 6.24, 1.18, 22.9, 160.01, 65.4, 32.22, 4.22, 1.06, 20.7, 124.7, 68.7, 41.15, 10.08, 2.32, 32.84,
172.06, 65.85, 53.04, 25.74, 4.06, 34.87, 152.03, 63.5, 38.03, 11.2, 6.07, 27.84, 146.32, 66.8, 34.03, 5.41, 0.07, 5.2, 90.10, 69.5, 32.11, 3.02, 0.09, 3.14,85.15, 70.8, 44.12, 15.12, 1.08, 15.15, 103.12, 64.8, 54.17, 25.03, 2.11, 25.15, 110.14, 63.7, 28.07, 2.01, 0.07, 3.02, 81.22, 68.3), nrow=15, ncol=6, byrow=TRUE)
G<-gl(3,5)
Y<- matrix(c(50.22, 6.66, 1.08, 22.54,170.6, 65.2, 34.64, 7.33, 1.11, 7.78, 95.16, 69.3, 33.42, 6.22, 1.12, 22.95, 160.31, 68.3, 44.02, 15.36, 1.07, 16.45, 105.3, 64.2), nrow=4, ncol=6, byrow=TRUE)
distinguish.bayes(X, G, p = rep(1, length(levels(G))),Y, var.equal = TRUE)
(3)Fisher判别
discriminiant.fisher <- function(TrnX1, TrnX2, TstX = NULL){
if (is.null(TstX) == TRUE) TstX <- rbind(TrnX1,TrnX2)
if (is.vector(TstX) == TRUE) TstX <- t(as.matrix(TstX))
else if (is.matrix(TstX) != TRUE)
TstX <- as.matrix(TstX)
if (is.matrix(TrnX1) != TRUE) TrnX1 <- as.matrix(TrnX1)
if (is.matrix(TrnX2) != TRUE) TrnX2 <- as.matrix(TrnX2)
nx <- nrow(TstX)
blong <- matrix(rep(0, nx), nrow=1, byrow=TRUE, dimnames=list("blong", 1:nx))
n1 <- nrow(TrnX1); n2 <- nrow(TrnX2)
mu1 <- colMeans(TrnX1); mu2 <- colMeans(TrnX2)
S <- (n1-1)*var(TrnX1) + (n2-1)*var(TrnX2)
mu <- n1/(n1+n2)*mu1 + n2/(n1+n2)*mu2
w <- (TstX-rep(1,nx) %o% mu) %*% solve(S, mu2-mu1); for (i in 1:nx){
if (w[i] <= 0)
blong[i] <- 1
else
blong[i] <- 2
}
blong
}
多总体Fisher判别:(要求数据框)
library(MASS)
X<- matrix(c(34.16, 7.44, 1.12, 7.87, 95.19, 69.30, 33.06, 6.34, 1.08, 6.77, 94.08, 69.70, 36.26, 9.24, 1.04, 8.97, 97.30, 68.80, 40.17, 13.45, 1.43, 13.88,
101.2, 66.2, 50.06, 23.03, 2.83, 23.74, 112.52, 63.3, 33.24, 6.24, 1.18, 22.9, 160.01, 65.4, 32.22, 4.22, 1.06, 20.7, 124.7, 68.7, 41.15, 10.08, 2.32, 32.84,
172.06, 65.85, 53.04, 25.74, 4.06, 34.87, 152.03, 63.5, 38.03, 11.2, 6.07, 27.84, 146.32, 66.8, 34.03, 5.41, 0.07, 5.2, 90.10, 69.5, 32.11, 3.02, 0.09, 3.14,85.15, 70.8, 44.12, 15.12, 1.08, 15.15, 103.12, 64.8, 54.17, 25.03, 2.11, 25.15, 110.14, 63.7, 28.07, 2.01, 0.07, 3.02, 81.22, 68.3), nrow=15, ncol=6, byrow=TRUE)
Y<- data.frame(X, Sp = rep(c("1","2","3"), rep(5,3)))
Train<-data.frame(matrix(c(50.22, 6.66, 1.08, 22.54,170.6, 65.2, 34.64, 7.33, 1.11, 7.78, 95.16, 69.3, 33.42, 6.22, 1.12, 22.95, 160.31, 68.3, 44.02, 15.36, 1.07, 16.45, 105.3, 64.2), nrow=4, ncol=6, byrow=TRUE))
z <- lda(Sp ~ ., Y)
predict(z, Train )
多总体Bayes判别:(要求数据矩阵或数据框)
X<- matrix(c(34.16, 7.44, 1.12, 7.87, 95.19, 69.30, 33.06, 6.34, 1.08, 6.77, 94.08, 69.70, 36.26, 9.24, 1.04, 8.97, 97.30, 68.80, 40.17, 13.45, 1.43, 13.88,
101.2, 66.2, 50.06, 23.03, 2.83, 23.74, 112.52, 63.3, 33.24, 6.24, 1.18, 22.9, 160.01, 65.4, 32.22, 4.22, 1.06, 20.7, 124.7, 68.7, 41.15, 10.08, 2.32, 32.84,
172.06, 65.85, 53.04, 25.74, 4.06, 34.87, 152.03, 63.5, 38.03, 11.2, 6.07, 27.84, 146.32, 66.8, 34.03, 5.41, 0.07, 5.2, 90.10, 69.5, 32.11, 3.02, 0.09, 3.14,85.15, 70.8, 44.12, 15.12, 1.08, 15.15, 103.12, 64.8, 54.17, 25.03, 2.11, 25.15, 110.14, 63.7, 28.07, 2.01, 0.07, 3.02, 81.22, 68.3), nrow=15, ncol=6, byrow=TRUE)
G<-gl(3,5)
Y<- matrix(c(50.22, 6.66, 1.08, 22.54,170.6, 65.2, 34.64, 7.33, 1.11, 7.78, 95.16, 69.3, 33.42, 6.22, 1.12, 22.95, 160.31, 68.3, 44.02, 15.36, 1.07, 16.45, 105.3, 64.2), nrow=4, ncol=6, byrow=TRUE)
distinguish.bayes(X, G, p = rep(1, length(levels(G))),Y, var.equal = TRUE)
X<- iris[,1:4]
G<- gl(3,50)
Iris <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]), Sp =
rep(c("s","c","v"), rep(50,3)))
train <- sample(1:150, 75)
table(Iris$Sp[train])
## your answer may differ
## c s v
## 22 23 30
z <- lda(Sp ~ ., Iris, prior = c(1,1,1)/3, subset = train)
predict(z, Iris[-train, ])$class
## [1] s s s s s s s s s s s s s s s s s s s s s s s s s s s c c c
## [31] c c c c c c c v c c c c v c c c c c c c c c c c c v v v v v ## [61] v v v v v v v v v v v v v v v
(z1 <- update(z, . ~ . - Petal.W.))。