Chapter 2. Kernel estimation of the regression function

2.1 Functions

library(sm)

Cross validation error computation

# Leave-one-out cross-validation error of a kernel (local-constant,
# poly.index = 0) regression smoother fitted with sm.regression.
#
# x, y    : covariate and response vectors (same length)
# method  : bandwidth-selection method passed to sm.regression
#           ('cv', 'df', 'aicc'), used only when h is NULL
# h       : optional fixed bandwidth; when NULL the bandwidth is chosen
#           once on the full sample by `method` and then reused for every
#           leave-one-out fit
# weights : optional per-observation weights; the scalar-NA default means
#           equal weights
#
# Returns the weighted sum of squared leave-one-out prediction errors
# (callers divide by n to get the mean CV error).
Compute_CV=function(x,y,method,h=NULL,weights=NA)
{
  hopt=h
  # Select the bandwidth on the full sample if none was supplied
  if (is.null(h)) {hopt=sm.regression(x,y,method=method,display='none',poly.index=0)$h}
  n=length(x)
  # Equal weights by default. The length()==1 guard keeps the scalar-NA
  # sentinel working while still accepting a full weight vector: `if` on a
  # length-n logical is an error in R >= 4.2.
  if (length(weights) == 1 && is.na(weights)) weights=rep(1,n)
  CV=0
  for (i in seq_len(n))
  {
    # Refit without observation i, predict at x[i], accumulate the
    # weighted squared prediction error
    pred_i=sm.regression(x[-i],y[-i],h=hopt,eval.points=x[i],display='none',poly.index=0)$estimate
    CV=CV+(y[i]-pred_i)^2*weights[i]
  }
  return(CV)
}

ROC Curve

# Empirical ROC curve and AUC for a binary score.
#
# z    : 0/1 (or logical) true labels
# p1   : score / estimated P(Y = 1 | X) for each observation
# plot : if TRUE, draw the curve with the chance diagonal in orange
#
# Thresholds are the observed unique scores, padded with sentinels just
# below 0 and just above 1 so the curve reaches (1, 1) and (0, 0).
# AUC is obtained by the trapezoidal rule along the curve.
# Returns list(ROC = matrix with columns FPR/TPR, AUC = area under curve).
ROC=function(z,p1,plot=FALSE)
{
  thresholds=c(-0.0001,sort(unique(p1)),1.0001)
  n_thr=length(thresholds)
  scores_neg=p1[z==0]
  scores_pos=p1[z==1]
  # One (FPR, TPR) row per threshold: proportion of scores >= threshold
  # in the negative and positive groups respectively
  curve=t(vapply(thresholds,
                 function(s) c(mean(scores_neg>=s),mean(scores_pos>=s)),
                 numeric(2)))
  colnames(curve)=c("FPR","TPR")
  # Trapezoidal rule (FPR decreases along the rows, hence the sign)
  AUC=sum((curve[-n_thr,2]+curve[-1,2])*(curve[-n_thr,1]-curve[-1,1])/2)
  if (plot)
  {
    plot(curve,type='l',main=paste('AUC =', AUC))
    abline(0,1,col='orange')
  }
  return(list(ROC=curve,AUC=AUC))
}

Non parametric estimation of Bayes classifier

# Nonparametric plug-in estimate of the Bayes classifier.
# For each class v, P(Y = v | X = x) is estimated by kernel regression
# (sm.regression, local constant) of the indicator 1{Y = v} on X; each
# point is classified to the class with the largest estimated posterior.
#
# X    : covariates (vector or matrix; coerced to matrix)
# Y    : class labels (any atomic type; levels taken from sort(unique(Y)))
# X0   : optional new points to classify (scalar NA = none)
# plot : passed to ROC() in the two-class case (draws the ROC curve)
# me   : bandwidth-selection method for sm.regression when h is NULL
# h    : optional fixed bandwidth overriding automatic selection
#
# Returns Class/Prob/M_table/Err (and Class0/Prob0 when X0 is given),
# plus Auc in the two-class case (NA otherwise).
Classif_NP=function(X,Y,X0=NA,plot=FALSE,me='cv',h=NULL)
{
  X=as.matrix(X)
  n=length(Y)
  V=sort(unique(Y))          # class labels, in sorted order
  n_V=length(V)
  Prob=matrix(NA,n,n_V)      # leave-one-out posterior estimates, one column per class
  colnames(Prob)=V
  Class=rep(NA,n)
  # When new points X0 are supplied, prepare their prediction containers
  # (is.na(max(X0)) is the sentinel test for the scalar-NA default)
  if (!is.na(max(X0)))
  {
    X0=as.matrix(X0)
    P0=matrix(NA,nrow(X0),n_V)
    Class0=rep(NA,nrow(X0))
  }
  for (v in 1:n_V)
  {
    z=as.numeric(Y==V[v])    # indicator response 1{Y = V[v]}
    # Leave-one-out: fit without observation i, evaluate at all X, keep
    # the i-th fitted value (avoids the optimistic self-prediction)
    for (i in 1:n )
    {
      if (is.null(h)) {value=sm.regression(X[-i,],z[-i], eval.points=X,eval.grid=FALSE,display='none',method=me,poly.index=0)$estimate[i]}
      else {value=sm.regression(X[-i,],z[-i], eval.points=X,eval.grid=FALSE,display='none',h=h,poly.index=0)$estimate[i]}
      if (!is.na(value)) {Prob[i,v]=value}
      # NA estimate (x[i] too isolated for the bandwidth): fall back to 0
      if (is.na(value)) {Prob[i,v]=0#mean(z[-i])
      }
    }
    # Posterior estimates at the new points X0, fitted on the full sample;
    # NA predictions are backfilled with the class frequency mean(z)
    if (!is.na(max(X0))) {if (is.null(h)) {P0[,v]=sm.regression(X,z,eval.points=X0,eval.grid=FALSE,display='none',method=me,poly.index=0)$estimate}
      else {P0[,v]=sm.regression(X,z,eval.points=X0,eval.grid=FALSE,display='none',h=h,poly.index=0)$estimate}
    P0[is.na(P0[,v]),v]=mean(z)}
  }
  # Two-class case: ROC/AUC of the estimated P(Y = V[2] | X)
  if (n_V==2) {Roc=ROC(Y==V[2],Prob[,2],plot)}
  Class=V[apply(Prob,1,which.max)]   # Bayes rule: argmax of the posteriors
  V_est=sort(unique(Class))
  if (length(V_est)==n_V){M_table=table(Y,Class)}
  else {
    # Some class was never predicted: rebuild the confusion matrix with a
    # zero column for each missing class so it stays n_V x n_V
    M_table=matrix(0,n_V,n_V)
    M_table0=table(Y,Class)
    for (j in 1:length(V_est)) {M_table[,which(V==V_est[j])]=M_table0[,j]}
  }
  Err=1-(sum(diag(M_table))/sum(M_table))   # misclassification rate
  if (!is.na(max(X0))) {Class0=V[apply(P0,1,which.max)]}
  # NOTE: ifelse() leaves Roc$AUC unevaluated when n_V != 2, so the
  # undefined Roc is harmless in the multi-class case
  if (!is.na(max(X0))) {return(list(Class=Class, Prob=Prob, M_table=M_table, Err=Err, Class0=Class0,Prob0=P0,Auc=ifelse(n_V==2,Roc$AUC,NA)))}
  else {return(list(Class=Class, Prob=Prob, M_table=M_table, Err=Err,Auc=ifelse(n_V==2,Roc$AUC,NA)))}
}

2.2 Regression

2.2.1 Simulation

Regression 1.a

\[Y=10e^{-3X}+e^{-\frac{X}{2}}\epsilon, \] avec \(X \sim \mathcal{U}([0;3])\) et \(\epsilon \sim \mathcal{N}(0,1)\).

n=100
x=runif(n,0,3)
e=rnorm(n,0,exp(-x/2))
y=10*exp(-3*x)+e
M=1000
gr=seq(min(x),max(x),len=M)
plot(gr,10*exp(-3*gr),ylab='y',type='l',ylim=c(-1,10))
 lines(x,y,col="red",type="p")
sm.regression(x,y,h=0.01,col="blue",eval.points=gr,add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=.1,col="orange",eval.points=gr,add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=1,col="green",eval.points=gr,add=T,nbins=0,poly.index=0)
legend(x="topright",leg=c('h=0.01','h=0.1','h=1'), col=c("blue","orange","green"),lty=1)

M=1000
gr=seq(min(x),max(x),len=M)
plot(gr,10*exp(-3*gr),ylab='y',type='l',ylim=c(-1,10))
lines(x,y,col="red",type="p")
 
sm.regression(x,y,method="cv",col="blue",eval.points=gr,add=T,nbins=0,poly.index=0)
reg3=sm.regression(x,y,method="cv",col="blue",eval.points=x,add=T,nbins=0,poly.index=0,display='none')
sm.regression(x,y,method="df",col="orange",add=T,eval.points=gr,nbins=0,poly.index=0)
reg1=sm.regression(x,y,method="df",eval.points=x,nbins=0,add=T,poly.index=0,display='none')
sm.regression(x,y,method="aicc",col="green",add=T,eval.points=gr,nbins=0,poly.index=0)
reg2=sm.regression(x,y,method="aicc",add=T,eval.points=x,nbins=0,poly.index=0,display='none')
legend(x="topright",leg=c('cv',"df",'aicc'), col=c("blue","orange","green"),lty=1)

plot(reg1$estimate,y,col="orange")
lines(reg2$estimate,y,col="green",type='p')
lines(reg3$estimate,y,col="blue",type='p')
abline(0,1)

Erreur quadratique et Validation croisée
# Erreur quadratique
mean((reg1$estimate-y)^2)
[1] 0.4620179
mean((reg2$estimate-y)^2)
[1] 0.2220598
mean((reg3$estimate-y)^2)
[1] 0.2192335
#Validation croisée
Compute_CV(x,y,'cv')/n
[1] 0.3658954
Compute_CV(x,y,'df')/n
[1] 0.5387251
Compute_CV(x,y,'aicc')/n
[1] 0.3665102
# CV avec moyenne Y
n=length(x)
CV_y=0
for (j in 1:n) 
  {
  CV_y=CV_y+(y[j]-mean(y[-j]))^2
} 
CV_y/n
[1] 5.034279
# Variance non corrigée e
var(e)*(n-1)/n
[1] 0.2506579

Regression 1.b

\[Y=10e^{-3X}+e^{-\frac{X}{2}}\epsilon, \] avec \(X \sim \mathcal{E}(1)\) et \(\epsilon \sim \mathcal{N}(0,1)\).

n=100
x=rexp(n,1)
e=rnorm(n,0,exp(-x/2))
y=10*exp(-3*x)+e
M=1000
gr=seq(min(x),max(x),len=M)
plot(gr,10*exp(-3*gr),ylab='y',type='l',ylim=c(-1,10))
 lines(x,y,col="red",type="p")
sm.regression(x,y,h=0.01,col="blue",eval.points=gr,add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=.1,col="orange",eval.points=gr,add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=1,col="green",eval.points=gr,add=T,nbins=0,poly.index=0)
legend(x="topright",leg=c('h=0.01','h=0.1','h=1'), col=c("blue","orange","green"),lty=1)

M=1000
gr=seq(min(x),max(x),len=M)
plot(gr,10*exp(-3*gr),ylab='y',type='l',ylim=c(-1,10))
lines(x,y,col="red",type="p")
 
sm.regression(x,y,method="cv",col="blue",eval.points=gr,add=T,nbins=0,poly.index=0)
reg3=sm.regression(x,y,method="cv",col="blue",eval.points=x,add=T,nbins=0,poly.index=0,display='none')
sm.regression(x,y,method="df",col="orange",add=T,eval.points=gr,nbins=0,poly.index=0)
reg1=sm.regression(x,y,method="df",eval.points=x,nbins=0,add=T,poly.index=0,display='none')
sm.regression(x,y,method="aicc",col="green",add=T,eval.points=gr,nbins=0,poly.index=0)
reg2=sm.regression(x,y,method="aicc",add=T,eval.points=x,nbins=0,poly.index=0,display='none')
legend(x="topright",leg=c('cv',"df",'aicc'), col=c("blue","orange","green"),lty=1)

plot(reg1$estimate,y,col="orange")
lines(reg2$estimate,y,col="green",type='p')
lines(reg3$estimate,y,col="blue",type='p')
abline(0,1)

Erreur quadratique et Validation croisée
# Erreur quadratique
mean((reg1$estimate-y)^2)
[1] 1.720602
mean((reg2$estimate-y)^2)
[1] 0.5800625
mean((reg3$estimate-y)^2)
[1] 0.5238565
#Validation croisée
Compute_CV(x,y,'cv')/n
[1] 0.6819319
Compute_CV(x,y,'df')/n
[1] 1.832334
Compute_CV(x,y,'aicc')/n
[1] 0.7122753
# CV avec moyenne Y
n=length(x)
CV_y=0
for (j in 1:n) 
  {
  CV_y=CV_y+(y[j]-mean(y[-j]))^2
} 
CV_y/n
[1] 9.422851
# Variance non corrigée e
var(e)*(n-1)/n
[1] 0.5635725

Regression 2.a

\[Y=7\cos(7X)+10e^{-3X}+e^{-\frac{X}{2}}\epsilon, \] avec \(X \sim \mathcal{U}([0;3])\) et \(\epsilon \sim \mathcal{N}(0,1)\).

n=1000
x=runif(n,0,3)
e=rnorm(n,0,exp(-x/2))
y=10*exp(-3*x)+7*cos(7*x)+e
M=1000
gr=seq(min(x),max(x),len=M)
plot(gr,10*exp(-3*gr)+7*cos(7*gr),ylab='y',type='l')
 lines(x,y,col="red",type="p")
sm.regression(x,y,h=0.01,col="blue",eval.points=gr,add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=.1,col="orange",eval.points=gr,add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=1,col="green",eval.points=gr,add=T,nbins=0,poly.index=0)
legend(x="topright",leg=c('h=0.01','h=0.1','h=1'), col=c("blue","orange","green"),lty=1)

M=1000
gr=seq(min(x),max(x),len=M)
plot(gr,10*exp(-3*gr)+7*cos(7*gr),ylab='y',type='l'
     )
lines(x,y,col="red",type="p")
 
sm.regression(x,y,method="cv",col="blue",eval.points=gr,add=T,nbins=0,poly.index=0)
reg3=sm.regression(x,y,method="cv",col="blue",eval.points=x,add=T,nbins=0,poly.index=0,display='none')
sm.regression(x,y,method="df",col="orange",add=T,eval.points=gr,nbins=0,poly.index=0)
reg1=sm.regression(x,y,method="df",eval.points=x,nbins=0,add=T,poly.index=0,display='none')
sm.regression(x,y,method="aicc",col="green",add=T,eval.points=gr,nbins=0,poly.index=0)
reg2=sm.regression(x,y,method="aicc",add=T,eval.points=x,nbins=0,poly.index=0,display='none')
legend(x="topright",leg=c('cv',"df",'aicc'), col=c("blue","orange","green"),lty=1)

plot(reg1$estimate,y,col="orange")
lines(reg2$estimate,y,col="green",type='p')
lines(reg3$estimate,y,col="blue",type='p')
abline(0,1)

Erreur quadratique et Validation croisée
# Erreur quadratique
mean((reg1$estimate-y)^2)
[1] 12.88788
mean((reg2$estimate-y)^2)
[1] 0.4994431
mean((reg3$estimate-y)^2)
[1] 0.4994409
#Validation croisée
Compute_CV(x,y,'cv')/n
[1] 0.551335
Compute_CV(x,y,'df')/n
[1] 13.08754
Compute_CV(x,y,'aicc')/n
[1] 0.8338718
# CV avec moyenne Y
n=length(x)
CV_y=0
for (j in 1:n) 
  {
  CV_y=CV_y+(y[j]-mean(y[-j]))^2
} 
CV_y/n
[1] 29.42165
# Variance non corrigée e
var(e)*(n-1)/n
[1] 0.3026617

Regression 2.b

\[Y=7\cos(7X)+10e^{-3X}+e^{-\frac{X}{2}}\epsilon, \] avec \(X \sim \mathcal{E}(1)\) et \(\epsilon \sim \mathcal{N}(0,1)\).

n=1000
x=rexp(n,1)
e=rnorm(n,0,exp(-x/2))
y=10*exp(-3*x)+7*cos(7*x)+e
M=1000
gr=seq(min(x),max(x),len=M)
plot(gr,10*exp(-3*gr)+7*cos(7*gr),ylab='y',type='l')
 lines(x,y,col="red",type="p")
sm.regression(x,y,h=0.01,col="blue",eval.points=gr,add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=.1,col="orange",eval.points=gr,add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=1,col="green",eval.points=gr,add=T,nbins=0,poly.index=0)
legend(x="topright",leg=c('h=0.01','h=0.1','h=1'), col=c("blue","orange","green"),lty=1)

M=1000
gr=seq(min(x),max(x),len=M)
plot(gr,10*exp(-3*gr)+7*cos(7*gr),ylab='y',type='l')
lines(x,y,col="red",type="p")
 
sm.regression(x,y,method="cv",col="blue",eval.points=gr,add=T,nbins=0,poly.index=0)
reg3=sm.regression(x,y,method="cv",col="blue",eval.points=x,add=T,nbins=0,poly.index=0,display='none')
sm.regression(x,y,method="df",col="orange",add=T,eval.points=gr,nbins=0,poly.index=0)
reg1=sm.regression(x,y,method="df",eval.points=x,nbins=0,add=T,poly.index=0,display='none')
sm.regression(x,y,method="aicc",col="green",add=T,eval.points=gr,nbins=0,poly.index=0)
reg2=sm.regression(x,y,method="aicc",add=T,eval.points=x,nbins=0,poly.index=0,display='none')
legend(x="topright",leg=c('cv',"df",'aicc'), col=c("blue","orange","green"),lty=1)

plot(reg1$estimate,y,col="orange")
lines(reg2$estimate,y,col="green",type='p')
lines(reg3$estimate,y,col="blue",type='p')
abline(0,1)

Erreur quadratique et Validation croisée
# Erreur quadratique
mean((reg1$estimate-y)^2)
[1] 35.05045
mean((reg2$estimate-y)^2)
[1] 0.9064671
mean((reg3$estimate-y)^2)
[1] 0.9064595
#Validation croisée
Compute_CV(x,y,'cv')/n
[1] 2.217656
Compute_CV(x,y,'df')/n
[1] 35.47329
Compute_CV(x,y,'aicc')/n
[1] 39.39004
# CV avec moyenne Y
n=length(x)
CV_y=0
for (j in 1:n) 
  {
  CV_y=CV_y+(y[j]-mean(y[-j]))^2
} 
CV_y/n
[1] 41.78547
# Variance non corrigée e
var(e)*(n-1)/n
[1] 0.5031912
Erreur quadratique et Validation croisée
# Erreur quadratique
mean((reg1$estimate-y)^2)
[1] 35.05045
mean((reg2$estimate-y)^2)
[1] 0.9064671
mean((reg3$estimate-y)^2)
[1] 0.9064595
#Validation croisée
Compute_CV(x,y,'cv')/n
[1] 2.217656
Compute_CV(x,y,'df')/n
[1] 35.47329
Compute_CV(x,y,'aicc')/n
[1] 39.39004
# CV avec moyenne Y
n=length(x)
CV_y=0
for (j in 1:n) 
  {
  CV_y=CV_y+(y[j]-mean(y[-j]))^2
} 
CV_y/n
[1] 41.78547
# Variance non corrigée e
var(e)*(n-1)/n
[1] 0.5031912

2.2.2 Real world data

# load data
library(MASS)
data(mcycle)
head(mcycle)
  times accel
1   2.4   0.0
2   2.6  -1.3
3   3.2  -2.7
4   3.6   0.0
5   4.0  -2.7
6   6.2  -2.7
x=mcycle$times
y=mcycle$accel
n=length(x)
# plot data
plot(x, y, xlab = "Time (ms)", ylab = "Acceleration (g)")

sm.regression(x,y,h=0.1,col="blue",eval.points=sort(x),add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=1,col="orange",eval.points=sort(x),add=T,nbins=0,poly.index=0)
sm.regression(x,y,h=10,col="green",eval.points=sort(x),add=T,nbins=0,poly.index=0)
legend(x="topright",leg=c('h=0.1','h=1','h=10'), col=c("blue","orange","green"),lty=1)

plot(x,y,col="red",type="p")
reg3=sm.regression(x,y,method="cv",col="blue", eval.points=sort(x),add=T,nbins=0,poly.index=0)
legend(x="topright",leg=c('cv',"df",'aicc'), col=c("blue","orange","green"),lty=1)

reg1=sm.regression(x,y,method="df",col="orange",eval.points=sort(x),add=T,nbins=0,poly.index=0)
reg2=sm.regression(x,y,method="aicc",col="green", eval.points=sort(x),add=T,nbins=0,poly.index=0)

plot(reg1$estimate,y[order(x)],col="orange")
lines(reg2$estimate,y[order(x)],col="green",type='p')
lines(reg3$estimate,y[order(x)],col="blue",type='p')
abline(0,1)

Erreur quadratique et Validation croisée
# Erreur quadratique
mean((reg1$estimate-y[order(x)])^2)
[1] 929.8045
mean((reg2$estimate-y[order(x)])^2)
[1] 487.3774
mean((reg3$estimate-y[order(x)])^2)
[1] 444.4707
#Validation croisée
Compute_CV(x,y,'cv')/n
[1] 602.9729
Compute_CV(x,y,'df')/n
[1] 991.0706
Compute_CV(x,y,'aicc')/n
[1] 896.2287
# CV avec moyenne Y
n=length(x)
CV_y=0
for (j in 1:n) 
  {
  CV_y=CV_y+(y[j]-mean(y[-j]))^2
} 
CV_y/n
[1] 2352.71

2.3 Supervised classification (Discrimination)

2.3.1 Simulation

\[f_X(x)=\alpha f_{\mathcal{N}(0,2)}(x)+(1-\alpha)f_{\mathcal{N}(4,1)}(x)\]

# Draw n observations from the two-component mixture
#   Z ~ Bernoulli(alpha),  X | Z=1 ~ l1(p1),  X | Z=0 ~ l0(p0)
# l0 / l1 are base-R distribution suffixes ('norm', 'unif', ...); p0 / p1
# are their parameter vectors (e.g. c(mean, sd) for 'norm').
# Returns list(x = sample, z = latent component labels).
rmixing2=function(n,alpha,l0,l1,p0,p1)
# Generate data from a mixing model
{
  z=rbinom(n,1,alpha)                         # latent component labels
  # do.call on the "r<name>" sampler replaces the fragile
  # eval(parse(text = ...)) string construction; draw order (z, f1, f0)
  # is kept so the RNG stream matches the original implementation
  f1=do.call(paste0('r',l1),c(list(n),as.list(p1)))
  f0=do.call(paste0('r',l0),c(list(n),as.list(p0)))
  x=z*f1+(1-z)*f0                             # select the active component
  return(list(x=x,z=z))
}
# Density of the two-component mixture and the true posteriors.
# t : evaluation points; alpha, l0, l1, p0, p1 as in rmixing2.
# Returns list(mix = mixture density f_X(t),
#              p0_t = P(Y=0 | X=t), p1_t = P(Y=1 | X=t)).
dmixing2=function(t,alpha,l0,l1,p0,p1)
# draw the density of the mixing model
# p1_t (resp. p0_t) returns the true P(Y=1|X) (resp. P(Y=0|X))
{
  # Each weighted component density is computed once (the original
  # re-evaluated d(l1) and d(l0) for every posterior) and do.call on the
  # "d<name>" density replaces the eval(parse(text = ...)) construction
  d1=alpha*do.call(paste0('d',l1),c(list(t),as.list(p1)))
  d0=(1-alpha)*do.call(paste0('d',l0),c(list(t),as.list(p0)))
  mix=d1+d0
  p1_t=d1/mix        # Bayes' rule: posterior of component 1
  p0_t=d0/mix        # posterior of component 0
  return(list(mix=mix,p0_t=p0_t,p1_t=p1_t))
}

#Example  
n=300
alpha=0.3
l0='norm'
p0=c(4,1)
l1='norm'
p1=c(0,2)
s=seq(-10,10,0.001)

# Generate data
r=rmixing2(n,alpha,l0,l1,p0,p1)
x=r$x
z=r$z

Compute true P(Y=0|X) and P(Y=1|X)

\[P(Y=1|x)=\frac{\alpha f_{\mathcal{N}(0,2)}(x)}{f_X(x)}\] \[P(Y=0|x)=\frac{(1-\alpha) f_{\mathcal{N}(4,1)}(x)}{f_X(x)}\]

p0_x=dmixing2(x,alpha,l0,l1,p0,p1)$p0_t
p1_x=dmixing2(x,alpha,l0,l1,p0,p1)$p1_t
u=sort(x)

Re_cv=Classif_NP(x,z,me='cv',plot=FALSE)
Re_aicc=Classif_NP(x,z,me='aicc',plot=FALSE)
Re_df=Classif_NP(x,z,me='df',plot=FALSE)

plot(u,p0_x[order(x)],ylab='P(Y=0|x)',type='l')
lines(u,Re_cv$Prob[order(x),1],col="blue")
lines(u,Re_df$Prob[order(x),1],col="orange")
lines(u,Re_aicc$Prob[order(x),1],col="green")
legend(x="topleft",leg=c('true','cv',"df",'aicc'), col=c('black',"blue","orange","green"),lty=1)

plot(u,p1_x[order(x)],ylab='P(Y=1|x)',type='l')
lines(u,Re_cv$Prob[order(x),2],col="blue")
lines(u,Re_df$Prob[order(x),2],col="orange")
lines(u,Re_aicc$Prob[order(x),2],col="green")
legend(x="topright",leg=c('true','cv',"df",'aicc'), 
col=c('black',"blue","orange","green"),lty=1)

Compare to Bayes’ Classifier

ROC curves and AUC

ROC_t=ROC(z,p1_x,plot = TRUE)
ROC_cv=ROC(z,Re_cv$Prob[,2],plot=FALSE)$ROC
ROC_df=ROC(z,Re_df$Prob[,2],plot=FALSE)$ROC
ROC_aicc=ROC(z,Re_aicc$Prob[,2],plot=FALSE)$ROC
lines(ROC_cv[,1],ROC_cv[,2],col="blue")
lines(ROC_df[,1],ROC_df[,2],col="orange")
lines(ROC_aicc[,1],ROC_aicc[,2],col="green")
legend(x="topright",leg=c('true','cv',"df",'aicc'), 
col=c('black',"blue","orange","green"),lty=1)

ROC_t$AUC
[1] 0.957234
Re_cv$Auc
[1] 0.9492939
Re_df$Auc
[1] 0.9521081
Re_aicc$Auc
[1] 0.9523594

Confusion matrices and errors

Class_Bayes=as.numeric(p1_x>0.5)
(M_table=table(z,Class_Bayes))
   Class_Bayes
z     0   1
  0 197   4
  1  19  80
(err=1-sum(diag(M_table))/n)
[1] 0.07666667
Re_cv$M_table
   Class
Y     0   1
  0 196   5
  1  19  80
Re_cv$Err
[1] 0.08
Re_df$M_table
   Class
Y     0   1
  0 199   2
  1  23  76
Re_df$Err
[1] 0.08333333
Re_aicc$M_table
   Class
Y     0   1
  0 201   0
  1  28  71
Re_aicc$Err
[1] 0.09333333

2.3.2 Real world data

Dopage

load('Dopage.RData')
x=hema
z=test
Classif_NP(x,z)
$Class
 [1] "negatif" "positif" "positif" "negatif" "positif" "positif" "negatif"
 [8] "positif" "negatif" "negatif" "negatif" "positif" "negatif" "negatif"
[15] "negatif" "negatif" "negatif" "negatif" "negatif" "negatif" "negatif"
[22] "negatif" "negatif" "negatif" "positif" "positif" "negatif" "negatif"
[29] "positif" "positif" "negatif" "negatif" "negatif" "negatif" "negatif"
[36] "positif" "negatif" "negatif" "positif" "negatif" "positif" "negatif"
[43] "positif" "negatif" "positif" "positif" "negatif" "positif" "negatif"
[50] "positif" "negatif" "negatif" "negatif" "negatif" "negatif" "negatif"
[57] "negatif" "negatif" "negatif" "positif" "negatif" "negatif" "positif"
[64] "positif" "negatif" "positif" "negatif" "negatif" "positif" "negatif"
[71] "negatif" "negatif" "negatif" "negatif" "negatif"

$Prob
          negatif      positif
 [1,] 0.999968964 3.103551e-05
 [2,] 0.016305744 9.836943e-01
 [3,] 0.129553473 8.704465e-01
 [4,] 0.923901808 7.609819e-02
 [5,] 0.027154219 9.728458e-01
 [6,] 0.026935553 9.730644e-01
 [7,] 0.877767850 1.222321e-01
 [8,] 0.495307372 5.046926e-01
 [9,] 0.999792750 2.072500e-04
[10,] 0.960180180 3.981982e-02
[11,] 0.976050354 2.394965e-02
[12,] 0.081367070 9.186329e-01
[13,] 0.838496025 1.615040e-01
[14,] 0.971656009 2.834399e-02
[15,] 0.958550032 4.144997e-02
[16,] 0.913324197 8.667580e-02
[17,] 0.964410649 3.558935e-02
[18,] 0.928302469 7.169753e-02
[19,] 0.994805108 5.194892e-03
[20,] 0.990075083 9.924917e-03
[21,] 0.893434030 1.065660e-01
[22,] 0.933316379 6.668362e-02
[23,] 0.910319383 8.968062e-02
[24,] 0.991925101 8.074899e-03
[25,] 0.390368614 6.096314e-01
[26,] 0.017819543 9.821805e-01
[27,] 0.941316949 5.868305e-02
[28,] 0.728692529 2.713075e-01
[29,] 0.136339531 8.636605e-01
[30,] 0.003007971 9.969920e-01
[31,] 0.889063368 1.109366e-01
[32,] 0.942977581 5.702242e-02
[33,] 0.967791754 3.220825e-02
[34,] 0.958306456 4.169354e-02
[35,] 0.829707806 1.702922e-01
[36,] 0.106991112 8.930089e-01
[37,] 0.688900272 3.110997e-01
[38,] 0.949588101 5.041190e-02
[39,] 0.205250030 7.947500e-01
[40,] 0.990666767 9.333233e-03
[41,] 0.327218622 6.727814e-01
[42,] 0.973274646 2.672535e-02
[43,] 0.027156205 9.728438e-01
[44,] 0.923643369 7.635663e-02
[45,] 0.051956471 9.480435e-01
[46,] 0.049793742 9.502063e-01
[47,] 0.935397146 6.460285e-02
[48,] 0.043322971 9.566770e-01
[49,] 0.934489021 6.551098e-02
[50,] 0.090516393 9.094836e-01
[51,] 0.979097727 2.090227e-02
[52,] 0.960301606 3.969839e-02
[53,] 0.850864193 1.491358e-01
[54,] 0.947438352 5.256165e-02
[55,] 0.902603008 9.739699e-02
[56,] 0.740140544 2.598595e-01
[57,] 0.885796639 1.142034e-01
[58,] 0.614589988 3.854100e-01
[59,] 0.962639367 3.736063e-02
[60,] 0.005681567 9.943184e-01
[61,] 0.981849465 1.815053e-02
[62,] 0.853533918 1.464661e-01
[63,] 0.180383777 8.196162e-01
[64,] 0.041135883 9.588641e-01
[65,] 0.997550274 2.449726e-03
[66,] 0.367796491 6.322035e-01
[67,] 0.929512804 7.048720e-02
[68,] 0.924891275 7.510873e-02
[69,] 0.274581075 7.254189e-01
[70,] 0.968836094 3.116391e-02
[71,] 0.999993864 6.135732e-06
[72,] 0.798915579 2.010844e-01
[73,] 0.702483156 2.975168e-01
[74,] 0.997271710 2.728290e-03
[75,] 0.917376581 8.262342e-02

$M_table
         Class
Y         negatif positif
  negatif      47       3
  positif       5      20

$Err
[1] 0.1066667

$Auc
[1] 0.9664
load('Dopage.RData')
x=hema
z=test
(x0=runif(7,min(x),max(x)))
[1] 58.29929 54.60851 49.42921 55.57193 34.76628 39.61908 37.90928
Classif_NP(x,z,x0)
$Class
 [1] "negatif" "positif" "positif" "negatif" "positif" "positif" "negatif"
 [8] "positif" "negatif" "negatif" "negatif" "positif" "negatif" "negatif"
[15] "negatif" "negatif" "negatif" "negatif" "negatif" "negatif" "negatif"
[22] "negatif" "negatif" "negatif" "positif" "positif" "negatif" "negatif"
[29] "positif" "positif" "negatif" "negatif" "negatif" "negatif" "negatif"
[36] "positif" "negatif" "negatif" "positif" "negatif" "positif" "negatif"
[43] "positif" "negatif" "positif" "positif" "negatif" "positif" "negatif"
[50] "positif" "negatif" "negatif" "negatif" "negatif" "negatif" "negatif"
[57] "negatif" "negatif" "negatif" "positif" "negatif" "negatif" "positif"
[64] "positif" "negatif" "positif" "negatif" "negatif" "positif" "negatif"
[71] "negatif" "negatif" "negatif" "negatif" "negatif"

$Prob
          negatif      positif
 [1,] 0.999968964 3.103551e-05
 [2,] 0.016305744 9.836943e-01
 [3,] 0.129553473 8.704465e-01
 [4,] 0.923901808 7.609819e-02
 [5,] 0.027154219 9.728458e-01
 [6,] 0.026935553 9.730644e-01
 [7,] 0.877767850 1.222321e-01
 [8,] 0.495307372 5.046926e-01
 [9,] 0.999792750 2.072500e-04
[10,] 0.960180180 3.981982e-02
[11,] 0.976050354 2.394965e-02
[12,] 0.081367070 9.186329e-01
[13,] 0.838496025 1.615040e-01
[14,] 0.971656009 2.834399e-02
[15,] 0.958550032 4.144997e-02
[16,] 0.913324197 8.667580e-02
[17,] 0.964410649 3.558935e-02
[18,] 0.928302469 7.169753e-02
[19,] 0.994805108 5.194892e-03
[20,] 0.990075083 9.924917e-03
[21,] 0.893434030 1.065660e-01
[22,] 0.933316379 6.668362e-02
[23,] 0.910319383 8.968062e-02
[24,] 0.991925101 8.074899e-03
[25,] 0.390368614 6.096314e-01
[26,] 0.017819543 9.821805e-01
[27,] 0.941316949 5.868305e-02
[28,] 0.728692529 2.713075e-01
[29,] 0.136339531 8.636605e-01
[30,] 0.003007971 9.969920e-01
[31,] 0.889063368 1.109366e-01
[32,] 0.942977581 5.702242e-02
[33,] 0.967791754 3.220825e-02
[34,] 0.958306456 4.169354e-02
[35,] 0.829707806 1.702922e-01
[36,] 0.106991112 8.930089e-01
[37,] 0.688900272 3.110997e-01
[38,] 0.949588101 5.041190e-02
[39,] 0.205250030 7.947500e-01
[40,] 0.990666767 9.333233e-03
[41,] 0.327218622 6.727814e-01
[42,] 0.973274646 2.672535e-02
[43,] 0.027156205 9.728438e-01
[44,] 0.923643369 7.635663e-02
[45,] 0.051956471 9.480435e-01
[46,] 0.049793742 9.502063e-01
[47,] 0.935397146 6.460285e-02
[48,] 0.043322971 9.566770e-01
[49,] 0.934489021 6.551098e-02
[50,] 0.090516393 9.094836e-01
[51,] 0.979097727 2.090227e-02
[52,] 0.960301606 3.969839e-02
[53,] 0.850864193 1.491358e-01
[54,] 0.947438352 5.256165e-02
[55,] 0.902603008 9.739699e-02
[56,] 0.740140544 2.598595e-01
[57,] 0.885796639 1.142034e-01
[58,] 0.614589988 3.854100e-01
[59,] 0.962639367 3.736063e-02
[60,] 0.005681567 9.943184e-01
[61,] 0.981849465 1.815053e-02
[62,] 0.853533918 1.464661e-01
[63,] 0.180383777 8.196162e-01
[64,] 0.041135883 9.588641e-01
[65,] 0.997550274 2.449726e-03
[66,] 0.367796491 6.322035e-01
[67,] 0.929512804 7.048720e-02
[68,] 0.924891275 7.510873e-02
[69,] 0.274581075 7.254189e-01
[70,] 0.968836094 3.116391e-02
[71,] 0.999993864 6.135732e-06
[72,] 0.798915579 2.010844e-01
[73,] 0.702483156 2.975168e-01
[74,] 0.997271710 2.728290e-03
[75,] 0.917376581 8.262342e-02

$M_table
         Class
Y         negatif positif
  negatif      47       3
  positif       5      20

$Err
[1] 0.1066667

$Class0
[1] "positif" "positif" "negatif" "positif" "negatif" "negatif" "negatif"

$Prob0
           [,1]         [,2]
[1,] 0.01115700 9.888430e-01
[2,] 0.12836357 8.716364e-01
[3,] 0.72050460 2.794954e-01
[4,] 0.07317239 9.268276e-01
[5,] 0.99999563 4.373132e-06
[6,] 0.99784874 2.151265e-03
[7,] 0.99961262 3.873839e-04

$Auc
[1] 0.9664

Iris

data('iris')
attach(iris)
for (j in colnames(iris)[1:4])
{
print(j)
x=get(j)
z=Species
(x0=runif(7,min(x),max(x)))
r=Classif_NP(x,z,x0)
cat('   \n')
cat('Qualité de la classification \n')
cat('matrice de confusion: \n')
print(r$M_table)
cat('Erreur:\n')
print(r$Err)
cat('   \n')
cat('Estimation \n')
cat('x0:\n')
print(x0)
cat('Class0:\n ')
print(r$Class0)
cat('Prob0:\n')
print(r$Prob0)
cat('   \n')
cat('   \n')
}
[1] "Sepal.Length"
   
Qualité de la classification 
matrice de confusion: 
            Class
Y            setosa versicolor virginica
  setosa         45          5         0
  versicolor      6         28        16
  virginica       1         10        39
Erreur:
[1] 0.2533333
   
Estimation 
x0:
[1] 5.069105 5.653251 5.856123 6.551880 7.473992 4.523146 4.775450
Class0:
 [1] setosa     versicolor versicolor virginica  virginica  setosa     setosa    
Levels: setosa versicolor virginica
Prob0:
             [,1]       [,2]        [,3]
[1,] 7.916405e-01 0.17510390 0.018315254
[2,] 2.463156e-01 0.58641527 0.168209351
[3,] 1.201879e-01 0.59227047 0.312851964
[4,] 3.414587e-04 0.37332924 0.626906972
[5,] 2.825179e-12 0.02070251 0.999548532
[6,] 9.417964e-01 0.03181973 0.001936666
[7,] 8.660817e-01 0.09919381 0.042596623
   
   
[1] "Sepal.Width"
   
Qualité de la classification 
matrice de confusion: 
            Class
Y            setosa versicolor virginica
  setosa         33          1        16
  versicolor      5         27        18
  virginica      13         21        16
Erreur:
[1] 0.4933333
   
Estimation 
x0:
[1] 2.828772 4.090964 4.054074 2.015490 3.175221 2.028399 4.119100
Class0:
 [1] versicolor setosa     setosa     versicolor virginica  versicolor setosa    
Levels: setosa versicolor virginica
Prob0:
           [,1]         [,2]       [,3]
[1,] 0.07325834 4.513515e-01 0.42796759
[2,] 0.98709727 4.770596e-05 0.10736420
[3,] 0.97019752 9.878640e-05 0.12133089
[4,] 0.01251519 7.828750e-01 0.17112760
[5,] 0.35547702 2.658112e-01 0.37377187
[6,] 0.01614726 7.775086e-01 0.17407764
[7,] 0.99384610 2.652217e-05 0.09612621
   
   
[1] "Petal.Length"
   
Qualité de la classification 
matrice de confusion: 
            Class
Y            setosa versicolor virginica
  setosa         50          0         0
  versicolor      0         46         4
  virginica       0          3        47
Erreur:
[1] 0.04666667
   
Estimation 
x0:
[1] 4.440947 5.667446 6.015891 4.740497 2.423802 2.987832 1.498454
Class0:
 [1] versicolor virginica  virginica  versicolor setosa     versicolor setosa    
Levels: setosa versicolor virginica
Prob0:
              [,1]         [,2]          [,3]
[1,] 5.882315e-102 9.342802e-01  6.571908e-02
[2,] 3.730734e-222 1.712459e-07  9.999998e-01
[3,] 1.302348e-264 1.844531e-16  1.000000e+00
[4,] 5.402844e-127 7.477741e-01  2.522293e-01
[5,]  9.402970e-01 4.477008e-02  4.897262e-73
[6,]  7.558291e-19 1.000000e+00  1.595289e-41
[7,]  1.000000e+00 2.133861e-42 7.212393e-163
   
   
[1] "Petal.Width"
   
Qualité de la classification 
matrice de confusion: 
            Class
Y            setosa versicolor virginica
  setosa         50          0         0
  versicolor      0         48         2
  virginica       0          4        46
Erreur:
[1] 0.04
   
Estimation 
x0:
[1] 0.2188561 0.6508943 2.3023410 1.9928379 2.4673677 2.1230873 1.1408556
Class0:
 [1] setosa     setosa     virginica  virginica  virginica  virginica  versicolor
Levels: setosa versicolor virginica
Prob0:
              [,1]         [,2]         [,3]
[1,]  1.000000e+00 3.110436e-17 2.147145e-38
[2,]  1.000000e+00 3.822084e-03 2.322006e-15
[3,] 2.323245e-278 2.426702e-08 1.000000e+00
[4,] 8.212895e-187 8.494495e-03 9.915059e-01
[5,]  0.000000e+00 4.463431e-13 1.000000e+00
[6,] 5.040224e-223 1.689210e-04 9.998311e-01
[7,]  2.590094e-29 9.983930e-01 1.606848e-03