--- title: "TP_Chapitre_3.1" output: html_document date: "2025-01-28" --- ```{r, include=FALSE} knitr::opts_chunk$set(echo = TRUE,comment='',class.source="bg-info", class.output="bg-success") ``` # Multivariate case # Fonctions ```{r} library(sm) Compute_CV=function(x,y,method,weights=NA) { hopt=sm.regression(x,y,method=method,display='none')$h n=nrow(x) yest=rep(NA,n) if (is.na(weights)) weights=rep(1,n) CV=0 for (i in 1:n) { yest[i]=(sm.regression(x[-i,],y[-i],h=hopt,eval.points=x,eval.grid=F,display='none',nbins=0)$estimate)[i] CV=CV+(y[i]-yest[i])^2*weights[i] } return(list('CV'=CV,'y_est'=yest)) } #### ROC=function(z,p1) { p=sort(unique(p1)) lp=length(p) ROC=matrix(NA,lp,2) for (s in 1:lp) { ROC[s,1]=mean(p1[z==0]>p[s]) ROC[s,2]=mean(p1[z==1]>p[s]) } AUC=sum(ROC[-lp,2]*(ROC[-lp,1]-ROC[-1,1])) colnames(ROC)=c("FPR","TPR") plot(ROC,type='l',main=paste('AUC =', AUC)) return(list(ROC=ROC,AUC=AUC)) } ####### Classif_NP=function(X,Y,X0=NA) { X=as.matrix(X) n=length(Y) V=sort(unique(Y)) n_V=length(V) Prob=matrix(NA,n,n_V) colnames(Prob)=V Class=rep(NA,n) if (!is.na(max(X0))) { X0=as.matrix(X0) P0=matrix(NA,nrow(X0),n_V) Class0=rep(NA,n) } for (v in 1:n_V) { z=as.numeric(Y==V[v]) for (i in 1:n ) { Prob[i,v]=sm.regression(X[-i,],z[-i], eval.points=X,eval.grid=FALSE,display='none')$estimate[i] } if (!is.na(max(X0))) {P0[,v]=sm.regression(X,z,eval.points=X0,eval.grid=FALSE,display='none')$estimate} } if (n_V==2) {ROC(Y==V[2],Prob[,2])} Class=V[apply(Prob,1,which.max)] M_table=table(Y,Class) if (!is.na(max(X0))) {Class0=V[apply(P0,1,which.max)]} if (!is.na(max(X0))) {return(list(Class=Class, Prob=Prob, M_table=M_table, err=1-sum(diag(M_table))/n, Class0=Class0,Prob0=P0))} else {return(list(Class=Class, Prob=Prob, M_table=M_table, err=1-sum(diag(M_table))/n))} } ``` # Density ## Simulation ```{r} library(sm) #library(plotly) library(plot3D) #library(rpanel) n=1000 alpha=0.4 mu11=7 mu12=0 sig11=1 sig12=3 mu21=0 mu22=3 sig21=2 sig22=2 x1=matrix(c(rnorm(n,mu11,sig11),rnorm(n,mu12,sig12)),ncol=2) x2=matrix(c(rnorm(n,mu21,sig21),rnorm(n,mu22,sig22)),ncol=2) z=rbinom(n,1,alpha) x=x1 x[z==1,]=x2[z==1,] M=300 r1=seq(-10,10,len=M) r2=seq(-10,10,len=M) x_1=x[,1] x_2=x[,2] ``` ### Estimation de la densité ```{r} par(mfrow=c(1,3)) sm.density(x,method='cv',structure='common',nbins=0) sm.density(x,method='cv',structure="scaled",nbins=0) sm.density(x,method='cv',structure='separate',nbins=0) par(mfrow=c(1,2)) rx=sm.density(x,method='cv',eval.points=x ,eval.grid=F, nbins=0) r=sm.density(x,method='cv',eval.points=cbind(r1,r2) ,eval.grid=T, nbins=0) f_k=r$estimate #fra=data.frame(x_1,x_2,f_k) ``` ### Comparaison avec la vraie densité ```{r} f=matrix(NA,M,M) for (j in 1:M) { for (k in 1:M) { f[j,k]=(1-alpha)*dnorm(r1[j],mu11,sig11)*dnorm(r2[k],mu12,sig12)+(1-alpha)*dnorm(r1[j],mu21,sig21)*dnorm(r2[k],mu22,sig22) } } v1=rep(r1,M) v2=rep(r2,rep(M,M)) fv=as.vector(f) fra=data.frame(v1,v2,fv) par(mfrow=c(1,3)) image(f,zlim=range(c(f,r$estimate))) image(r$estimate,zlim=range(c(f,r$estimate))) image(abs(f-r$estimate),zlim=range(c(f,r$estimate))) surf3D(matrix(rep(r1,M),M,M),matrix(rep(r2,rep(M,M)),M,M),f,clim=range(c(f,f_k))) surf3D(matrix(rep(r1,M),M,M),matrix(rep(r2,rep(M,M)),M,M),f_k,clim=range(c(f,f_k))) surf3D(matrix(rep(r1,M),M,M),matrix(rep(r2,rep(M,M)),M,M),abs(f-f_k),clim=range(c(f,f_k))) ``` ### Estimation du mode ```{r} p_mo=which(f==max(f),arr.ind=T) (mode=c(r1[p_mo[1]],r2[p_mo[2]])) p_mok=which(f_k==max(f_k),arr.ind=T) (modek=c(r1[p_mok[1]],r2[p_mok[2]])) p_mo1=which(f[1:(M/2),]==max(f[1:(M/2),]),arr.ind=T) (modek1=c(r1[p_mo1[1]],r2[p_mo1[2]])) p_mok1=which(f_k[1:(M/2),]==max(f_k[1:(M/2),]),arr.ind=T) (modek1=c(r1[p_mok1[1]],r2[p_mok1[2]])) #plot_ly(fra,x=~v1,y=~v2,z = ~fv, type = "surface") ``` ## Real world data ### Estimation de la densité ```{r} data(faithful) attach(faithful) par(mfrow=c(1,3)) sm.density(faithful,method='cv',structure='common',nbins=0) sm.density(faithful,method='cv',structure="scaled",nbins=0) sm.density(faithful,method='cv',structure='separate',nbins=0) ``` ### Estimation du mode ```{r} x=eruptions y=waiting x0=seq(min(x),max(x),len=100) y0=seq(min(y),max(y),len=100) re=sm.density(cbind(x,y),method='cv',eval.points=cbind(x0,y0), eval.grid=T,structure='common',nbins=0) p_mo=which(re$estimate==max(re$estimate),arr.ind=T) (mode=c(x0[p_mo[1]],y0[p_mo[2]])) p_mo1=which(re$estimate[1:30,1:30]==max(re$estimate[1:30,1:30]),arr.ind=T) (mode1=c(x0[p_mo1[1]],y0[p_mo1[2]])) ``` # Regression ## Real world data ```{r} load('Randonnee.RData') x=long y=lat z=alti # variable of interest x0=seq(min(x),max(x),len=100) y0=seq(min(y),max(y),len=100) sm.regression(cbind(x,y),z,method='cv',eval.points=cbind(x0,y0), eval.grid=T,structure='common',nbins=0) alti_profile=sm.regression(cbind(x,y),z,method='cv',eval.points=Position, eval.grid=F,structure='common',nbins=0)$estimate km=rep(0,nrow(Position)) for (j in 1:(nrow(Position)-1)) {km[j+1]=km[j]+sqrt((Position[j+1,1]-Position[j,1])^2+(Position[j+1,2]-Position[j,2])^2)} #km=cumsum(km)/1000 plot(km/10,alti_profile,type='l') ``` ### Qualité #### CV with Kernel Estimator ```{r,cache=TRUE} RE=Compute_CV(cbind(x,y),z,method='cv') RE$CV/length(alti) ``` ```{r} plot(RE$y_est,z) abline(0,1) ``` #### CV avec moyenne Z ```{r,cache=TRUE} n=length(z) CV_z=0 for (j in 1:n) { CV_z=CV_z+(z[j]-mean(z[-j]))^2 } CV_z/n ``` ## Classification ```{r} data(iris) v=combn(1:4,2) for(j in 1:ncol(v)) { print(colnames(iris)[v[,j]]) C_NP=Classif_NP(iris[,v[,j]],iris[,5]) print(C_NP$err) print(C_NP$M_table) } ```