The paper entitled Parsimonious Gaussian Process Models for the Classification of Hyperspectral Remote Sensing Images has been accepted in the IEEE Geoscience and Remote Sensing Letters. A version of the paper can be found here.
The Python functions that implement the models can be found https://github.com/mfauvel/PGPDA. In this post, I am going to describe how the results presented in the paper have been obtained. Scikit-learn should be installed. Also, for best performances, a good linear algebra library is required. Openblas is a good option.
Experimental Setup
I use the following script to perform the experiment. Just change the name of the data to perform the corresponding data set. Send me an email to get the data in Scipy
format.
import scipy as sp
import pgpda
import kernels
import accuracy_index as ai
import time
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import KFold
from sklearn.grid_search import GridSearchCV
import pickle
# Functions definitions
def compute_PGPDA(x,y,xt,yt,sig_r,dc_r,threshold_r,mod):
model=pgpda.PGPDA(model=mod)
sig,threshold,err=model.cross_validation(x, y,sig_r=sig_r,dc_r=dc_r,threshold_r=threshold_r)
model.train(x,y)
yp=model.predict(xt,x,y)
conf = ai.CONFUSION_MATRIX()
conf.compute_confusion_matrix(yp,yt)
return conf.OA,conf.Kappa,conf.confusion_matrix
def compute_NPGPDA(x,y,xt,yt,sig_r,dc_r,threshold_r,mod):
model=pgpda.NPGPDA(model=mod)
sig,threshold,err=model.cross_validation(x, y,sig_r=sig_r,dc_r=dc_r,threshold_r=threshold_r)
model.train(x,y)
yp=model.predict(xt,x,y)
conf = ai.CONFUSION_MATRIX()
conf.compute_confusion_matrix(yp,yt)
return conf.OA,conf.Kappa,conf.confusion_matrix
def compute_SVM(x,y,xt,yt,param_grid_svm):
y.shape=(y.size,)
cv = KFold(y.size, n_folds=5)
grid = GridSearchCV(SVC(), param_grid=param_grid_svm, cv=cv,n_jobs=2)
grid.fit(x, y)
clf = grid.best_estimator_
clf.fit(x,y)
yp = clf.predict(xt).reshape(yt.shape)
conf = ai.CONFUSION_MATRIX()
conf.compute_confusion_matrix(yp,yt)
return conf.OA,conf.Kappa,conf.confusion_matrix
def compute_RF(x,y,xt,yt,param_grid_rf):
y.shape=(y.size,)
cv = KFold(y.size, n_folds=5)
grid = GridSearchCV(RandomForestClassifier(), param_grid=param_grid_rf, cv=cv,n_jobs=2)
grid.fit(x, y)
clf = grid.best_estimator_
clf.fit(x,y)
yp = clf.predict(xt).reshape(yt.shape)
conf = ai.CONFUSION_MATRIX()
conf.compute_confusion_matrix(yp,yt)
return conf.OA,conf.Kappa,conf.confusion_matrix
def compute_KDA(x,y,xt,yt,mu_r,sig_r):
model=pgpda.KDA()
sig,mu,err=model.cross_validation(x, y,sig_r=sig_r,mu_r=mu_r)
model.train(x,y)
yp=model.predict(xt,x,y)
conf = ai.CONFUSION_MATRIX()
conf.compute_confusion_matrix(yp,yt)
return conf.OA,conf.Kappa,conf.confusion_matrix
if __name__ == '__main__':
# Load spectra
data_name = 'aisa' # 'ksc' 'uni' 'aisa'
data = sp.load(data_name+'.npz')
X = data['X']
Y = data['Y']
X = pgpda.scale(X)[0]
del data
# Set Parameters
MODELS_NPG = ['NM0','NM1','NM2','NM3','NM4']
MODELS_PG = ['M0','M1','M2','M3','M4','M5','M6']
REP = 20
Nt = 50
NT = 1000
n,d=X.shape
C = Y.max()
sig_r = 2.0**sp.arange(-8,4) # PGPDA
threshold_r = sp.linspace(0.70,0.9999,15) # PGPDA
dc_r =sp.arange(2,Nt,3) # PGPDA
penalty = 10.0**sp.arange(-2,6) # SVM
param_grid_svm = dict(gamma=sig_r, C=penalty) # SVM
n_estimators=sp.arange(10,Nt,(Nt-10)/10) # RF
param_grid_rf = dict(n_estimators=n_estimators) # RF
mu_r =10.0**sp.arange(-8,2) # KDA
RES = {'M0':[[],[],[],[]],'M1':[[],[],[],[]],'M2':[[],[],[],[]],'M3':[[],[],[],[]],'M4':[[],[],[],[]],'M5':[[],[],[],[]],'M6':[[],[],[],[]],'NM0':[[],[],[],[]],'NM1':[[],[],[],[]],'NM2':[[],[],[],[]],'NM3':[[],[],[],[]],'NM4':[[],[],[],[]],'SVM':[[],[],[],[]],'RF':[[],[],[],[]],'KDA':[[],[],[],[]]}
# Start the repetition
r=0
while r<REP:
print "Repetition number ",str(r)
# Random selection of the sample
x = sp.array([]).reshape(0,d)
y = sp.array([]).reshape(0,1)
xt = sp.array([]).reshape(0,d)
yt = sp.array([]).reshape(0,1)
sp.random.seed(r)
for i in range(C):
t = sp.where((i+1)==Y)[0]
nc = t.size
rp = sp.random.permutation(nc)
x = sp.concatenate((X[t[rp[0:Nt]],:],x))
xt = sp.concatenate((X[t[rp[-NT:]],:],xt))
y = sp.concatenate((Y[t[rp[0:Nt]]],y))
yt = sp.concatenate((Y[t[rp[-NT:]]],yt))
# PGPDA
for mod in MODELS_PG:
ts=time.time()
res = compute_PGPDA(x,y,xt,yt,sig_r,dc_r,threshold_r,mod)
[RES[mod][i].append(res[i]) for i in range(3)]
RES[mod][3].append(time.time()-ts)
# NPGPDA
for mod in MODELS_NPG:
ts=time.time()
res = compute_NPGPDA(x,y,xt,yt,sig_r,dc_r,threshold_r,mod)
[RES[mod][i].append(res[i]) for i in range(3)]
RES[mod][3].append(time.time()-ts)
# SVM
ts=time.time()
res = compute_SVM(x,y,xt,yt,param_grid_svm)
[RES['SVM'][i].append(res[i]) for i in range(3)]
RES['SVM'][3].append(time.time()-ts)
# RF
ts=time.time()
res = compute_RF(x,y,xt,yt,param_grid_rf)
[RES['RF'][i].append(res[i]) for i in range(3)]
RES['RF'][3].append(time.time()-ts)
# KDA
ts=time.time()
res = compute_KDA(x,y,xt,yt,mu_r,sig_r)
[RES['KDA'][i].append(res[i]) for i in range(3)]
RES['KDA'][3].append(time.time()-ts)
r += 1
# Save Results
output = open('res_full'+data_name, 'wb')
pickle.dump(RES, output)
output.close()
Compute accuracy
I generate the org table with the following python code (again, change the name of the data set accordingly).
import scipy as sp
import scipy.stats as stats
import pickle
# Load data
RES = pickle.load(open("res_fullaisa.npz","rb"))
NC = 16
NS = 20
# Name of methods
NAME = ['M0','M1','M2','M3','M4','M5','M6','NM0','NM1','NM2','NM3','NM3','KDA','RF','SVM']
# File setup
orgfile = open("res_aisa.org","w")
orgfile.write('|Method')
for name in NAME:
orgfile.write('|'+name)
orgfile.write('|\n')
orgfile.write('|--+ --+--|\n')
# Write OA
orgfile.write('|OA')
for name in NAME:
val =int(1000*sp.mean(RES[name][0]))/10.0
orgfile.write('|'+str(val))
orgfile.write('|\n')
# Write Kappa
orgfile.write('|Kappa')
for name in NAME:
val =int(1000*sp.mean(RES[name][1]))/1000.0
orgfile.write('|'+str(val))
orgfile.write('|\n')
# Write CPU
orgfile.write('|CPT')
for name in NAME:
val =int(sp.mean(RES[name][3]))
orgfile.write('|'+str(val))
orgfile.write('|\n')
# Compute mean confusion matrix
UA = []
PA = []
for name in NAME:
confu = sp.zeros(RES['M0'][2][0].shape)
for i in range(NS):
confu+=RES[name][2][i]
confu/=20
PA.append(sp.diag(confu)/confu.sum(axis=0))
UA.append(sp.diag(confu)/confu.sum(axis=1))
orgfile.write('|--+ --+--|\n')
for j in range(NC):
orgfile.write('|PA '+str(j))
for i in range(len(NAME)):
orgfile.write('|'+str(int(1000.0*PA[i][j])/10.0))
orgfile.write('|\n')
orgfile.write('|--+ --+--|\n')
for j in range(NC):
orgfile.write('|UA '+str(j))
for i in range(len(NAME)):
orgfile.write('|'+str(int(1000.0*UA[i][j])/10.0))
orgfile.write('|\n')
orgfile.write('\n')
orgfile.write('\n')
# Write RANK TEST
orgfile.write('| ')
for name in NAME:
orgfile.write('|'+name)
orgfile.write('|\n')
orgfile.write('|--+ --+--|\n')
# Rank test
for name in NAME:
orgfile.write('|'+name)
for name1 in NAME:
z,p=stats.ranksums(RES[name][1],RES[name1][1])
orgfile.write('|'+str(z))
orgfile.write('|\n')
orgfile.close()
Finally I (and you should!) get the tabs reported in the article.
Comments
comments powered by Disqus