This work has been published by Mehdi Rezvandehy and Clayton V. Deutsch at Journal of Stochastic Environmental Research and Risk Assessment https://doi.org/10.1007/s00477-017-1388-x. Here is a Python implementation with realistic data for declustering spatial correlation of data.
Summary
The spatial correlation called variogram is a key parameter for geostatistical estimation and simulation. Preferential sampling may bias the spatial structure and often leads to noisy and unreliable variograms. A novel technique is proposed to weight variogram pairs in order to compensate for preferential or clustered sampling. Weighting the variogram pairs by global kriging of the quadratic differences between the tail and head values gives each pair the appropriate weight, removes noise and minimizes artifacts in the experimental variogram. Moreover, variogram uncertainty could be computed by this technique. The required covariance between the pairs going into variogram calculation, is a fourth order covariance that must be calculated by second order moments. A realistic example is presented for variaogram declustering and kriging estimation. The data for this example is the data "cluster.dat" taken from the literature literature.
Python functions and data files needed to run this notebook are available via this link.
import os
os.environ['path']+=";.\\Codes" # All required codes are in this folder
import pandas as pd
import numpy as np
import matplotlib
import pylab as plt
import geostatspy.GSLIB as GSLIB
import geostatspy.geostats as geostats
from IPython.display import clear_output
from matplotlib import gridspec
from matplotlib.offsetbox import AnchoredText
import warnings
warnings.filterwarnings('ignore')
# Looking at the data
Cluster=GSLIB_View('Cluster.dat')
display(Cluster[0:10])
Cluster.info()
Cluster.describe()
Figure below shows location map of cluster data with 140 samples. We assume the unit of measurements for coordinate system is kilometer. There are sevearl prefrential sampling that will affect the experimental variogram.
font = {'size' : 10}
matplotlib.rc('font', **font)
fig, axs = plt.subplots(figsize=(10, 5), dpi= 200, facecolor='w', edgecolor='k')
gs = gridspec.GridSpec(1, 2, width_ratios=[1.8, 1], wspace=0.19)
ax1=plt.subplot(gs[0])
plt.scatter(Cluster['Xlocation'], Cluster['Ylocation'],c=Cluster['Primary'],s=65,linewidths=1,edgecolors='k',
cmap='jet',vmin=0, vmax=14)
plt.gca().set_aspect('0.95')
plt.colorbar(shrink=0.8,label='Primary')
plt.xlabel('Easting (km)',fontsize='12')
plt.ylabel('Northing (km)',fontsize='12')
plt.title('Location Map of Primary',fontsize=12)
ax1=plt.subplot(gs[1])
plt.xlabel('Primary',fontsize='10')
plt.ylabel('Frequency',fontsize='10')
plt.hist(Cluster['Primary'], bins=15,ec='black')
# Calculate mean and variance of the variable
n=len(Cluster['Primary'])
Mean=np.mean(Cluster['Primary'])
SD=np.sqrt(np.var(Cluster['Primary']))
Max=np.amax(Cluster['Primary'])
Min=np.amin(Cluster['Primary'])
#plt.xlim(0,20)
plt.title('Histogram of Primary',fontsize=12)
plt.gca().set_aspect('0.5')
txt='n=%.0f \n $\mu$=%.3f \n $\sigma$=%.3f \n Max=%.1f \n Min=%.1f'
anchored_text = AnchoredText(txt %(n,Mean,SD,Max,Min), loc=1)
ax1.add_artist(anchored_text)
plt.show()
#Experimental Variogram with Nromal Score and then fit a theoretical model
#Azimith 0: Experimental Variogram
tmin=-100;tmax=10000;xlag=2;xltol=1;nlag=14;azm=0;atol=20;bandwh=10;isill=1
Lag1, Var1, npp_az0=geostats.gamv(Cluster,'Xlocation', 'Ylocation','NS:Primary',tmin,
tmax,xlag,xltol,nlag,azm,atol,bandwh,isill)
#Azimith 0: Fitted variogram Model Parameters
nlag=100; xlag=0.3; azm=0
vario={'nst':2,'nug':0.01, 'cc1':0.44,'it1':1,'azi1':0,'hmaj1':10,'hmin1':10,
'cc2':0.55,'it2':1,'azi2':0,'hmaj2':20,'hmin2':20}
index_az0,Lag1_fit,Var1_fit,cov_az0,ro_az0=geostats.vmodel(nlag, xlag, azm, vario)
clear_output()
# Plot the calculated variogram
font = {'size' : 7}
matplotlib.rc('font', **font)
fig, axs = plt.subplots(figsize=(8,8), dpi= 200, facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace = .05, wspace=0.25)
#
#Label and Title of Variogram Plot
label1='Azimuth 0 ' ;symbol1='--or';label1_fit='Fitted Model ';symbol1_fit='-r'
label2='Azimuth 90 ';symbol2='--sb';label2_fit='Fitted Model ';symbol2_fit='-b'
xlabl='Distance (km)'; xlim=[0,20]; vlim=[0,1.38]; sill=1;loc=4
ax1=plt.subplot(1,2,1)
#Plot Variograms for Azimith 0
Lag1=np.delete(Lag1, np.where(Var1==0))
Var1=np.delete(Var1, np.where(Var1==0))
plt.plot(Lag1, Var1,symbol1,linewidth=1,markersize=4,label=label1)
plt.plot(Lag1_fit, Var1_fit,symbol1_fit,linewidth=1.2,label=label1_fit)
plt.legend(fontsize=8,loc=loc,frameon=False)
plt.gca().spines['right'].set_visible(False)
plt.gca().spines['top'].set_visible(False)
plt.gca().set_aspect('10')
plt.axhline(y=sill,linewidth=1,color='k',linestyle='--')
title='Variogram and Fitted Model_Azimuth 0'
plt.title(title,fontsize=10)
plt.xlabel(xlabl,fontsize=9)
ylabl='$\gamma (\mathbf{h}) $'
plt.ylabel(ylabl,rotation=0,fontsize=12,labelpad=15)
plt.xlim(float(xlim[0]),float(xlim[1]))
plt.ylim(float(vlim[0]),float(vlim[1]))
plt.show()
# Call Vardec Fortran code in Python for variogram declustering
txt=" Parameters for Vardec \n"\
+" ***************************\n"\
+"START OF PARAMETERS: \n"\
+"Cluster.dat - file with data \n"\
+"1 2 0 - columns for X, Y, Z coordinates \n"\
+"6 - col number for variable \n"\
+"-1.0e21 1.0e21 - trimming limits \n"\
+"50 0.5 1 - nx,xmn,xsiz \n"\
+"50 0.5 1 - ny,ymn,ysiz \n"\
+"1 0.5 1 - nz,zmn,zsiz \n"\
+"2 - grid increment for global kriging, 1 considering all blocks \n"\
+"14 - number of lags \n"\
+"2 - lag separation distance \n"\
+"1 - lag tolerance \n"\
+"1 - number of directions \n"\
+"0 20 10.0 0.0 0.5 5.0 - azm,atol,bandh,dip,dtol,bandv \n"\
+"1 - standardize the experimental and declustered variogram \n"\
+"./Output/Vardec.out - output file for declustered variogram \n"\
+"./Output/vargplt.ps - output file for variogam ploting \n"\
+"./Output/varfit.var - variogram model for declustered variogram \n"\
+"2 0.01 - nst, nugget effect \n"\
+"1 0.44 0.0 0.0 0.0 - it,cc,ang1,ang2,ang3 \n"\
+" 10.0 10.0 10 - a_hmax,a_hmin,a_vert \n"\
+"1 0.55 0.0 0.0 0.0 - it,cc,ang1,ang2,ang3 \n"\
+" 20.0 20.0 10 - a_hmax,a_hmin,a_vert \n"
f1 = open('temp', "w");
f1.write(txt)
f1.close()
! echo temp | Vardec
! rm temp
clear_output()
# Reading output file of Vardec
Vardec=GSLIB_View('./Output/Vardec.out')
display(Vardec)
# Plot declustered variogram
font = {'size' : 7}
matplotlib.rc('font', **font)
fig, axs = plt.subplots(figsize=(8,8), dpi= 200, facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace = .05, wspace=0.25)
ax1=plt.subplot(1,2,1)
xlim=[0,20]; vlim=[0,1.38]; sill=1; fsize=5; loc=4; outfile='vargplt_Declustered.png'
plt.axhline(y=sill,linewidth=0.6,color='k',linestyle='--')
plt.title(title,fontsize=fsize*1.5)
plt.xlabel(xlabl,fontsize=fsize*1.5)
ylabl='$\gamma (\mathbf{h}) $'
plt.ylabel(ylabl,rotation=0,fontsize=fsize*1.9,labelpad=15)
plt.xlim(float(xlim[0]),float(xlim[1]))
plt.ylim(float(vlim[0]),float(vlim[1]))
#
Lag1=Vardec['lag-dist']
Gamma1=Vardec['Exp-Variogram']
#
Lag2=Vardec['lag-dist']
Gamma2=Vardec['GK-Variogram']
#
Lagf3=Vardec['lag-dist']
Gammaf3=Vardec['Fit-Variogram']
symbol1= '-or'; symbol2= '-sb'; symbol3= '--b'; label1='Experimental Variogram'
label2='Declustered Variogram ';label3='Fitted Variogram (Declustered)'; xlabl='Distance (km)'
plt.gca().set_aspect('10')
plt.plot(Lag1, Gamma1,symbol1,linewidth=0.4,markersize=3,label=label1)
plt.plot(Lag2, Gamma2,symbol2,linewidth=1.5,markersize=4,label=label2)
plt.plot(Lagf3, Gammaf3,symbol3,linewidth=1.1,markersize=3,label=label3)
fsizet=fsize*1.3
plt.legend(fontsize=str(fsizet),loc=loc,frameon=False)
ax1.spines['left'].set_visible(True)
ax1.spines['bottom'].set_visible(True)
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
The declustered variogram (blue line) improves the experimental variogram (red line); it removes bias due to prefrential sampling. The declustered variogram (blue line) is fit again (blue dashed line) and used for geostatistical modeling.
# Call kt3d GSLIB code in Python for kriging estimation
txt=" Parameters for KT3D \n"\
+" ******************* \n"\
+"START OF PARAMETERS: \n"\
+"Cluster.dat -file with data \n"\
+"0 1 2 0 3 0 - columns for DH,X,Y,Z,var,sec var \n"\
+"-1.0e21 1.0e21 - trimming limits \n"\
+"0 -option: 0=grid, 1=cross, 2=jackknife \n"\
+"xvk.dat -file with jackknife data \n"\
+"1 2 0 3 0 - columns for X,Y,Z,vr and sec var \n"\
+"3 -debugging level: 0,1,2,3 \n"\
+"./Output/kt3d.dbg -file for debugging output \n"\
+"./Output/kt3d.out -file for kriged output \n"\
+"50 0.5 1 - nx,xmn,xsiz \n"\
+"50 0.5 1 - ny,ymn,ysiz \n"\
+"1 0.5 1 - nz,zmn,zsiz \n"\
+"1 1 1 -x,y and z block discretization \n"\
+"1 8 -min, max data for kriging \n"\
+"0 -max per octant (0-> not used) \n"\
+"20.0 20.0 1.0 -maximum search radii \n"\
+"0.0 0.0 0.0 -angles for search ellipsoid \n"\
+"1 0.613545 -0=SK,1=OK,2=non-st SK,3=exdrift \n"\
+"0 0 0 0 0 0 0 0 0 -drift: x,y,z,xx,yy,zz,xy,xz,zy \n"\
+"0 -0, variable; 1, estimate trend \n"\
+"./Output/extdrift.dat -gridded file with drift/mean \n"\
+"4 - column number in gridded file \n"\
+"2 0.010 - nst, nugget effect (Declustered Variogram Model) \n"\
+"1 0.715 0.0 0.0 0.0 - it,cc,ang1,ang2,ang3 (Declustered Variogram Model) \n"\
+" 10.170 10.170 10.170 - a_hmax,a_hmin,a_vert (Declustered Variogram Model) \n"\
+"1 0.275 0.0 0.0 0.0 - it,cc,ang1,ang2,ang3 (Declustered Variogram Model) \n"\
+" 28.412 28.412 28.412 - a_hmax,a_hmin,a_vert (Declustered Variogram Model) \n"
f1 = open('temp', "w");
f1.write(txt)
f1.close()
! echo temp | KT3D
! rm temp
clear_output()
# Plor kriging estimation
font = {'size' : 8}
matplotlib.rc('font', **font)
fig, axs = plt.subplots(figsize=(10, 3.5), dpi= 200, facecolor='w', edgecolor='k')
# Reading kriging file
txt= './Output/kt3d.out'
ax1=plt.subplot(1,2,1)
plt.scatter(Cluster['Xlocation'], Cluster['Ylocation'],c=Cluster['Primary'],s=25,linewidths=1,edgecolors='k',
cmap='jet',vmin=0, vmax=20)
nx=50; xsiz=1; xmin=0.5; ny=50; ysiz=1; ymin=0.5
xa=np.zeros(nx*ny)
ya=np.zeros(nx*ny)
Var1a=np.zeros((ny,nx))
nxy=nx*ny
iz=0
with open(txt) as f:
vt1=[]
for i in range(4):
next(f)
for line in f:
p = line.split()
vt1.append(float(p[0]))
varkk=[]
kk=0
for j in range(ny):
for i in range(nx):
xa[kk]=xmin+(i)*xsiz
ya[kk]=ymin+(j)*ysiz
index1=i+1 + (j-1+1)*nx
Var1a[j,i]=vt1[index1-1]
kk=kk+1
plt.title('Kriging Estimation with Declustered Variogram',fontsize=8)
im = plt.imshow(Var1a, extent=(xa.min(), xa.max(), ya.min(), ya.max()), cmap='jet',
origin='lower',vmin=0,vmax=10,aspect=0.9)
plt.colorbar(shrink=0.8,label='Primary')
plt.xlabel('Easting (km)',fontsize='9')
plt.ylabel('Northing (km)',fontsize='9')
plt.show()