This work has been published by Mehdi Rezvandehy and Clayton V. Deutsch in Springer's journal Natural Resources Research, https://doi.org/10.1007/s11053-016-9322-3. A Python implementation for calculating the correct histogram uncertainty is presented here.
Summary
There is unavoidable uncertainty in the representative histogram required as an input for geostatistical modeling. This uncertainty should be quantified correctly and incorporated into the final modeling because it affects resource/reserve estimation and investment and development decisions. This work confirms a correct approach for quantifying histogram uncertainty by comparing it to a reference uncertainty obtained from an automated scan-based approach: similar patterns of a data configuration are found within a large image and the mean of the specified domain is calculated directly. The correct quantification of histogram uncertainty is then defined. The spatial bootstrap provides a prior uncertainty that does not consider the domain limits and is not conditioned to the data. This prior uncertainty is updated in geostatistical modeling to account for the limits and the data. The resulting posterior uncertainty closely matches the scan-based approach.
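As a point of intuition for why spatial correlation matters: in normal-score (Gaussian) units, the variance of the average of $n$ correlated data at locations $\mathbf{u}_1,\dots,\mathbf{u}_n$ with covariance $C(\mathbf{h})$ is

$$\mathrm{Var}\{\bar{m}\}=\frac{1}{n^{2}}\sum_{i=1}^{n}\sum_{j=1}^{n}C(\mathbf{u}_i-\mathbf{u}_j),$$

which reduces to the familiar $\sigma^{2}/n$ only when the data are uncorrelated. Positive spatial correlation inflates the uncertainty in the mean, which is why a spatial bootstrap, rather than a classical independent bootstrap, is used as the prior in this workflow.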
Python functions and data files needed to run this notebook are available via this link.
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import gridspec
from matplotlib.offsetbox import AnchoredText
from IPython.display import clear_output
import geostatspy.GSLIB as GSLIB
import geostatspy.geostats as geostats
from matplotlib.path import Path
from matplotlib.patches import PathPatch  # used below to build the domain clipping outline
from Codes import *  # project helper functions used below (e.g., GSLIB_View, Spatial_Bootstrap, cdf_real, histplt)
from scipy.stats import norm
from sklearn import preprocessing
import os
os.environ['path']+=";.\\Codes" # All required codes are in this folder
import sys
sys.path.append('./Codes')
import warnings
warnings.filterwarnings('ignore')
A realistic example is considered for calculating uncertainty in the histogram. The Jura data set was collected by the Swiss Federal Institute of Technology at Lausanne: there are 359 data locations with concentrations of seven heavy metals in the topsoil (Goovaerts 1997). We decluster the variable nickel (Ni) for this study. First, the experimental variogram is calculated for the major and minor directions of continuity. Then, uncertainty in the histogram of the data (mean and variance) is calculated with the spatial bootstrap as the prior uncertainty. Finally, the prior uncertainty is passed through geostatistical simulation for conditioning to the data and clipping to the domain limits, yielding the posterior uncertainty, which should be the correct uncertainty in the histogram of the data.
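As a conceptual reference before working through the data, the cell below sketches the spatial bootstrap idea with NumPy/SciPy only: correlated standard normal values are simulated at the data locations through an LU (Cholesky) decomposition of a correlation matrix, back-transformed through the data quantiles, and the mean of each realization is kept. The function name, the isotropic exponential correlation model, and the 1 km range are illustrative assumptions only; the actual workflow below uses the Spatial_Bootstrap helper from the Codes folder with the fitted variogram model.
# Minimal sketch of the spatial bootstrap (illustrative assumptions, not the Spatial_Bootstrap helper)
import numpy as np
from scipy.stats import norm

def spatial_bootstrap_sketch(x, y, z, corr_range=1.0, nreal=100, seed=42):
    rng = np.random.default_rng(seed)
    x = np.asarray(x, dtype=float); y = np.asarray(y, dtype=float); z = np.asarray(z, dtype=float)
    n = len(z)
    # Pairwise distances and an assumed isotropic exponential correlation model (practical range = corr_range)
    d = np.hypot(x[:, None] - x[None, :], y[:, None] - y[None, :])
    C = np.exp(-3.0 * d / corr_range) + 1.0e-8 * np.eye(n)  # small jitter for numerical stability
    L = np.linalg.cholesky(C)
    zs = np.sort(z)
    means = []
    for _ in range(nreal):
        ysim = L @ rng.standard_normal(n)   # correlated standard normal values at the data locations
        p = norm.cdf(ysim)                  # convert to cumulative probabilities
        real = np.quantile(zs, p)           # back-transform through the data quantiles
        means.append(real.mean())           # statistic of interest: the mean of the realization
    return np.array(means)

# Example usage once the Jura data are loaded below (prior uncertainty in the mean of Ni):
# prior_means = spatial_bootstrap_sketch(Data['X'], Data['Y'], Data['Ni'])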
# Looking at the data
Data=GSLIB_View('juraset.dat')
display(Data[0:10])
Data.describe()
The figure below shows the location map of the 359 Ni samples (left) and the histogram of the data (right).
font = {'size' : 9}
matplotlib.rc('font', **font)
fig, axs = plt.subplots(figsize=(10, 5), dpi= 200, facecolor='w', edgecolor='k')
gs = gridspec.GridSpec(1, 2, width_ratios=[1.5, 1], wspace=0.12)
ax1=plt.subplot(gs[0])
plt.scatter(Data['X'], Data['Y'],c=Data['Ni'],s=65,linewidths=1,edgecolors='k',
cmap='jet',vmin=0, vmax=35)
plt.colorbar(shrink=0.8)
plt.xlabel('Easting (km)',fontsize='12')
plt.ylabel('Northing (km)',fontsize='12')
plt.title('Location Map of Nickel',fontsize=12)
plt.xlim(0,5.2)
plt.ylim(0,6)
ax1=plt.subplot(gs[1])
plt.xlabel('Ni',fontsize='10')
plt.ylabel('Frequency',fontsize='10')
plt.hist(Data['Ni'], bins=15,ec='black')
n=len(Data['Ni'])
Mean=np.mean(Data['Ni'])
SD=np.sqrt(np.var(Data['Ni']))
Max=np.amax(Data['Ni'])
Min=np.amin(Data['Ni'])
plt.title('Histogram of Nickel',fontsize=12)
plt.gca().set_aspect('0.7')
txt='n=%.0f \n $\\mu$=%.1f \n $\\sigma$=%.1f \n Max=%.1f \n Min=%.1f'
anchored_text = AnchoredText(txt %(n,Mean,SD,Max,Min), loc=1)
ax1.add_artist(anchored_text)
plt.show()
# Calculate experimental variogram and fit a model
#Normal Score Transformation
ns, vr, wt_ns=geostats.nscore(Data, vcol='Ni', wcol=None, ismooth=False, dfsmooth=None, smcol=0, smwcol=0)
# Experimental variogram with normal scores, then fit a theoretical model
Data['NS_Ni']=ns
# Azimuth 0: experimental variogram
tmin=-100;tmax=10000;xlag=0.15;xltol=0.07;nlag=10;azm=0;atol=20;bandwh=20;isill=1
Lag1, Var1, npp_az0=geostats.gamv(Data,'X', 'Y','NS_Ni',tmin,tmax,xlag,xltol,nlag,azm,atol,bandwh,isill)
# Azimuth 0: fitted variogram model parameters
nlag=200; xlag=0.01
vario={'nst':2,'nug':0.1, 'cc1':0.45,'it1':1,'azi1':0,'hmaj1':0.35,'hmin1':.35,
'cc2':0.44,'it2':1,'azi2':0,'hmaj2':1,'hmin2':1}
index_az0,Lag1_fit,Var1_fit,cov_az0,ro_az0=geostats.vmodel(nlag, xlag, azm, vario)
# Azimuth 90: experimental variogram
tmin=-100;tmax=100;xlag=0.15;xltol=0.07;nlag=10;azm=90;atol=20;bandwh=50;isill=1
Lag2, Var2, npp_az90=geostats.gamv(Data,'X', 'Y','NS_Ni',tmin,tmax,xlag,xltol,nlag,azm,atol,bandwh,isill)
# Azimuth 90: fitted variogram model parameters
nlag=200; xlag=0.01
vario={'nst':2,'nug':0.1, 'cc1':0.45,'it1':1,'azi1':0,'hmaj1':0.8,'hmin1':.8,
'cc2':0.44,'it2':1,'azi2':0,'hmaj2':1.5,'hmin2':1.5}
index_az90,Lag2_fit,Var2_fit,cov_az90,ro_az90=geostats.vmodel(nlag, xlag, azm, vario)
clear_output()
font = {'size' : 12}
matplotlib.rc('font', **font)
fig, axs = plt.subplots(figsize=(12,12), dpi= 200, facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace = .05, wspace=0.25)
#
#Label and Title of Variogram Plot
label1='Azimuth 0 ' ;symbol1='--or';label1_fit='Fitted Model ';symbol1_fit='-r'
label2='Azimuth 90 ';symbol2='--sb';label2_fit='Fitted Model ';symbol2_fit='-b'
xlabl='Distance (km)'; xlim=[0,1.5]; vlim=[0,1.38]; sill=1;loc=4
ax1=plt.subplot(1,2,1)
# Plot variogram for azimuth 0
Lag1=np.delete(Lag1, np.where(Var1==0))
Var1=np.delete(Var1, np.where(Var1==0))
plt.plot(Lag1, Var1,symbol1,linewidth=2,markersize=7,label=label1)
plt.plot(Lag1_fit, Var1_fit,symbol1_fit,linewidth=2.5,label=label1_fit)
plt.legend(fontsize=13,loc=loc,frameon=False)
plt.gca().spines['right'].set_visible(False)
plt.gca().spines['top'].set_visible(False)
plt.gca().set_aspect('0.8')
plt.axhline(y=sill,linewidth=2,color='k',linestyle='--')
title='Variogram and Fitted Model, Azimuth 0'
plt.title(title,fontsize=15)
plt.xlabel(xlabl,fontsize=15)
ylabl=r'$\gamma (\mathbf{h}) $'
plt.ylabel(ylabl,rotation=0,fontsize=17,labelpad=15)
plt.xlim(float(xlim[0]),float(xlim[1]))
plt.ylim(float(vlim[0]),float(vlim[1]))
ax1=plt.subplot(1,2,2)
# Plot variogram for azimuth 90
Lag2=np.delete(Lag2, np.where(Var2==0))
Var2=np.delete(Var2, np.where(Var2==0))
plt.plot(Lag2, Var2,symbol2,linewidth=2,markersize=7,label=label2)
plt.plot(Lag2_fit, Var2_fit,symbol2_fit,linewidth=2.5,label=label2_fit)
plt.legend(fontsize=13,loc=loc,frameon=False)
plt.gca().spines['right'].set_visible(False)
plt.gca().spines['top'].set_visible(False)
plt.gca().set_aspect('0.8')
plt.axhline(y=sill,linewidth=2,color='k',linestyle='--')
title='Variogram and Fitted Model, Azimuth 90'
plt.title(title,fontsize=15)
plt.xlabel(xlabl,fontsize=15)
ylabl=r'$\gamma (\mathbf{h}) $'
plt.ylabel(ylabl,rotation=0,fontsize=17,labelpad=15)
plt.xlim(float(xlim[0]),float(xlim[1]))
plt.ylim(float(vlim[0]),float(vlim[1]))
clear_output()
plt.show()
x=Data['X']; y=Data['Y']; var=Data['Ni']; seed=256; nsim=100
variog={'nst':2,'nug':0.1, 'cc1':0.45,'it1':1,'azi1':90,'hmaj1':0.8,'hmin1':.35,
'cc2':0.44,'it2':1,'azi2':90,'hmaj2':1.5,'hmin2':1.0}
outfl="./Spatial Bootstrap/SB"
#
yvec_r, mean=Spatial_Bootstrap(x, y, var, variog, seed, nsim,outfl)
var1=yvec_r;bins=100;label2='Data Distribution';label1='Spatial Bootstrap (SB)';linewidth='0.8'
title=' 100 Histograms of Spatial Bootstrap (SB)';xlabl='Nickel';ylabl='Cumulative Distribution Function';loc=4
font = {'size' :7.7 }
#
matplotlib.rc('font', **font)
fig=plt.figure(figsize=(8, 3), dpi= 200, facecolor='w', edgecolor='k')
ax1 = plt.subplot(1,2,1)
cdf_real(var,var1,nsim,bins,label1,label2,linewidth,title,xlabl,ylabl,xlimt=(3,45),ylimt=(0,1),loc=loc,axt=ax1)
ax2 = plt.subplot(1,2,2)
histplt (mean,15,title='Prior Uncertainty in the Mean',xlabl='Mean of Nickel',xlimt=(16.5,23),ylimt=None,axt=ax2)
# Call the SGSIM GSLIB program from Python: conditional simulation using the original data distribution (no reference distribution)
# Write the SGSIM parameter file for each realization
for isim in range(nsim):
txt=" Parameters for SGSIM \n"\
+" ********************** \n"\
+"START OF PARAMETERS:\n"\
+"juraset.dat -file with data\n"\
+"1 2 0 10 0 0 - columns for X,Y,Z,vr,wt,sec.var.\n"\
+"-998.0 1.0e21 - trimming limits\n"\
+"1 -transform the data (0=no, 1=yes)\n"\
+"./Simulation/sgsim.trn - file for output trans table\n"\
+"0 - consider ref. dist (0=no, 1=yes)\n"\
+"./Spatial Bootstrap/SB_"+str(isim+1)+" - file with ref. dist distribution\n"\
+"1 0 - columns for vr and wt\n"\
+str(min(var))+" "+str(max(var))+" - zmin,zmax (for tail extrapolation)\n"\
+"1 4.0 - lower tail option (1=linear), parameter\n"\
+"1 11.5 - upper tail option (1=linear), parameter\n"\
+"1 -debugging level: 0,1,2,3\n"\
+"./Simulation/sgsim.dbg -file for debugging output\n"\
+"./Simulation/sgsim_"+str(isim+1)+" -file for simulation output\n"\
+"1 -number of realizations to generate\n"\
+"50 0.05 0.1 -nx,xmn,xsiz\n"\
+"60 0.05 0.1 -ny,ymn,ysiz\n"\
+"1 1 1 -nz,zmn,zsiz\n"\
+str(10523+isim)+" -random number seed\n"\
+"0 24 -min and max original data for sim\n"\
+"24 -number of simulated nodes to use\n"\
+"1 -assign data to nodes (0=no, 1=yes)\n"\
+"1 3 -multiple grid search (0=no, 1=yes),num\n"\
+"0 -maximum data per octant (0=not used)\n"\
+"1.2 0.9 1.0 -maximum search radii\n"\
+"90 0.0 0.0 -angles for search ellipsoid\n"\
+"0 0.60 1.0 -ktype: 0=SK,1=OK,2=LVM,3=EXDR,4=COLC, corr and VRF\n"\
+"lvmfl.dat - file with LVM, EXDR, or COLC variable\n"\
+"1 - column for secondary variable\n"\
+"2 0.11 -nst, nugget effect\n"\
+"1 0.45 90.0 0.0 0.0 -it,cc,ang1,ang2,ang3\n"\
+" 0.8 0.35 1.0 -a_hmax, a_hmin, a_vert\n"\
+"1 0.44 90.0 1.0 0.0 -it,cc,ang1,ang2,ang3\n"\
+" 1.5 1.0 5.0 -a_hmax, a_hmin, a_vert \n"
#
f1 = open('temp', "w");
f1.write(txt)
f1.close()
! echo temp | sgsim
! rm temp
clear_output()
font = {'size' :11 }
#
matplotlib.rc('font', **font)
fig=plt.figure(figsize=(12, 8), dpi= 200, facecolor='w', edgecolor='k')
# Clip
path=(1,0.1),(0.2,0.1),(0.2,2.7),(0.75,2.7),\
(0.75,3.5),(1,3.5),(1,4),(1.5,4),(1.5,4.5),(2,4.5),(2,5),(2.5,5),(2.5,6),\
(4.5,6),(4.5,5.5),(4.8,5.5),(4.8,5),(5,5),(5,4.2),(5.3,4.2),(5.3,0.75),\
(5,0.75),(5,0),(4.5,0.2),(3.5,0.2),(3.5,0.1),(1.2,0.1),(1.2,0.3),(1,0.1)
path1 = np.asarray(path)
path2 = Path(path1)
patch = PathPatch(path2, facecolor='none',linewidth=1.5)
#
sim=[]; mean_CS=[]
for i in range(nsim):
df_sim=GSLIB.GSLIB2Dataframe('./Simulation/sgsim_'+str(i+1))
sim.append(df_sim.to_numpy())
mean_CS.append(np.mean(df_sim.to_numpy()))
#
nsim_=6;nx=50; xmn=0.05;xsiz=0.1;ny=60; ymn=0.05;ysiz=0.1
for isim in range(nsim_):
if(isim<6):
ax1=plt.subplot(2,3,isim+1)
plt.scatter(Data['X'], Data['Y'],c=Data['Ni'],s=25,linewidths=1,edgecolors='k',
cmap='jet',vmin=0, vmax=35)
plt.xlabel('Easting (km)',fontsize='11')
plt.ylabel('Northing (km)',fontsize='11')
plt.title('Realization# '+str(isim+1),fontsize=12)
sim_clip=np.zeros((ny,nx))
for j in range(ny):
for i in range(nx):
xa=xmn+(i)*xsiz; ya=ymn+(j)*ysiz
inside=path2.contains_points([(xa,ya)])
index=i+1 + (j-1+1)*nx
if(inside[0]):
sim_clip[j,i]=sim[isim][index-1]
else:
sim_clip[j,i]=np.NAN
im = plt.imshow(sim_clip, extent=(xmn+xsiz/2, xsiz*nx+xmn, ymn+ysiz/2, ysiz*ny+ymn), cmap='jet',
origin='lower',interpolation='nearest',vmin=0, vmax=35)
plt.xlim(0,5.2)
plt.ylim(0,6)
plt.colorbar(shrink=0.8)
plt.show()
var1=sim;bins=100;label2='Data Distribution';label1='Conditional Simulation (CS)';linewidth='0.8'
title=' 100 Histograms of Conditional Simulation (CS)';xlabl='Nickel';ylabl='Cumulative Distribution Function';loc=4
font = {'size' :7.7 }
#
matplotlib.rc('font', **font)
fig=plt.figure(figsize=(8, 3), dpi= 200, facecolor='w', edgecolor='k')
ax1 = plt.subplot(1,2,1)
cdf_real(var,var1,nsim,bins,label1,label2,linewidth,title,xlabl,ylabl,xlimt=(3,45),ylimt=(0,1),loc=loc,axt=ax1)
ax2 = plt.subplot(1,2,2)
histplt (mean_CS,10,title='Histogram of Mean (CS)',xlabl='Mean of Nickel',xlimt=(16.5,23),ylimt=None,axt=ax2)
# Call the SGSIM GSLIB program from Python: each realization uses a spatial bootstrap distribution as the reference distribution (posterior uncertainty)
# Write the SGSIM parameter file for each realization
for isim in range(nsim):
txt=" Parameters for SGSIM \n"\
+" ********************** \n"\
+"START OF PARAMETERS:\n"\
+"juraset.dat -file with data\n"\
+"1 2 0 10 0 0 - columns for X,Y,Z,vr,wt,sec.var.\n"\
+"-998.0 1.0e21 - trimming limits\n"\
+"1 -transform the data (0=no, 1=yes)\n"\
+"./Simulation/sgsim.trn - file for output trans table\n"\
+"1 - consider ref. dist (0=no, 1=yes)\n"\
+"./Spatial Bootstrap/SB_"+str(isim+1)+" - file with ref. dist distribution\n"\
+"1 0 - columns for vr and wt\n"\
+str(min(var))+" "+str(max(var))+" - zmin,zmax (for tail extrapolation)\n"\
+"1 4.0 - lower tail option (1=linear), parameter\n"\
+"1 11.5 - upper tail option (1=linear), parameter\n"\
+"1 -debugging level: 0,1,2,3\n"\
+"./Simulation/sgsim.dbg -file for debugging output\n"\
+"./Simulation/sgsim_"+str(isim+1)+" -file for simulation output\n"\
+"1 -number of realizations to generate\n"\
+"50 0.05 0.1 -nx,xmn,xsiz\n"\
+"60 0.05 0.1 -ny,ymn,ysiz\n"\
+"1 1 1 -nz,zmn,zsiz\n"\
+str(10520+isim)+" -random number seed\n"\
+"0 24 -min and max original data for sim\n"\
+"24 -number of simulated nodes to use\n"\
+"1 -assign data to nodes (0=no, 1=yes)\n"\
+"1 3 -multiple grid search (0=no, 1=yes),num\n"\
+"0 -maximum data per octant (0=not used)\n"\
+"1.2 0.9 1.0 -maximum search radii\n"\
+"90 0.0 0.0 -angles for search ellipsoid\n"\
+"0 0.60 1.0 -ktype: 0=SK,1=OK,2=LVM,3=EXDR,4=COLC, corr and VRF\n"\
+"lvmfl.dat - file with LVM, EXDR, or COLC variable\n"\
+"1 - column for secondary variable\n"\
+"2 0.11 -nst, nugget effect\n"\
+"1 0.45 90.0 0.0 0.0 -it,cc,ang1,ang2,ang3\n"\
+" 0.8 0.35 1.0 -a_hmax, a_hmin, a_vert\n"\
+"1 0.44 90.0 1.0 0.0 -it,cc,ang1,ang2,ang3\n"\
+" 1.5 1.0 5.0 -a_hmax, a_hmin, a_vert \n"
#
f1 = open('temp', "w");
f1.write(txt)
f1.close()
! echo temp | sgsim
! rm temp
clear_output()
font = {'size' :11 }
#
matplotlib.rc('font', **font)
fig=plt.figure(figsize=(12, 8), dpi= 200, facecolor='w', edgecolor='k')
# Clip simulation
path=(1,0.1),(0.2,0.1),(0.2,2.7),(0.75,2.7),\
(0.75,3.5),(1,3.5),(1,4),(1.5,4),(1.5,4.5),(2,4.5),(2,5),(2.5,5),(2.5,6),\
(4.5,6),(4.5,5.5),(4.8,5.5),(4.8,5),(5,5),(5,4.2),(5.3,4.2),(5.3,0.75),\
(5,0.75),(5,0),(4.5,0.2),(3.5,0.2),(3.5,0.1),(1.2,0.1),(1.2,0.3),(1,0.1)
path1 = np.asarray(path)
path2 = Path(path1)
patch = PathPatch(path2, facecolor='none',linewidth=1.5)
#
sim=[]; mean_pos=[]
for i in range(nsim):
df_sim=GSLIB.GSLIB2Dataframe('./Simulation/sgsim_'+str(i+1))
sim.append(df_sim.to_numpy())
mean_pos.append(np.mean(df_sim.to_numpy()))
#
nsim_=6;nx=50; xmn=0.05;xsiz=0.1;ny=60; ymn=0.05;ysiz=0.1
for isim in range(nsim_):
if(isim<6):
ax1=plt.subplot(2,3,isim+1)
plt.scatter(Data['X'], Data['Y'],c=Data['Ni'],s=25,linewidths=1,edgecolors='k',
cmap='jet',vmin=0, vmax=35)
plt.xlabel('Easting (km)',fontsize='11')
plt.ylabel('Northing (km)',fontsize='11')
plt.title('Realization# '+str(isim+1),fontsize=12)
sim_clip=np.zeros((ny,nx))
for j in range(ny):
for i in range(nx):
xa=xmn+(i)*xsiz; ya=ymn+(j)*ysiz
inside=path2.contains_points([(xa,ya)])
index=i+1 + (j-1+1)*nx
if(inside[0]):
sim_clip[j,i]=sim[isim][index-1]
else:
sim_clip[j,i]=np.NAN
im = plt.imshow(sim_clip, extent=(xmn+xsiz/2, xsiz*nx+xmn, ymn+ysiz/2, ysiz*ny+ymn), cmap='jet',
origin='lower',interpolation='nearest',vmin=0, vmax=35)
plt.xlim(0,5.2)
plt.ylim(0,6)
plt.colorbar(shrink=0.8)
plt.show()
var1=sim;bins=100;label2='Data Distribution';label1='Posterior Uncertainty';linewidth='0.8'
title=' 100 Histograms of Posterior Uncertainty';xlabl='Nickel';ylabl='Cumulative Distribution Function';loc=4
font = {'size' :7.7 }
#
matplotlib.rc('font', **font)
fig=plt.figure(figsize=(8, 3), dpi= 200, facecolor='w', edgecolor='k')
ax1 = plt.subplot(1,2,1)
cdf_real(var,var1,nsim,bins,label1,label2,linewidth,title,xlabl,ylabl,xlimt=(3,45),ylimt=(0,1),loc=loc,axt=ax1)
ax2 = plt.subplot(1,2,2)
histplt (mean_pos,10,title='Posterior Uncertainty in the Mean',xlabl='Mean of Nickel',xlimt=(16.5,23),ylimt=None,axt=ax2)
font = {'size' :7.7 }
matplotlib.rc('font', **font)
fig=plt.figure(figsize=(7, 4), dpi= 200, facecolor='w', edgecolor='k')
# Uncertainty in the Mean with Spatial Bootstrap (Prior Uncertainty)
data = mean
mu, std = norm.fit(data)
# Plot the PDF.
xmin, xmax = min(data),max(data)
x = np.linspace(xmin-2, xmax+5, 1000)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'r', linewidth=2,label='Prior Uncertainty, '+ 'Variance='+str(np.round(std**2,2)))
# Uncertainty in the Mean with Conditional Simulation
data = mean_CS
mu, std = norm.fit(data)
# Plot the PDF.
xmin, xmax = min(data),max(data)
x = np.linspace(xmin-2, xmax+5, 1000)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'b', linewidth=2,label='Conditional Simulation, '+ 'Variance='+str(np.round(std**2,2)))
# Uncertainty in the Mean by Conditional Simulation and Bootstrap (Posterior Uncertainty)
data = mean_pos
mu, std = norm.fit(data)
# Plot the PDF.
xmin, xmax = min(data),max(data)
x = np.linspace(xmin-2, xmax+5, 1000)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'g', linewidth=4,label='Posterior, '+ 'Variance='+str(np.round(std**2,2)))
plt.xlim((15.5,25))
plt.ylim(0, 0.8)
plt.xlabel('Means',fontsize='12')
plt.title('Uncertainty in the Mean with Different Approaches',fontsize='11')
#plt.legend(fontsize='9',framealpha =1, loc=1,bbox_to_anchor=(0.75, -0.1))
plt.legend(fontsize='8',framealpha =1, loc=2)
plt.show()