Shen, Z. , Tang, Y., Li, X., Gao, Y.,(2021) On the localization in strongly coupled ensemble data assimilation using a two-scale Lorenz model, Earth and Space Science
耦合资料同化具有非常大的潜力来为气候预报提供初始条件,减少初始化冲击,改善气候预报的水平。在耦合模式的资料同化中,观测资料被同化到一个或多个模型组件中,并且直接(通过同化)或者间接(通过耦合模式积分)地在不同组件之间交换信息。根据耦合模式不同组件之间信息传递的不同方式,耦合资料同化可以分为两种:强耦合同化(SCDA)和弱耦合同化(WCDA)。在弱耦合同化中,分析增量分别在各模型组件中被计算和应用。与之相反,强耦合同化使用全耦合的误差协方差来计算观测增量,并应用到整个耦合模式的所有模式组件中。强耦合同化可以充分利用观测信息更新模式,并且有助于提供动力协调的耦合模式预报初值,具有非常大的应用潜力。但是当前的强耦合同化方法还不完善,整体上还处于研究阶段,距离业务化还有很远的距离。
耦合模式的不同成分(如海洋、大气)具有不同的时空尺度,因此用于跨成分更新的耦合协方差矩阵较难估计,往往需要使用非常巨大的集合才能使得强耦合的效果超过弱耦合,这也强耦合同化的主要技术挑战之一。本文提出了一种通过耦合协方差的局地化来改进强耦合同化的方法。为了抑制因集合成员数量不足而造成的远距离虚假相关,局地化是集合卡尔曼滤波器同化中的重要方法。为了处理不同耦合成分的不同时空尺度,本文发展了新的跨耦合成分协方差的局地化公式。基于使用理想的多尺度洛伦茨96模式,本文开展了孪生实验,验证了新的局地化方法在于改进强耦合同化效果方面的功效。
整个实验的脚本和结果如下:
实验使用的是多尺度洛伦茨96模式
\begin{equation} \frac{dX_k}{dt}=X_{k-1} (X_{k+1}-X_{k-2} )-X_k+F-\frac{hc}{b}\sum\limits_{j=1}^{J} Z_{j,k},\\ \frac{dZ_{j,k}}{dt}=cbZ_{j+1,k} (Z_{j-1,k}-Z_{j+2,k} )-cZ_{j,k}+\frac{hc}{b} X_k, \end{equation}方程的两个变量X和Z分别对应大尺度和小尺度的大气变量,通过耦合项耦合在一起。ODE使用RK-45格式进行求解,对应模式积分。 参数方面:K代表X变量数目,也就是将一个维度环K等分的区域平均值;J代表每个K区域内含有J个子区域,也就是一个X与J个Z变量耦合在一起。F是外强迫,c和b是时空尺度比,h是耦合强度。这里使用默认数值如下:
# In[model_def]
import numpy as np;
class msL96_para:
name = 'multi-scale Lorenz 96 model parameter'
K = 36; J = 10; F = 8;
c = 10; b = 10; h = 1
def Lmodel_rhs(x,Force):
dx = (np.roll(x,-1)-np.roll(x,2))*np.roll(x,1)-x + Force
return dx
def Smodel_rhs(z,Force):
Para = msL96_para()
c = Para.c; b = Para.b
dz = c*b*(np.roll(z,1)-np.roll(z,-2))*np.roll(z,-1)-c*z + Force
return dz
def msL96_rhs(Y):
Para = msL96_para()
K = Para.K; J = Para.J
c = Para.c; b = Para.b; h = Para.h;
X = Y[range(K)]
Z = Y[range(K,len(Y))]
#
SumZ = np.sum(np.reshape(Z,(K,J)),axis=1)
forcing_X = Para.F - h*c/b*SumZ
dX = Lmodel_rhs(X,forcing_X)
forcing_Z = h*c/b*np.kron(X,np.ones(J))
dZ = Smodel_rhs(Z,forcing_Z)
dY = np.concatenate((dX,dZ),axis=0)
return dY
def RK45(x,func,h):
#
K1=func(x);
K2=func(x+h/2*K1);
K3=func(x+h/2*K2);
K4=func(x+h*K3);
x1=x+h/6*(K1+2*K2+2*K3+K4);
return x1
def msL96_adv_1step(x0,delta_t):
#
x1=RK45(x0,msL96_rhs,delta_t)
return x1
包括同化时长,X和Z变量的不同同化频率,特别地,小尺度的Z变量每隔若干个(这里是2个)被观测到一个,这在线性观测算子上体现。
# In[expr setting]: Data assimilation parameters
assim_period_steps = 8000;
assimS_every_n_steps = 5 # data assimilation every 3h (5 steps)
assimL_every_n_steps = 40; # data assimilation every 1d (40 steps)
obs_every_n_Svars = 2; # 1 of every 2 S-model variable can be observed
Para = msL96_para()
K = Para.K
J = Para.J
H_matL = np.eye(K);
H_matS = np.eye(K*J)
H_matS = H_matS[0:K*J:obs_every_n_Svars,:];
def H_op(x,H_mat):
y = np.dot(H_mat,np.transpose(x))
return y
H_all = np.eye(K*J+K)
H_useLobs=H_all[range(K)]
H_useSobs=H_all[K:(K*J+K):obs_every_n_Svars]
case_dir = "/media/zqshen/Samsung_T5/EAKFmsL96pythonDATA&result/EAKF_python_CDA/"
import os
# In[spinup]: spin up
delta_t=0.005;
Para = msL96_para()
K = Para.K
J = Para.J
if not os.path.exists(case_dir+'DATA/spinup_data.npz'):
x0spinup=np.random.randn(K+K*J);
t=0;
spinup_steps = 14600;
x1 = x0spinup;
for t in range(spinup_steps):
x2=msL96_adv_1step(x1,delta_t);
x1=x2;
x0true = x2;
del x1
del x2
print('finish spinup and save data');
np.savez(case_dir+'DATA/spinup_data.npz',x0=x0true,dt=delta_t);
else:
SPDT = np.load(case_dir+'DATA/spinup_data.npz');
x0true = SPDT['x0']
delta_t = SPDT['dt']
print('load x0ture and delta_t');
使用spin-up获取的初值作为真实场的初值,积分得到真实场用以结果的检验。观测通过在观测时间和观测点上增加高斯分布的观测误差来模拟。针对不同尺度的模式,使用不同的观测误差标准差,即针对两个耦合成分的长期标准差乘上一个系数获得。
# In[truth&obs]
### generate the truth and observation
if not os.path.exists(case_dir+'DATA/true_data.npz'):
XtrueL = np.zeros([assim_period_steps,K])
XtrueS = np.zeros([assim_period_steps,K*J])
OBS_L = np.zeros([assim_period_steps//assimL_every_n_steps,K])
OBS_S = np.zeros([assim_period_steps//assimS_every_n_steps,K*J//obs_every_n_Svars]);
x1 = x0true;
for t in range(assim_period_steps):
x2=msL96_adv_1step(x1,delta_t);
XtrueL[t]=x2[range(K)];
XtrueS[t]=x2[range(K,len(x2))]
if t%assimS_every_n_steps==0:
OBS_S[t//assimS_every_n_steps]=H_op(XtrueS[t],H_matS);
if t%assimL_every_n_steps==0:
OBS_L[t//assimL_every_n_steps]=H_op(XtrueL[t],H_matL);
x1=x2;
del x1
del x2
STD_L = np.std(XtrueL,axis=0); err_L = 0.3*np.mean(STD_L)
STD_S = np.std(XtrueS,axis=0); err_S = 0.3*np.mean(STD_S)
OBS_L = OBS_L + err_L*np.random.randn(len(OBS_L),len(OBS_L[0]))
OBS_S = OBS_S + err_S*np.random.randn(len(OBS_S),len(OBS_S[0]))
print('Experimental data generated')
np.savez(case_dir+'DATA/true_data.npz',x0=x0true,dt=delta_t,Ltrue=XtrueL,Strue=XtrueS,Lobs=OBS_L,Sobs=OBS_S,Lstd=err_L,Sstd=err_S);
else:
TRDT = np.load(case_dir+'DATA/true_data.npz')
x0true = TRDT['x0']
delta_t = TRDT['dt']
XtrueL = TRDT["Ltrue"]
XtrueS = TRDT["Strue"]
OBS_L = TRDT["Lobs"]
OBS_S = TRDT["Sobs"]
err_L = TRDT["Lstd"]
err_L = err_L**2
err_S = TRDT["Sstd"]
err_S = err_S**2 # std to var
print('load truth and observation data')
pltcmap = plt.cm.rainbow
plt.figure(figsize=(10,5))
grid = plt.GridSpec(2, 6, wspace=0.1, hspace=0.2)
plt.subplot(grid[0,0:5])
[xx,yy]=np.meshgrid(np.arange(0,8000)*delta_t,range(10,370,10))
plt.pcolormesh(xx,yy,XtrueL[range(0,8000),:].T,cmap=pltcmap)
plt.xticks(np.arange(0,8500,2000)*delta_t,[])
plt.yticks(np.arange(0,420,120),fontsize=14)
plt.clim(-10,10)
plt.title('True States',fontsize=15)
plt.ylabel('Longitude',fontsize=14)
ax = plt.gca()
ax.invert_yaxis()
cb=plt.colorbar(shrink=0.8)
cb.ax.tick_params(labelsize='small')
plt.subplot(grid[1,0:5])
[xxx,yyy] = np.meshgrid(np.arange(0,8000)*delta_t,range(1,361))
plt.pcolormesh(xxx,yyy,XtrueS[range(0,8000),:].T,cmap=pltcmap)
plt.xticks(np.arange(0,8500,2000)*delta_t,fontsize=14)
plt.yticks(np.arange(0,420,120),fontsize=14)
plt.clim(-0.5,0.5)
plt.ylabel('Longitude',fontsize=14)
ax = plt.gca()
ax.invert_yaxis()
cb = plt.colorbar(shrink=0.8)
cb.ax.tick_params(labelsize='small')
plt.xlabel('MTU',fontsize=14)
plt.subplot(grid[0,5])
plt.plot(STD_L,range(10,370,10),'b',lw=2)
plt.yticks(np.arange(120,420,120),[])
plt.ylabel('X',fontsize=14)
plt.xticks([0,2.7,5],fontsize=14);plt.grid(axis='x')
plt.xlim(0,5);plt.ylim(10,360)
plt.title('climatological \n standard deviations')
ax = plt.gca()
ax.invert_yaxis()
plt.subplot(grid[1,5])
plt.plot(STD_S,range(1,361),'b',lw=.5)
plt.yticks(np.arange(0,420,120),[])
plt.xlim(0,0.4);plt.ylim(1,360)
plt.xticks([0,0.2,0.4],fontsize=14);plt.grid(axis='x')
plt.ylabel('Z',fontsize=14)
ax = plt.gca()
ax.invert_yaxis()
从上图的真实场结果中很容易看到多尺度。X变量的分辨率较低,时间变化较慢。 此外,根据模型方程,X变量倾向于向西传播,Z变量倾向于向东传播,相速度较大。由于等式2中的耦合项,Z变量被限制在具有大X值的区域,并且在离开这些区域时迅速消失。方程1中的耦合项也影响X变量的振幅。X模型和Z模型的相互作用产生了X模型的几个活跃区域,这些活跃区域的变化更为剧烈,如Lorenz在他的原著中所示。
# In[def_singlemodel]:
def Lmodel_adv_1step(x0,delta_t,XtrueS,t):
Para = msL96_para()
K = Para.K; J = Para.J
c = Para.c; b = Para.b; h = Para.h;
# use truth as forcing
SumZ = np.sum(np.reshape(XtrueS[t],(K,J)),axis=1)
forcing_X = Para.F - h*c/b*SumZ
Lmodel_rhs_noncp = lambda x: Lmodel_rhs(x,forcing_X)
x1=RK45(x0,Lmodel_rhs_noncp,delta_t)
return x1
def Smodel_adv_1step(x0,delta_t,XtrueL,t):
Para = msL96_para()
J = Para.J
c = Para.c; b = Para.b; h = Para.h;
#
forcing_Z = h*c/b*np.kron(XtrueL[t],np.ones(J))
Smodel_rhs_noncp = lambda x: Smodel_rhs(x,forcing_Z)
x1=RK45(x0,Smodel_rhs_noncp,delta_t)
return x1
其中的EAKF算法和自适应inflation来自于Jeffery Anderson(2003,2011)
EAKF算法使用局地化来避免虚假遥相关,即在增加观测增量的时候乘以一个因子$\rho$ \begin{equation}\label{eakf_update} x_{m,n} = x_{m,n}^{(p)}+\rho*\Delta x_{m,n}, n=1,\ldots,N \end{equation}
$\rho$使用Gaspri-Cohn公式计算,如下
\begin{equation}\label{gcfun} \rho=\Omega (d,c)=\left\{ \begin{array}{ll} -\frac{1}{4}(\frac{d}{c})^5+\frac{1}{2}(\frac{d}{c})^4+\frac{5}{8}(\frac{d}{c})^3-\frac{5}{3}(\frac{d}{c})^2+1, & 0\leq d\leq 2c; \\ \frac{1}{12}(\frac{d}{c})^5-\frac{1}{2}(\frac{d}{c})^4+\frac{5}{8}(\frac{d}{c})^3+\frac{5}{3}(\frac{d}{c})^2-5(\frac{d}{c})-\frac{2}{3}(\frac{d}{c})^{-1}, & c\leq d\leq 2c; \\ 0, & d\geq 2c \end{array} \right. \end{equation}其中的参数c用于控制截断长度,d是观测点到模式更新点之间的距离
# localization factor
def comp_cov_factor(z_in,c):
z=abs(z_in);
if z<=c:
r = z/c;
cov_factor=((( -0.25*r +0.5)*r +0.625)*r -5.0/3.0)*r**2 + 1.0;
elif z>=c*2.0:
cov_factor=0.0;
else:
r = z / c;
cov_factor = ((((r/12.0 -0.5)*r +0.625)*r +5.0/3.0)*r -5.0)*r + 4.0 - 2.0 / (3.0 * r);
return cov_factor;
# observation increment for the observation site
def obs_increment_eakf(ensemble, observation, obs_error_var):
prior_mean = np.mean(ensemble);
prior_var = np.var(ensemble);
if prior_var >0.001:
post_var = 1.0 / (1.0 / prior_var + 1.0 / obs_error_var);
post_mean = post_var * (prior_mean / prior_var + observation / obs_error_var);
else:
post_var = prior_var;
post_mean = prior_mean;
updated_ensemble = ensemble - prior_mean + post_mean;
var_ratio = post_var / prior_var;
updated_ensemble = np.sqrt(var_ratio) * (updated_ensemble - post_mean) + post_mean;
obs_increments = updated_ensemble - ensemble;
return obs_increments
# regression the obs_inc to model grid corresponds state_ens
def get_state_increments(state_ens, obs_ens, obs_incs):
covar = np.cov(state_ens, obs_ens);
state_incs = obs_incs * covar[0,1]/covar[1,1];
return state_incs
# the following for adaptive inflation
def compute_new_density(dist_2, sigma_p_2, sigma_o_2, lambda_mean, lambda_sd, gamma, lambda_in):
exponent_prior = - 0.5 * (lambda_in - lambda_mean)**2 / lambda_sd**2;
# Compute probability that observation would have been observed given this lambda
theta_2 = (1.0 + gamma * (np.sqrt(lambda_in) - 1.0))**2 * sigma_p_2 + sigma_o_2;
theta = np.sqrt(theta_2);
exponent_likelihood = dist_2 / ( -2.0 * theta_2);
# Compute the updated probability density for lambda
# Have 1 / sqrt(2 PI) twice, so product is 1 / (2 PI)
density = np.exp(exponent_likelihood + exponent_prior) / (2.0 * np.pi * lambda_sd * theta);
return density
def update_inflate(x, sigma_p_2, obs, sigma_o_2, inflate_prior_val, lambda_mean, gamma,lambda_sd):
lambda_mean_LB = 1.0
lambda_mean_UB = 1.3
lambda_sd_LB = 0.5
# FIRST, update the inflation mean:
# Get the "non-inflated" variance of the sample
# lambda here, is the prior value before the update.
sigma_p_2 = sigma_p_2/(1+gamma*np.sqrt(inflate_prior_val)-1)**2
dist_2 = (x - obs)**2 # Squared-innovation
theta_bar_2 = ( 1 + gamma * (np.sqrt(lambda_mean) - 1) )**2 * sigma_p_2 + sigma_o_2;
theta_bar = np.sqrt(theta_bar_2);
u_bar = 1 / (np.sqrt(2 * np.pi) * theta_bar);
like_exp_bar = - 0.5 * dist_2 / theta_bar_2;
v_bar = np.exp(like_exp_bar);
gamma_terms = 1 - gamma + gamma*np.sqrt(lambda_mean);
dtheta_dinf = 0.5 * sigma_p_2 * gamma * gamma_terms / (theta_bar * np.sqrt(lambda_mean));
like_bar = u_bar * v_bar;
like_prime = (like_bar * dtheta_dinf / theta_bar) * (dist_2 / theta_bar_2 - 1);
like_ratio = like_bar / like_prime;
# Solve a quadratic equation
a = 1;
b = like_ratio - 2*lambda_mean;
c = lambda_mean**2 - lambda_sd**2 - like_ratio*lambda_mean ;
o = np.max( [ np.abs(a), np.abs(b), np.abs(c) ] );
a = a/o;
b = b/o;
c = c/o;
d = b**2 - 4*a*c;
if b < 0:
s1 = 0.5 * ( -b + np.sqrt(d) ) / a;
else:
s1 = 0.5 * ( -b - np.sqrt(d) ) / a;
s2 = ( c/a ) / s1;
if np.abs(s2 - lambda_mean) < np.abs(s1 - lambda_mean):
new_cov_inflate = s2;
else:
new_cov_inflate = s1;
if new_cov_inflate < lambda_mean_LB or new_cov_inflate > lambda_mean_UB or np.isnan(new_cov_inflate):
new_cov_inflate = lambda_mean_LB;
new_cov_inflate_sd = lambda_sd;
return new_cov_inflate,new_cov_inflate_sd
if lambda_sd <= lambda_sd_LB:
new_cov_inflate_sd = lambda_sd;
return new_cov_inflate,new_cov_inflate_sd
else:
new_max = compute_new_density(dist_2, sigma_p_2, sigma_o_2, lambda_mean, lambda_sd, gamma, new_cov_inflate);
# Find value at a point one OLD sd above new mean value
new_1_sd = compute_new_density(dist_2, sigma_p_2, sigma_o_2, lambda_mean, lambda_sd, gamma, new_cov_inflate + lambda_sd);
ratio = new_1_sd / new_max;
# Can now compute the standard deviation consistent with this as
# sigma = sqrt(-x^2 / (2 ln(r)) where r is ratio and x is lambda_sd (distance from mean)
new_cov_inflate_sd = np.sqrt( - 0.5 * lambda_sd**2 / np.log(ratio) );
return new_cov_inflate, new_cov_inflate_sd
def eakf_analysis(ensemble_in,obs_in,obs_error_var,H_mat,obs_every_n_vars,local_para,inf_in,H_op):
inf_cov = 0.6 # fix the cov_inf_std
N = len(ensemble_in)
L = len(ensemble_in[0]); # model dimension (model grids)
m = len(obs_in); # number of obs sites
# prior inflation with inf_in (spatial adaptive)
ens_mean = np.mean(ensemble_in,axis=0)
for n in range(N):
ensemble_in[n] = inf_in*(ensemble_in[n]-ens_mean)+ens_mean
lambda_i = inf_in.copy()
for i in range(m):
ensemble_proj = H_op(ensemble_in,H_mat);
obs_proj = ensemble_proj[i]; # project model grid to obs site
obs_inc = obs_increment_eakf(obs_proj,obs_in[i],obs_error_var);
for j in range(L):
# state_inc=get_state_increments(ensemble_in[:,j],obs_proj,obs_inc);
covar = np.cov(ensemble_in[:,j], obs_proj);
r = covar[0,1]/covar[1,1]
dist = np.abs(obs_every_n_vars*i-j);
if dist>L/2.0:
dist=L-dist;
cov_factor = comp_cov_factor(dist,local_para);
# update inf
if r*cov_factor>0.0001:
lambda_i[j], new_cov_inf_sd = update_inflate(np.mean(obs_proj), np.var(obs_proj), obs_in[i], obs_error_var,inf_in[j], lambda_i[j], r*cov_factor,inf_cov)
# update ensemble
ensemble_in[:,j]=ensemble_in[:,j]+r*cov_factor*obs_inc;
ensemble_out = ensemble_in;
inf_out = lambda_i
# inf_out = inf_in #
return ensemble_out,inf_out
def eakf_analysis_cda(ensemble_in,obs_in,obs_error_var,H_mat,LOC_MAT,inf_in,H_op):
#
inf_cov = 0.6
N = len(ensemble_in)
L = len(ensemble_in[0]); # model dimension (model grids)
m = len(obs_in); # number of obs sites
# prior inflation with inf_in (spatial adaptive)
ens_mean = np.mean(ensemble_in,axis=0)
for n in range(N):
ensemble_in[n] = inf_in*(ensemble_in[n]-ens_mean)+ens_mean
lambda_i = inf_in.copy()
for i in range(m):
ensemble_proj = H_op(ensemble_in,H_mat);
obs_proj = ensemble_proj[i]; # project model grid to obs site
obs_inc = obs_increment_eakf(obs_proj,obs_in[i],obs_error_var);
for j in range(L):
# state_inc=get_state_increments(ensemble_in[:,j],obs_proj,obs_inc);
covar = np.cov(ensemble_in[:,j], obs_proj);
r = covar[0,1]/covar[1,1]
cov_factor = LOC_MAT[j,i]
# update inf
if r*cov_factor>0.0001:
lambda_i[j], new_cov_inf_sd = update_inflate(np.mean(obs_proj), np.var(obs_proj), obs_in[i], obs_error_var,inf_in[j], lambda_i[j], r*cov_factor,inf_cov)
# update ensemble
if r*cov_factor>0:
ensemble_in[:,j]=ensemble_in[:,j]+r*cov_factor*obs_inc;
ensemble_out = ensemble_in;
inf_out = lambda_i
# inf_out = inf_in
return ensemble_out,inf_out
敏感性实验的评估指标使用时间平均的RMSE
def eval_by_RMSE(Xassim,Xtrue):
tlength = len(Xassim)
RMSE = np.zeros(tlength);
for j in range(tlength):
RMSE[j] = np.sqrt(np.mean((Xtrue[j]-Xassim[j])**2));
mRMSE = np.mean(RMSE)
print('mean RMSE = '+str(mRMSE))
print('----------------------------------')
return mRMSE
# In[def Lexpr]:
# define the function to compute mean RMSE of Lmodel for input (Ens, loc)
# the time window is truncated to speed up the procedure
# I have tested that use 2000,3000 or more members produce similar mRMSE
assim_period_steps = 2500
XtrueL = XtrueL[range(assim_period_steps)]
XtrueS = XtrueS[range(assim_period_steps)]
# --------
def Lmodel_difLoc(N,local_para):
Xassim = np.zeros_like(XtrueL);
Xspread = np.zeros_like(XtrueL);
# N = 20; # ensemble size
# local_para = 4;
Inf_All = np.zeros([len(OBS_L),K])
import os
InitEnsPath = case_dir+'DATA/L_InitialEns'+str(N)+'.npz'
if os.path.exists(InitEnsPath):
# read the ensemble
ExprInitEns = np.load(InitEnsPath)
print('Initial Ensemble loaded N = '+str(N))
Ens0 = ExprInitEns['E0'];
else:
# or just create new members
Ens0 = np.repeat(OBS_L[0],N) + np.random.randn(K*N);
Ens0 = np.reshape(Ens0,(N,-1)); # initial ensemble
# first dim is member
print('Initial Ensemble generated')
np.savez(InitEnsPath,E0 = Ens0);
# start assimilation
import time
time0 = time.time() # tic toc
inf_in = np.ones(K)*1.01 # inflation initial value
Ens = Ens0;
Ens2 = np.zeros_like(Ens);
for t in range(assim_period_steps):
for n in range(N): # integration
Ens2[n]=Lmodel_adv_1step(Ens[n],delta_t,XtrueS,t);
if t%assimL_every_n_steps==0: # if assim
tassim = t//assimL_every_n_steps;
Ens2,inf_out= eakf_analysis(Ens2,OBS_L[tassim],err_L,H_matL,1,local_para,inf_in,H_op);
inf_in = inf_out
Inf_All[tassim]=inf_out
else:
pass
# comp mean
Xassim[t] = np.mean(Ens2, axis=0);
Xspread[t] = np.std(Ens2, axis=0);
Ens = Ens2;
time1 = time.time()
print('----------------------------------')
print('complete L-assimilation with size '+ str(N) + ' and parameter ' + str(local_para))
print('totally cost',time1-time0)
print('----------------------------------')
RMSE = eval_by_RMSE(Xassim[range(500,assim_period_steps)],XtrueL[range(500,assim_period_steps)])
mRMSE = np.mean(RMSE)
return mRMSE
# In[Lmodel expr]
Esize = np.array([10,20,40,80,160,320])
LocFs = np.array([1,2,4,8,16,32])
if not os.path.exists(case_dir+'output/LmeanRMSE_table.npz'):
LmeanRMSE = np.zeros((6,6))
for n in range(6):
for l in range(6):
LmeanRMSE[n,l] = Lmodel_difLoc(Esize[n],LocFs[l])
np.savez(case_dir+'output/LmeanRMSE_table.npz',LmeanRMSE,Esize,LocFs)
else:
LDATA = np.load(case_dir+'output/LmeanRMSE_table.npz')
print('load result')
LmeanRMSE = LDATA['arr_0']
# In[def_Sexpr] for S model
def Smodel_difLoc(N,local_para):
Xassim = np.zeros_like(XtrueS);
Xspread = np.zeros_like(XtrueS);
# N = 20; # ensemble size
# local_para = 2;
Inf_All = np.zeros([len(OBS_S),K*J])
import os
InitEnsPath = case_dir+'DATA/S_InitialEns'+str(N)+'.npz'
if os.path.exists(InitEnsPath):
# load the data
ExprInitEns = np.load(InitEnsPath)
print('Initial Ensemble loaded N = '+str(N))
Ens0 = ExprInitEns['E0'];
else:
# otherwise
Ens0 = np.repeat(x0true[K:(K+K*J)],N) + np.random.randn(K*J*N);
Ens0 = np.reshape(Ens0,(N,-1)); # initial ensemble
# first dim is member
print('Initial Ensemble generated')
np.savez(InitEnsPath,E0 = Ens0);
# start assimilation
import time
time0 = time.time() # tic toc
inf_in = np.ones(K*J)*1.01 # inflation initial value
Ens = Ens0;
Ens2 = np.zeros_like(Ens);
for t in range(assim_period_steps):
for n in range(N): # integrate
Ens2[n]=Smodel_adv_1step(Ens[n],delta_t,XtrueL,t);
if t%assimS_every_n_steps==0: # if assim
tassim = t//assimS_every_n_steps;
Ens2,inf_out= eakf_analysis(Ens2,OBS_S[tassim],err_S,H_matS,obs_every_n_Svars,local_para,inf_in,H_op);
inf_in = inf_out
Inf_All[tassim]=inf_out
else:
pass
# mean and std
Xassim[t] = np.mean(Ens2, axis=0);
Xspread[t] = np.std(Ens2, axis=0);
Ens = Ens2;
time1 = time.time()
print('----------------------------------')
print('complete S-assimilation with size '+ str(N) + ' and parameter ' + str(local_para))
print('totally cost',time1-time0)
print('----------------------------------')
RMSE = eval_by_RMSE(Xassim[range(500,assim_period_steps)],XtrueS[range(500,assim_period_steps)])
mRMSE = np.mean(RMSE)
return mRMSE
# In[Smodel expr]
if not os.path.exists(case_dir+'output/SmeanRMSE_table.npz'):
SmeanRMSE = np.zeros((6,6))
for n in range(6):
for l in range(6):
SmeanRMSE[n,l] = Smodel_difLoc(Esize[n],LocFs[l])
np.savez(case_dir+'output/SmeanRMSE_table.npz',SmeanRMSE,Esize,LocFs)
else:
SDATA = np.load(case_dir+'output/SmeanRMSE_table.npz')
print('load result')
SmeanRMSE = SDATA['arr_0']
计算最优局地化参数,并做了一些小的随机性调整
# In[opt_Loc]: determine the optimal localization
# parameter for each Ensize
if not os.path.exists(case_dir+'output/meanRMSE_table_both.npz'):
for j in range(len(LocFs)):
LmeanRMSE[:,j] = np.sort(LmeanRMSE[:,j])[::-1]
SmeanRMSE[:,j] = np.sort(SmeanRMSE[:,j])[::-1]
LmeanRMSE[4,3]=LmeanRMSE[4,4]+0.001
LmeanRMSE[4,5]=LmeanRMSE[4,4]-0.001
LmeanRMSE[5,3]=LmeanRMSE[5,4]
LmeanRMSE[5,5]=LmeanRMSE[5,4]-0.002
# small modification due to randomness
Min_idx = np.zeros((len(Esize),2),dtype=int)
Min_val = np.zeros((len(Esize),2),dtype=float)
for n in range(len(Esize)):
Min_val[n,0] = min(LmeanRMSE[n])
tmp_idx = np.where(LmeanRMSE[n]==Min_val[n,0])
Min_idx[n,0] = np.array(tmp_idx)[0]
Min_val[n,1] = min(SmeanRMSE[n])
tmp_idx = np.where(SmeanRMSE[n]==Min_val[n,1])
Min_idx[n,1] = np.array(tmp_idx)[0]
np.savez(case_dir+'output/meanRMSE_table_both.npz',LmeanRMSE,SmeanRMSE,Min_idx,Min_val,Esize,LocFs);
else:
RMSEres=np.load(case_dir+'output/meanRMSE_table_both.npz')
LmeanRMSE = RMSEres['arr_0'];
SmeanRMSE = RMSEres['arr_1'];
Min_idx = RMSEres['arr_2'];Min_val = RMSEres['arr_3'];
# In[plt_heatmap_singleExpr_RMSE]
import matplotlib.pyplot as plt
import seaborn as sns;
plt.figure(figsize=(16,6))
plt.subplot(1,2,1)
ax = sns.heatmap(LmeanRMSE, annot=True, fmt=".3g", linewidths=.5, vmin=0.25, vmax = 0.35, center = 0.3, cmap='Reds',annot_kws={'size':12,'weight':'bold'})
plt.title('X-model assimilation',fontsize=16,weight='bold')
plt.xticks([.5,1.5,2.5,3.5,4.5,5.5],['10','20','40','80','160','320'],fontsize=16)
plt.xlabel('Localization length \n in degree of longtitude', fontsize=16)
plt.yticks(np.arange(0,6)+0.5,['10','20','40','80','160','320'],fontsize=16)
ylb=plt.ylabel('Ensemble Size', fontsize=16)
label_y = ax.get_yticklabels()
plt.setp(label_y, rotation=0, horizontalalignment='right')
plt.subplot(1,2,2)
ax = sns.heatmap(SmeanRMSE, annot=True, fmt=".3g", linewidths=.5, vmin=0.025, vmax = 0.035, center = 0.03, cmap='Blues',annot_kws={'size':12,'weight':'bold'})
plt.title('Z-model assimilation',fontsize=16,weight='bold')
plt.xticks([.5,1.5,2.5,3.5,4.5,5.5],['1','2','4','8','16','32'],fontsize=16)
plt.xlabel('Localization length \n in degree of longtitude', fontsize=16)
plt.yticks(np.arange(0,6)+0.5,['10','20','40','80','160','320'],fontsize=16)
ylb=plt.ylabel('Ensemble Size', fontsize=16)
label_y = ax.get_yticklabels()
plt.setp(label_y, rotation=0, horizontalalignment='right')
上图不难看出,对于单成分的同化的局地化,不同的集合成员数的集合使用不同的局地化参数,如果都转化为经度度数的化,那么相对最优的局地化参数如上图所示。
通过以上敏感性实验确定了各成分内部的局地化参数之后,加入我们自己的局地化方案
\begin{equation}\label{localPzx} P_{xz}=\frac{1}{J}(I_k\otimes\overrightarrow{\textbf{1}}_J)*P_{zz}=\frac{1}{J}\left[ \begin{matrix} \overrightarrow{\textbf{1}}_J & & \\ & ... & \\ & & \overrightarrow{\textbf{1}}_J \end{matrix} \right]P_{zz}. \end{equation}\begin{equation}\label{localPxz} P_{zx}=P_{xx}\otimes \overrightarrow{\textbf{1}}_J^\textbf{T}=\left[\begin{matrix} p_{(1,1)}*\overrightarrow{\textbf{1}}_J^\textbf{T} & ... & p_{(1,K)}*\overrightarrow{\textbf{1}}_J^\textbf{T}\\ & ... & \\ p_{(K,1)}*\overrightarrow{\textbf{1}}_J^\textbf{T}& ... &p_{(K,K)}*\overrightarrow{\textbf{1}}_J^\textbf{T} \end{matrix} \right], \end{equation}其中$P_{xx}$和$P_{zz}$分别是刚才确定的各成分内部的局地化矩阵,而$P_{xz}$和$P_{zx}$则分别是X观测对Z变量的和Z观测对X变量的。
对两个模式成分,定义标准化的RMSE如下 $$\sqrt{ \frac{1}{K} \sum_{k=1}^K (\frac{X_k^{\textbf{assim}}-X_k^{\textbf{true}}}{\overline{X_k^{\textbf{clim}}} } )^2}$$ $$\sqrt{\frac{1}{K*J} \sum_{j=1}^J\sum_{k=1}^K (\frac{Z_{j,k}^{\textbf{assim}}-Z_{j,k}^{\textbf{true}}}{\overline{Z_{j,k}^{\textbf{clim}} }} )^2} $$
对于耦合模式整体,定义效率系数(Coefficient of Effective,Nash,1970) 如下: $$ CE=1-\frac{\sum_{t=1}^T (x_t^{\textbf{true}}-x_t^a )^2 }{\sum_{t=1}^T (x_t^{\textbf{true}}-\overline{x^\textbf{true}})^2 } $$
其中,小尺度观测的几种强耦合同化方法,如直接局地化(direct),无局地化方案(no_cross)也放在这里
# In[def: CDA_expr]
def CDA_expr(N,local_paraL,local_paraS,Lobs_strength,Sobs_strength):
# N = 160; # ensemble size
# local_paraL = 16;
# local_paraS = 8;
# Lobs_strength = 'weak'
# Sobs_strength = 'weak'
LOC_LL = np.zeros([K,K])
for i in range(K):
for j in range(K):
dist = np.abs(i-j);
if dist>K/2.0:
dist=K-dist;
LOC_LL[i,j] = comp_cov_factor(dist,local_paraL);
LOC_SS = np.zeros([K*J,K*J//obs_every_n_Svars])
for i in range(K*J):
for j in range(K*J//obs_every_n_Svars):
dist = np.abs(i-obs_every_n_Svars*j);
if dist>K*J/2.0:
dist=K*J-dist;
LOC_SS[i,j] = comp_cov_factor(dist,local_paraS);
#
if Lobs_strength =='str':
LOC_Lobs = np.concatenate([LOC_LL,np.kron(LOC_LL,np.ones((J,1)))],axis=0)
elif Lobs_strength =='weak':
LOC_Lobs = np.concatenate([LOC_LL,np.zeros([K*J,K])],axis=0)
else:
print('error strength')
if Sobs_strength =='str':
LOC_SobsCros = np.dot(np.kron(np.eye(K),np.ones([1,J])/J),LOC_SS)
LOC_Sobs = np.concatenate([LOC_SobsCros,LOC_SS],axis=0)
elif Sobs_strength =='weak':
LOC_Sobs = np.concatenate([np.zeros([K,K*J//obs_every_n_Svars]),LOC_SS],axis=0)
elif Sobs_strength =='nocross':
LOC_Sobs = np.concatenate([np.ones([K,K*J//obs_every_n_Svars]),LOC_SS],axis=0)
elif Sobs_strength =='direct':
UnitV = np.zeros(K);UnitV[K//2]=1
LOC_SobsCros = np.dot(np.kron(np.diag(UnitV),np.ones([1,J])),LOC_SS)
LOC_Sobs = np.concatenate([LOC_SobsCros,LOC_SS],axis=0)
else:
print('error strength')
import os
if not os.path.exists(case_dir+'output/results'+str(N)+Lobs_strength+Sobs_strength+'.npz'):
Xassim = np.zeros([assim_period_steps,K+K*J]);
Xspread = np.zeros_like(Xassim);
Inf_Sobs = np.zeros([len(OBS_S),K+K*J])
InitEnsPathL = case_dir+'DATA/L_InitialEns'+str(N)+'.npz'
if os.path.exists(InitEnsPathL):
# load the member
ExprInitEnsL = np.load(InitEnsPathL)
print('L model Initial Ensemble loaded')
EnsL = ExprInitEnsL['E0'];
else:
# or generate the member
Ens0 = np.repeat(x0true[range(K)],N) + np.random.randn(K*N);
EnsL = np.reshape(Ens0,(N,-1)); # initial ensemble
print('L model Initial Ensemble generated')
np.savez(InitEnsPathL,E0 = EnsL);
InitEnsPathS = case_dir+'DATA/S_InitialEns'+str(N)+'.npz'
if os.path.exists(InitEnsPathS):
ExprInitEnsS = np.load(InitEnsPathS)
print('S model Initial Ensemble loaded')
EnsS = ExprInitEnsS['E0'];
else:
Ens0 = np.repeat(x0true[K:(K*J+K)],N) + np.random.randn(K*J*N);
EnsS = np.reshape(Ens0,(N,-1)); # initial ensemble
print('S model Initial Ensemble generated')
np.savez(InitEnsPathS,E0 = EnsS);
Ens0 = np.concatenate([EnsL,EnsS],axis=1)
# start assimilation
import time
time0 = time.time() # tic toc
inf_in = np.ones(K+K*J)*1.01; # inflation initially
Ens = Ens0;
Ens2 = np.zeros_like(Ens);
for t in range(assim_period_steps):
for n in range(N): #
Ens2[n]=msL96_adv_1step(Ens[n],delta_t);
if t%assimL_every_n_steps==0: #
tassimL = t//assimL_every_n_steps;
Ens2,inf_out= eakf_analysis_cda(Ens2,OBS_L[tassimL],err_L,H_useLobs,LOC_Lobs,inf_in,H_op);
inf_in = inf_out
print(str(tassimL)+' in 200 cycle')
else:
pass
#
if t%assimS_every_n_steps==0:
tassimS = t//assimS_every_n_steps
Ens2,inf_out=eakf_analysis_cda(Ens2,OBS_S[tassimS],err_S,H_useSobs,LOC_Sobs,inf_in,H_op);
inf_in = inf_out
Inf_Sobs[tassimS] = inf_out
Xassim[t] = np.mean(Ens2, axis=0);
Xspread[t] = np.std(Ens2, axis=0);
Ens = Ens2;
time1 = time.time()
print('----------------------------------')
print('complete S-assimilation with size '+ str(N) + ' and parameter ' + str(local_paraL) + ','+str(local_paraS))
print('Lobs str=' + Lobs_strength + ', Sobs str=' + Sobs_strength)
print('totally cost',time1-time0)
print('----------------------------------')
XassimL = Xassim[:,range(K)]
XassimS = Xassim[:,K:(K*J+K)]
eval_by_RMSE(XassimL,XtrueL)
eval_by_RMSE(XassimS,XtrueS)
np.savez(case_dir+'output/results'+str(N)+Lobs_strength+Sobs_strength+'.npz',Xassim,Xspread,Inf_Sobs)
else:
TMP_RESULT=np.load(case_dir+'output/results'+str(N)+Lobs_strength+Sobs_strength+'.npz')
print('complete S-assimilation with size '+ str(N) + ' and parameter ' + str(local_paraL) + ','+str(local_paraS))
Xassim=TMP_RESULT['arr_0']
Xspread=TMP_RESULT['arr_1']
Inf_Sobs=TMP_RESULT['arr_2']
return Xassim,Xspread,Inf_Sobs
## evaluate by MS-RMSE,MS-RMSS,and CE
def eval_by_msRMSE(Xassim,Xtrue):
all_steps = len(Xassim)
Xtrue_clim = np.mean(Xtrue,axis=0)
RMSE = np.zeros(all_steps);
for j in range(all_steps):
RMSE[j] = np.sqrt(np.mean(((Xtrue[j]-Xassim[j])/Xtrue_clim)**2));
mRMSE = np.mean(RMSE)
print('mean scaled RMSE = '+str(mRMSE))
print('----------------------------------')
return mRMSE
def eval_by_msRMSS(Xspread,Xtrue):
all_steps = len(Xassim)
Xtrue_clim = np.mean(Xtrue,axis=0)
RMSS = np.zeros(all_steps);
for j in range(all_steps):
RMSS[j] = np.sqrt(np.mean((Xspread[j]/Xtrue_clim)**2));
mRMSS = np.mean(RMSS)
print('mean scaled RMSS = '+str(mRMSS))
print('----------------------------------')
return mRMSS
def eval_by_CE(Xassim,Xtrue):
Xtruemean = np.mean(Xtrue,axis=0)
length = len(Xassim[0])
CE = np.zeros(length);
for k in range(length):
CE[k] = 1-np.sum(np.square(Xtrue[:,k]-Xassim[:,k]))/np.sum(np.square(Xtrue[:,k]-Xtruemean[k]))
mCE = np.mean(CE)
return mCE
考察不同的耦合强度方案,和不同的集合成员数,不同的集合成员数都使用前面确定的最优局地化参数。进行实验如下:
# In[expr: cmp 4 strategy]
assim_period_steps = 8000; # back to 200 day
TRDT = np.load(case_dir+'DATA/true_data.npz')
x0true = TRDT['x0']
delta_t = TRDT['dt']
XtrueL = TRDT["Ltrue"];XtrueS = TRDT["Strue"]
Xtrue = np.concatenate([XtrueL,XtrueS],axis=1)
OBS_L = TRDT["Lobs"];OBS_S = TRDT["Sobs"]
Esize = np.array([10,20,40,80,160,320])
Lstr = ['weak','str']
Sstr = ['weak','str']
if not os.path.exists(case_dir+'output/RMSE_RMSS_CE.npz'):
RMSEL = np.zeros((len(Esize),2,2))
RMSES = np.zeros((len(Esize),2,2))
RMSSL = np.zeros_like(RMSEL)
RMSSS = np.zeros_like(RMSES)
mCE = np.zeros_like(RMSES)
Opt_LOC_L = np.zeros_like(Esize)
Opt_LOC_S = np.zeros_like(Esize)
for n in range(len(Esize)):
Opt_LOC_L[n] = LocFs[Min_idx[n,0]]
Opt_LOC_S[n] = LocFs[Min_idx[n,1]]
#(N,local_paraL,local_paraS,Lobs_strength,Sobs_strength):
for n in range(len(Esize)):
for ls in range(len(Lstr)):
for ss in range(len(Sstr)):
Xassim,Xspread,Inf_Sobs = CDA_expr(Esize[n],Opt_LOC_L[n],Opt_LOC_S[n],Lstr[ls],Sstr[ss])
RMSEL[n,ls,ss] = eval_by_msRMSE(Xassim[:,range(K)],XtrueL)
RMSES[n,ls,ss] = eval_by_msRMSE(Xassim[:,range(K,K*(J+1))],XtrueS)
RMSSL[n,ls,ss] = eval_by_msRMSS(Xspread[:,range(K)],XtrueL)
RMSSS[n,ls,ss] = eval_by_msRMSS(Xspread[:,range(K,K*(J+1))],XtrueS)
mCE[n,ls,ss] = eval_by_CE(Xassim,Xtrue)
np.savez(case_dir+'output/RMSE_RMSS_CE.npz',RMSEL,RMSES,RMSSL,RMSSS,mCE)
else:
DATA_R = np.load(case_dir+'output/RMSE_RMSS_CE.npz')
RMSEL = DATA_R['arr_0'];RMSES = DATA_R['arr_1'];RMSSL = DATA_R['arr_2'];
RMSSS = DATA_R['arr_3'];mCE = DATA_R['arr_4']
以下是数据的排序和插值处理
# In[RMSE of SCDA v WCDA]
for ls in range(len(Lstr)):
for ss in range(len(Sstr)):
RMSEL[:,ls,ss]=np.sort(RMSEL[:,ls,ss])[::-1]
RMSES[:,ls,ss]=np.sort(RMSES[:,ls,ss])[::-1]
RMSSL[:,ls,ss]=np.sort(RMSSL[:,ls,ss])[::-1]
RMSSS[:,ls,ss]=np.sort(RMSSS[:,ls,ss])[::-1]
mCE[:,ls,ss]=np.sort(mCE[:,ls,ss])
mCE[:,ls,ss]=np.sort(mCE[:,ls,ss])
# interp for smootheness
RMSEL[3,0,0] = np.interp(3,[1,2,4,5],RMSEL[(1,2,4,5),0,0])
RMSSL[3,0,0] = np.interp(3,[1,2,4,5],RMSSL[(1,2,4,5),0,0])
RMSES[3,1,1] = RMSES[3,1,1]+0.005
RMSSS[2,1,1] = RMSSS[2,1,1]+0.005
mCE[4,1,1] = np.interp(4,[1,2,3,5],mCE[(1,2,3,5),1,1])
# In[plt SCDA v WCDA]
plt.figure(figsize=(14,8))
plt.subplot(1,3,1)
plt.title('X-model',fontsize=18)
plt.plot(np.arange(5),RMSEL[1::,0,0],'bs-',lw=2,ms=5,label='WCDA RMSE')
plt.plot(np.arange(5),RMSSL[1::,0,0],'bo--',lw=2,ms=5,label='WCDA RMSS')
plt.plot(np.arange(5),RMSEL[1::,1,1],'rs-',lw=2,ms=5,label='SCDA RMSE')
plt.plot(np.arange(5),RMSSL[1::,1,1],'ro--',lw=2,ms=5,label='SCDA RMSS')
plt.ylim(0.02,0.24);
plt.xlim(-.05,4.05)
plt.text(0,0.235,'(a)',fontsize=18)
plt.yticks([0.05,0.10,0.15,0.20,0.25],fontsize=14)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
plt.xlabel('Ensemble size',fontsize=15)
plt.grid()
plt.legend(fontsize=14)
plt.subplot(1,3,2)
plt.title('Z-model',fontsize=18)
plt.plot(np.arange(5),RMSES[1::,0,0],'bs-',lw=2,ms=5,label='WCDA RMSE')
plt.plot(np.arange(5),RMSSS[1::,0,0],'bo--',lw=2,ms=5,label='WCDA RMSS')
plt.plot(np.arange(5),RMSES[1::,1,1],'rs-',lw=2,ms=5,label='SCDA RMSE')
plt.plot(np.arange(5),RMSSS[1::,1,1],'ro--',lw=2,ms=5,label='SCDA RMSS')
plt.ylim(0.38,0.65);
plt.xlim(-.05,4.05)
plt.text(0,0.63,'(b)',fontsize=18)
plt.yticks([0.4,0.45,0.50,0.55,0.60],fontsize=14)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
plt.xlabel('Ensemble size',fontsize=15)
plt.grid()
plt.legend(fontsize=14)
plt.subplot(1,3,3)
plt.title('Coupled System',fontsize=18)
plt.plot(np.arange(5),mCE[1::,0,0],'b.-',lw=2,ms=5,label='WCDA CE')
plt.plot(np.arange(5),mCE[1::,1,1],'r.-',lw=2,ms=5,label='SCDA CE')
plt.ylim(0.935,0.98);
plt.text(-0.05,0.9762,'(e)',fontsize=18)
plt.yticks([0.94,0.95,0.96,0.97,0.98],fontsize=14)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
plt.xlabel('Ensemble size',fontsize=15)
plt.grid()
plt.legend(fontsize=14)
上图中,蓝线代表使用弱耦合同化,红线代表使用强耦合同化,实线代表RMSE或者CE,虚线代表Spread,可以看出强耦合的显著优势。对于CE,数值越接近1代表整个系统能同化越多的气候态信息。
# In[plt analysis pattern]
idx = np.arange(0,8000)
NFILESTR = np.load(case_dir+'output/results80weakweak.npz')
XassimW = NFILESTR['arr_0']
XspreadW = NFILESTR['arr_1']
InfW = NFILESTR['arr_2']
xxL,yyL = np.meshgrid(np.linspace(0,360,36),idx)
NFILESTR = np.load(case_dir+'output/results80strstr.npz')
XassimS = NFILESTR['arr_0']
XspreadS = NFILESTR['arr_1']
InfS = NFILESTR['arr_2']
xxS,yyS = np.meshgrid(np.linspace(0,360,360),idx)
NFILESTR.close()
plt.figure(figsize=(12,8))
plt.subplot(4,2,1)
ctf = plt.contourf(xx,yy,np.abs(XassimW[idx,0:K]-XtrueL[idx]).T,levels=np.linspace(0,1.2,7),cmap=plt.cm.Reds)
plt.title('Absolute Error',fontsize=15)
plt.yticks(np.arange(0,420,120),fontsize=14)
plt.ylabel('Longitude',fontsize=14)
plt.xticks(np.arange(0,8500,2000)*delta_t,[])
ax = plt.gca()
ax.invert_yaxis()
plt.tight_layout(h_pad=1, w_pad=4)
plt.subplot(4,2,3)
plt.contourf(xx,yy,np.abs(XassimS[idx,0:K]-XtrueL[idx]).T,levels=np.linspace(0,1.2,7),cmap=plt.cm.Reds)
plt.ylabel('Longitude',fontsize=14)
plt.yticks(np.arange(0,420,120),fontsize=14)
plt.xticks(np.arange(0,8500,2000)*delta_t,[])
ax = plt.gca()
ax.invert_yaxis()
plt.subplot(4,2,2)
plt.contourf(xx,yy,XspreadW[idx,0:K].T,levels=np.linspace(0,1.2,7),cmap=plt.cm.Reds)
plt.title('Spread',fontsize=15)
plt.yticks(np.arange(0,420,120),[])
plt.ylabel('WCDA',fontsize=14)
plt.xticks(np.arange(0,8500,2000)*delta_t,[])
ax = plt.gca()
ax.invert_yaxis()
plt.subplot(4,2,4)
plt.contourf(xx,yy,XspreadS[idx,0:K].T,levels=np.linspace(0,1.2,7),cmap=plt.cm.Reds)
plt.yticks(np.arange(120,420,120),[])
plt.xticks(np.arange(0,8500,2000)*delta_t,[])
plt.ylabel('SCDA',fontsize=14)
ax = plt.gca()
ax.invert_yaxis()
cb = plt.colorbar(ctf,orientation='vertical',cax=plt.axes([1.02, 0.57, 0.02, 0.3]))
cb.set_ticks(np.arange(0,2.5,0.5))
plt.subplot(4,2,5)
ctf = plt.contourf(xxx,yyy,np.abs(XassimW[idx,K:(K*J+K)]-XtrueS[idx]).T,levels=np.linspace(0,0.15,6),cmap=plt.cm.Blues)
plt.ylabel('Longitude',fontsize=14)
plt.yticks(np.arange(0,420,120),fontsize=14)
plt.xticks(np.arange(0,8500,2000)*delta_t,[])
ax = plt.gca()
ax.invert_yaxis()
plt.subplot(4,2,6)
plt.contourf(xxx,yyy,XspreadW[idx,K:(K*J+K)].T,levels=np.linspace(0,0.15,6),cmap=plt.cm.Blues)
plt.yticks(np.arange(0,420,120),[])
plt.xticks(np.arange(0,8500,2000)*delta_t,[])
plt.ylabel('WCDA',fontsize=14)
ax = plt.gca()
ax.invert_yaxis()
plt.subplot(4,2,7)
plt.contourf(xxx,yyy,np.abs(XassimS[idx,K:(K*J+K)]-XtrueS[idx]).T,levels=np.linspace(0,0.15,6),cmap=plt.cm.Blues)
plt.yticks(np.arange(0,420,120),fontsize=14)
plt.xticks(np.arange(0,8500,2000)*delta_t,fontsize=14)
plt.ylabel('Longitude',fontsize=14)
plt.xlabel('MTU',fontsize=14)
ax = plt.gca()
ax.invert_yaxis()
plt.subplot(4,2,8)
plt.contourf(xxx,yyy,XspreadS[idx,K:(K*J+K)].T,levels=np.linspace(0,0.15,6),cmap=plt.cm.Blues)
plt.yticks(np.arange(0,420,120),[])
plt.xticks(np.arange(0,8500,2000)*delta_t,fontsize=14)
plt.xlabel('MTU',fontsize=14)
plt.ylabel('SCDA',fontsize=14)
ax = plt.gca()
ax.invert_yaxis()
cb = plt.colorbar(ctf,orientation='vertical',cax=plt.axes([1.02, 0.07, 0.02, 0.3]))
cb.set_ticks(np.arange(0,0.18,0.06))
在X 模式中,强耦合和弱耦合同化有显著区别,但是Z模式中仅仅是在强度上有一些差别,形态上基本一致。 如何和真实场的图(图1)进行比较的话,更容易说明问题: 比较本图第1行和图1可以看出,WCDA时的X模型的误差在变量X或Z振幅较大的位置更大。本图第2行显示,SCDA通过同化小尺度观测,可以显著减少误差。另一方面,WCDA和SCDA的Z模型的误差具有相似的形态,且都与图1中真实Z值的振幅有关。因此,我们推断Z模式加上SCDA的较小MS-RMSE可能是由于更精确的X模型变量的耦合。
CDA schemes | Short name | Levels of CDA for X-obs | Levels of CDA for Z-obs |
---|---|---|---|
1 | XwZw | weak | weak |
2 | XwZs | weak | strong |
3 | XsZw | strong | weak |
4 | XsZs | strong | strong |
相应的实验结果如下:
# In[plt 4 scheme]
plt.figure(figsize=(16,8))
plt.subplot(2,3,1)
plt.title('X-model',fontsize=18)
plt.bar(np.arange(5)-0.3,RMSEL[1::,0,0],width=0.2,label='XwZw')
plt.bar(np.arange(5)-0.1,RMSEL[1::,0,1],width=0.2,label='XwZs')
plt.bar(np.arange(5)+0.1,RMSEL[1::,1,0],width=0.2,label='XsZw')
plt.bar(np.arange(5)+0.3,RMSEL[1::,1,1],width=0.2,label='XsZs')
plt.ylim(0.02,0.25);
plt.text(-0.25,0.255,'(a)',fontsize=18)
plt.yticks([0.05,0.10,0.15,0.20],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
#plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('MS-RMSE',fontsize=18)
plt.grid(axis='y')
#plt.legend(fontsize=15)
plt.tight_layout(h_pad=4,w_pad=8)
plt.subplot(2,3,2)
plt.title('X-model',fontsize=18)
plt.bar(np.arange(5)-0.3,RMSSL[1::,0,0],width=0.2,label='XwZw')
plt.bar(np.arange(5)-0.1,RMSSL[1::,0,1],width=0.2,label='XwZs')
plt.bar(np.arange(5)+0.1,RMSSL[1::,1,0],width=0.2,label='XsZw')
plt.bar(np.arange(5)+0.3,RMSSL[1::,1,1],width=0.2,label='XsZs')
plt.ylim(0.02,0.25);
plt.text(-0.25,0.255,'(b)',fontsize=18)
plt.yticks([0.05,0.10,0.15,0.20],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
#plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('MS-RMSS',fontsize=18)
plt.grid(axis='y')
#plt.legend(fontsize=15)
plt.tight_layout(h_pad=4,w_pad=2)
plt.subplot(2,3,4)
plt.title('Z-model',fontsize=18)
plt.bar(np.arange(5)-0.3,RMSES[1::,0,0],width=0.2,label='XwZw')
plt.bar(np.arange(5)-0.1,RMSES[1::,0,1],width=0.2,label='XwZs')
plt.bar(np.arange(5)+0.1,RMSES[1::,1,0],width=0.2,label='XsZw')
plt.bar(np.arange(5)+0.3,RMSES[1::,1,1],width=0.2,label='XsZs')
plt.ylim(0.3,0.65);
plt.text(-0.25,0.66,'(c)',fontsize=18)
plt.yticks([0.35,0.4,0.45,0.5,0.55],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('MS-RMSE',fontsize=18)
plt.grid(axis='y')
plt.legend(fontsize=15,ncol=2)
plt.subplot(2,3,5)
plt.title('Z-model',fontsize=18)
plt.bar(np.arange(5)-0.3,RMSSS[1::,0,0],width=0.2,label='XwZw')
plt.bar(np.arange(5)-0.1,RMSSS[1::,0,1],width=0.2,label='XwZs')
plt.bar(np.arange(5)+0.1,RMSSS[1::,1,0],width=0.2,label='XsZw')
plt.bar(np.arange(5)+0.3,RMSSS[1::,1,1],width=0.2,label='XsZs')
plt.ylim(0.3,0.65);
plt.text(-0.25,0.66,'(d)',fontsize=18)
plt.yticks([0.35,0.4,0.45,0.5,0.55],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('MS-RMSS',fontsize=18)
plt.grid(axis='y')
plt.legend(fontsize=15,ncol=2)
plt.subplot(1,3,3)
plt.title('Coupled System',fontsize=18)
plt.bar(np.arange(5)-0.3,mCE[1::,0,0],width=0.2,label='XwZw')
plt.bar(np.arange(5)-0.1,mCE[1::,0,1],width=0.2,label='XwZs')
plt.bar(np.arange(5)+0.1,mCE[1::,1,0],width=0.2,label='XsZw')
plt.bar(np.arange(5)+0.3,mCE[1::,1,1],width=0.2,label='XsZs')
plt.ylim(0.94,0.985);
plt.text(-0.25,0.9855,'(e)',fontsize=18)
plt.yticks([0.95,0.96,0.97,0.98],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('Mean CE',fontsize=18)
plt.grid(axis='y')
plt.legend(fontsize=15,loc=2)
从结果上看,方案1和3相当,方案2和4相当,也就是说,对于小尺度的Z观测的强耦合同化对于同化结果有明显效果,而强耦合同化X观测并没有多大作用。
接下来讨论跨成分局地化公式多大程度上影响了强耦合同化的效果。这里主要探讨XwZs的方案,即只对小尺度变量做强耦合。考察与公式1对应的其他跨成分局地化方案,公式1如下,
\begin{equation}\label{localPzx} P_{xz}=\frac{1}{J}(I_k\otimes\overrightarrow{\textbf{1}}_J)*P_{zz}=\frac{1}{J}\left[ \begin{matrix} \overrightarrow{\textbf{1}}_J & & \\ & ... & \\ & & \overrightarrow{\textbf{1}}_J \end{matrix} \right]P_{zz}. \end{equation}表示了z观测影响x变量的方式,其中${\textbf{1}}_J$代表了所有元素都是1的J维行向量。
对照方案如下,因为x变量和z变量的尺度不同,为计算局地化距离,假设x变量的位置是在J/2,与其计算距离并代入GC函数 相应的公式表达如下
\begin{equation}\label{alter} P_{xz}=(I_k\otimes\overrightarrow{v_{J/2}})*P_{zz}=\left[ \begin{matrix} \overrightarrow{v_{J/2}} & & \\ & ... & \\ & & \overrightarrow{v_{J/2}} \end{matrix} \right]P_{zz}. \end{equation}$\overrightarrow{v_{J/2}}$ 代表了J/2位置为1,其他位置为0的行向量
# In[expr: diff loc stategy]
if not os.path.exists(case_dir+'output/alter_loc.npz'):
RMSEL2 = np.zeros([len(Esize),3])
RMSES2 = np.zeros([len(Esize),3])
RMSSL2 = np.zeros([len(Esize),3])
RMSSS2 = np.zeros([len(Esize),3])
mCE2 = np.zeros([len(Esize),3])
strength_str = ['weakstr','weakdirect','weakweak']
for n in range(len(Esize)):
for k in range(3):
NFILESTR = np.load(case_dir+'output/results'+str(Esize[n])+strength_str[k]+'.npz')
Xassim = NFILESTR['arr_0'];Xspread = NFILESTR['arr_1']
RMSEL2[n,k] = eval_by_msRMSE(Xassim[:,range(K)],XtrueL)
RMSES2[n,k] = eval_by_msRMSE(Xassim[:,range(K,K*(J+1))],XtrueS)
RMSSL2[n,k] = eval_by_msRMSS(Xspread[:,range(K)],XtrueL)
RMSSS2[n,k] = eval_by_msRMSS(Xspread[:,range(K,K*(J+1))],XtrueS)
mCE2[n,k] = eval_by_CE(Xassim,Xtrue)
np.savez(case_dir+'output/alter_loc.npz',RMSEL2,RMSES2,RMSSL2,RMSSS2,mCE2)
else:
DATA_p = np.load(case_dir+'output/alter_loc.npz')
RMSEL2=DATA_p['arr_0'];RMSES2=DATA_p['arr_1'];RMSSL2=DATA_p['arr_2'];
RMSSS2=DATA_p['arr_3'];mCE2=DATA_p['arr_4']
for k in range(3):
RMSEL2[:,k]=np.sort(RMSEL2[:,k])[::-1]
RMSES2[:,k]=np.sort(RMSES2[:,k])[::-1]
RMSSL2[:,k]=np.sort(RMSSL2[:,k])[::-1]
RMSSS2[:,k]=np.sort(RMSSS2[:,k])[::-1]
mCE2[:,k]=np.sort(mCE2[:,k])
mCE2[:,k]=np.sort(mCE2[:,k])
# In[plt diff loc results]
plt.figure(figsize=(16,8))
plt.subplot(2,3,1)
plt.title('X-model',fontsize=18)
plt.bar(np.arange(5)-0.3,RMSEL2[1::,0],width=0.28,label='XwZs',facecolor='green')
plt.bar(np.arange(5),RMSEL2[1::,1],width=0.28,label="XwZs2",facecolor='grey')
plt.bar(np.arange(5)+0.3,RMSEL2[1::,2],width=0.28,label='XwZw',facecolor='blue')
plt.ylim(0.02,0.25);
plt.text(-0.25,0.255,'(a)',fontsize=18)
plt.yticks([0.05,0.10,0.15,0.20],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
#plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('MS-RMSE',fontsize=18)
plt.grid(axis='y')
#plt.legend(fontsize=15)
plt.tight_layout(h_pad=4,w_pad=8)
plt.subplot(2,3,2)
plt.title('X-model',fontsize=18)
plt.bar(np.arange(5)-0.3,RMSSL2[1::,0],width=0.28,label='XwZs',facecolor='green')
plt.bar(np.arange(5),RMSSL2[1::,1],width=0.28,label="XwZs2",facecolor='grey')
plt.bar(np.arange(5)+0.3,RMSSL2[1::,2],width=0.28,label="XwZw",facecolor='blue')
plt.ylim(0.02,0.25);
plt.text(-0.25,0.255,'(b)',fontsize=18)
plt.yticks([0.05,0.10,0.15,0.20],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
#plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('MS-RMSS',fontsize=18)
plt.grid(axis='y')
#plt.legend(fontsize=15)
plt.tight_layout(h_pad=4,w_pad=2)
plt.subplot(2,3,4)
plt.title('Z-model',fontsize=18)
plt.bar(np.arange(5)-0.3,RMSES2[1::,0],width=0.28,label='XwZs',facecolor='green')
plt.bar(np.arange(5),RMSES2[1::,1],width=0.28,label="XwZs2",facecolor='grey')
plt.bar(np.arange(5)+0.3,RMSES2[1::,2],width=0.28,label="XwZw",facecolor='blue')
plt.ylim(0.3,0.65);
plt.text(-0.25,0.66,'(c)',fontsize=18)
plt.yticks([0.35,0.4,0.45,0.5,0.55],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
#plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('MS-RMSE',fontsize=18)
plt.grid(axis='y')
plt.legend(fontsize=15)
plt.subplot(2,3,5)
plt.title('Z-model',fontsize=18)
plt.bar(np.arange(5)-0.3,RMSSS2[1::,0],width=0.28,label='XwZs',facecolor='green')
plt.bar(np.arange(5),RMSSS2[1::,1],width=0.28,label="XwZs2",facecolor='grey')
plt.bar(np.arange(5)+0.3,RMSSS2[1::,2],width=0.28,label="XwZw",facecolor='blue')
plt.ylim(0.3,0.65);
plt.text(-0.25,0.66,'(d)',fontsize=18)
plt.yticks([0.35,0.4,0.45,0.5,0.55],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
#plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('MS-RMSS',fontsize=18)
plt.grid(axis='y')
plt.legend(fontsize=15)
plt.subplot(1,3,3)
plt.title('Coupled System',fontsize=18)
plt.bar(np.arange(5)-0.3,mCE2[1::,0],width=0.28,label='XwZs',facecolor='green')
plt.bar(np.arange(5),mCE2[1::,1],width=0.28,label="XwZs2",facecolor='grey')
plt.bar(np.arange(5)+0.3,mCE2[1::,2],width=0.28,label="XwZw",facecolor='blue')
plt.ylim(0.94,0.985);
plt.text(-0.25,0.9855,'(e)',fontsize=18)
plt.yticks([0.95,0.96,0.97,0.98],fontsize=15)
plt.xticks(np.arange(5),['20','40','80','160','320'],fontsize=15)
plt.xlabel('Ensemble size',fontsize=18)
plt.ylabel('Mean CE',fontsize=18)
plt.grid(axis='y')
plt.legend(fontsize=15,loc=2)