Source code for stompy.io.match_datasets

"""
Venturing into generic code to match two datasets.  

Not remotely generic at this point, and makes some assumptions
about dimensions, depth, time, etc.

"""
import numpy as np
import xarray as xr
from scipy.spatial import kdtree

from .. import utils


[docs]class MatchVarsCruise(object): def __init__(self,varA,varB,B_type): """ Building on the development in ~/notebooks/nitrogen_budgets/sfbay_din/ Callable instance which takes a variable with the same shape/dimenions as varB, and returns a variable of the shape/dims of varA. """ new_coords={} #---- Time! mapBtime_to_A = np.searchsorted( varB.time.values, varA.time.values ) if 1: mapBtime_to_A=np.ma.array( mapBtime_to_A, mask=utils.isnat(varA.time.values) ) new_coords['time']= xr.DataArray( varB.time.values[mapBtime_to_A], dims=varA.time.dims) #---- Stations: A_xy=np.array( [varA.x, varA.y] ).T if B_type=='hist': B_xy=np.array( [varB.element_x,varB.element_y] ).T elif B_type=='map': B_xy=np.array( [varB.FlowElem_xcc,varB.FlowElem_ycc] ).T # in the case of hist files, some history output isn't tied to a spatial element # (element ids come from a convention in waq_scenario), and those elements will # have nan coordinates valid=np.isfinite(B_xy[:,0]) kdt=kdtree.KDTree(B_xy[valid]) dists,valid_indices = kdt.query(A_xy) # print("Distance from observed locations to model output: ",dists) all_indices= np.arange(B_xy.shape[0])[valid][valid_indices] mapBstn_to_A=all_indices new_coords['x'] = xr.DataArray(B_xy[mapBstn_to_A,0],dims=['Distance_from_station_36']) new_coords['y'] = xr.DataArray(B_xy[mapBstn_to_A,1],dims=['Distance_from_station_36']) #---- Layers: A_depth=varA.depth # fully 3D, all values present B_depth=varB.localdepth # fully 3D, lots missing mapBdepth_to_A=np.zeros(varA.shape,'i4') mask=np.zeros(varA.shape,'b1') # This set of loops is painful slow for idx0 in range(varA.shape[0]): for idx1 in range(varA.shape[1]): # gets tricky with generalizing here # varA.time.dims => ('date', 'Distance_from_station_36', 'prof_sample') # varA.Distance_from_station_36.dims => 'Distance_from_station_36' for idx2 in range(varA.shape[2]): Bidx0=mapBtime_to_A[idx0,idx1,idx2] masked=mapBtime_to_A.mask[idx0,idx1,idx2] Bidx1=mapBstn_to_A[idx1] # could be moved out if masked: mask[idx0,idx1,idx2]=True continue this_A_depth =A_depth[idx0,idx1,idx2] these_B_depths=B_depth[Bidx0,Bidx1,:] valid=np.isfinite(these_B_depths) idx_valid=np.searchsorted(these_B_depths[valid],this_A_depth) idx_valid=idx_valid.clip(0,len(these_B_depths)-1) idx=np.arange(len(these_B_depths))[idx_valid] mapBdepth_to_A[idx0,idx1,idx2]=idx mapBdepth_to_A=np.ma.array(mapBdepth_to_A,mask=mask) #---- Extract depth for a coordinate new_depths=B_depth.values[mapBtime_to_A, mapBstn_to_A[None,:,None], mapBdepth_to_A] new_coords['depth']= xr.DataArray( np.ma.array(new_depths,mask=mask), dims=varA.dims) # save the mapping info self.mask=mask self.new_coords=new_coords self.map_time=mapBtime_to_A self.map_station=mapBstn_to_A self.map_depth=mapBdepth_to_A self.varA=varA # important to pass these in as default args to establish a robust # binding def __call__(self,varB): #---- Extract the actual analyte newBvalues=varB.values[ self.map_time, self.map_station[None,:,None], self.map_depth ] newBvalues=np.ma.array(newBvalues,mask=self.mask) # the primary dimensions are copied from A Bcoords=[ (d,self.varA[d]) for d in self.varA.dims ] newB=xr.DataArray(newBvalues, dims=self.varA.dims, coords=Bcoords) # additionaly coordinate information reflects the original times/locations # of the data newB=newB.assign_coords(**self.new_coords) return newB