Source code for stompy.io.qnc

from __future__ import print_function

from six import iteritems
import netCDF4
import os
import uuid
from .. import utils
import numpy as np
import collections
from scipy import interpolate
from scipy.signal import decimate

[docs]class QncException(Exception):
    pass

[docs]def to_str(s):
    if not isinstance(s,str) and isinstance(s,bytes):
        s=s.decode() # in case py3 and s is bytes
    return s

[docs]def sanitize_name(s):
    """
    make s suitable for use as a variable or dimension
    name
    """
    return to_str(s).replace(' ','_').replace('/','_')
    
[docs]def as_tuple(x):
    if isinstance(x,tuple):
        return x
    elif isinstance(x,list):
        return tuple(x)
    else:
        return (x,)

[docs]def anon_dim_name(size,**kws):
    """
    Name given to on-demand dimensions.
    kws: unused, but might include the type?
    """
    return 'd%d'%size

[docs]class QuickVar(object): # wraps netcdf variable
    # predefine QuickVar attributes for the sake of  __setattr__
    _nc=None
    _v=None
    _transpose=None
    # _converter=None
    def __init__(self,nc,v,transpose=None):
        self.__dict__['_nc']=nc
        self.__dict__['_v']=v
        # self.__dict__['_converter']=converter
        if transpose is None:
            transpose=range(len(v.dimensions))
        self.__dict__['_transpose']=transpose
    def __getitem__(self,k):
        """ the options are similar to indexing a record array,
        but strings refer to dimensions
        """

        # if k is a string or tuple of strings, transpose
        # and return a new quick var.
        # if k is a dict, no transposition, but map the values to
        # slices

        # otherwise delegate to var.

        # this makes list arguments and tuple arguments
        # appear the same, but in the spirit of following
        # numpy semantics, those should not be the same.
        if isinstance(k,dict):
            dims=self.dims
            slices=[ k.get(d,slice(None))
                     for d in self.dims ]
            k=tuple(slices)

        if not isinstance(k,tuple):
            k=(k,)

        k=tuple(to_str(ki) for ki in k)
        
        if isinstance(k[0],str):
            return self._transpose_by_names(k)
        else:
            myv=self._v

            # first, make k explicitly cover all dimensions
            #try:
            k=list(k) # make a list so we can modify it
            #except TypeError: # k just a single slice or index
            #    k=[k]

            for ki,kk in enumerate(k):
                if kk is Ellipsis:
                    expand_slc=slice(ki,ki+1)
                    expansion=[slice(None)]*(myv.ndim-(len(k)-1))
                    k[expand_slc] = expansion
                    break
            else:
                while len(k)< myv.ndim:
                    k.append( slice(None) )

            # k is still a set of slices on the transposed data
            untranspose=[ self._transpose.index(i) 
                          for i in range(myv.ndim) ]
            k_untransposed=[k[j] for j in untranspose]

            # retrieve the data, possibly having netcdf subset it
            # important to tuplify - different semantics than 
            # a list
            pulled=self._rich_take(myv,tuple(k_untransposed))

            # if none of the slices are a singleton, then we'd just 
            # apply our transpose and be done.
            # .. say the initial dims were [time,cell,z]
            #    and self._transpose=[1,0,2] # cell,time,z
            #    and k=[0,slice(None),slice(None)]
            # which means cell=0,all time, all z
            # then k_transposed=[slice(None),0,slice(None)]
            # pulled has dimensions of time,z
            # retranspose=[0,1]
            # compared to transpose which was [1,0,2]
            #  so of transpose, 
            retranspose=[i for i in self._transpose 
                         if (isinstance(k_untransposed[i],slice) or
                             isinstance(k_untransposed[i],collections.Iterable))]
            # and renumber via cheesy np trick
            if len(retranspose):
                retranspose=np.argsort(np.argsort(retranspose))
                return pulled.transpose(retranspose)
            else:
                return pulled
    def _rich_take(self,myv,idxs):
        # allow more relaxed semantics, in particular, grabbing an
        # out of order or repeated set of indices

        new_idxs=[] # introduce some mangling
        post_idxs=[] # then use this to undo mangling

        for idx in idxs:
            post=slice(None)

            if isinstance(idx, collections.Iterable):
                idx=np.asarray(idx)
                if ( idx.ndim!=1 or 
                     (len(idx)>1 and np.any(np.diff(idx)<=0)) ):
                    post=idx # have numpy do it.
                    idx=slice(None)
            new_idxs.append(idx)
            if not np.isscalar(idx):
                post_idxs.append(post)

        new_idxs=tuple(new_idxs)
        post_idxs=tuple(post_idxs)
        result=myv[new_idxs]
        if len(post_idxs):
            result=result[post_idxs] 
        return result
            
    def __setitem__(self,k,v):
        """
        limited support here - just nc.varname[slices]=value
        TODO: allow k to be a bitmask index.
        TODO: allow k to be dimensions, to transpose on the fly
        TODO: allow self to already have a transpose
        """
        myv=self._v
        # for now, no support for assigning into a transposed array
        if np.all( self._transpose == range(len(myv.dimensions)) ):
            # when k is a bitmask, this is super slow.
            if myv.size<1e6: # do it in python, in memory.
                value=myv[:]
                value[k]=v
                myv[:]=value
            else:
                myv[k]=v
        else:
            raise QncException("Tranpose is set - not ready for that")
        
    def __getattr__(self,attr):
        return getattr(self._v,attr)
    def __setattr__(self,attr,value):
        # ony pre-existing attributes are set on this object
        # all others are passed on to the real variable.
        if attr in self.__dict__:
            self.__dict__[attr]=value
        else:
            return setattr(self._v,attr,value)
    def _transpose_by_names(self,k):
        if isinstance(k,str):
            k=(k,)
        new_transpose=[self._v.dimensions.index(kk) for kk in k]
        return QuickVar(self._nc,self._v,new_transpose)
    def __len__(self):
        return len(self._v)
    @property
    def dims(self):
        return self.dimensions
    @property
    def dimensions(self):
        return self._v.dimensions

[docs]    def as_datenum(self):
        import nc_utils
        return nc_utils.cf_time_to_datenum(self._v)
            
[docs]class QDataset(netCDF4.Dataset):
    # 'varlen' will try to use variable length arrays to store strings
    # 'fixed' will add a strNN dimension, and strings are padded out to that size
    # 'varlen' may not be supported on as wide a range of library versions.
    # neither is well-tested at this point
    
    def __init__(self,*args,**kws):
        # seems that it has to be done this way, otherwise the setattr/getattr
        # overrides don't see it.
        super(QDataset,self).__init__(*args,**kws)
        self._set_string_mode('varlen') # 'fixed'
        self.__dict__['_auto_dates']=True

    def _set_string_mode(self,mode):
        self.__dict__['_string_mode']=mode

    def __getattr__(self,attr):
        # only called when attribute is not found in dict
        if attr in self.variables:
            return QuickVar(self,self.variables[attr])
        raise AttributeError(attr)

[docs]    class VarProxy(object):
        """ represents a variable about to be defined, but
        waiting for dimension names and data, via setattr.
        """
        def __init__(self,dataset,varname):
            self.dataset=dataset
            self.varname=to_str(varname)
        def __setitem__(self,dims,data):
            """ syntax for creating dimensions and variables at the
            same time.

            nc['var_name']['dim1','dim2','dim3']=np.array(...)
            """
            return self.create(dims,data)

[docs]        def create(self,dims,data,**kwargs):
            if self.varname in self.dataset.variables:
                print( "Trying to setitem on varname=%s"%(self.varname) )
                print( "Existing dataset state:" )
                self.dataset._dump()
                raise QncException("Can't create variable twice: %s"%self.varname)

            attrs=kwargs.get('attrs',{})

            data=np.asarray(data)
            # create dimensions on-demand:
            dims=as_tuple(dims)

            dims=tuple(to_str(di) for di in dims)
            
            if len(dims) and dims[-1]==Ellipsis:
                dims=dims[:-1] #drop the ellipsis
                # add new dimensions named for their size, according to the
                # trailing dimensions of data 
                extras=[anon_dim_name(size=n) for n in data.shape[len(dims):]]
                if extras:
                    dims=dims + tuple(extras)
                    #print "New dimensions: "
                    #print dims

            # A college try at dereferencing objects
            if data.dtype==object:
                if np.all( [isinstance(v,str) or isinstance(v,unicode) for v in data.ravel()] ):
                    data=data.astype('S')
            
            # and handle strings by also adding an extra generic dimension
            dtype_str=data.dtype.str
            # print "Dtype: ",dtype_str
            if dtype_str.startswith('|S') or dtype_str.startswith('S'):
                # print "It's a string!"
                slen=data.dtype.itemsize

                if slen>1 and self.dataset._string_mode=='fixed':
                    dims=dims + ("str%d"%slen,)
                    new_shape=data.shape + (slen,)
                    new_data=np.fromstring(data.tostring(),'S1').reshape(new_shape)
                    data=new_data
                    # print "New data dtype: ",data.dtype

            # get smart about np datetime64 values
            if self.dataset._auto_dates and (data.dtype.type == np.datetime64):
                # CF conventions can't deal with nanoseconds...
                # this will lose sub-second values - could try going with floats
                # instead??
                data=data.astype('M8[s]').astype(np.int64)
                # Assumes UTC
                attrs['units']='seconds since 1970-01-01 00:00:00'

            data_shape=list(data.shape)
            var_dtype=data.dtype

            for dim_idx,dim_name in enumerate(dims):
                self.dataset.add_dimension(dim_name,data.shape[dim_idx])

            variable=self.dataset.createVariable(self.varname,
                                                 var_dtype,
                                                 dims,**kwargs)
            variable[:]=data
            for k,v in iteritems(attrs):
                setattr(variable,k,v)

[docs]    def add_dimension(self,dim_name,length):
        """ 
        create dimension if it doesn't exist, otherwise check that
        the length requested matches what does exist.
        """
        if dim_name in self.dimensions:
            assert ( self.dimensions[dim_name].isunlimited() or
                     (len(self.dimensions[dim_name]) == length) )
        else:
            self.createDimension(dim_name,length)

    def __getitem__(self,k):
        if k in self.variables:
            return QuickVar(self,self.variables[k])
        else:
            return self.VarProxy(dataset=self,varname=k)

    def __setitem__(self,k,val):
        # shorthand for dimensionless variable 
        self[k][()]=val

    def __contains__(self,k):
        return k in self.variables
    
[docs]    def alias(self,**kwargs):
        """ had been just copying the variables.  But why not just 
        update self.variables?  This works even if not writing
        to the file.
        """
        for k,v in iteritems(kwargs):
            if 0: # deep copy:
                self[k][self.variables[v].dimensions] = self.variables[v][:]

                for attr_name in self.variables[v].ncattrs():
                    setattr(self.variables[k],attr_name,
                            getattr(self.variables[v],attr_name))
            else:
                self.variables[k]=self.variables[v]

[docs]    def interpolate_dimension(self,int_dim,int_var,new_coordinate,
                              max_gap=None,gap_fields=None,
                              int_mode='nearest'):
        """ 
        return a new dataset as a copy of this one, but with
        the given dimension interpolated according to varname=values
        
        typically this would be done to a list of datasets, after which
        they could be appended.

        it can also be used to collapse a 'dependent' coordinate into
        an independent coordinate - e.g. if depth bins are a function of
        time, this can be used to interpolate onto a constant depth axis,
        which will also remove the time dimension from that depth variable.

        max_gap: jumps in the source variable greater than max_gap are filled
        with nan (or -99 if int valued).  For now this is only supported when
        int_dim has just one dimension
        gap_fields: None, or a list of variable names to be masked based on gaps.

        int_mode: 
          'nearest' - grab the integer value from the nearest sample
          may add 'linear' in the future, which would cast to float
        """
        result=empty()

        int_ncvar=self.variables[int_var]
        if len(int_ncvar.dimensions)>1:
            print( "Will collapse %s"%int_var)
            if max_gap:
                raise QncException("max_gap not implemented for multi-dimensional variable")
        else:
            if max_gap:
                gapped=np.zeros(new_coordinate.shape,'b1')
                deltas=np.diff(int_ncvar[:])

                gap_idx=np.nonzero(deltas>max_gap)[0]
                for idx in gap_idx:
                    gap_left=int_ncvar[idx]
                    gap_right=int_ncvar[idx+1]
                    print( "Gap is %f - %f (delta %f)"%(gap_left,gap_right,gap_right-gap_left) )
                    to_mask=slice(*np.searchsorted( new_coordinate, [gap_left,gap_right] ))
                    gapped[to_mask]=True
                    
        for varname in self.variables.keys():
            dim_names=self.variables[varname].dimensions

            if int_dim in dim_names:
                merge_idx=dim_names.index(int_dim)
            else:
                merge_idx=None

            if varname==int_var:
                # takes care of matching the coordinate variable.  
                # but there will be trouble if there are multiple
                # coordinate variables and you try to concatenate
                # use int_dim here instead of dim_names, because we might
                # be collapsing the interpolant variable.
                result[varname][int_dim]=new_coordinate
            elif merge_idx is not None:
                # it's an array-valued variable, and includes 
                # the dimension over which we want to interpolate

                int_all_dims=self.variables[int_var].dimensions
                src_var=self.variables[varname]
                src_val=src_var[:]

                # masked values don't work so well with the 
                # interpolation:
                if isinstance(src_val,np.ma.core.MaskedArray):
                    print( "Filling masked src data" )
                    if 'int' in src_val.dtype.name:
                        src_val=src_val.filled(-1)
                    else:
                        src_val=src_val.filled(np.nan)

                if len(int_all_dims)==1:
                    if ('int' in src_val.dtype.name) and (int_mode=='nearest'):
                        def interper(coord):
                            idxs=utils.nearest(self.variables[int_var][:],coord)
                            slices=[slice(None)]*src_val.ndim
                            slices[merge_idx]=idxs
                            return src_val[slices]
                    else:
                        interper=interpolate.interp1d(self.variables[int_var][:],
                                                      src_val,
                                                      axis=merge_idx,
                                                      bounds_error=False,
                                                      assume_sorted=False)
                    new_val=interper(new_coordinate)

                    if max_gap and ( (gap_fields is None) or (varname in gap_fields)):
                        if 'int' in new_val.dtype.name:
                            new_val[gapped] = -99
                        else:
                            new_val[gapped] = np.nan
                    result[varname][dim_names]=new_val
                else:
                    # here's the tricky part when it comes to collapsing
                    # dimensions
                    # self.variables[int_var] - this is multidimensional
                    # basically we want to iterate over elements in 

                    # iterate over all other dimensions of the int_var
                    int_values=self.variables[int_var][:]
                    int_dim_idx=self.variables[int_var].dimensions.index(int_dim)

                    # preallocate the result, since we have to fill it in bit-by-bit
                    dest_shape=list(src_var.shape)
                    dest_shape[merge_idx] = len(new_coordinate)
                    dest=np.zeros(dest_shape,dtype=src_val.dtype)

                    # start with a stupid, slow way:
                    # Assume that there is one extra dimension and it's the first one.

                    # int_ncvar has multiple dimensions, i.e. depth ~ bin,time
                    # so there is an assumption here that some variable to be interpolated
                    # like u=u(bin,time)
                    # could there be something that depended on bin, but not time?
                    # there aren't any in the existing adcp data

                    for extra in range(int_ncvar.shape[0]):
                        interper=interpolate.interp1d(int_ncvar[extra,:],
                                                      src_val[extra,:],
                                                      axis=merge_idx-1, # account for extra
                                                      bounds_error=False,
                                                      assume_sorted=False)
                        dest[extra,:]=interper(new_coordinate)
                    result[varname][dim_names]=dest

            else: # non-dimensioned attributes here
                result[varname][dim_names]=self.variables[varname][:]
            self.copy_ncattrs_to(result)
        return result

[docs]    def copy(self,skip=[],fn=None,**create_args):
        """ make a deep copy of self, into a writable, diskless QDataset
        if fn is given, target is a netCDF file on disk.
        """
        if fn is not None:
            new=empty(fn,**create_args)
        else:
            new=empty()
        for dimname in self.dimensions.keys():
            if dimname not in skip:
                new.createDimension(dimname,len(self.dimensions[dimname]))
        for varname in self.variables.keys():
            if varname not in skip:
                ncvar=self.variables[varname]
                new[varname][ncvar.dimensions] = ncvar[:]
        self.copy_ncattrs_to(new)
        return new
[docs]    def copy_ncattrs_to(self,new):
        for varname in self.variables.keys():
            myvar=self.variables[varname]
            if varname not in new.variables:
                continue
            newvar = new.variables[varname]
            for attr in myvar.ncattrs():
                if attr != '_FillValue':
                    # _FillValue can only be set at var creation time
                    # This approach was failing on valid_range, 2017-03-17
                    # setattr(newvar,attr,getattr(myvar,attr))
                    newvar.setncattr(attr,getattr(myvar,attr))
[docs]    def select(self,**kwargs):
        new=empty()

        for varname in self.variables.keys():
            dim_names=self.variables[varname].dimensions
            if len(dim_names)==0:
                newvar=new.createVariable(varname,self.variables[varname].dtype,())
                newvar[...]=self.variables[varname][...]
            else:
                slices=[slice(None)]*len(dim_names)

                for slc_dim,slc_sel in iteritems(kwargs):
                    if slc_dim in dim_names:
                        slc_idx=dim_names.index(slc_dim)
                        slices[slc_idx]=slc_sel
                    else:
                        print( "slice dimension %s not in dim_names %s"%(slc_dim,dim_names) )

                new[varname][dim_names]=self.variables[varname][slices]
        self.copy_ncattrs_to(new)
        return new
[docs]    def within(self,**kwargs):
        selects={}
        for slc_varname,slc_range in iteritems(kwargs):
            slc_var=self.variables[slc_varname]
            assert( len(slc_var.dimensions)==1 )
            
            selects[slc_var.dimensions[0]]=utils.within(slc_var[:],slc_range,as_slice=True)
        return self.select(**selects)
    def _dump(self):
        print( self._desc() )

    def _desc(self):
        """ returns pretty printed description of dataset similar to 
        output of ncdump
        """
        lines=[ "%s %s {"%( self.file_format.lower(), "unknown_filename" ) ]
        lines.append( self._desc_dims() )
        lines.append( self._desc_vars() )
        lines.append( "}" )

        return "\n".join(lines)

    def _desc_dims(self):
        lines=["dimensions:"]
        for k,v in iteritems(self.dimensions):
            lines.append("    %s = %s ;"%(k,len(v) ))
        return "\n".join(lines)
    def _desc_vars(self,max_attr_len=20,max_n_attrs=7):
        lines=["variables:"]

        for k,v in iteritems(self.variables):
            try:
                typ=v.dtype.name
            except AttributeError:
                typ=str(v.dtype)
            lines.append( "    %s %s(%s) ;"%( typ, k, ",".join( v.dimensions )) )
            for attr_name in v.ncattrs()[:max_n_attrs]:
                a_val=getattr(v,attr_name)
                if isinstance(a_val,str) and len(a_val)>max_attr_len:
                    a_val = a_val[:max_attr_len] + "... [%d more bytes]"%(len(a_val)-max_attr_len)
                    a_val = '"' + a_val + '"'
                lines.append('         %s:%s = %s'%(k,attr_name,a_val))
            if len(v.ncattrs()) > max_n_attrs > 0:
                lines.append('         ... %d more'%(len(v.ncattrs())-max_n_attrs))
        return "\n".join(lines)

[docs]def empty(fn=None,overwrite=False,**kwargs):
    if fn is None:
        return QDataset(uuid.uuid1().hex,'w',diskless=True,**kwargs)
    else:
        if os.path.exists(fn):
            if overwrite:
                os.unlink(fn)
            else:
                raise QncException('File %s already exists'%fn)
        return QDataset(fn,'w',**kwargs)

[docs]def concatenate(ncs,cat_dim,skip=[],new_dim=None):
    """ ncs is an ordered list of QDataset objects
    If a single QDataset is given, it will be copied at the metadata
    level
    new_dim: if given, then fields not having cat_dim, but differing
     between datasets, will be concatenated along new_dim.

    for convenience, elements of gdms which are None are silently dropped
    """
    ncs=filter(None,ncs)
    N=len(ncs)
    if N==1:
        return ncs[0].copy()
    if N==0:
        return empty()
    
    result=empty()
    
    for varname in ncs[0].variables.keys():
        if varname in skip:
            continue

        dim_names=ncs[0].variables[varname].dimensions
    
        if cat_dim in dim_names:
            cat_idx=dim_names.index(cat_dim)
        else:
            cat_idx=None
            
        parts=[nc.variables[varname][:] for nc in ncs]
        if cat_idx is not None:
            result[varname][dim_names]=np.concatenate(parts,axis=cat_idx)
        else:
            constant=True
            for n in range(1,N):
                if np.any(parts[0]!=parts[n]):
                    constant=False
                    break

            if not constant:
                if new_dim is None:
                    raise QncException("Non-concatenated variable %s "\
                                    "does not match %s != %s"%(varname,
                                                               parts[0],
                                                               parts[n]))
                else:
                    print( "Variable values of %s will go into new dimension %s"%(varname,
                                                                                  new_dim) )
                    result[varname][ [new_dim]+list(dim_names) ]=np.array(parts)
            else:
                result[varname][dim_names]=parts[0]

    # attrs are copied from first element
    ncs[0].copy_ncattrs_to(result)
    return result
    


# Functional manipulations of QDataset:

[docs]def downsample(ds,dim,stride,lowpass=True):
    """ Lowpass variables along the given dimension, and resample
    at the given stride.
    lowpass=False   => decimate, no lowpass
    lowpass=<float> => lowpass window size is lowpass*stride
    """
    lowpass=float(lowpass)
    winsize=int(lowpass*stride)

    new=empty()

    for var_name in ds.variables:
        ncvar=ds.variables[var_name]
        val=ncvar[:]
        if dim in ncvar.dimensions:
            dim_idx=ncvar.dimensions.index(dim)
            # should be possible to use the faster scipy way,
            # but it is having issues with setting nan's in the
            # output - maybe something is getting truncated or 
            # reshaped??
            if True: # lowpass!=1: # older, slower way:
                if lowpass:
                    import lp_filter
                    val_lp=lp_filter.lowpass_fir(val,winsize,axis=dim_idx,nan_weight_threshold=0.5)
                else:
                    val_lp=val
                slcs=[slice(None)]*len(ncvar.dimensions)
                slcs[dim_idx]=slice(None,None,stride)
                val=val_lp[slcs]
            else: # scipy.signal way:
                kws=dict(q=stride,ftype='fir',axis=dim_idx)
                val_valid=decimate(np.isfinite(val).astype('f4'),**kws)
                val=decimate(val,**kws)
                val[val_valid<0.1] = np.nan
                new[var_name+'_msk'][ncvar.dimensions]=val_valid # DBG

        new[var_name][ncvar.dimensions] = val

    ds.copy_ncattrs_to(new)
    return new

[docs]def mat_to_nc(mat,dim_map={},autosqueeze=True):
    nc=empty()

    length_to_dim={}

    if autosqueeze:
        sq=np.squeeze
    else:
        sq=lambda x: x

    for k,v in iteritems( dim_map ):
        if isinstance(v,str):
            v=[v]

        if k in mat:
            x=sq(mat[k])
            if x.ndim != len(v):
                raise QncException("dim_map: field %s - %s doesn't match with %s"%(k,v,x.shape))
            for dim_name,size in zip(v,x.shape):
                length_to_dim[size]=dim_name
    for k,v in iteritems(mat):
        if k.startswith('_'):
            print( "Skipping %s"%k)
            continue
        v=sq(v)
        if not isinstance(v,np.ndarray):
            print( "Don't know how to deal with variable of type %s"%str(type(v)))
            continue
        if v.ndim==0:
            setattr(nc,k,v.item())
            continue
        if k in dim_map:
            dims=dim_map[k]
        else:
            dims=[]
            for size in v.shape:
                if size not in length_to_dim:
                    length_to_dim[size]='d%d'%size
                dims.append(length_to_dim[size])
        # special handling for some datatypes:
        if v.dtype == np.dtype('O'):
            # sometimes this is just ragged array
            # for some reason mat files can have a 3-dimensional variable reported
            # as an array of 2-d arrays.
            # TODO
            print( "%s is not a simple array. Skipping"%k )
            continue
        nc[k][dims]=v
    return nc



[docs]def linear_to_orthogonal_nc(nc_src,lin_dim,ortho_dims,nc_dst=None):
    """ 
    copy a dataset, changing a linear dimension into a pair of orthogonal
    dimensions
    """
    if isinstance(nc_src,str):
        nc=qnc.QDataset(nc_src)
    else:
        nc=nc_src
        
    if nc_dst is None:
        nc2=qnc.empty()
    elif isinstance(nc_dst,str):
        nc2=qnc.empty(fn=nc_dst)
    else:
        nc2=nc_dst

    ortho_coords=[np.unique(nc.variables[d][:]) for d in ortho_dims]
    ortho_shape =[len(oc) for oc in ortho_coords]
    map1=np.searchsorted(ortho_coords[0],nc.variables[ortho_dims[0]][:])
    map2=np.searchsorted(ortho_coords[1],nc.variables[ortho_dims[1]][:])

    for d in ortho_dims:
        vals=np.unique(nc.variables[d][:])
        nc2[d][d]=vals
        nc2.variables
    # nc2.set_coords(ortho_dims)

    for v in nc.variables:
        print("Processing %s"%v)
        if v in ortho_dims: 
            continue
        create_kwargs={}
        if '_FillValue' in nc.variables[v].ncattrs():
            create_kwargs['fill_value']=nc.variables[v]._FillValue
            
        if lin_dim in nc.variables[v].dimensions:
            old_val=nc.variables[v][:]
            new_dims=[] ; new_shape=[] ; new_slices=[]
            for d in nc.variables[v].dimensions:
                if d==lin_dim:
                    new_dims+=ortho_dims
                    new_shape+=ortho_shape
                    new_slices+=[map1,map2]
                else:
                    new_dims.append(d)
                    new_shape.append(len(nc.variables[d]))
                    new_slices+=[slice(None)]

            new_val=np.zeros(new_shape,old_val.dtype)
            new_val[:] = np.nan
            new_val[tuple(new_slices)] = old_val

            # dims=[nc2[d] for d in new_dims]
            nc2[v].create(new_dims,new_val,**create_kwargs)
        else:
            #print "COPY",v,nc[v].dims
            data=nc.variables[v][:]
            if data.dtype==object:
                data=data.astype('S')
            nc2[v].create(nc.variables[v].dimensions,data,**create_kwargs)

        for att in nc.variables[v].ncattrs():
            if att == '_FillValue': # can't be added after the fact
                continue
            setattr(nc2.variables[v],att,getattr(nc.variables[v],att))
            
    if isinstance(nc_src,str):
        nc.close()
    return nc2

[docs]def ortho_to_transect_nc(src_nc,src_x_var,src_y_var,transect_xy,dst_nc=None):
    """ Extract a transect to a new dataset
    """
    if isinstance(src_nc,str):
        src_nc=qnc.QDataset(src_nc)
        close_src=True
    else:
        close_src=False
        
    if dst_nc is None:
        dst_nc=qnc.empty()
    elif isinstance(dst_nc,str):
        dst_nc=qnc.empty(fn=dst_nc)
    else:
        pass
    
    src_xy=np.array( [src_nc.variables[src_x_var][:],src_nc.variables[src_y_var][:]] ).T

    elti_sel=[]
    for xy in transect_xy:
        dists=utils.dist(xy,src_xy)
        elti_sel.append(np.argmin(dists))
    elti_sel=np.array(elti_sel)    

    station_dim=src_nc.variables['x'].dimensions[0]
    print("Station dim inferred to be %s"%station_dim)        

    # extract profile data from the original netcdf, building a new xarray
    for field in src_nc.variables:
        print(field,end=" ")
        fvar=src_nc[field]
        if station_dim in fvar.dims:
            dst_nc[field][fvar.dims]=fvar[{station_dim:elti_sel}]
        else:
            dst_nc[field][fvar.dims]=fvar[:]
    print()

    # and derived coordinates
    dst_nc['distance'][station_dim]=utils.dist_along(dst_nc[src_x_var][:],
                                                     dst_nc[src_y_var][:])
    # trans=trans.set_coords(['distance'])
    return dst_nc


# enhancements:
#   read some local config file, and remap variables and units
#   to the local user's preferences.
#   this would include translating time variables to a common standard,
#   adding aliases like 'u'->'east_vel', with rules based on a combination
#   of standard names, long names, units, and variable names.
Source code for stompy.io.qnc

stompy

Navigation

Related Topics