Source code for stompy.model.delft.io

import os
import glob
import subprocess

import copy
import datetime
import io # python io.
import numpy as np
import pandas as pd
import re
import xarray as xr
import six
from collections import defaultdict
from shapely import geometry

import logging

log=logging.getLogger('delft.io')

from ... import utils

[docs]def parse_his_file(fn):
    """
    you probably want mon_his_file_dataframe() or bal_his_file_dataframe()
    --
    parse mixed ascii/binary history files as output by delwaq.
    applies to both monitoring output and balance output.

    returns tuple:
      sim_descs - descriptive text from inp file.
      time0 - text line giving time origin and units
      regions - names of regions with numeric index (1-based as read from file)
      fields - names of fields, separate into substance and process
      frames - actual data, and timestamps
    """
    fp=open(fn,'rb')

    sim_descs=np.fromfile(fp,'S40',4)
    time0=sim_descs[3]

    n_fields,n_regions=np.fromfile(fp,'i4',2)

    fdtype=np.dtype( [ ('sub','S10'),
                       ('proc','S10') ] )

    fields=np.fromfile( fp, fdtype, n_fields)

    regions=np.fromfile(fp,
                        [('num','i4'),('name','S20')],
                        n_regions)

    # assume that data is 'f4'
    # following other Delft output, probably each frame is prepended by
    # 'i4' time index
    frame_dtype=np.dtype( [('tsec','i4'),
                           ('data','f4',(n_regions,n_fields))] )
    frames=np.fromfile(fp,frame_dtype)

    return sim_descs,time0,regions,fields,frames

[docs]def bal_his_file_dataframe(fn):
    sim_descs,time0,regions,fields,frames = parse_his_file(fn)

    n_regions=len(regions)
    n_fields=len(fields)
    cols=[]
    tuples=[]

    for ri,region in enumerate(regions):
        for fi,field in enumerate(fields):
            tuples.append( (region['name'].strip(),
                            field['sub'].strip(),
                            field['proc'].strip()) )

    col_index=pd.MultiIndex.from_tuples(tuples,names=('region','sub','proc'))
    df=pd.DataFrame(data=frames['data'].reshape( (-1,n_regions*n_fields) ),
                    index=frames['tsec'],
                    columns=col_index)
    return df


[docs]def his_file_xarray(fn,region_exclude=None,region_include=None):
    """
    Read a delwaq balance file, return the result as an xarray.
    region_exclude: regular expression for region names to omit from the result
    region_include: regular expression for region names to include.

    Defaults to returning all regions.
    """
    sim_descs,time_meta,regions,fields,frames = parse_his_file(fn)

    def decstrip(s):
        try:
            s=s.decode() # in case binary
        except AttributeError:
            pass
        return s.strip()

    ds=xr.Dataset()

    ds['descs']=( ('n_desc',), [decstrip(s) for s in sim_descs])

    time0,time_unit = parse_time0(time_meta)
    times=time0 + time_unit*frames['tsec']
    ds['time']=( ('time',), times)
    ds['tsec']=( ('time',), frames['tsec'])

    region_names=[decstrip(s) for s in regions['name']]
    subs=[decstrip(s) for s in np.unique(fields['sub'])]
    procs=[decstrip(s) for s in np.unique(fields['proc'])]

    if region_include:
        region_mask=np.array( [bool(re.match(region_include,region))
                               for region in region_names] )
    else:
        region_mask=np.ones(len(region_names),np.bool8)

    if region_exclude:
        skip=[bool(re.match(region_exclude,region))
              for region in region_names]
        region_mask &= ~np.array(skip)

    sub_proc=[]
    for s,p in fields:
        if decstrip(p):
            sub_proc.append("%s,%s"%(decstrip(s),decstrip(p)))
        else:
            sub_proc.append(decstrip(s))

    region_idxs=np.nonzero(region_mask)[0]
    ds['region']=( ('region',), [region_names[i] for i in region_idxs] )
    ds['sub']  =( ('sub',), subs)
    ds['proc'] =( ('proc',), procs)
    ds['field']=( ('field',), sub_proc)

    ds['bal']=( ('time','region','field'),
                frames['data'][:,region_mask,:] )
    return ds

# older name - xarray version doesn't discriminate between balance
# and monitoring output
bal_his_file_xarray=his_file_xarray

[docs]def mon_his_file_dataframe(fn):
    df=bal_his_file_dataframe(fn)
    df.columns=df.columns.droplevel(2) # drop process level
    return df


[docs]def inp_tok(fp,comment=';'):
    # tokenizer for parsing rules of delwaq inp file.
    # parses either single-quoted strings, or space-delimited literals.
    for line in fp:
        if comment in line:
            line=line[ : line.index(comment)]
        # pattern had been
        # r'\s*((\'[^\']+\')|([/:-a-zA-Z_#0-9\.]+))'
        # but that has a bad dash before a, and doesn't permit +, either.
        matches=re.findall(r'\s*((\'[^\']+\')|([-/:a-zA-Z_#+0-9\.]+))', line)
        for m in matches:
            yield m[0]


[docs]def inp_tok_include(fp,fn,**kw):
    """
    Wrap inp_tok and handle INCLUDE tokens transparently.
    Note that also requires the filename
    """
    tokr=inp_tok(fp,**kw)

    while 1:
        tok=next(tokr)
        if tok.upper()!='INCLUDE':
            yield tok
        else:
            inc_fn=next(tokr)
            if inc_fn[0] in ["'",'"']:
                inc_fn=inc_fn.strip(inc_fn[0])
            inc_path=os.path.join( os.path.dirname(fn),
                                   inc_fn )
            # print("Will include %s"%inc_path)

            with open(inc_path,'rt') as inc_fp:
                inc_tokr=inp_tok_include(inc_fp,inc_path,**kw)
                for tok in inc_tokr:
                    yield tok
            # print("Done with include")


[docs]def parse_inp_monitor_locations(inp_file):
    """
    returns areas[name]=>[seg1,...] , transects[name]=>[+-exch1, ...]
    ONE-BASED return values.
    """
    with open(inp_file,'rt') as fp:
        tokr=inp_tok(fp)

        while next(tokr)!='#1':
            continue
        for _ in range(4):  # clock/date formats, integration float
            next(tokr)
        for t in tokr:
            if re.match(r'[-_a-zA-Z]+',t):
                continue
            break
        # t is now start timestep
        for _ in range(3):
            next(tokr) # stop, time step time step
        areas={}
        if int(next(tokr)) == 1: # monitoring points used
            nmon = int(next(tokr))
            for imon in range(nmon):
                name, segcount=next(tokr),int(next(tokr))
                segs=[int(next(tokr)) for iseg in range(segcount)]
                areas[name.strip("'")]=segs
        transects={} # name => list of signed, 1-based exchanges
        if int(next(tokr)) == 1: # transects used
            ntrans=int(next(tokr))
            for itrans in range(ntrans):
                name,style,ecount = next(tokr),next(tokr),int(next(tokr))
                exchs=[int(next(tokr)) for _ in range(ecount)]
                transects[name.strip("'")]=exchs
    return areas,transects

[docs]def parse_inp_transects(inp_file):
    # with open(inp_file,'rt') as fp:
    #     tokr=inp_tok(fp)
    #
    #     while next(tokr)!='#1':
    #         continue
    #     for _ in range(4):  # clock/date formats, integration float
    #         next(tokr)
    #     for t in tokr:
    #         if re.match(r'[-_a-zA-Z]+',t):
    #             continue
    #         break
    #     # t is now start timestep
    #     for _ in range(3):
    #         next(tokr) # stop, time step time step
    #     if int(next(tokr)) == 1: # monitoring points used
    #         nmon = int(next(tokr))
    #         for imon in range(nmon):
    #             name, segcount=next(tokr),int(next(tokr))
    #             for iseg in range(segcount):
    #                 next(tokr)
    #     transects={} # name => list of signed, 1-based exchanges
    #     if int(next(tokr)) == 1: # transects used
    #         ntrans=int(next(tokr))
    #         for itrans in range(ntrans):
    #             name,style,ecount = next(tokr),next(tokr),int(next(tokr))
    #             exchs=[int(next(tokr)) for _ in range(ecount)]
    #             transects[name.strip("'")]=exchs

    areas,transects=parse_inp_monitor_locations(inp_file)

    return transects

[docs]def parse_time0(time0):
    """ return a np.datetime64 for the time stamp, and the time unit in seconds
    (almost always equal to 1 second)
    input format is: b'T0: 2012/08/07-00:00:00  (scu=       1s)'
    """
    try:
        time0=time0.decode()
    except AttributeError:
        pass

    m=re.match(r'T0:\s+(\S+)\s+\(scu=\s*(\d+)(\w+)\)',time0)
    dt = m.group(1)
    # make it clear it's UTC:
    dt=dt.replace('-','T').replace('/','-') + "Z"
    origin=np.datetime64(dt)
    unit=np.timedelta64(int(m.group(2)),m.group(3))

    return (origin, unit)


# just a start.  And really this stuff should be rolled into the Scenario
# class, so it builds up a Scenario
[docs]def parse_boundary_conditions(inp_file):
    """
    Parse section 5 of DWAQ input file.
    Returns bcs,items
    bcs: BC links
    items: match data and bc links.
     - strings are folded to lowercase
    """
    def dequote(s):
        s=s.strip()
        if s[0] in ['"',"'"]:
            s=s.strip(s[0])
        return s
    with open(inp_file,'rt') as fp:
        tokr=inp_tok_include(fp,inp_file)

        while next(tokr)!='#4':
            continue

        bcs=[]
        while 1:
            tok = next(tokr)
            if tok[0] in "-0123456789":
                thatcher = int(tok)
                break
            else:
                bc_id=dequote(tok)
                bc_typ=dequote(next(tokr))
                bc_grp=dequote(next(tokr))
                bcs.append( (bc_id,bc_typ,bc_grp) )

        if thatcher==0: # no lags
            pass
        else:
            assert False,"Parsing Thatcher-Harleman lags not yet implemented"

        # The actual items are not yet implemented -- this is where
        # the inp file would assign concentrations or fluxes to
        # specific boundary exchanges are groups defined above
        bc_items=[]

        tok=next(tokr)
        while 1: # iterate over BC blocks
            defs=[]
            while 1: # iterate over the 3 subparts of a block
                if tok.upper()=='ITEM':
                    # Read names of BC items, which could also be integers
                    item_block=[]
                    while 1:
                        tok=next(tokr)
                        if tok[0] in ["'",'"']:
                            item_block.append(dequote(tok).lower())
                            continue
                        elif tok[0] in "0123456789":
                            item_block.append(int(tok))
                        else:
                            break
                    defs.append( ('item',item_block) )
                elif tok.upper()=='CONCENTRATION':
                    # Read the names of scalars
                    # Read names of BC items, which could also be integers
                    conc_block=[]
                    while 1:
                        tok=next(tokr)
                        if tok.upper() not in ['DATA','ITEM']:
                            conc_block.append(dequote(tok).lower())
                            continue
                        else:
                            break
                    defs.append( ('concentration',conc_block) )
                elif tok.upper()=='DATA':
                    matrix=np.zeros( ( len(defs[0][1]),
                                       len(defs[1][1]) ), 'f8')
                    for row_i,row in enumerate(defs[0][1]):
                        for col_i,col in enumerate(defs[1][1]):
                            matrix[row_i,col_i]=float(next(tokr))
                    defs.append( ('data',matrix) )
                    tok=next(tokr)
                else:
                    break # must not have been a BC block
            if len(defs)==0:
                assert tok=='#5'
                break # great - not a block
            elif len(defs)==3:
                bc_items.append(defs)
            else:
                assert False,"Incomplete BC block"

    return bcs,bc_items

[docs]def pli_to_shp(pli_fn,shp_fn,overwrite=False):
    from shapely import geometry
    from ...spatial import wkb2shp

    feats=read_pli(pli_fn)
    def clean_pnts(pnts):
        if pnts.shape[0]==1:
            pnts=np.concatenate( [pnts,pnts])
        return pnts
    geoms=[ geometry.LineString(clean_pnts(feat[1]))
            for feat in feats ]
    names=[ feat[0] for feat in feats ]
    wkb2shp.wkb2shp(shp_fn,geoms,fields=dict(name=names),
                    overwrite=overwrite)

[docs]def read_pli(fn,one_per_line=True):
    """
    Parse a polyline file a la DFM inputs.
    Return a list of features:
    [  (feature_label, N*M values, N labels), ... ]
    where the first two columns are typically x and y, but there may be
    more columns depending on the usage.  If no labels are in the file,
    the list of labels will be all empty strings.

    Generally assumes that the file is honest about the number of fields,
    but some files (like boundary condition pli) will add a text label for 
    each node.

    one_per_line: for files which add a label to each node but say nothing of 
      this in the number of fields, one_per_line=True will assume that each line
      of the text file has exactly one node, and any extra text becomes the label.
    """
    features=[]

    with open(fn,'rt') as fp:
        if not one_per_line:
            toker=inp_tok(fp)
            token=lambda: six.next(toker)

            while True:
                try:
                    label=token()
                except StopIteration:
                    break
                nrows=int(token())
                ncols=int(token())
                geometry=[]
                node_labels=[]
                for row in range(nrows):
                    rec=[float(token()) for c in range(ncols)]
                    geometry.append(rec)
                    node_labels.append("") 
                features.append( (label, np.array(geometry), node_labels) )
        else: # line-oriented approach which can handle unannounced node labels
            while True:
                label=fp.readline().strip()
                if label=="":
                    break
                nrows,ncols = [int(s) for s in fp.readline().split()]
                geometry=[]
                node_labels=[]
                for row in range(nrows):
                    values=fp.readline().strip().split(None,ncols+1)
                    geometry.append( [float(s) for s in values[:ncols]] )
                    if len(values)>ncols:
                        node_labels.append(values[ncols])
                    else:
                        node_labels.append("")
                features.append( (label, np.array(geometry), node_labels) )
    return features

[docs]def write_pli(file_like,pli_data):
    """
    Reverse of read_pli.
    file_like: a string giving the name of a file to be opened (clobbering
    an existing file), or a file-like object.
    pli_data: [ (label, N*M values, [optional N labels]), ... ]
    typically first two values of each row are x and y, and the rest depend on intended
    usage of the file
    """
    if hasattr(file_like,'write'):
        fp=file_like
        do_close=False
    else:
        fp=open(file_like,'wt')
        do_close=True

    try:
        for feature in pli_data:
            label,data = feature[:2]
            data=np.asanyarray(data)
            if len(feature)==3:
                node_labels=feature[2]
            else:
                node_labels=[""]*len(data)

            fp.write("%s\n"%label)
            fp.write("     %d     %d\n"%data.shape)
            if len(data) != len(node_labels):
                raise Exception("%d nodes, but there are %d node labels"%(len(data),
                                                                          len(node_labels)))
            block="\n".join( [ "  ".join(["%15s"%d for d in row]) + "   " + node_label
                               for row,node_label in zip(data,node_labels)] )
            fp.write(block)
            fp.write("\n")
    finally:
        if do_close:
            fp.close()

[docs]def grid_to_pli_data(g,node_fields,labeler=None):
    """
    UnstructuredGrid => PLI translation
    translate the edges of g into a list of features as returned
    by read_pli()
    features are extracted as contiguous linestrings, as long as possible.
    node_fields is a list giving a subset of the grid's node fields to
    be written out, in addition to x and y.
    labeler: leave as None to get Lnnn style labels.  Otherwise, a function
       which takes the index, and returns a string for the label.
    """
    strings=g.extract_linear_strings()

    features=[]

    labeler=labeler or (lambda i: "L%04d"%i)

    for feat_i,nodes in enumerate(strings):
        label=labeler(feat_i)

        cols=[ g.nodes['x'][nodes,0], g.nodes['x'][nodes,1] ]

        for fld in node_fields:
            cols.append( g.nodes[fld][nodes] )
        feature=np.array( cols ).T
        features.append( (label,feature) )
    return features

[docs]def add_suffix_to_feature(feat,suffix):
    """
    Utility method, takes a feature as returned by read_pli
    (name,
     [ [x0,y0],[x1,y1],...],
     { [node_label0,node_label1,...] }  # optional
    )

    and adds a suffix to the name of the feature and the
    names of nodes if they exist
    """
    name=feat[0]
    suffize=lambda s: s.replace(name,name+suffix)
    feat_suffix=[suffize(feat[0]), feat[1]] # points stay the same
    if len(feat)==3: # includes names for nodes
        feat_suffix.append( [suffize(s) for s in feat[2]] )
    return feat_suffix


[docs]def pli_to_grid_edges(g,levees):
    """
    g: UnstructuredGrid
    levees: polylines in the format returned by stompy.model.delft.io.read_pli,
    i.e. a list of features
    [ 
      [ 'name', 
        [ [x,y,z,...],...], 
        ['node0',...]
      ], ... 
    ]

    returns an array of length g.Nedges(), with z values from those features
    mapped onto edges. when multiple z values map to the same grid edge, the 
    minimum value is used.
    grid edges which do not have a levee edge get nan.
    """
    poly=g.boundary_polygon()

    # The dual additionally allows picking out edges 
    gd=g.create_dual(center='centroid',create_cells=False,remove_disconnected=False,
                     remove_1d=False)

    levee_de=np.nan*np.zeros(g.Nedges())

    for levee in utils.progress(levees,msg="Levees: %s"):
        # levee: [name, Nx{x,y,z,l,r}, {labels}]
        xyz=levee[1][:,:3]
        # having shapely check the intersection is 100x
        # faster than using select_cells_nearest(inside=True)
        ls=geometry.LineString(xyz[:,:2])
        if not poly.intersects(ls): continue

        # clip the edges to get some speedup
        xxyy=[xyz[:,0].min(),
              xyz[:,0].max(),
              xyz[:,1].min(),
              xyz[:,1].max()]
        edge_mask=gd.edge_clip_mask(xxyy,ends=True)

        # edges that make up the snapped line
        gde=gd.select_edges_intersecting(ls,mask=edge_mask)
        gde=np.nonzero(gde)[0]
        if len(gde)==0:
            continue
        # map the dual edge indexes back to the original grid
        ge=gd.edges['dual_edge'][gde]

        # print("Got a live one!")

        # check for closed ring:
        closed=np.all( xyz[-1,:2]==xyz[0,:2] )
        dists=utils.dist_along(xyz[:,:2])

        for j in ge:
            n1,n2=g.edges['nodes'][j]
            l1=np.argmin( utils.dist(g.nodes['x'][n1] - xyz[:,:2] ) )
            l2=np.argmin( utils.dist(g.nodes['x'][n2] - xyz[:,:2] ) )
            if l1>l2:
                l1,l2=l2,l1
            zs=xyz[l1:l2+1,2]

            if closed:
                # allow for possibility that the levee is a closed ring
                # and this grid edge actually straddles the end,
                dist_fwd=dists[l2]-dists[l1]
                dist_rev=dists[-1] - dist_fwd
                if dist_rev<dist_fwd:
                    print("wraparound")
                    zs=np.concatenate( [xyz[l2:,2],
                                        xyz[:l1,2]] )

            z=zs.min() 
            levee_de[j]=z
    return levee_de


[docs]def create_restart(res_fn, map_fn, hyd, state_vars = None, map_net_cdf = False, extr_time = None,
                   start_time = None):
    """ 
    Create a restart file using an exisiting map file and a user defined 
    time. 
    
    res_fn: path/file name of the restart file 
    map_fn: provide either a binary or net CDF file. Default is the raw binary output. 
    hyd: waq_scenario.Hydro() object
    state_vars: an array that provides the state variables used in the original 
    model run. Name must match the information written to map file
    map_net_cdf: set this flag to True if providing net CDF output that has 
        already been created by read_map 
    extr_time: a date string that will be converted internally to a 
        numpy datetime object.If not provided, will use the last time step of the 
        mapfile 
    start_time: a date string that will be converted internally to a numpy
        datetime object. If not provided will be set as hydro.

    [author: Pradeep Mugunthan]
    """
    # Identify whether read_map call is needed 
    if not map_net_cdf:
        ds = read_map(map_fn, hyd)
    
    # infer n_subs and n_segs
    if state_vars is None: 
        state_vars = ds.sub.values

    n_subs = len(state_vars)
    
    n_segs = np.int32(len(ds.layer)*len(ds.face))
    
    # convert time to dt 
    if extr_time is not None:
        dt = np.datetime64(extr_time)
    else:
        dt = max(ds.time.values)
        extr_time = pd.to_datetime(dt).strftime('%Y-%m-%d %H:%M')
        
    dt_st = np.datetime64(hyd.time0)
    if start_time is not None:
        dt_st = np.datetime64(start_time)
     
    ds_t = ds.sel(time = dt)
    
    # construct output arrays 
    # now = pd.Timestamp.today().strftime('%Y-%m-%d %H:%M')
    # txt_header = np.array('Restart file starting at {:s} ' \
    #                       'Written by stompy.model.delft.io.create_restart '\
    #                       'Written at {:s}'.format(extr_time, now),dtype='S160')
    
    txt_header = np.array(ds.header,dtype = 'S160')
    
    out_arr = np.zeros((n_segs,n_subs), dtype = 'f4' )  
    subs = np.array(['']*n_subs,dtype='S20')
    time = np.int32(pd.to_timedelta(dt-dt_st).total_seconds())

    count = 0
    for idx,name in enumerate(ds_t.sub.values):
        if name in state_vars:
            subs[count] = name
            out_arr[:,count] = ds_t[name].values.flatten()
            count = count + 1             
    
    # write out file 
    with open(res_fn,'wb') as fp:
        # header 
        fp.write(txt_header)
        
        # nosubs, noseg
        fp.write(np.array(n_subs, 'i4').tobytes())
        fp.write(np.array(n_segs, 'i4').tobytes())
        
        # parameter names 
        fp.write(subs) # is tobytes() implied here?
        
        # output time and substance values 
        fp.write(np.array(time, 'i4').tobytes())
        fp.write(out_arr.astype('f4'))
        
        fp.close() # PM - this shoudn't be necessary - but am having problems with file remanining open even after return
    
    return 


[docs]def read_map(fn,hyd=None,use_memmap=True,include_grid=True,return_grid=False):
    """
    Read binary D-Water Quality map output, returning an xarray dataset.

    fn: path to .map file
    hyd: waq_scenario.Hydro() object.  In the past this could be a path,
       but to avoid an apparent circular import, this must now be a
       Hydro object.
    use_memmap: use memory mapping for file access.  Currently
      this must be enabled.

    include_grid: the returned dataset also includes grid geometry, suitable
       for unstructured_grid.from_ugrid(ds).
       WARNING: there is currently a bug which causes this grid to have errors.
       probably a one-off error of some sort.

    note that missing values at this time are not handled - they'll remain as
    the delwaq standard -999.0.
    """

    # pycharm does not like the circular import, even when it's inside
    # a function like this, so until this all gets refactored, disallow
    # this feature
    assert hyd is not None,"Inferring hyd is disabled because of circular imports"
    assert not isinstance(hyd,six.string_types),"Must pass in Hydro() object, not path"

    # from . import waq_scenario as waq
    #
    # if not isinstance(hyd,waq.Hydro):
    #     if hyd==None:
    #         hyds=glob.glob( os.path.join(os.path.dirname(fn),"*.hyd"))
    #         assert len(hyds)==1,"hyd=auto only works when there is exactly 1 (not %d) hyd files"%(len(hyds))
    #         hyd=hyds[0]
    #     hyd=waq.HydroFiles(hyd)

    nbytes=os.stat(fn).st_size # 420106552

    with open(fn,'rb') as fp:

        # header line of 160 characters
        txt_header=fp.read(160)
        # print "Text header: ",txt_header

        # 4 bytes, maybe a little-endian int.  0x0e, that's 14, number of substances
        n_subs=np.fromfile(fp,np.int32,1)[0]
        # print "# substances: %d"%n_subs

        n_segs=np.fromfile(fp,np.int32,1)[0]
        # print "Nsegs: %d"%n_segs

        substance_names=np.fromfile(fp,'S20',n_subs)


        # not sure if there is a quicker way to get the number of layers
        hyd.infer_2d_elements()
        n_layers=1+hyd.seg_k.max()

        g=hyd.grid() # ignore message about ugrid.

        assert g.Ncells()*n_layers == n_segs

        # I'm hoping that now we get 4 byte timestamps in reference seconds,
        # and then n_subs,n_segs chunks.
        # looks that way.
        data_start=fp.tell()

    bytes_left=nbytes-data_start
    framesize=(4+4*n_subs*n_segs)
    nframes,extra=divmod(bytes_left,framesize)
    if extra!=0:
        log.warning("Reading map file %s: %d or %d frames? bad length %d extra bytes (or %d missing)"%(
            fn,nframes,nframes+1,extra,framesize-extra))

    # Specify nframes in cases where the filesizes don't quite match up.
    mapped=np.memmap(fn,[ ('tsecs','i4'),
                          ('data','f4',(n_layers,hyd.n_2d_elements,n_subs))] ,
                     mode='r',
                     shape=(nframes,),
                     offset=data_start)

    ds=xr.Dataset()

    try:
        txt_header=txt_header.decode()
    except AttributeError:
        pass # Hopefully header is already a string
    ds.attrs['header']=txt_header

    # a little python 2/3 misery
    try:
        substance_names=[s.decode() for s in substance_names]
    except AttributeError:
        pass

    ds['sub']= ( ('sub',), [s.strip() for s in substance_names] )

    times=utils.to_dt64(hyd.time0) + np.timedelta64(1,'s') * mapped['tsecs']

    ds['time']=( ('time',), times)
    ds['t_sec']=( ('time',), mapped['tsecs'] )

    for idx,name in enumerate(ds.sub.values):
        ds[name]= ( ('time','layer','face'), 
                    mapped['data'][...,idx] )
        ds[name].attrs['_FillValue']=-999

    if include_grid:
        # not sure why this doesn't work.
        g.write_to_xarray(ds=ds)

    if return_grid:
        return ds,g
    else:
        return ds

[docs]def map_add_z_coordinate(map_ds,total_depth='TotalDepth',coord_type='sigma',
                         layer_dim='layer'):
    """
    For an xarray representation of dwaq output, where the total depth
    has been recorded, add an inferred vertical coordinate in the dataset.
    This is necessary to allow the ugrid visit reader to understand
    the file.
    Currently only sigma coordinates, assumed to be evenly spaced, are
    supported.

    total_depth: Name of the xarray variable in map_ds holding total water column
    depth for each segment.
    coord_type: type of coordinate, currently must be "sigma".
    layer_dim: name of the vertical dimension in the data.

    Makes an arbitrary assumption that the first output time step is roughly
    mean sea level.  Obviously wrong, but a starting point.

    Given the ordering of layers in dwaq output, the sigma coordinate created 
    here is decreasing from 1 to 0.

    Modifies map_ds in place, also returning it.
    """
    assert coord_type=='sigma'

    bedlevel=-map_ds[total_depth].isel(**{layer_dim:0,'time':0,'drop':True})
    dry=(bedlevel==999)
    bedlevel[dry]=0.0
    map_ds['bedlevel']=bedlevel
    map_ds.bedlevel.attrs['units']='m'
    map_ds.bedlevel.attrs['positive']='up'
    map_ds.bedlevel.attrs['long_name']='Bed elevation relative to initial water level'

    tdepth=map_ds[total_depth].isel(**{layer_dim:0,'drop':True})
    eta=tdepth + map_ds.bedlevel
    eta.values[ tdepth.values==-999 ] = 0.0
    map_ds['eta']=eta
    map_ds.eta.attrs['units']='m'
    map_ds.eta.attrs['positive']='up'
    map_ds.eta.attrs['long_name']='Sea surface elevation relative initial time step'

    Nlayers=len(map_ds[layer_dim])
    # This is where sigma is made to be decreasing to capture the order of
    # layers in DWAQ output.
    map_ds['sigma']=(layer_dim,), (0.5+np.arange(Nlayers))[::-1] / float(Nlayers)
    map_ds.sigma.attrs['standard_name']="ocean_sigma_coordinate"
    map_ds.sigma.attrs['positive']='up'
    map_ds.sigma.attrs['units']=""
    map_ds.sigma.attrs['formula_terms']="sigma: sigma eta: eta  bedlevel: bedlevel"

    return map_ds

[docs]def dfm_wind_to_nc(wind_u_fn,wind_v_fn,nc_fn):
    """
    Transcribe DFM 'arcinfo' style gridded wind to
    CF compliant netcdf file (ready for import to erddap)

    Note that the order of rows in the DFM format is weird, and
    required bug fixes to this code 2017-12-21.  While dy is
    specified positive, the rows of data are written from north to
    south.  The DFM text file specifies coordinates for a llcorner
    and a dy, but that llcorner corresponds to the first column of
    the *last* row of data written out.

    wind_u_fn:
      path to the amu file for eastward wind
    wind_v_fn:
      path to the amv file for northward wind
    nc_fn:
      path to the netcdf file which will be created.
    """
    fp_u=open(wind_u_fn,'rt')
    fp_v=open(wind_v_fn,'rt')

    # read the header, gathering parameters in a dict.
    def parse_header(fp):
        params={}
        while 1:
            line=fp.readline().strip()
            if line.startswith('### START OF HEADER'):
                break
        for line in fp:
            line=line.strip()
            if line.startswith('#'):
                if line.startswith('### END OF HEADER'):
                    break
                continue # comment lines
            try:
                key,value = line.split('=',2)
            except ValueError:
                print("Failed to split key=value for '%s'"%line)
                raise
            key=key.strip()
            value=value.strip()

            # some hardcoded data type conversion:
            if key in ['n_rows','n_cols']:
                value=int(value)
            elif key in ['dx','dy','x_llcorner','y_llcorner','NODATA_value']:
                value=float(value)
            params[key]=value
        return params

    fp_u.seek(0)
    fp_v.seek(0)
    u_header=parse_header(fp_u)
    v_header=parse_header(fp_v)

    # make sure they match up
    for k in u_header.keys():
        if k in ['quantity1']:
            continue
        assert u_header[k] == v_header[k]

    # use netCDF4 directly, so we can stream it to disk
    import netCDF4

    os.path.exists(nc_fn) and os.unlink(nc_fn)
    nc=netCDF4.Dataset(nc_fn,'w') # don't worry about netcdf versions quite yet

    xdim='x'
    ydim='y'
    tdim='time'

    nc.createDimension(xdim,u_header['n_cols'])
    nc.createDimension(ydim,u_header['n_rows'])
    nc.createDimension(tdim,None) # unlimited

    # assign some attributes while we're at it
    for k in ['FileVersion','Filetype','dx','dy','grid_unit','unit1','x_llcorner','y_llcorner']:
        if k in u_header:
            setattr(nc,k,u_header[k])

    # cf conventions suggest this order of dimensions
    u_var = nc.createVariable('wind_u',np.float32,[tdim,ydim,xdim],
                              fill_value=u_header['NODATA_value'])
    v_var = nc.createVariable('wind_v',np.float32,[tdim,ydim,xdim],
                              fill_value=v_header['NODATA_value'])
    t_var = nc.createVariable('time',np.float64,[tdim])


    # install some metadata

    # parse the times into unix epochs for consistency
    t_var.units='seconds since 1970-01-01T00:00:00Z'
    t_var.calendar = "proleptic_gregorian"

    # Going to assume that we're working out of the same UTM 10:
    utm_var = nc.createVariable('UTM10',np.int32,[])
    utm_var.grid_mapping_name = "universal_transverse_mercator" 
    utm_var.utm_zone_number = 10
    utm_var.semi_major_axis = 6378137
    utm_var.inverse_flattening = 298.257 
    utm_var._CoordinateTransformType = "Projection" 
    utm_var._CoordinateAxisTypes = "GeoX GeoY" 
    utm_var.crs_wkt = """PROJCS["NAD83 / UTM zone 10N",
        GEOGCS["NAD83",
            DATUM["North_American_Datum_1983",
                SPHEROID["GRS 1980",6378137,298.257222101,
                    AUTHORITY["EPSG","7019"]],
                TOWGS84[0,0,0,0,0,0,0],
                AUTHORITY["EPSG","6269"]],
            PRIMEM["Greenwich",0,
                AUTHORITY["EPSG","8901"]],
            UNIT["degree",0.0174532925199433,
                AUTHORITY["EPSG","9122"]],
            AUTHORITY["EPSG","4269"]],
        PROJECTION["Transverse_Mercator"],
        PARAMETER["latitude_of_origin",0],
        PARAMETER["central_meridian",-123],
        PARAMETER["scale_factor",0.9996],
        PARAMETER["false_easting",500000],
        PARAMETER["false_northing",0],
        UNIT["metre",1,
            AUTHORITY["EPSG","9001"]],
        AXIS["Easting",EAST],
        AXIS["Northing",NORTH],
        AUTHORITY["EPSG","26910"]]
    """

    y_var=nc.createVariable('y',np.float64,[ydim])
    y_var.units='m'
    y_var.long_name="y coordinate of projection"
    y_var.standard_name="projection_y_coordinate"
    y_var[:]=u_header['y_llcorner'] + u_header['dy']*np.arange(u_header['n_rows'])

    x_var=nc.createVariable('x',np.float64,[xdim])
    x_var.units='m'
    x_var.long_name="x coordinate of projection"
    x_var.standard_name="projection_x_coordinate"
    x_var[:]=u_header['x_llcorner'] + u_header['dx']*np.arange(u_header['n_cols'])

    u_var.units='m s-1'
    u_var.grid_mapping='transverse_mercator'
    u_var.long_name='eastward wind from F Ludwig method'
    u_var.standard_name='eastward_wind'

    v_var.units='m s-1'
    v_var.grid_mapping='transverse_mercator'
    v_var.long_name='northward wind from F Ludwig method'
    v_var.standard_name='northward_wind'

    def read_frame(fp,header):
        # Assumes that the TIME line is alone,
        # but the pixel data isn't restricted to a particular number of elements per line.
        time_line=fp.readline()
        if not time_line:
            return None,None

        assert time_line.startswith('TIME')
        _,time_string=time_line.split('=',2)
        count=0
        items=[]
        expected=header['n_cols'] * header['n_rows']
        for line in fp:
            this_data=np.fromstring(line,sep=' ',dtype=np.float32)
            count+=len(this_data)
            items.append(this_data)
            if count==expected:
                break
            assert count<expected

        block=np.concatenate( items ).reshape( header['n_rows'],header['n_cols'] )
        # 2017-12-21: flip so that array index matches coordinate index.
        block=block[::-1,:]
        time=utils.to_dt64(time_string)
        return time,block

    frame_i=0
    while 1:
        u_time,u_block = read_frame(fp_u,u_header)
        v_time,v_block = read_frame(fp_v,v_header)
        if u_time is None or v_time is None:
            break

        assert u_time==v_time

        if frame_i%200==0:
            print("%d frames, %s most recent"%(frame_i,u_time))
        u_var[frame_i,:,:] = u_block
        v_var[frame_i,:,:] = v_block
        t_var[frame_i] = u_time

        # t... come back to it.
        frame_i+=1

    nc.close()


[docs]def dataset_to_dfm_wind(ds,period_start,period_stop,target_filename_base,
                        extra_header=None,min_records=1,
                        wind_u='wind_u',wind_v='wind_v',pres='pres'):
    """
    Write wind in an xarray dataset to a pair of gridded meteo files for DFM.

    ds:
      xarray dataset.  Currently fairly brittle assumptions on the format of
      this dataset, already in the proper coordinates system, coordinates of x and
      y, and the wind variables named wind_u and wind_v.
    period_start,period_stop:
      include data from the dataset on or after period_start, and up to period_stop,
    inclusive
    target_filename_base:
      the path and filename for output, without the .amu and .amv extensions.
    extra_header:
      extra text to place in the header.  This is included as is, with the exception that
      a newline will be added if it's missing

    returns the number of available records overlapping the requested period.
    If that number is less than min_records, no output is written.
    """
    time_idx_start = np.searchsorted(ds.time,period_start,side='left')
    # make stop inclusive by using side='right'
    time_idx_stop  = np.searchsorted(ds.time,period_stop,side='right')

    record_count=time_idx_stop-time_idx_start
    if record_count<min_records:
        return record_count

    # Sanity checks that there was actually some overlapping data.
    # maybe with min_records, this can be relaxed?  Unsure of use case there.
    assert time_idx_start+1<len(ds.time)
    assert time_idx_stop>0
    assert time_idx_start<time_idx_stop

    nodata=-999

    if extra_header is None:
        extra_header=""
    else:
        extra_header=extra_header.rstrip()+"\n"
        
    header_template="""### START OF HEADER
# Created with %(creator)s
%(extra_header)sFileVersion = 1.03
Filetype = meteo_on_equidistant_grid
NODATA_value = %(nodata)g
n_cols = %(n_cols)d
n_rows = %(n_rows)d
grid_unit = m
x_llcorner = %(x_llcorner)g
y_llcorner = %(y_llcorner)g
dx = %(dx)g
dy = %(dy)g
n_quantity = 1
quantity1 = %(quantity)s
unit1 = m s-1
### END OF HEADER
"""

    fp_u=open(target_filename_base+".amu",'wt')
    fp_v=open(target_filename_base+".amv",'wt')

    if pres in ds:
        fp_p=open(target_filename_base+".amp",'wt')
    else:
        fp_p=None

    base_fields=dict(creator="stompy",nodata=nodata,
                     n_cols=len(ds.x),n_rows=len(ds.y),
                     dx=np.median(np.diff(ds.x)),
                     dy=np.median(np.diff(ds.y)),
                     x_llcorner=ds.x[0],
                     y_llcorner=ds.y[0],
                     extra_header=extra_header)

    for fp,quant in [ (fp_u,'x_wind'),
                      (fp_v,'y_wind'),
                      (fp_p,'pres') ]:
        if fp is None:
            continue
        # Write the headers:
        fields=dict(quantity=quant)
        fields.update(base_fields)
        header=header_template%fields
        fp.write(header)

    count=0
    for time_idx in range(time_idx_start, time_idx_stop):
        count+=1
        if (time_idx-time_idx_start) % 96 == 0:
            log.info("Written %d/%d time steps"%( time_idx-time_idx_start,time_idx_stop-time_idx_start))
        u=ds['wind_u'].isel(time=time_idx)
        v=ds['wind_v'].isel(time=time_idx)
        if pres in ds:
            p=ds[pres].isel(time=time_idx)
        else:
            p=None

        t=ds.time.isel(time=time_idx)

        # write a time line formatted like this:
        # TIME=00000.000000 hours since 2012-08-01 00:00:00 +00:00
        time_line="TIME=%f seconds since 1970-01-01 00:00:00 +00:00"%utils.to_unix(t.values)

        for fp,data in [ (fp_u,u),
                         (fp_v,v),
                         (fp_p,p)]:
            if fp is None:
                continue
            # double check order.
            fp.write(time_line) ; fp.write("\n")
            # 2017-12-21: flip order of rows to suit DFM convention
            for row in data.values[::-1]:
                fp.write(" ".join(["%g"%rowcol for rowcol in row]))
                fp.write("\n")

    log.info("Wrote %d time steps"%count)

    fp_u.close()
    fp_v.close()
    if fp_p is not None:
        fp_p.close()
    return record_count

[docs]class SectionedConfig(object):
    """
    Handles reading and writing of config-file like formatted files.
    Follows some of the API of the standard python configparser
    """
    inline_comment_prefixes=('#',';')

    def __init__(self,filename=None,text=None):
        """
        filename: path to file to open and parse
        text: a string containing the entire file to parse
        """
        # This isn't being used anywhere -- delete?  and below.
        self.rows=[]    # full text of each line
        self.filename=filename
        self.base_path=None
        # For flags which do not get written into the config file.
        self.flags=defaultdict(lambda:None)

        if self.filename is not None:
            self.read(self.filename)
            self.base_path=os.path.dirname(self.filename)

        if text is not None:
            fp = StringIO(text)
            self.read(fp,'text')

[docs]    def set_filename(self,fn):
        """
        Updates self.filename and base_path, in anticipation of the file
        being written to a new location (this is so new file paths can be
        extrapolated before having to write this out)
        """
        self.filename=fn
        self.base_path=os.path.dirname(self.filename)

[docs]    def copy(self):
        return copy.deepcopy(self)

[docs]    def read(self, filename, label=None):
        if six.PY2:
            file_base = file
        else:
            file_base = io.IOBase

        if isinstance(filename, file_base):
            label = label or 'n/a'
            fp=filename
            filename=None
        else:
            fp = open(filename,'rt')
            label=label or filename

        # This isn't being used anywhere -- delete?
        # self.sources.append(label)

        for line in fp:
            # save original text so we can write out a new mdu with
            # only minor changes
            # the rstrip()s leave trailing whitespace, but strip newline or CR/LF
            self.rows.append(line.rstrip("\n").rstrip("\r"))
        if filename:
            fp.close()

[docs]    def entries(self):
        """ 
        Generator which iterates over rows, parsing them into index, section, key, value, comment.

        key is always present, but might indicate a section by including square
        brackets.
        value may be a string, or None.  Strings will be trimmed
        comment may be a string, or None.  It includes the leading comment character.
        Indices are not stable across set_value(), since new entries may get inserted into
        the middle of a section and shift other rows down.
        """
        section=None
        for idx,row in enumerate(self.rows):
            parsed=self.parse_row(row)

            if parsed[0] is None: # blank line
                continue # don't send back blank rows

            if parsed[0][0]=='[':
                section=parsed[0]

            yield [idx,section] + list(parsed)

[docs]    def parse_row(self,row):
        section_patt=r'^(\[[A-Za-z0-9 ]+\])([#;].*)?$'
        value_patt = r'^([A-Za-z0-9_ ]+)\s*=([^#;]*)([#;].*)?$'
        # 2019-12-31: appears that some mdu's written by delta shell have
        # lines near the top that are just an asterisk.  Assume
        # those are comments
        blank_patt = r'^\s*([\*#;].*)?$'

        m_sec = re.match(section_patt, row)
        if m_sec is not None:
            return m_sec.group(1), None, m_sec.group(2)

        m_val = re.match(value_patt, row)
        if m_val is not None:
            return m_val.group(1).strip(), m_val.group(2).strip(), m_val.group(3)

        m_cmt = re.match(blank_patt, row)
        if m_cmt is not None:
            return None,None,m_cmt.group(1)

        print("Failed to parse row:")
        print(row)

[docs]    def get_value(self,sec_key):
        """
        return the string-valued settings for a given key.
        if they key is not found, returns None.
        If the key is present but with no value, returns the empty string
        """
        section='[%s]'%sec_key[0].lower()
        key = sec_key[1].lower()

        for row_idx,row_sec,row_key,row_value,row_comment in self.entries():
            if (row_key.lower() == key) and (section.lower() == row_sec.lower()):
                return row_value
        else:
            return None

[docs]    def set_value(self,sec_key,value):
        # set value and optionally comment.
        # sec_key: tuple of section and key (section without brackets)
        # value: either the value (a string, or something that can be converted via str())
        #   or a tuple of value and comment, without the leading comment character
        section='[%s]'%sec_key[0].lower()
        key=sec_key[1]

        last_row_of_section={} # map [lower_section] to the index of the last entry in that section

        if isinstance(value,tuple):
            value,comment=value
            comment='# ' + comment
        else:
            comment=None

        value=self.val_to_str(value)

        def fmt(key,value,comment):
            return "%-18s= %-20s %s"%(key,value,comment or "")

        for row_idx,row_sec,row_key,row_value,row_comment in self.entries():
            last_row_of_section[row_sec]=row_idx

            if (row_key.lower() == key.lower()) and (section.lower() == row_sec.lower()):
                self.rows[row_idx] = fmt(row_key,value,comment or row_comment)
                return

        row_text=fmt(key,value,comment)
        if section in last_row_of_section:
            # the section exists
            last_idx=last_row_of_section[section]
            self.rows.insert(last_idx+1,row_text)
        else: # have to append the new section
            self.rows.append(section)
            self.rows.append(row_text)

    def __setitem__(self,sec_key,value): # self[sec_key]=value
        self.set_value(sec_key,value)
    def __getitem__(self,sec_key):       # self[sec_key]
        return self.get_value(sec_key)

    def __contains__(self,sec_key):
        """
        Return true if the given section or (section,key) tuple 
        exists.  Note that [currently] if the entry exists but is
        empty, this still returns True.
        """
        if isinstance(sec_key,tuple):
            section='[%s]'%sec_key[0].lower()
            key = sec_key[1].lower()
        else:
            section='[%s]'%sec_key.lower()
            key=None
            
        for row_idx,row_sec,row_key,row_value,row_comment in self.entries():
            if ( ((key is None) or (row_key.lower() == key))
                 and (section.lower() == row_sec.lower())):
                return True
        else:
            return False

    def __delitem__(self,sec_key):
        if isinstance(sec_key,tuple):
            section='[%s]'%sec_key[0].lower()
            key = sec_key[1].lower()
        else:
            section='[%s]'%sec_key.lower()
            key=None

        new_rows=[]
        row_sec=None
        
        for row_idx,row in enumerate(self.rows):
            row_key=None
            parsed=self.parse_row(row)

            if parsed[0] is None: # blank line
                new_rows.append(row)
                continue # don't send back blank rows

            if parsed[0][0]=='[':
                row_sec=parsed[0]

                if (row_sec == section) and (key is None):
                    # delete this row.
                    continue
                else:
                    new_rows.append(row)
                    continue
            else:
                row_key=parsed[0]
                if ( ((key is None) or (row_key.lower() == key))
                     and (section.lower() == row_sec.lower())):
                    # delete this row
                    continue
                else:
                    new_rows.append(row)
        self.rows=new_rows
        
[docs]    def filepath(self,sec_key):
        """
        Lookup a filename via a ('section','name') tuple, and
        return the full filename including base path.
        if the key does not exist or is empty, return None.
        """
        val=self.get_value(sec_key)
        if not val:
            return None
        if self.base_path:
            return os.path.join(self.base_path,val)
        else:
            return val

[docs]    def val_to_str(self,value):
        # make sure that floats are formatted with plenty of digits:
        # and handle annoyance of standard Python types vs. numpy types
        # But None stays None, as it gets handled specially elsewhere
        if value is None:
            return None
        if isinstance(value,float) or isinstance(value,np.floating):
            return "%.12g"%value
        else:
            return str(value)

[docs]    def write(self,filename=None):
        """
        Write this config out to a text file.
        filename: defaults to self.filename
        check_changed: if True, and the file already exists and is not materially different,
          then do nothing.  Good for avoiding unnecessary changes to mtimes.
        backup: if true, copy any existing file to <filename>.bak
        """
        if filename is None:
            filename=self.filename
        with open(filename,'wt') as fp:
            for line in self.rows:
                fp.write(line)
                fp.write("\n")

[docs]class MDUFile(SectionedConfig):
    """
    Read/write MDU files, with an interface similar to python's
    configparser, but better support for discerning and retaining
    comments
    """
    @property
    def name(self):
        """
        base name of mdu filename, w/o extension, which is used in various other filenames.
        """
        return os.path.basename(self.filename).split('.')[0]
[docs]    def output_dir(self):
        """
        path to the folder holding DFM output based on MDU filename
        and contents.
        """
        output_dir=self['Output','OutputDir']
        if output_dir in (None,""):
            output_dir="DFM_OUTPUT_%s"%self.name

        return os.path.join(self.base_path,output_dir)

[docs]    def time_range(self):
        """
        return tuple of t_ref,t_start,t_stop
        as np.datetime64
        """
        t_ref=utils.to_dt64( datetime.datetime.strptime(self['time','RefDate'],'%Y%m%d') )
        dt=self.t_unit_td64()
        t_start = t_ref+int(self['time','tstart'])*dt
        t_stop = t_ref+int(self['time','tstop'])*dt
        return t_ref,t_start,t_stop

[docs]    def t_unit_td64(self,default='S'):
        """ Return Tunit as timedelta64.  If none is set, set to default
        """
        t_unit=self['time','Tunit']
        if t_unit is None:  # or does the above throw an error?
            self['time','Tunit']=t_unit=default

        if t_unit.lower() == 'm':
            dt=np.timedelta64(60,'s')
        elif t_unit.lower() == 's':
            dt=np.timedelta64(1,'s')
        else:
            raise Exception("Bad time unit %s"%t_unit)

        return dt

[docs]    def set_time_range(self,start,stop,ref_date=None):
        if ref_date is not None:
            # Make sure ref date is integer number of days
            assert ref_date==ref_date.astype('M8[D]')
        else:
            # Default to truncating the start date
            ref_date = start.astype('M8[D]')

        self['time','RefDate'] = utils.to_datetime(ref_date).strftime('%Y%m%d')

        dt=self.t_unit_td64()
        self['time','TStart'] = int( (start - ref_date)/ dt )
        self['time','TStop'] = int( (stop - ref_date) / dt )

[docs]    def partition(self,nprocs,dfm_bin_dir=None,mpi_bin_dir=None):
        if nprocs<=1:
            return

        # As of r52184, explicitly built with metis support, partitioning can be done automatically
        # from here.
        if mpi_bin_dir is None:
            mpi_bin_dir=dfm_bin_dir

        dflowfm="dflowfm"
        gen_parallel="generate_parallel_mdu.sh"
        if dfm_bin_dir is not None:
            dflowfm=os.path.join(dfm_bin_dir,dflowfm)
            gen_parallel=os.path.join(dfm_bin_dir,gen_parallel)

        mpiexec="mpiexec"
        if mpi_bin_dir is not None:
            mpiexec=os.path.join(mpi_bin_dir,mpiexec)

        cmd="%s -n %d %s --partition:ndomains=%d %s"%(mpiexec,nprocs,dflowfm,nprocs,
                                                      self['geometry','NetFile'])
        pwd=os.getcwd()
        try:
            os.chdir(self.base_path)
            res=subprocess.call(cmd,shell=True)
        finally:
            os.chdir(pwd)

        # similar, but for the mdu:
        cmd="%s %s %d 6"%(gen_parallel,os.path.basename(self.filename),nprocs)
        try:
            os.chdir(self.base_path)
            res=subprocess.call(cmd,shell=True)
        finally:
            os.chdir(pwd)

[docs]def exp_z_layers(mdu,zmin=None,zmax=None):
    """
    This will probably change, not very flexible now.
    For singly exponential z-layers, return zslay, positive up, starting
    from the bed.  first element is the bed itself.

    zmin: deepest depth, positive up.  Defaults to ds.NetNode_z.min(),
       read from the net file specified in mdu.
    zmax: top of water column.  Defaults to WaterLevIni in mdu.
    """

    if zmax is None:
        zmax=float(mdu['geometry','WaterLevIni'] )
    if zmin is None:
        ds=xr.open_dataset(mdu.filepath(['geometry','NetFile']))
        zmin=float(ds.NetNode_z.min())
        ds.close()
        
    kmx=int(mdu['geometry','kmx'])
    coefs=[float(s) for s in mdu['geometry','StretchCoef'].split()] # 0.002 0.02 0.8

    ztot=zmax-zmin

    # From unstruc.F90:
    dzslay=np.zeros(kmx,'f8')
    zslay=np.zeros(kmx+1,'f8')

    gfi = 1.0 / coefs[1] # this shouldn't do anything
    gf  = coefs[2]
    mx=kmx
    k1=int( coefs[0]*kmx) 

    gfk = gfi**k1

    if gfk == 1.0:
        gfi = 1.0
        dzslay[0] = 1.0 / mx
    else:
        dzslay[0] = ( 1.0 - gfi ) / ( 1.0 - gfk )* coefs[0]

    for k in range(1,k1):
        dzslay[k] = dzslay[k-1] * gfi

    gfk = gf**(kmx-k1)
    if k1 < kmx:
        if gfk == 1.0:
            gf = 1.0
            dzslay[k1] = 1.0 / mx
        else:
            dzslay[k1] = ( 1.0 - gf ) / ( 1.0 - gfk ) * ( 1.0 - coefs[0] )

        for k in range(k1+1,mx):
            dzslay[k] = dzslay[k-1] * gf

    zslay[0] = zmin
    for k in range(mx):
        zslay[k+1] = zslay[k] + dzslay[k] * (zmax-zmin)


    return zslay
        

[docs]def read_bnd(fn):
    """
    Parse DWAQ-style boundary data file
    
    Returns a list [ ['boundary_name',array([ boundary_link_idx,[[x0,y0],[x1,y1]] ])], ...]
    """
    with open(fn,'rt') as fp:
        toker=inp_tok(fp)
        token=lambda: six.next(toker)

        N_groups=int(token())
        groups=[]
        for group_idx in range(N_groups):
            group_name=token()
            N_link=int(token())
            links=np.zeros( N_link,
                            dtype=[ ('link','i4'),
                                    ('x','f8',(2,2)) ] )
            for i in range(N_link):
                links['link'][i]=int(token())
                links['x'][i,0,0]=float(token())
                links['x'][i,0,1]=float(token())
                links['x'][i,1,0]=float(token())
                links['x'][i,1,1]=float(token())
            groups.append( [group_name,links] )
    return groups

[docs]def write_bnd(bnd,fn):
    with open(fn,'wt') as fp:
        fp.write("%10d\n"%len(bnd))
        for name,segs in bnd:
            fp.write("%s\n"%name)
            fp.write("%10d\n"%len(segs))
            for seg in segs:
                x=seg['x']
                fp.write("%10d  %.7f  %.7f   %.7f  %.7f\n"%(seg['link'],
                                                            x[0,0],x[0,1],x[1,0],x[1,1]))


[docs]def read_dfm_tim(fn, ref_time, time_unit='M', columns=['val1','val2','val3']):
    """
    Parse a tim file to xarray Dataset.  Must pass in the reference
    time (datetime64, or convertable to that via utils.to_dt64())
    and the time_unit ('s' or 'm')

    time_unit: 'S' for seconds, 'M' for minutes.  Relative to model reference
    time.  Probably ought to be 'M' always.

    returns Dataset with 'time' dimension, and data columns labeled according
    to columns.
    """
    if time_unit.lower()=='m':
        dt=np.timedelta64(60,'s')
    elif time_unit.lower()=='s':
        dt=np.timedelta64(1,'s')
    else:
        raise Exception("Bad time unit %s"%time_unit)

    if not isinstance(ref_time,np.datetime64):
        ref_time=utils.to_dt64(ref_time)

    raw_data=np.loadtxt(fn)
    t=ref_time + dt*raw_data[:,0]

    ds=xr.Dataset()
    ds['time']=('time',),t
    for col_i in range(1,raw_data.shape[1]):
        ds[columns[col_i-1]]=('time',),raw_data[:,col_i]

    ds.attrs['source']=fn

    return ds

[docs]def read_dfm_bc(fn):
    """
    Parse DFM new-style BC file, returning a hash of
    Name => xarray dataset.
    """
    bcs={} # indexed by Name

    import re
    #with open(fn,'rt') as fp:w
    fp=open(fn,'rt') # during DEV
    if 1:
        # pre-read a line, and always keep the next line
        # in this variable for some low-budget lookahead
        line=fp.readline()

        while 1: # looping over datasets
            ds=xr.Dataset()
            ds.attrs['source']=fn

            # Eat blank lines
            while line and (line.strip()==""):
                line=fp.readline()

            if not line:
                break # end of file.  could be empty file.

            assert line.strip().lower()=='[forcing]',"Expected [forcing], got %s in %s"%(line,fn)
            line=fp.readline()

            quantities=[]
            curr_quantity={} # hash of quantity, unit
            def push_quantity():
                # Sanity check, make sure we have the bare minimum to define
                # a quantity.
                assert 'quantity' in curr_quantity
                quantities.append(dict(curr_quantity))
                curr_quantity.clear()

            while 1: # reading key-value pairs
                m=re.match(r'^([^=]+)=([^=]*)$',line)
                if m is not None: # key-value pair
                    key=m.group(1).strip().lower()
                    value=m.group(2).strip()
                    if key in ['quantity','unit']:
                        if key in curr_quantity:
                            # already seen this key, so must be a new quantity.
                            # push the old onto the list
                            push_quantity()
                        # record the new
                        curr_quantity[key]=value
                    else:
                        ds.attrs[key]=value
                    line=fp.readline()
                else:
                    # Not a key-value line.  Must be data
                    if curr_quantity:
                        push_quantity()
                    break # ready for data
            # Reading data
            columns=[ list() for q in quantities ]
            while 1:
                if line.strip()=="": # eat blank lines
                    pass
                elif line.strip().lower()=="[forcing]": # start of next block
                    break
                else:
                    items=line.strip().split()
                    assert len(items)==len(quantities)
                    for v,col in zip(items,columns):
                        col.append(float(v))
                line=fp.readline()
                if not line: # end by way of end-of-file
                    break
            for q,col in zip(quantities,columns):
                var_name=q['quantity']
                ds[var_name]=('time',),np.array(col)
                for k in q.keys():
                    if k!='quantity':
                        # everything else becomes an attribute
                        ds[var_name].attrs[k] = q[k]
            if 'time' in ds:
                if ('unit' in ds['time'].attrs) and ('units' not in ds['time'].attrs):
                    ds['time'].attrs['units']=ds['time'].attrs['unit']

                # This actually should line up with CF conventions
                # pretty well.  Give xarray a chance to parse the
                # dates
                try:
                    ds=xr.decode_cf(ds)
                except TypeError:
                    # Really no idea what kinds of exceptions may crop up there.
                    log.debug("While decoding time data",exc_info=True)
                    # fall through, which will leave original ds intact.
            bcs[ds.attrs['name']]=ds
    return bcs
Source code for stompy.model.delft.io

stompy

Navigation

Related Topics