Source code for stompy.io.local.noaa_coops

import os
import datetime

import numpy as np
import xarray as xr
import requests
import logging

log=logging.getLogger('noaa_coops')

from ... import utils
from .common import periods

all_products=dict(
    water_level="water_level",
    air_temperature="air_temperature",
    water_temperature="water_temperature",
    wind="wind",
    air_pressure="air_pressure",
    air_gap="air_gap",
    conductivity="conductivity",
    visibility="visibility",
    humidity="humidity",
    salinity="salinity",
    hourly_height="hourly_height",
    high_low="high_low",
    daily_mean="daily_mean",
    monthly_mean="monthly_mean",
    one_minute_water_level="one_minute_water_level",
    predictions="predictions",
    datums="datums",
    currents="currents")

all_datums=dict(
    CRD="Columbia River Datum",
    MHHW="Mean Higher High Water",
    MHW="Mean High Water",
    MTL="Mean Tide Level",
    MSL="Mean Sea Level",
    MLW="Mean Low Water",
    MLLW="Mean Lower Low Water",
    NAVD="North American Vertical Datum",
    STND="Station Datum")

[docs]def coops_json_to_ds(json,params): """ Mold the JSON response from COOPS into a dataset """ ds=xr.Dataset() if 'metadata' in json: meta=json['metadata'] ds['station']=( ('station',), [meta['id']]) for k in ['name','lat','lon']: val=meta[k] if k in ['lat','lon']: val=float(val) ds[k]= ( ('station',), [val]) else: # predictions do not come back with metadata ds['station']= ('station',),[params['station']] times=[] values=[] qualities=[] if 'data' in json: data=json['data'] elif 'predictions' in json: # Why do they present predictions data in such a different format? data=json['predictions'] for row in data: # {'f': '0,0,0,0', 'q': 'v', 's': '0.012', 't': '2010-12-01 00:00', 'v': '0.283'} try: values.append(float(row['v'])) except ValueError: values.append(np.nan) times.append( np.datetime64(row['t']) ) # for now, ignore flags, verified status. ds['time']=( ('time',),times) ds[params['product']]=( ('station','time'), [values] ) bad_count=np.sum( np.isnan(values) ) if bad_count: log.warning("%d of %d data values were missing"%(bad_count,len(values))) if params['product'] in ['water_level','predictions']: ds[params['product']].attrs['datum'] = params['datum'] return ds
[docs]def coops_dataset(station,start_date,end_date,products, days_per_request=None,cache_dir=None): """ bare bones retrieval script for NOAA Tides and Currents data. In particular, no error handling yet, doesn't batch requests, no caching, can't support multiple parameters, no metadata, etc. days_per_request: break up the request into chunks no larger than this many days. for hourly data, this should be less than 365. for six minute, I think the limit is 32 days. """ ds_per_product=[] for product in products: ds=coops_dataset_product(station=station, product=product, start_date=start_date, end_date=end_date, days_per_request=days_per_request, cache_dir=cache_dir) if ds is not None: ds_per_product.append(ds) ds_merged=xr.merge(ds_per_product,join='outer') return ds_merged
[docs]def coops_dataset_product(station,product, start_date,end_date,days_per_request='M', cache_dir=None,refetch_incomplete=True, clip=True): """ Retrieve a single data product from a single station. station: string or numeric identifier for COOPS station product: string identifying the variable to retrieve, such as "water_level". See all_products at the top of this file. start_date,end_date: period to retrieve, as python datetime, matplotlib datenum, or numpy datetime64. days_per_request: batch the requests to fetch smaller chunks at a time. if this is an integer, then chunks will start with start_date, then start_date+days_per_request, etc. if this is a string, it is interpreted as the frequency argument to pandas.PeriodIndex. so 'M' will request month-aligned chunks. this has the advantage that requests for different start dates will still be aligned to integer periods, and can reuse cached data. cache_dir: if specified, save each chunk as a netcdf file in this directory, with filenames that include the gage, period and products. The directory must already exist. returns an xarray dataset, or None if no data could be fetched refetch_incomplete: if True, if a dataset is pulled from cache but appears incomplete with respect to the start_date and end_date, attempt to fetch it again. Not that incomplete here is meant for realtime data which has not yet been recorded, so the test is only between end_date and the last time stamp of retrieved data. clip: if true, return only data within the requested window, even if more data was fetched. """ start_date=utils.to_dt64(start_date) end_date=utils.to_dt64(end_date) fmt_date=lambda d: utils.to_datetime(d).strftime("%Y%m%d %H:%M") base_url="https://tidesandcurrents.noaa.gov/api/datagetter" # not supported by this script: bin datums=['NAVD','MSL'] datasets=[] for interval_start,interval_end in periods(start_date,end_date,days_per_request): if cache_dir is not None: begin_str=utils.to_datetime(interval_start).strftime('%Y-%m-%d') end_str =utils.to_datetime(interval_end).strftime('%Y-%m-%d') cache_fn=os.path.join(cache_dir, "%s_%s_%s_%s.nc"%(station, product, begin_str, end_str)) else: cache_fn=None ds=None if (cache_fn is not None) and os.path.exists(cache_fn): log.info("Cached %s -- %s"%(interval_start,interval_end)) ds=xr.open_dataset(cache_fn) if refetch_incomplete: # This will fetch a bit more than absolutely necessary # In the case that this file is up to date, but the sensor was down, # we might be able to discern that if this was originally fetched # after another request which found valid data from a later time. if ds.time.values[-1]<min(utils.to_dt64(interval_end), end_date): log.warning(" but that was incomplete -- will re-fetch") ds=None if ds is None: log.info("Fetching %s -- %s"%(interval_start,interval_end)) params=dict(begin_date=fmt_date(interval_start), end_date=fmt_date(interval_end), station=str(station), time_zone='gmt', # always! application='stompy', units='metric', format='json', product=product) if product in ['water_level','hourly_height',"one_minute_water_level","predictions"]: while 1: # not all stations have NAVD, so fall back to MSL params['datum']=datums[0] req=requests.get(base_url,params=params) try: data=req.json() except ValueError: # thrown by json parsing log.warning("Likely server error retrieving JSON data from tidesandcurrents.noaa.gov") data=dict(error=dict(message="Likely server error")) break if (('error' in data) and (("datum" in data['error']['message'].lower()) or (product=='predictions'))): # Actual message like 'The supported Datum values are: MHHW, MHW, MTL, MSL, MLW, MLLW, LWI, HWI' # Predictions sometimes silently fail, as if there is no data, but really just need # to try MSL. log.debug(data['error']['message']) datums.pop(0) # move on to next datum continue # assume it's because the datum is missing break else: req=requests.get(base_url,params=params) data=req.json() if 'error' in data: msg=data['error']['message'] if "No data was found" in msg: # station does not have this data for this time. log.warning("No data found for this period") else: # Regardless, if there was an error we got no data. log.warning("Unknown error - got no data back.") log.debug(data) log.debug("URL was %s"%(req.url)) continue ds=coops_json_to_ds(data,params) if cache_fn is not None: if os.path.exists(cache_fn): # simply overwriting often does not work, so try removing first os.unlink(cache_fn) ds.to_netcdf(cache_fn) if len(datasets)>0: # avoid duplicates in case they overlap ds=ds.isel(time=ds.time.values>datasets[-1].time.values[-1]) datasets.append(ds) if len(datasets)==0: # could try to construct zero-length dataset, but that sounds like a pain # at the moment. return None if len(datasets)>1: # data_vars='minimal' is needed to keep lat/lon from being expanded # along the time axis. dataset=xr.concat( datasets, dim='time',data_vars='minimal') else: dataset=datasets[0].copy(deep=True) # better not to leave these lying around open for d in datasets: d.close() if clip: time_sel=(dataset.time.values>=start_date) & (dataset.time.values<end_date) dataset=dataset.isel(time=time_sel) return dataset