
# nsdfwriter.py --- 
# 
# Filename: nsdfwriter.py
# Description: 
# Author: Subhasis Ray [email: {lastname} dot {firstname} at gmail dot com]
# Maintainer: 
# Created: Fri Apr 25 19:51:42 2014 (+0530)
# Version: 
# Last-Updated: 
#           By: 
#     Update #: 0
# URL: 
# Keywords: 
# Compatibility: 
# 
# 

# Commentary: 
# 
# 
# 
# 

# Change log:
# 
# 
# 
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 3, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; see the file COPYING.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth
# Floor, Boston, MA 02110-1301, USA.
# 
# 

# Code:
"""
Writer for NSDF file format.
"""

import h5py as h5
import numpy as np
import os

from .model import ModelComponent, common_prefix
from .constants import *
from .util import *
from datetime import datetime

def match_datasets(hdfds, pydata):
    """Match entries in hdfds with those in pydata. Returns True if
    the two sets are equal, False otherwise.

    """
    src_set = set([item for item in hdfds])
    dsrc_set = set(pydata)
    return src_set == dsrc_set

def add_model_component(component, parentgroup):
    """Add a model component as a group under `parentgroup`.

    This creates a group `component.name` under parent group if not
    already present. The `uid` of the component is stored in the
    `uid` attribute of the group. Key-value pairs in the
    `component.attrs` dict are stored as attributes of the group.

    Args:
        component (ModelComponent): model component object to be
            written to NSDF file.

        parentgroup (HDF Group): group under which this component's
            group should be created.

    Returns:
        HDF Group created for this model component.

    Raises:
        KeyError if the parentgroup is None and no group
        corresponding to the component's parent exists.

    """
    grp = parentgroup.require_group(component.name)
    component.hdfgroup = grp
    if component.uid is not None:
        grp.attrs['uid'] = component.uid
    else:
        grp.attrs['uid'] = component.path
    for key, value in component.attrs.items():
        grp.attrs[key] = value
    return grp

def write_ascii_file(group, name, fname, **compression_opts):
    """Add a dataset `name` under `group` and store the contents of
    text file `fname` in it."""
    with open(fname, 'rt') as fhandle:
        data = fhandle.read()
    if '\x00' in data:
        raise ValueError('Cannot handle NULL byte in ascii data')
    dataset = group.create_dataset(name, shape=(1,), data=data,
                                   dtype=VLENBYTE, **compression_opts)
    return dataset

def write_binary_file(group, name, fname, **compression_opts):
    """Add a dataset `name` under `group` and store the contents of
    binary file `fname` in it."""
    with open(fname, 'rb') as fhandle:
        data = np.void(fhandle.read())
    dataset = group.create_dataset(name, shape=(1,), data=data,
                                   dtype=np.void)
    return dataset

def write_dir_contents(root_group, root_dir, ascii, **compression_opts):
    """Walk the directory tree rooted at `root_dir` and replicate it
    under `root_group` in HDF5 file.

    This is a helper function for copying model directory structure
    and file contents into an hdf5 file. If ascii=True all files are
    considered ascii text, else all files are taken as binary blobs.

    Args:
        root_group (h5py.Group): group under which the directory tree
            is to be created.

        root_dir (str): path of the directory from which to start
            traversal.

        ascii (bool): whether to treat each file as ascii text file.

    """
    for root, dirs, files in os.walk(root_dir):
        relative_root = root[root.find(os.path.basename(root_dir)):]
        grp = root_group.require_group(relative_root)
        for fname in files:
            dset_name = os.path.basename(fname)
            file_path = os.path.join(root, fname)
            if ascii:
                try:
                    dset = write_ascii_file(grp, dset_name, file_path,
                                            **compression_opts)
                except ValueError:
                    print 'Skipping binary file', file_path
            else:
                dset = write_binary_file(grp, dset_name, file_path,
                                         **compression_opts)

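# Illustrative usage sketch (not executed as part of this module): copying a
# model directory into an open HDF5 file with `write_dir_contents`. The file
# and directory names below are hypothetical placeholders.
#
#     fd = h5.File('example.h5', 'w')
#     filecontents = fd.require_group('/model/filecontents')
#     write_dir_contents(filecontents, '/tmp/mymodel', ascii=True,
#                        compression='gzip')
#     fd.close()
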
class NSDFWriter(object):
    """Writer for NSDF files.

    An NSDF file has three main groups: `/model`, `/data` and `/map`.

    Attributes:
        mode (str): File open mode. Defaults to append ('a'). Can be
            'w' or 'w+' also.

        dialect (nsdf.dialect member): ONED for storing nonuniformly
            sampled and event data in 1D arrays. VLEN for storing
            such data in 2D VLEN datasets. NANPADDED for storing such
            data in 2D homogeneous datasets with NaN padding.

        model (h5.Group): /model group

        data (h5.Group): /data group

        mapping (h5.Group): /map group

        time_dim (h5.Group): /map/time group contains the sampling
            time points as dimension scales of data. It is mainly
            used for nonuniformly sampled data.

        modeltree (h5.Group): /model/modeltree group can be used for
            storing the model in a hierarchical manner. Each subgroup
            under `modeltree` is a model component and can contain
            other subgroups representing subcomponents. Each group
            stores the unique identifier of the model component it
            represents in the string attribute `uid`.

    """
    def __init__(self, filename, dialect=dialect.ONED, mode='a',
                 **h5args):
        """Initialize NSDF writer.

        Args:
            filename (str): path of the file to be written.

            dialect (nsdf.dialect member): the dialect of NSDF to be
                used. Default: ONED.

            mode (str): file write mode. Default is 'a', which is
                also the default of h5py.File.

            **h5args: other keyword arguments are passed to h5py when
                creating datasets. These can be `compression`
                (='gzip'/'szip'/'lzf'), `compression_opts` (=0-9 with
                gzip), `fletcher32` (=True/False), `shuffle`
                (=True/False).

        """
        self._fd = h5.File(filename, mode)
        self.timestamp = datetime.utcnow()
        self._fd.attrs['created'] = self.timestamp.isoformat()
        self._fd.attrs['nsdf_version'] = '0.1'
        self._fd.attrs['dialect'] = dialect
        self.mode = mode
        self.dialect = dialect
        self.data = self._fd.require_group('/data')
        self.model = self._fd.require_group('/model')
        self.mapping = self._fd.require_group('/map')
        self.time_dim = self.mapping.require_group('time')
        self.modeltree = self.model.require_group('modeltree')
        for stype in SAMPLING_TYPES:
            self.data.require_group(stype)
            self.mapping.require_group(stype)
        self.modelroot = ModelComponent('modeltree', uid='modeltree',
                                        hdfgroup=self.modeltree)
        self.h5args = h5args

    def __del__(self):
        self._fd.close()

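    # Illustrative usage sketch (not executed as part of this module):
    # opening a writer with gzip compression. The file name and options
    # below are arbitrary; any of the h5py dataset-creation keywords listed
    # in the __init__ docstring may be passed as **h5args.
    #
    #     writer = NSDFWriter('example.h5', dialect=dialect.ONED, mode='w',
    #                         compression='gzip', compression_opts=6,
    #                         fletcher32=True, shuffle=True)
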
    def set_properties(self, properties):
        """Set the file attributes (environments).

        Args:
            properties (dict): mapping property names to values. It
                must contain the following keys:

                title (str)
                creator (list of str)
                software (list of str)
                method (list of str)
                description (str)
                rights (str)
                tstart (datetime.datetime)
                tend (datetime.datetime)
                contributor (list of str)

        Raises:
            KeyError if not all environment properties are specified
            in the dict.

        """
        self._fd.attrs['title'] = properties['title']
        attr = np.zeros((len(properties['creator']),), dtype=VLENSTR)
        attr[:] = properties['creator']
        self._fd.attrs['creator'] = attr
        attr = np.zeros((len(properties['software']),), dtype=VLENSTR)
        attr[:] = properties['software']
        self._fd.attrs['software'] = attr
        attr = np.zeros((len(properties['method']),), dtype=VLENSTR)
        attr[:] = properties['method']
        self._fd.attrs['method'] = attr
        self._fd.attrs['description'] = properties['description']
        self._fd.attrs['rights'] = properties['rights']
        self._fd.attrs['tstart'] = properties['tstart'].isoformat()
        self._fd.attrs['tend'] = properties['tend'].isoformat()
        attr = np.zeros((len(properties['contributor']),), dtype=VLENSTR)
        attr[:] = properties['contributor']
        self._fd.attrs['contributor'] = attr

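    # Illustrative sketch of the `properties` dict expected by
    # `set_properties` (all values below are placeholders):
    #
    #     from datetime import datetime
    #     writer.set_properties({
    #         'title': 'Example simulation',
    #         'creator': ['Jane Doe'],
    #         'software': ['MySimulator 1.0'],
    #         'method': ['exponential Euler'],
    #         'description': 'A toy dataset for demonstration.',
    #         'rights': 'CC-BY',
    #         'tstart': datetime(2014, 1, 1, 0, 0, 0),
    #         'tend': datetime(2014, 1, 1, 0, 10, 0),
    #         'contributor': ['John Smith'],
    #     })
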
    @property
    def title(self):
        """Title of the file"""
        try:
            return self._fd.attrs['title']
        except KeyError:
            return None

    @title.setter
    def title(self, title):
        """Set the title of the file.

        Args:
            title (str): title text of the file.

        """
        self._fd.attrs['title'] = title

    @property
    def creator(self):
        return self._fd.attrs['creator']

    @creator.setter
    def creator(self, creator_list):
        """Set the creator (one or more authors) of the file.

        Args:
            creator_list (list of str): list of creators of the file.

        """
        attr = np.zeros((len(creator_list),), dtype=VLENSTR)
        attr[:] = creator_list
        self._fd.attrs['creator'] = attr

    @property
    def license(self):
        """License information about the file. This is a text string."""
        return self._fd.attrs['license']

    @license.setter
    def license(self, text):
        self._fd.attrs['license'] = text

    @property
    def software(self):
        """Software (one or more) used to generate the data in the
        file.

        """
        return self._fd.attrs['software']

    @software.setter
    def software(self, software_list):
        """Set the software (one or more) used to generate the data
        in the file.

        Args:
            software_list (list of str): list of software involved in
                generating the data in the file.

        """
        attr = np.zeros((len(software_list),), dtype=VLENSTR)
        attr[:] = software_list
        self._fd.attrs['software'] = attr

    @property
    def method(self):
        """(numerical) methods applied in generating the data."""
        return self._fd.attrs['method']

    @method.setter
    def method(self, method_list):
        """Set the (numerical) methods applied in generating the data.

        Args:
            method_list (list of str): names of the methods employed
                to generate the data.

        """
        attr = np.zeros((len(method_list),), dtype=VLENSTR)
        attr[:] = method_list
        self._fd.attrs['method'] = attr

    @property
    def description(self):
        """Description of the file. A text string."""
        return self._fd.attrs['description']

    @description.setter
    def description(self, description):
        """Set the description of the file.

        Args:
            description (str): a human readable description of the
                file.

        """
        self._fd.attrs['description'] = description

    @property
    def rights(self):
        """The rights of the file contents."""
        return self._fd.attrs['rights']

    @rights.setter
    def rights(self, rights):
        """Set the rights of the file contents.

        Args:
            rights (str): text describing the rights of various
                individuals/organizations/other entities on the file
                contents.

        """
        self._fd.attrs['rights'] = rights

    @property
    def tstart(self):
        """Start time of the simulation / data recording. A string
        representation of the timestamp in ISO format.

        """
        return self._fd.attrs['tstart']

    @tstart.setter
    def tstart(self, tstart):
        """Set the start time of simulation/recording.

        Args:
            tstart (datetime.datetime): start date-time of the data
                recording/simulation.

        Note:
            We take a datetime instance here because we want to
            ensure ISO format.

        """
        self._fd.attrs['tstart'] = tstart.isoformat()

    @property
    def tend(self):
        """End time of the simulation/recording."""
        return self._fd.attrs['tend']

    @tend.setter
    def tend(self, tend):
        """Set the end time of recording/simulation.

        Args:
            tend (datetime.datetime): end date-time of the data
                recording or simulation.

        Note:
            We take a datetime instance here because we want to
            ensure ISO format.

        """
        self._fd.attrs['tend'] = tend.isoformat()

    @property
    def contributor(self):
        """List of contributors to the content of this file."""
        return self._fd.attrs['contributor']

    @contributor.setter
    def contributor(self, contributor_list):
        """Set the list of contributors to the contents of the file.

        Args:
            contributor_list (list of str): list of
                individuals/organizations/other entities who
                contributed towards the data stored in the file.

        """
        attr = np.zeros((len(contributor_list),), dtype=VLENSTR)
        attr[:] = contributor_list
        self._fd.attrs['contributor'] = attr

    def _link_map_model(self, mapds):
        """Link the model to map dataset and vice versa.

        The map dataset stores a list of references to the closest
        common ancestor of all the source components in it in the
        attribute `model`. The closest common ancestor in the model
        tree also stores a reference to this map dataset in its `map`
        attribute.

        This is an internal optimization in NSDF because, given that
        every model component has a unique id and the map datasets
        store these unique ids, it is always possible to search the
        entire model tree for these unique ids.

        Args:
            mapds: The map dataset for which the linking should be
                done.

        Returns:
            None

        """
        self.modelroot.update_id_path_dict()
        id_path_dict = self.modelroot.get_id_path_dict()
        if mapds.dtype.fields is None:
            idlist = mapds
        else:
            idlist = mapds['source']
        if len(id_path_dict) > 1:
            # there are elements other than /model/modeltree
            paths = [id_path_dict[uid] for uid in idlist]
            prefix = common_prefix(paths)[len('/modeltree/'):]
            try:
                source = self.modeltree[prefix]
                tmpattr = ([ref for ref in source.attrs.get('map', [])]
                           + [mapds.ref])
                attr = np.zeros((len(tmpattr),), dtype=REFTYPE)
                attr[:] = tmpattr
                source.attrs['map'] = attr
                tmpattr = ([ref for ref in mapds.attrs.get('map', [])]
                           + [source.ref])
                attr = np.zeros((len(tmpattr),), dtype=REFTYPE)
                attr[:] = tmpattr
                mapds.attrs['model'] = attr
            except KeyError, error:
                print error.message

    def add_modeltree(self, root, target='/'):
        """Add an entire model tree. This will cause the modeltree
        rooted at `root` to be written to the NSDF file.

        Args:
            root (ModelComponent): root of the source tree.

            target (str): target node path in NSDF file with respect
                to '/model/modeltree'. `root` and its children are
                added under this group.

        """
        def write_absolute(node, rootgroup):
            """Write ModelComponent `node` at its path relative to
            `rootgroup`.

            """
            if node.parent is None:
                parentgroup = rootgroup
            else:
                parentpath = node.parent.path[1:]
                parentgroup = rootgroup[parentpath]
            add_model_component(node, parentgroup)

        node = self.modelroot
        # Get the node corresponding to `target`, traverse by
        # splitting to avoid confusion between absolute and relative
        # paths.
        for name in target.split('/'):
            if name:
                node = node.children[name]
        node.add_child(root)
        self.modelroot.visit(write_absolute, self.model)

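    # Illustrative sketch (placeholder names): building a small model tree
    # and writing it under /model/modeltree. Only the ModelComponent
    # constructor arguments used elsewhere in this module (name, uid) and
    # its add_child method are assumed here.
    #
    #     cell = ModelComponent('cell0', uid='cell0')
    #     soma = ModelComponent('soma', uid='cell0/soma')
    #     cell.add_child(soma)
    #     writer.add_modeltree(cell)
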
    def add_model_filecontents(self, filenames, ascii=True,
                               recursive=True):
        """Add the files and directories listed in `filenames` to
        ``/model/filecontents``.

        This function is for storing the contents of model files in
        the NSDF file. It is useful for external formats like
        NeuroML, NineML, SBML and NEURON/GENESIS scripts. Each
        directory is stored as a group and each file is stored as a
        dataset.

        Args:
            filenames (sequence): the paths of files and/or
                directories which contain model information.

            ascii (bool): whether the files are in ascii.

            recursive (bool): whether to recursively store
                subdirectories.

        """
        filecontents = self.model.require_group('filecontents')
        for fname in filenames:
            if os.path.isfile(fname):
                buf = bytearray(os.path.getsize(fname))
                with open(fname, 'rb') as fhandle:
                    fhandle.readinto(buf)
                # Split the file path into its components so that the
                # directory hierarchy can be replicated as groups.
                components = []
                path = fname
                while True:
                    head, tail = os.path.split(path)
                    if tail:
                        components.append(tail)
                    if not head:
                        break
                    path = head
                grp = filecontents
                for name in components[:0:-1]:
                    grp = filecontents.require_group(name)
                if ascii:
                    fdata = write_ascii_file(grp, components[-1], fname,
                                             **self.h5args)
                else:
                    fdata = write_binary_file(grp, components[-1], fname,
                                              **self.h5args)
            elif os.path.isdir(fname):
                write_dir_contents(filecontents, fname, ascii=ascii,
                                   **self.h5args)

    def add_uniform_ds(self, name, idlist):
        """Add the sources listed in idlist under /map/uniform.

        Args:
            name (str): name with which the datasource list should be
                stored. This will represent a population of data
                sources.

            idlist (list of str): list of unique identifiers of the
                data sources.

        Returns:
            An HDF5 Dataset storing the source ids. This is converted
            into a dimension scale when actual data is added.

        """
        if len(idlist) == 0:
            raise ValueError('idlist must be nonempty')
        base = None
        try:
            base = self.mapping[UNIFORM]
        except KeyError:
            base = self.mapping.create_group(UNIFORM)
        src_ds = base.create_dataset(name, shape=(len(idlist),),
                                     dtype=VLENSTR, data=idlist)
        self._link_map_model(src_ds)
        return src_ds

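    # Illustrative sketch (placeholder ids): registering a population of
    # uniformly sampled sources. The returned dataset is later passed to
    # add_uniform_data (below) as `source_ds`.
    #
    #     uniform_srcs = writer.add_uniform_ds('granule_cells',
    #                                          ['cell0', 'cell1', 'cell2'])
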
    def add_nonuniform_ds(self, popname, idlist):
        """Add the sources listed in idlist under
        /map/nonuniform/{popname}.

        Args:
            popname (str): name with which the datasource list should
                be stored. This will represent a population of data
                sources.

            idlist (list of str): list of unique identifiers of the
                data sources.

        Returns:
            An HDF5 Dataset storing the source ids when dialect is
            VLEN or NANPADDED. This is converted into a dimension
            scale when actual data is added.

        Raises:
            AssertionError if idlist is empty or dialect is ONED.

        """
        base = self.mapping.require_group(NONUNIFORM)
        assert self.dialect != dialect.ONED
        assert len(idlist) > 0
        src_ds = base.create_dataset(popname, shape=(len(idlist),),
                                     dtype=VLENSTR, data=idlist)
        self._link_map_model(src_ds)
        return src_ds

    def add_nonuniform_ds_1d(self, popname, varname, idlist):
        """Add the sources listed in idlist under
        /map/nonuniform/{popname}/{varname}.

        In case of 1D datasets, for each variable we store the
        mapping from source id to dataset ref in a two column
        compound dataset with dtype=[('source', VLENSTR), ('data',
        REFTYPE)].

        Args:
            popname (str): name with which the datasource list should
                be stored. This will represent a population of data
                sources.

            varname (str): name of the variable being recorded. The
                same name should be passed when actual data is being
                added.

            idlist (list of str): list of unique identifiers of the
                data sources.

        Returns:
            An HDF5 Dataset storing the source ids in the `source`
            column.

        Raises:
            AssertionError if idlist is empty or if dialect is not
            ONED.

        """
        base = self.mapping.require_group(NONUNIFORM)
        assert self.dialect == dialect.ONED, 'valid only for dialect=ONED'
        assert len(idlist) > 0, 'idlist must be nonempty'
        grp = base.require_group(popname)
        src_ds = grp.create_dataset(varname, shape=(len(idlist),),
                                    dtype=SRCDATAMAPTYPE)
        for iii in range(len(idlist)):
            src_ds[iii] = (idlist[iii], None)
        self._link_map_model(src_ds)
        return src_ds

    def add_event_ds(self, name, idlist):
        """Create a dataset `/map/event/{name}` to store the mapping
        between the datasources and event data.

        Args:
            name (str): name with which the datasource list should be
                stored. This will represent a population of data
                sources.

            idlist (list): unique ids of the data sources.

        Returns:
            The HDF5 Dataset `/map/event/{name}` storing the source
            ids.

        """
        base = self.mapping.require_group(EVENT)
        assert len(idlist) > 0, 'idlist must be nonempty'
        assert ((self.dialect != dialect.ONED) and
                (self.dialect != dialect.NUREGULAR)), \
            'only for VLEN or NANPADDED dialects'
        src_ds = base.create_dataset(name, shape=(len(idlist),),
                                     dtype=VLENSTR, data=idlist)
        self._link_map_model(src_ds)
        return src_ds

    def add_event_ds_1d(self, popname, varname, idlist):
        """Create a dataset `/map/event/{popname}/{varname}` to store
        the mapping between the datasources and event data.

        Args:
            popname (str): name of the group under which the
                datasource list should be stored. This will represent
                a population of data sources.

            varname (str): name of the dataset mapping source uid to
                data. This should be same as the name of the recorded
                variable.

            idlist (list of str): list of unique identifiers of the
                data sources.

        Returns:
            The HDF5 Dataset `/map/event/{popname}/{varname}`.

        """
        base = self.mapping.require_group(EVENT)
        assert len(idlist) > 0, 'idlist must be nonempty'
        assert ((self.dialect == dialect.ONED) or
                (self.dialect == dialect.NUREGULAR)), \
            'dialect must be ONED or NUREGULAR'
        grp = base.require_group(popname)
        src_ds = grp.create_dataset(varname, shape=(len(idlist),),
                                    dtype=SRCDATAMAPTYPE)
        for iii in range(len(idlist)):
            src_ds[iii] = (idlist[iii], None)
        self._link_map_model(src_ds)
        return src_ds

    def add_static_ds(self, popname, idlist):
        """Add the sources listed in idlist under /map/static.

        Args:
            popname (str): name with which the datasource list should
                be stored. This will represent a population of data
                sources.

            idlist (list of str): list of unique identifiers of the
                data sources.

        Returns:
            An HDF5 Dataset storing the source ids. This is converted
            into a dimension scale when actual data is added.

        """
        if len(idlist) == 0:
            raise ValueError('idlist must be nonempty')
        base = self.mapping.require_group(STATIC)
        src_ds = base.create_dataset(popname, shape=(len(idlist),),
                                     dtype=VLENSTR, data=idlist)
        self.modelroot.update_id_path_dict()
        self._link_map_model(src_ds)
        return src_ds

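    # Illustrative sketch (placeholder names): registering sources for
    # static data and writing it with add_static_data (defined later in this
    # class). A StaticData class is assumed to exist in the nsdf package
    # alongside UniformData/EventData and to expose a put_data method; check
    # nsdf's data classes for the exact constructor.
    #
    #     static_srcs = writer.add_static_ds('granule_cells',
    #                                        ['cell0', 'cell1'])
    #     pos = nsdf.StaticData('position', unit='um', field='position')
    #     pos.put_data('cell0', [0.0, 0.0, 0.0])
    #     pos.put_data('cell1', [10.0, 0.0, 0.0])
    #     writer.add_static_data(static_srcs, pos)
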
    def add_uniform_data(self, source_ds, data_object, tstart=0.0,
                         fixed=False):
        """Append uniformly sampled `variable` values from `sources`
        to `data`.

        Args:
            source_ds (HDF5 Dataset): the dataset storing the source
                ids under map. This is attached to the stored data as
                a dimension scale called `source` on the row
                dimension.

            data_object (nsdf.UniformData): Uniform dataset to be
                added to file.

            tstart (double): (optional) start time of this dataset
                recording. Defaults to 0.

            fixed (bool): if True, the data cannot grow. Default:
                False

        Returns:
            HDF5 dataset storing the data.

        Raises:
            KeyError if the sources in `data_object` do not match
            those in `source_ds`.

            ValueError if dt is not specified or <= 0 when inserting
            data for the first time.

        """
        popname = source_ds.name.rpartition('/')[-1]
        ugrp = self.data[UNIFORM].require_group(popname)
        if not match_datasets(source_ds, data_object.get_sources()):
            raise KeyError('members of `source_ds` must match sources in'
                           ' `data_object`.')
        ordered_data = [data_object.get_data(src) for src in source_ds]
        data = np.vstack(ordered_data)
        try:
            dataset = ugrp[data_object.name]
            oldcolcount = dataset.shape[1]
            dataset.resize(oldcolcount + data.shape[1], axis=1)
            dataset[:, oldcolcount:] = data
        except KeyError:
            if data_object.dt <= 0.0:
                raise ValueError('`dt` must be > 0.0 for creating dataset.')
            if data_object.unit is None:
                raise ValueError('`unit` is required for creating dataset.')
            if data_object.tunit is None:
                raise ValueError('`tunit` is required for creating dataset.')
            maxcol = None
            if fixed:
                maxcol = data.shape[1]
            dataset = ugrp.create_dataset(
                data_object.name, shape=data.shape,
                dtype=data_object.dtype, data=data,
                maxshape=(data.shape[0], maxcol),
                **self.h5args)
            dataset.dims.create_scale(source_ds, 'source')
            dataset.dims[0].attach_scale(source_ds)
            dataset.dims[0].label = 'source'
            dataset.attrs['tstart'] = tstart
            dataset.attrs['dt'] = data_object.dt
            dataset.attrs['field'] = data_object.field
            dataset.attrs['unit'] = data_object.unit
            dataset.attrs['tunit'] = data_object.tunit
        return dataset

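    # Illustrative sketch (placeholder values), continuing the
    # add_uniform_ds example above. nsdf.UniformData is assumed to take a
    # name plus unit/field/dt/tunit keywords and to expose
    # put_data(source, array); this method only relies on its name, dt,
    # unit, tunit, dtype, get_sources() and get_data() members.
    #
    #     vm = nsdf.UniformData('Vm', unit='mV', field='Vm',
    #                           dt=0.1, tunit='ms')
    #     for cell in ['cell0', 'cell1', 'cell2']:
    #         vm.put_data(cell, np.random.uniform(-65, -60, size=100))
    #     writer.add_uniform_data(uniform_srcs, vm)
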
    def add_nonuniform_regular(self, source_ds, data_object,
                               fixed=False):
        """Append nonuniformly sampled `variable` values from
        `sources` to `data`. In this case the sampling times of all
        the sources are the same and the data is stored in a 2D
        dataset.

        Args:
            source_ds (HDF5 Dataset): the dataset storing the source
                ids under map. This is attached to the stored data as
                a dimension scale called `source` on the row
                dimension.

            data_object (nsdf.NonuniformRegularData):
                NonUniformRegular dataset to be added to file.

            fixed (bool): if True, the data cannot grow. Default:
                False

        Returns:
            HDF5 dataset storing the data.

        Raises:
            KeyError if the sources in `data_object` do not match
            those in `source_ds`.

            ValueError if the data arrays are not all equal in
            length.

            ValueError if dt is not specified or <= 0 when inserting
            data for the first time.

        """
        popname = source_ds.name.rpartition('/')[-1]
        ngrp = self.data[NONUNIFORM].require_group(popname)
        if not match_datasets(source_ds, data_object.get_sources()):
            raise KeyError('members of `source_ds` must match sources in'
                           ' `data_object`.')
        ordered_data = [data_object.get_data(src) for src in source_ds]
        data = np.vstack(ordered_data)
        if data.shape[1] != len(data_object.get_times()):
            raise ValueError('number of sampling times must be '
                             'the same as the number of data points')
        try:
            dataset = ngrp[data_object.name]
            oldcolcount = dataset.shape[1]
            dataset.resize(oldcolcount + data.shape[1], axis=1)
            dataset[:, oldcolcount:] = data
        except KeyError:
            if data_object.unit is None:
                raise ValueError('`unit` is required for creating dataset.')
            if data_object.tunit is None:
                raise ValueError('`tunit` is required for creating dataset.')
            maxcol = None
            if fixed:
                maxcol = data.shape[1]
            dataset = ngrp.create_dataset(
                data_object.name, shape=data.shape,
                dtype=data.dtype, data=data,
                maxshape=(data.shape[0], maxcol),
                **self.h5args)
            dataset.dims.create_scale(source_ds, 'source')
            dataset.dims[0].attach_scale(source_ds)
            dataset.dims[0].label = 'source'
            dataset.attrs['field'] = data_object.field
            dataset.attrs['unit'] = data_object.unit
            tsname = '{}_{}'.format(popname, data_object.name)
            tscale = self.time_dim.create_dataset(
                tsname, shape=(len(data_object.get_times()),),
                dtype=np.float64, data=data_object.get_times(),
                **self.h5args)
            dataset.dims.create_scale(tscale, 'time')
            dataset.dims[1].attach_scale(tscale)
            dataset.dims[1].label = 'time'
            tscale.attrs['unit'] = data_object.tunit
        return dataset

    def add_nonuniform_1d(self, source_ds, data_object,
                          source_name_dict=None, fixed=False):
        """Add nonuniform data when data from each source is in a
        separate 1D dataset.

        For a population of sources called {population}, a group
        `/map/nonuniform/{population}` must be first created (using
        add_nonuniform_ds_1d). This is passed as the `source_ds`
        argument.

        When adding the data, the uid of the sources and the names
        for the corresponding datasets must be specified and this
        function will create one dataset for each source under
        `/data/nonuniform/{population}/{name}` where {name} is the
        name of the data_object, preferably the name of the field
        being recorded.

        This function can be used when different sources in a
        population are sampled at different time points for a field
        value. Such a case may arise when each member of the
        population is simulated using a variable timestep method like
        CVODE and this timestep is not global.

        Args:
            source_ds (HDF5 dataset): the dataset
                `/map/nonuniform/{population}/{variable}` created for
                this population of sources (created by
                add_nonuniform_ds_1d).

            data_object (nsdf.NonuniformData): NSDFData object
                storing the data for all sources in `source_ds`.

            source_name_dict (dict): mapping from source id to
                dataset name. If None (default), the uids of the
                sources will be used as dataset names. If the uids
                are not compatible with HDF5 names (contain '.' or
                '/'), then the index of the source in source_ds will
                be used.

            fixed (bool): if True, the data cannot grow. Default:
                False

        Returns:
            dict mapping source ids to the tuple (dataset, time).

        Raises:
            AssertionError when dialect is not ONED.

        """
        assert self.dialect == dialect.ONED, \
            'add 1D dataset under nonuniform only for dialect=ONED'
        if source_name_dict is None:
            names = np.asarray(source_ds['source'], dtype=str)
            if np.any((np.char.find(names, '/') >= 0) |
                      (np.char.find(names, '.') >= 0)):
                names = [str(index) for index in range(len(names))]
            source_name_dict = dict(zip(source_ds['source'], names))
        assert len(set(source_name_dict.values())) == len(source_ds), \
            'The names in `source_name_dict` must be unique'
        popname = source_ds.name.split('/')[-2]
        ngrp = self.data[NONUNIFORM].require_group(popname)
        assert match_datasets(source_name_dict.keys(),
                              data_object.get_sources()), \
            'sources in `source_name_dict`' \
            ' do not match those in `data_object`'
        assert match_datasets(source_ds['source'],
                              source_name_dict.keys()), \
            'sources in mapping dataset do not match those with data'
        datagrp = ngrp.require_group(data_object.name)
        datagrp.attrs['source'] = source_ds.ref
        datagrp.attrs['unit'] = data_object.unit
        datagrp.attrs['field'] = data_object.field
        ret = {}
        for iii, source in enumerate(source_ds['source']):
            data, time = data_object.get_data(source)
            dsetname = source_name_dict[source]
            timescale = None
            try:
                dset = datagrp[dsetname]
                oldlen = dset.shape[0]
                timescale = dset.dims[0]['time']
                dset.resize((oldlen + len(data),))
                dset[oldlen:] = data
                timescale.resize((oldlen + len(data),))
                timescale[oldlen:] = time
            except KeyError:
                if data_object.unit is None:
                    raise ValueError('`unit` is required'
                                     ' for creating dataset.')
                if data_object.tunit is None:
                    raise ValueError('`tunit` is required'
                                     ' for creating dataset.')
                maxcol = len(data) if fixed else None
                dset = datagrp.create_dataset(
                    dsetname, shape=(len(data),),
                    dtype=data_object.dtype, data=data,
                    maxshape=(maxcol,), **self.h5args)
                dset.attrs['unit'] = data_object.unit
                dset.attrs['field'] = data_object.field
                dset.attrs['source'] = source
                source_ds[iii] = (source, dset.ref)
                # Using {popname}_{variablename}_{dsetname} for
                # simplicity. What about creating a hierarchy?
                tsname = '{}_{}_{}'.format(popname, data_object.name,
                                           dsetname)
                timescale = self.time_dim.create_dataset(
                    tsname, shape=(len(data),),
                    dtype=data_object.ttype, data=time,
                    maxshape=(maxcol,), **self.h5args)
                dset.dims.create_scale(timescale, 'time')
                dset.dims[0].label = 'time'
                dset.dims[0].attach_scale(timescale)
                timescale.attrs['unit'] = data_object.tunit
            ret[source] = (dset, timescale)
        return ret

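    # Illustrative sketch (placeholder values) of the ONED nonuniform flow:
    # one mapping dataset per variable (add_nonuniform_ds_1d above) followed
    # by one 1D data/time pair per source. nsdf.NonuniformData is assumed to
    # take name/unit/field/tunit keywords and to expose
    # put_data(source, (values, times)); only the members read by this
    # method are actually required.
    #
    #     nu_srcs = writer.add_nonuniform_ds_1d('interneurons', 'Im',
    #                                           ['cell0', 'cell1'])
    #     im = nsdf.NonuniformData('Im', unit='pA', field='Im', tunit='ms')
    #     im.put_data('cell0', ([0.1, 0.2, 0.15], [0.05, 0.17, 0.28]))
    #     im.put_data('cell1', ([0.3, 0.25], [0.1, 0.3]))
    #     writer.add_nonuniform_1d(nu_srcs, im)
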
    def add_nonuniform_vlen(self, source_ds, data_object,
                            fixed=False):
        """Add nonuniform data when data from all sources in a
        population is stored in a 2D ragged array.

        When adding the data, the uid of the sources and the names
        for the corresponding datasets must be specified and this
        function will create the dataset
        `/data/nonuniform/{population}/{name}` where {name} is the
        name of the data_object, preferably the name of the field
        being recorded.

        This function can be used when different sources in a
        population are sampled at different time points for a field
        value. Such a case may arise when each member of the
        population is simulated using a variable timestep method like
        CVODE and this timestep is not global.

        Args:
            source_ds (HDF5 dataset): the dataset under
                `/map/nonuniform` created for this population of
                sources (created by add_nonuniform_ds).

            data_object (nsdf.NonuniformData): NSDFData object
                storing the data for all sources in `source_ds`.

            fixed (bool): if True, this is a one-time write and the
                data cannot grow. Default: False

        Returns:
            tuple containing HDF5 Datasets for the data and sampling
            times.

        TODO:
            Concatenating old data with new data and reassigning is a
            poor choice. Waiting for response from h5py mailing list
            about appending data to rows of vlen datasets. If that is
            not possible, vlen dataset is a technically poor choice.

            h5py does not support vlen datasets with float64
            elements. Change dtype to np.float64 once that is
            developed.

        """
        if self.dialect != dialect.VLEN:
            raise Exception('add 2D vlen dataset under nonuniform'
                            ' only for dialect=VLEN')
        popname = source_ds.name.rpartition('/')[-1]
        ngrp = self.data[NONUNIFORM].require_group(popname)
        if not match_datasets(source_ds, data_object.get_sources()):
            raise KeyError('members of `source_ds` must match sources'
                           ' in `data_object`.')
        # Using {popname}_{variablename} for simplicity. What
        # about creating a hierarchy?
        tsname = '{}_{}'.format(popname, data_object.name)
        try:
            dataset = ngrp[data_object.name]
            time_ds = self.time_dim[tsname]
        except KeyError:
            if data_object.unit is None:
                raise ValueError('`unit` is required for creating dataset.')
            if data_object.tunit is None:
                raise ValueError('`tunit` is required for creating dataset.')
            vlentype = h5.special_dtype(vlen=data_object.dtype)
            maxrows = source_ds.shape[0] if fixed else None
            # Fix me: is there any point of keeping the compression
            # and shuffle options?
            dataset = ngrp.create_dataset(
                data_object.name, shape=source_ds.shape,
                dtype=vlentype, **self.h5args)
            dataset.attrs['field'] = data_object.field
            dataset.attrs['unit'] = data_object.unit
            dataset.dims.create_scale(source_ds, 'source')
            dataset.dims[0].attach_scale(source_ds)
            dataset.dims[0].label = 'source'
            # FIXME: VLENFLOAT should be made VLENDOUBLE whenever
            # h5py fixes it
            time_ds = self.time_dim.create_dataset(
                tsname, shape=dataset.shape, maxshape=(maxrows,),
                dtype=VLENFLOAT, **self.h5args)
            dataset.dims.create_scale(time_ds, 'time')
            dataset.dims[0].attach_scale(time_ds)
            dataset.dims[0].label = 'time'
            time_ds.attrs['unit'] = data_object.tunit
        for iii, source in enumerate(source_ds):
            data, time = data_object.get_data(source)
            dataset[iii] = np.concatenate((dataset[iii], data))
            time_ds[iii] = np.concatenate((time_ds[iii], time))
        return dataset, time_ds

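    # Illustrative sketch (placeholder values) of the VLEN dialect flow: a
    # single mapping dataset for the population (add_nonuniform_ds) and a 2D
    # ragged dataset for the values, with times stored under /map/time.
    # Assumes the writer was opened with dialect=dialect.VLEN and reuses the
    # hypothetical `im` NonuniformData object sketched above.
    #
    #     vlen_srcs = writer.add_nonuniform_ds('interneurons',
    #                                          ['cell0', 'cell1'])
    #     dataset, time_ds = writer.add_nonuniform_vlen(vlen_srcs, im)
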
    def add_nonuniform_nan(self, source_ds, data_object, fixed=False):
        """Add nonuniform data when data from all sources in a
        population is stored in a 2D array with NaN padding.

        Args:
            source_ds (HDF5 Dataset): the dataset under
                `/map/nonuniform` created for this population of
                sources (created by add_nonuniform_ds).

            data_object (nsdf.NonuniformData): NSDFData object
                storing the data for all sources in `source_ds`.

            fixed (bool): if True, this is a one-time write and the
                data cannot grow. Default: False

        Returns:
            HDF5 Dataset containing the data.

        Notes:
            Concatenating old data with new data and reassigning is a
            poor choice for saving data incrementally. HDF5 does not
            seem to support appending data to VLEN datasets.

            h5py does not support vlen datasets with float64
            elements. Change dtype to np.float64 once that is
            developed.

        """
        assert self.dialect == dialect.NANPADDED, \
            'add 2D dataset under `nonuniform` only for dialect=NANPADDED'
        popname = source_ds.name.rpartition('/')[-1]
        ngrp = self.data[NONUNIFORM].require_group(popname)
        if not match_datasets(source_ds, data_object.get_sources()):
            raise KeyError('members of `source_ds` must match sources '
                           'in `data_object`.')
        # Using {popname}_{variablename} for simplicity. What
        # about creating a hierarchy?
        tsname = '{}_{}'.format(popname, data_object.name)
        cols = [len(data_object.get_data(source)[0])
                for source in source_ds]
        starts = np.zeros(source_ds.shape[0], dtype=int)
        ends = np.asarray(cols, dtype=int)
        try:
            dataset = ngrp[data_object.name]
            for iii in range(source_ds.shape[0]):
                try:
                    starts[iii] = next(find(dataset[iii], np.isnan))[0][0]
                except StopIteration:
                    starts[iii] = len(dataset[iii])
                ends[iii] = starts[iii] + cols[iii]
            dataset.resize(max(ends), 1)
            time_ds = self.time_dim[tsname]
            time_ds.resize(max(ends), 1)
        except KeyError:
            if data_object.unit is None:
                raise ValueError('`unit` is required for creating dataset.')
            if data_object.tunit is None:
                raise ValueError('`tunit` is required for creating dataset.')
            maxrows = len(source_ds) if fixed else None
            maxcols = max(cols) if fixed else None
            dataset = ngrp.create_dataset(
                data_object.name,
                shape=(source_ds.shape[0], max(ends)),
                maxshape=(maxrows, maxcols),
                fillvalue=np.nan,
                dtype=data_object.dtype, **self.h5args)
            dataset.attrs['field'] = data_object.field
            dataset.attrs['unit'] = data_object.unit
            dataset.dims.create_scale(source_ds, 'source')
            dataset.dims[0].attach_scale(source_ds)
            dataset.dims[0].label = 'source'
            time_ds = self.time_dim.create_dataset(
                tsname, shape=dataset.shape,
                maxshape=(maxrows, maxcols),
                dtype=data_object.ttype, fillvalue=np.nan,
                **self.h5args)
            dataset.dims.create_scale(time_ds, 'time')
            dataset.dims[1].attach_scale(time_ds)
            dataset.dims[1].label = 'time'
            time_ds.attrs['unit'] = data_object.tunit
        for iii, source in enumerate(source_ds):
            data, time = data_object.get_data(source)
            dataset[iii, starts[iii]:ends[iii]] = data
            time_ds[iii, starts[iii]:ends[iii]] = time
        return dataset

    def add_event_1d(self, source_ds, data_object,
                     source_name_dict=None, fixed=False):
        """Add event time data when data from each source is in a
        separate 1D dataset.

        For a population of sources called {population}, a group
        `/map/event/{population}` must be first created (using
        add_event_ds_1d). This is passed as the `source_ds` argument.

        When adding the data, the uid of the sources and the names
        for the corresponding datasets must be specified in
        `source_name_dict` and this function will create one dataset
        for each source under `/data/event/{population}/{name}` where
        {name} is the name of the data_object, preferably the field
        name.

        Args:
            source_ds (HDF5 Dataset): the dataset
                `/map/event/{popname}/{varname}` created for this
                population of sources (created by add_event_ds_1d).
                The name of this group reflects that of the group
                under `/data/event` which stores the datasets.

            data_object (nsdf.EventData): NSDFData object storing the
                data for all sources in `source_ds`.

            source_name_dict (dict): mapping from source id to
                dataset name. If None (default) it tries to use the
                uids in the source_ds. If the uids do not fit the
                hdf5 naming convention, the index of the entries in
                source_ds will be used.

            fixed (bool): if True, the data cannot grow. Default:
                False

        Returns:
            dict mapping source ids to datasets.

        """
        assert ((self.dialect == dialect.ONED) or
                (self.dialect == dialect.NUREGULAR)), \
            'add 1D dataset under event only for dialect=ONED or NUREGULAR'
        if source_name_dict is None:
            names = np.asarray(source_ds['source'], dtype=str)
            if np.any((np.char.find(names, '/') >= 0) |
                      (np.char.find(names, '.') >= 0)):
                names = [str(index) for index in range(len(names))]
            source_name_dict = dict(zip(source_ds['source'], names))
        assert len(set(source_name_dict.values())) == len(source_ds), \
            'The names in `source_name_dict` must be unique'
        popname = source_ds.name.split('/')[-2]
        ngrp = self.data[EVENT].require_group(popname)
        assert match_datasets(source_name_dict.keys(),
                              data_object.get_sources()), \
            'number of sources do not match number of datasets'
        datagrp = ngrp.require_group(data_object.name)
        datagrp.attrs['source'] = source_ds.ref
        datagrp.attrs['unit'] = data_object.unit
        datagrp.attrs['field'] = data_object.field
        ret = {}
        for iii, source in enumerate(source_ds['source']):
            data = data_object.get_data(source)
            dsetname = source_name_dict[source]
            try:
                dset = datagrp[dsetname]
                oldlen = dset.shape[0]
                dset.resize((oldlen + len(data),))
                dset[oldlen:] = data
            except KeyError:
                if data_object.unit is None:
                    raise ValueError('`unit` is required for creating'
                                     ' dataset.')
                if data_object.field is None:
                    raise ValueError('`field` is required for creating'
                                     ' dataset.')
                maxrows = len(data) if fixed else None
                dset = datagrp.create_dataset(
                    dsetname, shape=(len(data),),
                    dtype=data_object.dtype, data=data,
                    maxshape=(maxrows,), **self.h5args)
                dset.attrs['unit'] = data_object.unit
                dset.attrs['field'] = data_object.field
                dset.attrs['source'] = source
                source_ds[iii] = (source, dset.ref)
            ret[source] = dset
        return ret

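    # Illustrative sketch (placeholder values) of the ONED event flow: spike
    # times for each source go into separate 1D datasets. nsdf.EventData is
    # assumed to take name/unit/field keywords and to expose
    # put_data(source, times); only the members read by this method are
    # actually required.
    #
    #     spike_srcs = writer.add_event_ds_1d('granule_cells', 'spike',
    #                                         ['cell0', 'cell1'])
    #     spikes = nsdf.EventData('spike', unit='ms', field='spiketime')
    #     spikes.put_data('cell0', [1.0, 5.2, 9.7])
    #     spikes.put_data('cell1', [2.3, 6.1])
    #     writer.add_event_1d(spike_srcs, spikes)
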
    def add_event_vlen(self, source_ds, data_object, fixed=False):
        """Add event data when data from all sources in a population
        is stored in a 2D ragged array.

        When adding the data, the uid of the sources and the names
        for the corresponding datasets must be specified and this
        function will create the dataset
        `/data/event/{population}/{name}` where {name} is the name of
        the data_object, preferably the name of the field being
        recorded.

        Args:
            source_ds (HDF5 Dataset): the dataset under `/map/event`
                created for this population of sources (created by
                add_event_ds).

            data_object (nsdf.EventData): NSDFData object storing the
                data for all sources in `source_ds`.

            fixed (bool): if True, this is a one-time write and the
                data cannot grow. Default: False

        Returns:
            HDF5 Dataset containing the data.

        Notes:
            Concatenating old data with new data and reassigning is a
            poor choice for saving data incrementally. HDF5 does not
            seem to support appending data to VLEN datasets.

            h5py does not support vlen datasets with float64
            elements. Change dtype to np.float64 once that is
            developed.

        """
        if self.dialect != dialect.VLEN:
            raise Exception('add 2D vlen dataset under event'
                            ' only for dialect=VLEN')
        popname = source_ds.name.rpartition('/')[-1]
        ngrp = self.data[EVENT].require_group(popname)
        if not match_datasets(source_ds, data_object.get_sources()):
            raise KeyError('members of `source_ds` must match sources '
                           'in `data_object`.')
        try:
            dataset = ngrp[data_object.name]
        except KeyError:
            if data_object.unit is None:
                raise ValueError('`unit` is required for creating dataset.')
            vlentype = h5.special_dtype(vlen=data_object.dtype)
            maxrows = len(source_ds) if fixed else None
            # Fix me: is there any point of keeping the compression
            # and shuffle options?
            dataset = ngrp.create_dataset(
                data_object.name, shape=source_ds.shape,
                maxshape=(maxrows,), dtype=vlentype, **self.h5args)
            dataset.attrs['field'] = data_object.field
            dataset.attrs['unit'] = data_object.unit
            dataset.dims.create_scale(source_ds, 'source')
            dataset.dims[0].attach_scale(source_ds)
            dataset.dims[0].label = 'source'
        for iii, source in enumerate(source_ds):
            data = data_object.get_data(source)
            dataset[iii] = np.concatenate((dataset[iii], data))
        return dataset

    def add_event_nan(self, source_ds, data_object, fixed=False):
        """Add event data when data from all sources in a population
        is stored in a 2D array with NaN padding.

        Args:
            source_ds (HDF5 Dataset): the dataset under `/map/event`
                created for this population of sources (created by
                add_event_ds).

            data_object (nsdf.EventData): NSDFData object storing the
                data for all sources in `source_ds`.

            fixed (bool): if True, this is a one-time write and the
                data cannot grow. Default: False

        Returns:
            HDF5 Dataset containing the data.

        """
        assert self.dialect == dialect.NANPADDED, \
            'add 2D dataset under event only for dialect=NANPADDED'
        popname = source_ds.name.rpartition('/')[-1]
        ngrp = self.data[EVENT].require_group(popname)
        if not match_datasets(source_ds, data_object.get_sources()):
            raise KeyError('members of `source_ds` must match sources '
                           'in `data_object`.')
        cols = [len(data_object.get_data(source))
                for source in source_ds]
        starts = np.zeros(source_ds.shape[0], dtype=int)
        ends = np.asarray(cols, dtype=int)
        try:
            dataset = ngrp[data_object.name]
            for iii in range(dataset.shape[0]):
                try:
                    starts[iii] = next(find(dataset[iii], np.isnan))[0][0]
                except StopIteration:
                    starts[iii] = len(dataset[iii])
                ends[iii] = starts[iii] + cols[iii]
            dataset.resize(max(ends), 1)
        except KeyError:
            if data_object.unit is None:
                raise ValueError('`unit` is required for creating dataset.')
            maxrows = len(source_ds) if fixed else None
            maxcols = max(ends) if fixed else None
            dataset = ngrp.create_dataset(
                data_object.name,
                shape=(source_ds.shape[0], max(ends)),
                maxshape=(maxrows, maxcols),
                dtype=data_object.dtype, fillvalue=np.nan,
                **self.h5args)
            dataset.attrs['field'] = data_object.field
            dataset.attrs['unit'] = data_object.unit
            dataset.dims.create_scale(source_ds, 'source')
            dataset.dims[0].attach_scale(source_ds)
            dataset.dims[0].label = 'source'
        for iii, source in enumerate(source_ds):
            data = data_object.get_data(source)
            dataset[iii, starts[iii]:ends[iii]] = data
        return dataset

    def add_static_data(self, source_ds, data_object, fixed=True):
        """Append static data `variable` values from `sources` to
        `data`.

        Args:
            source_ds (HDF5 Dataset): the dataset storing the source
                ids under map. This is attached to the stored data as
                a dimension scale called `source` on the row
                dimension.

            data_object (nsdf.StaticData): NSDFData object storing
                the data for all sources in `source_ds`.

            fixed (bool): if True, the data cannot grow. Default:
                True

        Returns:
            HDF5 dataset storing the data.

        Raises:
            KeyError if the sources in `data_object` do not match
            those in `source_ds`.

        """
        popname = source_ds.name.rpartition('/')[-1]
        ugrp = self.data[STATIC].require_group(popname)
        if not match_datasets(source_ds, data_object.get_sources()):
            raise KeyError('members of `source_ds` must match sources'
                           ' in `data_object`.')
        ordered_data = [data_object.get_data(src) for src in source_ds]
        data = np.vstack(ordered_data)
        try:
            dataset = ugrp[data_object.name]
            oldcolcount = dataset.shape[1]
            dataset.resize(oldcolcount + data.shape[1], axis=1)
            dataset[:, oldcolcount:] = data
        except KeyError:
            if data_object.unit is None:
                raise ValueError('`unit` is required for creating dataset.')
            maxcol = None
            if fixed:
                maxcol = data.shape[1]
            dataset = ugrp.create_dataset(
                data_object.name, shape=data.shape,
                dtype=data_object.dtype, data=data,
                maxshape=(data.shape[0], maxcol),
                **self.h5args)
            dataset.dims.create_scale(source_ds, 'source')
            dataset.dims[0].attach_scale(source_ds)
            dataset.dims[0].label = 'source'
            dataset.attrs['field'] = data_object.field
            dataset.attrs['unit'] = data_object.unit
        return dataset


# 
# nsdfwriter.py ends here