Source code for versuchung.types

# This file is part of versuchung.
#
# versuchung is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# versuchung is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# versuchung.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function

import os
import csv
try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO
from argparse import ArgumentParser
import copy
import glob

class SubObjects(dict):
    def __init__(self, type_object):
        dict.__init__(self)
        self.parent = type_object

    def __setitem__(self, key, value):
        assert not key in self or self[key] == value, "Duplicated object name: %s = %s" % (key, value)
        dict.__setitem__(self, key, value)
        value.parent_object = self.parent
        self.update()

    def update(self):
        if not "parent" in dir(self) and len(self) > 0:
            print("You probably used python multiprocessing, this might break horrible")
            return

        for name, obj in self.items():
            if self.parent.name != None:
                obj.name = "%s-%s" % (self.parent.name, name)
            else:
                obj.name = name
            obj.static_experiment  = self.parent.static_experiment
            obj.dynamic_experiment = self.parent.dynamic_experiment


class Type(object):
    static_experiment = None
    """A reference to the static enclosing experiment: where the type was defined in"""

    dynamic_experiment = None
    """A reference to the currently running experiment"""

    subobjects = None
    """A Type.Subobjects instance that collects all Types that are
       used by this type. Subordinate types"""

    parent_object = None
    """A Type instance that is the parent of this object"""

    parameter_type = None

    def __init__(self):
        # We gather a list of objects that are used by us.
        self.subobjects = SubObjects(self)
        self.__name = None

    def before_experiment_run(self, parameter_type):
        self.parameter_type = parameter_type
        self.subobjects.update()
        for subobj in self.subobjects.values():
            subobj.before_experiment_run(parameter_type)

    def after_experiment_run(self, parameter_type):
        for subobj in self.subobjects.values():
            subobj.after_experiment_run(parameter_type)

    ################################################################
    # Accessors
    ################################################################
    """This is the base type for all input and output parameters"""
    @property
    def name(self):
        return self.__name
    @name.setter
    def name(self, name):
        self.__name = name

    def path_to_root_object(self):
        """Returns all parent objects"""
        ret = []
        p = self
        while p.parent_object != None:
            ret.append(p)
            p = p.parent_object
        return list(reversed(ret))

    @property
    def value(self):
        """Default accessor for this kind of data"""
        raise NotImplemented

    @property
    def base_directory(self):
        """The base directory of a type is always the base directory
        of the (statically) enclosing experiment instance. The
        Directory has the form <ExperimentName>-<HASH>"""
        if not self.static_experiment:
            return None
        return self.static_experiment.base_directory

    @property
    def tmp_directory(self):
        """A temporary directory, which can be used during experiment
        execution. The tmp_directory is deduced through the dynamic
        experiment reference"""
        assert self.dynamic_experiment, "Type is not used part of a running experiment"
        return self.dynamic_experiment.tmp_directory

    def __repr__(self, value=None):
        if value:
            return "<%s %s '%s'>" %(self.__class__.__name__, self.__name, value)
        return "<%s %s>" %(self.__class__.__name__, self.__name)



class InputParameter:
    is_restartable = False

    def __init__(self):
        pass
    def inp_setup_cmdline_parser(self, parser):
        raise NotImplemented
    def inp_extract_cmdline_parser(self, opts, args):
        raise NotImplemented

    def __parser_option(self, option = None):
        if option:
            return self.name + "-" + option
        return self.name

    def was_given(self):
        """Checks if an optional parameter was given"""
        if not hasattr(self, "optional_parameter_given"):
            return True
        if self.optional_parameter_given:
            return True
        return False

    def inp_parser_add(self, parser, option, default, **kwargs):
        option = self.__parser_option(option)
        kw = {
            "dest": option,
            }
        if not hasattr(self, "optional_parameter_given"):
            kw["default"] = default
            kw["help"]    = "(default: %s)" % default

        kw.update(kwargs)
        parser.add_argument(f'--{option}', **kw)

    def inp_parser_extract(self, opts, option):
        a = getattr(opts, self.__parser_option(option), None)
        if a != None and hasattr(self, "optional_parameter_given"):
            self.optional_parameter_given = True
        return a

    def inp_metadata(self):
        return {}


def Optional(input_parameter):
    """Makes an input parameter optional. input_parameter.was_given()
    checks if the parameter was given on the command line."""
    if not isinstance(input_parameter, InputParameter):
        raise RuntimeError("Optional() can only be used with input parameters")
    input_parameter.optional_parameter_given = False
    return input_parameter


class OutputParameter:
    def __init__(self):
        pass


[docs]class String(InputParameter, Type):
    """Can be used as: **input parameter**

    A String is the most simple input parameter."""

    def __init__(self, default_value=""):
        InputParameter.__init__(self)
        Type.__init__(self)
        self.__value = default_value

    def __reinit__(self, value):
        self.__value = value

    def inp_setup_cmdline_parser(self, parser):
        self.inp_parser_add(parser, None, self.__value)

    def inp_extract_cmdline_parser(self, opts, args):
        self.__value = self.inp_parser_extract(opts, None)

    def inp_metadata(self):
        return {self.name: self.value}

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return Type.__repr__(self, self.__value)

    @property
    def value(self):
        """The value of the string. This is either the default value
        or the parameter given on the command line"""
        return self.__value


[docs]class Bool(InputParameter, Type):
    """Can be used as: **input parameter**

    A boolean flag parameter (will accept "yes" and "no" on the command line."""

    def __init__(self, default_value=False):
        InputParameter.__init__(self)
        Type.__init__(self)
        self.__value = default_value

    def __reinit__(self, value):
        self.__value = value

    def inp_setup_cmdline_parser(self, parser):
        self.inp_parser_add(parser, None, self.__value)
    def inp_extract_cmdline_parser(self, opts, args):
        yes_values = ("yes", "y", "true", "1")
        no_values  = ("no", "n", "false", "0")
        self.__value = self.inp_parser_extract(opts, None)
        if type(self.value) == str and self.__value.lower() in yes_values:
            self.__value = True
        elif type(self.value) == str and self.__value.lower() in no_values:
            self.__value = False
        elif type(self.__value) == bool:
            pass
        else:
            raise RuntimeError("Wrong parameter for Bool() argument (%s = %s), possible values are %s, %s" %\
                               (self.name, self.__value, yes_values, no_values))

    def inp_metadata(self):
        return {self.name: self.value}

    def __str__(self):
        return str(self.value)

    @property
    def value(self):
        """The value of the bool. This is either the default value
        or the parameter given on the command line"""
        return self.__value

[docs]class Integer(InputParameter, Type):
    """Can be used as: **input parameter**

    A integer flag argument (will accept a number on the command line."""

    def __init__(self, default_value = 0):
        InputParameter.__init__(self)
        Type.__init__(self)
        self.__value = default_value

    def __reinit__(self, value):
        self.__value = value


    def inp_setup_cmdline_parser(self, parser):
        self.inp_parser_add(parser, None, self.__value)
    def inp_extract_cmdline_parser(self, opts, args):
        self.__value = self.inp_parser_extract(opts, None)
        if type(self.__value) == int:
            pass
        else:
            try:
                self.__value = int(self.__value)
            except:
                raise RuntimeError("Wrong parameter for Bool() argument (%s)" % self.__value)

    def inp_metadata(self):
        return {self.name: self.value}

    def __str__(self):
        return str(self.value)

    @property
    def value(self):
        """The value of the integer. This is either the default value
        or the parameter given on the command line"""
        return self.__value



[docs]class List(InputParameter, Type, list):
    """Can be used as: **input parameter**

    Sometimes there is the need to give a variable length of other
    **input types** as argument to an experiment. Of course here the
    command line parsing is somewhat more difficult, because the
    argument count isn't determined in before.

    The *datatype* argument is the type of the input parameter which
    should be collected::

       inputs = { "strings": List(String) }

    The default_value must be a list of compatible instances. List list
    will be used, if no arguments are given. If any argument of this
    type on the command line is given, the default_value will not be
    used::

      inputs = { "strings": List(String, default_value=[String("abc")]) }

    On the command line the List parameter can be given multiple
    times. These will be collected, if you want collect the strings
    ``["abc", "foobar", "Hallo Welt"]`` you can use the following
    parameters on the command line::

        --strings abc --strings foobar --strings "Hallo Welt"

    .. note::

       mention that the list members will appear as separate fields in
       the metadata. all start with the name of the input, and have a
       running number -%d appended.

    More complicated is the situation, when the subtype takes more
    than one command-line argument. There you can replace the name
    prefix with a colon. For example if you want to give a list of two
    :class:`~versuchung.archives.GitArchive` instances use the input
    definition ``"git": List(GitArchive)`` together with the command line::

       --git ":clone-url /path/to/git1" --git ":clone-url /path/to/git2"

    .. note:: Be aware of the quotation marks here!

    In the experiment the input parameter behaves like a list (it
    inherits from ``list``), so it is really easy to iterate over it::

      for string in self.inputs.strings:
          print(string.value)

      for git in self.inputs.git:
          # Clone all given Git Archives
          print(git.path)
    """

    def __init__(self, datatype, default_value=[]):
        InputParameter.__init__(self)
        Type.__init__(self)
        list.__init__(self, default_value)
        if type(datatype) != type:
            datatype = type(datatype)
        self.datatype = datatype
        self.__command_line_parsed = False

    def __reinit__(self, values):
        if hasattr(self.datatype, "__reinit__"):
            self[:] = []
            self.subobjects.clear()
            for item in values:
                # Intatiate Datatype
                item = self.datatype(item)
                self.subobjects["%d" % len(self)] = item
                self.append(item)



    def inp_setup_cmdline_parser(self, parser):
        self.inp_parser_add(parser, None, [], action="append",
                            help = "List parameter for type %s" %
                            self.datatype.__name__)


    def before_experiment_run(self, parameter_type):
        for idx, value in enumerate(self):
            self.subobjects[str(idx)] = value

        Type.before_experiment_run(self,parameter_type)

    def inp_extract_cmdline_parser(self, opts, args):
        args = self.inp_parser_extract(opts, None)
        if not args:
            return

        # Remove default values
        self[:] = []
        self.subobjects.clear()

        while len(args) > 0:
            arg = args.pop(0)
            if hasattr(self.datatype, "path") and not os.path.exists(arg):
                args = glob.glob(arg) + args
                # Remove duplicated items caused by symlinks
                args = list(set([os.path.realpath(x) for x in args]))
                continue
            # Create Subtype and initialize its parser
            subtype = self.datatype()
            self.subobjects["%d" % len(self)] = subtype
            subtype_parser = ArgumentParser()
            subtype.inp_setup_cmdline_parser(subtype_parser)

            sub_args = ["--" + subtype.name, arg]

            opts = subtype_parser.parse_args(sub_args)
            subtype.inp_extract_cmdline_parser(opts, sub_args)

            self.append(subtype)

    def inp_metadata(self):
        metadata = {self.name: []}
        for idx, item in enumerate(self):
            m = item.inp_metadata()
            metadata[self.name].append(m["%s-%d" % (self.name, idx)])
            metadata.update(m)
        return metadata

    @property
    def value(self):
        """Returns the object (which behaves like a list) itself. This
           is only implemented for a coherent API."""
        return self

    def __repr__(self):
        return Type.__repr__(self, list.__repr__(self))