Example No. 1
def ComputeAutoCorrelation(obj, *args, **kargs):
    """
    ComputeAutoCorrelation
    """
    error.CheckType([obj], [[
        _VariableOrderMarkov, _HiddenVariableOrderMarkov,
        _VariableOrderMarkovData
    ]])

    error.CheckArgumentsLength(args, 1, 2)

    if len(args) == 1:
        variable = 1
        value = args[0]
    elif len(args) == 2:
        variable = args[0]
        value = args[1]

    #_check_nb_variable(obj, variable)
    max_lag = error.ParseKargs(kargs, "MaxLag", MAX_LAG)

    error.CheckType([variable, value, max_lag], [int, int, int])

    if len(args) == 1:
        return obj.state_autocorrelation_computation(value, max_lag)
    elif len(args) == 2:
        return obj.output_autocorrelation_computation(variable, value, max_lag)
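A minimal usage sketch for the dispatch above; `vomd` stands for an assumed _VariableOrderMarkovData object and the values are illustrative only:

# one positional argument: state autocorrelation of state 0
corr_state = ComputeAutoCorrelation(vomd, 0, MaxLag=20)
# two positional arguments: output autocorrelation of value 1 for variable 1
corr_output = ComputeAutoCorrelation(vomd, 1, 1, MaxLag=20)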
Example No. 2
def Split(obj, step):
    """.. todo:: documentaiton

    input markovian
    """
    error.CheckType([obj], [[_MarkovianSequences, _VariableOrderMarkovData,
                            _SemiMarkovData, _NonHomogeneousMarkovData]])
    error.CheckType([step], [int])

    return obj.split(step)
Example No. 3
def _estimate_hidden_variable_order_markov(obj, *args, **kargs):
    """
    Estimate switch hidden_variable_order_markov
    """
    from openalea.sequence_analysis._sequence_analysis import \
        MIN_NB_STATE_SEQUENCE, \
        MAX_NB_STATE_SEQUENCE, \
        NB_STATE_SEQUENCE_PARAMETER
    from openalea.stat_tool._stat_tool import \
        FORWARD, \
        FORWARD_BACKWARD_SAMPLING, \
        FORWARD_DYNAMIC_PROGRAMMING

    GlobalInitialTransition = kargs.get("GlobalInitialTransition", True)
    CommonDispersion = kargs.get("CommonDispersion", False)
    NbIteration = kargs.get("NbIteration", 80)
    Counting = kargs.get("Counting", True)
    StateSequence = kargs.get("StateSequence", True)
    Parameter = kargs.get("Parameter", NB_STATE_SEQUENCE_PARAMETER)
    MinNbSequence = kargs.get("MinNbStateSequence", MIN_NB_STATE_SEQUENCE)
    MaxNbSequence = kargs.get("MaxNbStateSequence", MAX_NB_STATE_SEQUENCE)
    Algorithm = error.ParseKargs(kargs, "Algorithm", 'EM', \
                                 sub_markovian_algorithms)

    error.CheckType([CommonDispersion, Counting, GlobalInitialTransition, NbIteration,
                     MinNbSequence, MaxNbSequence, Parameter, StateSequence],
                     [bool, bool, bool, int, int, int, [int, float], bool])

    error.CheckType([args[0]], [_HiddenVariableOrderMarkov])

    # sanity check on arguments
    # this one can be checked only when Chain becomes public and is exported in
    # export_variable_order_markov
    # if type == 'e' and kargs.get("GlobalInitialTransition"): raise an error
    if Algorithm != sub_markovian_algorithms["MCEM"]:
        options = ["Parameter", "MaxNbStateSequence", "MinNbStateSequence"]
        for option in options:
            if kargs.get(option):
                raise ValueError("If % is provided, Algorithm cannot be MCEM" %
                                 option)

    if Algorithm == FORWARD:
        hmarkov = obj.hidden_variable_order_markov_estimation(
                args[0], GlobalInitialTransition, CommonDispersion,
                Counting, StateSequence, NbIteration)

    elif Algorithm == FORWARD_BACKWARD_SAMPLING:
        hmarkov = obj.hidden_variable_order_markov_stochastic_estimation(
                        args[0], GlobalInitialTransition, CommonDispersion,
                        MinNbSequence, MaxNbSequence, Parameter, Counting,
                        StateSequence, NbIteration)
    else:
        raise ValueError("unknown Algorithm; expected 'EM' or 'MCEM'")
    return hmarkov
Example No. 4
def SaveMTG(obj, Filename=None, Type=None):
    """
    Save sequence in MTG format.
    :param Type: list of "S" or "N" characters for Symbolic or Numeric
    """
    from openalea.stat_tool.enums import sub_variable_type

    error.CheckType([obj], [ms_vomd_smd_nhmd])
    error.CheckType([Filename, Type], [str, list])

    type = []
    for pstr in Type:
        type.append(sub_variable_type[pstr])

    obj.mtg_write(Filename, type)
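A hedged usage sketch; `seq` is an assumed markovian sequence object with two numeric variables, and the file name is a placeholder:

# "N" marks a Numeric variable, "S" a Symbolic one (see sub_variable_type)
SaveMTG(seq, Filename="sequences.mtg", Type=["N", "N"])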
Example No. 5
def ComputeWhiteNoiseCorrelation(obj, itype):
    """ComputeWhiteNoiseCorrelation

    Computation of the sample autocorrelation or cross-correlation function induced on a white noise sequence by filtering.

    :Usage:

    .. doctest::
        :options: +SKIP
        
        >>> ComputeWhiteNoiseAutoCorrelation(cf, order)
        >>> ComputeWhiteNoiseAutoCorrelation(cf, filter)
        >>> ComputeWhiteNoiseAutoCorrelation(cf, frequencies)
        >>> ComputeWhiteNoiseAutoCorrelation(cf, dist)

    :Arguments:

    * cf (correlation): sample autocorrelation or cross-correlation function (in Pearson's sense),
    * order (int): order of differencing,
    * filter (array(real)): filter values on a half width, i.e. from one extremity to the central value (with the constraint that filter(i) + filter(m) = 1),
    * frequencies (array(int)): frequencies defining the filter,
    * dist (distribution, mixture, convolution, compound): symmetric distribution whose size of the support is even defining the filter (for instance Distribution("BINOMIAL", 0, 4, 0.5)),

    :Returned Object:

    No object is returned.

    :Background:

    The application of linear filters for trend removal induces an autocorrelation structure. The effect of a given linear filter on the autocorrelation structure of the residual sequence can be roughly described as follows: the number of non-zero induced autocorrelation coefficients increases with the width of the filter while their numerical magnitudes decrease.

    .. seealso::  :func:`~openalea.sequence_analysis.correlation.ComputeCorrelation`.
    """

    error.CheckType([obj], [_Correlation])
    error.CheckType([itype], [[
        int, list, _DiscreteParametricModel, _DiscreteMixture, _Convolution,
        _Compound
    ]])

    if isinstance(itype, int):
        obj.white_noise_correlation_order(itype)
    elif isinstance(itype, list):
        obj.white_noise_correlation_filter(itype)
    else:
        obj.white_noise_correlation_dist(itype)

    return obj
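A short sketch of the three dispatch branches above, assuming `cf` is a correlation object returned by ComputeCorrelation and that Distribution comes from openalea.stat_tool:

ComputeWhiteNoiseCorrelation(cf, 1)                  # int: order of differencing
ComputeWhiteNoiseCorrelation(cf, [1., 1., 1.])       # list: filter values on a half width
ComputeWhiteNoiseCorrelation(cf, Distribution("BINOMIAL", 0, 4, 0.5))  # symmetric distribution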
Example No. 6
def _estimate_semi_markov(obj, *args, **kargs):

    Type = 'v'
    #error.CheckType([args[0]], [str])

    Type = error.CheckDictKeys(args[0], stochastic_process_type)

    NbIteration = kargs.get("NbIteration", I_DEFAULT)
    Counting = kargs.get("Counting", True)
    Estimator = error.ParseKargs(kargs, "Estimator", "CompleteLikelihood",
                                 estimator_semi_markov_type)

    OccupancyMean = error.ParseKargs(kargs, "OccupancyMean",
                                      'Computed', mean_computation_map)

    error.CheckType([Counting, NbIteration], [bool, int])

    if Type != 'e' or Estimator == PARTIAL_LIKELIHOOD:
        if kargs.get("NbIteration"):
            raise ValueError("option NbIteration is forbidden in this context")
        if kargs.get("OccupancyMean"):
            raise ValueError("option OccupancyMean is forbidden in this context")

    return obj.semi_markov_estimation(Type.real, Estimator, Counting,
                                      NbIteration, OccupancyMean)
Example No. 7
def RemoveApicalInternodes(obj, internode):
    """RemoveApicalInternodes

    Removal of the apical internodes of the parent shoot of a 'top'.

    :Usage:

    .. doctest::
        :options: +SKIP
        
        >>> RemoveApicalInternodes(top, nb_internode)

    :Arguments:

    * top (tops),
    * nb_internode (int): number of removed internodes.

    :Returned Object:

    If nb_internode >  0 and if the removed internodes do not bear offspring
    shoots, an object of type tops is returned, otherwise no object is returned.

    .. seealso::

        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.sequence_analysis.data_transform.Reverse`.
    """
    error.CheckType([obj, internode], [_Tops, int])
    return obj.shift(internode)
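Usage sketch, assuming `top` is an existing Tops object:

new_top = RemoveApicalInternodes(top, 2)  # remove the two apical internodes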
Example No. 8
def RenewalData(*args):

    error.CheckArgumentsLength(args, 3)

    error.CheckType([args[0]], [[_Sequences, _MarkovianSequences]])
    error.CheckType([args[1], args[2]], [int, int])


    seq = args[0]
    variable = seq.nb_variable
    begin_index = args[1]
    end_index = args[2]

    timev = seq.extract_renewal_data(variable, begin_index, end_index)

    return _RenewalData(timev)
Example No. 9
def NbEventSelect(obj, imin, imax):
    """NbEventSelect

    Selection of data items of type {time interval between two observation dates,
    number of events occurring between these two observation dates} according to
    a number of events criterion.

    :Usage:

    .. doctest:: 
        :options: +SKIP
        
        >>> NbEventSelect(timev, min_nb_event, max_nb_event)


    :param TimeEvents,RenewalData time_v:
    :param int min_nb_event: minimum number of events,
    :param int max_nb_event: maximum number of events.

    :Returned Object:

    If 0 <= min_nb_event < max_nb_event and if the range of number of events defined by min_nb_event and max_nb_event makes it possible to select data items of type {time interval between two observation dates, number of events}, an object of type time_events is returned, otherwise no object is returned.

    .. seealso::

        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.sequence_analysis.data_transform.TimeScaling`,
        :func:`~openalea.sequence_analysis.data_transform.TimeSelect`.
    """
    error.CheckType([obj, imin, imax], [[_TimeEvents, _RenewalData], int, int])

    return obj.nb_event_select(imin, imax)
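Usage sketch, assuming `timev` is a TimeEvents or RenewalData object built elsewhere:

selected = NbEventSelect(timev, 1, 5)  # keep items with 1 <= number of events <= 5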
Example No. 10
def LumpabilityTest(obj, *args, **kargs):
    """.. todo:: documenation"""

    error.CheckArgumentsLength(args, 1, 1)
    error.CheckType([obj], [[_MarkovianSequences, _VariableOrderMarkovData,
                            _SemiMarkovData, _NonHomogeneousMarkovData]])


    symbol = args[0]
    Order = kargs.get("Order", 1)

    error.CheckType([symbol, Order], [list, int])

    ret = obj.lumpability_test(symbol, Order)

    if ret is False:
        raise TypeError("warning: false status returned by lumpability test")
Example No. 11
def ComputeInitialRun(obj):
    """.. todo:: documenation

    input can be sequence, markovian_sequences, nonhomogeneous_markov,
    variable_order_markov
    """
    error.CheckType([obj], [[_MarkovianSequences, _VariableOrderMarkovData,
                             _SemiMarkovData, _NonHomogeneousMarkovData]])
    return obj.initial_run_computation()
Example No. 12
def SemiMarkov(filename=None, length=DEFAULT_LENGTH, counting=True,
               cumul_threshold=0):
    """SemiMarkov constructor

    Construction of a semi-Markov chain from an ASCII file.

    :Usage:

    ::
    
        SemiMarkov(filename, length=40, counting=True, cumul_threshold=40) 

     .. todo:: make the parameter names consistent across all modules, e.g. length and Length
        should be denoted either length or Length exclusively. For backward
        compatibility, should Length be used?

    :Arguments:

    * filename (string).

    :Optional Arguments:

    * Length (int): length of sequences for the computation of the intensity and
      counting characteristic distributions (default value: 20),
    * Counting (bool): computation of counting characteristic distributions (default value: True).

    :Returned Object:

    If the construction succeeds, an object of type semi-markov is returned,
    otherwise no object is returned.

    :Background:

    A semi-Markov chain is constructed from a first-order Markov chain
    representing transitions between distinct states and state occupancy
    distributions associated to the non-absorbing states. The state occupancy
    distributions are defined as objects of type distribution with the
    additional constraint that the minimum time spent in a given state
    is at least 1 (inf_bound >= 1).

    .. seealso::

        :class:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.sequence_analysis.compare.Compare`,
        :func:`~openalea.sequence_analysis.simulate.Simulate`.

    """
    # todo: cumul_threshold has no default value in the C++ code (semi_markov.cpp);
    # is zero a good default value?
    error.CheckType([filename, length, counting, cumul_threshold],
                    [str, int, bool, [int, float]])

    if not os.path.isfile(filename):
        raise IOError("Invalid filename")
    else:
        return _SemiMarkov(filename, length, counting, cumul_threshold)
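A hedged construction sketch; the file name is a placeholder for a valid semi-Markov ASCII description:

smc = SemiMarkov("semi_markov_model.dat", length=40, counting=True)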
Example No. 13
def TransformPosition(obj, step=None):
    """.. todo:: documenation

    input is a sequence only"""
    error.CheckType([obj, step], [_Sequences, int])

    ret = obj.transform_position(step)
    if hasattr(ret, 'markovian_sequences'):
        return ret.markovian_sequences()
    else:
        return ret
Example No. 14
def RemoveIndexParameter(obj):
    """.. todo:: documenation

    input can be sequence, markovian_sequences,
    nonhomogeneous_markov, variable_order_markov"""

    error.CheckType([obj], [[_Sequences, _MarkovianSequences,
                             _VariableOrderMarkovData, _SemiMarkovData,
                             _NonHomogeneousMarkovData]])
    if isinstance(obj, _Sequences):
        return obj.remove_index_parameter().markovian_sequences()
    else:
        return obj.remove_index_parameter()
def HiddenVariableOrderMarkov(filename=None, Length=DEFAULT_LENGTH,
                              CumulThreshold=OCCUPANCY_THRESHOLD):
    """HiddenVariableOrderMarkov

    :param str filename:
    :param int Length: 
    :param float CumulThreshold: 


    .. todo:: documentation
    """

    error.CheckType([filename, Length, CumulThreshold], [str, int, float])

    if not os.path.isfile(filename):
        raise IOError("Invalid filename %s" % filename)
    else:
        return _HiddenVariableOrderMarkov(filename, Length,  CumulThreshold)
Example No. 16
def NonhomogeneousMarkov(filename, length=DEFAULT_LENGTH):
    """NonhomogeneousMarkov constructor

    :param str filename:
    :param int length: 

    :Usage:
    
    .. doctest:: 
        :options: +SKIP
        
        >>> nm = NonhomogeneousMarkov("filename.dat")
        >>> nm = NonhomogeneousMarkov("filename.dat", 10)
    """
    error.CheckType([filename, length], [str, int])

    if os.path.isfile(filename):
        return _NonHomogeneousMarkov(filename, length)
    else:
        raise IOError("bad file name")
def VariableOrderMarkov(*args, **kargs):
    """VariableOrderMarkov

    :Usage:

    .. doctest::
        :options: +SKIP

        >>> VariableOrderMarkov(filename)
    """
    error.CheckArgumentsLength(args, 1, 1)
    error.CheckType([args[0]], [str])
    filename = args[0]
    Length = kargs.get("Length", DEFAULT_LENGTH)

    if os.path.isfile(filename):
        vom = _VariableOrderMarkov(filename, Length)
    else:
        raise IOError("bad file name %s" % filename)

    return vom
Example No. 18
def IndexParameterType(obj):
    """

    input can be sequence, markovian_sequences,
    nonhomogeneous_markov, variable_order_markov

    .. doctest::
        :options: +SKIP
        
        >>> obj.index_parameter_type
        3
        >>> IndexParameterType(obj)
        openalea.sequence_analysis._sequence_analysis.IndexParameterType.POSITION
    """

    error.CheckType([obj], [[_Sequences, _MarkovianSequences,
                             _VariableOrderMarkovData, _SemiMarkovData,
                             _NonHomogeneousMarkovData]])
    from openalea.sequence_analysis.enums import index_parameter_type_map

    type = obj.index_parameter_type
    for key, value in index_parameter_type_map.items():
        if value == type:
            return key
Example No. 19
def ComputeCorrelation(obj, *args, **kargs):
    """Computation of sample autocorrelation or cross-correlation functions.

    :Examples:

    .. doctest::
        :options: +SKIP
        
        >>> ComputeCorrelation(seq1, MaxLag=10, Type="Spearman", Normalization="Exact")
        >>> ComputeCorrelation(seqn, variable, MaxLag=10, Type="Spearman", Normalization="Exact")
        >>> ComputeCorrelation(seqn, variable1, variable2, MaxLag=10, Type="Spearman", Normalization="Exact")

    :Arguments:

    * seq1 (sequences, discrete_sequences, markov_data, semi-markov_data):
      univariate sequences,
    * seqn (sequences, discrete_sequences, markov_data, semi-markov_data):
      multivariate sequences,
    * variable (int): variable index (computation of a sample autocorrelation function).
    * variable1, variable2 (int): variable indices (computation of a sample
      cross-correlation function).

    :Optional Arguments:

    * Type (string): type of correlation coefficient: "Pearson" (linear
      correlation coefficient - default value), "Spearman" or "Kendall" (rank
      correlation coefficients).
    * MaxLag (int): maximum lag. A default value is computed from the sequence
      length distribution,
    * Normalization (string): normalization of the correlation coefficients:
      "Approximated" (the default - usual convention for time series analysis)
      or "Exact" (highly recommended for samples of short sequences). This
      optional argument can only be used if the optional argument Type is set
      at "Pearson" or "Spearman".

    :Returned Object:

    If variable, or variable1 and variable2 are valid indices of variables (and
    are different if two indices are given) and if 0 <= MaxLag < (maximum length
    of sequences), then an object of type correlation is returned, otherwise no
    object is returned.

    :Background:

    In the univariate case or if only variable is given, a sample
    autocorrelation function is computed. If variable1 and variable2 are given,
    a sample cross-correlation function is computed.

    .. seealso::

        :func:`~openalea.sequence_analysis.correlation.ComputePartialAutoCorrelation`,
        :func:`~openalea.sequence_analysis.correlation.ComputeWhiteNoiseCorrelation`

"""

    error.CheckType([obj], [[
        _Sequences, _MarkovianSequences, _VariableOrderMarkovData,
        _SemiMarkovData, _NonHomogeneousMarkovData
    ]])

    if obj.nb_variable == 1:
        variable1 = 1
        variable2 = 1
    else:
        error.CheckType([args[0]], [int])
        #todo: check that variable1 <= nb_variable and > 0
        variable1 = args[0]
        if len(args) == 1:
            variable2 = variable1
        elif len(args) == 2:
            #todo: check that variable1 <= nb_variable and > 0
            error.CheckType([args[1]], [int])
            variable2 = args[1]
        else:
            raise TypeError("1 or 2  non-optional arguments required")

    max_lag = error.ParseKargs(kargs, "MaxLag", I_DEFAULT)
    itype = error.ParseKargs(kargs, "Type", "Pearson", type_dict)
    normalization = error.ParseKargs(kargs, "Normalization", "Exact",
                                     norm_type)
    IndividualMean = error.ParseKargs(kargs, "IndividualMean", False)

    #if normalization_option and ((type == SPEARMAN2) or (type == KENDALL)):
    #    raise Exception

    #if individual_mean_option and (type != PEARSON):
    #    raise Exception

    # check argument validity.
    return obj.correlation_computation(variable1, variable2, itype, max_lag,
                                       normalization, IndividualMean)
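A sketch of the three documented call patterns, assuming `seq1` is a univariate and `seqn` a multivariate sequence object (e.g. built with Sequences):

acf = ComputeCorrelation(seq1, MaxLag=10, Type="Spearman", Normalization="Exact")
acf_var1 = ComputeCorrelation(seqn, 1, MaxLag=10)     # autocorrelation of variable 1
ccf = ComputeCorrelation(seqn, 1, 2, MaxLag=10)       # cross-correlation of variables 1 and 2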
Example No. 20
def ComputePartialAutoCorrelation(obj, *args, **kargs):
    """ComputePartialAutoCorrelation

    Computation of sample partial autocorrelation functions.

    :Usage:

    .. doctest::
        :options: +SKIP
        
        >>> ComputePartialAutoCorrelation(seq1, MaxLag=10, Type="Kendall")
        >>> ComputePartialAutoCorrelation(seqn, variable, MaxLag=10, Type="Kendall")

    :Arguments:

    * seq1 (**sequences**, discrete_sequences, markov_data, semi-markov_data):
      univariate sequences,
    * seqn (sequences, discrete_sequences, markov_data, semi-markov_data):
      multivariate sequences,
    * variable (int): variable index.

    :Optional Arguments:

    * MaxLag (int): maximum lag. A default value is computed from the sequence
      length distribution,
    * Type (string): type of correlation coefficient: "Pearson" (linear
      correlation coefficient - the default) or "Kendall" (rank correlation coefficient).

    :Returned Object:

    If variable is a valid variable index and if 1 <= MaxLag < (maximum length
    of sequences), an object of type correlation is returned, otherwise no object
    is returned.

    :Background:

    The partial autocorrelation coefficient at lag k measures the correlation
    between :math:`x_t` and :math:`x_{t+k}` not accounted for by
    :math:`x_{t+1}, ..., x_{t+k-1}`  (or after adjusting for the effects of
    :math:`x_{t+1}, ..., x_{t+k-1}`).

    .. seealso::

        :func:`~openalea.sequence_analysis.correlation.ComputeCorrelation`
    """
    error.CheckType([obj], [[
        _Sequences, _MarkovianSequences, _VariableOrderMarkovData,
        _SemiMarkovData, _NonHomogeneousMarkovData
    ]])

    error.CheckArgumentsLength(args, 0, 1)

    if len(args) == 0:
        variable = 1
    else:
        variable = args[0]

    max_lag = error.ParseKargs(kargs, "MaxLag", MAX_LAG)
    Type = error.ParseKargs(kargs, "Type", "Pearson", type_dict)

    error.CheckType([variable, max_lag], [int, int])
    _check_nb_variable(obj, variable)

    #todo check that  Type is Pearson or Kendall
    return obj.partial_autocorrelation_computation(variable, Type, max_lag)
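Usage sketch mirroring the docstring above; `seq1` and `seqn` are assumed sequence objects:

pacf = ComputePartialAutoCorrelation(seq1, MaxLag=10, Type="Kendall")
pacf_var2 = ComputePartialAutoCorrelation(seqn, 2, MaxLag=10)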
Example No. 21
def Renewal(*args, **kargs):
    """Renewal

    Construction of a (either ordinary or equilibrium) renewal process from an inter-event distribution or from an ASCII file.

    :Usage:

    .. doctest::
        :options: +SKIP
        
        >>> Renewal("BINOMIAL", inf_bound, sup_bound, proba,  Type="Equilibrium", ObservationTime=40)
        >>> Renewal("POISSON", inf_bound, param, Type="Equilibrium", ObservationTime=40)
        >>> Renewal("NEGATIVE_BINOMIAL", inf_bound, param, proba, Type="Equilibrium", ObservationTime=40)
        >>> Renewal(inter_event, Type="Equilibrium", ObservationTime=40)
        >>> Renewal(file_name, Type="Equilibrium", ObservationTime=40)

    :Arguments:

    * inf_bound (int): lower bound to the range of possible values (shift parameter),
    * sup_bound (int): upper bound to the range of possible values (only relevant for binomial or uniform distributions),
    * param (int, real): parameter of either the Poisson distribution or the negative binomial distribution.
    * proba (int, real): probability of 'success' (only relevant for binomial or negative binomial distributions).

    .. note:: the names of the parametric discrete distributions can be summarized by their first letters: "B" ("BINOMIAL"), "P" ("POISSON"), "NB" ("NEGATIVE_BINOMIAL").

    * inter_event (distribution, mixture, convolution, compound): inter-event distribution,
    * file_name (string).

    :Optional Arguments:

    * Type (string): type of renewal process: "Ordinary" or "Equilibrium" (the default).
    * ObservationTime (int): length of the observation period for the computation of the intensity and counting distributions (default value: 20),

    :Returned Object:

    If the construction succeeds, an object of type renewal is returned, otherwise no object is returned.

    :Background:

    A renewal process is built from a discrete distribution termed the inter-event 
    distribution which represents the time interval between consecutive events. Two types 
    of renewal processes are available:
        * ordinary renewal process where the start of the observation period coincides 
          with the occurrence time of an event (synchronism assumption),
        * equilibrium or stationary renewal process where the start of the observation
          period is independent of the process which generates the data (asynchronism 
          assumption).

    In the case where the arguments are the name and the parameters of the inter-event \
    distribution, the constraints on parameters described in the definition of the syntactic 
    form of the type distribution apply (cf. File Syntax).

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.sequence_analysis.simulate.Simulate` (renewal process)

    .. todo:: ident should correspond to Binomial, B, NegativeBinomial and so on
    """

    #todo: move this enum to enumerate.py
    type_map = {
        "Equilibrium":'e',
        "Ordinary": 'o'
    }

    Type = error.ParseKargs(kargs, "Type", "Equilibrium", type_map)
    ObservationTime = kargs.get("ObservationTime", DEFAULT_TIME)
    Scale = kargs.get("Scale", None)  #todo check default values !

    a = [str]
    a.extend(model_distribution_types)
    error.CheckType([args[0]], [a])

    # a filename constructor. check that only one argument, which is a string
    # ------------------ todo ----------- not tested
    if len(args)==1 and isinstance(args[0], str):
        filename = args[0]
        if os.path.isfile(filename):
            renewal =  _Renewal(filename)
        else:
            raise IOError("bad file name")

    # otherwise, we switch to a constructor from a distribution
    elif isinstance(args[0], str):

        if args[0] == "BINOMIAL" or args[0] == "B":
            error.CheckArgumentsLength(args, 4, 4)
            error.CheckType([args[1], args[2], args[3]],
                            [int, int, [int, float]])
            inf_bound = args[1]
            sup_bound = args[2]
            probability = args[3]
            parameter = -1
        elif args[0] == "NEGATIVE_BINOMIAL" or args[0] == "NB":
            error.CheckArgumentsLength(args, 4, 4)
            error.CheckType([args[1], args[2], args[3]],
                            [int, [int, float], [int, float]])
            inf_bound = args[1]
            sup_bound = -1
            parameter = args[2]
            probability = args[3]
        elif args[0] == "POISSON" or args[0] == "P":
            error.CheckArgumentsLength(args, 4, 4)
            error.CheckType([args[1], args[2], args[3]],
                            [int, [float, int], [int, float]])
            inf_bound = args[1]
            sup_bound = -1
            parameter = args[2]
            probability = args[3]
        else:
            raise NotImplemented("""case not implemented. First arg must be a
                valid filename or a "BINOMIAL", "NEGATIVE_BINOMIAL, or "POISSON"
                """)
        # if all keys in distribution_identifier_type are used, we can move this
        # piece of call before the if and remove the NotImplemented above
        ident = distribution_identifier_type[args[0]]

        RENEWAL_THRESHOLD = 1.

        inter_event = _DiscreteParametric(ident , inf_bound , sup_bound , parameter ,
                                   probability , RENEWAL_THRESHOLD)

        if Scale:
            error.CheckType([Scale], [float])
            scaled_inter_event = _DiscreteParametric(inter_event , Scale)
            renewal = _Renewal(scaled_inter_event , Type , ObservationTime)
        else:
            renewal = _Renewal(inter_event , Type , ObservationTime)


    #    renewal = _Renewal(args[0], range(0,len(args[0])),
    # index_parameter_type) or may be provided by the user.
    elif type(args[0]) in model_distribution_types:
        renewal = _Renewal(_DiscreteParametric(args[0]), Type, ObservationTime)

    return renewal
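A construction sketch following the documented parametric forms; all numerical values are illustrative:

renew_binomial = Renewal("BINOMIAL", 2, 5, 0.5, Type="Equilibrium", ObservationTime=40)
renew_negbin = Renewal("NB", 1, 2.5, 0.6, Type="Ordinary", ObservationTime=40)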
Example No. 22
def Sequences(obj, **kargs):
    """Construction of a set of sequences from multidimensional arrays
    of integers, from data generated by a renewal process or from an
    ASCII file.

    The data structure of type array(array(array(int))) should be
    constituted at the most internal level of arrays of constant size. If the
    optional argument IndexParameter is set at "Position" or "Time", the data
    structure of type array(array(array(int))) is constituted at the most
    internal level of arrays of size 1 + n (index parameter, n variables
    attached to the explicit index parameter). If the optional argument
    IndexParameter is set at "Position", only the index parameter of the
    last array of size 1 + n is considered and the first component of successive
    elementary arrays (representing the index parameter) should be
    increasing. If the optional argument IndexParameter is set at "Time", the
    first component of successive elementary arrays should be strictly
    increasing.


    :Parameters:

    * array1 (array(array(int))): input data for univariate sequences
    * arrayn (array(array(array(int)))): input data for multivariate sequences,
    * timev (renewal_data),
    * file_name (string).

    :Optional Parameters:

    * Identifiers (array(int)): explicit identifiers of sequences. This 
      optional argument can only be used if the first argument is of 
      type array(array(int / array(int))).
    * VertexIdentifiers (array(array(int))): explicit identifiers of vectors. 

    * IndexParameter (string): type of the explicit index parameter: "Position"
      or "Time" (the default: implicit discrete index parameter starting at 0). 
      This optional argument can only be used if the first argument is of type 
      array(array(int / array(int))).
    
    .. todo:: IndexParameterType

    :Returns:

    If the construction succeeds, an object of type sequences or 
    discrete_sequences is returned, otherwise no object is returned. The 
    returned object is of type discrete_sequences if all the variables are of 
    type STATE, if the possible values for each variable are consecutive from 0 
    and if the number of possible values for each variable is <= 15.

    :Examples:

    .. doctest::
        
        >>> # Single univariate sequence case (array1). 
        >>> seq1 = Sequences([1, 2, 3], Identifiers=[8])
        >>> seq1.nb_sequence
        1
        >>> seq1.nb_variable
        1
        >>> # General case arrayn
        >>> seq = Sequences([ 
        ...    [[1,2],[3,4]], 
        ...    [[21,22],[23,24]], 
        ...    [[31,32],[33,34], [35,36] ]], 
        ...    Identifiers = [1,8,12],
        ...    VertexIdentifiers = [[1,2],[3,4],[5,6,7]])
        >>> seq.nb_sequence
        3
        >>> seq.nb_variable
        2
        >>> seq.max_length
        3

    .. doctest::
        :options: +SKIP
        
            >>> Sequences(timev)
            >>> Sequences(file_name)
    
    .. seealso::

       :class:`~openalea.stat_tool.output.Save`,
       :func:`~openalea.sequence_analysis.data_transform.AddAbsorbingRun`,
       :func:`~openalea.stat_tool.cluster.Cluster`,
       :func:`~openalea.sequence_analysis.data_transform.Cumulate`,
       :func:`~openalea.sequence_analysis.data_transform.Difference`,
       :func:`~openalea.sequence_analysis.data_transform.IndexParameterExtract`,
       :func:`~openalea.sequence_analysis.data_transform.LengthSelect`,
       :func:`~openalea.stat_tool.data_transform.Merge`,
       :func:`~openalea.stat_tool.data_transform.MergeVariable`,
       :func:`~openalea.sequence_analysis.data_transform.MovingAverage`,
       :func:`~openalea.sequence_analysis.data_transform.RecurrenceTimeSequences`,
       :func:`~openalea.sequence_analysis.data_transform.RemoveRun`,
       :func:`~openalea.sequence_analysis.data_transform.Reverse`,
       :func:`~openalea.sequence_analysis.data_transform.SegmentationExtract`,
       :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
       :func:`~openalea.stat_tool.data_transform.SelectVariable`,
       :func:`~openalea.stat_tool.data_transform.Shift`,
       :func:`~openalea.stat_tool.cluster.Transcode`,
       :func:`~openalea.stat_tool.data_transform.ValueSelect`,
       :func:`~openalea.sequence_analysis.data_transform.VariableScaling`.
       :func:`~openalea.stat_tool.data_transform.ExtractHistogram`,
       :func:`~openalea.sequence_analysis.data_transform.ExtractVectors`,
       :func:`~openalea.sequence_analysis.correlation.ComputeCorrelation`,
       :func:`~openalea.sequence_analysis.correlation.ComputePartialAutoCorrelation`,
       :func:`~openalea.sequence_analysis.data_transform.ComputeSelfTransition`,
       :func:`~openalea.sequence_analysis.compare.Compare`,
       :func:`~openalea.sequence_analysis.estimate.Estimate`,
       :func:`~openalea.sequence_analysis.data_transform.ComputeStateSequences`,
       :func:`~openalea.sequence_analysis.simulate.Simulate`.


    """
    import numpy

    sequence = None

    error.CheckType([obj], [[str, _RenewalData, list]])

    if isinstance(obj, str):
        filename = obj
        if os.path.isfile(filename):
            OldFormat = error.ParseKargs(kargs, "OldFormat", False, bool_type)
            sequence = _Sequences(filename, OldFormat)
        else:
            raise IOError("bad file name %s" % filename)
        if hasattr(sequence, 'markovian_sequences'):
            try:
                sequence = sequence.markovian_sequences()
            except Exception:
                pass
        try:
            sequence.nb_sequence
        except ValueError:
            raise ValueError("File read but issue while parsing. Returned sequence is not valid")
        return sequence
    elif isinstance(obj, _RenewalData):
        sequence = _Sequences(obj)
        if hasattr(sequence, 'markovian_sequences'):
            try:
                sequence = sequence.markovian_sequences()
            except Exception:
                pass



        return sequence

    # otherwise, we switch to a list constructor that requires a list of seqs
    # transform input into array of arrays of arrays 
    # case 1: general case where input = [[[1,2],[3,4]],[[1,2],[3,4], [5,6]]] nothing to do
    # case 2: univariate single sequence, input = [1,2,3,4,5,6] so it is [[[1],[2],[3],...]]
    # case 3: univariate sequences input = [[1,2],[3,4],[5,6,7]] (i.e, different vector sizes)
    # case 4: multivariate sequence input = [[1,2],[3,4],[5,6]]

    Verbose = error.ParseKargs(kargs, "Verbose", False)

    Univariate = error.ParseKargs(kargs, "Univariate", False)



    if type(obj) == list:
        first_sequence = obj[0]
        if type(first_sequence) in [int, float]:
            obj = [[[x] for x in obj]]
            if Verbose:
                print('this is a single univariate sequence')
        elif type(first_sequence) == list:
            # either a single multivariate sequence or the general case of
            # several multivariate sequences
            if type(first_sequence[0]) == list:
                if Verbose:
                    print('this is the general case, nothing to do')
            elif type(first_sequence[0]) in [int, float]:
                lengths = numpy.array([len(x) for x in obj])
                if lengths.var() == 0:
                    if Verbose:
                        print('this is the ambiguous case')
                    if lengths[0] < 5 and Univariate == False:
                        if Verbose:
                            print('this is 1 single multivariate sequence')
                        obj = [obj]
                    else:
                        if Verbose:
                            print('this is univariate sequences')
                        res = []
                        for x in obj:
                            res.append([[y] for y in x])
                        obj = res
                else:
                    if Verbose:
                        print('this is univariate sequences')
                    res = []
                    for x in obj:
                        res.append([[y] for y in x])
                    obj = res
            else:
                raise SyntaxError('wrong syntax for input object')


    # 0 for int, 1 for float. By default all variables are int.
    # Now, we loop over all sequences and vectors; if a variable
    # is found to be float, then its type is float.
    # Once a float is found, there is no need to carry on with the current variable.
    InputTypes = [0] * len(obj[0][0])
    nb_variables = len(obj[0][0])
    for seq in obj:
        for vec in seq:
            for index, var in enumerate(vec):
                assert type(var) in [int, float], "wrong types var=%s and its type is %s" % (var, type(var))
                if type(var)==float:
                    InputTypes[index]=1



    from openalea.sequence_analysis._sequence_analysis import TIME, POSITION, \
        IMPLICIT_TYPE
    #error.CheckArgumentsLength(args, 1, 1)


    IndexParameterType = error.ParseKargs(kargs, "IndexParameterType", "IMPLICIT_TYPE", index_parameter_type_map)
    IndexParameter = error.ParseKargs(kargs, "IndexParameter",  [])
    Identifiers = error.ParseKargs(kargs, "Identifiers", [])
    VertexIdentifiers = error.ParseKargs(kargs, "VertexIdentifiers", [])

 
    # build up a list of unique identifiers if none is provided
    lengths=[]
    for seq in obj:
        lengths.append(len(seq))
    # all values must be strictly positive
    if len(Identifiers)>0:
        assert len([x for x in Identifiers if x<=0]) == 0
    else:
        #create a standard identifiers list [0,1,2,....]
        for i, seq in enumerate(obj):
            Identifiers.append(i)

    # build up a list of unique vertex identifiers if none is provided
    if len(VertexIdentifiers)>0:
        assert len([x for seq in VertexIdentifiers for x in seq if x <= 0]) == 0
    else:
        #create a standard identifiers list [0,1,2,....] for each sequences ?
        index = 0
        for i, seq in enumerate(obj):
            VertexIdentifiers.append([])
            for vec in  seq:
                VertexIdentifiers[i].append(index)
                index+=1

    # check unicity of vertex identifiers
    idents = []
    for seq in VertexIdentifiers:
        for ident in seq:
            idents.append(ident)
    assert len(set(idents)) == len(idents), "ERROR, VertexIdentifiers must be made of unique identifiers (for each vector)"

    # check unicity of identifiers
    idents = []
    for ident in Identifiers:
        idents.append(ident)
    assert len(set(idents)) == len(idents), "ERROR, Identifiers must be made of unique identifiers (for each sequence)"

    if len(IndexParameter)==0:
        index = 0
        for i, seq in enumerate(obj):
            IndexParameter.append([])
            for vec in seq:
                IndexParameter[i].append(index)
                index+=1
            if IndexParameterType==POSITION:
                IndexParameter[i].append(index)
                index+=1

    for i, seq in enumerate(obj):
        #print len(seq), len(IndexParameter)
        if IndexParameterType == POSITION:
            assert len(seq) == len(IndexParameter[i]) - 1, "ERROR, wrong IndexParameter length. When IndexParameterType is POSITION, IndexParameter length must be equal to the sequence length + 1"
        else:
            assert len(seq) == len(IndexParameter[i]), "ERROR, wrong IndexParameter length. IndexParameter length must be equal to the sequence length."
    #todo: check that IndexParameter length is correct (length of vectors + 1 if POSITION)


    valid_param = [POSITION, TIME, IMPLICIT_TYPE]
    if IndexParameterType not in valid_param:
        raise ValueError("""IndexParameter can be only %s if first
            argument is a list""" % valid_param)
    sequence = _Sequences(obj, Identifiers, VertexIdentifiers, IndexParameter, InputTypes, IndexParameterType)

    if hasattr(sequence, 'markovian_sequences'):
        try:
            sequence = sequence.markovian_sequences()
        except Exception:
            pass

    return sequence
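A sketch of the list-input dispatch, based on the case analysis in the code above (the data are illustrative):

s1 = Sequences([1, 2, 3, 4, 5, 6])                        # case 2: one univariate sequence
s2 = Sequences([[1, 2], [3, 4], [5, 6, 7]])               # case 3: univariate sequences of different lengths
s3 = Sequences([[[1, 2], [3, 4]], [[21, 22], [23, 24]]])  # case 1: general multivariate case
s4 = Sequences([[1, 2], [3, 4]], Univariate=True)         # ambiguous short case, forced univariate reading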
Example No. 23
def _estimate_variable_order_markov(obj, *args, **kargs):
    """
    Estimate a variable-order Markov model

    """
    from openalea.sequence_analysis._sequence_analysis import \
        LOCAL_BIC_THRESHOLD,\
        CTM_KT_THRESHOLD,\
        CTM_BIC_THRESHOLD,\
        CONTEXT_THRESHOLD,\
        CTM_BIC,\
        CTM_KT,\
        CONTEXT,\
        LOCAL_BIC

    Order = kargs.get("Order", None)
    MaxOrder = kargs.get("MaxOrder", ORDER)
    MinOrder = kargs.get("MinOrder", 0)
    Threshold = kargs.get("Threshold", LOCAL_BIC_THRESHOLD)

    error.CheckType([Threshold, MaxOrder, MinOrder], [[int, float], int, int])


    Algorithm = error.ParseKargs(kargs, "Algorithm", "LocalBIC", algorithm)
    Estimator = error.ParseKargs(kargs, "Estimator", "Laplace", estimator)
    Penalty = error.ParseKargs(kargs, "Penalty", "BIC", likelihood_penalty_type)

    GlobalInitialTransition = kargs.get("GlobalInitialTransition", True)
    GlobalSample = kargs.get("GlobalSample", True)
    Counting = kargs.get("Counting", True)

    error.CheckType([Counting, GlobalSample, GlobalInitialTransition],
                    [bool, bool, bool])

    #args0 is a string
    if len(args)>0 and isinstance(args[0], str):
        Type = 'v'
        Type = error.CheckDictKeys(args[0], stochastic_process_type)

        # check validity of the input arguments following AML's code
        if Algorithm != LOCAL_BIC and not kargs.get("Threshold"):
            if Algorithm == CTM_BIC:
                Threshold = CTM_BIC_THRESHOLD
            elif Algorithm == CTM_KT:
                Threshold = CTM_KT_THRESHOLD
            elif Algorithm == CONTEXT:
                Threshold = CONTEXT_THRESHOLD
        if Algorithm == CTM_KT and kargs.get("Estimator"):
            raise ValueError("Forbidden combinaison of Algorithm and Estimator")

        order_estimation = True

        if Order is not None:
            order_estimation = False
            MaxOrder = Order

        if not order_estimation:
            options = ["Algorithm", "Estimator", "GlobalSample", "MinOrder",
                       "Threshold"]
            for option in options:
                if kargs.get(option):
                    raise ValueError("Order and %s cannot be used together" %
                                     option)
        if Type == 'e' and kargs.get("GlobalInitialTransition"):
            raise ValueError("""
            Type e and GlobalInitialTransition cannot be used together""")

        if order_estimation is True:
            markov = obj.variable_order_markov_estimation1(
                Type.real, MinOrder, MaxOrder, Algorithm.real, Threshold,
                Estimator.real, GlobalInitialTransition, GlobalSample, Counting)
        else:
            markov = obj.variable_order_markov_estimation2(
                    Type, MaxOrder, GlobalInitialTransition, Counting)

    #Variable order markov case
    elif isinstance(args[0], _VariableOrderMarkov):
        vom = args[0]
        # can be implemented once Chain class is public and exported
        # in export_variable_order_markov
     #   if vom.type == 'e' and kargs.get("GlobalInitialTransition"):
     #       raise ValueError("""
     #       Type e and GlobalInitialTransition cannot be used together""")

        markov = obj.variable_order_markov_estimation3(vom,
                      GlobalInitialTransition, Counting)

    # array case
    elif isinstance(args[0], list):
        symbol = args[0]
        markov = obj.lumpability_estimation(symbol, Penalty,
                                         Order, Counting)

    else:
        raise KeyError("jfjf")

    return markov
Example No. 24
def _estimate_hidden_semi_markov(obj, *args, **kargs):
    """
    .. doctest::
        :options: +SKIP
        
        >>> hsmc21 = Estimate(seq21, "HIDDEN_SEMI-MARKOV", hsmc0)

    """

    from openalea.sequence_analysis._sequence_analysis import \
        MIN_NB_STATE_SEQUENCE, \
        MAX_NB_STATE_SEQUENCE, \
        NB_STATE_SEQUENCE_PARAMETER



    from openalea.stat_tool._stat_tool import \
        NO_COMPUTATION, \
        FORWARD, \
        FORWARD_BACKWARD_SAMPLING, \
        KAPLAN_MEIER

#    GlobalInitialTransition = kargs.get("GlobalInitialTransition", True)
    CommonDispersion = kargs.get("CommonDispersion", False)
    NbIteration = kargs.get("NbIteration", I_DEFAULT)
    Counting = kargs.get("Counting", True)
    StateSequence = kargs.get("StateSequence", True)
    Parameter = kargs.get("Parameter", NB_STATE_SEQUENCE_PARAMETER)
    MinNbSequence = kargs.get("MinNbStateSequence", MIN_NB_STATE_SEQUENCE)
    MaxNbSequence = kargs.get("MaxNbStateSequence", MAX_NB_STATE_SEQUENCE)
    Algorithm = error.ParseKargs(kargs, "Algorithm", 'EM', \
                                 sub_markovian_algorithms)
    Estimator = error.ParseKargs(kargs, "Estimator", 'CompleteLikelihood',
                                estimator_semi_markov_type)
    InitialOccupancyMean = kargs.get("InitialOccupancyMean", D_DEFAULT)
    MeanComputation = error.ParseKargs(kargs, "OccupancyMean", 'Computed',
                                      mean_computation_map)

    error.CheckType([CommonDispersion, Counting, NbIteration,
                     MinNbSequence, MaxNbSequence, Parameter, StateSequence,
                     InitialOccupancyMean],
                     [bool, bool, int, int, int, [int, float], bool,
                     [float, int]])
    
    print(Algorithm)
    
    if Algorithm != sub_markovian_algorithms["MCEM"]:
        options = ["Parameter", "MaxNbStateSequence", "MinNbStateSequence"]
        for option in options:
            if kargs.get(option):
                raise ValueError(
                    "If %s is provided, Algorithm must be 'MCEM'" % option)
    if Algorithm != sub_markovian_algorithms["EM"]:
        if Estimator == KAPLAN_MEIER:
            raise ValueError(
                "Estimator= KaplanMeier and Algorithm = MCEM not possible")


    error.CheckType([args[0]], [[str, _HiddenSemiMarkov]])
    if isinstance(args[0], str):
        Type = 'v'

        error.CheckType([args[1]], [int])
        NbState = args[1]

        if args[0] == "Ordinary":
            error.CheckArgumentsLength(args, 3, 3)
            error.CheckType([args[2]], [str])
            Type = 'o'
            if args[2] not in ["LeftRight", "Irreducible"]:
                raise ValueError(
                        "third argument must be LeftRight or Irreducible.")
            if args[2] == "LeftRight":
                LeftRight = True
            else:
                LeftRight = False
        elif args[0] == "Equilibrium":
            error.CheckArgumentsLength(args, 2, 2)
            Type = 'e'
            LeftRight = False
        else:
            raise AttributeError("type must be Ordinary or Equilibrium")

        if ((Type != 'e') or (Estimator == PARTIAL_LIKELIHOOD) or \
            (Algorithm != NO_COMPUTATION)) and \
            kargs.get("InitialOccupancyMean"):
            raise ValueError("Incompatible user arguments")

        if Algorithm == NO_COMPUTATION:
            hsmarkov = obj.hidden_semi_markov_estimation_model( Type, NbState,
                         LeftRight, InitialOccupancyMean, CommonDispersion, Estimator,
                         Counting, StateSequence, NbIteration, MeanComputation)
            return hsmarkov

        elif Algorithm == FORWARD_BACKWARD_SAMPLING:
            hsmarkov = obj.hidden_semi_markov_stochastic_estimation_model(
                Type, NbState, LeftRight, InitialOccupancyMean, CommonDispersion,
                MinNbSequence, MaxNbSequence, Parameter, Estimator, Counting,
                StateSequence, NbIteration)
            return hsmarkov

    elif isinstance(args[0], _HiddenSemiMarkov):

        #todo: add these lines once Chain is public
        #if ((( (args[0].type == 'o')) or
        #     (Estimator == PARTIAL_LIKELIHOOD) or
        # (Algorithm != FORWARD_BACKWARD)) and \
        # kargs.get("InitialOccupancyMean")):
        #    raise ValueError("Incompatible arguments")

        hsmarkov = args[0]
        if Algorithm == NO_COMPUTATION:
            output = obj.hidden_semi_markov_estimation(hsmarkov,
                                CommonDispersion, Estimator, Counting,
                                StateSequence, NbIteration, MeanComputation)
            return output
        elif Algorithm == FORWARD_BACKWARD_SAMPLING:
            return obj.hidden_semi_markov_stochastic_estimation(hsmarkov,
                            CommonDispersion, MinNbSequence, MaxNbSequence,
                            Parameter, Estimator, Counting,
                            StateSequence, NbIteration)
Example No. 25
def Tops(*args, **kargs):
    """Construction of a set of sequences from multidimensional arrays of
    integers, from data generated by a renewal process or from an ASCII file.

    The data structure of type array(array(array(int))) should be constituted
    at the most internal level of arrays of constant size. If the optional
    argument IndexParameter is set at "Position" or "Time", the data
    structure of type array(array(array(int))) is constituted at the most
    internal level of arrays of size 1+n (index parameter, n variables attached
    to the explicit index parameter). If the optional argument IndexParameter
    is set at "Position", only the index parameter of the last array of size
    1+n is considered and the first component of successive elementary arrays
    (representing the index parameter) should be increasing. If the optional
    argument IndexParameter is set at "Time", the first component of successive
    elementary arrays should be strictly increasing.

    :Parameters:

    * array1 (array(array(int))): input data for univariate sequences
    * arrayn (array(array(array(int)))): input data for multivariate sequences,
    * timev (renewal_data), file_name (string).

    :Optional Parameters:

    * Identifiers (array(int)): explicit identifiers of sequences. This optional
      argument can only be used if the first argument is of type
      array(array(int/array(int))).
    * IndexParameter (string): type of the explicit index parameter: "Position"
      or "Time" (the default: implicit discrete index parameter starting at 0).
      This optional argument can only be used if the first argument is of type
      array(array(int/array(int))).

    :Returns:

    If the construction succeeds, an object of type sequences or
    discrete_sequences is returned, otherwise no object is returned. The
    returned object is of type discrete_sequences if all the variables are
    of type STATE, if the possible values for each variable are consecutive
    from 0 and if the number of possible values for each variable is <= 15.

    :Examples:

    .. doctest::
        :options: +SKIP
        
        >>> Tops(array1, Identifiers=[1, 8, 12])
        >>> Tops(arrayn, Identifiers=[1, 8, 12], IndexParameter="Position")
        >>> Tops(timev)
        >>> Tops(file_name)

    .. seealso::


        :class:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.sequence_analysis.data_transform.AddAbsorbingRun`,
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.sequence_analysis.data_transform.Cumulate`,
        :func:`~openalea.sequence_analysis.data_transform.Difference`,
        :func:`~openalea.sequence_analysis.data_transform.IndexParameterExtract`,
        :func:`~openalea.sequence_analysis.data_transform.LengthSelect`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.sequence_analysis.data_transform.MovingAverage`,
        :func:`~openalea.sequence_analysis.data_transform.RecurrenceTimeSequences`,
        :func:`~openalea.sequence_analysis.data_transform.RemoveRun`,
        :func:`~openalea.sequence_analysis.data_transform.Reverse`,
        :func:`~openalea.sequence_analysis.data_transform.SegmentationExtract`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.sequence_analysis.data_transform.VariableScaling`.
        :func:`~openalea.stat_tool.data_transform.ExtractHistogram`,
        :func:`~openalea.sequence_analysis.data_transform.ExtractVectors`,
        :func:`~openalea.sequence_analysis.correlation.ComputeCorrelation`,
        :func:`~openalea.sequence_analysis.correlation.ComputePartialAutoCorrelation`,
        :func:`~openalea.sequence_analysis.data_transform.ComputeSelfTransition`,
        :func:`~openalea.sequence_analysis.compare.Compare`,
        :func:`~openalea.sequence_analysis.estimate.Estimate`,
        :func:`ComputeStateTops`,
        :func:`~openalea.sequence_analysis.simulate.Simulate`.
    """
    error.CheckArgumentsLength(args, 1, 1)

    index_parameter = error.ParseKargs(kargs, "IndexParameter",
                                       "IMPLICIT_TYPE",
                                       index_parameter_type_map)

    Identifiers = error.ParseKargs(kargs, "Identifiers", None)

    if isinstance(args[0], str):
        #todo: add True, False instead or as well as Current, Old
        #todo: !!! OldFormat set to True does not work in CPP code
        OldFormat = error.ParseKargs(kargs, "Format", "Old", {
            "Current": False,
            "Old": True
        })
        filename = args[0]
        if os.path.isfile(filename):
            return _Tops(filename, OldFormat)
        else:
            raise IOError("bad file name")
    elif isinstance(args[0], _Sequences):
        raise NotImplementedError
        #return _Tops(args[0])
    elif isinstance(args[0], list):
        error.CheckType([Identifiers], [list])
        if kargs.get("IndexParameter"):
            if Identifiers:
                return _Tops(args[0], Identifiers, index_parameter)
            else:
                return _Tops(args[0], range(0, len(args[0])), index_parameter)
        else:
            raise ValueError("wrong arguments ?")
    else:
        raise TypeError("""Expected a valid filename or a list of
         lists (e.g., [[1,0],[0,1]])""")
Example No. 26
def _estimate_renewal_interval_data(obj, **kargs):
    """
    Estimate switch renewal_interval_data

    .. todo:: to be completed and validated with tests

    see stat_func4 in aml
    """
    #only LIKELIHOOD and PENALIZED_LIKELIHOOD
    Estimator = error.ParseKargs(kargs, "Estimator",
                                 'Likelihood', estimator_type)


    NbIteration = kargs.get("NbIteration", I_DEFAULT)
    error.CheckType([NbIteration], [int])

    # distribution
    InitialInterEvent = kargs.get("InitialInterEvent", None)
    error.CheckType([InitialInterEvent], [[type(None), _DiscreteParametricModel,
                                           _DiscreteMixture, _Convolution, _Compound]])
    if isinstance(InitialInterEvent, _DiscreteParametricModel):
        InitialInterEvent = _DiscreteParametric(InitialInterEvent)
    elif InitialInterEvent is not None:
        InitialInterEvent = _Distribution(InitialInterEvent)
    #cast initialInterEvent to parametric ?
    Penalty = error.ParseKargs(kargs, "Penalty", "SecondDifference",
                               smoothing_penalty_type)
    Weight = kargs.get("Weight", D_DEFAULT)
    error.CheckType([Weight], [[int, float]])
    Outside = error.ParseKargs(kargs, "Outside", "Zero", outside_type)
    error.CheckType([Weight], [[int, float]])

    InterEventMean = error.ParseKargs(kargs, "InterEventMean",
                            'Computed', mean_computation_map)


    if Estimator == estimator_type['PenalizedLikelihood']:
        if kargs.get("InterEventMean") is None:
            InterEventMean = ONE_STEP_LATE
        elif InterEventMean == COMPUTED:
            raise ValueError("""
                Incompatible options Estimator and InterEventMean""")
    else:
        if kargs.get("Penalty"):
            raise ValueError("""Incompatible options Penalty with type o""")
        if kargs.get("Weight"):
            raise ValueError("""Incompatible options Weight with type o""")
        if kargs.get("Outside"):
            raise ValueError("""Incompatible options Outside with type o""")

    # the frequency-distribution and renewal-data cases currently share the
    # same estimation calls
    if InitialInterEvent:
        renew = obj.estimation_inter_event(InitialInterEvent,
                                           Estimator, NbIteration,
                                           InterEventMean, Weight,
                                           Penalty, Outside)
    else:
        renew = obj.estimation(Estimator, NbIteration,
                               InterEventMean,
                               Weight, Penalty, Outside)

    return renew
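# Hedged sketch of how the interval-data estimator above might be reached in
# practice: the Estimate dispatch on "RENEWAL", the import paths and the toy
# data are assumptions; only the keyword names match the options parsed above.
from openalea.stat_tool import Histogram
from openalea.sequence_analysis import Estimate

intervals = Histogram([3, 4, 4, 5, 5, 5, 6, 7])    # observed inter-event times
renew = Estimate(intervals, "RENEWAL",
                 Estimator="PenalizedLikelihood",  # enables Penalty/Weight/Outside
                 Penalty="SecondDifference",
                 Weight=1.0,
                 Outside="Zero",
                 NbIteration=100)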
Exemplo n.º 27
0
def _estimate_renewal_count_data(obj, itype, **kargs):
    """
    Estimate switch renewal_count_data
    """
    error.CheckType([obj, itype], [[_TimeEvents, _RenewalData], str])
    if itype == "Ordinary":
        Type = 'o'
    elif itype == "Equilibrium":
        Type = 'e'
    else:
        raise AttributeError("itype must be 'Ordinary' or 'Equilibrium'")


    Estimator = error.ParseKargs(kargs, "Estimator",
                                 'Likelihood', estimator_type)

    NbIteration = kargs.get("NbIteration", I_DEFAULT)
    error.CheckType([NbIteration], [int])

    InitialInterEvent = kargs.get("InitialInterEvent", None)
    error.CheckType([InitialInterEvent], [[type(None), _DiscreteParametricModel,
                                           _DiscreteMixture, _Convolution, _Compound]])

    EquilibriumEstimator = error.ParseKargs(kargs, "EquilibriumEstimator",
                            'CompleteLikelihood', estimator_semi_markov_type)

    InterEventMean = error.ParseKargs(kargs, "InterEventMean",
                            'Computed', mean_computation_map)

    Penalty = error.ParseKargs(kargs, "Penalty", "SecondDifference",
                               smoothing_penalty_type)

    Outside = error.ParseKargs(kargs, "Outside", "Zero", outside_type)
    Weight = kargs.get("Weight", -1.)
    error.CheckType([Weight], [[int, float]])

    if Type != 'e':
        if kargs.get("EquilibriumEstimator"):
            raise Exception("EquilibriumEstimator can only be used with "
                            "itype='Equilibrium'")
        if kargs.get("InterEventMean"):
            raise Exception("InterEventMean can only be used with "
                            "itype='Equilibrium'")

    if Estimator == estimator_type['PenalizedLikelihood']:
        if kargs.get("InterEventMean") is None:
            InterEventMean = ONE_STEP_LATE
        elif InterEventMean == COMPUTED:
            raise ValueError("""
                Incompatible options Estimator and InterEventMean""")
    else:
        if kargs.get("Penalty"):
            raise ValueError("Option Penalty requires "
                             "Estimator='PenalizedLikelihood'")
        if kargs.get("Weight"):
            raise ValueError("Option Weight requires "
                             "Estimator='PenalizedLikelihood'")
        if kargs.get("Outside"):
            raise ValueError("Option Outside requires "
                             "Estimator='PenalizedLikelihood'")

    if InitialInterEvent:
        #cast from InitialInterEvent to Mixture, Compound should be done

        if isinstance(InitialInterEvent, _DiscreteParametricModel):
            InitialInterEvent = _DiscreteParametric(InitialInterEvent)
        else:
            InitialInterEvent = _Distribution(InitialInterEvent)
        renew = obj.estimation_inter_event_type(Type, InitialInterEvent,
                                           Estimator, NbIteration,
                                           EquilibriumEstimator,
                                           InterEventMean, Weight,
                                           Penalty, Outside)
    else:
        renew = obj.estimation_type(Type, Estimator, NbIteration,
                               EquilibriumEstimator, InterEventMean ,
                               Weight, Penalty, Outside)

    return renew
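# Matching hedged sketch for the count-data path: the Estimate dispatch and
# the toy data are assumptions; the keyword names and the Ordinary/Equilibrium
# labels come from the function above.
from openalea.stat_tool import Histogram
from openalea.sequence_analysis import Estimate, TimeEvents

counts = Histogram([0, 1, 1, 2, 2, 3])   # number-of-events frequency distribution
timev = TimeEvents(counts, 10)           # observation period of length 10
renew = Estimate(timev, "RENEWAL", "Equilibrium",
                 EquilibriumEstimator="CompleteLikelihood",
                 InterEventMean="Computed")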
Exemplo n.º 28
0
def TopParameters(*args, **kargs):
    """TopParameters

    Construction of 'top' parameters from the three parameters or from an ASCII file.

    :Usage:

        TopParameters(proba, axillary_proba, rhythm_ratio, MaxPosition=40)
        TopParameters(file_name, MaxPosition=40)    


    :Arguments:

    * proba (int, real): growth probability of the parent shoot,
    * axillary_proba (int, real): growth probability of the offspring shoots,
    * rhythm_ratio (int, real): growth rhythm ratio offspring shoots / parent shoot,
    * file_name (string).
    
    :Optional Arguments:

    * MaxPosition (int): maximum position for the computation of the distributions of the number of internodes of offspring shoots (default value: 20).
    
    :Returned Object:

    If the construction succeeds, an object of type top_parameters is returned, otherwise no object is returned.
    
    :Background:

    The aim of the model of 'tops' is to relate the growth of offspring shoots to the growth of their parent shoot in the case of immediate branching. In the case where the arguments are the three 'top' parameters, the constraints over these parameters are described in the definition of the syntactic form of the type top_parameters (cf. File Syntax).

    :Example:

    .. plot::
        :include-source:
        :width: 50%

        from openalea.sequence_analysis import TopParameters
        top = TopParameters(0.4, 0.5, 0.6)
        top.plot()

    .. seealso::

        :func:`~openalea.stat_tool.output.Save`, 
        :func:`~openalea.sequence_analysis.simulate.Simulate`.

    """

    error.CheckArgumentsLength(args, 1, 3)
    MaxPosition = kargs.get("MaxPosition", DEFAULT_MAX_POSITION)
    assert MaxPosition < MAX_POSITION

    if len(args) == 1:
        error.CheckType([args[0], MaxPosition], [str, int])
        #filename case
        filename = args[0]
        if os.path.isfile(filename):
            return _TopParameters(filename, MaxPosition)
        else:
            raise IOError("bad file name")
    else:  # three-parameters case
        error.CheckArgumentsLength(args, 3, 3)

        error.CheckType([args[0], args[1], args[2], MaxPosition],
                        [[int, float], [float, int], [float, int], int])

        probability = args[0]
        axillary_probability = args[1]
        rhythm_ratio = args[2]

        assert probability >= TOP_MIN_PROBABILITY
        assert probability <= 1
        assert axillary_probability <= 1
        assert axillary_probability >= TOP_MIN_PROBABILITY

        assert rhythm_ratio >= MIN_RHYTHM_RATIO
        assert rhythm_ratio <= 1. / MIN_RHYTHM_RATIO

        return _TopParameters(probability, axillary_probability, rhythm_ratio,
                              MaxPosition)
Exemplo n.º 29
0
    def _SetMTGVidDictionary(self, VidDict, TreeId=None, ValidityCheck=False):
        """Set the dictionaries corresponding to the tree -> MTG
            and MTG -> tree vertex identifiers correspondences.

        :Usage:

            _SetMTGVidDictionary(self, VidDict, TreeId=None, ValidityCheck=False)

        :Parameters:

          `VidDict` (list or dict) - Dictionary or list of dictionaries with the vids of
            an MTG as keys and the corresponding vertices of self as values
          `TreeId` (int) - Identifier of the tree whose MTG ids must be set
            (all trees in self if None)
          `ValidityCheck` (bool) - Check whether the dictionary values correspond to valid
            tree vertex identifiers

        :Remarks:

            If TreeId is None, VidDict must be a list of dictionaries with length self.NbTrees().
            Otherwise, VidDict must be a single dictionary.
            The keys of the dictionary(ies) are the vertex identifiers of the MTG,
            and values are the corresponding vertex identifiers in associated trees.
            
        :Examples:

        .. doctest::
            :options: +SKIP

            >>> _SetMTGVidDictionary(self, VidDict, TreeId=None, ValidityCheck=False)

        .. seealso::
            :func:`~openalea.tree_statistic.trees.Trees.MTGVertexId`,
            :func:`~openalea.tree_statistic.trees.Trees.TreeVertexId`.
        """
        msg = "Correspondence between MTG and tree vertex identifiers "
        msg += "was previously defined already. This correspondence "
        msg += "will be overwritten."
        import warnings
        replace = True
        if ((TreeId is None) and not (self.__mtg_to_tree_vid is None)
                and (len(self.__mtg_to_tree_vid) > 0)):
            warnings.warn(msg, Warning)
        if self.__mtg_to_tree_vid is None:
            self.__mtg_to_tree_vid = []
            for t in range(self.NbTrees()):
                self.__mtg_to_tree_vid.append({})
        if self.__tree_to_mtg_vid is None:
            self.__tree_to_mtg_vid = []
            for t in range(self.NbTrees()):
                self.__tree_to_mtg_vid.append({})
        if self.__tree_to_mtg_tid is None:
            self.__tree_to_mtg_tid = {}
        if self.__mtg_to_tree_tid is None:
            self.__mtg_to_tree_tid = {}
        if not (TreeId is None):
            check = self._valid_tree(TreeId)
            CpVidDict = dict(VidDict)
            VidDict = []
            for t in range(self.NbTrees()):
                if (TreeId == t):
                    VidDict.append(CpVidDict)
                else:
                    VidDict.append({})
            if (len(self.__mtg_to_tree_vid[TreeId]) > 0):
                warnings.warn(msg, Warning)
        elif (len(VidDict) != self.NbTrees()):
            if len(VidDict) == 0:
                # erase dictionaries
                replace = False
                self.__mtg_to_tree_vid = None
                self.__tree_to_mtg_vid = None
                self.__tree_to_mtg_tid = None
                self.__mtg_to_tree_tid = None
            else:
                msg = "Bad number of dictionaries: " + str(len(VidDict))
                msg += " - should be " + str(self.NbTrees())
                raise ValueError(msg)
        if (ValidityCheck):
            for t in range(self.NbTrees()):
                if ((TreeId is None) or (TreeId == t)):
                    for v in VidDict[t].values():
                        check = self._valid_vid(t, v)
                    for k in VidDict[t].keys():
                        check = error.CheckType([k], [int])
        for t in range(self.NbTrees()):
            # copy dictionary MTG->Tree
            if (((TreeId is None) or (TreeId == t)) and replace):
                self.__mtg_to_tree_vid[t] = dict(VidDict[t])
                # build dictionary Tree->MTG
                self.__tree_to_mtg_vid[t] = {}
                for k in VidDict[t].keys():
                    v = VidDict[t][k]
                    if v in self.__tree_to_mtg_vid[t]:
                        msg = "Tree vertex " + str(v)
                        msg += " already present in dictionary for "
                        msg += "tree " + str(t)
                        raise ValueError(msg)
                    else:
                        self.__tree_to_mtg_vid[t][v] = k
                # update dictionaries MTGComponentRoot <--> Tree Roots
                tr = self._ctrees().Tree(t).Root()  # tree root
                try:
                    v = self.__tree_to_mtg_vid[t][tr]  # MTGComponentRoot
                except KeyError as key_error:
                    if (ValidityCheck):
                        raise key_error
                    else:
                        v = sorted(VidDict[t].keys())[0]
                self.__mtg_to_tree_tid[v] = t
                self.__tree_to_mtg_tid[t] = v
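# Hedged usage sketch for the setter above: `trees` is assumed to be an
# openalea.tree_statistic.trees.Trees instance with two trees, and the MTG
# and tree vertex identifiers below are invented for illustration.
vid_maps = [
    {10: 0, 11: 1, 12: 2},   # tree 0: MTG vid -> tree vertex id
    {20: 0, 21: 1},          # tree 1
]
trees._SetMTGVidDictionary(vid_maps, ValidityCheck=True)

# setting the correspondence for a single tree instead
trees._SetMTGVidDictionary({30: 0, 31: 1}, TreeId=1)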
Exemplo n.º 30
0
def TimeEvents(*args, **kargs):
    """TimeEvents

    Construction of data of type {time interval between two observation dates,
    number of events occurring between these two observation dates} from time
    sequences, from an object of type HISTOGRAM or from an ASCII file.

    :Usage:

    .. doctest::
        :options: +SKIP
        
        >>> TimeEvents(seq1, begin_date, end_date, PreviousDate=3, NextDate=8)
        >>> TimeEvents(seqn, variable, begin_date, end_date, PreviousDate=3,\
                NextDate=8)
        >>> TimeEvents(histo, time)
        >>> TimeEvents(file_name)
        >>> h = Histogram([1,1,1,2,2,2])
        >>> t = TimeEvents(h, 2)

    :Arguments:

    * seq1 (sequences): univariate time sequences (with an explicit index
      parameter of type TIME),
    * seqn (sequences): multivariate time sequences (with an explicit index
      parameter of type TIME),
    * variable (int): variable index,
    * begin_date (int): initial observation date,
    * end_date (int): final observation date,
    * histo (histogram, mixture_data, convolution_data, compound_data): number of
      events frequency distribution,
    * time (int): time interval between two observation dates (length of the
      observation period),
    * file_name (string).

    :Optional Arguments:

    * PreviousDate (int): date preceding the initial observation date to check
      the increasing character of the number of events. This optional argument
      can only be used if the first mandatory argument is of type sequences.
    * NextDate (int): date following the final observation date to check the
      increasing character of the number of events. This optional argument can
      only be used if the first mandatory argument is of type sequences.

    :Returned Object:

    If the construction succeeds, an object of type time_events is returned,
    otherwise no object is returned.

    .. seealso::

        :func:`Save`,
        :func:`~openalea.stat_tool.data_transform.ExtractHistogram`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.sequence_analysis.time_events.NbEventSelect`,
        :func:`~openalea.sequence_analysis.data_transform.TimeScaling`,
        :func:`~openalea.sequence_analysis.data_transform.TimeSelect`.

    .. todo:: fix the build_time_events method to allow construction from a
       histogram. Issue: this method is in stat_tool and returns a time_events
       object, so stat_tool would need to know about sequence_analysis...
    """

    PreviousDate = kargs.get("PreviousDate", -1)
    NextDate = kargs.get("NextDate", -1)

    if len(args) == 1 and isinstance(args[0], str):
        filename = args[0]
        if os.path.isfile(filename):
            time_events = _TimeEvents(filename)
        else:
            raise IOError("bad file name")
    elif isinstance(args[0], _Sequences):
        seq = args[0]
        nb_variable = seq.nb_variable
        if nb_variable != 1:
            # multivariate case: TimeEvents(seqn, variable, begin_date, end_date)
            variable = args[1]
            begin_date = args[2]
            end_date = args[3]
        else:
            # univariate case: TimeEvents(seq1, begin_date, end_date)
            variable = 1
            begin_date = args[1]
            end_date = args[2]
        error.CheckType([variable, begin_date, end_date], [int, int, int])

        time_events = seq.extract_time_events(variable, begin_date, end_date,
                                              PreviousDate, NextDate)

    else:
        # should work with Histogram, Mixture_data, Conv_data, comp_data
        error.CheckArgumentsLength(args, 2, 2)
        error.CheckType([args[0], args[1]], \
                        [[_DiscreteDistributionData, _DiscreteMixtureData,\
                          _ConvolutionData, _CompoundData], int])
        distribution = args[0]
        time = args[1]
        time_events = _TimeEvents(distribution, time)

    return time_events