Пример #1
0
def CCMGetDistances(libraryMatrix, args):
    '''
    Compute all pairwise distances between the rows of libraryMatrix.

    Note that for CCM the libraryMatrix and predictionMatrix are the same.

    libraryMatrix : 2-D array of observations (rows). Column j=0 is
                    skipped (the code treats it as the time column);
                    columns 1 .. args.E are used as the E-dimensional
                    phase space vector of each row.
    args          : object providing the embedding dimension args.E.

    Return Distances: a square, symmetric ( N_row x N_row ) matrix.
    Matrix elements D[i,j] hold the distance between the E-dimensional
    phase space point (vector) between rows (observations) i and j,
    for every pair i != j.  The diagonal D[i,i] is left at the 1E30
    initialisation value (presumably so that a row is never ranked as
    its own nearest neighbor -- confirm against CCMGetNeighbors()).
    '''

    N_row = nRow(libraryMatrix)

    D = np.full((N_row, N_row), 1E30)  # Distance matrix init to 1E30
    E = args.E + 1  # Exclusive end of the coordinate column slice

    for row in range(N_row):
        # Get E-dimensional vector from this library row
        # Exclude the 1st column (j=0) of times
        y = libraryMatrix[row, 1:E]

        # Visit only the upper triangle (col > row) to avoid redundant
        # calculations. The range must extend to N_row so that the last
        # observation is also paired with every other row.
        for col in range(row + 1, N_row):
            # Find distance between vector (y) and other library vector
            # Exclude the 1st column (j=0) of Time
            D[row, col] = Distance(libraryMatrix[col, 1:E], y)
            # Mirror into the lower triangle since D[i,j] = D[j,i]
            D[col, row] = D[row, col]

    return D
Пример #2
0
def CrossMap(args):
    '''
    Pool worker function called from CCM().

    Embeds the data selected by args, then for every library size in
    args.libsize = ( start, stop, increment ) runs a Simplex cross map
    on one or more library subsets and averages the error statistics.

    Side effect: if args.k_NN is negative it is set to args.E + 1.

    Returns the tuple ( ID, PredictLibStats ) where ID is the string
    "<columns> to <target>" and PredictLibStats is a dictionary
    { lib_size : ( rho, rmse, mae ) } of subsample-averaged values.
    '''

    # Embedding of the data to be cross mapped (-c column)
    embedding, colNames, target = EmbedData(args)

    # The complete embedding is both the library and the prediction set
    libraryMatrix = embedding
    rowCount = nRow(libraryMatrix)

    # Range of CCM library sizes
    start, stop, increment = args.libsize

    # Random libraries are resampled args.subsample times;
    # a contiguous library is evaluated a single time
    maxSamples = args.subsample if args.randomLib else 1

    # Simplex: an unspecified (negative) k_NN defaults to E + 1
    if args.k_NN < 0:
        args.k_NN = args.E + 1
        if args.verbose:
            print("CCM() Set k_NN to E + 1 = " + str(args.k_NN) +
                  " for SimplexProjection.")

    #-----------------------------------------------------------------
    libRange = "  Library range: [{}, {}, {}]".format(start, stop, increment)
    print("CCM(): Simplex cross mapping from " + str(args.columns) +
          " to " + args.target + "  E=" + str(args.E) +
          " k_nn=" + str(args.k_NN) + libRange)

    #-----------------------------------------------------------------
    # Row to row distances for every pred : lib pair of
    # E-dimensional vectors, computed once for all library sizes
    #-----------------------------------------------------------------
    Distances = CCMGetDistances(libraryMatrix, args)

    #----------------------------------------------------------
    # Predictions
    #----------------------------------------------------------
    PredictLibStats = {}  # { lib_size : ( rho, rmse, mae ) }

    for lib_size in range(start, stop + 1, increment):

        if args.Debug:
            print("CCM(): lib_size " + str(lib_size))

        # One row of [ rho, rmse, mae ] per subsample
        sampleStats = np.zeros((maxSamples, 3))

        for n in range(maxSamples):

            # Select the library row indices for this subsample
            if args.randomLib:
                # Uniform random rows, drawn with replacement
                lib_i = randint(low=0, high=rowCount, size=lib_size)

            elif lib_size >= rowCount:
                # Requested size exceeds the data: use every row
                lib_i = np.arange(0, rowCount)

                if args.warnings or args.verbose:
                    print("CCM(): max lib_size is {}, "
                          "lib_size has been limited.".format(rowCount))

            elif n + lib_size < rowCount:
                # Contiguous block beginning at row n
                lib_i = np.arange(n, n + lib_size)

            else:
                # Block runs past the last row: wrap to the data origin
                wrapCount = min(lib_size - (rowCount - n), rowCount)
                lib_i = np.concatenate((np.arange(n, rowCount),
                                        np.arange(0, wrapCount)), axis=0)

            #----------------------------------------------------------
            # k_NN nearest neighbors : Local CCMGetNeighbors() function
            #----------------------------------------------------------
            neighbors, distances = CCMGetNeighbors(Distances, lib_i, args)

            predictions = SimplexProjection(libraryMatrix[lib_i, :],
                                            target[lib_i], neighbors,
                                            distances, args)

            rho, rmse, mae = ComputeError(target[lib_i], predictions)

            sampleStats[n] = (rho, rmse, mae)

        # Average each statistic over the subsamples
        PredictLibStats[lib_size] = tuple(
            np.mean(sampleStats[:, j]) for j in range(3))

    # Return tuple with ( ID, PredictLibStats{} )
    return (str(args.columns) + " to " + args.target, PredictLibStats)
Пример #3
0
def Multiview(args, source=Source.Python):
    '''
    Multiview embedding forecast.

    Data input requires -c (columns) to specify timeseries columns
    in inputFile (-i) that will be embedded by EmbedData(), and the
    -r (target) specifying the data target column in inputFile.

    args.E represents the number of variables to combine for each
    assessment, as well as the number of time delays to create in
    EmbedData() for each variable.

    Prediction() with Simplex sets k_NN equal to E+1 if -k not specified.

    Procedure:
      1. Form all E-column combinations of the embedding that contain
         at least one unlagged column.
      2. Rank them by in-sample skill (EvalLib(), prediction = library).
      3. Forecast with the top args.multiview combinations (EvalPred())
         and average their predictions.

    Side effects on args: args.plot is set to False and args.outputFile
    to None (neither is restored); args.multiview is assigned when it
    is not set; args.prediction is temporarily replaced by args.library
    and restored afterwards, even if the in-sample evaluation fails.

    Returns a dictionary with keys 'header', 'multiview', 'rho', 'RMSE'
    and 'MAE' when source is Source.Jupyter, otherwise None.

    Raises RuntimeError if columns, target or a positive E are not
    specified, or if no embedding could be evaluated.

    --
    Ye H., and G. Sugihara, 2016. Information leverage in interconnected
    ecosystems: Overcoming the curse of dimensionality.
    Science 353:922–925.
    '''

    if not len(args.columns):
        raise RuntimeError('Multiview() requires -c to specify data.')
    if not args.target:
        raise RuntimeError('Multiview() requires -r to specify target.')
    if args.E < 1:
        # E == 0 yields no usable embedding, reject it with E < 0
        raise RuntimeError('Multiview() E is required.')

    # Save args.plot flag, and disable so Prediction() does not plot
    showPlot = args.plot
    args.plot = False

    # Save args.outputFile and reset so Prediction() does not write
    outputFile = args.outputFile
    args.outputFile = None

    # Embed data from inputFile
    embedding, colNames, target = EmbedData(args)

    # Combinations of possible embedding variables (columns), E at-a-time
    # Column 0 is time.
    # Require that each embedding has at least one coordinate with
    # observed data (zero time lag).
    # Note: this only works if the data (unlagged) are in columns
    # 1, 1 + E, 1 + 2E, ... which is consistent with EmbedData() output.
    # The test is written as (x - 1) % E == 0 rather than x % E == 1 so
    # that it also holds for E = 1, where every column is unlagged.
    nVar = len(args.columns)
    combos = [
        combo
        for combo in combinations(range(1, nVar * args.E + 1), args.E)
        if any((x - 1) % args.E == 0 for x in combo)
    ]

    if not args.multiview:
        # Ye & Sugihara suggest sqrt( m ) as the number of embeddings to avg
        args.multiview = max(2, int(np.sqrt(len(combos))))

        print('Multiview() Set view sample size to ' + str(args.multiview))

    #---------------------------------------------------------------
    # Evaluate variable combinations.
    # Note that this is done within the library itself (in-sample).
    # Save a copy of the specified prediction observations.
    userPrediction = args.prediction

    # Process pool to evaluate combos; always released in the finally
    pool = Pool()
    try:
        # Override the args.prediction for in-sample forecast skill
        # evaluation, and guarantee the user value is put back
        args.prediction = args.library
        try:
            argList = [(args, combo, embedding, colNames, target)
                       for combo in combos]

            # Submit EvalLib jobs to the process pool
            results = pool.map(EvalLib, argList)
        finally:
            # Reset the user specified prediction vector
            args.prediction = userPrediction

        # Dict to hold combos : rho pairs from EvalLib() tuple
        Combo_rho = {result[0]: result[1]
                     for result in results if result is not None}

        if not Combo_rho:
            raise RuntimeError('Multiview() EvalLib() returned no results.')

        #-----------------------------------------------------------
        # Rank the in-sample forecasts by decreasing rho
        rho_sort, combo_sort = zip(
            *sorted(zip(Combo_rho.values(), Combo_rho.keys()), reverse=True))

        if args.Debug:
            print("Multiview()  In sample sorted embeddings:")
            print('Columns         ρ')
            for i in range(min(args.multiview, len(combo_sort))):
                print(str(combo_sort[i]) + "    " +
                      str(round(rho_sort[i], 4)))

        #-----------------------------------------------------------
        # Perform predictions with the top args.multiview embeddings
        argList = [(args, combo, embedding, colNames, target)
                   for combo in combo_sort[0:args.multiview]]

        # Submit EvalPred jobs to the process pool
        results = pool.map(EvalPred, argList)
    finally:
        pool.close()
        pool.join()

    # Dictionary of result dictionaries, one per combo, in rank order
    Results = OrderedDict()

    for result in results:
        if result is None:
            continue
        Results[result[0]] = result[1]

    # Console output
    print("Multiview()  Prediction Embeddings:")
    print("Columns       Names                       ρ       mae   rmse")
    for key, result in Results.items():
        print( str( key ) + "   " + ' '.join( result[ 'names' ] ) +\
               "  " + str( round( result[ 'rho'  ], 4 ) ) +\
               "  " + str( round( result[ 'mae'  ], 4 ) ) +\
               "  " + str( round( result[ 'rmse' ], 4 ) ) )

    if not Results:
        raise RuntimeError('Multiview() EvalPred() returned no results.')

    #----------------------------------------------------------
    # Compute Multiview averaged prediction
    # The output item of Results dictionary is a matrix with three
    # columns [ Time, Data, Prediction_t() ]
    # Collect the Predictions into a single matrix.
    # Time and Data are taken from the best available result, which
    # is the first entry since Results preserves the rank order.
    aresult = next(iter(Results.values()))
    nrows = nRow(aresult['output'])
    time = aresult['output'][:, 0]
    data = aresult['output'][:, 1]

    M = np.zeros((nrows, len(Results)))

    for col_i, result in enumerate(Results.values()):
        M[:, col_i] = result['output'][:, 2]  # Prediction is in col j=2

    prediction = np.mean(M, axis=1)
    multiview_out = np.column_stack((time, data, prediction))

    # Write output
    header = 'Time,Data,Prediction_t(+{0:d})'.format(args.Tp)
    if outputFile:
        np.savetxt(args.path + outputFile,
                   multiview_out,
                   fmt='%.4f',
                   delimiter=',',
                   header=header,
                   comments='')

    # Estimate correlation coefficient on observed : predicted data
    rho, rmse, mae = ComputeError(data, prediction)

    print(("Multiview()  ρ {0:5.3f}  RMSE {1:5.3f}  "
           "MAE {2:5.3f}").format(rho, rmse, mae))

    #----------------------------------------------------------
    if showPlot:

        Time = multiview_out[:, 0]  # Required to be first (j=0) column

        if args.plotDate:
            Time = num2date(Time)

        fig, ax = plt.subplots(1, 1, figsize=args.figureSize, dpi=150)

        ax.plot(Time,
                multiview_out[:, 1],
                label='Observations',
                color='blue',
                linewidth=2)

        ax.plot(Time,
                multiview_out[:, 2],
                label='Predictions_t(+{0:d})'.format(args.Tp),
                color='red',
                linewidth=2)

        if args.verbose:  # Plot all projections
            # Column col of M belongs to the col-th key of Results, so
            # label from Results to stay aligned if a job returned None
            for col, key in enumerate(Results):
                ax.plot(Time,
                        M[:, col],
                        label=key,
                        linewidth=2)

        ax.legend()
        ax.set( xlabel = args.plotXLabel,
                ylabel = args.plotYLabel,
                title  = "Multiview  " + args.inputFile +\
                         ' Tp=' + str( args.Tp ) +\
                         ' E='  + str( args.E ) + r' $\rho$=' +\
                str( round( rho, 2 ) ) )
        plt.show()

    if source == Source.Jupyter:
        return {
            'header': header,
            'multiview': multiview_out,
            'rho': rho,
            'RMSE': rmse,
            'MAE': mae
        }
    else:
        return