def getPreprocessedData(dataName, dataFrequency, autoConfigFileRelativePath, KEY_preProcessedDataFilePath):
    """Load the pre-processed candle data for a symbol/frequency from CSV.

    Parameters
    ----------
    dataName : str
        Dataset/symbol name; top-level key in the auto config.
    dataFrequency : str
        Candle frequency key (e.g. '1minute') under dataName in the config.
    autoConfigFileRelativePath : str
        Project-root-relative path of the auto config file; used only for the
        progress print below.
    KEY_preProcessedDataFilePath : str
        Config key holding the CSV path (normally 'preProcessedDataFilePath').

    Returns
    -------
    pandas.DataFrame
        Contents of the pre-processed CSV. Truth source — callers should
        treat it as read-only reference data.
    """
    import pandas as pd
    from utilities.fileFolderManipulations import getJupyterRootDirectory
    from config.environment import getAppConfigData

    # Deployment directory of the current jupyter node on this machine.
    jupyterNodePath = getJupyterRootDirectory()
    print("jupyterNodePath >>> " + jupyterNodePath)

    configFilePath = jupyterNodePath + autoConfigFileRelativePath
    print("configFilePath >>> " + configFilePath)

    autoConfigData = getAppConfigData()
    preProcessedDataFilePath = autoConfigData[dataName][dataFrequency][KEY_preProcessedDataFilePath]

    # BUG FIX: the configured path is stored relative to the project root
    # (preProcessData strips jupyterNodePath before saving it), so it must be
    # re-prefixed here — consistent with getQuantityBasedFeatures and
    # doBasicOperation, which both read `jupyterNodePath + path`.
    inputRawProcessedDataDF = pd.read_csv(jupyterNodePath + preProcessedDataFilePath)
    return inputRawProcessedDataDF
def getQuantityBasedFeatures(dataName, dataFrequency):
    """Build two quantity-derived feature columns for the configured dataset.

    Reads the pre-processed CSV whose project-relative path is stored in the
    auto config under dataName/dataFrequency, then derives:
      * mean-centred traded quantity scaled by its maximum, and
      * mean-centred candle-to-candle quantity change scaled by its maximum.

    Returns
    -------
    pandas.DataFrame
        Two columns (both inherit the source name 'quantity'), one row per
        candle; the first row of the difference column is NaN (no precursor).
    """
    import pandas as pd
    import numpy as np
    from utilities.fileFolderManipulations import getJupyterRootDirectory
    from config.environment import getAppConfigData

    # The CSV location is recorded relative to the project root.
    rootPath = getJupyterRootDirectory()
    configData = getAppConfigData()
    csvRelativePath = configData[dataName][dataFrequency]['preProcessedDataFilePath']

    candlesDf = pd.read_csv(rootPath + csvRelativePath)
    quantitySeries = candlesDf['quantity']

    # Centre on the mean and scale by the maximum (note: divisor is the max,
    # not the standard deviation — this is not a z-score).
    normalizedQuantity = (quantitySeries - np.mean(quantitySeries)) / np.max(quantitySeries)

    # Same normalisation applied to the first difference of quantity.
    quantityDelta = quantitySeries - quantitySeries.shift(1)
    normalizedDelta = (quantityDelta - np.mean(quantityDelta)) / np.max(quantityDelta)

    return pd.concat([normalizedQuantity, normalizedDelta], axis=1)
def getAppConfigData():
    """Load the project's auto-generated JSON config (src/config/config.json).

    If the file is missing or does not parse as JSON, a config file containing
    '{}' is written and an empty dict is returned.

    Returns
    -------
    dict
        Parsed configuration, or {} when the file had to be (re)created.

    Raises
    ------
    Whatever getJupyterRootDirectory raises if the project root cannot be
    determined (the old bare `except:` masked that case and returned None).
    """
    import json
    from utilities.fileFolderManipulations import getJupyterRootDirectory

    # Resolve the path OUTSIDE the try so the handler can never reference an
    # unassigned configFilePath (the old code could NameError/TypeError here).
    projectRootDirectory = getJupyterRootDirectory()
    configFilePath = projectRootDirectory + "/src/config/config.json"
    print(' retrieving values configured in >>> ' + configFilePath)

    try:
        with open(configFilePath) as json_data_file:
            data = json.load(json_data_file)
    except (OSError, ValueError):
        # OSError: file missing/unreadable. ValueError: invalid JSON
        # (json.JSONDecodeError subclasses it). Narrowed from a bare except.
        print(' error retrieving values configured in >>> ' + configFilePath)
        print(' creating new configuration file >>> ' + configFilePath)
        data = {}
        # NOTE(review): 'a+' appends — if the file exists with corrupt JSON
        # this appends '{}' after the junk instead of replacing it. Kept from
        # the original flow; confirm whether an overwrite ('w') is intended.
        with open(configFilePath, 'a+') as f:
            f.write('{}')
    return data
def setAppConfigData(data):
    """Serialize *data* as JSON into src/config/config.json, replacing its contents.

    Parameters
    ----------
    data : dict
        JSON-serializable configuration to persist.

    Returns
    -------
    bool
        True when the file was written, False on any error. The original also
        returned False on error (its `finally: return` swallowed the re-raise),
        so this preserves the caller-visible contract while actually printing
        a usable traceback.
    """
    import json
    import traceback
    from utilities.fileFolderManipulations import getJupyterRootDirectory

    returnValue = False
    try:
        projectRootDirectory = getJupyterRootDirectory()
        configFilePath = projectRootDirectory + "/src/config/config.json"
        print(' updating config file >>> ' + configFilePath)

        data_string = json.dumps(data)

        # 'a+' creates the file when absent; seek(0) + truncate() turns the
        # append handle into a full overwrite so stale JSON never survives.
        with open(configFilePath, 'a+') as json_data_file:
            json_data_file.seek(0)
            json_data_file.truncate()
            json_data_file.write(data_string)
        print(' successfully updated config file >>> (try block) ' + configFilePath + ' with data >>>' + data_string)
        returnValue = True
    except Exception:
        # BUG FIX: the old handler formatted an undefined `traceback_template`
        # (guaranteed NameError), contained Python-2 bare `print` statements,
        # and ended with a dead `raise` that `finally: return` swallowed.
        # Its broken FileNotFoundError branch re-opened the same missing path
        # and failed again, landing on the same return-False outcome.
        print("Error executing method >>> ")
        print(traceback.format_exc())
    return returnValue
def preProcessData(dataName, dataFrequency, outputFileName="processedRawData.csv"):
    """Convert raw JSON candle files into a single pre-processed CSV.

    Reads every .json file under data/<dataName>/raw/<dataFrequency>, flattens
    the candle arrays into one DataFrame, expands the timestamp into date-part
    columns (fastai add_datepart), appends prior/following-holiday features,
    writes the result to .../processed/<dataFrequency>/preProcessedData/ and
    records the CSV's project-root-relative path in the auto config file.

    Parameters
    ----------
    dataName : str
        Dataset/symbol name (folder under data/).
    dataFrequency : str
        Candle frequency folder name (e.g. '1minute').
    outputFileName : str, optional
        Name of the CSV produced (default 'processedRawData.csv').

    Returns
    -------
    list
        [returnValue, outputFolderName, outputFileName, outputFilePath,
        inputRawDataDF]. returnValue is False when any step failed; the
        traceback is printed, not raised (matches the original effective
        contract, whose `finally: return` swallowed its `raise`).
    """
    import os, sys, traceback
    import pandas as pd
    import glob
    from utilities.fileFolderManipulations import getParentFolder
    from utilities.fileFolderManipulations import createFolder
    from utilities.fileFolderManipulations import getJupyterRootDirectory
    from config.environment import getAppConfigData
    from config.environment import setAppConfigData
    from fastai.tabular import add_datepart

    print(' data pre-processing >> imported dependencies')

    relativeDataFolderPath = 'data/' + dataName + '/raw/' + dataFrequency

    # Return values — returnValue flips to True only when every step ran.
    returnValue = False
    outputFilePath = None
    outputFolderName = None
    inputRawDataDF = None
    try:
        # Deployment directory of the current jupyter node on this machine.
        jupyterNodePath = getJupyterRootDirectory()

        # Show all columns when frames are printed while debugging.
        pd.set_option('display.max_columns', None)

        inputRawDataDF = pd.DataFrame()

        # Absolute raw-data folder, then a glob for its JSON files.
        dataFolderPath = jupyterNodePath + '/' + relativeDataFolderPath
        json_pattern = os.path.join(dataFolderPath, '*.json')
        file_list = glob.glob(json_pattern)

        print('looping through all the files to create input data')
        frames = []
        for file in file_list:
            print("reading input file >>> " + file + " ...")
            data = pd.read_json(file, lines=True)
            # Unwrap the candle payload from the JSON envelope.
            # NOTE(review): read_json never returns a str, so the isinstance
            # branch looks dead — kept from the original; confirm intent.
            if isinstance(data, str):
                data = data['data'][0]['candles']
            else:
                data = data.values[0][0]['candles']
            frames.append(pd.DataFrame(data))
            print("File read - SUCCESS")
        # One concat replaces the deprecated per-file DataFrame.append loop
        # (linear instead of quadratic, and works on modern pandas too).
        if frames:
            inputRawDataDF = pd.concat(frames, ignore_index=True)
        inputRawDataDF.columns = [
            'date-time', 'open', 'high', 'low', 'close', 'quantity', 'dont-know'
        ]

        # Keep the raw timestamp column alongside the exploded date parts
        # (add_datepart consumes/drops the 'date-time' column in place).
        buffer = inputRawDataDF['date-time']
        add_datepart(inputRawDataDF, 'date-time')
        inputRawDataDF = pd.concat([buffer, inputRawDataDF], axis=1)

        # prior_holidays feature, derived from the day-of-year stamps.
        priorHolidaysStamps = getPriorHoliDaysStamps(inputRawDataDF['date-timeDayofyear'])
        priorHolidaysStamps_df = pd.DataFrame({'prior_holidays': priorHolidaysStamps[:]})
        inputRawDataDF = pd.concat([inputRawDataDF, priorHolidaysStamps_df], axis=1)
        print('added prior_holidays feature in pre-processed data')

        # following_holidays feature — the forward-looking counterpart.
        followingHolidaysStamps = getFollowingHolidaysDaysStamp(inputRawDataDF['date-timeDayofyear'])
        followingHolidaysStamps_df = pd.DataFrame({'following_holidays': followingHolidaysStamps[:]})
        inputRawDataDF = pd.concat([inputRawDataDF, followingHolidaysStamps_df], axis=1)
        print('added following_holidays feature in pre-processed data')

        # Ensure .../processed/<freq>/preProcessedData exists, then write CSV.
        processFolderName = getParentFolder(dataFolderPath, 2) + '/processed/' + dataFrequency
        print('Attempting to create folder if it does not exist >>>' + processFolderName)
        createFolder(processFolderName)

        outputFolderName = processFolderName + '/preProcessedData'
        print('Attempting to create folder if it does not exist >>>' + outputFolderName)
        createFolder(outputFolderName)

        outputFilePath = outputFolderName + '/' + outputFileName
        print('Attempting to create/update file >>>' + outputFilePath)
        inputRawDataDF.to_csv(outputFilePath, sep=',', index=False)
        print('created raw easy to use csv data to be used for preparing training data in the location >>>' + outputFilePath)

        # Record the CSV path (project-root-relative) in the auto config.
        print(' creating/updating autoConfig file')
        configFilePath = jupyterNodePath + '/src/config/autoConfig/config.json'
        autoConfigData = getAppConfigData()
        if not autoConfigData.get(dataName):
            autoConfigData[dataName] = {}
        if not autoConfigData[dataName].get(dataFrequency):
            autoConfigData[dataName][dataFrequency] = {}
        autoConfigData[dataName][dataFrequency] = {
            'preProcessedDataFilePath': outputFilePath.replace(jupyterNodePath, '')
        }
        setAppConfigData(autoConfigData)
        print(' creating/updating autoConfig file >>>' + configFilePath)

        returnValue = True
    except Exception:
        # BUG FIX: the old handler formatted an undefined `traceback_template`
        # (NameError) and ended with a `raise` that `finally: return`
        # swallowed anyway. Keep the effective contract (print + return the
        # result list with returnValue=False) but print a real traceback.
        print("Error executing method >>> ")
        print(traceback.format_exc())
    return [
        returnValue, outputFolderName, outputFileName, outputFilePath,
        inputRawDataDF
    ]
def getRedGreenCandlesCatogizedBySizeDf(df, dataName, dataFrequency, boundaryValues=None):
    """Bucket each candle's body (close - open) into red/green size categories.

    Produces six feature columns aligned with df's index:
      * redCandlesBySize / greenCandlesBySize — codes 0..5, where 0 means
        "not a red (resp. green) candle" and 5 is the largest-body bucket;
      * redCandlesBySizeTimesMagnitude / greenCandlesBySizeTimesMagnitude —
        the codes scaled by the candle's mean OHLC magnitude (red negated so
        the two sides carry opposite signs);
      * redGreenCandlesTanh — sign of the body (-1 red, 0 doji, +1 green);
      * redGreenCandlesTanhTimesMagnitudeDf — that sign times the magnitude.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'open', 'close', 'high', 'low' columns.
    dataName, dataFrequency : str
        Auto-config keys; consulted only when boundaryValues is None.
    boundaryValues : list of (negative, positive) tuples, optional
        Bucket boundaries, outermost first, e.g.
        [(-3.44, 2.44), (-3.32, 1.37), (-1.11, 1.01), (-0.53, 0.76)].
        Each tuple's absolute values must be smaller than the previous one's.
        Only indices 0..3 are used by the classification (a 5th tuple is
        accepted but ignored). When None, boundaries are loaded from — or
        derived from df's quintiles and saved to — the auto config.

    Returns
    -------
    pandas.DataFrame
        The six feature columns.

    Raises
    ------
    Exception
        Any error is printed with its traceback and re-raised (the old code's
        `finally: return` crashed on None.rename instead).
    """
    import sys, traceback
    import pandas as pd
    import numpy as np

    try:
        if boundaryValues is None:
            print('boundary values is none')
            # Project imports are deferred into this branch so the function is
            # usable (and testable) without the project config whenever the
            # caller supplies boundaryValues directly.
            from utilities.fileFolderManipulations import getJupyterRootDirectory
            from config.environment import getAppConfigData
            from config.environment import setAppConfigData

            # Deployment directory of the current jupyter node.
            jupyterNodePath = getJupyterRootDirectory()
            print("jupyterNodePath >>> " + jupyterNodePath)

            autoConfigData = getAppConfigData()
            if not autoConfigData.get(dataName):
                autoConfigData[dataName] = {}
            if not autoConfigData[dataName].get(dataFrequency):
                autoConfigData[dataName][dataFrequency] = {}
            boundaryValues = autoConfigData[dataName][dataFrequency].get('redGreenCandleSizeBoundaries')

            # BUG FIX: the old guard was `type(boundaryValues) == 'str'`,
            # which compares a type object to the string 'str' and is always
            # False — a configured empty string was never detected.
            if boundaryValues is None or (isinstance(boundaryValues, str) and boundaryValues.strip() == ''):
                print('boundary values is not configured')
                closeOpenDiffDf = (df['close'] - df['open']).rename('close_open_diff')

                # Red bodies sorted most-negative first; green bodies sorted
                # most-positive first.
                sortedRedCandles = closeOpenDiffDf.sort_values(ascending=True).reset_index(drop=True)
                sortedRedCandles = sortedRedCandles.loc[sortedRedCandles < 0].reset_index(drop=True)

                sortedGreenCandles = closeOpenDiffDf.sort_values(ascending=False).reset_index(drop=True)
                sortedGreenCandles = sortedGreenCandles.loc[sortedGreenCandles > 0].reset_index(drop=True)

                # Quintile cut points (20%, 40%, 60%, 80%) of each side.
                interval = np.arange(.2, 1, .2)
                indexArr = [(int(sortedRedCandles.shape[0] * interval[itr]),
                             int(sortedGreenCandles.shape[0] * interval[itr]))
                            for itr in range(0, interval.size)]
                boundaryValues = [(sortedRedCandles[indexItr[0] - 1],
                                   sortedGreenCandles[indexItr[1] - 1])
                                  for indexItr in indexArr]

                autoConfigData[dataName][dataFrequency].update(
                    {'redGreenCandleSizeBoundaries': boundaryValues})
                setAppConfigData(autoConfigData)
            else:
                print(
                    'using configured boundary values - do not update configurations unless u r absolutely sure of it'
                )
        else:
            print('using boundary values provided as parameter')

        closeOpenDiff = df['close'] - df['open']

        # Mean magnitude of the four OHLC components for each candle.
        dataMagnitudeDf = np.divide(
            np.sqrt(np.sum(np.square(df[['open', 'close', 'high', 'low']]), axis=1)), 4)

        # BUG FIX: every bucket mask is evaluated against the ORIGINAL diff
        # before any code is written. The old code mutated the series between
        # between() calls, so an already-assigned code (1..5) could fall into
        # a later range and be re-bucketed whenever the boundary magnitudes
        # overlapped the code values (e.g. a body larger than boundary 0 ended
        # up coded 4 instead of 5). Explicit comparisons also avoid the
        # deprecated boolean `inclusive=` argument of Series.between.
        redCandlesBySizeDf = closeOpenDiff.rename('redCandlesBySize').copy()
        redCandlesBySizeDf[closeOpenDiff >= 0] = 0
        redCandlesBySizeDf[closeOpenDiff < boundaryValues[0][0]] = 5
        redCandlesBySizeDf[(closeOpenDiff >= boundaryValues[0][0]) & (closeOpenDiff <= boundaryValues[1][0])] = 4
        redCandlesBySizeDf[(closeOpenDiff > boundaryValues[1][0]) & (closeOpenDiff < boundaryValues[2][0])] = 3
        redCandlesBySizeDf[(closeOpenDiff >= boundaryValues[2][0]) & (closeOpenDiff <= boundaryValues[3][0])] = 2
        redCandlesBySizeDf[(closeOpenDiff > boundaryValues[3][0]) & (closeOpenDiff < 0)] = 1

        greenCandlesBySizeDf = closeOpenDiff.rename('greenCandlesBySize').copy()
        greenCandlesBySizeDf[closeOpenDiff <= 0] = 0
        greenCandlesBySizeDf[closeOpenDiff > boundaryValues[0][1]] = 5
        greenCandlesBySizeDf[(closeOpenDiff >= boundaryValues[1][1]) & (closeOpenDiff <= boundaryValues[0][1])] = 4
        greenCandlesBySizeDf[(closeOpenDiff > boundaryValues[2][1]) & (closeOpenDiff < boundaryValues[1][1])] = 3
        greenCandlesBySizeDf[(closeOpenDiff >= boundaryValues[3][1]) & (closeOpenDiff <= boundaryValues[2][1])] = 2
        greenCandlesBySizeDf[(closeOpenDiff > 0) & (closeOpenDiff < boundaryValues[3][1])] = 1

        # Sign of the body: -1 red, +1 green, 0 doji.
        redGreenCandlesTanhDf = closeOpenDiff.rename('redGreenCandlesTanh').copy()
        redGreenCandlesTanhDf[closeOpenDiff < 0] = -1
        redGreenCandlesTanhDf[closeOpenDiff > 0] = 1

        redGreenCandlesTanhTimesMagnitudeDf = np.multiply(redGreenCandlesTanhDf, dataMagnitudeDf)
        # Red codes are negated so red and green magnitudes carry opposite signs.
        redCandlesBySizeTimesMagnitudeDf = -np.multiply(redCandlesBySizeDf, dataMagnitudeDf)
        greenCandlesBySizeTimesMagnitudeDf = np.multiply(greenCandlesBySizeDf, dataMagnitudeDf)

        return pd.concat([
            redCandlesBySizeDf,
            greenCandlesBySizeDf,
            redCandlesBySizeTimesMagnitudeDf.rename('redCandlesBySizeTimesMagnitude'),
            greenCandlesBySizeTimesMagnitudeDf.rename('greenCandlesBySizeTimesMagnitude'),
            redGreenCandlesTanhDf,
            redGreenCandlesTanhTimesMagnitudeDf.rename('redGreenCandlesTanhTimesMagnitudeDf')
        ], axis=1)
    except Exception:
        # BUG FIX: the old handler formatted an undefined `traceback_template`
        # (NameError) and its `finally: return` then failed on None.rename.
        # Print the real traceback and let the original error propagate.
        print("Error executing method >>> ")
        print(traceback.format_exc())
        raise
def doBasicOperation(dataName, dataFrequency):
    """Load the pre-processed CSV for dataName/dataFrequency and build the
    fundamental feature frame from it.

    Parameters
    ----------
    dataName, dataFrequency : str
        Keys into the auto config under which the CSV path is stored.

    Returns
    -------
    pandas.DataFrame or None
        Result of createFundamentalFeatures on the pre-processed data, or
        None when any step failed (the traceback is printed — this mirrors
        the original effective contract, which also swallowed errors and
        returned None).
    """
    import traceback
    import pandas as pd
    from config.environment import getAppConfigData
    from utilities.fileFolderManipulations import getJupyterRootDirectory

    print("into method doBasicOperation")
    return_fundamentalFeaturesDf = None
    try:
        # Project root on this machine; configured CSV paths are relative to it.
        jupyterNodePath = getJupyterRootDirectory()

        autoConfigData = getAppConfigData()
        preProcessedDataFilePath = autoConfigData[dataName][dataFrequency]['preProcessedDataFilePath']

        # Truth source — reference data only, never modified.
        inputRawProcessedDataDF = pd.read_csv(jupyterNodePath + preProcessedDataFilePath)

        # NOTE(review): createFundamentalFeatures is neither imported nor
        # defined in this view — presumably a sibling in this module; confirm
        # it is in scope at call time.
        return_fundamentalFeaturesDf = createFundamentalFeatures(inputRawProcessedDataDF)
        print("before return statement of method doBasicOperation ")
    except Exception:
        # BUG FIX: the old handler formatted an undefined `traceback_template`
        # (NameError) and had a dead `raise` swallowed by `finally: return`.
        # The net behavior (print + return None) is preserved.
        print("Error executing method >>> ")
        print(traceback.format_exc())
    return return_fundamentalFeaturesDf
def createFinalTrainingFeatureList(dataName, dataFrequency, variation_degree=-1):
    """Assemble the final training feature CSV by concatenating the basic
    fundamental features with every filtered-feature CSV for the dataset.

    Parameters
    ----------
    dataName, dataFrequency : str
        Keys identifying the dataset in the auto config / data folder tree.
    variation_degree : int, optional
        Feature-generation variation degree; -1 (or any non-int) means "use
        the value configured under 'variationDegreeForFeatureGeneration'".

    Returns
    -------
    tuple(pandas.DataFrame, str)
        The combined feature frame and the project-relative path of the CSV
        it was written to (also recorded in the auto config under
        'finalTrainingFeaturesListFile').
    """
    import glob
    import os
    import pandas as pd
    from utilities.fileFolderManipulations import getJupyterRootDirectory
    from utilities.fileFolderManipulations import getParentFolder
    from config.environment import getAppConfigData
    from config.environment import setAppConfigData
    from dataPreparation.featurePreparation import doBasicOperation

    configData = getAppConfigData()
    projectRootFolderPath = getJupyterRootDirectory()

    if not isinstance(variation_degree, int) or variation_degree == -1:
        variation_degree = configData['variationDegreeForFeatureGeneration']

    _basicDf = doBasicOperation(dataName, dataFrequency)

    filteredFeaturesPath = "/data/" + dataName + "/processed/" + dataFrequency + "/features/filteredFeatures"
    outputFinalFeatureListFilePath = "/data/" + dataName + "/processed/" + dataFrequency + "/features/finalTrainingFeatureList.csv"
    print("filteredFeaturesFolderPath >>> " + filteredFeaturesPath)

    # BUG FIX: `os` was used here but never imported, guaranteeing a
    # NameError on every call; it is now imported at the top of the function.
    csv_pattern = os.path.join(projectRootFolderPath + '/' + filteredFeaturesPath, '*.csv')
    print("declared csv_pattern")

    # All filtered-feature CSVs contributing columns to the final list.
    file_list = glob.glob(csv_pattern)
    print("obtained file_list")

    trainingFeatureDF = _basicDf
    print('initialized trainingFeatureDF')

    print('looping through all the files to create input data')
    for file in file_list:
        print("reading input file >>> " + file + " ...")
        data = pd.read_csv(file)
        # Column-wise merge: each CSV's columns are aligned by row order with
        # the basic feature frame.
        trainingFeatureDF = pd.concat([trainingFeatureDF, data], axis=1)
        print("File read - SUCCESS")

    # Write the final training feature list file.
    print("creating finalTrainingFeatureList in location >>> " + outputFinalFeatureListFilePath)
    trainingFeatureDF.to_csv(projectRootFolderPath + '/' + outputFinalFeatureListFilePath)

    # Record the output path in the auto config for downstream stages.
    autoConfigData = getAppConfigData()
    autoConfigData[dataName][dataFrequency].update(
        {'finalTrainingFeaturesListFile': outputFinalFeatureListFilePath})
    setAppConfigData(autoConfigData)
    print("updated config file with data >>>> finalTrainingFeaturesListFile:" + outputFinalFeatureListFilePath)

    return trainingFeatureDF, outputFinalFeatureListFilePath