Python FileManager примеры использования

Язык программирования: Python

Пространство имен/Пакет: finance.utils.FileManager

Класс/Тип: FileManager

Примеров на hotexamples.com: 5

Python FileManager - 5 примеров найдено. Это лучшие примеры Python кода для finance.utils.FileManager.FileManager, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

FileManager(1)

empty_dir(1)

get_data(1)

get_filenames(1)

Пример #1

Показать файл

Файл: DataAccess.py Проект: zxlstoner/PythonFinance

    def set_dir(self, dir_path):
        '''
        Point this object at a data directory and make sure it exists on disk

        Stores the real path of dir_path in self.dir, its 'cached' sub-folder
        in self.cache_dir and a FileManager built from dir_path in
        self.file_manager. Each folder is created when it is not present yet.

        Parameters
        ----------
            dir_path: str, path of the data directory
        '''
        root = os.path.realpath(dir_path)  # Absolute Path
        self.dir = root
        self.cache_dir = os.path.join(root, 'cached')
        self.file_manager = FileManager(dir_path)

        # Root folder first, then the cache folder that lives inside it
        for folder in (self.dir, self.cache_dir):
            if os.access(folder, os.F_OK):
                continue
            os.makedirs(folder)

Пример #2

Показать файл

Файл: DataAccess.py Проект: euclude/PythonFinance

    def set_dir(self, dir_path):
        '''
        Configure the working directories of this object

        Sets self.dir (real path of dir_path), self.cache_dir (the 'cached'
        folder inside self.dir) and self.file_manager (a FileManager created
        from dir_path), then creates whichever of the two folders is missing.
        '''
        base_dir = os.path.realpath(dir_path)  # Absolute Path
        cache_dir = os.path.join(base_dir, 'cached')
        self.dir = base_dir
        self.cache_dir = cache_dir
        self.file_manager = FileManager(dir_path)

        # Create the folders that are not there yet, base folder first
        for missing in (base_dir, cache_dir):
            if not os.access(missing, os.F_OK):
                os.makedirs(missing)

Пример #3

Показать файл

Файл: DataAccess.py Проект: BigW/PythonFinance

class DataAccess(object):
    '''
    Class to manage the Access to the Data

    Features
    --------
        1. Easy access to the data
        2. Serialization of data

    How to use
    ----------
        Use one: Note: Option 2 overwrites option 1
        1. Set the environment variable: FINANCEPATH
        2. Set the Static Variable DataAccess.path

    '''
    # Data directory; a non-empty value is used instead of FINANCEPATH
    path = ''

    def __init__(self):
        '''
        Resolve the data directory and initialize it through set_dir

        Raises
        ------
            Exception: if DataAccess.path is empty and FINANCEPATH is not set
        '''
        if self.path != '':
            self.set_dir(self.path)
            return

        env_var = os.getenv("FINANCEPATH")
        if env_var is None:
            raise Exception('No path defined')
        self.set_dir(env_var)

    def set_dir(self, dir_path):
        '''
        Initialize the FileManager
        Creates the directories
        Stores the absolute paths to the directories on the instance

        Parameters
        ----------
            dir_path: str
        '''
        self.dir = os.path.realpath(dir_path)  # Absolute Path
        self.cache_dir = os.path.join(self.dir, 'cached')
        self.file_manager = FileManager(dir_path)

        # Create paths if they don't exist
        for folder in (self.dir, self.cache_dir):
            if not os.access(folder, os.F_OK):
                os.makedirs(folder)

    def empty_dir(self, delete=True):
        '''
        Empty the directory of csv files. Do not delete the cache files/folder
        (delegated to the FileManager)

        Parameters
        ----------
            delete: boolean, True if want to delete the folder too
        '''
        self.file_manager.empty_dir(delete)

    def empty_cache(self, delete=True):
        '''
        Empty the directory of cached files. Does not delete the csv files/folder

        Parameters
        ----------
            delete: boolean, True if want to delete the folder too

        Raises
        ------
            OSError: if the cache folder cannot be listed, or cannot be removed
                     when delete is True (e.g. some entry could not be deleted)
        '''
        for f in os.listdir(self.cache_dir):
            try:
                os.remove(os.path.join(self.cache_dir, f))
            except OSError:
                # Best effort: skip entries that cannot be removed, but do not
                # hide unrelated errors the way a bare except would
                pass

        if delete:
            os.rmdir(self.cache_dir)

    def empty_dirs(self, delete=True):
        '''
        Delete both cached and csv files.

        Parameters
        ----------
            delete: boolean, True if want to delete the folders too
        '''
        self.empty_cache(delete)
        self.empty_dir(delete)

    def _cache_path(self, name, extension):
        '''
        Returns the path inside the cache directory used for the identifier
        name: the md5 hex digest of the name followed by the extension
        '''
        h = hashlib.md5()
        h.update(name.encode('utf8'))
        return os.path.join(self.cache_dir, h.hexdigest() + extension)

    def save(self, data, name, extension='.data'):
        '''
        Saves a serialized (pickle) version of the data to the cache directory

        Parameters
        ----------
            data: object, must offer to_pickle(path) or the legacy save(path)
            name: str, identifier of the data
            extension: str, extension of the filename
        '''
        f = self._cache_path(name, extension)
        # DataFrame.save is gone from current pandas; keep it only as a fallback
        writer = getattr(data, 'to_pickle', None)
        if writer is None:
            writer = data.save
        writer(f)

    def load(self, name, extension='.data'):
        '''
        Checks for an existing file name and if exists returns the data saved

        Parameters
        ----------
            name: str, identifier of the data
            extension: str, extension of the filename

        Returns
        -------
            data: object (usually pandas.DataFrame), if file was available; None otherwise.
        '''
        f = self._cache_path(name, extension)
        if not os.access(f, os.F_OK):
            return None
        # NOTE: unpickling can execute arbitrary code; this is only safe while
        # the cache directory is written by trusted code
        # pd.load is gone from current pandas; keep it only as a fallback
        reader = getattr(pd, 'read_pickle', None) or pd.load
        return reader(f)

    def get_data(self, symbols, start, end, fields='Adj Close', save=True, useCache=True,
                    downloadMissing=True, ignoreMissing=True):
        '''
        Returns a pandas DataFrame with the data of the symbols and fields
        requested between the specified dates

        Optional:
            1. Load a serialized version of the data
            2. Saves a serialized version of the data
            3. If data is not available download the missing data

        Column names: the field when a single symbol is requested, the symbol
        when several symbols and a single field are requested, and
        "<symbol> <field>" otherwise.

        Parameters
        ----------
            symbols: str or list of str
            start: datetime, with the initial date
            end: datetime, with the final date
            fields: str or list of str
            save: boolean, True if want to save the cache version
            useCache: boolean, True if want to load a cached version (if available)
            downloadMissing: boolean, forwarded to FileManager.get_filenames
            ignoreMissing: boolean, forwarded to FileManager.get_filenames

        Returns
        -------
            data: pandas.DataFrame
        '''
        # 0. If ask for only one symbol or field convert it to a list of one item
        if isinstance(symbols, str):
            symbols = [symbols]
        if isinstance(fields, str):
            fields = [fields]

        # 1. Load the Data, if requested
        filename_id = "%s_%s_%s_%s" % ('_'.join(symbols), start.strftime('%m-%d-%Y'),
                                        end.strftime('%m-%d-%Y'), '-'.join(fields))
        if useCache:
            data = self.load(filename_id)
            if data is not None:
                # 1.1 Data was cached before and loaded => return
                return data

        # 2. Data was not cached before, need to load the data from csv files

        # 2.1 Get the list of filenames from the FileManager
        files = self.file_manager.get_filenames(symbols, start, end, downloadMissing, ignoreMissing)

        # 2.2 We are going to create a pd.DataFrame from a dictionary of pd.Series
        single_symbol = len(symbols) == 1
        single_field = len(fields) == 1
        data_dic = {}

        # NOTE(review): assumes get_filenames returns one file per symbol, in
        # the same order as symbols -- confirm against FileManager
        for f, symbol in zip(files, symbols):
            # Create DataFrame from the csv
            new_data = pd.read_csv(os.path.join(self.dir, f))
            # Change the index of the DataFrame to be the date
            new_data = new_data.set_index(pd.to_datetime(new_data['Date']))

            for field in fields:
                if single_symbol:
                    colname = field
                elif single_field:
                    colname = symbol
                else:
                    colname = "%s %s" % (symbol, field)
                # Adds the pd.Series to the dictionary
                data_dic[colname] = new_data[field]

        # 2.3 Create, sort and slice the data
        data = pd.DataFrame(data_dic)
        # Sort because Yahoo Finance data comes reverse; sort_index replaces
        # the argument-less DataFrame.sort() that current pandas no longer has
        data = data.sort_index()[start:end]

        # Save a cache version if requested
        if save:
            self.save(data, filename_id)
        return data

    def download(self, symbols, start, end):
        '''
        Runs get_data with the cache disabled (no cache read, no cache write)
        and discards the resulting DataFrame

        Parameters
        ----------
            symbols: str or list of str
            start: datetime, with the initial date
            end: datetime, with the final date
        '''
        self.get_data(symbols, start, end, useCache=False, save=False)

Пример #4

Показать файл

Файл: DataAccess.py Проект: zxlstoner/PythonFinance

class DataAccess(object):
    '''
    Class to manage the Access to the Data

    Features
    --------
        1. Easy access to the data
        2. Serialization of data

    How to use
    ----------
        Use one: Note: Option 2 overwrites option 1
        1. Set the environment variable: FINANCEPATH
        2. Set the Static Variable DataAccess.path

    '''
    # Data directory; a non-empty value is used instead of FINANCEPATH
    path = ''

    def __init__(self):
        '''
        Pick the data directory (DataAccess.path first, FINANCEPATH otherwise)
        and hand it to set_dir

        Raises
        ------
            Exception: if DataAccess.path is empty and FINANCEPATH is not set
        '''
        if self.path != '':
            self.set_dir(self.path)
            return

        env_var = os.getenv("FINANCEPATH")
        if env_var is None:
            raise Exception('No path defined')
        self.set_dir(env_var)

    def set_dir(self, dir_path):
        '''
        Initialize the FileManager
        Creates the directories
        Stores the absolute paths to the directories on the instance

        Parameters
        ----------
            dir_path: str
        '''
        self.dir = os.path.realpath(dir_path)  # Absolute Path
        self.cache_dir = os.path.join(self.dir, 'cached')
        self.file_manager = FileManager(dir_path)

        # Create paths if they don't exist
        for folder in (self.dir, self.cache_dir):
            if not os.access(folder, os.F_OK):
                os.makedirs(folder)

    def empty_dir(self, delete=True):
        '''
        Empty the directory of csv files. Do not delete the cache files/folder
        (delegated to the FileManager)

        Parameters
        ----------
            delete: boolean, True if want to delete the folder too
        '''
        self.file_manager.empty_dir(delete)

    def empty_cache(self, delete=True):
        '''
        Empty the directory of cached files. Does not delete the csv files/folder

        Parameters
        ----------
            delete: boolean, True if want to delete the folder too

        Raises
        ------
            OSError: if the cache folder cannot be listed, or cannot be removed
                     when delete is True (e.g. some entry could not be deleted)
        '''
        for f in os.listdir(self.cache_dir):
            try:
                os.remove(os.path.join(self.cache_dir, f))
            except OSError:
                # Best effort: skip entries that cannot be removed, but do not
                # hide unrelated errors the way a bare except would
                pass

        if delete:
            os.rmdir(self.cache_dir)

    def empty_dirs(self, delete=True):
        '''
        Delete both cached and csv files.

        Parameters
        ----------
            delete: boolean, True if want to delete the folders too
        '''
        self.empty_cache(delete)
        self.empty_dir(delete)

    def _cache_path(self, name, extension):
        '''
        Returns the path inside the cache directory used for the identifier
        name: the md5 hex digest of the name followed by the extension
        '''
        h = hashlib.md5()
        h.update(name.encode('utf8'))
        return os.path.join(self.cache_dir, h.hexdigest() + extension)

    def save(self, data, name, extension='.data'):
        '''
        Saves a serialized (pickle) version of the data to the cache directory

        Parameters
        ----------
            data: object, must offer to_pickle(path) or the legacy save(path)
            name: str, identifier of the data
            extension: str, extension of the filename
        '''
        f = self._cache_path(name, extension)
        # DataFrame.save is gone from current pandas; keep it only as a fallback
        writer = getattr(data, 'to_pickle', None)
        if writer is None:
            writer = data.save
        writer(f)

    def load(self, name, extension='.data'):
        '''
        Checks for an existing file name and if exists returns the data saved

        Parameters
        ----------
            name: str, identifier of the data
            extension: str, extension of the filename

        Returns
        -------
            data: object (usually pandas.DataFrame), if file was available; None otherwise.
        '''
        f = self._cache_path(name, extension)
        if not os.access(f, os.F_OK):
            return None
        # NOTE: unpickling can execute arbitrary code; this is only safe while
        # the cache directory is written by trusted code
        # pd.load is gone from current pandas; keep it only as a fallback
        reader = getattr(pd, 'read_pickle', None) or pd.load
        return reader(f)

    def get_data(self,
                 symbols,
                 start,
                 end,
                 fields='Adj Close',
                 save=True,
                 useCache=True,
                 downloadMissing=True,
                 ignoreMissing=True):
        '''
        Returns a pandas DataFrame with the data of the symbols and fields
        requested between the specified dates

        Optional:
            1. Load a serialized version of the data
            2. Saves a serialized version of the data
            3. If data is not available download the missing data

        Column names: the field when a single symbol is requested, the symbol
        when several symbols and a single field are requested, and
        "<symbol> <field>" otherwise.

        Parameters
        ----------
            symbols: str or list of str
            start: datetime, with the initial date
            end: datetime, with the final date
            fields: str or list of str
            save: boolean, True if want to save the cache version
            useCache: boolean, True if want to load a cached version (if available)
            downloadMissing: boolean, forwarded to FileManager.get_filenames
            ignoreMissing: boolean, forwarded to FileManager.get_filenames

        Returns
        -------
            data: pandas.DataFrame
        '''
        # 0. If ask for only one symbol or field convert it to a list of one item
        if isinstance(symbols, str):
            symbols = [symbols]
        if isinstance(fields, str):
            fields = [fields]

        # 1. Load the Data, if requested
        filename_id = "%s_%s_%s_%s" % (
            '_'.join(symbols), start.strftime('%m-%d-%Y'),
            end.strftime('%m-%d-%Y'), '-'.join(fields))
        if useCache:
            data = self.load(filename_id)
            if data is not None:
                # 1.1 Data was cached before and loaded => return
                return data

        # 2. Data was not cached before, need to load the data from csv files

        # 2.1 Get the list of filenames from the FileManager
        files = self.file_manager.get_filenames(symbols, start, end,
                                                downloadMissing, ignoreMissing)

        # 2.2 We are going to create a pd.DataFrame from a dictionary of pd.Series
        single_symbol = len(symbols) == 1
        single_field = len(fields) == 1
        data_dic = {}

        # NOTE(review): assumes get_filenames returns one file per symbol, in
        # the same order as symbols -- confirm against FileManager
        for f, symbol in zip(files, symbols):
            # Create DataFrame from the csv
            new_data = pd.read_csv(os.path.join(self.dir, f))
            # Change the index of the DataFrame to be the date
            new_data = new_data.set_index(pd.to_datetime(new_data['Date']))

            for field in fields:
                if single_symbol:
                    colname = field
                elif single_field:
                    colname = symbol
                else:
                    colname = "%s %s" % (symbol, field)
                # Adds the pd.Series to the dictionary
                data_dic[colname] = new_data[field]

        # 2.3 Create, sort and slice the data
        data = pd.DataFrame(data_dic)
        # Sort because Yahoo Finance data comes reverse; sort_index replaces
        # the argument-less DataFrame.sort() that current pandas no longer has
        data = data.sort_index()[start:end]

        # Save a cache version if requested
        if save:
            self.save(data, filename_id)
        return data

    def download(self, symbols, start, end):
        '''
        Runs get_data with the cache disabled (no cache read, no cache write)
        and discards the resulting DataFrame

        Parameters
        ----------
            symbols: str or list of str
            start: datetime, with the initial date
            end: datetime, with the final date
        '''
        self.get_data(symbols, start, end, useCache=False, save=False)

Пример #5

Показать файл

Файл: DataAccess.py Проект: euclude/PythonFinance

class DataAccess(object):
    '''
    Class to manage the Access to the Data
    Features:
        1. Easy access to the data
        2. Download data from Yahoo! Finance
        3. Serialization of the data
    '''
    def __init__(self, dir_path='./data/'):
        '''
        Initialize the instance on the given data directory (see set_dir)
        Args:
            dir_path='./data/' - path of the data directory
        '''
        self.set_dir(dir_path)

    def set_dir(self, dir_path):
        '''
        Initialize the FileManager
        Creates the directories
        Stores the absolute paths to the directories on the instance
        '''
        self.dir = os.path.realpath(dir_path)  # Absolute Path
        self.cache_dir = os.path.join(self.dir, 'cached')
        self.file_manager = FileManager(dir_path)

        # Create paths if they don't exist
        for folder in (self.dir, self.cache_dir):
            if not os.access(folder, os.F_OK):
                os.makedirs(folder)

    def empty_dir(self, delete=True):
        '''
        Empty the directory of .csv files. Do not delete the cache files/folder
        (delegated to the FileManager)
        Parameters:
            delete=True - True if want to delete the folder too
        '''
        self.file_manager.empty_dir(delete)

    def empty_cache(self, delete=True):
        '''
        Empty the directory of cached files. Do not delete the .csv files/folder
        Parameters:
            delete=True - True if want to delete the folder too
        Raises:
            OSError - if the cache folder cannot be listed, or cannot be removed
                      when delete is True (e.g. some entry could not be deleted)
        '''
        for f in os.listdir(self.cache_dir):
            try:
                os.remove(os.path.join(self.cache_dir, f))
            except OSError:
                # Best effort: skip entries that cannot be removed, but do not
                # hide unrelated errors the way a bare except would
                pass

        if delete:
            os.rmdir(self.cache_dir)

    def empty_dirs(self, delete=True):
        '''
        Delete both cached files and .csv files.
        Parameters:
            delete=True - True if want to delete the folders too
        '''
        self.empty_cache(delete)
        self.empty_dir(delete)

    def get_data(self, symbol_s, start_date, end_date, field_s, save=True, useCache=True,
                                downloadMissing=True):
        '''
        1. Checks if the data requested has been previously cached:
            1.1 if it was saved, then load the cache version
            1.2 if not loads the data from the .csv files
                1.2.1 saves the cache version (optional)

        Args:
            symbol_s - symbol (str) or list of symbols
            start_date - datetime with the initial date
            end_date - datetime with the final date
            field_s - field (str) or list of fields
            save=True - True if want to save the cache version
            useCache=True - True if want to load a cached version, if available
            downloadMissing=True - True if want to download unavailable data

        Returns:
            pandas.DataFrame - with the data of the symbols and fields requested
        '''
        # 0. A single symbol/field becomes a list of one item BEFORE the cache
        #    key is built; joining a bare string would split it into characters
        #    ('AAPL' -> 'A_A_P_L') and collide with a list of 1-letter symbols
        if isinstance(symbol_s, str):
            symbol_s = [symbol_s]
        if isinstance(field_s, str):
            field_s = [field_s]

        # 0. Generate a string which represents the data requested
        filename_large = "%s_%s_%s_%s" % ('_'.join(symbol_s), start_date.strftime('%m-%d-%Y'),
                            end_date.strftime('%m-%d-%Y'), '-'.join(field_s))
        # 0. Generates hash key of the string to reduce filename length
        h = hashlib.md5()
        h.update(filename_large.encode('utf8'))
        filename = h.hexdigest() + ".data"

        # 1. Load the Data; the cache file is only read when it may be used
        if useCache:
            data = self.load(filename)
            if data is not None:
                # 1.1 Data was cached so return it
                return data

        # 1.2 Data was not cached before need to load the data from csv files
        df = self.get_data_from_files(symbol_s, start_date, end_date, field_s, downloadMissing)
        if save:
            # 1.2.1 Saves the cache version
            self.save(df, filename)
        return df

    def load(self, name):
        '''
        Checks for an existing file name and if exists returns the data saved
        Args:
            name - file name inside the cache directory
        Returns:
            the unpickled object, or None if the file does not exist
        '''
        f = os.path.join(self.cache_dir, name)
        if not os.access(f, os.F_OK):
            return None
        # NOTE: unpickling can execute arbitrary code; this is only safe while
        # the cache directory is written by trusted code
        # pd.load is gone from current pandas; keep it only as a fallback
        reader = getattr(pd, 'read_pickle', None) or pd.load
        return reader(f)

    def save(self, data, name):
        '''
        Save a serialized (pickle) version of the data to the cache directory
        Args:
            data - object offering to_pickle(path) or the legacy save(path)
            name - file name inside the cache directory
        '''
        f = os.path.join(self.cache_dir, name)
        # DataFrame.save is gone from current pandas; keep it only as a fallback
        writer = getattr(data, 'to_pickle', None)
        if writer is None:
            writer = data.save
        writer(f)

    def get_data_from_files(self, symbol_s, start_date, end_date, field_s, downloadMissing=True):
        '''
        Gets the data directly from the csv files

        Column names: the field when a single symbol is requested, the symbol
        when several symbols and a single field are requested, and
        "<symbol> <field>" otherwise.

        Args:
            symbol_s - symbol (str) or list of symbols
            start_date - datetime with the initial date
            end_date - datetime with the final date
            field_s - field (str) or list of fields
            downloadMissing=True - forwarded to FileManager.get_data

        Returns:
            pandas.DataFrame - with the data of the symbols and fields requested
                                index: DatetimeIndex
        '''
        # 0. If ask for only one symbol or field convert it to a list of one item
        if isinstance(symbol_s, str):
            symbol_s = [symbol_s]
        if isinstance(field_s, str):
            field_s = [field_s]

        # 1. Get the file names with the information needed from the FileManager
        files = self.file_manager.get_data(symbol_s, start_date, end_date, downloadMissing)
        if isinstance(files, str):
            files = [files]

        # 2. We are going to create a pd.DataFrame from a dictionary of pd.Series
        single_symbol = len(symbol_s) == 1
        single_field = len(field_s) == 1
        data_dic = {}

        # NOTE(review): assumes the FileManager returns one file per symbol, in
        # the same order as symbol_s -- confirm against FileManager
        for f, symbol in zip(files, symbol_s):
            n_data = pd.read_csv(os.path.join(self.dir, f))
            # Needs to convert the string to datetime so the index of the Series is DatetimeIndex
            n_data = n_data.set_index(pd.to_datetime(n_data['Date']))

            for field in field_s:
                if single_symbol:
                    colname = field
                elif single_field:
                    colname = symbol
                else:
                    colname = "%s %s" % (symbol, field)
                # Adds the pd.Series to the dictionary
                data_dic[colname] = n_data[field]

        # 3. Create and return a pd.DataFrame from the dictionary of pd.Series
        df = pd.DataFrame(data_dic)
        # Sort because Yahoo Finance data comes reverse; sort_index replaces
        # the argument-less DataFrame.sort() that current pandas no longer has
        df = df.sort_index()
        # Slice by date to only return what is important; .loc is the
        # label-based replacement for the removed .ix indexer
        return df.loc[start_date:end_date]