예제 #1
0
 def getConnectionDetails(self):
     return constants.CONNECTION_PROPERTIES.format(
         get_property('db_config', 'driver'),
         get_property('db_config', 'server_name'),
         get_property('db_config', 'database_name'),
         get_property('db_config', 'username'),
         get_property('db_config', 'password'))
예제 #2
0
    def __initialize_playlist_history_database(self):
        user_id = get_property(constants.TABLES_DETAILS_DOMAIN,
                               constants.COVER_ID_COLUMN_NAME_KEY)
        song_id = get_property(constants.TABLES_DETAILS_DOMAIN,
                               constants.COVER_SONG_ID_COLUMN_NAME_KEY)
        play_date = get_property(
            constants.TABLES_DETAILS_DOMAIN,
            constants.PLAYLIST_HISTORY_DATE_COLUMN_NAME_KEY)

        table_name = get_property(constants.TABLES_DETAILS_DOMAIN,
                                  constants.PLAYLIST_HISTORY_TABLE_NAME_KEY)

        self.__execute_create_table_query([user_id, song_id, play_date],
                                          table_name)
예제 #3
0
    def __initialize_covers_database(self):
        cover_id = get_property(constants.TABLES_DETAILS_DOMAIN,
                                constants.COVER_ID_COLUMN_NAME_KEY)
        cover_song_id = get_property(constants.TABLES_DETAILS_DOMAIN,
                                     constants.COVER_SONG_ID_COLUMN_NAME_KEY)
        artist_name = get_property(constants.TABLES_DETAILS_DOMAIN,
                                   constants.COVER_ARTIST_NAME_COLUMN_NAME_KEY)
        title = get_property(constants.TABLES_DETAILS_DOMAIN,
                             constants.COVER_TITLE_NAME_COLUMN_NAME_KEY)

        table_name = get_property(constants.TABLES_DETAILS_DOMAIN,
                                  constants.COVER_TABLE_NAME_KEY)

        self.__execute_create_table_query(
            [cover_id, cover_song_id, artist_name, title], table_name)
예제 #4
0
 def find_most_popular_artists(self, count):
     artist_column_name = get_property(
         constants.TABLES_DETAILS_DOMAIN,
         constants.COVER_ARTIST_NAME_COLUMN_NAME_KEY),
     result = self.__database_manager.select(
         self.__find_most_popular(count, artist_column_name))
     return result
예제 #5
0
class FileLoader:
    __BATCH_SIZE = int(get_property(constants.APPLICATION_PROPERTIES_DOMAIN, constants.BATCH_SIZE_KEY))

    def __init__(self, encoding_format, column_separator):
        self.__database_manager = DatabaseManager()
        self.__item_processed = 0
        self.__inMemoryRecords = []
        self.__encoding_format = encoding_format
        self.__column_separator = column_separator

    @time_counter
    def to_database_from_file(self, file_path, table, columns):

        print("Zaczynam wczytywanie...")
        with open(file_path, encoding=self.__encoding_format) as fileContent:
            for line in fileContent:
                if self.__if_use_batch() and self.__if_records_in_memory_equals_to_batch_size():
                    self.__insert_items_to_database(table, columns)
                    self.__clear_precessed_items()
                self.__inMemoryRecords.append(self.__getColumnsPerRecord(line))
                self.__item_processed = self.__item_processed + 1
        self.__insert_items_to_database(table, columns)
        self.__clear_precessed_items()

        print("Koncze wczytywanie...")
        pass

    def __get_prepared_insert_statement(self, table, columns):

        values_gaps = ""
        for i in range(columns):
            if i == columns - 1:
                values_gaps += "?"
            else:
                values_gaps += "?,"

        statement = constants.INSERT_INTO.format(table, values_gaps)
        return statement

    def __if_use_batch(self):
        return bool(get_property(constants.APPLICATION_PROPERTIES_DOMAIN, constants.USE_BATCH))

    def __if_records_in_memory_equals_to_batch_size(self):
        return self.__item_processed == FileLoader.__BATCH_SIZE

    def __insert_items_to_database(self, table, columns):
        prepared_insert_statement = self.__get_prepared_insert_statement(table, columns)
        self.__database_manager.executeMany(prepared_insert_statement, self.__inMemoryRecords)
        self.__database_manager.commit()

    def __clear_precessed_items(self):
        self.__inMemoryRecords = []
        self.__item_processed = 0
        pass

    def __getColumnsPerRecord(self, textLine):
        # Remove \n from the end of line
        line_without_end_line_sign = textLine.rstrip('\n')
        return line_without_end_line_sign.split(self.__column_separator)
예제 #6
0
    def __execute_create_table_query(self, column_names, table_name):

        columns = []
        for column_name in column_names:
            columns.append(create_standard_varchar_column(column_name))

        merged_columns = create_columns(columns)
        database_name = get_property(constants.DATABASE_CONFIG_DOMAIN,
                                     constants.DATABASE_SERVER_NAME)
        database_schema = get_property(constants.DATABASE_CONFIG_DOMAIN,
                                       constants.DATABASE_SCHEMA)
        table_origin = '{}.{}'.format(database_name, database_schema)

        query = constants.CREATE_TABLE.format(table_origin, table_name,
                                              merged_columns)
        print(query)
        self.__crsr.execute(query)
예제 #7
0
파일: main.py 프로젝트: xsiir/etlproject
def main():
    """
        Pomysł na wykonanie projektu:
            1) Niech istnieje klasa (FilelLoader) będąca odpowiedzialna za różne opracje na plikach.
                - u nas będzie to wczytanie danych z pliku .txt i zapis tychże danych do bazy.
            2) Niech cała aplikacja korzysta tylko z jednego połączenia do bazy danych (DatabaseManager).
                Niech klasa to obslugujaca bedzie singletonem. Kazda operacja na bazie danych powinna przechodzic przez ta klase.
            3) Wszystkie bardziej zlozone polecenia SQL tudziez inne bloki tekstowe niech beda sie znajdowac w jednym miejscu.
                - plik ten nazwalem 'constants'
            4) Wszystkie dane konfiguracyjne niech beda trzymane w pliku properties a dostep do nich bedzie mozliwy poprzez
                jedno narzedzie (tu: metoda) dla calej aplikacji


        Wczytywanie danych z pliku do bazy danych trwa ~11:47
    """

    print("Prace przygotowal: Sienkiewicz Maciej")
    encoding_format = get_property(constants.FILE_CONFIG_DOMAIN,
                                   constants.ENCODING_FORMAT_KEY)
    field_separator = get_property(constants.FILE_CONFIG_DOMAIN,
                                   constants.FIELD_SEPARATOR_KEY)
    file_loader = FileLoader(encoding_format, field_separator)

    print("Zaladujmy pierwszy plik do bazy...")
    first_file_path = get_property(constants.FILE_PATHS_DOMAIN,
                                   constants.UNIQUE_TRACKS_PATH)
    file_loader.to_database_from_file(first_file_path, "model.dbo.Covers", 4)
    print("Ladowanie skonczone...")

    print("Zaladujmy drugi plik do bazy...")
    second_file_path = get_property(constants.FILE_PATHS_DOMAIN,
                                    constants.PLAYLIST_HISTORY_PATH)
    file_loader.to_database_from_file(second_file_path,
                                      "model.dbo.PlaylistHistory", 3)
    print("Ladowanie skonczone...")

    song_dao = SongDAO()
    for row in song_dao.find_most_popular_artists('5'):
        print('Utworty artysty: {} zostaly odtworzone {} razy'.format(
            row[0], row[1]))

    for row in song_dao.find_most_popular_tiles('5'):
        print('Utwor {} zostal odtworzone {} razy'.format(row[0], row[1]))
예제 #8
0
    def __find_most_popular(self, count, searched_column_name):
        database_name = get_property(constants.DATABASE_CONFIG_DOMAIN,
                                     constants.DATABASE_SERVER_NAME)
        database_schema = get_property(constants.DATABASE_CONFIG_DOMAIN,
                                       constants.DATABASE_SCHEMA)
        table_origin = '{}.{}'.format(database_name, database_schema)

        query = constants.SELECT_MOST_POPULAR_BAND.format(
            count, table_origin, searched_column_name[0],
            get_property(constants.TABLES_DETAILS_DOMAIN,
                         constants.COVER_TABLE_NAME_KEY),
            get_property(constants.TABLES_DETAILS_DOMAIN,
                         constants.PLAYLIST_HISTORY_TABLE_NAME_KEY),
            get_property(constants.TABLES_DETAILS_DOMAIN,
                         constants.COVER_SONG_ID_COLUMN_NAME_KEY),
            get_property(constants.TABLES_DETAILS_DOMAIN,
                         constants.PLAYLIST_HISTORY_SONG_ID_COLUMN_NAME_KEY))
        return query
예제 #9
0
 def __if_use_batch(self):
     return bool(get_property(constants.APPLICATION_PROPERTIES_DOMAIN, constants.USE_BATCH))
예제 #10
0
 def find_most_popular_tiles(self, count):
     title = get_property(constants.TABLES_DETAILS_DOMAIN,
                          constants.COVER_TITLE_NAME_COLUMN_NAME_KEY),
     result = self.__database_manager.select(
         self.__find_most_popular(count, title))
     return result