示例#1
0
    def test_convert_dictionary_to_mysql_table_function(self):
        from fundamentals.mysql import writequery
        sqlQuery = "DROP TABLE IF EXISTS `testing_table`; CREATE TABLE IF NOT EXISTS `testing_table` (`id` INT NOT NULL,`uniquekey1` varchar(45) NOT NULL default 'ha',`uniqueKey2` varchar(45) NOT NULL default 'ha', PRIMARY KEY (`id`))"
        writequery(log=log,
                   sqlQuery=sqlQuery,
                   dbConn=dbConn,
                   Force=False,
                   manyValueList=False)

        dictionary = {
            "a newKey": "cool",
            "and another": "super cool",
            "uniquekey1": "cheese",
            "uniqueKey2": "burgers"
        }
        from fundamentals.mysql import convert_dictionary_to_mysql_table
        message = convert_dictionary_to_mysql_table(
            dbConn=dbConn,
            log=log,
            dictionary=dictionary,
            dbTableName="testing_table",
            uniqueKeyList=["uniquekey1", "uniqueKey2"],
            createHelperTables=False,
            dateModified=False,
            returnInsertOnly=False)
示例#2
0
    def test_return_inserts_with_datetime_pre_compiled(self):
        from fundamentals.mysql import writequery
        sqlQuery = "CREATE TABLE IF NOT EXISTS `testing_table` (`id` INT NOT NULL, PRIMARY KEY (`id`))"
        writequery(log=log,
                   sqlQuery=sqlQuery,
                   dbConn=dbConn,
                   Force=False,
                   manyValueList=False)

        dictionary = {
            "a newKey": "cool",
            "and another": "super cool",
            "uniquekey1": "cheese",
            "uniqueKey2": "burgers"
        }
        from fundamentals.mysql import convert_dictionary_to_mysql_table
        message = convert_dictionary_to_mysql_table(
            dbConn=dbConn,
            log=log,
            dictionary=dictionary,
            dbTableName="testing_table",
            uniqueKeyList=["uniquekey1", "uniqueKey2"],
            createHelperTables=False,
            dateModified=False,
            returnInsertOnly=True,
            reDatetime=reDatetime)
def list_of_dictionaries_to_mysql_inserts(
        log,
        datalist,
        tableName):
    """Convert a python list of dictionaries to pretty csv output

    **Key Arguments:**
        - ``log`` -- logger
        - ``datalist`` -- a list of dictionaries
        - ``tableName`` -- the name of the table to create the insert statements for

    **Return:**
        - ``output`` -- the mysql insert statements (as a string)

    **Usage:**

        .. code-block:: python 

            from fundamentals.files import list_of_dictionaries_to_mysql_inserts
            mysqlInserts = list_of_dictionaries_to_mysql_inserts(
                log=log,
                datalist=dataList,
                tableName="my_new_table"
            )
            print mysqlInserts

        this output the following:

        .. code-block:: plain

            INSERT INTO `testing_table` (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES ("cool" ,"super cool" ,"2016-09-14T13:17:26" ,"burgers" ,"cheese")  ON DUPLICATE KEY UPDATE  a_newKey="cool", and_another="super cool", dateCreated="2016-09-14T13:17:26", uniqueKey2="burgers", uniquekey1="cheese" ;
            ...
            ...
    """
    log.debug('starting the ``list_of_dictionaries_to_mysql_inserts`` function')

    if not len(datalist):
        return "NO MATCH"

    inserts = []

    for d in datalist:
        insertCommand = convert_dictionary_to_mysql_table(
            log=log,
            dictionary=d,
            dbTableName="testing_table",
            uniqueKeyList=[],
            dateModified=False,
            returnInsertOnly=True,
            replace=True,
            batchInserts=False
        )
        inserts.append(insertCommand)

    output = ";\n".join(inserts) + ";"

    log.debug('completed the ``list_of_dictionaries_to_mysql_inserts`` function')
    return output
示例#4
0
def list_of_dictionaries_to_mysql_inserts(log, datalist, tableName):
    """Convert a python list of dictionaries to pretty csv output

    **Key Arguments:**
        - ``log`` -- logger
        - ``datalist`` -- a list of dictionaries
        - ``tableName`` -- the name of the table to create the insert statements for

    **Return:**
        - ``output`` -- the mysql insert statements (as a string)

    **Usage:**

        .. code-block:: python 

            from fundamentals.files import list_of_dictionaries_to_mysql_inserts
            mysqlInserts = list_of_dictionaries_to_mysql_inserts(
                log=log,
                datalist=dataList,
                tableName="my_new_table"
            )
            print mysqlInserts

        this output the following:

        .. code-block:: plain

            INSERT INTO `testing_table` (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES ("cool" ,"super cool" ,"2016-09-14T13:17:26" ,"burgers" ,"cheese")  ON DUPLICATE KEY UPDATE  a_newKey="cool", and_another="super cool", dateCreated="2016-09-14T13:17:26", uniqueKey2="burgers", uniquekey1="cheese" ;
            ...
            ...
    """
    log.info('starting the ``list_of_dictionaries_to_mysql_inserts`` function')

    if not len(datalist):
        return "NO MATCH"

    inserts = []

    for d in datalist:
        insertCommand = convert_dictionary_to_mysql_table(
            log=log,
            dictionary=d,
            dbTableName="testing_table",
            uniqueKeyList=[],
            dateModified=False,
            returnInsertOnly=True,
            replace=True,
            batchInserts=False)
        inserts.append(insertCommand)

    output = ";\n".join(inserts) + ";"

    log.info(
        'completed the ``list_of_dictionaries_to_mysql_inserts`` function')
    return output
示例#5
0
 def test_convert_dictionary_to_mysql_table_function(self):
     dictionary = {"a newKey": "cool", "and another": "super cool",
                   "uniquekey1": "cheese", "uniqueKey2": "burgers"}
     from fundamentals.mysql import convert_dictionary_to_mysql_table
     message = convert_dictionary_to_mysql_table(
         dbConn=dbConn,
         log=log,
         dictionary=dictionary,
         dbTableName="testing_table",
         uniqueKeyList=["uniquekey1", "uniqueKey2"],
         createHelperTables=False,
         dateModified=False,
         returnInsertOnly=False
     )
 def test_convert_dictionary_to_mysql_table_function(self):
     dictionary = {"a newKey": "cool", "and another": "super cool",
                   "uniquekey1": "cheese", "uniqueKey2": "burgers"}
     from fundamentals.mysql import convert_dictionary_to_mysql_table
     message = convert_dictionary_to_mysql_table(
         dbConn=dbConn,
         log=log,
         dictionary=dictionary,
         dbTableName="testing_table",
         uniqueKeyList=["uniquekey1", "uniqueKey2"],
         createHelperTables=False,
         dateModified=False,
         returnInsertOnly=False
     )
示例#7
0
    def _list_of_dictionaries_to_mysql_inserts(self,
                                               tableName,
                                               createStatement=None):
        """Convert a python list of dictionaries to pretty csv output

        **Key Arguments:**
            - ``tableName`` -- the name of the table to create the insert statements for
            - ``createStatement`` -- add this create statement to the top of the file. Will only be executed if no table of that name exists in database. Default *None*

        **Return:**
            - ``output`` -- the mysql insert statements (as a string)
        """
        self.log.debug(
            'completed the ````_list_of_dictionaries_to_mysql_inserts`` function'
        )

        if not len(self.listOfDictionaries):
            return "NO MATCH"

        dataCopy = copy.deepcopy(self.listOfDictionaries)

        if createStatement:
            output = createStatement + "\n"
        else:
            output = ""

        inserts = []

        inserts = []
        inserts[:] = [
            convert_dictionary_to_mysql_table(log=self.log,
                                              dictionary=d,
                                              dbTableName=tableName,
                                              uniqueKeyList=[],
                                              dateModified=False,
                                              returnInsertOnly=True,
                                              replace=True,
                                              batchInserts=False,
                                              reDatetime=self.reDatetime)
            for d in dataCopy
        ]
        output += ";\n".join(inserts) + ";"

        self.log.debug(
            'completed the ``_list_of_dictionaries_to_mysql_inserts`` function'
        )
        return output
示例#8
0
    def test_return_inserts_non_batch(self):
        dictionary = {"a newKey": "cool", "and another": "super cool",
                      "uniquekey1": "cheese", "uniqueKey2": "burgers"}
        from fundamentals.mysql import convert_dictionary_to_mysql_table
        inserts = convert_dictionary_to_mysql_table(
            dbConn=dbConn,
            log=log,
            dictionary=dictionary,
            dbTableName="testing_table",
            uniqueKeyList=["uniquekey1", "uniqueKey2"],
            dateModified=False,
            returnInsertOnly=True,
            replace=True,
            batchInserts=False
        )

        print inserts
    def test_return_inserts_non_batch(self):
        dictionary = {"a newKey": "cool", "and another": "super cool",
                      "uniquekey1": "cheese", "uniqueKey2": "burgers"}
        from fundamentals.mysql import convert_dictionary_to_mysql_table
        inserts = convert_dictionary_to_mysql_table(
            dbConn=dbConn,
            log=log,
            dictionary=dictionary,
            dbTableName="testing_table",
            uniqueKeyList=["uniquekey1", "uniqueKey2"],
            dateModified=False,
            returnInsertOnly=True,
            replace=True,
            batchInserts=False
        )

        print inserts
    def _list_of_dictionaries_to_mysql_inserts(
            self,
            tableName,
            createStatement=None):
        """Convert a python list of dictionaries to pretty csv output

        **Key Arguments:**
            - ``tableName`` -- the name of the table to create the insert statements for
            - ``createStatement`` -- add this create statement to the top of the file. Will only be executed if no table of that name exists in database. Default *None*

        **Return:**
            - ``output`` -- the mysql insert statements (as a string)
        """
        self.log.debug(
            'completed the ````_list_of_dictionaries_to_mysql_inserts`` function')

        if not len(self.listOfDictionaries):
            return "NO MATCH"

        dataCopy = copy.deepcopy(self.listOfDictionaries)

        if createStatement:
            output = createStatement + "\n"
        else:
            output = ""

        inserts = []

        inserts = []
        inserts[:] = [convert_dictionary_to_mysql_table(log=self.log, dictionary=d, dbTableName=tableName, uniqueKeyList=[
        ], dateModified=False, returnInsertOnly=True, replace=True, batchInserts=False, reDatetime=self.reDatetime) for d in dataCopy]
        output += ";\n".join(inserts) + ";"

        self.log.debug(
            'completed the ``_list_of_dictionaries_to_mysql_inserts`` function')
        return output
    def add_yaml_file_content_to_database(self, filepath, deleteFile=False):
        """*given a file to a yaml file, add yaml file content to database*

        **Key Arguments:**
            - ``filepath`` -- the path to the yaml file
            - ``deleteFile`` -- delete the yaml file when its content has been added to the database. Default *False*

        **Return:**
            - None

        **Usage:**

            To parse and import the contents of a single yaml file into the database, use the following:

            .. code-block:: python 

                from fundamentals.mysql import yaml_to_database
                # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
                yaml2db = yaml_to_database(
                    log=log,
                    settings=settings,
                    dbConn=dbConn
                ) 
                yaml2db.add_yaml_file_content_to_database(
                    filepath=${1:"/path/to/file.yaml"},
                    deleteFile=True
                )
        """
        self.log.debug(
            'completed the ````add_yaml_file_content_to_database`` method')

        import codecs
        try:
            self.log.debug("attempting to open the file %s" % (filepath, ))
            readFile = codecs.open(filepath, encoding='utf-8', mode='r')
            thisData = readFile.read()
            readFile.close()
        except IOError as e:
            message = 'could not open the file %s' % (filepath, )
            self.log.critical(message)
            raise IOError(message)
        readFile.close()

        matchObject = re.finditer(
            r'(^|\n)(?P<key>[^\:]*)\:\s(?P<value>.*?)(\n|$)',
            thisData,
            flags=re.M | re.S  # re.S
        )

        yamlContent = {}
        for match in matchObject:
            if match.group("value")[0] == '"' and match.group(
                    "value")[-1] == '"':
                v = match.group("value")[1:-1]
            elif match.group("value")[0] == "'" and match.group(
                    "value")[-1] == "'":
                v = match.group("value")[1:-1]
            else:
                v = match.group("value")
            yamlContent[match.group("key")] = v

        if "table" not in yamlContent:
            self.log.warning(
                'A table value is need in the yaml content to indicate which database table to add the content to: %(filepath)s'
                % locals())
            return None

        # NOTE THERE MAY BE MORE THAN ONE DATABASE TABLE
        dbTablesTmp = yamlContent["table"].split(",")
        del yamlContent["table"]
        dbTables = []
        dbTables[:] = [d.strip() for d in dbTablesTmp]

        # UNSHORTEN URL
        try:
            r = requests.head(yamlContent["url"], allow_redirects=True)
            yamlContent["url"] = r.url
        except:
            pass

        yamlContent["original_yaml_path"] = filepath

        if "url" in yamlContent:
            uniqueKeyList = ["url"]
        else:
            uniqueKeyList = []

        for t in dbTables:
            convert_dictionary_to_mysql_table(dbConn=self.dbConn,
                                              log=self.log,
                                              dictionary=yamlContent,
                                              dbTableName=t,
                                              uniqueKeyList=uniqueKeyList,
                                              dateModified=True,
                                              returnInsertOnly=False,
                                              replace=True)
        if deleteFile:
            os.remove(filepath)

        self.log.debug(
            'completed the ``add_yaml_file_content_to_database`` method')
        return None
示例#12
0
def insert_list_of_dictionaries_into_database_tables(dbConn,
                                                     log,
                                                     dictList,
                                                     dbTableName,
                                                     uniqueKeyList=[],
                                                     dateModified=False,
                                                     dateCreated=True,
                                                     batchSize=2500,
                                                     replace=False,
                                                     dbSettings=False):
    """insert list of dictionaries into database tables

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dictList`` -- list of python dictionaries to add to the database table
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database
        - ``dateModified`` -- add the modification date as a column in the database
        - ``dateCreated`` -- add the created date as a column in the database
        - ``batchSize`` -- batch the insert commands into *batchSize* batches
        - ``replace`` -- repalce row if a duplicate is found
        - ``dbSettings`` -- pass in the database settings so multiprocessing can establish one connection per process (might not be faster)

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=dictList,
                dbTableName="test_insert_many",
                uniqueKeyList=["col1", "col3"],
                dateModified=False,
                batchSize=2500
            )
    """

    log.debug(
        'starting the ````insert_list_of_dictionaries_into_database_tables`` function'
    )

    global count
    global totalCount
    global globalDbConn
    global sharedList

    reDate = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    if dbSettings:
        globalDbConn = dbSettings
    else:
        globalDbConn = dbConn

    if len(dictList) == 0:
        log.warning('the dictionary to be added to the database is empty' %
                    locals())
        return None

    if len(dictList):
        # FIND BUG IN MYSQL QUERY BY UNCOMMENTING
        # tot = len(dictList)
        # for index, d in enumerate(dictList):
        #     if index > 1:
        #         # Cursor up one line and clear line
        #         sys.stdout.write("\x1b[1A\x1b[2K")

        #     percent = (float(index) / float(tot)) * 100.
        #     print('%(index)s/%(tot)s (%(percent)1.1f%% done)' % locals())

        #     convert_dictionary_to_mysql_table(
        #         dbConn=dbConn,
        #         log=log,
        #         dictionary=d,
        #         dbTableName=dbTableName,
        #         uniqueKeyList=uniqueKeyList,
        #         dateModified=dateModified,
        #         reDatetime=reDate,
        #         replace=replace,
        #         dateCreated=dateCreated)
        # sys.exit(0)

        convert_dictionary_to_mysql_table(dbConn=dbConn,
                                          log=log,
                                          dictionary=dictList[0],
                                          dbTableName=dbTableName,
                                          uniqueKeyList=uniqueKeyList,
                                          dateModified=dateModified,
                                          reDatetime=reDate,
                                          replace=replace,
                                          dateCreated=dateCreated)
        dictList = dictList[1:]

    dbConn.autocommit(False)

    if len(dictList):

        total = len(dictList)
        batches = int(old_div(total, batchSize))

        start = 0
        end = 0
        sharedList = []
        for i in range(batches + 1):
            end = end + batchSize
            start = i * batchSize
            thisBatch = dictList[start:end]
            sharedList.append((thisBatch, end))

        totalCount = total + 1
        ltotalCount = totalCount

        print("Starting to insert %(ltotalCount)s rows into %(dbTableName)s" %
              locals())

        if dbSettings == False:
            fmultiprocess(log=log,
                          function=_insert_single_batch_into_database,
                          inputArray=list(range(len(sharedList))),
                          dbTableName=dbTableName,
                          uniqueKeyList=uniqueKeyList,
                          dateModified=dateModified,
                          replace=replace,
                          batchSize=batchSize,
                          reDatetime=reDate,
                          dateCreated=dateCreated)

        else:
            fmultiprocess(log=log,
                          function=_add_dictlist_to_database_via_load_in_file,
                          inputArray=list(range(len(sharedList))),
                          dbTablename=dbTableName,
                          dbSettings=dbSettings,
                          dateModified=dateModified)

        sys.stdout.write("\x1b[1A\x1b[2K")
        print(
            "%(ltotalCount)s / %(ltotalCount)s rows inserted into %(dbTableName)s"
            % locals())

    log.debug(
        'completed the ``insert_list_of_dictionaries_into_database_tables`` function'
    )
    return None
示例#13
0
def _insert_single_batch_into_database(batchIndex, log, dbTableName,
                                       uniqueKeyList, dateModified, replace,
                                       batchSize, reDatetime, dateCreated):
    """*summary of function*

    **Key Arguments:**
        - ``batchIndex`` -- the index of the batch to insert
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger

    **Return:**
        - None

    **Usage:**
        .. todo::

            add usage info
            create a sublime snippet for usage

        .. code-block:: python 

            usage code            
    """
    log.debug('starting the ``_insert_single_batch_into_database`` function')

    global totalCount
    global globalDbConn
    global sharedList

    batch = sharedList[batchIndex]

    reDate = reDatetime

    if isinstance(globalDbConn, dict):
        # SETUP ALL DATABASE CONNECTIONS

        dbConn = database(log=log, dbSettings=globalDbConn,
                          autocommit=False).connect()
    else:
        dbConn = globalDbConn

    count = batch[1]
    if count > totalCount:
        count = totalCount
    ltotalCount = totalCount

    inserted = False
    while inserted == False:

        if not replace:
            insertVerb = "INSERT IGNORE"
        else:
            insertVerb = "INSERT IGNORE"

        uniKeys = set().union(*(list(d.keys()) for d in batch[0]))
        tmp = []
        tmp[:] = [m.replace(" ", "_").replace("-", "_") for m in uniKeys]
        uniKeys = tmp

        myKeys = '`,`'.join(uniKeys)
        vals = [
            tuple([None if d[k] in ["None", None] else d[k] for k in uniKeys])
            for d in batch[0]
        ]
        valueString = ("%s, " * len(vals[0]))[:-2]
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""

        if not dateCreated:
            insertCommand = insertCommand.replace(", dateCreated)",
                                                  ")").replace(
                                                      ", NOW())", ")")

        dup = ""
        if replace:
            dup = " ON DUPLICATE KEY UPDATE "
            for k in uniKeys:
                dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()
            dup = """%(dup)s updated=1, dateLastModified=NOW()""" % locals()

        insertCommand = insertCommand + dup

        insertCommand = insertCommand.replace('\\""', '\\" "')
        insertCommand = insertCommand.replace('""', "null")
        insertCommand = insertCommand.replace('"None"', 'null')

        message = ""
        # log.debug('adding new data to the %s table; query: %s' %
        # (dbTableName, addValue))
        try:
            message = writequery(log=log,
                                 sqlQuery=insertCommand,
                                 dbConn=dbConn,
                                 Force=True,
                                 manyValueList=vals)
        except:
            theseInserts = []
            for aDict in batch[0]:
                insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                    dbConn=dbConn,
                    log=log,
                    dictionary=aDict,
                    dbTableName=dbTableName,
                    uniqueKeyList=uniqueKeyList,
                    dateModified=dateModified,
                    returnInsertOnly=True,
                    replace=replace,
                    reDatetime=reDate,
                    skipChecks=True)
                theseInserts.append(valueTuple)

            message = ""
            # log.debug('adding new data to the %s table; query: %s' %
            # (dbTableName, addValue))
            message = writequery(log=log,
                                 sqlQuery=insertCommand,
                                 dbConn=dbConn,
                                 Force=True,
                                 manyValueList=theseInserts)

        if message == "unknown column":
            for aDict in batch:
                convert_dictionary_to_mysql_table(dbConn=dbConn,
                                                  log=log,
                                                  dictionary=aDict,
                                                  dbTableName=dbTableName,
                                                  uniqueKeyList=uniqueKeyList,
                                                  dateModified=dateModified,
                                                  reDatetime=reDate,
                                                  replace=replace)
        else:
            inserted = True

        dbConn.commit()

    log.debug('completed the ``_insert_single_batch_into_database`` function')
    return "None"
示例#14
0
class yaml_to_database():
    """
    *Take key-values from yaml files including a tablename(s) and add them to a mysql database table*

    **Key Arguments:**
        - ``log`` -- logger
        - ``settings`` -- the settings dictionary
        - ``pathToInputDir`` -- path to the directory containing the yaml files that will be added to the database table(s). Default *False*
        - ``dbConn`` -- connection to database to add the content to
        - ``deleteFiles`` - - delete the yamls files once their content has been added to the database. Default * False*

    **Usage:**

        To setup your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_). 

        To initiate a ``yaml2db`` object, use the following:

        .. code-block:: python 

            from fundamentals.mysql import yaml_to_database
            yaml2db = yaml_to_database(
                log=log,
                settings=settings,
                dbConn=dbConn,
                pathToInputDir="/path/to/yaml/directory",
                deleteFiles=False
            ) 

        And here's an example of the content in a yaml file that this ``yaml2db`` object can parse:

        .. code-block:: yaml

            title: Why you should do most of your text editing in : Sublime Text | Sublime Text Tips
            url: http://sublimetexttips.com/why-you-should-do-most-of-your-text-editing-in-sublime-text/?utm_source=drip&utm_medium=email&utm_campaign=editor-proliferation
            kind: webpage
            subtype: article
            table: web_articles,podcasts 
    """

    # Initialisation

    def __init__(self,
                 log,
                 dbConn,
                 pathToInputDir=False,
                 settings=False,
                 deleteFiles=False):
        self.log = log
        log.debug("instansiating a new 'yaml_to_database' object")
        self.settings = settings
        self.pathToInputDir = pathToInputDir
        self.dbConn = dbConn
        self.deleteFiles = deleteFiles
        # xt-self-arg-tmpx

        return None

    def ingest(self):
        """
        *ingest the contents of the directory of yaml files into a database*

        **Return:**
            - None

        **Usage:**

            To import an entire directory of yaml files into a database, use the following:

        .. code-block:: python 

            from fundamentals.mysql import yaml_to_database
            yaml2db = yaml_to_database(
                log=log,
                settings=settings,
                dbConn=dbConn,
                pathToInputDir="/path/to/yaml/directory",
                deleteFiles=False
            ) 
            yaml2db.ingest() 
        """
        self.log.info('starting the ``ingest`` method')

        for d in os.listdir(self.pathToInputDir):
            if os.path.isfile(os.path.join(self.pathToInputDir,
                                           d)) and "yaml" in d.lower():
                self.add_yaml_file_content_to_database(
                    filepath=os.path.join(self.pathToInputDir, d),
                    deleteFile=self.deleteFiles)

        self.log.info('completed the ``ingest`` method')
        return None

    def add_yaml_file_content_to_database(self, filepath, deleteFile=False):
        """*given a file to a yaml file, add yaml file content to database*

        **Key Arguments:**
            - ``filepath`` -- the path to the yaml file
            - ``deleteFile`` -- delete the yaml file when its content has been added to the database. Default *False*

        **Return:**
            - None

        **Usage:**

            To parse and import the contents of a single yaml file into the database, use the following:

            .. code-block:: python 

                from fundamentals.mysql import yaml_to_database
                # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
                yaml2db = yaml_to_database(
                    log=log,
                    settings=settings,
                    dbConn=dbConn
                ) 
                yaml2db.add_yaml_file_content_to_database(
                    filepath=${1:"/path/to/file.yaml"},
                    deleteFile=True
                )
        """
        self.log.info(
            'starting the ``add_yaml_file_content_to_database`` method')

        import codecs
        try:
            self.log.debug("attempting to open the file %s" % (filepath, ))
            readFile = codecs.open(filepath, encoding='utf-8', mode='r')
            thisData = readFile.read()
            readFile.close()
        except IOError, e:
            message = 'could not open the file %s' % (filepath, )
            self.log.critical(message)
            raise IOError(message)
        readFile.close()

        matchObject = re.finditer(
            r'(^|\n)(?P<key>[^\:]*)\:\s(?P<value>.*?)(\n|$)',
            thisData,
            flags=re.M | re.S  # re.S
        )

        yamlContent = {}
        for match in matchObject:
            if match.group("value")[0] == '"' and match.group(
                    "value")[-1] == '"':
                v = match.group("value")[1:-1]
            elif match.group("value")[0] == "'" and match.group(
                    "value")[-1] == "'":
                v = match.group("value")[1:-1]
            else:
                v = match.group("value")
            yamlContent[match.group("key")] = v

        if "table" not in yamlContent:
            self.log.warning(
                'A table value is need in the yaml content to indicate which database table to add the content to: %(filepath)s'
                % locals())
            return None

        # NOTE THERE MAY BE MORE THAN ONE DATABASE TABLE
        dbTablesTmp = yamlContent["table"].split(",")
        del yamlContent["table"]
        dbTables = []
        dbTables[:] = [d.strip() for d in dbTablesTmp]

        # UNSHORTEN URL
        try:
            r = requests.head(yamlContent["url"], allow_redirects=True)
            yamlContent["url"] = r.url
        except:
            pass

        yamlContent["original_yaml_path"] = filepath

        if "url" in yamlContent:
            uniqueKeyList = ["url"]
        else:
            uniqueKeyList = []

        for t in dbTables:
            convert_dictionary_to_mysql_table(dbConn=self.dbConn,
                                              log=self.log,
                                              dictionary=yamlContent,
                                              dbTableName=t,
                                              uniqueKeyList=uniqueKeyList,
                                              dateModified=True,
                                              returnInsertOnly=False,
                                              replace=True)
        if deleteFile:
            os.remove(filepath)

        self.log.info(
            'completed the ``add_yaml_file_content_to_database`` method')
        return None
示例#15
0
def insert_list_of_dictionaries_into_database_tables(dbConn,
                                                     log,
                                                     dictList,
                                                     dbTableName,
                                                     uniqueKeyList=[],
                                                     dateModified=False,
                                                     batchSize=2500,
                                                     replace=False,
                                                     dbSettings=False):
    """insert list of dictionaries into database tables

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dictList`` -- list of python dictionaries to add to the database table
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database
        - ``dateModified`` -- add the modification date as a column in the database
        - ``batchSize`` -- batch the insert commands into *batchSize* batches
        - ``replace`` -- repalce row if a duplicate is found
        - ``dbSettings`` -- pass in the database settings so multiprocessing can establish one connection per process (might not be faster)

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=dictList,
                dbTableName="test_insert_many",
                uniqueKeyList=["col1", "col3"],
                dateModified=False,
                batchSize=2500
            )
    """

    log.info(
        'starting the ``insert_list_of_dictionaries_into_database_tables`` function'
    )

    global count
    global totalCount
    global globalDbConn
    global sharedList

    reDate = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    if dbSettings:
        globalDbConn = dbSettings
    else:
        globalDbConn = dbConn

    if len(dictList) == 0:
        log.warning('the dictionary to be added to the database is empty' %
                    locals())
        return None

    if len(dictList):
        convert_dictionary_to_mysql_table(
            dbConn=dbConn,
            log=log,
            dictionary=dictList[0],
            dbTableName=dbTableName,
            uniqueKeyList=uniqueKeyList,
            dateModified=dateModified,
            reDatetime=reDate,
            replace=replace,
        )

    dbConn.autocommit(False)

    total = len(dictList[1:])
    batches = int(total / batchSize)

    start = 0
    end = 0
    sharedList = []
    for i in range(batches + 1):
        end = end + batchSize
        start = i * batchSize
        thisBatch = dictList[start:end]
        sharedList.append((thisBatch, end))

    totalCount = total + 1
    ltotalCount = totalCount

    print "Starting to insert %(ltotalCount)s rows into %(dbTableName)s" % locals(
    )

    if dbSettings == False:

        fmultiprocess(log=log,
                      function=_insert_single_batch_into_database,
                      inputArray=range(len(sharedList)),
                      dbTableName=dbTableName,
                      uniqueKeyList=uniqueKeyList,
                      dateModified=dateModified,
                      replace=replace,
                      batchSize=batchSize,
                      reDatetime=reDate)

    else:
        fmultiprocess(log=log,
                      function=_add_dictlist_to_database_via_load_in_file,
                      inputArray=range(len(sharedList)),
                      dbTablename=dbTableName,
                      dbSettings=dbSettings)

    sys.stdout.write("\x1b[1A\x1b[2K")
    print "%(ltotalCount)s / %(ltotalCount)s rows inserted into %(dbTableName)s" % locals(
    )

    log.info(
        'completed the ``insert_list_of_dictionaries_into_database_tables`` function'
    )
    return None
示例#16
0
def insert_list_of_dictionaries_into_database_tables(dbConn,
                                                     log,
                                                     dictList,
                                                     dbTableName,
                                                     uniqueKeyList=[],
                                                     dateModified=False,
                                                     batchSize=2500,
                                                     replace=False):
    """insert list of dictionaries into database tables

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dictList`` -- list of python dictionaries to add to the database table
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database
        - ``dateModified`` -- add the modification date as a column in the database
        - ``batchSize`` -- batch the insert commands into *batchSize* batches
        - ``replace`` -- repalce row if a duplicate is found

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=dictList,
                dbTableName="test_insert_many",
                uniqueKeyList=["col1", "col3"],
                dateModified=False,
                batchSize=2500
            )
    """

    log.info(
        'starting the ``insert_list_of_dictionaries_into_database_tables`` function'
    )

    if len(dictList) == 0:
        log.warning('the dictionary to be added to the database is empty' %
                    locals())
        return None

    if len(dictList):
        convert_dictionary_to_mysql_table(dbConn=dbConn,
                                          log=log,
                                          dictionary=dictList[0],
                                          dbTableName=dbTableName,
                                          uniqueKeyList=uniqueKeyList,
                                          dateModified=dateModified,
                                          replace=replace)

    total = len(dictList[1:])
    batches = int(total / batchSize)

    start = 0
    end = 0
    theseBatches = []
    for i in range(batches + 1):
        end = end + batchSize
        start = i * batchSize
        thisBatch = dictList[start:end]
        theseBatches.append(thisBatch)

    totalCount = total
    count = 0

    for batch in theseBatches:
        count += len(batch)
        if count > batchSize:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        if count > totalCount:
            count = totalCount
        print "%(count)s / %(totalCount)s rows inserted into %(dbTableName)s" % locals(
        )

        inserted = False
        while inserted == False:
            theseInserts = []
            for aDict in batch:

                insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                    dbConn=dbConn,
                    log=log,
                    dictionary=aDict,
                    dbTableName=dbTableName,
                    uniqueKeyList=uniqueKeyList,
                    dateModified=dateModified,
                    returnInsertOnly=True,
                    replace=replace)
                theseInserts.append(valueTuple)

            message = ""
            # log.debug('adding new data to the %s table; query: %s' %
            # (dbTableName, addValue))
            message = writequery(log=log,
                                 sqlQuery=insertCommand,
                                 dbConn=dbConn,
                                 Force=True,
                                 manyValueList=theseInserts)

            if message == "unknown column":
                sys.exit(0)
                for aDict in batch:
                    convert_dictionary_to_mysql_table(
                        dbConn=dbConn,
                        log=log,
                        dictionary=aDict,
                        dbTableName=dbTableName,
                        uniqueKeyList=uniqueKeyList,
                        dateModified=dateModified,
                        replace=replace)
            else:
                inserted = True

    log.info(
        'completed the ``insert_list_of_dictionaries_into_database_tables`` function'
    )
    return None