def setUp(self):
    """Store a fresh DBFactory and the manager it returns for ('SOLR', 'test_core')."""
    factory = DBFactory()
    self.dbFactory = factory
    self.dbManager = factory.get_specific_db_manager_and_schema('SOLR', 'test_core')
def postProcessing(options):
    """
        Executes some curating operations over imported data.

        Records whose link matches the mygoblet.org courses pattern and whose
        resource_type is "Training Material" are fetched, deleted and inserted
        again with resource_type "Event". A record is only inserted again when
        util.existURL accepts its link, so the others stay deleted.

        * options {mapping or None} specific configurations for initialization.
            ds_name {string} specific dataset/database to use with the DB manager
    """

    init_logger()

    logger.info('> Executing ckan postprocessing... ')

    # main_options accepts options=None and forwards it here, so the lookup
    # must not be attempted on None.
    ds_name = None
    if options is not None and 'ds_name' in options.keys():
        ds_name = options['ds_name']

    # Both values are read before either is assigned: if one of them is
    # missing, user and passw both stay None (anonymous access).
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        user = usertemp
        passw = passwtemp
    except Exception as e:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(ds_name, user, passw)

    # We want to change all courses from mygoblet.org tagged as Training Materials
    # NOTE(review): the link pattern contains a double slash after the host -- confirm it is intended.
    ckan_conditions = [
        ['AND', [
            ['EQ', 'link', 'http://www.mygoblet.org//training-portal/courses/*'],
            ['EQ', 'resource_type', ["Training Material"]],
        ]]
    ]

    # The two counts are not used below; the calls are kept as they were
    # because their side effects (if any) are not visible from here.
    previous_count = dbManager.count_data_by_conditions(ckan_conditions)
    results = dbManager.get_data_by_conditions(ckan_conditions)

    numSuccess = 0
    if results is None:
        # Presumably None means the read failed (confirm against the manager).
        # Deleting now would drop records that can not be inserted again.
        logger.info('Could not retrieve mygoblet.org records, nothing was modified')
    else:
        # delete all of them , and then we insert them again modified.
        # We will have to implement update operation in AbstractManager
        dbManager.delete_data_by_conditions(ckan_conditions)
        new_count = dbManager.count_data_by_conditions(ckan_conditions)
        for result in results:
            if not util.existURL(result.get("link")):
                continue
            success = dbManager.insert_data({
                "title": result.get("title"),
                "description": result.get("description"),
                "field": result.get("field"),
                "source": result.get("source"),
                "resource_type": ["Event"],  # Now they are Events, not Training Materials!
                "insertion_date": result.get("insertion_date"),
                "created": result.get("created"),
                "audience": result.get("audience"),
                "link": result.get("link"),
            })
            if success:
                numSuccess = numSuccess + 1

    logger.info('Changed '+str(numSuccess)+' mygoblet.org records tagged as Training Materials to Events')
    logger.info('< Finished ckan postprocessing')
class TestSolrDB(unittest.TestCase):
    """
        Exercises insertion, deletion, selection and sorting through the DB
        manager obtained for ('SOLR', 'test_core').

        NOTE(review): several tests call delete_all_data() on that store --
        presumably a dedicated test core; confirm before running elsewhere.
    """

    def setUp(self):
        self.dbFactory = DBFactory()
        self.dbManager = self.dbFactory.get_specific_db_manager_and_schema('SOLR','test_core')

    def _seed_three_titles(self):
        """Empty the store, insert the first/second/third test records and check that 3 are stored."""
        self.dbManager.delete_all_data()
        for ordinal in ('first', 'second', 'third'):
            self.dbManager.insert_data({'title': ordinal + ' test title', 'field': 'test-field'})
        self.assertEqual(3, len(self.dbManager.get_all_data()))

    def _count_matching(self, conditions):
        """Return how many records get_data_by_conditions_full yields for conditions (no sorting, no third argument)."""
        return len(self.dbManager.get_data_by_conditions_full(conditions, None, None))

    def test_insertion(self):
        print ('> Insertion test')
        previous_count = len(self.dbManager.get_all_data())
        self.dbManager.insert_data({'title':'test title','field':'test-field'})
        new_count = len(self.dbManager.get_all_data())
        self.assertEqual(previous_count + 1, new_count)

    def test_all_deletion(self):
        print ('> All deletion test')
        self.dbManager.insert_data({'title':'test title','field':'test-field'})
        self.dbManager.delete_all_data()
        self.assertEqual(0, len(self.dbManager.get_all_data()))

    def test_delete(self):
        # The label used to read '> Select test' (copied from test_select).
        print ('> Delete test')
        self._seed_three_titles()

        self.dbManager.delete_data_by_conditions([['EQ','title','first test title']])
        self.assertEqual(2, len(self.dbManager.get_all_data()))

        # 'first' is already gone, so only 'second' is removed by this OR.
        self.dbManager.delete_data_by_conditions(
            [
                ['OR', [
                    ['AND', [['EQ','field','test-field'], ['EQ','title','second test title']]],
                    ['AND', [['EQ','field','test-field'], ['EQ','title','first test title']]],
                ]]
            ])
        self.assertEqual(1, len(self.dbManager.get_all_data()))

    def test_select(self):
        print ('> Select test')
        self._seed_three_titles()

        # (expected number of matches, conditions), checked in this order.
        cases = [
            (1, [['EQ','title','first test title']]),
            (3, [['EQ','field','test-field']]),
            (2, [['NO','title','second test title']]),
            (1, [['EQ','field','test-field'], ['EQ','title','second test title']]),
            (2, [['EQ','field','test-field'], ['NO','title','second test title']]),
            # Same query as two entries above; kept because the original suite runs it twice.
            (1, [['EQ','field','test-field'], ['EQ','title','second test title']]),
            (1, [['AND', [['EQ','field','test-field'], ['EQ','title','second test title']]]]),
            (3, [['OR', [['EQ','field','test-field'], ['EQ','title','second test title']]]]),
            (2, [['OR', [
                    ['AND', [['EQ','field','test-field'], ['EQ','title','second test title']]],
                    ['AND', [['EQ','field','test-field'], ['EQ','title','first test title']]],
                ]]]),
            (1, [['OR', [
                    ['NO', [
                        ['AND', [['EQ','field','test-field'], ['EQ','title','second test title']]],
                    ]],
                    ['AND', [['EQ','field','test-field'], ['EQ','title','first test title']]],
                ]]]),
        ]
        for expected, conditions in cases:
            self.assertEqual(expected, self._count_matching(conditions),
                             'unexpected number of matches for %r' % (conditions,))

        # Testing sorting rules...
        for direction, expected_first in (('ASC', 'first test title'), ('DESC', 'third test title')):
            sorted_results = self.dbManager.get_data_by_conditions_full(
                [['EQ','field','test-field']], [['title', direction]], None)
            # Check the size before indexing so an empty result fails as an assertion, not an IndexError.
            self.assertEqual(3, len(sorted_results))
            self.assertEqual(expected_first, sorted_results[0]['title'])
def postProcessing(options):
    """
        Executes some curating operations over imported data.

        Records whose link matches the mygoblet.org courses pattern and whose
        resource_type is "Training Material" are fetched, deleted and inserted
        again with resource_type "Event". A record is only inserted again when
        util.existURL accepts its link, so the others stay deleted.

        * options {mapping or None} specific configurations for initialization.
            ds_name {string} specific dataset/database to use with the DB manager
    """

    init_logger()

    logger.info('> Executing ckan postprocessing... ')

    # main_options accepts options=None and forwards it here, so the lookup
    # must not be attempted on None.
    ds_name = None
    if options is not None and 'ds_name' in options.keys():
        ds_name = options['ds_name']

    # Both values are read before either is assigned: if one of them is
    # missing, user and passw both stay None (anonymous access).
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        user = usertemp
        passw = passwtemp
    except Exception as e:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    # We want to change all courses from mygoblet.org tagged as Training Materials
    # NOTE(review): the link pattern contains a double slash after the host -- confirm it is intended.
    ckan_conditions = [[
        'AND',
        [['EQ', 'link', 'http://www.mygoblet.org//training-portal/courses/*'],
         ['EQ', 'resource_type', ["Training Material"]]]
    ]]

    # The two counts are not used below; the calls are kept as they were
    # because their side effects (if any) are not visible from here.
    previous_count = dbManager.count_data_by_conditions(ckan_conditions)
    results = dbManager.get_data_by_conditions(ckan_conditions)

    numSuccess = 0
    if results is None:
        # Presumably None means the read failed (confirm against the manager).
        # Deleting now would drop records that can not be inserted again.
        logger.info(
            'Could not retrieve mygoblet.org records, nothing was modified')
    else:
        # delete all of them , and then we insert them again modified.
        # We will have to implement update operation in AbstractManager
        dbManager.delete_data_by_conditions(ckan_conditions)
        new_count = dbManager.count_data_by_conditions(ckan_conditions)
        for result in results:
            if not util.existURL(result.get("link")):
                continue
            success = dbManager.insert_data({
                "title": result.get("title"),
                "description": result.get("description"),
                "field": result.get("field"),
                "source": result.get("source"),
                # Now they are Events, not Training Materials!
                "resource_type": ["Event"],
                "insertion_date": result.get("insertion_date"),
                "created": result.get("created"),
                "audience": result.get("audience"),
                "link": result.get("link"),
            })
            if success:
                numSuccess = numSuccess + 1

    logger.info('Changed ' + str(numSuccess) +
                ' mygoblet.org records tagged as Training Materials to Events')
    logger.info('< Finished ckan postprocessing')
def main_options(options):
    """
        Executes the main functionality of this script: it extracts JSON data from each Training Material found
        and inserts its main data into the DB.
        * options {mapping or None} specific configurations for initialization; None keeps every default.
            ds_name {string} specific dataset/database to use with the DB manager
            delete_all_old_data {boolean} specifies if we should delete all previous ckanData in our DataBase
            registriesFromTime {datetime} time from registries will be obtained
            updateRegistries {boolean} if we want to get new registries or not

        When updateRegistries is true (the default), postProcessing is run at the end.
    """

    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info ('>> Starting ckanData importing process... params: ')
        if ('ds_name' in options.keys()):
            ds_name = options['ds_name']
            # %-formatting rather than '+': a non-string value (e.g. None) is
            # logged instead of raising TypeError.
            logger.info ('ds_name=%s' % (ds_name,))
        if ('delete_all_old_data' in options.keys()):
            delete_all_old_data = options['delete_all_old_data']
            logger.info ('delete_all_old_data='+str(delete_all_old_data))
        if ('registriesFromTime' in options.keys()):
            registriesFromTime = options['registriesFromTime']
            logger.info ('registriesFromTime='+str(registriesFromTime))
        if ('updateRegistries' in options.keys()):
            updateRegistries = options['updateRegistries']
            logger.info ('updateRegistries='+str(updateRegistries))
    else:
        logger.info ('>> Starting ckanData importing process...')

    materials_names = None
    if updateRegistries:
        materials_names = get_materials_names()

    # Both values are read before either is assigned: if one of them is
    # missing, user and passw both stay None (anonymous access).
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        user = usertemp
        passw = passwtemp
    except Exception as e:
        logger.info ("Not user info found, using anonymous user... ")
        logger.info (e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(ds_name,user,passw)

    if delete_all_old_data:
        # Only records whose source equals get_source_field() are removed.
        ckan_conditions = [['EQ','source',get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(ckan_conditions)
        dbManager.delete_data_by_conditions(ckan_conditions)
        new_count = dbManager.count_data_by_conditions(ckan_conditions)
        if (previous_count is not None and new_count is not None):
            logger.info ('Deleted '+str( (previous_count-new_count) )+' registries')

    if materials_names is not None:
        numSuccess = 0
        for material_name in materials_names:
            json_data = get_json_from_material_name(material_name)
            if json_data is None:
                continue
            # If we have registriesFromTime, we have to check that each one's
            # creation date is more recent than registriesFromTime
            if registriesFromTime is not None and not isDataMoreRecentThan(json_data,registriesFromTime):
                continue
            success = dbManager.insert_data({
                "title":get_title(json_data),
                "description":get_notes(json_data),
                "field":get_field(json_data),
                "source":get_source_field(),
                "resource_type":get_resource_type_field(),
                "insertion_date":get_insertion_date_field(),
                "created":get_created(json_data),
                "audience":get_audience(json_data),
                "link":get_link(json_data)
                })
            if success:
                numSuccess = numSuccess + 1

        logger.info ('Inserted '+str(numSuccess)+' new registries')

    if updateRegistries:
        # postProcessing looks up keys on its argument, so never forward None.
        postProcessing(options if options is not None else {})

    logger.info('<< Finished ckanData importing process.')
Exemplo n.º 6
0
 def setUp(self):
     """Store a fresh DBFactory and the manager it returns for ('SOLR', 'test_core')."""
     factory = DBFactory()
     self.dbFactory = factory
     self.dbManager = factory.get_specific_db_manager_and_schema(
         'SOLR', 'test_core')
def main_options(options):
    """
        Executes the main functionality of this script: it extracts JSON data from each Training Material found
        and inserts its main data into the DB.
        * options {mapping or None} specific configurations for initialization; None keeps every default.
            ds_name {string} specific dataset/database to use with the DB manager
            delete_all_old_data {boolean} specifies if we should delete all previous ckanData in our DataBase
            registriesFromTime {datetime} time from registries will be obtained
            updateRegistries {boolean} if we want to get new registries or not

        When updateRegistries is true (the default), postProcessing is run at the end.
    """

    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting ckanData importing process... params: ')
        if ('ds_name' in options.keys()):
            ds_name = options['ds_name']
            # %-formatting rather than '+': a non-string value (e.g. None) is
            # logged instead of raising TypeError.
            logger.info('ds_name=%s' % (ds_name, ))
        if ('delete_all_old_data' in options.keys()):
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if ('registriesFromTime' in options.keys()):
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if ('updateRegistries' in options.keys()):
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting ckanData importing process...')

    materials_names = None
    if updateRegistries:
        materials_names = get_materials_names()

    # Both values are read before either is assigned: if one of them is
    # missing, user and passw both stay None (anonymous access).
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        user = usertemp
        passw = passwtemp
    except Exception as e:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        # Only records whose source equals get_source_field() are removed.
        ckan_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(ckan_conditions)
        dbManager.delete_data_by_conditions(ckan_conditions)
        new_count = dbManager.count_data_by_conditions(ckan_conditions)
        if (previous_count is not None and new_count is not None):
            logger.info('Deleted ' + str((previous_count - new_count)) +
                        ' registries')

    if materials_names is not None:
        numSuccess = 0
        for material_name in materials_names:
            json_data = get_json_from_material_name(material_name)
            if json_data is None:
                continue
            # If we have registriesFromTime, we have to check that each one's
            # creation date is more recent than registriesFromTime
            if registriesFromTime is not None and not isDataMoreRecentThan(
                    json_data, registriesFromTime):
                continue
            success = dbManager.insert_data({
                "title": get_title(json_data),
                "description": get_notes(json_data),
                "field": get_field(json_data),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_created(json_data),
                "audience": get_audience(json_data),
                "link": get_link(json_data)
            })
            if success:
                numSuccess = numSuccess + 1

        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    if updateRegistries:
        # postProcessing looks up keys on its argument, so never forward None.
        postProcessing(options if options is not None else {})

    logger.info('<< Finished ckanData importing process.')
Exemplo n.º 8
0
class TestSolrDB(unittest.TestCase):
    """
        Exercises insertion, deletion, selection and sorting through the DB
        manager obtained for ('SOLR', 'test_core').

        NOTE(review): several tests call delete_all_data() on that store --
        presumably a dedicated test core; confirm before running elsewhere.
    """

    def setUp(self):
        self.dbFactory = DBFactory()
        self.dbManager = self.dbFactory.get_specific_db_manager_and_schema(
            'SOLR', 'test_core')

    def _seed_three_titles(self):
        """Empty the store, insert the first/second/third test records and check that 3 are stored."""
        self.dbManager.delete_all_data()
        for ordinal in ('first', 'second', 'third'):
            self.dbManager.insert_data({
                'title': ordinal + ' test title',
                'field': 'test-field'
            })
        self.assertEqual(3, len(self.dbManager.get_all_data()))

    def _count_matching(self, conditions):
        """Return how many records get_data_by_conditions_full yields for conditions (no sorting, no third argument)."""
        return len(
            self.dbManager.get_data_by_conditions_full(conditions, None, None))

    def test_insertion(self):
        print('> Insertion test')
        previous_count = len(self.dbManager.get_all_data())
        self.dbManager.insert_data({
            'title': 'test title',
            'field': 'test-field'
        })
        new_count = len(self.dbManager.get_all_data())
        self.assertEqual(previous_count + 1, new_count)

    def test_all_deletion(self):
        print('> All deletion test')
        self.dbManager.insert_data({
            'title': 'test title',
            'field': 'test-field'
        })
        self.dbManager.delete_all_data()
        self.assertEqual(0, len(self.dbManager.get_all_data()))

    def test_delete(self):
        # The label used to read '> Select test' (copied from test_select).
        print('> Delete test')
        self._seed_three_titles()

        self.dbManager.delete_data_by_conditions(
            [['EQ', 'title', 'first test title']])
        self.assertEqual(2, len(self.dbManager.get_all_data()))

        # 'first' is already gone, so only 'second' is removed by this OR.
        self.dbManager.delete_data_by_conditions([[
            'OR',
            [
                ['AND', [['EQ', 'field', 'test-field'],
                         ['EQ', 'title', 'second test title']]],
                ['AND', [['EQ', 'field', 'test-field'],
                         ['EQ', 'title', 'first test title']]],
            ]
        ]])
        self.assertEqual(1, len(self.dbManager.get_all_data()))

    def test_select(self):
        print('> Select test')
        self._seed_three_titles()

        # (expected number of matches, conditions), checked in this order.
        cases = [
            (1, [['EQ', 'title', 'first test title']]),
            (3, [['EQ', 'field', 'test-field']]),
            (2, [['NO', 'title', 'second test title']]),
            (1, [['EQ', 'field', 'test-field'],
                 ['EQ', 'title', 'second test title']]),
            (2, [['EQ', 'field', 'test-field'],
                 ['NO', 'title', 'second test title']]),
            # Same query as two entries above; kept because the original suite runs it twice.
            (1, [['EQ', 'field', 'test-field'],
                 ['EQ', 'title', 'second test title']]),
            (1, [['AND', [['EQ', 'field', 'test-field'],
                          ['EQ', 'title', 'second test title']]]]),
            (3, [['OR', [['EQ', 'field', 'test-field'],
                         ['EQ', 'title', 'second test title']]]]),
            (2, [['OR', [
                ['AND', [['EQ', 'field', 'test-field'],
                         ['EQ', 'title', 'second test title']]],
                ['AND', [['EQ', 'field', 'test-field'],
                         ['EQ', 'title', 'first test title']]],
            ]]]),
            (1, [['OR', [
                ['NO', [
                    ['AND', [['EQ', 'field', 'test-field'],
                             ['EQ', 'title', 'second test title']]],
                ]],
                ['AND', [['EQ', 'field', 'test-field'],
                         ['EQ', 'title', 'first test title']]],
            ]]]),
        ]
        for expected, conditions in cases:
            self.assertEqual(
                expected, self._count_matching(conditions),
                'unexpected number of matches for %r' % (conditions, ))

        # Testing sorting rules...
        for direction, expected_first in (('ASC', 'first test title'),
                                          ('DESC', 'third test title')):
            sorted_results = self.dbManager.get_data_by_conditions_full(
                [['EQ', 'field', 'test-field']], [['title', direction]], None)
            # Check the size before indexing so an empty result fails as an assertion, not an IndexError.
            self.assertEqual(3, len(sorted_results))
            self.assertEqual(expected_first, sorted_results[0]['title'])
def main_options(options):
    """
        Executes the main functionality of this script: it fetches iAnn events data with
        get_iann_data() and inserts one registry per event into the DB.

        * options {dict or None} specific configurations for initialization. When None, the
          defaults below are used. Recognised keys:
            ds_name {string} specific dataset/database to use with the DB manager (default None)
            delete_all_old_data {boolean} if true, every registry whose "source" equals
                get_source_field() is deleted before importing (default False)
            registriesFromTime {datetime} value handed to get_iann_data() (default None)
            updateRegistries {boolean} if false, no data is fetched and nothing is inserted
                (default True)

        Keys written for each inserted registry:
            "title", "start", "end", "city", "country", "field", "provider", "link",
            "created" - taken from the event through the matching get_* helper;
            "source" - value of get_source_field();
            "resource_type" - value of get_resource_type_field();
            "insertion_date" - value of get_insertion_date_field().

        Only events whose link passes util.existURL() are inserted.

        See more eg: http://iann.pro/iann-web-services
    """
    # IannDataLocking.lock()
    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting iann importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            logger.info('ds_name=' + ds_name)
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting iann importing process...')

    iann_data = None
    if updateRegistries:
        iann_data = get_iann_data(registriesFromTime)

    # Credentials are optional: any failure while reading them is logged and the
    # import goes on with user/passw left as None.
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        # Assigned only after both lookups succeeded, so a partial read never
        # leaves a user without a password.
        user = usertemp
        passw = passwtemp
    except Exception as e:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(ds_name, user, passw)

    if delete_all_old_data:
        iann_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(iann_conditions)
        dbManager.delete_data_by_conditions(iann_conditions)
        new_count = dbManager.count_data_by_conditions(iann_conditions)
        # The difference is only reported when both counts are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) + ' registries')

    if iann_data is not None:
        numSuccess = 0
        for result in iann_data:
            if result is None:
                continue
            # The URL check must use the event being iterated; the previous code
            # referenced an unbound name ("record") here.
            if not util.existURL(get_link(result)):
                continue
            success = dbManager.insert_data({
                "title": get_title(result),
                "start": get_start(result),
                "end": get_end(result),
                "city": get_city(result),
                "country": get_country(result),
                "field": get_field(result),
                "provider": get_provider(result),
                "link": get_link(result),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_creation_date_field(result),
            })
            if success:
                numSuccess += 1

        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished iann importing process.')
def main_options(options):
    """
        Runs the Elixir registry importing process: every record returned by get_records()
        is turned into one registry and inserted into the DB.

        * options {dict or None} optional settings; when None the defaults below apply.
            ds_name {string} dataset/database handed to the DB manager (default None)
            delete_all_old_data {boolean} when true, registries whose "source" equals
                get_source_field() are deleted before importing (default False)
            updateRegistries {boolean} when false, get_records() is not called and nothing
                is inserted (default True)
          No other key is read by this function.

        Keys written for each inserted registry:
            "title", "description", "link", "field", "resource_type" - taken from the
            record through the matching get_* helper;
            "source" - value of get_source_field();
            "insertion_date" - value of get_insertion_date_field().
    """
    init_logger()

    # Values used for every option the caller did not supply.
    ds_name = None
    delete_all_old_data = False
    updateRegistries = True

    if options is None:
        logger.info('>> Starting Elixir registry importing process...')
    else:
        logger.info('>> Starting Elixir registry importing process... params: ')
        supplied = options.keys()
        if 'ds_name' in supplied:
            ds_name = options['ds_name']
            logger.info('ds_name=' + ds_name)
        if 'delete_all_old_data' in supplied:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'updateRegistries' in supplied:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))

    records = get_records() if updateRegistries else None

    # Credentials are optional: if anything fails while reading them, the
    # failure is logged and both values stay None.
    user, passw = None, None
    try:
        parser = ConfigParser.RawConfigParser()
        parser.read('ConfigFile.properties')
        user, passw = (
            parser.get('AuthenticationSection', 'database.user'),
            parser.get('AuthenticationSection', 'database.password'),
        )
    except Exception as err:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(err)

    dbManager = DBFactory().get_default_db_manager_with_username(ds_name, user, passw)

    if delete_all_old_data:
        source_filter = [['EQ', 'source', get_source_field()]]
        count_before = dbManager.count_data_by_conditions(source_filter)
        dbManager.delete_data_by_conditions(source_filter)
        count_after = dbManager.count_data_by_conditions(source_filter)
        # The difference is only reported when both counts are available.
        if not (count_before is None or count_after is None):
            logger.info('Deleted ' + str(count_before - count_after) + ' registries')

    if records is not None:
        inserted = 0
        # No URL-existence check is done here; every record goes to insert_data.
        for entry in records:
            registry = {
                "title": get_title(entry),
                "description": get_description(entry),
                "link": get_link(entry),
                "field": get_field(entry),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(entry),
                "insertion_date": get_insertion_date_field(),
            }
            if dbManager.insert_data(registry):
                inserted += 1

        logger.info('Inserted ' + str(inserted) + ' new registries')

    logger.info('<< Finished Elixir registry importing process...')
Exemplo n.º 11
0
def main_options(options):
    """
        Runs the Elixir registry importing process: each record obtained from get_records()
        becomes one registry that is inserted into the DB.

        * options {dict or None} optional settings; the defaults below are used when None.
            ds_name {string} dataset/database passed to the DB manager (default None)
            delete_all_old_data {boolean} if true, registries whose "source" equals
                get_source_field() are removed before importing (default False)
            updateRegistries {boolean} if false, get_records() is skipped and nothing is
                inserted (default True)
          These are the only keys this function reads.

        Keys written for each inserted registry:
            "title", "description", "link", "field", "resource_type" - derived from the
            record by the matching get_* helper;
            "source" - value of get_source_field();
            "insertion_date" - value of get_insertion_date_field().
    """
    init_logger()

    # Fallbacks for options that are not provided.
    ds_name = None
    delete_all_old_data = False
    updateRegistries = True

    if options is None:
        logger.info('>> Starting Elixir registry importing process...')
    else:
        logger.info(
            '>> Starting Elixir registry importing process... params: ')
        given = options.keys()
        if 'ds_name' in given:
            ds_name = options['ds_name']
            logger.info('ds_name=' + ds_name)
        if 'delete_all_old_data' in given:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'updateRegistries' in given:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))

    records = get_records() if updateRegistries else None

    # Reading credentials is best-effort: on any error the problem is logged
    # and user/passw both remain None.
    user, passw = None, None
    try:
        settings = ConfigParser.RawConfigParser()
        settings.read('ConfigFile.properties')
        user, passw = (
            settings.get('AuthenticationSection', 'database.user'),
            settings.get('AuthenticationSection', 'database.password'),
        )
    except Exception as problem:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(problem)

    dbManager = DBFactory().get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        by_source = [['EQ', 'source', get_source_field()]]
        before = dbManager.count_data_by_conditions(by_source)
        dbManager.delete_data_by_conditions(by_source)
        after = dbManager.count_data_by_conditions(by_source)
        # Report the difference only when both counts are available.
        if not (before is None or after is None):
            logger.info('Deleted ' + str(before - after) + ' registries')

    if records is not None:
        stored = 0
        # Records are inserted without any URL-existence check.
        for item in records:
            payload = {
                "title": get_title(item),
                "description": get_description(item),
                "link": get_link(item),
                "field": get_field(item),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(item),
                "insertion_date": get_insertion_date_field(),
            }
            if dbManager.insert_data(payload):
                stored += 1

        logger.info('Inserted ' + str(stored) + ' new registries')

    logger.info('<< Finished Elixir registry importing process...')
def main_options(options):
    """
        Executes the main functionality of this script: it fetches iAnn events data with
        get_iann_data() and inserts one registry per event into the DB.

        * options {dict or None} specific configurations for initialization. When None, the
          defaults below are used. Recognised keys:
            ds_name {string} specific dataset/database to use with the DB manager (default None)
            delete_all_old_data {boolean} if true, every registry whose "source" equals
                get_source_field() is deleted before importing (default False)
            registriesFromTime {datetime} value handed to get_iann_data() (default None)
            updateRegistries {boolean} if false, no data is fetched and nothing is inserted
                (default True)

        Keys written for each inserted registry:
            "title", "start", "end", "city", "country", "field", "provider", "link",
            "created" - taken from the event through the matching get_* helper;
            "source" - value of get_source_field();
            "resource_type" - value of get_resource_type_field();
            "insertion_date" - value of get_insertion_date_field().

        Only events whose link passes util.existURL() are inserted.

        See more eg: http://iann.pro/iann-web-services
    """
    # IannDataLocking.lock()
    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting iann importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            logger.info('ds_name=' + ds_name)
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting iann importing process...')

    iann_data = None
    if updateRegistries:
        iann_data = get_iann_data(registriesFromTime)

    # Credentials are optional: any failure while reading them is logged and the
    # import goes on with user/passw left as None.
    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        # Assigned only after both lookups succeeded, so a partial read never
        # leaves a user without a password.
        user = usertemp
        passw = passwtemp
    except Exception as e:
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        iann_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(iann_conditions)
        dbManager.delete_data_by_conditions(iann_conditions)
        new_count = dbManager.count_data_by_conditions(iann_conditions)
        # The difference is only reported when both counts are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) +
                        ' registries')

    if iann_data is not None:
        numSuccess = 0
        for result in iann_data:
            if result is None:
                continue
            # The URL check must use the event being iterated; the previous code
            # referenced an unbound name ("record") here.
            if not util.existURL(get_link(result)):
                continue
            success = dbManager.insert_data({
                "title": get_title(result),
                "start": get_start(result),
                "end": get_end(result),
                "city": get_city(result),
                "country": get_country(result),
                "field": get_field(result),
                "provider": get_provider(result),
                "link": get_link(result),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_creation_date_field(result),
            })
            if success:
                numSuccess += 1

        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished iann importing process.')