示例#1
0
 def __init__(self, xml_path, id_filename, debug=False):
     """Store the run settings and build the two collaborator objects.

     xml_path is handed to BHL_DATA_PROVIDER; id_filename and debug are
     kept on the instance unchanged. start_year defaults to 2009.
     """
     # Collaborators are created in the same relative order as before.
     self.bhl_data = BHL_DATA_PROVIDER(xml_path)
     self.doc2isis = Doc2ISIS()
     # Plain settings copied from the arguments.
     self.id_filename, self.debug = id_filename, debug
     self.start_year = 2009
示例#2
0
class BHL2LILACS:
    """Pull item/title metadata from a BHL data provider and save records.

    Records are produced by ``Doc2ISIS.generate_records`` and written through
    the ``record2id`` writer opened on ``id_filename``.
    """

    def __init__(self, xml_path, id_filename, debug=False):
        """Store the settings and build the provider and record generator.

        xml_path: passed to BHL_DATA_PROVIDER.
        id_filename: passed to record2id when a harvest starts.
        debug: when true, display_debug_message prints its argument.
        """
        self.bhl_data = BHL_DATA_PROVIDER(xml_path)
        self.debug = debug
        self.doc2isis = Doc2ISIS()
        self.id_filename = id_filename
        self.start_year = 2009

    def display_debug_message(self, message):
        """Print message only when the instance was created with debug on."""
        if self.debug:
            print(message)

    @staticmethod
    def _load_excluded_ids(processed_filename):
        """Read one id per line and return them as the keys of a dict.

        Only a trailing/leading newline is stripped from each line. The
        values are empty strings; callers only test for key membership.
        """
        with open(processed_filename) as id_file:
            return {line.strip("\n"): '' for line in id_file}

    def create_id_filename(self, processed_filename, p_from, p_until, last_id):
        """Harvest day by day from p_from up to the current local date.

        processed_filename: text file listing already processed
            'item_id|title_id' keys, one per line; those are skipped.
        p_from: date string; the first 10 characters are read as YYYY-MM-DD
            and give the starting year, month and day.
        p_until: only its first 10 characters are used, and only in a
            debug message.
        last_id: not used by this method.

        Raises ValueError if the year/month/day slices of p_from are not
        integers, and whatever open() raises for processed_filename.
        """
        p_from = p_from[0:10]
        p_until = p_until[0:10]

        this_year, this_month, this_day = time.localtime(time.time())[:3]
        min_month = int(p_from[5:7])
        min_day = int(p_from[8:10])
        max_month = 13
        # Day loop stops at 28 for past months; the window from the 28th to
        # the 1st of the following month covers the remaining days.
        # NOTE(review): presumably chosen so no non-existent date is built.
        max_day = 29
        self.start_year = int(p_from[0:4])
        excluded_id_list = self._load_excluded_ids(processed_filename)

        self.display_debug_message('create_id_filename')
        self.r2id = record2id(self.id_filename)
        if not self.r2id:
            print('invalid id file')
            return

        try:
            self.display_debug_message('create_id_filename (' + p_from + '-' + p_until + ')')
            curr_day = ''
            for year in range(self.start_year, this_year + 1):
                if year == this_year:
                    # Do not walk past the current month.
                    max_month = this_month + 1
                self.display_debug_message(str(year) + ' meses (' + str(min_month) + '-' + str(max_month) + ')')
                for month in range(min_month, max_month):
                    if year == this_year and month == this_month:
                        # Do not walk past today.
                        max_day = this_day + 1
                    self.display_debug_message(str(year) + str(month) + ' days (' + str(min_day) + '-' + str(max_day) + ')')
                    for day in range(min_day, max_day):
                        # NOTE(review): `format` is whatever the module has in
                        # scope; with the builtin, month/day are not
                        # zero-padded -- confirm against the file's helpers.
                        next_day = str(year) + '-' + format(month) + '-' + format(day)
                        if curr_day:
                            # Each query spans two consecutive generated dates.
                            self.__get_data_and_create_records__(excluded_id_list, curr_day, next_day)
                        curr_day = next_day
                    # Only the first month starts at the day taken from p_from.
                    min_day = 1
                # Only the first year starts at the month taken from p_from.
                min_month = 1
        finally:
            # Close the writer even when a query or a save fails.
            self.r2id.close_files()

    def __get_data_and_create_records__(self, excluded_id_list, p_from, p_until):
        """Fetch item ids for the p_from..p_until range and save their records.

        1) Asks the data provider for item ids in batches, passing back the
           resumption token it returned, until that token is the empty string.
        2) Hands every batch to __create_records__, with p_from as the
           query date.
        """
        self.display_debug_message('Debug: executing download_most_recent_items ' + p_from + ',' + p_until)
        resumption_token = ''
        while True:
            oai_date_list, item_id_list, resumption_token = self.bhl_data.get_item_id_list_by_batches(
                p_from, p_until, resumption_token)
            self.__create_records__(item_id_list, oai_date_list, excluded_id_list, p_from)
            if resumption_token == '':
                break

    def __create_records__(self, item_id_list, oai_date_list, excluded_id_list, query_date):
        """Generate and save the records of every item not already processed.

        item_id_list: item ids to look up in the data provider.
        oai_date_list: dates indexed in parallel with item_id_list.
        excluded_id_list: container of 'item_id|title_id' keys to skip.
        query_date: appended to the item's date as '<date>^d<query_date>'.
        """
        for position, item_id in enumerate(item_id_list):
            self.display_debug_message('Debug: item_id: ' + item_id)
            item_metadata = self.bhl_data.get_item_metadata(item_id)

            if not item_metadata:
                self.display_debug_message('Debug: ERROR no item_metadata ')
                continue

            title_id = item_metadata[0].get_primary_title_id()
            test = item_id + '|' + title_id[0]
            self.display_debug_message('Debug: test ' + test)
            if test in excluded_id_list:
                # Already processed in a previous run.
                continue

            self.display_debug_message('Debug: do it')
            self.display_debug_message('  get_title_metadata')
            title_metadata = self.bhl_data.get_title_metadata(title_id[0])
            title_metadata.set_items(item_metadata)
            title_metadata.set_title_id(title_id)
            title_metadata.set_oai_date(oai_date_list[position] + '^d' + query_date)

            self.display_debug_message('  generate_records')
            records = self.doc2isis.generate_records(title_metadata)
            for record in records:
                self.display_debug_message('  save')
                self.r2id.save(record)
                self.display_debug_message('  saved')

    def create_id_filename_old(self, processed_filename, p_from='', p_until=''):
        """Older harvest entry point, kept alongside create_id_filename.

        With both p_from and p_until empty it walks generated dates starting
        at 2009-3-29; otherwise it runs a single query for the given range
        (each argument cut to its first 10 characters).

        processed_filename: text file listing already processed
            'item_id|title_id' keys, one per line; those are skipped.
        """
        p_from = p_from[0:10]
        p_until = p_until[0:10]

        localtime = time.localtime(time.time())
        min_month = 3
        min_day = 29
        max_month = 13
        # Days run 1..31 for every month, so dates such as day 30 of
        # February are generated too.
        # NOTE(review): confirm the provider tolerates such dates.
        max_day = 32

        excluded_id_list = self._load_excluded_ids(processed_filename)

        self.display_debug_message('create_id_filename')

        self.r2id = record2id(self.id_filename)
        if not self.r2id:
            print('invalid id file')
            return

        try:
            if p_from == '' and p_until == '':
                self.display_debug_message('create_id_filename for all')
                curr_date = str(localtime[0]) + '-' + str(localtime[1]) + '-' + str(localtime[2])
                previous = ''
                for year in range(2009, localtime[0] + 1):
                    if year > 2009:
                        min_month = 1
                        min_day = 1
                    for month in range(min_month, max_month):
                        for day in range(min_day, max_day):
                            curr = str(year) + '-' + format(month) + '-' + format(day)
                            if previous:
                                self.__get_data_and_create_records__(excluded_id_list, previous, curr)
                                if curr_date == previous:
                                    # Shrinks the bounds used by ranges created
                                    # after this point; ranges already being
                                    # iterated are not affected.
                                    max_month = 1
                                    max_day = 1
                            previous = curr
                        min_day = 1
                    min_month = 1
            else:
                self.display_debug_message('create_id_filename for ' + p_from + ' ' + p_until)
                self.__get_data_and_create_records__(excluded_id_list, p_from, p_until)
        finally:
            # Close the writer even when a query or a save fails.
            self.r2id.close_files()