Exemplo n.º 1
0
    def _process_record(self, record, type):
        '''
        Process a specific record
        '''
        # Compose a name and an ID for the record
        entity_name = re.sub(r'<a [^>]*>([^<]*)</a>', '\g<1>',
                             record[type]).replace('\n', '')
        entity_url = re.search('href="([^"]*)"', record[type]).group(1)
        entity_id = entity_url.replace('/', '')
        if type == 'Topic':
            entity_id = entity_id.replace('t', 'topic_')
            entity = Topic(entity_name, entity_id)
        if type == 'Event':
            entity_id = entity_id.replace('e', 'event_')
            entity = Event(entity_name, entity_id)
        rdf_file = '%s.rdf' % entity_id
        named_graph_resource = entity.named_graph()

        # Open the file in the container and get last modification date
        last_modification = None
        if os.path.isfile('data/' +
                          rdf_file):  # in self.zip_container.namelist():
            #data = self.zip_container.read(rdf_file)
            g = Graph()
            g.parse('data/' + rdf_file, format='xml')
            for _, _, date in g.triples(
                (named_graph_resource, DCT['modified'], None)):
                last_modification = isodate.parse_datetime(date)

        # If not existent or out dated, generate
        generate = False
        if last_modification == None:
            # If not existent, generate
            print '\t[GEN] %s - %s' % (entity_id, entity_name)
            generate = True
        else:
            delta = datetime.strptime(record['Date'],
                                      "%d %b %Y") - last_modification
            if delta.days > 0:
                # If updated, update
                print '\t[UPD] %s - %s' % (entity_id, entity_name)
                generate = True

        if not generate:
            print '\t[OK] %s - %s' % (entity_id, entity_name)
            return

        # Process a topic
        if type == 'Topic':
            print record
            pass

        # Process an event
        if type == 'Event':
            entity.parse(record, entity_url)
            # Save the CFP
            f = open('data/' + entity_id + '_cfp.txt', 'w')
            f.write(entity.get_cfp_data())
            f.close()

        # Save the RDF data
        f = open('data/' + rdf_file, 'w')
        f.write(entity.get_rdf_data())
        f.close()
Exemplo n.º 2
0
    def _process_record(self, record, type):
        '''
        Process a specific record
        '''
        # Compose a name and an ID for the record
        entity_name = re.sub(r'<a [^>]*>([^<]*)</a>', '\g<1>', record[type]).replace('\n', '')
        entity_url = re.search('href="([^"]*)"', record[type]).group(1) 
        entity_id = entity_url.replace('/', '')
        if type == 'Topic':
            entity_id = entity_id.replace('t', 'topic_')
            entity = Topic(entity_name, entity_id)
        if type == 'Event':
            entity_id = entity_id.replace('e', 'event_')
            entity = Event(entity_name, entity_id)
        rdf_file = '%s.rdf' % entity_id
        named_graph_resource = entity.named_graph()
        
        # Open the file in the container and get last modification date
        last_modification = None
        if os.path.isfile('data/' + rdf_file):# in self.zip_container.namelist():
            #data = self.zip_container.read(rdf_file)
            g = Graph()
            g.parse('data/' + rdf_file, format='xml')
            for _, _, date in g.triples((named_graph_resource, DCT['modified'], None)):
                last_modification = isodate.parse_datetime(date)
                
        # If not existent or out dated, generate
        generate = False
        if last_modification == None:
            # If not existent, generate
            print '\t[GEN] %s - %s' % (entity_id, entity_name)
            generate = True
        else:
            delta = datetime.strptime(record['Date'], "%d %b %Y") - last_modification
            if delta.days > 0:
                # If updated, update
                print '\t[UPD] %s - %s' % (entity_id, entity_name)
                generate = True
                        
        if not generate:
            print '\t[OK] %s - %s' % (entity_id, entity_name)
            return
        
        # Process a topic
        if type == 'Topic':
            print record
            pass
        
        # Process an event
        if type == 'Event':
            entity.parse(record, entity_url)
            # Save the CFP
            f = open('data/' + entity_id + '_cfp.txt', 'w')
            f.write(entity.get_cfp_data())
            f.close()

        # Save the RDF data
        f = open('data/' + rdf_file, 'w')
        f.write(entity.get_rdf_data())
        f.close()
        
        # TODO process the list of cited topics, events and organizations
        
        #self.zip_container.writestr(rdf_file, entity.get_rdf_data())