Пример #1
0
 def createJSONFile(file_name, leporello, prettify):
     """Serialize ``leporello`` to a JSON file.

     The target path is built by ``Lepistant.createFilePath`` from
     ``Lepistant.REL_PATH_DOWNLOADS_FOLDER``, ``file_name`` and the
     extension ``'json'``. Keys are sorted and non-ASCII characters are
     written unescaped.

     :param file_name: base name (without extension) of the JSON file.
     :param leporello: JSON-serializable object to dump.
     :param prettify: when truthy, additionally write a second file named
         ``file_name + '_formatted'`` using an indent of 4.
     """
     file_path = Lepistant.createFilePath(Lepistant.REL_PATH_DOWNLOADS_FOLDER, file_name, 'json')
     # Use a context manager so the handle is flushed and closed even if
     # json.dump raises; the previous code never closed the file.
     with open(file_path, 'w+') as json_file:
         json.dump(leporello, json_file, sort_keys=True, ensure_ascii=False)

     if prettify:
         file_path_formatted = Lepistant.createFilePath(Lepistant.REL_PATH_DOWNLOADS_FOLDER, file_name + '_formatted', 'json')
         with open(file_path_formatted, 'w+') as json_file_formatted:
             json.dump(leporello, json_file_formatted, sort_keys=True, ensure_ascii=False, indent=4)
Пример #2
0
    def getPlays(leporello_info):
        """Build the list of plays from the leporello page.

        The soup for ``leporello_info[URL_LEPORELLO]`` is obtained through
        ``Lepistant.getSoup``; all ``div`` tags carrying the CSS class
        ``leporello_info[CSS_CLASS_PLAY_ITEM]`` are collected and stored in
        ``leporello_info[PLAY_ITEMS]`` as a side effect.

        Only one hard-coded play item is currently turned into a ``Play``
        (testing shortcut), so the returned list holds a single element.

        :param leporello_info: mapping with the scraping meta information.
        :return: list containing the one processed ``Play``.
        """
        overview_path = Lepistant.createFilePath(
            Lepistant.REL_PATH_DOWNLOADS_FOLDER,
            Lepistant.FILE_NAME_LEPORELLO,
            'leporello')
        overview_soup = Lepistant.getSoup(leporello_info[URL_LEPORELLO], overview_path)

        item_tags = Lepistant.getTagsByClass(
            overview_soup, 'div', leporello_info[CSS_CLASS_PLAY_ITEM])
        leporello_info[PLAY_ITEMS] = item_tags

        # Get only one play item for testing purposes. Index 20 would be
        # "WirAlleAnders"; index 12 is "AltArmArbeitslos".
        # TODO: the loop over every play item is currently disabled; when
        # re-enabled it should repeat the steps below for each item_tag.
        selected_item = item_tags[12]
        play = Play(selected_item)
        play.link = Lepistant.getURLFromTagContent(selected_item)
        folder_title = Lepistant.removeNonAlphanumericCharacters(play.title)
        play.file_path_on_disk = Lepistant.REL_PATH_PLAYS_FOLDER + folder_title + '/'
        play.file_name_on_disk = Lepistant.createFilePath(
            play.file_path_on_disk, play.title, 'html')
        play.setPlayDetails(Lepistant.getSoup(play.link, play.file_name_on_disk))

        return [play]
Пример #3
0
    def getPlays(leporello_info):
        """Return the plays scraped from the leporello page.

        Steps performed:

        1. Obtain the soup for ``leporello_info[URL_LEPORELLO]`` via
           ``Lepistant.getSoup``.
        2. Collect the ``div`` tags with CSS class
           ``leporello_info[CSS_CLASS_PLAY_ITEM]`` and store them under
           ``leporello_info[PLAY_ITEMS]`` (side effect on the argument).
        3. Create a ``Play`` for one hard-coded item and fill in its link,
           on-disk paths and details.

        :param leporello_info: mapping with the scraping meta information.
        :return: a one-element list with the processed ``Play``.
        """
        result = []

        cache_file = Lepistant.createFilePath(
            Lepistant.REL_PATH_DOWNLOADS_FOLDER,
            Lepistant.FILE_NAME_LEPORELLO,
            'leporello',
        )
        page = Lepistant.getSoup(leporello_info[URL_LEPORELLO], cache_file)

        tags = Lepistant.getTagsByClass(page, 'div', leporello_info[CSS_CLASS_PLAY_ITEM])
        leporello_info[PLAY_ITEMS] = tags

        # Testing shortcut: process a single play item only
        # (12 -> AltArmArbeitslos, 20 -> WirAlleAnders).
        # TODO: iterating over all tags is disabled for now; the per-item
        # steps would be the same as the ones below.
        chosen_tag = tags[12]
        play = Play(chosen_tag)
        play.link = Lepistant.getURLFromTagContent(chosen_tag)
        safe_title = Lepistant.removeNonAlphanumericCharacters(play.title)
        play.file_path_on_disk = Lepistant.REL_PATH_PLAYS_FOLDER + safe_title + '/'
        play.file_name_on_disk = Lepistant.createFilePath(
            play.file_path_on_disk,
            play.title,
            'html',
        )
        play_page = Lepistant.getSoup(play.link, play.file_name_on_disk)
        play.setPlayDetails(play_page)
        result.append(play)

        return result
Пример #4
0
    def createJSONFile(file_name, leporello, prettify):
        """Serialize ``leporello`` to a JSON file.

        The target path is built by ``Lepistant.createFilePath`` from
        ``Lepistant.REL_PATH_DOWNLOADS_FOLDER``, ``file_name`` and the
        extension ``'json'``. Keys are sorted and non-ASCII characters are
        written unescaped.

        :param file_name: base name (without extension) of the JSON file.
        :param leporello: JSON-serializable object to dump.
        :param prettify: when truthy, additionally write a second file named
            ``file_name + '_formatted'`` using an indent of 4.
        """
        file_path = Lepistant.createFilePath(
            Lepistant.REL_PATH_DOWNLOADS_FOLDER, file_name, 'json')
        # Use a context manager so the handle is flushed and closed even if
        # json.dump raises; the previous code never closed the file.
        with open(file_path, 'w+') as json_file:
            json.dump(leporello, json_file, sort_keys=True, ensure_ascii=False)

        if prettify:
            file_path_formatted = Lepistant.createFilePath(
                Lepistant.REL_PATH_DOWNLOADS_FOLDER, file_name + '_formatted',
                'json')
            with open(file_path_formatted, 'w+') as json_file_formatted:
                json.dump(leporello,
                          json_file_formatted,
                          sort_keys=True,
                          ensure_ascii=False,
                          indent=4)
Пример #5
0
    def _setDetailsForPerformances(self):
        """Set details on known performances from the further-performance list.

        ``self._getFurtherPerformances()`` is expected to yield indexable
        entries whose element 0 is a date string containing a ``'T'``
        separator and whose element 1 is a URL. For each entry the soup is
        obtained via ``Lepistant.getSoup``; if ``self.performances`` has an
        entry for the same day (time part replaced by ``00:00``), its date is
        overwritten with the full date and ``setDetails`` is called on it.
        """
        further_performances = self._getFurtherPerformances()
        if not further_performances:
            return

        for entry in further_performances:
            date = entry[0]

            # Only year, month and day are used to look up a performance.
            date_parts = date.split('T')
            lookup_date = date_parts[0] + 'T00:00'
            lookup_time = date_parts[1]

            if lookup_time != '00:00':
                self._setDefaultTime(lookup_time)

            url = entry[1]
            soup = Lepistant.getSoup(
                url,
                Lepistant.createFilePath(self.file_path_on_disk, date, 'performance'))

            if lookup_date not in self.performances:
                continue

            # Replace the stored date with the more precise one.
            performance = self.performances[lookup_date]
            # TODO: Updating of the date should be done in a single function.
            performance.date = str()
            performance['date'] = date

            performance.setDetails(soup, self.title)
Пример #6
0
    def _setData(self, data_list):
        """Populate this artist from the elements in ``data_list``.

        Each element is classified by the CSS class found in its string
        representation:

        * ``eventDetailPerson``: plain name.
        * ``eventDetailPersonRole``: role (text before the first ``':'``).
        * ``eventDetailPersonLink``: name plus a link; the ``href`` value is
          appended to ``Lepistant.URL_PREFIX``.

        If ``leporello.artists`` already has an entry for the name, its
        values are copied onto this instance; otherwise the name is set and
        details are loaded from the artist URL. Finally this instance is
        stored in ``leporello.artists`` under ``self.full_name``.

        :param data_list: iterable of parsed elements describing one person.
        :return: ``data_list`` unchanged.
        """
        role = None
        full_name = ''
        url = Lepistant.NOT_AVAILABLE

        for element in data_list:
            # Render the element once; every branch inspects the same markup.
            markup = str(element)
            if 'class="eventDetailPerson"' in markup:
                full_name = element.string.strip()
            elif 'class="eventDetailPersonRole"' in markup:
                try:
                    role = element.string.split(':')[0].strip()
                except AttributeError:
                    # element.string offers no split() (e.g. it is None).
                    # Catch only that case so KeyboardInterrupt/SystemExit
                    # and unrelated errors are no longer swallowed.
                    logger.info(
                        'Setting role to "%s" since no role could be find in data_list: %s',
                        role, data_list)
            elif 'class="eventDetailPersonLink"' in markup:
                full_name = element.string.strip()
                url = Lepistant.URL_PREFIX + re.search(r'href="(.+?)"',
                                                       markup).group(1)

        # Check if artist already exists.
        if full_name in leporello.artists:
            artist = leporello.artists[full_name]
            self.full_name = self._setKey('full_name', artist.full_name)
            self.first_name = self._setKey('first_name', artist.first_name)
            self.middle_name = self._setKey('middle_name', artist.middle_name)
            self.last_name = self._setKey('last_name', artist.last_name)
            self.producer_roles = self._setKey('producer_roles',
                                               artist.producer_roles)
            self.artist_roles = self._setKey('artist_roles',
                                             artist.artist_roles)
            self.photo = self._setKey('photo', artist.photo)
            self.biography = self._setKey('biography', artist.biography)
            self.appearances = self._setKey('appearances', artist.appearances)
        else:
            self._setName(full_name)
            # NOTE(review): url defaults to Lepistant.NOT_AVAILABLE; whether
            # that value is falsy is not visible here -- confirm this guard
            # actually skips artists without a link.
            if url:
                file_path = Lepistant.createFilePath(
                    Lepistant.REL_PATH_ARTISTS_FOLDER, full_name, 'html')
                soup = Lepistant.getSoup(url, file_path)
                self._setDetails(soup)

        if role:
            self._addRole(role)

        # Add artist to the leporello artists dictionary so we can check later if the artist already exists.
        # If the artist exists we only update his data.
        leporello.artists[self.full_name] = self

        return data_list
Пример #7
0
    def getSpecificTheaterPlays(leporello_info, file_name, play_type, url):
        """Create a ``Play`` for every play item found at ``url``.

        The soup for ``url`` is obtained via ``Lepistant.getSoup`` and all
        ``div`` tags with CSS class ``leporello_info[CSS_CLASS_PLAY_ITEM]``
        are processed. Each resulting play gets its type, link, on-disk paths
        and details set, and ``setDefaultTimeForPerformancesInPlay`` is
        applied to it.

        :param leporello_info: mapping with the scraping meta information.
        :param file_name: base name passed to ``Lepistant.createFilePath``
            for the listing page.
        :param play_type: value handed to ``Play.setType`` for every play.
        :param url: address of the listing page.
        :return: list of the processed ``Play`` objects.
        """
        listing_path = Lepistant.createFilePath(
            Lepistant.REL_PATH_DOWNLOADS_FOLDER, file_name, 'html')
        listing_soup = Lepistant.getSoup(url, listing_path)
        item_tags = Lepistant.getTagsByClass(
            listing_soup, 'div', leporello_info[CSS_CLASS_PLAY_ITEM])

        collected = []
        for item_tag in item_tags:
            logger.info('>>>>>>>>>>>>>>>>>>>>>>>> Fetching new play <<<<<<<<<<<<<<<<<<<<<<<<<')

            play = Play(item_tag)
            play.setType(play_type)
            play.link = Lepistant.getURLFromTagContent(item_tag)

            folder_title = Lepistant.removeNonAlphanumericCharacters(play.title)
            play.file_path_on_disk = Lepistant.REL_PATH_PLAYS_FOLDER + folder_title + '/'
            play.file_name_on_disk = Lepistant.createFilePath(
                play.file_path_on_disk, play.title, 'html')

            play.setPlayDetails(
                Lepistant.getSoup(play.link, play.file_name_on_disk))
            setDefaultTimeForPerformancesInPlay(play)

            # Empty log line, emitted after each play.
            logger.info('')

            collected.append(play)

        return collected
Пример #8
0
    def getSpecificTheaterPlays(leporello_info, file_name, play_type, url):
        """Return the plays listed on the page at ``url``.

        All ``div`` tags with CSS class
        ``leporello_info[CSS_CLASS_PLAY_ITEM]`` in the soup returned by
        ``Lepistant.getSoup`` are turned into ``Play`` objects. Every play is
        given ``play_type``, its link, its on-disk paths and its details, and
        is passed to ``setDefaultTimeForPerformancesInPlay``.

        :param leporello_info: mapping with the scraping meta information.
        :param file_name: base name passed to ``Lepistant.createFilePath``
            for the listing page.
        :param play_type: value handed to ``Play.setType`` for every play.
        :param url: address of the listing page.
        :return: list of the processed ``Play`` objects.
        """
        plays_of_type = []

        page_file = Lepistant.createFilePath(
            Lepistant.REL_PATH_DOWNLOADS_FOLDER,
            file_name,
            'html',
        )
        page = Lepistant.getSoup(url, page_file)
        tags = Lepistant.getTagsByClass(page, 'div', leporello_info[CSS_CLASS_PLAY_ITEM])

        for tag in tags:
            logger.info('>>>>>>>>>>>>>>>>>>>>>>>> Fetching new play <<<<<<<<<<<<<<<<<<<<<<<<<')
            play = Play(tag)
            play.setType(play_type)
            play.link = Lepistant.getURLFromTagContent(tag)
            safe_title = Lepistant.removeNonAlphanumericCharacters(play.title)
            play.file_path_on_disk = Lepistant.REL_PATH_PLAYS_FOLDER + safe_title + '/'
            play.file_name_on_disk = Lepistant.createFilePath(
                play.file_path_on_disk,
                play.title,
                'html',
            )
            play_page = Lepistant.getSoup(play.link, play.file_name_on_disk)
            play.setPlayDetails(play_page)

            setDefaultTimeForPerformancesInPlay(play)

            # Empty log line, emitted after each play.
            logger.info('')

            plays_of_type.append(play)

        return plays_of_type
Пример #9
0
 def _setData(self, data_list):
     """Populate this artist from the elements in ``data_list``.

     Each element is classified by the CSS class found in its string
     representation:

     * ``eventDetailPerson``: plain name.
     * ``eventDetailPersonRole``: role (text before the first ``':'``).
     * ``eventDetailPersonLink``: name plus a link; the ``href`` value is
       appended to ``Lepistant.URL_PREFIX``.

     If ``leporello.artists`` already has an entry for the name, its values
     are copied onto this instance; otherwise the name is set and details
     are loaded from the artist URL. Finally this instance is stored in
     ``leporello.artists`` under ``self.full_name``.

     :param data_list: iterable of parsed elements describing one person.
     :return: ``data_list`` unchanged.
     """
     role = None
     full_name = ''
     url = Lepistant.NOT_AVAILABLE

     for element in data_list:
         # Render the element once; every branch inspects the same markup.
         markup = str(element)
         if 'class="eventDetailPerson"' in markup:
             full_name = element.string.strip()
         elif 'class="eventDetailPersonRole"' in markup:
             try:
                 role = element.string.split(':')[0].strip()
             except AttributeError:
                 # element.string offers no split() (e.g. it is None).
                 # Catch only that case so KeyboardInterrupt/SystemExit and
                 # unrelated errors are no longer swallowed.
                 logger.info('Setting role to "%s" since no role could be find in data_list: %s', role, data_list)
         elif 'class="eventDetailPersonLink"' in markup:
             full_name = element.string.strip()
             url = Lepistant.URL_PREFIX + re.search(r'href="(.+?)"', markup).group(1)

     # Check if artist already exists.
     if full_name in leporello.artists:
         artist = leporello.artists[full_name]
         self.full_name = self._setKey('full_name', artist.full_name)
         self.first_name = self._setKey('first_name', artist.first_name)
         self.middle_name = self._setKey('middle_name', artist.middle_name)
         self.last_name = self._setKey('last_name', artist.last_name)
         self.producer_roles = self._setKey('producer_roles', artist.producer_roles)
         self.artist_roles = self._setKey('artist_roles', artist.artist_roles)
         self.photo = self._setKey('photo', artist.photo)
         self.biography = self._setKey('biography', artist.biography)
         self.appearances = self._setKey('appearances', artist.appearances)
     else:
         self._setName(full_name)
         # NOTE(review): url defaults to Lepistant.NOT_AVAILABLE; whether
         # that value is falsy is not visible here -- confirm this guard
         # actually skips artists without a link.
         if url:
             file_path = Lepistant.createFilePath(Lepistant.REL_PATH_ARTISTS_FOLDER, full_name, 'html')
             soup = Lepistant.getSoup(url, file_path)
             self._setDetails(soup)

     if role:
         self._addRole(role)

     # Add artist to the leporello artists dictionary so we can check later if the artist already exists.
     # If the artist exists we only update his data.
     leporello.artists[self.full_name] = self

     return data_list