def createJSONFile(file_name, leporello, prettify):
    """Dump ``leporello`` as JSON into the downloads folder.

    A compact dump is always written to the path that
    ``Lepistant.createFilePath`` builds from the downloads folder,
    ``file_name`` and ``'json'``. When ``prettify`` is truthy, a second dump
    with ``indent=4`` is written under ``file_name + '_formatted'``.

    Args:
        file_name: Base name handed to ``Lepistant.createFilePath``.
        leporello: Object passed to ``json.dump``.
        prettify: Whether to additionally write the indented variant.
    """
    file_path = Lepistant.createFilePath(
        Lepistant.REL_PATH_DOWNLOADS_FOLDER, file_name, 'json')
    # The context manager closes (and thereby flushes) the handle even when
    # json.dump raises; previously the handle was never closed explicitly.
    # NOTE(review): the file is opened with the platform default encoding
    # while ensure_ascii=False writes non-ASCII characters verbatim - confirm
    # the default encoding can represent them.
    with open(file_path, 'w+') as json_file:
        json.dump(leporello, json_file, sort_keys=True, ensure_ascii=False)

    if prettify:
        file_path_formatted = Lepistant.createFilePath(
            Lepistant.REL_PATH_DOWNLOADS_FOLDER,
            file_name + '_formatted',
            'json')
        with open(file_path_formatted, 'w+') as json_file_formatted:
            json.dump(
                leporello,
                json_file_formatted,
                sort_keys=True,
                ensure_ascii=False,
                indent=4)
def getPlays(leporello_info):
    """Fetch the leporello listing and return a one-element list of plays.

    The listing is loaded via ``Lepistant.getSoup``; every ``div`` tag with
    the CSS class stored under ``leporello_info[CSS_CLASS_PLAY_ITEM]`` is
    collected and written back to ``leporello_info[PLAY_ITEMS]``.

    NOTE: only one hard-coded play item (index 12, "AltArmArbeitslos") is
    turned into a ``Play`` - a testing aid. Index 20 ("WirAlleAnders") was the
    previous test candidate. A loop over every play item is not active.

    Args:
        leporello_info: Mapping holding the listing URL and the CSS class of
            play items; it is mutated (``PLAY_ITEMS`` is set).

    Returns:
        A list containing the single ``Play`` that was built.
    """
    # Lepistant.setInfo(leporello_info) is currently not called here.
    listing_path = Lepistant.createFilePath(
        Lepistant.REL_PATH_DOWNLOADS_FOLDER,
        Lepistant.FILE_NAME_LEPORELLO,
        'leporello')
    listing_soup = Lepistant.getSoup(
        leporello_info[URL_LEPORELLO], listing_path)
    items = Lepistant.getTagsByClass(
        listing_soup, 'div', leporello_info[CSS_CLASS_PLAY_ITEM])
    leporello_info[PLAY_ITEMS] = items

    chosen_item = items[12]  # AltArmArbeitslos
    play = Play(chosen_item)
    play.link = Lepistant.getURLFromTagContent(chosen_item)

    title_slug = Lepistant.removeNonAlphanumericCharacters(play.title)
    play.file_path_on_disk = Lepistant.REL_PATH_PLAYS_FOLDER + title_slug + '/'
    play.file_name_on_disk = Lepistant.createFilePath(
        play.file_path_on_disk, play.title, 'html')

    play_soup = Lepistant.getSoup(play.link, play.file_name_on_disk)
    play.setPlayDetails(play_soup)

    return [play]
def getPlays(leporello_info):
    """Build the list of plays from the leporello overview page.

    Loads the overview through ``Lepistant.getSoup``, stores all ``div`` tags
    carrying the class ``leporello_info[CSS_CLASS_PLAY_ITEM]`` under
    ``leporello_info[PLAY_ITEMS]`` and creates a ``Play`` for one of them.

    For testing purposes only the play item at index 12 ("AltArmArbeitslos")
    is processed; index 20 ("WirAlleAnders") is the alternative that was used
    before. Processing of all play items in a loop is switched off.

    Args:
        leporello_info: Scraping meta information; gets ``PLAY_ITEMS`` added.

    Returns:
        List with exactly one ``Play`` object.
    """
    result = []

    # The Lepistant.setInfo(leporello_info) initialisation is not active.
    overview_file = Lepistant.createFilePath(
        Lepistant.REL_PATH_DOWNLOADS_FOLDER,
        Lepistant.FILE_NAME_LEPORELLO,
        'leporello')
    overview = Lepistant.getSoup(leporello_info[URL_LEPORELLO], overview_file)
    css_class = leporello_info[CSS_CLASS_PLAY_ITEM]
    tags = Lepistant.getTagsByClass(overview, 'div', css_class)
    leporello_info[PLAY_ITEMS] = tags

    tag = tags[12]  # AltArmArbeitslos
    play = Play(tag)
    play.link = Lepistant.getURLFromTagContent(tag)
    folder_name = Lepistant.removeNonAlphanumericCharacters(play.title)
    play.file_path_on_disk = (
        Lepistant.REL_PATH_PLAYS_FOLDER + folder_name + '/')
    play.file_name_on_disk = Lepistant.createFilePath(
        play.file_path_on_disk,
        play.title,
        'html')
    details = Lepistant.getSoup(play.link, play.file_name_on_disk)
    play.setPlayDetails(details)
    result.append(play)

    return result
def createJSONFile(file_name, leporello, prettify):
    """Write ``leporello`` to a JSON file in the downloads folder.

    The compact dump always goes to the path returned by
    ``Lepistant.createFilePath`` for the downloads folder, ``file_name`` and
    ``'json'``. If ``prettify`` is truthy, an additional dump indented by
    four spaces goes to the path built from ``file_name + '_formatted'``.

    Args:
        file_name: Base name passed on to ``Lepistant.createFilePath``.
        leporello: Object serialized with ``json.dump``.
        prettify: Truthy to also write the indented variant.
    """
    targets = [(file_name, {})]
    if prettify:
        targets.append((file_name + '_formatted', {'indent': 4}))

    for base_name, extra_options in targets:
        file_path = Lepistant.createFilePath(
            Lepistant.REL_PATH_DOWNLOADS_FOLDER, base_name, 'json')
        # ``with`` guarantees the handle is closed and flushed, also when
        # json.dump raises; the handles used to be left open.
        # NOTE(review): opened with the platform default encoding although
        # ensure_ascii=False emits non-ASCII characters as-is - verify.
        with open(file_path, 'w+') as json_file:
            json.dump(
                leporello,
                json_file,
                sort_keys=True,
                ensure_ascii=False,
                **extra_options)
def _setDetailsForPerformances(self):
    """Load the page of each further performance and update known ones.

    Iterates over the tuples returned by ``self._getFurtherPerformances()``;
    element 0 of a tuple is used as the date string and element 1 as the URL.
    The date string is split at ``'T'`` into a day part and a time part.
    A time other than ``'00:00'`` is passed to ``self._setDefaultTime``.
    The performance page is always fetched through ``Lepistant.getSoup``.
    Only when ``<day>T00:00`` is a key of ``self.performances`` is that
    performance given the more precise date and its details set.
    """
    further_performances = self._getFurtherPerformances()
    if not further_performances:
        return

    for entry in further_performances:
        date = entry[0]

        # Performances are looked up by year, month and day only, so the
        # time component of the key is normalised to midnight.
        date_parts = date.split('T')
        lookup_date = date_parts[0] + 'T00:00'
        lookup_time = date_parts[1]
        if lookup_time != '00:00':
            self._setDefaultTime(lookup_time)

        soup = Lepistant.getSoup(
            entry[1],
            Lepistant.createFilePath(
                self.file_path_on_disk, date, 'performance'))

        if lookup_date not in self.performances:
            continue

        # Replace the stored date by the more precise one (weekday and time).
        performance = self.performances[lookup_date]
        # TODO: Updating of the date should be done in a single function.
        performance.date = str()
        performance['date'] = date
        performance.setDetails(soup, self.title)
def _setData(self, data_list):
    """Fill this artist from scraped tags and register it in the leporello.

    Each element of ``data_list`` is classified by the CSS class found in its
    string form:

    * ``eventDetailPerson``      - its text becomes the full name.
    * ``eventDetailPersonRole``  - the text before the first ``':'`` becomes
      the role.
    * ``eventDetailPersonLink``  - its text becomes the full name and its
      ``href`` (prefixed with ``Lepistant.URL_PREFIX``) becomes the URL.

    If the full name is already a key of ``leporello.artists``, the stored
    artist's fields are copied via ``self._setKey``. Otherwise the name is set
    and, if ``url`` is truthy, the artist page is fetched and parsed. A role
    that was found is added, and ``self`` is stored in ``leporello.artists``
    under ``self.full_name``.

    Args:
        data_list: Iterable of scraped elements offering ``.string``.

    Returns:
        The unchanged ``data_list``.
    """
    role = None
    full_name = ''
    url = Lepistant.NOT_AVAILABLE

    for element in data_list:
        markup = str(element)
        if 'class="eventDetailPerson"' in markup:
            full_name = element.string.strip()
        elif 'class="eventDetailPersonRole"' in markup:
            try:
                role = element.string.split(':')[0].strip()
            except AttributeError:
                # ``element.string`` has no ``split`` (e.g. it is None). Only
                # this error is caught now; the former bare ``except`` also
                # swallowed KeyboardInterrupt and SystemExit.
                logger.info(
                    'Setting role to "%s" since no role could be find in data_list: %s',
                    role, data_list)
        elif 'class="eventDetailPersonLink"' in markup:
            full_name = element.string.strip()
            url = Lepistant.URL_PREFIX + re.search('href=\"(.+?)\"', markup).group(1)

    # Check if artist already exists.
    if full_name in leporello.artists:
        artist = leporello.artists[full_name]
        self.full_name = self._setKey('full_name', artist.full_name)
        self.first_name = self._setKey('first_name', artist.first_name)
        self.middle_name = self._setKey('middle_name', artist.middle_name)
        self.last_name = self._setKey('last_name', artist.last_name)
        self.producer_roles = self._setKey('producer_roles', artist.producer_roles)
        self.artist_roles = self._setKey('artist_roles', artist.artist_roles)
        self.photo = self._setKey('photo', artist.photo)
        self.biography = self._setKey('biography', artist.biography)
        self.appearances = self._setKey('appearances', artist.appearances)
    else:
        self._setName(full_name)
        if url:
            file_path = Lepistant.createFilePath(
                Lepistant.REL_PATH_ARTISTS_FOLDER, full_name, 'html')
            soup = Lepistant.getSoup(url, file_path)
            self._setDetails(soup)

    if role:
        self._addRole(role)

    # Add artist to the leporello artists dictionary so we can check later if
    # the artist already exists. If the artist exists we only update the data.
    leporello.artists[self.full_name] = self

    return data_list
def getSpecificTheaterPlays(leporello_info, file_name, play_type, url):
    """Scrape every play listed at ``url`` and return them as ``Play`` objects.

    The listing page is loaded through ``Lepistant.getSoup``. For each ``div``
    tag with the CSS class ``leporello_info[CSS_CLASS_PLAY_ITEM]`` a ``Play``
    is created, typed with ``play_type``, given its link and on-disk
    locations, filled from its own page and passed to
    ``setDefaultTimeForPerformancesInPlay``.

    Args:
        leporello_info: Mapping providing the CSS class of play items.
        file_name: Base name for the listing file in the downloads folder.
        play_type: Value handed to ``Play.setType`` for every play.
        url: Address of the listing page.

    Returns:
        List of the ``Play`` objects, in listing order.
    """
    listing_path = Lepistant.createFilePath(
        Lepistant.REL_PATH_DOWNLOADS_FOLDER, file_name, 'html')
    listing_soup = Lepistant.getSoup(url, listing_path)
    items = Lepistant.getTagsByClass(
        listing_soup, 'div', leporello_info[CSS_CLASS_PLAY_ITEM])

    collected = []
    for item in items:
        logger.info('>>>>>>>>>>>>>>>>>>>>>>>> Fetching new play <<<<<<<<<<<<<<<<<<<<<<<<<')

        play = Play(item)
        play.setType(play_type)
        play.link = Lepistant.getURLFromTagContent(item)

        title_slug = Lepistant.removeNonAlphanumericCharacters(play.title)
        play.file_path_on_disk = Lepistant.REL_PATH_PLAYS_FOLDER + title_slug + '/'
        play.file_name_on_disk = Lepistant.createFilePath(
            play.file_path_on_disk, play.title, 'html')

        play_soup = Lepistant.getSoup(play.link, play.file_name_on_disk)
        play.setPlayDetails(play_soup)
        setDefaultTimeForPerformancesInPlay(play)

        logger.info('')
        collected.append(play)

    return collected
def getSpecificTheaterPlays(leporello_info, file_name, play_type, url):
    """Return a ``Play`` for each play item found on the page at ``url``.

    Play items are the ``div`` tags whose CSS class equals
    ``leporello_info[CSS_CLASS_PLAY_ITEM]``. Every resulting ``Play`` gets
    ``play_type`` as its type, its link, its folder and file name on disk,
    the details parsed from its own page, and is finally handed to
    ``setDefaultTimeForPerformancesInPlay``.

    Args:
        leporello_info: Scraping meta information (CSS class lookup).
        file_name: Name under which the listing page is stored on disk.
        play_type: Type assigned to all plays of this listing.
        url: URL of the listing page.

    Returns:
        The plays as a list, ordered as they appear in the listing.
    """
    overview_file = Lepistant.createFilePath(
        Lepistant.REL_PATH_DOWNLOADS_FOLDER,
        file_name,
        'html')
    overview = Lepistant.getSoup(url, overview_file)
    css_class = leporello_info[CSS_CLASS_PLAY_ITEM]
    tags = Lepistant.getTagsByClass(overview, 'div', css_class)

    plays = []
    for tag in tags:
        logger.info(
            '>>>>>>>>>>>>>>>>>>>>>>>> Fetching new play <<<<<<<<<<<<<<<<<<<<<<<<<'
        )
        play = Play(tag)
        play.setType(play_type)
        play.link = Lepistant.getURLFromTagContent(tag)
        folder_name = Lepistant.removeNonAlphanumericCharacters(play.title)
        play.file_path_on_disk = (
            Lepistant.REL_PATH_PLAYS_FOLDER + folder_name + '/')
        play.file_name_on_disk = Lepistant.createFilePath(
            play.file_path_on_disk,
            play.title,
            'html')
        details = Lepistant.getSoup(play.link, play.file_name_on_disk)
        play.setPlayDetails(details)
        setDefaultTimeForPerformancesInPlay(play)
        logger.info('')
        plays.append(play)
    return plays
def _setData(self, data_list):
    """Populate this artist from ``data_list`` and store it in the leporello.

    The string form of every element is searched for one of three CSS class
    markers. ``eventDetailPerson`` and ``eventDetailPersonLink`` supply the
    full name (the latter also the artist URL, built from
    ``Lepistant.URL_PREFIX`` and the element's ``href``);
    ``eventDetailPersonRole`` supplies the role (text before the first
    ``':'``).

    A full name already present in ``leporello.artists`` causes the stored
    artist's fields to be copied through ``self._setKey``. For a new name
    ``self._setName`` is called and, when ``url`` is truthy, the artist page
    is fetched and passed to ``self._setDetails``. Afterwards a found role is
    added and ``self`` is registered under ``self.full_name``.

    Args:
        data_list: Iterable of scraped elements exposing ``.string``.

    Returns:
        ``data_list`` itself, unmodified.
    """
    role = None
    full_name = ''
    url = Lepistant.NOT_AVAILABLE

    for element in data_list:
        element_text = str(element)
        if 'class="eventDetailPerson"' in element_text:
            full_name = element.string.strip()
        elif 'class="eventDetailPersonRole"' in element_text:
            try:
                role = element.string.split(':')[0].strip()
            except AttributeError:
                # Raised when ``element.string`` cannot be split (e.g. None).
                # Catching just this error replaces a bare ``except`` that
                # would also have hidden KeyboardInterrupt and SystemExit.
                logger.info('Setting role to "%s" since no role could be find in data_list: %s', role, data_list)
        elif 'class="eventDetailPersonLink"' in element_text:
            full_name = element.string.strip()
            href = re.search('href=\"(.+?)\"', element_text).group(1)
            url = Lepistant.URL_PREFIX + href

    # Check if artist already exists.
    if full_name in leporello.artists:
        artist = leporello.artists[full_name]
        self.full_name = self._setKey('full_name', artist.full_name)
        self.first_name = self._setKey('first_name', artist.first_name)
        self.middle_name = self._setKey('middle_name', artist.middle_name)
        self.last_name = self._setKey('last_name', artist.last_name)
        self.producer_roles = self._setKey('producer_roles', artist.producer_roles)
        self.artist_roles = self._setKey('artist_roles', artist.artist_roles)
        self.photo = self._setKey('photo', artist.photo)
        self.biography = self._setKey('biography', artist.biography)
        self.appearances = self._setKey('appearances', artist.appearances)
    else:
        self._setName(full_name)
        if url:
            file_path = Lepistant.createFilePath(Lepistant.REL_PATH_ARTISTS_FOLDER, full_name, 'html')
            soup = Lepistant.getSoup(url, file_path)
            self._setDetails(soup)

    if role:
        self._addRole(role)

    # Register the artist in the leporello artists dictionary so a later call
    # can detect that the artist already exists and only update the data.
    leporello.artists[self.full_name] = self

    return data_list