def timetable_for_film(film):
    """Collect the schedule for one film as (showtime, cinema) pairs.

    Reads the table whose id equals *film* via the module-level ``driver``:
    column 2 holds the showtime, column 1 the cinema link.
    """
    # Showtimes of the film (second table column).
    showtimes = [
        cell.get_attribute("innerHTML")
        for cell in driver.find_elements_by_xpath(
            "//*[@id='%s']/tbody/tr/td[2]/div" % film)
    ]
    # Cinemas showing the film (first table column).
    cinema_names = [
        link.text
        for link in driver.find_elements_by_xpath(
            "//*[@id='%s']/tbody/tr/td[1]/a" % film)
    ]
    return tuple(zip(showtimes, cinema_names))
def post_commenter(user_id):
    """Scrape like counts and commenter tallies for each post of *user_id*.

    Reads post URLs from ``<USER_DIR>/<user_id>/PostLink.csv`` and writes one
    row per post to ``Commenters.csv`` with columns:
    index, url, time, like, interaction, commenter.

    Fixes over the original:
    - ``time`` / ``interaction`` / ``likeNum`` were referenced in a ``finally``
      block even when the lookup that should bind them raised
      NoSuchElementException first, causing an UnboundLocalError; defaults are
      now bound before the try.
    - The output file and the Firefox driver are now always released.
    """
    directory = USER_DIR + user_id + "/"
    postLinkFile = directory + 'PostLink.csv'
    driver = webdriver.Firefox()
    driver.implicitly_wait(10)
    try:
        myFile = open(directory + 'Commenters.csv', 'wb')
        try:
            textWriter = csv.writer(myFile)
            textWriter.writerow(
                ["index", "url", "time", "like", "interaction", "commenter"])
            with open(postLinkFile, 'rb') as csvfile:
                spamreader = csv.reader(csvfile)
                next(spamreader, None)  # skip the headers
                for row in spamreader:
                    driver.get(row[1])
                    # Bind safe defaults up front so a failed lookup below
                    # can never leave a name unbound at write time.
                    post_time = ""
                    likeNum = 0
                    interaction = 0
                    listOfCommenters = ""
                    try:
                        time_el = driver.find_element_by_class_name('_379kp')
                        post_time = time_el.get_attribute('datetime')
                        like = driver.find_element_by_class_name('_tf9x3')
                        likeNum = like.find_element_by_css_selector(
                            'span').get_attribute('innerHTML')
                        try:
                            comments = driver.find_elements_by_css_selector(
                                '._4zhc5.notranslate._iqaka')
                            interaction = len(comments)
                            # Tally how many comments each commenter left;
                            # the title attribute carries the username.
                            commenters_counts = Counter(
                                c.get_attribute('title') for c in comments)
                            for name, count in commenters_counts.items():
                                listOfCommenters = (listOfCommenters + name +
                                                    ":" + str(count) + ",")
                        except NoSuchElementException:
                            listOfCommenters = "0"
                    except NoSuchElementException:
                        likeNum = 0
                    textWriter.writerow([
                        row[0], row[1], post_time, likeNum, interaction,
                        listOfCommenters
                    ])
                    myFile.flush()
        finally:
            myFile.close()
    finally:
        driver.quit()
def get_date_time(self):
    """Copy the game's month, week and time labels into worksheet cells A3:C3."""
    month_el = self.driver.find_element_by_xpath(
        '//*[@id="game__top__inner"]/div[1]/p[2]/span')
    week_el = self.driver.find_element_by_xpath(
        '//*[@id="game__top__inner"]/div[1]/p[3]/span[1]')
    time_el = self.driver.find_element_by_xpath(
        '//*[@id="game__top__inner"]/div[1]/p[4]')
    # textContent is used (not .text) so hidden text is captured too.
    values = [
        el.get_attribute("textContent")
        for el in (month_el, week_el, time_el)
    ]
    cells = self.ws.range('A3:C3')
    for cell, value in zip(cells, values):
        cell.value = value
    self.ws.update_cells(cells, value_input_option='USER_ENTERED')
def get_latest_tweet():
    """Return the newest visible tweet on TWITTER_URL, or None.

    Returns a dict ``{"text": <joined body lines>, "date": datetime}`` for
    the tweet with the most recent <time datetime=...> stamp, or None when
    no tweet becomes visible within the 15-second wait.

    Fix over the original: the selection logic lived inside a ``finally:``
    block whose ``return`` overrode the ``except`` handler's return and
    would mask any exception raised during parsing; the flow is now a plain
    early-return on timeout followed by straight-line selection.
    """
    driver.get(TWITTER_URL)
    tweets_xpath = '//*[@data-testid="tweet"]'
    try:
        elements = WebDriverWait(driver, 15).until(
            EC.visibility_of_all_elements_located((By.XPATH, tweets_xpath)))
    except TimeoutException:
        # Tweets are not visible in some rare cases; signal "nothing found".
        return None
    latest_tweet = None
    latest_tweet_timestamp = None
    for tweet in elements:
        time_el = tweet.find_element_by_xpath(".//time")
        timestamp = time_el.get_attribute("datetime")
        date = datetime.datetime.strptime(timestamp,
                                          '%Y-%m-%dT%H:%M:%S.000Z')
        if latest_tweet_timestamp is None or date > latest_tweet_timestamp:
            latest_tweet_timestamp = date
            latest_tweet = tweet
    if latest_tweet is None:
        return None
    # Drop the header lines (first 4) and action-bar lines (last 3) of the
    # rendered tweet text, keeping only the body.
    latest_tweet_text_formatted = "".join(
        latest_tweet.text.split("\n")[4:-3])
    return {
        "text": latest_tweet_text_formatted,
        "date": latest_tweet_timestamp
    }
def traffic(config):
    """Scrape Google Maps driving time/traffic for home -> destination.

    Builds a summary string, stores it in ``config['cache']['traffic']`` and
    rewrites ``config.json`` with the updated config.

    Fix over the original: the headless Chrome driver was never quit, leaking
    a browser process on every call (and on any exception); cleanup now runs
    in a ``finally``. The unused ``urlTest`` local was removed.
    """
    options = se.webdriver.ChromeOptions()
    options.add_argument('headless')
    driver = se.webdriver.Chrome(chrome_options=options)
    try:
        home = config['home']
        destination = config['destination']
        url = 'https://www.google.com/maps/dir/' + home + ',' + destination + '/@37.3806044,-122.0178939,12z/data=!3m1!4b1!4m14!4m13!1m5!1m1!1s0x808fcc3e5f95acc3:0x47daf6fd38218ca4!2m2!1d-121.8762513!2d37.3853429!1m5!1m1!1s0x808fb64142d26eaf:0x8192e11989e6e62f!2m2!1d-122.020508!2d37.380601!3e0'
        driver.get(url)
        time_el = driver.find_element(
            By.XPATH, '//div[@class="section-directions-trip-numbers"]/div')
        distance = driver.find_element(
            By.XPATH,
            '//div[@class="section-directions-trip-numbers"]/div[@class="section-directions-trip-distance section-directions-trip-secondary-text"]'
        )
        # The element's class carries a "delay-<level>" token (e.g. delay-heavy).
        crowd = time_el.get_attribute("class").split("delay-")[1]
        strOutput = ("Traffic: " + crowd + '\n' + "Time: " + time_el.text +
                     '\n' + distance.text + '\n')
        if crowd == 'heavy':
            strOutput = strOutput + "太堵啦,要不等会儿再出发"
        print(strOutput)
        config['cache']['traffic'] = strOutput
        with open("config.json", "w") as jsonFile:
            json.dump(config, jsonFile, indent=4)
    finally:
        driver.quit()
def getScheduledMatches(code, year, url):
    # Scrape the match list for one league page and export it to <code>.xls.
    # NOTE(review): the source arrived collapsed onto single lines; the
    # nesting below is reconstructed and should be confirmed against the
    # original file.
    driver.get(url)
    loadAll()  # presumably expands the page until all rows are loaded — confirm
    print(code)
    tbody = driver.find_element_by_css_selector('.sportName')
    # Direct children: a mix of round-header rows and match rows.
    matches = tbody.find_elements_by_xpath('*')
    roundmatches = {}
    round = ""  # shadows builtin round(); holds the current round header text
    id = 12  # shadows builtin id(); running key for roundmatches
    formermonth = 1
    year += 2
    for match in matches:
        if hasClass(match, "event__round"):
            # Round header row: remember the label for the match rows below.
            round = match.get_attribute('innerText')
            print(round)
        if len(match.find_elements_by_css_selector(
                '.event__participant--home')) != 0:
            # Match row: extract teams, score and kick-off time.
            roundmatches[id] = []
            home = match.find_element_by_css_selector(
                '.event__participant--home')
            away = match.find_element_by_css_selector(
                '.event__participant--away')
            score = match.find_element_by_css_selector('.event__scores')
            r = score.find_elements_by_css_selector('span')
            time = match.find_element_by_css_selector('.event__time')
            # NOTE(review): setting an attribute on a WebElement does not
            # affect the page; this line looks vestigial.
            time.__setattr__('year', year)
            print(year)
            # The site shows "dd.mm. HH:MM" with no year; append our guess.
            time = datetime.strptime(
                time.get_attribute('innerHTML') + ' ' + str(year),
                '%d.%m. %H:%M %Y')
            # Rows appear newest-first, so a month jump upward means we
            # crossed a year boundary going back in time.
            if time.month > formermonth:
                year -= 1
            formermonth = time.month
            time = datetime(year, time.month, time.day, time.hour,
                            time.minute)
            homescore = r[0].get_attribute('innerText')
            awayscore = r[1].get_attribute('innerText')
            # Keep only word characters of the team names, joined by spaces.
            hometeamname = re.findall("\w+", home.get_attribute('innerText'))
            awayteamname = re.findall("\w+", away.get_attribute('innerText'))
            hometeamname = (' '.join(hometeamname))
            awayteamname = (' '.join(awayteamname))
            # print (home.find_element_by_xpath('span').get_attribute('innerText')+"-"+ away.find_element_by_xpath('span').get_attribute('innerText') + " "+homescore + ":" + awayscore)
            roundmatches[id] = (match1(hometeamname, awayteamname, time,
                                       homescore, awayscore, round))
            id += 1
            # print(roundmatches)
            formermonth = time.month
    # Highest id first; list of (id, match1) tuples after sorting.
    roundmatches = sorted(roundmatches.items(),
                          key=operator.itemgetter(0),
                          reverse=True)
    # One parallel list per spreadsheet column (B..I).
    B = []
    C = []
    F = []
    G = []
    H = []
    I = []
    D = []
    E = []
    for i in roundmatches:
        F.append(getattr(i[1], 'hometeam'))
        G.append(getattr(i[1], 'awayteam'))
        H.append(int(getattr(i[1], 'homescore')))
        I.append(int(getattr(i[1], 'awayscore')))
        timematch = getattr(i[1], 'date')
        # datetime.time/date used as unbound methods, i.e. timematch.time()
        # and timematch.date().
        E.append(datetime.time(timematch))
        D.append(datetime.date(timematch))
        B.append("")
        C.append(timematch.year)
    print(F)
    print(G)
    print(H)
    print(I)
    df = pd.DataFrame({
        'B': B,
        'C': C,
        'D': D,
        'E': E,
        'F': F,
        'G': G,
        'H': H,
        'I': I
    })
    writer = pd.ExcelWriter('C:\\Users\\danida\\Desktop\\excels2\\' + code +
                            '.xls',
                            engine='xlwt')
    df.to_excel(writer, sheet_name='Sheet1')
    # Close the Pandas Excel writer and output the Excel file.
    writer.save()
    print(roundmatches)
#for reply in replies: # repliess.append(reply.text) repliess_1 = [reply.text for reply in replies_1] #for retweet in retweets: # retweetss.append(retweet.text) retweetss_1 = [retweet.text for retweet in retweets_1] #for like in likes: # likess.append(like.text) likess_1 = [like.text for like in likes_1] #for time in times: # timess.append(time.get_attribute("title")) timess_1 = [time.get_attribute("title") for time in times_1] #for perma in permas: # permass.append(perma.text) #for hash_tag in hash_tags: # print (hash_tag.text) #<strong class="fullname show-popup-with-id u-textTruncate " data-aria-label-part="">TekNotice</strong> #<span class="username u-dir u-textTruncate" dir="ltr" data-aria-label-part="">@<b>TekNotice</b></span> #<a href="/TekNotice/status/952962598311313408" class="tweet-timestamp js-permalink js-nav js-tooltip" title="6:56 PM - 15 Jan 2018" data-conversation-id="952962598311313408"><span class="_timestamp js-short-timestamp js-relative-timestamp" data-time="1516038963" data-time-ms="1516038963000" data-long-form="true" aria-hidden="true">2h</span><span class="u-hiddenVisually" data-aria-label-part="last">2 hours ago</span></a> #<a href="/hashtag/samsungs8?src=hash" data-query-source="hashtag_click" class="twitter-hashtag pretty-link js-nav" dir="ltr"><s>#</s><b><strong>samsungs8</strong></b></a> #<span class="ProfileTweet-actionCountForAria" id="profile-tweet-action-reply-count-aria-952962598311313408">0 replies</span> #for tweet in tweets: # print(tweet.text)