def get_links():
    """Scrape the Mashable India front page and collect article links.

    Keeps every anchor whose href starts with an in.mashable.com
    /tech or /e... (e.g. /entertainment) article path.

    Returns:
        list[str]: unique article URLs (order arbitrary; set-derived).
    """
    url = "https://in.mashable.com/"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'html.parser')

    # Fix the original pattern
    # "^https://in.mashable.com/tech|https://in.mashable.com/e":
    # the alternation left the second branch unanchored and the
    # unescaped dots matched any character, so URLs merely *containing*
    # that substring anywhere also matched. Group and escape instead.
    href_pattern = re.compile(r"^https://in\.mashable\.com/(?:tech|e)")

    links = set()
    for link in soup.findAll('a', attrs={'href': href_pattern}):
        links.add(link.get('href'))

    print(highlight_green("\nRetrieved {} links.".format(len(links))))
    print(highlight_green("\nArticles fetched:\n\n"))

    # enumerate() replaces the hand-maintained counter.
    for i, key in enumerate(links, start=1):
        print('{}.'.format(i), end='    ')
        print(key)
    return list(links)
    def events(self):
        '''Handle marker-triggered events: obstacles and the stoplight.

        Reacts to the fiducial marker ids currently present in
        ``self.ids``: marker 15 -> one-shot static-obstacle warning
        (edge-triggered via ``self.obstacle_1``), marker 18 -> dynamic
        obstacle (announce, then sleep 10 s), markers 14/13 -> red/green
        stoplight announcement loops.

        NOTE(review): membership tests use one-element lists
        (``[15] in self.ids``), so ``self.ids`` appears to be a list of
        lists as produced by the marker detector -- confirm upstream.
        '''
        # Rising edge: warn only when marker 15 first appears.
        if [15] in self.ids and not self.obstacle_1:
            print magenta("Watch out! Unknown obstacle ahead!")
            self.obstacle_1 = True
        elif self.obstacle_1 and [15] not in self.ids:
            # Marker gone: re-arm the warning for the next sighting.
            self.obstacle_1 = False
        if [18] in self.ids:
            print magenta(
                "Watch out! Dynamic obstacle ahead! Please land and wait for 10 seconds"
            )
            rospy.sleep(10)
            print magenta("You are allowed to start again")

        # Busy-wait while the light is red (14) and not yet green (13).
        while [14] in self.ids and [13] not in self.ids:
            print highlight_red('The light is red! You can not pass.')
            rospy.sleep(2)
        # Repeats the green announcement for as long as marker 13 is seen.
        while [13] in self.ids:
            print highlight_green('The light is green! you can proceed.')
            rospy.sleep(2)
    def identify_leftright(self):
        '''
        Let user identify which is right and which is left controller by
        pulling triggers. Controller hand is displayed in terminal.

        Blocks until each trigger has been pulled fully once (or ROS
        shuts down), polling both controllers at
        ``self.reading_rate_hz``.
        '''
        # Wait for Vive localization before prompting the user.
        while not (rospy.is_shutdown() or self.vive_loc_ready):
            rospy.sleep(0.1)

        if not rospy.is_shutdown():
            print highlight_green(' Pull each trigger ')

        identify_right = True
        identify_left = True
        while (identify_right or identify_left) and not rospy.is_shutdown():

            (result, pControllerState) = (self.vrsystem.getControllerState(
                self.left_id))
            d = self.from_controller_state_to_dict(pControllerState)
            # Only react to fresh packets: unPacketNum changes whenever
            # the controller reports a new state.
            if (self.last_unPacketNum_left != d['unPacketNum']):
                self.last_unPacketNum_left = d['unPacketNum']
                # Trigger fully depressed -> left hand identified.
                if d['trigger'] == 1.0:
                    print highlight_blue(' Left  trigger ')
                    identify_left = False

            (result, pControllerState) = (self.vrsystem.getControllerState(
                self.right_id))
            d = self.from_controller_state_to_dict(pControllerState)
            if (self.last_unPacketNum_right != d['unPacketNum']):
                self.last_unPacketNum_right = d['unPacketNum']
                if d['trigger'] == 1.0:
                    print highlight_blue(' Right trigger ')
                    identify_right = False

            # Throttle polling to the configured reading rate.
            rospy.sleep(1.0 / self.reading_rate_hz)
import pickle
import numpy as np
from fabulous.color import highlight_green

from scraper import get_links, parse_page_data

# Fetch candidate article links, featurize them, and rank predicted virality.
links = get_links()
data = parse_page_data(links)

# SECURITY: pickle.load executes arbitrary code from the file -- only load
# model files from a trusted source.
# Use a context manager so the file handle is closed; the original
# pickle.load(open(...)) leaked it.
with open("./model/finalized_model.sav", 'rb') as model_file:
    model = pickle.load(model_file)

y = model.predict(data)

# Indices of every article tied for the highest predicted score.
indices = np.argwhere(y == np.max(y)).flatten()
print(highlight_green("\nArticles most likely to go viral:\n\n"))

# enumerate(start=1) replaces the manual key + 1 numbering.
for rank, idx in enumerate(indices, start=1):
    print('{}.'.format(rank), end='    ')
    print(links[idx])
# 示例#5 (Example #5, score: 0) -- scraped-snippet separator, not Python code.
#!/usr/bin/env python
# coding=utf-8
from __future__ import print_function
import requests
import os
import sys
from fabulous import text
try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup
from humanfriendly.tables import format_pretty_table
from fabulous.color import highlight_green, green, red, yellow

# Column headers (Azerbaijani) for the Baku -> Sumqayit train timetable:
# train number, departure/arrival times at Baku, Bilacari, Xirdalan, Sumqayit.
# NOTE(review): str.decode("utf-8") only exists on Python 2 byte strings,
# so this snippet targets Python 2 despite importing print_function above.
baku_header = [
    highlight_green('Qatar №-si'.decode("utf-8").strip()),
    green('Bakıdan çıxma'.decode("utf-8").strip()),
    green('Biləcəriyə çatma'.decode("utf-8").strip()),
    yellow('Biləcəridən getmə'.decode("utf-8").strip()),
    yellow('Xırdalana çatma'.decode("utf-8").strip()),
    red('Xırdalandan getmə'.decode("utf-8").strip()),
    red('Sumqayıta çatma'.decode("utf-8").strip())
]

sum_header = [
    highlight_green('Qatar №-si'.decode("utf-8").strip()),
    green('Sumqayıtdan çıxma'.decode("utf-8").strip()),
    green('Xırdalana çatma'.decode("utf-8").strip()),
    yellow('Xırdalana getmə'.decode("utf-8").strip()),
    yellow('Biləcəriyə çatma'.decode("utf-8").strip()),
    red('Biləcəriyə getmə'.decode("utf-8").strip()),
def parse_page_data(links):
    """Download each article and build the encoded feature matrix.

    Per-URL raw feature row:
    [is_weekend, n_title_words, n_body_words, n_images, n_videos,
     is_entertainment, is_tech, weekday] -- the weekday column (index 7)
    is then one-hot encoded by the ColumnTransformer; the remaining
    columns pass through unchanged.

    Args:
        links: iterable of article URLs on in.mashable.com.

    Returns:
        The matrix produced by ``ColumnTransformer.fit_transform``.
    """
    encoder = ColumnTransformer(
        [('days', OneHotEncoder(categories=[[0, 1, 2, 3, 4, 5, 6]]), [7])],
        remainder='passthrough')

    page_data = []
    for link in links:
        soup = BeautifulSoup(requests.get(link).text, 'html.parser')

        # Media counts: images marked itemprop=contentUrl, video players.
        img_count = float(len(soup.findAll(
            'img', attrs={'itemprop': re.compile("contentUrl")})))
        vid_count = float(len(soup.findAll('div',
                                           attrs={'class': "vplayer"})))

        # Publication weekday (Monday=0 .. Sunday=6) from the <time> tag.
        post_date = soup.find('time')
        weekday = datetime.strptime(post_date['datetime'].split('T')[0],
                                    "%Y-%m-%d").weekday()

        # Channel is the first path segment of the URL.
        channel = link.split('/')[3]

        title = soup.find('h1', attrs={'id': "id_title"}).text
        num_words_title = len(title.strip().split())
        num_words_text = len(soup.get_text().strip().split())

        # One-hot channel flags: (entertainment, tech).
        if channel == "tech":
            ent_flag, tech_flag = 0.0, 1.0
        elif channel == "entertainment":
            ent_flag, tech_flag = 1.0, 0.0
        else:
            ent_flag, tech_flag = 0.0, 0.0

        # Weekend flag occupies the first column (Saturday=5, Sunday=6).
        weekend_flag = 1.0 if weekday in (5, 6) else 0.0

        page_data.append([
            weekend_flag, num_words_title, num_words_text, img_count,
            vid_count, ent_flag, tech_flag, weekday
        ])

    encoded = encoder.fit_transform(page_data)
    print(highlight_green("\nData preprocessing done."))
    return encoded


# if __name__ == "main"":
#         links = get_links()
#         p = parse_page_data(links)
# 示例#7 (Example #7, score: 0) -- scraped-snippet separator, not Python code.
 def log(self):
     '''Print this result's display string: green if it passed, red if not.'''
     if self.passed:
         print highlight_green(self.display_str)
     else:
         print highlight_red(self.display_str)