Exemplo n.º 1
0
    def fill(self, first, last):
        """Download calendar pages from theice.com and concatenate the parsed results.

        Pages are requested starting at ``first``. After each response the
        cursor moves to one day past the date read from the response's third
        line, and the loop ends once the cursor exceeds ``last`` plus seven
        years.

        Args:
            first: start date; must support ``strftime`` and comparison with
                ``last`` (it is replaced by a ``datetime.date`` after the
                first request).
            last: end date; seven years are added to it before looping.

        Returns:
            Whatever ``concat`` returns for the list of parsed pages.
        """
        horizon = last + relativedelta(years=7)
        endpoint = 'https://www.theice.com/marketdata/ExpiryCalendar.shtml'
        markets = ("ICE Futures U.S.", "ICE Futures Europe",
                   "ICE Futures Canada", "ICE OTC", "ICE Trust U.S.",
                   "ICE Clear Europe CDS", "ICE Endex",
                   "ICE Futures Singapore")
        date_kinds = ("FTD", "LTD", "FDD", "LDD", "FND", "LND", "FSD")
        frames = []

        with enabled():
            while first <= horizon:
                query = {
                    'excel': '',
                    'markets': markets,
                    'expirationEnabled': "true",
                    'expirationDates': date_kinds,
                    'dateFrom': first.strftime('%d-%b-%Y'),
                }
                response = requests.get(url=endpoint,
                                        params=query,
                                        headers={'User-Agent': 'Mozilla/5.0'})
                body = response.text

                # The third line of the payload, minus its first four
                # characters, is parsed as a date; the next request starts
                # the day after it.
                marker = body.splitlines()[2][4:]
                parsed = dt.datetime.strptime(marker, '%d-%b-%Y').date()
                first = parsed + dt.timedelta(days=1)

                frames.append(self.parse_csv(body))

        return concat(frames)
Exemplo n.º 2
0
def process(input, entities):
    """Search YouTube for the requested video and build a generic-template reply.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: mapping where ``entities['video'][0]['value']`` holds the
            search text.

    Returns:
        dict: on success ``input``, ``output`` (the template message) and
        ``success=True``; on any failure ``error_msg`` and ``success=False``.
    """
    output = {}
    try:
        video = entities['video'][0]['value']
        with requests_cache.enabled('video_cache', backend='sqlite', expire_after=3600):
            # Pass the query through ``params`` so requests percent-encodes it;
            # concatenating it into the URL breaks on '&', '#', '+' and similar.
            r = requests.get('https://www.googleapis.com/youtube/v3/search', params={
                'part': 'snippet',
                'maxResults': 10,
                'q': video,
                'type': 'video',
                'key': YOUTUBE_DATA_API_KEY,
            })
            data = r.json()

        template = GenericTemplate()
        for item in data['items']:
            snippet = item['snippet']
            video_url = 'https://www.youtube.com/watch?v=' + item['id']['videoId']
            buttons = ButtonTemplate()
            buttons.add_web_url('YouTube Link', video_url)
            buttons.add_web_url('Channel Link', 'https://www.youtube.com/channel/' + snippet['channelId'])
            template.add_element(title=snippet['title'],
                                 item_url=video_url,
                                 image_url=snippet['thumbnails']['high']['url'],
                                 subtitle=snippet['channelTitle'],
                                 buttons=buttons.get_buttons())
        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Any lookup/network/parsing failure becomes a friendly error message.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        error_message = 'I couldn\'t find any videos matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - sia videos'
        error_message += '\n  - videos by eminem'
        error_message += '\n  - video coldplay'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 3
0
def process(input, entities):
    """Look up a movie on OMDb by title and build a button-template reply.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: mapping where ``entities['movie'][0]['value']`` holds the
            movie title.

    Returns:
        dict: on success ``input``, ``output`` and ``success=True``; on any
        failure ``error_msg`` and ``success=False`` (``input`` may already be
        set if the failure happened after the HTTP request).
    """
    output = {}
    try:
        movie = entities['movie'][0]['value']
        with requests_cache.enabled('movie_cache',
                                    backend='sqlite',
                                    expire_after=86400):
            # Let requests percent-encode the title; concatenating it into the
            # query string breaks for titles containing '&', '#', '+' etc.
            r = requests.get('http://www.omdbapi.com/',
                             params={
                                 't': movie,
                                 'plot': 'full',
                                 'r': 'json'
                             })
            data = r.json()
        output['input'] = input
        template = TextTemplate('Title: ' + data['Title'] + '\nYear: ' +
                                data['Year'] + '\nIMDb Rating: ' +
                                data['imdbRating'] + ' / 10' + '\nPlot: ' +
                                data['Plot'])
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url(
            'IMDb Link', 'http://www.imdb.com/title/' + data['imdbID'] + '/')
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Missing entity, network error, or missing keys in the response all
        # end up here. ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt/SystemExit propagating.
        error_message = QUERY_ERROR.format('movie') + EXAMPLE_MOVIES
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 4
0
def process(input, entities):
    """Search YouTube for the requested video and build a generic-template reply.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: mapping where ``entities['video'][0]['value']`` holds the
            search text.

    Returns:
        dict: on success ``input``, ``output`` (the template message) and
        ``success=True``; on any failure ``error_msg`` and ``success=False``.
    """
    output = {}
    try:
        video = entities['video'][0]['value']
        with requests_cache.enabled('video_cache', backend='sqlite', expire_after=3600):
            # ``params`` makes requests percent-encode the search text, which
            # plain string concatenation did not do ('&' or '#' in the query
            # would corrupt the request).
            r = requests.get(
                'https://www.googleapis.com/youtube/v3/search',
                params={
                    'part': 'snippet',
                    'maxResults': 10,
                    'q': video,
                    'type': 'video',
                    'key': YOUTUBE_DATA_API_KEY,
                })
            data = r.json()
        template = GenericTemplate()
        for item in data['items']:
            title = item['snippet']['title']
            item_url = 'https://www.youtube.com/watch?v=' + item['id']['videoId']
            image_url = item['snippet']['thumbnails']['high']['url']
            subtitle = item['snippet']['channelTitle']
            buttons = ButtonTemplate()
            buttons.add_web_url('YouTube Link', item_url)
            buttons.add_web_url('Channel Link', 'https://www.youtube.com/channel/' + item['snippet']['channelId'])
            template.add_element(title=title, item_url=item_url, image_url=image_url, subtitle=subtitle,
                                 buttons=buttons.get_buttons())
        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Deliberate catch-all for lookup/network/parsing failures; narrower
        # than a bare except so KeyboardInterrupt/SystemExit still propagate.
        error_message = 'I couldn\'t find any videos matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - sia videos'
        error_message += '\n  - videos by eminem'
        error_message += '\n  - video coldplay'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 5
0
def get_detalhes_raw(papel='WEGE3'):
    """
    Fetch the raw 'detalhes' page for one ticker from fundamentus and
    return every HTML table found on it.
      URL:
        http://fundamentus.com.br/detalhes.php?papel=WEGE3

    Input:
      papel: ticker symbol interpolated into the URL

    Output:
      list of df (as returned by pandas.read_html)
    """

    ## Advanced search by company
    request_headers = {
        'User-agent':
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201',
        'Accept': 'text/html, text/plain, text/css, text/sgml, */*;q=0.01',
        'Accept-Encoding': 'gzip, deflate',
    }
    page_url = 'http://fundamentus.com.br/detalhes.php?papel={}'.format(papel)

    with requests_cache.enabled():
        response = requests.get(page_url, headers=request_headers)

        if response.from_cache:
            logging.debug('.../detalhes.php?papel={}: [CACHED]'.format(papel))
        else:  # pragma: no cover
            # Only pause after a request that was not served from the cache.
            logging.debug(
                '.../detalhes.php?papel={}: sleeping...'.format(papel))
            time.sleep(.500)  # 500 ms

    ## parse: the page uses ',' as decimal mark and '.' as thousands separator
    return pd.read_html(response.text, decimal=",", thousands='.')
Exemplo n.º 6
0
def list_papel_all():
    """
    List every entry of the 'Papel' column from the 'detalhes' page
    requested with an empty ticker.
      URL:
        http://fundamentus.com.br/detalhes.php?papel=''

    Output:
      list
    """

    request_headers = {
        'User-agent':
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201',
        'Accept': 'text/html, text/plain, text/css, text/sgml, */*;q=0.01',
        'Accept-Encoding': 'gzip, deflate',
    }
    page_url = 'http://fundamentus.com.br/detalhes.php?papel='

    with requests_cache.enabled():
        response = requests.get(page_url, headers=request_headers)

        if response.from_cache:
            logging.debug('list .../detalhes.php?papel= : [CACHED]')
        else:  # pragma: no cover
            # Only pause after a request that was not served from the cache.
            logging.debug('list .../detalhes.php?papel= : sleeping...')
            time.sleep(.500)  # 500 ms

    ## parse: only the first table on the page is used
    tables = pd.read_html(response.text, decimal=",", thousands='.')
    first_table = tables[0]

    papeis = list(first_table['Papel'])
    logging.info('members in list = {}'.format(len(papeis)))

    return papeis
Exemplo n.º 7
0
def matches_preferred_time(venue):
    """Tell whether ``venue`` is open at the preferred time on at least five days.

    The preferred time comes from the module-level ``venue_list``; when none
    is configured, or when the venue has no hours information at all, the
    venue is accepted. Popular hours are used when present, official hours
    otherwise.

    :param venue: mapping with an ``'id'`` key used for the hours lookup
    :returns: ``True`` if the venue is acceptable, ``False`` otherwise
    """
    if 'preferred_time' not in venue_list:
        logging.debug('No preferred time set')
        return True

    preferred_time = venue_list['preferred_time']
    with requests_cache.enabled(**long_cache):
        venue_hours = foursq.venues.hours(venue['id'])

    # Try the popular hours first, then fall back to the official ones.
    for source in ('popular', 'hours'):
        try:
            timeframes = venue_hours[source]['timeframes']
        except KeyError:
            continue
        if source == 'hours':
            logging.debug('No popular hours available, using official hours')
        break
    else:
        logging.debug('No hours information available')
        return True

    seen_days = []
    accepted_days = []
    for frame in timeframes:
        seen_days.extend(frame['days'])

        # A timeframe counts when any of its open slots contains the
        # preferred time (bounds inclusive).
        is_open = any(
            int(slot['start']) <= preferred_time <= int(slot['end'])
            for slot in frame['open'])
        if is_open:
            accepted_days.extend(frame['days'])

    if len(seen_days) != 7:
        logging.warning('Days seen != 7: {}'.format(seen_days))

    accepted_count = len(accepted_days)
    logging.debug('Open at {} on {} days (min 5)'.format(preferred_time, accepted_count))

    return accepted_count >= 5
Exemplo n.º 8
0
    def fetch_etc_objects(self, **kwargs):
        """ Download all available :obj:`GenData` etc objects from the DictyExpress database.

        Args:
            **kwargs: an optional ``progress_callback`` whose ``emit()`` is
                called once after the query and once after wrapping.

        Returns:
            :obj:`list`: :obj:`GenData` objects

        Raises:
            requests.exceptions.ConnectionError: if the server cannot be reached.

        """

        progress = kwargs.get("progress_callback")
        with requests_cache.enabled(cache_name=cache_name,
                                    backend=cache_backend):
            try:
                # Note: this is hardcoded for now. When we port this module to Resolwe platform
                #       data retrieval will be different
                query_result = self._gen.api.data.get(
                    case_ids__contains='5535115cfad58d5e03006217',
                    status='done',
                    type__startswith='data:etc:')
                experiments = query_result['objects']
                if progress:
                    progress.emit()

            except requests.exceptions.ConnectionError as e:
                raise requests.exceptions.ConnectionError(
                    'Server not accessible, check your connection.') from e

            # Wrap every raw experiment record in a GenData object.
            wrapped = [GenData(record, self._gen) for record in experiments]

            if progress:
                progress.emit()

            return wrapped
Exemplo n.º 9
0
    def download_etc_data(self, gen_data_id, **kwargs):
        """ Download the etc data of a chosen experiment from the server.

        Args:
            gen_data_id (str): id of :obj:`GenData` object to download.
            **kwargs: optional ``progress_callback`` (its ``emit()`` is called
                after the download) and ``table_name`` (returned unchanged,
                defaults to ``''``).

        Returns:
             :obj:`tuple`: the downloaded data converted by
             ``response_to_json`` and the table name.

        Raises:
            requests.exceptions.ConnectionError: if the server cannot be reached.

        """
        progress = kwargs.get("progress_callback")
        table_name = kwargs.get("table_name", '')

        with requests_cache.enabled(cache_name=cache_name,
                                    backend=cache_backend):
            try:
                # Only one id is requested, so only the first result is taken.
                downloads = self._gen.download([gen_data_id],
                                               'output.etcfile')
                response = next(downloads)
                # TODO: maybe edit Genesis module to support callback?
                if progress:
                    progress.emit()

            except requests.exceptions.ConnectionError as e:
                raise requests.exceptions.ConnectionError(
                    'Server not accessible, check your connection.') from e

            etc_json = response_to_json(response)
            return etc_json, table_name
Exemplo n.º 10
0
def _fetch_access_token():
    """Request a new API access token with an HTTP POST.

    The access token is valid for 15 minutes; the POST response is cached
    in memory for 14 minutes (840 seconds) so the token can be reused.

    Returns
    -------
    str or None
        ``"<token_type> <access_token>"`` for the
        https://api.wdpro.disney.go.com API, or ``None`` when the response
        status is not OK.

    """
    cache_options = {
        "backend": "memory",
        "allowable_methods": ("POST",),
        "expire_after": 840,
    }
    with requests_cache.enabled("token_cache", **cache_options):
        r = requests.post(
            "https://authorization.go.com/token",
            params={
                "grant_type": "assertion",
                "assertion_type": "public",
                "client_id": "WDPRO-MOBILE.MDX.WDW.ANDROID-PROD",
            },
        )

    if not r.ok:
        return None

    auth_data = r.json()
    token_type = auth_data["token_type"]
    access_token = auth_data["access_token"]
    return f"{token_type} {access_token}"
Exemplo n.º 11
0
    def get_user_profiles(self, user_name_list):
        """Look up user profiles in batches of 100 screen names.

        Each batch goes through ``self.api.request('users/lookup', ...)`` with
        caching enabled for one day, and every returned item is passed through
        ``self.parse_user``. Each batch is padded to take at least three
        seconds.

        :param user_name_list: sliceable sequence of screen names
        :returns: list of parsed users, in the order received
        """
        logger.info(f'tw api profiles for {len(user_name_list)} users')
        batch_size = 100
        min_seconds_per_batch = 3

        profiles = []
        for offset in range(0, len(user_name_list), batch_size):
            batch = user_name_list[offset:offset + batch_size]
            started = time.time()
            with requests_cache.enabled(self.cache_path, expire_after=86400):
                response = self.api.request('users/lookup', {
                    'screen_name': batch,
                    'include_entities': 'false'
                })

            profiles.extend(self.parse_user(user) for user in response)

            # Sleep for whatever is left of the per-batch time budget.
            remaining = min_seconds_per_batch - (time.time() - started)
            if remaining > 0:
                time.sleep(remaining)

        return profiles
Exemplo n.º 12
0
def get_country_levels(country):
    """Look up the admin levels available for a country from the iTOS service.
    @param country: the ISO3 country code (e.g. "GIN")
    @returns: a dict mapping HXL admin-level names ("country", "adm1", ...)
              to the id of the matching iTOS layer
    @raises werkzeug.exceptions.NotFound: if the service response contains an
              'error' entry for the country
    """
    levels = {}
    country = country.upper()
    url = COUNTRY_URL_PATTERN.format(country=country)

    with requests_cache.enabled(app.config.get('ITOS_CACHE_NAME', 'itos-in'),
                                backend=app.config.get('ITOS_CACHE_BACKEND',
                                                       'memory'),
                                expire_after=app.config.get(
                                    'ITOS_CACHE_TIMEOUT', 604800)):
        with requests.get(url) as response:
            data = response.json()

    if 'error' in data:
        raise werkzeug.exceptions.NotFound(
            "iTOS P-code service does not support country: {}".format(country))

    # Reuse the payload parsed above; a missing 'layers' entry yields no levels.
    for layer in data.get('layers') or []:
        match = re.match(r'^Admin(\d)$', layer['name'])
        if not match:
            continue
        level = match.group(1)
        # The captured group is a one-digit string. The previous code compared
        # the match object itself to 0, which is never true, so Admin0 was
        # stored as "adm0" instead of "country".
        if level == '0':
            levels['country'] = layer['id']
        else:
            levels['adm{}'.format(level)] = layer['id']
    return levels
Exemplo n.º 13
0
def process(input, entities):
    """Search Hummingbird for an anime and build a button-template reply from the first hit.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: mapping where ``entities['anime'][0]['value']`` holds the
            search text.

    Returns:
        dict: on success ``input``, ``output`` and ``success=True``; on any
        failure ``error_msg`` and ``success=False``.
    """
    output = {}
    try:
        anime = entities['anime'][0]['value']
        with requests_cache.enabled('anime_cache',
                                    backend='sqlite',
                                    expire_after=86400):

            r = requests.get('https://hummingbird.me/api/v1/search/anime',
                             params={'query': anime})
            data = r.json()

        # Only the first search result is used; an empty result raises
        # IndexError and is reported through the error branch below.
        top_hit = data[0]

        template = TextTemplate()
        template.set_text('Title: ' + top_hit['title'] + '\nSynopsis: ' +
                          top_hit['synopsis'])
        template.set_post_text('\nCommunity Rating: ' +
                               str(round(top_hit['community_rating'], 2)) +
                               ' / 5' + '\nStatus: ' + top_hit['status'])
        text = template.get_text()

        template = ButtonTemplate(text)
        template.add_web_url('Hummingbird URL', top_hit['url'])

        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Deliberate catch-all for lookup/network/parsing failures; narrower
        # than a bare except so KeyboardInterrupt/SystemExit still propagate.
        error_message = 'I couldn\'t find any anime matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - Death Note anime'
        error_message += '\n  - Dragon ball super anime status'
        error_message += '\n  - What is the anime rating of One Punch Man?'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 14
0
def process(input, entities):
    """Look up a movie on OMDb by title and build a button-template reply.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: mapping where ``entities['movie'][0]['value']`` holds the
            movie title.

    Returns:
        dict: on success ``input``, ``output`` and ``success=True``; on any
        failure ``error_msg`` and ``success=False`` (``input`` may already be
        set if the failure happened after the HTTP request).
    """
    output = {}
    try:
        movie = entities['movie'][0]['value']
        with requests_cache.enabled('movie_cache',
                                    backend='sqlite',
                                    expire_after=86400):
            # ``params`` makes requests percent-encode the title; building the
            # query string by concatenation breaks on '&', '#', '+' etc.
            r = requests.get('http://www.omdbapi.com/',
                             params={
                                 't': movie,
                                 'plot': 'full',
                                 'r': 'json'
                             })
            data = r.json()
        output['input'] = input
        template = TextTemplate('Title: ' + data['Title'] + '\nYear: ' +
                                data['Year'] + '\nIMDb Rating: ' +
                                data['imdbRating'] + ' / 10' + '\nPlot: ' +
                                data['Plot'])
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url(
            'IMDb Link', 'http://www.imdb.com/title/' + data['imdbID'] + '/')
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Deliberate catch-all for lookup/network/parsing failures; narrower
        # than a bare except so KeyboardInterrupt/SystemExit still propagate.
        error_message = 'I couldn\'t find that movie.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - batman movie'
        error_message += '\n  - iron man 2 movie plot'
        error_message += '\n  - What is the rating of happyness movie?'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 15
0
    def download_etc_data(self, gen_data_id, **kwargs):
        """ Download the etc data of a chosen experiment from the server.

        :param gen_data_id: id of GeneData object
        :type gen_data_id: str
        :param kwargs: optional ``progress_callback`` (its ``emit()`` is
            called after the download) and ``table_name`` (returned
            unchanged, defaults to ``''``)

        :raises requests.exceptions.ConnectionError: if the server cannot be
            reached
        :rtype: tuple of (data converted by ``response_to_json``, table name)
        """
        progress = kwargs.get("progress_callback")
        table_name = kwargs.get("table_name", '')

        with requests_cache.enabled(cache_name=GENAPI_CACHE,
                                    backend=CACHE_BACKEND):
            try:
                # Only one id is requested, so only the first result is taken.
                downloads = self._gen.download([gen_data_id],
                                               'output.etcfile')
                response = next(downloads)
                # TODO: maybe edit Genesis module to support callback?
                if progress:
                    progress.emit()

            except requests.exceptions.ConnectionError as e:
                raise requests.exceptions.ConnectionError(
                    'Server not accessible, check your connection.') from e

            etc_json = response_to_json(response)
            return etc_json, table_name
Exemplo n.º 16
0
def process(input, entities=None):
    """Pick a random xkcd comic and build a generic-template reply for it.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: unused; accepted for signature parity with other handlers.

    Returns:
        dict: on success ``input``, ``output`` and ``success=True``; on any
        failure ``error_msg`` and ``success=False``.
    """
    output = {}
    try:
        with requests_cache.enabled('xkcd_cache', backend='sqlite', expire_after=3600):
            # Get the latest comic; its number is the upper bound for the draw
            r = requests.get('http://xkcd.com/info.0.json')
            latest = r.json()

            # Get a random comic between the first and the latest one
            r = requests.get('http://xkcd.com/%d/info.0.json' % randint(1, latest['num']))
            data = r.json()

        comic_number = str(data['num'])
        title = data['title']
        item_url = 'http://xkcd.com/' + comic_number + '/'
        explanation_url = 'http://explainxkcd.com/' + comic_number + '/'
        # Strip backslashes left over from JSON-escaped slashes in the URL
        image_url = data['img'].replace('\\', '')
        subtitle = data['alt']

        buttons = ButtonTemplate()
        buttons.add_web_url('xkcd Link', item_url)
        buttons.add_web_url('Explanation Link', explanation_url)

        template = GenericTemplate()
        template.add_element(title=title, item_url=item_url, image_url=image_url, subtitle=subtitle, buttons=buttons.get_buttons())

        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Deliberate catch-all for network/parsing failures; narrower than a
        # bare except so KeyboardInterrupt/SystemExit still propagate.
        error_message = 'There was some error while retrieving data from xkcd.'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 17
0
def process(input, entities):
    """Search Spotify for tracks and build a generic-template reply with one element per track.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: mapping where ``entities['music'][0]['value']`` holds the
            search text.

    Returns:
        dict: on success ``input``, ``output`` and ``success=True``; on any
        failure (including an empty result) ``error_msg`` and
        ``success=False``.
    """
    output = {}
    try:
        music = entities['music'][0]['value']
        with requests_cache.enabled('music_cache', backend='sqlite', expire_after=3600):
            # ``params`` makes requests percent-encode the search text, which
            # plain string concatenation did not do.
            r = requests.get('https://api.spotify.com/v1/search', params={'q': music, 'type': 'track'})
            data = r.json()
        tracks = data['tracks']['items']
        if not tracks:
            # Explicit check instead of ``assert``: asserts are stripped under
            # ``python -O``, which would report success with an empty template.
            raise LookupError('no tracks returned for query')
        template = GenericTemplate()
        for track in tracks:
            title = track['name']
            item_url = track['external_urls']['spotify']
            image_url = track['album']['images'][0]['url']
            artists = [artist['name'] for artist in track['artists']]
            # Whole minutes and seconds; minutes are not wrapped at 60, so
            # tracks of an hour or longer keep their full length.
            minutes, seconds = divmod(track['duration_ms'] // 1000, 60)
            duration = '%02d:%02d' % (minutes, seconds)
            subtitle = 'By ' + ', '.join(artists) + ' | ' + track['album']['name'] + ' | ' + duration
            buttons = ButtonTemplate()
            buttons.add_web_url('Preview Track', track['preview_url'])
            buttons.add_web_url('Open in Spotify', 'https://embed.spotify.com/openspotify/?spuri=' + track['uri'])
            template.add_element(title=title, item_url=item_url, image_url=image_url, subtitle=subtitle, buttons=buttons.get_buttons())
        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Deliberate catch-all for lookup/network/parsing failures; narrower
        # than a bare except so KeyboardInterrupt/SystemExit still propagate.
        error_message = 'I couldn\'t find any music matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - hymn for the weekend song'
        error_message += '\n  - linkin park songs'
        error_message += '\n  - play hotel california'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 18
0
 def __init__(self, chain_id=ChainID.MAINNET, contract_address=None):
     """Set up the web3 provider, fetch the contract ABI and build the contract object.

     :param chain_id: chain to connect to; also selects the default contract
         address and the Etherscan API classes.
     :param contract_address: contract address; when ``None`` it is looked
         up in ``self.CONTRACT_ADDRESSES`` for ``chain_id``.
     """
     if contract_address is None:
         contract_address = self.CONTRACT_ADDRESSES[chain_id]
     self.chain_id = chain_id
     self.contract_address = contract_address
     # ethereum_tester = EthereumTester()
     # self.provider = EthereumTesterProvider(ethereum_tester)
     self.provider = HTTPProviderFactory.create(self.chain_id)
     self.web3 = Web3(self.provider)
     self.etherscan_api_key = get_etherscan_api_key()
     ChainEtherscanContract = ChainEtherscanContractFactory.create(
         self.chain_id)
     self.ChainEtherscanAccount = ChainEtherscanAccountFactory.create(
         self.chain_id)
     # object construction needs to be within the context manager because
     # the requests.Session object to be patched is initialized in the
     # constructor
     with requests_cache.enabled(**REQUESTS_CACHE_PARAMS):
         self.etherscan_contract_api = ChainEtherscanContract(
             address=self.contract_address, api_key=self.etherscan_api_key)
         self.contract_abi = json.loads(
             self.etherscan_contract_api.get_abi())
     # contract_factory_class = ConciseContract
     contract_factory_class = Contract
     self.contract = self.web3.eth.contract(
         abi=self.contract_abi,
         address=self.contract_address,
         ContractFactoryClass=contract_factory_class)
     # retrieve signatures
     self.events_signatures = self.get_events_signatures(self.contract_abi)
     self.functions_signatures = self.get_functions_signatures(
         self.contract_abi)
Exemplo n.º 19
0
def list_papel_setor(setor=None):
    """
    List the 'Papel' column of the fundamentus result page for one sector.

    Setor: value interpolated into the ``setor`` query parameter

    Output:
      List
    """

    request_headers = {
        'User-agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201',
        'Accept': 'text/html, text/plain, text/css, text/sgml, */*;q=0.01',
        'Accept-Encoding': 'gzip, deflate',
    }

    ## GET: setor
    page_url = 'http://www.fundamentus.com.br/resultado.php?setor={}'.format(setor)

    with requests_cache.enabled():
        response = requests.get(page_url, headers=request_headers)

        if response.from_cache:
            logging.debug('.../resultado.php?setor={}: [CACHED]'.format(setor))
        else: # pragma: no cover
            # Only pause after a request that was not served from the cache.
            logging.debug('.../resultado.php?setor={}: sleeping...'.format(setor))
            time.sleep(.500) # 500 ms

    ## parse + load: only the first table on the page is used
    tables = pd.read_html(response.text, decimal=",", thousands='.')
    first_table = tables[0]

    return list(first_table['Papel'])
Exemplo n.º 20
0
def process(input, entities):
    """Search Kitsu for an anime and build a button-template reply from the first hit.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: mapping where ``entities['anime'][0]['value']`` holds the
            search text.

    Returns:
        dict: on success ``input``, ``output`` and ``success=True``; on any
        failure ``error_msg`` and ``success=False``.
    """
    output = {}
    try:
        anime = entities['anime'][0]['value']

        with requests_cache.enabled('anime_cache', backend='sqlite', expire_after=86400):
            r = requests.get('https://kitsu.io/api/edge/anime', params={
                'filter[text]': anime
            })
            data = r.json()

        # Only the first search result is used; an empty result raises
        # IndexError and is reported through the error branch below.
        top_anime = data['data'][0]['attributes']

        template = TextTemplate()
        template.set_text('Title: ' + top_anime['canonicalTitle'] + '\nSynopsis: ' + top_anime['synopsis'])
        template.set_post_text('\nAverage Rating: ' + top_anime['averageRating'] + '%')
        text = template.get_text()

        template = ButtonTemplate(text)
        template.add_web_url('Kitsu URL', 'https://kitsu.io/anime/' + top_anime['slug'])

        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Deliberate catch-all for lookup/network/parsing failures; narrower
        # than a bare except so KeyboardInterrupt/SystemExit still propagate.
        error_message = 'I couldn\'t find any anime matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - Death Note anime'
        error_message += '\n  - Dragon ball super anime status'
        error_message += '\n  - What is the anime rating of One Punch Man?'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 21
0
def process(input, entities):
    """Fetch the first definition of a word from the Words API and build a text reply.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: mapping where ``entities['word'][0]['value']`` holds the
            word to define.

    Returns:
        dict: on success ``input``, ``output`` and ``success=True``; on any
        failure ``error_msg`` and ``success=False`` (``input`` may already be
        set if the failure happened after the HTTP request).
    """
    output = {}
    try:
        word = entities['word'][0]['value']
        with requests_cache.enabled('dictionary_cache',
                                    backend='sqlite',
                                    expire_after=86400):
            r = requests.get('https://wordsapiv1.p.mashape.com/words/' + word +
                             '/definitions',
                             headers={'X-Mashape-Key': WORDS_API_KEY})
            data = r.json()
        output['input'] = input
        # Only the first definition is reported.
        output['output'] = TextTemplate(
            'Definition of ' + word + ':\n' +
            data['definitions'][0]['definition']).get_message()
        output['success'] = True
    except Exception:
        # Deliberate catch-all for lookup/network/parsing failures; narrower
        # than a bare except so KeyboardInterrupt/SystemExit still propagate.
        error_message = 'I couldn\'t find that definition.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - define comfort'
        error_message += '\n  - cloud definition'
        error_message += '\n  - what does an accolade mean?'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 22
0
    def fetch_etc_objects(self, *args, **kwargs):
        """Download all available :obj:`GenData` etc objects from the DictyExpress database.

        Positional and keyword arguments are accepted but not used.

        :raises requests.exceptions.ConnectionError: if the server cannot be
            reached
        :rtype: list of GenData objects
        """

        with requests_cache.enabled(cache_name=GENAPI_CACHE,
                                    backend=CACHE_BACKEND):
            try:
                # Note: this is hardcoded for now. When we port this module to Resolwe platform
                #       data retrieval will be different
                query_result = self._gen.api.data.get(
                    case_ids__contains='5535115cfad58d5e03006217',
                    status='done',
                    type__startswith='data:etc:')
                experiments = query_result['objects']

            except requests.exceptions.ConnectionError as e:
                raise requests.exceptions.ConnectionError(
                    'Server not accessible, check your connection.') from e

            # Wrap every raw experiment record in a GenData object.
            return [GenData(record, self._gen) for record in experiments]
Exemplo n.º 23
0
def load_annotations(data_folder):
    """Yield one document per distinct ``_id`` for the records in the LitCovid dump.

    Reads ``litcovid2BioCJSON.gz`` from ``data_folder``, resolves every
    record's ``pmid`` through ``getPubMedDataFor`` with request caching
    enabled, and yields each resulting document the first time its ``_id``
    is seen. The ``from_cache`` key is removed from every document.

    :param data_folder: directory containing ``litcovid2BioCJSON.gz``
    """

    infile = os.path.join(data_folder, "litcovid2BioCJSON.gz")
    assert os.path.exists(infile)

    with open_anyfile(infile, mode='r') as file:
        data_list = json.loads(file.read())
    # First item is a comment by provider
    data = data_list[1]

    seen_ids = set()
    with requests_cache.enabled('litcovid_cache', expire_after=expire_after):
        logging.debug("requests_cache: %s",
                      requests_cache.get_cache().responses.filename)
        for count, rec in enumerate(data, start=1):
            if count % 100 == 0:
                logging.info("litcovid.parser.load_annotations progress %s", count)

            doc = getPubMedDataFor(rec["pmid"])
            # NCBI eutils API limits requests to 10/sec, so pause after
            # every lookup that was not served from the cache.
            served_from_cache = doc.pop('from_cache')
            if not served_from_cache:
                time.sleep(.2)

            is_new = doc['_id'] not in seen_ids
            if is_new:
                yield doc
            seen_ids.add(doc['_id'])
        requests_cache.core.remove_expired_responses()
Exemplo n.º 24
0
    def test_as_context_manager(self):
        """Caching applies only inside ``requests_cache.enabled`` blocks.

        Inside the block the second identical request is served from the
        cache; outside it the response has no ``from_cache`` attribute and
        the delayed endpoint takes at least 1.5 s; re-entering with the same
        cache name finds the cached response again.
        """
        url = HTTPBIN_URL + 'delay/2'

        with requests_cache.enabled('test_cache', expire_after=10):
            first = requests.get(url)
            self.assertFalse(first.from_cache)

            second = requests.get(url)
            self.assertTrue(second.from_cache)

        # Outside the context manager the request goes to the network.
        started = time.time()
        uncached = requests.get(url)
        elapsed = time.time() - started
        self.assertFalse(hasattr(uncached, 'from_cache'))
        self.assertGreaterEqual(elapsed, 1.5)

        with requests_cache.enabled('test_cache'):
            again = requests.get(url)
            self.assertTrue(again.from_cache)
Exemplo n.º 25
0
def process(input, entities):
    """Look up a movie on TMDb, fetch its rating through IMDb, and build a button-template reply.

    Args:
        input: the original user input; echoed back under ``output['input']``.
        entities: mapping where ``entities['movie'][0]['value']`` holds the
            movie title.

    Returns:
        dict: on success ``input``, ``output`` and ``success=True``; on any
        failure (including no search results) ``error_msg`` and
        ``success=False``.
    """
    output = {}
    try:
        movie = entities['movie'][0]['value']

        with requests_cache.enabled('movie_cache', backend='sqlite', expire_after=86400):
            # Make a search request to the API to get the movie's TMDb ID
            r = requests.get('http://api.themoviedb.org/3/search/movie', params={
                'api_key': TMDB_API_KEY,
                'query': movie,
                'include_adult': False
            })
            data = r.json()

            # Explicit check instead of ``assert``, which is stripped under
            # ``python -O``.
            if not data['results']:
                raise LookupError('no TMDb results for query')
            tmdb_id = str(data['results'][0]['id'])

            # Make another request to the API using the movie's TMDb ID to get the movie's IMDb ID
            r = requests.get('https://api.themoviedb.org/3/movie/' + tmdb_id, params={
                'api_key': TMDB_API_KEY,
                'append_to_response': 'videos'
            })
            data = r.json()

        # Fetch movie rating from IMDb
        ia = IMDb()
        imdb_id = data['imdb_id']
        # The first two characters of the id are dropped before the lookup.
        imdb_movie = ia.get_movie(imdb_id[2:])
        imdb_movie.fetch()

        template = TextTemplate('Title: ' + data['title'] +
                                '\nYear: ' + data['release_date'][:4] +
                                '\nIMDb Rating: ' + str(imdb_movie.__dict__['rating']) + ' / 10' +
                                '\nOverview: ' + data['overview'])
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url('IMDb Link', 'https://www.imdb.com/title/' + data['imdb_id'] + '/')

        videos = data['videos']['results']
        # Append first Trailer URL if one exists
        for video in videos:
            if video['type'] == 'Trailer' and video['site'] == 'YouTube':
                template.add_web_url('YouTube Trailer', YouTubeUtil.get_video_url(video['key']))
                break

        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Deliberate catch-all for lookup/network/parsing failures; narrower
        # than a bare except so KeyboardInterrupt/SystemExit still propagate.
        error_message = 'I couldn\'t find that movie.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - batman movie'
        error_message += '\n  - iron man 2 movie plot'
        error_message += '\n  - What is the rating of happyness movie?'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 26
0
    def _fetch(self, uri, cache=True):
        """GET ``uri`` asking for JSON, with request caching switched on or off.

        :param uri: address to request
        :param cache: when truthy the request runs under
            ``requests_cache.enabled()``, otherwise under
            ``requests_cache.disabled()``
        :returns: the ``json`` attribute of the response
        """
        cache_context = requests_cache.enabled if cache else requests_cache.disabled
        with cache_context():
            response = requests.get(uri, headers={'Accept': 'application/json'})

        # NOTE(review): this returns the ``json`` attribute without calling
        # it; on requests versions where ``Response.json`` is a method the
        # caller receives a bound method, not parsed data. Confirm against
        # the callers before changing.
        return response.json
Exemplo n.º 27
0
def process(input, entities):
    """Build a movie-information reply for the movie named in ``entities``.

    The movie is looked up on TMDb (search, then details with videos), its
    rating is read from the IMDb record, and the result is rendered as a
    button template with an IMDb link and, when TMDb lists a video, a trailer
    link.

    Args:
        input: the original user message; echoed back under ``'input'``.
        entities: mapping where ``entities['movie'][0]['value']`` is the
            movie name to search for.

    Returns:
        dict: on success ``{'input', 'output', 'success': True}``; on any
        failure ``{'error_msg', 'success': False}``.
    """
    output = {}
    try:
        movie = entities['movie'][0]['value']

        # TMDb responses are cached in 'movie_cache' for 86400 seconds.
        with requests_cache.enabled('movie_cache',
                                    backend='sqlite',
                                    expire_after=86400):
            # Make a search request to the API to get the movie's TMDb ID
            r = requests.get('http://api.themoviedb.org/3/search/movie',
                             params={
                                 'api_key': TMDB_API_KEY,
                                 'query': movie,
                                 'include_adult': False
                             })
            data = r.json()

            # Explicit check rather than ``assert`` so it survives ``python -O``.
            if not data['results']:
                raise LookupError('no TMDb results')
            tmdb_id = str(data['results'][0]['id'])

            # Make another request to the API using the movie's TMDb ID to get the movie's IMDb ID
            r = requests.get('https://api.themoviedb.org/3/movie/' + tmdb_id,
                             params={
                                 'api_key': TMDB_API_KEY,
                                 'append_to_response': 'videos'
                             })
            data = r.json()

        # The trailer is optional: a movie without videos still gets a reply.
        videos = data['videos']['results']
        youtube_link = None
        if videos:
            youtube_link = ('https://www.youtube.com/watch?v=' +
                            str(videos[0]['key']))

        # Fetch movie rating from IMDb
        ia = IMDb()
        imdb_id = data['imdb_id']
        # The first two characters of the id are dropped before the lookup
        # (presumably the "tt" prefix -- confirm against the IMDb client).
        imdb_movie = ia.get_movie(imdb_id[2:])

        template = TextTemplate('Title: ' + data['title'] + '\nYear: ' +
                                data['release_date'][:4] + '\nIMDb Rating: ' +
                                str(imdb_movie['rating']) + ' / 10' +
                                '\nOverview: ' + data['overview'])
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url(
            'IMDb Link', 'https://www.imdb.com/title/' + data['imdb_id'] + '/')
        if youtube_link is not None:
            template.add_web_url('Trailer', youtube_link)

        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Any lookup/parsing failure is reported to the user as "not found";
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        error_message = 'I couldn\'t find that movie.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - batman movie'
        error_message += '\n  - iron man 2 movie plot'
        error_message += '\n  - What is the rating of happyness movie?'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 28
0
def api_search(auth_token, search_term):
    """Search Spotify for tracks matching ``search_term``.

    Args:
        auth_token: bearer token placed in the ``Authorization`` header.
        search_term: raw (un-encoded) text to search for.

    Returns:
        The ``requests`` response object of the search call; the request is
        made with the 'music_cache' sqlite cache enabled (3600 s expiry).
    """
    with requests_cache.enabled('music_cache',
                                backend='sqlite',
                                expire_after=3600):
        headers = {'Authorization': 'Bearer ' + auth_token}
        # Let requests encode the query so characters such as '&' or '#' in
        # the search term cannot corrupt the query string.
        r = requests.get('https://api.spotify.com/v1/search',
                         params={'q': search_term, 'type': 'track'},
                         headers=headers)
    return r
Exemplo n.º 29
0
    def _fetch(self, uri, cache=True):
        """Request ``uri`` as JSON and hand back the decoded payload.

        :param uri: address to GET; sent with ``Accept: application/json``.
        :param cache: truthy to run the request under
            ``requests_cache.enabled()``, falsy to run it under
            ``requests_cache.disabled()``.
        :returns: the JSON payload of the response.
        """
        # The request is identical either way; pick the context manager once.
        if cache:
            cache_context = requests_cache.enabled()
        else:
            cache_context = requests_cache.disabled()
        with cache_context:
            r = requests.get(uri, headers={'Accept': 'application/json'})

        # requests < 1.0 exposed ``json`` as a property, later versions as a
        # method; call it only when it is callable so both work.
        payload = r.json
        if callable(payload):
            payload = payload()
        return payload
Exemplo n.º 30
0
def get_categories():
    """Return the module-level ``_categories``, loading it on first use.

    While ``_categories`` is falsy, the category tree is requested through
    ``foursq.venues.categories()`` (with the ``long_cache`` request cache
    enabled) and passed to ``_flatten_categories``; afterwards
    ``_categories`` is returned.
    """
    global _categories
    if not _categories:
        with requests_cache.enabled(**long_cache):
            fetched = foursq.venues.categories()

        _flatten_categories([], fetched['categories'])
    return _categories
Exemplo n.º 31
0
    def test_as_context_manager(self):
        """Caching applies only inside ``requests_cache.enabled(...)`` blocks."""
        delayed_url = 'https://httpbin.org/delay/2'

        # First request inside the block is a miss, the second one a hit.
        with requests_cache.enabled(session_factory=PerURLCacheSession,
                                    expire_after=10):
            first = requests.get(delayed_url)
            assert not first.from_cache

            second = requests.get(delayed_url)
            assert second.from_cache

        # Outside the block the response carries no cache marker and the
        # request takes real time.
        began = time.time()
        uncached = requests.get(delayed_url)
        elapsed = time.time() - began
        assert not hasattr(uncached, 'from_cache')
        assert elapsed >= 1.5

        # Re-entering the context serves the earlier response from the cache.
        with requests_cache.enabled(session_factory=PerURLCacheSession):
            again = requests.get(delayed_url)
            assert again.from_cache
Exemplo n.º 32
0
def get_resultado_raw():
    """
    Download the fundamentus results table.

    URL:
      http://fundamentus.com.br/resultado.php

    RAW:
      the returned DataFrame keeps the original HTML header names

    Output:
      DataFrame indexed and sorted by 'papel'
    """

    ## Advanced search by company
    page_url = 'http://www.fundamentus.com.br/resultado.php'
    request_headers = {
        'User-agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201',
        'Accept': 'text/html, text/plain, text/css, text/sgml, */*;q=0.01',
        'Accept-Encoding': 'gzip, deflate',
    }

    with requests_cache.enabled():
        response = requests.get(page_url, headers=request_headers)

        if response.from_cache:
            logging.debug('.../resultado.php: [CACHED]')
        else: # pragma: no cover
            ## pause for 500 ms after a request that was not served from cache
            logging.debug('.../resultado.php: sleeping...')
            time.sleep(0.5)

    ## parse + load: the first table found in the page
    tables = pd.read_html(response.text, decimal=",", thousands='.')
    table = tables[0]

    ## Fix: percent strings -> floats
    percent_columns = (
        'Div.Yield',
        'Mrg Ebit',
        'Mrg. Líq.',
        'ROIC',
        'ROE',
        'Cresc. Rec.5a',
    )
    for column in percent_columns:
        table[column] = utils.perc_to_float(table[column])

    ## index by 'Papel', instead of 'int'
    table.index = table['Papel']
    table.drop('Papel', axis='columns', inplace=True)
    table.sort_index(inplace=True)

    ## naming
    table.index.name = 'papel'
    table.columns.name = 'Multiples'
    table.name = 'Fundamentus: HTML names'

    ## return sorted by 'papel'
    return table
Exemplo n.º 33
0
 def handle(self, *args, **options):
     """Collect items for every genre and save them in one transaction.

     Collection runs with the request cache configured by
     ``settings.REQUEST_CACHE`` enabled.
     """
     with requests_cache.enabled(**settings.REQUEST_CACHE):
         per_genre = [ItemCollector(genre).collect()
                      for genre in Genre.objects.all()]
         collected = itertools.chain.from_iterable(per_genre)  # flatten
         with transaction.atomic():
             for entry in collected:
                 # The item is saved once before its genres are assigned; see
                 # http://stackoverflow.com/questions/7837033/valueerror-cannot-add-instance-is-on-database-default-value-is-on-databas/7999014
                 item = Item(**entry.to_model_param())
                 item.save()
                 item.genres = entry.genres()
                 item.save()
Exemplo n.º 34
0
 def handle(self, *args, **options):
     """Run every genre's ``ItemCollector`` and persist the collected items.

     The whole run happens with ``requests_cache`` enabled using
     ``settings.REQUEST_CACHE``; all saves share one ``transaction.atomic()``.
     """
     with requests_cache.enabled(**settings.REQUEST_CACHE):
         batches = []
         for genre in Genre.objects.all():
             batches.append(ItemCollector(genre).collect())
         flattened = itertools.chain.from_iterable(batches)  # flatten
         with transaction.atomic():
             for collected_item in flattened:
                 # Saved before and after the genres assignment; background:
                 # http://stackoverflow.com/questions/7837033/valueerror-cannot-add-instance-is-on-database-default-value-is-on-databas/7999014
                 item = Item(**collected_item.to_model_param())
                 item.save()
                 item.genres = collected_item.genres()
                 item.save()
Exemplo n.º 35
0
def _get_csv_reader(id: str):
    """Return a ``csv.DictReader`` over the resource's streamed response.

    Args:
        id: identifier passed to ``model.Resource.get``.

    Returns:
        A ``csv.DictReader`` reading the response lines, or ``None`` when the
        resource is missing, its URL fails ``utils.is_dotstat_url``, or the
        HTTP response is not ``ok``.
    """
    res = model.Resource.get(id)
    if not res or not utils.is_dotstat_url(res.url):
        return None

    # Cache lifetime comes from configuration, falling back to 60.
    max_age = tk.asint(tk.config.get(CONFIG_DOTSTAT_CACHE_AGE, 60))
    with requests_cache.enabled(CACHE_PATH, expire_after=max_age):
        resp = requests.get(res.url, stream=True)
        if not resp.ok:
            return None
        lines = resp.iter_lines(decode_unicode=True)
        return csv.DictReader(lines)
Exemplo n.º 36
0
def process(input, entities):
    """Build a movie-information reply for the movie named in ``entities``.

    The movie is searched on TMDb, then its details and its video list are
    fetched. The reply is a button template with the title, year, TMDb
    average rating and overview, an IMDb link and, when TMDb lists a video,
    a trailer link.

    Args:
        input: the original user message; echoed back under ``'input'``.
        entities: mapping where ``entities['movie'][0]['value']`` is the
            movie name to search for.

    Returns:
        dict: on success ``{'input', 'output', 'success': True}``; on any
        failure ``{'error_msg', 'success': False}``.
    """
    output = {}
    try:
        movie = entities['movie'][0]['value']

        # TMDb responses are cached in 'movie_cache' for 86400 seconds.
        with requests_cache.enabled('movie_cache',
                                    backend='sqlite',
                                    expire_after=86400):
            # Make a search request to the API to get the movie's TMDb ID
            r = requests.get('http://api.themoviedb.org/3/search/movie',
                             params={
                                 'api_key': TMDB_API_KEY,
                                 'query': movie,
                                 'include_adult': False
                             })
            data = r.json()
            # Explicit check rather than ``assert`` so it survives ``python -O``.
            if not data['results']:
                raise LookupError('no TMDb results')
            tmdb_id = str(data['results'][0]['id'])
            # Make another request to the API using the movie's TMDb ID to get the movie's IMDb ID
            r = requests.get('https://api.themoviedb.org/3/movie/' + tmdb_id,
                             params={'api_key': TMDB_API_KEY})
            data = r.json()
            videoRequests = requests.get(
                'https://api.themoviedb.org/3/movie/' + tmdb_id + '/videos',
                params={'api_key': TMDB_API_KEY})
            videoData = videoRequests.json()

        template = TextTemplate('Title: ' + data['title'] + '\nYear: ' +
                                data['release_date'][:4] +
                                '\nAverage Rating: ' +
                                str(data['vote_average']) + ' / 10' +
                                '\nOverview: ' + data['overview'])
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url(
            'IMDb Link', 'https://www.imdb.com/title/' + data['imdb_id'] + '/')

        # The trailer button is optional: a movie without videos still gets
        # the rest of the reply.
        videos = videoData['results']
        if videos:
            template.add_web_url(
                'Trailer',
                'https://www.youtube.com/watch?v=' + videos[0]['key'])

        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Any lookup/parsing failure is reported to the user as "not found";
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        error_message = 'I couldn\'t find that movie.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - batman movie'
        error_message += '\n  - iron man 2 movie plot'
        error_message += '\n  - What is the rating of happyness movie?'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 37
0
    def fetch(self, uri, cache=True):
        """GET ``uri`` as JSON and return the entry named after its last part.

        :param uri: address to request; sent with ``Accept: application/json``.
        :param cache: when true the request runs inside
            ``requests_cache.enabled()``, otherwise inside
            ``requests_cache.disabled()``.
        :returns: ``payload.get(key)`` where ``key`` is the text after the
            final ``/`` of ``uri``.
        """
        # Both branches issue the same request; only the context manager differs.
        cache_context = requests_cache.enabled if cache else requests_cache.disabled
        with cache_context():
            r = requests.get(uri, headers={'Accept': 'application/json'})

        # ``Response.json`` was a property in requests < 1.0 and is a method in
        # later releases, where ``r.json.get`` raises AttributeError.
        payload = r.json
        if callable(payload):
            payload = payload()

        # last uri-part is dataset name and dictionary key
        key = uri.split('/')[-1]

        return payload.get(key)
Exemplo n.º 38
0
def request_json_resource(url, params=None, retry=3, time_between=1):
    """Fetch a JSON document, retrying on non-200/non-404 responses.

    Args:
        url: address to GET.
        params: optional query parameters forwarded to ``requests.get``.
        retry: maximum number of attempts.
        time_between: seconds to wait between two attempts.

    Returns:
        The decoded JSON, with objects loaded as ``OrderedDict``.

    Raises:
        Exception: when a 404 is received or every attempt fails.
    """
    with requests_cache.enabled(os.path.join(CACHE_DIR, 'lcs_static_cache')):
        for attempt in range(retry):
            response = requests.get(url, params=params, headers={'Origin': 'http://www.lolesports.com'})
            if response.status_code == 200:
                return response.json(object_pairs_hook=OrderedDict)
            if response.status_code == 404:
                # A missing resource will not appear on retry.
                break
            # Only wait when another attempt follows.
            if attempt < retry - 1:
                time.sleep(time_between)

        raise Exception('Unable to retrieve json resource')
Exemplo n.º 39
0
 def test_enabled(self):
     """Responses are cached inside ``enabled(...)`` and not outside it."""
     target = httpbin('get')
     cache_options = {
         'cache_name': CACHE_NAME,
         'backend': CACHE_BACKEND,
         'fast_save': FAST_SAVE,
     }
     with requests_cache.enabled(**cache_options):
         # First request is a miss ...
         first = requests.get(target)
         self.assertFalse(getattr(first, 'from_cache', False))
         # ... the following ones are hits.
         for _ in range(2):
             repeated = requests.get(target)
             self.assertTrue(getattr(repeated, 'from_cache', False))
     # After leaving the block the response is not marked as cached.
     outside = requests.get(target)
     self.assertFalse(getattr(outside, 'from_cache', False))
Exemplo n.º 40
0
    def fetch(self, uri, cache=True):
        """Request ``uri`` as JSON and return the value keyed by its last part.

        :param uri: address to GET; sent with ``Accept: application/json``.
        :param cache: truthy to run the request under
            ``requests_cache.enabled()``, falsy to run it under
            ``requests_cache.disabled()``.
        :returns: ``payload.get(key)`` where ``key`` is the text after the
            final ``/`` of ``uri``.
        """
        # The request is identical either way; pick the context manager once.
        if cache:
            cache_context = requests_cache.enabled()
        else:
            cache_context = requests_cache.disabled()
        with cache_context:
            r = requests.get(uri, headers={'Accept': 'application/json'})

        # requests < 1.0 exposed ``json`` as a property, later versions as a
        # method (on which ``.get`` does not exist); support both.
        payload = r.json
        if callable(payload):
            payload = payload()

        # last uri-part is dataset name and dictionary key
        key = uri.split('/')[-1]

        return payload.get(key)
Exemplo n.º 41
0
 def test_enabled(self):
     """``enabled(...)`` caches within its block only."""
     endpoint = httpbin('get')
     settings_for_cache = dict(cache_name=CACHE_NAME)
     settings_for_cache['backend'] = CACHE_BACKEND
     settings_for_cache['fast_save'] = FAST_SAVE

     def was_cached(response):
         # Responses made without the cache have no ``from_cache`` attribute.
         return getattr(response, 'from_cache', False)

     with requests_cache.enabled(**settings_for_cache):
         self.assertFalse(was_cached(requests.get(endpoint)))
         for _ in range(2):
             self.assertTrue(was_cached(requests.get(endpoint)))
     self.assertFalse(was_cached(requests.get(endpoint)))
Exemplo n.º 42
0
    def get_cached_ids(self):
        """List the ids found in the URLs of all cached responses.

        For each cached response URL, the text between ``self._data_endpoint``
        and the next ``/`` is extracted; URLs that do not match are skipped.

        :returns: list of the extracted id strings.
        """
        with requests_cache.enabled(cache_name=cache_name, backend=cache_backend):
            cache = requests_cache.core.get_cache()
            stored = [cache.get_response_and_time(key) for key in cache.responses]
            urls = [response.url for response, _ in stored]

            # NOTE(review): the endpoint is interpolated into the pattern
            # unescaped, so regex metacharacters in it are interpreted.
            matches = (
                re.search(r'{}(.*?)/'.format(self._data_endpoint), url)
                for url in urls
            )
            return [match.group(1) for match in matches if match is not None]
Exemplo n.º 43
0
    def test_disabled_enabled(self):
        """Timing check: no caching under ``disabled()``, caching under ``enabled()``."""
        delay = 1
        url = 'http://httpbin.org/delay/%s' % delay

        # With the cache disabled every request pays the full delay.
        with requests_cache.disabled():
            started = time.time()
            attempts = 2
            for _ in range(attempts):
                requests.get(url)
            elapsed = time.time() - started
            self.assertGreaterEqual(elapsed, delay * attempts)

        # With the cache enabled the total stays within delay * attempts.
        with requests_cache.enabled():
            started = time.time()
            attempts = 5
            for _ in range(attempts):
                requests.get(url)
            elapsed = time.time() - started
            self.assertLessEqual(elapsed, delay * attempts)
Exemplo n.º 44
0
def process(input, entities):
    """Build a reply containing the first definition of the requested word.

    Args:
        input: the original user message; echoed back under ``'input'``.
        entities: mapping where ``entities['word'][0]['value']`` is the word
            to define.

    Returns:
        dict: on success ``{'input', 'output', 'success': True}``; on any
        failure ``{'error_msg', 'success': False}``.
    """
    output = {}
    try:
        word = entities['word'][0]['value']
        # Responses are cached in 'dictionary_cache' for 86400 seconds.
        with requests_cache.enabled('dictionary_cache', backend='sqlite', expire_after=86400):
            r = requests.get('https://wordsapiv1.p.mashape.com/words/' + word + '/definitions', headers={
                'X-Mashape-Key': WORDS_API_KEY
            })
            data = r.json()
        output['input'] = input
        output['output'] = TextTemplate(
            'Definition of ' + word + ':\n' + data['definitions'][0]['definition']).get_message()
        output['success'] = True
    except Exception:
        # Any lookup/parsing failure is reported to the user as "not found";
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        error_message = 'I couldn\'t find that definition.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - define comfort'
        error_message += '\n  - cloud definition'
        error_message += '\n  - what does an accolade mean?'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 45
0
def process(input, entities):
    """Build a book-information reply from the Goodreads title lookup.

    Args:
        input: the original user message; echoed back under ``'input'``.
        entities: mapping where ``entities['book'][0]['value']`` is the book
            title to look up.

    Returns:
        dict: on success ``{'input', 'output', 'success': True}``; on any
        failure ``{'error_msg', 'success': False}``.
    """
    output = {}
    try:
        book_title = entities['book'][0]['value']

        # Responses are cached in 'book_cache' for 86400 seconds.
        with requests_cache.enabled('book_cache', backend='sqlite', expire_after=86400):
            # Let requests encode the query so characters such as '&' or '#'
            # in the title cannot corrupt the query string.
            response = requests.get(
                'https://www.goodreads.com/book/title.xml',
                params={'key': GOODREADS_ACCESS_TOKEN, 'title': book_title})
            data = ElementTree.fromstring(response.content)

        book_node = data.find('book')
        author = book_node.find('authors').find('author').find('name').text
        title = book_node.find('title').text
        description = html2text(book_node.find('description').text)
        average_rating = book_node.find('average_rating').text
        link = book_node.find('link').text
        goodreads_attribution = '- Powered by Goodreads'

        template = TextTemplate()
        template.set_text('Title: ' + title + '\nAuthor: ' + author + '\nDescription: ' + description)
        template.set_post_text('\nAverage Rating: ' + average_rating + ' / 5' + '\n' + goodreads_attribution)

        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url('Goodreads Link', link)

        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Any lookup/parsing failure is reported to the user as "not found";
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        error_message = 'I couldn\'t find any book matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - book timeline'
        error_message += '\n  - harry potter book plot'
        error_message += '\n  - little women book rating'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 46
0
def process(input, entities):
    """Build a movie-information reply from the OMDb title lookup.

    Args:
        input: the original user message; stored under ``'input'``.
        entities: mapping where ``entities['movie'][0]['value']`` is the
            movie title to look up.

    Returns:
        dict: on success ``{'input', 'output', 'success': True}``; on failure
        ``'error_msg'`` and ``'success': False`` are set instead of
        ``'output'``.
    """
    output = {}
    try:
        movie = entities['movie'][0]['value']
        # Responses are cached in 'movie_cache' for 86400 seconds.
        with requests_cache.enabled('movie_cache', backend='sqlite', expire_after=86400):
            # Let requests encode the query so characters such as '&' or '#'
            # in the title cannot corrupt the query string.
            r = requests.get('http://www.omdbapi.com/',
                             params={'t': movie, 'plot': 'full', 'r': 'json'})
            data = r.json()
        output['input'] = input
        template = TextTemplate('Title: ' + data['Title'] + '\nYear: ' + data['Year'] + '\nIMDb Rating: ' + data['imdbRating'] + ' / 10' + '\nPlot: ' + data['Plot'])
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url('IMDb Link', 'http://www.imdb.com/title/' + data['imdbID'] + '/')
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Any lookup/parsing failure is reported to the user as "not found";
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        error_message = 'I couldn\'t find that movie.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - batman movie'
        error_message += '\n  - iron man 2 movie plot'
        error_message += '\n  - What is the rating of happyness movie?'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Exemplo n.º 47
0
def get_venue(venue_id):
    """Return the ``'venue'`` entry of the ``foursq.venues`` lookup for ``venue_id``.

    The lookup runs with the ``short_cache`` request cache enabled.
    """
    with requests_cache.enabled(**short_cache):
        lookup = foursq.venues(venue_id)
        return lookup['venue']
Exemplo n.º 48
0
def api_search(auth_token, search_term):
    """Search Spotify for tracks matching ``search_term``.

    Args:
        auth_token: bearer token placed in the ``Authorization`` header.
        search_term: raw (un-encoded) text to search for.

    Returns:
        The ``requests`` response object of the search call; the request is
        made with the 'music_cache' sqlite cache enabled (3600 s expiry).
    """
    with requests_cache.enabled('music_cache', backend='sqlite', expire_after=3600):
        headers = {'Authorization': 'Bearer ' + auth_token}
        # Pass the term through ``params`` so requests encodes it; characters
        # such as '&' or '#' would otherwise corrupt the query string.
        query = {'q': search_term, 'type': 'track'}
        r = requests.get('https://api.spotify.com/v1/search', params=query, headers=headers)
    return r
Exemplo n.º 49
0
from contextlib import contextmanager
import requests_cache
import requests


# Follow a chain of four redirects and show, for every intermediate response,
# its URL next to the URL of the request that produced it.
resp = requests.get("http://httpbin.org/redirect/4")
for r in resp.history:
    hop_line = "url: {} request.url: {}".format(r.url, r.request.url)
    print(hop_line)


@contextmanager
def cache_enabled(*args, **kwargs):
    """Context manager that installs the requests cache for the ``with`` body.

    All arguments are forwarded to ``requests_cache.install_cache``.
    """
    requests_cache.install_cache(*args, **kwargs)
    try:
        yield
    finally:
        # Runs even when the body raises, so the cache never stays installed.
        requests_cache.uninstall_cache()

# Requests made inside this block go through the 'test.db' cache.
with requests_cache.enabled('test.db'):
    resp = requests.get("http://httpbin.org/get")
    # ``print(...)`` with a single argument behaves the same on Python 2 and 3;
    # the statement form is a SyntaxError on Python 3.
    print(getattr(resp, 'from_cache', False))
    s = requests.session()
    for i in range(5):
        s.get("http://httpbin.org/redirect/3")

# Same request again after leaving the block, to compare the cache marker.
resp = requests.get("http://httpbin.org/get")
print(getattr(resp, 'from_cache', False))
Exemplo n.º 50
0
def _request(url):
    """GET ``url`` as a streamed response with the static cache enabled.

    The cache location is ``STATIC_CACHE`` joined onto ``CACHE_DIR``.
    """
    cache_path = os.path.join(CACHE_DIR, STATIC_CACHE)
    with requests_cache.enabled(cache_path):
        response = requests.get(url, stream=True)
        return response