예제 #1
0
    def url_request(extension, data=None):
        """Build a request for ``base_url``/``extension`` and pass it to ``urlopen``.

        A truthy ``data`` is forwarded to ``request`` as its ``data`` keyword;
        otherwise the request is built from the URL alone.

        :param extension: path appended to ``base_url`` after a ``/``
        :param data: optional payload handed to ``request``
        :return: whatever ``urlopen`` returns for the built request
        """
        target = "{}/{}".format(base_url, extension)
        extra = {"data": data} if data else {}
        return urlopen(request(target, **extra))
예제 #2
0
    def _base_spotify_query(self, query):
        """Run a search query against the host configured in ``self.spotify_fields``.

        A client-credentials token is requested first (credentials are read
        from the ``SPOTIFY_CLIENT_ID`` / ``SPOTIFY_CLIENT_SECRET`` environment
        variables) and then sent in the ``Authorization`` header of the
        search request.

        :param query: text placed in the ``q`` parameter of the search URL
        :return: the decoded JSON response as a dict, or ``{}`` when the
            search request could not be completed
        :raises SpotifyError: when the decoded response has an ``error`` key
        """
        # spotify client credentials authorization api
        client_id = os.environ.get('SPOTIFY_CLIENT_ID')
        client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')
        auth = (client_id, client_secret)
        body_params = {'grant_type': 'client_credentials'}

        # make request for client token
        url = 'https://accounts.spotify.com/api/token'
        response = requests.post(url, data=body_params, auth=auth)
        # The token endpoint answers with JSON; decode it as JSON rather than
        # as a Python literal (which fails on true/false/null).
        content = response.json()

        try:
            host = self.spotify_fields['host']
            item_limit = self.spotify_fields['item_limit']
            # NOTE(review): ``query`` is inserted as-is; confirm callers
            # URL-quote it before it reaches this method.
            url = host + 'search?q=%s&limit=%d' % (query, item_limit)
            # make authorized request to spotify
            req = urllib2.Request(url)
            token = content['access_token']
            token_type = content['token_type']
            req.add_header('Authorization', token_type + ' ' + token)
            # urlopen returns a single file-like response object; read the
            # JSON straight from it.
            search_response = urllib2.urlopen(req)
            try:
                raw = json.load(search_response)
            finally:
                search_response.close()
        except Exception:
            # Best effort: any failure while searching yields an empty result.
            raw = {}

        if 'error' in raw:
            m = 'error ' + str(raw['error']) + ' at '
            raise SpotifyError(m + url)

        return raw
예제 #3
0
    def _base_spotify_query(self, query):
        """Run a search query against the host configured in ``self.spotify_fields``.

        A client-credentials token is requested first (credentials are read
        from the ``SPOTIFY_CLIENT_ID`` / ``SPOTIFY_CLIENT_SECRET`` environment
        variables) and then sent in the ``Authorization`` header of the
        search request.

        :param query: text placed in the ``q`` parameter of the search URL
        :return: the decoded JSON response as a dict, or ``{}`` when the
            search request could not be completed
        :raises SpotifyError: when the decoded response has an ``error`` key
        """
        # spotify client credentials authorization api
        client_id = os.environ.get('SPOTIFY_CLIENT_ID')
        client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')
        auth = (client_id, client_secret)
        body_params = {'grant_type': 'client_credentials'}

        # make request for client token
        url = 'https://accounts.spotify.com/api/token'
        response = requests.post(url, data=body_params, auth=auth)
        # The token endpoint answers with JSON; decode it as JSON rather than
        # as a Python literal (which fails on true/false/null).
        content = response.json()

        try:
            host = self.spotify_fields['host']
            item_limit = self.spotify_fields['item_limit']
            # NOTE(review): ``query`` is inserted as-is; confirm callers
            # URL-quote it before it reaches this method.
            url = host + 'search?q=%s&limit=%d' % (query, item_limit)
            # make authorized request to spotify
            req = urllib2.Request(url)
            token = content['access_token']
            token_type = content['token_type']
            req.add_header('Authorization', token_type + ' ' + token)
            # urlopen returns a single file-like response object; read the
            # JSON straight from it.
            search_response = urllib2.urlopen(req)
            try:
                raw = json.load(search_response)
            finally:
                search_response.close()
        except Exception:
            # Best effort: any failure while searching yields an empty result.
            raw = {}

        if 'error' in raw:
            m = 'error ' + str(raw['error']) + ' at '
            raise SpotifyError(m + url)

        return raw
예제 #4
0
def send_credentials(url, headers, data):
    """Send ``data`` to ``url`` and report whether the reply has a Location header.

    The request goes through an opener built with ``DoNothingRedirectHandler``.

    :param url: address the credentials are sent to
    :param headers: mapping of request headers
    :param data: request body
    :return: the string ``"Success"`` when the reply reports a ``Location``
        header, otherwise the reply object itself
    """
    opener = urllib2.build_opener(DoNothingRedirectHandler())
    req = urllib2.Request(url, headers=headers, data=data)
    rsp = opener.open(req)
    # NOTE(review): relies on the reply object exposing ``has_header``;
    # confirm that DoNothingRedirectHandler returns such an object.
    if rsp.has_header("Location"):
        return "Success"
    return rsp
예제 #5
0
def send_credentials(url, headers, data):
    """Send ``data`` to ``url`` and report whether the reply has a Location header.

    The request goes through an opener built with ``DoNothingRedirectHandler``.

    :param url: address the credentials are sent to
    :param headers: mapping of request headers
    :param data: request body
    :return: the string ``"Success"`` when the reply reports a ``Location``
        header, otherwise the reply object itself
    """
    opener = urllib2.build_opener(DoNothingRedirectHandler())
    req = urllib2.Request(url, headers=headers, data=data)
    rsp = opener.open(req)
    # NOTE(review): relies on the reply object exposing ``has_header``;
    # confirm that DoNothingRedirectHandler returns such an object.
    if rsp.has_header("Location"):
        return "Success"
    return rsp
예제 #6
0
    def custom(self, field="classification"):
        """
        Return custom report information for this entry.

        Currently disabled: the method raises ``NotImplementedError`` before
        doing any work, so the legacy implementation that follows the raise
        never executes.

        link: https://www.rcsb.org/pdb/results/reportField.do


        :param field: name of the report column to look up
        :return: nothing while the guard is in place
        :raises NotImplementedError: always
        """

        raise NotImplementedError(
            "TODO: The PDB wrapper code needs updating to use the modern RESTful API"
        )

        # --- Unreachable legacy implementation, kept for reference ---
        # Builds a customReport.xml query for self.identifier and `field`.
        extension = "/customReport.xml?pdbids={}&customReportColumns={}&service=wsfile&format=xml".format(
            self.identifier, field)
        url = request(base_url + extension)

        x = (urlopen(url))
        data = xmltodict.parse(x.read(), attr_prefix='', dict_constructor=dict)
        # Try the lookup under three shapes in turn: a "dimStructure.<field>"
        # key, a "dimEntity.<field>" key, then the same key on the first
        # element when "record" is a list.
        try:
            return data["dataset"]["record"]["dimStructure.{}".format(field)]
        except:

            try:
                return data["dataset"]["record"]["dimEntity.{}".format(field)]
            except:
                try:
                    return data["dataset"]["record"][0]["dimEntity.{}".format(
                        field)]
                except:
                    # None of the known shapes matched.
                    raise RuntimeError()
예제 #7
0
def getInfo(userId):
    """Fetch the aiShow page for ``userId`` and return it as UTF-8 bytes.

    The ``user-agent`` header is set to the value returned by ``headers()``.
    The body is decoded as GBK and re-encoded as UTF-8.

    :param userId: value substituted into the ``userId`` query parameter
    :return: the page content, UTF-8 encoded
    """
    req = urllib2.Request('https://mm.taobao.com/self/aiShow.htm?&userId=%s' %
                          userId)
    req.add_header('user-agent', headers())
    html = urllib2.urlopen(req).read().decode('gbk').encode('utf-8')
    return html
예제 #8
0
def getpage(url):
    """Open ``url`` through ``urlopener`` and create/truncate the file ``.txt``.

    NOTE(review): the fetched page is never written to the file in the
    visible code; confirm whether the body is incomplete.

    :param url: address to open
    :return: None
    """
    re1 = urllib2.Request(url)
    page = urlopener.open(re1)
    try:
        f = open('.txt', 'wb+')
    except IOError:
        print('f open error')
    else:
        # Nothing is written here, so release the handle straight away
        # instead of leaking it.
        f.close()
예제 #9
0
    def run(self):
        """Time the login page load, verify it, then post a login attempt.

        The page-load latency is stored in
        ``self.custom_timers['Example_Homepage']``. The login response body
        is printed.

        :raises AssertionError: when the first response is not HTTP 200 or
            lacks the expected marker text
        """
        # urllib2 needs absolute URLs, so both requests are built from the
        # same host.
        base_url = 'https://qaeunlx0c1.staging.infellowship.com'

        start_timer = time.time()
        resp = urllib2.urlopen(base_url + '/UserLogin/Index')
        content = resp.read()
        latency = time.time() - start_timer

        self.custom_timers['Example_Homepage'] = latency

        assert (resp.code == 200), 'Bad HTTP Response'
        assert ('Fellowship One User Groups'
                in content), 'Failed Content Verification'

        values = {
            'username': '******',
            'password': '******',
            'rememberme': 'false',
            'btn_login': '******'
        }

        data = urllib.urlencode(values)
        request = urllib2.Request(base_url + '/UserLogin/Attempt', data)
        response = urllib2.urlopen(request)

        content2 = response.read()
        print(content2)
예제 #10
0
def DownloadUrl2(url):
    """Fetch ``url`` with a fixed browser ``User-Agent`` and return the body.

    :param url: address to download
    :return: the raw response body
    """
    q = urllib2.Request(url)
    q.add_header(
        'User-Agent',
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36"
    )
    m = urllib2.urlopen(q)
    return m.read()
예제 #11
0
def loadPage(url, filename):
    """Announce the download of ``filename`` and return the body of ``url``.

    :param url: address to fetch
    :param filename: name shown in the progress message (not written to here)
    :return: the raw response body
    """
    print("正在下载......" + filename)
    headers = {
        "User-Agent" : "Mozilla/5.0(compatible;MSIE9.0;Windows NT 6.1; Trident/5.0;)"
    }
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    return response.read()
예제 #12
0
def getAlbumList(userId):
    """Return album links found on the model_album page for ``userId``.

    The page is fetched with the ``user-agent`` returned by ``headers()``,
    decoded as GBK and re-encoded as UTF-8. Every second match of the
    ``mm-first`` link pattern is returned.

    :param userId: value substituted into the ``user_id`` query parameter
    :return: list of matched link strings (without the leading ``//``)
    """
    # The format string needs a placeholder; without one the ``%`` operator
    # raises TypeError.
    req = urllib2.Request(
        'https://mm.taobao.com/self/model_album.htm?&user_id=%s' % userId)
    req.add_header('user-agent', headers())
    html = urllib2.urlopen(req).read().decode('gbk').encode('utf-8')

    reg = r'class="mm-first" href="//(.*?)"'
    return re.findall(reg, html)[::2]
예제 #13
0
def doRequest(url,headers,timeout):
    """Open ``url`` with the given headers and return the decoded response.

    ``Accept-Encoding: gzip,deflate`` is used as a default header; entries in
    ``headers`` are applied on top of it.

    :param url: address to request
    :param headers: extra request headers merged over the default
    :param timeout: timeout forwarded to ``urlopen``
    :return: the result of passing the response to ``__decode``
    """
    merged = {'Accept-Encoding': 'gzip,deflate'}
    merged.update(headers)
    outgoing = request(url, headers=merged)
    return __decode(urlopen(outgoing, timeout=timeout))
예제 #14
0
def PEC(number):
    """Request projecteuler.net for ``number`` and save the reply as ``<number>.html``.

    ``number`` is sent as the form field ``problem`` in the request body.

    :param number: problem number; also used as the output file's base name
    :return: None
    """
    import urllib  # local import: only needed to form-encode the request body

    url = "https://projecteuler.net/"
    # urllib2 expects an encoded string body, not a dict.
    data = urllib.urlencode({'problem': number})
    request = urllib2.Request(url, data)
    response = urllib2.urlopen(request)
    try:
        result = response.read()
    finally:
        response.close()
    filename = str(number) + '.html'
    with open(filename, 'w') as f:
        f.write(result)
    print(filename + " downloaded")
예제 #15
0
def PEC(number):
    """Request projecteuler.net for ``number`` and save the reply as ``<number>.html``.

    ``number`` is sent as the form field ``problem`` in the request body.

    :param number: problem number; also used as the output file's base name
    :return: None
    """
    import urllib  # local import: only needed to form-encode the request body

    url = "https://projecteuler.net/"
    # urllib2 expects an encoded string body, not a dict.
    data = urllib.urlencode({'problem': number})
    request = urllib2.Request(url, data)
    response = urllib2.urlopen(request)
    try:
        result = response.read()
    finally:
        response.close()
    filename = str(number) + '.html'
    with open(filename, 'w') as f:
        f.write(result)
    print(filename + " downloaded")
예제 #16
0
    def _raw_properties(self, info_type='ligand'):
        """Open the endpoint selected by ``info_type`` for ``self.identifier``.

        :param info_type: one of 'describe_pdb', 'describe_mol', 'ligand',
            'pfam' or 'general'
        :return: whatever ``urlopen`` returns for the assembled request
        """
        endpoints = dict(
            describe_pdb='/describePDB?structureId=',
            describe_mol='/describeMol?structureId=',
            ligand='/ligandInfo?structureId=',
            pfam='/hmmer?structureId=',
            general='/getEntityInfo?structureId=',
        )
        target = base_url + endpoints[info_type] + self.identifier
        return urlopen(request(target))
예제 #17
0
    def custom(self, field="classification"):
        """
        Return the ``dimStructure.<field>`` value from the custom report.

        link: https://www.rcsb.org/pdb/results/reportField.do

        :param field: report column name requested and looked up
        :return: the value stored under ``dimStructure.<field>`` in the
            parsed report's ``dataset`` / ``record`` entry
        """
        query = "/customReport.xml?pdbids={}&customReportColumns={}&service=wsfile&format=xml".format(
            self.identifier, field)
        reply = urlopen(request(base_url + query))
        parsed = xmltodict.parse(reply.read(), attr_prefix='', dict_constructor=dict)
        record = parsed["dataset"]["record"]
        return record["dimStructure.{}".format(field)]
예제 #18
0
    def _raw_properties(self, info_type='ligand'):
        """
        Open the endpoint selected by ``info_type`` for this entry.

        Currently disabled: the method raises ``NotImplementedError`` before
        doing any work, so the legacy implementation that follows the raise
        never executes.

        :param info_type: key into the endpoint table in the legacy code
        :return: nothing while the guard is in place
        :raises NotImplementedError: always
        """
        raise NotImplementedError(
            "TODO: The PDB wrapper code needs updating to use the modern RESTful API"
        )

        # --- Unreachable legacy implementation, kept for reference ---
        # Maps each supported info_type to its URL path prefix.
        info_type_dict = {
            'describe_pdb': '/describePDB?structureId=',
            'describe_mol': '/describeMol?structureId=',
            'ligand': '/ligandInfo?structureId=',
            'pfam': '/hmmer?structureId=',
            'general': '/getEntityInfo?structureId='
        }

        # Path prefix + identifier appended to base_url, then opened.
        url = request(base_url + info_type_dict[info_type] + self.identifier)
        return urlopen(url)
예제 #19
0
def crawler(url, settings, headers, timeout):
    """Fetch ``url``, collect matching elements and return one selected value.

    Elements are selected with ``settings['tag']`` and ``settings['attrs']``.
    When ``settings['attrs_len']`` is present, elements with a different
    number of attributes are skipped. ``settings['method']`` chooses what is
    collected: ``'text'`` collects the element text (stripped of control and
    high characters when ``'decode'`` is in ``settings``) and ``'attr'``
    collects the attribute named by ``settings['attr']``.

    The value at ``settings['index']`` (default 0) is picked, then optionally
    rewritten by ``settings['manipulate']``: ``regexp`` keeps the first regex
    match and ``to_replace`` / ``replace_with`` perform a string replacement.

    :param url: address to fetch
    :param settings: dict of extraction options described above
    :param headers: request headers
    :param timeout: timeout forwarded to ``urlopen``
    :return: the selected (and possibly rewritten) value
    :raises IndexError: when nothing was collected, the index is out of
        range, or the regexp finds no match
    """
    DEBUG = True
    req = request(url, headers=headers)
    page = urlopen(req, timeout=timeout)
    soup = bs4.BeautifulSoup(page)

    if DEBUG:
        print("RabbitHole: Crawler fetched data  on " + req.get_full_url())

    collected = []
    for node in soup.findAll(settings['tag'], attrs=settings['attrs']):

        if "attrs_len" in settings and len(node.attrs) != settings["attrs_len"]:
            continue

        if settings['method'] == 'text':
            if "decode" in settings:
                collected.append(re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]', '', node.text))
            else:
                collected.append(node.text)
        elif settings['method'] == 'attr':
            collected.append(node[settings["attr"]])

    if "index" in settings:
        url_out = collected[int(settings['index'])]
    else:
        url_out = collected[0]

    if "manipulate" in settings:
        manipulate = settings["manipulate"]
        # Apply the rewrites to the value that is returned. Writing them back
        # into the list, keyed by the last loop element, raised and never
        # reached the result.
        if "regexp" in manipulate:
            url_out = re.findall(manipulate["regexp"], url_out)[0]
        if "to_replace" in manipulate:
            url_out = str(url_out).replace(manipulate["to_replace"], manipulate["replace_with"])

    if DEBUG:
        print(collected)
        print("Crawler: List len=" + str(len(collected)))
        print("Crawler: URL=" + str(url_out))

    return url_out
def download_bmp(x, y):
    """Fetch the resource at ``URL % (x, y)`` and return its body.

    The request carries a fixed ``User-agent`` header.

    :param x: first value substituted into ``URL``
    :param y: second value substituted into ``URL``
    :return: the content read from the opened response
    """
    agent = {'User-agent':'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)'}
    outgoing = request(URL % (x, y), headers = agent)
    reply = urlopen(outgoing)
    return reply.read()
예제 #21
0
#from flask import Flask, flash, redirect, render_template, request, session
#from flask_session import Session
#from tempfile import mkdtemp
#from werkzeug.exceptions import default_exceptions
#from werkzeug.security import check_password_hash, generate_password_hash
import json
import requests
import urllib2

#from helpers import apology, login_required
# NOTE(review): the API key is embedded in the URL below; consider loading it
# from configuration instead of committing it to source.
req = urllib2.Request(
    "https://www.quandl.com/api/v3/datatables/ZACKS/AR.json?api_key=sEpgZsqAFy4-iptWfY2V"
)
# No opener is defined in this script, so use the module-level urlopen.
f = urllib2.urlopen(req)
try:
    # Bound to its own name so the json module is not shadowed.
    payload = json.loads(f.read())
finally:
    f.close()
print(payload)
print(payload['unit'])

r = requests.get(
    url='https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty')
print(r.json())

#print(response.status_code)
#print(response)

#with open("response", "r") as read_file:
#   data = json.load(response)
#data = json.loads(response)

#for (k, v) in data.items():
#   print("Key: " + k)
import urllib2

from urllib import urlencode

# Form fields for the request body.
data = {"username": "******", "password": "******"}
data_encoded = urlencode(data)

# Passing a body makes urllib2 send the request as a POST.
req = urllib2.Request("url", data_encoded, headers={"User-Agent": "Scanner"})

final_request = urllib2.urlopen(req)

print(final_request.read())
예제 #23
0
def action_import(location, verbose=True):
    """
    Import a file into the local repo.

    ``location`` is either a web address (``http...``, or ``www.`` which is
    prefixed with ``http://``) or a local file path. The content is copied
    into the directory returned by ``get_home_location()``, loaded with
    ``Ontospy`` and, on success, cached with ``do_pickle_ontology``.

    :param location: URL or local path of the file to import
    :param verbose: forwarded to ``Ontospy``
    :return: the ``Ontospy`` object, or ``None`` when retrieval or parsing
        failed (the local copy is removed in that case)
    """

    location = str(location)  # prevent errors from unicode being passed

    # 1) extract file from location and save locally
    ONTOSPY_LOCAL_MODELS = get_home_location()
    fullpath = ""

    try:
        if location.startswith("www."):  # support for lazy people
            location = "http://%s" % str(location)
        if location.startswith("http"):
            headers = {'Accept': "application/rdf+xml"}
            try:
                # Py2
                req = urllib2.Request(location, headers=headers)
                res = urllib2.urlopen(req)
            except Exception:
                # Py3
                req = urllib.request.Request(location, headers=headers)
                res = urlopen(req)
            final_location = res.geturl()  # after 303 redirects
            printDebug("Saving data from <%s>" % final_location, "green")
            filename = location.replace("http://", "").replace("/", "_")
            if not filename.lower().endswith(
                ('.rdf', '.owl', '.rdfs', '.ttl', '.n3')):
                filename = filename + ".rdf"
            fullpath = ONTOSPY_LOCAL_MODELS + "/" + filename  # 2016-04-08

            # The context manager closes the file even if the download
            # fails midway.
            with open(fullpath, 'wb') as file_:
                file_.write(res.read())
        else:
            if os.path.isfile(location):
                filename = location.split("/")[-1] or location.split("/")[-2]
                fullpath = ONTOSPY_LOCAL_MODELS + "/" + filename
                shutil.copy(location, fullpath)
            else:
                raise ValueError('The location specified is not a file.')
    except Exception:
        printDebug(
            "Error retrieving file. Please make sure <%s> is a valid location."
            % location, "important")
        if os.path.exists(fullpath):
            os.remove(fullpath)
        return None

    # 2) parse the saved copy
    try:
        g = Ontospy(fullpath, verbose=verbose)
    except Exception:
        g = None
        if os.path.exists(fullpath):
            os.remove(fullpath)
        printDebug(
            "Error parsing file. Please make sure %s contains valid RDF." %
            location, "important")

    if g:
        printDebug("Caching...", "red")
        do_pickle_ontology(filename, g)
        printDebug("----------\n...completed!", "important")

    # finally...
    return g
예제 #24
0
print(auth_url)

### The data we are sending in our post authentication.
# NOTE(review): the keys below are masked placeholders; repeated keys in a
# dict literal collapse to one entry, so restore the real field names.
values = {'userid':'xxxxx', 'password':'******', 'target':'', 'xxxxxxx':'',
'xxxxx':'', 'xxxxxx':'','xxxxxx':'', 'xxxxxxx':''}

### encode the post data.
data = urllib.urlencode(values)

### cookies handler
jar = cookielib.FileCookieJar("cookie")
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))

### fetch the first page through the cookie-aware opener
response = opener.open(url)

### perform authentication through the same opener so the cookies collected
### above are sent along with the credentials
req = urllib2.Request(auth_url, data)
response2 = opener.open(req)

### print second response
print(response2.read())
#file.write(response.read())
예제 #25
0
def getHtmlPage(url):
    """Return the response body fetched from ``url``.

    :param url: address to fetch
    :return: the raw response body
    """
    return urllib2.urlopen(urllib2.Request(url)).read()
예제 #26
0
    def open(self):
        """Open the remote URL and the local output file.

        The response for ``self.fullURL`` is stored in ``self.remoteHandler``
        and a file opened for writing at ``self.fileName`` is stored in
        ``self.localHandler``.

        :return: the tuple ``(ESuccess, self)``
        """
        request = urllib2.Request(self.fullURL)
        self.remoteHandler = urllib2.urlopen(request)
        self.localHandler = open(self.fileName, "w")

        return (ESuccess, self)
예제 #27
0
파일: util.py 프로젝트: haluomao/crawler
def open(url):
    """Fetch ``url`` and print the response body.

    Note that this module-level name shadows the builtin ``open`` for code
    in this module.

    :param url: address to fetch
    :return: None
    """
    request = urllib2.Request(url)
    response = urllib2.urlopen(request)
    print(response.read())
예제 #28
0
    def add_online_contribution_immediate(self):
        """Generates an immediate online contribution.

        Opens online giving, reads the ``__VIEWSTATE`` and
        ``__EVENTVALIDATION`` values from the first form on the page, then
        posts the contribution form fields to the contribution URL (which
        ends with ``self.iCode``).

        :raises AssertionError: when the response code is not 200
        """
        self.open_online_giving()

        # set the URL
        url = "http://integrationqa.dev.corp.local/integration/contribution/onlinecontribution.aspx?cCode=alyq5rXA9igtXilwXAT+3Q==" + self.iCode

        # select first form on the page
        browser.select_form(nr=0)

        # set the values for the immediate contribution
        values = {
            '__EVENTTARGET': 'btnNew',
            '__EVENTARGUMENT': '',
            '__LASTFOCUS': '',
            '__VIEWSTATE': browser.form['__VIEWSTATE'],
            'dtbMonthYearStartDate_PU_PN_Month': '8',
            'dtbMonthYearStartDate_PU_PN_Year': '2010',
            '_ctl1:DateTextBox_PU_PN_MYP_PN_Month': '8',
            '_ctl1:DateTextBox_PU_PN_MYP_PN_Year': '2010',
            '_ctl1:DateTextBox_PU_PN_SelDate': '',
            '_ctl1:DateTextBox_PU_PN_MonthView': '2010|8',
            '__EVENTVALIDATION': browser.form['__EVENTVALIDATION'],
            'txtAmount:textBox': '1.04',
            'ddlFrequency': '1',
            'FundRadioButton': 'rdbtnFund',
            'ddlFund:dropDownList': '29378',
            '_ctl1:DateTextBox': '',
            'dtbMonthYearStartDate': '',
            'rblLength': '1',
            'txtNumberOfGifts': '',
            '_ctl3:DateTextBox': '',
            'oneTime': 'rbImmediate',
            'ddlPaymentMethod:dropDownList': '1',
            'txtBankName:textBox': '',
            'txtBankRoutingNumber:textBox': '',
            'txtBankAccountNumber:textBox': '',
            'txtReenterAccountNumber:textBox': '',
            'txtHoldersName:textBox': 'Matthew Sneeden',
            'txtCardNo:textBox': '4111111111111111',
            'dtCCStartDate': '',
            'mytExpirationDate': '8/2011',
            'txtIssueNumber:textBox': '',
            'txtcvcNumber:textBox': '',
            'ctlAddress:ddlCountry:dropDownList': 'US',
            'ctlAddress:txtAddress1:textBox': '9616 Armour Dr',
            'ctlAddress:txtAddress2:textBox': '',
            'ctlAddress:txtAddress3:textBox': '',
            'ctlAddress:txtCity:textBox': 'Fort Worth',
            'ctlAddress:txtState:textBox': '',
            'ctlAddress:ddlState:dropDownList': 'TX',
            'ctlAddress:txtPostalCode:textBox': '76244-6085',
            'ctlAddress:txtPhoneNumber:textBox': '',
            'txtBankAgree:textBox': '',
            'ddlHistoryYear': '2010',
            'hid1': '',
            'hid2': '2010',
            'hid3': ''
        }

        # encode the data and create the request
        data = urllib.urlencode(values)
        request = urllib2.Request(url, data)

        # make the request and read the response
        response = urllib2.urlopen(request)

        # assert the page loaded (checks the response obtained just above)
        assert (
            response.code == 200
        ), "Bad HTTP Response. Expecting a 200.  Received a " + str(response.code)
예제 #29
0
import urllib2
import pytz
import pandas as pd

from bs4 import BeautifulSoup
from datetime import datetime
from pandas.io.data import DataReader

# Page listing the S&P 500 constituents.
SITE = "http://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
START = datetime(1900, 1, 1, 0, 0, 0, 0, pytz.utc)
# NOTE(review): END is a naive UTC timestamp while START is timezone-aware;
# the two cannot be compared directly. Confirm which form consumers expect.
END = datetime.utcnow()

# Download the listing page.
html = urllib2.urlopen(SITE).read()


def add_two_ints(int1, int2):
  """Return the result of ``int1 + int2``."""
  total = int1 + int2
  return total

add_two_ints(1, 5)


예제 #30
0
#encoding:utf-8

import urllib2
from bs4 import BeautifulSoup

# Fetch the page (20 second timeout) and parse it with the stdlib HTML parser.
url = 'http://reeoo.com'
request = urllib2.Request(url)
response = urllib2.urlopen(request, timeout=20)
content = response.read()
soup = BeautifulSoup(content, 'html.parser')
예제 #31
0
import urllib2

# Fetch the page and print its body.
request = urllib2.Request("http://www.baidu.com")
response = urllib2.urlopen(request)
print (response.read())