Example #1
class MetricServiceRequest(object):
    """
    A convenience class for fetching metrics from CentralQuery that can
    be used by Twisted daemons.
    """
    # use a shared cookie jar so all Metric requests can share the same session
    cookieJar = CookieJar()

    def __init__(self, userAgent):
        self._aggMapping = AGGREGATION_MAPPING
        urlstart = getGlobalConfiguration().get('metric-url',
                                                'http://localhost:8080')
        self._metric_url = '%s/%s' % (urlstart, METRIC_URL_PATH)
        self._metric_url_v2 = '%s/%s' % (urlstart, WILDCARD_URL_PATH)
        creds = IAuthorizationTool(None).extractGlobalConfCredentials()
        auth = base64.b64encode('{login}:{password}'.format(**creds))
        self.agent = CookieAgent(
            Agent(reactor, pool=getPool(), connectTimeout=30), self.cookieJar)
        self._headers = Headers({
            'Authorization': ['basic %s' % auth],
            'content-type': ['application/json'],
            'User-Agent': ['Zenoss: %s' % userAgent]
        })
        self.onMetricsFetched = None

    def getMetrics(self,
                   uuid,
                   dpNames,
                   cf='AVERAGE',
                   rate=False,
                   downsample="1h-avg",
                   start=None,
                   end=None,
                   deviceId=None,
                   returnSet="EXACT"):
        metrics = []
        if isinstance(dpNames, basestring):
            dpNames = [dpNames]
        for dpName in dpNames:
            # TODO find callers
            name = ensure_prefix(deviceId, dpName)
            metrics.append(
                dict(metric=name,
                     aggregator=self._aggMapping.get(cf.lower(), cf.lower()),
                     rpn='',
                     rate=rate,
                     format='%.2lf',
                     tags=dict(contextUUID=[uuid]),
                     name='%s_%s' % (uuid, dpName)))

        request = dict(returnset=returnSet,
                       start=start,
                       end=end,
                       downsample=downsample,
                       metrics=metrics)
        body = FileBodyProducer(StringIO(json.dumps(request)))
        d = self.agent.request('POST', self._metric_url, self._headers, body)
        return d

    def fetchMetrics(self,
                     metrics,
                     start="1h-ago",
                     end=None,
                     returnSet="EXACT"):
        """
        Uses the CentralQuery V2 api to fetch metrics. Mainly that means wild cards can be used to fetch all metrics
        with the same name grouped by a tag. Usually used to retrieve a specific metric for all component on a device
        :param metrics: dictionary with required keys of metricName, tags and optional rpn defaults to empty,
        cf defatults to average, rate defaults to false, downsample defaults to 5m-avg
        :param start:
        :param end:
        :param returnSet:
        :return: deferred
        """
        metricQueries = []
        for metric in metrics:
            log.info("fetchMetrics metrics %s", metric)
            cf = metric.get('cf', 'average')
            rpn = metric.get('rpn', '')
            rate = metric.get('rate', False)
            tags = metric['tags']
            downsample = metric.get('downsample', '5m-avg')
            metricName = metric['metricName']
            metricQueries.append(
                dict(metric=metricName,
                     downsample=downsample,
                     aggregator=self._aggMapping.get(cf.lower(), cf.lower()),
                     rpn=rpn,
                     rate=rate,
                     format='%.2lf',
                     tags=tags,
                     name=metricName))

        request = dict(returnset=returnSet,
                       start=start,
                       end=end,
                       downsample=downsample,
                       queries=metricQueries)
        body = FileBodyProducer(StringIO(json.dumps(request)))
        log.info("POST %s %s %s", self._metric_url_v2, self._headers,
                 json.dumps(request))
        d = self.agent.request('POST', self._metric_url_v2, self._headers,
                               body)
        return d
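
A minimal usage sketch for the class above (the UUID, datapoint name, and callback wiring are illustrative assumptions; readBody comes from twisted.web.client). getMetrics() returns a Twisted Deferred that fires with the HTTP response, whose JSON body still has to be read and decoded by the caller:

from twisted.web.client import readBody

def _decode(response):
    d = readBody(response)
    d.addCallback(json.loads)
    return d

svc = MetricServiceRequest(userAgent='example-daemon')
d = svc.getMetrics('aaaa-bbbb-cccc', ['sysUpTime'], start='1h-ago', end='now',
                   deviceId='device1')
d.addCallback(_decode)
d.addCallback(lambda data: log.info("metrics: %s", data))
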
Example #2
                                          count=200)
 if len(results) > 0:
     for tweet in results:
         if len(tweet['entities']['urls']) >= 1:
             url = tweet['entities']['urls'][0]['url']
             txt = spliter.splitCleanTweet2Sents(tweet['text'])
             original_sentence = ''
             for sentence in txt:
                 original_sentence = original_sentence + sentence + ' '
             if len(original_sentence.split()) <= 6:
                 continue
             #print tweet['text']
             #print url
             try:
                 time.sleep(5)
                 cj = CookieJar()
                 opener = urllib2.build_opener(
                     urllib2.HTTPCookieProcessor(cj))
                 result = opener.open(url)
                 real_url = result.geturl()
                 if real_url.find('.html') != -1:
                     real_url = real_url[:real_url.find('.html') + 5]
                 #elif real_url.find('&')!=-1:
                 #	real_url=real_url[:real_url.find('&')]
                 #elif real_url.find('?')!=-1:
                 #	real_url=real_url[:real_url.find('?')]
                 #print real_url
                 if real_url in real_urls:
                     continue
                 query = twitter.search.tweets(q=real_url,
                                               lang="en",
Example #3
 def request(self):
     self.response_text = self._DEFAULT_RESPONSE
     if not self.exceptions:
         self.exceptions = Exception
     if not self.wait:
         self.wait = time.sleep
     if not self.headers:
         self.headers = {}
     
     for i in xrange(self.tries):
         self.current_tries = i + 1
         if self.before_request:
             self.before_request(self)
         if self.cancel_operation and self.cancel_operation():
             break
         request_report = 'Request URL: ' + self.get_url_for_report(self.url)
         request_report += '\nRequest data: ' + Utils.str(self.data)
         request_report += '\nRequest headers: ' + Utils.str(self.get_headers_for_report(self.headers))
         response_report = '<response_not_set>'
         response = None
         rex = None
         try:
             Logger.debug(request_report)
             req = urllib2.Request(self.url, self.data, self.headers)
             response = urllib2.urlopen(req)
             self.response_code = response.getcode()
             self.response_info = response.info()
             self.response_url = response.geturl()
             cookiejar = CookieJar()
             cookiejar._policy._now = cookiejar._now = int(time.time())
             self.response_cookies = cookiejar.make_cookies(response, req)
             if self.read_content:
                 self.response_text = response.read()
             content_length = self.response_info.getheader('content-length', -1)
             response_report = '\nResponse Headers:\n%s' % Utils.str(self.response_info)
             response_report += '\nResponse (%d) content-length=%s, len=<%s>:\n%s' % (self.response_code, content_length, len(self.response_text), self.response_text)
             self.success = True
             break
         except self.exceptions as e:
             Logger.debug('Exception...')
             root_exception = e
             response_report = '\nResponse <Exception>: ' 
             if isinstance(e, urllib2.HTTPError):
                 self.response_text = Utils.str(e.read())
                 response_report += self.response_text
             else:
                 response_report += Utils.str(e)
             rex = RequestException(Utils.str(e), root_exception, request_report, response_report)
         finally:
             Logger.debug(response_report)
             if response:
                 response.close()
         if rex:
             if self.on_exception:
                 Logger.debug('calling self.on_exception...')
                 self.on_exception(self, rex)
             if self.cancel_operation and self.cancel_operation():
                 break
             Logger.debug('current_tries: ' + str(self.current_tries) + ' maximum tries: ' + str(self.tries) + ' i: ' + str(i))
             if self.current_tries == self.tries:
                 Logger.debug('max retries reached')
                 if self.on_failure:
                     self.on_failure(self)
                 if self.on_complete:
                     self.on_complete(self)
                 Logger.debug('Raising exception...')
                 raise rex
             current_time = time.time()
             max_waiting_time = current_time + self.current_delay
             Logger.debug('current_delay: ' + str(self.current_delay) + ' seconds. Waiting...')
             while (not self.cancel_operation or not self.cancel_operation()) and max_waiting_time > current_time:
                 remaining = round(max_waiting_time-current_time)
                 if self.waiting_retry:
                     Logger.debug('calling self.waiting_retry...')
                     self.waiting_retry(self, remaining)
                 self.wait(1)
                 current_time = time.time()
             Logger.debug('Done waiting.')
             self.current_delay *= self.backoff
         
     if self.success and self.on_success:
         self.on_success(self)
     if self.on_complete:
         self.on_complete(self)
     return self.response_text
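
The class above wraps urllib2 in a retry loop with exponential backoff, cancellation hooks, and callbacks. Stripped to its core, the same idea looks roughly like this (the function name, default number of tries, and delays are illustrative assumptions):

import time
import urllib2
from cookielib import CookieJar


def fetch_with_retries(url, tries=3, delay=1.0, backoff=2.0):
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(CookieJar()))
    for attempt in range(1, tries + 1):
        try:
            return opener.open(url).read()
        except urllib2.URLError:
            if attempt == tries:
                raise          # give up after the last attempt
            time.sleep(delay)  # back off before retrying
            delay *= backoff
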
Example #4
 def __init__(self, uuid, token, cert_file):
     agent = Agent(uuid, token, cert_file)
     jar = CookieJar()
     self._agent = CookieAgent(agent, jar)
     super(self.__class__, self).__init__(self._agent)
Example #5
def test_cookiejar():
    with wsgiserver(set_cookie()):
        useragent = UserAgent(cookiejar=CookieJar())
        assert b"" == useragent.urlopen('http://127.0.0.1:54323/').read()
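
wsgiserver and set_cookie are test helpers that are not shown here. One plausible shape for set_cookie, assuming it returns a WSGI app that sets a cookie and serves an empty body (so the b"" assertion holds), would be:

def set_cookie(name='test', value='1'):
    def app(environ, start_response):
        # set a cookie and return an empty body
        start_response('200 OK', [('Set-Cookie', '%s=%s' % (name, value)),
                                  ('Content-Type', 'text/plain')])
        return [b'']
    return app
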
Example #6
#coding:utf8

import urllib2
import random
import json
import re
from cookielib import CookieJar
from pyquery import PyQuery as pq

# cookiejar to help deal with cookie
cj_iut = CookieJar()
opener_iut = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj_iut))

# result file
website_result_file = '../../result/iresearch/iut_service_traffic.txt'
domain_result_file = '../../result/iresearch/iut_service_domain_traffic.txt'

# regular expressions to analyse response
p_month = re.compile(r'dtListM\[\d+]\[3]="(\d{4}-\d{2})"')
p_category_traffic = re.compile(r'iut_data = (\[.*]);')
p_category_title = re.compile(r'iut_title =(\[[\S\s]*?]);')
p_main_category = re.compile(r'selected >(.*?)</option>')
p_page = re.compile(r'</select> /(\d*)(?=</td>)')
p_category = re.compile(r'<option value="(\d*)" \s*>.*</option>')
p_login = re.compile(r'您目前尚未登录或者登录已超时')  # "you are not logged in yet, or your session has timed out"

# category and date to scrape
month_period = []
categories = []

# to keep track of service traffic running state
Example #7
 def __init__(self, email, password):
     self.email = email
     self.password = password
     self.cj = CookieJar()
Example #8
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

from os import name
from cookielib import CookieJar

from lib.settings import W, BW, R, G, O, B, P, C, GR

cookie_handler = CookieJar()


def color(text, color=GR):
    """
    Sets the text to a given color if not running under Windows.
    """

    if name == "nt":
        return text
    else:
        return "%s%s%s" % (color, text, W)
Example #9
def send_request(url, method, data, args, params, headers, cookies, timeout,
                 is_json, verify_cert):
    """
	Forge and send HTTP request.
	"""
    ## Parse url args
    for p in args:
        url = url.replace(':' + p, str(args[p]))

    try:
        if data:
            if is_json:
                headers['Content-Type'] = 'application/json'
                data = json.dumps(data)

            request = requests.Request(method.upper(),
                                       url,
                                       data=data,
                                       params=params,
                                       headers=headers,
                                       cookies=cookies)
        else:
            request = requests.Request(method.upper(),
                                       url,
                                       params=params,
                                       headers=headers,
                                       cookies=cookies)

        ## Prepare and send HTTP request.
        session = requests.Session()
        session.verify = verify_cert
        r = session.send(request.prepare(), timeout=timeout)
        session.close()

    except requests.exceptions.Timeout:
        return {
            'data': {},
            'cookies': CookieJar(),
            'content_type': '',
            'status': 0,
            'is_json': False,
            'timeout': True
        }

    try:
        content_type = r.headers.get('Content-Type', 'application/json')
        response = r.json()
        isjson = True

    except json.decoder.JSONDecodeError:
        content_type = r.headers.get('Content-Type', 'text/html')
        response = r.text
        isjson = False

    return {
        'data': response,
        'cookies': r.cookies,
        'content_type': content_type,
        'status': r.status_code,
        'is_json': isjson,
        'timeout': False
    }
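
A hypothetical call to send_request (the URL, argument values, and the ':id' placeholder below are illustrative, not defined by the snippet above):

result = send_request(url='https://api.example.com/users/:id',
                      method='get',
                      data=None,
                      args={'id': 42},          # replaces ':id' in the URL
                      params={'verbose': 1},
                      headers={'Accept': 'application/json'},
                      cookies=None,
                      timeout=10,
                      is_json=True,
                      verify_cert=True)
if not result['timeout'] and result['is_json']:
    print(result['data'])
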
Example #10
RATING_PROPER = 1
RATING_NUKED = 2

CODEC_UNKNOWN = 0
CODEC_XVID = 1
CODEC_H264 = 2
CODEC_MP3 = 3
CODEC_AAC = 4
CODEC_AC3 = 5
CODEC_DTS = 6
CODEC_DTSHD = 7
CODEC_DTSHDMA = 8

USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.66 Safari/537.36"

COOKIE_JAR = CookieJar()
urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor(COOKIE_JAR)))


class closing(object):
    def __init__(self, thing):
        self.thing = thing

    def __enter__(self):
        return self.thing

    def __exit__(self, *exc_info):
        self.thing.close()


def parse_json(data):
Example #11
 def init(self):
     self._client = urllib2.build_opener(
         urllib2.HTTPCookieProcessor(CookieJar()))
     self.url = self.resolve_file_url(self._resolver_class, self._url)
     if not self.url:
         raise HTTPLoader.Error('Url was not resolved to file link')
Example #12
def wsgi_app(monkeypatch, recreate_openapi_spec):
    wsgi_callable = make_app()
    cookies = CookieJar()
    return WebTestAppForCMK(wsgi_callable, cookiejar=cookies)
Example #13
 def module_run(self, domains):
     base_url = 'https://www.bing.com/search'
     cnt = 0
     new = 0
     for domain in domains:
         self.heading(domain, level=0)
         base_query = 'domain:' + domain
         pattern = '"b_algo"><h2><a href="(?:\w*://)*(\S+?)\.%s[^"]*"' % (
             domain)
         subs = []
         # control variables
         new = True
         page = 0
         nr = 50
         cookiejar = CookieJar()
         cookiejar.set_cookie(
             self.make_cookie('SRCHHPGUSR',
                              'NEWWND=0&NRSLT=%d&SRCHLANG=&AS=1' % (nr),
                              '.bing.com'))
         # execute search engine queries and scrape results storing subdomains in a list
         # loop until no new subdomains are found
         while new == True:
             content = None
             query = ''
             # build query based on results of previous results
             for sub in subs:
                 query += ' -domain:%s.%s' % (sub, domain)
             full_query = base_query + query
             url = '%s?first=%d&q=%s' % (base_url, (page * nr),
                                         urllib.quote_plus(full_query))
             # bing errors out at > 2059 characters not including the protocol
             if len(url) > 2066: url = url[:2066]
             self.verbose('URL: %s' % (url))
             # send query to search engine
             resp = self.request(url, cookiejar=cookiejar)
             if resp.status_code != 200:
                 self.alert(
                     'Bing has encountered an error. Please submit an issue for debugging.'
                 )
                 break
             content = resp.text
             sites = re.findall(pattern, content)
             # create a unique list
             sites = list(set(sites))
             new = False
             # add subdomain to list if not already exists
             for site in sites:
                 if site not in subs:
                     subs.append(site)
                     new = True
                     host = '%s.%s' % (site, domain)
                     self.output('%s' % (host))
                     new += self.add_hosts(host)
             if not new:
                 # exit if all subdomains have been found
                 if not '>Next</a>' in content:
                     break
                 else:
                     page += 1
                     self.verbose(
                         'No New Subdomains Found on the Current Page. Jumping to Result %d.'
                         % ((page * nr) + 1))
                     new = True
             # sleep script to avoid lock-out
             self.verbose('Sleeping to avoid lockout...')
             time.sleep(random.randint(5, 15))
         cnt += len(subs)
     self.summarize(new, cnt)
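
make_cookie belongs to the surrounding framework and is not shown here. A standard-library sketch of what such a helper could look like (assuming only name, value, and domain matter for the Bing preference cookie above):

from cookielib import Cookie

def make_cookie(name, value, domain, path='/'):
    # build a cookielib.Cookie suitable for CookieJar.set_cookie()
    return Cookie(version=0, name=name, value=value,
                  port=None, port_specified=False,
                  domain=domain, domain_specified=True,
                  domain_initial_dot=domain.startswith('.'),
                  path=path, path_specified=True,
                  secure=False, expires=None, discard=True,
                  comment=None, comment_url=None, rest={})
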
Example #14
 def reset(self):
     self._cookie_jar = CookieJar()
     self._opener = build_opener(NoRedirectionProcessor,
                                 HTTPCookieProcessor(self._cookie_jar))
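
NoRedirectionProcessor is not defined in this snippet; one common way such a handler might be written (a sketch, not necessarily the author's version) is to hand every response back untouched so urllib2 never follows 3xx redirects:

import urllib2

class NoRedirectionProcessor(urllib2.HTTPErrorProcessor):
    """Return every response as-is so 3xx redirects are not followed."""
    def http_response(self, request, response):
        return response
    https_response = http_response
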
Example #15
        return False
    else:
        return True


# Import user created settings. This will override built-in settings if defined.
if module_exists("config"):
    import config
else:
    print(
        "Please set up the config.py file. Copy 'sample.config.py' to 'config.py' and set up options"
    )
    sys.exit(2)

# Initialize cookie jar and session
cookies = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies))

print("Load login page")

### Load the login page. This will initialize some cookies. Save them.
login_page = opener.open(
    'https://disneyworld.disney.go.com/login/?returnUrl=https://mydisneyphotopass.disney.go.com/'
)
# cookies are automatically saved.

# grab the unique CSRF key. parse it.
csrf_key = re.search('id="pep_csrf" value=".*"', login_page.read())
csrf_key = csrf_key.group(0)
csrf_key = string.split(csrf_key, "\"")  # split on double quote. easiest way.
csrf_key = csrf_key[
Example #16
    def wang(self, value):
        try:
            print '\n***** ' + self.baseUrl + ' *****'

            # stop if the crawl has been interrupted
            if self.isClose:
                return

            # proxy = urllib2.ProxyHandler({'http': '' + ip + ''})
            # opener = urllib2.build_opener(proxy)

            cj = CookieJar()
            cookieHandle = urllib2.HTTPCookieProcessor(cj)
            opener = urllib2.build_opener(cookieHandle)

            opener.addheaders = [
                #     ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
                #     ('Accept-Encoding', 'gzip, deflate, sdch'),
                #     ('Accept-Language', 'zh-CN,zh;q=0.8,en;q=0.6'),
                #     ('Cache-Control', 'max-age=0'),
                #     ('Connection', 'keep-alive'),
                #     ('DNT', '1'),
                #     ('Upgrade-Insecure-Requests', '1'),
                #     ('Host', 'www.laifudao.com'),
                #     ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36')
            ]

            url = self.baseUrl + str(value) + self.baseUrl2
            print url
            o = opener.open(url, timeout=10)
            d = o.read()

            pattern = re.compile(
                '<header class="post-header">(.*?)</a></h1>.*?title="(.*?)".*?<time>(.*?)</time>.*?<span class="cats">.*?>(.*?)</a>.*?"article-content">(.*?)</section>',
                re.S)
            results = re.findall(pattern, d)
            for i in results:

                # stop if the crawl has been interrupted
                if self.isClose:
                    return

                title = self.tool.replace(i[0])
                author = self.tool.replace(i[1])
                online = self.tool.replace(i[2])
                type = self.tool.replace(i[3])
                content = self.tool.replace(i[4])

                # compare with the newest row in the database; if it matches, stop
                for old in self.oldDatas:
                    # the database returns unicode, convert it to utf-8
                    o = old[0].encode('utf-8')
                    if title == o:
                        # signal the crawl to stop
                        self.isClose = True
                        return

                # insert the new row
                time.sleep(0.1)
                sql = "insert into `lf_wangwen` (`pid`, `title`, `content`, `online_time`, `author`, `type`, `create_by`, `update_by`, `create_time`, `update_time`, `status`) values (uuid(), '" + title + "','" + content + "','" + online + "','" + author + "','" + type + "','admin','admin',now(),now(),0);"
                print sql
                self.db.insertDB(sql)

        except urllib2.HTTPError, e:
            print 'HTTPError: ' + str(e.code)
            return False
Example #17
def gesdisc_merra_sync(DIRECTORY,
                       YEARS,
                       USER='',
                       PASSWORD='',
                       LOG=False,
                       LIST=False,
                       MODE=None,
                       CLOBBER=False):
    #-- recursively create directory if non-existent
    os.makedirs(DIRECTORY, MODE) if not os.path.exists(DIRECTORY) else None

    #-- create log file with list of synchronized files (or print to terminal)
    if LOG:
        #-- format: NASA_GESDISC_MERRA2_sync_2002-04-01.log
        today = time.strftime('%Y-%m-%d', time.localtime())
        LOGFILE = 'NASA_GESDISC_MERRA2_sync_{0}.log'.format(today)
        fid = open(os.path.join(DIRECTORY, LOGFILE), 'w')
        print('NASA MERRA-2 Sync Log ({0})'.format(today), file=fid)
    else:
        #-- standard output (terminal output)
        fid = sys.stdout

    #-- https://docs.python.org/3/howto/urllib2.html#id5
    #-- create a password manager
    password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    #-- Add the username and password for NASA Earthdata Login system
    password_mgr.add_password(None, 'https://urs.earthdata.nasa.gov', USER,
                              PASSWORD)
    #-- compile HTML parser for lxml
    parser = lxml.etree.HTMLParser()
    #-- Create cookie jar for storing cookies. This is used to store and return
    #-- the session cookie given to use by the data server (otherwise will just
    #-- keep sending us back to Earthdata Login to authenticate).
    cookie_jar = CookieJar()
    #-- create "opener" (OpenerDirector instance)
    opener = urllib2.build_opener(
        urllib2.HTTPBasicAuthHandler(password_mgr),
        urllib2.HTTPSHandler(context=ssl.SSLContext()),
        urllib2.HTTPCookieProcessor(cookie_jar))
    #-- Now all calls to urllib2.urlopen use our opener.
    urllib2.install_opener(opener)
    #-- All calls to urllib2.urlopen will now use handler
    #-- Make sure not to include the protocol in with the URL, or
    #-- HTTPPasswordMgrWithDefaultRealm will be confused.

    #-- MERRA-2 data remote base directory
    HOST = posixpath.join('http://goldsmr4.gesdisc.eosdis.nasa.gov', 'data',
                          'MERRA2_MONTHLY')

    #-- compile regular expression operator for years to sync
    regex_pattern = '|'.join('{0:d}'.format(y) for y in YEARS)
    R1 = re.compile('({0})'.format(regex_pattern), re.VERBOSE)
    #-- compile regular expression operator to find MERRA2 files
    R2 = re.compile('MERRA2_(.*?).nc4(.xml)?', re.VERBOSE)

    #-- for each MERRA-2 product to sync
    for PRODUCT in ['M2TMNXINT.5.12.4', 'M2TMNXGLC.5.12.4']:
        print('PRODUCT={0}'.format(PRODUCT), file=fid)
        #-- open connection with GESDISC server at remote directory
        req = urllib2.Request(url=posixpath.join(HOST, PRODUCT))
        #-- read and parse request for subdirectories (find column names)
        tree = lxml.etree.parse(urllib2.urlopen(req), parser)
        colnames = tree.xpath('//tr/td[not(@*)]//a/@href')
        #-- find remote yearly directories for PRODUCT
        remote_sub = [sd for sd in colnames if R1.match(sd)]
        for Y in remote_sub:
            #-- check if local directory exists and recursively create if not
            if (not os.access(os.path.join(DIRECTORY, PRODUCT, Y), os.F_OK)):
                os.makedirs(os.path.join(DIRECTORY, PRODUCT, Y), MODE)
            #-- open connection with GESDISC server at remote directory
            req = urllib2.Request(url=posixpath.join(HOST, PRODUCT, Y))
            #-- read and parse request for files (find names and modified dates)
            tree = lxml.etree.parse(urllib2.urlopen(req), parser)
            colnames = tree.xpath('//tr/td[not(@*)]//a/@href')
            collastmod = tree.xpath('//tr/td[@align="right"][1]/text()')
            #-- find remote files for PRODUCT and YEAR
            remote_file_lines = [
                i for i, f in enumerate(colnames) if R2.match(f)
            ]
            for i in remote_file_lines:
                #-- local and remote versions of the file
                FILE = colnames[i]
                local_file = os.path.join(DIRECTORY, PRODUCT, Y, FILE)
                remote_file = posixpath.join(HOST, PRODUCT, Y, FILE)
                #-- get last modified date of file and convert into unix time
                file_date = time.strptime(collastmod[i].rstrip(),
                                          '%d-%b-%Y %H:%M')
                remote_mtime = calendar.timegm(file_date)
                #-- copy file from remote directory checking modification times
                http_pull_file(fid, remote_file, remote_mtime, local_file,
                               LIST, CLOBBER, MODE)
            #-- close request
            req = None

    #-- close log file and set permissions level to MODE
    if LOG:
        fid.close()
        os.chmod(os.path.join(DIRECTORY, LOGFILE), MODE)
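
The listing step above (urlopen plus an lxml xpath over the index page) is repeated for every product and year; condensed into a helper it might look like this (the helper name is an assumption; the default xpath is the one used above):

import lxml.etree
import urllib2

def list_remote_hrefs(url, xpath='//tr/td[not(@*)]//a/@href'):
    # fetch the index page through the installed opener and return the href column
    parser = lxml.etree.HTMLParser()
    tree = lxml.etree.parse(urllib2.urlopen(urllib2.Request(url=url)), parser)
    return tree.xpath(xpath)
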
Example #18
def nsidc_icesat2_sync(ddir,
                       PRODUCTS,
                       RELEASE,
                       VERSIONS,
                       GRANULES,
                       TRACKS,
                       USER='',
                       PASSWORD='',
                       YEARS=None,
                       SUBDIRECTORY=None,
                       AUXILIARY=False,
                       FLATTEN=False,
                       LOG=False,
                       LIST=False,
                       MODE=None,
                       CLOBBER=False):

    #-- check if directory exists and recursively create if not
    os.makedirs(ddir, MODE) if not os.path.exists(ddir) else None

    #-- output of synchronized files
    if LOG:
        #-- format: NSIDC_IceBridge_sync_2002-04-01.log
        today = time.strftime('%Y-%m-%d', time.localtime())
        LOGFILE = 'NSIDC_IceSat-2_sync_{0}.log'.format(today)
        fid = open(os.path.join(ddir, LOGFILE), 'w')
        print('ICESat-2 Data Sync Log ({0})'.format(today), file=fid)
    else:
        #-- standard output (terminal output)
        fid = sys.stdout

    #-- https://docs.python.org/3/howto/urllib2.html#id5
    #-- create a password manager
    password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    #-- Add the username and password for NASA Earthdata Login system
    password_mgr.add_password(None, 'https://urs.earthdata.nasa.gov', USER,
                              PASSWORD)
    #-- Encode username/password for request authorization headers
    base64_string = base64.b64encode('{0}:{1}'.format(USER, PASSWORD).encode())
    #-- compile HTML parser for lxml
    parser = lxml.etree.HTMLParser()
    #-- Create cookie jar for storing cookies. This is used to store and return
    #-- the session cookie given to use by the data server (otherwise will just
    #-- keep sending us back to Earthdata Login to authenticate).
    cookie_jar = CookieJar()
    #-- create "opener" (OpenerDirector instance)
    opener = urllib2.build_opener(
        urllib2.HTTPBasicAuthHandler(password_mgr),
        urllib2.HTTPSHandler(context=ssl.SSLContext()),
        urllib2.HTTPCookieProcessor(cookie_jar))
    #-- add Authorization header to opener
    authorization_header = "Basic {0}".format(base64_string.decode())
    opener.addheaders = [("Authorization", authorization_header)]
    #-- Now all calls to urllib2.urlopen use our opener.
    urllib2.install_opener(opener)
    #-- All calls to urllib2.urlopen will now use handler
    #-- Make sure not to include the protocol in with the URL, or
    #-- HTTPPasswordMgrWithDefaultRealm will be confused.

    #-- remote https server for ICESat-2 Data
    HOST = 'https://n5eil01u.ecs.nsidc.org'
    #-- regular expression operator for finding files of a particular granule
    #-- find ICESat-2 HDF5 files in the subdirectory for product and release
    regex_track = '|'.join(['{0:04d}'.format(T) for T in TRACKS])
    regex_granule = '|'.join(['{0:02d}'.format(G) for G in GRANULES])
    regex_version = '|'.join(['{0:02d}'.format(V) for V in VERSIONS])
    regex_suffix = '(.*?)' if AUXILIARY else '(h5)'
    remote_regex_pattern = (
        '{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})(\d{{2}})'
        '(\d{{2}})(\d{{2}})_({1})(\d{{2}})({2})_({3})_({4})(.*?).{5}$')

    #-- regular expression operator for finding subdirectories
    if SUBDIRECTORY:
        #-- Sync particular subdirectories for product
        R2 = re.compile('(' + '|'.join(SUBDIRECTORY) + ')', re.VERBOSE)
    elif YEARS:
        #-- Sync particular years for product
        regex_pattern = '|'.join('{0:d}'.format(y) for y in YEARS)
        R2 = re.compile('({0}).(\d+).(\d+)'.format(regex_pattern), re.VERBOSE)
    else:
        #-- Sync all available subdirectories for product
        R2 = re.compile('(\d+).(\d+).(\d+)', re.VERBOSE)

    #-- for each icesat2 product listed
    for p in PRODUCTS:
        print('PRODUCT={0}'.format(p), file=fid)
        #-- get directories from remote directory (* splat operator)
        remote_directories = ['ATLAS', '{0}.{1}'.format(p, RELEASE)]
        d = posixpath.join(HOST, *remote_directories)
        req = urllib2.Request(url=d)
        #-- compile regular expression operator for product, release and version
        args = (p, regex_track, regex_granule, RELEASE, regex_version,
                regex_suffix)
        R1 = re.compile(remote_regex_pattern.format(*args), re.VERBOSE)
        #-- read and parse request for subdirectories (find column names)
        tree = lxml.etree.parse(urllib2.urlopen(req), parser)
        colnames = tree.xpath('//td[@class="indexcolname"]//a/@href')
        remote_sub = [sd for sd in colnames if R2.match(sd)]
        #-- for each remote subdirectory
        for sd in remote_sub:
            #-- local directory for product and subdirectory
            if FLATTEN:
                local_dir = os.path.expanduser(ddir)
            else:
                local_dir = os.path.join(ddir, '{0}.{1}'.format(p, RELEASE),
                                         sd)
            #-- check if data directory exists and recursively create if not
            os.makedirs(local_dir,
                        MODE) if not os.path.exists(local_dir) else None
            #-- find ICESat-2 data files
            req = urllib2.Request(url=posixpath.join(d, sd))
            #-- read and parse request for remote files (columns and dates)
            tree = lxml.etree.parse(urllib2.urlopen(req), parser)
            colnames = tree.xpath('//td[@class="indexcolname"]//a/@href')
            collastmod = tree.xpath('//td[@class="indexcollastmod"]/text()')
            #-- find matching files (for granule, release, version, track)
            remote_file_lines = [
                i for i, f in enumerate(colnames) if R1.match(f)
            ]
            #-- sync each ICESat-2 data file
            for i in remote_file_lines:
                #-- remote and local versions of the file
                remote_file = posixpath.join(d, sd, colnames[i])
                local_file = os.path.join(local_dir, colnames[i])
                #-- get last modified date and convert into unix time
                LMD = time.strptime(collastmod[i].rstrip(), '%Y-%m-%d %H:%M')
                remote_mtime = calendar.timegm(LMD)
                #-- sync ICESat-2 files with NSIDC server
                http_pull_file(fid, remote_file, remote_mtime, local_file,
                               LIST, CLOBBER, MODE)
        #-- close request
        req = None

    #-- close log file and set permissions level to MODE
    if LOG:
        fid.close()
        os.chmod(os.path.join(ddir, LOGFILE), MODE)
Example #19
def play_link(chn, src):
    item = xbmcgui.ListItem(chn)
    d_progress = xbmcgui.DialogProgress()
    d_progress.create("", addon.getLocalizedString(30009))

    cj = CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

    #login if required
    if src in data['sources'] and 'login' in data['sources'][src] and data[
            'sources'][src]['login'] == "true":
        url = data['sources'][src]['url']
        values = data['sources'][src]['post']
        post_data = urllib.urlencode(values)
        response = opener.open(url, post_data)
        the_page = response.read()

    #m3u8 url from fpt
    if data['channels'][chn]['src']['playpath'] == "m3u8_fpt":
        url = 'http://fptplay.net/show/getlinklivetv'
        page_id = data['channels'][chn]['src']['page_id']
        page_q = data['channels'][chn]['src']['page_q']
        values = {
            'id': page_id,
            'type': 'newchannel',
            'quality': page_q,
            'mobile': 'web'
        }
        post_data = urllib.urlencode(values)
        header = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Host': 'fptplay.net',
            'Origin': 'http://fptplay.net',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'http://fptplay.net/livetv'
        }
        req = urllib2.Request(url, post_data, header)
        response = urllib2.urlopen(req)
        the_page = response.read()
        the_data = json.loads(the_page)
        full_url = the_data['stream']

    #m3u8 url from tvnet
    elif data['channels'][chn]['src']['playpath'] == "m3u8_tvnet":
        url = 'http://118.107.85.21:1337/get-stream.json?p=smil:' + data[
            'channels'][chn]['src']['page_id'] + '.smil&t=l'
        stringA = opener.open(url).read().decode('utf-8')
        stringB = '"url": "'
        stringC = '"'
        full_url_BC = re.search(stringB + "(.*?)" + re.escape(stringC),
                                stringA).group(1)
        full_url = full_url_BC
        print full_url

    #m3u8 url using before & after marker
    elif data['channels'][chn]['src']['playpath'] == "m3u8_bau":
        if data['channels'][chn]['src'].get('header'):
            header = (data['channels'][chn]['src']['header'])
        else:
            header = None
        if data['channels'][chn]['src'].get('post'):
            post = data['channels'][chn]['src']['post']
        else:
            post = None

        if header == None:
            req = urllib2.Request(data['channels'][chn]['src']['page_url'],
                                  post)
        else:
            req = urllib2.Request(data['channels'][chn]['src']['page_url'],
                                  post, header)

        response = urllib2.urlopen(req)
        #print(response)
        the_page = response.read()
        #print(the_page)

        stringA = the_page
        stringB = (data['channels'][chn]['src']['url_before'])
        stringC = (data['channels'][chn]['src']['url_after'])
        full_url = re.search(
            re.escape(stringB) + "(.*?)" + re.escape(stringC),
            stringA).group(1)
        print(full_url)

    #traditional rtmp(e)
    else:
        videoUrl = data['sources'][src]['url']
        playpath = data['channels'][chn]['src']['playpath']
        if (playpath != ''):
            videoUrl = videoUrl + "/" + playpath

        url_protocol = videoUrl.split(':')[0]
        if (url_protocol == "http"):
            full_url = videoUrl
        elif (url_protocol in ["rtmp", "rtmpe"]):
            swfUrl = data['sources'][src]['swfurl']
            pageUrl = data['sources'][src]['pageurl']
            if (data['channels'][chn]['src']['referer'] != ''):
                pageUrl = pageUrl + "/" + data['channels'][chn]['src'][
                    'referer']
            flashVer = 'LNX_11,2,202,233'
            token = data['sources'][src]['token']
            app = data['sources'][src]['app']

            full_url = videoUrl + ' swfVfy=1 live=1 token=' + token + ' playpath=' + playpath + ' flashVer=' + flashVer + ' pageUrl=' + pageUrl + ' tcUrl=' + videoUrl + ' swfUrl=' + swfUrl

    d_progress.close()
    xbmc.Player().play(full_url)
    return
Example #20
 def __init__(self):
     super(WebService, self).__init__()
     self._cookie = CookieJar()
     self._opener = urllib2.build_opener(
         urllib2.HTTPCookieProcessor(self._cookie))
     self.query_interval = 1.0
Example #21
def downNASAEarthdata(productname, **kwargs):
    from cookielib import CookieJar
    TRMM_DAILY = False
    TRMM_MONTH = False
    if StringMatch(productname, "TRMM_3B42_Daily"):
        TRMM_DAILY = True
    elif StringMatch(productname, "TRMM_3B43"):
        TRMM_MONTH = True
    usrname = ''
    pwd = ''
    startdate = datetime.datetime.today()
    enddate = datetime.datetime.today()
    outpath = ''
    # try to get the required key-values, or throw exception
    try:
        usrname = kwargs["usrname"]
        pwd = kwargs["pwd"]
        startdate = kwargs["startdate"]
        enddate = kwargs["enddate"]
        outpath = kwargs["workspace"]
    except KeyError:
        print ("downNASAEarthdata function must have the usrname, pwd, startdate, and enddate args.")
    # try to get optional key-values
    logfile = None
    if 'log' in kwargs.keys():
        logfile = kwargs['log']
        delfile(logfile)

    authorizeUrl = "https://urs.earthdata.nasa.gov"
    # Create a password manager to deal with the 401 response that is returned from authorizeUrl
    password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, authorizeUrl, usrname, pwd)
    # Create a cookie jar for storing cookies. This is used to store and return
    # the session cookie given to use by the data server (otherwise it will just
    # keep sending us back to Earthdata Login to authenticate).  Ideally, we
    # should use a file based cookie jar to preserve cookies between runs. This
    # will make it much more efficient.
    cookie_jar = CookieJar()
    # Install all the handlers.
    opener = urllib2.build_opener(
        urllib2.HTTPBasicAuthHandler(password_manager),
        # urllib2.HTTPHandler(debuglevel=1),    # Uncomment these two lines to see
        # urllib2.HTTPSHandler(debuglevel=1),   # details of the requests/responses
        urllib2.HTTPCookieProcessor(cookie_jar))
    urllib2.install_opener(opener)

    downUrl = "http://disc2.gesdisc.eosdis.nasa.gov/"
    if TRMM_DAILY:
        downUrl += "data//TRMM_L3/TRMM_3B42_Daily.7/%s/%s/3B42_Daily.%s.7.nc4"
    elif TRMM_MONTH:
        downUrl += "opendap/TRMM_L3/TRMM_3B43.7/%s/%s/3B43.%s.7.HDF.nc"
    tmpdate = startdate
    while tmpdate <= enddate:
        if TRMM_DAILY:
            tmpUrl = downUrl % (tmpdate.strftime('%Y'), tmpdate.strftime('%m'),
                                tmpdate.strftime('%Y%m%d'))
            deltadays = 1
        elif TRMM_MONTH:
            # get the first day of current month
            tmpdate = tmpdate.replace(day = 1)
            tmpUrl = downUrl % (tmpdate.strftime('%Y'), str(doy(tmpdate)).zfill(3),
                                tmpdate.strftime('%Y%m%d'))
            deltadays = GetDayNumber(tmpdate.year, tmpdate.month)
        saveName = tmpUrl.split("/")[-1]
        tmpfile = outpath + os.sep + saveName

        print2log("  -- %s, saved as %s\n" % (tmpdate.strftime('%Y%m%d'), saveName),
                  logfile = logfile)
        if isfileexist(tmpfile):
            tmpdate += datetime.timedelta(days = deltadays)
            continue
        while True:
            # Create and submit the request.
            try:
                print2log(tmpUrl, logfile = logfile)
                request = urllib2.Request(tmpUrl)
                response = urllib2.urlopen(request)
                chunk_read(response, savepath = tmpfile, report_hook = chunk_report)
                break
            except (urllib2.HTTPError, urllib2.URLError), e:
                # print e.code
                # URLError has no .code attribute, so read it defensively
                if getattr(e, 'code', None) == 404 and TRMM_MONTH:
                    tmpUrl = tmpUrl.replace('7.HDF.nc', '7A.HDF.nc')
                    continue
                else:
                    break
        tmpdate += datetime.timedelta(days = deltadays)
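
The comment above suggests a file-based cookie jar to keep the Earthdata session between runs; a minimal sketch of that variant (the cookie file path is an illustrative assumption):

import os
from cookielib import LWPCookieJar

cookie_file = os.path.expanduser('~/.earthdata_cookies.txt')
cookie_jar = LWPCookieJar(cookie_file)
if os.path.exists(cookie_file):
    cookie_jar.load(ignore_discard=True)
# build and install the opener exactly as above, then after a successful
# request persist the session cookie for the next run:
cookie_jar.save(ignore_discard=True)
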
Example #22
import urllib2
from urllib2 import urlopen
import re
import cookielib
from cookielib import CookieJar
import time


ob = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(ob)) 

opener.addheaders = [('User-agent','Mozilla/5.0')]


def main():
    try:
        page = "https://www.huffingtonpost.com/section/taste/feed"
        sourcecode = opener.open(page).read()  # full source
        # print sourcecode
        try:
            titles = re.findall(r'<title>(.*?)</title> ', sourcecode)
            links = re.findall(r'<link>(.*?)</link>', sourcecode)
            # for title in titles:
            #     print title
            for link in links:
                print 'Visiting', link
                linksource = opener.open(link).read()
                # print linksource
                content = re.findall(r'<div>(.*?)</div>', linksource)
                for theContent in content:
                    print theContent
Example #23
from idasix import QtCore

import urllib
import urllib2
from cookielib import CookieJar
from json import loads, dumps

import exceptions
from . import config, logger

# building opener
cookiejar = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))

_threadpool = QtCore.QThreadPool()
_threadpool.setMaxThreadCount(config['network']['threadcount'])


class WorkerSignals(QtCore.QObject):
    result_dict = QtCore.pyqtSignal(dict)
    result_list = QtCore.pyqtSignal(list)
    result_str = QtCore.pyqtSignal(str)
    result_exception = QtCore.pyqtSignal(Exception)


class QueryWorker(QtCore.QRunnable):
    def __init__(self,
                 method,
                 url,
                 server=None,
                 token=None,
Example #24
def main():

    try:
        ssl._create_default_https_context = ssl._create_unverified_context

        opener = wdf_urllib.build_opener(
            wdf_urllib.HTTPCookieProcessor(CookieJar()))
        wdf_urllib.install_opener(opener)
    except:
        pass

    if not getUUID():
        print('获取uuid失败')
        return

    showQRImage()
    time.sleep(1)

    while waitForLogin() != '200':
        pass

    os.remove(QRImagePath)

    if not login():
        print('登录失败')
        return

    if not webwxinit():
        print('初始化失败')
        return

    MemberList = webwxgetcontact()

    MemberCount = len(MemberList)
    print('通讯录共%s位好友' % MemberCount)

    ChatRoomName = ''
    result = []
    d = {}
    for Member in MemberList:
        d[Member['UserName']] = (Member['NickName'].encode('utf-8'),
                                 Member['RemarkName'].encode('utf-8'))
    print('开始查找...')
    group_num = int(math.ceil(MemberCount / float(MAX_GROUP_NUM)))
    for i in range(0, group_num):
        UserNames = []
        for j in range(0, MAX_GROUP_NUM):
            if i * MAX_GROUP_NUM + j >= MemberCount:
                break
            Member = MemberList[i * MAX_GROUP_NUM + j]
            UserNames.append(Member['UserName'])

        # create a group chat / add members
        if ChatRoomName == '':
            (ChatRoomName, DeletedList) = createChatroom(UserNames)
        else:
            DeletedList = addMember(ChatRoomName, UserNames)

        DeletedCount = len(DeletedList)
        if DeletedCount > 0:
            result += DeletedList

        # remove the members again
        deleteMember(ChatRoomName, UserNames)

        # progress bar
        progress_len = MAX_PROGRESS_LEN
        progress = '-' * progress_len
        progress_str = '%s' % ''.join(
            map(lambda x: '#', progress[:(progress_len *
                                          (i + 1)) / group_num]))
        print(''.join([
            '[', progress_str,
            ''.join('-' * (progress_len - len(progress_str))), ']'
        ]))
        print('新发现你被%d人删除' % DeletedCount)
        for i in range(DeletedCount):
            if d[DeletedList[i]][1] != '':
                print(d[DeletedList[i]][0] + '(%s)' % d[DeletedList[i]][1])
            else:
                print(d[DeletedList[i]][0])

        if i != group_num - 1:
            print('正在继续查找,请耐心等待...')
            # wait before the next round of API calls
            time.sleep(INTERFACE_CALLING_INTERVAL)
    # todo: delete the group chat

    print('\n结果汇总完毕,20s后可重试...')
    resultNames = []
    for r in result:
        if d[r][1] != '':
            resultNames.append(d[r][0] + '(%s)' % d[r][1])
        else:
            resultNames.append(d[r][0])

    print('---------- 被删除的好友列表(共%d人) ----------' % len(result))
    # filter out emoji markup
    resultNames = map(lambda x: re.sub(r'<span.+/span>', '', x), resultNames)
    if len(resultNames):
        print('\n'.join(resultNames))
    else:
        print("无")
    print('---------------------------------------------')
Example #25
    def __init__(self, url, **kwargs):
        self.request = None
        self.response = None
        self.code = -1
        self.info = {}
        self.cookieJar = None
        self.reason = ''

        data = kwargs.get('data', None)
        if data:
            if isinstance(data, dict):
                data = urlencode(data)
            if not isinstance(data, basestring):
                raise ValueError('data must be string or dict')

        request_type = kwargs.get('type', 'POST')
        if data and isinstance(request_type,
                               basestring) and request_type.upper() != 'POST':
            url = '{}?{}'.format(url, data)
            data = None  # GET data must be None

        self.request = urlRequest(url, data)

        # referer
        referer = kwargs.get('referer', None)
        if referer:
            self.request.add_header('referer', referer)

        # user-agent
        user_agent = kwargs.get('user_agent', None)
        if user_agent:
            self.request.add_header('User-Agent', user_agent)

        # auth
        auth = kwargs.get('auth', None)
        if auth and isinstance(auth, dict) and 'usr' in auth:
            auth_string = base64.b64encode('{}:{}'.format(
                auth.get('usr', ''), auth.get('pwd', '')))
            self.request.add_header('Authorization',
                                    'Basic {}'.format(auth_string))

        # cookie
        cookie = kwargs.get('cookie', None)
        cj = None
        if cookie:
            if isinstance(cookie, CookieJar):
                cj = cookie
            elif isinstance(cookie, dict):
                result = []
                for k, v in cookie.items():
                    result.append('{}={}'.format(k, v))
                cookie = '; '.join(result)
            elif isinstance(cookie, Cookie.BaseCookie):
                cookie = cookie.output(header='')
            if isinstance(cookie, basestring):
                self.request.add_header('Cookie', cookie)
        if cj is None:
            cj = CookieJar()

        #! TODO: proxy

        # build opener
        debuglevel = 1 if kwargs.get('debug', False) else 0
        opener = build_opener(HTTPHandler(debuglevel=debuglevel),
                              HTTPSHandler(debuglevel=debuglevel),
                              HTTPCookieProcessor(cj))

        # timeout
        timeout = kwargs.get('timeout')
        if not isinstance(timeout, int):
            timeout = _DEFAULT_TIMEOUT

        try:
            self.response = opener.open(self.request, timeout=timeout)
            self.code = self.response.getcode()
            self.header = self.response.info().dict
            self.cookieJar = cj
        except HTTPError as e:
            self.code = e.code
            self.reason = '{}'.format(e)
            raise e
        except URLError as e:
            self.code = -1
            self.reason = e.reason
            raise e
        except Exception as e:
            self.code = -1
            self.reason = '{}'.format(e)
            raise e
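
Hypothetical usage, assuming the enclosing class is exposed under a name like HttpRequest (only its __init__ is shown above): make a request, then reuse the captured cookie jar for a follow-up call.

jar = CookieJar()
first = HttpRequest('http://example.com/login',
                    data={'user': 'alice', 'pwd': 'secret'},
                    cookie=jar, timeout=10)
second = HttpRequest('http://example.com/profile', type='GET',
                     cookie=first.cookieJar)
print second.code, second.header.get('content-type')
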
Example #26
def main():
    try:
        ssl._create_default_https_context = ssl._create_unverified_context
        opener = wdf_urllib.build_opener(
            wdf_urllib.HTTPCookieProcessor(CookieJar()))
        opener.addheaders = [
            ('User-agent',
             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36')]
        wdf_urllib.install_opener(opener)
    except:
        pass

    if not getUUID():
        print('获取uuid失败')
        return

    print('正在获取二维码图片...')
    showQRImage()
    time.sleep(1)

    while waitForLogin() != '200':
        pass

    os.remove(QRImagePath)

    if not login():
        print('登录失败')
        return

    if not webwxinit():
        print('初始化失败')
        return

    MemberList = webwxgetcontact()

    print('开启心跳线程')
    thread.start_new_thread(heartBeatLoop, ())

    MemberCount = len(MemberList)
    print('通讯录共%s位好友' % MemberCount)

    ChatRoomName = ''
    result = []
    d = {}
    for Member in MemberList:
        d[Member['UserName']] = (Member['NickName'].encode('utf-8'),
                                 Member['RemarkName'].encode('utf-8'))
    print('开始查找...')
    group_num = int(math.ceil(MemberCount / float(MAX_GROUP_NUM)))
    for i in range(0, group_num):
        UserNames = []
        for j in range(0, MAX_GROUP_NUM):
            if i * MAX_GROUP_NUM + j >= MemberCount:
                break
            Member = MemberList[i * MAX_GROUP_NUM + j]
            UserNames.append(Member['UserName'])

        # create a group chat / add members
        if ChatRoomName == '':
            (ChatRoomName, DeletedList, BlockedList) = createChatroom(
                UserNames)
        else:
            (DeletedList, BlockedList) = addMember(ChatRoomName, UserNames)
        # todo: BlockedList holds the contacts that have blocked you

        DeletedCount = len(DeletedList)
        if DeletedCount > 0:
            result += DeletedList

        # remove the members again
        deleteMember(ChatRoomName, UserNames)

        # progress bar
        progress = MAX_PROGRESS_LEN * (i + 1) / group_num
        print('[', '#' * progress, '-' * (MAX_PROGRESS_LEN - progress), ']', end=' ')
        print('新发现你被%d人删除' % DeletedCount)
        for i in range(DeletedCount):
            if d[DeletedList[i]][1] != '':
                print(d[DeletedList[i]][0] + '(%s)' % d[DeletedList[i]][1])
            else:
                print(d[DeletedList[i]][0])

        if i != group_num - 1:
            print('正在继续查找,请耐心等待...')
            # wait before the next round of API calls
            time.sleep(INTERFACE_CALLING_INTERVAL)
    # todo: delete the group chat

    print('\n结果汇总完毕,20s后可重试...')
    resultNames = []
    for r in result:
        if d[r][1] != '':
            resultNames.append(d[r][0] + '(%s)' % d[r][1])
        else:
            resultNames.append(d[r][0])

    print('---------- 被删除的好友列表(共%d人) ----------' % len(result))
    # filter out emoji markup
    resultNames = map(lambda x: re.sub(r'<span.+/span>', '', x), resultNames)
    if len(resultNames):
        print('\n'.join(resultNames))
    else:
        print("无")
    print('---------------------------------------------')


# fix console encoding problems on Windows
# http://blog.csdn.net/heyuxuanzee/article/details/8442718
class UnicodeStreamFilter:
    def __init__(self, target):
        self.target = target
        self.encoding = 'utf-8'
        self.errors = 'replace'
        self.encode_to = self.target.encoding

    def write(self, s):
        if type(s) == str:
            s = s.decode('utf-8')
        s = s.encode(self.encode_to, self.errors).decode(self.encode_to)
        self.target.write(s)


if sys.stdout.encoding == 'cp936':
    sys.stdout = UnicodeStreamFilter(sys.stdout)


if __name__ == '__main__':
    print('本程序的查询结果可能会引起一些心理上的不适,请小心使用...')
    main()
    print('回车键退出...')
Example #27
def main(year):
    # The user credentials that will be used to authenticate access to the data
    username = "******"
    password = ""

    # The FULL url of the directory which contains the files you would like to bulk download
    #url = "https://daacdata.apps.nsidc.org/pub/DATASETS/nsidc0079_gsfc_bootstrap_seaice_v3/final-gsfc/north/daily/"+str(year)+'/' # Example URL

    url = "https://daacdata.apps.nsidc.org/pub/DATASETS/nsidc0116_icemotion_vectors_v3/data/north/grid/" + str(
        year) + "/"

    # Create a password manager to deal with the 401 reponse that is returned from
    # Earthdata Login

    password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, "https://urs.earthdata.nasa.gov",
                                  username, password)

    # Create a cookie jar for storing cookies. This is used to store and return
    # the session cookie given to use by the data server (otherwise it will just
    # keep sending us back to Earthdata Login to authenticate).  Ideally, we
    # should use a file based cookie jar to preserve cookies between runs. This
    # will make it much more efficient.

    cookie_jar = CookieJar()

    # Install all the handlers.
    opener = urllib2.build_opener(
        urllib2.HTTPBasicAuthHandler(password_manager),
        urllib2.HTTPHandler(debuglevel=1),    # debuglevel=1 on these two handlers
        #urllib2.HTTPSHandler(debuglevel=1),  # shows details of the requests/responses
        urllib2.HTTPCookieProcessor(cookie_jar))
    urllib2.install_opener(opener)

    # Create and submit the requests. There are a wide range of exceptions that
    # can be thrown here, including HTTPError and URLError. These should be
    # caught and handled.

    #===============================================================================
    # Open a request to grab filenames within a directory. Printing is optional.
    #===============================================================================

    DirRequest = urllib2.Request(url)
    DirResponse = urllib2.urlopen(DirRequest)

    # Get the redirect url and append 'app_type=401'
    # to do basic http auth
    DirRedirect_url = DirResponse.geturl()
    DirRedirect_url += '&app_type=401'

    # Request the resource at the modified redirect url
    DirRequest = urllib2.Request(DirRedirect_url)
    DirResponse = urllib2.urlopen(DirRequest)

    DirBody = DirResponse.read()

    # Use the HTML parser defined above to parse the content of the directory containing data
    parser = MyHTMLParser()

    parser.feed(DirBody)
    Files = parser.dataList

    # Display the contents of the python list declared in the HTMLParser class
    # print Files #Uncomment to print a list of the files

    #===============================================================================
    # Call the function to download all files in url
    #===============================================================================

    BatchJob(
        Files, cookie_jar, year,
        url)  # Comment out to prevent downloading to your working directory
Example #28
 def __init__(self):
     super(Bing, self).__init__()
     self.cj = CookieJar()
     self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(
         self.cj))
Example #29
Created on 2014. 9. 17.

@author: a141890
'''

from cookielib import CookieJar
import codecs
import csv
import json
import urllib
import urllib2
import urlparse

# Global Settings
URL = "http://apis.daum.net/local/geo/transcoord"
CJ = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(CJ))


def start():
    file = codecs.open('wifi.csv', 'r', encoding="EUC-KR")
    csv_file = csv.reader(file)
    response = []

    for line in csv_file:
        x, y = line[3], line[4]
        try:
            x, y = float(x), float(y)
        except:
            continue
Example #30
#!/usr/bin/env python
# coding=utf8

from re import compile, DOTALL
import json
import tempfile
import os

from urllib2 import urlopen, HTTPCookieProcessor, build_opener
from cookielib import CookieJar
from urllib import urlencode
import sys

reload(sys)
sys.setdefaultencoding('utf8')
cookie = CookieJar()
opener = build_opener(HTTPCookieProcessor(cookie))


def std_write(thing):
    sys.stdout.write("{}\r\n".format(thing))


# `try_url` should be a lightweight plain-HTTP page (not HTTPS), ideally one with no redirects
try_url = 'http://www.baidu.com'

# ip and port of the url
ip_port = ''


def downloader(url):