Example #1
    def save_file(self, source_path, key):
        """

        :param source_path: is relative to the local file system.
        :param key: the key is relative to the current prefix.
        :return:
        """
        if self.local_server:
            # Make sure the path is absolute before handing it to the local server.
            if not source_path.startswith('/'):
                source_path = "/" + source_path
            return self.local_server.copy(source_path, key)
        # proxy = os.environ.get('HTTP_PROXY')
        # c.setopt(c.PROXY, proxy)
        # logger.print('proxy:', proxy)
        from pycurl import Curl
        c = Curl()
        c.setopt(c.URL, self.url)
        c.setopt(c.TIMEOUT, 3600)
        c.setopt(c.HTTPPOST, [
            ('file', (
                c.FORM_FILE,
                source_path,
                c.FORM_FILENAME,
                key,
                c.FORM_CONTENTTYPE,
                'text/plain',
            )),
        ])
        c.perform()
        c.close()
Example #2
 def save_buffer(self, buffer, key):
     # proxy = os.environ.get('HTTP_PROXY')
     # c.setopt(c.PROXY, proxy)
     # logger.print('proxy:', proxy)
     if isinstance(buffer, BytesIO):
         from requests_toolbelt import MultipartEncoder
         encoder = MultipartEncoder({'file': (key, buffer), 'canary': 'true'})
         self.session.post(self.url,
                           data=encoder,
                           headers={'Content-Type': encoder.content_type})
     elif isinstance(buffer, StringIO):
         from pycurl import Curl
         c = Curl()
         c.setopt(c.URL, self.url)
         c.setopt(c.TIMEOUT, 3600)
         c.setopt(c.HTTPPOST, [
             ('file', (
                 c.FORM_BUFFER,
                 key,
                 c.FORM_BUFFERPTR,
                 buffer.read().encode('utf-8'),  # FORM_BUFFERPTR expects bytes on Python 3
                 c.FORM_CONTENTTYPE,
                 'text/plain',
             )),
         ])
         c.perform()
         c.close()
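A standalone sketch of the StringIO branch above: pycurl can post an in-memory string as a multipart file part via FORM_BUFFER / FORM_BUFFERPTR. The function name and URL below are placeholders, not from the original.

from pycurl import Curl

def upload_text(url, key, text):
    # Post `text` as a multipart file part named `key`; FORM_BUFFER sets the
    # reported filename and FORM_BUFFERPTR supplies the bytes directly.
    c = Curl()
    c.setopt(c.URL, url)
    c.setopt(c.HTTPPOST, [
        ('file', (
            c.FORM_BUFFER, key,
            c.FORM_BUFFERPTR, text.encode('utf-8'),
            c.FORM_CONTENTTYPE, 'text/plain',
        )),
    ])
    c.perform()
    c.close()

# upload_text('http://localhost:8081', 'notes/hello.txt', 'hello world')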
Example #3
def curl_ix(content=None):
    # Post the given content (or, by default, the contents of LOGFILE) to
    # ix.io and return the generated link.
    # import necessary classes and functions
    from pycurl import Curl
    from io import BytesIO
    from urllib.parse import urlencode
    curl = Curl()
    buf = BytesIO()
    curl.setopt(curl.URL, "ix.io")
    curl.setopt(curl.WRITEDATA, buf)
    if not content:
        try:
            with open(LOGFILE, 'r') as f:
                content = f.readlines()
        except FileNotFoundError:
            ERROR(f"{LOGFILE} not found.")
        except Exception as e:
            ERROR(f"Error occurred:\n{str(e)}")
    curl.setopt(curl.POSTFIELDS, urlencode({"f:1": '\n'.join(content)}))
    try:
        curl.perform()
    except Exception as e:
        ERROR(f"Error occured:\n{str(e)}")
    curl.close()
    return buf.getvalue().decode().strip()
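Hypothetical usage, assuming LOGFILE and ERROR are defined at module level as the function expects:

link = curl_ix(["first line", "second line"])
print(link)  # the paste URL returned by ix.io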
Example #4
def curl(url, file_ids, log):
    log.info('\tstarting curl fetch of gdc files')
    params = {'ids': file_ids}
    c = None
    with open('gdc_curl_download.tar.gz', 'wb') as f:
        try:
            c = Curl()
            c.setopt(c.URL, url)
            c.setopt(c.WRITEDATA, f)
            c.setopt(c.HTTPHEADER, ["Content-Type: application/json"])
            c.setopt(pycurl.CUSTOMREQUEST, "POST")
            c.setopt(pycurl.POSTFIELDS, json.dumps(params))
            # TODO: set up using a local certificate
            c.setopt(pycurl.SSL_VERIFYPEER, 0)
            c.setopt(pycurl.SSL_VERIFYHOST, 0)
            c.perform()
        except Exception:
            log.exception('problem with curl')
            raise
        finally:
            if c is not None:
                if int(c.getinfo(pycurl.RESPONSE_CODE)) != 200:
                    f.close()
                    with open('gdc_curl_download.tar.gz') as e:
                        err = e.read()
                    log.error('\tbad status on curl call(%s):\n%s' %
                              (c.getinfo(pycurl.RESPONSE_CODE), err))
                c.close()
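A sketch of how this might be invoked; the GDC data endpoint and the file UUID are assumptions, not taken from the snippet:

import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger('gdc')
# curl('https://api.gdc.cancer.gov/data', ['<file-uuid>'], log)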
Example #5
def getc(url):
    buf = BytesIO()
    c = Curl()
    c.setopt(c.URL, url)
    c.setopt(c.WRITEDATA, buf)
    c.perform()
    c.close()
    return buf
Example #6
def build_thread(gitpath, ref, buildid, cburl=None, submodules=False):
    tmpdir = os.path.join(conf('buildbot.buildpath'), buildid)
    repo = GitRepository(tmpdir)

    output, retcode = repo.clone(gitpath)
    if retcode:
        buildlog(buildid,
                 'Unable to clone %s. %s\n' % (gitpath, '\n'.join(output)))
        return

    output, retcode = repo.checkout(ref)
    if retcode:
        buildlog(buildid,
                 'Unable to checkout %s. %s\n' % (ref, '\n'.join(output)))
        return

    if submodules:
        output, retcode = repo.submodule_init()
        buildlog(buildid, output[0])
        buildlog(buildid, output[1])
        output, retcode = repo.submodule_update()
        buildlog(buildid, output[0])
        buildlog(buildid, output[1])

    resultsdir = os.path.join(tmpdir, '.build_results')
    os.makedirs(resultsdir)
    output, retcode = repo.build(conf('buildbot.signkey'),
                                 conf('buildbot.pbuilderrc'), resultsdir)

    buildlog(buildid, output[0])
    buildlog(buildid, output[1])
    #logging.debug(output[0])
    #logging.debug(output[1])

    os.chdir(resultsdir)
    if not os.listdir(resultsdir) or retcode != 0:
        buildlog(buildid, 'Nothing in results directory. Giving up.')
        return

    tarpath = os.path.join(tmpdir, 'package.tar.gz')
    tar = tarfile.open(tarpath, 'w:gz')
    for name in os.listdir(resultsdir):
        tar.add(name)
    tar.close()

    buildlog(buildid, 'Build complete. Results in %s\n' % tarpath)
    with open(tarpath, 'rb') as tarball:
        data = tarball.read()
    buildlog(buildid, 'Built %i byte tarball' % len(data))

    if cburl:
        buildlog(buildid, 'Performing callback: %s' % cburl)
        req = Curl()
        req.setopt(req.POST, 1)
        req.setopt(req.URL, str(cburl))
        req.setopt(req.HTTPPOST, [('package', (req.FORM_FILE, str(tarpath)))])
        req.setopt(req.WRITEDATA, open('%s/build.log' % tmpdir, 'ab'))
        req.perform()
        req.close()
Example #7
def blocking_io(num):  # TODO: Use pycurl
    buf = BytesIO()
    c = Curl()
    c.setopt(c.URL, f'https://xkcd.com/{num}/info.0.json')
    c.setopt(c.WRITEDATA, buf)
    c.setopt(c.CAINFO, certifi.where())
    c.perform()
    c.close()
    buf.seek(0)
    return load(buf)
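Since this is a blocking pycurl call, a typical pattern is to push it onto a thread pool from async code. A minimal sketch; the wrapper below is hypothetical, not part of the snippet:

import asyncio

async def fetch_comic(num):
    # Run the blocking pycurl fetch in the default executor so the event
    # loop stays responsive.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, blocking_io, num)

# comic = asyncio.run(fetch_comic(353))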
Example #8
    def load_url(self, url):
        buffer = BytesIO()

        c = Curl()
        c.setopt(c.URL, url)
        c.setopt(c.WRITEDATA, buffer)
        c.perform()
        c.close()

        return CSVFile(buffer)
Example #9
def curl(url):
    io = BytesIO()
    c = Curl()
    c.setopt(c.URL, url)
    c.setopt(c.WRITEDATA, io)
    c.perform()
    c.close()
    res = io.getvalue()
    io.close()
    return res
Example #11
 def get(name):
     base = 'https://www1.ncdc.noaa.gov/pub/data/igra/data/data-por/{}-data.txt.zip'
     buf = BytesIO()
     c = Curl()
     c.setopt(c.URL, base.format(name))
     c.setopt(c.WRITEDATA, buf)
     c.perform()
     c.close()
     z = ZipFile(buf)
     out = z.open(z.infolist()[0]).read()
     z.close()
     return out.decode()
Example #12
def performSubmission(submissionFileName, POST_DATA):
    
    logging.info('Performing submission of ' + submissionFileName + '\n')
    logging.info('POST Data:\n' + str(POST_DATA) + '\n')
    
    
    if (str(getConfigurationValue('test_submission')) == '0'):
        logging.info ('THIS IS A LIVE SUBMISSION AT ENA.')
        requestURL = str(getConfigurationValue('ena_rest_address_prod')) + '?auth=ENA%20' + str(getConfigurationValue('ena_username')) + '%20' + str(getConfigurationValue('ena_password'))
    else:
        logging.info ('THIS IS A TEST SUBMISSION AT ENA.')
        requestURL = str(getConfigurationValue('ena_rest_address_test')) + '?auth=ENA%20' + str(getConfigurationValue('ena_username')) + '%20' + str(getConfigurationValue('ena_password'))
    
    # Problem: StringIO Doesn't work with pycurl in python 3.6. Must replace this with a BytesIO.
    #curlResponseBuffer = StringIO()
    curlResponseBuffer = BytesIO()
    
    
    try:
        curlObject = Curl()
        curlObject.setopt(curlObject.URL, requestURL)
        curlObject.setopt(curlObject.POST, 1)
        curlObject.setopt(curlObject.HTTPPOST, POST_DATA)
        curlObject.setopt(curlObject.USERAGENT, 'Curl')

        curlObject.setopt(curlObject.WRITEFUNCTION, curlResponseBuffer.write)
        
        curlObject.setopt(curlObject.HTTPHEADER, ['Accept:application/xml'])
        # Insecure.  Any security experts want to make this better?
        curlObject.setopt(curlObject.SSL_VERIFYHOST, 0)
        curlObject.setopt(curlObject.SSL_VERIFYPEER, 0)
        curlObject.perform()
        curlObject.close()
    except Exception:
        logging.error('Exception when performing CURL.\n')
        logging.error('URL:' + str(requestURL))
        raise
    
    responseText = curlResponseBuffer.getvalue().decode('utf-8')
    
    #logging.info ('the type of the responseText is:' + str(type(responseText)))
    #logging.info ('after it becomes a string:' + str(type(str(responseText))))
    
    # write XML to file. 
    projectSubResultsFileName = submissionFileName.replace('.xml','_results.xml')
    resultsFile = createOutputFile(projectSubResultsFileName)
    resultsFile.write(str(responseText))
    resultsFile.close()
    
    return responseText
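POST_DATA itself is not shown in this snippet. pycurl's HTTPPOST option takes a list of (name, value) tuples, where file uploads use the FORM_FILE form; a hypothetical shape for an ENA-style submission (field and file names assumed):

import pycurl

POST_DATA = [
    ('SUBMISSION', (pycurl.FORM_FILE, 'submission.xml')),
    ('PROJECT', (pycurl.FORM_FILE, 'project.xml')),
]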
Example #13
def performSubmission(submissionFileName, POST_DATA, enaUserName, enaPassword):
    
    logging.info('Performing submission of ' + submissionFileName + '\n')
    logging.info('POST Data:\n' + str(POST_DATA) + '\n')
    
    
    if (str(getConfigurationValue('test_submission')) == '0'):
        logging.info ('THIS IS A LIVE SUBMISSION AT ENA.')
        requestURL = str(getConfigurationValue('ena_rest_address_prod')) + '?auth=ENA%20' + str(enaUserName) + '%20' + str(enaPassword)
    else:
        logging.info ('THIS IS A TEST SUBMISSION AT ENA.')
        requestURL = str(getConfigurationValue('ena_rest_address_test')) + '?auth=ENA%20' + str(enaUserName) + '%20' + str(enaPassword)
    
    # Problem: StringIO Doesn't work with pycurl in python 3.6. Must replace this with a BytesIO.
    curlResponseBuffer = BytesIO()

    try:
        curlObject = Curl()
        curlObject.setopt(curlObject.URL, requestURL)
        curlObject.setopt(curlObject.POST, 1)
        curlObject.setopt(curlObject.HTTPPOST, POST_DATA)
        curlObject.setopt(curlObject.USERAGENT, 'Curl')

        curlObject.setopt(curlObject.WRITEFUNCTION, curlResponseBuffer.write)
        
        curlObject.setopt(curlObject.HTTPHEADER, ['Accept:application/xml'])
        # Insecure.  Any security experts want to make this better?
        curlObject.setopt(curlObject.SSL_VERIFYHOST, 0)
        curlObject.setopt(curlObject.SSL_VERIFYPEER, 0)
        curlObject.perform()
        curlObject.close()
    except Exception:
        logging.error('Exception when performing CURL.\n')
        logging.error('URL:' + str(requestURL))
        raise
    
    responseText = curlResponseBuffer.getvalue().decode('utf-8')
    
    #logging.info ('the type of the responseText is:' + str(type(responseText)))
    #logging.info ('after it becomes a string:' + str(type(str(responseText))))
    
    # write XML to file. 
    projectSubResultsFileName = submissionFileName.replace('.xml','_results.xml')
    resultsFile = createOutputFile(projectSubResultsFileName)
    resultsFile.write(str(responseText))
    resultsFile.close()
    
    return responseText
Example #14
    def fetchAnnotationJson(self, rawRequestURL=None):
        try:
            postData = {'sequence': self.rawSequence}

            # Using configuration here causes circular dependency. So I'll just pass it in.
            if(rawRequestURL is None):
                logging.error('You must pass a rawRequestURL to fetchAnnotationJson.')
                return
            else:
                requestURL = rawRequestURL + '?' + urlencode(postData)

            resultsIoObject = BytesIO()

            curlObject = Curl()
            curlObject.setopt(curlObject.URL, requestURL)
            curlObject.setopt(curlObject.WRITEDATA, resultsIoObject)

            curlObject.perform()
            curlObject.close()

            getBody = resultsIoObject.getvalue().decode('utf8')

            logging.debug('JSON Request Body:\n' + getBody)

            # TODO:
            # Detect error <head><title>414 Request-URI Too Large</title></head>
            # For larger DRB alleles the webserver fails.
            # Detect error if the result is not json.
            # Maybe this error detection happens in parseExons. But i maybe need to detect server errors here.
            # Simple case is an empty string.
            if(getBody is None or len(getBody)<1):
                logging.error('The JSON results were an empty string. Is there a problem with the ACT server?:' + str(requestURL))
                showInfoBox('Problem Accessing Annotation Service','The JSON results were an empty string. Is there a problem with the ACT server?')
                return None

            # If it's an html error we can respond nicely.
            if(getBody.startswith('<html>')):
                # TODO: this might not work if i get some other kind of html.
                errorCode = getBody[getBody.find('<title>') + len('<title>'):getBody.find('</title>')]
                logging.error('The annotation JSON results are html, this probably indicates an issue with the annotation webserver:\n' + str(requestURL))
                showInfoBox('Problem Accessing Annotation Service', 'The annotation results are HTML, not JSON, probably an issue with the ACT webserver:\n' + str(errorCode))
                return None

            return getBody

        except Exception:
            logging.error('Exception when performing CURL:\n')
            logging.error(str(exc_info()))
            logging.error('URL:' + str(requestURL))

            raise
Example #15
 def sendTelegramAlert(self, telegram_chat_id, telegram_bot_token, message):
     if len(message) > 4096:
         message = "The size of the message in Telegram (4096) has been exceeded. Overall size: " + str(
             len(message))
     c = Curl()
     url = 'https://api.telegram.org/bot' + str(
         telegram_bot_token) + '/sendMessage'
     c.setopt(c.URL, url)
     data = {'chat_id': telegram_chat_id, 'text': message}
     pf = urlencode(data)
     c.setopt(c.POSTFIELDS, pf)
     c.perform_rs()
     status_code = c.getinfo(c.HTTP_CODE)
     c.close()
     self.getStatusByTelegramCode(status_code)
Example #16
 def _fetch(self, url, query, on_progress=None):
     logging.debug('query={query}'.format(query=query))
     from pycurl import Curl, POST, POSTFIELDS
     from io import BytesIO
     c = Curl()
     c.setopt(c.URL, url)
     c.setopt(POST, 1)
     c.setopt(POSTFIELDS, query)
     if on_progress:
         c.setopt(c.HEADERFUNCTION, self._on_header(on_progress))
     buffer = BytesIO()
     c.setopt(c.WRITEDATA, buffer)
     c.perform()
     c.close()
     return buffer.getvalue().decode('UTF-8')
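_on_header is referenced but not shown. One plausible implementation, given that pycurl's HEADERFUNCTION receives one raw header line (bytes) per call; a sketch, not the original code:

def _on_header(self, on_progress):
    # Adapt a user progress callback to pycurl's HEADERFUNCTION signature.
    def handler(header_line: bytes):
        on_progress(header_line.decode('iso-8859-1').rstrip())
    return handler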
Example #17
    def _curl_a_link(self, target_url, post_target, commit_date=None):
        '''
        Fetch a URL and return a dict parsed from the JSON response body,
        equivalent to a curl GET. If the result of this request is already in
        today's cache, read it from the cache instead of querying Elasticsearch
        again.
        '''

        buffer = BytesIO()  # pycurl writes bytes; a StringIO buffer would fail here
        c = Curl()
        c.setopt(c.URL, target_url)
        c.setopt(c.WRITEDATA, buffer)
        c.perform()
        c.close()

        load_target = json.loads(buffer.getvalue())

        return load_target
Example #18
def torch_upload():
    from ml_logger import logger
    import numpy as np

    logger.configure(root_dir="http://54.71.92.65:9080", prefix="geyang/ml_logger-debug/test-1",
                     register_experiment=True)
    logger.log_params(args={})

    with logger.Sync():
        import os
        import torch
        from pycurl import Curl
        from tempfile import NamedTemporaryFile

        logger.remove('upload/example.pt')

        with NamedTemporaryFile(delete=True) as f:
            torch.save(np.ones([10_000_000]), f)
            # torch.save(np.ones([1000_000]), f)
            f.flush()  # make sure the tensor is on disk before curl reads f.name
            logger.print(f.name)

            c = Curl()
            c.setopt(c.URL, logger.root_dir)
            # proxy = os.environ.get('HTTP_PROXY')
            # c.setopt(c.PROXY, proxy)
            # logger.print('proxy:', proxy)
            c.setopt(c.TIMEOUT, 100000)
            c.setopt(c.HTTPPOST, [
                ('file', (
                    c.FORM_FILE, f.name,
                    c.FORM_FILENAME, logger.prefix + '/upload/example.pt',
                    c.FORM_CONTENTTYPE, 'text/plain',
                )),
            ])
            c.perform()
            c.close()

        logger.print('done')


        # logger.remove(".")
        # a = np.ones([1, 1, 100_000_000 // 4])
        # logger.print(f"the size of the tensor is {a.size}")
        # data = dict(key="ok", large=a)
        # logger.torch_save(data, f"save/data-{logger.now('%H.%M.%S')}.pkl")
    logger.print('done')
Example #19
def curlGet(url: str) -> bytes:

    # Generate objects.
    data = BytesIO()
    curl = Curl()

    # Setup curl.
    curl.setopt(curl.URL, url)
    curl.setopt(curl.WRITEDATA, data)
    curl.setopt(curl.FOLLOWLOCATION, True)
    curl.setopt(curl.HTTPHEADER, ['User-Agent: curl/7.68.0'])

    # Send curl request.
    curl.perform()
    curl.close()

    return data.getvalue()
Example #20
    def __fetch_page(self, url):
        useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36'
        encoding = 'gzip, deflate, sdch'
        httpheader = [
            'Accept: text/html, application/xhtml+xml, application/xml; q=0.9, image/webp, */*; q=0.8',
            'Accept-Language: it-IT, it; q=0.8, en-US; q=0.6, en; q=0.4',
            'Host: uniparthenope.esse3.cineca.it'
        ]
        cookiefile = 'cookiefile'

        page = BytesIO()  # pycurl hands WRITEFUNCTION bytes, so buffer as bytes
        c = Curl()
        c.setopt(c.FOLLOWLOCATION, True)
        c.setopt(c.WRITEFUNCTION, page.write)
        c.setopt(c.COOKIEJAR, cookiefile)
        c.setopt(c.URL, url)
        c.perform()
        c.close()
        page.close()

        page = BytesIO()
        c = Curl()
        c.setopt(c.USERPWD, self.__username + ':' + self.__password)
        c.setopt(c.FOLLOWLOCATION, 1)
        c.setopt(c.WRITEFUNCTION, page.write)
        c.setopt(c.COOKIEFILE, cookiefile)
        c.setopt(c.ENCODING, encoding)
        c.setopt(c.HTTPHEADER, httpheader)
        c.setopt(c.REFERER, url)
        c.setopt(c.USERAGENT, useragent)
        c.setopt(c.URL, url)
        c.perform()

        if (c.getinfo(pycurl.HTTP_CODE) != 200):
            c.close()
            return None
        c.close()

        page_str = page.getvalue().decode('utf-8', errors='replace')
        page.close()
        p = re.compile('\\s+')
        page_str = p.sub(" ", page_str)

        return page_str
Example #21
def post_progress(progress, slotX, slotY, exp_name, message=""):
    from pycurl import Curl
    from io import BytesIO
    from socket import gethostname
    response = BytesIO()
    address = 'www.doc.ic.ac.uk/~zf509/'+exp_name+'/ip.php?name='+gethostname()+\
              '-'+message+'&slot='+str(slotX)+'-'+str(slotY)+\
              '&stage='+str(progress)
    c = Curl()
    c.setopt(c.WRITEFUNCTION, response.write)
    c.setopt(c.URL, address)
    c.perform()
    c.close()
    server_res = response.getvalue().decode('utf-8', errors='replace')

    print("Server replied:", server_res)
    if server_res.startswith("TER"):
        return False
    else:
        return True
Example #22
    def _perform(self,
                 url: str,
                 curl_obj: pycurl.Curl = None,
                 headers: dict = None,
                 postfields: dict = None,
                 skip_auth=False) -> bytes:
        if not skip_auth:
            self._wait_authenticated()

        if not curl_obj:
            curl_obj = pycurl.Curl()

        if postfields:
            postfields = urlencode(postfields)
            _set_postfields(curl_obj, postfields)

        logger.debug("url={url}, headers={headers}", url=url, headers=headers)
        if not headers:
            headers = self.BASE_HEADERS.copy()
        headers = self._headers_to_list(headers)

        logger.debug("prepared headers={h}", h=headers)

        buffer = BytesIO()

        curl_obj.setopt(pycurl.WRITEFUNCTION, buffer.write)
        curl_obj.setopt(pycurl.HEADERFUNCTION, self._header_function)
        curl_obj.setopt(pycurl.BUFFERSIZE, 102400)
        curl_obj.setopt(pycurl.URL, url)
        curl_obj.setopt(pycurl.HTTPHEADER, headers)
        curl_obj.setopt(pycurl.USERAGENT, CURL_USERAGENT)
        curl_obj.setopt(pycurl.MAXREDIRS, 50)
        curl_obj.setopt(pycurl.ACCEPT_ENCODING, "gzip, deflate")
        curl_obj.setopt(pycurl.TCP_KEEPALIVE, 1)
        curl_obj.setopt(pycurl.FOLLOWLOCATION, True)

        try:
            curl_obj.perform()
        except pycurl.error as e:
            logger.debug(e, exc_info=True)
            logger.warning(e)
            return b""

        status = curl_obj.getinfo(pycurl.HTTP_CODE)
        logger.debug("HTTP status: {s}", s=status)
        curl_obj.close()

        if status != HTTPStatus.OK:
            hdrs = None
            try:
                hdrs = {k: v[-1] for k, v in self._headers.items()}
            except (IndexError, KeyError):
                pass
            phrase = "error"
            try:
                phrase = http.client.responses[status]
                logger.error("HTTP status error: {s}", s=status)
            except KeyError:
                pass
            raise HTTPError(url=url,
                            msg=phrase,
                            code=status,
                            hdrs=hdrs,
                            fp=None)

        # Server changing maps will trigger sessionid change,
        # keep track of latest sessionid in response headers.
        sessionid = self._find_sessionid()
        if sessionid and self._auth_data:
            self._auth_data.sessionid = sessionid

        return buffer.getvalue()
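_header_function is referenced but not shown. Given that the status-error path reads self._headers as a dict of lists, a plausible implementation follows the pattern from pycurl's documentation; a sketch, assuming self._headers is reset per request:

def _header_function(self, header_line: bytes):
    # pycurl calls this once per raw header line; fold the lines into a
    # name -> [values] dict, keeping every occurrence of repeated headers.
    line = header_line.decode('iso-8859-1')
    if ':' not in line:
        return
    name, value = line.split(':', 1)
    self._headers.setdefault(name.strip().lower(), []).append(value.strip())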
Example #23
 def _execute(curl: Curl, close_connection: bool) -> int:
     curl.perform()
     status_code = curl.getinfo(curl.HTTP_CODE)
     if close_connection:
         curl.close()
     return status_code
Example #24
 def new_session(self) -> Curl:
     session = Curl()
     yield session
     session.close()
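The yield suggests this method is meant to be wrapped with contextlib.contextmanager. A standalone sketch of that pattern, with a try/finally added so the handle is closed even on error (the decorator itself is not visible in the snippet):

from contextlib import contextmanager
from pycurl import Curl

@contextmanager
def new_session():
    session = Curl()
    try:
        yield session
    finally:
        session.close()

# with new_session() as c:
#     c.setopt(c.URL, 'https://example.com')
#     c.perform()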
Example #25
import re
import os
from io import BytesIO
from urllib.parse import urlparse
from pycurl import Curl

buffer = BytesIO()
c = Curl()
c.setopt(c.URL, 'http://www.xiachufang.com/')
c.setopt(c.WRITEDATA, buffer)
c.perform()
c.close()
body = buffer.getvalue()
text = body.decode('utf-8')
print(text)

img_list = re.findall(r'src=\"(http://i2\.chuimg\.com/\w+\.jpg)', text)

# Initialize the download directory
image_dir = os.path.join(os.curdir, 'images')
# if not os.path.isdir(image_dir):
#     os.mkdir(image_dir)

for img in img_list[::-1]:
    o = urlparse(img)
    filename = o.path[1:]
    filepath = os.path.join(image_dir, filename)
    if not os.path.isdir(os.path.dirname(filepath)):
        os.mkdir(os.path.dirname(filepath))
    url = '%s://%s/%s' % (o.scheme, o.netloc, filename)
    print(url)
print('')
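The loop above only prints the reconstructed URLs. A minimal sketch of actually downloading them while reusing a single handle (which is exactly what the connection-reuse benchmark below measures):

c = Curl()
for img in img_list:
    o = urlparse(img)
    filepath = os.path.join(image_dir, o.path[1:])
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with open(filepath, 'wb') as f:
        c.setopt(c.URL, img)
        c.setopt(c.WRITEDATA, f)
        c.perform()  # the handle keeps the connection alive between files
c.close()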

Example #26

###  CONNECTION REUSE TESTS FOLLOW ###

LIBRARY = "pycurl (saving response body by cStringIO BUT MAKING A NEW HANDLE EVERY TIME) "
print("Testing {0} performance with {1} cycles".format(LIBRARY, CYCLES))
start = time.clock()
for i in xrange(1, CYCLES):
    mycurl = Curl()
    mycurl.setopt(mycurl.URL, URL)
    body = StringIO()
    mycurl.setopt(mycurl.WRITEDATA, body)
    mycurl.perform()
    output = body.getvalue()
    body.close()
    mycurl.close()
end = time.clock()

print('{0}: ran {1} HTTP GET requests in {2} seconds'.format(
    LIBRARY, CYCLES, (end - start)))
print('')

LIBRARY = "pycurl (saving response body by cStringIO) "
print("Testing {0} CONNECTION REUSE performance with {1} cycles".format(
    LIBRARY, CYCLES))
mycurl = Curl()
mycurl.setopt(mycurl.URL, URL)

start = time.clock()
for i in xrange(1, CYCLES):
    body = StringIO()
Example #27
    def processVideo(self, vID, number):
        for _attempt in range(self.retryCount):
            title = ''
            download = None
            for i in count():
                try:
                    self.goTo(vID)
                    title = self.getElement('h1[itemprop=name]').text.strip().rstrip('.')
                    self.driver.find_element_by_class_name('iconify_down_b').click()
                    download = self.getElement('#download')
                    break
                except NoSuchElementException as e:
                    self.logger.warning(e.msg)
                    if i >= self.retryCount:
                        self.logger.error("Page load failed")
                        self.errors += 1
                        break
            # Parse download links
            link = linkSize = localSize = downloadOK = downloadSkip = None
            if download:
                for preference in FILE_PREFERENCES:
                    try:
                        link = download.find_element_by_partial_link_text(preference)
                        break
                    except NoSuchElementException:
                        pass
            if link: # Parse chosen download link
                userAgent = str(self.driver.execute_script('return window.navigator.userAgent'))
                cookies = self.driver.get_cookies()
                extension = link.get_attribute('download').split('.')[-1]
                description = '%s/%s' % (link.text, extension.upper())
                link = str(link.get_attribute('href'))
                if self.getFileSizes:
                    try:
                        request = requests.get(link, stream = True, headers = { 'user-agent': userAgent }, cookies = dict((str(cookie['name']), str(cookie['value'])) for cookie in cookies))
                        request.close()
                        linkSize = int(request.headers['content-length'])
                        self.totalFileSize += linkSize
                        description += ', %s' % readableSize(linkSize)
                    except Exception as e:
                        self.logger.warning(e)
            else:
                description = extension = 'NONE'
            # Prepare file information
            prefix = ' '.join((title, '(%s)' % description))
            suffix = ' '.join((('%d/%d %d%%' % (number, len(self.vIDs), int(number * 100.0 / len(self.vIDs)))),)
                            + ((readableSize(self.totalFileSize),) if self.totalFileSize else ()))
            self.logger.info(' '.join((prefix, suffix)))
            fileName = cleanupFileName('%s.%s' % (' '.join(((title,) if title else ()) + (str(vID),)), extension.lower()))
            targetFileName = join(self.targetDirectory, fileName)
            if self.setLanguage:
                try:
                    self.driver.find_element_by_id('change_settings').click()
                    languages = self.driver.find_elements_by_css_selector('select[name=language] option')
                    currentLanguage = ([l for l in languages if l.is_selected()] or [None,])[0]
                    if currentLanguage is None or currentLanguage is languages[0]:
                        ls = [l for l in languages if l.text.capitalize().startswith(self.setLanguage)]
                        if len(ls) != 1:
                            ls = [l for l in languages if l.get_attribute('value').capitalize().startswith(self.setLanguage)]
                        if len(ls) == 1:
                            self.logger.info("Language not set, setting to %s", ls[0].text)
                            ls[0].click()
                            self.driver.find_element_by_css_selector('#settings_form input[type=submit]').click()
                        else:
                            self.logger.error("Unsupported language: %s", self.setLanguage)
                            self.setLanguage = None
                    else:
                        self.logger.info("Language already set to %s / %s", currentLanguage.get_attribute('value').upper(), currentLanguage.text)
                except NoSuchElementException:
                    self.logger.warning("Failed to set language to %s, settings not available", self.setLanguage)
            if link: # Downloading file
                if linkSize:
                    localSize = getFileSize(targetFileName)
                    if localSize == linkSize:
                        downloadOK = True
                    elif localSize and localSize > linkSize:
                        self.errors += 1
                        self.logger.error("Local file is larger (%d) than remote file (%d)", localSize, linkSize)
                        downloadSkip = True
                        #remove(targetFileName)
                        #localSize = None
                if self.doDownload and not downloadOK:
                    class ProgressIndicator(object):
                        QUANTUM = 10 * 1024 * 1024 # 10 megabytes
                        ACTION = r'--\\||//' # update() often gets called in pairs, this smooths things out
                        action = len(ACTION) - 1

                        def __init__(self, timeout):
                            self.timeout = timeout
                            self.started = False
                            self.totalRead = 0
                            self.lastData = time()
                            self.count = 0
                            self.action = len(self.ACTION) - 1
                            self.progress("Dowloading: ")

                        def progress(self, s, suffix = ''):
                            self.action = (self.action + 1) % len(self.ACTION)
                            print('\b%s%s' % (s, suffix + '\n' if suffix else self.ACTION[self.action]), end = '', flush = True)

                        def update(self, _length, totalRead, *_args):
                            if totalRead <= self.totalRead:
                                if time() > self.lastData + self.timeout:
                                    raise curlError("Download seems stalled")
                            else:
                                self.totalRead = totalRead
                                self.lastData = time()
                            oldCount = self.count
                            self.count = int(totalRead // self.QUANTUM) + 1
                            self.progress(('=' if self.started else '+') * max(0, self.count - oldCount))
                            self.started = True

                        def end(self):
                            self.progress("OK")

                    progressIndicator = ProgressIndicator(self.timeout)
                    curl = Curl()
                    curl.setopt(curl.CAINFO, certifi.where())
                    curl.setopt(curl.COOKIE, '; '.join('%s=%s' % (cookie['name'], cookie['value']) for cookie in cookies))
                    curl.setopt(curl.TIMEOUT, self.timeout)
                    curl.setopt(curl.USERAGENT, userAgent)
                    curl.setopt(curl.FOLLOWLOCATION, True)
                    curl.setopt(curl.URL, link)
                    curl.setopt(curl.PROGRESSFUNCTION, progressIndicator.update)
                    try:
                        with open(targetFileName, 'wb') as f:
                            curl.setopt(curl.WRITEDATA, f)
                            curl.perform()
                            curl.close()
                        progressIndicator.end()
                        downloadOK = True
                    except curlError as e:
                        self.errors += 1
                        self.logger.error("Download failed: %s", e)
                    except KeyboardInterrupt:
                        self.errors += 1
                        self.logger.error("Download interrupted")
                    if downloadOK:
                        localSize = getFileSize(targetFileName)
                        if not localSize:
                            self.errors += 1
                            downloadOK = False
                            self.logger.error("Downloaded file seems corrupt")
                        elif linkSize:
                            if localSize > linkSize:
                                self.errors += 1
                                downloadOK = False
                                self.logger.error("Downloaded file larger (%d) than remote file (%d)", localSize, linkSize)
                            elif localSize < linkSize:
                                self.errors += 1
                                downloadOK = False
                                self.logger.error("Downloaded file smaller (%d) than remote file (%d)", localSize, linkSize)
                if downloadOK:
                    self.logger.info("OK")
                    break
                elif downloadSkip or not self.doDownload:
                    self.logger.info("Downloading SKIPPED")
                    break
        else:
            self.logger.info("Download ultimately failed after %d retries", self.retryCount)
        # Creating symbolic links, if enabled
        for dirName in (dirName for (dirName, vIDs) in self.folders if vID in vIDs):
            linkFileName = join(dirName, fileName)
            try:
                if lexists(linkFileName):
                    remove(linkFileName)
            except OSError:
                pass
            try:
                (hardlink if self.useHardLinks else symlink)(join('..', fileName), linkFileName)
            except Exception as e:
                self.logger.warning("Can't create link at %s: %s", linkFileName, e)
                self.errors += 1
Example #29
def _complete_request(curl: pycurl.Curl, buffer: BytesIO, response: Response):
  curl.perform()
  response.status = curl.getinfo(curl.RESPONSE_CODE)
  response.body = buffer.getvalue().decode(_CHAR_ENCODING)
  curl.close()
Example #32
class HttpDirectory(RemoteDirectory):

    SCHEMES = (
        "http",
        "https",
    )
    HEADERS = config.HEADERS
    BLACK_LIST = (
        "?C=N&O=D",
        "?C=M&O=A",
        "?C=S&O=A",
        "?C=D&O=A",
        "?C=N;O=D",
        "?C=M;O=A",
        "?C=M&O=D",
        "?C=S;O=A",
        "?C=S&O=D",
        "?C=D;O=A",
        "?MA",
        "?SA",
        "?DA",
        "?ND",
        "?C=N&O=A",
        "?C=N&O=A",
        "?M=A",
        "?N=D",
        "?S=A",
        "?D=A",
    )
    FILE_NAME_BLACKLIST = (
        "Parent Directory",
        " Parent Directory",
        "../",
    )
    MAX_RETRIES = 2
    TIMEOUT = 25

    def __init__(self, url):
        super().__init__(url)
        self.curl = None
        self.curl_head = None
        self.init_curl()

    def init_curl(self):

        self.curl = Curl()
        self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
        self.curl.setopt(self.curl.SSL_VERIFYHOST, 0)
        self.curl.setopt(pycurl.TIMEOUT, HttpDirectory.TIMEOUT)

        self.curl_head = self._curl_handle()

    @staticmethod
    def _curl_handle():

        curl_head = Curl()
        curl_head.setopt(pycurl.SSL_VERIFYPEER, 0)
        curl_head.setopt(pycurl.SSL_VERIFYHOST, 0)
        curl_head.setopt(pycurl.NOBODY, 1)
        curl_head.setopt(pycurl.TIMEOUT, HttpDirectory.TIMEOUT)

        return curl_head

    def list_dir(self, path):

        current_dir_name = path[path.rstrip("/").rfind("/") + 1:-1]
        path_identifier = hashlib.md5(current_dir_name.encode())
        path_url = urljoin(self.base_url, path, "")
        body = self._fetch_body(path_url)
        anchors = self._parse_links(body)

        urls_to_request = []
        files = []

        for anchor in anchors:
            if self._should_ignore(self.base_url, path, anchor):
                continue

            if self._isdir(anchor):

                directory = File(
                    name=anchor.href,  # todo handle external links here
                    mtime=0,
                    size=0,
                    path=path,
                    is_dir=True)
                path_identifier.update(bytes(directory))
                files.append(directory)
            else:
                urls_to_request.append(urljoin(path_url, anchor.href))

        for file in self.request_files(urls_to_request):
            path_identifier.update(bytes(file))
            files.append(file)

        return path_identifier.hexdigest(), files

    def request_files(self, urls_to_request: list) -> list:

        if len(urls_to_request) > 150:
            # Many urls, use multi-threaded solution
            pool = ThreadPool(processes=10)
            files = pool.starmap(self._request_file,
                                 zip(urls_to_request, repeat(self.base_url)))
            pool.close()
            for file in files:
                if file:
                    yield file
        else:
            # Too few urls to create thread pool
            for url in urls_to_request:
                file = self._request_file(url, self.base_url)
                if file:
                    yield file

    @staticmethod
    def _request_file(url, base_url):

        retries = HttpDirectory.MAX_RETRIES
        while retries > 0:
            try:
                curl = HttpDirectory._curl_handle()
                raw_headers = BytesIO()
                curl.setopt(pycurl.URL, url.encode("utf-8", errors="ignore"))
                curl.setopt(pycurl.HEADERFUNCTION, raw_headers.write)
                curl.perform()

                stripped_url = url[len(base_url) - 1:]
                headers = HttpDirectory._parse_dict_header(
                    raw_headers.getvalue().decode("utf-8", errors="ignore"))
                raw_headers.close()

                path, name = os.path.split(stripped_url)
                date = headers.get("Last-Modified", "1970-01-01")
                curl.close()
                return File(path=unquote(path).strip("/"),
                            name=unquote(name),
                            size=int(headers.get("Content-Length", -1)),
                            mtime=int(parse_date(date).timestamp()),
                            is_dir=False)
            except pycurl.error:
                retries -= 1

        logger.debug("TimeoutError - _request_file")
        raise TimeoutError

    def _fetch_body(self, url: str):
        retries = HttpDirectory.MAX_RETRIES
        while retries > 0:
            try:
                content = BytesIO()
                self.curl.setopt(pycurl.URL,
                                 url.encode("utf-8", errors="ignore"))
                self.curl.setopt(pycurl.WRITEDATA, content)
                self.curl.perform()

                return content.getvalue().decode("utf-8", errors="ignore")
            except pycurl.error:
                self.close()
                retries -= 1

        logger.debug("TimeoutError - _fetch_body")
        raise TimeoutError

    @staticmethod
    def _parse_links(body):

        parser = HTMLAnchorParser()
        parser.feed(body)
        return parser.anchors

    @staticmethod
    def _isdir(link: Anchor):
        return link.href.endswith("/")

    @staticmethod
    def _should_ignore(base_url, current_path, link: Anchor):

        full_url = urljoin(base_url, link.href)
        if full_url == urljoin(urljoin(base_url, current_path),
                               "../") or full_url == base_url:
            return True

        if link.href.endswith(HttpDirectory.BLACK_LIST):
            return True

        # Ignore external links
        if not full_url.startswith(base_url):
            return True

        # Ignore parameters in url
        if "?" in link.href:
            return True

    @staticmethod
    def _parse_dict_header(raw):
        headers = dict()
        for line in raw.split(
                "\r\n")[1:]:  # Ignore first 'HTTP/1.0 200 OK' line
            if line:
                k, v = line.split(":", maxsplit=1)
                headers[k.strip()] = v.strip()

        return headers

    def close(self):
        self.curl.close()
        self.init_curl()
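The HEAD-request pattern used by _request_file, reduced to a standalone sketch (URL encoding and the retry loop omitted):

import pycurl
from io import BytesIO

def head(url):
    # NOBODY skips the response body; HEADERFUNCTION captures the raw headers.
    raw = BytesIO()
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.NOBODY, 1)
    c.setopt(pycurl.HEADERFUNCTION, raw.write)
    c.perform()
    c.close()
    return raw.getvalue().decode('iso-8859-1')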
Example #33

# remove a handle that was never added: this should fail
if 1:
    m = CurlMulti()
    c = Curl()
    try:
        m.remove_handle(c)
    except pycurl.error:
        pass
    else:
        assert 0, "internal error"
    del m, c


# remove an invalid but closed handle
if 1:
    m = CurlMulti()
    c = Curl()
    c.close()
    m.remove_handle(c)
    del m, c


# add a closed handle: this should fail
if 1:
    m = CurlMulti()
    c = Curl()
    c.close()
    try:
        m.add_handle(c)
    except pycurl.error:
        pass
    else:
        assert 0, "internal error"