示例#1
0
    async def handle_url(self, request: HttpParser) -> HttpResponse:
        """Serve a static file below ``self.root_dir`` for a GET or HEAD request.

        A path ending in ``/`` is mapped to its ``index.html``.  A single
        ``Range: bytes=<start>-<end>`` header is honoured; this is not a full
        RFC 7233 implementation.

        Returns:
            ``HttpResponse(405)`` for any other method, ``HttpResponse(400)``
            for a Range header that does not match ``bytes=<start>-<end>``,
            ``HttpResponse(416)`` when the range starts at or beyond the end
            of the file, 404/403 when the file is missing or unreadable,
            otherwise a 200 response (206 for a ranged GET) carrying
            ``Last-Modified`` and, when the extension is listed in
            ``self.mime_types``, ``Content-Type``.
        """
        method = request.get_method().upper()
        if method not in ('GET', 'HEAD'):
            return HttpResponse(405)
        path = request.get_path()
        if path.endswith('/'):
            path += 'index.html'
        relative = os.path.relpath(url2pathname(path), '/')
        filename = os.path.join(self.root_dir, relative)
        try:
            byte_range = None
            if 'Range' in request.get_headers():
                # Not RFC 7233 compliant
                range_match = re.match(r'bytes=(\d+)-(\d+)',
                                       request.get_headers()['Range'])
                if not range_match:
                    return HttpResponse(400, 'Invalid Range header')
                start, end = map(int, range_match.groups())
                # A range whose end precedes its start is invalid and is
                # ignored, so the whole file is served instead of emitting a
                # nonsensical Content-Range such as "bytes 10-9/...".
                if start <= end:
                    # Python range is exclusive, HTTP Range is inclusive
                    byte_range = range(start, end + 1)
            length = 0
            async with aiofiles.open(filename, 'rb') as f:
                # Measured through the open handle instead of os.stat to
                # ensure the file can be accessed
                await f.seek(0, os.SEEK_END)
                length = await f.tell()
                if byte_range is not None and byte_range.start >= length:
                    # Nothing of the file lies inside the requested range.
                    response = HttpResponse(416)
                    response.headers['Content-Range'] = 'bytes */%d' % length
                    return response
                if method == 'GET':
                    if byte_range is not None:
                        await f.seek(byte_range.start)
                        data = await f.read(len(byte_range))
                        # Report what was actually read; the request may
                        # extend past the end of the file.
                        byte_range = range(byte_range.start,
                                           byte_range.start + len(data))
                        response = HttpResponse(206, data)
                    else:
                        await f.seek(0)
                        data = await f.read()
                        response = HttpResponse(200, data)
                else:
                    response = HttpResponse(200)
                    if byte_range is not None:
                        byte_range = range(byte_range.start,
                                           min(length, byte_range.stop))
                    response.headers['Content-Length'] = length
            if byte_range is not None:
                response.headers['Content-Range'] = 'bytes %d-%d/%d' % (
                    byte_range.start, byte_range.stop - 1, length)

        except FileNotFoundError:
            return HttpResponse(404,
                                'This is not the file you are looking for')
        except PermissionError:
            return HttpResponse(403)
        _, extension = os.path.splitext(filename)
        extension = extension[1:]
        if extension.lower() in self.mime_types:
            response.headers['Content-Type'] = self.mime_types[
                extension.lower()]
        response.headers['Last-Modified'] = formatdate(
            os.stat(filename).st_mtime, False, True)
        return response
def main():
    """Fetch ``/`` from gunicorn.org over a raw socket and print the result.

    The reply is fed to an ``HttpParser`` chunk by chunk.  Once the header
    section is complete the headers, the ``content-length`` value and the
    parser's method are printed; the collected body is printed at the end.
    """
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    body = []
    header_done = False
    try:
        s.connect(('gunicorn.org', 80))
        # sendall() keeps writing until the whole request is out; send() may
        # stop after a partial write.
        s.sendall(b("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n"))

        while True:
            data = s.recv(1024)
            if not data:
                break

            recved = len(data)
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            if p.is_headers_complete() and not header_done:
                headers = p.get_headers()
                print(headers)
                # A response without Content-Length (e.g. chunked) prints
                # None here instead of raising KeyError.
                print(headers.get('content-length'))
                print(p.get_method())
                header_done = True

            if p.is_partial_body():
                body.append(p.recv_body())

            if p.is_message_complete():
                break

        print(b("").join(body))

    finally:
        s.close()
示例#3
0
    def run(self):
        """Relay one HTTP request from the client to its ``Host`` and back.

        Reads a single request (at most ``self.max`` bytes) from
        ``self.client``, forwards it unchanged to port 80 of the host named
        in the ``Host`` header, and streams the reply back until the
        upstream side stops sending.  Both sockets are closed on every exit
        path.
        """
        HTTP_Request = self.client.recv(self.max)
        p = HttpParser()
        # Stays None until an upstream connection exists, so the cleanup
        # below never touches an unbound name.
        Relay_socket = None

        try:
            if HTTP_Request:
                print('Got something from ' + str(self.address) + '...')
                request_length = len(HTTP_Request)
                nparsed = p.execute(HTTP_Request, request_length)
                assert nparsed == request_length

                if p.is_headers_complete():
                    print(p.get_headers())
                    print(p.get_headers()['Host'])
                    destination_host = p.get_headers()['Host']

                    Relay_socket = socket.socket(socket.AF_INET,
                                                 socket.SOCK_STREAM)
                    Relay_socket.connect((destination_host, 80))
                    Relay_socket.sendall(HTTP_Request)
                    print('Forwarding data to destination host...')

                    while True:
                        HTTP_Response = Relay_socket.recv(self.max)
                        if not HTTP_Response:
                            break
                        print('Received data back. Forwarding to the client...')
                        self.client.sendall(HTTP_Response)
        finally:
            self.client.close()
            if Relay_socket is not None:
                Relay_socket.close()
class Response_Parser():
    """Feed a raw HTTP response to an ``HttpParser`` and expose the result."""

    def __init__(self):
        self.parser = HttpParser()
        self.body = None
        self.len_body = 0
        self.len_response = 0

    def parse(self, raw_response):
        """Parse *raw_response*, caching its body and both byte counts."""
        total = len(bytearray(raw_response))
        self.len_response = total
        self.parser.execute(raw_response, total)
        payload = self.parser.recv_body()
        self.body = payload
        self.len_body = len(bytearray(payload))

    def get_all_keys(self):
        """Return the names of all parsed headers."""
        parsed_headers = self.parser.get_headers()
        return parsed_headers.keys()

    def get_keys(self, *args):
        """Map each requested header name to its value, or ``'-'`` if absent."""
        return {
            name: self.parser.get_headers().get(name, '-')
            for name in args
        }

    def get_reponse(self, *args):
        """Build an ``HTTP_Response`` from the status, headers and sizes."""
        selected = self.get_keys(*args)
        return HTTP_Response(self.parser.get_status_code(), selected,
                             self.len_response, self.len_body)

    def get_body(self):
        """Return the body cached by :meth:`parse`."""
        return self.body
示例#5
0
def main():
    """Fetch ``/`` from gunicorn.org over a raw socket and print the result.

    The reply is fed to an ``HttpParser`` chunk by chunk.  The headers and
    the ``content-length`` value are printed once the header section is
    complete; the collected body is printed at the end.
    """
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    body = []
    header_done = False
    try:
        s.connect(("gunicorn.org", 80))
        # sendall() keeps writing until the whole request is out; send() may
        # stop after a partial write.
        s.sendall("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n")

        while True:
            data = s.recv(1024)
            if not data:
                break

            recved = len(data)
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            if p.is_headers_complete() and not header_done:
                headers = p.get_headers()
                print(headers)
                # A response without Content-Length (e.g. chunked) prints
                # None here instead of raising KeyError.
                print(headers.get("content-length"))
                header_done = True

            if p.is_partial_body():
                body.append(p.recv_body())

            if p.is_message_complete():
                break

        print("".join(body))

    finally:
        s.close()
class Request_Parser():
    """Feed a raw HTTP request to an ``HttpParser`` and expose the result."""

    def __init__(self):
        self.parser = HttpParser()
        self.len_request = 0
        self.len_body = 0
        # Body captured by parse(); None until parse() has run.
        self.body = None

    def parse(self, raw_requset):
        """Parse *raw_requset*, caching its body and both byte counts."""
        self.len_request = len(bytearray(raw_requset))
        self.parser.execute(raw_requset, self.len_request)
        # http-parser's recv_body() hands the buffered body over and empties
        # the parser's buffer, so the result is kept here; asking the parser
        # again later would return nothing.
        self.body = self.parser.recv_body()
        self.len_body = len(bytearray(self.body))

    def get_all_keys(self):
        """Get All the key in request headers."""
        return self.parser.get_headers().keys()

    def get_keys(self, *args):
        """Map each requested header name to its value, or ``'-'`` if absent."""
        header_keys = {}
        for key in args:
            header_keys[key] = self.parser.get_headers().get(key, '-')
        return header_keys

    def get_request(self, *args):
        """Build an ``HTTP_Requset`` from the selected headers and sizes."""
        values = self.get_keys(*args)
        obj = HTTP_Requset(values, self.len_request, self.len_body)
        return obj

    def get_body(self):
        """Return the body captured by :meth:`parse`.

        Before :meth:`parse` has run this falls back to asking the parser
        directly, as the method always did.
        """
        if self.body is None:
            return self.parser.recv_body()
        return self.body
示例#7
0
    def run(self):
        """Relay one HTTP request from the client to its ``Host`` and back.

        Reads a single request (at most ``self.max`` bytes) from
        ``self.client``, forwards it unchanged to port 80 of the host named
        in the ``Host`` header, and streams the reply back until the
        upstream side stops sending.  Both sockets are closed on every exit
        path.
        """
        HTTP_Request = self.client.recv(self.max)
        p = HttpParser()
        # Stays None until an upstream connection exists, so the cleanup
        # below never touches an unbound name.
        Relay_socket = None

        try:
            if HTTP_Request:
                print('Got something from ' + str(self.address) + '...')
                request_length = len(HTTP_Request)
                nparsed = p.execute(HTTP_Request, request_length)
                assert nparsed == request_length

                if p.is_headers_complete():
                    print(p.get_headers())
                    print(p.get_headers()['Host'])
                    destination_host = p.get_headers()['Host']

                    Relay_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                    Relay_socket.connect((destination_host, 80))
                    Relay_socket.sendall(HTTP_Request)
                    print('Forwarding data to destination host...')

                    while True:
                        HTTP_Response = Relay_socket.recv(self.max)
                        if not HTTP_Response:
                            break
                        print('Received data back. Forwarding to the client...')
                        self.client.sendall(HTTP_Response)
        finally:
            self.client.close()
            if Relay_socket is not None:
                Relay_socket.close()
示例#8
0
    def start(self):
        """Accept connections forever and pass each parsed request to ``koi``.

        For every accepted connection the request is read in 1024-byte
        chunks until the parser reports a complete message (or the peer
        stops sending); the method, headers and buffered body are then
        handed to ``koi`` together with ``self.app`` and the connection.
        """
        signal.signal(signal.SIGTERM, SIG_DFL)
        print(f"Worker booted with pid: {os.getpid()}")
        while True:
            body = []
            conn, addr = self.socket.accept()
            http_parser = HttpParser()
            with conn:
                while True:
                    data = conn.recv(1024)
                    if not data:
                        break
                    recved = len(data)
                    nparsed = http_parser.execute(data, recved)
                    assert nparsed == recved

                    if http_parser.is_headers_complete():
                        print(http_parser.get_headers())

                    if http_parser.is_partial_body():
                        body.append(http_parser.recv_body())

                    if http_parser.is_message_complete():
                        break

                # The parser yields bytes chunks, so they have to be joined
                # as bytes and decoded before going into a text buffer;
                # "".join() on them raises TypeError for any non-empty body.
                # NOTE(review): a BytesIO may suit binary bodies better if
                # koi accepts one - confirm against koi.
                buffered_body = io.StringIO(
                    b"".join(body).decode("utf-8", "replace"))
                koi(self.app,
                    conn,
                    request_method=http_parser.get_method(),
                    headers=http_parser.get_headers(),
                    body=buffered_body,
                    content_length=http_parser.get_headers().get(
                        'content-length', 0))
示例#9
0
def handle(connection, address, pid, queue_obj):
  """Read one JSON request from *connection*, answer it and queue any job.

  The request body must be a JSON object.  Its ``data`` member selects the
  action: ``'ping'`` reports status, ``'stop'`` signals process *pid* with
  SIGUSR1, and a value containing both ``'trackers'`` and ``'hashes'`` is
  put on *queue_obj* together with the ``callback`` and ``private_key``
  members.  Every path answers through ``send_and_close``.
  """
  import logging
  import json
  from queue import Full

  logging.basicConfig(level=logging.DEBUG)
  logger = logging.getLogger("process-%r" % (address,))
  content = []
  parser = HttpParser()

  try:
    logger.debug("Connected %r at %r", connection, address)
    while True:
      resp = connection.recv(psize)
      if not resp:
        # The peer closed before the message was complete.  Without this
        # check recv() keeps returning b"" and the loop spins forever.
        break
      recved = len(resp)

      parsed = parser.execute(resp, recved)
      assert parsed == recved

      if parser.is_partial_body():
        content.append(parser.recv_body())

      if parser.is_message_complete():
        break
  except Exception:
    logger.exception("Problem handling request: %s", sys.exc_info()[1])
    send_and_close(connection, 500)
    return

  data = None
  url = None
  key = None

  try:
    # Join first, decode once: a multi-byte character may be split across
    # two received chunks and would fail to decode chunk by chunk.
    parsed_json = json.loads(b"".join(content).decode("utf-8"))
    data = parsed_json.get('data')
    url = parsed_json.get('callback')
    key = parsed_json.get('private_key')
  except Exception:
    logger.exception("Problem decoding JSON: %s", sys.exc_info()[1])

  if data is None:
    send_and_close(connection, 400, {"message": "JSON Parse Error"})
  elif data == 'ping':
    send_and_close(connection, 200, {"started": started, "queue": queue_obj.qsize()})
  elif data == 'stop':
    send_and_close(connection, 200, {"message": "Shutting down"})
    os.kill(pid, signal.SIGUSR1)
  elif isinstance(data, (dict, list, str)) and 'trackers' in data and 'hashes' in data:
    try:
      queue_obj.put({"data": [data, url, key], "address": address}, False)
      send_and_close(connection, 200, {"message": ("in queue [%r]" % (address,))})
    except Full:
      send_and_close(connection, 429, {"message": "Server queue is full. Try another one."})
  else:
    # Anything else used to fall through without a reply, leaving the
    # connection open (or raising TypeError for numeric values).
    send_and_close(connection, 400, {"message": "Unsupported request"})
示例#10
0
    def get_appropriate_response(self):
        """Build the response object for the raw request in ``self.content``.

        Returns ``HttpResponseBadRequest`` when the headers cannot be
        parsed, ``HttpResponseNotImplemented`` for a method outside
        ``SUPPORTED_METHODS``, ``HttpResponseForbidden`` when the target
        resolves outside the host's directory or is unreadable,
        ``HttpResponseNotFound`` when it is missing, a 304 ``HttpResponse``
        when ``If-None-Match`` equals the file's ETag,
        ``HttpResponsePartialContent`` for ``Range`` requests and a plain
        ``HttpResponse`` otherwise.  Any other I/O error yields
        ``HttpResponseServerError``.
        """
        import os

        try:
            # try to use the fast C parser
            from http_parser.parser import HttpParser
        except ImportError:
            # fall back to the Python parser
            from http_parser.pyparser import HttpParser

        p = HttpParser()
        # The length given to the parser must describe the encoded bytes;
        # len(self.content) counts characters and is too short as soon as
        # the request contains non-ASCII text.
        raw_request = self.content.encode('utf-8')
        p.execute(raw_request, len(raw_request))

        if not p.is_headers_complete():
            return HttpResponseBadRequest(content_f=BAD_REQUEST_HTML)

        # check method
        if p.get_method() not in SUPPORTED_METHODS:
            return HttpResponseNotImplemented(content_f=NOT_IMPLEMENTED_HTML)

        headers = p.get_headers()
        try:
            base_filepath = settings.HOSTS[headers['Host'].split(':')[0]]
        except KeyError:
            base_filepath = settings.HOSTS['default']

        req_file = self.content.split(' ')[1]
        if req_file == '/':
            req_file = '/index.html'

        full_path = base_filepath + req_file

        # The request target is untrusted: refuse anything that resolves
        # outside the host's directory (e.g. "/../../etc/passwd").  An empty
        # base path means paths are taken from the filesystem root.
        root = os.path.abspath(base_filepath or os.sep)
        resolved = os.path.abspath(full_path)
        if resolved != root and not resolved.startswith(
                os.path.join(root, '')):
            return HttpResponseForbidden(content_f=FORBIDDEN_HTML)

        try:
            # Probe that the file exists and is readable, closing the handle
            # straight away instead of leaking it.
            with open(full_path):
                pass

            # check if modified
            if 'If-None-Match' in headers and headers[
                    'If-None-Match'] == etag_for_file(full_path):
                return HttpResponse(status=304, content_f=full_path)

            if p.get_method() == 'HEAD':
                return HttpResponse(content_f=full_path, method='HEAD')
            if 'Range' in headers:
                return HttpResponsePartialContent(
                    content_f=full_path, h_range=headers['Range'])
            return HttpResponse(content_f=full_path)
        except IOError as err:
            if err.errno == 13:
                return HttpResponseForbidden(content_f=FORBIDDEN_HTML)
            elif err.errno == 2:
                return HttpResponseNotFound(content_f=NOT_FOUND_HTML)

        return HttpResponseServerError(content_f=SERVER_ERROR_HTML)
示例#11
0
def process(indir, outdir):
    """Split every capture file in *indir* into HTTP messages and save them.

    Each file is read whole and parsed message by message.  A message with a
    non-empty path is treated as a request ("send"); anything else is
    treated as the reply ("recv") to the request before it and is saved with
    that request's text prepended.  Parsing of a file stops at the first
    incomplete message.
    """
    findstr = os.path.join(indir, '*')
    for fn in glob.glob(findstr):
        print(fn)
        with open(fn, 'rb') as f:
            http_bin = f.read()

        # Reset per file: a capture that begins with a reply must neither
        # hit an unbound name nor inherit the previous file's request.
        full_http = ''
        http_hostname = 'unknown'

        n = 0
        while n < len(http_bin):

            http = HttpParser()
            nparsed = http.execute(http_bin[n:], len(http_bin) - n)

            if not http.is_message_complete():
                break

            if http.get_path() != '':
                # send

                # Method and path are taken from the raw request line; the
                # original notes that http.get_method() "seems bugged".
                request_line = http_bin[n:].split(None, 2)
                http_method = request_line[0]
                http_path = request_line[1]
                http_request = parse_http_packet(http.get_headers(),
                                                 http.recv_body())
                http_hostname = 'unknown'
                if 'Host' in http.get_headers():
                    http_hostname = http.get_headers()['Host']
                print(http_hostname)

                # NOTE(review): requests step back one byte; the reason is
                # not visible here - presumably the parser consumes the
                # first byte of the following message. Confirm.
                nparsed -= 1

                full_http = http_method + ' ' + http_path + '\n'
                full_http += http_request + '\n'

                save_http_packet(outdir, os.path.basename(fn), http_hostname,
                                 http_path, 'send', full_http)
            else:
                # recv

                http_status = http.get_status_code()
                http_reply = parse_http_packet(http.get_headers(),
                                               http.recv_body())

                full_http += str(http_status) + '\n'
                full_http += http_reply

                save_http_packet(outdir, os.path.basename(fn), http_hostname,
                                 '', 'recv', full_http)

            if nparsed <= 0:
                # No forward progress; stop rather than loop forever.
                break
            n += nparsed
示例#12
0
    def recv_http_response(self, conn):
        """Read an HTTP response from *conn* and return its status and headers.

        Reading stops once the headers are complete and announce no body
        (``content-length`` missing, empty or zero), or once the parser
        reports a complete message.  Any failure, including the peer closing
        early, is re-raised as ``GeneralProxyError``.
        """
        parser = HttpParser(kind=1)
        code, hdrs = None, None

        try:
            while True:
                data = conn.recv(1024)
                parser.execute(data, len(data))

                if parser.is_headers_complete():
                    hdrs = parser.get_headers()
                    code = parser.get_status_code()

                    declared = hdrs.get('content-length')
                    body_expected = bool(declared) and int(declared) != 0
                    if not body_expected:
                        break

                if parser.is_message_complete():
                    break

                if not data:
                    raise EOFError('Incomplete Message')

        except Exception as e:
            message = 'HTTP Proxy communication error ({})'.format(e)
            raise GeneralProxyError(message)

        return code, hdrs
示例#13
0
def iter_warc_records(warc_file, domain_whitelist=None, only_homepages=None):
    """Yield ``(url, headers, body)`` for selected HTML responses in a WARC.

    Records without a URL or that are not HTTP responses are skipped.  When
    *domain_whitelist* is given only URLs whose domain is listed pass;
    otherwise, when *only_homepages* is truthy, only URLs with path ``/``
    and an empty query pass.  Responses whose content type does not mention
    ``text/html`` are dropped.
    """
    for entry in warc_file:
        if not entry.url or \
                entry['Content-Type'] != 'application/http; msgtype=response':
            continue

        url = URL(entry.url, check_encoding=True)

        if domain_whitelist is not None:
            wanted = url.domain in domain_whitelist
        elif only_homepages:
            wanted = url.parsed.path == "/" and url.parsed.query == ""
        else:
            wanted = True
        if not wanted:
            continue

        raw = entry.payload.read()
        http = HttpParser()
        http.execute(raw, len(raw))

        response_headers = http.get_headers()
        if 'text/html' in response_headers.get("content-type", ""):
            yield url, response_headers, http.recv_body()
示例#14
0
def parse_request(http_request, protocol, host, port):
    """
    Parse an HTTP request from Burp Suite into its parts.
    TODO cookie parse

    Returns ``(method, url, header, params, data, jsondata)``: *header* is a
    plain dict without ``Content-Length``, *params* holds the query-string
    pairs, *data* the form-decoded body of a POST (else ``{}``), and
    *jsondata* the decoded body of a POST whose ``Content-Type`` is exactly
    ``application/json`` (else ``{}``).

    NOTE(review): *port* is accepted but not used when building *url*.
    """
    httpParser = HttpParser()
    httpParser.execute(http_request, len(http_request))

    header = dict(httpParser.get_headers())
    # Requests without a body (e.g. GET) carry no Content-Length; a plain
    # pop() would raise KeyError for them.
    header.pop("Content-Length", None)
    # cookie = header["Cookie"]
    body = httpParser.recv_body()
    method = httpParser.get_method()
    url = protocol + "://" + host + httpParser.get_path()
    query = httpParser.get_query_string()

    params = dict(urlparse.parse_qsl(query))
    is_post = method == "POST"
    data = dict(urlparse.parse_qsl(body)) if is_post else {}
    jsondata = {}
    if is_post and header.get("Content-Type") == "application/json":
        try:
            jsondata = json.loads(body)
        except (ValueError, TypeError) as e:
            # str() is required: concatenating the exception object itself
            # raises TypeError and hides the original problem.
            print("[!] " + str(e))
    return method, url, header, params, data, jsondata
示例#15
0
    def iter_items(self, partition):
        """ Yield ``(url, headers, "html", index_level, body)`` per HTML response.

        Records come from the WARC stream opened for ``partition["path"]``.
        Records without a URL, non-response records, URLs rejected by
        ``self.qualify_url`` and responses that are not ``text/html`` are
        skipped.
        """
        for entry in self.open_warc_stream(partition["path"]):
            if not entry.url:
                continue
            if entry['Content-Type'] != 'application/http; msgtype=response':
                continue

            url = URL(entry.url, check_encoding=True)

            do_parse, index_level = self.qualify_url(url)
            if not do_parse:
                continue

            raw = entry.payload.read()
            http = HttpParser()
            http.execute(raw, len(raw))

            response_headers = http.get_headers()
            if 'text/html' in response_headers.get("content-type", ""):
                yield (url, response_headers, "html", index_level,
                       http.recv_body())
示例#16
0
def handle_batch_client(sock):
    """Serve requests arriving on *sock* until it idles, closes or opts out.

    Data is accumulated until a blank line ends a header section.  The
    headers are parsed, up to ``Content-Length`` bytes of already-received
    body are split off, and both are passed to ``handle_request``.  The loop
    ends after five minutes without data, when the peer closes, or when
    ``handle_request`` returns a false value.  The socket is always closed,
    including when an error escapes.
    """
    recvbuf = ""
    try:
        while True:
            rds, _, _ = select.select([sock], [], [], 60 * 5)
            if not rds:
                break

            data = sock.recv(1024)
            if not data:
                break
            recvbuf += data

            pos = recvbuf.find("\r\n\r\n")
            if pos == -1:
                continue
            header_end = pos + 4
            parser = HttpParser()
            nparsed = parser.execute(recvbuf, header_end)
            if nparsed != header_end:
                logging.debug("pos:%d, nparsed:%d, recvbuf:%r", pos, nparsed, recvbuf)
            assert nparsed == header_end
            assert parser.is_headers_complete()
            headers = parser.get_headers()
            content_length = int(headers.get("Content-Length", 0))
            logging.debug("content length:%d", content_length)
            recvbuf = recvbuf[header_end:]
            # Only what has been received so far; the body may be longer.
            preread = recvbuf[:content_length]
            recvbuf = recvbuf[content_length:]
            keepalived = handle_request(sock, parser, preread)
            if not keepalived:
                break
    finally:
        logging.debug("close client")
        sock.close()
示例#17
0
def make_request(sock, server_name):
    """
    Send a minimal ``GET /`` over the open socket *sock* and return the
    headers of the reply as parsed by ``HttpParser``.

    Returns ``None`` when the connection ends before the header section is
    complete.
    """
    parser = HttpParser()

    request = ''.join((
        'GET / HTTP/1.0\r\n',
        'User-Agent: pySSLScan\r\n',
        'Host: %s\r\n\r\n' % (server_name,),
    ))
    sock.write(request.encode('ascii'))

    while True:
        chunk = sock.recv(1024)
        if not chunk:
            return None

        size = len(chunk)
        consumed = parser.execute(chunk, size)
        assert consumed == size

        if parser.is_headers_complete():
            return parser.get_headers()
示例#18
0
def findhue():
    """Search the local network over SSDP for a reply with ``hue-bridgeid``.

    Multicasts an ``M-SEARCH`` for root devices and inspects every reply
    until one carries a ``hue-bridgeid`` header or no reply arrives for five
    seconds.

    Returns:
        ``(addr, headers)`` of the first matching reply, or ``None`` when
        the search times out.
    """
    msg = \
        'M-SEARCH * HTTP/1.1\r\n' \
        'HOST:239.255.255.250:1900\r\n' \
        'ST:upnp:rootdevice\r\n' \
        'MX:2\r\n' \
        'MAN:"ssdp:discover"\r\n' \
        '\r\n'

    # Set up UDP socket
    s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)
    try:
        s.settimeout(5)
        s.sendto(msg.encode('utf-8'), ('239.255.255.250', 1900) )

        while True:
            data, addr = s.recvfrom(65507)
            p = HttpParser()
            recved = len(data)
            if p.execute(data, recved) != recved:
                # A malformed datagram from some other device must not
                # abort the whole search.
                continue
            if p.is_headers_complete():
                headers = p.get_headers()
                if 'hue-bridgeid' in headers:
                    return addr,headers
            # Any root device may answer a multicast search, so keep
            # listening after a reply that does not match; stopping at the
            # first complete message would miss later repliers.
    except timeout:
        pass
    finally:
        s.close()
    return None
示例#19
0
def iter_warc_records(warc_file, domain_whitelist=None, only_homepages=None):
    """Yield ``(url, headers, body)`` for selected HTML responses in a WARC.

    Records without a URL or that are not HTTP responses are skipped.  When
    *domain_whitelist* is given only URLs whose domain is listed pass;
    otherwise, when *only_homepages* is truthy, only URLs with path ``/``
    and an empty query pass.  Responses whose content type does not mention
    ``text/html`` are dropped.
    """
    for entry in warc_file:
        if not entry.url or \
                entry['Content-Type'] != 'application/http; msgtype=response':
            continue

        url = URL(entry.url, check_encoding=True)

        if domain_whitelist is not None:
            wanted = url.domain in domain_whitelist
        elif only_homepages:
            wanted = url.parsed.path == "/" and url.parsed.query == ""
        else:
            wanted = True
        if not wanted:
            continue

        raw = entry.payload.read()
        http = HttpParser()
        http.execute(raw, len(raw))

        response_headers = http.get_headers()
        if 'text/html' in response_headers.get("content-type", ""):
            yield url, response_headers, http.recv_body()
示例#20
0
class Request:
    """A parsed HTTP request built from raw bytes.

    Exposes ``method``, ``path`` (with the raw query string appended when
    one was parsed), ``headers``, ``querystring`` (name -> list of values)
    and a lazily decoded ``body``.
    """

    parser = None
    _body = None

    def __init__(self, data):
        self.parser = HttpParser()
        self.parser.execute(data, len(data))

        self.method = self.parser.get_method()
        self.path = self.parser.get_path()
        self.headers = self.parser.get_headers()
        raw_query = self.parser.get_query_string()
        # parse_qs() percent-decodes names and values itself.  Decoding the
        # whole string beforehand turns an encoded "&", "=" or "+" inside a
        # value into a separator (or a space) and corrupts the result.
        self.querystring = parse_qs(raw_query, keep_blank_values=True)
        if self.querystring:
            self.path += "?{}".format(raw_query)

    def add_data(self, data):
        """Feed further raw bytes of the same request to the parser."""
        self.parser.execute(data, len(data))

    @property
    def body(self):
        """The request body, decoded on first access and cached afterwards."""
        if self._body is None:
            self._body = decode_from_bytes(self.parser.recv_body())
        return self._body

    def __str__(self):
        return "{} - {} - {}".format(self.method, self.path, self.headers)
示例#21
0
    def run(self):
        """Listen for SSDP ``alive`` notifications and record new STBs.

        While ``self.running`` is true, each datagram from ``self.listener``
        is parsed as HTTP.  A notification with ``NTS: ssdp:alive`` and the
        X-CTC_RemotePairing ``NT`` is turned into an ``STB`` and appended to
        ``self.stbs`` under ``self.mutex`` unless one with the same UUID is
        already known.  Datagrams that cannot be processed are ignored.
        """
        while self.running:
            data, addr = self.listener.recvfrom(4096)
            http_pareser = HttpParser()
            http_pareser.execute(data, len(data))
            headers = http_pareser.get_headers()

            try:
                if headers['NTS'] == 'ssdp:alive' and headers[
                        'NT'] == 'urn:zenterio-net:service:X-CTC_RemotePairing:1':
                    stb = STB(uuid=headers['USN'][5:41],
                              location=headers['LOCATION'],
                              nt=headers['NT'])
                    self.mutex.acquire(1)
                    try:
                        if not any(x.uuid == stb.uuid for x in self.stbs):
                            self.stbs.append(stb)
                            log.info('-------------------------------------------')
                            log.info("New STB detected!")
                            log.info("UUID: " + stb.uuid)
                            log.info("Location: " + stb.location)
                            log.info("NT: " + stb.nt)
                    finally:
                        # Always release: an error while the lock is held
                        # would otherwise be swallowed below and leave the
                        # mutex locked for good.
                        self.mutex.release()
            except Exception:
                # Best effort: unrelated or incomplete datagrams (missing
                # headers and the like) are skipped.  Unlike a bare except,
                # this lets KeyboardInterrupt and SystemExit through.
                pass
示例#22
0
    def iter_items(self, partition):
        """ Yield ``(url, headers, "html", index_level, body)`` per HTML response.

        Records come from the WARC stream opened for ``partition["path"]``.
        Records without a URL, non-response records, URLs rejected by
        ``self.qualify_url`` and responses that are not ``text/html`` are
        skipped.
        """
        for entry in self.open_warc_stream(partition["path"]):
            if not entry.url:
                continue
            if entry['Content-Type'] != 'application/http; msgtype=response':
                continue

            url = URL(entry.url, check_encoding=True)

            do_parse, index_level = self.qualify_url(url)
            if not do_parse:
                continue

            raw = entry.payload.read()
            http = HttpParser()
            http.execute(raw, len(raw))

            response_headers = http.get_headers()
            if 'text/html' in response_headers.get("content-type", ""):
                yield (url, response_headers, "html", index_level,
                       http.recv_body())
示例#23
0
 def __init__(self, raw):
     """Parse ``raw.response`` and keep its headers, body and status code."""
     parser = HttpParser()
     parser.execute(raw.response, len(raw.response))
     self.raw = raw
     self._json = None
     self.headers = parser.get_headers()
     # The parser's buffered body chunks are joined into one string.
     body_chunks = parser._body
     self.body = "".join(body_chunks)
     self.code = parser.get_status_code()
示例#24
0
	def get_appropriate_response(self):
		"""Build the response object for the raw request in ``self.content``.

		Returns ``HttpResponseBadRequest`` when the headers cannot be
		parsed, ``HttpResponseNotImplemented`` for a method outside
		``SUPPORTED_METHODS``, ``HttpResponseForbidden`` when the target
		resolves outside the host's directory or is unreadable,
		``HttpResponseNotFound`` when it is missing,
		``HttpResponsePartialContent`` for ``Range`` requests and a plain
		``HttpResponse`` otherwise.  Any other I/O error yields
		``HttpResponseServerError``.
		"""
		import os

		try:
			# try to use the fast C parser
			from http_parser.parser import HttpParser
		except ImportError:
			# fall back to the Python parser
			from http_parser.pyparser import HttpParser

		p = HttpParser()
		# The length given to the parser must describe the encoded bytes;
		# len(self.content) counts characters and is too short as soon as
		# the request contains non-ASCII text.
		raw_request = self.content.encode('utf-8')
		p.execute(raw_request, len(raw_request))

		if not p.is_headers_complete():
			return HttpResponseBadRequest(content_f=BAD_REQUEST_HTML)

		# check method
		if p.get_method() not in SUPPORTED_METHODS:
			return HttpResponseNotImplemented(content_f=NOT_IMPLEMENTED_HTML)

		headers = p.get_headers()
		try:
			base_filepath = settings.HOSTS[headers['Host'].split(':')[0]]
		except KeyError:
			base_filepath = settings.HOSTS['default']

		req_file = self.content.split(' ')[1]
		if req_file == '/':
			req_file = '/index.html'

		full_path = base_filepath + req_file

		# The request target is untrusted: refuse anything that resolves
		# outside the host's directory (e.g. "/../../etc/passwd").  An empty
		# base path means paths are taken from the filesystem root.
		root = os.path.abspath(base_filepath or os.sep)
		resolved = os.path.abspath(full_path)
		if resolved != root and not resolved.startswith(os.path.join(root, '')):
			return HttpResponseForbidden(content_f=FORBIDDEN_HTML)

		try:
			# Probe that the file exists and is readable, closing the handle
			# straight away instead of leaking it.
			with open(full_path):
				pass
			if p.get_method() == 'HEAD':
				return HttpResponse(content_f=full_path, method='HEAD')
			if 'Range' in headers:
				return HttpResponsePartialContent(content_f=full_path, h_range=headers['Range'])
			return HttpResponse(content_f=full_path)
		except IOError as err:
			if err.errno == 13:
				return HttpResponseForbidden(content_f=FORBIDDEN_HTML)
			elif err.errno == 2:
				return HttpResponseNotFound(content_f=NOT_FOUND_HTML)

		return HttpResponseServerError(content_f=SERVER_ERROR_HTML)
示例#25
0
文件: http.py 项目: HVF/diesel
    def request(self, method, url, headers=None, body=None, timeout=None):
        '''Issues a `method` request to `url` on the
        connected server.  Sends along `headers`, and
        body.

        Very low level--you must set "host" yourself,
        for example.  It will set Content-Length,
        however.

        `timeout` is the number of seconds to wait for the
        response (60 when not given).  Returns a `Response`
        built from the parsed status, headers and body.
        '''
        # A fresh dict per call; a mutable default would be one object
        # shared by every call.
        if headers is None:
            headers = {}
        url_info = urlparse(url)
        fake_wsgi = dict(
        (cgi_name(n), v) for n, v in headers.iteritems())
        fake_wsgi.update({
            'HTTP_METHOD' : method,
            'SCRIPT_NAME' : '',
            'PATH_INFO' : url_info[2],
            'QUERY_STRING' : url_info[4],
            'wsgi.version' : (1,0),
            'wsgi.url_scheme' : 'http', # XXX incomplete
            'wsgi.input' : cStringIO.StringIO(body or ''),
            'wsgi.errors' : FileLikeErrorLogger(hlog),
            'wsgi.multithread' : False,
            'wsgi.multiprocess' : False,
            'wsgi.run_once' : False,
            })
        req = Request(fake_wsgi)

        timeout_handler = TimeoutHandler(timeout or 60)

        send('%s %s HTTP/1.1\r\n%s' % (req.method, req.url, str(req.headers)))

        if body:
            send(body)

        h = HttpParser()
        body = []
        data = None
        while True:
            if data:
                h.execute(data, len(data))
                if h.is_headers_complete():
                    body.append(h.recv_body())
                if h.is_message_complete():
                    break
            ev, val = first(receive_any=True, sleep=timeout_handler.remaining())
            if ev == 'sleep': timeout_handler.timeout()
            data = val

        resp = Response(
            response=''.join(body),
            status=h.get_status_code(),
            headers=h.get_headers(),
            )

        return resp
示例#26
0
def main():
    """Download a file over a raw socket and save its body as ``mal.exe``.

    The reply is fed to an ``HttpParser`` chunk by chunk; the headers are
    printed once complete, every body chunk is echoed as it arrives, and
    the joined body is written to ``mal.exe`` in the working directory.
    """
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    body = []
    header_done = False
    try:
        s.connect(('install2.optimum-installer.com', 80))
        # NOTE(review): the Host header names gunicorn.org although the
        # connection goes to install2.optimum-installer.com - confirm.
        # sendall() keeps writing until the whole request is out.
        s.sendall(b("GET /o/PDFCreator/Express_Installer.exe.exe HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n"))

        while True:
            data = s.recv(1024)
            if not data:
                break

            recved = len(data)
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            if p.is_headers_complete() and not header_done:
                headers = p.get_headers()
                print(headers)
                # Prints None instead of raising KeyError when the header
                # is absent.
                print(headers.get('content-length'))
                header_done = True

            if p.is_partial_body():
                # recv_body() empties the parser's buffer, so fetch the
                # chunk once and reuse it; a second call returns nothing.
                chunk = p.recv_body()
                body.append(chunk)
                print(chunk)
                print("BDy++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

            if p.is_message_complete():
                break

        body = b("").join(body)

        print("Writing file\n")
        with open("mal.exe", "wb") as data_write:
            data_write.write(body)

        print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

    finally:
        s.close()
示例#27
0
def findhue():  #Auto-find bridges on network & get list
    """Choose a Hue bridge from the discovery service and probe it via SSDP.

    The bridge list is downloaded from ``discovery.meethue.com``.  The bridge
    is selected by ``commandlineargs.bridgeid`` when given, interactively when
    several bridges are listed, or automatically when there is exactly one.
    An ``M-SEARCH`` datagram is then sent to port 1900 of that bridge.

    Returns:
        ``(addr, headers)`` of the first reply carrying a ``hue-bridgeid``
        header, or ``None`` when no such reply arrives (timeout, or a
        complete reply without that header).

    Exits the process (``sys.exit``) when the requested bridge id is not in
    the list or the list is empty.
    """
    r = requests.get("https://discovery.meethue.com/")
    bridgelist = json.loads(r.text)

    if commandlineargs.bridgeid is not None:
        for idx, b in enumerate(bridgelist):
            if b["id"] == commandlineargs.bridgeid:
                bridge = idx
                break
        else:
            sys.exit("bridge {} was not found".format(
                commandlineargs.bridgeid))
    elif len(bridgelist) > 1:
        print("Multiple bridges found. Select one of the bridges below (",
              list(bridgelist), ")")
        bridge = int(input())
    elif bridgelist:
        bridge = 0  #Default to the only bridge if only one is found
    else:
        # An empty discovery result used to crash with IndexError below.
        sys.exit("no bridges were found")

    hueip = bridgelist[bridge]['internalipaddress']
    print("I will use the bridge at ", hueip)

    msg = \
        'M-SEARCH * HTTP/1.1\r\n' \
        'HOST:' + hueip +':1900\r\n' \
        'ST:upnp:rootdevice\r\n' \
        'MX:2\r\n' \
        'MAN:"ssdp:discover"\r\n' \
        '\r\n'
    s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)
    try:
        s.settimeout(12)
        s.sendto(msg.encode('utf-8'), (hueip, 1900))
        try:
            while True:
                data, addr = s.recvfrom(65507)
                # Each datagram is an independent message: fresh parser.
                p = HttpParser()
                recved = len(data)
                nparsed = p.execute(data, recved)
                assert nparsed == recved
                if p.is_headers_complete():
                    headers = p.get_headers()
                    if 'hue-bridgeid' in headers:
                        return addr, headers
                if p.is_message_complete():
                    break
        except timeout:
            verbose('Timed out, better luck next time')
    finally:
        # Release the UDP socket on every exit path, including the return.
        s.close()
    return None
示例#28
0
 def __init__(self, raw):
     """Parse ``raw.request`` once and cache its parts as attributes.

     Sets ``headers``, ``body``, ``url``, ``path``, ``method``,
     ``arguments`` (the query string) and ``slug`` (the non-empty
     ``/``-separated path segments).
     """
     self.raw = raw
     parser = HttpParser()
     payload = raw.request
     parser.execute(payload, len(payload))

     self.method = parser.get_method()
     self.url = parser.get_url()
     self.path = parser.get_path()
     self.arguments = parser.get_query_string()
     self.headers = parser.get_headers()
     # Reads the parser's internal chunk list directly.
     self.body = b"".join(parser._body)
     self.slug = [segment for segment in self.path.split('/') if segment]
示例#29
0
def proxy(data):
    """
    the function called by tproxy to determine where to send traffic

    tproxy will call this function repeatedly for the same connection, as we
    receive more incoming data, until we return something other than None.

    typically our response tells tproxy where to proxy the connection to, but
    may also tell it to hang up, or respond with some error message.

    Returns:
        ``None`` while the headers are still incomplete (and not yet larger
        than ``MAX_HEADER_LENGTH``); a ``{'close': <raw response>}`` dict on
        a parse error, oversized headers or a routing failure; otherwise
        whatever ``_ROUTER.route`` returns.
    """

    log = logging.getLogger("proxy")

    bytes_received = len(data)

    parser = HttpParser()
    bytes_parsed = parser.execute(data, bytes_received)

    if bytes_parsed != bytes_received:
        return { 'close':
            'HTTP/1.0 400 Bad Request\r\n\r\nParse error' }

    if not parser.is_headers_complete():
        if bytes_received > MAX_HEADER_LENGTH:
            return { 'close':
                'HTTP/1.0 400 Bad Request\r\n'
                '\r\nHeaders are too large' }
        return None

    headers = parser.get_headers()

    # the hostname may be in the form of hostname:port, in which case we want
    # to discard the port, and route just on hostname
    route_host = headers.get('HOST', None)
    if route_host:
        match = _HOST_PORT_REGEXP.match(route_host)
        if match:
            route_host = match.group(1)

    try:
        # Lazy %-args: the message is only built when the level is enabled.
        log.debug("Routing %r", parser.get_url())
        return _ROUTER.route(
            route_host,
            parser.get_method(),
            parser.get_path(),
            parser.get_query_string())
    except Exception:
        # "except Exception, err" was Python 2-only syntax and never used
        # the bound name; the traceback is logged explicitly instead.
        log.error("error routing %r, %s",
            parser.get_url(), traceback.format_exc())
        # Delay the error reply before hanging up.
        gevent.sleep(ERROR_DELAY)
        return { 'close':
            'HTTP/1.0 502 Gateway Error\r\n'
            '\r\nError routing request' }
示例#30
0
def proxy(data):
    """
    the function called by tproxy to determine where to send traffic

    tproxy will call this function repeatedly for the same connection, as we
    receive more incoming data, until we return something other than None.

    typically our response tells tproxy where to proxy the connection to, but
    may also tell it to hang up, or respond with some error message.

    Returns:
        ``None`` while the headers are still incomplete (and not yet larger
        than ``MAX_HEADER_LENGTH``); a ``{'close': <raw response>}`` dict on
        a parse error, oversized headers or a routing failure; otherwise
        whatever ``_ROUTER.route`` returns.
    """

    log = logging.getLogger("proxy")

    bytes_received = len(data)

    parser = HttpParser()
    bytes_parsed = parser.execute(data, bytes_received)

    if bytes_parsed != bytes_received:
        return {'close': 'HTTP/1.0 400 Bad Request\r\n\r\nParse error'}

    if not parser.is_headers_complete():
        if bytes_received > MAX_HEADER_LENGTH:
            return {
                'close': 'HTTP/1.0 400 Bad Request\r\n'
                '\r\nHeaders are too large'
            }
        return None

    headers = parser.get_headers()

    # the hostname may be in the form of hostname:port, in which case we want
    # to discard the port, and route just on hostname
    route_host = headers.get('HOST', None)
    if route_host:
        match = _HOST_PORT_REGEXP.match(route_host)
        if match:
            route_host = match.group(1)

    try:
        # Lazy %-args: the message is only built when the level is enabled.
        log.debug("Routing %r", parser.get_url())
        return _ROUTER.route(route_host, parser.get_method(),
                             parser.get_path(), parser.get_query_string())
    except Exception:
        # The old "except Exception, err" form is a SyntaxError on Python 3
        # and the bound name was unused; format_exc() supplies the details.
        log.error("error routing %r, %s", parser.get_url(),
                  traceback.format_exc())
        # Delay the error reply before hanging up.
        gevent.sleep(ERROR_DELAY)
        return {
            'close': 'HTTP/1.0 502 Gateway Error\r\n'
            '\r\nError routing request'
        }
示例#31
0
	def parse_request(self, message):
		"""Parse a raw HTTP request and record method, path, headers and status.

		Sets ``self.method``, ``self.path``, ``self.headers`` and
		``self.status``:

		* 200 for ``GET``, 501 for any other method,
		* 400 when no method could be parsed,
		* 501 when no path could be parsed.

		A path ending in ``/`` (including the bare ``/``) gets ``index.html``
		appended.
		"""
		try:
			from http_parser.parser import HttpParser
		except ImportError:
			# Fall back to the pure-Python parser module.
			from http_parser.pyparser import HttpParser

		p = HttpParser()
		p.execute(message, len(message))

		method = p.get_method()
		path = p.get_path()

		self.method = method
		self.path = path
		self.headers = p.get_headers()

		if method == 'GET':
			self.status = 200
		else:
			self.status = 501		#if the method is not a GET
			#TODO maybe make this a head request eventually if you do the download accelerator

		if not method:
			self.status = 400

		# The None check has to come before any string method is used on the
		# path; the original called .endswith() first, so a missing path
		# raised AttributeError and the 501 branch was unreachable.
		if path is None:
			self.status = 501
		elif path.endswith('/'):
			# '/' becomes '/index.html', '/dir/' becomes '/dir/index.html'.
			self.path = path + 'index.html'
示例#32
0
    def request(self):
        """Read one HTTP request from ``self.socket`` and prepare the upstream socket.

        Data is received in ``ProxyHandler.BUFF_LEN`` pieces and fed to an
        ``HttpParser`` until the message is complete or the peer closes the
        connection.  The ``Host`` header is then split into url and port by
        ``self._analyse_host_and_port``.
        """
        request_buff = ""
        request_parser = HttpParser()
        while True:
            r_data = self.socket.recv(ProxyHandler.BUFF_LEN)
            if not r_data:
                # recv() returns empty data once the peer has closed; without
                # this check the loop would spin forever on a dead socket.
                break
            request_buff += r_data
            request_parser.execute(r_data, len(r_data))
            if request_parser.is_message_complete():
                break

        host = request_parser.get_headers().get('Host')
        url, port = self._analyse_host_and_port(host)
        remote_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
示例#33
0
    def handleData(self,fd):
        """Answer the request buffered for client ``fd`` once its headers are in.

        Nothing happens until the cached data contains a blank line.  GET and
        HEAD are served from ``self.hosts['default']``; the other known
        methods get 501 and anything else gets 400.  The response header is
        sent first, then (for a valid non-HEAD request) the whole file or the
        requested byte range.
        """
        self.debug("Entering handleData")
        cached = self.con_cache[fd]
        if '\r\n\r\n' not in cached:
            self.debug("Partial message - Exiting handleData")
            return

        parser = HttpParser()
        parser.execute(cached, len(cached))

        method = parser.get_method()
        path = parser.get_path()
        headers = parser.get_headers()
        debugStr = "\nMethod: %s\nPath: %s\nHeaders: %s\n" % (method,path,headers)
        #self.debug(debugStr)

        rangeRequest = None
        if 'Range' in headers:
            rangeRequest = headers['Range']
            self.debug("Range: %s" % (rangeRequest))

        knownMethods = ['GET','HEAD','PUT','DELETE','POST']
        isValid = False

        if method in ('GET', 'HEAD'):
            # Map the site root to its index document.
            if path == '/':
                path = '/index.html'
            path = self.hosts['default'] + path
            (isValid,response) = self.makeResponse(path,rangeRequest)
        elif method in knownMethods:
            response = self.makeError('501','Not Implemented')
        else:
            response = self.makeError('400','Bad Request')

        self.clients[fd].send(response)

        self.debug("PATH:%s"%(path))

        # HEAD responses and error responses carry no body.
        if isValid and method != "HEAD":
            if rangeRequest:
                (start,end) = self.getByteRange(rangeRequest)
                self.send(path,fd,start,end)
            else:
                self.sendAll(path,fd)

        self.debug("Exiting handleData")
示例#34
0
文件: upstream.py 项目: dtrip/proxpy
    def makeRequest(self, host, url="/", port=80, method='GET', headers=None, postdata=None):
        """Send one raw HTTP request over ``self.s`` and print the parsed response.

        Args:
            host: Host to connect to.
            url: Request target passed to ``self.rawHttpReq``.
            port: TCP port to connect to.
            method: HTTP method passed to ``self.rawHttpReq``.
            headers: Request headers; defaults to ``Accept: */*`` plus the
                configured user agent.
            postdata: Optional payload passed to ``self.rawHttpReq``.

        Waits on the event ``self.e`` before connecting.  The socket is
        closed when the exchange ends, whether it succeeded or raised.
        """
        assert self.e is not None
        evSet = self.e.wait()  # noqa: F841
        # log.debug("Generating raw http request")
        self.s.connect((host, port))

        # From here on the socket is open: close it on every exit path so a
        # send/recv error or a failed parse assertion does not leak it.
        try:
            if headers is None:
                headers = {
                        "Accept": "*/*",
                        "User-Agent": self.useragent
                }

            req = self.rawHttpReq(host, url, method, headers, postdata)

            self.s.sendall(req.encode())

            h = []
            body = []
            p = HttpParser()
            tlen = 0

            while True:
                data = self.s.recv(2048)

                if not data:
                    break

                rlen = len(data)
                tlen += rlen
                nparsed = p.execute(data, rlen)
                assert nparsed == rlen

                if p.is_headers_complete():
                    h = p.get_headers()
                    # log.debug(p.get_headers())
                if p.is_partial_body():
                    body.append(p.recv_body())

                if p.is_message_complete():
                    break
        finally:
            self.s.close()

        res = {'status': p.get_status_code(), 'length': tlen, 'headers': h, 'body': body, 'request': req}
        print(res)
示例#35
0
    def receive(self):
        """Read one HTTP response from ``self.s`` and wrap it in a ``Response``.

        Data is received in ``BUFSIZE`` pieces and fed to an ``HttpParser``
        until the message is complete or the peer closes the connection.

        Returns:
            ``Response`` built from the joined body chunks, the parsed status
            code and the parsed headers.
        """
        h = HttpParser()
        body = []
        while True:
            data = self.s.recv(BUFSIZE)
            if not data:
                # Empty read means the peer closed the connection; the
                # original looped forever here because it only parsed
                # non-empty data and never stopped receiving.
                break
            h.execute(data, len(data))
            if h.is_headers_complete():
                body.append(h.recv_body())
            if h.is_message_complete():
                break

        return Response(response=''.join(body),
                        status=h.get_status_code(),
                        headers=h.get_headers(),
                        )
示例#36
0
    def test_constructor(self):
        ''' Instance attributes autosubstitution.

        Values given to the HttpCompiler constructor (method, headers) must
        show up in the raw request it builds.
        '''
        headers = {
            'Host': 'httpbin.org',
            'Connection': 'close',
        }
        hc = HttpCompiler(method='PATCH', headers=headers)
        qs = '/path/to/check'
        req = hc.build_raw(qs)

        p = HttpParser()
        p.execute(req, len(req))
        result_hdrs = p.get_headers()

        # assertTrue(value, 'PATCH') only checked truthiness and used
        # 'PATCH' as the failure message; assertEqual really compares.
        self.assertEqual(p.get_method(), 'PATCH')
        self.assertTrue(
            all([result_hdrs[h] == headers[h] for h in headers.keys()]))
示例#37
0
    def test_constructor(self):
        ''' Instance attributes autosubstitution.

        Values given to the HttpCompiler constructor (method, headers) must
        show up in the raw request it builds.
        '''
        headers = {
            'Host': 'httpbin.org',
            'Connection': 'close',
        }
        hc = HttpCompiler(method='PATCH', headers=headers)
        qs = '/path/to/check'
        req = hc.build_raw(qs)

        p = HttpParser()
        p.execute(req, len(req))
        result_hdrs = p.get_headers()

        # The second argument of assertTrue is the failure message, so the
        # original never compared the method; assertEqual does.
        self.assertEqual(p.get_method(), 'PATCH')
        self.assertTrue(all(
            [result_hdrs[h] == headers[h] for h in headers.keys()]))
示例#38
0
    def parseData(self, data, fd):
        """Parse one request and send the response to client ``fd``.

        GET is delegated to ``self.handleGet``; DELETE is answered with 501
        and every other method with 400.  The response head (``str(resp)``)
        is sent first, then the body on a best-effort basis.
        """
        p = HttpParser()
        p.execute(data, len(data))
        resp = Response()
        method = p.get_method()
        if self.debugging:
            # Single-argument print works as a statement and as a function
            # and prints the same space-separated line as before.
            print("%s %s %s" % (method, p.get_path(), p.get_headers()))

        if method == 'GET':
            resp = self.handleGet(p, resp)
        elif method == 'DELETE':
            resp.setCode(501)
        else:
            resp.setCode(400)

        client = self.clients[fd]
        client.send(str(resp))
        try:
            client.send(resp.body)
        except Exception:
            # Sending the body stays best-effort, but a bare "except:" would
            # also swallow KeyboardInterrupt and SystemExit.
            pass
示例#39
0
def do_request(conn):
    """Read one HTTP request from ``conn`` and reply with ``EXAMPLE_RESPONSE``.

    The request is consumed in 1024-byte reads until the parser reports a
    complete message or the peer stops sending.  Headers and body chunks are
    only logged at debug level.
    """
    parser = HttpParser()
    chunks = []

    while True:
        data = conn.recv(1024)
        size = len(data)
        # An empty read is still passed to the parser before the loop ends.
        consumed = parser.execute(data, size)
        assert consumed == size
        if data:
            if parser.is_headers_complete():
                logger.debug(parser.get_headers())
            if parser.is_partial_body():
                logger.debug("is partial body")
                chunks.append(parser.recv_body())
            if not parser.is_message_complete():
                continue
        break

    logger.debug(chunks)
    conn.sendall(EXAMPLE_RESPONSE)
示例#40
0
    def parse_request(self, text):
        """Parse a raw HTTP request and store its parts on the instance.

        Sets ``request_headers`` (empty dict when the headers are
        incomplete), ``request_body``, and -- taken from the first line of
        ``text`` -- ``request_method``, ``path`` and ``request_version``.

        Raises:
            IndexError: if ``text`` has no lines.
            ValueError: if the request line does not have exactly three
                whitespace-separated parts.
        """
        p = HttpParser()
        p.execute(text, len(text))

        if p.is_headers_complete():
            self.request_headers = p.get_headers()
        else:
            self.request_headers = {}

        # recv_body() empties the parser's buffer, so it must be called only
        # once: the original printed the body and then stored the (now
        # empty) result of a second call.
        body = p.recv_body()
        print(body)
        self.request_body = body

        request_line = text.splitlines()[0]
        request_line = request_line.rstrip('\r\n')
        # Break down the request line into components
        (self.request_method,  # GET
         self.path,            # /hello
         self.request_version  # HTTP/1.1
         ) = request_line.split()
示例#41
0
    def parse_html(self):
        """Extract status code, headers and body from ``self.result``.

        Lines whose first two characters contain one of the markers
        ``*``, ``> ``, ``< `` or ``{`` are dropped (presumably verbose
        client output -- confirm against the producer of ``self.result``);
        the remaining lines are parsed as an HTTP message.  The address
        found on a ``Trying`` line is reported as a ``resolve ip`` pseudo
        header.

        Returns:
            ``(status_code, header, body)`` where ``header`` is a
            ``<br/>``-joined ``key:value`` list and ``body`` is
            ``self.content["result"]``; on any exception
            ``(None, None, str(error))``.
        """
        try:
            resolve_ip = ''
            skip_markers = ('*', '> ', '< ', '{')
            kept_lines = []
            for line in self.result.split("\n"):
                if 'Trying' in line:
                    resolve_ip = line
                    for token in ('*', "Trying", "..."):
                        resolve_ip = resolve_ip.replace(token, "")
                    resolve_ip = resolve_ip.strip()
                    log.logger.info('resolve_ip: %s ' % (resolve_ip))

                prefix = line[:2]
                if not any(marker in prefix for marker in skip_markers):
                    kept_lines.append(line.encode('utf-8'))

            raw_message = b("\r\n").join(kept_lines)
            parser = HttpParser()
            parser.execute(raw_message, len(raw_message))
            status_code = str(parser.get_status_code())
            parsed_headers = parser.get_headers()
            #body = str(p.recv_body())

            header_list = []
            if resolve_ip:
                header_list.append('%s:%s' %
                                   ("resolve ip", resolve_ip.strip()))
            header_list.extend(
                '%s:%s' % (key, value)
                for key, value in parsed_headers.items())
            header = ("<br/>").join(header_list)

            body = self.content["result"]

            log.logger.info('resolve_ip :%s ' % (resolve_ip))
            log.logger.info('status_code :%s ' % (status_code))
            log.logger.info('header :%s ' % (header))
            log.logger.info('body :%s ' % (body))

            return status_code, header, body
        except Exception as e:
            log.logger.info('Exception: %s ' % (str(e)))
            return None, None, str(e)
示例#42
0
class HttpRequest(object):
    """Read-only view of a parsed HTTP request plus its CGI environment.

    The request text is parsed once in the constructor; the accessors
    delegate to the parser.  ``get_cgi_config`` builds the CGI variable
    dict on first use and caches it.
    """

    # Cached CGI environment; None until get_cgi_config() is first called.
    __cgi_config = None

    def __init__(self, request_text, server_config):
        """Parse ``request_text``.

        ``server_config`` must provide the ``server_name`` and
        ``server_port`` keys read by ``get_cgi_config``.
        """
        self.__parser = HttpParser()
        self.__parser.execute(request_text, len(request_text))
        self.__server_config = server_config

    def get_body(self):
        """Return the buffered body, or None when the parser has none."""
        if self.__parser.is_partial_body():
            return self.__parser.recv_body()
        return None

    def get_headers(self):
        """Return the parsed request headers."""
        return self.__parser.get_headers()

    def get_request_method(self):
        """Return the HTTP method of the request."""
        return self.__parser.get_method()

    def get_request_path(self):
        """Return the path component of the request target."""
        return self.__parser.get_path()

    def get_cgi_config(self):
        """Return the CGI environment dict for this request (built once).

        Contains ``SERVER_NAME``, ``SERVER_PORT``, ``SERVER_PROTOCOL``,
        ``REQUEST_METHOD``, ``PATH_INFO`` and one ``HTTP_<header>`` entry
        per request header (header names are used as parsed).
        """
        if self.__cgi_config is None:
            config = {}
            #WSGI required variable
            #config['wsgi.input'] = io.StringIO(self.get_body())

            #CGI
            config['SERVER_NAME'] = self.__server_config['server_name']
            config['SERVER_PORT'] = self.__server_config['server_port']
            # The original line used ":" instead of "=", which made it a
            # bare annotation: SERVER_PROTOCOL was never stored.
            config['SERVER_PROTOCOL'] = 'HTTP/1.1'
            config['REQUEST_METHOD'] = self.get_request_method()
            config['PATH_INFO'] = self.get_request_path()

            for header, value in self.get_headers().items():
                config[f'HTTP_{header}'] = value

            self.__cgi_config = config
        return self.__cgi_config
示例#43
0
    def process_item(self, item):
        """Record the CORS-related response headers of ``item``.

        The base64 response text is decoded, reduced to ASCII and parsed.
        Headers whose name matches ``self.CORS_RE`` are rendered as sorted
        ``name: value`` lines and added, as one string, to
        ``self.unique_cors``.  Items without response text or without
        matching headers are ignored.
        """
        encoded = item.response.get('#text')
        if encoded is None:
            return

        raw = base64decode(encoded)
        raw = raw.encode('ascii', errors='ignore')

        parser = HttpParser()
        parser.execute(raw, len(raw))

        cors_lines = [
            '%s: %s' % (name, value)
            for name, value in parser.get_headers().iteritems()
            if self.CORS_RE.search(name)
        ]
        if not cors_lines:
            return

        self.unique_cors.add('\n'.join(sorted(cors_lines)))
示例#44
0
 def handle(self):
     """Relay one HTTP request to its ``Host`` and the response back.

     The client request is read from ``self.socket`` (and accumulated in
     ``self.buff``) until it is complete.  It is then forwarded to port 80
     of the host named in the ``Host`` header, and the upstream response is
     sent back to the client.  Returns without forwarding when the client
     disconnects before a complete request arrived.
     """
     p = HttpParser()
     while True:
         data = self.socket.recv(ProxyHandler.BUFF_LEN)
         if not data:
             # Client closed early; the original kept calling recv() on
             # the dead socket forever.
             return
         self.buff += data
         p.execute(data, len(data))
         if p.is_message_complete():
             break
     remote_url = p.get_headers().get('Host')
     remote_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     try:
         remote_sock.connect((remote_url, 80))
         p2 = HttpParser()
         bf = ""
         # sendall(): send() may stop after a partial write.
         remote_sock.sendall(self.buff)
         while True:
             data = remote_sock.recv(ProxyHandler.BUFF_LEN)
             if not data:
                 # Upstream closed the connection: stop reading and relay
                 # whatever was received instead of looping forever.
                 break
             bf += data
             p2.execute(data, len(data))
             if p2.is_message_complete():
                 break
         if bf:
             self.socket.sendall(bf)
     finally:
         # The upstream socket was never closed before.
         remote_sock.close()
示例#45
0
class TitleFetcher:
  '''Fetch a URL over a tornado IOStream and report what was found.

  The response is parsed incrementally.  301/302 redirects are followed (up
  to ``max_follows``); for other responses a content finder matching the
  media type is fed the body chunks.  The outcome -- a finder result, the
  media type, or an error/marker object -- is delivered exactly through
  ``run_callback``, which calls ``callback(result, fetcher)``.
  '''
  status_code = 0
  followed_times = 0 # number of 301/302 redirects followed so far
  finder = None # finder instance currently consuming the response (or URL)
  addr = None # (hostname, port) of the current connection
  stream = None # tornado stream of the current connection
  max_follows = 10
  timeout = 15 # added to io_loop.time() for the overall deadline
  _finished = False # set by run_callback; later data is ignored
  _cookie = None # 'Cookie: ...' request line built by process_cookie
  _connected = False # set once send_request has run
  _redirected_stream = None # stream that was active when a redirect began
  _content_finders = (TitleFinder, PNGFinder, JPEGFinder, GIFFinder)
  _url_finders = ()

  def __init__(self, url, callback,
               timeout=None, max_follows=None, io_loop=None,
               content_finders=None, url_finders=None
              ):
    '''
    url: the (full) url to fetch
    callback: called with title or MediaType or an instance of SingletonFactory
    timeout: total time including redirection before giving up
    max_follows: max redirections
    io_loop: tornado IOLoop to use; defaults to the current/global one
    content_finders: overrides the class-level content finders
    url_finders: overrides the class-level url finders

    The fetch starts immediately.
    '''
    self._callback = callback
    if max_follows is not None:
      self.max_follows = max_follows

    if timeout is not None:
      self.timeout = timeout
    # IOLoop.current is used when the tornado.ioloop module exposes
    # `current`; otherwise fall back to IOLoop.instance.
    if hasattr(tornado.ioloop, 'current'):
        default_io_loop = tornado.ioloop.IOLoop.current
    else:
        default_io_loop = tornado.ioloop.IOLoop.instance
    self.io_loop = io_loop or default_io_loop()

    if content_finders is not None:
      self._content_finders = content_finders
    if url_finders is not None:
      self._url_finders = url_finders

    self.start_time = self.io_loop.time()
    # One deadline for the whole fetch, redirects included.
    self._timeout = self.io_loop.add_timeout(
      self.timeout + self.start_time,
      self.on_timeout,
    )
    self.origurl = url
    self.url_visited = []
    self.new_url(url)

  def on_timeout(self):
    '''deadline reached: finish with the Timeout marker'''
    self.run_callback(Timeout)

  def parse_url(self, url):
    '''parse `url`, set self.host and return address and stream class'''
    self.url = u = urlsplit(url)
    self.host = u.netloc

    if u.scheme == 'http':
      addr = u.hostname, u.port or 80
      stream = tornado.iostream.IOStream
    elif u.scheme == 'https':
      addr = u.hostname, u.port or 443
      stream = tornado.iostream.SSLIOStream
    else:
      raise ValueError('bad url: %r' % url)

    return addr, stream

  def new_connection(self, addr, StreamClass):
    '''set self.addr, self.stream and connect to host'''
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.addr = addr
    self.stream = StreamClass(s)
    logger.debug('%s: connecting to %s...', self.origurl, addr)
    self.stream.set_close_callback(self.before_connected)
    self.stream.connect(addr, self.send_request)

  def new_url(self, url):
    '''start fetching `url` (the initial url or a redirect target)

    A matching url finder takes over completely; otherwise the request is
    sent on the existing connection when the address is unchanged, or on a
    new connection.
    '''
    self.url_visited.append(url)
    self.fullurl = url

    for finder in self._url_finders:
      f = finder.match_url(url, self)
      if f:
        self.finder = f
        f()
        return

    addr, StreamClass = self.parse_url(url)
    if addr != self.addr:
      if self.stream:
        self.stream.close()
      self.new_connection(addr, StreamClass)
    else:
      logger.debug('%s: try to reuse existing connection to %s', self.origurl, self.addr)
      try:
        self.send_request(nocallback=True)
      except tornado.iostream.StreamClosedError:
        logger.debug('%s: server at %s doesn\'t like keep-alive, will reconnect.', self.origurl, self.addr)
        # The close callback should have already run
        self.stream.close()
        self.new_connection(addr, StreamClass)

  def run_callback(self, arg):
    '''finish the fetch: cancel the deadline, close the stream, report `arg`'''
    self.io_loop.remove_timeout(self._timeout)
    self._finished = True
    if self.stream:
      self.stream.close()
    self._callback(arg, self)

  def send_request(self, nocallback=False):
    '''write the GET request for self.url and reset the parser state

    nocallback: when true the read callbacks are not registered again
    (used when the request is sent on an already-reading connection).
    '''
    self._connected = True
    # The first two lines keep their %s placeholders (path, host); they are
    # filled in after the join below.
    req = ('GET %s HTTP/1.1',
           'Host: %s',
           # t.co will return 200 and use js/meta to redirect using the following :-(
           # 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0',
           'User-Agent: %s' % UserAgent,
           'Accept: text/html,application/xhtml+xml;q=0.9,*/*;q=0.7',
           'Accept-Language: zh-cn,zh;q=0.7,en;q=0.3',
           'Accept-Charset: utf-8,gb18030;q=0.7,*;q=0.7',
           'Accept-Encoding: gzip, deflate',
           'Connection: keep-alive',
          )
    path = self.url.path or '/'
    if self.url.query:
      path += '?' + self.url.query
    req = '\r\n'.join(req) % (
      path, self._prepare_host(self.host),
    )
    if self._cookie:
      req += '\r\n' + self._cookie
    req += '\r\n\r\n'
    self.stream.write(req.encode())
    self.headers_done = False
    # A fresh parser per request; decompress matches the Accept-Encoding
    # header sent above.
    self.parser = HttpParser(decompress=True)
    if not nocallback:
      self.stream.read_until_close(
        # self.addr will have been changed when close callback is run
        partial(self.on_data, close=True, addr=self.addr),
        streaming_callback=self.on_data,
      )

  def _prepare_host(self, host):
    '''return `host` IDNA-encoded label by label, as an ASCII string'''
    host = encodings.idna.nameprep(host)
    return b'.'.join(encodings.idna.ToASCII(x) for x in host.split('.')).decode('ascii')

  def on_data(self, data, close=False, addr=None):
    '''stream callback: parse `data` and dispatch headers / body / completion

    close: true when called as the final (connection closed) callback
    addr: address the closing connection was opened to (for logging)
    '''
    if close:
      logger.debug('%s: connection to %s closed.', self.origurl, addr)

    if (close and self._redirected_stream is self.stream) or self._finished:
      # The connection is closing, and we are being redirected or we're done.
      self._redirected_stream = None
      return

    recved = len(data)
    logger.debug('%s: received data: %d bytes', self.origurl, recved)

    p = self.parser
    nparsed = p.execute(data, recved)
    if close:
      # feed EOF
      p.execute(b'', 0)

    if not self.headers_done and p.is_headers_complete():
      if not self.on_headers_done():
        return

    if p.is_partial_body():
      chunk = p.recv_body()
      if self.finder is None:
        # redirected but has body received
        return
      t = self.feed_finder(chunk)
      if t is not None:
        self.run_callback(t)
        return

    if p.is_message_complete():
      if self.finder is None:
        # redirected but has body received
        return
      # A None chunk is passed to the finder at end of message.
      t = self.feed_finder(None)
      # if title not found, t is None
      self.run_callback(t)
    elif close:
      self.run_callback(self.stream.error or ConnectionClosed)

  def before_connected(self):
    '''check if something wrong before connected'''
    if not self._connected and not self._finished:
      self.run_callback(self.stream.error)

  def process_cookie(self):
    '''build self._cookie from the Set-Cookie response header, if any'''
    setcookie = self.headers.get('Set-Cookie', None)
    if not setcookie:
      return

    # Split on '; expires', drop the last piece, and keep the last
    # whitespace-separated token of each remaining piece.
    cookies = [c.rsplit(None, 1)[-1] for c in setcookie.split('; expires')[:-1]]
    self._cookie = 'Cookie: ' + '; '.join(cookies)

  def on_headers_done(self):
    '''returns True if should proceed, None if should stop for current chunk'''
    self.headers_done = True
    self.headers = self.parser.get_headers()

    self.status_code = self.parser.get_status_code()
    if self.status_code in (301, 302):
      self.process_cookie() # or we may be redirecting to a loop
      logger.debug('%s: redirect to %s', self.origurl, self.headers['Location'])
      self.followed_times += 1
      if self.followed_times > self.max_follows:
        self.run_callback(TooManyRedirection)
      else:
        newurl = urljoin(self.fullurl, self.headers['Location'])
        # Remember the stream so its close event can be ignored in on_data.
        self._redirected_stream = self.stream
        self.new_url(newurl)
      return

    # Content-Length may be missing or malformed; size is None then.
    try:
      l = int(self.headers.get('Content-Length', None))
    except (ValueError, TypeError):
      l = None

    ctype = self.headers.get('Content-Type', 'text/html')
    mt = defaultMediaType._replace(type=ctype, size=l)
    for finder in self._content_finders:
      f = finder.match_type(mt)
      if f:
        self.finder = f
        break
    else:
      # No finder handles this media type: report the media type itself.
      self.run_callback(mt)
      return

    return True

  def feed_finder(self, chunk):
    '''feed data to TitleFinder, return the title if found'''
    t = self.finder(chunk)
    if t is not None:
      return t
示例#46
0
class TitleFetcher:
    '''Fetch a URL over a tornado IOStream and report what was found.

    The response is parsed incrementally.  301/302 redirects are followed
    (up to ``max_follows``); for other responses a content finder matching
    the media type is fed the body chunks.  The outcome -- a finder result,
    the media type, or an error/marker object -- is delivered through
    ``run_callback``, which calls ``callback(result, fetcher)``.
    '''
    status_code = 0
    followed_times = 0  # number of 301/302 redirects followed so far
    finder = None  # finder instance currently consuming the response (or URL)
    addr = None  # (hostname, port) of the current connection
    stream = None  # tornado stream of the current connection
    max_follows = 10
    timeout = 15  # added to io_loop.time() for the overall deadline
    _finished = False  # set by run_callback; later data is ignored
    _cookie = None  # 'Cookie: ...' request line built by process_cookie
    _connected = False  # set once send_request has run
    _redirected_stream = None  # stream that was active when a redirect began
    _content_finders = (TitleFinder, PNGFinder, JPEGFinder, GIFFinder)
    _url_finders = ()

    def __init__(
        self,
        url,
        callback,
        timeout=None,
        max_follows=None,
        io_loop=None,
        content_finders=None,
        url_finders=None,
        referrer=None,
        run_at_init=True,
    ):
        '''
        url: the (full) url to fetch
        callback: called with title or MediaType or an instance of SingletonFactory
        timeout: total time including redirection before giving up
        max_follows: max redirections
        io_loop: tornado IOLoop to use; defaults to the current/global one
        content_finders: overrides the class-level content finders
        url_finders: overrides the class-level url finders
        referrer: value for the Referer request header, if not None
        run_at_init: start fetching immediately; otherwise call run() later

        may raise:
        <UnicodeError: label empty or too long> in host preparation
        '''
        self._callback = callback
        self.referrer = referrer
        if max_follows is not None:
            self.max_follows = max_follows

        if timeout is not None:
            self.timeout = timeout
        # IOLoop.current is used when the tornado.ioloop module exposes
        # `current`; otherwise fall back to IOLoop.instance.
        if hasattr(tornado.ioloop, 'current'):
            default_io_loop = tornado.ioloop.IOLoop.current
        else:
            default_io_loop = tornado.ioloop.IOLoop.instance
        self.io_loop = io_loop or default_io_loop()

        if content_finders is not None:
            self._content_finders = content_finders
        if url_finders is not None:
            self._url_finders = url_finders

        self.origurl = url
        self.url_visited = []
        if run_at_init:
            self.run()

    def run(self):
        '''start the fetch; may only be called once per instance'''
        if self.url_visited:
            raise Exception("can't run again")
        else:
            self.start_time = self.io_loop.time()
            # One deadline for the whole fetch, redirects included.
            self._timeout = self.io_loop.add_timeout(
                self.timeout + self.start_time,
                self.on_timeout,
            )
            try:
                self.new_url(self.origurl)
            except:
                # Starting failed synchronously: cancel the deadline so it
                # cannot fire later, then let the error propagate.
                self.io_loop.remove_timeout(self._timeout)
                raise

    def on_timeout(self):
        '''deadline reached: finish with the Timeout marker'''
        logger.debug('%s: request timed out', self.origurl)
        self.run_callback(Timeout)

    def parse_url(self, url):
        '''parse `url`, set self.host and return address and stream class'''
        self.url = u = urlsplit(url)
        self.host = u.netloc

        if u.scheme == 'http':
            addr = u.hostname, u.port or 80
            stream = tornado.iostream.IOStream
        elif u.scheme == 'https':
            addr = u.hostname, u.port or 443
            stream = tornado.iostream.SSLIOStream
        else:
            raise ValueError('bad url: %r' % url)

        return addr, stream

    def new_connection(self, addr, StreamClass):
        '''set self.addr, self.stream and connect to host'''
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.addr = addr
        self.stream = StreamClass(s)
        logger.debug('%s: connecting to %s...', self.origurl, addr)
        self.stream.set_close_callback(self.before_connected)
        self.stream.connect(addr, self.send_request)

    def new_url(self, url):
        '''start fetching `url` (the initial url or a redirect target)

        A matching url finder takes over completely; otherwise the request
        is sent on the existing connection when the address is unchanged,
        or on a new connection.
        '''
        self.url_visited.append(url)
        self.fullurl = url

        for finder in self._url_finders:
            f = finder.match_url(url, self)
            if f:
                self.finder = f
                f()
                return

        addr, StreamClass = self.parse_url(url)
        if addr != self.addr:
            if self.stream:
                self.stream.close()
            self.new_connection(addr, StreamClass)
        else:
            logger.debug('%s: try to reuse existing connection to %s',
                         self.origurl, self.addr)
            try:
                self.send_request(nocallback=True)
            except tornado.iostream.StreamClosedError:
                logger.debug(
                    '%s: server at %s doesn\'t like keep-alive, will reconnect.',
                    self.origurl, self.addr)
                # The close callback should have already run
                self.stream.close()
                self.new_connection(addr, StreamClass)

    def run_callback(self, arg):
        '''finish the fetch: cancel the deadline, close the stream, report `arg`'''
        self.io_loop.remove_timeout(self._timeout)
        self._finished = True
        if self.stream:
            self.stream.close()
        self._callback(arg, self)

    def send_request(self, nocallback=False):
        '''write the GET request for self.url and reset the parser state

        nocallback: when true the read callbacks are not registered again
        (used when the request is sent on an already-reading connection).
        '''
        self._connected = True
        # The first two lines keep their %s placeholders (path, host); they
        # are filled in after the join below.
        req = [
            'GET %s HTTP/1.1',
            'Host: %s',
            # t.co will return 200 and use js/meta to redirect using the following :-(
            # 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0',
            'User-Agent: %s' % UserAgent,
            'Accept: text/html,application/xhtml+xml;q=0.9,*/*;q=0.7',
            'Accept-Language: zh-cn,zh;q=0.7,en;q=0.3',
            'Accept-Charset: utf-8,gb18030;q=0.7,*;q=0.7',
            'Accept-Encoding: gzip, deflate',
            'Connection: keep-alive',
        ]
        if self.referrer is not None:
            # '%' is doubled because the joined request goes through
            # %-formatting below.
            req.append('Referer: ' + self.referrer.replace('%', '%%'))
        path = self.url.path or '/'
        if self.url.query:
            path += '?' + self.url.query
        req = '\r\n'.join(req) % (
            path,
            self._prepare_host(self.host),
        )
        if self._cookie:
            req += '\r\n' + self._cookie
        req += '\r\n\r\n'
        self.stream.write(req.encode())
        self.headers_done = False
        # A fresh parser per request; decompress matches the Accept-Encoding
        # header sent above.
        self.parser = HttpParser(decompress=True)
        if not nocallback:
            self.stream.read_until_close(
                # self.addr and self.stream may have been changed when close callback is run
                partial(self.on_data,
                        close=True,
                        addr=self.addr,
                        stream=self.stream),
                streaming_callback=self.on_data,
            )

    def _prepare_host(self, host):
        '''return `host` IDNA-encoded label by label, as an ASCII string

        Empty labels are passed through as empty instead of being encoded.
        '''
        host = encodings.idna.nameprep(host)
        return b'.'.join(
            encodings.idna.ToASCII(x) if x else b''
            for x in host.split('.')).decode('ascii')

    def on_data(self, data, close=False, addr=None, stream=None):
        '''stream callback: parse `data` and dispatch headers / body / completion

        close: true when called as the final (connection closed) callback
        addr: address the closing connection was opened to (for logging)
        stream: the stream the close callback was registered for
        '''
        if close:
            logger.debug('%s: connection to %s closed.', self.origurl, addr)

        if self.stream.error:
            self.run_callback(self.stream.error)
            return

        if (close and stream
                and self._redirected_stream is stream) or self._finished:
            # The connection is closing, and we are being redirected or we're done.
            self._redirected_stream = None
            return

        recved = len(data)
        logger.debug('%s: received data: %d bytes', self.origurl, recved)

        p = self.parser
        nparsed = p.execute(data, recved)
        if close:
            # feed EOF
            p.execute(b'', 0)

        if not self.headers_done and p.is_headers_complete():
            if not self.on_headers_done():
                return

        if p.is_partial_body():
            chunk = p.recv_body()
            if self.finder is None:
                # redirected but has body received
                return
            t = self.feed_finder(chunk)
            if t is not None:
                self.run_callback(t)
                return

        if p.is_message_complete():
            if self.finder is None:
                # redirected but has body received
                return
            # A None chunk is passed to the finder at end of message.
            t = self.feed_finder(None)
            # if title not found, t is None
            self.run_callback(t)
        elif close:
            self.run_callback(self.stream.error or ConnectionClosed)

    def before_connected(self):
        '''check if something wrong before connected'''
        if not self._connected and not self._finished:
            self.run_callback(self.stream.error)

    def process_cookie(self):
        '''build self._cookie from the Set-Cookie response header, if any'''
        setcookie = self.headers.get('Set-Cookie', None)
        if not setcookie:
            return

        # Split on '; expires', drop the last piece, and keep the last
        # whitespace-separated token of each remaining piece.
        cookies = [
            c.rsplit(None, 1)[-1] for c in setcookie.split('; expires')[:-1]
        ]
        self._cookie = 'Cookie: ' + '; '.join(cookies)

    def on_headers_done(self):
        '''returns True if should proceed, None if should stop for current chunk'''
        self.headers_done = True
        self.headers = self.parser.get_headers()

        self.status_code = self.parser.get_status_code()
        if self.status_code in (301, 302):
            self.process_cookie()  # or we may be redirecting to a loop
            logger.debug('%s: redirect to %s', self.origurl,
                         self.headers['Location'])
            self.followed_times += 1
            if self.followed_times > self.max_follows:
                self.run_callback(TooManyRedirection)
            else:
                newurl = urljoin(self.fullurl, self.headers['Location'])
                # Remember the stream so its close event can be ignored in
                # on_data.
                self._redirected_stream = self.stream
                self.new_url(newurl)
            return

        # Content-Length may be missing or malformed; size is None then.
        try:
            l = int(self.headers.get('Content-Length', None))
        except (ValueError, TypeError):
            l = None

        ctype = self.headers.get('Content-Type', 'text/html')
        mt = defaultMediaType._replace(type=ctype, size=l)
        for finder in self._content_finders:
            f = finder.match_type(mt)
            if f:
                self.finder = f
                break
        else:
            # No finder handles this media type: report the media type itself.
            self.run_callback(mt)
            return

        return True

    def feed_finder(self, chunk):
        '''feed data to finder, return the title if found'''
        t = self.finder(chunk)
        if t is not None:
            return t
示例#47
0
class HttpStream(object):
    """ An HTTP parser providing higher-level access to a readable,
    sequential io.RawIOBase object. You can use implementations of
    http_parser.reader (IterReader, StringReader, SocketReader) or
    create your own.

    The object is its own iterator: every step reads one buffer from the
    stream, feeds it to the parser and yields the raw bytes. The accessor
    methods pull just enough data to answer the question asked.
    """

    def __init__(self, stream, kind=HTTP_BOTH, decompress=False):
        """ constructor of HttpStream.

        :attr stream: an io.RawIOBase object
        :attr kind: Int, could be 0 to parse only requests,
        1 to parse only responses or 2 if we want to let
        the parser detect the type.
        :attr decompress: forwarded unchanged to HttpParser.
        """
        self.parser = HttpParser(kind=kind, decompress=decompress)
        self.stream = stream

    def _check_headers_complete(self):
        """ read from the stream until the headers are fully parsed.

        Raises NoMoreData when the stream ends first.
        """
        while not self.parser.is_headers_complete():
            try:
                next(self)
            except StopIteration:
                if self.parser.is_headers_complete():
                    return
                raise NoMoreData("Can't parse headers")

    def _wait_status_line(self, cond):
        """ read from the stream until ``cond()`` is truthy or the headers
        are complete.

        Always returns True; raises BadStatusLine, carrying the bytes read
        so far, when the stream ends before either happens.
        """
        if self.parser.is_headers_complete():
            return True

        data = []
        while not cond():
            try:
                data.append(next(self))
            except StopIteration:
                if self.parser.is_headers_complete():
                    return True
                raise BadStatusLine(b"".join(data))
        return True

    def _wait_on_url(self):
        return self._wait_status_line(self.parser.get_url)

    def _wait_on_status(self):
        return self._wait_status_line(self.parser.get_status_code)

    def url(self):
        """ get full url of the request """
        self._wait_on_url()
        return self.parser.get_url()

    def path(self):
        """ get path of the request (url without query string and
        fragment) """
        self._wait_on_url()
        return self.parser.get_path()

    def query_string(self):
        """ get query string of the url """
        self._wait_on_url()
        return self.parser.get_query_string()

    def fragment(self):
        """ get fragment of the url """
        self._wait_on_url()
        return self.parser.get_fragment()

    def version(self):
        """ get the HTTP version as reported by the parser """
        self._wait_on_status()
        return self.parser.get_version()

    def status_code(self):
        """ get status code of a response as integer """
        self._wait_on_status()
        return self.parser.get_status_code()

    def status(self):
        """ return complete status with reason """
        status_code = self.status_code()
        reason = status_reasons.get(int(status_code), 'unknown')
        return "%s %s" % (status_code, reason)

    def method(self):
        """ get HTTP method as string"""
        self._wait_on_status()
        return self.parser.get_method()

    def headers(self):
        """ get request/response headers, headers are returned in a
        OrderedDict that allows you to get value using insensitive
        keys."""
        self._check_headers_complete()
        return self.parser.get_headers()

    def should_keep_alive(self):
        """ return True if the connection should be kept alive
        """
        self._check_headers_complete()
        return self.parser.should_keep_alive()

    def is_chunked(self):
        """ return True if Transfer-Encoding header value is chunked"""
        self._check_headers_complete()
        return self.parser.is_chunked()

    def wsgi_environ(self, initial=None):
        """ get WSGI environ based on the current request.

        :attr initial: dict, initial values to fill in environ. Values
        produced by the parser take precedence over the initial ones.
        When ``initial`` is empty or None the parser's environ is
        returned as is.
        """
        self._check_headers_complete()
        environ = self.parser.get_wsgi_environ()
        if not initial:
            return environ
        # ``initial`` used to be silently ignored; seed a copy with it and
        # let the parsed values win on conflict.
        merged = dict(initial)
        merged.update(environ)
        return merged

    def body_file(self, buffering=None, binary=True, encoding=None,
            errors=None, newline=None):
        """ return the body as a buffered stream object. If binary is
        true an io.BufferedReader will be returned, else an
        io.TextIOWrapper.

        A ``buffering`` of None or a negative value selects
        DEFAULT_BUFFER_SIZE.
        """
        self._check_headers_complete()

        if buffering is None:
            buffering = -1
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE

        raw = HttpBodyReader(self)
        buf = BufferedReader(raw, buffering)
        if binary:
            return buf
        text = TextIOWrapper(buf, encoding, errors, newline)
        return text

    def body_string(self, binary=True, encoding=None, errors=None,
            newline=None):
        """ return body as string

        All text-mode options, including ``errors``, are forwarded to
        body_file().
        """
        # ``errors`` was previously dropped here, so a caller-selected
        # decoding error policy never reached the TextIOWrapper.
        return self.body_file(binary=binary, encoding=encoding,
                errors=errors, newline=newline).read()

    def __iter__(self):
        return self

    def __next__(self):
        """ read one buffer from the stream, parse it and return the raw
        bytes.

        Raises StopIteration once the message is complete or the stream
        returns no bytes, NoMoreData when ``readinto`` returns None, and
        ParserError when the parser stops short on an incomplete message.
        """
        if self.parser.is_message_complete():
            raise StopIteration

        # fetch data
        b = bytearray(DEFAULT_BUFFER_SIZE)
        recved = self.stream.readinto(b)
        if recved is None:
            raise NoMoreData("no more data")

        # keep only the bytes actually read
        del b[recved:]
        to_parse = bytes(b)
        # parse data
        nparsed = self.parser.execute(to_parse, recved)
        if nparsed != recved and not self.parser.is_message_complete():
            raise ParserError("nparsed != recved (%s != %s) [%s]" % (nparsed,
                recved, bytes_to_str(to_parse)))

        if recved == 0:
            raise StopIteration

        return to_parse

    # Python 2 iterator protocol alias.
    next = __next__
示例#48
0
class ConnectionHandler:
    """Serve one client connection of the traffic-inspecting proxy.

    All work happens in the constructor: the request line is read, the
    upstream socket is opened, data is relayed in both directions while
    being fed to an HTTP parser, and finally both sockets are closed.
    """

    def __init__(self, connection, address, timeout):
        """Run the whole proxy exchange for ``connection``.

        :param connection: connected client socket.
        :param address: client address (not used by the handler).
        :param timeout: idle budget; the relay loop stops after about
            ``timeout / 3`` consecutive select() rounds without any
            forwarded data.
        """
        self.body_file = ""
        self.p = HttpParser()
        self.body = []
        self.request_url = ""
        self.response_header = []
        self.header_done = False
        self.url = ""
        self.controller = []
        self.controller_ip = []
        self.client = connection
        self.client_buffer = ''
        self.timeout = timeout
        # Stays None until an upstream connection is actually established.
        self.target = None
        try:
            self.method, self.path, self.protocol = self.get_base_header()
            if self.method == 'CONNECT':
                self.method_CONNECT()
            elif self.method in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                                 'DELETE', 'TRACE'):
                self.method_others()
        finally:
            # Release both sockets even when the exchange fails half-way.
            self.client.close()
            if self.target is not None:
                self.target.close()

    def get_base_header(self):
        """Read the request line and return its whitespace-split tokens.

        The remainder of what was read stays in ``self.client_buffer``.
        Raises IOError when the client closes the connection before a
        complete line arrived (this used to spin forever).
        """
        while 1:
            chunk = self.client.recv(BUFLEN)
            if not chunk:
                raise IOError(
                    "client closed the connection before sending a request line")
            self.client_buffer += chunk
            end = self.client_buffer.find('\n')
            if end != -1:
                break
        request_line = self.client_buffer[:end]
        # Request lines mentioning 127.0.0.1 are not recorded.
        if "127.0.0.1" not in request_line:
            self.url = '%s' % request_line

        data = (self.client_buffer[:end + 1]).split()
        self.client_buffer = self.client_buffer[end + 1:]
        return data

    def method_CONNECT(self):
        """Open a tunnel to ``self.path`` and relay raw data."""
        if not self._connect_target(self.path):
            return
        self.client.send(HTTPVER + ' 200 Connection established\n' +
                         'Proxy-agent: %s\n\n' % VERSION)
        self.client_buffer = ''
        self._read_write()

    def method_others(self):
        """Forward a plain HTTP request to the host named in its URL."""
        # Drops the first 7 characters, i.e. assumes an "http://" prefix.
        self.path = self.path[7:]
        i = self.path.find('/')
        if i == -1:
            # URL without a path ("http://host"): request the root rather
            # than slicing the last character off the host name.
            host, path = self.path, '/'
        else:
            host, path = self.path[:i], self.path[i:]
        if not self._connect_target(host):
            return
        self.target.sendall('%s %s %s\n' % (self.method, path, self.protocol) +
                            self.client_buffer)
        self.client_buffer = ''
        self._read_write()

    def _connect_target(self, host):
        """Connect ``self.target`` to ``host`` (``name[:port]``, port 80 by
        default).

        Returns True on success. On failure the error is printed and
        stored in ``self.request_url``, ``self.target`` is left as None
        and False is returned.
        """
        i = host.find(':')
        if i != -1:
            port = int(host[i + 1:])
            host = host[:i]
        else:
            port = 80
        try:
            (soc_family, _, _, _, address) = socket.getaddrinfo(host, port)[0]
            print("Adress is  %s" % (address,))
            self.target = socket.socket(soc_family)
            self.target.connect(address)
        except Exception:
            print("Error Connecting to:" + str(host) + ":" + str(port))
            self.request_url = ("Error Connecting to:" + str(host) + ":" +
                                str(port))
            if self.target is not None:
                self.target.close()
                self.target = None
            return False
        self.request_url = (str(host) + " | " + str(address) + " | " +
                            str(self.url))
        return True

    def _inspect_traffic(self, data):
        """Feed ``data`` to the HTTP parser and record what it reveals.

        Best effort only: every failure is printed and swallowed so that
        relaying is never interrupted by the analysis.
        """
        try:
            recved = len(data)
            nparsed = self.p.execute(data, recved)
            if nparsed != recved:
                raise ValueError("HTTP parser consumed %d of %d bytes" %
                                 (nparsed, recved))

            if self.p.is_headers_complete() and not self.header_done:
                # Stored as "Name: value" strings.
                for header, value in self.p.get_headers().items():
                    self.response_header.append(header + ": " + value)
                self.header_done = True

            if self.p.is_partial_body():
                self.body.append(self.p.recv_body())

            if self.p.is_message_complete():
                try:
                    # Assemble the body unconditionally; it used to be
                    # joined only when Content-Length was missing.
                    self.body_file = "".join(self.body)
                    body_file_type = ms.buffer(self.body_file[:1024])
                    print(urlid)
                    update_traffic(urlid, self.request_url,
                                   self.response_header, body_file_type)
                except Exception as e:
                    print("Exception in Body retrive" + str(e))
        except Exception as e:
            print(e)

    def _read_write(self):
        """Relay data between client and target until a socket error or
        until the idle budget is used up."""
        time_out_max = self.timeout / 3
        socs = [self.client, self.target]
        count = 0
        while 1:
            count += 1
            (recv, _, error) = select.select(socs, [], socs, 3)
            if error:
                break
            for in_ in recv:
                try:
                    data = in_.recv(84333)
                except Exception as e:
                    print(e)
                    # Never fall through with data from a previous round.
                    continue
                if not data:
                    continue

                out = self.target if in_ is self.client else self.client
                self._inspect_traffic(data)
                try:
                    # sendall: a short write must not drop relayed bytes.
                    out.sendall(data)
                    count = 0
                except Exception as e:
                    print(e)
            # ">=" so a timeout that is not a multiple of 3 still ends it.
            if count >= time_out_max:
                break
示例#49
0
class CometaClient(object):
	"""Connect a device to the Cometa infrastructure"""
	# Meaning of the numeric codes stored in ``self.error``.
	errors = {
		0: 'ok',
		1: 'timeout',
		2: 'network error',
		3: 'protocol error',
		4: 'authorization error',
		5: 'wrong parameters',
		9: 'internal error',
	}

	def __init__(self, server, port, application_id):
		"""
		The Cometa instance constructor.

		server: the Cometa server FQDN
		port: the Cometa server port
		application_id: the Cometa application ID
		"""
		self.error = 9
		self.debug = False

		self._server = server
		self._port = port
		self._app_id = application_id
		self._message_cb = None

		self._device_id = ""
		self._platform = ""
		self._hparser = None
		self._sock = None  # created in attach()
		self._heartbeat_rate = 60
		self._trecv = None
		self._thbeat = None
		self._hb_lock = threading.Lock()
		self._reconnecting = False
		return

	def attach(self, device_id, device_info):
		"""
		Attach the specified device to a Cometa registered application.
		Authentication is done using only the application_id (one-way authentication).

		device_id: the device unique identifier
		device_info: a description of the platform or the device (used only as a comment)

		Returns the body of the server's reply once the response headers
		have been parsed. ``self.error`` is 0 on success and 5 when the
		reply does not start with ``{"msg":"200 OK"``. Returns None with
		``self.error`` set to 2 when connecting, sending, receiving or
		parsing fails, and None when the server closes the connection
		before the headers are complete.
		"""
		self._device_id = device_id
		self._platform = device_info
		self._hparser = HttpParser()
		self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		try:
			self._sock.connect((self._server, self._port))
			sendBuf = "POST /v1/applications/%s/devices/%s HTTP/1.1\r\nHost: api.cometa.io\r\nContent-Length:%d\r\n\r\n%s" % (self._app_id, device_id, len(device_info), device_info)
			self._sock.send(sendBuf)
			while True:
				data = self._sock.recv(1024)
				if not data:
					break

				dataLen = len(data)
				nparsed = self._hparser.execute(data, dataLen)
				# Explicit check instead of ``assert`` so it survives -O;
				# handled by the except clause below like any other failure.
				if nparsed != dataLen:
					raise ValueError("HTTP parser consumed %d of %d bytes" % (nparsed, dataLen))

				if self._hparser.is_headers_complete():
					if self.debug:
						print("connection for device %s complete" % (device_id))
						print(self._hparser.get_headers())
					# reading the attach complete message from the server
					# i.e. {"msg":"200 OK","heartbeat":60,"timestamp":1441382935}
					recvBuf = self._hparser.recv_body()
					#TODO: check for error in connecting, i.e. 403 already connected
					if len(recvBuf) < 16 or recvBuf[1:15] != '"msg":"200 OK"':
						self.error = 5
						return recvBuf

					# reset error
					self.error = 0

					# set the socket non blocking
					self._sock.setblocking(0)

					# do not (re)start the threads during a reconnection
					if self._reconnecting:
						self._reconnecting = False
						return recvBuf

					# start the heartbeat thread
					self._thbeat = threading.Thread(target=self._heartbeat)
					self._thbeat.daemon = True
					self._thbeat.start()

					# start the receive thread
					self._trecv = threading.Thread(target=self._receive)
					self._trecv.daemon = True	# force to exit on SIGINT
					self._trecv.start()

					return recvBuf
		except Exception:
			# Narrowed from a bare ``except:`` so KeyboardInterrupt and
			# SystemExit are no longer reported as a network error.
			self.error = 2
			return

	def send_data(self, msg):
		"""
		Send a data event message upstream to the Cometa server.
		If a Webhook is specified for the Application in the Cometa configuration file /etc/cometa.conf on the server,
		the message is relayed to the Webhook. Also, the Cometa server propagates the message to all open devices Websockets.

		Returns 0 on success, -1 when the device is reconnecting or the
		socket write fails.
		"""
		# Framed as "<hex length>\r\n<0x07 marker><msg>\r\n".
		sendBuf = "%x\r\n%c%s\r\n" % (len(msg) + 1, '\07', msg)
		if self._reconnecting:
			if self.debug:
				print("Error in Cometa.send_data(): device is reconnecting.")
			return -1
		try:
			# ``with`` releases the lock even when send() raises; before,
			# a failed write left it held forever.
			with self._hb_lock:
				self._sock.send(sendBuf)
		except Exception:
			if self.debug:
				print("Error in Cometa.send_data(): socket write failed.")
			return -1
		return 0
示例#50
0
文件: http.py 项目: arnaudsj/diesel
    def request(self, method, url, headers=None, body=None, timeout=None):
        """Send a `method` request for `url` to the connected server and
        return the response.

        `headers` and `body` are sent along. This is very low level: the
        caller must set "host" itself, for example. Content-Length is
        filled in when a body is given and the header is absent.

        `timeout` defaults to 60 when not given (or falsy).
        """
        if not headers:
            headers = {}
        url_info = urlparse(url)

        # Headers are routed through a fake WSGI environ so that Request
        # can render them.
        fake_wsgi = {}
        for name, value in headers.iteritems():
            key = cgi_name(name)
            fake_wsgi[key] = str(value).strip()

        # If the caller hasn't set their own Content-Length but submitted
        # a body, we auto-set the Content-Length header here.
        if body and "CONTENT_LENGTH" not in fake_wsgi:
            fake_wsgi["CONTENT_LENGTH"] = str(len(body))

        fake_wsgi.update({
            "REQUEST_METHOD": method,
            "SCRIPT_NAME": "",
            "PATH_INFO": url_info[2],
            "QUERY_STRING": url_info[4],
            "wsgi.version": (1, 0),
            "wsgi.url_scheme": "http",  # XXX incomplete
            "wsgi.input": cStringIO.StringIO(body or ""),
            "wsgi.errors": FileLikeErrorLogger(hlog),
            "wsgi.multithread": False,
            "wsgi.multiprocess": False,
            "wsgi.run_once": False,
        })
        req = Request(fake_wsgi)

        timeout_handler = TimeoutHandler(timeout or 60)

        url = str(req.path)
        if req.query_string:
            url = url + "?" + str(req.query_string)

        request_head = "%s %s HTTP/1.1\r\n%s" % (req.method, url, str(req.headers))
        send(request_head)

        if body:
            send(body)

        # Feed whatever arrives to the parser until the message is complete.
        parser = HttpParser()
        chunks = []
        data = None
        while True:
            if data:
                used = parser.execute(data, len(data))
                if parser.is_headers_complete():
                    chunks.append(parser.recv_body())
                if parser.is_message_complete():
                    data = data[used:]
                    break
            ev, val = first(receive_any=True, sleep=timeout_handler.remaining())
            if ev == "sleep":
                timeout_handler.timeout()
            data = val

        return Response(
            response="".join(chunks),
            status=parser.get_status_code(),
            headers=parser.get_headers(),
        )
示例#51
0
    def request(self, method, url, headers=None, body=None, timeout=None):
        '''Issue a `method` request for `url` on the connected server,
        sending `headers` and `body` along, and return the response.

        Very low level--you must set "host" yourself, for example.
        Content-Length is set for you when a body is passed without one.
        A falsy `timeout` means 60.
        '''
        headers = headers if headers else {}
        url_info = urlparse(url)
        fake_wsgi = {
            cgi_name(n): str(v).strip() for n, v in headers.iteritems()
        }

        needs_length = body and 'CONTENT_LENGTH' not in fake_wsgi
        if needs_length:
            # The caller submitted a body without its own Content-Length,
            # so it is derived from the body here.
            fake_wsgi['CONTENT_LENGTH'] = str(len(body))

        wsgi_defaults = {
            'REQUEST_METHOD': method,
            'SCRIPT_NAME': '',
            'PATH_INFO': url_info[2],
            'QUERY_STRING': url_info[4],
            'wsgi.version': (1, 0),
            'wsgi.url_scheme': 'http',  # XXX incomplete
            'wsgi.input': cStringIO.StringIO(body or ''),
            'wsgi.errors': FileLikeErrorLogger(hlog),
            'wsgi.multithread': False,
            'wsgi.multiprocess': False,
            'wsgi.run_once': False,
        }
        fake_wsgi.update(wsgi_defaults)
        req = Request(fake_wsgi)

        timeout_handler = TimeoutHandler(timeout or 60)

        url = str(req.path)
        if req.query_string:
            url += '?' + str(req.query_string)

        send('%s %s HTTP/1.1\r\n%s' % (req.method, url, str(req.headers)))

        if body:
            send(body)

        h = HttpParser()
        received = []
        pending = None
        while True:
            if pending:
                consumed = h.execute(pending, len(pending))
                if h.is_headers_complete():
                    received.append(h.recv_body())
                if h.is_message_complete():
                    pending = pending[consumed:]
                    break
            # Wait for more data or for the timeout to expire.
            ev, val = first(receive_any=True, sleep=timeout_handler.remaining())
            if ev == 'sleep':
                timeout_handler.timeout()
            pending = val

        resp = Response(
            response=''.join(received),
            status=h.get_status_code(),
            headers=h.get_headers(),
            )
        return resp
示例#52
0
文件: protocol.py 项目: dstufft/stein
class HTTPProtocol(FlowControlMixin, asyncio.Protocol):
    """asyncio protocol that parses one HTTP request and hands it to a
    callback.

    The request body is fed into ``stream_reader``; the callback's result
    is written back through a StreamWriter by ``dispatch``.
    """

    def __init__(self, stream_reader, callback, loop=None):
        super().__init__(loop=loop)
        self._stream_reader = stream_reader
        self._stream_writer = None

        self._callback = callback
        self._task = None

        self._server = None

    def connection_made(self, transport):
        # A fresh parser for every connection.
        self._parser = HttpParser()

        reader = self._stream_reader
        reader.set_transport(transport)
        self._stream_writer = asyncio.StreamWriter(
            transport, self, reader, self._loop,
        )

        # Grab the name of our socket if we have it
        self._server = transport.get_extra_info("sockname")

    def connection_lost(self, exc):
        if exc is not None:
            self._stream_reader.set_exception(exc)
        else:
            self._stream_reader.feed_eof()

        super().connection_lost(exc)

    def data_received(self, data):
        parser = self._parser

        # Parse our incoming data with our HTTP parser
        parser.execute(data, len(data))

        # The first time the headers are complete, start the dispatch task
        # with the request description built from them.
        if self._task is None and parser.is_headers_complete():
            version = b".".join(
                str(x).encode("ascii") for x in parser.get_version()
            )
            request = {
                "server": self._server,
                "protocol": b"HTTP/" + version,
                "method": parser.get_method().encode("latin1"),
                "path": parser.get_path().encode("latin1"),
                "query": parser.get_query_string().encode("latin1"),
                "headers": parser.get_headers(),
            }
            coro = self.dispatch(
                request, self._stream_reader, self._stream_writer,
            )
            self._task = asyncio.Task(coro, loop=self._loop)

        # Any body data the parser has buffered goes to the StreamReader.
        if parser.is_partial_body():
            self._stream_reader.feed_data(parser.recv_body())

        # Once the request is complete there is nothing more to read, so
        # the StreamReader is told it has reached EOF.
        if parser.is_message_complete():
            self._stream_reader.feed_eof()

    def eof_received(self):
        # Propagate the client's EOF to our StreamReader.
        self._stream_reader.feed_eof()

    @asyncio.coroutine
    def dispatch(self, request, request_body, response):
        # The callback yields the status, the headers and the body. The
        # body must be iterable; each item is either a bytes object or an
        # asyncio coroutine, which is ``yield from``-ed to get its value.
        result = yield from self._callback(request, request_body)
        status, resp_headers, body = result

        # Status line.
        # TODO: We probably don't want to hard code HTTP/1.1 here
        response.write(b"HTTP/1.1 " + status + b"\r\n")

        # Headers.
        # TODO: We need to handle some required headers
        for key, values in resp_headers.items():
            # A header such as Set-Cookie may carry several values: an
            # iterable value produces one header line per item, a single
            # bytes object produces exactly one line.
            single = isinstance(values, (bytes, bytearray))
            for value in ([values] if single else values):
                response.write(key + b": " + value + b"\r\n")

        # Blank line separating the headers from the response body.
        response.write(b"\r\n")

        for chunk in body:
            # Wait for coroutine chunks before writing them.
            if asyncio.iscoroutine(chunk):
                chunk = yield from chunk

            response.write(chunk)

        # Everything from the iterator has been written; close the
        # connection.
        response.close()
示例#53
0
            header = "// Retrieved %sfrom %s\n" % (date + ", " if date else "", url)
            path = args.prefix + "/" + path
            try:
                os.makedirs(os.path.dirname(path))
            except OSError:
                pass
            jsfile = open(path, "w")
            jsfile.write(header)
            jsfile.write(script)
            jsfile.close()


# One extractor instance is reused for every record of every input file.
extractor = ScriptExtractor()

# Walk all WARC files given on the command line and run the extractor over
# the body of each HTTP response record.
for filename in args.files:
    # Progress message goes to stderr (Python 2 print-redirection syntax).
    print >>sys.stderr, "Extracting JS files from %s..." % filename
    for record in warc.open(filename):
        if record.type == "response":
            # A fresh parser per record; the whole payload is parsed in one call.
            http_parser = HttpParser()
            http_parser.execute(record.payload, record.header.content_length)
            header = http_parser.get_headers()
            # The response's Date header (None when absent) is handed to the
            # extractor together with the record URL.
            extractor.init(record.url, header.get("Date", None))
            try:
                # The body is decoded as latin_1 before being fed to the extractor.
                extractor.feed(http_parser.recv_body().decode("latin_1"))
            except Exception:
                # Ordinary errors: notify the extractor, print the traceback
                # and carry on with the next record.
                extractor.handle_error()
                print_exc()
            except:
                # Anything not derived from Exception: notify the extractor,
                # then re-raise.
                extractor.handle_error()
                raise
示例#54
0
文件: proxy_links.py 项目: BwRy/sandy
class ConnectionHandler:
    """Serve one proxied client connection and inspect the relayed traffic.

    Constructing an instance does all the work: it reads the request line
    from ``connection``, connects to the requested target, relays bytes in
    both directions and feeds the relayed data to an ``HttpParser`` so that
    headers and body can be recorded and signature-scanned.

    Relies on module-level names defined elsewhere in the file: ``BUFLEN``,
    ``HTTPVER``, ``VERSION``, ``urlid``, ``ms``, ``yara_match``,
    ``insert_html``, ``update_traffic_link`` and ``binary_found``.

    print() is always called with a single argument so that the output is
    the same under Python 2's print statement.
    """

    def __init__(self, connection, address, timeout):
        """Handle the whole connection, then close both sockets.

        connection -- accepted client socket
        address    -- client address (not used by the handler)
        timeout    -- idle budget; the relay loop gives up after roughly
                      ``timeout / 3`` consecutive idle select() rounds

        Raises ValueError when the request line is missing or does not
        consist of exactly three whitespace-separated fields.
        """
        self.body_file = ""
        self.p = HttpParser()
        self.body = []
        self.request_url = ""
        self.response_header = []
        self.header_done = False
        self.url = ""
        self.controller = []
        self.controller_ip = []
        self.client = connection
        self.client_buffer = ''
        self.timeout = timeout
        # Stays None until _connect_target succeeds, so cleanup and the
        # method_* handlers can tell whether a target socket exists.
        self.target = None
        try:
            self.method, self.path, self.protocol = self.get_base_header()
            if self.method == 'CONNECT':
                self.method_CONNECT()
            elif self.method in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                                 'DELETE', 'TRACE'):
                self.method_others()
        finally:
            # Always release the sockets, including on errors and for
            # unsupported methods where no target was ever opened.
            self.client.close()
            if self.target is not None:
                self.target.close()

    def get_base_header(self):
        """Read up to the first newline and return the split request line.

        The request line is also remembered in ``self.url`` unless it
        mentions 127.0.0.1.  Anything received after the first newline is
        left in ``self.client_buffer``.

        Raises ValueError if the client closes the connection before a
        complete line arrives (recv() then returns an empty string forever,
        which previously made this loop spin without end).
        """
        while 1:
            chunk = self.client.recv(BUFLEN)
            if not chunk:
                raise ValueError(
                    "client closed the connection before sending a request line")
            self.client_buffer += chunk
            end = self.client_buffer.find('\n')
            if end != -1:
                break
        request_line = self.client_buffer[:end]
        # Requests that mention the local address are not recorded.
        if "127.0.0.1" not in request_line:
            self.url = '%s' % request_line
        data = self.client_buffer[:end + 1].split()
        self.client_buffer = self.client_buffer[end + 1:]
        return data

    def method_CONNECT(self):
        """Open a tunnel to ``self.path`` (host[:port]) and relay bytes."""
        self._connect_target(self.path)
        if self.target is None:
            # Connect failed: do not tell the client a tunnel exists.
            return
        self.client.send(HTTPVER + ' 200 Connection established\n' +
                         'Proxy-agent: %s\n\n' % VERSION)
        self.client_buffer = ''
        self._read_write()

    def method_others(self):
        """Forward a plain HTTP request to its origin server and relay."""
        # Drop the leading "http://" (7 characters) from the absolute URL.
        self.path = self.path[7:]
        i = self.path.find('/')
        if i == -1:
            # URL without a path component: the whole remainder is the host.
            host = self.path
            path = '/'
        else:
            host = self.path[:i]
            path = self.path[i:]
        self._connect_target(host)
        if self.target is None:
            return
        self.target.send('%s %s %s\n' % (self.method, path, self.protocol) +
                         self.client_buffer)
        self.client_buffer = ''
        self._read_write()

    def _connect_target(self, host):
        """Connect ``self.target`` to ``host`` ("name" or "name:port").

        The port defaults to 80.  On failure the error is printed and
        reported through update_traffic_link, and ``self.target`` is left as
        None.  ``self.request_url`` is set to a "host | address | url"
        summary in both cases.
        """
        i = host.find(':')
        if i != -1:
            port = int(host[i + 1:])
            host = host[:i]
        else:
            port = 80
        target = None
        try:
            (soc_family, _, _, _, address) = socket.getaddrinfo(host, port)[0]
            target = socket.socket(soc_family)
            target.connect(address)
            self.target = target
        except Exception:
            # Do not keep a half-open socket around.
            if target is not None:
                target.close()
            self.target = None
            address = host
            print("Error Connecting to:" + str(address))
            connect_ip = "Error Connecting to:" + str(address)
            update_traffic_link(urlid, connect_ip, "Unable to Connect", "Nil", "")
        # Concatenated summary used later when the traffic is recorded.
        self.request_url = str(host) + " | " + str(address) + " | " + str(self.url)

    def _read_write(self):
        """Relay data between client and target until error or idle timeout.

        Every non-empty chunk is passed to the inspector and then forwarded
        to the opposite socket.  ``count`` counts consecutive select()
        rounds without forwarded data; the loop ends once it reaches
        ``self.timeout / 3`` (each select() waits at most 3 seconds).
        """
        time_out_max = self.timeout / 3
        socs = [self.client, self.target]
        count = 0
        while 1:
            count += 1
            (recv, _, error) = select.select(socs, [], socs, 3)
            if error:
                break
            for in_ in recv:
                try:
                    data = in_.recv(10000)
                except Exception as e:
                    print(e)
                    # Nothing was read from this socket; skipping it avoids
                    # re-sending the chunk left over from an earlier read.
                    continue

                if in_ is self.client:
                    out = self.target
                else:
                    out = self.client
                try:
                    if data:
                        self._inspect_traffic(data)
                        out.send(data)
                        count = 0
                except Exception as e:
                    print(e)
            # ">=" so a non-integral quotient still terminates the loop.
            if count >= time_out_max:
                break

    def _inspect_traffic(self, data):
        """Feed one relayed chunk to the HTTP parser; errors are printed."""
        # NOTE(review): chunks from both directions go through the same
        # parser instance -- confirm that this is intended.
        try:
            recved = len(data)
            nparsed = self.p.execute(data, recved)
            assert nparsed == recved
            if self.p.is_headers_complete() and not self.header_done:
                # Store each header as a "Name: value" string.
                for header, value in self.p.get_headers().items():
                    self.response_header.append(header + ": " + value)
                self.header_done = True

            # Collect body fragments as they become available.
            if self.p.is_partial_body():
                self.body.append(self.p.recv_body())

            if self.p.is_message_complete():
                self._scan_completed_message()
        except Exception as e:
            print(e)

    def _scan_completed_message(self):
        """Classify, signature-scan and record a fully received message."""
        try:
            try:
                # The value itself is unused; a missing header is only
                # reported.
                content_length = self.p.get_headers()['content-length']
            except Exception as e:
                print("Exception in Body retrive-sub" + str(e))
                content_length = 0

            self.body_file = "".join(self.body)
            body_file_type = ms.buffer(self.body_file[:1024])
            html_source = ""
            html_body = self.body_file
            if "gzip" in body_file_type:
                try:
                    print(" Decoding GZIp html\n")
                    # 16 + MAX_WBITS tells zlib to expect a gzip wrapper.
                    html_body = zlib.decompress(html_body, 16 + zlib.MAX_WBITS)
                except Exception as e:
                    print("Error gzip decoding:" + str(e))

            print(urlid)
            signature_scan_body = yara_match(html_body)
            signature_scan_request = yara_match(self.request_url)
            signature_scan_response = ""
            try:
                print(self.response_header)
                self_response = ''.join(self.response_header)
                if ("Content-Disposition:" in self_response
                        and "attachment;" in self_response):
                    signature_scan_response = "Forced-file-download"
                    print(" Signatured matched in response")
            except Exception as e:
                print("%s Error in header_match" % (e,))
            signature_scan = (str(signature_scan_body) + "" +
                              str(signature_scan_request) + "" +
                              signature_scan_response)

            # More than 6 characters of combined scan output counts as a hit.
            if len(signature_scan) > 6:
                try:
                    print(" Signatured found and Updating\n")
                    body_file_type = ("Signature_Matched: " + signature_scan +
                                      " ing " + body_file_type)
                    insert_html(urlid, html_body, signature_scan)
                    html_source = html_body
                except Exception as e:
                    print("Error in Traffic Signature" + str(e))

            print(" Trffic Updated\n")
            update_traffic_link(urlid, self.request_url, self.response_header,
                                body_file_type, html_source)

            if "executable" in body_file_type:
                print("\nExecutable found\n")
                binary_found(urlid)
        except Exception as e:
            print("Exception in Body retrive" + str(e))
示例#55
0
    assert nparsed == recved

    if p.is_message_complete():
        print "message complete"
    else:
        print "message incomplete"
        print p.recv_body()

    recved = len(rsp_2)
    nparsed = p.execute(rsp_2, recved)
    assert nparsed == recved

    if p.is_message_complete():
        print "message complete"
        print p.recv_body()
        print p.get_headers()
    else:
        print "message incomplete"
        print p.recv_body()

    print "--------------------"

    rsp_1 = ""
    with open("../testing/test2_response_part1.http", "r") as f:
        rsp_1 = f.readlines()
        rsp_1 = "".join(rsp_1)
        rsp_1 = rsp_1[:-2]
    rsp_2 = ""
    with open("../testing/test2_response_part2.http", "r") as f:
        rsp_2 = f.readlines()
        rsp_2 = "".join(rsp_2)
示例#56
0
    def run(self):
        """Stream the watch endpoint and dispatch a trigger per received line.

        Loops forever: each pass opens a TLS connection to
        ``self.host``/``self.port``, sends a GET for ``self.extension``,
        parses the response headers, then reads the body as it arrives.  The
        body is split on newlines; every complete line is handed to
        ``_get_trigger_payload_from_line`` and, unless that returns 0,
        dispatched through the sensor service.  When the stream ends the
        connection is closed and the outer loop reconnects.

        Raises:
            KeyError: ``self.authmethod`` is neither "basic" nor "cert", or
                a certificate path is missing from ``self.config``.
            socket.error: the connection could not be established.
        """
        self._log.info('Watch %s for new data.' % self.extension)

        while True:
            # Work out the TLS options before opening a socket, so a
            # configuration problem surfaces as the real KeyError and cannot
            # leave an open socket behind.
            if self.authmethod == "basic":
                tls_options = {}
            elif self.authmethod == "cert":
                tls_options = {
                    'keyfile': self.config['client_cert_key_path'],
                    'certfile': self.config['client_cert_path'],
                }
            else:
                raise KeyError('No authentication mechanisms defined')

            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            conn = self.sock
            try:
                # NOTE(review): ssl.wrap_socket is deprecated since Python 3.7
                # and removed in 3.12; migrate to SSLContext.wrap_socket when
                # this code moves to a newer interpreter.
                conn = self.client = ssl.wrap_socket(self.sock, **tls_options)
                self._log.debug('Connecting to %s %i' % (self.host, self.port))
                # self.client.settimeout(10)
                self.client.connect((self.host, self.port))
            except socket.error as exc:
                self._log.exception('unable to connect to %s: %s' %
                                    (self.host, exc))
                # Release the descriptor before propagating the failure.
                conn.close()
                raise

            if self.authhead is not None:
                self.client.send("GET %s HTTP/1.1\r\nHost: %s\r\n%s\r\n\r\n" %
                                 (self.extension, self.host, self.authhead))
            else:
                self.client.send("GET %s HTTP/1.1\r\nHost: %s\r\n\r\n" %
                                 (self.extension, self.host))

            readers = [self.client]
            writers = out_of_band = []

            # Trailing partial line carried over to the next chunk.
            pending = b''

            parser = HttpParser()
            self._log.debug("+")

            # Phase 1: read until the response headers are complete.
            while not parser.is_headers_complete():
                self._log.debug(".")
                try:
                    chunk = self.client.recv(io.DEFAULT_BUFFER_SIZE)
                except socket.error as exc:
                    err = exc.args[0]
                    self._log.debug('a recv err (%s): %s' % (err, exc))
                    break
                if not chunk:
                    # No exception is active here, so log a plain error
                    # rather than a traceback.
                    self._log.error('a No response from %s' %
                                    self.extension)
                    break
                self._log.debug('a chunk %s' % chunk)
                nreceived = len(chunk)
                nparsed = parser.execute(chunk, nreceived)
                if nparsed != nreceived:
                    self._log.error('a nparsed %i != nreceived %i' %
                                    (nparsed, nreceived))
                    break
            self._log.debug('parser headers complete %s' %
                            parser.get_headers())

            # Phase 2: consume the streamed body line by line.
            while True:
                self._log.debug("-")
                try:
                    readable, _, _ = select.select(readers, writers,
                                                   out_of_band)
                except select.error as exc:
                    self._log.debug("b select error: %s" % exc)
                    # 'readable' would be unbound or stale after a failed
                    # select; abandon this connection and reconnect.
                    break
                if not readable:
                    self._log.debug('b not readable')
                    break
                try:
                    chunk = self.client.recv(io.DEFAULT_BUFFER_SIZE)
                except socket.error as exc:
                    err = exc.args[0]
                    self._log.debug('b recv err (%s): %s' % (err, exc))
                    break
                if not chunk:
                    self._log.debug('b not chunk')
                    self.client.close()  # pylint: disable=no-member
                    break
                nreceived = len(chunk)
                self._log.debug('b chunk %s' % chunk)
                self._log.debug("repr: %s" % repr(chunk))
                # A chunk starting with the terminating zero-length chunk
                # marks the end of the stream.
                if re.match(r'0\r\n\r\n', chunk, re.M):
                    self._log.debug('b end end end')
                    break
                nparsed = parser.execute(chunk, nreceived)
                if nparsed != nreceived:
                    self._log.error('b nparsed %i != nreceived %i' %
                                    (nparsed, nreceived))
                    break
                data = pending + parser.recv_body()
                msg = "DATA: %s" % data
                self._log.debug(msg)
                lines = data.split(b'\n')
                # The last element is an incomplete line (or empty); keep it
                # for the next chunk.
                pending = lines.pop(-1)
                for line in lines:
                    trigger_payload = self._get_trigger_payload_from_line(line)
                    if trigger_payload == 0:
                        continue
                    self._log.info('Triggering Dispatch Now')
                    self._sensor_service.dispatch(trigger=self.TRIGGER_REF,
                                                  payload=trigger_payload)
            self._log.debug('main loop done')
            self.client.close()  # pylint: disable=no-member
示例#57
0
class HTTPSession(base_object.BaseObject):
    """One HTTP exchange: builds the request text and consumes the response.

    The request is assembled from the method, request object and version
    passed to the constructor plus a fixed set of headers.  Response bytes
    are pushed in through write_response(), which parses them and forwards
    body data to the configured writer.
    """

    # Class-level defaults; instances overwrite them as they are configured.
    _http_header = ""
    _method = ""
    _version = ""
    _req_obj = ""
    _user_agent = "User-Agent: COS-598C-Project-Client\r\n"
    _accept = "Accept: */*\r\n"
    _accept_enc = "Accept-Encoding: *\r\n"
    _accept_charset = "Accept-Charset: *\r\n"
    _host = ""
    _writer = ""
    _closeable = False
    _http_parser = ""
    _nr_bytes = 0

    def __init__(self, method, req_obj, version):
        """Remember the request-line parts and create a fresh parser."""
        self._http_parser = HttpParser()
        self._method = method
        self._req_obj = req_obj
        self._version = version

    def _build_first_line(self):
        """Return the request line, "<method> <object> <version>\\r\\n"."""
        return " ".join((self._method, self._req_obj, self._version)) + "\r\n"

    def set_host(self, host):
        """Store the complete Host header line for ``host``."""
        self._host = "Host: " + host + "\r\n"

    def set_writer(self, writer):
        """Set the object whose write() receives decoded body data."""
        self._writer = writer

    def write_response(self, data):
        """Parse a chunk of response data and return the running byte total.

        Body data made available by the parser is written to the writer; once
        the parser reports the message complete the session becomes closeable.
        """
        parser = self._http_parser
        size = len(data)
        consumed = parser.execute(data, size)
        assert consumed == size
        self._nr_bytes += size

        if parser.is_partial_body():
            self._writer.write(str(parser.recv_body()))
        if parser.is_message_complete():
            self._closeable = True

        return self._nr_bytes

    def get_response_headers(self):
        """Return the parsed headers, or None while they are incomplete."""
        if not self._http_parser.is_headers_complete():
            return None
        return self._http_parser.get_headers()

    def closeable(self):
        """Return True once a complete response message has been parsed."""
        return self._closeable

    def set_port(self, port):
        """Accept a port for interface compatibility; the value is ignored."""
        return None

    def get_request(self):
        """Build, store and return the full request header text."""
        pieces = (
            self._build_first_line(),
            self._host,
            self._user_agent,
            self._accept,
            self._accept_enc,
            self._accept_charset,
            "\r\n",  # blank line terminating the header section
        )
        self._http_header = "".join(pieces)
        return self._http_header
示例#58
0
文件: proxy.py 项目: BwRy/sandy
class ConnectionHandler:
    """Serve one proxied client connection and record the relayed traffic.

    Constructing an instance does all the work: it reads the request line
    from ``connection``, connects to the requested target, relays bytes in
    both directions and feeds the relayed data to an ``HttpParser`` so that
    the headers and the detected body type can be stored.

    Relies on module-level names defined elsewhere in the file: ``BUFLEN``,
    ``HTTPVER``, ``VERSION``, ``urlid``, ``ms`` and ``update_traffic``.

    print() is always called with a single argument so that the output is
    the same under Python 2's print statement.
    """

    def __init__(self, connection, address, timeout):
        """Handle the whole connection, then close both sockets.

        connection -- accepted client socket
        address    -- client address (not used by the handler)
        timeout    -- idle budget; the relay loop gives up after roughly
                      ``timeout / 3`` consecutive idle select() rounds

        Raises ValueError when the request line is missing or does not
        consist of exactly three whitespace-separated fields.
        """
        self.body_file = ""
        self.p = HttpParser()
        self.body = []
        self.request_url = ""
        self.response_header = []
        self.header_done = False
        self.url = ""
        self.controller = []
        self.controller_ip = []
        self.client = connection
        self.client_buffer = ''
        self.timeout = timeout
        # Stays None until _connect_target succeeds, so cleanup and the
        # method_* handlers can tell whether a target socket exists.
        self.target = None
        try:
            self.method, self.path, self.protocol = self.get_base_header()
            if self.method == 'CONNECT':
                self.method_CONNECT()
            elif self.method in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                                 'DELETE', 'TRACE'):
                self.method_others()
        finally:
            # Always release the sockets, including on errors and for
            # unsupported methods where no target was ever opened.
            self.client.close()
            if self.target is not None:
                self.target.close()

    def get_base_header(self):
        """Read up to the first newline and return the split request line.

        The request line is also remembered in ``self.url`` unless it
        mentions 127.0.0.1.  Anything received after the first newline is
        left in ``self.client_buffer``.

        Raises ValueError if the client closes the connection before a
        complete line arrives (recv() then returns an empty string forever,
        which previously made this loop spin without end).
        """
        while 1:
            chunk = self.client.recv(BUFLEN)
            if not chunk:
                raise ValueError(
                    "client closed the connection before sending a request line")
            self.client_buffer += chunk
            end = self.client_buffer.find('\n')
            if end != -1:
                break
        request_line = self.client_buffer[:end]
        # Requests that mention the local address are not recorded.
        if "127.0.0.1" not in request_line:
            self.url = '%s' % request_line
        data = self.client_buffer[:end + 1].split()
        self.client_buffer = self.client_buffer[end + 1:]
        return data

    def method_CONNECT(self):
        """Open a tunnel to ``self.path`` (host[:port]) and relay bytes."""
        self._connect_target(self.path)
        if self.target is None:
            # Connect failed: do not tell the client a tunnel exists.
            return
        self.client.send(HTTPVER + ' 200 Connection established\n' +
                         'Proxy-agent: %s\n\n' % VERSION)
        self.client_buffer = ''
        self._read_write()

    def method_others(self):
        """Forward a plain HTTP request to its origin server and relay."""
        # Drop the leading "http://" (7 characters) from the absolute URL.
        self.path = self.path[7:]
        i = self.path.find('/')
        if i == -1:
            # URL without a path component: the whole remainder is the host.
            host = self.path
            path = '/'
        else:
            host = self.path[:i]
            path = self.path[i:]
        self._connect_target(host)
        if self.target is None:
            return
        self.target.send('%s %s %s\n' % (self.method, path, self.protocol) +
                         self.client_buffer)
        self.client_buffer = ''
        self._read_write()

    def _connect_target(self, host):
        """Connect ``self.target`` to ``host`` ("name" or "name:port").

        The port defaults to 80.  On success ``self.request_url`` becomes a
        "host | address | url" summary.  On failure ``self.target`` is left
        as None and ``self.request_url`` holds the error message instead.
        """
        i = host.find(':')
        if i != -1:
            port = int(host[i + 1:])
            host = host[:i]
        else:
            port = 80
        target = None
        try:
            (soc_family, _, _, _, address) = socket.getaddrinfo(host, port)[0]
            print("Adress is  %s" % (address,))
            target = socket.socket(soc_family)
            target.connect(address)
        except Exception:
            # Do not keep a half-open socket around.
            if target is not None:
                target.close()
            self.target = None
            print("Error Connecting to:" + str(host) + ":" + str(port))
            # Keep the error as the recorded URL; 'address' may not even be
            # bound here, so the normal summary cannot be built.
            self.request_url = "Error Connecting to:" + str(host) + ":" + str(port)
            return
        self.target = target
        # Concatenated summary used later when the traffic is recorded.
        self.request_url = str(host) + " | " + str(address) + " | " + str(self.url)

    def _read_write(self):
        """Relay data between client and target until error or idle timeout.

        Every non-empty chunk is passed to the inspector and then forwarded
        to the opposite socket.  ``count`` counts consecutive select()
        rounds without forwarded data; the loop ends once it reaches
        ``self.timeout / 3`` (each select() waits at most 3 seconds).
        """
        time_out_max = self.timeout / 3
        socs = [self.client, self.target]
        count = 0
        while 1:
            count += 1
            (recv, _, error) = select.select(socs, [], socs, 3)
            if error:
                break
            for in_ in recv:
                try:
                    data = in_.recv(84333)
                except Exception as e:
                    print(e)
                    # Nothing was read from this socket; skipping it avoids
                    # re-sending the chunk left over from an earlier read.
                    continue

                if in_ is self.client:
                    out = self.target
                else:
                    out = self.client
                try:
                    if data:
                        self._inspect_traffic(data)
                        out.send(data)
                        count = 0
                except Exception as e:
                    print(e)
            # ">=" so a non-integral quotient still terminates the loop.
            if count >= time_out_max:
                break

    def _inspect_traffic(self, data):
        """Feed one relayed chunk to the HTTP parser; errors are printed."""
        # NOTE(review): chunks from both directions go through the same
        # parser instance -- confirm that this is intended.
        try:
            recved = len(data)
            nparsed = self.p.execute(data, recved)
            assert nparsed == recved
            if self.p.is_headers_complete() and not self.header_done:
                # Store each header as a "Name: value" string.
                for header, value in self.p.get_headers().items():
                    self.response_header.append(header + ": " + value)
                self.header_done = True

            # Collect body fragments as they become available.
            if self.p.is_partial_body():
                self.body.append(self.p.recv_body())

            if self.p.is_message_complete():
                self._record_completed_message()
        except Exception as e:
            print(e)

    def _record_completed_message(self):
        """Detect the body type of a finished message and store the traffic."""
        try:
            try:
                # The value itself is unused; a missing header is only
                # reported.
                content_length = self.p.get_headers()['content-length']
            except Exception as e:
                print("Exception in Body retrive-sub" + str(e))
                content_length = 0

            # Assemble the body unconditionally.  This used to happen only
            # inside the handler above, i.e. only when the content-length
            # header was missing.
            self.body_file = "".join(self.body)
            body_file_type = ms.buffer(self.body_file[:1024])
            print(urlid)
            update_traffic(urlid, self.request_url, self.response_header,
                           body_file_type)
        except Exception as e:
            print("Exception in Body retrive" + str(e))