Example No. 1
def send_request(self, url, depth=globvar.REDIRECT_MAX_DEPTH):
    req = request.get_request(url, redirects=globvar.ALLOW_REDIRECTS)
    self.pool.put(PoolQuery(query.insert_table_crawl_history, req.to_tuple(self.rowid)))
    # Follow redirect chains up to `depth` hops, but only while the target stays
    # on the same first-level domain as the crawl.
    i = 0
    while (300 <= req.status_code < 400) and i <= depth:
        if utility.same_fld(utility.get_fld(req.new_location), self.fld):
            req = request.get_request(req.new_location, redirects=globvar.ALLOW_REDIRECTS)
            self.pool.put(PoolQuery(query.insert_table_crawl_history, req.to_tuple(self.rowid)))
        else:
            # TODO: Log error message here.
            return None
        i += 1

    # Still redirecting after `depth` hops: give up.
    if 300 <= req.status_code < 400:
        return None
    return req
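For reference, a similar capped, same-host redirect loop can be written directly against the standard requests library. This is only a self-contained sketch of the pattern above, with the first-level-domain check simplified to an exact hostname comparison; it is not part of this project's request module.

import requests
from urllib.parse import urljoin, urlparse


def follow_redirects(url, max_depth=5, timeout=5):
    """Fetch url, manually following up to max_depth same-host redirects."""
    resp = requests.get(url, allow_redirects=False, timeout=timeout)
    start_host = urlparse(url).hostname
    for _ in range(max_depth):
        if not 300 <= resp.status_code < 400:
            return resp
        # Redirect targets may be relative, so resolve them against the current URL.
        target = urljoin(resp.url, resp.headers.get('Location', ''))
        if urlparse(target).hostname != start_host:
            return None  # refuse to leave the starting host
        resp = requests.get(target, allow_redirects=False, timeout=timeout)
    # Still redirecting after max_depth hops: give up.
    return None if 300 <= resp.status_code < 400 else resp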
Example No. 2
def test_parse_robots(url, expected):
    robots = robot.Robots()
    try:
        req = request.get_request(url)
        robots.parse_robots(req.text)
    except Exception:
        # A failed fetch or parse should leave the default (empty) rule set.
        pass
    assert robots.rules == expected
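The robot.Robots class itself is not shown in these examples. Purely as an assumption about its shape (not the project's actual implementation), a minimal parse_robots could collect the Disallow paths of the "User-agent: *" group into the rules attribute that the test asserts on:

class Robots:
    """Minimal robots.txt parser: collects Disallow paths for User-agent: *."""

    def __init__(self):
        self.rules = []

    def parse_robots(self, text):
        applies = False
        for line in text.splitlines():
            line = line.split('#', 1)[0].strip()  # drop comments and whitespace
            if not line or ':' not in line:
                continue
            field, _, value = line.partition(':')
            field, value = field.strip().lower(), value.strip()
            if field == 'user-agent':
                applies = (value == '*')
            elif field == 'disallow' and applies and value:
                self.rules.append(value)


robots = Robots()
robots.parse_robots("User-agent: *\nDisallow: /private/\nDisallow: /tmp/\n")
print(robots.rules)  # ['/private/', '/tmp/']

The real class almost certainly handles per-agent groups and Allow lines as well; this sketch only covers the surface the test touches.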
Example No. 3
def start_crawling(self):
    # Fetch the crawl's starting domain; abort if even that request fails.
    response = request.get_request(self.domain)
    if response is None:
        quit(log_error("Cannot start script."))
    return pages.PageCollection(
        self.domain,
        response,
        self.domain
    )
Example No. 4
def parse_robots(self):
    logging.info(f'{self.thread_id}: Parsing robots.txt')
    url = f'{globvar.scheme}{self.fld}/robots.txt'
    try:
        req = request.get_request(url)
        # A 404 simply means the site has no robots.txt; anything else is parsed.
        if req.status_code != 404:
            self.extractor.robots.parse_robots(req.text)
    except Exception:
        logging.error(f'Something went wrong parsing robots.txt url: {url}')
Example No. 5
def test_get_request():
    req = request.get_request('https://www.google.com')
    assert req.status_code == 200
Example No. 6
def test_get_request_timed_out():
    # An unrealistically small timeout forces the request to time out; the wrapper
    # reports that as a result with status_code 0 instead of raising.
    url = "http://google.com"
    req = request.get_request(url, timeout=0.001)
    assert req.status_code == 0
Example No. 7
def test_get_request_requestresult():
    req = request.get_request('https://www.google.com')
    assert type(req) == request.RequestResult
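The request module under test is not reproduced here. Going only by what the examples use (a RequestResult carrying status_code, text, and new_location, with status_code 0 after a timeout), a minimal get_request built on the requests library might look roughly like the sketch below. It is an assumption, not the project's actual implementation, which evidently does more (Example No. 3, for instance, also checks for a None return).

import requests


class RequestResult:
    """Plain container for the pieces of a response the crawler cares about."""

    def __init__(self, status_code=0, text='', new_location=None):
        self.status_code = status_code    # 0 signals a failed or timed-out request
        self.text = text
        self.new_location = new_location  # redirect target, if any


def get_request(url, redirects=False, timeout=5):
    try:
        resp = requests.get(url, allow_redirects=redirects, timeout=timeout)
        return RequestResult(
            status_code=resp.status_code,
            text=resp.text,
            new_location=resp.headers.get('Location'),
        )
    except requests.RequestException:
        # Timeouts and connection errors become status_code 0, matching the
        # assertion in test_get_request_timed_out above.
        return RequestResult()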
Example No. 8
def start_crawling(self):
    response = request.get_request(self.domain)
    if response is None:
        quit(log_error("Cannot start script."))
    return pages.PageCollection(self.domain, response, self.domain)
Example No. 9
import json

from hash import apply_sha1
from util import create_json_file, update_json_file
from request import get_request, post_request

# Fetch the challenge payload from the API.
response = get_request()
data = json.dumps(response.json())

# Persist the raw response before working on it.
create_json_file(data)

json_response = response.json()
house_number = json_response['numero_casas']  # Caesar shift amount
cipher_text = str(json_response['cifrado'])   # the encrypted text

print(f'House number: {house_number}')
print(f'Cipher text: {cipher_text}\n')

alphabet_list = [
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
]
count = 0
deciphered_text = ''

# Caesar decryption: shift every letter back by house_number positions.
# Python's negative indexing wraps the lookup when the shift passes the
# start of the alphabet.
for word in cipher_text.lower():
    if word.isspace() or word == '.':
        deciphered_text += word
    else:
        position = alphabet_list.index(word)
        letter = alphabet_list[(position - house_number)]