示例#1
0
def _top_words(text, top_N, stop_words):
    """Return the most frequent Cyrillic words found in *text*.

    Args:
        text: Plain text to analyse.
        top_N: Maximum number of words to return.
        stop_words: Container of words that must never be reported.

    Returns:
        A list of at most ``top_N`` words, most frequent first. Words with
        equal counts keep the order in which they first appear in *text*.
        Stop words and single-letter words are skipped.
    """
    # Function-scope import: the file's top-level import block is not
    # visible from this block, so the dependency is brought in locally.
    from collections import Counter

    # Every character outside а-я / А-Я (whitespace, digits, Latin letters,
    # punctuation) becomes a separator, so no separate newline/tab handling
    # or numeric check is needed afterwards.
    # NOTE(review): the ranges do not include 'ё'/'Ё', so words containing
    # that letter are split in two -- confirm whether this is intended.
    words = re.sub("[^а-яА-Я]", " ", text).split()

    top = []
    for word, _ in Counter(words).most_common():
        if len(top) >= top_N:
            break
        if word in stop_words or len(word) == 1:
            continue
        top.append(word)
    return top


def worker(q, i, top_N):
    """Consume URLs from *q* and send each page's top words over ``conn``.

    For every URL taken from the queue the page is downloaded, its text is
    extracted with BeautifulSoup, and the ``top_N`` most frequent Cyrillic
    words (excluding the module-level ``stops`` and one-letter words) are
    sent through the module-level ``conn`` as a ``Response.create(...)``
    payload encoded with the module-level ``code``. After a successful send
    the module-level ``count`` is incremented while holding ``mutex``.

    If anything fails while processing a URL, a response with text
    ``'error'`` is sent instead. Its code is the HTTP status of the current
    request when one was received, otherwise 500.

    Args:
        q: Queue-like object whose ``get()`` yields URLs; a ``None`` item
            tells the worker to stop.
        i: Worker index, used only in the progress message.
        top_N: Maximum number of words to report per page.
    """
    global count  # rebound below; conn, mutex, stops and code are only read
    while True:
        url = q.get()
        if url is None:
            break

        # Reset per URL so the error path never touches an unbound ``html``
        # (first request failed) or a stale status from a previous URL.
        status = 500
        try:
            html = requests.get(url)
            status = html.status_code
            page_text = BeautifulSoup(html.text, 'html.parser').get_text()

            # Each word is followed by one space, including the last one,
            # to keep the payload format unchanged for existing clients.
            text = ''.join(
                f'{word} ' for word in _top_words(page_text, top_N, stops))
            json_answer = Response.create(code=status, text=text)

            conn.sendall(json_answer.encode(code))
            with mutex:
                count += 1
            print(f' Thread #{i} complites {url}')
        except Exception:
            # Best effort: tell the client this URL failed. ``Exception``
            # rather than a bare ``except`` so KeyboardInterrupt/SystemExit
            # are not swallowed.
            json_answer = Response.create(code=status, text='error')
            conn.sendall(json_answer.encode(code))
示例#2
0
        conn, addr = sock.accept()
        conn.settimeout(10)
        with conn:
            while True:
                try:
                    json_data = conn.recv(1024)
                except socket.timeout:
                    print('close connection by timeout')
                    break
                if not json_data:
                    break

                request = Request(json_data.decode(code))

                if not request.url:
                    json_answer = Response.create(code=400,
                                                  error='Bad request')
                    conn.send(json_answer.encode(code))

                elif request.method != 'GET':
                    json_answer = Response.create(code=405,
                                                  error='Invalid method')
                    conn.send(json_answer.encode(code))

                elif not re.match(regex, request.url):
                    json_answer = Response.create(code=400,
                                                  error='Bad request')
                    conn.send(json_answer.encode(code))

                else:
                    try:
                        html = requests.get(request.url)