def test_sending_successful(self, api_gateway_mock, dynamo_mock):
    # Given: API Gateway event with connection id
    conn_id = send_ev["requestContext"].get("connectionId")

    # Given: message and connection tables with content inside
    message_index = 5_000_000
    message_content = get_body(send_ev)["content"]
    message_table = mock.Mock(
        name='mess_table',
        query=lambda *_, **__: {'Items': [{'index': message_index}]})
    connection_table = mock.Mock(
        name='conn_table',
        scan=lambda *_, **__: {'Items': [{'connectionId': conn_id}]})
    dynamo_mock.side_effect = [message_table, connection_table]

    # When: the send message function is called with a valid event
    response = send_message(send_ev, "")

    # Then: 200 is returned
    self.assertEqual(response['statusCode'], 200)

    # Then: the Dynamo resource is called to pull the table with the correct name
    message_table.put_item.assert_called_once_with(
        Item={
            'room': 'general',
            'index': message_index + 1,
            'timestamp': 0,
            'username': '******',
            'content': message_content
        })

    # Then: the message is sent to the one existing connection
    api_gateway_mock.return_value.post_to_connection.assert_called_once_with(
        ConnectionId=conn_id,
        Data=build_message_data(username='******', body=get_body(send_ev)))

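# The two mock arguments imply the test above sits under stacked mock.patch
# decorators. A minimal sketch of that scaffolding; the patch targets named
# here are assumptions, since the real module paths are not shown:
import unittest
from unittest import mock

class SendMessageTest(unittest.TestCase):

    # Stacked patches are injected bottom-up: the decorator nearest the
    # method becomes the first mock argument after self.
    @mock.patch('handler.dynamo_resource')      # -> dynamo_mock
    @mock.patch('handler.api_gateway_client')   # -> api_gateway_mock
    def test_sending_successful(self, api_gateway_mock, dynamo_mock):
        ...
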
def download(self):
    # Construct the download link
    article_id = CaixinRegex.article_id.findall(self.link)[0]
    content_link = CaixinRegex.content_link.format(article_id, random.random())
    # Extract the JSON from the JavaScript response,
    # which contains: current page, media count, total page count
    contents = yield from get_body(self.session, content_link)
    contents = contents.decode('utf-8')
    # Deal with 404
    if sign_of_404 in contents:
        return ''
    # If there is any content
    try:
        contents_json = eval(CaixinRegex.article_content.findall(contents)[0])
    except IndexError:
        log.error(contents)
        contents_json = {'totalPage': 1, 'content': ''}
    # If the article is split across pages, the json should say so.
    # However, sometimes totalPage is 0 ...
    if contents_json['totalPage'] > 1 or \
            (contents_json['totalPage'] != 1 and not contents_json['content']):
        content_link = content_link.replace('page=1', 'page=0')
        contents = yield from get_body(self.session, content_link)
        contents_json = eval(
            CaixinRegex.article_content.findall(contents.decode('utf-8'))[0])
    log.debug('Article {} has been downloaded'.format(self.link))
    # assert len(contents_json['content']) > 0
    return contents_json['content']

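# download() evals a dict literal scraped out of a JavaScript response. If
# the payload really is a plain literal (an assumption), ast.literal_eval
# parses it without executing arbitrary code; a sketch:
import ast

def parse_article_payload(js_text):
    # Pull the literal out with the same regex the crawler uses, then parse
    # it safely, falling back to an empty single-page article.
    try:
        return ast.literal_eval(CaixinRegex.article_content.findall(js_text)[0])
    except (IndexError, ValueError, SyntaxError):
        return {'totalPage': 1, 'content': ''}
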
def main(method, querystring, headers, body):
    if method == "GET":
        response_line = 'HTTP/1.1 200 OK'
        response_headers = []
        response_body = get_body('index.html')
        return response_line, response_headers, response_body
    elif method == 'POST':
        parameters = parse_parameters(body)
        username, password = parameters['username'], parameters['password']
        db = SQLiteAPI()
        # TODO: hash the password before storing it
        if db.add_user(username, password):
            template = 'successful_registration.html'
        else:
            template = 'unsuccessful_registration.html'
        response_line = 'HTTP/1.1 200 OK'
        response_headers = []
        response_body = get_body(template)
        return response_line, response_headers, response_body
    else:
        response_line = "HTTP/1.1 405 Method Not Allowed"
        response_headers = []
        response_body = get_body('405.html')
        return response_line, response_headers, response_body

def timer(method, querystring, headers, body):
    if method == "GET":
        response_line = 'HTTP/1.1 200 OK'
        response_headers = []
        response_body = get_body('timer.html')
        return response_line, response_headers, response_body
    else:
        response_line = "HTTP/1.1 405 Method Not Allowed"
        response_headers = []
        response_body = get_body('405.html')
        return response_line, response_headers, response_body

def get_title(self):
    page = yield from get_body(self.session, self.link)
    title = CaixinRegex.article_title.findall(page.decode('utf-8'))
    if not title:
        log.debug('>>> article {} has no title'.format(self.link))
        title = ['Untitled']
    return title[0]

def main():
    TEST_URL_GUARDIAN = ('https://www.theguardian.com/uk-news/2019/dec/28/'
                         'government-exposes-addresses-of-new-year-honours-recipients')
    text_output = utils.get_body(TEST_URL_GUARDIAN)
    this_gap_finder = GapFiller()

    this_gap_finder.fill_gaps(text_output)
    print("------------------------------------------------------------------")
    print(" ")
    this_gap_finder.multiple_choice_fill_gaps(text_output)
    print("------------------------------------------------------------------")
    print(" ")
    this_gap_finder.function_word_filler(text_output)
    print("------------------------------------------------------------------")
    print(" ")
    this_gap_finder.skim_reader(text_output)
    print("------------------------------------------------------------------")
    print(" ")
    this_gap_finder.lemmatizer(text_output)
    print("------------------------------------------------------------------")
    print(" ")
    print('\nThis news article can be found via the below link:\n\n',
          TEST_URL_GUARDIAN)

def _generate_response(self, http_request):
    try:
        request_line, headers, body = self.parsing_request(http_request)
        method, url_and_querystring, version = self.parse_request_line(
            request_line)
        url, querystring = self.parse_url_and_querystring(
            url_and_querystring)
        headers = self.headers_to_dict(headers)
        if version != "HTTP/1.1":
            headers_response = []
            body_response = ''
            return ("HTTP/1.1 505 HTTP Version Not Supported",
                    headers_response, body_response)
        if url not in self.urls:
            headers_response = []
            body_response = get_body('404.html')
            return "HTTP/1.1 404 Not Found", headers_response, body_response
    except ValueError:
        headers_response = []
        body_response = ''
        return 'HTTP/1.1 400 Bad Request', headers_response, body_response
    return self.urls[url](method=method, querystring=querystring,
                          headers=headers, body=body)

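# A sketch of the route table _generate_response dispatches through. The
# class name here is an assumption; the view signature matches main() and
# timer() above.
class Server:
    def __init__(self):
        self.urls = {
            '/': main,        # registration page: GET form, POST sign-up
            '/timer': timer,  # timer page: GET only
        }
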
def parse_single_issue(self, *, issue_link, old=False):
    """
    Parse a single issue link and append article links to self.articles
    """
    page_response = yield from get_body(self.session, issue_link)
    page = page_response.decode('utf-8')
    if not old:
        try:
            page = CaixinRegex.new_issue_main_content.findall(page)[0]
        except IndexError:
            # issues from 2010 have no such main-content block
            pass
    # Separate articles by date: {'2006-06-26': [article link1, ...]}
    articles_in_this_month = CaixinRegex.old_issue_link.findall(page)
    article_dates = set(CaixinRegex.old_issue_date.findall(page))
    monthly_articles = {
        date: [link for link in articles_in_this_month if date in link]
        for date in article_dates
    }
    # Delete dates with no articles
    valid_monthly_articles = {
        date: monthly_articles[date]
        for date in monthly_articles if monthly_articles[date]
    }
    # Append links into self.articles
    for date in valid_monthly_articles:
        articles = valid_monthly_articles[date]
        log.debug('Date {}: {} articles.'.format(date, len(articles)))
        if date in self.articles:
            self.articles[date] += articles
        else:
            self.articles[date] = articles

def update_issues(self):
    """
    Fetch issue links of 2010.1 - now; generate links for 1998.4 - 2009.11
    """
    weekly_home_page = self.loop.run_until_complete(
        get_body(self.session, "http://weekly.caixin.com/"))
    weekly_home_page = weekly_home_page.decode('utf-8')

    # Part 1: 2010 - now
    new_issues = CaixinRegex.issue_2010_2015.findall(weekly_home_page)[0]
    new_issue_links = CaixinRegex.issue_2010_2015_link.findall(new_issues)
    # The latest issue is not listed there, so add it manually
    last_issue = new_issue_links[0]
    last_issue_id = CaixinRegex.issue_id.findall(last_issue)[0]
    latest_issue_id = int(last_issue_id) + 1
    latest_issue_link = last_issue.replace(last_issue_id, str(latest_issue_id))
    self.new_issues = set(new_issue_links)
    self.new_issues.add(latest_issue_link)
    # Also record its date for later RSS generation
    cover = CaixinRegex.cover.findall(weekly_home_page)[0]
    self.latest_issue_date = CaixinRegex.old_issue_date.findall(cover)[0]

    # Part 2: 1998.4 - 2009.11
    # Format: http://magazine.caixin.com/h/1998-04.html
    if self.fetch_old_articles:
        old_issue_format = 'http://magazine.caixin.com/h/{}-{}.html'
        start_date = datetime.date(1998, 4, 1)
        end_date = datetime.date(2009, 11, 2)
        while start_date < end_date:
            year, month = start_date.year, str(start_date.month).zfill(2)
            self.old_issues.add(old_issue_format.format(year, month))
            start_date += datetime.timedelta(days=30)

def get_news_content(links):
    content = []
    for link in links:
        news_html = get_response(link, headers={'User-Agent': ua.random}).text
        soup = BeautifulSoup(news_html, 'html.parser')
        title = soup.find('div', attrs={
            "class": "article__header"
        }).find('div', attrs={
            "class": "article__header__title"
        }).text.strip()
        article = soup.find('div', class_='article__text')
        body = get_body(article)
        path = urlparse(link).path.split('/')
        date = list(filter(lambda e: e.isdigit(), path)) or ''
        content.append({
            "title": title,
            'link': link.split('?', 1)[0],
            "date": '.'.join(date),
            'body': body,
        })
        time.sleep(1)
    return content

def decode(original):
    """
    Classifier.

    :param original: original reference text
    :return: a Reference object
    """
    # Others
    if re.search(r"[Rr]etrieved\sfrom", original):
        try:
            info = utils.get_body(original)
        except AttributeError:
            return Other(original)
        info = re.sub(r"ed\.", "ed", info)
        k = info.count(".") + info.count("?") + info.count("!")
        if k <= 1:
            return Website(original)
        else:
            return OnLineJournal(original)

    # Chapter
    if re.search(r"In.*\([Ee]ds*\.*\)", original):
        if "(in press)" in original:
            return ChapterInPress(original)
        else:
            return ChapterPublished(original)

    # JournalPublished or BookPublished
    body = utils.get_body(original)
    if not body:
        return Other(original)
    if "(in press)" not in original:
        if re.search(r"\d+(?:\(\d+\))*,", body):
            return JournalPublished(original)
        else:
            return BookPublished(original)

    # JournalInPress or BookInPress
    body = re.sub(r"ed\.", "ed", body)
    k = body.count(".") + body.count("?") + body.count("!")
    if k <= 1:
        return BookInPress(original)
    else:
        return JournalInPress(original)

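# Illustrative inputs for the classifier above (the expected classes are
# inferred from the regexes, not verified against the real utils.get_body):
decode("Doe, J. (2001). Deep parsing. Journal of NLP, 5(2), 101-120.")
# -> JournalPublished: the body matches the volume(issue), pattern
decode("Doe, J. (2019). Parsing in practice. Boston: Example Press.")
# -> BookPublished: no volume(issue) pattern in the body
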
def send_account_info(message, address=None, host=None):
    subj_string = message['Subject'].encode('utf-8').lower()
    activation_str = ("account activation on %s" % WEBSITE).lower()
    reset_str = ("password reset on %s" % WEBSITE).lower()
    logging.debug(message['Subject'])
    logging.debug(message['To'])
    logging.debug(message['From'])
    email_message = message_from_string(str(message))
    msg_text = get_body(email_message)
    logging.info(msg_text)
    if message['From'].encode('utf-8') == NO_REPLY and \
            (activation_str in subj_string or reset_str in subj_string):
        mail = MailResponse(From=NO_REPLY,
                            To=message['To'],
                            Subject=message['Subject'],
                            Body=msg_text['plain'])
        relay.deliver(mail)

def get_news_body(links):
    content = {}
    for link in links:
        news_html = get_response(link).text
        soup = BeautifulSoup(news_html, 'html.parser')
        article = soup.find('div', class_='article__text')
        body = get_body(article)
        content[link] = body
        time.sleep(1)
    return content

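# get_body itself is not shown in these scraper snippets. A plausible
# minimal version, assuming it flattens the paragraph text of the article
# <div> (an assumption, not the project's actual helper):
def get_body(article):
    # Join the stripped text of every paragraph inside the container.
    return '\n'.join(p.get_text(strip=True) for p in article.find_all('p'))
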
def get_poses(paths):
    body_estimation = get_body()
    poses = []
    subsets = []
    proc = current_process()
    print(paths)
    for path in paths:
        print("on path:", path, "id:", proc, flush=True)
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        pose, subset = body_estimation(img)
        poses.append(pose)
        subsets.append(subset)
    return np.array(poses), np.array(subsets)

def lambda_handler(event, context):
    input_url = event['queryStringParameters']['url']
    boilerplate_response = {
        "statusCode": 500,
        "headers": {
            "my_header": "my_value"
        },
        "body": '',
        "isBase64Encoded": False
    }

    # Check for a dangerous url
    if not utils.url_safety_check(input_url):
        boilerplate_response['body'] = 'unsupported url'
        boilerplate_response['statusCode'] = 400
        return boilerplate_response

    # Check for a supported news source
    if not utils.supported_news_site_check(input_url):
        boilerplate_response['body'] = 'unsupported url'
        boilerplate_response['statusCode'] = 400
        return boilerplate_response

    try:
        url_data_dict = utils.get_body(input_url)
    except Exception:
        boilerplate_response['body'] = 'server error'
        boilerplate_response['statusCode'] = 500
        return boilerplate_response

    gap_filler_obj = gap_filler.GapFiller()
    try:
        worksheet_data_dict = gap_filler_obj.fill_gaps(url_data_dict)
    except Exception:
        boilerplate_response['body'] = 'server error'
        boilerplate_response['statusCode'] = 500
        return boilerplate_response

    boilerplate_response['body'] = json.dumps(worksheet_data_dict)
    boilerplate_response['statusCode'] = 200
    return boilerplate_response

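# A hypothetical proxy-integration test event for the handler above (the
# URL is illustrative):
event = {'queryStringParameters': {'url': 'https://www.theguardian.com/uk-news/...'}}
response = lambda_handler(event, None)  # -> statusCode 200 with worksheet JSON on success
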
def send_message(event, context):
    """
    When a message is sent on the socket, verify the passed-in token and
    forward the message to all connections if verification succeeds.
    """
    logger.info('Message sent on WebSocket.')

    # Ensure all required fields were provided
    try:
        validate_event(event)
        body = get_body(event)
        validate_body(body)
    except ValueError as v_er:
        return build_response(
            400, f'Event or body are not in correct shape or format: ({v_er})')

    # TODO: fix hardcode once username is known
    username = '******'
    put_message_to_db(username, body)
    logger.debug('Broadcasting message: {}'.format(body['content']))
    return broadcast_message(username, body, event)

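# A minimal event of the shape send_message() and the first test above rely
# on. The values are illustrative, and the token field is inferred from the
# docstring's mention of verification:
import json

send_ev = {
    'requestContext': {'connectionId': 'abc123='},
    'body': json.dumps({'token': '<jwt>', 'content': 'hello, room'}),
}
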
def wrapper(*args, **kwargs):
    headers = kwargs['headers']
    response_body = get_body('index.html')
    if 'Authorization' not in headers:
        return "HTTP/1.1 401 Unauthorized", [
            'WWW-Authenticate: Basic realm="Log in"'
        ], response_body
    else:
        try:
            credentials = base64.b64decode(
                headers['Authorization'].split(' ')[1]).decode("utf-8").split(':')
            if not check_credentials(credentials):
                return "HTTP/1.1 401 Unauthorized", [
                    'WWW-Authenticate: Basic realm="Log in"'
                ], response_body
        except Exception:
            return "HTTP/1.1 400 Bad Request", [
                'WWW-Authenticate: Basic realm="Log in"'
            ], ''
        return view(*args, **kwargs)

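# wrapper() reads like the inner function of a decorator; a sketch of the
# enclosing factory (the name login_required is an assumption):
def login_required(view):
    def wrapper(*args, **kwargs):
        ...  # body as above
    return wrapper

# A valid request would then carry a header such as:
#   Authorization: Basic dXNlcjpwYXNz   (base64 of "user:pass")
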
def _get_body(self):
    return utils.get_body(self._original)

def START(message, address=None, host=None):
    logger.info("Email to mailbot@")
    arrived_message = message
    name, addr = parseaddr(arrived_message['from'].lower())
    site = Site.objects.get_current()
    auth_res = None

    # restart the db connection
    django.db.close_connection()

    try:
        addr = addr.strip()
        imapAccount = ImapAccount.objects.get(email=addr)

        er_to_execute = None
        ers = EmailRule.objects.filter(mode__imap_account=imapAccount,
                                       type='shortcut')
        for er in ers:
            if er.get_forward_addr().lower() == address.lower():
                er_to_execute = er
                break
        if not er_to_execute:
            body = {}
            options = get_available_shortcut_link_text(imapAccount, site.domain)
            body["text"] = "You emailed %s@%s, but this shortcut does not exist. \n\n %s \n\n Link to YouPS: %s://%s" % (
                address, host, options["text"], PROTOCOL, site.domain)
            body["html"] = "You emailed %s@%s, but this shortcut does not exist. <br><br> %s <br><br> Link to YouPS: <a href='%s://%s'>%s://%s</a>" % (
                address, host, options["html"], PROTOCOL, site.domain, PROTOCOL, site.domain)
            new_message = create_response(arrived_message, addr,
                                          arrived_message["message-id"], body, host)
            relay.deliver(new_message)
            return

        logging.debug("mailbot %s" % addr)
        auth_res = authenticate(imapAccount)
        if not auth_res['status']:
            raise ValueError('Something went wrong during authentication. '
                             'Log in again at %s/editor' % host)
        imap = auth_res['imap']
        mailbox = MailBox(imapAccount, imap)

        # local shortcut
        if arrived_message["In-Reply-To"]:
            # Get the original message
            original_message_schema = MessageSchema.objects.filter(
                imap_account=imapAccount,
                base_message__message_id=arrived_message["In-Reply-To"])
            if original_message_schema.exists():
                original_message_schema = original_message_schema[0]
                imap.select_folder(original_message_schema.folder.name)
                original_message = Message(original_message_schema, imap)
            else:
                # in case YouPS hasn't registered the message to the DB yet,
                # save it immediately
                mail_found_at = ""
                original_message_id = -1
                for folder in mailbox._list_selectable_folders():
                    imap.select_folder(folder.name)
                    original_message_id = imap.search(
                        ["HEADER", "Message-ID", arrived_message["In-Reply-To"]])
                    # original_message
                    if original_message_id:
                        mail_found_at = folder
                        break
                if not mail_found_at:
                    raise ValueError("Email does not exist. The message is "
                                     "deleted or YouPS can't detect the message.")
                else:
                    # Save this message immediately so it can be run once it
                    # is registered to the database
                    try:
                        logger.critical("%s %s" % (imapAccount.email, mail_found_at))
                        folder = mail_found_at
                        if original_message_id:
                            folder._save_new_messages(original_message_id[0],
                                                      urgent=True)
                        original_message_schema = MessageSchema.objects.filter(
                            imap_account=imapAccount,
                            base_message__message_id=arrived_message["In-Reply-To"])
                        if not original_message_schema.exists():
                            raise MessageSchema.DoesNotExist
                        original_message_schema = original_message_schema[0]
                        imap.select_folder(original_message_schema.folder.name)
                        original_message = Message(original_message_schema, imap)
                    except (FolderSchema.DoesNotExist, MessageSchema.DoesNotExist):
                        raise ValueError("Email does not exist. The message is "
                                         "deleted or YouPS can't detect the message.")

            entire_message = message_from_string(str(arrived_message))
            entire_body = get_body(entire_message)
            code_body = entire_body['plain'][:(-1) * len(original_message.content['text'])]

            gmail_header = "---------- Forwarded message ---------"
            if gmail_header in code_body:
                code_body = code_body.split(gmail_header)[0].strip()
            logging.debug(code_body)

            shortcuts = EmailRule.objects.filter(mode=imapAccount.current_mode,
                                                 type="shortcut")
            if not imapAccount.current_mode or not shortcuts.exists():
                body = ("Your YouPS isn't turned on or doesn't have email "
                        "shortcuts yet! Define your shortcuts here: %s://%s"
                        % (PROTOCOL, site.domain))
                mail = MailResponse(From=WEBSITE + "@" + host,
                                    To=imapAccount.email,
                                    Subject="Re: " + original_message.subject,
                                    Body=body)
                relay.deliver(mail)
            else:
                res, body = run_shortcut(shortcuts, mailbox,
                                         original_message_schema, code_body)

                # Go to the sent folder and delete the command the user sent
                if imapAccount.is_gmail:
                    imap.select_folder('[Gmail]/Sent Mail')
                else:
                    import imapclient
                    sent = imap.find_special_folder(imapclient.SENT)
                    if sent is not None:
                        imap.select_folder(sent)
                this_message = imap.search(
                    ["HEADER", "In-Reply-To", original_message_schema.message_id])
                imap.delete_messages(this_message)

                # new_message.set_payload(content.encode('utf-8'))
                if "text" in body and "html" in body:
                    body["text"] = "Your command: %s%sResult: %s" % (code_body, "\n\n", body["text"])
                    body["html"] = "Your command: %s%sResult: %s" % (code_body, "<br><br>", body["html"])
                else:
                    body["text"] = "Your command:%s%sResult:%s" % (code_body, "\n\n", body["text"])

                new_message = create_response(arrived_message, addr,
                                              original_message_schema.message_id,
                                              body, host)

                try:
                    new_msg = {}
                    from_field = original_message._get_from_friendly()
                    to_field = original_message._get_to_friendly()
                    cc_field = original_message._get_cc_friendly()
                    new_msg["timestamp"] = str(datetime.now().strftime("%m/%d %H:%M:%S,%f"))
                    new_msg["type"] = "new_message"
                    new_msg["from_"] = from_field
                    new_msg["to"] = to_field
                    new_msg["cc"] = cc_field
                    new_msg["trigger"] = "shortcut"
                    new_msg["log"] = body["text"]
                    new_msg.update(original_message._get_meta_data_friendly())
                except Exception:
                    logger.critical("error adding logs")

                # instead of sending an email, quietly place the reply in the
                # original message's folder
                imap.append(original_message_schema.folder.name, str(new_message))

        # global shortcut
        else: