Пример #1
0
def sanitize_html(content):
    """Return ``content`` after running it through a default ``Sanitizer``."""
    return Sanitizer().sanitize(content)
def edit_profile(request):
    """Show and process the combined user / user-profile edit forms.

    On POST both forms are bound to the submitted data and validated; when
    both are valid the user is saved, the profile bio is sanitized, the
    profile is saved and the client is redirected to ``accounts:profile``.
    Any other method, or a POST that fails validation, renders
    ``accounts/edit_profile.html`` with the two forms.
    """
    user = get_object_or_404(get_user_model(), pk=request.user.pk)
    # The related profile may not exist yet; fall back to an unbound instance.
    up_instance = getattr(user, 'userprofile', None)
    email_initial = {'confirm_email': user.email}

    if request.method != "POST":
        user_form = P7UserChangeForm(instance=user, initial=email_initial)
        profile_form = UserProfileForm(instance=up_instance)
    else:
        user_form = P7UserChangeForm(request.POST,
                                     instance=user,
                                     initial=email_initial)
        profile_form = UserProfileForm(request.POST,
                                       request.FILES,
                                       instance=up_instance)
        # Validate both forms unconditionally so each one reports its errors.
        user_ok = user_form.is_valid()
        profile_ok = profile_form.is_valid()
        if user_ok and profile_ok:
            user_form.save()
            profile = profile_form.save(commit=False)
            profile.user = user
            profile.bio = Sanitizer().sanitize(profile.bio)
            profile.save()
            return redirect(reverse('accounts:profile'))

    return render(
        request,
        'accounts/edit_profile.html',
        {'user_form': user_form, 'profile_form': profile_form},
    )
Пример #3
0
    def parse_opportunity(self, response):
        """Extract one opportunity page and pass it to ``store_opportunity``.

        The stored record contains the page title, the request URL, the two
        application counters and ``full_text``: the sanitized HTML of every
        ``div.column-one-whole`` element on the page, concatenated in
        document order.

        Raises:
            AttributeError: if the title or one of the counters is missing,
                because ``extract_first()`` then returns ``None``.
        """
        # Blurt it all out in full for now.
        sanitizer = Sanitizer({
            'tags': ('hr', 'a', 'br', 'p', 'ul', 'ol', 'li', 'h1', 'h2', 'h3',
                     'table', 'tbody', 'tr', 'td'),
        })

        matches = response.xpath('.//div[@class="column-one-whole"]').extract()
        # Keep every match: the previous loop reassigned the payload on each
        # iteration, so only the last matching element was ever stored.
        full_payload = "".join(sanitizer.sanitize(item) for item in matches)

        self.store_opportunity({
            'title':
            response.css('header h1::text').extract_first().strip(),
            'url':
            response.request.url,
            'incomplete_applications':
            response.css('#incomplete-applications .big-statistic::text'
                         ).extract_first().strip(),
            'complete_applications':
            response.css('#completed-applications .big-statistic::text').
            extract_first().strip(),
            'full_text':
            full_payload,
        })
Пример #4
0
def getList():
    """Build ``<li>`` link markup for every entry of the ``data`` directory.

    Each entry name is sanitized before it is placed into the link.
    """
    cleaner = Sanitizer()
    link_template = '<li><a href="index.py?id={name}">{name}</a></li>'
    return ''.join(
        link_template.format(name=cleaner.sanitize(entry))
        for entry in os.listdir('data'))
Пример #5
0
def getList():
    """Return sanitized ``<li>`` link markup for the entries of the data folder.

    The markup for all entries is assembled first and then sanitized once as
    a whole.
    """
    cleaner = Sanitizer()
    data_dir = "C:/Bitnami/wampstack-7.3.6-2/apache2/htdocs/data"
    rows = [
        "<li><a href = 'index.py?id={name}'>{name}</a></li>".format(name=entry)
        for entry in os.listdir(data_dir)
    ]
    return cleaner.sanitize("".join(rows))
    def do_GET(self):
        """Serve an XML feed of Workable jobs for the companies in the query.

        The query string is a comma-separated list of Workable account names.
        For each account the public widget API is queried and every job is
        appended to a ``<jobs>`` document as a ``<job>`` element with title,
        contact email, URL, location, sanitized description and id.

        On success the document is sent with status 200 and caching headers.
        Any exception produces a 500 HTML page holding a generic hint followed
        by the (HTML-escaped) exception text.
        """
        import html  # local import: only needed to escape the error text

        try:
            request_url = urlparse(self.requestline)
            # Get all company names from URL
            all_companies = request_url.query.split(',')

            # HTML Sanitizer -- configuration is constant, so build it once
            # instead of once per job.
            sanitizer = Sanitizer({
                'tags': ('em', 'strong', 'a', 'p', 'br', 'span', 'ul',
                         'ol', 'li', 'h1', 'h2', 'h3', 'hr'),
                'attributes': {
                    "a": "href"
                },
            })

            allJobsXml = ET.Element('jobs')
            for raw_company in all_companies:
                # Keep only the first whitespace-separated token.
                # NOTE(review): presumably this drops the HTTP-version suffix
                # that the raw request line carries -- confirm.
                company = raw_company.split()[0]
                r = requests.get(
                    f"https://apply.workable.com/api/v1/widget/accounts/{company}?details=true"
                )
                data = r.json()
                for job in data["jobs"]:
                    jobXml = ET.SubElement(allJobsXml, 'job')
                    titleXml = ET.SubElement(jobXml, 'title')
                    titleXml.text = job["title"]
                    employerXml = ET.SubElement(jobXml, 'Email')
                    employerXml.text = f"team+{company}@climate.careers"
                    urlXml = ET.SubElement(jobXml, 'url')
                    urlXml.text = job["url"]
                    locationXml = ET.SubElement(jobXml, 'location')
                    locationXml.text = job["city"] + ", " + job[
                        "state"] + ", " + job["country"]

                    descriptionXml = ET.SubElement(jobXml, 'description')
                    descriptionXml.text = sanitizer.sanitize(
                        job["description"])

                    idXml = ET.SubElement(jobXml, 'id')
                    idXml.text = job["shortcode"]

            self.send_response(200)
            self.send_header('Content-type', 'text/xml')
            self.send_header(
                'Cache-Control',
                'public, immutable, no-transform, s-maxage=3600, max-age=3600')
            self.end_headers()
            message = ET.tostring(allJobsXml)
            self.wfile.write(message)
            return

        except Exception as e:
            self.send_response(500)
            self.send_header('Content-type', 'text/html')
            self.end_headers()
            message = "<h1>Internal Error</h1><p>Sorry, there was a problem. Make sure the employer's name is " \
                "included in the URL query and/or is the correct name.</p> "
            # Escape the exception text so it cannot inject markup.
            message2 = f"<p>{html.escape(str(e))}</p>"
            # The generic hint used to be built and then dropped; send both.
            self.wfile.write((message + message2).encode())
            return
Пример #7
0
 def post(self):
     """Store the sanitized announcement for the current user, then redirect.

     The ``announcement`` body argument is sanitized before it is handed to
     the queue; afterwards the client is redirected to the ``view`` URL.
     """
     current_user = self.get_current_user()
     cleaner = Sanitizer()
     raw_text = self.get_body_argument("announcement")
     clean_text = cleaner.sanitize(raw_text)
     self.queue.update(current_user["name"], clean_text)
     target = self.application.reverse_url("view")
     self.redirect(target)
Пример #8
0
def get_list():
    """Return ``<li>`` link markup for each entry of the ``data`` directory.

    Entry names are sanitized individually before being formatted into the
    link markup.
    """
    cleaner = Sanitizer()
    rows = []
    for entry in os.listdir('data'):
        safe_name = cleaner.sanitize(entry)
        rows.append(
            '<li><a href="index.py?id={name}">{name}</a></li>'.format(
                name=safe_name))
    return ''.join(rows)
def _management_client(event):
    """Build the API Gateway management client for the endpoint behind ``event``."""
    connections_url = "https://" + event["requestContext"][
        "domainName"] + "/" + os.environ["Stage"]
    return boto3.client("apigatewaymanagementapi",
                        endpoint_url=connections_url)


def handle(event, context):
    """Handle one incoming message event.

    If the JSON body contains ``getAllMessages``, the stored messages are
    posted back to the requesting connection. Otherwise the body's ``data``
    field is sanitized, sent to the ``<Stage>-AnalysisHandler`` Lambda, stored
    together with the returned sentiment, and broadcast to every connection
    returned by the connections store.

    Args:
        event: Event dict; ``body`` (a JSON string) and ``requestContext``
            (``domainName``, ``connectionId``) are read.
        context: Unused.

    Returns:
        ``{"statusCode": 200}`` on both paths.
    """
    ddb = boto3.resource('dynamodb')
    messages_table = MessagesStore(ddb)

    # Decode the body once; both branches below need it.
    body = json.loads(event["body"])

    if "getAllMessages" in body:
        _management_client(event).post_to_connection(
            ConnectionId=event["requestContext"]["connectionId"],
            Data=json.dumps(messages_table.get_messages()).encode('utf-8'))
        return {"statusCode": 200}

    connections_table = ConnectionsStore(ddb)
    connections_ids = connections_table.get_connections()
    chat_message = Sanitizer().sanitize(body["data"])

    lambda_client = boto3.client('lambda')
    analysis_handler_function_name = "{}-{}".format(os.environ["Stage"],
                                                    "AnalysisHandler")
    response = lambda_client.invoke(
        FunctionName=analysis_handler_function_name,
        InvocationType='RequestResponse',
        Payload=json.dumps({"message": chat_message}))
    sentiment_response = response['Payload'].read().decode("utf-8")

    messageToSend = {
        "action": "sendMessage",
        "data": {
            "message": chat_message,
            "sentiment": sentiment_response
        }
    }

    message_data_for_ddb = {
        "date": date.today().strftime("%d-%m-%Y"),
        "timestamp": int(round(time.time() * 1000)),
        "message": messageToSend["data"]
    }
    messages_table.add_new_message(**message_data_for_ddb)

    # The endpoint and payload are the same for every recipient, so build the
    # client and encode the message once rather than per connection.
    gatewayapi = _management_client(event)
    encoded_message = json.dumps(messageToSend).encode('utf-8')
    for connection_id in connections_ids:
        gatewayapi.post_to_connection(
            ConnectionId=connection_id['connectionId'],
            Data=encoded_message)

    return {"statusCode": 200}
Пример #10
0
    def _compose_html(self, tagged_text, hard_words):
        """Render the tagged text as sanitized HTML paragraphs.

        Every ``chunk`` becomes a ``<p>``. Within each ``sentence`` the
        ``tok`` elements are emitted separated by a single space unless an
        ``ns`` element preceded them. Tokens whose ``orth`` or ``base`` is in
        ``hard_words`` are wrapped in ``<mark>``. Sentences with more than 20
        tokens are wrapped in ``<span class="long_sentence">`` and increment
        ``self.long_sentence_count``.
        """
        cleaner = Sanitizer({
            'tags': {'p', 'mark', 'span'},
            'attributes': {
                'span': ('class', )
            },
            'empty': set(),
            'separate': {'p', 'mark', 'span'},
        })

        paragraphs_html = []
        for chunk in tagged_text.iter('chunk'):
            rendered_sentences = []
            for sentence in chunk.iter('sentence'):
                pieces = []
                glue_next = False
                for el in sentence:
                    if el.tag == 'ns':
                        # The following token attaches without a space.
                        glue_next = True
                        continue
                    if el.tag != 'tok':
                        continue

                    orth = el.xpath('orth/text()')[0]
                    base = el.xpath('lex/base/text()')[0]
                    is_hard = orth in hard_words or base in hard_words
                    word = '<mark>{}</mark>'.format(orth) if is_hard else orth

                    pieces.append(word if glue_next else ' ' + word)
                    glue_next = False

                rendered = ''.join(pieces)
                if sentence.xpath('count(./tok)') > 20:
                    self.long_sentence_count += 1
                    rendered = '<span class="long_sentence">{}</span>'.format(
                        rendered)
                rendered_sentences.append(rendered)

            paragraphs_html.append(
                '<p>{}</p>'.format(' '.join(rendered_sentences)))

        return cleaner.sanitize(''.join(paragraphs_html))
Пример #11
0
def get_content(url):
    """Fetch a job page and return its sanitized ``<section>`` markup.

    Args:
        url: Address of the page to fetch (redirects followed, 5 s timeout).

    Returns:
        ``{"post_details": <sanitized HTML string>, "fields": {}}``.

    Raises:
        ServerException: if the request times out.
        InvalidUserInput: if the page has no ``<ul class="details">`` element
            or the content cannot be sanitized.
    """
    try:
        web_page = requests.get(url, allow_redirects=True, timeout=5)
    except requests.exceptions.Timeout as err:
        raise ServerException(
            description=
            "Failed to fetch web page in time! Skipping preprocessing"
        ) from err
    soup = BeautifulSoup(web_page.content, "html.parser")
    content = str(soup.section)

    # A page without the details list is not a job posting. This check used
    # to happen implicitly through an AttributeError swallowed by a bare
    # ``except:``; the loop that triggered it only printed debug output and
    # computed an unused regex match.
    if soup.find('ul', attrs={"class": "details"}) is None:
        raise InvalidUserInput(
            description="Couldn't find details for that job")

    try:
        content = Sanitizer().sanitize(content)
    except Exception as err:
        # Keep the exception type callers already handle, but no longer trap
        # KeyboardInterrupt / SystemExit, and preserve the original cause.
        raise InvalidUserInput(
            description="Couldn't find details for that job") from err
    return {"post_details": content, "fields": {}}
Пример #12
0
#!python3
#-*- coding: utf-8 -*-
import sys, codecs, os
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
import cgi, cgitb
import view
cgitb.enable()
print("Content-Type: text/html; charset=utf-8\r\n")
print()
form = cgi.FieldStorage()
from html_sanitizer import Sanitizer
sanitizer = Sanitizer()
import html
from urllib.parse import quote

if "id" in form:
  # basename() drops directory components, so an id such as "../secret"
  # cannot be used to read files outside data/.
  pageId = os.path.basename(form["id"].value)
  # Use a context manager so the file handle is always closed.
  with open("data/"+pageId, encoding="utf-8") as page_file:
    description = sanitizer.sanitize(page_file.read())
  title = sanitizer.sanitize(pageId)
  # The id goes into a URL query here, so percent-encode it.
  update_link = "<a href = 'update.py?id={pageId}'>update</a>".format(pageId = quote(pageId))
  # The id goes into an HTML attribute here, so escape quotes and markup.
  delete_action = '''
  <form action="process_delete.py" method="post">
    <input type = "hidden" name = "pageId" value="{}">
    <input type = "submit" value = "delete">
  </form>
  '''.format(html.escape(pageId, quote=True))
else:
  title = pageId = "Welcome"
  description = "Hello Web"
  update_link = ""
  delete_action = ""
# pageId is request-controlled; escape it before writing it into the page.
print(html.escape(pageId))
print("""<!DOCTYPE html>
Пример #13
0
    print(f"Process {a['url']}...")
    article = json.loads(
        subprocess.getoutput(f"../api/read-article {a['url']}"))
    article['url'] = a['url']
    article['body'] = markdown.markdown(article['body'],
                                        extensions=md_extensions,
                                        output_format='html5')
    article['body'] = update_link(article['body'], a['url'])

    article['comments_body'] = ''
    for comment in article.get('comments', {}):
        sanitizer = Sanitizer()
        comment['body'] = markdown.markdown(comment['body'],
                                            extensions=md_extensions,
                                            output_format='html5')
        comment['body'] = sanitizer.sanitize(comment['body'])
        if comment['site']:
            if comment['site'].startswith(
                    'https://') or comment['site'].startswith('http://'):
                comment['site'] = sanitizer.sanitize(comment['site'])
            else:
                comment['site'] = None
        article['comments_body'] += comment_tpl(comment)

    app_body += article_tpl(article)
    full_html = main_tpl({
        'body': app_body,
        'title': f"{article['title']} | Duke's Blog"
    })

    if not os.path.exists(a['url']):
Пример #14
0
def _read_scaled_sensor(path, formatter, max_retries=10):
	"""Read one line from ``path``, divide it by 1000 and format the result.

	A read that raises ``OSError`` is retried on the same open file; once
	more than ``max_retries`` reads have failed, the string ``"ERR"`` is
	returned instead. The file is closed in every case.
	"""
	with open(path, 'r') as f:
		failures = 0
		while True:
			try:
				return formatter(float(f.readline()) / 1000)
			except OSError:
				failures += 1
				if failures > max_retries:
					return "ERR"


def update_temp():
	"""Refresh all temperature/humidity display variables and log a CSV row.

	Reads the local 1-wire sensor and the two IIO inputs, fetches the
	pumphouse page over HTTP, pushes every value into its display variable
	and appends one row to ``csvwriter``.

	Raises:
		Exception: if the first line of the 1-wire sensor file does not
			contain "YES".
	"""

	# Get local cottage temp
	with open('/sys/bus/w1/devices/28-000007171178/w1_slave', 'r') as f:
		crc_line = f.readline()
		if "YES" not in crc_line:
			raise Exception("Bad CRC for Cottage temp sensor")
		raw_cot = f.readline().split("t=")[1].rstrip("\n")

	cot = temp_to_format_str(float(raw_cot)/1000)

	# Get local cottage temp and humidity from DHT
	cot2 = _read_scaled_sensor(
		'/sys/bus/iio/devices/iio:device0/in_temp_input',
		temp_to_format_str)
	hm = _read_scaled_sensor(
		'/sys/bus/iio/devices/iio:device0/in_humidityrelative_input',
		lambda value: str(round(value, 2)) + " %")

	tempCottage.set(cot)
	tempCottage2.set(cot2)
	humid.set(hm)

	# Every pumphouse reading starts as "ERR", so a failed request or a label
	# missing from the response can no longer leave a variable unbound.
	pumphouse = {
		"Lake": "ERR",
		"Outside": "ERR",
		"Pump intake": "ERR",
		"Ambient outdoor": "ERR",
	}

	# Get info from pumphouse
	r = requests.get("http://192.168.0.180/temp")
	# Set encoding to UTF-8 since the HTML is in it but apparently the ESP sends a different one
	r.encoding = "UTF-8"

	if r.ok:
		# Clean up and split response
		txt = Sanitizer().sanitize(r.text)

		# Split on line breaks and remove all but temp info
		for item in txt.split("<br>")[1:5]:
			tempS = item.split(":")
			tmpValue = float(removeNonDecimal(tempS[1]))
			if tempS[0] in pumphouse:
				pumphouse[tempS[0]] = temp_to_format_str(tmpValue)

	lakeTempVar = pumphouse["Lake"]
	outsideTempVar = pumphouse["Outside"]
	intakeTempVar = pumphouse["Pump intake"]
	ambientTempVar = pumphouse["Ambient outdoor"]

	tempLake.set(lakeTempVar)
	tempOutside.set(outsideTempVar)
	tempIntake.set(intakeTempVar)
	tempAmb.set(ambientTempVar)

	csvwriter.writerow({'Time': time.strftime("%F %T"), 'Outside Temp': removeNonDecimal(outsideTempVar), 'Cottage Temp': removeNonDecimal(cot), 'Cottage Temp (DHT)': removeNonDecimal(cot2), 'Lake Temp': removeNonDecimal(lakeTempVar), 'Pump Intake': removeNonDecimal(intakeTempVar), 'Ambient Lake Temp': removeNonDecimal(ambientTempVar), 'Humidity': removeNonDecimal(hm)})