def test_mock_generation(self):
    """Check that saving a Url whose address holds the mock mark sets the mock slug."""
    mock_address = 'http://lab.tmp.br/%s/index.html' % MOCK_MARK
    u1 = Url(url=mock_address)
    u1.save()

    # The slug is compared against the first MIN_SLUG characters of the mark.
    expected_slug = MOCK_MARK[:MIN_SLUG]
    self.assertEqual(u1.slug, expected_slug)
def shorten():
    """Shorten the URL passed in the ``url`` query parameter.

    Query parameters read: ``url``, ``token`` and ``format`` (default
    ``"simple"``). The client address is read from ``X-Forwarded-For``.

    Returns:
        The short URL as plain text, as JSON when ``format=json``, or a
        flashed redirect rendering ``new_url.html`` when ``format=html``.

    Aborts with 429 when ``rate_limit_exceeded`` reports a hit (the HTML
    format gets a flashed page instead) and with 400 when ``url`` is missing.
    """
    long_url = request.args.get("url")
    token = request.args.get("token")
    # Local name chosen so the ``format`` builtin is not shadowed.
    output_format = request.args.get("format", "simple")
    ip = request.headers.get("X-Forwarded-For")

    if rate_limit_exceeded(ip, token):
        if output_format == "html":
            return redirect_and_flash(
                render_template("rate_limit_exceeded.html"))
        abort(429)

    # Without this guard a request lacking ``url`` stored a row with url=None.
    if not long_url:
        abort(400)

    url = Url(url=long_url)
    url.save()

    # Record the request so later rate-limit checks can see it.
    log_ip = Ip(ip=ip, token=token, time=datetime.now())
    log_ip.save()

    root_url = url_for("index", _external=True, _scheme="https")
    slug = short_url.encode_url(url.id)
    new_url = root_url + slug
    print(new_url)

    if output_format == "html":
        return redirect_and_flash(
            render_template("new_url.html", new_url=new_url))
    if output_format == "json":
        return jsonify(url=new_url)
    return new_url
def main(request):
    """Show the submission form (GET) or store a URL and show its id (POST).

    Any other HTTP method gets an ``HttpResponseForbidden``.
    """
    host = request.META['HTTP_HOST']
    method = request.method

    if method == "GET":
        # Form followed by a raw dump of every stored row.
        pieces = [
            "<form action='/' method='POST'>\n",
            "Introduce your url:",
            "<input type='text' name='url'/></br>\n",
            "<input type='submit' value='Submit' ",
            "/></form>\n<br>\n<br>",
            str(Url.objects.values_list()),
        ]
        return HttpResponse("".join(pieces))

    if method != "POST":
        return HttpResponseForbidden("Method not allowed")

    # The form value is whatever follows the first "=" in the raw body.
    raw_value = request.body.split("=")[1]
    urlname = urllib.unquote(raw_value)
    if not urlname.startswith(("http://", "https://")):
        urlname = "http://" + urlname

    try:
        urlname = Url.objects.get(url=urlname).url
    except Url.DoesNotExist:
        Url(url=urlname).save()

    urlnum = Url.objects.get(url=urlname).id
    page = "".join([
        "You introduced: ", str(urlname), "</br>\n",
        "The abbreviation is: /", str(urlnum), "</br>\n",
        "<meta http-equiv='Refresh' content='2;",
        "url=http://", host, "'>",
    ])
    return HttpResponse(page)
def index():
    """Render the form (non-POST) or shorten the submitted ``url`` (POST).

    On POST the URL gets an ``http://`` prefix when it has no scheme, is
    rejected unless its scheme is http/https, and is looked up by the SHA-1
    of its text. A new row is created when no row with that hash exists; its
    key is the base-36 encoding of the row id.
    """
    if request.method != 'POST':
        return render_template('index.html')

    thing = request.form.get('url')
    if not thing:
        return "You didn't give me shit"

    if '://' not in thing:
        thing = 'http://' + thing

    # Verify the URL
    parsed = urlparse(thing)
    if parsed.scheme not in ('http', 'https'):
        return "I only take HTTP or HTTPS URLs, dummy"

    # Hash the UTF-8 bytes explicitly: hashing text directly fails for
    # non-ASCII input (implicit ASCII encode / TypeError on Python 3).
    urlhash = hashlib.sha1(thing.encode('utf-8')).hexdigest()

    try:
        url = Url.get(Url.url_hash == urlhash)
    except Url.DoesNotExist:
        # Only "no such row" means we should create one; other errors
        # (e.g. database failures) must not be mistaken for a miss.
        url = Url()
        url.url = thing
        url.url_hash = urlhash
        url.created = datetime.datetime.now()
        url.save()

        # hokay. got us an ID, let's make a key.
        url.key = base36_encode(url.id)
        url.save()

    short_url = "http://{0}/{1}".format(request.headers['host'], url.key)
    return render_template('added.html', short_url=short_url)
def get_currently_selected_genre(self):
    """
    Return the Url for the current genre, or None when nothing is found.

    When no top-level genre tag carries the "selected" css class, the
    selected subgenre is looked up instead and its parent genre is returned.
    """
    genres = self.get_top_level_genre_tags()
    if not genres:
        return None

    chosen = next((tag for tag in genres if "selected" in tag['class']), None)
    if chosen is not None:
        return Url(chosen.get('href'), chosen.string)

    # No hits in genres means a subgenre is currently selected
    subgenre = self._get_currently_selected_subgenre_tag()
    if not subgenre:
        return None

    parent_li = subgenre.parent.parent.parent
    anchor = parent_li.find("a", class_="top-level-genre")
    if not anchor:
        return None
    return Url(anchor.get('href'), anchor.string)
def create():
    """Create a short URL for the ``url`` query parameter; reply with JSON.

    The reply always has a ``success`` flag. Failures carry a ``message``;
    successes carry the original ``url`` and the ``short_url``.
    """
    def as_json(payload):
        # Every reply from this view is a JSON document.
        return Response(json.dumps(payload), mimetype='application/json')

    full_url = request.args.get('url')
    if not full_url:
        return as_json({'success': False,
                        'message': 'No "url" parameter specified'})

    # Validate full_url
    if urlparse(full_url).scheme == '':
        return as_json({'success': False,
                        'message': 'No URL scheme specified'})

    # Insert URL into db; the first commit assigns the autoincrement id.
    short_url = Url(full_url)
    db.session.add(short_url)
    db.session.commit()

    # The short code is derived from that id.
    short_url.short_url = base36encode(short_url.id)
    db.session.commit()

    # Get host to display short url (this won't work with https)
    host = 'http://' + request.headers.get('Host', 'localhost')
    short_link = '%s/%s' % (host, short_url.short_url)
    return as_json({'success': True,
                    'url': full_url,
                    'short_url': short_link})
def make_url_model(url, site):
    """
    Create and return the Url row for a newly submitted URL.

    The Domain row for ``site`` has its ``linked_count`` incremented, or is
    created with ``linked_count`` 1 when the site is not in the database yet.
    The new Url starts with ``linked_count`` 1, and its shortened form is the
    base URL plus ``encode_62`` of its primary key.
    """
    now = datetime.now()
    base_url = 'http://links.ep.io/'

    # Look the domain up without a catch-all ``except`` so real database
    # errors are not mistaken for "site not seen before".
    matches = list(Domain.objects.filter(site=site)[:1])
    if matches:
        domain = matches[0]
        domain.linked_count += 1
        domain.date_updated = now
    else:
        domain = Domain(site=site, linked_count=1, date_updated=now)
    domain.save()

    url_model = Url()
    url_model.url = url
    url_model.site = domain
    url_model.date_time_created = now
    url_model.linked_count = 1
    # First save assigns the primary key needed for the short code.
    url_model.save()

    url_model.url_shortened = base_url + encode_62(url_model.pk)
    print(url_model.url_shortened)
    url_model.save()
    return url_model
def barra(request):
    """List stored URLs with the form (GET) or shorten a posted URL (POST).

    POST reads ``url`` from the form data, adds ``http://`` when no http/https
    scheme is present, and reuses the stored row for that URL if there is one.
    Other methods get a 405 response whose Allow header lists GET and POST.
    """
    formul = ('<br><form action="" method="POST" accept-charset="UTF-8">'
              'URL para acortar: <input type="text" name="url">'
              '<input type="submit" value="Acorta!"></form><hr>')
    srvHost = str(request.META["SERVER_NAME"])
    srvPort = str(request.META["SERVER_PORT"])

    if request.method == "GET":
        urlshtml = "".join(formatUrlHtml(url, srvHost, srvPort)
                           for url in Url.objects.all())
        return HttpResponse(formul + urlshtml)

    if request.method == "POST":
        longUrl = request.POST.get("url", "")
        if longUrl == "":
            return HttpResponse("Incorrect post or empty url")
        if not longUrl.startswith(("http://", "https://")):
            longUrl = "http://" + longUrl
        try:
            newUrl = Url.objects.get(long_url=longUrl)
        except Url.DoesNotExist:
            newUrl = Url(long_url=longUrl)
            newUrl.save()
        return HttpResponse(formatUrlHtml(newUrl, srvHost, srvPort))

    # The first argument must be the list of permitted methods; passing the
    # message there produced a garbage Allow header.
    return HttpResponseNotAllowed(["GET", "POST"],
                                  "Method not allowed in this server")
def test_redirect(self):
    """Requesting the encoded id redirects (302) and leaves ``views`` at 1."""
    created = Url.create(url=self._test_url)
    short_path = f"/{Base62.encode(created.id)}"
    with app.test_client() as c:
        resp = c.get(short_path)
        self.assertEqual(resp.status_code, 302)
        # Re-read the row so the view counter reflects the request above.
        refreshed = Url.select().where(Url.url == self._test_url).first()
        self.assertEqual(refreshed.views, 1)
def shortener(request):
    """Show the form plus stored URLs (GET) or store a posted URL (POST).

    POST takes the value of the first field in the raw request body. The
    response shows the URL together with the id of its (new or existing) row.
    A body without "=" gets a 400 response; other methods get a 405.
    """
    if request.method == "GET":
        urlDic = "".join(
            "URL " + str(url.url) + " Shortened URL " + str(url.id) + "<br/>"
            for url in Url.objects.all())
        resp = ("<body><html> <form id= shortUrl method= post> "
                "<fieldset><legend>URL shortener</legend><label> Url</label> "
                "<input id= campo1 name= Url type= text /></label> "
                "<input id= campo2 name= pressbutton type= submit "
                "value= Shorten URL/> "
                "</fieldset> </form> <p> URL Dictionary </p>"
                + urlDic + "</body></html>")
        return HttpResponse(resp)

    if request.method != "POST":
        # Previously fell through to an unbound ``resp``.
        return HttpResponse(status=405)

    fields = request.body.split("=")
    if len(fields) < 2:
        return HttpResponse(status=400)
    url = fields[1].split("&")[0]

    # Keep the submitted text in ``url``; rebinding it to the model instance
    # broke the string concatenation below whenever the URL already existed.
    try:
        entry = Url.objects.get(url=url)
    except Url.DoesNotExist:
        entry = Url(url=url)
        entry.save()
    urlId = str(entry.id)

    resp = ("<html><body>URL " + url + " Shortened URL "
            "<a href= http://" + url + ">" + urlId + "</a> "
            "</body></html>")
    return HttpResponse(resp)
def shortener(request):
    """Show the form plus stored URLs (GET) or store a posted URL (POST).

    POST takes the value of the first field in the raw request body. The
    response shows the URL together with the id of its (new or existing) row.
    A body without "=" gets a 400 response; other methods get a 405.
    """
    if request.method == "GET":
        urlDic = "".join(
            "URL " + str(url.url) + " Shortened URL " + str(url.id) + "<br/>"
            for url in Url.objects.all())
        resp = ("<body><html> <form id= shortUrl method= post> "
                "<fieldset><legend>URL shortener</legend><label> Url</label> "
                "<input id= campo1 name= Url type= text /></label> "
                "<input id= campo2 name= pressbutton type= submit "
                "value= Shorten URL/> "
                "</fieldset> </form> <p> URL Dictionary </p>"
                + urlDic + "</body></html>")
        return HttpResponse(resp)

    if request.method != "POST":
        # Previously fell through to an unbound ``resp``.
        return HttpResponse(status=405)

    fields = request.body.split("=")
    if len(fields) < 2:
        return HttpResponse(status=400)
    url = fields[1].split("&")[0]

    # Keep the submitted text in ``url``; rebinding it to the model instance
    # broke the string concatenation below whenever the URL already existed.
    try:
        entry = Url.objects.get(url=url)
    except Url.DoesNotExist:
        entry = Url(url=url)
        entry.save()
    urlId = str(entry.id)

    resp = ("<html><body>URL " + url + " Shortened URL "
            "<a href= http://" + url + ">" + urlId + "</a> "
            "</body></html>")
    return HttpResponse(resp)
def api_add_url():
    """Validate the request input and insert a Url row for the current user.

    Raises APIValueError when ``url`` is empty or when ``frequent`` /
    ``top_num`` are non-empty and do not match their patterns. Empty
    ``frequent`` falls back to 30 and empty ``top_num`` to 1.
    """
    #check_admin()
    form = ctx.request.input(url='', frequent='', top_num='', summary='')
    address = form.url.strip()
    frequent = form.frequent.strip()
    top_num = form.top_num.strip()
    summary = form.summary

    if not address:
        raise APIValueError('url', 'url cannot be empty.')
    if frequent and not _RE_FREQUENT.match(frequent):
        raise APIValueError('frequent', 'frequent MUST be num. or empty')
    if top_num and not _RE_TOP_NUM.match(top_num):
        raise APIValueError('top_num', 'top_num must be 1-999 or empty.')

    user = ctx.request.user
    record = Url(
        user_id=user.id,
        url=address,
        frequent=30 if frequent == '' else frequent,
        top_num=1 if top_num == '' else top_num,
        summary=summary,
    )
    record.insert()
    return record
def shortenUrl(request):
    """Render the form (GET) or shorten the posted ``urlToShorten`` (POST).

    An empty submission redirects to "/". A URL failing ``val`` renders
    ``invalid.html``. Otherwise the URL is stored without its scheme (reusing
    an existing row when present) and ``shortened.html`` is rendered with
    ``dehydrate`` of the row id. Other methods get a 405 response.
    """
    if request.method == "GET":
        return HttpResponse(get_template("index.html").render())

    if request.method != "POST":
        return HttpResponse(status=405)

    url = request.POST.get("urlToShorten", "")
    if url == "":
        return HttpResponseRedirect("/")

    try:
        # The old test (``not A or not B``) was true for nearly every input,
        # so URLs that already had a scheme got a second "http://" prepended.
        if not url.startswith(("http://", "https://")):
            url = "http://" + url
        val(url)
    except ValidationError:
        return HttpResponse(get_template("invalid.html").render())

    # Rows are keyed by the URL with the scheme text removed.
    # NOTE(review): replace() removes every occurrence, not just the prefix.
    url = url.replace("http://", "")
    url = url.replace("https://", "")

    matches = Url.objects.all().filter(actualUrl=url)
    if len(matches) > 0:
        record = matches[0]
    else:
        record = Url()
        record.actualUrl = url
        record.save()

    t = get_template("shortened.html")
    return HttpResponse(t.render(Context({
        "actual_url": url,
        "shortened_url": dehydrate(record.id),
    })))
def query_bitly(longUrl, user):
    """Send ``longUrl`` to the shortening endpoint and return the short URL.

    ``http://`` is prepended when neither the URL nor its unquoted form
    starts with http:// or https://. On success a Url entity is stored for
    ``user`` and the short URL is returned; on failure an apology string is
    returned instead.
    """
    def has_scheme(text):
        return (text[:7].lower() == 'http://'
                or text[:8].lower() == 'https://')

    if has_scheme(longUrl) or has_scheme(urllib.unquote(longUrl)):
        target = longUrl
    else:
        target = 'http://' + longUrl
    l = urllib.quote(target, '')

    result = urlfetch.fetch(JMP_URL + l)
    logging.debug('posted to bit.ly: %s' % l)
    if result.status_code != 200:
        return 'Sorry! Query failed.'

    data = json.JSONDecoder().decode(result.content)
    status = data.get('status_code')
    if status == 403:
        logging.warning('RATE LIMIT EXCEEDED')
        return 'Sorry! Experiencing rate limits from bit.ly'
    if status != 200:
        logging.error(result.content)
        return 'Sorry! bit.ly did not accept the query. Make sure that your message only contains a URL.'

    payload = data.get('data')
    url = Url(longUrl=payload.get('long_url'),
              shortUrl=payload.get('url'),
              creator=user)
    url.put()
    return payload.get('url')
def pagina(request):
    """Shorten a posted URL, or render the list of stored URLs.

    POST/PUT: reads ``url`` from ``request.POST``, passes it through
    ``acortarUrl``, stores it when no row has that ``original_url`` yet, and
    responds with the row id as plain text.

    Any other method: renders ``pagina.html`` with an ordered list linking
    each original URL and its id.
    """
    if request.method == "POST" or request.method == 'PUT':
        url = acortarUrl(request.POST.get('url'))
        try:
            url_encontrada = Url.objects.get(original_url=url)
        except Url.DoesNotExist:
            url_encontrada = Url(original_url=url)
            url_encontrada.save()
        return HttpResponse(str(url_encontrada.id))

    # The former GET branch built a string that was immediately discarded
    # (and rebound the queryset it was iterating); the listing below is the
    # only output that was ever used.
    filas = []
    for elemento in Url.objects.all():
        filas.append(
            '<li><a href ="' + str(elemento.original_url) + '">'
            + str(elemento.original_url) + '</a>' + " = "
            + '<a href="' + str(elemento.id) + '">'
            + str(elemento.id) + '</a>')
    respuesta = "<ol>" + "".join(filas) + "</ol>"

    template = get_template("pagina.html")
    cont = {'contenido': respuesta}
    return HttpResponse(template.render(Context(cont)))
def make_it(original_url):
    """Store a new Url for ``original_url`` and return it as a JSON response."""
    entry = Url()
    entry.original = original_url
    entry.short_url = hashfunc()

    # Register the object with the storage engine, then persist it.
    models.storage.new(entry)
    models.storage.save()
    return jsonify(entry.to_dict())
def create():
    """
    Register the url

    Reads ``origin_url`` from the request, then tries up to MAX_TRY freshly
    generated short urls until one is stored in redis (``nx=True``).
    Raises ApiException when the parameter is missing, the Url fails
    validation, or every attempt collided.
    """
    origin_url = request.values.get('origin_url', None)
    if not origin_url:
        raise ApiException('"origin_url" is required')

    MAX_TRY = 5
    for _attempt in range(MAX_TRY):
        try:
            u = Url(short_url=Url.gen_short_url(), origin_url=origin_url)
        except ValidationError as e:
            raise ApiException(str(e))

        stored = redis_cli.set(u.redis_key, u.json(), nx=True,
                               ex=u.SHORT_URL_EXPIRE_SECONDS)
        if stored:
            break
    else:
        # Every attempt hit a key that already existed.
        raise ApiException('Collision happened. Please try again.')

    return jsonify({'short_url': u.short_url})
async def test(url: UrlSchema):
    """Create a short URL from the submitted schema.

    Uses ``customCode`` when given, otherwise a random 8-character code.

    Raises:
        HTTPException(400): the short code is already in use.
        HTTPException(500): saving the document failed.
    """
    # Keep the request payload under its own name; the stored document gets
    # a separate one so the two are never confused.
    payload = dict(url)
    shortCode = payload["customCode"] or shortuuid.ShortUUID().random(length=8)

    # Join with "/" explicitly: os.path.join is for filesystem paths and
    # uses the platform separator.
    shortUrl = config("BASE_URL").rstrip("/") + "/" + shortCode

    urlExists = Url.objects(shortCode=shortCode)
    if len(urlExists) != 0:
        raise HTTPException(
            status_code=400,
            detail="Short code is invalid, It has been used.")

    try:
        document = Url(longUrl=payload["longUrl"],
                       shortCode=shortCode,
                       shortUrl=shortUrl)
        document.save()
        return {
            "message": "Successfully shortened URL.",
            "shortUrl": shortUrl,
            "longUrl": payload["longUrl"],
        }
    except Exception as e:
        print(e)
        raise HTTPException(status_code=500,
                            detail="An unknown error occurred.")
def main(request, **kwargs):
    """Render ``main.html`` with the URL form; on a valid POST also save the Url.

    The template context always holds ``form``; ``url`` is added only when a
    posted form validated and the new row was saved.
    """
    if request.method != "POST":
        c = {"form": UrlForm()}
    else:
        form = UrlForm(request.POST)
        c = {"form": form}
        if form.is_valid():
            url = Url()
            url.original_url = form.cleaned_data['url']
            url.save()
            c["url"] = url

    return render_to_response("main.html", c,
                              context_instance=RequestContext(request))
def create_url():
    """Return the short form of the JSON ``url`` field, creating it if needed.

    Aborts with 422 when the request is not JSON or the URL is not valid.
    """
    if not request.is_json:
        abort(422)

    redirect_url = request.json.get('url', '').strip()
    if not is_valid_url(redirect_url):
        abort(422)

    # Check if it already exists
    existing = Url.query.filter(Url.redirect == redirect_url).first()
    if existing:
        return jsonify({'shorter': existing.get_full_short()})

    # The id comes from the sequence first because the slug is built from it.
    next_id = db.session.execute(Sequence("urls_id_seq"))
    created = Url(id=next_id, redirect=redirect_url,
                  slug=to_emoji_slug(next_id))
    db.session.add(created)
    db.session.commit()
    return jsonify({'shorter': created.get_full_short()})
def _get_urls_by_user_and_page(user_id):
    """Return ``(urls, page)`` for one user's urls, newest first.

    Rows whose ``status`` is 2 are excluded. ``page`` is built from the total
    row count and the current page index.
    """
    # NOTE(review): the total counts every Url, not only this user's rows with
    # status != 2, so the page count may be too high -- confirm intent.
    total = Url.count_all()
    page = Page(total, _get_page_index())
    # A space was missing between the placeholder and ``and``.
    urls = Url.find_by(
        "where user_id = ? and `status` != 2 "
        "order by create_time desc limit ?,?",
        user_id, page.offset, page.limit)
    return urls, page
def _get(self, version, method, url_or_urls, **kwargs):
    """
    _get makes the actual call to api.embed.ly

    ``url_or_urls`` is a single url or a list of at most 20 urls. Extra
    keyword arguments are sent as query parameters; ``key`` defaults to the
    client's key.

    Returns a Url for a single url, or a list of Url objects (one per
    requested url) for a list. On a non-200 response each Url wraps an error
    dict holding the status code.

    Raises ValueError for empty input, more than 20 urls or a missing key.
    """
    if not url_or_urls:
        raise ValueError('%s requires a url or a list of urls given: %s' %
                         (method.title(), url_or_urls))

    #A flag we can use instead of calling isinstance all the time.
    multi = isinstance(url_or_urls, list)

    # Throw an error early for too many URLs
    if multi and len(url_or_urls) > 20:
        raise ValueError('Embedly accepts only 20 urls at a time. Url '
                         'Count:%s' % len(url_or_urls))

    #make sure that a key was set on the client or passed in.
    key = kwargs.get('key', self.key)
    if not key:
        raise ValueError('Requires a key. None given: %s' % (key))
    kwargs['key'] = key

    query = urllib.urlencode(kwargs)
    if multi:
        query += '&urls=%s&' % ','.join(
            [urllib.quote(url) for url in url_or_urls])
    else:
        query += '&url=%s' % urllib.quote(url_or_urls)

    url = 'http://api.embed.ly/%s/%s?%s' % (version, method, query)

    http = httplib2.Http(timeout=self.timeout)
    headers = {'User-Agent': self.user_agent}
    resp, content = http.request(url, headers=headers)

    if resp['status'] == '200':
        data = json.loads(content)
        if kwargs.get('raw', False):
            data['raw'] = content
    else:
        error = {
            'type': 'error',
            'error': True,
            'error_code': int(resp['status']),
        }
        # For a list request every url gets its own copy of the error;
        # previously the urls were paired with the error dict's *keys*.
        data = [dict(error) for _ in url_or_urls] if multi else error

    if multi:
        return [Url(item, method, single)
                for single, item in zip(url_or_urls, data)]
    return Url(data, method, url_or_urls)
def test_url_exists(self):
    """Posting an already stored url answers 200 and keeps a single row."""
    Url.create(url=self._test_url)
    payload = {"url": self._test_url}
    with app.test_client() as c:
        resp = c.post("/", data=payload)
        self.assertEqual(resp.status_code, 200)
        # Test url created in db
        matching = Url.select().where(Url.url == self._test_url)
        self.assertEqual(matching.count(), 1)
def test_get_origin_url(client):
    """A short url stored in redis resolves to its origin url via the API."""
    stored = Url(origin_url=fake.uri(), short_url='fake_url')
    redis_cli.set(stored.redis_key, stored.json())

    resp = client.get(f'/v1/url/{stored.short_url}')

    assert resp.status_code == 200
    returned = resp.json.get('origin_url')
    assert returned == stored.origin_url
def report_url(request):
    """Store the posted ``url`` and answer "SUCCESS" or "ERROR".

    A POST without a ``url`` field, or one whose save fails, answers "ERROR".
    Methods other than POST get an explicit 405 response.
    """
    if request.method != 'POST':
        # Make the non-POST outcome explicit instead of falling through.
        return HttpResponse(status=405)

    # A missing field is reported like any other failure rather than
    # escaping as an unhandled lookup error.
    url = request.POST.get('url')
    if url is None:
        return HttpResponse("ERROR")

    try:
        Url(url=url).save()
    except Exception:
        # Best effort by design: any save failure is reported to the client.
        return HttpResponse("ERROR")
    return HttpResponse("SUCCESS")
def test_create_two_urls_with_same_tag(self):
    """Two urls tagged with the same name end up with equal first tags."""
    saved = []
    for address in ("http://example.com/1", "http://example.com/2"):
        entry = Url(url=address, title="My Title")
        entry.save()
        entry.add_tags_from_string("tag1")
        saved.append(entry)

    url1, url2 = saved
    self.assertEquals(url1.tags.all()[0], url2.tags.all()[0])
def import_urls_from_delicious(login, password, opener=default_opener):
    """Save a Url for every bookmark ``opener`` yields; return the saved Urls.

    Each bookmark is a 5-tuple; only its first item (the href) is stored.
    """
    imported = []
    for href, _tags, _title, _desc, _time in opener(login, password):
        entry = Url(url=href)
        entry.save()
        imported.append(entry)
    return imported
def index(request, tag=None):
    """List urls (optionally only those of ``tag``) and accept new ones.

    A valid POST saves the url with its title and tags (also adding ``tag``
    when one is given) and redirects to the same path. An IntegrityError
    while saving is ignored.
    """
    if not tag:
        urls = Url.objects.all()
    else:
        tag_model = get_object_or_404(Tag, name=tag)
        urls = tag_model.url_set.all()

    if request.method != "POST":
        form = UrlForm()
    else:
        form = UrlForm(request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            url_data = cleaned['url']
            title = cleaned['title']
            tags = cleaned['tags']
            try:
                url = Url(url=url_data, title=title)
                url.fill_title()
                url.save()
                url.add_tags_from_string(tags)
                if tag:
                    url.add_tag(tag)
            except IntegrityError:
                pass
            return HttpResponseRedirect(request.path)

    context = {'urls': urls, 'form': form}
    return render_to_response("index.html", context)
def shortenURL(actualUrl):
    """Return ``(created, shortURL)`` for ``actualUrl``.

    ``shortURL`` comes from ``parseURL``. ``created`` is True when a new row
    had to be saved and False when a row with that short URL already existed.
    """
    shortURL = parseURL(actualUrl)
    try:
        Url.objects.get(shortURL=shortURL)
    except Url.DoesNotExist:
        Url(actualUrl=actualUrl, shortURL=shortURL).save()
        return True, shortURL
    return False, shortURL
def post(self):
    """Store a url for the current user.

    Returns the new row as a dict with status 201, or a message with status
    500 when the commit raises IntegrityError.
    """
    args = parser.parse_args()
    print(current_user)
    new_url = Url(user_id=current_user.get_id(), url=args['url'])
    print(new_url)
    db.session.add(new_url)
    try:
        db.session.commit()
    except IntegrityError:
        db.session.rollback()
        return "url schon vorhanden", 500
    else:
        return new_url.as_dict(), 201
def post(self, request):
    """
    Saves a new URL to the db. Accepts a long url and a possible slug as
    post parameters.

    * If the long url is missing or can't be validated then error 404 is
      raised.
    * If the requested slug is available it is used.
    * Otherwise, if this url already has a stored slug (preferably the
      requested one), that slug is returned and nothing new is created.
    * Otherwise a new slug is generated.

    :Return: Saved slug
    """
    from django.core.exceptions import ValidationError

    # Make sure the slug is url safe.
    requested_slug = request.POST.get('requested_slug', None)
    if requested_slug:
        requested_slug = urllib.quote(requested_slug)

    requested_url = request.POST.get('requested_url', None)
    if not requested_url:
        # A missing parameter previously crashed on ``None.startswith``.
        raise Http404('URL Invalid')

    # Validate the requested url.
    if not requested_url.startswith(('http://', 'https://')):
        requested_url = 'http://%s' % requested_url
    try:
        URLValidator()(requested_url)
    except ValidationError:
        # Http404 is an exception: it has to be raised, not returned.
        raise Http404('URL Invalid')

    # Find the proper slug for this url.
    if slug_available(requested_slug):
        slug = requested_slug
    else:
        # The slug is taken. Maybe it was taken by this very url; failing
        # that, any slug previously made for this url will do.
        existing = None
        try:
            existing = Url.objects.get(url=requested_url, slug=requested_slug)
        except (Url.DoesNotExist, Url.MultipleObjectsReturned):
            previous = list(Url.objects.filter(url=requested_url)[:1])
            if previous:
                existing = previous[0]

        if existing is not None:
            # We already have a record in the db, so return without creating.
            return HttpResponse(existing.slug)
        slug = generate_slug(4)

    # Save the new shortened url to the db.
    shortened_url = Url(url=requested_url, slug=slug)
    shortened_url.save()

    # Return the saved slug to the user so they can copy and use it.
    return HttpResponse(slug)
def test_get_url_with_no_title(self):
    """fill_title leaves the title empty when the fetched content is "foo.zip"."""
    address = "http://example.com/foo.zip"

    # Record: reading the response yields the fake content.
    response = self.mocker.mock()
    response.read()
    self.mocker.result("foo.zip")

    # Record: opening the address yields the mocked response.
    urlopen = self.mocker.replace("urllib.urlopen")
    urlopen(address)
    self.mocker.result(response)
    self.mocker.replay()

    url = Url(url=address)
    url.fill_title()
    self.assertEquals(url.title, "")
def processPost(url):
    """Store ``url`` (adding ``http://`` if it has no scheme) and show its id.

    An empty ``url`` gets an HttpResponseBadRequest. Otherwise the response
    links the real url and the id of its new or existing row.
    """
    if url == "":
        return HttpResponseBadRequest("ERROR: EMPTY POST")
    if not url.startswith(("http://", "https://")):
        url = "http://" + url

    try:
        newUrl = Url.objects.get(longUrl=url)
    except Url.DoesNotExist:
        newUrl = Url(longUrl=url)
        newUrl.save()

    real_part = "<p>url real: <a href=" + url + ">" + url + "</a></p>"
    short_part = ("<p>url acortada: <a href=" + str(newUrl.id) + ">"
                  + str(newUrl.id) + "</a></p>")
    return HttpResponse(real_part + short_part)
def test_add_url_with_tags(self):
    """A messy comma-separated tag string yields exactly the five clean tags."""
    url = Url(url="http://example.com", title="My Title")
    url.save()
    url.add_tags_from_string("tag1, tag2 ,tag3 , tag4,tag5,,,")

    tags = url.tags.order_by("name")
    expected_names = ["tag1", "tag2", "tag3", "tag4", "tag5"]

    self.assertEquals(len(tags), 5)
    for position, name in enumerate(expected_names):
        self.assertEquals(tags[position].name, name)
def savebookmark(TitleF, UrlF, DescriptionF, TagF, PrivateF, UserF):
    """Create or update the bookmark for ``UrlF`` and attach ``UserF`` to it.

    Url, Title, Description, Private and Tag rows are reused when a row with
    the same value exists and created otherwise. The bookmark is looked up by
    its url; an existing one has its title, description and private flag
    overwritten. ``TagF`` is a whitespace-separated list of tag names.
    ``PrivateF`` counts as true only when it equals the string 'True'.
    """
    def _fetch_or_create(model, **lookup):
        # Return the first row matching ``lookup``, saving a new one if none.
        found = list(model.objects.filter(**lookup)[:1])
        if found:
            return found[0]
        instance = model(**lookup)
        instance.save()
        return instance

    TitleF = unicode(TitleF)
    UrlF = unicode(UrlF)
    DescriptionF = unicode(DescriptionF)
    # Was assigned to a misspelled ``Tagf``, leaving TagF unconverted.
    TagF = unicode(TagF)

    UrlB = _fetch_or_create(Url, url=UrlF)
    TitleB = _fetch_or_create(Title, title=TitleF)
    DescriptionB = _fetch_or_create(Description, description=DescriptionF)
    PrivateB = _fetch_or_create(Private, private=(PrivateF == 'True'))

    existing = list(Bookmark.objects.filter(url=UrlB)[:1])
    if existing:
        b2 = existing[0]
        b2.title = TitleB
        b2.description = DescriptionB
        b2.private = PrivateB
    else:
        b2 = Bookmark(title=TitleB, url=UrlB,
                      description=DescriptionB, private=PrivateB)
    # Save before touching the many-to-many relation.
    b2.save()
    b2.user.add(UserF)
    b2.save()

    # split() without an argument drops the empty names that repeated or
    # leading/trailing spaces used to produce.
    for t in sorted(TagF.split()):
        TagB = _fetch_or_create(Tag, tag=t)
        TagB.user.add(UserF)
        TagB.save()
        b2.tag.add(TagB)
    b2.save()
def test_slug_shortening(self):
    """Slugs of urls sharing the mock mark grow by one character per save."""
    templates = (
        'http://lab.tmp.br/%s/index.html',
        'http://another.lab.tmp.br/%s/index.html',
        'http://yetanother.lab.tmp.br/%s/index.html',
    )
    saved = []
    for template in templates:
        entry = Url(url=template % MOCK_MARK)
        entry.save()
        saved.append(entry)

    # Only the first two slugs are checked; the third url is just saved.
    u1, u2 = saved[0], saved[1]
    self.assertEqual(u1.slug, MOCK_MARK[:MIN_SLUG])
    self.assertEqual(u2.slug, MOCK_MARK[:MIN_SLUG+1])
def urlacorta(request, recurso):
    """Shorten a posted url (POST) or list / redirect stored urls (GET).

    POST: takes the text after the first "=" of the raw body, normalises an
    encoded ``http://`` prefix and stores the url unless already present.
    GET: an empty ``recurso`` lists the stored urls; otherwise ``recurso`` is
    used as a row id and the client is redirected to that row's url.
    Unknown ids and other HTTP methods get a not-found response.
    """
    formulario = ('<form action="" method="POST">Escribir url larga:'
                  '<input type="text" name="nombre" value="" />'
                  '<input type="submit" value="Acortar" /></form>')

    if request.method == 'POST':
        campos = request.body.split('=')
        # A body without "=" is handled like an empty submission.
        cuerpo = campos[1] if len(campos) > 1 else ""
        if cuerpo == "":
            return HttpResponseNotFound(formulario + "Url no introducida")
        if cuerpo.find("http%3A%2F%2F") >= 0:
            cuerpo = cuerpo.split('http%3A%2F%2F')[1]
        cuerpo = "http://" + cuerpo
        try:
            newurl = Url.objects.get(url=cuerpo)
        except Url.DoesNotExist:
            newurl = Url(url=cuerpo)
            newurl.save()
        return HttpResponse(
            "URL original: " + "<a href=" + cuerpo + ">" + cuerpo + "</a>"
            + "</br>URL acortada: " + "<a href=" + str(newurl.id) + ">"
            + "127.0.0.1:8000" + "/" + str(newurl.id) + "</a>"
            + "</p><a href=" + "http://127.0.0.1:8000" + "> volver </a>")

    if request.method == 'GET':
        if recurso == '':
            return HttpResponse(formulario + "Urls almacenadas:</br>"
                                + printurls())
        try:
            url = Url.objects.get(id=recurso)
        except (Url.DoesNotExist, ValueError):
            # ValueError covers a ``recurso`` that is not a valid id.
            return HttpResponseNotFound(
                "<h1>Pagina no encontrada</h1></p>" + "<a href="
                + "http://" + "127.0.0.1:8000" + ">volver</a>")
        return HttpResponseRedirect(url.url)

    # A view must return a response object; this used to return a bare str.
    return HttpResponseNotFound(
        "<h1>404 Not Found</h1></p><a href=" + "http://127.0.0.1:8000"
        + "> volver </a>")
def get_currently_selected_subgenre(self):
    """Return the Url of the currently selected subgenre, or None if there is none."""
    tag = self._get_currently_selected_subgenre_tag()
    if not tag:
        return None
    return Url(tag.get('href'), tag.string)
def extract_from_url(url):
    '''Fetch ``url`` and extract its title and cleaned body text.

    The page is parsed with BeautifulSoup; script, style and meta elements
    are removed before the text is taken. The snippet is the first 100
    characters of the body with commas replaced by dashes.

    Returns ``(Url, body_str)`` on success, or None when the response status
    is not 200, the page has no title element, or ``url`` does not start
    with "http".
    '''
    req = requests.get(url, allow_redirects=True, timeout=10)
    req.encoding = 'utf-8'

    # Compare by value: ``is not 200`` tested object identity.
    if req.status_code != 200:
        # Not inside an exception handler, so logging.exception was wrong.
        logging.warning(
            "Warning: %s has a status code of: %s omitted from database.\n",
            req.url, req.status_code)
        return None

    bs_obj = BeautifulSoup(req.text, "lxml")
    if not hasattr(bs_obj.title, 'string') or not url.startswith('http'):
        return None

    title = bs_obj.title.string
    checks = ['script', 'style', 'meta', '<!--']
    for chk in bs_obj.find_all(checks):
        chk.extract()
    body = bs_obj.get_text()

    # Collapse leading/trailing whitespace on every line to one space.
    pattern = re.compile(r'(^[\s]+)|([\s]+$)', re.MULTILINE)
    body_str = re.sub(pattern, " ", body)
    snippet = body_str[:100].replace(',', '-')

    if title is None:
        title = u'Untitled'

    u = Url(url=url, title=title, snippet=snippet)
    # The old calls passed extra positional args without placeholders, which
    # made the logging module report a formatting error.
    logging.info("Processed %s ...", url)
    logging.info("%s %s", u.title, body_str)
    return u, body_str
def delete_url(url_id):
    """Close the url with ``url_id`` and redirect to ``/index``.

    Raises APIResourceNotFoundError when no such url exists. The redirect is
    issued by raising ``seeother``.
    """
    #check_admin()
    target = Url.get(url_id)
    if target is None:
        raise APIResourceNotFoundError('url')

    close_url(target)
    raise seeother('/index')
def shorten():
    """Return the hash and visit count for the posted ``url``.

    A row is created with a fresh, unused hash when the normalized url is not
    stored yet. A missing or invalid url answers 400 with a message.
    """
    url = request.form.get('url')
    if not url:
        return 'No URL provided', 400
    if not is_url(url):
        return 'The URL is invalid', 400

    url = normalize_url(url)
    db_value = Url.query.filter_by(forward_to=url).first()

    if not db_value:
        # Draw hashes until one is found that no row uses yet.
        while True:
            candidate = generate_hash()
            if not Url.query.filter_by(hash=candidate).first():
                break
        db_value = Url(hash=candidate, forward_to=url)
        db.session.add(db_value)
        db.session.commit()

    body = {
        'hash': db_value.hash,
        'visited_times': db_value.visited_times
    }
    return body, 200
def crawler(url=None, depth=1):
    """Record ``url``, fetch it and schedule its same-site links.

    Does nothing when ``depth`` is negative or the url is already stored.
    Links that start with the page's netloc are queued through
    ``crawler.delay`` with ``depth - 1``; the page is then passed to
    ``parse_html``. A page that cannot be fetched or decoded is skipped after
    its Url row has been stored.
    """
    print('Start url:' + str(url))
    if depth < 0:
        return
    if Url.query.filter(Url.url == url).count() != 0:
        return

    u = Url(url)
    db.session.add(u)
    db.session.commit()

    try:
        html = urllib2.urlopen(url).read().decode('utf8')
    except (ValueError, urllib2.URLError, UnicodeError):
        # URLError (the base class of HTTPError) also covers connection
        # failures, which previously escaped this handler.
        print('ERROR: Can\'t get html from url')
        return

    print('Parse links')
    bs = BeautifulSoup(html, 'html.parser')
    netloc = get_netloc(url)
    for link in bs.find_all('a', href=True):
        new_url = link['href']
        if not new_url.startswith('http'):
            # Resolve relative links against the current page.
            new_url = urlparse.urljoin(url, new_url)
        if new_url.startswith(netloc):
            crawler.delay(new_url, depth=depth - 1)
    parse_html(url, bs)
def index():
    """Register ``origin`` under ``alias`` (POST); reply with a status dict.

    A given alias that already exists answers 410. A missing alias is
    replaced by a random unused 6-character one. A missing ``origin``
    answers 400. Every other case, including non-POST requests, answers
    ``ok``.
    """
    body = request.get_json()
    alias = body.get('alias')
    origin = body.get('origin')

    ok = {'code': 200, 'message': 'ok.'}
    if request.method != 'POST':
        return ok

    def alias_taken(candidate):
        return db.session.query(
            db.exists().where(Url.alias == candidate)).scalar()

    if alias is not None:
        if alias_taken(alias):
            return {'code': 410, 'message': 'Alias repeat'}, 410
    else:
        chars = string.ascii_letters + string.digits
        length = 6
        # Keep drawing random aliases until an unused one turns up.
        while alias is None:
            candidate = ''.join(choice(chars) for x in range(length))
            if not alias_taken(candidate):
                alias = candidate

    if origin is None:
        return {'code': 400, 'message': 'Validation Failed'}, 400

    url = Url(alias=alias, origin=origin)
    db.session.add(url)
    db.session.commit()
    return ok
def redirect_to_long(path=""):
    """Redirect to the long url stored for short url ``path``.

    Raises NotFound when no model matches.
    """
    url_model = Url.get_by_short_url(path)
    if url_model:
        return redirect(url_model.long_url)
    raise NotFound()
def post(self):
    """Shorten ``url`` for the current user, optionally with a custom path.

    Answers 403 when the user already shortened this url or when the
    requested ``short_url`` is taken; otherwise stores the Url and answers
    201 with the marshalled row.
    """
    add_argument = self.parser.add_argument
    add_argument('url', type=url_validator, required=True,
                 help='url is invalid')
    add_argument('group', type=url_group, required=True,
                 help='invalid group id', dest='group_id')
    add_argument('short_url', type=short_url,
                 help='short url must be at least 3 characters in length')
    args = self.parser.parse_args()

    duplicate = Url.query.filter(
        (Url.path == args['url']) & (Url.user_id == g.user.id)).first()
    if duplicate is not None:
        return {'message': '{0} has been shortened'.format(args['url'])}, 403

    length = random.randint(4, 8)
    if args['short_url'] == '':
        custom = False
        short_path = self.shorten(length, g.user.id)
    else:
        short_path = args['short_url']
        taken = Url.query.filter((Url.short_path == short_path)).first()
        if taken is not None:
            return {'message': '{0} is not available'.format(short_path)}, 403
        custom = True

    url = Url(group_id=args['group_id'], user_id=g.user.id,
              path=args['url'], short_path=short_path, custom=custom)
    db.session.add(url)
    db.session.commit()
    return marshal(url, self.urlField, envelope='data'), 201
def acortador(request, recurso):
    """Show the form and stored urls, redirect a stored value, or store a url.

    GET with an empty ``recurso`` shows the form followed by every stored
    ``valor = url`` pair; otherwise ``recurso`` is read as an integer valor
    and a page refreshing to that url is returned.
    POST stores the url found after the first "=" of the raw body (unless it
    is already stored) under valor ``len(urls) + 1``.
    A non-numeric or unknown ``recurso`` yields the "not found" page; a body
    without "=" answers 400 and other methods answer 405.
    """
    formulario = '<form action="" method="POST">'
    formulario += 'Acortar url: <input type="text" name="valor">'
    formulario += '<input type="submit" value="Enviar">'
    formulario += '</form>'
    urls = Url.objects.all()

    if request.method == "GET":
        if recurso == "":
            respuesta = "<html><body>" + formulario + "</body></html>"
            for url in urls:
                respuesta += str(url.valor) + " = " + url.url + "<br>"
        else:
            try:
                recurso = int(recurso)
                objeto = Url.objects.get(valor=recurso)
                # The object exists
                respuesta = ("<html><body><meta http-equiv='refresh'"
                             "content='1 url=" + objeto.url + "'>"
                             + "</p>" + "</body></html>")
            except (Url.DoesNotExist, ValueError):
                # ValueError: ``recurso`` was not a number (used to escape).
                respuesta = "<html><body> Error: Recurso no encontrado </body></html>"
    elif request.method == "POST":
        partes = request.body.split("=", 1)
        if len(partes) < 2:
            return HttpResponse(status=400)
        cuerpo = partes[1]
        if cuerpo.find("http%3A%2F%2F") >= 0:
            cuerpo = cuerpo.split('http%3A%2F%2F')[1]
        cuerpo = "http://" + cuerpo
        try:
            objeto = Url.objects.get(url=cuerpo)
            respuesta = ("<html><body> Ya tengo esta valor guardado,"
                         + objeto.url + " = " + str(objeto.valor)
                         + "</body></html>")
        except Url.DoesNotExist:
            valor = len(urls) + 1
            nuevo_objeto = Url(url=cuerpo, valor=valor)
            nuevo_objeto.save()
            respuesta = ("<html><body>Nuevo valor guardado: "
                         + nuevo_objeto.url + " = " + str(nuevo_objeto.valor)
                         + "</body></html>")
    else:
        # ``respuesta`` was unbound here before.
        return HttpResponse(status=405)

    return HttpResponse(respuesta)
def add_url(url: str):
    """
    Parse the XML file behind ``url`` and store the result in the database.

    This can take a significant amount of time, which is why it runs as a
    celery task. The Url is marked processed only when parsing produced no
    error text.

    :param url: link to a XML file
    :return: {'id': <id of created Url in DB>}
    """
    xml_dict = parse_xml_file(url)
    address = xml_dict['url']
    error_text = xml_dict['error_text']

    url_object = Url(url=address, processed=not error_text, error=error_text)
    url_object.keys = [Key(value=key) for key in xml_dict['keys']]

    s.add(url_object)
    s.commit()
    # sleep(20)  # uncomment this, if you want to test *real* async :)
    return {'id': url_object.id}
def test_url_insert_new_invalid_format_shortcode_failure(self):
    """Inserting a url with the shortcode 'xop' must raise InvalidShortcode."""
    with self.app.app_context():
        # Wide catch, scope narrowed for preventing nested Exception override
        with pytest.raises(Exception) as exc:
            Url.insert_url(url='scenario4.com', shortcode='xop')

        # Checked after the ``raises`` block so it actually runs. The old
        # ``isinstance(exc.type, InvalidShortcode.__class__)`` compared
        # against the metaclass and was true for any exception class.
        assert issubclass(exc.type, InvalidShortcode)  # Sanity check
def shorten():
    """Render the success page with the short url for the ``url`` parameter.

    Raises BadRequest when the parameter is missing or empty.
    """
    long_url = request.args.get("url")
    if not long_url:
        raise BadRequest()

    url_model = Url.shorten(long_url)
    # Shown to the user as "<host>/<short code>".
    short_url = request.host + "/" + url_model.short_url
    return render_template("success.html", short_url=short_url)
def redir(encoded_url):
    """Redirect to the long url whose base-62 encoded id is ``encoded_url``.

    Returns ``notfound_error()`` when the id is unknown and
    ``undefined_error()`` for any other failure.
    """
    try:
        # ``url_id`` rather than ``id`` so the builtin is not shadowed.
        url_id = base_conv(encoded_url, input_base=62, output_base=10)
        url = Url.retrieve(url_id)
        return redirect(url.long_url)
    except excepts.NotFoundException:
        return notfound_error()
    except Exception:
        # ``Exception`` instead of a bare ``except`` so SystemExit and
        # KeyboardInterrupt are not swallowed.
        return undefined_error()
def shrink():
    """Store the posted ``url_input`` (or refresh its update date) and redirect.

    The redirect goes to ``/info/<url_hash>`` of the new or existing row.
    """
    # TODO parse the input
    new_url = request.form['url_input']
    print(new_url)

    # First, we check if this URL isn't already shortened
    record = Url.query.filter_by(url=new_url).first()
    if record:
        # If exists, just update the update date
        record.update_date = datetime.utcnow()
    else:
        # In case it doesn't exist
        record = Url(new_url, request.remote_addr)
        db.session.add(record)
    db.session.commit()

    return redirect('/info/{hash}'.format(hash=record.url_hash))
def display_shortened_urls(hash):
    """Render the urls stored under the Short entity with this hash value.

    Answers the 404 page when no Short entity has that hash.
    """
    lookup = db.Query(Short)
    lookup.filter("hash_value = ", hash)
    short = lookup.get()

    if short:
        urls = Url.gql("WHERE ANCESTOR IS :1", short)
        return render_template('hash.html', urls=urls)

    #if the hash is not there
    return render_template('404.html'), 404
def process(request):
    """List shortened urls with the form (GET) or shorten a posted url (POST).

    POST reads ``url`` from the form data; an empty or missing value gets an
    HttpResponseBadRequest. ``http://`` is added when the url has no
    http/https scheme, and an existing row for the url is reused.
    Other methods get a 405 response whose Allow header lists GET and POST.
    """
    if request.method == 'GET':
        urls = "".join(
            "<pre>Url acortada de: " + url.longUrl + " --> " + str(url.id)
            for url in Url.objects.all())
        formulario = ("</br></br></br><form action='' method='POST'>"
                      "Introduzca su Url a acortar: <input type= "
                      "'text' name='url'><input type='submit' value='Enviar'>"
                      "</form>")
        htmlBody = ("<html><body>Lista de URLs acortadas: "
                    + urls
                    + formulario
                    + "</body></html>")
        return HttpResponse(htmlBody)

    if request.method == 'POST':
        # Default to "" so a request without the field is treated as empty
        # instead of failing on ``None.startswith``.
        url = request.POST.get("url", "")
        if url == "":
            return HttpResponseBadRequest("PAGINA VACIA")
        if not url.startswith(("http://", "https://")):
            url = "http://" + url
        try:
            newUrl = Url.objects.get(longUrl=url)
        except Url.DoesNotExist:
            newUrl = Url(longUrl=url)
            newUrl.save()
        response = ("<p>url acortada: <a href=" + str(newUrl.id) + ">"
                    + str(newUrl.id) + "</a></p>")
        response += "<a href=''>Pulse aqui para volver al acortador</a>"
        return HttpResponse(response)

    # The response was built but never returned, and the message was passed
    # where the list of permitted methods belongs.
    return HttpResponseNotAllowed(["GET", "POST"], "Solo Post y Get")
def shorten():
    """Render the success page showing the short url of the requested ``url``.

    Raises BadRequest when no ``url`` query parameter is supplied.
    """
    # Validate user input
    full_url = request.args.get('url')
    if not full_url:
        raise BadRequest()

    # Model returns object with short_url property
    url_model = Url.shorten(full_url)
    short_url = '/'.join((request.host, url_model.short_url))
    return render_template('success.html', short_url=short_url)
def test_fill_title_does_not_override_title(self):
    """fill_title keeps a title that was set explicitly on the Url."""
    address = "http://example.com"
    page = """
    <html>
    <head>
    <title>Example Web Page</title>
    </head>
    </html>
    """

    # Record: reading the response yields a page with its own <title>.
    response = self.mocker.mock()
    response.read()
    self.mocker.result(page)

    # Record: opening the address yields the mocked response.
    urlopen = self.mocker.replace("urllib.urlopen")
    urlopen(address)
    self.mocker.result(response)
    self.mocker.replay()

    url = Url(url=address, title="My Title")
    url.fill_title()
    self.assertEquals(url.title, "My Title")
def get(self, url_id=""):
    """Redirect to the stored url with ``url_id``, updating the user's goals.

    Raises MissingUrlException when ``url_id`` is not a number (including the
    empty default) or no Url has that id. When there is a current user,
    every goal is told the url was just visited before redirecting.
    """
    try:
        numeric_id = int(url_id)
    except (TypeError, ValueError):
        # The default "" used to surface as a bare ValueError from int().
        raise MissingUrlException("Missing url '%s'" % url_id)

    url = Url.get_by_id(numeric_id)
    if url is None:
        raise MissingUrlException("Missing url '%s'" % url_id)

    user_data = UserData.current()
    if user_data:
        GoalList.update_goals(
            user_data,
            lambda goal: goal.just_visited_url(user_data, url))

    self.redirect(url.url.encode("utf8"))