Example #1
    def _hyperlink_conversion(self, ignoretext):

        if not self.has_selection:
            self.inline_call('square_brackets', nomove=True, text="Link text")
            return self.inline_call('parentheses', text="http://www.example.com")


        text = self.cursor.selectedText()
        is_email = validators.email(text)
        is_url = validators.url(text)

        if is_url:
            self.inline_call('square_brackets', text=text)
            return self.inline_call('parentheses', text=text)
        elif is_email:
            self.inline_call('square_brackets', text=text)
            return self.inline_call('parentheses', text='mailto:' + text)
        
        url_from_partial = 'http://' + text
        if validators.url(url_from_partial):
            self.inline_call('square_brackets')
            self.inline_call('parentheses', text=url_from_partial)
        else:
            self.inline_call('square_brackets', nomove=True)
            self.inline_call('parentheses', text="http://www.example.com")
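A pattern this example shares with several below (#2, #5, #6, #46) is to validate the raw string first and, only if that fails, retry with an http:// prefix. A minimal standalone sketch of the idea (the normalize_url helper is ours, not from any of the projects listed here):

import validators

def normalize_url(text):
    # Accept the string as-is if it already validates as a URL.
    if validators.url(text):
        return text
    # Otherwise retry with a scheme prepended, as the examples here do.
    candidate = 'http://' + text
    if validators.url(candidate):
        return candidate
    return None  # caller decides how to treat invalid input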
Example #2
File: koala.py Project: bostrt/koala
def put_article():
    '''
    Add new article for a user.
    '''
    username = request.headers.get('x-koala-username')
    apikey = request.headers.get('x-koala-key')
    user = locate_user(username, apikey)

    reqjson = request.get_json()

    result = validators.url(reqjson['url'])
    if not result:
        # try again but with http://
        result = validators.url('http://' + reqjson['url'])
        if not result:
            logging.info("Bad URL: %s" % reqjson['url'])
            abort(400)
        else:
            reqjson['url'] = 'http://' + reqjson['url']

    title = reqjson.get('title', reqjson['url'])
    url = reqjson['url']
    date = str(datetime.now())
    read = False
    favorite = False
    owner = user.id

    article = Article.create(title=title, url=url, date=date, read=read, favorite=favorite, owner=owner)

    return jsonify({'id': article.id}), 201
Example #3
    def test_stackoverflow(self):
        """
        This function is used to test stackoverflow function
        which returns user name, description, tags and links.

        ...If url is valid

        https://stackoverflow.com/users/7690738/ashish-cherian

        returns username, description, tags and links

        ...If url is empty

        raise ValueError("unknown url type: %r" % self.full_url)

        ValueError: unknown url type: ''

        ...If an invalid url is given

        raise URLError(err)

        urllib.error.URLError: <urlopen error [Errno 11001] getaddrinfo failed>

        :return: if test is ok or not
        """
        #validating url

        self.assertTrue(validators.url(self.url1))
        self.assertTrue(validators.url(self.url2))

        #checking if url is a stackoverflow url or not

        self.assertTrue("https://stackoverflow.com/users" in self.url1)
        self.assertTrue("https://stackoverflow.com/users" in self.url2)

        #checking connection to url

        self.assertEqual(urllib.request.urlopen(self.url1).getcode(), 200)
        self.assertEqual(urllib.request.urlopen(self.url2).getcode(), 200)

        #checking if url is not empty

        self.assertNotEqual(self.url1, "")
        self.assertNotEqual(self.url2, "")

        #checking for timeout error

        self.assertTrue(requests.get(self.url1, timeout=10.0))
        self.assertTrue(requests.get(self.url2, timeout=10.0))

        #checking username and description are not empty

        self.assertNotEqual(self.user1, "")
        self.assertNotEqual(self.description1, "")
        self.assertNotEqual(self.user2, "")
        self.assertNotEqual(self.description2, "")
Example #4
	def create_server(self,request):
		user = request.user
		url = str(self.cleaned_data.get('server'))
		serverS=Server.objects.filter(url=url)
		if serverS:
			self.add_error('server','URL already exists')
		else:
			if validators.url('https://'+url) or validators.url('http://'+url):
				server = Server.objects.create(url=url,user=user)
				server.save()
			else:
				self.add_error('server','Incorrect URL')
Example #5
 def is_valid_url(self, url):
     if validators.url(url):
         return True
     else:
         if url[:8] == "https://":
             if validators.url("http://%s" % (url[8:])):
                 return True
             else:
                 return False
         else:
             if validators.url("http://%s" % (url)):
                 return [True, "http://"]
             else:
                 # return explicitly instead of falling off the end with None
                 return False
Example #6
def index():
	if request.method == "POST":
		if 'username' in session:
			url = request.form["url"]
			links = db.links
			if not validators.url(url):
				url = "http://" + url
			if not validators.url(url):
				return render('form.html', error='URL is incorrect')
			else:
				existing_url = links.find_one({'url': url})
				if not existing_url:
					current_time = str(datetime.now())

					print current_time
					print url

					cur_user = db.users.find_one({'name': session['username']})

					html = None

					try:
						html = urllib2.urlopen(url)
						html = html.read()
						soup = bf(html)
						title = url
						try:
							title = soup.find('title').text
						except Exception:
							pass
						
						db.links.insert({
							'url': url, 
							'title': title,
							'author': cur_user['name'],
							'author_id': cur_user['_id'],
							'current_time': current_time,
							'votes': 1
							})

						return render('form.html', error="New item is added")

					except Exception:
						return render('form.html', error="URL is incorrect")

				else:
					return render('form.html', error="URL already exists")
		else:
			flash('Please log in')
			return redirect(url_for('login'))

	return render('form.html')
Example #7
def validate_result(current, default, type):
    """
    Validates the data, whether it needs to be url, twitter, linkedin link etc.
    """
    if current is None:
        current = ""
    if default is None:
        default = ""
    if type == "URL" and validators.url(current, require_tld=True) and not validators.url(default, require_tld=True):
        return current
    if type == "EMAIL" and validators.email(current) and not validators.email(default):
        return current
    return default
Example #8
File: civid.py Project: tdeck/civid_web
def validated_redirect_uri(uri_param):
    if uri_param is None:
        raise BadRequest("Missing required redirect URI")

    # validators.url() returns a falsy ValidationFailure object on bad input
    # rather than raising, so test the result instead of using try/except
    if not validators.url(uri_param):
        raise BadRequest("Malformed redirect URI")

    parsed = urlparse(uri_param)
    if parsed.scheme not in ['http', 'https']:
        raise BadRequest("Redirect URI must be http or https")

    return parsed
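The rewrite above leans on a detail of the validators package worth spelling out: on bad input its checks return a falsy failure object (ValidationFailure in older releases, ValidationError in newer ones) instead of raising, so truthiness tests work but try/except does not. A quick sketch:

import validators

result = validators.url("not a url")
print(bool(result))                            # False -- the failure object is falsy
print(validators.url("https://example.com"))   # True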
Example #9
    def ddos_server(self, url, timeout=30):
        if self.ddos_process is not None:
            logging.debug("communicate with siege")
            stdout, stderr = self.ddos_process.communicate()
            logging.debug("siege stdout: %s" % stdout)
            logging.debug("siege stderr: %s" % stderr)

        if url is not None and validators.url(url):
            cmdstr = "timeout -k {longerTimeout}s {longerTimeout}s siege -c 100 -t {timeout} {url}"\
                .format(longerTimeout=timeout + 2, timeout=timeout, url=url)
            logging.debug(cmdstr)
            self.ddos_process = subprocess.Popen(shlex.split(cmdstr))
        else:
            logging.warning("Neither ip nor url were supplied, DDOS failed")
            logging.debug("validators.url(%s) == %s" % (url, validators.url(str(url))))
Example #10
File: plan.py Project: jdh6660/fuzz
    def __get_potential_page_links(self, page):
        # Find all the pages href links
        html = BeautifulSoup(page.text, 'html5lib')
        for tag in html.find_all('a'):
            link = tag.get('href').__str__()

            element = None

            # If link appears to be a valid url
            if validators.url(link):
                # Categorize local and external links
                if link.startswith(self.site.base_url):
                    element = PotentialUrlLinkElement(link)
                else:
                    element = ExternalUrlLinkElement(link)

            # Find email address
            elif "mailto:" in link:
                potential_email = re.sub('mailto:', '', link)
                if validators.email(potential_email):
                    element = EmailAddressElement(potential_email)
            elif validators.email(link):
                element = EmailAddressElement(link)

            # Link didn't match anything valid
            else:
                element = MalformedLinkElement(link)

            # Add new link element to list if it does not already exist
            if element is not None and not self.discovered_links.contains(element):
                self.discovered_links.add_element(element)

            # Will attempt to create valid links with invalid fragments
            self.__build_in_malformed()
Example #11
def is_url(uri):
    result = validators.url(uri)
    if result:
        return True
    else:
        print('It\'s not a valid url: ' + uri)
        exit()
Example #12
File: views.py Project: maninder29/vesta
def create_post(request, type_of_post):
	profile=request.user.profile
	content=request.data.get('content')
	media=request.data.get('media')
	post=Post( profile=profile, content=content, media=media, type_of_post=type_of_post )
	if not post.media: # og
		text = post.content
		text = text.split('http')
		if len(text) != 1:
			text = text[1]
			text = text.split(" ")
			url = "http" + str(text[0])
			if validators.url(url):
				try:
					data = InfoExtractor.PyOpenGraph(url).metadata
					og=OpenGraph()
					og.site=data.get('site_name')
					# if og.site not in ["YouTube", "Vimeo"]:
					og.title=data.get('title')
					og.description=data.get('description')
					og.image=data.get('image')
					og.link=data.get('url')
					og.save()
					post.og=og
				except Exception as e:
					print str(e)
	post.save()
	return
Example #13
def feed(request):
    """
    View that process the posted submitted_url
    """
    log.debug('In Feed View')
    view_response = dict(table_of_content='', error='')
    # get url submitted
    if request.POST.get('feed_url'):
        feed_url = request.POST.get('feed_url')
        if not validators.url(feed_url):
            view_response['error'] = 'Invalid url entered...'
            return view_response
        if 'wikipedia.org/' not in feed_url:
            # check if the url submitted a wikipedia url
            view_response['error'] = 'Url not a wikipedia domain...'
            return view_response
        response = urllib2.urlopen(feed_url)
        # retrieve the html page
        html = response.read()
        # convert the html to xml-format
        soup = BeautifulSoup(html, "lxml")
        # Get the table content with specific tag and class
        view_response['table_of_content'] = soup.find('div', class_="toc")
    else:
        # return error that no url entered
        view_response['error'] = 'No url was submitted...'
        return view_response
    if not view_response['table_of_content']:
        # return error that no table of content was found
        view_response['error'] = 'No Table of Content Found from the url...'
    return view_response
Example #14
def urlLinkSpecified(request):
	if request.method == 'POST':
		url = request.POST.get("url")
		if validators.url(url):
			start_time = time.time()
			response = requests.get(url)
			maintype= response.headers['Content-Type'].split(';')[0].lower()
			if maintype not in ('image/png', 'image/jpeg', 'image/gif'):
				print("a")
				return HttpResponse(json.dumps({'data':"Url is not of image type",'url':"/home/ubuntu/DjangoWithCNN/myproject/media/blank_person.png"}))
			else:
				img = Image.open(StringIO(response.content))		
				FileName = str(uuid.uuid1())+".png"
				Pathname = "/home/ubuntu/DjangoWithCNN/myproject/media/"+FileName
				width, height = img.size
				if width > 600:
					basewidth = 400
					wpercent = (basewidth/float(img.size[0]))
					hsize = int((float(img.size[1])*float(wpercent)))
					img = img.resize((basewidth,hsize), Image.ANTIALIAS)
				img.save(Pathname)
				print("--- %s seconds ---" % (time.time() - start_time))
				result = main(Pathname)
				print(result)
				return HttpResponse(result)
		else:
			return HttpResponse(json.dumps({'data':"Invalid URL",'url':"/home/ubuntu/myproject/media/blank_person.png"}))
Example #15
def crawl_link(link):
    """ Crawls a link, and returns a tuple with a urllib response and a code
        indicating success or error

    Input:
        string -- link to be called

    Returns:
        Tuple(e1,e2) --
            e1 = urllib response
            e2 = success or error code.

    Code Source: https://docs.python.org/3/howto/urllib2.html ('Fetching URLs')
    """
    if not validators.url(link):
        return ("", False, "Invalid link")
    print("Currently handling link: " + link)
    req = urllib.request.Request(link)
    try:
        response = urllib.request.urlopen(req)
    except HTTPError as error:
        return (None, False, error.code)
    except ContentTooShortError as error:
        return (None, False, "ContentTooShortError")
    except URLError as error:
        return (None, False, error.reason)
    else:
        return (response, True, "")
Example #16
File: app.py Project: sublinus/ShortyUrl
def home_addEntry():
    longUrl = request.form['longURl']
    if not validators.url(longUrl):
        return render_template("bad_input.html", title=cfg['general']['title'])
    short = logic.addEntry(longUrl, logic.connect(cfg['general']['sqlite_location']))
    current_url = cfg['web_paths']['shortened'] + short
    return render_template("shortened.html", url=current_url, short=short, title=cfg['general']['title'])
Example #17
    def link_data(request):
        if validators.url(request.data_values):
            request.data_values = str(request.data_values)
        else:
            request.data_values = '#'

        return request
Example #18
File: api.py Project: eldoroshi/project
def url(vdomain):
	if validators.url(vdomain):
		return vdomain
	else:
		return False
Example #19
    def load_units(self):
        """
        Load units of the function descriptor content, section
        'virtual_deployment_units'
        """
        if 'virtual_deployment_units' not in self.content:
            log.error("Function id={0} is missing the "
                      "'virtual_deployment_units' section"
                      .format(self.id))
            return

        for vdu in self.content['virtual_deployment_units']:
            unit = Unit(vdu['id'])
            self.associate_unit(unit)

            # Check vm image URLs
            # only perform a check if vm_image is a URL
            vdu_image_path = vdu['vm_image']
            if validators.url(vdu_image_path):  # Check if is URL/URI.
                try:
                    # Check if the image URL is accessible
                    # within a short time interval
                    requests.head(vdu_image_path, timeout=1)

                except (requests.Timeout, requests.ConnectionError):

                    evtlog.log("VDU image not found",
                               "Failed to verify the existence of VDU image at"
                               " the address '{0}'. VDU id='{1}'"
                               .format(vdu_image_path, vdu['id']),
                               self.id,
                               'evt_vnfd_itg_vdu_image_not_found')
        return True
Example #20
def collectData(address, internet, fromaddress = ""):
    global doneAddresses, numberOfAddresses
    if doneAddresses > numberOfAddresses:
        return
    else:
        doneAddresses+=1

    if not validators.url(address):
        return
    
    content = getContentFromSite(address)

    if content == "":
        return

    links = removeHost(getLinksFromContent(content), address)   

    if internet.isStored(address):
        n = internet.getNode(address)
    else:
        n = Node()
        internet.storeNode(address, n)
    
    if fromaddress != "":
        n.inputs.append(fromaddress)

    for link in links:
        if link not in n.outputs and link not in n.inputs:
            n.outputs.append(link)
            collectData(link, internet, address)
Example #21
def verify_config(owner, sample_config, config, current_key=None):
    """Verify that config corresponds to sample_config"""
    import validators

    def raise_exception(message):
        raise ValueError('in {} config {}\nsample:   {}\nprovided: {}'.format(owner, message, sorted(sample_config.items()), sorted(config.items())))

    if isinstance(sample_config, list):
        if not len(config):
            raise_exception('empty_list')
        for element in config:
            verify_config(owner=owner, sample_config=sample_config[0], config=element, current_key=current_key)
    elif isinstance(sample_config, dict):
        for sample_key, sample_value in sample_config.items():
            if sample_key not in config:
                raise_exception('key "{}" is not provided'.format(sample_key))
            if config[sample_key] is None:
                raise_exception('Value of "{}" is empty'.format(sample_key))
            verify_config(owner=owner, sample_config=sample_value, config=config[sample_key], current_key=sample_key)
    else:
        # from this point config and sample_config start to be simple values
        if type(sample_config) is str:
            if sample_config.startswith('http') and validators.url(config) is not True:
                raise_exception('Key "{}" does not contain a valid url: {}'.format(current_key, config))
            elif sample_config.startswith('email') and not validators.email(config):
                raise_exception('Key "{}" does not contain a valid email: {}'.format(current_key, config))
            elif sample_config.startswith('ipv4') and not validators.ipv4(config):
                raise_exception('Key "{}" does not contain a valid IPv4: {}'.format(current_key, config))
            elif sample_config.startswith('int'):
                try:
                    int(config)
                except ValueError:
                    raise_exception('Key "{}" does not contain a valid int number: {}'.format(current_key, config))
        elif type(sample_config) is bool and type(config) is not bool:
            raise_exception('Key "{}" must be bool: {}'.format(current_key, config))
Example #22
File: plan.py Project: jdh6660/fuzz
    def __build_in_malformed(self):
        # Get an unreviewed malformed link element
        malformed_element = self.__get_unreviewed(MalformedLinkElement)
        while malformed_element:

            # If malformed element is blacklisted then don't review it
            if malformed_element.data in self.malformed_ignored:
                malformed_element.reviewed = True

            # If malformed element hasn't been reviewed, review it
            if not malformed_element.reviewed:

                potential_url = urljoin(self.site.base_url, malformed_element.data)
                # If the potential url looks valid
                if validators.url(potential_url):
                    # Check if its a known potential element
                    new_element = PotentialUrlLinkElement(potential_url)
                    if not self.discovered_links.contains(new_element):
                        self.discovered_links.add_element(new_element)

                    # Hide from report, it's a known potential element
                    malformed_element.hide = True

                malformed_element.reviewed = True

            malformed_element = self.__get_unreviewed(MalformedLinkElement)
Example #23
    def validate_account(self,account):
        """Check a string to see if it exists as the name of an AWS alias.

        Parameters:
        account     The AWS account alias to validate
        """
        result = {
            'accountAlias': None,
            'accountId': None,
            'signinUri': 'https://' + account + '.signin.aws.amazon.com/',
            'exists': False,
            'error': None
        }
        # Check if the provided account name is a string of numbers (an ID) or not (an alias)
        if re.match(r'\d{12}',account):
            result['accountId'] = account
        else:
            result['accountAlias'] = account
        if not validators.url(result['signinUri']):
            result['error'] = 'Invalid URI'
            return result
        try:
            # Request the sign-in URL and don't allow the redirect
            request = requests.get(result['signinUri'],allow_redirects=False,timeout=self.requests_timeout)
            # If we have a redirect, not a 404, we have a valid account alias for AWS
            if request.status_code == 302:
                result['exists'] = True
        except requests.exceptions.RequestException as error:
            result['error'] = error
        return result
Example #24
def spider(url,lvl=1):
    tld_url = tldextract.extract(url)
    tld_url = '.'.join(tld_url[:3])
    pos = url.rfind('/')
    outFile = url[pos+1:]
    print (outFile)
    response = requests.get(url) #storing all the information including headers in the variable source code
    if response.status_code == 200:
        plain_text = response.text #sort source code and store only the plaintext
        convert_data = BeautifulSoup(plain_text) #converting plain_text to Beautiful Soup object so the library can sort thru it
        for link in convert_data.findAll('a'):  #sorting useful information
            if link.get('href').find('//') == 0: #address URLs that start with //
                href = 'https:' + link.get('href')
            elif validators.url(link.get('href')): #address absolute URLs
                href = link.get('href')
            else: #address relative URLs
                href = url + link.get('href') #Building a clickable url
            #insertSQL(href, convert_data)
            print(indent(lvl) +str(lvl) + '.  ' +href) #displaying the result back to the user
            #outData = codecs.open(saveLocation +'\\' +outFile +'.html', 'w', 'utf-8')
            #outData.write(plain_text)
            #outData.close()


            if lvl < max_depth:
                spider(href, lvl+1)
Example #25
File: forms.py Project: ajay2611/mmb
    def clean(self):
        cleaned_data = super(ProfileDataForm, self).clean()
        website = cleaned_data['website']
        username = cleaned_data['username']
        type = cleaned_data['type']
        instrument = cleaned_data['instrument']

        if website and not validators.url(website):
            self._errors['website'] = self.error_class(
                ["Please enter a valid website. For example 'http://makemyband.in'"])

        if type == u'Musician':
            if not instrument:
                self._errors['instrument'] = self.error_class(
                ["This field is required"])

        try:
            user = get_user_model().objects.get(username=username)
            if user and (username != self.user.username):
                self._errors['username'] = self.error_class(
                ["Username already exists"])
        except:
            pass

        return cleaned_data
Example #26
File: main.py Project: soundless/tinyurl
def add_url():
    if not session.get('logged_in'):
        abort(401)

    url = request.form['url']
    # validate URL
    if not url:
        flash("URL cannot be empty")
        return redirect(url_for('show_urls'))
    if not validators.url(url):
        flash("URL is invalid, valid one starts with 'http://' or 'https://'")
        return redirect(url_for('show_urls'))

    # insert record
    insert = 'INSERT INTO urls (url) VALUES (?)'
    cur = g.db.cursor()
    cur.execute(insert, [url])
    g.db.commit()

    # get the last record id and encoded
    last_id = cur.lastrowid
    short_url = ShortUrl.encode(last_id)

    # update the record again with short_url
    update = 'UPDATE %s SET %s="%s" WHERE id=%s' \
                % (TABLE, _COL2, short_url, last_id)
    cur.execute(update)
    g.db.commit()
    cur.close()

    flash("Tiny URL was successfully created: " + request.host_url + 'o/' + short_url)
    return redirect(url_for('show_urls'))
Example #27
def download_file(url, dest):
    if not validators.url(url):
        print("Not a valid image url: {}".format(url))
        return
    response = requests.get(url)
    with open(dest, 'wb') as dest:
        dest.write(response.content)
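The helper above writes whatever bytes come back, even when the server answers with an error page. A slightly more defensive variant (a sketch under our own assumptions, not the project's code) checks the status and streams to disk:

import requests
import validators

def download_file_checked(url, dest):
    if not validators.url(url):
        print("Not a valid image url: {}".format(url))
        return
    response = requests.get(url, stream=True, timeout=10)
    response.raise_for_status()  # surface 4xx/5xx instead of saving an error page
    with open(dest, 'wb') as fh:
        for chunk in response.iter_content(chunk_size=8192):
            fh.write(chunk)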
Example #28
    def normalize(self, creator_email, endpoint_url=None, zip_file=None):
        """
        Normalize the required data irrespective of the source
        :param creator_email:
        :param endpoint_url:
        :param zip_file:
        :return:
        """
        self.update_status('Normalizing source data')
        if not endpoint_url and not zip_file:
            raise Exception('endpoint_url or zip_file is required')
        if endpoint_url:
            self.api_link = endpoint_url
            os.makedirs(self.app_temp_assets)
            event_info = requests.get(endpoint_url + '/event').json()
            self.download_event_data()
        else:
            unzip(zip_file, self.app_temp_assets)
            with open(self.get_temp_asset_path('event')) as json_data:
                event_info = json.load(json_data)
                event_id = event_info['id']

            if os.path.isfile(self.get_temp_asset_path('meta')):
                with open(self.get_temp_asset_path('meta')) as json_data:
                    meta = json.load(json_data)
                    root_url = meta['root_url']
                    if root_url:
                        self.api_link = root_url + '/api/v1/events/' + str(event_id)

        self.event_name = event_info['name']
        self.app_name = self.event_name
        self.creator_email = creator_email
        self.update_status('Processing background image and logo')
        background_image = event_info['background_image'].strip() if event_info['background_image'] else ''
        logo = event_info['logo'].strip() if event_info['logo'] else ''
        if background_image != '':
            if background_image.startswith("/"):
                self.app_background_image = self.get_temp_asset_path(background_image)
            elif validators.url(background_image):
                self.app_background_image = self.get_temp_asset_path('background.png')
                urllib.urlretrieve(background_image, self.app_background_image)
        if logo != '':
            if logo.startswith("/"):
                self.app_launcher_icon = self.get_temp_asset_path(logo)
            elif validators.url(logo):
                self.app_launcher_icon = self.get_temp_asset_path('logo.png')
                urllib.urlretrieve(logo, self.app_launcher_icon)
Example #29
def tips_process():
    error = []
    event_name = request.forms.get("event_name")
    if event_name == "":
        error.append("error01")
    category = request.POST.getall("category")
    if len(category) == 0:
        error.append("error02")
    first_day = request.forms.get("first_day")
    try:
        datetime.datetime.strptime(first_day, '%Y-%m-%d')
    except:
        error.append("error03")
    last_day = request.forms.get("last_day")
    try:
        datetime.datetime.strptime(last_day, '%Y-%m-%d')
    except:
        error.append("error04")
    first_time = request.forms.get("first_time")
    try:
        datetime.datetime.strptime(first_time, '%H:%M')
    except:
        error.append("error05")
    last_time = request.forms.get("last_time")
    try:
        datetime.datetime.strptime(last_time, '%H:%M')
    except:
        error.append("error06")
    location = request.forms.get("location")
    if location == "":
        error.append("error07")
    adress = request.forms.get("adress")
    if adress == "":
        error.append("error08")
    organizer = request.forms.get("organizer")
    if organizer == "":
        error.append("error09")
    website = request.forms.get("website")
    if not validators.url(website):
        error.append("error10")
    image = request.files.get("image")
    description = request.forms.get("description")
    if description == "":
        error.append("error12")
    tipster = request.forms.get("tipster")
    if tipster == "":
        error.append("error13")
    tipster_mail = request.forms.get("tipster_mail")
    if not validators.email(tipster_mail):
        error.append("error14")
    
    if len(error) > 0:
        redirect("/tips")
        
    else:
        query = ("INSERT INTO event (event_name, first_day, last_day, first_time, last_time, location, adress, organizer, website, image, description, tipster, tipster_mail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
        cur.execute(query, (event_name, first_day, last_day, first_time, last_time, location, adress, organizer, website, image, description, tipster, tipster_mail))
        db.commit()
        redirect("/tips")
Example #30
def sumrise(text, sentences=5):
    if (validators.url(text)): text = web2text.getwebtxt(text)

    parser = PlaintextParser.from_string(text, Tokenizer('english'))
    summerizer = LsaSummarizer()

    summary = str(summerizer(parser.document, sentences))
    return summary
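As written, str(summerizer(...)) stringifies the whole tuple that sumy returns, Sentence reprs included. Joining the sentences usually reads better; a sketch assuming the sumy package this snippet appears to use:

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer

def summarize_plain(text, sentence_count=5):
    parser = PlaintextParser.from_string(text, Tokenizer('english'))
    sentences = LsaSummarizer()(parser.document, sentence_count)
    return ' '.join(str(sentence) for sentence in sentences)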
Example #31
 def validate(self, url):
     if not validators.url(url):
         raise RuntimeError('Invalid URL')
     if sys.getsizeof(url.encode('utf-8')) > 1024:
         raise RuntimeError('Too long input')
Example #32
import validators
import colorama
import queue
import datetime
from random import randint
from selenium import webdriver
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup

# Colorama setup
colorama.init()

# URL setup
start_url = input("Site to scan: ")

while not validators.url(start_url):
    print("Invalid URL")
    start_url = input("Site to scan: ")

parsed_uri = urlparse(start_url)
hostname = "{uri.scheme}://{uri.netloc}/".format(uri=parsed_uri)

target_url = input("Link to be found on the site: ")
target_url = target_url.rstrip("/")

while not validators.url(target_url):
    print("Invalid URL")
    target_url = input("Link to be found on the site: ")

# Timer setup
start_time = datetime.datetime.now()
Example #33
def validate_url(url):
    if validators.url(url):
        return True
    else:
        return False
Example #34
def is_url(strg):
    try:
        return validators.url(strg)
    except ValidationFailure:
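        # NOTE: most releases of validators *return* ValidationFailure (a
        # falsy object) rather than raising it, so this handler is defensive.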
        return False
Example #35
    def search(self, search_params, age=0, ep_obj=None):
        results = []
        if not self.login():
            return results

        freeleech = '&free=on' if self.freeleech else ''

        for mode in search_params:
            items = []
            logger.debug(_("Search Mode: {mode}".format(mode=mode)))
            for search_string in search_params[mode]:
                if mode != 'RSS':
                    logger.debug(
                        _("Search String: {search_string}".format(
                            search_string=search_string)))

                # URL with 50 tv-show results, or max 150 if adjusted in IPTorrents profile
                search_url = self.urls['search'] % (self.categories, freeleech,
                                                    search_string)
                search_url += ';o=seeders' if mode != 'RSS' else ''

                if self.custom_url:
                    if not validators.url(self.custom_url):
                        logger.warning("Invalid custom url: {0}".format(
                            self.custom_url))
                        return results
                    search_url = urljoin(self.custom_url,
                                         search_url.split(self.url)[1])

                data = self.get_url(search_url, returns='text')
                if not data:
                    continue

                try:
                    data = re.sub(r'(?im)<button.+?</button>', '', data, 0)
                    with BS4Parser(data, 'html5lib') as html:
                        if not html:
                            logger.debug("No data returned from provider")
                            continue

                        if html.find(text='No Torrents Found!'):
                            logger.debug(
                                "Data returned from provider does not contain any torrents"
                            )
                            continue

                        torrent_table = html.find('table', id='torrents')
                        torrents = torrent_table('tr') if torrent_table else []

                        # Continue only if one Release is found
                        if not torrents or len(torrents) < 2:
                            logger.debug(
                                "Data returned from provider does not contain any torrents"
                            )
                            continue

                        for result in torrents[1:]:
                            try:
                                title = result('td')[1].find('a').text
                                download_url = urljoin(
                                    search_url,
                                    result('td')[3].find('a')['href'])
                                seeders = int(
                                    result.find('td',
                                                class_='ac t_seeders').text)
                                leechers = int(
                                    result.find('td',
                                                class_='ac t_leechers').text)
                                torrent_size = result('td')[5].text
                                size = convert_size(torrent_size) or -1
                            except (AttributeError, TypeError, KeyError):
                                continue

                            if not all([title, download_url]):
                                continue

                            # Filter unseeded torrent
                            if seeders < self.minseed or leechers < self.minleech:
                                if mode != 'RSS':
                                    logger.debug(
                                        "Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})"
                                        .format(title, seeders, leechers))
                                continue

                            item = {
                                'title': title,
                                'link': download_url,
                                'size': size,
                                'seeders': seeders,
                                'leechers': leechers,
                                'hash': ''
                            }
                            if mode != 'RSS':
                                logger.debug(
                                    "Found result: {0} with {1} seeders and {2} leechers"
                                    .format(title, seeders, leechers))

                            items.append(item)

                except Exception as e:
                    logger.exception(
                        "Failed parsing provider. Error: {0!r}".format(str(e)))
                    logger.exception(traceback.format_exc())

            # For each search mode sort all the items by seeders if available
            items.sort(key=lambda d: try_int(d.get('seeders', 0)),
                       reverse=True)

            results += items

        return results
Example #36
 def __init__(self, url):
     self.url = url
     self.is_valid = validators.url(self.url) is True
     self.domain = self.url.split('/')[2] if self.is_valid else None
     self.logo_url = self.__LOGO_API + self.domain if self.domain else None
     self.contents = None
Example #37
 def test_has_image(self):
     d, q = openaccess_cma_search(has_image=1, indent=1, limit=100)
     for val in d['data']:
         self.assertTrue(val['images'] is not None)
         for k, v in val['images'].items():
             self.assertTrue(validators.url(v['url']))
Example #38
def upload():
    # each new "session" has a random case number associated with it
    # obviously, there is a small chance that case numbers will collide.
    # In that case, the person who used it second would overwrite the other persons data.
    # So this is not how it should be in its final version. But it's fine for now.
    case_num = request.args.get('case_num', None)

    fileDict = dao.getFileDict(case_num)

    fileDict['research_question'] = request.form.get('smartsearch')
    if fileDict['research_question'] is not None and fileDict[
            'research_question'].strip() != '':
        if validators.url(fileDict['research_question'].strip()):
            return redirect(
                url_for('visualize_blueprint.visualize', case_num=case_num)
            )  # temporary submission for SmartSearch for demo
        else:
            return redirect(
                url_for('smart_search_blueprint.sheetSelect',
                        case_num=case_num)
            )  # if its not a url take it to smartSearch input

    # here the use of fileDict is probably more clear
    # the strings used to index request.files come from the HTML name of the input field
    # see upload.html
    files = io_service.storeGSA(request.files.getlist('GSA_Input_map'))
    fileDict['GSA_Input_SHP'] = files[0]
    fileDict['GSA_Input_DBF'] = files[1]
    fileDict['GSA_file_list'] = request.files.getlist('GSA_Input_map')
    fileDict['NLP_Input_corpus'] = io_service.storeNLP(
        request.files.getlist('NLP_Input_corpus'))
    fileDict['NLP_Input_LDP'] = io_service.storefile(
        request.files.get('NLP_Input_LDP'))
    fileDict['NLP_Input_Sentiment'] = io_service.storefile(
        request.files.get('NLP_Input_Sentiment'))

    fileDict["NLP_INPUT_NER"] = request.form.get("NLP_INPUT_NER")
    fileDict["NLP_INPUT_IOB"] = request.form.get("NLP_INPUT_IOB")

    fileDict['SNA_Input'] = io_service.storefile(
        request.files.get('SNA_Input'))
    fileDict['GSA_Input'] = io_service.storefile(
        request.files.get('GSA_Input'))

    fileDict['research_question'] = request.form.get('research_question')

    errors = io_service.checkExtensions(
        case_num
    )  # helper method to make sure there are no input errors by the user
    # i.e. if there are errors, we can't proceed so we stay on the upload page
    if len(errors) > 0:
        return render_template('upload.html', errors=errors, case_num=case_num)

    # there are intermediary steps for SNA and NLP analyses
    if fileDict['SNA_Input']:
        return redirect(url_for('sna_blueprint.sheetSelect',
                                case_num=case_num))

    if fileDict['GSA_Input_SHP']:
        return redirect(
            url_for('gsa_blueprint.shp_vars_get', case_num=case_num))

    # if a user does both SNA and NLP, as it stands, the NLP intermediary data will never be gotten to. This is a problem.
    if fileDict['NLP_Input_corpus']:
        return redirect(
            url_for('visualize_blueprint.visualize', case_num=case_num))

    # if NLP chosen, allow them to pick from the different tools available
    # do i redirect to another url to choose then save the results then redirect to visualize?
    # no, just add the radio buttons under the file upload before the hr (in the template)
    return redirect(url_for('visualize_blueprint.visualize',
                            case_num=case_num))
Example #39
    def load_schema(self, template, reload=False):
        """
        Load schema from a local file or a remote URL.
        If the same schema was previously loaded
        and reload=False it will return the schema
        stored in cache. If reload=True it will force
        the reload of the schema.

        :param template: Name of local file or URL to remote schema
        :param reload: Force the reload, even if it was previously loaded
        :return: The loaded schema as a dictionary
        """
        # Check if template is already loaded and present in _schemas_library
        if template in self._schemas_library and not reload:
            log.debug("Loading previously stored schema for {}"
                      .format(template))

            return self._schemas_library[template]

        # Load Online Schema
        schema_addr = self._schemas[template]['remote']
        if validators.url(schema_addr):
            try:
                log.debug("Loading schema '{}' from remote location '{}'"
                          .format(template, schema_addr))

                # Load schema from remote source
                self._schemas_library[template] = \
                    load_remote_schema(schema_addr)

                # Update the corresponding local schema file
                write_local_schema(self._schemas_local_master,
                                   self._schemas[template]['local'],
                                   self._schemas_library[template])

                return self._schemas_library[template]

            except RequestException as e:
                log.warning("Could not load schema '{}' from remote "
                            "location '{}', error: {}"
                            .format(template, schema_addr, e))
        else:
            log.warning("Invalid schema URL '{}'".format(schema_addr))

        # Load Offline Schema
        schema_addr = self._schemas[template]['local']
        if os.path.isfile(schema_addr):
            try:
                log.debug("Loading schema '{}' from local file '{}'"
                          .format(template, schema_addr))

                self._schemas_library[template] = \
                    load_local_schema(schema_addr)

                return self._schemas_library[template]

            except FileNotFoundError:
                log.warning("Could not load schema '{}' from local file '{}'"
                            .format(template, schema_addr))

        else:
            log.warning("Schema file '{}' not found.".format(schema_addr))

        log.error("Failed to load schema '{}'".format(template))
Example #40
 def url(self):
     if not validators.url(self._url):
         raise Exception("Invalid url format.")
     return self._url
Example #41
 def validUrl(url):
     return validators.url(url)
Example #42
def isValidWebsite(web):
    if not validators.url('http://' + web):
        return False
    return True
Example #43
def Isurl(check_url):
    try:
        return validators.url(check_url)
    except Exception:
        return False
Example #44
def filter_function(url):
    if url is not None and validators.url(url):
        return True
    return False
Example #45
    def _parse_redirect_map(self, index_soup):
        """
        Given the HTML soup of an index topic
        extract the redirect mappings from the "Redirects" section.

        The URLs section should contain a table of
        "Path" to "Location" mappings
        (extra markup around this table doesn't matter)
        e.g.:

        <h1>Redirects</h1>
        <details>
            <summary>Mapping table</summary>
            <table>
            <tr><th>Path</th><th>Location</th></tr>
            <tr>
                <td>/my-funky-path</td>
                <td>/cool-page</td>
            </tr>
            <tr>
                <td>/some/other/path</td>
                <td>https://example.com/cooler-place</td>
            </tr>
            </table>
        </details>

        This will typically be generated in Discourse from Markdown similar to
        the following:

        # Redirects

        [details=Mapping table]
        | Path | Path |
        | -- | -- |
        | /my-funky-path | /cool-page |
        | /some/other/path | https://example.com/cooler-place |
        """

        redirect_soup = self._get_section(index_soup, "Redirects")
        redirect_map = {}
        warnings = []

        if redirect_soup:
            for row in redirect_soup.select("tr:has(td)"):
                path_cell = row.select_one("td:first-child")
                location_cell = row.select_one("td:last-child")

                if not path_cell or not location_cell:
                    warnings.append(
                        f"Could not parse redirect map {path_cell}"
                    )
                    continue

                path = path_cell.text
                location = location_cell.text

                if not path.startswith(self.url_prefix):
                    warnings.append(f"Could not parse redirect map for {path}")
                    continue

                if not (
                    location.startswith(self.url_prefix)
                    or validators.url(location, public=True)
                ):
                    warnings.append(
                        f"Redirect map location {location} is invalid"
                    )
                    continue

                if path in self.url_map:
                    warnings.append(
                        f"Redirect path {path} clashes with URL map"
                    )
                    continue

                redirect_map[path] = location

        return redirect_map, warnings
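A note on the public=True flag used above: in the validators releases that accept it, validators.url(value, public=True) additionally rejects URLs pointing at private or loopback hosts, which suits an externally published redirect map; newer releases dropped the parameter. A hedged sketch, assuming such a release is installed:

import validators

print(bool(validators.url("https://example.com/page", public=True)))  # True
print(bool(validators.url("http://127.0.0.1/admin", public=True)))    # False on releases supporting public=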
Example #46
def audit_website(website_types,website):
    if not website.startswith('http'):
        website = 'http://' + website
    if not validators.url(website):
        website_types.append(website)
Example #47
def clean_url(url):
    # TODO:
    if not validators.url(url):
        raise ValueError
    return url
Example #48
import validators


class URL_Splitter:
    def __init__(self, url):
        self.url = url
        self.sep = self.url.split('/', 3)

    def split(self):
        self.protocol = self.protocol()
        self.domain = self.domain()
        self.path = self.path()
        return "\nProtocol: {} \nDomain: {} \nPath: {}\n".format(
            self.protocol, self.domain, self.path)

    def protocol(self):
        return self.sep[0][:-1]

    def domain(self):
        return self.sep[2]

    def path(self):
        return self.sep[3]


if __name__ == "__main__":
    ask_url = input("Please input a url to be split: ")
    while validators.url(ask_url) != True:
        ask_url = input("INVALID url, please input another url: ")
    url = URL_Splitter(ask_url)
    print(url.split())
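URL_Splitter indexes the result of str.split('/', 3), so a URL with no path (no fourth segment) raises IndexError in path(). The standard library's urllib.parse handles that case; a small sketch of the equivalent split (our example, separate from the class above):

from urllib.parse import urlparse

parts = urlparse("https://example.com/some/path")
print(parts.scheme)   # 'https'
print(parts.netloc)   # 'example.com'
print(parts.path)     # '/some/path'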
Example #49
def summarize_text():
    if request.method == "POST":
        print(request.get_json())
        mode = request.args.get("mode")
        n_keywords = request.args.get("n_keywords")
        raw_text = None
        url = None

        if mode not in ["url", "raw_text"]:
            return make_response(
                jsonify({"error": "Mode must be one of ['url', 'raw_text']"}),
                400)

        if request.get_json():
            if mode == "url" and "url" not in request.get_json():
                return make_response(jsonify({"error": "url is required."}),
                                     400)
            elif mode == "url" and "url" in request.get_json():
                url = request.get_json()["url"]

            if mode == "raw_text" and "raw_text" not in request.get_json():
                return make_response(
                    jsonify({"error": "raw_text is required."}), 400)
            elif mode == "raw_text" and "raw_text" in request.get_json():
                raw_text = request.get_json()["raw_text"]

            if mode == "url" and not validators.url(url):
                return make_response(jsonify({"error": "url is invalid."}),
                                     400)
        else:
            return make_response(
                jsonify({"error": "url or raw_text is required."}), 400)

        helpers = AbstrakktHelpers(url=url, raw_text=raw_text)
        if mode == "url":
            try:
                raw_text = helpers.fetch_from_url()
            except Exception as e:
                return make_response(jsonify({"error": e}), 400)

        response = {}
        try:
            summarized_text = helpers.gensim_summarize(raw_text)
            response["summarized_text"] = summarized_text
            if (len(summarized_text) == 0):
                return make_response(
                    jsonify({
                        "error":
                        "An unknown error occurred, could not summarize."
                    }), 500)

            # Compute reading time in minutes (to 2 d.p)
            response["original_reading_time"] = round(
                len(raw_text) / app.config["WORDS_PER_MINUTE"], 2)
            response["reading_time"] = round(
                len(summarized_text) / app.config["WORDS_PER_MINUTE"], 2)
            n_keywords = n_keywords or 7
            response["keywords"] = helpers.extract_keywords(raw_text,
                                                            n=int(n_keywords))

            return make_response(
                jsonify({
                    "message": "Summarization successful !",
                    **response
                }), 200)
        except Exception as e:
            return make_response(jsonify({"error": str(e)}), 500)
Example #50
def validate_pdf_file(data: dict, key: str):
    validate_text(data.get('description'), True, key, 'pdf file')
    if not data.get('url') or not url(data['url']):
        raise MissingCredentialsError(
            f"Incorrect pdf url in {key}'s pdf file section")
Example #51
    def search(self, search_strings, age=0, ep_obj=None):  # pylint: disable=too-many-branches, too-many-locals, too-many-statements
        results = []

        anime = (self.show and self.show.anime) or (ep_obj and ep_obj.show and ep_obj.show.anime) or False
        search_params = {
            "q": "",
            "field": "seeders",
            "sorder": "desc",
            "rss": 1,
            "category": ("tv", "anime")[anime]
        }

        for mode in search_strings:
            items = []
            logger.log("Search Mode: {0}".format(mode), logger.DEBUG)
            for search_string in search_strings[mode]:

                search_params["q"] = search_string if mode != "RSS" else ""
                search_params["field"] = "seeders" if mode != "RSS" else "time_add"

                if mode != "RSS":
                    logger.log("Search string: {0}".format
                               (search_string.decode("utf-8")), logger.DEBUG)

                search_url = self.urls["search"] % ("usearch" if mode != "RSS" else search_string)
                if self.custom_url:
                    if not validators.url(self.custom_url):
                        logger.log("Invalid custom url: {0}".format(self.custom_url), logger.WARNING)
                        return results
                    search_url = urljoin(self.custom_url, search_url.split(self.url)[1])

                data = self.get_url(search_url, params=search_params, returns="text")
                if not data:
                    logger.log("URL did not return results/data, if the results are on the site maybe try a custom url, or a different one", logger.DEBUG)
                    continue

                if not data.startswith("<?xml"):
                    logger.log("Expected xml but got something else, is your mirror failing?", logger.INFO)
                    continue

                with BS4Parser(data, "html5lib") as html:
                    for item in html("item"):
                        try:
                            title = item.title.get_text(strip=True)
                            # Use the torcache link kat provides,
                            # unless it is not torcache or we are not using blackhole
                            # because we want to use magnets if connecting direct to client
                            # so that proxies work.
                            download_url = item.enclosure["url"]
                            if sickbeard.TORRENT_METHOD != "blackhole" or "torcache" not in download_url:
                                download_url = item.find("torrent:magneturi").next.replace("CDATA", "").strip("[!]") + self._custom_trackers

                            if not (title and download_url):
                                continue

                            seeders = try_int(item.find("torrent:seeds").get_text(strip=True))
                            leechers = try_int(item.find("torrent:peers").get_text(strip=True))

                            # Filter unseeded torrent
                            if seeders < self.minseed or leechers < self.minleech:
                                if mode != "RSS":
                                    logger.log("Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})".format
                                               (title, seeders, leechers), logger.DEBUG)
                                continue

                            verified = bool(try_int(item.find("torrent:verified").get_text(strip=True)))
                            if self.confirmed and not verified:
                                if mode != "RSS":
                                    logger.log("Found result " + title + " but that doesn't seem like a verified result so I'm ignoring it", logger.DEBUG)
                                continue

                            torrent_size = item.find("torrent:contentlength").get_text(strip=True)
                            size = convert_size(torrent_size) or -1
                            info_hash = item.find("torrent:infohash").get_text(strip=True)

                            item = {'title': title, 'link': download_url, 'size': size, 'seeders': seeders, 'leechers': leechers, 'hash': info_hash}
                            if mode != "RSS":
                                logger.log("Found result: {0} with {1} seeders and {2} leechers".format(title, seeders, leechers), logger.DEBUG)

                            items.append(item)

                        except (AttributeError, TypeError, KeyError, ValueError):
                            continue

            # For each search mode sort all the items by seeders if available
            items.sort(key=lambda d: try_int(d.get('seeders', 0)), reverse=True)

            results += items

        return results
Example #52
def view_page():
    global usrarg
    #testing purpose test svn test svn
    usrarg = flask.request.args.get("url")
    #print(usrarg)
    urlvalidity = validators.url(usrarg)
    if (urlvalidity != True):
        return "Wrong url, please go back"

    social_list = [
        "www.facebook.com", "www.qzone.qq.com", "www.tumblr.com",
        "www.instagram.com", "www.twitter.com", "www.skype.com", "www.vk.com",
        "www.linkedin.com", "www.reddit.com"
    ]
    urlobj = urlparse(usrarg)
    #print(urlobj.netloc)
    if (urlobj.netloc in social_list):
        #print("URL should not be social network.")
        return "URL should not be social network, please go back"
    if (usrarg[-1] != "/"):
        usrarg = usrarg + "/"
    req = urllib.request.Request(usrarg)
    req.add_header('Referer', 'http://www.python.org/')
    # Customize the default User-Agent header value:
    req.add_header(
        'User-Agent',
        'PurdueUniversityClassProject/1.0 ([email protected] https://goo.gl/dk8u5s)'
    )
    open = get_html_at_url(usrarg)
    #print(usrarg)
    html = "<base href=" + usrarg + ">" + open
    #print(html)
    etree1 = make_etree(html, usrarg)
    print("------------------------------------------------------------")

    try:
        style = flask.request.args["style"]
    except:
        style = ""
        pass
    try:
        color = flask.request.args["color"]
    except:
        color = ""
        pass

    try:
        mustache = flask.request.args["beard"]
    except:
        mustache = ""
        pass
    #print(color)
    '''
    if(checked == True):
        style = "Square"
    else if()

    '''

    #print(html)
    #print(etree1)
    #print("UTIL copy is fine")
    path = copy_profile_photo_static(etree1, style, color, mustache)
    #print("path:"+path)
    #filename = path[len(os.getcwd()):]

    filename = os.path.basename(path)
    #print("filename+"+filename)
    #print(os.getcwd())
    #print("oldpath:"+wpo.oldpath)
    #print("dict:")
    #print(wpo.url_to_sha)
    static_url = flask.url_for('static', filename=filename)
    #print("static_url:"+static_url)
    #print(type(html))
    src = wpo.url_to_sha[wpo.oldpath]

    before = html[:(html.find(src))]
    after = html[(html.find(src)) + len(src):]
    tempname = static_url[1:].split("/")[1]
    #print(tempname)
    temp = flask.url_for('static', filename=filename, _external=True)
    #print(temp)
    html = before + temp + after
    #print(html)

    return html
Example #53
    def login(self):
        cookie_dict = dict_from_cookiejar(self.session.cookies)
        if cookie_dict.get('uid') and cookie_dict.get('pass'):
            return True

        if self.cookies:
            success, status = self.add_cookies_from_ui()
            if not success:
                logger.info(status)
                return False

        login_params = {
            'username': self.username,
            'password': self.password,
            'login': '******'
        }

        if self.custom_url:
            if not validators.url(self.custom_url):
                logger.warning("Invalid custom url: {0}".format(
                    self.custom_url))
                return False

        # Get the index, redirects to login
        data = self.get_url(self.custom_url or self.url, returns='text')
        if not data:
            logger.warning("Unable to connect to provider")
            return False

        with BS4Parser(data, 'html5lib') as html:
            # find() may return None, so guard before reading the attribute
            form = html.find('form', {'action': re.compile(r'.*login.*')})
            action = form.get('action') if form else None
            if not action:
                logger.warning(
                    'Could not find the login form. Try adding cookies instead'
                )
                return False

        response = self.get_url(urljoin(self.custom_url or self.url, action),
                                post_data=login_params,
                                returns='text')
        if not response:
            logger.warning("Unable to connect to provider")
            return False

        # Invalid username and password combination
        if re.search('Invalid username and password combination', response):
            logger.warning("Invalid username or password. Check your settings")
            return False

        # You tried too often, please try again after 2 hours!
        if re.search('You tried too often', response):
            logger.warning(
                "You tried too often, please try again after 2 hours! Disable IPTorrents for at least 2 hours"
            )
            return False

        # Captcha!
        if re.search('Captcha verification failed.', response):
            logger.warning("Stupid captcha")
            return False

        return True
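
The uid/pass cookie check at the top of login() is a cheap way to detect an
existing session before posting credentials. The same idea as a
self-contained sketch using only requests (the cookie names are carried over
from the example, not guaranteed by any tracker):

import requests
from requests.utils import dict_from_cookiejar

session = requests.Session()
cookies = dict_from_cookiejar(session.cookies)
# Logged in only when both auth cookies are present.
logged_in = bool(cookies.get('uid') and cookies.get('pass'))
print(logged_in)  # False for a fresh session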
Example #54
0
    # Rebase every image src against the page's base URL.
    for imgs in pagina.xpath('//img'):
        try:
            imgs.attrib["src"] = urlparse.urljoin(base_img, imgs.attrib["src"])
        except KeyError:
            continue
    # Rebase every anchor href, leaving in-page fragments alone.
    for links in pagina.xpath('//a'):
        try:
            if links.attrib["href"].startswith('#'):
                continue
            links.attrib["href"] = urlparse.urljoin(base_section,
                                                    links.attrib["href"])
        except KeyError:
            continue
    with open('out.html', 'w') as archivo:
        archivo.write(html.tostring(pagina))


if __name__ == '__main__':
    argumentos = sys.argv
    if validators.url(sys.argv[1]):
        sep(sys.argv[1])
    else:
        if validators.domain(sys.argv[1]):
            sep('http://' + sys.argv[1])
    #busqueda=''
    #for x in argumentos[1:]:
    #busqueda=str(x)+'+'
    #if busqueda=='':
    #listax()
    #else:
    #listax(busqueda)
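
The argument handling in __main__ above accepts either a full URL or a bare
domain that gets an http:// prefix. The same dispatch factored into a helper
(normalize_target is a hypothetical name, not from this project):

import validators

def normalize_target(arg):
    # Full URL: use as-is. Bare domain: upgrade to http://. Else: reject.
    if validators.url(arg):
        return arg
    if validators.domain(arg):
        return 'http://' + arg
    return None

print(normalize_target('example.com'))         # http://example.com
print(normalize_target('http://example.com'))  # http://example.com
print(normalize_target('not a domain'))        # None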
Example #55
0
    def is_valid_link(self, link: str) -> bool:
        # validators.url() returns a falsy ValidationFailure on bad input;
        # coerce to bool to honour the annotation.
        return bool(validators.url(link))
Example #56
0
def validate_bg_image(img: Union[str, None], required: bool, key: str,
                      section: str):
    if (required and not img) or (required and not url(img)):
        raise MissingCredentialsError(
            f"Wrong background image in {key}'s {section} section")
Example #57
0
def main():
    #prepare user's workarea
    home = os.path.expanduser("~")
    if os.path.exists(home + "/Library/Application Support"):  #MacOS
        homegenice = home + "/Library/Application Support/GenIce"
    else:
        homegenice = os.path.expanduser(home + "/.genice")  #Other unix
    sys.path.append(homegenice)
    try:
        os.makedirs(homegenice + "/lattices")
        # os.makedirs(homegenice + "/molecules")
    except OSError:
        pass  # the directory probably exists already; just ignore.
    options = getoptions()
    if options.debug:
        logging.basicConfig(level=logging.DEBUG,
                            format="%(asctime)s %(levelname)s %(message)s")
    elif options.quiet:
        logging.basicConfig(level=logging.WARN,
                            format="%(asctime)s %(levelname)s %(message)s")
    else:
        #normal
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s %(levelname)s %(message)s")
    logger = logging.getLogger()
    Nbox = [int(x) for x in options.rep]
    name = options.name[0]
    #input must be a file......too bad.
    if os.path.exists(name):
        fNameIn = name
        fNameOut = homegenice + "/lattices/" + os.path.basename(name)
        if fNameOut[-4:] in (".cif", ".CIF"):
            fNameOut = fNameOut[:-4]
        fNameOut += ".py"
    else:
        if validators.url(name):
            URL = name
            name = os.path.basename(name)
            if name[-4:] in (".cif", ".CIF"):
                name = name[:-4]
        else:
            URL = "http://www.iza-structure.org/IZA-SC/cif/" + name + ".cif"
        fNameIn = homegenice + "/lattices/" + name + ".cif"
        fNameOut = homegenice + "/lattices/" + name + ".py"
        assert not os.path.exists(
            fNameIn
        ) or options.force, "File exists: {0}. Use '--force' option to overwrite.".format(
            fNameIn)
        assert validators.url(URL)
        download(URL, fNameIn)

    logger.info("Input: {0}".format(fNameIn))
    logger.info("Output: {0}".format(fNameOut))
    if os.path.exists(fNameOut) and not options.force:
        logger.error(
            "File exists: {0}. Use '--force' option to overwrite.".format(
                fNameOut))
        sys.exit(1)
    atoms, box = read_cif.read_and_process(fNameIn, Nbox, make_rect_box=False)
    with open(fNameOut, "w") as fOut:
        write_py(atoms, box, fOut, matchfunc=lambda x: x[0] != "O")
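
The input resolution in main() tries three sources in order: an existing
local file, an explicit URL, and finally the IZA structure database keyed by
name. A condensed sketch of that dispatch (resolve_cif_source is a
hypothetical helper, not part of GenIce):

import os
import validators

def resolve_cif_source(name, cache_dir):
    """Return (local_cif_path, url_to_download_or_None)."""
    if os.path.exists(name):
        return name, None
    if validators.url(name):
        base = os.path.basename(name)
        if base[-4:] in (".cif", ".CIF"):
            base = base[:-4]
        return os.path.join(cache_dir, base + ".cif"), name
    # Fall back to the IZA-SC database by structure name.
    url = "http://www.iza-structure.org/IZA-SC/cif/" + name + ".cif"
    return os.path.join(cache_dir, name + ".cif"), url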
Example #58
0
File: main.py Project: lsqshr/Tylor
    parser.set_defaults(view=False)
    args = parser.parse_args()

    # Define Visual Network
    visual_sensor = VisualNetwork()
    visual_sensor = visual_sensor.cuda()

    # Define Temporal Network
    lstm = Recog(2048, 512, 128)
    lstm = lstm.cuda()

    # Define the memory graph
    mem = GraphMemory(128)

    # Open Video
    if validators.url(args.file):
        from pytube import YouTube
        print('Downloading...')
        yt = YouTube(args.file).streams.first()
        f = yt.default_filename
        if not os.path.exists(f):
            yt.download()
        print('f:', f)
    else:
        f = args.file

    cap = VideoCapture(f)
    cap.open()

    f1, ax1 = plt.subplots(1, 3)
    # f2, ax2 = plt.subplots(2, 1)
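
The video-source handling above goes through pytube only when the argument
validates as a URL, and skips the download when the stream's default
filename is already on disk. A compact sketch of the same flow (fetch_if_url
is a hypothetical name):

import os
import validators
from pytube import YouTube

def fetch_if_url(source):
    # Local paths pass through untouched; URLs are fetched via pytube.
    if not validators.url(source):
        return source
    stream = YouTube(source).streams.first()
    if not os.path.exists(stream.default_filename):
        stream.download()
    return stream.default_filename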
Example #59
0
    def search(self, search_strings, age=0, ep_obj=None):  # pylint: disable=too-many-arguments, too-many-locals, too-many-branches, too-many-statements
        """
        Searches indexer using the params in search_strings, either for latest releases, or a string/id search
        Returns: list of results in dict form
        """
        results = []
        if not self._check_auth():
            return results

        if 'gingadaddy' not in self.url:  # gingadaddy has no caps.
            if not self.caps:
                self.get_newznab_categories(just_caps=True)

            if not self.caps:
                return results

        for mode in search_strings:
            search_params = {
                't': ('search', 'tvsearch')[bool(self.use_tv_search)],
                'limit': 100,
                'offset': 0,
                'cat': self.catIDs.strip(', ') or '5030,5040',
                'maxage': sickbeard.USENET_RETENTION
            }

            if self.needs_auth and self.key:
                search_params['apikey'] = self.key

            if mode != 'RSS':
                if self.use_tv_search:
                    if 'tvdbid' in str(self.cap_tv_search):
                        search_params['tvdbid'] = ep_obj.show.indexerid

                    if ep_obj.show.air_by_date or ep_obj.show.sports:
                        date_str = str(ep_obj.airdate)
                        search_params['season'] = date_str.partition('-')[0]
                        search_params['ep'] = date_str.partition(
                            '-')[2].replace('-', '/')
                    elif ep_obj.show.is_anime:
                        search_params['ep'] = ep_obj.absolute_number
                    else:
                        search_params['season'] = ep_obj.scene_season
                        search_params['ep'] = ep_obj.scene_episode

                if mode == 'Season':
                    search_params.pop('ep', '')

            if self.torznab:
                search_params.pop('ep', '')
                search_params.pop('season', '')

            items = []
            logger.log('Search Mode: {0}'.format(mode), logger.DEBUG)
            for search_string in search_strings[mode]:
                if mode != 'RSS':
                    logger.log(
                        'Search string: {0}'.format(
                            search_string.decode('utf-8')), logger.DEBUG)

                    if 'tvdbid' not in search_params:
                        search_params['q'] = search_string

                time.sleep(cpu_presets[sickbeard.CPU_PRESET])
                data = self.get_url(urljoin(self.url, 'api'),
                                    params=search_params,
                                    returns='text')
                if not data:
                    break

                with BS4Parser(data, 'html5lib') as html:
                    if not self._check_auth_from_data(html):
                        break

                    # try:
                    #     self.torznab = 'xmlns:torznab' in html.rss.attrs
                    # except AttributeError:
                    #     self.torznab = False

                    for item in html('item'):
                        try:
                            title = item.title.get_text(strip=True)
                            download_url = None
                            if item.link:
                                if validators.url(
                                        item.link.get_text(strip=True)):
                                    download_url = item.link.get_text(
                                        strip=True)
                                elif validators.url(item.link.next.strip()):
                                    download_url = item.link.next.strip()

                            # Fall back to the enclosure URL when <link>
                            # yielded nothing usable.
                            if (not download_url and item.enclosure
                                    and validators.url(
                                        item.enclosure.get('url',
                                                           '').strip())):
                                download_url = item.enclosure.get('url',
                                                                  '').strip()

                            if not (title and download_url):
                                continue

                            seeders = leechers = None
                            if 'gingadaddy' in self.url:
                                size_regex = re.search(r'\d*\.?\d* [KMGT]B',
                                                       str(item.description))
                                item_size = size_regex.group(
                                ) if size_regex else -1
                            else:
                                item_size = item.size.get_text(
                                    strip=True) if item.size else -1
                                for attr in item.find_all(
                                    ['newznab:attr', 'torznab:attr']):
                                    item_size = attr['value'] if attr[
                                        'name'] == 'size' else item_size
                                    seeders = try_int(
                                        attr['value']
                                    ) if attr['name'] == 'seeders' else seeders
                                    leechers = try_int(
                                        attr['value']
                                    ) if attr['name'] == 'peers' else leechers

                            if not item_size or (self.torznab and
                                                 (seeders is None
                                                  or leechers is None)):
                                continue

                            size = convert_size(item_size) or -1

                            result = {
                                'title': title,
                                'link': download_url,
                                'size': size,
                                'seeders': seeders,
                                'leechers': leechers
                            }
                            items.append(result)
                        except StandardError:
                            continue

                # Since we aren't using the search string,
                # break out of the search string loop
                if 'tvdbid' in search_params:
                    break

            if self.torznab:
                results.sort(key=lambda d: try_int(d.get('seeders', 0)),
                             reverse=True)
            results += items

        return results
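
The download-URL extraction in the loop above tries three places in order:
the text inside <link>, the node right after <link> (some feeds emit the URL
outside a self-closed tag), and the enclosure's url attribute. The same
fallback chain as a helper (extract_download_url is a hypothetical name;
item is a BeautifulSoup <item> tag):

import validators

def extract_download_url(item):
    if item.link:
        text = item.link.get_text(strip=True)
        if validators.url(text):
            return text
        # html5lib self-closes <link>, pushing the URL into the next node.
        sibling = str(item.link.next).strip()
        if validators.url(sibling):
            return sibling
    if item.enclosure:
        enclosure_url = item.enclosure.get('url', '').strip()
        if validators.url(enclosure_url):
            return enclosure_url
    return None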
Example #60
0
    def search(self, search_strings, age=0, ep_obj=None):  # pylint: disable=too-many-locals, too-many-branches, too-many-statements
        """
        205 = SD, 208 = HD, 200 = All Videos
        https://pirateproxy.pl/s/?q=Game of Thrones&type=search&orderby=7&page=0&category=200
        """
        results = []
        # orderby is 7 in browse for seeders, but 8 in search!
        search_params = {
            "q": "",
            "type": "search",
            "orderby": 8,
            "page": 0,
            "category": 200
        }

        # Units
        units = ["B", "KIB", "MIB", "GIB"]

        def process_column_header(th):
            text = ""
            if th.a:
                text = th.a.get_text(strip=True)
            if not text:
                text = th.get_text(strip=True)
            return text

        for mode in search_strings:
            items = []
            logger.log("Search Mode: {0}".format(mode), logger.DEBUG)

            for search_string in search_strings[mode]:
                search_urls = (self.urls["search"],
                               self.urls["rss"])[mode == "RSS"]
                if not isinstance(search_urls, list):
                    search_urls = [search_urls]

                for search_url in search_urls:
                    if self.custom_url:
                        if not validators.url(self.custom_url):
                            logger.log(
                                "Invalid custom url: {0}".format(
                                    self.custom_url), logger.WARNING)
                            return results
                        search_url = urljoin(self.custom_url,
                                             search_url.split(self.url)[1])

                    if mode != "RSS":
                        search_params["q"] = search_string
                        logger.log(
                            "Search string: {}".format(
                                search_string.decode("utf-8")), logger.DEBUG)

                        # Prevents a 302 redirect, since there is always a 301 from .se to the best mirror having an extra
                        # redirect is excessive on the provider and spams the debug log unnecessarily
                        search_url, params = self.convert_url(
                            search_url, search_params)
                        data = self.get_url(search_url,
                                            params=params,
                                            returns="text")
                    else:
                        data = self.get_url(search_url, returns="text")

                    if not data:
                        logger.log(
                            "URL did not return data, maybe try a custom url, or a different one",
                            logger.DEBUG)
                        continue

                    with BS4Parser(data, "html5lib") as html:
                        torrent_table = html.find("table", id="searchResult")
                        torrent_rows = torrent_table(
                            "tr") if torrent_table else []

                        # Continue only if at least one Release is found
                        if len(torrent_rows) < 2:
                            logger.log(
                                "Data returned from provider does not contain any torrents",
                                logger.DEBUG)
                            continue

                        labels = [
                            process_column_header(label)
                            for label in torrent_rows[0]("th")
                        ]

                        # Skip column headers
                        for result in torrent_rows[1:]:
                            try:
                                cells = result("td")

                                # Funky js on page messing up titles, this fixes that
                                title = result.find(
                                    class_="detLink")['title'].split(
                                        'Details for ', 1)[-1]
                                download_url = result.find(
                                    title="Download this torrent using magnet"
                                )["href"] + self._custom_trackers
                                if not self.magnet_regex.match(download_url):
                                    logger.log(
                                        "Got an invalid magnet: {0}".format(
                                            download_url))
                                    logger.log(
                                        "Invalid ThePirateBay proxy please try another one",
                                        logger.DEBUG)
                                    continue

                                if not all([title, download_url]):
                                    continue

                                seeders = try_int(
                                    cells[labels.index("SE")].get_text(
                                        strip=True))
                                leechers = try_int(
                                    cells[labels.index("LE")].get_text(
                                        strip=True))

                                # Filter unseeded torrent
                                if seeders < self.minseed or leechers < self.minleech:
                                    if mode != "RSS":
                                        logger.log(
                                            "Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})"
                                            .format(title, seeders,
                                                    leechers), logger.DEBUG)
                                    continue

                                # Accept Torrent only from Good People for every Episode Search
                                if self.confirmed and not result.find(
                                        alt=re.compile(r"VIP|Trusted")):
                                    if mode != "RSS":
                                        logger.log(
                                            "Found result: {0} but that doesn't seem like a trusted result so I'm ignoring it"
                                            .format(title), logger.DEBUG)
                                    continue

                                # Convert size after all possible skip scenarios
                                torrent_size = re.sub(
                                    r".*Size ([\d.]+).+([KMGT]iB).*", r"\1 \2",
                                    result.find(class_="detDesc").get_text(
                                        strip=True))
                                size = convert_size(torrent_size,
                                                    units=units) or -1

                                item = {
                                    'title': title,
                                    'link': download_url,
                                    'size': size,
                                    'seeders': seeders,
                                    'leechers': leechers,
                                    'hash': ''
                                }
                                if mode != "RSS":
                                    logger.log(
                                        "Found result: {0} with {1} seeders and {2} leechers"
                                        .format(title, seeders,
                                                leechers), logger.DEBUG)

                                items.append(item)
                            except StandardError:
                                continue

            # For each search mode sort all the items by seeders if available
            items.sort(key=lambda d: try_int(d.get('seeders', 0)),
                       reverse=True)
            results += items

        return results
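
A closing note on the custom-mirror handling that recurs in these providers:
the custom URL is validated first, then the path portion of the default
search URL is rebased onto it with urljoin. A standalone sketch
(rebase_on_mirror is a hypothetical name):

import validators
from urllib.parse import urljoin  # Python 2: from urlparse import urljoin

def rebase_on_mirror(custom_url, default_url, search_url):
    if not custom_url:
        return search_url
    if not validators.url(custom_url):
        return None  # caller should log a warning and abort the search
    return urljoin(custom_url, search_url.split(default_url)[1])

print(rebase_on_mirror("https://mirror.example/",
                       "https://pirateproxy.pl",
                       "https://pirateproxy.pl/s/"))  # https://mirror.example/s/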