Example #1
def main():
    # TODO: grab multiple pages from $START_AT and crawl on multiple threads
    if not (("CRAWL_FOR" in os.environ.keys()) and ("START_AT" in os.environ.keys())):
        print("Error: CRAWL_FOR or START_AT environment variables are missing")
    else:
        print("web crawler started!")
        crawl_time: int = int(os.environ["CRAWL_FOR"])
        start_page: str = os.environ["START_AT"]
        print("starting at {} and crawling for {} seconds".format(start_page, crawl_time))
        print()
        q: Queue = Queue()
        q.put(start_page)
        start_time: int = int(time.time())
        end_time: int = start_time + crawl_time
        done: bool = int(time.time()) >= end_time
        while (not q.empty()) and (not done):
            current_page_url: str = q.get()
            if utils.is_url(current_page_url):
                page_info: tuple = utils.find_page_info(current_page_url)
                if page_info is not None:
                    for link in page_info[1]:
                        q.put(link)
                    if page_info[0] is not None:
                        if not utils.add_page_to_index(page_info[0], current_page_url):
                            print("Error: unable to add page {} to index".format(current_page_url))
                        else:
                            print("added {} to index ({})".format(page_info[0], current_page_url))
            time.sleep(3)
            done = int(time.time()) >= end_time
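A hypothetical way to drive the function above from another module; the environment variable names come from the code itself, while the module name crawler.py and the seed values are assumptions for illustration:

# Hypothetical driver for the crawler above; assumes the function (plus its
# os/time/Queue/utils imports) lives in crawler.py.
import os

os.environ.setdefault("START_AT", "https://example.com")  # seed page (example value)
os.environ.setdefault("CRAWL_FOR", "30")                  # crawl budget in seconds

import crawler

crawler.main()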
Example #2
def main(args):
	print('Args:', args)
	url = urllib.parse.urlsplit(args.url)
	base_url = urllib.parse.urlunsplit(url)

	cache_path = os.path.join(os.getcwd(), url.netloc)
	if not os.path.exists(cache_path):
		os.mkdir(cache_path)

	index_path = os.path.join(cache_path, 'index.json')
	if os.path.exists(index_path):
		with open(index_path) as imp:
			index = json.load(imp)
	else:
		# Validate the seed URL before creating a fresh index.
		if not is_url(base_url):
			raise ValueError('%s is not a valid URL' % base_url)
		index = {base_url: None}

	try:
		if args.update:
			existing = [k for k, v in index.items() if v]
			if existing:
				print('\nUpdating existing cache (%s links)...\n' % len(existing))

			for key in existing:
				if index[key]:
					index[key], links = cache_update(index[key], cache_path)
					for link in [x for x in links if x not in index and base_url in x]:
						index[link] = None

		while True:
			missing = {k: v for k, v in index.items() if not v}
			if missing:
				print('\nRetrieving new cache (%s new links)...\n' % len(missing))
			else:
				break

			# Iterate over a snapshot of the keys; new links are added to the
			# index while it is being filled in.
			for key in list(index.keys()):
				if not index[key]:
					index[key], links = cache(key, cache_path)
					for link in [x for x in links if x not in index and base_url in x]:
						index[link] = None

	except KeyboardInterrupt:
		print('Aborted...')

	print('Saving index file...')
	index_json = json.dumps(index, sort_keys=True, indent=4, separators=(',', ': '))
	with open(index_path, 'w') as exp:
		exp.write(index_json)
Example #3
def build_iso():
    usage = "usage: %prog [options]"
    parser = OptionParser(usage)
    parser.add_option("--input-iso",
                      help="Path or URL to ISO file. Default is 'input.iso'.",
                      action="store", type="string", dest="input_iso",
                      default='input.iso')
    parser.add_option("--output-iso",
                      help="Path to ISO to generate. Default is 'output.iso'.",
                      action="store", type="string", dest="output_iso",
                      default='output.iso')
    parser.add_option("--preseed", help="Path or URL to preseed file. " \
                      "Default is 'preseed.cfg'.", action="store",
                      type="string", dest="preseed_file",
                      default='preseed.cfg')
    parser.add_option("--hide-boot-loader", help="Hide boot loader (default).",
                      action='store_true', dest="is_boot_loader_hidden",
                      default=True)
    parser.add_option("--show-boot-loader", help="Show boot loader.",
                      action='store_false', dest="is_boot_loader_hidden")
    (options, args) = parser.parse_args()

    tmp_input_iso = None
    tmp_preseed_file = None
    try:
        # Download ISO file if necessary.
        if is_url(options.input_iso):
            file_handle, tmp_input_iso = tempfile.mkstemp()
            os.close(file_handle)  # only the temp path is needed; avoid leaking the fd
            download_iso_file(options.input_iso, tmp_input_iso)
            options.input_iso = tmp_input_iso
        # Download preseed file if necessary.
        if is_url(options.preseed_file):
            file_handle, tmp_preseed_file = tempfile.mkstemp()
            os.close(file_handle)
            download_file(options.preseed_file, tmp_preseed_file)
            options.preseed_file = tmp_preseed_file
        # Check that input files exist.
        if not os.path.exists(options.preseed_file):
            parser.error('No such preseed file %s' % options.preseed_file)
        if not os.path.exists(options.input_iso):
            parser.error('No such input ISO %s' % options.input_iso)
        # Build ISO!
        insert_preseed_into_iso(options.preseed_file, options.input_iso,
                                options.output_iso,
                                options.is_boot_loader_hidden)
        if os.path.exists(options.output_iso):
            print "SUCCESS: %s file has been generated." % options.output_iso
    finally:
        if tmp_input_iso:
            os.unlink(tmp_input_iso)
        if tmp_preseed_file:
            os.unlink(tmp_preseed_file)
Example #4
    def __init__(
            self, chapter, number, page_link, img_link=None, img_file=None):
        self._chapter = chapter
        self._number = int(number)

        if utils.is_url(page_link):
            self._page_link = page_link
        else:
            raise ValueError(f'{page_link} is not a valid url')

        if img_link is not None and utils.is_url(img_link):
            self._img_link = img_link

        if img_file is not None:
            self._img_file = img_file
Example #5
File: models.py Project: 7kfpun/pingcron
 def validate_url(self, value):
     if not is_url(value):
         raise Exception('Not a URL')
     #pingurl = PingUrl.query()
     #if pingurl.filter(PingUrl.url == value).count():
         #raise Exception('Url exists!')
     return value
Example #6
 def get_guild_information(self, guild_url_o_name):
     url_or_name = guild_url_o_name
     if utils.is_url(url_or_name):
         # A full guild URL was passed in; use it directly.
         url = url_or_name
     else:
         url = self.base_url_guild + url_or_name.replace(' ', '+')
     return get_page_content(url, read_guild_information)
Example #7
 def _is_hidden(self, path):
     sep = '/' if utils.is_url(self.path) else os.sep
     relpath = path[len(self.path):] if path.startswith(self.path) else path
     for part in relpath.split(sep):
         if part.startswith('.') or part.startswith('_UNPACK'):
             return True
     return False
Example #8
def parse_item(item_tree):
    item_dict = {}

    title_subtree = item_tree.find('title')
    if title_subtree is not None:
        item_dict['title'] = utils.strip_markup(title_subtree.text.strip())

    desc_subtree = item_tree.find('description')
    if desc_subtree is not None:
        item_dict['description'] = desc_subtree.text.strip()
        if 'title' not in item_dict:
            item_dict['title'] = utils.shorten(utils.strip_markup(item_dict['description']), 30)

    guid_subtree = item_tree.find('guid')
    if guid_subtree is not None:
        text = guid_subtree.text.strip()
        has_permalink_attrib = 'isPermaLink' in guid_subtree.attrib
        marked_permalink = has_permalink_attrib and (guid_subtree.attrib['isPermaLink'].lower() == 'true')

        if marked_permalink:
            item_dict['url'] = text
        elif utils.is_url(text):
            item_dict['url'] = text

    if 'url' not in item_dict:
        link_subtree = item_tree.find('link')
        if link_subtree is not None:
            text = link_subtree.text.strip()
            item_dict['url'] = text

    return item_dict
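A quick self-contained check of the guid handling above; the utils stubs below are assumptions standing in for the project's real helpers:

# Demo of parse_item with stubbed utils helpers (the stubs are assumptions).
import xml.etree.ElementTree as ET

class utils:  # stands in for the real utils module in this demo only
    @staticmethod
    def strip_markup(s):
        return s

    @staticmethod
    def shorten(s, n):
        return s[:n]

    @staticmethod
    def is_url(s):
        return s.startswith(('http://', 'https://'))

item = ET.fromstring(
    '<item><guid isPermaLink="true">http://example.com/posts/1</guid></item>')
print(parse_item(item))  # expect {'url': 'http://example.com/posts/1'}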
Example #9
def store(args, syn):
    # If we are storing a FileEntity we need to have id or parentId
    if args.parentid is None and args.id is None and args.file is not None:
        raise ValueError('synapse store requires at least either parentId or id to be specified.')
    # If both args.FILE and args.file are specified, raise an error
    if args.file and args.FILE:
        raise ValueError('only specify one file')
    args.file = args.FILE if args.FILE is not None else args.file
    args.type = 'FileEntity' if args.type == 'File' else args.type

    if args.id is not None:
        entity = syn.get(args.id)
    else:
        entity = {'concreteType': u'org.sagebionetworks.repo.model.%s' % args.type,
                  'name': utils.guess_file_name(args.file) if args.file and not args.name else None,
                  'parentId': None,
                  'description': None,
                  'path': args.file}
    # Override settings for parameters included in args
    entity['name'] = args.name if args.name is not None else entity['name']
    entity['description'] = args.description if args.description is not None else entity['description']
    entity['parentId'] = args.parentid if args.parentid is not None else entity['parentId']
    entity['path'] = args.file if args.file is not None else None
    if utils.is_url(args.file):
        entity['synapseStore'] = False

    used = _convertProvenanceList(args.used, args.limitSearch, syn)
    executed = _convertProvenanceList(args.executed, args.limitSearch, syn)
    entity = syn.store(entity, used=used, executed=executed)
    print('Created/Updated entity: %s\t%s' % (entity['id'], entity['name']))
Example #10
    def on_update(self, pod_row_update):
        def show(result, error):
            if not error:
                podcast = result
                self.database.insert_new_episodes(pod_row_update.podcast.id, podcast['episodes'])

                if self.pod_row_selected == pod_row_update:
                    episodes = self.database.fetch_episodes(pod_row_update.podcast.id, 50)
                    for e in self.episode_box.get_children():
                        self.episode_box.remove(e)

                    for ep in episodes:
                        e = Episode.from_tuple(ep)
                        self.episode_box.add(EpisodeRow(e))

                    self.episode_box.show_all()

            pod_row_update.loading(False)

        @expect_call(on_done=show)
        def updating(url):
            return rest_podcast(url)

        url = pod_row_update.podcast.url
        if is_url(url):
            pod_row_update.loading(True)
            updating(url)
Example #13
def prepareSrc(testname, pkg=None, untar=True):
	'''Prepare test stuff from a backup or an online resource.'''

	NEEDPKGS = []

	if pkg is not None:
		NEEDPKGS.append(pkg)

	if testcaseAttr(testname, 'localsrc') == 'No':
		onlinePkg = testcaseSubElmt(testname, 'resourcePath')
		if not u.is_url(onlinePkg):
			raise Exception("Unknown source package!")

		pkgName = onlinePkg.split('/')[-1].strip()
		if re.match(r'clr-generic\.x86_64-\d{2,4}\.\d{2}\.\d{2}\.raw\.xz', pkgName) is not None:
			if checkImgVer(pkgName):
				NEEDPKGS.append(onlinePkg)
		else:
			NEEDPKGS.append(onlinePkg)

	if len(NEEDPKGS) == 0:
		print("%s - No extra source packages needed." % testname)
		return None

	status, pkgsIn = u.RunCommand('ls %s' % pkgsDir()).run()

	for p in NEEDPKGS:
		filename = p.split('/')[-1]

		if testcaseAttr(testname, 'localsrc') == 'Yes' and filename not in pkgsIn and not u.is_url(p):
			os.system('cp -v %s %s' % (p, srcDir(testname)))
			os.system('cp -v %s %s' % (p, pkgsDir()))
		elif filename in pkgsIn:
			os.system('cp -v %s/%s %s' % (pkgsDir(), filename, srcDir(testname)))
		else:
			u.curlObtain(p, NWProxy())
			os.system('cp -v %s %s' % (filename, srcDir(testname)))
			os.system('cp -v %s %s' % (filename, pkgsDir()))
			os.system('rm -rf %s' % filename)

	if untar:
		os.chdir(srcDir(testname))
		u.srcExtract(filename, srcDir(testname))
Example #14
async def get_image(req):
    body = req.json
    if 'keyword' not in body:
        return create_err('keyword not in body.')
    else:
        keyword = body['keyword']
        image_path_res = await create_googly_from_kw(keyword)
        if is_url(image_path_res):
            return create_res(image_path_res)
        else:
            return create_err(image_path_res)
Example #15
def add(args, syn):
    if args.type == 'File':
        args.type = 'FileEntity'
    entity = {'name': args.name,
              'parentId': args.parentid,
              'description': args.description,
              'concreteType': u'org.sagebionetworks.repo.model.%s' % args.type,
              'path': args.file}
    if utils.is_url(args.file):
        entity['synapseStore'] = False

    entity = syn.store(entity, used=args.used, executed=args.executed)

    print('Created/Updated entity: %s\t%s from file: %s' % (entity['id'], entity['name'], args.file))
Example #16
def store(args, syn):
    # If we are storing a FileEntity we need to have id or parentId
    if args.parentid is None and args.id is None and args.file is not None:
        raise ValueError(
            'synapse store requires at least either parentId or id to be specified.'
        )
    # If both args.FILE and args.file are specified, raise an error
    if args.file and args.FILE:
        raise ValueError('only specify one file')
    args.file = args.FILE if args.FILE is not None else args.file
    args.type = 'FileEntity' if args.type == 'File' else args.type

    if args.id is not None:
        entity = syn.get(args.id, downloadFile=False)
    else:
        entity = {
            'concreteType': u'org.sagebionetworks.repo.model.%s' % args.type,
            'name': utils.guess_file_name(args.file) if args.file and not args.name else None,
            'parentId': None,
            'description': None,
            'path': args.file
        }
    # Override settings for parameters included in args
    entity['name'] = args.name if args.name is not None else entity['name']
    entity['description'] = (args.description if args.description is not None
                             else entity.get('description', None))
    entity['parentId'] = args.parentid if args.parentid is not None else entity['parentId']
    entity['path'] = args.file if args.file is not None else None
    entity['synapseStore'] = not utils.is_url(args.file)

    used = _convertProvenanceList(args.used, args.limitSearch, syn)
    executed = _convertProvenanceList(args.executed, args.limitSearch, syn)
    entity = syn.store(entity, used=used, executed=executed)
    print('Created/Updated entity: %s\t%s' % (entity['id'], entity['name']))

    # After creating/updating, if there are annotations to add then
    # add them
    if args.annotations is not None:
        # Need to override the args id parameter
        setattr(args, 'id', entity['id'])
        setAnnotations(args, syn)
Example #17
def load_data_from_url(url, language='en'):
    # type: (Text, Optional[Text]) -> TrainingData
    """Load training data from a URL."""

    if not utils.is_url(url):
        raise requests.exceptions.InvalidURL(url)
    try:
        response = requests.get(url)
        response.raise_for_status()
        temp_data_file = utils.create_temporary_file(response.content)
        return _load(temp_data_file, language)
    except Exception as e:
        logger.warning("Could not retrieve training data "
                       "from URL:\n{}".format(e))
Example #18
    async def on_command(self, cmd, args, message):
        if cmd in self.cmd_keys:
            cur_month = datetime.now(tz=timezone.utc).month
            if time.time() - self.last_execute_time < self.cool_down:
                await message.channel.send(
                    "Please wait for %d seconds." %
                    (self.cool_down + self.last_execute_time - time.time()))
                return

            if self.usage[0] != cur_month:
                self.usage = [cur_month, 0]
            if self.usage[1] < 1000:
                if message.attachments:
                    url = message.attachments[0].url
                elif len(args) >= 1 and is_url(args[0]):
                    url = args[0]
                else:
                    await message.channel.send(
                        "Please input the image url or upload the image.")
                    return

                data = await get_data_by_url(url)
                try:
                    Image.open(io.BytesIO(data))
                except OSError:
                    await message.channel.send("Illegal file format")
                    return

                image = types.Image(content=data)
                response = self.client.label_detection(image=image)
                labels = response.label_annotations
                translator = Translator()
                s = "\n"
                for label in labels:
                    text = label.description
                    t_text = translator.translate(text, dest="zh-tw").text
                    s += "%s (%s): %f\n" % (text, t_text, label.score)

                self.usage[1] += 1
                with open(self.usage_file_path, "wb") as f:
                    pickle.dump(self.usage, f)

                await message.channel.send(s)

            return True
        else:
            return False
Example #19
    def scrape_content(self, store, url):

        unit_tag = self.CONFIGS.get(store).get("listing_unit_tag")
        next_page_tags = self.CONFIGS.get(store).get("next_page_tags")
        listing_info_tags = self.CONFIGS.get(store).get("listing_info_tags")
        detail_info_tags = self.CONFIGS.get(store).get("detail_info_tags")
        total_units_tags = self.CONFIGS.get(store).get("total_units_tags")
        html = requests.get(url, headers=generate_request_header()).content
        total_units = scrape_tag_contents(total_units_tags, html)
        total_units = total_units[0] if len(total_units) else '1'

        total_units = re.sub(r"[,|.]+", "", total_units)
        total_units = re.findall("[0-9]+", total_units)
        total_units = total_units[0] if len(total_units) else '1'
        try:
            total_units = int(total_units)
        except ValueError:
            total_units = 1

        content = []
        print('Gathering product information...')

        info_tags = listing_info_tags
        if total_units == 1:
            info_tags = detail_info_tags
            unit_tag = [("html", "")]

        progress = 0
        while url:

            if progress:
                html = requests.get(url,
                                    headers=generate_request_header()).content
            units_html = scrape_tag_contents(unit_tag, html)
            for idx, unit in enumerate(units_html):
                progress += 1
                print_progress(progress, total_units)
                values = {}
                for field, field_tags in info_tags.items():
                    value = self._clean_text(
                        scrape_tag_contents(field_tags, unit))
                    values[field] = value
                content.append(values)
            next_page = scrape_tag_contents(next_page_tags, html)
            url = next_page[0] if next_page and is_url(next_page[0]) else None
        return content
Example #20
def select_emitter(path):
    import xbmcvfs
    import settings
    from utils import log

    if is_url(path) and xbmcvfs.exists(path):
        return VFSPoller

    if os.path.exists(encode_path(path)):
        if settings.POLLING:
            return LocalPoller
        if _is_remote_filesystem(path):
            log("select_observer: path <%s> identified as remote filesystem" % path)
            return LocalPoller
        return NativeEmitter

    raise IOError("No such directory: '%s'" % path)
Example #21
 async def save_url(self, name, url, message):
     name = name.lower()
     user = message.author
     channel = message.channel
     if not await self.is_name_valid(name, send_error=True,
                                     channel=channel):
         return
     elif not is_url(url):
         await channel.send("Failed to save: not a valid URL")
     elif len(url) > 1000:
         await channel.send(
             "Failed to save: the link must not be longer than 1000 characters")
     else:
         with open(self.link_dict_path, 'rb') as f:
             link_dict = pickle.load(f)
         link_dict[name] = (url, "", "", str(user), user.id)
         with open(self.link_dict_path, "wb") as f:
             pickle.dump(link_dict, f)
         await channel.send("Successfully Saved")
Example #22
    def post(self):
        """
        POST a new song to play by URI/URL.
        """

        try:
            data = json.loads(request.data.decode())
        except ValueError:
            return jsonify(error=True, text="Invalid JSON given")

        uri = data["url"]
        if not uri:
            return jsonify(error=True, text="Invalid URI")

        if is_url(uri):
            uri = self.prov.canonicalize(uri)

        elif not is_uri(uri):
            return jsonify(error=True, text="Invalid URI")

        audio_api = current_app.config['audio_api']
        fn = self.cache.get_song(uri)
        if fn is not None:
            return play_file(
                audio_api, current_app.config['songlogger'], fn, data.get("filename", uri), user=data["user"]
            )

        try:
            task = self.prov.get(uri)
        except ValueError:
            return jsonify(error=True, text="No provider found for " + uri)

        if task.state == TaskState.exception:
            return jsonify(error=True, text=traceback.format_exception_only(type(task.exception), task.exception))

        task.metadata['user'] = data.get('user', 'anonymous')
        task.metadata['original_filename'] = data.get('filename', uri)
        with current_app.config['task_lock']:
            current_app.config['task_dict'][task.id] = task
        return jsonify(error=False, task=task.id, text="Task received, fetching song")
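A hypothetical client call for the handler above, assuming the resource is mounted at /play on a local development server; the host, port, route, and payload values are illustrative only:

# Hypothetical client for the POST handler above (mount point and host are assumptions).
import requests

resp = requests.post(
    "http://localhost:5000/play",
    json={"url": "https://example.com/song.mp3", "user": "alice"},
)
print(resp.json())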
Example #23
def download_subset(subset_path, dataset_dir, ffmpeg_path, ffprobe_path,
                    num_workers, **ffmpeg_cfg):
    """
    Download all files for a subset, including the segment file, and the audio and video files.

    Args:
        subset_path:    Path to subset segments file
                        (Type: str)

        dataset_dir:    Path to dataset directory where files are saved
                        (Type: str)

        ffmpeg_path:    Path to ffmpeg executable
                        (Type: str)

        ffprobe_path:   Path to ffprobe executable
                        (Type: str)

        num_workers:    Number of workers to download and process videos
                        (Type: int)

    Keyword Args:
        **ffmpeg_cfg:                   Configuration for audio and video
                                        downloading and decoding done by ffmpeg
                                        (Type: dict[str, *])

    """
    if is_url(subset_path):
        subset_path = download_subset_file(subset_path, dataset_dir)

    subset_name = get_subset_name(subset_path)
    data_dir = init_subset_data_dir(dataset_dir, subset_name)

    download_subset_videos(subset_path, data_dir, ffmpeg_path, ffprobe_path,
                           num_workers, **ffmpeg_cfg)
Example #24
    def start_process(self):

        video_cache_dir = self.check_video_cache_dir()
        if not video_cache_dir:
            return

        # Check the port
        port = self.local_frame.port()
        if not utils.is_int(port):
            return messagebox.showerror('错误', '端口只能是数字')
        port = int(port)
        if port < 2000 or port > 60000:
            return messagebox.showerror('错误', '端口只能从2000到60000')

        create_danmaku: bool = self.local_frame.create_danmaku()

        # print(video_cache_dir, port)

        # Check the three URLs
        video_url = self.video_frame.video_url()
        danmaku_url = self.video_frame.danmaku_url()
        proxy_url = self.video_frame.proxy_url()

        if create_danmaku:
            print('自建弹幕')
            danmaku_url = '1'

        # print(video_url, danmaku_url, proxy_url)

        if len(video_url) == 0:
            return messagebox.showerror('错误', '请填写视频源网址')
        else:
            if video_url != '1' and not utils.is_url(video_url):
                return messagebox.showerror(
                    '错误', '视频源的格式错误,只接受:\nhttp:\\\\xxx\n的格式')

        if danmaku_url != '1':
            if len(danmaku_url) > 0 and not utils.is_url(danmaku_url):
                return messagebox.showerror(
                    '错误', '弹幕源的格式错误,只接受:\nhttp:\\\\xxx\n的格式')

        if len(proxy_url) > 0:
            if not utils.is_url(proxy_url):
                return messagebox.showerror('错误',
                                            '代理的格式错误,只接受:\nhttp:\\\\xxx\n的格式')

        check = test_connect(video_url, proxy_url)
        if check != 'ok':
            has_proxy = len(proxy_url) > 0
            title = '连接错误'
            if has_proxy:
                title = '代理服务器出现错误'
            message = title
            if check == 'NeedTWIP':
                message = '四季TV网络视频源 需要台湾IP'
            elif check == 'ProxyError':
                message = '连接不到代理服务器'
            elif check == 'NotM3u8':
                message = '网络视频源 返回的不是M3u8文件格式'
            elif check == 'TimeOut':
                message = '连接 网络视频源 超时(5秒)'
            return messagebox.showerror(title, message)

        self.__m3u8_process = Process(target=m3u8.run,
                                      args=(video_cache_dir, video_url,
                                            proxy_url, self.my_cache.cache))
        self.__m3u8_process.start()

        only_video = self.local_frame.only_video()
        self.__server_process = Process(target=server.run,
                                        args=(port, video_cache_dir,
                                              danmaku_url, only_video,
                                              self.my_cache.cache))
        self.__server_process.start()

        return '123ok'
Example #25
 def add_podcast_from_url(self, url):
     if is_url(url) and not self.database.check_podcast_url(url):
         podcast = rest_podcast(url)
         self.database.insert_podcast_with_episodes(podcast)
         self.update_list(podcast)
Example #26
    def __get_url_shorthands(self, configs):
        """
        :type configs: list
        :return: list
        """
        urls = []
        for candidate in configs[:]:
            if is_url(candidate):
                urls.append(candidate)
                configs.remove(candidate)

        if urls:
            self.log.debug("Adding HTTP shorthand config for: %s", urls)
            config_fds = NamedTemporaryFile(prefix="http_", suffix=".yml")
            fname = config_fds.name
            config_fds.close()

            config = Configuration.from_dict({
                "execution": [{
                    "concurrency": "${__tstFeedback(Throughput_Limiter,1,${__P(concurrencyCap,1)},2)}",
                    "hold-for": "2m",
                    "throughput": "${__P(throughput,600)}",
                    "scenario": "linear-growth",
                }],
                "scenarios": {
                    "linear-growth": {
                        "retrieve-resources": False,
                        "timeout": "5s",
                        "keepalive": False,
                        "requests": [{
                            "action": "pause",
                            "pause-duration": 0,
                            "jsr223": [{
                                "language": "javascript",
                                "execute": "before",
                                "script-text": """
var startTime = parseInt(props.get("startTime"));
if (!startTime) {
    startTime = Math.floor((new Date()).getTime() / 1000);
    props.put("startTime", startTime);
} else {
    var now = Math.floor((new Date()).getTime() / 1000);
    var offset = now - startTime;
    if (offset < 60) {
        var targetOffset = Math.max(offset * 10, 10);
        props.put("throughput", targetOffset.toString());
    }
}"""
                            }]
                        }] + urls,
                    }
                },
                "modules": {
                    "jmeter": {
                        "properties": {
                            "throughput": 1,
                            "concurrencyCap": 500,
                        },
                    }
                }
            })
            config.dump(fname, Configuration.JSON)
            return [fname]
        else:
            return []
Example #27
def _split_path(path):
    sep = '/' if is_url(path) else os.sep
    folder, filename = path.rsplit(sep, 1)
    return folder + sep, filename
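A minimal runnable sketch of the split behavior above; the stub is_url only checks for an http(s) scheme, which is an assumption (each project ships its own is_url):

# Stand-alone demo of _split_path; the stub is_url is an assumption.
import os

def is_url(path):
    return path.startswith(('http://', 'https://'))

def _split_path(path):
    sep = '/' if is_url(path) else os.sep
    folder, filename = path.rsplit(sep, 1)
    return folder + sep, filename

print(_split_path('http://example.com/files/data.csv'))
# -> ('http://example.com/files/', 'data.csv')
print(_split_path(os.path.join(os.sep, 'home', 'user', 'data.csv')))
# -> ('/home/user/', 'data.csv') on POSIX systems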
Example #28
term = args.term
if key_file is None:
    key_file = args.key
candidate_encodings, candidate_names, cnt_dict = flatten_encodings(data)

if args.video is None and not args.camera:
    if args.match is not None:  # Use given photos to do face matching
        urls = get_url_path_list(args.match)
    else:  # Search for photos
        if args.batch:
            stop("No photos provided!")

        urls, term, img_download_path = interact_get_match_photos(term, MATCH_FACE_IMG_PATH, KEY_FILE, key_file)

    for url in urls:
        if is_url(url):
            try:
                print("\n        Downloading the photo from\n            {}".format(url))
                path, _ = download_img(url, img_download_path, term)
                print("        into\n            {}".format(path))
            except (ConnectionResetError, urllib.error.URLError):
                continue
        else:
            path = url

        if not args.batch:
            rgb = show_image(path)
            yes = mlutils.yes_or_no("\n    Do you want to use this photo", yes=True)
            if not yes:
                os.remove(path)
                continue
Example #29
async def init_tfm_game(message):
    """Format of message is !tfm +cpv player1;bgy;urd.
    See the help message for more info."""
    args = shlex.split(message.content)
    global_opts = ""
    server = "https://mars.ross.gg"  # default but can be changed by adding a url to the command
    players = []
    if len(args) == 1:
        await message.author.send(
            "No command entered! Showing the help for !tfm.")
        await send_help(message, "tfm_help")
        return
    for arg in args[1:]:
        if arg[0] == "+":
            global_opts = arg[1:]
            continue
        if is_url(arg):
            server = arg
            continue
        logger.debug(f"Parsing arg `{arg}`")
        all_args = arg.split(";")
        if len(all_args) == 2:
            name, colors = all_args
            opts = ""
        elif len(all_args) == 3:
            name, colors, opts = all_args
        else:
            await message.author.send(
                f"Too many semicolons in player string {arg} (expected 2-3)!")
            return
        if not re.match("[rygbpk]+", colors):
            await message.author.send(
                f"Color in {colors} for player {name} is not valid.")
            return
        if not re.match("[23456abcdefghilmnoprstuvw]*", opts):
            await message.author.send(
                f"Opt in {opts} for player {name} is not valid.")
            return
        new_player = TFMPlayer(name, colors, opts)
        players.append(new_player)
    game = TFMGame(server)
    options = await game.generate_shared_params(global_opts, players)
    data = await game.create_table(options)
    player_lines = []
    i = 1
    for player in data:
        color_circle = f":{player['color']}_circle:"
        player_str = player["name"]
        discord_id = get_discord_id(player_str, message)
        if discord_id != -1:
            player_str = f"<@!{discord_id}>"
        player_line = f"**{i} {color_circle}** {player_str}\t [Link to Game]({player['player_link']})"
        player_lines.append(player_line)
        i += 1
    author_line = ""  # It's not as important to have a game creator - the bot is the game creator
    player_list_str = "\n".join(player_lines)
    options_str = ""
    option_names = list(options.keys())
    option_names.sort()
    # The following is a kludge to create a table inside an embed with ~ tabs
    # Use discord number to create a number like :three:
    numbers = {"2": "two", "3": "three", "4": "four", "5": "five", "6": "six"}
    number = numbers[str(options["startingCorporations"])]
    truncated_opts_str = "*Complete options sent to game creator*\n\n :{}: `{:<20}`".format(
        number, "Corporations")
    expansions = [
        "colonies",
        "communityCardsOption",
        "corporateEra",
        "prelude",
        "promoCardsOption",
        "turmoil",
        "venusNext",
    ]
    ith = 1
    for expn in expansions:
        short_expn = expn.replace("CardsOption", "")
        if options[expn]:
            truncated_opts_str += " :white_check_mark:`{:<20}`".format(
                short_expn)
        else:
            truncated_opts_str += " :x:`{:<20}`".format(short_expn)
        ith += 1
        if ith % 2 == 0:
            truncated_opts_str += "\n"  # should be a 2row 3col table
    for key in option_names:
        if key != "players":
            options_str += f"{key}   =   {options[key]}\n"
    await send_table_embed(
        message,
        "Terraforming Mars",
        f"Running on server {server}",
        author_line,
        player_list_str,
        "Options",
        truncated_opts_str,
    )
    await message.author.send(
        f"**Created game with these options**\n\n```{options_str}```")
    await game.close_connection()
Example #30
 def handles_url(self, url):
     return is_url(url)
Example #31
 def _test_exc(self):
     with self.assertRaises(RuntimeError):
         self.assertFalse(utils.is_url('google', 12))
Example #32
def parse_profile(uid, num_post_scroll):

    url = urllib.parse.urljoin(URL_FACEBOOK_ROOT, uid)
    # Acquire the driver outside the try block so the except clause can always quit it.
    driver = get_driver()
    try:
        driver.get(url)

        soup = BeautifulSoup(driver.page_source, 'lxml')
        current_url = driver.current_url
    except Exception:
        driver.quit()
        raise

    try:
        user_data = dict()
        user_data['uid'] = uid
        user_data['profile_url'] = current_url

        # get username
        user_data['username'] = current_url.replace(URL_FACEBOOK_ROOT, '').replace('/', '')

        # Get user's name
        # user_name_h1 = soup.find("h1", id="seo_h1_tag")
        user_name_div = soup.find("div", id="u_0_0")
        user_data['name'] = user_name_div.a.span.string

        # Get follower number
        follower_count = soup.find('div', string=re.compile('people follow this'))
        if follower_count:
            res = re.findall('[0-9,]+', follower_count.string)
            if res:
                user_data['followers'] = res[0].replace(',', '')
            else:
                user_data['followers'] = '0'

        # Get likes number
        likes_count = soup.find('div', string=re.compile('people like this'))
        if likes_count:
            res = re.findall('[0-9,]+', likes_count.string)
            if res:
                user_data['likes'] = res[0].replace(',', '')
            else:
                user_data['likes'] = '0'

        # Click about tab for contact details.
        about_page = driver.find_element(By.CSS_SELECTOR, "[data-key=tab_about]")
        about_page.click()
        WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "PagesProfileAboutInfoPagelet_"+str(uid))))

        # CONTACT DETAILS
        soup = BeautifulSoup(driver.page_source, 'lxml')
        contact_details = []
        cd_div_child = soup.find("div", string='CONTACT DETAILS')
        if cd_div_child:
            for sibling in cd_div_child.next_siblings:
                if type(sibling) is Tag:
                    text_div = sibling.find("div", class_="_50f4")
                    if text_div:
                        if is_valid_email(text_div.string) or is_url(text_div.string):
                            contact_details.append(text_div.string)
                        elif text_div.string.startswith('Call '):
                            contact_details.append(text_div.string.replace('Call', '').strip())
                        elif text_div.parent.name == "a":
                            contact_details.append(text_div.parent['href'])

            user_data['contact_details'] = contact_details
        driver.quit()
    except Exception:
        driver.quit()
        logging.log(logging.CRITICAL, f"Parse profile failed : {user_data['profile_url']}")
        raise

    m_connection = MongoClient(MONGODB_URI)
    with m_connection:
        m_connection.aggero_fb.user_details.find_one_and_replace({'uid': user_data['uid']}, user_data, upsert=True)

    logging.log(logging.INFO, f"User Data : {user_data}")
    with faktory.connection(faktory=URL_FACTORY) as client:
        client.queue('parse_posts', args=[user_data['uid'], user_data['username'], num_post_scroll], queue='busy')
Example #34
# ----------------------------------------------------------------------

parser = argparse.ArgumentParser(prog='score',
                                 parents=[option_parser],
                                 description='Detect faces in an image.')

parser.add_argument('image', type=str, help='image path or URL')

args = parser.parse_args()

# Wrap face detection parameters.

face_params = FaceParams(args.scaleFactor, args.minNeighbors, args.minSize)

# ----------------------------------------------------------------------
# Face detection
# ----------------------------------------------------------------------

image = read_cv_image_from(
    args.image if is_url(args.image) else get_abspath(args.image))

faces = detect_faces(image, face_params=face_params)

print("Found {0} faces!".format(len(faces)))

result = mark_faces(image, faces)

image, result = convert_cv2matplot(image, result)

plot_side_by_side_comparison(image, result, rightlabel="Detected Faces")
Example #35
 def get_length_for_word(self, word):
     if is_url(word):
         return self.short_url_length
     return len(word)
Example #36
 def test_valid(self):
     self.assertTrue(utils.is_url('http://google.com'))
Example #37
 def img_link(self, img_link):
     if utils.is_url(img_link):
         self._img_link = img_link
     else:
         raise ValueError(f'{img_link} is not a valid url')
Example #38
    def test_url_format_check(self):
        """Tests the code used to parse url and verify formats. Gives a slew of cases that are supposed to success as well as fail"""

        assert not is_url("www.google.com")
        assert is_url("http://www.google.com")
        assert is_url("https://www.google.com")
        assert is_url("http://www.google.com/info.txt")
        assert is_url("http://www.google.com/child/info.txt")

        assert not is_url("10.120.1.23")
        assert is_url("http://10.120.1.23")
        assert is_url("http://10.120.1.23/info.txt")
        assert is_url("http://10.120.1.23/child/info.txt")

        assert is_url("http://127.0.0.1:8080")
        assert is_url("http://127.0.0.1:8080/child/info.txt")

        assert is_url("http://port:8080")
        assert is_url("http://port:8080/child/info.txt")

        assert is_url("http://hello")
        assert not is_url("http://hello.")
        assert is_url("http://hello.i")
        assert is_url("http://hello.io")
        assert is_url("http://hello/child/info.txt")

        assert is_url("http://hel-lo")
        assert is_url("http://hel_lo")
        assert not is_url("http://hel lo")
        assert is_url("http://hello/")
        assert is_url("http://hello/.")
        assert is_url("http://hello/.txt")
Example #39
 def _get_cfg_fp(self, cfg_file=None):
     cfg = cfg_file or self.cfg_file
     if utils.is_url(cfg):
         return self._get_urlfp(cfg)
     else:
         return self._get_fp(cfg)
Example #40
 def _test_invalid_url(self, url):
     self.assertFalse(utils.is_url(url))