def parse_item(self, response):
    hxs = Selector(response)
    item_titles = extract(hxs, "//div[@class='gl-i-wrap j-sku-item']//a/em/text()")
    top_id = extract_one(hxs, '//*[@id="J_crumbsBar"]/div/div/div/div[1]/a/text()')
    type_id1 = extract(hxs, '//*[@id="J_crumbsBar"]//div[@class="trigger"]/span/text()')[0]
    type_id2 = extract(hxs, '//*[@id="J_crumbsBar"]//div[@class="trigger"]/span/text()')[-1]
    if type_id1 != type_id2:
        for i, t in enumerate(item_titles):
            if i < 20:
                good = {
                    'mall': '2',
                    'rank': str(i + 1),
                    'title': t,
                    'price': '0',
                    'turnover_index': '0',
                    'top_id': top_id,
                    'type_id1': type_id1,
                    'type_id2': type_id2,
                    'url': response.url
                }
                yield Good(good)
    for link in self.normal_url_extractor.extract_links(response):
        yield SplashRequest(link.url, callback=self.parse_url,
                            args={'wait': 0.5, 'html': 1})
    for link in self.needed_url_extractor.extract_links(response):
        if 'ev' not in link.url:
            url = re.sub(r'page=.*&', 'page=1&', link.url)
            url = re.sub(r'stock=.*&', 'stock=0&', url)
            url = re.sub(r'delivery_daofu=.*&', 'delivery_daofu=0&', url)
            url = re.sub(r'delivery=.*&', 'delivery=0&', url)
            yield SplashRequest(url, callback=self.parse_item,
                                args={'wait': 0.5, 'html': 1})
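The `extract` and `extract_one` helpers used throughout these spiders are not defined in the snippets; a minimal sketch of what they might look like, assuming a standard Scrapy `Selector` (hypothetical, inferred from the call sites):

def extract(selector, xpath):
    # Return every text fragment / attribute value matched by the XPath expression.
    return selector.xpath(xpath).extract()

def extract_one(selector, xpath):
    # Return only the first match, or None if nothing matched.
    return selector.xpath(xpath).extract_first()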
def parse_item(self, response):
    hxs = Selector(response)
    top_id = re.findall(r'.*&topId=(\S+_\S+)&type.*', response.url)[0]
    # type_id = re.findall(r'.*leafId=(\d+)&rank=.*', response.url)[0]
    type_id1 = extract_one(
        hxs,
        "//div[@class='block-body ']/div[@class='params-cont']/a[@class='param-item icon-tag param-item-selected']/text()"
    )
    ranks_tuple = extract(
        hxs,
        '//*[@class="rank-num rank-focus"]/text()|//*[@class="rank-num rank-important"]/text()|//*[@class="rank-num rank-"]/text()'
    )
    ranks = []
    for r in ranks_tuple:
        if r.strip() != '':
            ranks.append(r)
    titles = extract(hxs, '//*[@class="title"]/a/text()')
    prices = extract(hxs, '//*[@class="col3 col"]/text()')[1:]
    turnover_indexs = extract(hxs, '//*[@class="focus-bar"]/span/text()')
    for r, t, p, i in zip(ranks, titles, prices, turnover_indexs):
        good = {
            'mall': '0',
            'rank': r.strip(),
            'title': t.strip(),
            'price': p.split('¥')[-1].strip(),
            'turnover_index': i.strip(),
            'top_id': top_id.strip(),
            'type_id1': type_id1.strip(),
            'type_id2': '',
            'url': response.url
        }
        yield Good(good)
def load_lists(opt):
    arch_name = 'ava_v{}.zip'.format(opt.version)
    arch_path = os.path.join(opt.out_path, arch_name)
    arch_url = 'https://research.google.com/ava/download/{}'.format(arch_name)
    if utils.download_file(arch_url, arch_path):
        utils.extract(arch_path, opt.out_path)

    train_video_ids, val_video_ids, test_video_ids = None, None, None
    if opt.type is None or opt.type == 'train':
        ids_file_path = os.path.join(opt.out_path, 'ava_train_v{}.csv'.format(opt.version))
        train_video_ids = read_ids(ids_file_path)
    if opt.type is None or opt.type == 'validation':
        ids_file_path = os.path.join(opt.out_path, 'ava_val_v{}.csv'.format(opt.version))
        val_video_ids = read_ids(ids_file_path)
    if opt.type is None or opt.type == 'test':
        ids_file_path = os.path.join(opt.out_path, 'ava_test_v{}.txt'.format(opt.version))
        test_video_ids = read_ids(ids_file_path)

    ts_file_name = 'ava_included_timestamps_v{}.txt'.format(opt.version)
    ts_file_path = os.path.join(opt.out_path, ts_file_name)
    with open(ts_file_path) as f:
        lines = f.readlines()
        timestamps = int(lines[0]), int(lines[-1])

    return train_video_ids, val_video_ids, test_video_ids, timestamps
def preprocess(no_wells_marmousi, no_wells_seam):
    """Function initializes data, performs standardization, and train test split

    Parameters
    ----------
    no_wells_marmousi : int
        number of evenly spaced wells and seismic samples to be evenly
        sampled from the Marmousi section
    no_wells_seam : int
        number of evenly spaced wells and seismic samples to be evenly
        sampled from SEAM

    Returns
    -------
    seismic_marmousi : array_like, shape(num_traces, depth samples)
        2-D array containing the seismic section for Marmousi
    seismic_seam : array_like, shape(num_traces, depth samples)
        2-D array containing the seismic section for SEAM
    model_marmousi : array_like, shape(num_wells, depth samples)
        2-D array containing the model section from Marmousi 2
    model_seam : array_like, shape(num_wells, depth samples)
        2-D array containing the model section from SEAM
    """

    # get project root directory
    project_root = os.getcwd()

    if not os.path.isdir('data'):  # if the data directory does not exist, extract it
        extract('data.zip', project_root)

    # Load data
    seismic_marmousi = np.load(join('data', 'marmousi_synthetic_seismic.npy')).squeeze()
    seismic_seam = np.load(join('data', 'poststack_seam_seismic.npy')).squeeze()[:, 50:]
    seismic_seam = seismic_seam[::2, :]

    # Load targets and standardize data
    model_marmousi = np.load(join('data', 'marmousi_Ip_model.npy')).squeeze()[::5, ::4]
    model_seam = np.load(join('data', 'seam_elastic_model.npy'))[::3, :, ::2][:, :, 50:]
    model_seam = model_seam[:, 0, :] * model_seam[:, 2, :]

    # standardize
    seismic_marmousi, model_marmousi = standardize(seismic_marmousi, model_marmousi, no_wells_marmousi)
    seismic_seam, model_seam = standardize(seismic_seam, model_seam, no_wells_seam)

    return seismic_marmousi, seismic_seam, model_marmousi, model_seam
def install(): fetch("http://ftp.gnome.org/pub/gnome/sources/json-glib/0.16/json-glib-%(json-glib)s.tar.xz") extract("json-glib-%(json-glib)s.tar.xz") configure( "json-glib-%(json-glib)s", ["--prefix=%s" % env.prefix, "--disable-gcov", "--disable-introspection", "CC=clang"] ) make("json-glib-%(json-glib)s") make("json-glib-%(json-glib)s", "install")
def test_extract(self):
    # empty output directory
    utils.init_path(self.output_dir)
    utils.extract(self.archive_file, self.output_dir)
    files = os.listdir(self.output_dir)
    with tarfile.open(self.archive_file) as f:
        for file in files:
            assert file in f.getnames()
async def autoip(self, ctx, *, options: str = None):
    options = options.split(" ") if options else []
    version = extract(options, "46")
    addr_class = extract(options, "abc", func="lower")
    if not version or version != "6":
        func = self.fake.ipv4(address_class=addr_class)
    else:
        func = self.fake.ipv6()
    return await self.send(ctx, "IP address", func)
def download_libs(constants: Constants, settings: {}, progress: sg.ProgressBar):
    archive = os.path.join(settings["installdir"], "libraries.zip")
    utils.download_file(constants.win64_dev_libs, archive, 2492854)
    utils.extract(archive, settings["installdir"])
    dlldir = os.path.join(settings["installdir"], "dev64", "bin")
    for lib in os.listdir(dlldir):
        shutil.copyfile(os.path.join(dlldir, lib),
                        os.path.join(settings["installdir"], lib))
def incorrectly_ordered_boolean(if_ast: dict, code, code_identifier_lst):
    if if_ast["test"]["type"] == "LogicalExpression" and if_ast["test"]["operator"] == "&&":
        code_left = utils.extract(if_ast["test"]["left"]["loc"], code)
        code_right = utils.extract(if_ast["test"]["right"]["loc"], code)
        if code_left in code_right:
            # TODO similarity
            tmp = if_ast["test"]["left"]
            if_ast["test"]["left"] = if_ast["test"]["right"]
            if_ast["test"]["right"] = tmp
            return True
def parse(self, response):
    xxs = scrapy.Selector(response)
    source = "baidu_" + extract(xxs.xpath('//channel/title/text()'))
    for xItem in xxs.xpath('//item'):
        item = Article()
        item['source'] = source
        item['title'] = extract(xItem.xpath('./title/text()'))
        item['link'] = extract(xItem.xpath('./link/text()'))
        item['desc'] = extract(xItem.xpath('./description/text()'))
        item['pubDate'] = extract(xItem.xpath('./pubDate/text()'))
        yield item
def read_dataframe(out_path, split):
    arch_url = KIN_PARAMS[split]['arch_url']
    arch_path = os.path.join(out_path, KIN_PARAMS[split]['arch_name'])
    csv_name = KIN_PARAMS[split]['csv_name']
    csv_path = os.path.join(out_path, csv_name)
    if utils.download_file(arch_url, arch_path):
        utils.extract(arch_path, out_path, csv_name)
    df = pd.read_csv(csv_path)
    if 'label' in df.columns:
        df['label'] = df['label'].astype('category')
    return df
async def autocolor(self, ctx, *, options: str = None):
    options = options.split(" ") if options else []
    color_format = extract(options, COLOR_FORMATS, func="lower", default="hex")
    hue = extract(options, HUES, func="lower")
    luminosity = extract(options, LUMINOSITIES, func="lower", default="random")
    return await self.send(
        ctx,
        f"{color_format} color",
        self.fake.color(hue=hue, luminosity=luminosity, color_format=color_format)
    )
def _get_toolchain(tree, root, keep_archive=False, clean=False):
    archive = _create_name(tree, suffix=".tar.bz2")
    tree.insert(0, root)
    dir = path.join(_create_path(tree))
    archive_dir = path.join(dir, archive)
    if not check_path(dir, clean):
        if download(TOOLCHAIN_FORGE + archive, archive_dir):
            extract(archive_dir, dir)
            if not keep_archive:
                remove(archive_dir)
    else:
        print "! %s already exists" % dir
def install():
    fetch('http://www.pell.portland.or.us/~orc/Code/discount/discount-%(discount)s.tar.bz2')
    extract('discount-%(discount)s.tar.bz2')
    configure('discount-%(discount)s',
              ['--prefix=%s' % env.prefix,
               '--libdir=%s/lib' % env.prefix,
               '--mandir=%s/man' % env.prefix,
               '--shared',
               '--enable-all-features'],
              'configure.sh')
    run('sed -i .bkp -e "/ldconfig/d" %s/%s/librarian.sh' % (env.build, 'discount-%(discount)s' % env.versions))
    make('discount-%(discount)s')
    make('discount-%(discount)s', 'install')
def video_info(filepath):
    """Return some video meta information as a dictionary."""
    ffmpeg = subprocess.Popen("ffmpeg -i " + filepath, shell=True,
                              stderr=subprocess.PIPE)
    stdout, stderr = ffmpeg.communicate()
    info = {}
    info['creation'] = extract('creation_time[ ]+: ([0-9-]* [0-9:]*)', stderr, 1, timestamp)
    info['duration'] = extract('Duration: ([0-9:\.]*)', stderr, 1)
    info['detected fps'] = extract('([0-9]*.?[0-9]*) fps,', stderr, 1, float)
    info['w'], info['h'] = extract('Stream.*, ([0-9]+)x([0-9]+)', stderr, (1, 2),
                                   lambda (x, y): (int(x), int(y)))
    return info
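The regex-based `extract` helper used by `video_info()` is not shown here; a minimal sketch of what it might look like, with a signature inferred from the calls above (pattern, text, group index or tuple of indices, optional conversion function) -- hypothetical, not the project's actual implementation:

import re

def extract(pattern, text, group=1, convert=None):
    # Search `text` for `pattern`, pull out the requested group(s),
    # and optionally pass the result through `convert`. None if no match.
    match = re.search(pattern, text)
    if match is None:
        return None
    value = match.group(*group) if isinstance(group, tuple) else match.group(group)
    return convert(value) if convert else value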
def parse(self, response):
    hxs = Selector(response, type="html")
    item_url_list = extract(hxs, "//div[@class='block-body ']/div[@class='params-cont']/a/@href")
    # //div[@class='block-body ']/div[@class='params-cont']/a/@href
    for url in item_url_list:
        url = url.replace('./index.php?', 'https://top.taobao.com/index.php?')
        yield SplashRequest(url, callback=self.extract_url,
                            args={'wait': 0.5, 'html': 1})
def search(cls, query_params):

    # NOTE: Params 'recursive' and 'with_responses' are currently not used by
    # either the 'search' or 'get_all' actions below. Both already use
    # with_responses=False internally in the comment service, so no additional
    # optimization is required.
    params = {
        'page': 1,
        'per_page': 20,
        'course_id': query_params['course_id'],
    }
    params.update(utils.strip_blank(utils.strip_none(query_params)))

    if query_params.get('text'):
        url = cls.url(action='search')
    else:
        url = cls.url(action='get_all', params=utils.extract(params, 'commentable_id'))
        if params.get('commentable_id'):
            del params['commentable_id']

    response = utils.perform_request(
        'get',
        url,
        params,
        metric_tags=[u'course_id:{}'.format(query_params['course_id'])],
        metric_action='thread.search',
        paged_results=True)

    if query_params.get('text'):
        search_query = query_params['text']
        course_id = query_params['course_id']
        group_id = query_params['group_id'] if 'group_id' in query_params else None
        requested_page = params['page']
        total_results = response.get('total_results')
        corrected_text = response.get('corrected_text')
        # Record search result metric to allow search quality analysis.
        # course_id is already included in the context for the event tracker
        tracker.emit(
            'edx.forum.searched',
            {
                'query': search_query,
                'corrected_text': corrected_text,
                'group_id': group_id,
                'page': requested_page,
                'total_results': total_results,
            }
        )
        log.info(
            u'forum_text_search query="{search_query}" corrected_text="{corrected_text}" course_id={course_id} group_id={group_id} page={requested_page} total_results={total_results}'.format(
                search_query=search_query,
                corrected_text=corrected_text,
                course_id=course_id,
                group_id=group_id,
                requested_page=requested_page,
                total_results=total_results))

    return utils.CommentClientPaginatedResult(
        collection=response.get('collection', []),
        page=response.get('page', 1),
        num_pages=response.get('num_pages', 1),
        thread_count=response.get('thread_count', 0),
        corrected_text=response.get('corrected_text', None))
def parse_shebang(s: str) -> str:
    """Extract the interpreter token from a shebang like `#!/bin/sh`.

    https://en.wikipedia.org/wiki/Shebang_(Unix)

    :param s: shebang
    :return: shebang token
    """
    script = s
    try:
        match = extract(s, REGEX_SHEBANG_FULL)
        script = match.group().split('/')[-1]
        pos = match.end()
        match = extract(s, REGEX_SHEBANG_WHITESPACE, pos=pos)
        pos = match.end()
        match = extract(s, REGEX_SHEBANG_NON_WHITESPACE, pos=pos)
        return extract(match.group(), compile(r'[^\d]+')).group(0)
    except ExtractException:
        return script
def __init__(self, bug, hash):
    """
    Initialize comments

    :arg hash: Dictionary of comment details
    :arg bug: Instance of :class:`~bz_xmlrpc.classes.Bug` object
    :return: Instance of :class:`Comment`

    .. note:: No need to use this directly.
        Use :meth:`~bz_xmlrpc.classes.Bug.get_comments()`
    """
    self._hash = hash
    self.id = extract(hash, 'id', 'comment_id')
    self.author = extract(hash, 'email', 'author')
    self.bug = bug
    self.is_private = bool(extract(hash, 'is_private', 'isprivate'))
    self.text = extract(hash, 'text', 'body')
    self.time = to_datetime(extract(hash, 'time', 'bug_when'))
def do_scatter(i, j, ax):
    """ Draw single scatter plot
    """
    xs, ys = utils.extract(i, j, steadies)
    ax.scatter(xs, ys)
    ax.set_xlabel(r"$S_%d$" % i)
    ax.set_ylabel(r"$S_%d$" % j)

    cc = utils.get_correlation(xs, ys)
    ax.set_title(r"Corr: $%.2f$" % cc)
def __init__(self, hash):
    """
    Initialize
    """
    self._hash = hash
    if isinstance(hash, str):
        # Hack for searched bug groups
        self.name = hash
        self.ison = True
    else:
        self.bit = extract(hash, 'bit', 'id')
        self.name = extract(hash, 'name')
        self.description = extract(hash, 'description')
        self.ingroup = bool(extract(hash, 'ingroup'))
        self.ison = bool(extract(hash, 'ison'))
        self.mandatory = bool(extract(hash, 'mandatory'))
        self.othercontrol = bool(extract(hash, 'othercontrol'))
        self.direct = bool(extract(hash, 'direct'))
        self.isbuggroup = bool(extract(hash, 'isbuggroup'))
        self.userregexp = extract(hash, 'userregexp')
def parse_item(self, response):
    hxs = Selector(response)
    item_titles = extract(hxs, "//div[@id='J_ItemList']//p[@class='productTitle']/a/text()")
    top_id = extract_one(hxs, '//*[@id="J_CrumbSlideCon"]/li[2]/a/text()')
    type_id1 = extract(hxs, '//*[@id="J_CrumbSlideCon"]//div[@class="crumbDrop j_CrumbDrop"]/a/text()')
    if type_id1 is not None:
        if len(type_id1) > 1:
            type_id2 = type_id1.split('/n')[-1]
        else:
            type_id2 = ''
        type_id1 = type_id1.split('/n')[0]
        titles = []
        title = ''
        for t in item_titles:
            if not t.endswith('\n'):
                title += t.strip()
            elif t.endswith('\n'):
                title += t.strip()
                if len(title) > 5:
                    titles.append(title.strip())
                    title = ''
        if len(titles) > 19:
            for i, t in enumerate(titles):
                if i < 20:
                    good = {
                        'mall': '1',
                        'rank': str(i + 1),
                        'title': t.strip(),
                        'price': '0',
                        'turnover_index': '0',
                        'top_id': top_id.strip(),
                        'type_id1': type_id1.strip(),
                        'type_id2': type_id2.strip(),
                        'url': response.url
                    }
                    yield Good(good)
    for link in self.normal_url_extractor.extract_links(response):
        yield SplashRequest(link.url, callback=self.parse,
                            args={'wait': 0.5, 'html': 1})
def preprocess(no_wells):
    """Function initializes data, performs standardization, and train test split

    Parameters
    ----------
    no_wells : int
        number of evenly spaced wells and seismic samples to be evenly
        sampled from the seismic section

    Returns
    -------
    seismic : array_like, shape(num_traces, depth samples)
        2-D array containing the seismic section
    model : array_like, shape(num_wells, depth samples)
        2-D array containing the model section
    """

    # get project root directory
    project_root = os.getcwd()

    if not os.path.isdir('data'):  # if the data directory does not exist, extract it
        extract('data.zip', project_root)

    # Load data
    seismic = np.load(join('data', 'poststack_seam_seismic.npy')).squeeze()[:, 50:]
    seismic = seismic[::2, :]

    # Load targets and standardize data
    model = np.load(join('data', 'seam_elastic_model.npy'))[::3, :, ::2][:, :, 50:]
    model = model[:, 0, :] * model[:, 2, :]

    # standardize
    seismic, model = standardize(seismic, model, no_wells)

    return seismic, model
def criterion(output, target):
    prop, box = output
    target_instance = extract(target.cpu(), box, resize)
    loss, ok = 0., False
    for i, img in enumerate(target_instance):
        if prop[i] is not None:
            z_target = ab2z(img)
            loss += MCE(prop[i].cpu(), z_target,
                        weights=w[z_target.argmax(dim=-1)]).mean()
            ok = True
    if not ok:
        loss = torch.tensor(0., requires_grad=True)
    return loss
def __init__(self, mapp, img, K):
    self.K = K
    self.Kinv = np.linalg.inv(self.K)
    self.pose = np.eye(4)
    self.h, self.w = img.shape[0:2]

    self.kpus, self.des = extract(img)
    self.kps = normalize(self.Kinv, self.kpus)
    self.pts = [None] * len(self.kps)

    self.id = len(mapp.frames)
    mapp.frames.append(self)
def fetch(self):
    """Download and extract the dataset."""
    home = self.home()
    if not path.exists(home):
        os.makedirs(home)

    # download archives
    archive_filenames = []
    for key, archive in self.ARCHIVES.iteritems():
        url = archive['url']
        sha1 = archive['sha1']
        basename = path.basename(url)
        archive_filename = path.join(home, basename)
        if not path.exists(archive_filename):
            download(url, archive_filename, sha1=sha1)
        archive_filenames += [(archive_filename, sha1)]
        self.ARCHIVES[key]['archive_filename'] = archive_filename

    # extract them
    for name, archive in self.ARCHIVES.iteritems():
        archive_dir = path.join(home, name)
        if os.path.exists(archive_dir):
            continue
        url = archive['url']
        sha1 = archive['sha1']
        archive_filename = archive['archive_filename']
        extract(archive_filename, home, sha1=sha1, verbose=True)

        # move around stuff if needed
        if 'moves' in archive:
            for move in archive['moves']:
                src = self.home(move['source'])
                dst = self.home(move['destination'])
                # We can't use shutil here since the destination folder
                # may already exist. Fortunately the distutils can help
                # us here (see standard library).
                dir_util.copy_tree(src, dst)
                dir_util.remove_tree(src)
def fetch(self, download_if_missing=True):
    """Download and extract the dataset."""
    home = self.home()

    if not download_if_missing:
        raise IOError("'%s' exists!" % home)

    # download archive
    url = self.URL
    sha1 = self.SHA1
    basename = path.basename(url)
    archive_filename = path.join(home, basename)
    if not path.exists(archive_filename):
        if not download_if_missing:
            return
        if not path.exists(home):
            os.makedirs(home)
        download(url, archive_filename, sha1=sha1)

    # extract it
    if not path.exists(self.home(self.SUBDIR)):
        extract(archive_filename, home, sha1=sha1, verbose=True)
def parse_item(self, response):
    hxs = Selector(response)
    search_condition = extract_one(hxs, '//*[@id="J_CrumbSearchInuput"]/@value')
    item_titles = extract(hxs, "//div[@id='J_ItemList']//p[@class='productTitle']/a/text()")
    top_id = extract_one(hxs, '//*[@id="J_CrumbSlideCon"]/li[2]/a/text()')
    type_id1 = extract_one(hxs, '//*[@id="J_CrumbSlideCon"]//div[@class="crumbDrop j_CrumbDrop"]/a/text()')
    if type_id1 is not None and search_condition is not None:
        type_id1 = type_id1.split('/n')[0]
        titles = []
        title = ''
        for t in item_titles:
            if not t.endswith('\n'):
                title += t.strip()
            elif t.endswith('\n'):
                title += t.strip()
                if len(title) > 5:
                    titles.append(title.strip())
                    title = ''
        if len(titles) > 19 and search_condition != type_id1:
            for i, t in enumerate(titles):
                if i < 20:
                    good = {
                        'mall': '1',
                        'rank': str(i + 1),
                        'title': t.strip(),
                        'price': '0',
                        'turnover_index': '0',
                        'top_id': top_id.strip(),
                        'type_id1': type_id1.strip(),
                        'type_id2': search_condition.strip(),
                        'url': response.url
                    }
                    yield Good(good)
    for link in self.needed_url_extractor.extract_links(response):
        if ('industryCatId' in link.url and 'cat' in link.url
                and 'post_fee' not in link.url and 'brand' not in link.url):
            url = re.sub(r'sort=.*&', 'sort=d&', link.url)
            url = re.sub(r'search_condition=.*&', 'search_condition=7', url)
            url = re.sub(r'miaosha=.*&', 'miaosha=0&', url)
            url = re.sub(r'wwonline=.*&', 'wwonline=0&', url)
            yield SplashRequest(url, callback=self.parse_item,
                                args={'wait': 0.5, 'html': 1})
def get_and_run_installer(self, installer):
    installer_exe = os.path.abspath(os.path.basename(installer))
    if not os.path.isfile(installer_exe):
        print 'Downloading', installer, '..',
        installer_exe = download(installer)
        if installer_exe is None:
            print 'Download FAILED'
            return False
        print 'DONE'
    if os.path.splitext(installer_exe)[-1] in ['.zip']:
        install_path = self.get_install_path(installer_exe)
        if install_path is not None:
            if not os.path.isdir(install_path):
                os.makedirs(install_path)
            return bool(extract(installer_exe, install_path))
    elif not start_installer(installer_exe):
        print 'Failed to start', installer_exe
        return False
    return True
def extract_triples(hbt_model, save_weights_path, path, author,
                    subject_model, object_model, tokenizer, id2rel):
    workbook = xlwt.Workbook(encoding='utf-8')
    ws = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    ws.write(0, 0, "head")
    ws.write(0, 1, "tail")
    ws.write(0, 2, "relation")
    hbt_model.load_weights(save_weights_path)
    triples = extract(path, subject_model, object_model, tokenizer, id2rel)
    count = 0
    triple_str = ""
    for triple_list in triples:
        for triple in triple_list:
            count += 1
            ws.write(count, 0, triple[0])
            ws.write(count, 1, triple[1])
            ws.write(count, 2, triple[2])
    workbook.save(path + author + ".xls")
def __init__(self, **kwargs):
    """
    Initialize a Bugzilla instance.

    Optional Arguments:
    -------------------
    url : The Bugzilla URL. May or may not end with /xmlrpc.cgi.
        If it does not end with /xmlrpc.cgi, it will be assumed.
        If not provided, the value of BUGZILLA_URL will be used.
    cookie_jar|cookiejar : cookielib.CookieJar/MozillaCookieJar object.
    user|username|login : Bugzilla login, usually an email id.
    password|passwd : Password for bugzilla
    http_proxy|proxy : String specifying the HTTP proxy of the client's
        connection. Usually of the form server:port or http://server:port
    bypass : boolean value, asks client to bypass password auth and use
        cookies if present
    """
    # Initialize public attributes for an unlogged instance
    self.user_agent = USER_AGENT
    self.logged_in = False
    self.user_id = None

    self._init_private_data()

    # Extract provided values or default
    self._cookiejar = extract(kwargs, 'cookie_jar', 'cookiejar')
    self.url = extract(kwargs, 'url') or BUGZILLA_URL
    self.user = extract(kwargs, 'user', 'username', 'login') or ''
    self.password = extract(kwargs, 'password', 'passwd') or ''
    self.http_proxy = extract(kwargs, 'http_proxy', 'proxy') or ''
    self.bypass = extract(kwargs, 'bypass') or ''
    cookie_dir = extract(kwargs, 'cookie_dir') or COOKIE_DIR
    if not os.path.exists(cookie_dir):
        os.mkdir(cookie_dir)
    self.cookiefile = os.path.join(cookie_dir, '%s.cookie' % self.user)

    self.connect()
def wrong_identifier(if_ast: dict, code, code_identifier_lst):
    code_condition_padded = utils.extract(if_ast["test"]["loc"], code, padding=5)
    condition_identifier_lst = []
    utils.dict_visitor(if_ast["test"], identifiers=condition_identifier_lst)
    if len(condition_identifier_lst):
        identifier_to_augment = random.choice(condition_identifier_lst)
        # TODO identifier must stand alone?
        for identifier in code_identifier_lst:
            identifier_start = identifier["loc"]["start"]["line"]
            augment_start = identifier_to_augment["loc"]["start"]["line"]
            if (identifier_start < (augment_start - 5)
                    and identifier["name"] not in code_condition_padded):
                # TODO choose most similar identifier
                # TODO near neighborhood could be feasible
                identifier_to_augment["name"] = identifier["name"]
                random.shuffle(code_identifier_lst)
                return True
def predict(self, F, data_info, time_info):
    '''
    This function should provide predictions of labels on (test) data.
    Make sure that the predicted values are in the correct format for the
    scoring metric. For example, binary classification problems often expect
    predictions in the form of a discriminant value (if the area under the
    ROC curve is the metric) rather than predictions of the class labels
    themselves. The predict function eventually returns probabilities or
    continuous values.
    '''
    info_dict = extract(data_info, time_info)
    print_time_info(info_dict)

    if params['algo'] == Algo.OLD_CODE:
        return self.mdl.predict(F, data_info, time_info)
    elif params['algo'] == Algo.ORIGINAL:
        return self._original_predict(F, info_dict)
    elif params['algo'] == Algo.FACEBOOK_LR:
        return self._facebook_lr_predict(F, info_dict)
    elif params['algo'] == Algo.BASIC:
        return self._basic_predict(F, info_dict)
def on_press(self, event):
    value = self.text_ctrl.GetValue()
    if not value:
        print("You didn't enter anything!")
    else:
        self.text_ctrl.Hide()
        png = wx.Image('img/whatever.png', wx.BITMAP_TYPE_ANY).ConvertToBitmap()
        wx.StaticBitmap(self, -1, png, (0, 0), (png.GetWidth(), png.GetHeight()))
        if os.path.exists("result.json"):
            os.remove("result.json")
        wordlist = utils.extract(value)
        words = ",".join(wordlist)
        path = utils.getPath()
        utils.crawl(words)
        output = utils.process()
        utils.writelist(output, path)
        png = wx.Image('img/finish.png', wx.BITMAP_TYPE_ANY).ConvertToBitmap()
        wx.StaticBitmap(self, -1, png, (0, 0), (png.GetWidth(), png.GetHeight()))
def get_and_install_source(self, installer):
    installer_file = os.path.abspath(os.path.basename(installer))
    if not os.path.isfile(installer_file):
        print 'Downloading', installer, '..',
        installer_file = download(installer)
        if installer_file is None:
            print 'Download FAILED'
            return False
        print 'DONE'
    install_path = self.get_install_path(installer_file)
    if install_path is not None:
        if not os.path.isdir(install_path):
            os.makedirs(install_path)
    else:
        install_path = '.'
    content = extract(installer_file, install_path)
    if not content:
        return False
    cwd = install_path
    for p in content:
        if os.path.isdir(p):
            cwd = p
            break
    return self.install_source(cwd)
def get_and_install_source(self, source):
    source_file = os.path.abspath(os.path.basename(source))
    if not os.path.isfile(source_file):
        print 'Downloading', source, '..',
        source_file = download(source)
        if source_file is None:
            print 'Download FAILED'
            return False
        print 'DONE'
    source_path = self.get_source_path(source_file)
    if source_path is not None:
        if not os.path.isdir(source_path):
            os.makedirs(source_path)
    else:
        source_path = '.'
    content = extract(source_file, source_path)
    if not content:
        return False
    cwd = source_path
    for p in content:
        if os.path.isdir(p):
            cwd = p
            break
    return self.install_source(os.path.abspath(cwd))
def install():
    fetch('http://piumarta.com/software/peg/peg-%(peg)s.tar.gz')
    extract('peg-%(peg)s.tar.gz')
    make('peg-%(peg)s', 'CC=clang')
    make('peg-%(peg)s', 'PREFIX=%s install' % env.prefix)
def execute(self, args=None):
    validTemplateNames = ['helloworld', 'helloworldwebapp', 'pale'] + customStarterApps
    if not args:
        print self.shorthelp
        print 'available app templates:'
        print 'helloworld -- simple helloworld app'
        print 'helloworldwebapp -- simple helloworld app using webapp fmk'
        print 'xmppsendandreply -- simple xmpp (instant message) send and reply'
        print 'emailreceive -- simple e-mail receive example'
        print 'emailsendui -- simple e-mail send example'
        print 'deferredemail -- simple deferred lib queued e-mail send example'
        print 'starter_pale -- a basic project layout with buckets for most things you could want and an import fix built in'
    else:
        templateName = args[0].lower()
        if templateName not in validTemplateNames:
            print 'Unknown app name %s' % args[0]
            return
        if templateName in customStarterApps:
            tarballurl = 'http://github.com/mpstx/appengine_py_%s/tarball/master' % templateName
            tmpPath = join(join(alePath('tmp'), templateName + '.tar.gz'))
            download(tarballurl, '%s.tar.gz' % templateName)
            logging.info("Extracting %s here" % templateName)
            os.system('tar xzf %s --strip 1 -C .' % tmpPath)
        elif templateName == 'helloworld':
            logging.info('creating ./helloworld.py')
            FILE = open('./helloworld.py', 'w')
            FILE.write("""
print 'Content-Type: text/plain'
print ''
print 'Hello, world! This is a bare bones app engine application'
""")
            FILE.close()
            logging.info('creating ./app.yaml')
            FILE = open('./app.yaml', 'w')
            FILE.write("""
application: helloworld
version: 1
runtime: python
api_version: 1

handlers:
- url: /.*
  script: helloworld.py
""")
            FILE.close()
        elif templateName == 'helloworldwebapp':
            logging.info('creating ./helloworld.py')
            FILE = open('./helloworld.py', 'w')
            FILE.write("""
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app

class MainPage(webapp.RequestHandler):
    def get(self):
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.out.write('Hello, webapp World!')

application = webapp.WSGIApplication([('/', MainPage)],
                                     debug=True)

def main():
    run_wsgi_app(application)

if __name__ == "__main__":
    main()
""")
            FILE.close()
            logging.info('creating ./app.yaml')
            FILE = open('./app.yaml', 'w')
            FILE.write("""
application: helloworldwebapp
version: 1
runtime: python
api_version: 1

handlers:
- url: /.*
  script: helloworld.py
""")
            FILE.close()
        else:
            pkgPath = join(join(alePath('recipes_installed'), 'createapp'), 'pkgs')
            templateZipPath = join(pkgPath, '%s.zip' % templateName)
            if os.path.exists(templateZipPath):
                extract(templateZipPath, '.')
                gitignore('tmp')
            else:
                logging.error('Could not find template: %s' % templateName)
                return
    return 0
def install(): fetch("http://www.fastcgi.com/dist/fcgi-%(fcgi)s.tar.gz") extract("fcgi-%(fcgi)s.tar.gz") configure("fcgi-%(fcgi)s", ["--prefix=%s" % env.prefix]) make("fcgi-%(fcgi)s") make("fcgi-%(fcgi)s", "install")
import sys
import time
import math
import utils
from pyspark.context import SparkContext

if len(sys.argv) > 1:
    hdfs_file_path = "/user/lsde02/data/%s/*.gz" % sys.argv[1]
else:
    hdfs_file_path = "/user/lsde02/data/1901/*.gz"
hdfs_results_path = "/user/lsde02/results/"
start_time = time.strftime("%Y-%m-%d-%H-%M-%S")

sc = SparkContext()
context = sc.textFile(hdfs_file_path)
stations = context.flatMap(lambda x: [utils.extract(record) for record in x.splitlines()])
stations = stations.filter(lambda x: 'longitude' in x[1] and 'latitude' in x[1])
stations.persist()

# Do computations on month level
month_data = stations.map(lambda x: ((x[0][0], x[0][1], x[0][3]),
                                     (x[1]['temp'], x[1]['wind-speed'], x[1]['sky-condition'],
                                      x[1]['visibility'], x[1]['wind-direction'])))
# Accumulator layout: (temp sum, count, wind-speed sum, count, sky-condition sum, count,
#                      visibility sum, count, wind-direction sin sum, wind-direction cos sum)
month_data = month_data.combineByKey(
    lambda value: (value[0], 1, value[1], 1, value[2], 1, value[3], 1,
                   math.sin(value[4] * math.pi / 180.), math.cos(value[4] * math.pi / 180.)),
    lambda x, value: (x[0] + value[0], x[1] + 1, x[2] + value[1], x[3] + 1,
                      x[4] + value[2], x[5] + 1, x[6] + value[3], x[7] + 1,
                      x[8] + math.sin(value[4] * math.pi / 180.),
                      x[9] + math.cos(value[4] * math.pi / 180.)),
    lambda x, y: (x[0] + y[0], x[1] + y[1], x[2] + y[2], x[3] + y[3], x[4] + y[4],
                  x[5] + y[5], x[6] + y[6], x[7] + y[7], x[8] + y[8], x[9] + y[9]))
month_data = month_data.map(lambda (label, (x1, c1, x2, c2, x3, c3, x4, c4, x5a, x5b)):
                            (label, (x1 / c1, x2 / c2, x3 / c3, x4 / c4, math.atan2(x5a, x5b))))
month_data = month_data.coalesce(1, True)
month_data.saveAsTextFile("%s%s-%s" % (hdfs_results_path, start_time, 'all'))
def install(self, args=None):
    dlFile = download('http://ipython.scipy.org/dist/0.10/ipython-0.10.tar.gz', 'ipython-0.10.tar.gz')
    extract(dlFile, extractPath)
def _be(self, *keys):
    """
    Private convenience wrapper around extract.
    Hash defaults to self._hash
    """
    return extract(self._hash, *keys)
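The dictionary-based `extract` helper that `_be()` wraps is not shown in these snippets; a minimal sketch of what it might look like, inferred from the call sites above (hypothetical, not the library's actual implementation):

def extract(hash, *keys):
    # Return the value of the first key present in the dictionary,
    # or None if none of the candidate keys are found.
    for key in keys:
        if key in hash:
            return hash[key]
    return None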
def __init__(self, bug, hash):
    """
    Initialize attachments

    :arg hash: Dictionary of attachment details
    :arg bug: Instance of :class:`~bz_xmlrpc.classes.Bug` object
    :return: Instance of :class:`Attachment`

    .. note:: No need to use this directly.
        Use :meth:`~bz_xmlrpc.classes.Bug.get_attachments()`
    """
    self._hash = hash
    self.id = extract(hash, "id", "attach_id")
    self.content_type = extract(hash, "content_type", "mimetype")
    self.creation_time = to_datetime(extract(hash, "creation_time", "creation_ts"))
    self.attacher = extract(hash, "attacher", "submitter_id")
    self.description = extract(hash, "description")
    self.file_name = extract(hash, "file_name", "filename")
    self.bug = bug
    self.is_private = bool(extract(hash, "is_private", "isprivate"))
    self.is_obsolete = bool(extract(hash, "is_obsolete", "isobsolete"))
    self.is_patch = bool(extract(hash, "is_patch", "ispatch"))
    self.is_url = bool(extract(hash, "is_url", "isurl"))
    self.last_change_time = to_datetime(extract(hash, "last_change_time", "modification_time"))
    if self.id and self.bug:
        self.fetch_url = bug.bz.url.replace("xmlrpc.cgi", "attachment.cgi?id=%s" % self.id)
def install(self, args=None):
    dlFile = download('http://github.com/ishikawa/modipyd/zipball/release-1-1-rc1', 'ishikawa-modipyd.zip')
    extract(dlFile, extractPath)
    os.system('chmod +x %s' % join(join(join(extractPath, 'ishikawa-modipyd-1516eeb'), 'bin'), 'modipyd'))
    os.system('chmod +x %s' % join(join(join(extractPath, 'ishikawa-modipyd-1516eeb'), 'bin'), 'pyautotest'))