def parse_message(message: Dict) -> Tuple[str, Any]:
    """
    Parses a message and ensures it has a single variant.

    Args:
        message: The message to parse

    Returns:
        The message variant and data content

    Raises:
        IndexError: If the message doesn't have exactly 1 key
    """
    keys: List[str] = list(message)

    if len(keys) > 1:
        raise IndexError

    variant: str = keys[0]
    data: Any = message[variant]

    log.debug(f"Parsed a message with variant={variant} and data={data}")

    return (variant, data)
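# Usage sketch for parse_message (the message shapes here are hypothetical and only
# illustrate the contract; parse_message and its imports are assumed to be in scope):
# a single-variant dict unpacks into (variant, data), anything else raises IndexError.
variant, data = parse_message({"Challenge": {"nonce": "abc123"}})
assert variant == "Challenge" and data == {"nonce": "abc123"}

try:
    parse_message({"Challenge": {}, "AccessToken": {}})
except IndexError:
    pass  # expected: a message must carry exactly one variant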
def create_static_pages(output_dir):
    '''Generates a static page from each of the files contained in `content/pages/`.'''
    template = env.get_template("page.html")
    for f in glob.glob("content/pages/*.md"):
        page_name = f.split("/")[-1].replace(".md", "")
        target_dir = os.path.join(output_dir, "%s/" % page_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        target = os.path.join(target_dir, "index.html")

        context = {}
        md_content = codecs.open(f, 'r', 'utf-8').read()
        context["content"] = markdown.markdown(md_content, output_format="html5", encoding="UTF-8")
        contents = template.render(**context)

        f = codecs.open(target, 'w', 'utf-8')
        f.write(contents)
        f.close()
        log.debug("Created static page '%s'." % page_name)

    # Content images
    if os.path.exists("content/media"):
        media_dir = os.path.join(output_dir, "media")
        if os.path.exists(media_dir):
            shutil.rmtree(media_dir)
        shutil.copytree("content/media", media_dir)
def create_static_pages(output_dir):
    '''Generates a static page from each of the files contained in `content/pages/`.'''
    template = env.get_template("page.html")
    for f in glob.glob("content/pages/*.md"):
        page_name = f.split("/")[-1].replace(".md", "")
        target_dir = os.path.join(output_dir, "%s/" % page_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        target = os.path.join(target_dir, "index.html")

        md_content = markdown.markdown(codecs.open(f, 'r', 'utf-8').read(),
                                       output_format="html5", encoding="UTF-8")
        context = dict({"content": md_content, "pagename": page_name}, **global_context)
        contents = template.render(**context)

        f = codecs.open(target, 'w', 'utf-8')
        f.write(contents)
        f.close()
        log.debug("Created static page '%s'." % page_name)

    # Content images
    if os.path.exists("content/media"):
        media_dir = os.path.join(output_dir, "media")
        if os.path.exists(media_dir):
            shutil.rmtree(media_dir)
        shutil.copytree("content/media", media_dir)
def _read_message(self) -> Dict:
    size_bytes = self._sock.recv(4)

    if size_bytes == b"":
        log.error("Empty Message: Closing")
        self._sock.close()
        sys.exit(1)

    size = struct.unpack(">I", size_bytes)[0]
    log.debug("Message size: %d", size)

    if size > 4096:
        remaining_size = size
        buf: List[int] = []

        while remaining_size > 0:
            # Never ask for more than is left in this frame, and count what was
            # actually received (recv may return fewer bytes than requested)
            chunk = self._sock.recv(min(remaining_size, 4096))
            buf.extend(chunk)
            remaining_size -= len(chunk)

        return json.loads(bytes(buf))

    message: Dict = json.loads(self._sock.recv(size))

    # Error handle
    if "Server" in message.keys():
        # There has been an error in communication
        if "text" in message["Server"].keys():
            payload: Dict = json.loads(message["Server"]["text"])
            code = message["Server"]["code"]
            self._handle_server_error(code, payload)

    log.info(message)

    return message
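# Wire-format sketch (illustrative only, not part of the original module): _read_message
# expects each frame to be a 4-byte big-endian length prefix followed by that many bytes
# of UTF-8 encoded JSON, which is the format _send_message below produces. A hypothetical
# encoder for tests could look like this:
import json
import struct


def encode_frame(message: dict) -> bytes:
    payload = json.dumps(message).encode("utf-8")
    return struct.pack(">I", len(payload)) + payload


# e.g. encode_frame({"Alive": {}}) yields b"\x00\x00\x00\x0d" + b'{"Alive": {}}'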
def test_output():
    # All of these just need to output without errors.
    from zenlog import log
    log.debug("A quirky message only developers care about")
    log.info("Curious users might want to know this")
    log.warn("Something is wrong and any user should be informed")
    log.warning("Something is wrong and any user should be informed")
    log.error("Serious stuff, this is red for a reason")
    log.critical("OH NO everything is on fire")
    log.c("OH NO everything is on fire")
    log.crit("OH NO everything is on fire")
def create_api(packages, output_dir, repo_dir):
    '''Generates a static API containing all the datapackage.json files of the
    contained datasets.

    Accepts a list of pkg_info dicts, which are generated with the
    process_datapackage function.'''
    all_metadata = []
    for pkg_info in packages:
        pkg_dir = os.path.join(repo_dir, pkg_info['name'])
        all_metadata.append(json.loads(open(os.path.join(pkg_dir, "datapackage.json")).read()))
    with open(os.path.join(output_dir, 'api.json'), 'w') as api_file:
        json.dump(all_metadata, api_file)
    log.debug("Created api.json.")
def create_index_page(packages, output_dir):
    '''Generates the index page with the list of available packages.

    Accepts a list of pkg_info dicts, which are generated with the
    process_datapackage function.'''
    template = env.get_template("list.html")
    target = "index.html"
    # Merge global context with local variables (http://stackoverflow.com/a/1552420/122400)
    context = dict({"datapackages": packages}, **global_context)
    contents = template.render(**context)
    f = codecs.open(os.path.join(output_dir, target), 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.debug("Created index.html.")
def create_contact_page(output_dir, contact_email=""):
    '''Creates a contact form page.'''
    template = env.get_template("contact.html")
    target_dir = os.path.join(output_dir, "contacto/")
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    target = os.path.join(target_dir, "index.html")

    context = {}
    context["contact_email"] = contact_email
    contents = template.render(**context)

    f = codecs.open(target, 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.debug("Created contact page.")
def create_index_page(packages, output_dir):
    '''Generates the index page with the list of available packages.

    Accepts a list of pkg_info dicts, which are generated with the
    process_datapackage function.'''
    template = env.get_template("list.html")
    target = "index.html"
    context = {"datapackages": packages,
               "welcome_text": markdown.markdown(codecs.open("content/welcome_text.md", 'r', 'utf-8').read(),
                                                 output_format="html5",
                                                 encoding="UTF-8"),
               }
    contents = template.render(**context)
    f = codecs.open(os.path.join(output_dir, target), 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.debug("Created index.html.")
def create_contact_page(output_dir, contact_email=""):
    '''Creates a contact form page.'''
    template = env.get_template("contact.html")
    target_dir = os.path.join(output_dir, "contact/")
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    target = os.path.join(target_dir, "index.html")

    context = {}
    context["contact_email"] = contact_email
    contents = template.render(**context)

    f = codecs.open(target, 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.debug("Created contact page.")
def getpage(url):
    if not os.path.exists('cache'):
        log.info('Creating new cache/ folder.')
        os.mkdir('cache')
    # hash() returns an int, so convert it before building the cache path
    url_hash = str(hash(url))
    cache_file = 'cache/' + url_hash

    if os.path.exists(cache_file):
        log.debug("Cache hit for %s" % url)
        page = file_get_contents(cache_file)
    else:
        log.debug("Cache miss for %s" % url)
        page = urllib.urlopen(url).read()
        file_put_contents(cache_file, page, utf8=True)
    return page
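# Alternative cache-key sketch (an assumption, not the original approach): Python 3
# randomises the built-in hash() per process, so a content hash keeps cache file names
# stable between runs if that ever matters for this cache.
import hashlib


def url_cache_key(url):
    return hashlib.sha1(url.encode("utf-8")).hexdigest()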
def load_ground_truth_data(base_path: Union[None, Path, str] = None):
    if base_path is None:
        base_path = Path(__file__).parents[1].joinpath('webapp', 'data')
    else:
        base_path = Path(base_path)

    images_ids = sorted(p.stem for p in base_path.iterdir()
                        if p.is_file() and not p.stem.endswith('_gt'))
    images = []
    ground_truths = []

    for img_id in images_ids:
        img_name = str(base_path.joinpath(f'{img_id}.png'))
        gt_name = str(base_path.joinpath(f'{img_id}_gt.png'))

        # Read image to segment
        try:
            img_data = plt.imread(img_name)
            if np.amax(img_data) <= 1:
                log.debug(
                    f'Image {img_id}.png value range was converted from [0, 1] to [0, 255]'
                )
                img_data *= 255
            img_data = img_data.astype(np.uint8, copy=False)
        except FileNotFoundError:
            log.warning(f'Skipping since no file found with name {img_name}')
            images.append(None)
            ground_truths.append(None)
            continue

        if 2 < img_data.ndim:
            img_data = np.rint(
                ImageTools.rgb_to_grayscale(img_data.astype(
                    np.float64))).astype(np.uint8)

        assert np.amax(img_data) > 1
        images.append(img_data)

        # Read GT image
        gt_data = plt.imread(gt_name)
        if gt_data.ndim == 3:
            gt_data = gt_data[:, :, 0]
        ground_truths.append(gt_data > 0)

    return {
        img_id: (img, gt)
        for img_id, img, gt in zip(images_ids, images, ground_truths)
        if img is not None
    }
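# Usage sketch (the directory layout is an assumption; by default the loader looks in
# webapp/data next to the package). The result maps image id -> (uint8 grayscale image,
# boolean ground-truth mask), silently skipping ids whose image file is missing:
#
#     data = load_ground_truth_data('webapp/data')
#     for img_id, (img, gt) in data.items():
#         print(img_id, img.shape, img.dtype, gt.dtype)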
def process_dep(i):
    log.debug("Trying ID %d..." % i)
    url = FORMATTER_URL_IL % i
    soup = BeautifulSoup(getpage(url), "lxml")

    title = soup.find('span', id=RE_TITLE)
    if title:
        summary = soup.find('span', id=RE_SUMMARY)
        doc_url = soup.find('a', id=RE_DOCLINK)
        pdf_url = soup.find('a', id=RE_PDFLINK)
        eventdates = soup.findAll('span', id=RE_EVENTDATE)
        eventtypes = soup.findAll('span', id=RE_EVENTTYPE)
        eventinfos = soup.findAll('div', id=RE_EVENTINFO)
        dist_date = soup.find('span', id=RE_DISTDATE)
        authors = soup.findAll('a', id=RE_AUTHOR)
        parlgroup = soup.find('span', id=RE_PARLGROUP)

        row = {'title': title.text,
               'summary': summary.text,
               'id': i,
               'url': url,
               'authors': [a.text for a in authors]}
        if doc_url:
            row['doc_url'] = doc_url['href']
        if pdf_url:
            row['pdf_url'] = pdf_url['href']
        if dist_date:
            row['dist_date'] = dist_date.text
        if parlgroup:
            row['parlgroup'] = parlgroup.text

        for index, eventdate in enumerate(eventdates):
            event = {'date': eventdate.text}
            event['type'] = eventtypes[index].text.strip()
            info = eventinfos[index]
            if info.text:
                # TODO: Process this information
                event = parse_event_info(event, info)
            if not row.get('events'):
                row['events'] = []
            row['events'].append(event)

        log.info("Scraped initiative: %s" % title.text)
        return row
    else:
        return None
def create_dataset_page(pkg_info, output_dir):
    '''Generate a single dataset page.'''
    template = env.get_template("dataset.html")
    name = pkg_info["name"]
    if not os.path.exists(os.path.join(output_dir, name)):
        os.makedirs(os.path.join(output_dir, name))
    target = "%s/index.html" % (name)

    context = dict({"datapkg": pkg_info}, **global_context)
    contents = template.render(**context)

    f = codecs.open(os.path.join(output_dir, target), 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.debug("Created %s." % target)
def process(self, t: ROSType, headers_file: str, original_file: str,
            ros_type_str: str) -> FuzzTarget:
    logging.debug(f"Processing {t.type_name} type")
    imports = "\n".join([
        f'#include "{headers_file}"',
        f'#include "{original_file}"',
    ])
    request_code = "\n".join(
        [self.fuzz_field(field) for field in t.fields])
    return FuzzTarget(
        imports=imports,
        client_name=FuzzTargetProcessor.normalize_client_name(t.type_name),
        request_code=request_code,
        node_type=ros_type_str,
    )
def _send_message(self, message: Union[Dict, str]):
    """
    Serialises a dictionary into JSON and sends it across the stream.

    Messages will be length prefixed before sending.

    Args:
        message: The message to send
    """
    readable: str = json.dumps(message) if isinstance(message, dict) else message
    log.debug(f"Sending message={readable} to the control layer")

    data: bytes = readable.encode("utf-8")
    length = (len(data)).to_bytes(4, byteorder="big")

    self.stream.send(length + data)
def create_dataset_page(pkg_info, output_dir):
    '''Generate a single dataset page.'''
    template = env.get_template("dataset.html")
    name = pkg_info["name"]
    if not os.path.exists(os.path.join(output_dir, name)):
        os.makedirs(os.path.join(output_dir, name))
    target = "%s/index.html" % (name)

    context = {"datapkg": pkg_info}
    context['welcome_text'] = markdown.markdown(codecs.open("content/welcome_text.md", 'r', 'utf-8').read(),
                                                output_format="html5", encoding="UTF-8")
    contents = template.render(**context)

    f = codecs.open(os.path.join(output_dir, target), 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.debug("Created %s." % target)
def _message_control(self) -> None:
    response: Dict = self._read_message()
    log.debug("HEARTBEAT")

    if "Alive" in response.keys():
        # Write it back
        self._state = State.HEARTBEAT
        self._send_message(response)
    elif "JobConfig" in response.keys():
        log.info("RECEIVED JOB CONFIG")
        self._state = State.READ_JOB
        self._message_stack.append(response)
    elif "Dataset" in response.keys():
        log.info("RECEIVED DATASET")
        self._state = State.PROCESSING
        self._message_stack.append(response)
def play_by_station_uuid(self, _uuid):
    print(_uuid)
    # Pyradios by default doesn't let you search by UUID;
    # a trick is to call click_counter(uuid) directly to get the station info
    is_ok = "false"
    try:
        self.target_station = self.API.click_counter(_uuid)
        log.debug(self.target_station)
        is_ok = self.target_station["ok"]
    except Exception as e:
        log.error("Could not find a station by the UUID")
        sys.exit(0)

    self.API.search(name=self.target_station["name"], name_exact=True)

    # again, register a valid click
    if is_ok == "false":
        res = self.API.click_counter(self.target_station["stationuuid"])
        log.debug(res)
def verify(self):
    """
    Creates a new model for the user and authenticates it with the challenge
    response method.

    Raises:
        IndexError: If an invalid message is encountered
    """
    # Connect to the socket
    self._connect()

    message = {
        "NewModel": {
            "email": self.email,
            "password": self.password,
            "model_name": self.model_name,
        }
    }
    self._send_message(message)

    while True:
        # Read some data
        data = self._read_message()
        log.debug(f"Received data={data}")

        try:
            variant, data = parse_message(data)

            if variant == "Challenge":
                self.authenticate_challenge(data)
            elif variant == "AccessToken":
                self.display_access(data)
                self.save_access_tokens()
                break
            else:
                log.warn(
                    f"Encountered an unexpected message variant={variant}")
        except IndexError:
            log.error(f"Failed to parse a message from data={data}")
def station_validator(self):
    if len(self.response) == 0:
        log.error("No stations found by the name")
        sys.exit(0)

    if len(self.response) > 1:
        log.info("Multiple stations found by the name")
        for station in self.response:
            log.info("name: {} | id: {} | country: {}".format(
                station["name"], station["stationuuid"], station["country"]))
        sys.exit(0)

    if len(self.response) == 1:
        log.info("Station found: {}".format(self.response[0]["name"]))
        log.debug(self.response[0])
        self.target_station = self.response[0]
        self.API.click_counter(self.target_station["stationuuid"])
def create_dataset_page(pkg_info, output_dir):
    '''Generate a single dataset page.'''
    template = env.get_template("dataset.html")
    name = pkg_info["name"]
    if not os.path.exists(os.path.join(output_dir, name)):
        os.makedirs(os.path.join(output_dir, name))
    target = "%s/index.html" % (name)

    context = {"datapkg": pkg_info}
    context['welcome_text'] = markdown.markdown(
        codecs.open("content/welcome_text.md", 'r', 'utf-8').read(),
        output_format="html5", encoding="UTF-8")
    contents = template.render(**context)

    f = codecs.open(os.path.join(output_dir, target), 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.debug("Created %s." % target)
def create_index_page(packages, output_dir):
    '''Generates the index page with the list of available packages.

    Accepts a list of pkg_info dicts, which are generated with the
    process_datapackage function.'''
    template = env.get_template("list.html")
    target = "index.html"
    context = {
        "datapackages": packages,
        "welcome_text": markdown.markdown(codecs.open("content/welcome_text.md", 'r', 'utf-8').read(),
                                          output_format="html5",
                                          encoding="UTF-8"),
    }
    contents = template.render(**context)
    f = codecs.open(os.path.join(output_dir, target), 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.debug("Created index.html.")
def main():
    if not os.path.exists(dest):
        os.mkdir(dest)
        log.info("Directory 'imgs/' created.")
    mp_json = json.loads(open(mp_file, 'r').read())

    for mp_id in mp_json:
        url = pic_url_formatter % mp_id
        filename = '%s.jpg' % os.path.join(dest, mp_id)
        if os.path.exists(filename):
            log.debug("File for id %s already exists, skipping." % mp_id)
            continue
        log.info('Retrieving picture with id: %s' % mp_id)
        try:
            urlretrieve(url, filename)
        except IOError:
            log.error('Socket error! :(')

    log.info('Done. Now do find ./imgs/ -size -722c -exec rm {} \;')
    log.info('to clean up things.')
def _process_job(self) -> None:
    log.info("PROCESSING JOB")

    # Get message from message stack
    data: Dict = self._message_stack.pop()
    # Make sure the dataset is actually there
    assert "Dataset" in data

    # Get training and prediction datasets
    train = decode_and_decompress(data["Dataset"]["train"])
    predict = decode_and_decompress(data["Dataset"]["predict"])
    train_pd = pd.read_csv(io.StringIO(train))
    predict_pd = pd.read_csv(io.StringIO(predict))

    # Prepare the datasets for callback
    train_pd, predict_pd, predict_rids = prepare_datasets(
        train_pd, predict_pd)

    # Check the user has specified a callback here to satisfy mypy
    assert self.callback is not None
    predictions = self.callback(train_pd, predict_pd, self.recv_job_config)
    log.debug("Predictions: %s", predictions.head())

    # Attach record ids onto predictions
    predictions["record_id"] = predict_rids
    cols = predictions.columns.tolist()
    cols.insert(0, cols.pop())
    predictions = predictions[cols]
    assert len(predictions.index) == len(predict_pd.index)

    compressed_predictions: str = compress_and_encode(
        predictions.to_csv(index=False))
    message = {"Predictions": compressed_predictions}
    self._send_message(message)

    self._state = State.HEARTBEAT
def read_and_validate_yaml_file(path: str) -> dict:
    yaml_file_path = os.path.join(path, "fuzz.yaml")
    ensure_yaml_exists(yaml_file_path)
    yaml_obj = verify_yaml_file(yaml_file_path)

    if "TODO" in json.dumps(yaml_obj):
        logging.warning("The 'TODO' keyword was found in the yaml file\n"
                        "Did you forget to fill in the blanks?")

    services_keys = (yaml_obj["services"] if "services" in yaml_obj else {}).keys()
    topics_keys = (yaml_obj["topics"] if "topics" in yaml_obj else {}).keys()
    actions_keys = (yaml_obj["actions"] if "actions" in yaml_obj else {}).keys()

    logging.debug(
        f"{len(topics_keys)} topics detected: {', '.join([f'`{s}`' for s in topics_keys])}"
    )
    logging.debug(
        f"{len(services_keys)} services detected: {', '.join([f'`{s}`' for s in services_keys])}"
    )
    logging.debug(
        f"{len(actions_keys)} actions detected: {', '.join([f'`{s}`' for s in actions_keys])}"
    )
    return yaml_obj
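# Shape sketch of the parsed fuzz.yaml object this function works with. The top-level
# keys "services", "topics" and "actions" come from the code above; the entries inside
# them are made up for illustration and each section may simply be absent.
example_yaml_obj = {
    "services": {"/add_two_ints": {"type": "example_interfaces/srv/AddTwoInts"}},
    "topics": {},
    # "actions" omitted on purpose: the code above falls back to an empty dict
}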
def generate_cpp_file(fuzz_target: FuzzTarget, source_file: str, template_name: str):
    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    plain_source_file_name = Path(source_file).name
    without_extension = os.path.splitext(plain_source_file_name)[0]

    # Read template
    env = Environment(loader=FileSystemLoader(__location__))
    template = env.get_template(template_name)
    logging.debug("Template read")

    # Populate template
    template_arguments = fuzz_target.get_mapping()
    template_arguments["FILE_NAME"] = plain_source_file_name
    fuzzing_path = os.path.join(os.path.dirname(__file__), "fuzzing_api.hpp")
    template_arguments["FUZZING_API"] = open(fuzzing_path).read()
    template = template.render(template_arguments)
    logging.debug("Template populated")

    # Write the populated file
    full_path = os.path.join(os.path.dirname(source_file),
                             without_extension + "_generated.cpp")
    try:
        with open(full_path, "w") as fd:
            fd.write(template)
        logging.debug(
            f"Template written with {fuzz_target.client_name} client")
    except Exception:
        logging.error("Couldn't write generated file", exc_info=True)
    return full_path
def fetch_data_package(url, dir_name):
    rq = requests.get(url)
    if rq.status_code != 200:
        log.warn("Not authorized %d at %s" % (rq.status_code, url))
        return False
    spec = rq.json()

    # check for update
    dp_filename = os.path.join(dir_name, 'datapackage.json')
    if os.path.isfile(dp_filename):
        with open(dp_filename) as f:
            cached = json.load(f)
            if cached == spec:
                log.debug("No updates")
                return False

    # create a data folder
    data_folder = os.path.join(dir_name, 'data')
    if not os.path.isdir(dir_name):
        os.makedirs(data_folder)

    # download a copy of the datapackage
    download_file(dir_name, url, 'datapackage.json')

    for res in spec['resources']:
        if 'path' in res:
            # paths override urls, for local mirrors
            basepath = "/".join(url.split('/')[:-1]) + '/'
            fn = download_file(data_folder, basepath + res['path'])
        elif 'url' in res:
            # download resource from url
            fn = download_file(data_folder, res['url'])
        else:
            # skip this resource
            log.debug("Skipping: %s" % res)
            continue
        if 'title' in res:
            log.debug('Downloaded: %s - %s' % (res['title'], fn))
    return True
async def main(args):
    uri = f"ws://{args.base}"
    if args.port:
        uri = f"{uri}:{args.port}"
    log.info(f"Connecting to: {uri}")

    async with websockets.connect(uri) as websocket:
        registration = Registration(name=bot.name,
                                    gametype=args.gametype,
                                    bots=args.bots,
                                    runs=args.runs)
        message = Registration.Schema().dumps(registration)
        log.debug(f"Sending to the server: {message}")
        await websocket.send(message)

        greeting = await websocket.recv()
        log.info(f"< {greeting}")

        await play_game(websocket, args.gametype)
def __init__(self, URL):
    self.url = URL
    self.is_playing = False
    self.process = None
    log.debug("player: url => {}".format(self.url))

    self.process = Popen(
        ["ffplay", "-nodisp", "-nostats", "-loglevel", "error", self.url])
    log.debug("player: ffplay => PID {} initiated".format(self.process.pid))

    sleep(3)  # sleep for 3 seconds, waiting for ffplay to start properly

    if self.is_active():
        self.is_playing = True
        log.info("Radio started successfully")
    else:
        log.error("Radio could not be started, it may be a dead station")
        sys.exit(0)
def handle_message(self, topic, payload):
    log.debug("Trying to handle message on topic: " + str(topic) +
              " with payload: " + str(payload))
    self.check_sequence(topic, payload)

    if self.topics.get(topic) is None:
        self.handle_error(TopicError(topic))
        return
    else:
        topic = self.topics.get(topic)

    topic_type = type(topic)
    handler = self.message_handlers.get(topic_type)
    try:
        handler.handle_message(topic, payload)
    except Exception as err:
        self.handle_error(err)

    topic.payload = payload
    log.debug("Message on topic " + str(topic.topic) + " handled.")
async def _run_auction(
    room_key: RoomKey,
    room_info: RoomInfo,
    room: Union[SingleRoom, MultiRoom],
    websocket,
):
    if room_info.room_type is MultiRoom:
        assert isinstance(room, MultiRoom)
        log.debug("Running multiroom")
        for _ in range(room.runs):
            auctioneer = Auctioneer(
                room=room.bot_room,
                game_type=room_key.gametype,
                slowdown=0,
                verbose=True,
            )
            winners = await auctioneer.run_auction()
            for winner in winners:
                room.winners[winner] += 1
            log.debug(f"Winners: {room.winners}")

            message = ResetBot.Schema().dumps(ResetBot(reset="RESET"))
            await websocket.send(message)
    else:
        auctioneer = Auctioneer(
            room=room.bot_room,
            game_type=room_key.gametype,
            slowdown=0,
            verbose=True,
        )
        log.info("running normal room")
        room.winners = await auctioneer.run_auction()

    log.info(f"Winners: {room.winners}")
    room_info.has_run = True
def __init__(
    self,
    email: str,
    password: str,
    model_name: str,
    address: Tuple[str, int],
):
    log.debug(
        f"Initialising a new Authentication with email={email}, model_name={model_name}"
    )
    self.email: str = email
    self.password: str = password
    self.model_name: str = model_name
    self.access_token: Optional[str] = None
    self.model_id: Optional[str] = None
    self.stream = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.private_key = load_priv_key()
    self.address: Tuple[str, int] = address
def signal_handler(sig, frame):
    global player
    log.debug("You pressed Ctrl+C!")
    log.debug("Stopping the radio")
    if player.is_playing:
        player.stop()
    log.debug("Exiting now")
    sys.exit(0)
def fuzz_field(self, field: Field, parent="request", indent=1) -> str:
    logging.debug(f"Generating field {field.name}")
    fresh = self.get_fresh_variable()
    preindent = " " * indent
    res = preindent + f"// {field.name}\n"

    # Primitive type
    if field.type.is_primitive:
        cpp_type = FuzzTargetProcessor.PRIMITIVES_CPP_TYPES[
            field.type.type_name]
        res += preindent + f"{cpp_type} {fresh};\n"
        res += (
            preindent +
            f"if (!get{field.type.type_name.capitalize()}({fresh})) return;\n"
        )
    # Composite type
    else:
        res += preindent + f"{field.type.type_name} {fresh};\n"
        for subfield in field.type.fields:
            res += preindent + self.fuzz_field(
                subfield, parent=fresh, indent=indent + 1)

    res += preindent + f"{parent}->{field.name} = {fresh};\n"
    return res
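# Output sketch (approximate: the fresh variable name comes from get_fresh_variable()
# and the exact C++ type comes from PRIMITIVES_CPP_TYPES, neither of which is shown
# here). For a primitive field `data` of ROS type `int32`, fuzz_field would emit C++
# roughly like:
#
#     // data
#     int32_t field_0;
#     if (!getInt32(field_0)) return;
#     request->data = field_0;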
def generate(offline=False,
             fetch_only=False,
             output_dir=OUTPUT_DIR,
             theme_dir=os.path.join(THEMES_DIR, 'centraldedados'),
             repo_dir=REPO_DIR,
             config_file=CONFIG_FILE):
    '''Main function that takes care of the whole process.'''
    global env, packages

    # Read the config file
    parser = SafeConfigParser()
    parser.read(config_file)

    # Load the theme and set up Jinja
    theme_name = parser.get('ui', 'theme')
    theme_dir = os.path.join(THEMES_DIR, theme_name)
    template_dir = os.path.join(theme_dir, "templates")
    env = jinja2.Environment(loader=jinja2.FileSystemLoader([template_dir]))

    # Set up the output directory
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Set up the dir for storing repositories
    if not os.path.exists(repo_dir):
        log.debug("Directory %s doesn't exist, creating it." % repo_dir)
        os.mkdir(repo_dir)

    # Copy htaccess file
    shutil.copyfile(os.path.join(theme_dir, 'static/htaccess'), os.path.join(output_dir, ".htaccess"))

    # Static CSS files
    css_dir = os.path.join(output_dir, "css")
    if os.path.exists(css_dir):
        shutil.rmtree(css_dir)
    shutil.copytree(os.path.join(theme_dir, "static/css"), css_dir)

    # Static JavaScript files
    js_dir = os.path.join(output_dir, "js")
    if os.path.exists(js_dir):
        shutil.rmtree(js_dir)
    shutil.copytree(os.path.join(theme_dir, "static/js"), js_dir)

    # Theme images
    img_dir = os.path.join(output_dir, "img")
    if os.path.exists(img_dir):
        shutil.rmtree(img_dir)
    shutil.copytree(os.path.join(theme_dir, "static/img"), img_dir)

    # Fonts
    fonts_dir = os.path.join(output_dir, "fonts")
    if os.path.exists(fonts_dir):
        shutil.rmtree(fonts_dir)
    shutil.copytree(os.path.join(theme_dir, "static/fonts"), fonts_dir)

    if not parser.items('repositories'):
        log.critical('No repository data in settings.conf (does it even exist?). Cannot proceed :(')
        sys.exit()

    # go through each specified dataset
    for r in parser.items('repositories'):
        name, url = r
        dir_name = os.path.join(repo_dir, name)
        repo = None

        # do we have a local copy?
        if os.path.isdir(dir_name):
            if not os.path.isdir(os.path.join(dir_name, '.git')):
                if url.endswith(".json"):
                    log.info("%s: Data package, refreshing" % name)
                    updated = fetch_data_package(url, dir_name)
                else:
                    log.info('%s: Unsupported repo, skipping update' % name)
                    continue
            elif not offline:
                repo = git.Repo(dir_name)
                origin = repo.remotes.origin
                try:
                    origin.fetch()
                except AssertionError:
                    # usually this fails on the first run, try again
                    origin.fetch()
                except git.exc.GitCommandError:
                    log.critical("%s: Fetch error, this dataset will be left out." % name)
                    continue
                # see if we have updates
                if not local_and_remote_are_at_same_commit(repo, origin):
                    log.debug("%s: Repo has new commits, updating local copy." % name)
                    updated = True
                    # connection errors can also happen if fetch succeeds but pull fails
                    try:
                        result = origin.pull()[0]
                    except git.exc.GitCommandError:
                        log.critical("%s: Pull error, this dataset will be left out." % name)
                        continue
                    if result.flags & result.ERROR:
                        log.error("%s: Pull error, but going ahead." % name)
                        updated = False
                else:
                    log.info("%s: No changes." % name)
                    updated = False
            else:
                log.debug("%s: Offline mode, using cached version." % name)
                # we set updated to True in order to re-generate everything
                updated = True
                repo = git.Repo(dir_name)
            if fetch_only:
                # if the --fetch-only flag was set, skip to the next dataset
                continue
        else:
            if offline:
                log.warn("%s: No local cache, skipping." % name)
                continue
            else:
                if url.endswith(".git"):
                    # Handle GIT Repository URL
                    log.info("%s: New repo, cloning." % name)
                    try:
                        repo = git.Repo.clone_from(url, dir_name)
                        # For faster checkouts, one file at a time:
                        # repo = git.Repo.clone_from(url, dir_name, n=True, depth=1)
                        # repo.git.checkout("HEAD", "datapackage.json")
                    except git.exc.GitCommandError as inst:
                        log.warn("%s: skipping %s" % (inst, name))
                        continue
                    updated = True
                elif url.endswith(".json"):
                    # Handle Data Package URL
                    log.info("%s: New data package, fetching." % name)
                    updated = fetch_data_package(url, dir_name)
                else:
                    log.warn("Unsupported repository: %s" % url)

        # get datapackage metadata
        try:
            pkg_info = process_datapackage(name, repo_dir, url)
        except ParseException as inst:
            log.warn("%s: skipping %s" % (inst, name))
            continue

        # set last updated time based on last commit, comes in Unix timestamp format so we convert
        import datetime
        if repo is not None:
            d = repo.head.commit.committed_date
        else:
            d = int(time.mktime(time.localtime()))
        last_updated = datetime.datetime.fromtimestamp(int(d)).strftime('%Y-%m-%d %H:%M:%S')
        pkg_info['last_updated'] = last_updated

        # add it to the packages list for index page generation after the loop ends
        packages.append(pkg_info)

        # re-generate the dataset HTML pages
        create_dataset_page(pkg_info, output_dir)

        # if repo was updated, copy over CSV/JSON/* and ZIP files to the download dir
        # (we always generate them if offline)
        if updated or offline:
            create_dataset_page(pkg_info, output_dir)
            datafiles = pkg_info['datafiles']
            zipf = zipfile.ZipFile(os.path.join(output_dir, name + '.zip'), 'w')
            for d in datafiles:
                log.info("Copying %s" % d['path'])
                # copy file
                target = os.path.join(output_dir, os.path.basename(d['path']))
                shutil.copyfile(os.path.join(dir_name, d['path']), target)
                # generate JSON version of CSV
                if target.endswith('.csv'):
                    csv2json(target, target.replace(".csv", ".json"))
                # make zip file
                zipf.write(os.path.join(dir_name, d['path']), d['basename'], compress_type=zipfile.ZIP_DEFLATED)
            if 'readme_path' in pkg_info:
                try:
                    zipf.write(pkg_info['readme_path'], 'README.md')
                except OSError:
                    pass
            zipf.close()

    # HTML index with the list of available packages
    create_index_page(packages, output_dir)
    # Static JSON API of the data packages
    create_api(packages, output_dir, repo_dir)
    # Static pages
    create_static_pages(output_dir)
    # Contact page
    create_contact_page(output_dir, parser.get('credentials', 'contact_email'))

    log.info("All static content is ready inside '%s'." % OUTPUT_DIR)
def generate_site(fast_run):
    # flush output
    create_dir(OUTPUT_DIR)
    create_dir(os.path.join(OUTPUT_DIR, TRANSCRIPTS_PATH))
    create_dir(os.path.join(OUTPUT_DIR, MPS_PATH))
    create_dir(os.path.join(OUTPUT_DIR, MEDIA_PATH))

    # init Jinja
    env = jinja2.Environment(loader=jinja2.FileSystemLoader([TEMPLATE_DIR]),
                             extensions=['jinja2htmlcompress.SelectiveHTMLCompress'],
                             trim_blocks=True, lstrip_blocks=True)
    env.filters['date'] = format_date

    # generate pages
    log.info("Copying static files...")
    copy_tree(MEDIA_SOURCE_DIR, os.path.join(OUTPUT_DIR, MEDIA_PATH))

    log.info("Generating index...")
    render_template_into_file(env, 'index.html', 'index.html')

    log.info("Generating MP index...")
    mps = generate_mp_list()
    context = {"mps": mps}
    render_template_into_file(env, 'mp_list.html', "deputados/index.html", context)

    gov_data = get_gov_dataset()
    govpost_data = list(get_govpost_dataset())
    gov_mp_ids = [int(row[2]) for row in govpost_data if row[2]]
    date_data = get_date_dataset()

    log.info("Generating MP pages...")
    for mp in mps:
        id = int(mp['id'])
        mp['photo_url'] = PHOTO_URL_BASE + str(id) + ".jpg"
        # determine government posts
        if id in gov_mp_ids:
            mp['govposts'] = []
            govpost_rows = [row for row in govpost_data if row[2].strip() and int(row[2]) == id]
            for row in govpost_rows:
                gov_number = int(row[0])
                gov = None
                for r in gov_data:
                    if int(r[0]) == gov_number:
                        gov = {'number': r[0],
                               'start_date': dateparser.parse(r[1]),
                               'end_date': dateparser.parse(r[2])}
                        break
                if not gov:
                    log.critical("Gov not found!")
                mp['govposts'].append({
                    'post': row[3],
                    'start_date': dateparser.parse(row[4]),
                    'end_date': dateparser.parse(row[5]),
                    'gov': gov,
                })
        # parse dates
        for m in mp['mandates']:
            m['start_date'] = dateparser.parse(m['start_date'])
            m['end_date'] = dateparser.parse(m['end_date'])  # nice effect: if no end date, set to today

        context = {'mp': mp, 'l': None}
        filename = os.path.join(MPS_PATH, mp['slug'], 'index.html')
        render_template_into_file(env, 'mp_detail.html', filename, context)

    log.info("Generating session index...")
    datedict = generate_datedict()
    all_years = [y for y in datedict]
    for year_number in datedict:
        year = datedict[year_number]
        context = {'year': year,
                   'year_number': year_number,
                   'all_years': all_years,
                   'datedict': datedict,
                   }
        target_dir = os.path.join(TRANSCRIPTS_PATH + "%s/" % year_number)
        filename = target_dir + "index.html"
        # print filename
        render_template_into_file(env, 'day_list.html', filename, context)

    # get most recent year and make the session index
    y = all_years[-1]
    year = datedict[y]
    context = {'year': year,
               'year_number': year_number,
               'all_years': all_years,
               'datedict': datedict,
               }
    render_template_into_file(env, 'day_list.html', TRANSCRIPTS_PATH + 'index.html', context)

    log.info("Generating HTML session pages...")
    if fast_run:
        COUNTER = 0

    date_data.reverse()
    for leg, sess, num, d, dpub, page_start, page_end in date_data:
        dateobj = dateparser.parse(d)
        context = {'session_date': dateobj,
                   'year_number': year_number,
                   'text': get_session_text(leg, sess, num),
                   'monthnames': MESES,
                   'pdf_url': 'xpto',
                   }
        target_dir = "%s%d/%02d/%02d" % (TRANSCRIPTS_PATH, dateobj.year, dateobj.month, dateobj.day)
        if not os.path.exists(os.path.join(OUTPUT_DIR, target_dir)):
            create_dir(os.path.join(OUTPUT_DIR, target_dir))
        filename = "%s/index.html" % target_dir
        render_template_into_file(env, 'day_detail.html', filename, context)
        log.debug(d)

        if fast_run:
            COUNTER += 1
            if COUNTER > 20:
                break
def generate(offline, fetch_only):
    '''Main function that takes care of the whole process.'''
    # set up the output directory
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # set up the dir for storing repositories
    if not os.path.exists(repo_dir):
        log.info("Directory %s doesn't exist, creating it." % repo_dir)
        os.mkdir(repo_dir)
    # create dir for dataset pages
    if not os.path.exists(os.path.join(output_dir, datasets_dir)):
        os.mkdir(os.path.join(output_dir, datasets_dir))
    # create download dir for zip and csv/json/* dataset files
    if not os.path.exists(os.path.join(output_dir, files_dir)):
        os.mkdir(os.path.join(output_dir, files_dir))

    # create static dirs
    # TODO: only update changed files -- right now we regenerate the whole static dir
    css_dir = os.path.join(output_dir, "css")
    js_dir = os.path.join(output_dir, "js")
    img_dir = os.path.join(output_dir, "img")
    fonts_dir = os.path.join(output_dir, "fonts")
    if os.path.exists(css_dir):
        shutil.rmtree(css_dir)
    shutil.copytree("static/css", css_dir)
    if os.path.exists(js_dir):
        shutil.rmtree(js_dir)
    shutil.copytree("static/js", js_dir)
    if os.path.exists(img_dir):
        shutil.rmtree(img_dir)
    shutil.copytree("static/img", img_dir)
    if os.path.exists(fonts_dir):
        shutil.rmtree(fonts_dir)
    shutil.copytree("static/fonts", fonts_dir)

    # read the config file to get the datasets we want to publish
    parser = SafeConfigParser()
    parser.read(config_file)
    packages = []

    if not parser.items('repositories'):
        log.critical('No repository data in settings.conf (does it even exist?). Cannot proceed :(')
        sys.exit()

    # go through each specified dataset
    for r in parser.items('repositories'):
        name, url = r
        dir_name = os.path.join(repo_dir, name)

        # do we have a local copy?
        if os.path.isdir(dir_name):
            if not offline:
                log.info("Checking for changes in repo '%s'..." % name)
                repo = git.Repo(dir_name)
                origin = repo.remotes.origin
                try:
                    origin.fetch()
                except AssertionError:
                    # usually this fails on the first run, try again
                    origin.fetch()
                except git.exc.GitCommandError:
                    log.critical("Fetch error connecting to repository, this dataset will be ignored and not listed in the index!")
                    continue
                # connection errors can also happen if fetch succeeds but pull fails
                try:
                    result = origin.pull()[0]
                except git.exc.GitCommandError:
                    log.critical("Pull error connecting to repository, this dataset will be ignored and not listed in the index!")
                    continue
                # we get specific flags for the results Git gave us
                # and we set the "updated" var in order to signal whether to
                # copy over the new files to the download dir or not
                if result.flags & result.HEAD_UPTODATE:
                    log.info("No new changes in repo '%s'." % name)
                    updated = False
                elif result.flags & result.ERROR:
                    log.error("Error pulling from repo '%s'!" % name)
                    updated = False
                else:
                    # TODO: figure out other git-python flags and return more
                    # informative log output
                    log.info("Repo changed, updating. (returned flags: %d)" % result.flags)
                    updated = True
            else:
                log.info("Offline mode, using cached version of package %s..." % name)
                # we set updated to True in order to re-generate everything
                # FIXME: See checksum of CSV files to make sure they're new before
                # marking updated as true
                updated = True
                repo = git.Repo(dir_name)
            if fetch_only:
                # if the --fetch-only flag was set, skip to the next dataset
                continue
        else:
            if offline:
                log.warn("Package %s specified in settings but no local cache, skipping..." % name)
                continue
            else:
                log.info("We don't have repo '%s', cloning..." % name)
                repo = git.Repo.clone_from(url, dir_name)
                updated = True

        # get datapackage metadata
        pkg_info = process_datapackage(name)

        # set last updated time based on last commit, comes in Unix timestamp format so we convert
        import datetime
        d = repo.head.commit.committed_date
        last_updated = datetime.datetime.fromtimestamp(int(d)).strftime('%Y-%m-%d %H:%M:%S')
        log.debug(last_updated)
        pkg_info['last_updated'] = last_updated

        # add it to the packages list for index page generation after the loop ends
        packages.append(pkg_info)

        # re-generate the dataset HTML pages
        create_dataset_page(pkg_info)

        # if repo was updated, copy over CSV/JSON/* and ZIP files to the download dir
        # (we always generate them if offline)
        if updated or offline:
            create_dataset_page(pkg_info)
            datafiles = pkg_info['datafiles']
            zipf = zipfile.ZipFile(os.path.join(output_dir, files_dir, name + '.zip'), 'w')
            for d in datafiles:
                # copy CSV file
                target = os.path.join(output_dir, files_dir, os.path.basename(d['path']))
                shutil.copyfile(os.path.join(dir_name, d['path']), target)
                # generate JSON version
                csv2json(target, target.replace(".csv", ".json"))
                # make zip file
                zipf.write(os.path.join(dir_name, d['path']), d['basename'], compress_type=zipfile.ZIP_DEFLATED)
            try:
                zipf.write(pkg_info['readme_path'], 'README.md')
            except OSError:
                pass
            zipf.close()

    # generate the HTML index with the list of available packages
    create_index_page(packages)
    # generate the static JSON API of the data packages
    create_api(packages)
# -*- coding: utf-8 -*-
import urllib2
import json

from zenlog import log

SOURCE_FILE = "banks.json"


def download_page(url):
    response = urllib2.urlopen(url)
    html = response.read()
    return html


f = open(SOURCE_FILE, 'r')
contents = f.read()
jsondata = json.loads(contents)

for item in jsondata:
    # extract the URL and the code/ID from the JSON
    url = item['url']
    cod = url.split('=')[-1]
    # fetch the page contents
    html = download_page(url)
    # save it into an html file
    filename = cod + ".html"
    outfile = open(filename, 'w')
    outfile.write(html)
    outfile.close()
    log.debug(u'Fetched ' + cod + ' :D')
from pprint import pprint

# Imports assumed by this fragment (not present in the original snippet):
from selenium import webdriver
import selenium.webdriver.support.ui as UI
from zenlog import log  # assumed: the other scrapers in this collection use zenlog

# driver = webdriver.PhantomJS()
driver = webdriver.Firefox()
driver.get("http://www.bportugal.pt/en-US/Supervisao/Pages/Instituicoesautorizadas.aspx")

# Get the institutions listed in the table, one per row
# click the number to go to the next page -- .MudarPagina strong + a
# every 10 pages click in the > arrow to advance -- .MudarPagina strong + span > a

bank_list = []
pagecount = 1
while True:
    log.debug("New loop, pagecount is " + str(pagecount))
    wait = UI.WebDriverWait(driver, 10)

    links = driver.find_elements_by_css_selector(".AreaResultados td a")
    log.debug("Found %d links..." % len(links))
    if len(links) == 0:
        from time import sleep
        sleep(3)
        links = driver.find_elements_by_css_selector(".AreaResultados td a")
        if len(links) == 0:
            log.error("No links found :(")
        else:
            log.debug("Yay, %d links!" % len(links))

    rows = driver.find_elements_by_css_selector(".AreaResultados tbody tr")
    # skip first row