Exemplo n.º 1
0
def download_file(url, dest=None, filename=None):
    """Download *url* to a local file and return the path written.

    Parameters
    ----------
    url : str
        Source URL, passed straight to ``request.urlopen``.
    dest : str, optional
        Directory to place the file in; joined with *filename* when given.
    filename : str, optional
        Target file name; defaults to ``'downloaded.file'``.

    Returns
    -------
    str
        The path of the file that was written.
    """
    if not filename:
        filename = 'downloaded.file'
    if dest:
        filename = os.path.join(dest, filename)

    # Use the response as a context manager so the connection is closed even
    # if writing fails (the original never closed it).
    with request.urlopen(url) as u, open(filename, 'wb') as fp:
        meta = u.info()
        # Python 2 responses exposed getheaders(); Python 3 uses get_all().
        meta_func = meta.getheaders if hasattr(meta, 'getheaders') else meta.get_all
        meta_length = meta_func('Content-Length')
        file_size = int(meta_length[0]) if meta_length else None
        logger.info('Downloading: {0} Bytes: {1}'.format(url, file_size))

        file_size_dl = 0
        block_sz = 8192
        while True:
            # 'chunk' instead of 'buffer' — avoid shadowing the builtin.
            chunk = u.read(block_sz)
            if not chunk:
                break

            file_size_dl += len(chunk)
            fp.write(chunk)

            status = "{0:16}".format(file_size_dl)
            if file_size:
                status += "   [{0:6.2f}%]".format(file_size_dl * 100 / file_size)
            # carriage return so progress lines overwrite each other on a console
            status += chr(13)
            logger.info(status)
    return filename
Exemplo n.º 2
0
 def dump_file(self, filename, existed_check=False):
     """Write this note's ``_data`` to *filename* in CSON format.

     If *existed_check* is true and *filename* already exists, the file is
     assumed to have been generated earlier: log that fact and do nothing.
     """
     if not (existed_check and os.path.exists(filename)):
         with open(filename, 'w+') as fp:
             cson.dump(self._data, fp)
         return
     logger.info('already generated file (%s) => (%s) (%s)' %
                 (filename, self.folder, self.title))
Exemplo n.º 3
0
 def find_inner_link(self):
     """Scan every markdown note for Boostnote inner links.

     An inner link looks like ``[name](:note:<key>)``; for each one found,
     log whether the target key resolves to a note in the same storage or
     is a broken link. Snippet notes are skipped (no markdown content).
     """
     # Raw string + pre-compiled: the original non-raw literal relied on
     # invalid escape sequences (\[, \( ...), a SyntaxWarning on modern
     # Python. The pattern itself is unchanged.
     link_pattern = re.compile(r'\[([\w ]+)[\]|\/]\(\:note\:([\w]+)\)')
     for storage, folder, note in self.boostnote.walk_note():
         if note.type == NoteType.SNIPPET_NOTE:
             continue
         for idx, (link_name, cson) in enumerate(link_pattern.findall(note.content)):
             if cson in storage.notes:
                 logger.info('%02d : %s ==> %s' %
                             (idx, cson, storage.notes[cson]))
             else:
                 logger.info('%02d : %s ==> broken link' % (idx, cson))
Exemplo n.º 4
0
def moniwiki_page_attach(wiki_root: str, wiki_title: str, wiki_text: str):
    """Yield attachments referenced by *wiki_text* that exist on the PDS server.

    Yields ``(idx, full_match_text, attachment_filename, full_url)`` for each
    ``[[Clip(...)]]`` or ``attachment:...`` reference whose file is actually
    reachable; unreachable references are only logged.
    """
    pds_base = moniwiki_pds_url(wiki_root, wiki_title)

    # Each entry: (regex, function mapping a findall tuple to the file name).
    # Fixes: raw strings (invalid escape sequences otherwise); '[a-z0-0]' was
    # a typo for '[a-z0-9]'; the dot before the extension is now escaped so
    # the pattern matches a literal 'name.ext', not 'nameXext'.
    patterns = [(r'(\[\[[cC][lL][iI][pP]\()([a-zA-Z0-9]+)(\)\]\])', lambda x: x[1] + '.png'),
                (r'(attachment\:)([a-zA-Z0-9_+\-\.\[\]\@가-힣]+\.[a-z0-9]+)', lambda x: x[1])]
    for pattern, rep in patterns:
        matchs = re.findall(pattern, wiki_text)
        for idx, match in enumerate(matchs):
            full_url = urljoin(pds_base + '/', quote(rep(match)))
            if url_exists(full_url):
                yield idx, ''.join(match), rep(match), full_url
            else:
                logger.info('not found: %s' % full_url)
Exemplo n.º 5
0
    def convert_contents(cls, contents: str, method=None) -> str:
        """Apply the class's ``note_replace`` regex substitutions to *contents*.

        Parameters
        ----------
        contents : str
            Text to transform.
        method : list, optional
            Keys of the replacements to apply; ``None`` or ``[]`` applies all.

        Returns
        -------
        str
            The transformed text.
        """
        # Avoid a mutable default argument; None and [] both mean "apply all".
        apply_all = not method
        converted = contents
        for key, from_rep, to_rep in cls.note_replace:
            if apply_all:
                # BUG FIX: re.sub's 4th positional argument is `count`, not
                # `flags` — the original silently capped replacements at
                # re.UNICODE == 32 occurrences per pattern.
                converted = re.sub(from_rep, to_rep, converted, flags=re.UNICODE)
            elif key in method:
                if cls.verbose:
                    for match in re.findall(from_rep, converted):
                        logger.info(match)
                converted = re.sub(from_rep, to_rep, converted, flags=re.UNICODE)

        return converted
Exemplo n.º 6
0
    def do_rename_file(self,
                       name_pattern=r'{path}\notes\{folder_name}_{title}.cson'
                       ):
        r"""Rename every note file according to *name_pattern*.

        The pattern is str.format-ed with the folder's key and name, the
        storage path, and every field of the note's data dict; the default
        builds a Windows-style path ``<path>\notes\<folder>_<title>.cson``.

        The default is now a raw string: the original literal relied on the
        invalid escape sequence ``\{`` (a SyntaxWarning on modern Python)
        and parsed to exactly the same value.
        """
        for storage, folder, note in self.boostnote.walk_note():
            # Formatting context: folder info + storage path + note fields.
            named_dict = {
                'folder_key': folder.key,
                'folder_name': folder.name,
                'path': storage._path,
            }
            named_dict.update(note._data)
            logger.info(f'old={note.filename}')
            note.filename = name_pattern.format(**named_dict)
            logger.info(f'new={note.filename}')
Exemplo n.º 7
0
    def __init__(self, wiki_root_url):
        """Fetch the wiki's TitleIndex and load every listed page as a source."""
        WikiImpoter.__init__(self, wiki_root_url)

        parsed = urlparse(wiki_root_url)
        root_url = '%s://%s' % (parsed.scheme, parsed.netloc)
        self.wiki_path = wiki_root_url.replace(root_url, '')
        full_url = root_url + self.wiki_path + 'wiki.php/%s'

        index_page = requests.get(full_url % 'TitleIndex')
        soup = BeautifulSoup(index_page.text, 'html.parser')

        loaded = 0
        for item in soup.find_all('li'):
            wiki_title = self.get_wiki_title(item)
            if skip_page(wiki_title):
                continue
            # raw-mode URL for this page's wiki text
            page_url = full_url % quote(wiki_title) + '?action=raw'
            wiki_text = get_wiki_text_from_url(page_url)
            if empty_or_notfound(wiki_text):
                logger.info('wiki_url is empty(%s)' % page_url)
                continue

            self.append_source(wiki_title, {
                'wiki_title': wiki_title,
                'wiki_url': page_url,
                'contents': wiki_text,
                'uuid': str(uuid4()),
                'links': {},
                'images': {},
            })
            loaded += 1

        self.init_source()
        for value in self.sources.values():
            self.init_wiki(value)
        logger.info('%d files loaded' % loaded)
Exemplo n.º 8
0
 def check(self):
     """Log every match of the configured note replacements, per note.

     Purely diagnostic: nothing is modified, and output only appears when
     ``verbose`` is enabled.
     """
     for storage, folder, note in self.boostnote.walk_note():
         for pattern, _replacement in self._note_replace:
             found = re.findall(pattern, note.content)  # , re.DOTALL)
             if not (self.verbose and found):
                 continue
             logger.info(
                 '\n<storage=%s folder=%s note=%s(%s)>' %
                 (storage._path, folder.name, note.title, note.uuid))
             logger.info('=> %s ... %d matchs found' %
                         (pattern.replace('\n', '\\n'), len(found)))
             for idx, hit in enumerate(found):
                 logger.info('%02d : %s' % (idx, hit))
Exemplo n.º 9
0
    def do_update(self):
        """Apply every configured regex replacement to every note's content.

        Logs each replacement that actually changed the note and, when
        ``verbose``, a line-by-line before/after diff of the note.
        """
        for storage, folder, note in self.boostnote.walk_note():
            original = note.content
            content = original

            for from_rep, to_rep in self._note_replace:
                before = content
                content = re.sub(from_rep, to_rep, content)
                # BUG FIX: the original compared against note.content, so once
                # any pattern had matched, every later pattern was logged as a
                # "change" even when it replaced nothing.
                if content != before:
                    logger.info('%s note changed(%s)=>(%s)' %
                                (note.title, from_rep, to_rep.strip()))
            note.content = content
            if self.verbose is True:
                # BUG FIX: the original zipped content.split('\n') against
                # itself, so the diff below could never print. Compare the
                # pre-update text with the new text instead.
                for old_line, new_line in zip(original.split('\n'),
                                              content.split('\n')):
                    if old_line != new_line:
                        logger.info('  ' + old_line)
                        logger.info('=>' + new_line)
Exemplo n.º 10
0
def to_normal_md(s: Storage, n: Note, export_to_path,
                 export_attach_method=AttachPathType.LinkRelativePath) -> bool:
    """
    export boost markdown to normal markdown

    1. remove :storage, :note
    2. insert front-meta data

    Writes ``<export_to_path>/<folder>/<title>.md`` and returns True.
    Attachment links are rewritten according to *export_attach_method*:
    point at the attachment in place, or copy it next to (or under) the
    exported markdown file.

    BUG FIX: the original did ``keywords = n.tags`` and then extended that
    list, mutating the note's own tag list as a side effect.
    """

    # preparing variables for converting
    export_folder_path = os.path.join(export_to_path, s.folders[n.folder].name)
    if not os.path.exists(export_folder_path):
        os.mkdir(export_folder_path)
    filename = normalize_filename(n.title)
    md_storage = os.path.join(export_folder_path, filename)

    # split front-meta and content
    note_metadata, content = split_frontmeta_and_content(n.content)

    # remove :storage — rewrite each link to a real attachment path
    rx_storage = re.compile(r'\(:storage[\\|/]([a-z0-9\-]+)[\\|/]([\w\.]+)\)', re.MULTILINE)

    for match in rx_storage.finditer(content):
        uuid, linkname = match.groups()

        # where the attachment lives, and where the rewritten link should point
        link_source_path = os.path.join(s._path, 'attachments', uuid, linkname)
        if export_attach_method == AttachPathType.LinkRelativePath:
            link_target_path = os.path.join(uuid_to_md_path(s, export_to_path, uuid), linkname)
        elif export_attach_method == AttachPathType.CopyToMarkdownSubPath:
            link_target_path = os.path.join(md_storage, linkname)
        elif export_attach_method == AttachPathType.CopyToMarkdownPath:
            link_target_path = os.path.join(export_folder_path, linkname)
        else:
            raise ValueError('not support variable type')

        if not os.path.exists(link_target_path):
            # NOTE(review): os.mkdir creates only one level — this assumes the
            # grandparent directory already exists; confirm for SubPath mode.
            if not os.path.exists(os.path.dirname(link_target_path)):
                os.mkdir(os.path.dirname(link_target_path))
            if not os.path.exists(link_source_path):
                logger.info(f'missing file {n.title}/{linkname}')
            else:
                copy2(link_source_path, link_target_path)

        # replace \\ type path name to / type, \\ occur error in windows case
        link_target_path = os.path.relpath(link_target_path, export_folder_path)
        link_target_path = link_target_path.replace('\\', '/')
        content = content.replace(match.group(), f'({link_target_path})')

    # remove :note — point each link at the exported markdown file
    rx_note_link = re.compile(r'\(:note:([a-z0-9\-]+)\)')
    for match in rx_note_link.finditer(content):
        uuid = match.groups()[0]
        # plain concatenation — the original wrapped this in a pointless
        # single-argument os.path.join
        link_target_path = uuid_to_md_path(s, export_to_path, uuid) + '.md'

        content = content.replace(match.group(), f'({link_target_path})')

    # update front-meta: merge note tags with any tags already in the
    # front-matter. Copy first so n.tags itself is never mutated.
    keywords = list(n.tags)
    if 'tags' in note_metadata:
        keywords.extend(note_metadata['tags'])
    if keywords:
        # sorted for deterministic front-matter output (set order is not)
        note_metadata['keywords'] = sorted(set(keywords))

    with codecs.open(os.path.join(export_folder_path, filename + '.md'), 'w', "utf-8") as fp:
        if note_metadata != {}:
            yaml_string = yaml.dump(note_metadata, Dumper=Dumper, default_flow_style=False, allow_unicode=True)
            # yaml_string[:-1] drops yaml.dump's trailing newline
            fp.write('\n'.join(['---', yaml_string[:-1], '---\n']))
        fp.write(content)

    return True