def saving_dissected_datasets(hourly_data: list):
    for i, df in enumerate(hourly_data):
        yearly_data = get_yearly_data_from_df(df)
        for j, year in enumerate(yearly_data):
            with open(
                    "dataset/complete_dataset/hour-{}-year-{}.pickle".format(
                        i, j), "wb") as f:
                dump_pickle(year, f)
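For reference, `dump_pickle` in these examples is typically just an alias for `pickle.dump`. A minimal round-trip sketch, assuming that alias and the naming scheme used above:

import pickle

# Hypothetical check: load back the file written for hour 0 / year 0.
# Assumes dump_pickle above is pickle.dump and the saved object is a DataFrame.
with open("dataset/complete_dataset/hour-0-year-0.pickle", "rb") as f:
    year_df = pickle.load(f)
print(year_df.shape)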
Example #2
def save_features(feat, dir=paths.READY_TO_USE_FEAT, name='1000_htk.csv'):
    extension = splitext(name)[1]
    full_name = join(dir, name)
    if extension == F_CSV:
        with open(full_name, 'w', newline='', encoding='utf-8') as csv_file:
            wr = writer(csv_file, delimiter=',', quoting=QUOTE_NONNUMERIC)
            wr.writerows(feat)
    elif extension == F_PIC:
        with open(full_name, 'wb') as file:
            dump_pickle(feat, file)
    elif extension == F_NPY:
        save(full_name, feat)
    else:
        raise Exception('This file extension is not supported!')
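A hedged usage sketch for the function above. The `paths.READY_TO_USE_FEAT` default directory and the `F_CSV`, `F_PIC` and `F_NPY` constants come from the example's own module; the extension strings assumed below are illustrative.

feat = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]  # rows of numeric features

# Assuming F_CSV == '.csv': rows are written with csv.writer.
save_features(feat, name='1000_htk.csv')

# Assuming F_PIC matches '.pickle': the same structure is pickled instead.
save_features(feat, name='1000_htk.pickle')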
Example #3
def authenticate() -> Resource:
    """
        A simple helper that authenticates the user with the Google Drive API. On the
        first run (or if the script does not yet have the required permissions), it
        asks the user to log in via a browser window and then saves a pickle file
        with the credentials obtained.

        On subsequent runs, this pickle file is used directly, so the user does not
        have to log in again on every run of this script.

        Returns
        --------
        An instance of `Resource` that can be used to interact with the Google Drive API.
    """

    # Simple declaration, will be populated if credentials are present.
    creds: Optional[Credentials] = None

    # The scope that is to be requested.
    SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

    if exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds: Credentials = load_pickle(token)

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            dump_pickle(creds, token)

    service: Resource = build('drive', 'v3', credentials=creds)
    return service
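A short usage sketch: the returned `Resource` behaves like any Drive v3 client, for example when listing a few files (the call below follows the standard Drive API quickstart):

service = authenticate()

# List the first 10 files visible to the authorised account.
results = service.files().list(
    pageSize=10, fields="nextPageToken, files(id, name)").execute()
for item in results.get("files", []):
    print(item["name"], item["id"])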
Example #4
    def __authenticate(self) -> discovery.Resource:
        """
        Authenticates user session using Drive API.

        Remarks
        --------
        Attempts to open a browser window asking the user to log in and grant
        permissions during the first run. Saves a `.pickle` file to skip this step
        in future runs.

        Returns
        --------
        Object of `googleapiclient.discovery.Resource`
        """

        creds: Optional[Credentials] = None

        # Selectively asks for read-only permission
        SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]

        if path_exists("token.pickle"):
            with open("token.pickle", "rb") as token:
                creds: Credentials = load_pickle(token)

        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    "credentials.json", SCOPES)
                creds = flow.run_local_server(port=0)
            with open("token.pickle", "wb") as token:
                dump_pickle(creds, token)  # save credentials for next run

        return googleapiclient.discovery.build("drive",
                                               "v3",
                                               credentials=creds)
Example #5
File: yatiblog.py  Project: fijal/ampify
def main(argv=None):

    argv = argv or sys.argv[1:]
    op = OptionParser(
        usage="Usage: %prog [options] [path/to/source/directory]"
        )

    op.add_option('-d', dest='data_file', default='.articlestore',
                  help="Set the path for a data file (default: .articlestore)")

    op.add_option('-o', dest='output_directory', default='website',
                  help="Set the output directory for files (default: website)")

    op.add_option('-p', dest='package', default='',
                  help="Generate documentation for a Python package (optional)")

    op.add_option('--clean', dest='clean', default=False, action='store_true',
                  help="Flag to remove all generated output files")

    op.add_option('--force', dest='force', default=False, action='store_true',
                  help="Flag to force regeneration of all files")

    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")

    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return

    # normalise various options and load from the config file

    if args:
        source_directory = args[0]
    else:
        source_directory = getcwd()

    source_directory = abspath(source_directory)
    chdir(source_directory)

    if not isdir(source_directory):
        raise IOError("%r is not a directory!" % source_directory)

    config_file = join_path(source_directory, '_config.yml')
    if not isfile(config_file):
        raise IOError("Couldn't find: %s" % config_file)

    config_file_obj = open(config_file, 'rb')
    config_data = config_file_obj.read()
    config_file_obj.close()
    config = load_yaml(config_data)

    index_pages = config.pop('index_pages')
    if not isinstance(index_pages, list):
        raise ValueError("The 'index_pages' config value is not a list!")

    index_pages = dict(
        (index_page.keys()[0], index_page.values()[0])
        for index_page in index_pages
        )

    output_directory = join_path(source_directory, options.output_directory.rstrip('/'))
    if not isdir(output_directory):
        if not exists(output_directory):
            mkdir(output_directory)
        else:
            raise IOError("%r is not a directory!" % output_directory)

    verbose = not options.quiet

    # see if there's a persistent data file to read from

    data_file = join_path(source_directory, options.data_file)
    if isfile(data_file):
        data_file_obj = open(data_file, 'rb')
        data_dict = load_pickle(data_file_obj)
        data_file_obj.close()
    else:
        data_dict = {}

    # figure out what the generated files would be

    source_files = [
        file for file in listfiles(source_directory) if file.endswith('.txt')
        ]

    generated_files = [
        join_path(output_directory, splitext(file)[0] + '.html')
        for file in source_files
        ]

    index_files = [join_path(output_directory, index) for index in index_pages]

    # handle --clean

    if options.clean:
        for file in generated_files + index_files + [data_file]:
            if isfile(file):
                if verbose:
                    print "Removing: %s" % file
                rm(file)
        sys.exit()

    # figure out layout dependencies for the source .txt files

    layouts = {}
    sources = {}

    def init_rst_source(source_file, destname=None):

        source_path = join_path(source_directory, source_file)
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()

        if not content.startswith('---'):
            return

        filebase, filetype = splitext(source_file)
        filebase = filebase.lower()

        env = load_yaml(match_yaml_frontmatter(content).group(1))
        layout = env.pop('layout')

        if layout not in layouts:
            load_layout(layout, source_directory, layouts)

        content = replace_yaml_frontmatter('', content)

        if MORE_LINE in content:
            lead = content.split(MORE_LINE)[0]
            content = content.replace(MORE_LINE, '')
        else:
            lead = content

        if destname:
            destname = join_path(output_directory, destname)
        else:
            destname = join_path(output_directory, filebase + '.html')

        sources[source_file] = {
            '__content__': content,
            '__deps__': find_include_refs(content),
            '__env__': env,
            '__genfile__': destname,
            '__id__': source_file,
            '__layout__': layout,
            '__lead__': lead,
            '__mtime__': stat(source_path).st_mtime,
            '__name__': filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': filetype
            }

    for source_file in source_files:
        init_rst_source(source_file)

    # and likewise for the index_pages

    render_last = set()

    for index_page, index_source in index_pages.items():
        layout, filetype = splitext(index_source)
        if filetype == '.genshi':
            if layout not in layouts:
                load_layout(layout, source_directory, layouts)
            source_path = join_path(source_directory, '_layouts', index_source)
            sources[index_source] = {
                '__content__': '',
                '__deps__': [],
                '__env__': {},
                '__genfile__': join_path(output_directory, index_page),
                '__id__': index_source,
                '__layout__': layout,
                '__lead__': '',
                '__mtime__': stat(source_path).st_mtime,
                '__name__': index_page,
                '__outdir__': output_directory,
                '__path__': source_path,
                '__rst__': False,
                '__type__': 'index'
                }
        else:
            init_rst_source(index_source, index_page)
        render_last.add(index_source)

    # update the envs for all the source files

    for source in sources:
        info = sources[source]
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            for dep_layout in reversed(layout_info['__deps__']):
                info.update(layouts[dep_layout]['__env__'])
        info.update(layouts[layout]['__env__'])
        info.update(get_git_info(info['__path__']))
        info.update(info.pop('__env__'))

    # figure out which files to regenerate

    if not options.force:

        no_regen = set()
        for source in sources:

            info = sources[source]
            try:
                gen_mtime = stat(info['__genfile__']).st_mtime
            except:
                continue

            dirty = False
            if gen_mtime < info['__mtime__']:
                dirty = True

            layout = info['__layout__']
            layout_info = layouts[layout]
            if layout_info['__deps__']:
                layout_chain = [layout] + layout_info['__deps__']
            else:
                layout_chain = [layout]

            for layout in layout_chain:
                if gen_mtime < layouts[layout]['__mtime__']:
                    dirty = True
                    break

            for dep in info['__deps__']:
                dep_mtime = stat(join_path(source_directory, dep)).st_mtime
                if gen_mtime < dep_mtime:
                    dirty = True
                    break

            if not dirty:
                no_regen.add(source)

        for source in no_regen:
            if source in render_last:
                continue
            del sources[source]

        remaining = set(sources.keys())
        if remaining == render_last:
            for source in remaining.intersection(no_regen):
                del sources[source]

    # regenerate!

    for source, source_info in sorted(sources.items(), key=lambda x: x[1]['__rst__'] == False):

        info = config.copy()
        info.update(source_info)

        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s' % (info['__type__'], info['__path__'])
            print LINE
            print

        if info['__rst__']:
            output = info['__output__'] = render_rst(info['__content__'])
            if info['__lead__'] == info['__content__']:
                info['__lead_output__'] = info['__output__']
            else:
                info['__lead_output__'] = render_rst(info['__lead__'])
        else:
            output = ''

        layout = info['__layout__']
        layout_info = layouts[layout]

        if layout_info['__deps__']:
            layout_chain = [layout] + layout_info['__deps__']
        else:
            layout_chain = [layout]

        for layout in layout_chain:
            template = layouts[layout]['__template__']
            output = template.generate(
                content=output,
                yatidb=data_dict,
                **info
                ).render('xhtml', encoding=None)

        if isinstance(output, unicode):
            output = output.encode('utf-8')

        data_dict[info['__name__']] = info

        output_file = open(info['__genfile__'], 'wb')
        output_file.write(output)
        output_file.close()

        if verbose:
            print 'Done!'

    # persist the data file to disk

    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()

    sys.exit()

    # @/@ site config

    # @/@ need to fix up this old segment of the code to the latest approach

    if options.package:

        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r" % package_root)

        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:

                if not filename.endswith('.py'):
                    continue

                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]

                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()

                docstring = docstring_regex.search(module_source)

                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]

                if docstring and docstring.strip().startswith('=='):
                    docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''

                info = {}

                if root_path and isabs(filename) and filename.startswith(root_path):
                    info['__path__'] = filename[len(root_path)+1:]
                else:
                    info['__path__'] = filename

                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime
                    )

                info['__outdir__'] = output_directory
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(), SYNTAX_FORMATTER)
                add_file((docstring, '', info))

    # @/@ fix up the old index.js/json generator

    try:
        import json
    except ImportError:
        import simplejson as json

    index_js_template = join_path(output_directory, 'index.js.template')

    if isfile(index_js_template):

        index_json = json.dumps([
            [_art['__name__'], _art['title'].encode('utf-8')]
            for _art in sorted(
                [item for item in items if item.get('x-created') and
                 item.get('x-type', 'blog') == 'blog'],
                key=lambda i: i['x-created']
                )
            ])

        index_js_template = open(index_js_template, 'rb').read()
        index_js = open(join_path(output_directory, 'index.js'), 'wb')
        index_js.write(index_js_template % index_json)
        index_js.close()
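The script above keeps its article metadata in a pickled dict (the `.articlestore` data file): it is loaded if present, updated with one entry per rendered page, and dumped back at the end. Stripped of the site-generation details, the persistence pattern is roughly:

import pickle
from os.path import isfile

DATA_FILE = ".articlestore"  # default value of the -d option above

# Load the persistent store if it exists, start empty otherwise.
if isfile(DATA_FILE):
    with open(DATA_FILE, "rb") as f:
        data_dict = pickle.load(f)
else:
    data_dict = {}

# ... render sources, adding one info dict per generated page ...
data_dict["example-page"] = {"__type__": "txt"}  # illustrative entry only

# Persist the updated store for the next run.
with open(DATA_FILE, "wb") as f:
    pickle.dump(data_dict, f)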
Example #6
 def save_pickle(self, filepath: str, info):
     """
     Save info in a pickle file
     """
     with open(filepath, 'wb') as f:
         dump_pickle(info, f)
Example #7
def rw_data(path, obj=None, parameters=None):
    """
    Read/write from/to a file.

    See <https://pandas.pydata.org/pandas-docs/stable/io.html>.

    Note that the file must have an extension.

    Parameters
    ----------
    path : str
        Path name of the file. It must start with ``./``.
    obj : generic object, optional
        Object to be written. If ``None`` (default), the file is read instead.
    parameters : dict
        Dictionary of parameters for the IO operation
    """

    extension = path.split('.')[-1].lower()

    # Read
    if obj is None:

        if extension == 'pkl':
            obj = load_pickle(open(path, 'rb'))
        elif extension == 'json':
            obj = load_json(open(path, 'rb'))
        elif extension in {'hdf5', 'h5', 'hdf'}:
            if parameters is None:
                obj = read_hdf(path)
            else:
                obj = read_hdf(path, **parameters)
        elif extension == 'csv':
            if parameters is None:
                obj = read_csv(path)
            else:
                obj = read_csv(path, **parameters)
        else:
            print('WARNING: Unsupported or missing file extension')

        return obj

    # Write
    else:

        # Make sure the directory exists

        os.makedirs(os.path.dirname(path), exist_ok=True)

        if extension == 'pkl':
            dump_pickle(obj, open(path, 'wb'))
        elif extension == 'json':
            dump_json(obj, fp=open(path, 'w'))
        elif extension in {'hdf5', 'h5', 'hdf'}:
            obj.to_hdf(path, 'key', mode='w')
        elif extension == 'csv':
            if parameters is None:
                obj.to_csv(path)
            else:
                obj.to_csv(path, **parameters)
        else:
            print('WARNING: Unsupported or missing file extension')
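A hedged usage sketch, assuming pandas is available and that `read_csv`, `load_pickle`, `dump_pickle` and friends are the imports the example's module already provides:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# Write: the extension selects the backend, here the pickle branch.
rw_data("./output/frame.pkl", obj=df)

# Read: obj is None, so the same path is loaded back.
df_roundtrip = rw_data("./output/frame.pkl")

# CSV with extra keyword arguments forwarded to pandas.
rw_data("./output/frame.csv", obj=df, parameters={"index": False})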
Example #8
 def persist_data_file():
     if data_file:
         data_file_obj = open(data_file, 'wb')
         dump_pickle(data_dict, data_file_obj)
         data_file_obj.close()
Example #9
def main(argv, genfiles=None):

    op = OptionParser()

    op.add_option('-a',
                  dest='authors',
                  default='',
                  help="Set the path for a special authors file (optional)")

    op.add_option(
        '-c',
        dest='package',
        default='',
        help="Generate documentation for the Python package (optional)")

    op.add_option('-d',
                  dest='data_file',
                  default='',
                  help="Set the path for a persistent data file (optional)")

    op.add_option('-e',
                  dest='output_encoding',
                  default='utf-8',
                  help="Set the output encoding (default: utf-8)")

    op.add_option('-f',
                  dest='format',
                  default='html',
                  help="Set the output format (default: html)")

    op.add_option('-i',
                  dest='input_encoding',
                  default='utf-8',
                  help="Set the input encoding (default: utf-8)")

    op.add_option('-o',
                  dest='output_path',
                  default=HOME,
                  help="Set the output directory for files (default: $PWD)")

    op.add_option('-p',
                  dest='pattern',
                  default='',
                  help="Generate index files for the path pattern (optional)")

    op.add_option('-r',
                  dest='root_path',
                  default='',
                  help="Set the path to the root working directory (optional)")

    op.add_option('-t',
                  dest='template',
                  default='',
                  help="Set the path to a template file (optional)")

    op.add_option('--quiet',
                  dest='quiet',
                  default=False,
                  action='store_true',
                  help="Flag to suppress output")

    op.add_option('--stdout',
                  dest='stdout',
                  default=False,
                  action='store_true',
                  help="Flag to redirect to stdout instead of to a file")

    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return

    authors = options.authors

    if authors:
        if not isfile(authors):
            raise IOError("%r is not a valid path!" % authors)
        authors = parse_authors_file(authors)
    else:
        authors = {}

    email2author = {'unknown': 'unknown'}
    author2link = {'unknown': ''}

    for author, author_info in authors.iteritems():
        for _info in author_info:
            if _info.startswith('http://') or _info.startswith('https://'):
                if author not in author2link:
                    author2link[author] = _info
            elif '@' in _info:
                email2author[_info] = author

    authors['unknown'] = ['unknown']

    output_path = options.output_path.rstrip('/')

    if not isdir(output_path):
        raise IOError("%r is not a valid directory!" % output_path)

    root_path = options.root_path

    siteinfo = join_path(output_path, '.siteinfo')
    if isfile(siteinfo):
        env = {}
        execfile(siteinfo, env)
        siteinfo = env['INFO']
    else:
        siteinfo = {
            'site_url': '',
            'site_nick': '',
            'site_description': '',
            'site_title': ''
        }

    stdout = sys.stdout if options.stdout else None
    verbose = False if stdout else (not options.quiet)

    format = options.format

    if format not in ('html', 'tex'):
        raise ValueError("Unknown format: %s" % format)

    if (format == 'tex') or (not options.template):
        template = False
    elif not isfile(options.template):
        raise IOError("%r is not a valid template!" % options.template)
    else:
        template_path = abspath(options.template)
        template_root = dirname(template_path)
        template_loader = TemplateLoader([template_root])
        template_file = open(template_path, 'rb')
        template = MarkupTemplate(template_file.read(),
                                  loader=template_loader,
                                  encoding='utf-8')
        template_file.close()

    data_file = options.data_file

    if data_file:
        if isfile(data_file):
            data_file_obj = open(data_file, 'rb')
            data_dict = load_pickle(data_file_obj)
            data_file_obj.close()
        else:
            data_dict = {}

    input_encoding = options.input_encoding
    output_encoding = options.output_encoding

    if genfiles:

        files = genfiles

    elif options.package:

        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r" %
                             package_root)

        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:

                if not filename.endswith('.py'):
                    continue

                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]

                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()

                docstring = docstring_regex.search(module_source)

                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]

                if docstring and docstring.strip().startswith('=='):
                    docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''

                info = {}

                if root_path and isabs(filename) and filename.startswith(
                        root_path):
                    info['__path__'] = filename[len(root_path) + 1:]
                else:
                    info['__path__'] = filename

                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime)

                info['__outdir__'] = output_path
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))

    else:

        files = []
        add_file = files.append

        for filename in args:

            if not isfile(filename):
                raise IOError("%r doesn't seem to be a valid file!" % filename)

            if root_path and isabs(filename) and filename.startswith(
                    root_path):
                path = filename[len(root_path) + 1:]
            else:
                path = filename

            info = get_git_info(filename, path)

            # old svn support:
            # info = get_svn_info(path.split(SEP)[0], '*.txt')[path]

            source_file = open(filename, 'rb')
            source = source_file.read()
            source_file.close()

            if MORE_LINE in source:
                source_lead = source.split(MORE_LINE)[0]
                source = source.replace(MORE_LINE, '')
            else:
                source_lead = ''

            filebase, filetype = splitext(basename(filename))
            info['__outdir__'] = output_path
            info['__name__'] = filebase.lower()
            info['__type__'] = 'txt'
            info['__title__'] = filebase.replace('-', ' ')
            add_file((source, source_lead, info))

    for source, source_lead, info in files:

        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s in [%s]' % (
                info['__type__'], info['__path__'], split_path(output_path)[1])
            print LINE
            print

        if template:
            output, props = render_rst(source, format, input_encoding, True)
            # output = output.encode(output_encoding)
            info['__text__'] = output.encode(output_encoding)
            info.update(props)
            if source_lead:
                info['__lead__'] = render_rst(source_lead, format,
                                              input_encoding,
                                              True)[0].encode(output_encoding)
            output = template.generate(content=output,
                                       info=info,
                                       authors=authors,
                                       email2author=email2author,
                                       author2link=author2link,
                                       **siteinfo).render(
                                           'xhtml', encoding=output_encoding)
        else:
            output, props = render_rst(source,
                                       format,
                                       input_encoding,
                                       True,
                                       as_whole=True)
            info.update(props)
            output = output.encode(output_encoding)
            info['__text__'] = output
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True,
                    as_whole=True)[0].encode(output_encoding)

        if data_file:
            data_dict[info['__path__']] = info

        if stdout:
            print output
        else:
            output_filename = join_path(output_path,
                                        '%s.%s' % (info['__name__'], format))
            output_file = open(output_filename, 'wb')
            output_file.write(output)
            output_file.close()
            if verbose:
                print 'Done!'

    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()

    if options.pattern:

        pattern = options.pattern

        items = [
            item for item in data_dict.itervalues()
            if item['__outdir__'] == pattern
        ]

        # index.js/json

        import json

        index_js_template = join_path(output_path, 'index.js.template')

        if isfile(index_js_template):

            index_json = json.dumps(
                [[_art['__name__'], _art['title'].encode('utf-8')]
                 for _art in sorted([
                     item for item in items if item.get('x-created')
                     and item.get('x-type', 'blog') == 'blog'
                 ],
                                    key=lambda i: i['x-created'])])

            index_js_template = open(index_js_template, 'rb').read()
            index_js = open(join_path(output_path, 'index.js'), 'wb')
            index_js.write(index_js_template % index_json)
            index_js.close()

        for name, mode, format in INDEX_FILES:

            pname = name.split('.', 1)[0]
            template_file = None

            if siteinfo['site_nick']:
                template_path = join_path(
                    template_root,
                    '%s.%s.genshi' % (pname, siteinfo['site_nick']))
                if isfile(template_path):
                    template_file = open(template_path, 'rb')

            if not template_file:
                template_path = join_path(template_root, '%s.genshi' % pname)

            template_file = open(template_path, 'rb')
            page_template = MarkupTemplate(template_file.read(),
                                           loader=template_loader,
                                           encoding='utf-8')
            template_file.close()

            poutput = page_template.generate(items=items[:],
                                             authors=authors,
                                             email2author=email2author,
                                             author2link=author2link,
                                             root_path=output_path,
                                             **siteinfo).render(format)

            poutput = unicode(poutput, output_encoding)

            if mode:
                output = template.generate(alternative_content=poutput,
                                           authors=authors,
                                           **siteinfo).render(format)
            else:
                output = poutput

            # @/@ wtf is this needed???
            if isinstance(output, unicode):
                output = output.encode(output_encoding)

            output_file = open(join_path(output_path, name), 'wb')
            output_file.write(output)
            output_file.close()
Example #10
def main(argv=None):

    argv = argv or sys.argv[1:]
    op = OptionParser(
        usage="Usage: %prog [options] [path/to/source/directory]")

    op.add_option('-d',
                  dest='data_file',
                  default='.articlestore',
                  help="Set the path for a data file (default: .articlestore)")

    op.add_option('-o',
                  dest='output_directory',
                  default='website',
                  help="Set the output directory for files (default: website)")

    op.add_option(
        '-p',
        dest='package',
        default='',
        help="Generate documentation for a Python package (optional)")

    op.add_option('--clean',
                  dest='clean',
                  default=False,
                  action='store_true',
                  help="Flag to remove all generated output files")

    op.add_option('--force',
                  dest='force',
                  default=False,
                  action='store_true',
                  help="Flag to force regeneration of all files")

    op.add_option('--quiet',
                  dest='quiet',
                  default=False,
                  action='store_true',
                  help="Flag to suppress output")

    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return

    # normalise various options and load from the config file

    if args:
        source_directory = args[0]
    else:
        source_directory = getcwd()

    source_directory = abspath(source_directory)
    chdir(source_directory)

    if not isdir(source_directory):
        raise IOError("%r is not a directory!" % source_directory)

    config_file = join_path(source_directory, '_config.yml')
    if not isfile(config_file):
        raise IOError("Couldn't find: %s" % config_file)

    config_file_obj = open(config_file, 'rb')
    config_data = config_file_obj.read()
    config_file_obj.close()
    config = load_yaml(config_data)

    index_pages = config.pop('index_pages')
    if not isinstance(index_pages, list):
        raise ValueError("The 'index_pages' config value is not a list!")

    index_pages = dict((index_page.keys()[0], index_page.values()[0])
                       for index_page in index_pages)

    output_directory = join_path(source_directory,
                                 options.output_directory.rstrip('/'))
    if not isdir(output_directory):
        if not exists(output_directory):
            mkdir(output_directory)
        else:
            raise IOError("%r is not a directory!" % output_directory)

    verbose = not options.quiet

    # see if there's a persistent data file to read from

    data_file = join_path(source_directory, options.data_file)
    if isfile(data_file):
        data_file_obj = open(data_file, 'rb')
        data_dict = load_pickle(data_file_obj)
        data_file_obj.close()
    else:
        data_dict = {}

    # figure out what the generated files would be

    source_files = [
        file for file in listfiles(source_directory) if file.endswith('.txt')
    ]

    generated_files = [
        join_path(output_directory,
                  splitext(file)[0] + '.html') for file in source_files
    ]

    index_files = [join_path(output_directory, index) for index in index_pages]

    # handle --clean

    if options.clean:
        for file in generated_files + index_files + [data_file]:
            if isfile(file):
                if verbose:
                    print "Removing: %s" % file
                rm(file)
        sys.exit()

    # figure out layout dependencies for the source .txt files

    layouts = {}
    sources = {}

    def init_rst_source(source_file, destname=None):

        source_path = join_path(source_directory, source_file)
        source_file_obj = open(source_path, 'rb')
        content = source_file_obj.read()
        source_file_obj.close()

        if not content.startswith('---'):
            return

        filebase, filetype = splitext(source_file)
        filebase = filebase.lower()

        env = load_yaml(match_yaml_frontmatter(content).group(1))
        layout = env.pop('layout')

        if layout not in layouts:
            load_layout(layout, source_directory, layouts)

        content = replace_yaml_frontmatter('', content)

        if MORE_LINE in content:
            lead = content.split(MORE_LINE)[0]
            content = content.replace(MORE_LINE, '')
        else:
            lead = content

        if destname:
            destname = join_path(output_directory, destname)
        else:
            destname = join_path(output_directory, filebase + '.html')

        sources[source_file] = {
            '__content__': content,
            '__deps__': find_include_refs(content),
            '__env__': env,
            '__genfile__': destname,
            '__id__': source_file,
            '__layout__': layout,
            '__lead__': lead,
            '__mtime__': stat(source_path).st_mtime,
            '__name__': filebase,
            '__outdir__': output_directory,
            '__path__': source_path,
            '__rst__': True,
            '__type__': filetype
        }

    for source_file in source_files:
        init_rst_source(source_file)

    # and likewise for the index_pages

    render_last = set()

    for index_page, index_source in index_pages.items():
        layout, filetype = splitext(index_source)
        if filetype == '.genshi':
            if layout not in layouts:
                load_layout(layout, source_directory, layouts)
            source_path = join_path(source_directory, '_layouts', index_source)
            sources[index_source] = {
                '__content__': '',
                '__deps__': [],
                '__env__': {},
                '__genfile__': join_path(output_directory, index_page),
                '__id__': index_source,
                '__layout__': layout,
                '__lead__': '',
                '__mtime__': stat(source_path).st_mtime,
                '__name__': index_page,
                '__outdir__': output_directory,
                '__path__': source_path,
                '__rst__': False,
                '__type__': 'index'
            }
        else:
            init_rst_source(index_source, index_page)
        render_last.add(index_source)

    # update the envs for all the source files

    for source in sources:
        info = sources[source]
        layout = info['__layout__']
        layout_info = layouts[layout]
        if layout_info['__deps__']:
            for dep_layout in reversed(layout_info['__deps__']):
                info.update(layouts[dep_layout]['__env__'])
        info.update(layouts[layout]['__env__'])
        info.update(get_git_info(info['__path__']))
        info.update(info.pop('__env__'))

    # figure out which files to regenerate

    if not options.force:

        no_regen = set()
        for source in sources:

            info = sources[source]
            try:
                gen_mtime = stat(info['__genfile__']).st_mtime
            except:
                continue

            dirty = False
            if gen_mtime < info['__mtime__']:
                dirty = True

            layout = info['__layout__']
            layout_info = layouts[layout]
            if layout_info['__deps__']:
                layout_chain = [layout] + layout_info['__deps__']
            else:
                layout_chain = [layout]

            for layout in layout_chain:
                if gen_mtime < layouts[layout]['__mtime__']:
                    dirty = True
                    break

            for dep in info['__deps__']:
                dep_mtime = stat(join_path(source_directory, dep)).st_mtime
                if gen_mtime < dep_mtime:
                    dirty = True
                    break

            if not dirty:
                no_regen.add(source)

        for source in no_regen:
            if source in render_last:
                continue
            del sources[source]

        remaining = set(sources.keys())
        if remaining == render_last:
            for source in remaining.intersection(no_regen):
                del sources[source]

    # regenerate!

    for source, source_info in sorted(sources.items(),
                                      key=lambda x: x[1]['__rst__'] == False):

        info = config.copy()
        info.update(source_info)

        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s' % (info['__type__'], info['__path__'])
            print LINE
            print

        if info['__rst__']:
            output = info['__output__'] = render_rst(info['__content__'])
            if info['__lead__'] == info['__content__']:
                info['__lead_output__'] = info['__output__']
            else:
                info['__lead_output__'] = render_rst(info['__lead__'])
        else:
            output = ''

        layout = info['__layout__']
        layout_info = layouts[layout]

        if layout_info['__deps__']:
            layout_chain = [layout] + layout_info['__deps__']
        else:
            layout_chain = [layout]

        for layout in layout_chain:
            template = layouts[layout]['__template__']
            output = template.generate(content=output,
                                       yatidb=data_dict,
                                       **info).render('xhtml', encoding=None)

        if isinstance(output, unicode):
            output = output.encode('utf-8')

        data_dict[info['__name__']] = info

        output_file = open(info['__genfile__'], 'wb')
        output_file.write(output)
        output_file.close()

        if verbose:
            print 'Done!'

    # persist the data file to disk

    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()

    sys.exit()

    # @/@ site config

    # @/@ need to fix up this old segment of the code to the latest approach

    if options.package:

        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r" %
                             package_root)

        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:

                if not filename.endswith('.py'):
                    continue

                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]

                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()

                docstring = docstring_regex.search(module_source)

                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]

                if docstring and docstring.strip().startswith('=='):
                    docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''

                info = {}

                if root_path and isabs(filename) and filename.startswith(
                        root_path):
                    info['__path__'] = filename[len(root_path) + 1:]
                else:
                    info['__path__'] = filename

                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime)

                info['__outdir__'] = output_directory
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))

    # @/@ fix up the old index.js/json generator

    try:
        import json
    except ImportError:
        import simplejson as json

    index_js_template = join_path(output_directory, 'index.js.template')

    if isfile(index_js_template):

        index_json = json.dumps([[
            _art['__name__'], _art['title'].encode('utf-8')
        ] for _art in sorted([
            item for item in items
            if item.get('x-created') and item.get('x-type', 'blog') == 'blog'
        ],
                             key=lambda i: i['x-created'])])

        index_js_template = open(index_js_template, 'rb').read()
        index_js = open(join_path(output_directory, 'index.js'), 'wb')
        index_js.write(index_js_template % index_json)
        index_js.close()
Example #11
File: article.py  Project: enyst/plexnet
def main(argv, genfiles=None):

    op = OptionParser()

    op.add_option('-a', dest='authors', default='',
                  help="Set the path for a special authors file (optional)")

    op.add_option('-c', dest='package', default='',
                  help="Generate documentation for the Python package (optional)")

    op.add_option('-d', dest='data_file', default='',
                  help="Set the path for a persistent data file (optional)")

    op.add_option('-e', dest='output_encoding', default='utf-8',
                  help="Set the output encoding (default: utf-8)")

    op.add_option('-f', dest='format', default='html',
                  help="Set the output format (default: html)")

    op.add_option('-i', dest='input_encoding', default='utf-8',
                  help="Set the input encoding (default: utf-8)")

    op.add_option('-o', dest='output_path', default=HOME,
                  help="Set the output directory for files (default: $PWD)")

    op.add_option('-p', dest='pattern', default='',
                  help="Generate index files for the path pattern (optional)")

    op.add_option('-r', dest='root_path', default='',
                  help="Set the path to the root working directory (optional)")

    op.add_option('-t', dest='template', default='',
                  help="Set the path to a template file (optional)")

    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")

    op.add_option('--stdout', dest='stdout', default=False, action='store_true',
                  help="Flag to redirect to stdout instead of to a file")

    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return

    authors = options.authors

    if authors:
        if not isfile(authors):
            raise IOError("%r is not a valid path!" % authors)
        authors = parse_authors_file(authors)
    else:
        authors = {}

    email2author = {'unknown': 'unknown'}
    author2link = {'unknown': ''}

    for author, author_info in authors.iteritems():
        for _info in author_info:
            if _info.startswith('http://') or _info.startswith('https://'):
                if author not in author2link:
                    author2link[author] = _info
            elif '@' in _info:
                email2author[_info] = author

    authors['unknown'] = ['unknown']

    output_path = options.output_path.rstrip('/')

    if not isdir(output_path):
        raise IOError("%r is not a valid directory!" % output_path)

    root_path = options.root_path

    siteinfo = join_path(output_path, '.siteinfo')
    if isfile(siteinfo):
        env = {}
        execfile(siteinfo, env)
        siteinfo = env['INFO']
    else:
        siteinfo = {
            'site_url': '',
            'site_nick': '',
            'site_description': '',
            'site_title': ''
            }

    stdout = sys.stdout if options.stdout else None
    verbose = False if stdout else (not options.quiet)

    format = options.format

    if format not in ('html', 'tex'):
        raise ValueError("Unknown format: %s" % format)

    if (format == 'tex') or (not options.template):
        template = False
    elif not isfile(options.template):
        raise IOError("%r is not a valid template!" % options.template)
    else:
        template_path = abspath(options.template)
        template_root = dirname(template_path)
        template_loader = TemplateLoader([template_root])
        template_file = open(template_path, 'rb')
        template = MarkupTemplate(
            template_file.read(), loader=template_loader, encoding='utf-8'
            )
        template_file.close()

    data_file = options.data_file

    if data_file:
        if isfile(data_file):
            data_file_obj = open(data_file, 'rb')
            data_dict = load_pickle(data_file_obj)
            data_file_obj.close()
        else:
            data_dict = {}

    input_encoding = options.input_encoding
    output_encoding = options.output_encoding

    if genfiles:

        files = genfiles

    elif options.package:

        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r" % package_root)

        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:

                if not filename.endswith('.py'):
                    continue

                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]

                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()

                docstring = docstring_regex.search(module_source)

                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]

                if docstring and docstring.strip().startswith('=='):
                    docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''

                info = {}

                if root_path and isabs(filename) and filename.startswith(root_path):
                    info['__path__'] = filename[len(root_path)+1:]
                else:
                    info['__path__'] = filename

                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime
                    )

                info['__outdir__'] = output_path
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(), SYNTAX_FORMATTER)
                add_file((docstring, '', info))

    else:

        files = []
        add_file = files.append

        for filename in args:

            if not isfile(filename):
                raise IOError("%r doesn't seem to be a valid file!" % filename)

            if root_path and isabs(filename) and filename.startswith(root_path):
                path = filename[len(root_path)+1:]
            else:
                path = filename

            info = get_git_info(filename, path)

            # old svn support:
            # info = get_svn_info(path.split(SEP)[0], '*.txt')[path]

            source_file = open(filename, 'rb')
            source = source_file.read()
            source_file.close()

            if MORE_LINE in source:
                source_lead = source.split(MORE_LINE)[0]
                source = source.replace(MORE_LINE, '')
            else:
                source_lead = ''

            filebase, filetype = splitext(basename(filename))
            info['__outdir__'] = output_path
            info['__name__'] = filebase.lower()
            info['__type__'] = 'txt'
            info['__title__'] = filebase.replace('-', ' ')
            add_file((source, source_lead, info))

    for source, source_lead, info in files:

        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s in [%s]' % (
                info['__type__'], info['__path__'], split_path(output_path)[1]
                )
            print LINE
            print

        if template:
            output, props = render_rst(
                source, format, input_encoding, True
                )
            # output = output.encode(output_encoding)
            info['__text__'] = output.encode(output_encoding)
            info.update(props)
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True
                    )[0].encode(output_encoding)
            output = template.generate(
                content=output,
                info=info,
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                **siteinfo
                ).render('xhtml', encoding=output_encoding)
        else:
            output, props = render_rst(
                source, format, input_encoding, True, as_whole=True
                )
            info.update(props)
            output = output.encode(output_encoding)
            info['__text__'] = output
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True, as_whole=True
                    )[0].encode(output_encoding)

        if data_file:
            data_dict[info['__path__']] = info

        if stdout:
            print output
        else:
            output_filename = join_path(
                output_path, '%s.%s' % (info['__name__'], format)
                )
            output_file = open(output_filename, 'wb')
            output_file.write(output)
            output_file.close()
            if verbose:
                print 'Done!'

    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()

    if options.pattern:

        pattern = options.pattern

        items = [
            item
            for item in data_dict.itervalues()
            if item['__outdir__'] == pattern
            ]

        # index.js/json

        import json

        index_js_template = join_path(output_path, 'index.js.template')

        if isfile(index_js_template):

            index_json = json.dumps([
                [_art['__name__'], _art['title'].encode('utf-8')]
                for _art in sorted(
                    [item for item in items if item.get('x-created') and
                     item.get('x-type', 'blog') == 'blog'],
                    key=lambda i: i['x-created']
                    )
                ])

            index_js_template = open(index_js_template, 'rb').read()
            index_js = open(join_path(output_path, 'index.js'), 'wb')
            index_js.write(index_js_template % index_json)
            index_js.close()

        for name, mode, format in INDEX_FILES:

            pname = name.split('.', 1)[0]
            template_file = None

            if siteinfo['site_nick']:
                template_path = join_path(
                    template_root, '%s.%s.genshi' % (pname, siteinfo['site_nick'])
                    )
                if isfile(template_path):
                    template_file = open(template_path, 'rb')

            if not template_file:
                template_path = join_path(template_root, '%s.genshi' % pname)

            template_file = open(template_path, 'rb')
            page_template = MarkupTemplate(
                template_file.read(), loader=template_loader, encoding='utf-8'
                )
            template_file.close()

            poutput = page_template.generate(
                items=items[:],
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                root_path=output_path,
                **siteinfo
                ).render(format)

            poutput = unicode(poutput, output_encoding)

            if mode:
                output = template.generate(
                    alternative_content=poutput,
                    authors=authors,
                    **siteinfo
                    ).render(format)
            else:
                output = poutput

            # @/@ wtf is this needed???
            if isinstance(output, unicode):
                output = output.encode(output_encoding)

            output_file = open(join_path(output_path, name), 'wb')
            output_file.write(output)
            output_file.close()
Example #12
 def _save_to_pickle(self, filename, df):
     """
     This function saves the pandas dataframe to pickle file
     """
     with open(os.path.join(basedir, "dataset", filename), "wb") as f:
         dump_pickle(df, f)
Example #13
 def persist_data_file():
     if data_file:
         data_file_obj = open(data_file, 'wb')
         dump_pickle(data_dict, data_file_obj)
         data_file_obj.close()