示例#1
0
def main(src_info, evitar_iso, verbose, desconectado,
         procesar_articles, include_windows, tarball):
    """Run the whole image generation pipeline, one step after the other.

    Args:
        src_info: base directory of the source data; its "articles"
            subdirectory is the one processed, and the directory itself is
            handed to ``copiarAssets``.
        evitar_iso: if true, the ISO is not built.
        verbose: verbosity flag forwarded to the processing steps.
        desconectado: if true, the images are not downloaded from the net.
        procesar_articles: if true, the articles are preprocessed and the
            images log, the index and the article blocks are generated.
        include_windows: if true, the Windows autorun files are copied.
        tarball: if truthy, it is passed to ``build_tarball``.

    Exits the process with status 1 when the articles must be processed but
    the articles directory does not exist.
    """
    if procesar_articles:
        # SuffixTree is optional: only warn when it is not available
        try:
            import SuffixTree  # NOQA
        except ImportError:
            print(NO_ST_MSG)

    articulos = path.join(src_info, "articles")

    mensaje("Comenzando!")
    preparaTemporal(procesar_articles)

    mensaje("Copiando los assets")
    copiarAssets(src_info, config.DIR_ASSETS)

    if procesar_articles:
        mensaje("Preprocesando")
        if not path.exists(articulos):
            print("\nERROR: No se encuentra el directorio %r" % articulos)
            print("Este directorio es obligatorio para el procesamiento general")
            # this is a failure: report it through a non-zero exit status
            sys.exit(1)
        cantnew, cantold = preprocesar.run(articulos, verbose)
        print('  total %d páginas procesadas' % cantnew)
        print('      y %d que ya estaban de antes' % cantold)

        mensaje("Calculando los que quedan y los que no")
        preprocesar.calcula_top_htmls()

        mensaje("Generando el log de imágenes")
        taken, adesc = extraer.run(verbose)
        print('  total: %5d imágenes extraídas' % taken)
        print('         %5d a descargar' % adesc)
    else:
        mensaje("Evitamos procesar artículos y generar el log de imágenes")

    mensaje("Recalculando porcentajes de reducción")
    calcular.run(verbose)

    if not desconectado:
        mensaje("Descargando las imágenes de la red")
        download.traer(verbose)

    mensaje("Reduciendo las imágenes descargadas")
    # the value returned by this step is not used here
    reducir.run(verbose)

    mensaje("Emblocando las imágenes reducidas")
    # group the images into blocks
    result = ImageManager.generar_bloques(verbose)
    print('  total: %d bloques con %d imags' % result)

    if procesar_articles:
        mensaje("Generando el índice")
        result = cdpindex.generar_de_html(articulos, verbose)
        print('  total: %d archivos' % result)

        mensaje("Generando los bloques de artículos")
        result = ArticleManager.generar_bloques(verbose)
        print('  total: %d bloques con %d archivos y %d redirects' % result)
    else:
        mensaje("Evitamos generar el índice y los bloques")

    mensaje("Copiando las fuentes")
    copiarSources()

    mensaje("Copiando los indices")
    dest_src = path.join(config.DIR_CDBASE, "cdpedia", "indice")
    # copytree needs a destination that does not exist yet
    if os.path.exists(dest_src):
        shutil.rmtree(dest_src)
    shutil.copytree(config.DIR_INDICE, dest_src)

    if include_windows:
        mensaje("Copiando cosas para Windows")
        copy_dir("resources/autorun.win/cdroot", config.DIR_CDBASE)

    mensaje("Generamos la config para runtime")
    genera_run_config()

    if not evitar_iso:
        mensaje("Armamos el ISO")
        armarIso("cdpedia.iso")

    if tarball:
        mensaje("Armamos el tarball con %r" % (tarball,))
        build_tarball(tarball)

    mensaje("Todo terminado!")
示例#2
0
def main(lang, src_info, version, lang_config, gendate,
         verbose=False, desconectado=False, procesar_articles=True):
    """Generate the CDPedia image (ISO or tarball) for a language/version.

    Args:
        lang: language key, looked up in ``config.imagtypes``; also passed
            to the article blocks generation and used in the output name.
        src_info: base directory of the source data; its "articles"
            subdirectory is the one processed.
        version: key of the image configuration inside the language entry;
            the selected entry is stored in ``config.imageconf``.
        lang_config: stored as ``config.langconf``.
        gendate: generation date, used to build the output name.
        verbose: verbosity flag forwarded to some of the steps.
        desconectado: if true, the images are not downloaded.
        procesar_articles: if false, articles are not preprocessed and the
            index and article blocks are not generated.

    Raises:
        EnvironmentError: if the articles must be processed but the
            articles directory does not exist.
        ValueError: if the configured image type is neither "iso" nor
            "tarball".

    Exits the process with status 1 on an invalid language or version.
    """
    # don't affect the rest of the machine
    make_it_nicer()

    if procesar_articles:
        # SuffixTree is optional: only warn when it is not available
        try:
            import SuffixTree  # NOQA
        except ImportError:
            logger.warning(NO_ST_MSG)

    # validate lang and versions, and fix config with selected data
    logger.info("Fixing config for lang=%r version=%r", lang, version)
    try:
        _lang_conf = config.imagtypes[lang]
    except KeyError:
        print("Not a valid language! try one of %s" % (config.imagtypes.keys(),))
        # invalid input is a failure: exit with a non-zero status
        sys.exit(1)
    try:
        config.imageconf = _lang_conf[version]
    except KeyError:
        print("Not a valid version! try one of %s" % (_lang_conf.keys(),))
        sys.exit(1)
    config.langconf = lang_config

    logger.info("Starting!")
    preparaTemporal(procesar_articles)

    logger.info("Copying the assets and locale files")
    copy_assets(src_info, config.DIR_ASSETS)
    shutil.copytree('locale', path.join(config.DIR_CDBASE, "locale"))

    articulos = path.join(src_info, "articles")
    if procesar_articles:
        logger.info("Preprocessing")
        if not path.exists(articulos):
            logger.error("Couldn't find articles dir: %r", articulos)
            raise EnvironmentError("Directory not found, can't continue")
        cantnew, cantold = preprocesar.run(articulos)
        logger.info("Processed pages: %d new, %d from before",
                    cantnew, cantold)

        logger.info("Calculating which stay and which don't")
        preprocesar.pages_selector.calculate()

        logger.info("Generating the images log")
        taken, adesc = extract.run()
        logger.info("Extracted %d images, need to download %d", taken, adesc)
    else:
        logger.info("Avoid processing articles and generating images log")

    logger.info("Recalculating the reduction percentages.")
    calcular.run()

    if not desconectado:
        logger.info("Downloading the images from the internet")
        download.retrieve()

    logger.info("Reducing the downloaded images")
    reducir.run(verbose)

    logger.info("Putting the reduced images into blocks")
    # group the images into blocks
    q_blocks, q_images = ImageManager.generar_bloques(verbose)
    logger.info("Got %d blocks with %d images", q_blocks, q_images)

    if not procesar_articles:
        logger.info("Not generating index and blocks (by user request)")
    elif preprocesar.pages_selector.same_info_through_runs:
        logger.info("Same articles than previous run "
                    "(not generating index and blocks)")
    else:
        logger.info("Generating the index")
        result = cdpindex.generar_de_html(articulos, verbose)
        logger.info("Got %d files", result)
        logger.info("Generating the articles blocks")
        q_blocks, q_files, q_redirs = ArticleManager.generar_bloques(lang,
                                                                     verbose)
        logger.info("Got %d blocks with %d files and %d redirects",
                    q_blocks, q_files, q_redirs)

    logger.info("Copying the sources")
    copy_sources()

    logger.info("Generating the links to blocks and indexes")
    links = (
        (config.DIR_BLOQUES, "bloques"),  # blocks
        (config.DIR_INDICE, "indice"),  # indexes
    )
    for target, link_name in links:
        dest = path.join(config.DIR_CDBASE, "cdpedia", link_name)
        # lexists (not exists) so a dangling link left by a previous run is
        # also removed; otherwise os.symlink fails because dest is there
        if os.path.lexists(dest):
            os.remove(dest)
        os.symlink(path.abspath(target), dest)

    if config.imageconf["windows"]:
        logger.info("Copying Windows stuff")
        # generated by pyinstaller 2.0
        copy_dir("resources/autorun.win/cdroot", config.DIR_CDBASE)

    logger.info("Generating runtime config")
    genera_run_config()

    base_dest_name = "cdpedia-%s-%s-%s-%s" % (lang, config.VERSION, gendate, version)
    if config.imageconf["type"] == "iso":
        logger.info("Building the ISO: %r", base_dest_name)
        build_iso(base_dest_name)
    elif config.imageconf["type"] == "tarball":
        logger.info("Building the tarball: %r", base_dest_name)
        build_tarball(base_dest_name)
    else:
        raise ValueError("Unrecognized image type")

    logger.info("All done!")
示例#3
0
"""

    # Build the command line parser, using `msg` as its usage text.
    parser = optparse.OptionParser()
    parser.set_usage(msg)
    # -v / --verbose: boolean flag, stored in options.verbose
    parser.add_option("-v",
                      "--verbose",
                      action="store_true",
                      dest="verbose",
                      help="muestra info de lo que va haciendo")

    # -i / --image: boolean flag, stored in options.image
    parser.add_option("-i",
                      "--image",
                      action="store_true",
                      dest="image",
                      help=u"busca en imagenes (busca artículos por default)")

    (options, args) = parser.parse_args()

    # Exactly one positional argument is required; otherwise show the help
    # and leave.
    if len(args) != 1:
        parser.print_help()
        exit()

    # The positional argument is decoded from UTF-8 before being used.
    nom_item = args[0].decode("utf8")
    # bool() normalizes the flag: it is not True when -v was not given
    verbose = bool(options.verbose)
    # Pick the manager to work with: images when -i was given, articles
    # otherwise.
    if options.image:
        manager = ImageManager(verbose=verbose)
    else:
        manager = ArticleManager(verbose=verbose)

    main(manager, nom_item, verbose)
示例#4
0
def main(lang,
         src_info,
         version,
         lang_config,
         gendate,
         verbose=False,
         desconectado=False,
         procesar_articles=True):
    """Generate the CDPedia image (ISO or tarball) for a language/version.

    Args:
        lang: language key, looked up in ``config.imagtypes``; also passed
            to the article blocks generation and used in the output name.
        src_info: base directory of the source data; its "articles"
            subdirectory is the one processed.
        version: key of the image configuration inside the language entry;
            the selected entry is stored in ``config.imageconf``.
        lang_config: stored as ``config.langconf``.
        gendate: generation date, used to build the output name.
        verbose: verbosity flag forwarded to some of the steps.
        desconectado: if true, the images are not downloaded.
        procesar_articles: if false, articles are not preprocessed and the
            index and article blocks are not generated.

    Raises:
        EnvironmentError: if the articles must be processed but the
            articles directory does not exist.
        ValueError: if the configured image type is neither "iso" nor
            "tarball".

    Exits the process with status 1 on an invalid language or version.
    """
    # don't affect the rest of the machine
    make_it_nicer()

    if procesar_articles:
        # SuffixTree is optional: only warn when it is not available
        try:
            import SuffixTree  # NOQA
        except ImportError:
            logger.warning(
                "Import error on SuffixTree; compressed index generation will be REALLY slow. "
                "Please install it (download, python2 setup.py build, python2 setup.py install) "
                "from here:  http://taniquetil.com.ar/facundo/SuffixTree-0.7.1-8bit.tar.bz2"
            )

    # validate lang and versions, and fix config with selected data
    logger.info("Fixing config for lang=%r version=%r", lang, version)
    try:
        _lang_conf = config.imagtypes[lang]
    except KeyError:
        print("ERROR: %r is not a valid language! try one of %s" %
              (lang, config.imagtypes.keys()))
        # invalid input is a failure: exit with a non-zero status
        sys.exit(1)
    try:
        config.imageconf = _lang_conf[version]
    except KeyError:
        print("ERROR: %r is not a valid version! try one of %s" %
              (version, _lang_conf.keys()))
        sys.exit(1)
    config.langconf = lang_config

    logger.info("Starting!")
    preparaTemporal(procesar_articles)

    logger.info("Copying the assets and locale files")
    copy_assets(src_info, config.DIR_ASSETS)
    shutil.copytree('locale', path.join(config.DIR_CDBASE, "locale"))

    articulos = path.join(src_info, "articles")
    if procesar_articles:
        logger.info("Preprocessing")
        if not path.exists(articulos):
            logger.error("Couldn't find articles dir: %r", articulos)
            raise EnvironmentError("Directory not found, can't continue")
        preprocesar.run(articulos)

        logger.info("Calculating which stay and which don't")
        preprocesar.pages_selector.calculate()

        logger.info("Generating the images log")
        taken, adesc = extract.run()
        logger.info("Extracted %d images, need to download %d", taken, adesc)
    else:
        logger.info("Avoid processing articles and generating images log")

    logger.info("Recalculating the reduction percentages.")
    calcular.run()

    if not desconectado:
        logger.info("Downloading the images from the internet")
        download.retrieve()

    logger.info("Reducing the downloaded images")
    reducir.run(verbose)

    logger.info("Putting the reduced images into blocks")
    # group the images into blocks
    q_blocks, q_images = ImageManager.generar_bloques(verbose)
    logger.info("Got %d blocks with %d images", q_blocks, q_images)

    if not procesar_articles:
        logger.info("Not generating index and blocks (by user request)")
    elif preprocesar.pages_selector.same_info_through_runs:
        logger.info("Same articles than previous run "
                    "(not generating index and blocks)")
    else:
        logger.info("Generating the index")
        result = cdpindex.generar_de_html(articulos, verbose)
        logger.info("Got %d files", result)
        logger.info("Generating the articles blocks")
        q_blocks, q_files, q_redirs = ArticleManager.generar_bloques(
            lang, verbose)
        logger.info("Got %d blocks with %d files and %d redirects", q_blocks,
                    q_files, q_redirs)

    logger.info("Copying the sources")
    copy_sources()

    logger.info("Generating the links to blocks and indexes")
    links = (
        (config.DIR_BLOQUES, "bloques"),  # blocks
        (config.DIR_INDICE, "indice"),  # indexes
    )
    for target, link_name in links:
        dest = path.join(config.DIR_CDBASE, "cdpedia", link_name)
        # lexists (not exists) so a dangling link left by a previous run is
        # also removed; otherwise os.symlink fails because dest is there
        if os.path.lexists(dest):
            os.remove(dest)
        os.symlink(path.abspath(target), dest)

    if config.imageconf["windows"]:
        logger.info("Copying Windows stuff")
        # generated by pyinstaller 2.0
        copy_dir("resources/autorun.win/cdroot", config.DIR_CDBASE)

    logger.info("Generating runtime config")
    genera_run_config()

    base_dest_name = "cdpedia-%s-%s-%s-%s" % (lang, config.VERSION, gendate,
                                              version)
    if config.imageconf["type"] == "iso":
        logger.info("Building the ISO: %r", base_dest_name)
        build_iso(base_dest_name)
    elif config.imageconf["type"] == "tarball":
        logger.info("Building the tarball: %r", base_dest_name)
        build_tarball(base_dest_name)
    else:
        raise ValueError("Unrecognized image type")

    logger.info("All done!")
示例#5
0
def main(lang,
         src_info,
         version,
         lang_config,
         gendate,
         images_dump_dir,
         verbose=False,
         desconectado=False,
         process_articles=True):
    """Generate the CDPedia tarball or iso.

    Args:
        lang: language key, looked up in ``config.imagtypes``; it is also
            stored in ``config.LANGUAGE`` when that is still unset.
        src_info: base directory of the source data; its "articles"
            subdirectory is the one processed.
        version: key of the image configuration inside the language entry;
            the selected entry is stored in ``config.imageconf``.
        lang_config: stored as ``config.langconf``; its 'second_language'
            entry is read with ``.get`` and it is passed to the runtime
            config generation.
        gendate: generation date, used to build the output name.
        images_dump_dir: directory handed to the image download, scale and
            embed steps.
        verbose: verbosity flag forwarded to some of the steps.
        desconectado: if true, the images are not downloaded.
        process_articles: if false, articles are not preprocessed and the
            index and article blocks are not generated.

    Raises:
        EnvironmentError: if the articles must be processed but the
            articles directory does not exist.
        ValueError: if the configured image type is neither "iso" nor
            "tarball".

    Exits the process with status 1 on an invalid language or version.
    """
    # don't affect the rest of the machine
    make_it_nicer()

    # set language in config
    if config.LANGUAGE is None:
        config.LANGUAGE = lang
        config.URL_WIKIPEDIA = config.URL_WIKIPEDIA_TPL.format(lang=lang)

    # validate lang and versions, and fix config with selected data
    logger.info("Fixing config for lang=%r version=%r", lang, version)
    try:
        _lang_conf = config.imagtypes[lang]
    except KeyError:
        available_langs = list(config.imagtypes.keys())
        logger.error("%r is not a valid language! try one of %s", lang,
                     available_langs)
        # invalid input is a failure: exit with a non-zero status
        sys.exit(1)
    try:
        config.imageconf = _lang_conf[version]
    except KeyError:
        available_versions = list(_lang_conf.keys())
        logger.error("%r is not a valid version! try one of %s", version,
                     available_versions)
        sys.exit(1)
    config.langconf = lang_config

    logger.info("Starting!")
    prepare_temporary_dirs(process_articles)

    logger.info("Copying the assets and locale files")
    dst_assets = os.path.join(config.DIR_CDBASE, 'assets')
    copy_assets(src_info, dst_assets)
    link(os.path.join(src_info, 'portal_pages.txt'), config.DIR_TEMP)
    copy_dir('locale', path.join(config.DIR_CDBASE, "locale"))
    set_locale(lang_config.get('second_language'), record=True)

    logger.info("Copying '%s' stylesheet and associated media resources",
                config.CSS_FILENAME)
    copy_css(src_info, dst_assets)

    articulos = path.join(src_info, "articles")
    if process_articles:
        logger.info("Preprocessing")
        if not path.exists(articulos):
            logger.error("Couldn't find articles dir: %r", articulos)
            raise EnvironmentError("Directory not found, can't continue")
        preprocess.run(articulos)

        logger.info("Calculating which stay and which don't")
        preprocess.pages_selector.calculate()

        logger.info("Generating the images log")
        taken, adesc = extract.run()
        logger.info("Extracted %d images, need to download %d", taken, adesc)
    else:
        logger.info("Avoid processing articles and generating images log")

    logger.info("Recalculating the reduction percentages.")
    calculate.run()

    if not desconectado:
        logger.info("Downloading the images from the internet")
        download.retrieve(images_dump_dir)

    logger.info("Reducing the downloaded images")
    scale.run(verbose, images_dump_dir)

    if config.EMBED_IMAGES:
        logger.info("Embedding selected images")
        embed.run(images_dump_dir)

    logger.info("Putting the reduced images into blocks")
    # group the images into blocks
    q_blocks, q_images = ImageManager.generar_bloques(verbose)
    logger.info("Got %d blocks with %d images", q_blocks, q_images)

    if not process_articles:
        logger.info("Not generating index and blocks (by user request)")
    elif preprocess.pages_selector.same_info_through_runs:
        logger.info("Same articles than previous run "
                    "(not generating index and blocks)")
    else:
        logger.info("Generating the index")
        result = cdpindex.generate_from_html(articulos, verbose)
        logger.info("Got %d files", result)
        logger.info("Generating the articles blocks")
        q_blocks, q_files, q_redirs = ArticleManager.generar_bloques(
            lang, verbose)
        logger.info("Got %d blocks with %d files and %d redirects", q_blocks,
                    q_files, q_redirs)

    logger.info("Copying the sources and libs")
    copy_sources()
    generate_libs()

    # Copy python docs
    pydocs.clone(lang, lang_config, os.path.dirname(src_info))

    logger.info("Generating the links to blocks and indexes")
    links = (
        (config.DIR_PAGES_BLOCKS, "pages"),  # pages blocks
        (config.DIR_IMAGES_BLOCKS, "images"),  # images blocks
        (config.DIR_INDICE, "indice"),  # indexes
    )
    for target, link_name in links:
        dest = path.join(config.DIR_CDBASE, link_name)
        # lexists (not exists) so a dangling link left by a previous run is
        # also removed; otherwise os.symlink fails because dest is there
        if os.path.lexists(dest):
            os.remove(dest)
        os.symlink(path.abspath(target), dest)

    if config.imageconf["windows"]:
        logger.info("Copying Windows stuff")
        copy_dir("resources/autorun.win/cdroot", config.DIR_CDBASE)
        # unpack embeddable python distribution for win32
        py_win_zip = "resources/autorun.win/python-win32.zip"
        py_win_dst = os.path.join(config.DIR_CDBASE, 'python')
        with zipfile.ZipFile(py_win_zip, 'r') as zh:
            zh.extractall(py_win_dst)

    logger.info("Generating runtime config")
    gen_run_config(lang_config)

    base_dest_name = "cdpedia-%s-%s-%s-%s" % (lang, config.VERSION, gendate,
                                              version)
    if config.imageconf["type"] == "iso":
        logger.info("Building the ISO: %r", base_dest_name)
        build_iso(base_dest_name)
    elif config.imageconf["type"] == "tarball":
        logger.info("Building the tarball: %r", base_dest_name)
        build_tarball(base_dest_name)
    else:
        raise ValueError("Unrecognized image type")

    logger.info("All done!")