Python get_version_list примеры, texts.get_version_list Python примеры использования

Пример #1

0

Показать файл

Файл: summaries.py Проект: rivkahcarl/Sefaria-Project

def flatten_toc(toc, include_categories=False, categories_in_titles=False, version_granularity=False):
    """
    Returns an array of strings which corresponds to each category and text in the
    Table of Contents in order.

    - toc: list of dicts. A dict with a "category" key is treated as a category
        whose children are listed under "contents"; a dict with a "title" key
        is treated as a text.
    - include_categories: whether to emit an entry for each category name itself.
    - categories_in_titles: whether to include each category preceding a text title,
        e.g., "Tanach > Torah > Genesis".
    - version_granularity: whether to include a separate entry for every text version
        (as listed by texts.get_version_list), formatted as
        "<title> > <Hebrew|English> > <versionTitle>.json".

    Every option applies at every nesting level of the table of contents.
    """
    results = []
    for x in toc:
        name = x.get("category", None) or x.get("title", None)
        if "category" in x:
            if include_categories:
                results.append(name)
            # Forward *all* options to the recursive call. Previously only
            # categories_in_titles was passed, so include_categories and
            # version_granularity were ignored for anything below the top level.
            subcats = flatten_toc(x["contents"],
                                  include_categories=include_categories,
                                  categories_in_titles=categories_in_titles,
                                  version_granularity=version_granularity)
            if categories_in_titles:
                subcats = ["%s > %s" % (name, y) for y in subcats]
            results.extend(subcats)

        elif "title" in x:
            if not version_granularity:
                results.append(name)
            else:
                versions = texts.get_version_list(name)
                for v in versions:
                    # Only "he" and "en" are mapped; any other language code
                    # raises KeyError, as before.
                    lang = {"he": "Hebrew", "en": "English"}[v["language"]]
                    results.append("%s > %s > %s.json" % (name, lang, v["versionTitle"]))

    return results

Пример #2

0

Показать файл

Файл: search.py Проект: rivkahcarl/Sefaria-Project

def index_text(tref, version=None, lang=None):
    """
    Index the text designated by tref.

    If version and lang are not both given, this function calls itself once
    for every version returned by texts.get_version_list(tref) and returns.

    When the ref has fewer sections than its index has section names, the
    text is fetched and each numbered child ref ("<tref>:<n>") is indexed
    recursively. The ref itself is then indexed as a whole document unless
    it sits two or more levels above the deepest section level.

    Errors raised while building the index request are printed, not raised.
    """
    global doc_count

    tref = model.Ref(tref).normal()

    # No specific version requested: fan out over every available version.
    if not version or not lang:
        for ver in texts.get_version_list(tref):
            index_text(tref, version=ver["versionTitle"], lang=ver["language"])
        return

    oref = model.Ref(tref).padded_ref()
    ref_depth = len(oref.sections)
    full_depth = len(oref.index.sectionNames)

    # The ref is above segment level: index each child individually.
    if ref_depth < full_depth:
        fetched = texts.get_text(tref,
                                 context=0,
                                 commentary=False,
                                 version=version,
                                 lang=lang)
        if "error" not in fetched:
            child_count = max(len(fetched["text"]), len(fetched["he"]))
            # NOTE(review): the child call omits version/lang, so each child
            # fans out over all versions again -- confirm this is intended.
            for number in range(1, child_count + 1):
                index_text("%s:%d" % (tref, number))
        else:
            print(fetched["error"])

    # Refs two or more levels above the deepest level are not indexed whole.
    if ref_depth < full_depth - 1:
        return

    document = make_text_index_document(tref, version, lang)
    if not document:
        return

    try:
        # Progress line on every 5000th document.
        if doc_count % 5000 == 0:
            print("[%d] Indexing %s / %s / %s" % (doc_count, tref, version,
                                                  lang))
        es.index('sefaria', 'text', document,
                 make_text_doc_id(tref, version, lang))
        doc_count += 1
    except Exception as err:
        print("ERROR indexing %s / %s / %s" % (tref, version, lang))
        pprint(err)

Пример #3

0

Показать файл

Файл: search.py Проект: rafirosenberg/Sefaria-Project

def index_text(ref, version=None, lang=None):
    """
    Index the text designated by ref.

    If version and lang are not both given, this function calls itself once
    for every version returned by texts.get_version_list(ref) and returns.

    When the parsed ref has fewer sections than section names, the text is
    fetched and each numbered child ref ("<ref>:<n>") is indexed recursively.
    The ref itself is then indexed as a whole document unless it sits two or
    more levels above the deepest section level.

    Errors raised while sending the document to the index are printed, not
    raised.
    """
    global doc_count

    ref = texts.norm_ref(unicode(ref))

    # No specific version requested: fan out over every available version.
    if not version or not lang:
        for ver in texts.get_version_list(ref):
            index_text(ref, version=ver["versionTitle"], lang=ver["language"])
        return

    pRef = texts.parse_ref(ref)

    # The ref is above segment level: index each child individually.
    if len(pRef["sections"]) < len(pRef["sectionNames"]):
        fetched = texts.get_text(ref,
                                 context=0,
                                 commentary=False,
                                 version=version,
                                 lang=lang)
        if "error" not in fetched:
            child_count = max(len(fetched["text"]), len(fetched["he"]))
            # NOTE(review): the child call omits version/lang, so each child
            # fans out over all versions again -- confirm this is intended.
            for number in range(1, child_count + 1):
                index_text("%s:%d" % (ref, number))
        else:
            print(fetched["error"])

    # Refs two or more levels above the deepest level are not indexed whole.
    if len(pRef["sections"]) < len(pRef["sectionNames"]) - 1:
        return

    document = make_text_index_document(ref, version, lang)
    if not document:
        return

    try:
        es.index(document, 'sefaria', 'text',
                 make_text_doc_id(ref, version, lang))
        doc_count += 1
    except Exception as err:
        print("Error indexing %s / %s / %s" % (ref, version, lang))
        print(err)

Пример #4

0

Показать файл

Файл: search.py Проект: rivkahcarl/Sefaria-Project

def index_text(tref, version=None, lang=None):
    """
    Index the text designated by tref.

    Without both a version and a lang, the function re-invokes itself for
    each entry of texts.get_version_list(tref) and then returns.

    With both given: if the ref has fewer sections than its index has section
    names, every numbered child ("<tref>:<n>") is indexed recursively; the
    ref itself is indexed as one document unless it is two or more levels
    above the deepest section level.

    Exceptions during the indexing request are printed and swallowed.
    """
    global doc_count

    tref = model.Ref(tref).normal()

    version_is_specified = version and lang
    if not version_is_specified:
        # Fan out: one recursive call per available version of this text.
        for entry in texts.get_version_list(tref):
            index_text(tref, version=entry["versionTitle"], lang=entry["language"])
        return

    oref = model.Ref(tref).padded_ref()
    sections_given = len(oref.sections)
    sections_possible = len(oref.index.sectionNames)

    if sections_given < sections_possible:
        # Above segment level, so descend into each child of this ref.
        content = texts.get_text(tref, context=0, commentary=False, version=version, lang=lang)
        if "error" in content:
            print(content["error"])
        else:
            longest = max(len(content["text"]), len(content["he"]))
            # NOTE(review): children are indexed without version/lang, which
            # triggers the all-versions fan-out for each -- confirm intended.
            for position in range(longest):
                index_text("%s:%d" % (tref, position + 1))

    if sections_given < sections_possible - 1:
        # Too far above the deepest level to index as a single document.
        return

    payload = make_text_index_document(tref, version, lang)
    if not payload:
        return

    try:
        if doc_count % 5000 == 0:
            # Periodic progress output.
            print("[%d] Indexing %s / %s / %s" % (doc_count, tref, version, lang))
        es.index('sefaria', 'text', payload, make_text_doc_id(tref, version, lang))
        doc_count += 1
    except Exception as exc:
        print("ERROR indexing %s / %s / %s" % (tref, version, lang))
        pprint(exc)

Пример #5

0

Показать файл

Файл: search.py Проект: Amichai/Sefaria-Project

def index_text(ref, version=None, lang=None):
    """
    Index the text designated by ref.

    Without both a version and a lang, the function re-invokes itself for
    each entry of texts.get_version_list(ref) and then returns.

    With both given: if the parsed ref has fewer sections than section names,
    every numbered child ("<ref>:<n>") is indexed recursively; the ref itself
    is indexed as one document unless it is two or more levels above the
    deepest section level.

    Exceptions during the indexing request are printed and swallowed.
    """
    global doc_count

    ref = texts.norm_ref(unicode(ref))

    version_is_specified = version and lang
    if not version_is_specified:
        # Fan out: one recursive call per available version of this text.
        for entry in texts.get_version_list(ref):
            index_text(ref, version=entry["versionTitle"], lang=entry["language"])
        return

    pRef = texts.parse_ref(ref)

    if len(pRef["sections"]) < len(pRef["sectionNames"]):
        # Above segment level, so descend into each child of this ref.
        content = texts.get_text(ref, context=0, commentary=False, version=version, lang=lang)
        if "error" in content:
            print(content["error"])
        else:
            longest = max(len(content["text"]), len(content["he"]))
            # NOTE(review): children are indexed without version/lang, which
            # triggers the all-versions fan-out for each -- confirm intended.
            for position in range(longest):
                index_text("%s:%d" % (ref, position + 1))

    if len(pRef["sections"]) < len(pRef["sectionNames"]) - 1:
        # Too far above the deepest level to index as a single document.
        return

    payload = make_text_index_document(ref, version, lang)
    if not payload:
        return

    try:
        es.index(payload, 'sefaria', 'text', make_text_doc_id(ref, version, lang))
        doc_count += 1
    except Exception as exc:
        print("Error indexing %s / %s / %s" % (ref, version, lang))
        print(exc)

Пример #6

0

Показать файл

def flatten_toc(toc,
                include_categories=False,
                categories_in_titles=False,
                version_granularity=False):
    """
    Returns an array of strings which corresponds to each category and text in the
    Table of Contents in order.

    - toc: list of dicts. A dict with a "category" key is treated as a category
        whose children are listed under "contents"; a dict with a "title" key
        is treated as a text.
    - include_categories: whether to emit an entry for each category name itself.
    - categories_in_titles: whether to include each category preceding a text title,
        e.g., "Tanach > Torah > Genesis".
    - version_granularity: whether to include a separate entry for every text version
        (as listed by texts.get_version_list), formatted as
        "<title> > <Hebrew|English> > <versionTitle>.json".

    Every option applies at every nesting level of the table of contents.
    """
    results = []
    for x in toc:
        name = x.get("category", None) or x.get("title", None)
        if "category" in x:
            if include_categories:
                results.append(name)
            # Forward *all* options to the recursive call. Previously only
            # categories_in_titles was passed, so include_categories and
            # version_granularity were ignored for anything below the top level.
            subcats = flatten_toc(x["contents"],
                                  include_categories=include_categories,
                                  categories_in_titles=categories_in_titles,
                                  version_granularity=version_granularity)
            if categories_in_titles:
                subcats = ["%s > %s" % (name, y) for y in subcats]
            results.extend(subcats)

        elif "title" in x:
            if not version_granularity:
                results.append(name)
            else:
                versions = texts.get_version_list(name)
                for v in versions:
                    # Only "he" and "en" are mapped; any other language code
                    # raises KeyError, as before.
                    lang = {"he": "Hebrew", "en": "English"}[v["language"]]
                    results.append(
                        "%s > %s > %s.json" % (name, lang, v["versionTitle"])
                    )

    return results

Python get_version_list примеры использования