Example #1
def get_links(tref, with_text=True, with_sheet_links=False):
    """
    Return a list of links tied to `tref` in client format.
    If `with_text`, retrieve texts for each link.
    If `with_sheet_links`, include results for sheets in groups that are listed in the TOC.
    """
    links = []
    oref = Ref(tref)
    nRef = oref.normal()
    lenRef = len(nRef)
    reRef = oref.regex() if oref.is_range() else None

    # for storing all the section level texts that need to be looked up
    texts = {}

    linkset = LinkSet(oref)
    # For all links that mention ref (in any position)
    for link in linkset:
        # each link contains 2 refs in a list
        # find the position (0 or 1) of "anchor", the one we're getting links for
        # If both sides of the ref are in the same section of a text, only one direction will be used.  bug? maybe not.
        if reRef:
            pos = 0 if any(re.match(reRef, expanded_ref) for expanded_ref in link.expandedRefs0) else 1
        else:
            pos = 0 if any(nRef == expanded_ref[:lenRef] for expanded_ref in link.expandedRefs0) else 1
        try:
            com = format_link_object_for_client(link, False, nRef, pos)
        except InputError:
            logger.warning(u"Bad link: {} - {}".format(link.refs[0], link.refs[1]))
            continue
        except AttributeError as e:
            logger.error(u"AttributeError in presenting link: {} - {} : {}".format(link.refs[0], link.refs[1], e))
            continue

        # Rather than getting text with each link, walk through all links here,
        # caching text so that redundant DB calls can be minimized
        # If link is spanning, split into section refs and rejoin
        try:
            if with_text:
                original_com_oref = Ref(com["ref"])
                com_orefs = original_com_oref.split_spanning_ref()
                for com_oref in com_orefs:
                    top_oref = com_oref.top_section_ref()
                    # Look up and save the top-level text, only if we haven't already
                    top_nref = top_oref.normal()
                    if top_nref not in texts:
                        for lang in ("en", "he"):
                            top_nref_tc = TextChunk(top_oref, lang)
                            versionInfoMap = None if not top_nref_tc._versions else {
                                v.versionTitle: {
                                    'license': getattr(v, 'license', u''),
                                    'versionTitleInHebrew': getattr(v, 'versionTitleInHebrew', u'')
                                } for v in top_nref_tc._versions
                            }
                            if top_nref_tc.is_merged:
                                version = top_nref_tc.sources
                                license = [versionInfoMap[vtitle]['license'] for vtitle in version]
                                versionTitleInHebrew = [versionInfoMap[vtitle]['versionTitleInHebrew'] for vtitle in version]
                            elif top_nref_tc._versions:
                                version_obj = top_nref_tc.version()
                                version = version_obj.versionTitle
                                license = versionInfoMap[version]['license']
                                versionTitleInHebrew = versionInfoMap[version]['versionTitleInHebrew']
                            else:
                                # version doesn't exist in this language
                                version = None
                                license = None
                                versionTitleInHebrew = None
                            if top_nref not in texts:
                                texts[top_nref] = {}
                            texts[top_nref][lang] = {
                                'ja': top_nref_tc.ja(),
                                'version': version,
                                'license': license,
                                'versionTitleInHebrew': versionTitleInHebrew
                            }
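                    # Convert 1-based ref sections to 0-based indices for the jagged-array lookups below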
                    com_sections = [i - 1 for i in com_oref.sections]
                    com_toSections = [i - 1 for i in com_oref.toSections]
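                    # Attach the cached text to this link, mapping each language onto its client-facing field names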
                    for lang, (attr, versionAttr, licenseAttr, vtitleInHeAttr) in (("he", ("he","heVersionTitle","heLicense","heVersionTitleInHebrew")), ("en", ("text", "versionTitle","license","versionTitleInHebrew"))):
                        temp_nref_data = texts[top_nref][lang]
                        res = temp_nref_data['ja'].subarray(com_sections[1:], com_toSections[1:]).array()
                        if attr not in com:
                            com[attr] = res
                        else:
                            if isinstance(com[attr], basestring):
                                com[attr] = [com[attr]]
                            com[attr] += res
                        temp_version = temp_nref_data['version']
                        if isinstance(temp_version, basestring) or temp_version is None:
                            com[versionAttr] = temp_version
                            com[licenseAttr] = temp_nref_data['license']
                            com[vtitleInHeAttr] = temp_nref_data['versionTitleInHebrew']
                        else:
                            # merged. find exact version titles for each segment
                            start_sources = temp_nref_data['ja'].distance([], com_sections[1:])
                            if com_sections == com_toSections:
                                # simplify for the common case
                                versions = temp_version[start_sources] if start_sources < len(temp_version) - 1 else None
                                licenses = temp_nref_data['license'][start_sources] if start_sources < len(temp_nref_data['license']) - 1 else None
                                versionTitlesInHebrew = temp_nref_data['versionTitleInHebrew'][start_sources] if start_sources < len(temp_nref_data['versionTitleInHebrew']) - 1 else None
                            else:
                                end_sources = temp_nref_data['ja'].distance([], com_toSections[1:])
                                versions = temp_version[start_sources:end_sources + 1]
                                licenses = temp_nref_data['license'][start_sources:end_sources + 1]
                                versionTitlesInHebrew = temp_nref_data['versionTitleInHebrew'][start_sources:end_sources + 1]
                            com[versionAttr] = versions
                            com[licenseAttr] = licenses
                            com[vtitleInHeAttr] = versionTitlesInHebrew
            links.append(com)
        except NoVersionFoundError as e:
            logger.warning(u"Trying to get non existent text for ref '{}'. Link refs were: {}".format(top_nref, link.refs))
            continue

    # Hard-coding automatic display of links to an underlying text, e.g. bound_texts = ("Rashba on ",).
    # E.g., when requesting "Steinsaltz on X" also include links to "X" as though they were connected directly to Steinsaltz.
    bound_texts = ("Steinsaltz on ",)
    for prefix in bound_texts:
        if nRef.startswith(prefix):
            base_ref = nRef[len(prefix):]
            base_links = get_links(base_ref)
            def add_prefix(link):
                link["anchorRef"] = prefix + link["anchorRef"]
                link["anchorRefExpanded"] = [prefix + l for l in link["anchorRefExpanded"]]
                return link
            base_links = [add_prefix(link) for link in base_links]
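            # Keep only base links that aren't already present and that don't point back at themselves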
            orig_links_refs = [(origlink['sourceRef'], origlink['anchorRef']) for origlink in links]
            base_links = filter(lambda x: ((x['sourceRef'], x['anchorRef']) not in orig_links_refs) and (x["sourceRef"] != x["anchorRef"]), base_links)
            links += base_links

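    # Filter out links whose anchorRef resolves only to a section (not a segment)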
    links = [l for l in links if not Ref(l["anchorRef"]).is_section_level()]


    groups = library.get_groups_in_library()
    if with_sheet_links and len(groups):
        sheet_links = get_sheets_for_ref(tref, in_group=groups)
        formatted_sheet_links = [format_sheet_as_link(sheet) for sheet in sheet_links]
        links += formatted_sheet_links

    return links
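A minimal usage sketch, not part of the original example: the ref string below is an illustrative assumption, and get_links is assumed to be importable alongside the Sefaria model objects it references (Ref, LinkSet, TextChunk, library).

# Hypothetical call -- "Genesis 1:1" is an illustrative ref, not taken from the example.
links = get_links("Genesis 1:1", with_text=True, with_sheet_links=False)
for link in links:
    # anchorRef/sourceRef are set by format_link_object_for_client;
    # "text"/"he" are filled in by the with_text branch above.
    print(u"{} -> {}".format(link["anchorRef"], link["sourceRef"]))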
Example #2
def get_links(tref, with_text=True, with_sheet_links=False):
    """
    Return a list of links tied to `tref` in client format.
    If `with_text`, retrieve texts for each link.
    If `with_sheet_links`, include results for sheets in groups that are listed in the TOC.
    """
    links = []
    oref = Ref(tref)
    nRef = oref.normal()
    lenRef = len(nRef)
    reRef = oref.regex() if oref.is_range() else None

    # for storing all the section level texts that need to be looked up
    texts = {}

    linkset = LinkSet(oref)
    # For all links that mention ref (in any position)
    for link in linkset:
        # each link contains 2 refs in a list
        # find the position (0 or 1) of "anchor", the one we're getting links for
        # If both sides of the ref are in the same section of a text, only one direction will be used.  bug? maybe not.
        if reRef:
            pos = 0 if re.match(reRef, link.refs[0]) else 1
        else:
            pos = 0 if nRef == link.refs[0][:lenRef] else 1
        try:
            com = format_link_object_for_client(link, False, nRef, pos)
        except InputError:
            # logger.warning("Bad link: {} - {}".format(link.refs[0], link.refs[1]))
            continue
        except AttributeError as e:
            logger.error(u"AttributeError in presenting link: {} - {} : {}".format(link.refs[0], link.refs[1], e))
            continue

        # Rather than getting text with each link, walk through all links here,
        # caching text so that redundant DB calls can be minimized
        # If link is spanning, split into section refs and rejoin
        try:
            if with_text:
                original_com_oref = Ref(com["ref"])
                com_orefs = original_com_oref.split_spanning_ref()
                for com_oref in com_orefs:
                    top_oref = com_oref.top_section_ref()
                    # Look up and save the top-level text, only if we haven't already
                    top_nref = top_oref.normal()
                    if top_nref not in texts:
                        for lang in ("en", "he"):
                            top_nref_tc = TextChunk(top_oref, lang)
                            versionInfoMap = None if not top_nref_tc._versions else {
                                v.versionTitle: {
                                    'license': getattr(v, 'license', u''),
                                    'versionTitleInHebrew': getattr(v, 'versionTitleInHebrew', u'')
                                } for v in top_nref_tc._versions
                            }
                            if top_nref_tc.is_merged:
                                version = top_nref_tc.sources
                                license = [versionInfoMap[vtitle]['license'] for vtitle in version]
                                versionTitleInHebrew = [versionInfoMap[vtitle]['versionTitleInHebrew'] for vtitle in version]
                            elif top_nref_tc._versions:
                                version_obj = top_nref_tc.version()
                                version = version_obj.versionTitle
                                license = versionInfoMap[version]['license']
                                versionTitleInHebrew = versionInfoMap[version]['versionTitleInHebrew']
                            else:
                                # version doesn't exist in this language
                                version = None
                                license = None
                                versionTitleInHebrew = None
                            if top_nref not in texts:
                                texts[top_nref] = {}
                            texts[top_nref][lang] = {
                                'ja': top_nref_tc.ja(),
                                'version': version,
                                'license': license,
                                'versionTitleInHebrew': versionTitleInHebrew
                            }
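                    # Convert 1-based ref sections to 0-based indices for the jagged-array lookups below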
                    com_sections = [i - 1 for i in com_oref.sections]
                    com_toSections = [i - 1 for i in com_oref.toSections]
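                    # Attach the cached text to this link, mapping each language onto its client-facing field names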
                    for lang, (attr, versionAttr, licenseAttr, vtitleInHeAttr) in (("he", ("he","heVersionTitle","heLicense","heVersionTitleInHebrew")), ("en", ("text", "versionTitle","license","versionTitleInHebrew"))):
                        temp_nref_data = texts[top_nref][lang]
                        res = temp_nref_data['ja'].subarray(com_sections[1:], com_toSections[1:]).array()
                        if attr not in com:
                            com[attr] = res
                        else:
                            if isinstance(com[attr], basestring):
                                com[attr] = [com[attr]]
                            com[attr] += res
                        temp_version = temp_nref_data['version']
                        if isinstance(temp_version, basestring) or temp_version is None:
                            com[versionAttr] = temp_version
                            com[licenseAttr] = temp_nref_data['license']
                            com[vtitleInHeAttr] = temp_nref_data['versionTitleInHebrew']
                        else:
                            # merged. find exact version titles for each segment
                            start_sources = temp_nref_data['ja'].distance([], com_sections[1:])
                            if com_sections == com_toSections:
                                # simplify for the common case
                                versions = temp_version[start_sources] if start_sources < len(temp_version) - 1 else None
                                licenses = temp_nref_data['license'][start_sources] if start_sources < len(temp_nref_data['license']) - 1 else None
                                versionTitlesInHebrew = temp_nref_data['versionTitleInHebrew'][start_sources] if start_sources < len(temp_nref_data['versionTitleInHebrew']) - 1 else None
                            else:
                                end_sources = temp_nref_data['ja'].distance([], com_toSections[1:])
                                versions = temp_version[start_sources:end_sources + 1]
                                licenses = temp_nref_data['license'][start_sources:end_sources + 1]
                                versionTitlesInHebrew = temp_nref_data['versionTitleInHebrew'][start_sources:end_sources + 1]
                            com[versionAttr] = versions
                            com[licenseAttr] = licenses
                            com[vtitleInHeAttr] = versionTitlesInHebrew
            links.append(com)
        except NoVersionFoundError as e:
            logger.warning("Trying to get non existent text for ref '{}'. Link refs were: {}".format(top_nref, link.refs))
            continue

    # Hard-coding automatic display of links to an underlying text, e.g. bound_texts = ("Rashba on ",).
    # E.g., when requesting "Steinsaltz on X" also include links to "X" as though they were connected directly to Steinsaltz.
    bound_texts = ("Steinsaltz on ",)
    for prefix in bound_texts:
        if nRef.startswith(prefix):
            base_ref = nRef[len(prefix):]
            base_links = get_links(base_ref)
            def add_prefix(link):
                link["anchorRef"] = prefix + link["anchorRef"]
                return link
            base_links = [add_prefix(link) for link in base_links]
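            # Keep only base links that aren't already present and that don't point back at themselves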
            orig_links_refs = [(origlink['sourceRef'], origlink['anchorRef']) for origlink in links]
            base_links = filter(lambda x: ((x['sourceRef'], x['anchorRef']) not in orig_links_refs) and (x["sourceRef"] != x["anchorRef"]), base_links)
            links += base_links

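    # Filter out links whose anchorRef resolves only to a section (not a segment)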
    links = [l for l in links if not Ref(l["anchorRef"]).is_section_level()]


    groups = library.get_groups_in_library()
    if with_sheet_links and len(groups):
        sheet_links = get_sheets_for_ref(tref, in_group=groups)
        formatted_sheet_links = [format_sheet_as_link(sheet) for sheet in sheet_links]
        links += formatted_sheet_links

    return links
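One notable difference from Example #1 is how the anchor position is picked for non-range refs: this version compares nRef against a prefix of the raw link.refs[0] instead of the link's expanded refs. Below is a standalone sketch of that heuristic, using plain strings as illustrative stand-ins for real link refs.

# Standalone sketch of the non-range anchor-position heuristic used above.
# The refs below are illustrative stand-ins, not taken from a real LinkSet.
def anchor_position(nRef, refs):
    """Return 0 if the first ref in the link starts with nRef, else 1."""
    lenRef = len(nRef)
    return 0 if nRef == refs[0][:lenRef] else 1

print(anchor_position("Genesis 1:1", ["Genesis 1:1", "Rashi on Genesis 1:1:1"]))  # 0
print(anchor_position("Genesis 1:1", ["Rashi on Genesis 1:1:1", "Genesis 1:1"]))  # 1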