def get_links(tref, with_text=True, with_sheet_links=False):
    """
    Return a list of links tied to 'ref' in client format.
    If `with_text`, retrieve texts for each link.
    If `with_sheet_links` include sheet results for sheets in groups which are listed in the TOC.

    :param tref: textual reference whose links are requested; parsed with `Ref(tref)`.
    :param with_text: when true, each link gets "text"/"he" content plus the
        version title, license and Hebrew version title fields for both languages.
    :param with_sheet_links: when true, links built from `get_sheets_for_ref`
        are appended to the result.
    :return: list of link dicts as produced by `format_link_object_for_client`,
        followed by any formatted sheet links.

    Links that raise InputError or AttributeError while being formatted, or
    NoVersionFoundError while their text is loaded, are logged and skipped.
    """
    links = []
    oref = Ref(tref)
    nRef = oref.normal()
    lenRef = len(nRef)
    reRef = oref.regex() if oref.is_range() else None

    # for storing all the section level texts that need to be looked up
    # (normalized section ref -> language -> text/version data)
    texts = {}

    # language -> (text key, version title key, license key, Hebrew version title key)
    lang_fields = (
        ("he", ("he", "heVersionTitle", "heLicense", "heVersionTitleInHebrew")),
        ("en", ("text", "versionTitle", "license", "versionTitleInHebrew")),
    )

    linkset = LinkSet(oref)
    # For all links that mention ref (in any position)
    for link in linkset:
        # each link contains 2 refs in a list
        # find the position (0 or 1) of "anchor", the one we're getting links for
        # If both sides of the ref are in the same section of a text, only one direction will be used.  bug? maybe not.
        if reRef:
            pos = 0 if any(re.match(reRef, expanded) for expanded in link.expandedRefs0) else 1
        else:
            pos = 0 if any(nRef == expanded[:lenRef] for expanded in link.expandedRefs0) else 1
        try:
            com = format_link_object_for_client(link, False, nRef, pos)
        except InputError:
            logger.warning(u"Bad link: {} - {}".format(link.refs[0], link.refs[1]))
            continue
        except AttributeError as e:
            logger.error(u"AttributeError in presenting link: {} - {} : {}".format(link.refs[0], link.refs[1], e))
            continue

        # Rather than getting text with each link, walk through all links here,
        # caching text so that redundant DB calls can be minimized
        # If link is spanning, split into section refs and rejoin
        top_nref = None  # reset per link so the handler below never logs a stale or unbound name
        try:
            if with_text:
                original_com_oref = Ref(com["ref"])
                com_orefs = original_com_oref.split_spanning_ref()
                for com_oref in com_orefs:
                    top_oref = com_oref.top_section_ref()
                    # Lookup and save top level text, only if we haven't already
                    top_nref = top_oref.normal()
                    if top_nref not in texts:
                        # Collect both languages first and publish them together, so a
                        # failure on the second language cannot leave a half-filled
                        # cache entry behind for later links to trip over.
                        section_texts = {}
                        for lang in ("en", "he"):
                            top_nref_tc = TextChunk(top_oref, lang)
                            versionInfoMap = None if not top_nref_tc._versions else {
                                v.versionTitle: {
                                    'license': getattr(v, 'license', u''),
                                    'versionTitleInHebrew': getattr(v, 'versionTitleInHebrew', u'')
                                } for v in top_nref_tc._versions
                            }
                            if top_nref_tc.is_merged:
                                # merged text: one entry per source version title
                                version = top_nref_tc.sources
                                license_info = [versionInfoMap[vtitle]['license'] for vtitle in version]
                                versionTitleInHebrew = [versionInfoMap[vtitle]['versionTitleInHebrew'] for vtitle in version]
                            elif top_nref_tc._versions:
                                version = top_nref_tc.version().versionTitle
                                license_info = versionInfoMap[version]['license']
                                versionTitleInHebrew = versionInfoMap[version]['versionTitleInHebrew']
                            else:
                                # version doesn't exist in this language
                                version = None
                                license_info = None
                                versionTitleInHebrew = None
                            section_texts[lang] = {
                                'ja': top_nref_tc.ja(),
                                'version': version,
                                'license': license_info,
                                'versionTitleInHebrew': versionTitleInHebrew
                            }
                        texts[top_nref] = section_texts

                    # zero-based addresses of the linked span
                    com_sections = [i - 1 for i in com_oref.sections]
                    com_toSections = [i - 1 for i in com_oref.toSections]
                    for lang, (attr, versionAttr, licenseAttr, vtitleInHeAttr) in lang_fields:
                        temp_nref_data = texts[top_nref][lang]
                        res = temp_nref_data['ja'].subarray(com_sections[1:], com_toSections[1:]).array()
                        if attr not in com:
                            com[attr] = res
                        else:
                            # a later piece of a spanning ref: append to what is already there
                            if isinstance(com[attr], basestring):
                                com[attr] = [com[attr]]
                            com[attr] += res
                        temp_version = temp_nref_data['version']
                        if isinstance(temp_version, basestring) or temp_version is None:
                            com[versionAttr] = temp_version
                            com[licenseAttr] = temp_nref_data['license']
                            com[vtitleInHeAttr] = temp_nref_data['versionTitleInHebrew']
                        else:
                            # merged. find exact version titles for each segment
                            per_segment = (
                                temp_version,
                                temp_nref_data['license'],
                                temp_nref_data['versionTitleInHebrew'],
                            )
                            start_sources = temp_nref_data['ja'].distance([], com_sections[1:])
                            if com_sections == com_toSections:
                                # simplify for the common case
                                # NOTE(review): the `len(...) - 1` bound yields None for the last
                                # index; this looks like a possible off-by-one -- confirm before changing.
                                versions, licenses, versionTitlesInHebrew = [
                                    values[start_sources] if start_sources < len(values) - 1 else None
                                    for values in per_segment
                                ]
                            else:
                                end_sources = temp_nref_data['ja'].distance([], com_toSections[1:])
                                versions, licenses, versionTitlesInHebrew = [
                                    values[start_sources:end_sources + 1] for values in per_segment
                                ]
                            com[versionAttr] = versions
                            com[licenseAttr] = licenses
                            com[vtitleInHeAttr] = versionTitlesInHebrew
            links.append(com)
        except NoVersionFoundError:
            logger.warning(u"Trying to get non existent text for ref '{}'. Link refs were: {}".format(top_nref, link.refs))
            continue

    # Harded-coding automatic display of links to an underlying text. bound_texts = ("Rashba on ",)
    # E.g., when requesting "Steinsaltz on X" also include links to "X" as though they were connected directly to Steinsaltz.
    bound_texts = ("Steinsaltz on ",)
    for prefix in bound_texts:
        if not nRef.startswith(prefix):
            continue
        base_ref = nRef[len(prefix):]
        # NOTE(review): the recursive call uses the default arguments, so base links
        # are fetched with text regardless of `with_text` -- confirm this is intended.
        base_links = get_links(base_ref)
        for base_link in base_links:
            base_link["anchorRef"] = prefix + base_link["anchorRef"]
            base_link["anchorRefExpanded"] = [prefix + r for r in base_link["anchorRefExpanded"]]
        # Set membership keeps the de-duplication linear in the number of links.
        orig_links_refs = {(origlink['sourceRef'], origlink['anchorRef']) for origlink in links}
        links += [
            base_link for base_link in base_links
            if (base_link['sourceRef'], base_link['anchorRef']) not in orig_links_refs
            and base_link["sourceRef"] != base_link["anchorRef"]
        ]

    links = [lnk for lnk in links if not Ref(lnk["anchorRef"]).is_section_level()]

    # Only look up the groups when sheet links were actually requested.
    if with_sheet_links:
        groups = library.get_groups_in_library()
        if groups:
            sheet_links = get_sheets_for_ref(tref, in_group=groups)
            links += [format_sheet_as_link(sheet) for sheet in sheet_links]

    return links
def get_links(tref, with_text=True, with_sheet_links=False):
    """
    Return a list of links tied to 'ref' in client format.
    If `with_text`, retrieve texts for each link.
    If `with_sheet_links` include sheet results for sheets in groups which are listed in the TOC.

    :param tref: textual reference whose links are requested; parsed with `Ref(tref)`.
    :param with_text: when true, each link gets "text"/"he" content plus the
        version title, license and Hebrew version title fields for both languages.
    :param with_sheet_links: when true, links built from `get_sheets_for_ref`
        are appended to the result.
    :return: list of link dicts as produced by `format_link_object_for_client`,
        followed by any formatted sheet links.

    Links that raise InputError while being formatted are skipped silently;
    AttributeError (formatting) and NoVersionFoundError (text loading) are
    logged and the link is skipped.
    """
    links = []
    oref = Ref(tref)
    nRef = oref.normal()
    lenRef = len(nRef)
    reRef = oref.regex() if oref.is_range() else None

    # for storing all the section level texts that need to be looked up
    # (normalized section ref -> language -> text/version data)
    texts = {}

    # language -> (text key, version title key, license key, Hebrew version title key)
    lang_fields = (
        ("he", ("he", "heVersionTitle", "heLicense", "heVersionTitleInHebrew")),
        ("en", ("text", "versionTitle", "license", "versionTitleInHebrew")),
    )

    linkset = LinkSet(oref)
    # For all links that mention ref (in any position)
    for link in linkset:
        # each link contains 2 refs in a list
        # find the position (0 or 1) of "anchor", the one we're getting links for
        # If both sides of the ref are in the same section of a text, only one direction will be used.  bug? maybe not.
        if reRef:
            pos = 0 if re.match(reRef, link.refs[0]) else 1
        else:
            pos = 0 if nRef == link.refs[0][:lenRef] else 1
        try:
            com = format_link_object_for_client(link, False, nRef, pos)
        except InputError:
            # Bad links are deliberately skipped without logging (the warning was disabled).
            continue
        except AttributeError as e:
            logger.error(u"AttributeError in presenting link: {} - {} : {}".format(link.refs[0], link.refs[1], e))
            continue

        # Rather than getting text with each link, walk through all links here,
        # caching text so that redundant DB calls can be minimized
        # If link is spanning, split into section refs and rejoin
        top_nref = None  # reset per link so the handler below never logs a stale or unbound name
        try:
            if with_text:
                original_com_oref = Ref(com["ref"])
                com_orefs = original_com_oref.split_spanning_ref()
                for com_oref in com_orefs:
                    top_oref = com_oref.top_section_ref()
                    # Lookup and save top level text, only if we haven't already
                    top_nref = top_oref.normal()
                    if top_nref not in texts:
                        # Collect both languages first and publish them together, so a
                        # failure on the second language cannot leave a half-filled
                        # cache entry behind for later links to trip over.
                        section_texts = {}
                        for lang in ("en", "he"):
                            top_nref_tc = TextChunk(top_oref, lang)
                            versionInfoMap = None if not top_nref_tc._versions else {
                                v.versionTitle: {
                                    'license': getattr(v, 'license', u''),
                                    'versionTitleInHebrew': getattr(v, 'versionTitleInHebrew', u'')
                                } for v in top_nref_tc._versions
                            }
                            if top_nref_tc.is_merged:
                                # merged text: one entry per source version title
                                version = top_nref_tc.sources
                                license_info = [versionInfoMap[vtitle]['license'] for vtitle in version]
                                versionTitleInHebrew = [versionInfoMap[vtitle]['versionTitleInHebrew'] for vtitle in version]
                            elif top_nref_tc._versions:
                                version = top_nref_tc.version().versionTitle
                                license_info = versionInfoMap[version]['license']
                                versionTitleInHebrew = versionInfoMap[version]['versionTitleInHebrew']
                            else:
                                # version doesn't exist in this language
                                version = None
                                license_info = None
                                versionTitleInHebrew = None
                            section_texts[lang] = {
                                'ja': top_nref_tc.ja(),
                                'version': version,
                                'license': license_info,
                                'versionTitleInHebrew': versionTitleInHebrew
                            }
                        texts[top_nref] = section_texts

                    # zero-based addresses of the linked span
                    com_sections = [i - 1 for i in com_oref.sections]
                    com_toSections = [i - 1 for i in com_oref.toSections]
                    for lang, (attr, versionAttr, licenseAttr, vtitleInHeAttr) in lang_fields:
                        temp_nref_data = texts[top_nref][lang]
                        res = temp_nref_data['ja'].subarray(com_sections[1:], com_toSections[1:]).array()
                        if attr not in com:
                            com[attr] = res
                        else:
                            # a later piece of a spanning ref: append to what is already there
                            if isinstance(com[attr], basestring):
                                com[attr] = [com[attr]]
                            com[attr] += res
                        temp_version = temp_nref_data['version']
                        if isinstance(temp_version, basestring) or temp_version is None:
                            com[versionAttr] = temp_version
                            com[licenseAttr] = temp_nref_data['license']
                            com[vtitleInHeAttr] = temp_nref_data['versionTitleInHebrew']
                        else:
                            # merged. find exact version titles for each segment
                            per_segment = (
                                temp_version,
                                temp_nref_data['license'],
                                temp_nref_data['versionTitleInHebrew'],
                            )
                            start_sources = temp_nref_data['ja'].distance([], com_sections[1:])
                            if com_sections == com_toSections:
                                # simplify for the common case
                                # NOTE(review): the `len(...) - 1` bound yields None for the last
                                # index; this looks like a possible off-by-one -- confirm before changing.
                                versions, licenses, versionTitlesInHebrew = [
                                    values[start_sources] if start_sources < len(values) - 1 else None
                                    for values in per_segment
                                ]
                            else:
                                end_sources = temp_nref_data['ja'].distance([], com_toSections[1:])
                                versions, licenses, versionTitlesInHebrew = [
                                    values[start_sources:end_sources + 1] for values in per_segment
                                ]
                            com[versionAttr] = versions
                            com[licenseAttr] = licenses
                            com[vtitleInHeAttr] = versionTitlesInHebrew
            links.append(com)
        except NoVersionFoundError:
            logger.warning("Trying to get non existent text for ref '{}'. Link refs were: {}".format(top_nref, link.refs))
            continue

    # Harded-coding automatic display of links to an underlying text. bound_texts = ("Rashba on ",)
    # E.g., when requesting "Steinsaltz on X" also include links to "X" as though they were connected directly to Steinsaltz.
    bound_texts = ("Steinsaltz on ",)
    for prefix in bound_texts:
        if not nRef.startswith(prefix):
            continue
        base_ref = nRef[len(prefix):]
        # NOTE(review): the recursive call uses the default arguments, so base links
        # are fetched with text regardless of `with_text` -- confirm this is intended.
        base_links = get_links(base_ref)
        for base_link in base_links:
            base_link["anchorRef"] = prefix + base_link["anchorRef"]
        # Set membership keeps the de-duplication linear in the number of links.
        orig_links_refs = {(origlink['sourceRef'], origlink['anchorRef']) for origlink in links}
        links += [
            base_link for base_link in base_links
            if (base_link['sourceRef'], base_link['anchorRef']) not in orig_links_refs
            and base_link["sourceRef"] != base_link["anchorRef"]
        ]

    links = [lnk for lnk in links if not Ref(lnk["anchorRef"]).is_section_level()]

    # Only look up the groups when sheet links were actually requested.
    if with_sheet_links:
        groups = library.get_groups_in_library()
        if groups:
            sheet_links = get_sheets_for_ref(tref, in_group=groups)
            links += [format_sheet_as_link(sheet) for sheet in sheet_links]

    return links