示例#1
0
 def search(self,
            query: str,
            generic_cover: str = "",
            locale: str = "en") -> Optional[List[MetaRecord]]:
     """Search Google Scholar for ``query`` and parse the first hits.

     When ``self.get_title_tokens`` yields tokens for the query, the query
     is replaced by those tokens, each UTF-8 encoded and URL-quoted, joined
     with single spaces.  At most ten results are consumed from
     ``scholarly.search_pubs``.

     Args:
         query: Free-text search string.
         generic_cover: Accepted for interface compatibility.
             NOTE(review): it is not forwarded; an empty string is passed
             to ``_parse_search_result`` instead - confirm this is intended.
         locale: Passed through to ``_parse_search_result``.

     Returns:
         An empty list when ``self.active`` is falsy, ``None`` when
         configuring or starting the scholarly search raises, otherwise the
         list of values returned by ``_parse_search_result``.
     """
     if not self.active:
         return []

     words = list(self.get_title_tokens(query, strip_joiners=False))
     if words:
         query = " ".join(quote(word.encode("utf-8")) for word in words)

     try:
         scholarly.set_timeout(20)
         scholarly.set_retries(2)
         # Cap the number of consumed publications at ten.
         hits = itertools.islice(scholarly.search_pubs(query), 10)
     except Exception as e:
         log.warning(e)
         return None

     # Errors raised while iterating the results propagate to the caller.
     return [
         self._parse_search_result(result=hit,
                                   generic_cover="",
                                   locale=locale)
         for hit in hits
     ]
示例#2
0
    def setUp(self):
        """Configure the scholarly proxy according to ``CONNECTION_METHOD``.

        The connection method is read from the ``CONNECTION_METHOD``
        environment variable when that key is present in ``scholarly.env``
        and defaults to ``"none"`` otherwise.  The values ``"tor"``,
        ``"tor_internal"``, ``"luminati"`` and ``"freeproxy"`` each set up
        the matching ``ProxyGenerator`` mode; any other value disables the
        proxy with ``scholarly.use_proxy(None)``.
        """
        proxy_generator = ProxyGenerator()
        if "CONNECTION_METHOD" in scholarly.env:
            self.connection_method = os.getenv("CONNECTION_METHOD")
        else:
            self.connection_method = "none"

        # Both Tor branches share the same platform split.
        on_unix = sys.platform.startswith(("linux", "darwin"))
        on_windows = sys.platform.startswith("win")

        if self.connection_method == "tor":
            tor_sock_port = None
            tor_control_port = None
            tor_password = "******"
            # Tor uses the 9050 port as the default socks port
            # on windows 9150 for socks and 9151 for control
            if on_unix:
                tor_sock_port = 9050
                tor_control_port = 9051
            elif on_windows:
                tor_sock_port = 9150
                tor_control_port = 9151
            proxy_generator.Tor_External(tor_sock_port, tor_control_port,
                                         tor_password)
            scholarly.use_proxy(proxy_generator)

        elif self.connection_method == "tor_internal":
            # Previously only linux and win bound tor_cmd, so macOS (and any
            # other platform) crashed with UnboundLocalError on the call
            # below.  macOS is now treated like linux, matching the "tor"
            # branch; unknown platforms pass None, just as the "tor" branch
            # leaves its ports None there.
            if on_unix:
                tor_cmd = 'tor'
            elif on_windows:
                tor_cmd = 'tor.exe'
            else:
                tor_cmd = None
            proxy_generator.Tor_Internal(tor_cmd=tor_cmd)
            scholarly.use_proxy(proxy_generator)
        elif self.connection_method == "luminati":
            scholarly.set_retries(10)
            # Credentials and port come from the USERNAME, PASSWORD and PORT
            # environment variables (os.getenv returns None when unset).
            proxy_generator.Luminati(usr=os.getenv("USERNAME"),
                                     passwd=os.getenv("PASSWORD"),
                                     proxy_port=os.getenv("PORT"))
            scholarly.use_proxy(proxy_generator)
        elif self.connection_method == "freeproxy":
            proxy_generator.FreeProxies()
            scholarly.use_proxy(proxy_generator)
        else:
            scholarly.use_proxy(None)
#     """ "Regulated+Domain" """,
#     """ "FDA+Requirement" """,
#     """ "Continuous+Delivery" """,
#     """ "Continuous+Integration" """,
#     """ "Automation+Systems" """,
#     """ "Software+Validation" """,
#     """ "Continuous+Software+Engineering" """
#     ]

# Set up scholarly to crawl Google Scholar through an external Tor proxy.
# These statements sit at module level, so they run as soon as the module
# is executed.  9050 and 9051 are passed as the Tor SOCKS and control ports;
# the password literal is "******" (looks redacted - supply the real
# control password before running; TODO confirm).
pg = ProxyGenerator()
pg.Tor_External(tor_sock_port=9050,
                tor_control_port=9051,
                tor_password="******")
# Register the configured generator with scholarly.
scholarly.use_proxy(pg)
# Set scholarly's retry count to 100.
scholarly.set_retries(100)


def CreateSearchQuery(keyword):
    """Build a custom Scholar search URL and return scholarly's result for it.

    The keyword is stripped of surrounding whitespace and appended verbatim
    (no URL encoding is applied here) as the ``q`` parameter of a relative
    ``/scholar`` URL with these fixed parameters:

        hl=en, lr=lang_en, as_vis=0, as_sdt=0,33

    Args:
        keyword: Search expression; must provide ``strip()``.

    Returns:
        Whatever ``scholarly.search_pubs_custom_url`` returns for the URL.
    """
    url_template = "/scholar?hl=en&lr=lang_en&as_vis=0&as_sdt=0,33&q={}"
    custom_url = url_template.format(keyword.strip())
    return scholarly.search_pubs_custom_url(custom_url)