Пример #1
0
 def process(self, obj):
     """Dispatch a request object to the handler method it names.

     ``obj.method`` is looked up as an attribute of ``self``.  The handler is
     called with ``obj.args`` as its single positional argument when
     ``obj.args`` is truthy, and with no arguments otherwise.

     Any ``Exception`` raised while resolving or running the handler is
     written to stderr instead of being propagated.

     Returns the handler's return value; if the lookup or the call raised,
     the ``ISIVoid`` placeholder created up front is returned instead.
     """
     ret = ISIVoid() #default nothing
     try:
         method = getattr(self, obj.method)
         println("Running %s\n" % obj.method)
         if obj.args:
             ret = method(obj.args)
         else:
             ret = method()
     except Exception as error:
         # Best effort: report the failure and fall through to the default.
         sys.stderr.write("ERROR: %s\n%s\n" % (traceback(error), error))
     return ret
Пример #2
0
def walkISI(files, archive, notes):
    """Parse saved-record files into *archive* and attach a PDF to each article.

    For every path in *files* the file content is fed to a
    ``SavedRecordParser`` built around *archive*, together with *notes*.
    Each article the parser then yields is given a PDF: a file named
    ``"<abbrev> <volume> <start page>.pdf"`` is reused when it already exists
    on disk, otherwise ``download_pdf`` is asked to fetch one.  The parser's
    archive is committed once all files have been handled.

    files   -- iterable of paths to the export files to read
    archive -- archive object handed to ``SavedRecordParser``
    notes   -- passed through unchanged to ``parser.feed``
    """
    from papers.pdfget import download_pdf

    parser = SavedRecordParser(archive)

    for filename in files:
        # Close the handle deterministically rather than leaving it to the GC.
        with open(filename) as handle:
            text = handle.read()
        parser.feed(text, notes)
        println("%d new articles\n" % len(parser.archive))

        for article in parser:
            journal = article.get_journal()
            abbrev = article.get_abbrev()
            volume = article.get_volume()
            start = article.get_start_page()
            name = "%s %d %s" % (abbrev, volume, start)
            println("Downloading %s" % name)

            #check to see if we already have it
            path = name + ".pdf"
            if os.path.isfile(path):
                println(" -> exists %s" % path)
                article.set_pdf(path)
                continue

            path = download_pdf(ISIArticle.get_journal(journal), volume, 0, start) #don't require issue
            if path:
                println(" -> %s" % path)
                article.set_pdf(path)
            else:
                sys.stdout.write(" -> FAILED")
    parser.archive.commit()
Пример #3
0
    def feed(self, text, notes):
        """Extract article records from *text* and add the new ones to the archive.

        A record is the text between a ``PT J`` marker and the next line
        starting with ``ER``.  For each record a fresh article is created via
        ``self.archive.create_article()`` and populated through
        ``self.get_entry``; the ``entries`` pairs look like start/end field
        tags, but that contract lives in ``get_entry`` -- TODO confirm there.
        *notes* is stored on every article with ``set_notes``.

        Articles already known to ``self.master`` are reported and skipped;
        all others go to ``self.archive.test_and_add``.  Any ``Exception``
        raised while handling a record is written to stderr together with the
        record text, and processing continues with the next record.
        """
        # Loop-invariant patterns and converters are built once per call.
        record_re = re.compile(r"PT\sJ(.*?)\nER", re.DOTALL)
        number_re = re.compile(r"(\d+)")

        def get_number(value):
            # First run of digits in the field text, returned as a string.
            return number_re.search(value).groups()[0]

        def get_page(value):
            return Page(get_number(value))

        def get_issue(value):
            return int(get_number(value))

        def split_authors(value):
            return get_authors(value, "\n", ",")

        for block in record_re.findall(text):
            try:
                self.block = block
                self.article = self.archive.create_article()

                clean_title = Cleanup.clean_title

                self.get_entry("journal", entries=(("so", "la"), ("so", "ab"), ("so", "sn")))
                self.get_entry("volume", method=int, entries=(("vl", "is"), ("vl", "bp")))
                self.get_entry("issue", method=get_issue, require=False, entries=(("is", "bp"),))
                self.get_entry("start_page", method=get_page, exclude=("art. no.",), entries=(("bp", "ep"), ("bp", "ut"), ("ar", "di"), ("ar", "ut")))
                self.get_entry("end_page", method=get_page, require=False, entries=(("ep", "di"), ("ep", "ut")))

                self.get_entry("authors", method=split_authors, entries=(("af", "ti"), ("au", "ti"), ("au", "so")))

                self.get_entry("title", method=clean_title, entries=(("ti", "so"),))
                self.get_entry("abstract", method=clean_entry, require=False, entries=(("ab", "sn"),))
                self.get_entry("year", method=int, entries=(("py", "vl"), ("py", "tc")))

                self.get_entry("doi", require=False, entries=(("di", "pg"), ("di", "ut"), ("di", "er")))

                self.article.set_notes(notes)

                # The display name is built before the duplicate check so a
                # record with an unusable journal/volume/page is rejected
                # (via the except below) instead of being archived.
                journal = ISIArticle.get_journal(self.article.get_journal())
                volume = self.article.get_volume()
                page = self.article.get_page()
                name = "%s %d %s" % (journal, volume, page)
                if not self.master.has(self.article):
                    self.archive.test_and_add(self.article)
                else:
                    println("%s exists in archive\n" % name)
            except Exception as error:
                # Best effort: one malformed record must not abort the batch.
                sys.stderr.write("ERROR: %s\n%s\n" % (error, block))
Пример #4
0
    def store(self, download=False, notes=None, keywords=None):
        """Store ``self.article``, optionally fetching its PDF and tagging it.

        The article is first looked up in the local archive with
        ``find_match``.  A local hit replaces ``self.article`` with the stored
        copy.  On a local miss the master archive is queried through
        ``ArchiveRequest``; only when both lookups miss is the article added
        to the local archive.

        download -- request a PDF download; cleared when the matched article
                    (local or master) reports ``has_pdf()``
        notes    -- passed to ``self.add_notes`` when non-empty
        keywords -- passed to ``self.add_keywords`` when non-empty
        """
        # None stands in for "nothing supplied" so that no list object is
        # shared between calls.
        if notes is None:
            notes = []
        if keywords is None:
            keywords = []

        journal = self.article.get_journal()
        volume = self.article.get_volume()
        page = self.article.get_page()
        year = self.article.get_year()
        name = "%s %d %s (%d)" % (self.article.get_abbrev(), volume, page, year)

        local_match = self.archive.find_match(self.article)
        if local_match:
            download = download and not local_match.has_pdf() #set to download if we don't have pdf
            self.article = local_match
            println("Already have article %s in local archive\n" % name)
        else:
            artreq = ArchiveRequest(self.article)
            master_match = artreq.run() #query master
            if master_match:
                println("Already have article %s in master archive\n" % name)
                download = download and not master_match.has_pdf() #set to download if we don't have pdf
            else:
                # Unknown both locally and in the master archive.
                self.archive.add(self.article)

        if download:
            path = download_pdf(journal, volume=volume, page=page)
            if path:
                println(" -> downloaded %s\n" % path)
                self.article.set_pdf(path)

        if keywords:
            self.add_keywords(keywords)
        if notes:
            self.add_notes(notes)

        println("Completed storage of %s\n%s\n%s\n" % (name, keywords, notes))