Пример #1
0
    def test_paper_can_be_written_to(self):
        """Text written onto a fresh Paper can be read back verbatim."""
        sheet = Paper()
        message = "I'm better than papyrus"

        sheet.write(message)

        self.assertEqual(message, sheet.read())
class EraserTest(unittest.TestCase):
    """Behavioural tests for Pencil erasing on a shared Paper fixture."""

    def setUp(self):
        # A fresh sheet for every test case.
        self.paper = Paper()

    def test_when_the_word_to_be_erased_is_not_found_nothing_should_be_erased(
            self):
        pencil = Pencil(eraser=Eraser())
        original = "Nothing to erase here"

        pencil.write(self.paper, original)
        pencil.erase(self.paper, "Something")

        self.assertEqual(original, self.paper.read())

    def test_when_pencil_eraser_degrades_fully_it_should_stop_erasing(self):
        worn_pencil = Pencil(eraser=Eraser(durability=4))

        worn_pencil.write(self.paper, "I am related to Buffalo Bill")
        for word in ("Bill", "Buffalo"):
            worn_pencil.erase(self.paper, word)

        self.assertEqual("I am related to Buffalo     ", self.paper.read())

    def test_erasing_should_erase_opposite_direction_of_the_written_order(
            self):
        pencil = Pencil(eraser=Eraser(durability=3))

        pencil.write(self.paper, "I am related to Buffalo Bill")
        pencil.erase(self.paper, "Bill")

        self.assertEqual("I am related to Buffalo B   ", self.paper.read())
def run_game():
    """Initialize pygame and run the rock-paper-scissors event loop."""
    pygame.init()
    settings = Settings()
    screen = pygame.display.set_mode(
        (settings.screen_width, settings.screen_height))
    pygame.display.set_caption("Rock Paper Scissors")
    screen.fill(settings.bg_color)

    rock = Rock(settings, screen)
    paper = Paper(settings, screen)
    scissors = Scissors(settings, screen)
    font = pygame.font.SysFont("monospace", 30)

    while True:
        prompt = font.render("Choose either rock, paper or scissors", 1,
                             (0, 0, 0))
        screen.blit(prompt, (250, 100))

        # Handle window close, 'q' to quit, and mouse picks.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
            elif event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                sys.exit()
            elif event.type == pygame.MOUSEBUTTONDOWN:
                mouse_x, mouse_y = pygame.mouse.get_pos()
                check_collision(rock, paper, scissors,
                                mouse_x, mouse_y, settings)

        for sprite in (rock, paper, scissors):
            sprite.blitme()

        pygame.display.flip()
Пример #4
0
 def test_paper_bfs(self):
     """BFS from a start paper visits its outgoing citations and
     preserves their titles.
     """
     paper = Paper(id_='A00-1031',
                   title='TnT - A Statistical Part-Of-Speech Tagger',
                   year='2000', link_type='outgoing')
     paper.outgoing_citations = [
         Paper(id_='A92-1018', title='A Practical Part-Of-Speech Tagger',
               year='1992'),
         Paper(id_='A97-1014',
               title='An Annotation Scheme For Free Word Order Languages',
               year='1997'),
         Paper(id_='J93-2004',
             title='Building A Large Annotated Corpus Of English: '\
               'The Penn Treebank', year='1993'),
         Paper(id_='P98-1081', title='Improving Data Driven Wordclass '\
               'Tagging by System Combination', year='1998'),
         Paper(id_='W96-0102', title='MBT: A Memory-Based Part Of Speech '\
               'Tagger-Generator', year='1996'),
         Paper(id_='W96-0213', title='A Maximum Entropy Model For '\
               'Part-Of-Speech Tagging', year='1996')
     ]
     result = PaperBFS(start_node=paper, iterations=1)
     # BUG FIX: the original inner loop rebound the name `paper`,
     # shadowing the start node; use distinct names for clarity.
     for cited in paper.outgoing_citations:
         for visited in result._visited:
             if cited.id_ == visited.id_:
                 # assertEquals is a deprecated alias of assertEqual.
                 self.assertEqual(cited.title, visited.title)
Пример #5
0
    def getLinks(self, page):        
        """Parse an IEEE Xplore results page and collect Paper entries.

        For every result-item div, builds a Paper whose link, title,
        authors and conference are filled from the anchor hrefs found,
        then appends it to self.papers.  Returns self.papers.
        """
        soup = BeautifulSoup(page, 'html.parser')        
        div = soup.find_all("div", attrs={"class": "result-item-align"})     

        for p in div:                        
            paper = Paper('')
            for index, child in enumerate(p.findChildren('a')):
                if 'href' in child.attrs:                   
                    if 'document' in child['href']:
                        paper.linkPaper = 'https://ieeexplore.ieee.org'+child['href']
                        # Only take the title once: a second 'document'
                        # link on the same result must not overwrite it.
                        if paper.titlePaper == '':
                            paper.titlePaper = child.text
                        
                    if 'author' in child['href']:
                        paper.author.append(child.text)   
                        
                    if 'conhome' in child['href']:
                        paper.conference = child.text
                

                '''
                if 'class' in child.attrs:                               
                    if 'media' in child['class']:
                        urlIdeia = self.urlRoot + child['href'] 
                        print(urlIdeia)
                        self.papers.append(Ideia(urlIdeia))
                '''
            print(paper)
            self.papers.append(paper)
        return self.papers    
Пример #6
0
 def createManyCrystals(self):
     """Draw and cut one snowflake master sheet, then clone it 100 times."""
     master = Paper('雪の結晶')
     self.drawCrystal(master)
     self.cutAccordanceWithLine(master)
     return [master.createClone() for _ in range(100)]
Пример #7
0
def papers():
    """Display all papers.

    Renders papers.html with the read papers (newest first) and the
    current reading queue.
    """
    papers_read = Paper.get_papers_read()
    # Sort by date read, descending, in one pass instead of sorting
    # ascending and then reversing the whole list.
    papers_read.sort(key=lambda p: p.dateRead, reverse=True)
    papers_in_queue = Paper.get_papers_in_queue()
    print('papers read:', len(papers_read), 'papers_in_queue:',
          len(papers_in_queue))
    return render_template('papers.html',
                           papers_read=papers_read,
                           papers_in_queue=papers_in_queue)
Пример #8
0
 def createManyCrystalsAndTrees(self):
     """Register snowflake and tree prototypes, then clone each 100 times."""
     keeper = PrototypeKeeper()
     keeper.addCloneable('snowflake', Paper('雪の結晶'))
     keeper.addCloneable('tree', Paper('もみの木'))
     clones = []
     for _ in range(100):
         clones.append(keeper.getClone('snowflake'))
         clones.append(keeper.getClone('tree'))
     return clones
Пример #9
0
def edit_paper():
    """Store a read paper in the database, then redirect home."""
    params = request.form.to_dict()
    print('params:', params)
    # Checkboxes only appear in the form data when they are ticked.
    params['inQueue'] = 1 if 'inQueue' in params else 0
    paper = Paper(**params)
    paper.update()
    return redirect('/')  # @TODO highlight paper in list after redirect.
Пример #10
0
    def __init__(self):
        """Create an empty Harmonizer.

        Attributes:
            nmarkers: number of markers.
            markers: the marker objects.
            paper: the Paper being harmonized.
            data: the actual grades and the grades from markers.
        """
        self.nmarkers = 0
        self.markers = []
        self.paper = Paper()
        self.data = []
Пример #11
0
def paper_to_list():
    """Tokenize every file in PAPER_DIR into a Paper and return the list.

    Each file is sentence-tokenized and the first four token groups
    become the Paper's fields; the paper number follows the iteration
    order of iterate_folder.
    """
    filelist = iterate_folder(PAPER_DIR)

    preprocessed_paper_list = []

    # BUG FIX: the counter was previously incremented OUTSIDE the loop,
    # so every paper was numbered 0.  enumerate() numbers files correctly.
    for i, file in enumerate(filelist):
        print(i)
        sent_list = sent_tokenize_file(file)
        paper = Paper(i, sent_list[0], sent_list[1], sent_list[2],
                      sent_list[3])
        preprocessed_paper_list.append(paper)

    return preprocessed_paper_list


#csvimport()
#csvexport()
#print(paper_to_list()[0].cleared_paper)

# //store the sentences in a file
# writefile = io.open('S:\\VMs\\Shared\\Maindata.txt', 'w', encoding="utf-8-sig")
# for file in filelist:
#     i+=1
#     print(i)
#     for line in tokenize_file(file):
#         writefile.write(line + "\n")
#
# writefile.close()
Пример #12
0
    def parse_file(self, filename):
        """Parse a reading-list file into papers and sections.

        Each line is matched first against the section regexes, then
        against the paper regexes; a matched paper is attached to the
        most recently seen section.  Returns (papers, sections).
        """
        with codecs.open(filename, encoding='utf-8', mode='r',
                         buffering=1, errors='strict') as file:

            sections = []
            papers = []
            changed_papers_titles = self.get_changed_papers_titles()
            current_section = None

            for line in file.read().splitlines():
                section_details = FileParser.check_all_regexes(self.section_regexes, line)
                if section_details:
                    # Section header line: open a new current section.
                    section_details_dict = section_details.groupdict()
                    current_section = Section(section_details_dict['name'])
                    sections.append(current_section)
                else:
                    details = FileParser.check_all_regexes(self.paper_regexes, line)
                    if details:
                        # Paper line: build it from the named regex groups.
                        paper_details = details.groupdict()
                        paper_title = paper_details['title']
                        papers.append(Paper({
                            'title':      paper_title,
                            'publisher':  paper_details['publisher'],
                            'url':        paper_details['url'],
                            'section':    current_section,
                            'notes_path': paper_details.get('notes_path'),
                            'changed':    paper_title in changed_papers_titles,
                            'is_read':    FileParser.is_read(paper_details['read']),
                            'tags':       paper_details.get('tags')}))

        return papers, sections
Пример #13
0
def csvimport():
    """Rebuild the preprocessed paper list from the exported CSV files.

    Reads ./export/papers.csv to create the Paper objects and
    ./export/sentences.csv to fill each paper's original and cleared
    sentence lists.  Returns the list of Paper objects.
    """
    preprocessed_paper_list = []

    # Context managers guarantee the files are closed (the original left
    # both handles open).  NUL bytes are stripped so the csv reader does
    # not choke on them.  next(reader) replaces the Python-2-only
    # reader.next() method call.
    with open('./export/papers.csv') as papers_file:
        papers_data = unicodecsv.reader(
            (x.replace('\0', '') for x in papers_file),
            encoding='utf-8-sig',
            delimiter=';')
        next(papers_data)  # skip header row

        # Read papers
        for row in papers_data:
            paper = Paper(int(row[0]), [], [], row[1], row[2])
            preprocessed_paper_list.append(paper)

    with open('./export/sentences.csv') as sentences_file:
        sentences_data = unicodecsv.reader(
            (x.replace('\0', '') for x in sentences_file),
            encoding='utf-8-sig',
            delimiter=';')
        next(sentences_data)  # skip header row

        # Read sentences; column 0 is the owning paper's number.
        for row in sentences_data:
            preprocessed_paper_list[int(row[0])].original_paper.append(row[1])
            preprocessed_paper_list[int(row[0])].cleared_paper.append(row[2])

    return preprocessed_paper_list
Пример #14
0
    def create_paper(self, paper_json):
        """Build a Paper from a JSON record that carries a 'pmid'.

        Returns None when the record has no 'pmid'.  When self.get_date
        is set and a DOI is present, the publication date is fetched
        from CrossRef; otherwise 'pubYear' is used when available.
        """
        # dict.has_key() was removed in Python 3; use the `in` operator.
        if 'pmid' not in paper_json:
            return None

        date = None
        if self.get_date and 'doi' in paper_json:
            res = requests.get("http://api.crossref.org/works/%s" %
                               paper_json['doi'])
            if res.status_code == 200:
                date = json.loads(
                    res.content)['message']['issued']['date-parts'][0]
                # CrossRef may omit month/day; pad them with 1s.
                while len(date) < 3:
                    date.append(1)
                year, month, day = date
                date = str(datetime.date(year, month, day))
        # `is None` instead of `== None`; fall back to the publication year.
        if date is None and 'pubYear' in paper_json:
            date = str(datetime.date(int(paper_json['pubYear']), 1, 1))

        # .get() reads defaults WITHOUT mutating paper_json the way the
        # original setdefault() calls did.
        return Paper(api=self.api_name,
                     title=paper_json.get('title'),
                     authors=paper_json.get('authorString'),
                     date=date,
                     doi=paper_json.get('doi'),
                     api_id="%s,%s" %
                     (paper_json['source'], paper_json['pmid']),
                     isOpenAccess=paper_json['isOpenAccess'] == "Y",
                     global_citation_count=paper_json['citedByCount'],
                     has_references=(paper_json['hasReferences'] == "Y"))
Пример #15
0
class TestPaper(unittest.TestCase):
    """Smoke tests for Paper paragraph extraction."""

    def setUp(self):
        # CiteSeerX-style document identifier.
        self.paper = Paper('10.1.1.128.9172')

    def test__get_pars(self):
        # TODO: this only checks the call returns something non-None.
        self.assertIsNotNone(self.paper._get_pars())
Пример #16
0
def test_eraser_erases_the_next_occurence_of_text():
    """Each erase call blanks out the next occurrence of the target text."""
    source = "How much wood would a woodchuck chuck if a woodchuck could chuck wood?"
    target = "chuck"
    sheet = Paper(source)
    rubber = Eraser()
    for _ in range(2):
        rubber.erase(sheet, target)
    expected = "How much wood would a woodchuck chuck if a wood      could       wood?"
    assert sheet.buffer == expected
Пример #17
0
    def __init__(self):
	"""Build the simulated arm, the paper, the 3D plot and the controller."""
	self.arm = Arm(self.ARM_BASE, 100, 160, 200, 1, -20)	# TODO: Initialize arm object
	#self.paperPlot = PaperPlot()
	#self.forcePlot = ForcePlot()
	self.paper = Paper(self.PAPER_BASE)	# Initialize paper
	self.armPlot = ArmPlot() # Initialize the 3D plot
	self.armPlot.plotPaper(self.paper)	# show the paper in the plot
	self.armPlot.plotArm(self.arm)	# show the arm in the plot
	self.controller = DeltaController(self.arm, self.paper)
Пример #18
0
def reducer(lines):
    """Emit, per day, the difference between closing and opening gold price.

    The first quote at/after BEGIN becomes the day's opening price; every
    quote up to END keeps overwriting the day's closing price, so the
    last one wins.  Days without a closing quote are skipped.

    BUG FIX: the original mixed tabs and spaces for indentation, which
    is a TabError on Python 3; indentation is now uniform spaces.
    """
    begins, ends = {}, {}
    for line in lines:
        gold = Paper.get_from_paperline(line)
        if gold.day not in begins and (gold.dt.hour > BEGIN[0] or
                (gold.dt.hour == BEGIN[0] and gold.dt.minute >= BEGIN[1])):
            begins[gold.day] = gold.price
        elif gold.dt.hour < END[0] or (gold.dt.hour == END[0] and
                gold.dt.minute <= END[1]):
            ends[gold.day] = gold.price

    for day in sorted(begins.keys()):
        if day not in ends:
            continue
        # Single-argument form works as a statement on Python 2 and as a
        # function call on Python 3.
        print('%s\t%s' % (day, ends[day] - begins[day]))
Пример #19
0
def add_paper(filename_, bibtex_name, path=None):
    """Add one PDF to the bibliography and file it under done/ or failed/.

    The arXiv code is taken from the filename when it looks like
    "<ref>.<4 digits>.<5 digits>"; otherwise it is extracted from the
    PDF itself via get_arxiv_code.  Returns (status, ref) where status
    is 'ok', 'repeated' or 'fail'.
    """
    if not path:
        path = './'
    root = path
    done_path = os.path.join(root, 'done')
    failed_path = os.path.join(root, 'failed')

    parts = filename_.split(".")
    # BUG FIX: the original indexed parts[1]/parts[2] unconditionally and
    # raised IndexError for filenames with fewer than three dot-parts.
    if len(parts) >= 3 and len(parts[1]) == 4 and len(parts[2]) == 5:
        ref = parts[0]
        code = parts[1] + "." + parts[2]
    else:
        code = get_arxiv_code(filename_, root)
        ref = 'ADD_REF'

    if code is not None:
        # If code is provided by filename OR pdf was downloaded from ArXiv:
        paper = Paper(code=code, bibtex_name=bibtex_name)

        add_new = logger.check_ref(bibtex_name, ref)
        if add_new:
            paper.add_bib(ref)
            paper.add_abstract(ref)
            shutil.move(os.path.join(root, filename_), done_path)
            print(filename_, " paper added to", bibtex_name)
            status = 'ok'
        else:
            print(filename_, " paper already exists in", bibtex_name)
            status = 'repeated'
    else:
        print(filename_, 'paper was not able to being added')
        shutil.move(os.path.join(root, filename_), failed_path)
        status = 'fail'
    return status, ref
Пример #20
0
def iterate(keys, node, year, month, country, publisher, source, out_path):
    """
    Iterate through the elements and store the details in the instance of Paper.

    Descends the XML tree until nodes with <paper> children are found; a
    paper whose title or abstract contains any keyword is saved via
    Paper.save() and written to out_path.
    """
    if node is None:
        return
    if len(node.findall("paper")) == 0:
        # No <paper> children here: recurse into each child node.
        for child in node:
            iterate(keys, child, year, month, country, publisher, source,
                    out_path)
        return

    for paper in node.findall("paper"):
        title_el = paper.find("title")
        if title_el is not None and title_el.find("fixed-case") is not None:
            # Recreate the title by stripping the <fixed-case> markers.
            xmlstr = ET.tostring(title_el, encoding='utf8',
                                 method='xml').decode("utf-8")
            xmlstr = xmlstr.replace("<fixed-case>", "")
            xmlstr = xmlstr.replace("</fixed-case>", "")
            title = xmlstr[xmlstr.find("<title>") + 7:xmlstr.find("</title>")]
        else:
            # BUG FIX: the original dereferenced .text unconditionally and
            # crashed on papers without a <title> element.
            title = title_el.text if title_el is not None else None

        abstract_el = paper.find("abstract")
        abstract_text = abstract_el.text if abstract_el is not None else None

        # Search for each keyword; one hit is enough to store the paper.
        for key in keys:
            if ((title is not None and title.find(key) != -1) or
                    (abstract_text is not None and
                     abstract_text.find(key) != -1)):
                # Keyword found: collect authors and store paper details.
                # (findall always returns a list, so no None check needed.)
                auth_list = []
                for author in paper.findall("author"):
                    auth_list.append(author.find("first").text + " " +
                                     author.find("last").text)
                paper_object = Paper()
                paper_object.save(year, month, title, auth_list, country,
                                  source, publisher)
                paper_object.write(out_path)
                # Details saved for one key; continue to the next paper.
                break
Пример #21
0
def main():
    """Build a sample exam paper spec and time one genetic-algorithm run."""
    paper = Paper()

    # Target paper constraints: 10 questions, 100 total points,
    # target difficulty 0.72, three question types (counts/scores below).
    paper.id = 1
    paper.total_score = 100
    paper.difficulty = 0.72
    paper.points = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    paper.each_point_score = [10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
    paper.each_type_count = [15, 15, 5]
    paper.each_type_score = [30, 30, 40]

    db = DB()
    db.generate_fake(paper)
    genetic = Genetic(paper, db)
    # NOTE: time.clock() and the print statement mark this as Python 2 code.
    start = time.clock()
    genetic.test_run()
    end = time.clock()
    print u"总共用时:", end - start, " 秒"
Пример #22
0
    def import_file(self, filename, is_parent=False):
        """Read papers from a previously written reference file.

        The file is a sequence of records separated by ">>> NEW PAPER <<<"
        lines; each record line looks like ">>> attr >>> value".  A final
        ">>> ALL DONE <<<" line marks the file as complete.

        Returns (citations, read_all): a dict mapping citation -> Paper,
        and whether the file was complete.  A missing file yields ({}, False).
        """

        citations = dict()
        read_all = False

        try:
            with open(filename, errors="backslashreplace") as input:

                p = Paper()
                start = True

                for line in input:

                    line = line.replace("\n", "")

                    if line == ">>> NEW PAPER <<<":
                        if start:
                            # First marker: nothing accumulated yet.
                            start = False
                        else:
                            # Marker terminates the previous record: store it.
                            self.add_paper(p, is_parent=is_parent)
                            citations[p.citation] = p
                            p = Paper()
                    elif line == ">>> ALL DONE <<<":
                        read_all = True
                    else:
                        # Attribute line: ">>> name >>> value".
                        items = line.split(">>>")
                        if len(items) < 3:
                            print("line, items:", line, items)
                        setattr(p, items[1].strip(' '), items[2].strip(' '))

                # Store the final record (it has no trailing NEW PAPER marker).
                if p.title:
                    self.add_paper(p, is_parent=is_parent)
                    citations[p.citation] = p

        except FileNotFoundError:
            pass

        return citations, read_all
Пример #23
0
def bibtex_entry_to_table(bibtex_entry):
    """Scrape the IEEE reference list for a bibtex entry into json_data.

    Builds a Paper from the entry, loads its IEEE Xplore references page
    with PhantomJS, and appends one {'id', 'ref_id'} row per reference
    to the module-level json_data list.
    """
    myPaper = Paper(bibtex_entry)
    # Renamed from `id`, which shadowed the builtin of the same name.
    doc_id = myPaper.getId()
    if not myPaper.isPaper():
        print('No author or keywords')
        return

    # Reference check
    wd = webdriver.PhantomJS(
        '/home/doyun/phantomjs-2.1.1-linux-x86_64/bin/phantomjs')
    url = 'http://ieeexplore.ieee.org/document/' + str(
        doc_id) + '/?anchor=references'
    print(url)
    # Retry until the page loads; the remote end occasionally drops us.
    while True:
        try:
            wd.get(url)
        except http.client.RemoteDisconnected:
            print('RemoteDisconnected Happened')
            continue
        break
    html_page = wd.page_source
    soup = BeautifulSoup(html_page, 'html.parser')
    ref_dom_list = soup.findAll("a",
                                {"class": "stats-reference-link-viewArticle"})
    # href looks like ".../<digits>"; capture the trailing numeric id.
    regex = re.compile(r'^\D+/(\d+)$')
    refs = [
        int(regex.search(ref_dom.get('href')).group(1))
        for ref_dom in ref_dom_list
    ]
    for ref in refs:
        json_data.append({'id': int(doc_id), 'ref_id': ref})
        print('id = {}, ref_id = {}'.format(doc_id, ref))

    wd.quit()

    return
Пример #24
0
    def __call__(self, fileName, *args, **kw):
        """Initializes :attr:`el <_Raphael.el>` and :attr:`fn <_Raphael.fn>` objects and creates :class:`Paper` instance to draw on.

		Parameters might be:

		:param str fileName: file name for saving
		:param width: width of the canvas
		:param height: width of the canvas
		:return: new :class:`Paper` instance

		.. code-block:: python

			paper = Drawing("fname1.svg",640,480)
			paper = Drawing("fname2.svg",640,480,backgroundColor='cyan')
			paper = Drawing("fname3.svg",width=640,height=480)

		or

		:param str fileName: file name for saving
		:param list|tuple attrs: first 4 elements in the list are equal to [x, y, width, height]. The rest are element descriptions in format {"type": type, <attributes>}
		:return: new :class:`Paper` instance

		.. code-block:: python

			paper = Drawing("fname4.svg",[0,0,640,480,
				{
					"type": "path",
					"path": "M100,200 l50,100",
					"stroke-width": 5,
					"stroke": "blue",
				},
				{
					"type": "rect",
					"x": 100,
					"y": 300,
					"width": 300,
					"height": 50,
					"fill": "red",
					"stroke": "cyan",
				},
			])
		"""
        # Copy element-level extensions onto RaphaelElement.
        # NOTE: iteritems() marks this as Python 2 code.
        for k, v in self.el.__dict__.iteritems():
            setattr(RaphaelElement, k, v)
        # Copy paper-level extensions onto Paper before instantiating it.
        for k, v in self.fn.__dict__.iteritems():
            setattr(Paper, k, v)
        return Paper(fileName, *args, **kw)
Пример #25
0
    def initialize_parents(self, p_file):
        """
		Add all the parent papers using their .txt reference files. Read in
		anything that's already been calculated and stored in parents.txt.

		For each bib file whose citation is not yet known, look the paper
		up, store it in self.parent_citations and append it to p_file.
		"""

        self.parent_citations, read_all = self.import_file(p_file,
                                                           is_parent=True)

        print("Number of parents read from file:", len(self.all_papers))

        if read_all:
            print("All parents added from file; no lookup necessary")

        else:
            # Append newly looked-up parents to the same file for next time.
            with open(p_file, 'a', errors='backslashreplace') as output:

                for i in range(len(self.bibs)):

                    with open(self.bibs[i],
                              errors="backslashreplace") as input:

                        # First line of a bib file is "[n] <citation>".
                        inputstr = input.readline().strip()
                        splits = re.split('\[[0-9]+\]', inputstr)
                        citation = splits[1].strip(' ')

                        if citation not in self.parent_citations:

                            print("\nlen(all_papers):", len(self.all_papers))
                            print("Add parent", i + 1, "of", len(self.bibs),
                                  "file", self.bibs[i])

                            # Look up a fresh Paper by citation and persist it.
                            p = self.add_paper(Paper(),
                                               citation,
                                               is_parent=True)
                            self.parent_citations[p.citation] = p
                            p.file_loc = self.bibs[i]
                            self.write_paper(output, p)

                            print("new len(all_papers):", len(self.all_papers))

                        else:
                            # Already known: just record where its file lives.
                            self.parent_citations[
                                citation].file_loc = self.bibs[i]
Пример #26
0
    def index(self):
        """Build openjournal search indices.

        Indexes every paper title into a Whoosh index; author indexing
        exists but is currently disabled at the call site.
        """
        schema = Schema(attr=TEXT(stored=True), pid=TEXT(stored=True))
        ix = create_in(self.dbname, schema)
        writer = ix.writer()

        def index_title(paper):
            # Index the pruned, lower-cased title keyed by the paper id.
            writer.add_document(attr=unicode(prune(paper['title']).lower()),
                                pid=unicode(paper['pid']))

        def index_authors(paper):
            # BUG FIX: the original iterated the enclosing loop variable
            # `p` instead of this function's own `paper` argument.
            for author in paper.authors:
                writer.add_document(attr=unicode(author.lower()),
                                    pid=unicode("XXX"))

        papers = Paper.getall()
        for uuid, p in enumerate(papers):
            index_title(p)
            #index_authors(p)
        writer.commit()
Пример #27
0
    def index(self):
        """Build openjournal search indices.

        Indexes every paper title into a Whoosh index; author indexing
        exists but is currently disabled at the call site.
        """
        schema = Schema(attr=TEXT(stored=True),
                        pid=TEXT(stored=True))
        ix = create_in(self.dbname, schema)
        writer = ix.writer()

        def index_title(paper):
            # Index the pruned, lower-cased title keyed by the paper id.
            writer.add_document(attr=unicode(prune(paper['title']).lower()),
                                pid=unicode(paper['pid']))

        def index_authors(paper):
            # BUG FIX: the original iterated the enclosing loop variable
            # `p` instead of this function's own `paper` argument.
            for author in paper.authors:
                writer.add_document(attr=unicode(author.lower()),
                                    pid=unicode("XXX"))

        papers = Paper.getall()
        for uuid, p in enumerate(papers):
            index_title(p)
            #index_authors(p)
        writer.commit()
Пример #28
0
def run_cso_classifier(paper,
                       modules="both",
                       enhancement="first",
                       explanation=False):
    """Classify a paper against the CSO ontology.

    Args:
        paper: the document to classify (passed to Paper).
        modules: 'syntactic', 'semantic' or 'both'.
        enhancement: 'first', 'all' or 'no' ontology-climbing mode.
        explanation: whether to record explanation data.

    Returns:
        The dict produced by Result.get_dict().

    Raises:
        ValueError: if any argument is invalid.
    """
    if modules not in ("syntactic", "semantic", "both"):
        raise ValueError(
            "Error: Field modules must be 'syntactic', 'semantic' or 'both'")

    if enhancement not in ("first", "all", "no"):
        raise ValueError(
            "Error: Field enhances must be 'first', 'all' or 'no'")

    # isinstance() is the idiomatic type check (was: type(...) != bool).
    if not isinstance(explanation, bool):
        raise ValueError(
            "Error: Explanation must be set to either True or False")

    # Loading ontology and model
    cso = CSO()
    model = MODEL()
    t_paper = Paper(paper, modules)
    result = Result(explanation)

    # Run the requested classifier(s) and collect their output.
    if modules in ("syntactic", "both"):
        synt_module = synt(cso, t_paper)
        result.set_syntactic(synt_module.classify_syntactic())
        if explanation:
            result.dump_temporary_explanation(synt_module.get_explanation())
    if modules in ("semantic", "both"):
        sema_module = sema(model, cso, t_paper)
        result.set_semantic(sema_module.classify_semantic())
        if explanation:
            result.dump_temporary_explanation(sema_module.get_explanation())

    result.set_enhanced(
        cso.climb_ontology(getattr(result, "union"), enhancement))

    return result.get_dict()
Пример #29
0
    data_dir + community_filename)

num_top_communities = 30
community_conf_counter_list = []
id_to_community = {}
# For each of the top communities, create a conference counter and
# record, per node, every community that node belongs to (a node may
# appear in more than one community).
for communityId in range(num_top_communities):
    community_conf_counter_list.append(Counter())
    for nodeId in community_member_list[communityId]:
        if nodeId not in id_to_community:
            id_to_community[nodeId] = [communityId]
        else:
            id_to_community[nodeId].append(communityId)

# Sequentially parse every paper.
with open(data_dir + data_filename, 'r') as f:
    paper = Paper()
    id_counter = 0
    for line in f:
        line = line.strip("\n")
        # Write out when we have reached the end of a paper.
        # TODO: Remove this early-breaking line.
        # if id_counter == 30000:
        #     break
        if len(line) == 0 or line[0] != '#':
            if id_counter % 10000 == 0:
                print "Parsed file", id_counter
            # Update conference counter for community.
            if id_counter in id_to_community:
                for communityId in id_to_community[id_counter]:
                    community_conf_counter_list[communityId][paper.venue] += 1
            paper = Paper()
Пример #30
0
            new_message = message[:empty_space]
            message_size = len(new_message)
        else:
            new_message = message

        space = new_message.find(' ')
        ink_needed = space

        while space != -1 and ink_needed < self.ink_amount and space < \
                empty_space:
            counter_of_spaces += 1
            ink_needed -= counter_of_spaces
            space = new_message.find(' ', space + 1)

        if message_size > self.ink_amount:
            new_message = new_message[:self.ink_amount + counter_of_spaces]
            self.ink_amount = 0
            paper.add_content(new_message)
        else:
            self.ink_amount -= message_size - counter_of_spaces
            paper.add_content(new_message)


# Demo: a pen with 10 units of ink writing onto a 35-character paper.
pen = Pen(10)
paper = Paper(35)

pen.write("      Hello,  world!", paper)
paper.show()
# pen.write(" Hello, python!", paper)
paper.show()
Пример #31
0
    def add_paper(self, p, citation=None, is_parent=False):
        """
		Add paper p to the database.

		With `citation` given, p is populated via a CrossRef lookup;
		otherwise p's attributes (strings read back from file) are
		unpacked into their real types.  Papers are hashed by
		(title, year); collisions between genuinely different papers
		are resolved by re-hashing one of them by citation.  Returns
		the paper object (p) in all cases.
		"""

        # Don't add an empty paper to the database
        is_empty = True
        empty_paper = Paper()
        for attr in ["citation", "title", "DOI", "year"]:
            if str(empty_paper.__dict__[attr]) != p.__dict__[attr]:
                is_empty = False
        if is_empty:
            print("DON'T ADD AN EMPTY PAPER, DUMMY")
            return p

        # Look up using CrossRef if given just the citation
        if citation:
            p.lookup(citation, self.base_url)

        # Unpack/convert attributes that shouldn't be just strings
        else:
            if p.checked == "False":
                p.checked = 0.0
            for attr in self.int_attrs:
                if p.__dict__[attr] != "None":
                    setattr(p, attr, float(p.__dict__[attr]))
            # SECURITY NOTE(review): eval() on file contents executes
            # arbitrary code; safe only if the files are trusted.
            # ast.literal_eval would be the safer choice.
            p.subject = eval(p.subject)
            p.item = eval(p.item)
            p.container_title = eval(p.container_title)
            """
			# Leftover from initially adding the journal titles as an attr
			if p.container_title == "None":
				try:
					p.container_title = p.item['container-title']
				except KeyError:
					pass
			"""

        # Papers are identified by (title, year) throughout the database.
        p.hash = (p.title, p.year)
        if is_parent:
            p.is_parent = True

        # Deal with the case where the paper hash is already in the database
        if p.hash in self.all_papers:

            # If it's a duplicate, just return the paper
            if p.is_duplicate(self.all_papers[p.hash]):
                return p

            # If it's not really a duplicate, re-hash one of them
            else:
                old_p = self.all_papers[p.hash]

                # Def'n of dup shouldn't let two non-dup papers both be verified
                if p.verified == 1 and old_p.verified == 1:
                    raise ValueError(
                        "THEY CAN'T BE BOTH VERIFIED AND NON-DUPLICATE")

                # If the one already there is correct (and p therefore isn't),
                # keep it as is and hash p by citation
                if old_p.verified:
                    p.hash = (p.citation, "CITATION ONLY")
                    self.all_papers[p.hash] = p

                    # I don't think this is doing anything and might need to move
                    if is_parent:
                        old_p_kids = self.parents[old_p.hash]
                        self.parents[old_p.hash] = old_p_kids
                        self.parents[p.hash] = set()

                # Otherwise, re-hash the old one and put p in its spot
                else:
                    old_p.hash = (old_p.citation, "CITATION ONLY")
                    self.all_papers[old_p.hash] = old_p
                    self.all_papers[p.hash] = p

                    if is_parent:
                        old_p_kids = self.parents[p.hash]
                        self.parents[old_p.hash] = old_p_kids
                        self.parents[p.hash] = set()

        # If we don't already have the paper, simply add as usual
        else:
            self.all_papers[p.hash] = p

        # It's a defaultdict, but it's still nice to just put the hash in now
        if is_parent and p.hash not in self.parents:
            self.parents[p.hash] = set()

        return p
Пример #32
0
    def initialize_children(self):
        """
		Add all the references for each paper in the list of bib files.

		For each bib file: resolve its parent paper, read any children
		already cached in the corresponding ref_lists/ file, then look
		up and append the remaining citations.
		"""
        j = 0

        # Go through all the bibliography files
        for bib in self.bibs:

            j += 1

            with open(bib, errors="backslashreplace") as input:

                # Fetch or add the parent paper
                # (the first line of a bib file is "[n] <citation>").
                splits = re.split('\[[0-9]+\]', input.readline().strip())
                parent_citation = clean_citation(splits[1])
                parent = self.parent_citations[parent_citation]

                # Get the right filename for the child file
                bib0 = bib.replace('\\', '').replace('bibliographies', '')
                bib0 = bib0.strip('.').replace('/', '')
                childfile = "ref_lists/children_of_" + bib0

                old_size = len(self.all_papers)

                # Get any papers we've already written to the child file
                child_citations, read_all = self.import_file(childfile)
                for citation, p in child_citations.items():
                    self.parents[parent.hash].add(p)
                    p.file_loc = bib

                if read_all:
                    print("(" + str(j) + " of " + str(len(self.bibs)) + ") " +
                          "Bib #" + bib[25:-4] +
                          ": All children added from file")

                else:
                    # Go through the rest of the file to look up papers
                    with open(childfile, 'a',
                              errors='backslashreplace') as output:

                        # Remaining lines hold the "[n] citation" entries.
                        inputstr = input.read()
                        splits = re.split('\[[0-9]+\]', inputstr.strip(' '))

                        print(
                            "(" + str(j) + " of " + str(len(self.bibs)) + ")",
                            len(splits) - 1, "children,", bib[17:])
                        #print("\t  ", len(child_citations), "children read from file")

                        for i in range(1, len(splits)):

                            child_citation = clean_citation(splits[i])
                            #print(child_citation)

                            if child_citation not in child_citations:
                                print("\tAdd child", i, "of", len(splits) - 1)
                                print('\t\t' + child_citation[:50])

                                # Look up a new Paper by citation; cache it
                                # in the child file for future runs.
                                p = self.add_paper(Paper(), child_citation)
                                p.file_loc = bib
                                self.write_paper(output, p)
                                self.parents[parent.hash].add(p)

                        num_children = len(self.parents[parent.hash])
                        print("     Added", num_children, "children,",
                              len(self.all_papers) - old_size, "new papers")
Пример #33
0
def ExtractDBLPKey(buf):
    """Extract the value of the key="..." attribute from a DBLP XML line.

    Returns '' when the line is not a record tag (no mdate= attribute),
    when no key attribute is present, or when the closing quote is missing.
    """
    if buf.find('mdate=') < 0:
        return ''
    start = buf.find('key=\"')
    if start < 0:
        # mdate= present but no key attribute: without this guard the -1
        # sentinel from find() would produce a garbage slice below.
        return ''
    p1 = start + len('key=\"')
    p2 = buf.find('\"', p1)
    if p2 < 0:
        return ''
    return buf[p1:p2]

# NOTE(review): hardcoded DB credentials in source — move to configuration.
connection = MySQLdb.connect (host = "127.0.0.1", user = "******", passwd = "paper1ens", db = "paperlens")
cursor = connection.cursor()
# Start from empty tables before re-importing the full DBLP dump.
cursor.execute("truncate table paper;")
cursor.execute("truncate table paper_author;")
connection.commit()
# NOTE(review): file handle is never closed in the visible code — consider `with`.
data = open("../../../data/dblp.xml")

# Accumulator for the DBLP record currently being parsed line-by-line.
item = Paper()
# DBLP end-tags that represent publication records worth inserting.
paper_types = set(['article','inproceedings','proceedings','book','incollection','phdthesis','mastersthesis','www']);
author_index = dict()

try:
    n = 0
    for line in data:
        dblp_key = ExtractDBLPKey(line)
        if len(dblp_key) > 0:
            item.dblp_key = dblp_key
        endTag = ExtractEndTag(line);
        if endTag in paper_types and len(item.authors) > 0:
            cursor.execute("insert into paper(id,title,year,booktitle,type,dblp_key,journal,school,publisher) values (%s,%s,%s,%s,%s,%s,%s,%s,%s);",
                           (n, item.title, item.publish_year, item.booktitle, endTag, item.dblp_key,
                            item.journal,item.school,item.publisher))
            author_rank = 0
Пример #34
0
def test_from_pdf():
    """Parse sample exam PDFs with slate and compare the extracted question
    index string against the expected comma-separated index.

    Each entry of `tests` is (relative pdf path, expected index string).
    """
    tests = [
        # ("../../data/papers/2014_2015_CT422_1_1_5.PDF", "1,1.a,1.b,1.c,2,2.a,2.b,2.c,3,3.a,3.b,3.c,4,4.a,4.b,4.c"),
        ("../../data/papers/2014_2015_CH140_1_1_5.PDF", "A,A.1,A.1.i,A.1.i.a,A.1.i.b,A.1.i.c,A.1.i.d,A.1.i.e,A.1.ii,A.1.ii.a,A.1.ii.b,A.1.ii.c,A.1.ii.d,A.1.ii.e,A.1.iii,A.1.iii.a,A.1.iii.b,A.1.iii.c,A.1.iii.d,A.1.iv,A.1.iv.a,A.1.iv.b,A.1.iv.c,A.1.iv.d,A.1.iv.e,A.1.v,A.1.v.a,A.1.v.b,A.1.v.c,A.1.v.d,A.1.vi,A.1.vi.a,A.1.vi.b,A.1.vi.c,A.1.vi.d,A.1.vii,A.1.vii.a,A.1.vii.b,A.1.vii.c,A.1.vii.d,A.1.viii,A.1.viii.a,A.1.viii.b,A.1.viii.c,A.1.viii.d,A.1.ix,A.1.ix.a,A.1.ix.b,A.1.ix.c,A.1.ix.d,A.1.x,A.1.x.a,A.1.x.b,A.1.x.c,A.1.x.d,A.1.xi,A.1.xi.a,A.1.xi.b,A.1.xi.c,A.1.xi.d,A.1.xii,A.1.xii.a,A.1.xii.b,A.1.xii.c,A.1.xii.d,A.2,A.2.i,A.2.ii,A.2.iii,A.2.iii.a,A.2.iii.b,A.2.iii.c,A.2.iii.d,A.2.iv,A.3,A.3.i,A.3.ii,A.3.iii,A.3.iv,A.3.v,A.4,A.4.i,A.4.ii,A.4.ii.a,A.4.ii.b,A.4.ii.c,A.4.ii.d,A.4.iii"),
        ("../../data/papers/2014_2015_CT420_1_1_2.PDF", "A,A.1,A.1.i,A.1.ii,A.1.iii,A.1.iv,A.2,A.2.i,A.2.ii,A.2.iii,A.3,A.3.i,A.3.ii,B,B.4,B.4.i,B.4.i.a,B.4.i.b,B.4.i.c,B.4.i.d,B.4.i.e,B.4.ii,B.4.iii,B.4.iv,B.5,B.5.i,B.5.ii,B.5.ii.a,B.5.ii.b,B.5.ii.c,B.5.iii,B.5.iv,B.6,B.6.i,B.6.ii,B.6.iii,B.6.iv"),
        ("../../data/papers/CT422-1-2014-2015-2-Autumn.pdf", "1,1.i,1.ii,1.iii,2,2.i,2.ii,2.iii,3,3.i,3.ii,3.iii,4,4.i,4.ii")
    ]

    for paper_path, expected in tests:
        full_paper_path = os.path.join(os.path.dirname(__file__), paper_path)
        # PDFs are binary; open in 'rb' so slate receives raw bytes instead
        # of text-mode-decoded content.
        with open(full_paper_path, 'rb') as pdf:
            pages = slate.PDF(pdf)

            # Page 0 is skipped — presumably the cover page. TODO confirm.
            questions = Paper.parse_pages(pages[1:])

            for question in questions:
                print(question)

            index = index_string(questions)

            print("Paper: %s == '%s'" % (os.path.basename(paper_path), index))
            assert index == expected

        # NOTE(review): this break means only the first paper is ever tested —
        # confirm whether it is leftover debugging.
        break

# def test_unparsable():
#     with raises(UnparseableException):
#         with open(os.path.join(os.path.dirname(__file__), "../../data/CT422-1-2013-2014-2-Autumn.pdf")) as pdf:
#             pages = slate.PDF(pdf)
#             Paper.parse_pages(pages[1:])

# def test_index():
#     Paper.index.parseString("1.")
#     Paper.index.parseString("(1)")
#     Paper.index.parseString("[1]")
#     Paper.index.parseString("(a)")
#     Paper.index.parseString("a.")
#     Paper.index.parseString("[a]")

#     assert Paper.index.parseString("i.")[0].i == 1
#     assert Paper.index.parseString("ii.")[0].i == 2
#     assert Paper.index.parseString("iv.")[0].i == 4

# def test_parser_section():
#     Paper.section.leaveWhitespace().parseString("Section 1 ")

# def test_index():
#     paper = session.query(Paper).filter(Paper.id == 3878).first()

#     Paper.PAPER_DIR = "/tmp"
#     print paper

#     paper.index()

# def test_get_question():
#     paper = session.query(Paper).filter(Paper.id == 3907).first()

#     print paper.get_question(0, 0)

# def test_get_questions():
#     paper = session.query(Paper).filter(Paper.id == 3907).first()

#     print paper.get_questions()
Пример #35
0
from paper import Paper

# Load a single paper by its dotted document id
# (presumably a CiteSeerX-style id — TODO confirm).
p = Paper('10.1.1.1.1577')
Пример #36
0
# NOTE(review): hardcoded subscription key and placeholder mailto below —
# credentials should be loaded from configuration/environment, not source.
c = Crossref(mailto="*****@*****.**")


headers = {"Ocp-Apim-Subscription-Key":"ba7fae63586a4942bb49403fad4009d3"}
expr="And(Composite(AA.AfN=='brock university'),Y=2018)"

# Query the Microsoft Academic "evaluate" endpoint for matching papers.
r= requests.get("https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate?expr="+expr+"&model=latest&count=5&offset=171&attributes=Id,E,J.JN,C.CN,RId,F.FN,Ti,Y,D,AA.AuN,AA.AuId,AA.AfN,AA.AfId", headers=headers)

data = r.json()['entities']



for entity in data:

    paper = Paper(entity)

    print(vars(paper))
    print("")

    # Try resolving references by DOI first; fall back to the no-DOI path.
    paper.getReferencesDOI()

    # PEP 8: compare against None with `is` / `is not`, never `==`.
    if paper.references is None:
        paper.getReferencesNoDOI()

    if paper.references is not None:
        citationID = 0
        for reference in paper.references:
            # Throttle so we do not hammer the upstream API.
            time.sleep(2)
            #print(reference)
            #print("")
Пример #37
0
 def test__file_id_to_location(self):
     """The dotted document id should expand into its nested directory path."""
     expected = '10/1/1/582/1/10.1.1.582.1'
     self.assertEqual(Paper._file_id_to_location(self.id), expected)
Пример #38
0
    def __init__(self):
        """Initialize base Paper state and an empty list of child objects."""
        Paper.__init__(self)

        self.children = []
Пример #39
0
# Index(['Authors', 'Author(s) ID', 'Title', 'Year', 'Source title', 'Volume',
#        'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by',
#        'DOI', 'Link', 'Affiliations', 'Authors with affiliations', 'Abstract',
#        'Author Keywords', 'Index Keywords', 'Molecular Sequence Numbers',
#        'Chemicals/CAS', 'Tradenames', 'Manufacturers', 'Funding Details',
#        'Funding Text 1', 'Funding Text 2', 'Funding Text 3', 'References',
#        'Correspondence Address', 'Editors', 'Sponsors', 'Publisher',
#        'Conference name', 'Conference date', 'Conference location',
#        'Conference code', 'ISSN', 'ISBN', 'CODEN', 'PubMed ID',
#        'Language of Original Document', 'Abbreviated Source Title',
#        'Document Type', 'Publication Stage', 'Access Type', 'Source', 'EID'],
#       dtype='object')

i = 0  # NOTE(review): never incremented or used in the visible loop — confirm.
for dataframe in list_files_csv:
    # Wrap each CSV dataframe so its columns can be normalized in place.
    paper = Paper(dataframe)

    # NOTE(review): "str_covert" looks like a typo for "str_convert" in the
    # Paper API — verify against the class definition.

    # Authors: stringify the column, then split entries on ","
    paper.str_covert(column_name="Authors")
    paper.convert(column_name="Authors", deli=",")

    # Author(s) ID: stringify, then split on ";"
    paper.str_covert(column_name="Author(s) ID")
    paper.convert(column_name="Author(s) ID", deli=";")

    # Title: stringify, then split on ","
    paper.str_covert(column_name="Title")
    paper.convert(column_name="Title", deli=",")

    # Year: not handled in the visible portion of this loop.
    pass
Пример #40
0
 def draw(self, context, hints):
     """Draw the base paper, then each child in ascending z order."""
     Paper.draw(self, context)
     ordered_children = sorted(self.children, key=lambda c: c.z)
     for kid in ordered_children:
         kid.hints = hints  # TODO Not here
         kid.draw(context)
Пример #41
0
 def test__xml_file_to_dict(self):
     """Parsing the XML file should yield an OrderedDict with a document id."""
     parsed = Paper._xml_file_to_dict(self.id)
     self.assertEqual(type(parsed), OrderedDict)
     self.assertIsNotNone(parsed['document']['@id'])
Пример #42
0
def download(url, paper=None):
    """
    Main entry point for executing paperbot's primary function, paper fetching.
    The given url may be to a pdf file, which should be archived, or it may be
    to an academic publisher's website which points to a paper. The paper needs
    to be downloaded and the metadata should be stored.

    Returns a tuple of (paper, json_path, pdf_path, logpath).

    :param url: url to fetch and examine
    :type url: str
    :param paper: existing paper object to populate; created fresh when None
    """
    # store logs in tempfile
    (templogpath, loghandler) = loghijack()

    if paper is None:
        paper = Paper.create({})

    # clean up url if necessary
    url = run_url_fixers(url)

    # whether or not metadata has already been populated
    populated_metadata = False

    for (url2, response) in iterdownload(url, paper=paper):
        if is_response_pdf(response):
            log.debug("Got pdf.")
            pdfcontent = remove_watermarks(response.content)
            paper.pdf = pdfcontent
            store(paper)
            break

        paper.html = response.content

        # Was not pdf. Attempt to parse the HTML based on normal expected
        # HTML elements. The HTML elements may say that the actual pdf url
        # is something else. If this happens, then attempt to download that
        # pdf url instead and then break out of this loop.

        # no reason to get same metadata on every iteration of loop
        if not populated_metadata:
            tree = parse_html(response.content)

            # most publishers show paper metadata in html in same way because ?
            populate_metadata_from_tree(tree, paper)

            # TODO: better way to check if populate_metadata_from_tree did
            # anything useful?
            if paper.title in [None, ""]:
                log.debug("# TODO: parse metadata from html using plugins here")
            else:
                populated_metadata = True

        # can't try anything else if the url is still bad
        if paper.pdf_url in [None, ""]:
            continue

        # Normalize the two urls. The url from the metadata on the page
        # might be different from the url that was originally passed in,
        # even though both urls might still refer to the same resource.
        if is_same_url(url, paper.pdf_url):
            # pdf_url is same as original url, no pdf found yet. This
            # happens when the pdf url is correct, but the publisher is
            # returning html instead. And the html happens to reference the
            # url that was originally requested in the first place. Argh.
            continue

        log.debug("Switching activity to pdf_url {}".format(paper.pdf_url))

        # paper pdf is stored at a different url. Attempt to fetch that
        # url now. Only do this if pdf_url != url because otherwise
        # this will be an endless loop.
        for (url3, response2) in iterdownload(paper.pdf_url, paper=paper):
            if is_response_pdf(response2):
                log.debug("Got pdf on second-level page.")
                # BUG FIX: strip watermarks from the second-level response;
                # the original read `response.content` from the outer loop,
                # which is the non-pdf HTML page.
                pdfcontent = remove_watermarks(response2.content)
                paper.pdf = pdfcontent
                store(paper)
                break
        else:
            log.debug("Couldn't download pdf from {}".format(paper.pdf_url))

        break

    # was pdf downloaded?
    if (hasattr(paper, "pdf") and paper.pdf not in [None, ""]) or os.path.exists(paper.file_path_pdf):
        fetched = True
    else:
        fetched = False

    hasdoi = (paper.doi not in [None, ""])

    if hasdoi:
        # check if libgen has this paper already
        libgenhas = check_libgen_has_paper(paper.doi)

        if fetched and not libgenhas:
            # upload if libgen doesn't already have it
            upload_to_libgen(paper.file_path_pdf, paper.doi)
        elif not fetched and libgenhas:
            urldoi = make_libgen_doi_url(paper.doi)

            # get from libgen
            log.debug("Haven't yet fetched paper. Have doi. Also, libgenhas.")
            log.debug("HTTP GET {}".format(urldoi))
            response = requests.get(urldoi, headers=DEFAULT_HEADERS)

            # NOTE(review): this helper is named is_pdf_response while the
            # loops above use is_response_pdf — confirm both exist upstream.
            if is_pdf_response(response):
                log.debug("Got pdf from libgen.")

                # skip pdfparanoia because it's from libgen
                pdfcontent = response.content
                paper.pdf = pdfcontent

                store(paper)

                fetched = True
            else:
                log.debug("libgen lied about haspdf :(")
    else:
        log.debug("Don't know doi, can't check if libgen has this paper.")
        libgenhas = None

    # store(paper) usually handles json but in case of failure there needs to
    # be an explicit save of paper metadata.
    if not fetched:
        store_json(paper)

    # move logs into position
    logpath = store_logs(paper, templogpath)

    # remove loghandler from logger
    mainlogger = logging.getLogger("paperbot")
    mainlogger.handlers.remove(loghandler)

    return (paper, paper.file_path_json, paper.file_path_pdf, logpath)
Пример #43
0
class ArmSim(object):
    # ===== Paper location input for simulation =====
    # Paper pose: rolled pi/2 about x, origin shifted so the sheet is
    # centred in x (via Paper.X_SIZE) and offset -50 in y.
    Rp = Rrpy(pi/2,0,0) # Roll-pitch-yaw rotation parameterization
    PAPER_BASE = np.identity(4)		      # Paper frame rigid body transform
    PAPER_BASE[0:3,0:3] = Rp		      # Paper rotation matrix
    PAPER_BASE[0:3,3]  = np.asfarray([-Paper.X_SIZE/2,-50,0]) # Paper origin

    # Arm Location
    Ra = Rrpy(-pi/2-0.05,pi/2,0)		# Arm Roll-Pitch-Yaw base orientation parameterization
    ARM_BASE = np.identity(4)			# Arm fixed-base rigid body transform
    ARM_BASE[0:3,3] = np.asfarray([250,-50,0])	# Arm origin
    ARM_BASE[0:3,0:3] = Ra			# Set rotation

    INITIAL_CONFIG = np.asfarray([pi/4,pi/4,0])	# Initial arm joint configuration

   # ===== Set of waypoints on paper =====
    # Each stroke is a pair of [x, y] endpoints in paper coordinates.
    WAYPOINTS = [[[10,0],[10,30]],[[10,15],[20,15]],[[20,0],[20,30]]]

    def __init__(self):
	# Build the arm model, the simulated paper, and the 3D plot, then
	# wire arm and paper together through a delta controller.
	self.arm = Arm(self.ARM_BASE, 100, 160, 200, 1, -20)	# TODO: Initialze arm object
	#self.paperPlot = PaperPlot()
	#self.forcePlot = ForcePlot()
	self.paper = Paper(self.PAPER_BASE)	# Initialize paper
	self.armPlot = ArmPlot() # Initialize the 3D plot
	self.armPlot.plotPaper(self.paper)
	self.armPlot.plotArm(self.arm)
	self.controller = DeltaController(self.arm, self.paper)
    
    # Run the simulation
    def run(self, strokes, initialConfig, minStep=100):
	"""Animate the arm tracing `strokes` on the paper.

	Strokes are converted to world-frame waypoints, the controller
	generates a joint-space trajectory, and the loop below replays that
	trajectory indefinitely, prompting between replays.
	NOTE(review): `minStep` is only used by the commented-out IK code.
	"""
	initialConfig = np.asfarray(initialConfig)
	current = initialConfig	# Current configuration
	self.worldWaypoints = self.paper.strokesToWorld(strokes)

	# Loop over waypoints
	#for i in range(0,len(strokes)):
	'''
	ikConfig = np.append(self.arm.planarIK(strokes[i][0]),[50])# Compute IK
	print str(ikConfig)
	nsteps = minStep
	configs = interpolateLinear(current, ikConfig, nsteps)  # Interpolate trajectory
	print configs.shape'''

	configs = self.controller.generateTrajectory(strokes)

	# Wait for replay
	# Loop over interpolated configurations
	while(True):
	    for k in range(0, len(configs)):
		print 'Step', k
		#print str(configs[k])
		self.arm.setConfiguration(self.rx64RoundConfig(configs[k])) # Update arm position
		self.armPlot.clear()
		self.armPlot.plotArm(self.arm) 	# Plot
		self.armPlot.plotPaper(self.paper)	# Plot paper again
		self.armPlot.plotIdealPath(self.worldWaypoints)
		#print 'Arm Position', str(self.arm.eePosition())
		self.arm.printEEPosition()
		draw()
		self.armPlot.fig.show()
		sleep(0.0001)
	    c = input('Enter some string to continue')
	    #current = ikConfig
    
    # Round the configuration to RX64 angles
    def rx64RoundConfig(self, config, randomness=0):
	"""Quantize joint angles to the RX-64 servo's 10-bit resolution.

	Angles are scaled into the 5*pi/3 rad servo range, rounded to one of
	2**10 discrete steps, and scaled back.
	NOTE(review): `randomness` is accepted but unused — confirm intent
	(the trailing "Generate Gaussian" comment suggests unfinished work).
	"""
	nbits = 10
	rx64Range = 5.0*pi/3.0
	dConfig = (config/rx64Range) * pow(2.0, nbits)
	#print 'Discrete configuration:', str(dConfig)
	dConfig = np.round(dConfig)
	roundedConfig = dConfig/pow(2.0, nbits) * rx64Range
	#print 'Rounded Configuration', str(roundedConfig)
	# Generate Gaussian
	return roundedConfig