def test_paper_can_be_written_to(self):
    paper = Paper()
    text = "I'm better than papyrus"
    paper.write(text)
    self.assertEqual(text, paper.read())
class EraserTest(unittest.TestCase):
    def setUp(self):
        self.paper = Paper()

    def test_when_the_word_to_be_erased_is_not_found_nothing_should_be_erased(
            self):
        eraser = Eraser()
        pencil = Pencil(eraser=eraser)
        text = "Nothing to erase here"
        pencil.write(self.paper, text)
        pencil.erase(self.paper, "Something")
        self.assertEqual(text, self.paper.read())

    def test_when_pencil_eraser_degrades_fully_it_should_stop_erasing(self):
        eraser = Eraser(durability=4)
        pencil = Pencil(eraser=eraser)
        pencil.write(self.paper, "I am related to Buffalo Bill")
        pencil.erase(self.paper, "Bill")
        pencil.erase(self.paper, "Buffalo")
        self.assertEqual("I am related to Buffalo     ", self.paper.read())

    def test_erasing_should_erase_opposite_direction_of_the_written_order(
            self):
        eraser = Eraser(durability=3)
        pencil = Pencil(eraser=eraser)
        pencil.write(self.paper, "I am related to Buffalo Bill")
        pencil.erase(self.paper, "Bill")
        self.assertEqual("I am related to Buffalo B   ", self.paper.read())
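# The Paper, Pencil, and Eraser classes exercised by the tests above are not
# shown. Below is a minimal sketch that would satisfy them, assuming the
# semantics the test names imply (erasing blanks the last occurrence of the
# word with spaces, one character per point of eraser durability). The names
# and internals are assumptions, not the original implementation.
class Paper:
    """Holds written text; blank by default."""
    def __init__(self):
        self._text = ""

    def write(self, text):
        self._text += text

    def read(self):
        return self._text

    def replace(self, start, replacement):
        self._text = (self._text[:start] + replacement
                      + self._text[start + len(replacement):])


class Eraser:
    """Erases from the end of the last match, until durability runs out."""
    def __init__(self, durability=float("inf")):
        self.durability = durability

    def erase(self, paper, word):
        start = paper.read().rfind(word)
        if start == -1:
            return  # word not found: erase nothing
        # Erase right-to-left, one character per point of durability.
        chars = int(min(self.durability, len(word)))
        erased = word[:len(word) - chars] + " " * chars
        paper.replace(start, erased)
        self.durability -= chars


class Pencil:
    def __init__(self, eraser=None):
        self.eraser = eraser

    def write(self, paper, text):
        paper.write(text)

    def erase(self, paper, word):
        self.eraser.erase(paper, word)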
import sys

import pygame


def run_game():
    pygame.init()
    settings = Settings()
    screen = pygame.display.set_mode(
        (settings.screen_width, settings.screen_height))
    pygame.display.set_caption("Rock Paper Scissors")
    screen.fill(settings.bg_color)
    rock = Rock(settings, screen)
    paper = Paper(settings, screen)
    scissors = Scissors(settings, screen)
    myfont = pygame.font.SysFont("monospace", 30)
    while True:
        label = myfont.render("Choose either rock, paper or scissors", 1,
                              (0, 0, 0))
        screen.blit(label, (250, 100))
        # Check events here.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_q:
                    sys.exit()
            elif event.type == pygame.MOUSEBUTTONDOWN:
                mouse_x, mouse_y = pygame.mouse.get_pos()
                check_collision(rock, paper, scissors, mouse_x, mouse_y,
                                settings)
        rock.blitme()
        paper.blitme()
        scissors.blitme()
        pygame.display.flip()
def test_paper_bfs(self):
    paper = Paper(id_='A00-1031',
                  title='TnT - A Statistical Part-Of-Speech Tagger',
                  year='2000',
                  link_type='outgoing')
    paper.outgoing_citations = [
        Paper(id_='A92-1018', title='A Practical Part-Of-Speech Tagger',
              year='1992'),
        Paper(id_='A97-1014',
              title='An Annotation Scheme For Free Word Order Languages',
              year='1997'),
        Paper(id_='J93-2004',
              title='Building A Large Annotated Corpus Of English: '
                    'The Penn Treebank',
              year='1993'),
        Paper(id_='P98-1081',
              title='Improving Data Driven Wordclass '
                    'Tagging by System Combination',
              year='1998'),
        Paper(id_='W96-0102',
              title='MBT: A Memory-Based Part Of Speech '
                    'Tagger-Generator',
              year='1996'),
        Paper(id_='W96-0213',
              title='A Maximum Entropy Model For '
                    'Part-Of-Speech Tagging',
              year='1996')
    ]
    result = PaperBFS(start_node=paper, iterations=1)
    for p in paper.outgoing_citations:
        # Use a distinct loop variable so the outer `paper` isn't shadowed.
        for visited in result._visited:
            if p.id_ == visited.id_:
                # assertEqual: assertEquals is a deprecated alias.
                self.assertEqual(p.title, visited.title)
def getLinks(self, page):
    soup = BeautifulSoup(page, 'html.parser')
    div = soup.find_all("div", attrs={"class": "result-item-align"})
    for p in div:
        paper = Paper('')
        for index, child in enumerate(p.findChildren('a')):
            if 'href' in child.attrs:
                if 'document' in child['href']:
                    paper.linkPaper = ('https://ieeexplore.ieee.org'
                                       + child['href'])
                    # Only fill the title while it is still empty: if
                    # another 'document' link appears, it must not
                    # overwrite the first one.
                    if paper.titlePaper == '':
                        paper.titlePaper = child.text
                if 'author' in child['href']:
                    paper.author.append(child.text)
                if 'conhome' in child['href']:
                    paper.conference = child.text
            '''
            if 'class' in child.attrs:
                if 'media' in child['class']:
                    urlIdeia = self.urlRoot + child['href']
                    print(urlIdeia)
                    self.papers.append(Ideia(urlIdeia))
            '''
        print(paper)
        self.papers.append(paper)
    return self.papers
def createManyCrystals(self):
    paper = Paper('雪の結晶')  # "snowflake"
    self.drawCrystal(paper)
    self.cutAccordanceWithLine(paper)
    papers = []
    for i in range(100):
        papers.append(paper.createClone())
    return papers
def papers():
    """ Display all papers. """
    papers_read = Paper.get_papers_read()
    papers_read.sort(key=lambda p: p.dateRead, reverse=True)  # newest first
    papers_in_queue = Paper.get_papers_in_queue()
    print('papers read:', len(papers_read),
          'papers_in_queue:', len(papers_in_queue))
    return render_template('papers.html',
                           papers_read=papers_read,
                           papers_in_queue=papers_in_queue)
def createManyCrystalsAndTrees(self):
    paper1 = Paper('雪の結晶')  # "snowflake"
    paper2 = Paper('もみの木')  # "fir tree"
    keeper = PrototypeKeeper()
    keeper.addCloneable('snowflake', paper1)
    keeper.addCloneable('tree', paper2)
    papers = []
    for i in range(100):
        papers.append(keeper.getClone('snowflake'))
        papers.append(keeper.getClone('tree'))
    return papers
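# PrototypeKeeper itself is not shown in either prototype-pattern snippet
# above. A minimal sketch of the registry they assume, using copy.deepcopy
# as a stand-in for Paper.createClone(); the internals are an assumption:
import copy


class PrototypeKeeper:
    """Registry of prototype objects, cloned on demand."""

    def __init__(self):
        self._prototypes = {}

    def addCloneable(self, name, prototype):
        self._prototypes[name] = prototype

    def getClone(self, name):
        # deepcopy ensures each clone is an independent copy; the real
        # Paper class may implement cloning itself via createClone().
        return copy.deepcopy(self._prototypes[name])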
def edit_paper():
    # Stores a read paper in the database.
    params = request.form.to_dict()
    print('params:', params)
    # Deal with checkbox: unchecked boxes are absent from the form data.
    if 'inQueue' in params:
        params['inQueue'] = 1
    else:
        params['inQueue'] = 0
    paper = Paper(**params)
    paper.update()
    return redirect('/')  # @TODO highlight paper in list after redirect.
def __init__(self):
    '''
    Creates an empty Harmonizer with:

    - self.nmarkers: the number of markers
    - self.markers: the markers
    - self.paper: the paper
    - self.data: the actual grades and the grades from markers
    '''
    self.nmarkers = 0
    self.markers = []
    self.paper = Paper()
    self.data = []
def paper_to_list():
    filelist = iterate_folder(PAPER_DIR)
    preprocessed_paper_list = []
    # Iterate through all main texts and collect their sentences;
    # i is the number of the paper.
    for i, file in enumerate(filelist):
        print(i)
        sent_list = sent_tokenize_file(file)
        paper = Paper(i, sent_list[0], sent_list[1], sent_list[2],
                      sent_list[3])
        preprocessed_paper_list.append(paper)
    return preprocessed_paper_list

# csvimport()
# csvexport()
# print(paper_to_list()[0].cleared_paper)

# Store the sentences in a file:
# writefile = io.open('S:\\VMs\\Shared\\Maindata.txt', 'w', encoding="utf-8-sig")
# for file in filelist:
#     i += 1
#     print(i)
#     for line in tokenize_file(file):
#         writefile.write(line + "\n")
#
# writefile.close()
def parse_file(self, filename):
    with codecs.open(filename, encoding='utf-8', mode='r', buffering=1,
                     errors='strict') as file:
        sections = []
        papers = []
        changed_papers_titles = self.get_changed_papers_titles()
        current_section = None
        for line in file.read().splitlines():
            section_details = FileParser.check_all_regexes(
                self.section_regexes, line)
            if section_details:
                section_details_dict = section_details.groupdict()
                current_section = Section(section_details_dict['name'])
                sections.append(current_section)
            else:
                details = FileParser.check_all_regexes(
                    self.paper_regexes, line)
                if details:
                    paper_details = details.groupdict()
                    paper_title = paper_details['title']
                    papers.append(Paper({
                        'title': paper_title,
                        'publisher': paper_details['publisher'],
                        'url': paper_details['url'],
                        'section': current_section,
                        'notes_path': paper_details.get('notes_path'),
                        'changed': paper_title in changed_papers_titles,
                        'is_read': FileParser.is_read(paper_details['read']),
                        'tags': paper_details.get('tags')}))
        return papers, sections
def csvimport():
    preprocessed_paper_list = []
    myfile = open('./export/sentences.csv')
    sentences_data = unicodecsv.reader(
        (x.replace('\0', '') for x in myfile),
        encoding='utf-8-sig', delimiter=';')
    next(sentences_data)  # skip the header row
    papers = open('./export/papers.csv')
    papers_data = unicodecsv.reader(
        (x.replace('\0', '') for x in papers),
        encoding='utf-8-sig', delimiter=';')
    next(papers_data)  # skip the header row
    # Read papers.
    for row in papers_data:
        paper = Paper(int(row[0]), [], [], row[1], row[2])
        preprocessed_paper_list.append(paper)
    # Read sentences.
    for row in sentences_data:
        preprocessed_paper_list[int(row[0])].original_paper.append(row[1])
        preprocessed_paper_list[int(row[0])].cleared_paper.append(row[2])
    return preprocessed_paper_list
def create_paper(self, paper_json):
    if 'pmid' in paper_json:
        date = None
        if self.get_date and 'doi' in paper_json:
            res = requests.get("http://api.crossref.org/works/%s"
                               % paper_json['doi'])
            if res.status_code == 200:
                date = json.loads(
                    res.content)['message']['issued']['date-parts'][0]
                while len(date) < 3:
                    date.append(1)
                year, month, day = date
                date = str(datetime.date(year, month, day))
        if date is None:
            if 'pubYear' in paper_json:
                date = str(datetime.date(int(paper_json['pubYear']), 1, 1))
            else:
                date = None
        return Paper(api=self.api_name,
                     title=paper_json.setdefault('title', None),
                     authors=paper_json.setdefault('authorString', None),
                     date=date,
                     doi=paper_json.setdefault('doi', None),
                     api_id="%s,%s" % (paper_json['source'],
                                       paper_json['pmid']),
                     isOpenAccess=paper_json['isOpenAccess'] == "Y",
                     global_citation_count=paper_json['citedByCount'],
                     has_references=(paper_json['hasReferences'] == "Y"))
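# CrossRef's issued.date-parts array may carry only a year, or a year and
# month; the while loop above pads the missing parts before building a
# datetime.date. A small self-contained illustration:
import datetime

date = [2015, 3]                 # year and month only, no day
while len(date) < 3:
    date.append(1)               # pad the missing day (or month) with 1
year, month, day = date
print(datetime.date(year, month, day))  # 2015-03-01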
class TestPaper(unittest.TestCase):
    def setUp(self):
        self.paper = Paper('10.1.1.128.9172')

    def test__get_pars(self):
        # TODO: Most logical test ever seen
        self.assertIsNotNone(self.paper._get_pars())
def test_eraser_erases_the_next_occurrence_of_text():
    initial_text = ("How much wood would a woodchuck chuck "
                    "if a woodchuck could chuck wood?")
    erase_text = "chuck"
    paper = Paper(initial_text)
    eraser = Eraser()
    eraser.erase(paper, erase_text)
    eraser.erase(paper, erase_text)
    expected_text = "How much wood would a woodchuck chuck if a wood could wood?"
    assert paper.buffer == expected_text
def reducer(lines):
    begins, ends = {}, {}
    for line in lines:
        gold = Paper.get_from_paperline(line)
        # BEGIN and END are assumed to be (hour, minute) tuples defined at
        # module level, bounding the window of interest.
        if gold.day not in begins and (gold.dt.hour > BEGIN[0] or
                (gold.dt.hour == BEGIN[0] and gold.dt.minute >= BEGIN[1])):
            begins[gold.day] = gold.price
        elif (gold.dt.hour < END[0] or
                (gold.dt.hour == END[0] and gold.dt.minute <= END[1])):
            ends[gold.day] = gold.price
    for day in sorted(begins.keys()):
        if day not in ends:
            continue
        print('%s\t%s' % (day, ends[day] - begins[day]))
def add_paper(filename_, bibtex_name, path=None):
    if not path:
        path = './'
    root = path
    done_path = os.path.join(root, 'done')
    failed_path = os.path.join(root, 'failed')
    filename = filename_.split(".")
    if len(filename[1]) == 4 and len(filename[2]) == 5:
        # The filename already carries a new-style arXiv code (NNNN.NNNNN).
        ref = filename[0]
        code = filename[1] + "." + filename[2]
    else:
        code = get_arxiv_code(filename_, root)
        ref = 'ADD_REF'
    if code is not None:
        # Code is provided by the filename OR the pdf was downloaded
        # from arXiv.
        paper = Paper(code=code, bibtex_name=bibtex_name)
        add_new = logger.check_ref(bibtex_name, ref)
        if add_new:
            paper.add_bib(ref)
            paper.add_abstract(ref)
            shutil.move(os.path.join(root, filename_), done_path)
            print(filename_, "paper added to", bibtex_name)
            status = 'ok'
        else:
            print(filename_, "paper already exists in", bibtex_name)
            status = 'repeated'
    else:
        print(filename_, "paper could not be added")
        shutil.move(os.path.join(root, filename_), failed_path)
        status = 'fail'
    return status, ref
def iterate(keys, node, year, month, country, publisher, source, out_path):
    """
    Iterate through the elements and store the details in the instance
    of Paper.
    """
    if node is None:
        return
    if len(node.findall("paper")) == 0:
        # Iterate through child nodes to search for the paper element.
        for child in node:
            iterate(keys, child, year, month, country, publisher, source,
                    out_path)
    else:
        for paper in node.findall("paper"):
            if (paper.find("title") is not None
                    and paper.find("title").find("fixed-case") is not None):
                # Recreate the title by extracting values from title and
                # fixed-case.
                xmlstr = ET.tostring(paper.find("title"), encoding='utf8',
                                     method='xml').decode("utf-8")
                xmlstr = xmlstr.replace("<fixed-case>", "")
                xmlstr = xmlstr.replace("</fixed-case>", "")
                title = xmlstr[xmlstr.find("<title>") + 7:
                               xmlstr.find("</title>")]
            else:
                title = paper.find("title").text
            # Search for each keyword.
            for key in keys:
                if (title is not None and title.find(key) != -1) or \
                        (paper.find("abstract") is not None
                         and paper.find("abstract").text is not None
                         and paper.find("abstract").text.find(key) != -1):
                    # If the keyword is found, store the paper details.
                    if paper.findall("author") is not None:
                        auth_list = []
                        for child in paper.findall("author"):
                            auth_list.append(child.find("first").text + " "
                                             + child.find("last").text)
                        paper_object = Paper()
                        paper_object.save(year, month, title, auth_list,
                                          country, source, publisher)
                        paper_object.write(out_path)
                    # Once the paper details have been saved for a key,
                    # continue to the next paper.
                    break
def main():
    paper = Paper()
    paper.id = 1
    paper.total_score = 100
    paper.difficulty = 0.72
    paper.points = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    paper.each_point_score = [10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
    paper.each_type_count = [15, 15, 5]
    paper.each_type_score = [30, 30, 40]
    db = DB()
    db.generate_fake(paper)
    genetic = Genetic(paper, db)
    start = time.clock()  # time.perf_counter() on Python 3.8+
    genetic.test_run()
    end = time.clock()
    print "Total time:", end - start, "seconds"
def import_file(self, filename, is_parent=False):
    citations = dict()
    read_all = False
    try:
        with open(filename, errors="backslashreplace") as input:
            p = Paper()
            start = True
            for line in input:
                line = line.replace("\n", "")
                if line == ">>> NEW PAPER <<<":
                    if start:
                        start = False
                    else:
                        self.add_paper(p, is_parent=is_parent)
                        citations[p.citation] = p
                        p = Paper()
                elif line == ">>> ALL DONE <<<":
                    read_all = True
                else:
                    items = line.split(">>>")
                    if len(items) < 3:
                        print("line, items:", line, items)
                    setattr(p, items[1].strip(' '), items[2].strip(' '))
            if p.title:
                self.add_paper(p, is_parent=is_parent)
                citations[p.citation] = p
    except FileNotFoundError:
        pass
    return citations, read_all
def bibtex_entry_to_table(bibtex_entry):
    myPaper = Paper(bibtex_entry)
    id = myPaper.getId()
    if not myPaper.isPaper():
        print('No author or keywords')
        return
    # Reference check
    wd = webdriver.PhantomJS(
        '/home/doyun/phantomjs-2.1.1-linux-x86_64/bin/phantomjs')
    url = ('http://ieeexplore.ieee.org/document/' + str(id)
           + '/?anchor=references')
    print(url)
    while True:
        try:
            wd.get(url)
        except http.client.RemoteDisconnected:
            print('RemoteDisconnected Happened')
            continue
        break
    html_page = wd.page_source
    soup = BeautifulSoup(html_page, 'html.parser')
    ref_dom_list = soup.findAll(
        "a", {"class": "stats-reference-link-viewArticle"})
    regex = re.compile(r'^\D+/(\d+)$')
    refs = [
        int(regex.search(ref_dom.get('href')).group(1))
        for ref_dom in ref_dom_list
    ]
    for ref in refs:
        json_data.append({'id': int(id), 'ref_id': ref})
        print('id = {}, ref_id = {}'.format(id, ref))
    wd.quit()
    return
def __call__(self, fileName, *args, **kw):
    """Initializes :attr:`el <_Raphael.el>` and :attr:`fn <_Raphael.fn>`
    objects and creates :class:`Paper` instance to draw on.

    Parameters might be:

    :param str fileName: file name for saving
    :param width: width of the canvas
    :param height: height of the canvas
    :return: new :class:`Paper` instance

    .. code-block:: python

        paper = Drawing("fname1.svg",640,480)
        paper = Drawing("fname2.svg",640,480,backgroundColor='cyan')
        paper = Drawing("fname3.svg",width=640,height=480)

    or

    :param str fileName: file name for saving
    :param list|tuple attrs: first 4 elements in the list are equal to
        [x, y, width, height]. The rest are element descriptions in
        format {"type": type, <attributes>}
    :return: new :class:`Paper` instance

    .. code-block:: python

        paper = Drawing("fname4.svg",[0,0,640,480,
            {
                "type": "path",
                "path": "M100,200 l50,100",
                "stroke-width": 5,
                "stroke": "blue",
            },
            {
                "type": "rect",
                "x": 100,
                "y": 300,
                "width": 300,
                "height": 50,
                "fill": "red",
                "stroke": "cyan",
            },
        ])
    """
    for k, v in self.el.__dict__.items():
        setattr(RaphaelElement, k, v)
    for k, v in self.fn.__dict__.items():
        setattr(Paper, k, v)
    return Paper(fileName, *args, **kw)
def initialize_parents(self, p_file):
    """
    Add all the parent papers using their .txt reference files.
    Read in anything that's already been calculated and stored in
    parents.txt.
    """
    self.parent_citations, read_all = self.import_file(p_file,
                                                       is_parent=True)
    print("Number of parents read from file:", len(self.all_papers))
    if read_all:
        print("All parents added from file; no lookup necessary")
    else:
        with open(p_file, 'a', errors='backslashreplace') as output:
            for i in range(len(self.bibs)):
                with open(self.bibs[i],
                          errors="backslashreplace") as input:
                    inputstr = input.readline().strip()
                    splits = re.split(r'\[[0-9]+\]', inputstr)
                    citation = splits[1].strip(' ')
                    if citation not in self.parent_citations:
                        print("\nlen(all_papers):", len(self.all_papers))
                        print("Add parent", i + 1, "of", len(self.bibs),
                              "file", self.bibs[i])
                        p = self.add_paper(Paper(), citation,
                                           is_parent=True)
                        self.parent_citations[p.citation] = p
                        p.file_loc = self.bibs[i]
                        self.write_paper(output, p)
                        print("new len(all_papers):", len(self.all_papers))
                    else:
                        self.parent_citations[citation].file_loc = \
                            self.bibs[i]
def index(self):
    """Build openjournal search indices"""
    schema = Schema(attr=TEXT(stored=True), pid=TEXT(stored=True))
    ix = create_in(self.dbname, schema)
    writer = ix.writer()

    def index_title(paper):
        writer.add_document(attr=unicode(prune(paper['title']).lower()),
                            pid=unicode(paper['pid']))

    def index_authors(paper):
        # Use the parameter, not the loop variable from the caller.
        for author in paper.authors:
            writer.add_document(attr=unicode(author.lower()),
                                pid=unicode("XXX"))

    papers = Paper.getall()
    for uuid, p in enumerate(papers):
        index_title(p)
        #index_authors(p)
    writer.commit()
def run_cso_classifier(paper, modules="both", enhancement="first", explanation=False): if modules not in ["syntactic", "semantic", "both"]: raise ValueError( "Error: Field modules must be 'syntactic', 'semantic' or 'both'") if enhancement not in ["first", "all", "no"]: raise ValueError( "Error: Field enhances must be 'first', 'all' or 'no'") if type(explanation) != bool: raise ValueError( "Error: Explanation must be set to either True or False") # Loading ontology and model cso = CSO() model = MODEL() t_paper = Paper(paper, modules) result = Result(explanation) # Passing parameters to the two classes (synt and sema) and actioning classifiers if modules == 'syntactic' or modules == 'both': synt_module = synt(cso, t_paper) result.set_syntactic(synt_module.classify_syntactic()) if explanation: result.dump_temporary_explanation(synt_module.get_explanation()) if modules == 'semantic' or modules == 'both': sema_module = sema(model, cso, t_paper) result.set_semantic(sema_module.classify_semantic()) if explanation: result.dump_temporary_explanation(sema_module.get_explanation()) result.set_enhanced( cso.climb_ontology(getattr(result, "union"), enhancement)) return result.get_dict()
    data_dir + community_filename)
num_top_communities = 30
community_conf_counter_list = []
id_to_community = {}
for communityId in range(num_top_communities):
    community_conf_counter_list.append(Counter())
    for nodeId in community_member_list[communityId]:
        if nodeId not in id_to_community:
            id_to_community[nodeId] = [communityId]
        else:
            id_to_community[nodeId].append(communityId)

# Sequentially parse every paper.
with open(data_dir + data_filename, 'r') as f:
    paper = Paper()
    id_counter = 0
    for line in f:
        line = line.strip("\n")
        # Write out when we have reached the end of a paper.
        # TODO: Remove this early-breaking line.
        # if id_counter == 30000:
        #     break
        if len(line) == 0 or line[0] != '#':
            if id_counter % 10000 == 0:
                print "Parsed file", id_counter
            # Update the conference counter for each community.
            if id_counter in id_to_community:
                for communityId in id_to_community[id_counter]:
                    community_conf_counter_list[communityId][paper.venue] += 1
            paper = Paper()
            new_message = message[:empty_space]
            message_size = len(new_message)
        else:
            new_message = message
        space = new_message.find(' ')
        ink_needed = space
        while space != -1 and ink_needed < self.ink_amount and \
                space < empty_space:
            counter_of_spaces += 1
            ink_needed -= counter_of_spaces
            space = new_message.find(' ', space + 1)
        if message_size > self.ink_amount:
            new_message = new_message[:self.ink_amount + counter_of_spaces]
            self.ink_amount = 0
            paper.add_content(new_message)
        else:
            self.ink_amount -= message_size - counter_of_spaces
            paper.add_content(new_message)


pen = Pen(10)
paper = Paper(35)
pen.write(" Hello, world!", paper)
paper.show()
# pen.write(" Hello, python!", paper)
paper.show()
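# The Paper class this snippet writes to is not shown (the snippet itself
# begins mid-method). A minimal sketch consistent with the calls above,
# reading Paper(35) as a capacity in characters; this is an assumption,
# not the original class:
class Paper:
    """Accumulates written content up to a fixed capacity (assumed)."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.content = ""

    def add_content(self, text):
        # Ignore anything beyond the paper's remaining capacity.
        free = self.capacity - len(self.content)
        self.content += text[:max(free, 0)]

    def show(self):
        print(self.content)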
def add_paper(self, p, citation=None, is_parent=False):
    """
    Add paper p to the database.
    """
    # Don't add an empty paper to the database.
    is_empty = True
    empty_paper = Paper()
    for attr in ["citation", "title", "DOI", "year"]:
        if str(empty_paper.__dict__[attr]) != p.__dict__[attr]:
            is_empty = False
    if is_empty:
        print("DON'T ADD AN EMPTY PAPER, DUMMY")
        return p

    # Look up using CrossRef if given just the citation.
    if citation:
        p.lookup(citation, self.base_url)
    # Unpack/convert attributes that shouldn't be just strings.
    else:
        if p.checked == "False":
            p.checked = 0.0
        for attr in self.int_attrs:
            if p.__dict__[attr] != "None":
                setattr(p, attr, float(p.__dict__[attr]))
        p.subject = eval(p.subject)
        p.item = eval(p.item)
        p.container_title = eval(p.container_title)
        """
        # Leftover from initially adding the journal titles as an attr
        if p.container_title == "None":
            try:
                p.container_title = p.item['container-title']
            except KeyError:
                pass
        """

    p.hash = (p.title, p.year)
    if is_parent:
        p.is_parent = True

    # Deal with the case where the paper hash is already in the database.
    if p.hash in self.all_papers:
        # If it's a duplicate, just return the paper.
        if p.is_duplicate(self.all_papers[p.hash]):
            return p
        # If it's not really a duplicate, re-hash one of them.
        else:
            old_p = self.all_papers[p.hash]
            # The definition of duplicate shouldn't let two non-duplicate
            # papers both be verified.
            if p.verified == 1 and old_p.verified == 1:
                raise ValueError(
                    "THEY CAN'T BE BOTH VERIFIED AND NON-DUPLICATE")
            # If the one already there is correct (and p therefore isn't),
            # keep it as is and hash p by citation.
            if old_p.verified:
                p.hash = (p.citation, "CITATION ONLY")
                self.all_papers[p.hash] = p
                # I don't think this is doing anything and might need
                # to move.
                if is_parent:
                    old_p_kids = self.parents[old_p.hash]
                    self.parents[old_p.hash] = old_p_kids
                    self.parents[p.hash] = set()
            # Otherwise, re-hash the old one and put p in its spot.
            else:
                old_p.hash = (old_p.citation, "CITATION ONLY")
                self.all_papers[old_p.hash] = old_p
                self.all_papers[p.hash] = p
                if is_parent:
                    old_p_kids = self.parents[p.hash]
                    self.parents[old_p.hash] = old_p_kids
                    self.parents[p.hash] = set()
    # If we don't already have the paper, simply add as usual.
    else:
        self.all_papers[p.hash] = p
    # It's a defaultdict, but it's still nice to just put the hash in now.
    if is_parent and p.hash not in self.parents:
        self.parents[p.hash] = set()
    return p
def initialize_children(self):
    """
    Add all the references for each paper in the list of bib files.
    """
    j = 0
    # Go through all the bibliography files.
    for bib in self.bibs:
        j += 1
        with open(bib, errors="backslashreplace") as input:
            # Fetch or add the parent paper.
            splits = re.split(r'\[[0-9]+\]', input.readline().strip())
            parent_citation = clean_citation(splits[1])
            parent = self.parent_citations[parent_citation]

            # Get the right filename for the child file.
            bib0 = bib.replace('\\', '').replace('bibliographies', '')
            bib0 = bib0.strip('.').replace('/', '')
            childfile = "ref_lists/children_of_" + bib0
            old_size = len(self.all_papers)

            # Get any papers we've already written to the child file.
            child_citations, read_all = self.import_file(childfile)
            for citation, p in child_citations.items():
                self.parents[parent.hash].add(p)
                p.file_loc = bib
            if read_all:
                print("(" + str(j) + " of " + str(len(self.bibs)) + ") "
                      + "Bib #" + bib[25:-4]
                      + ": All children added from file")
            else:
                # Go through the rest of the file to look up papers.
                with open(childfile, 'a',
                          errors='backslashreplace') as output:
                    inputstr = input.read()
                    splits = re.split(r'\[[0-9]+\]', inputstr.strip(' '))
                    print("(" + str(j) + " of " + str(len(self.bibs)) + ")",
                          len(splits) - 1, "children,", bib[17:])
                    #print("\t ", len(child_citations),
                    #      "children read from file")
                    for i in range(1, len(splits)):
                        child_citation = clean_citation(splits[i])
                        #print(child_citation)
                        if child_citation not in child_citations:
                            print("\tAdd child", i, "of", len(splits) - 1)
                            print('\t\t' + child_citation[:50])
                            p = self.add_paper(Paper(), child_citation)
                            p.file_loc = bib
                            self.write_paper(output, p)
                            self.parents[parent.hash].add(p)
                num_children = len(self.parents[parent.hash])
                print("    Added", num_children, "children,",
                      len(self.all_papers) - old_size, "new papers")
def ExtractDBLPKey(buf):
    if buf.find('mdate=') < 0:
        return ''
    p1 = buf.find('key="') + len('key="')
    p2 = buf.find('"', p1)
    return buf[p1:p2]


connection = MySQLdb.connect(host="127.0.0.1", user="******",
                             passwd="paper1ens", db="paperlens")
cursor = connection.cursor()
cursor.execute("truncate table paper;")
cursor.execute("truncate table paper_author;")
connection.commit()

data = open("../../../data/dblp.xml")
item = Paper()
paper_types = set(['article', 'inproceedings', 'proceedings', 'book',
                   'incollection', 'phdthesis', 'mastersthesis', 'www'])
author_index = dict()
try:
    n = 0
    for line in data:
        dblp_key = ExtractDBLPKey(line)
        if len(dblp_key) > 0:
            item.dblp_key = dblp_key
        endTag = ExtractEndTag(line)
        if endTag in paper_types and len(item.authors) > 0:
            cursor.execute(
                "insert into paper(id,title,year,booktitle,type,dblp_key,"
                "journal,school,publisher) "
                "values (%s,%s,%s,%s,%s,%s,%s,%s,%s);",
                (n, item.title, item.publish_year, item.booktitle, endTag,
                 item.dblp_key, item.journal, item.school, item.publisher))
            author_rank = 0
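# For illustration, ExtractDBLPKey pulls the key attribute out of a raw DBLP
# XML opening tag; the sample line is made up but follows the DBLP record
# format:
line = '<article mdate="2017-05-28" key="journals/acta/Simon83">'
print(ExtractDBLPKey(line))  # -> journals/acta/Simon83

# Lines without an mdate attribute are skipped.
print(repr(ExtractDBLPKey('<title>Some Paper</title>')))  # -> ''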
def test_from_pdf():
    tests = [
        # ("../../data/papers/2014_2015_CT422_1_1_5.PDF",
        #  "1,1.a,1.b,1.c,2,2.a,2.b,2.c,3,3.a,3.b,3.c,4,4.a,4.b,4.c"),
        ("../../data/papers/2014_2015_CH140_1_1_5.PDF",
         "A,A.1,A.1.i,A.1.i.a,A.1.i.b,A.1.i.c,A.1.i.d,A.1.i.e,A.1.ii,A.1.ii.a,A.1.ii.b,A.1.ii.c,A.1.ii.d,A.1.ii.e,A.1.iii,A.1.iii.a,A.1.iii.b,A.1.iii.c,A.1.iii.d,A.1.iv,A.1.iv.a,A.1.iv.b,A.1.iv.c,A.1.iv.d,A.1.iv.e,A.1.v,A.1.v.a,A.1.v.b,A.1.v.c,A.1.v.d,A.1.vi,A.1.vi.a,A.1.vi.b,A.1.vi.c,A.1.vi.d,A.1.vii,A.1.vii.a,A.1.vii.b,A.1.vii.c,A.1.vii.d,A.1.viii,A.1.viii.a,A.1.viii.b,A.1.viii.c,A.1.viii.d,A.1.ix,A.1.ix.a,A.1.ix.b,A.1.ix.c,A.1.ix.d,A.1.x,A.1.x.a,A.1.x.b,A.1.x.c,A.1.x.d,A.1.xi,A.1.xi.a,A.1.xi.b,A.1.xi.c,A.1.xi.d,A.1.xii,A.1.xii.a,A.1.xii.b,A.1.xii.c,A.1.xii.d,A.2,A.2.i,A.2.ii,A.2.iii,A.2.iii.a,A.2.iii.b,A.2.iii.c,A.2.iii.d,A.2.iv,A.3,A.3.i,A.3.ii,A.3.iii,A.3.iv,A.3.v,A.4,A.4.i,A.4.ii,A.4.ii.a,A.4.ii.b,A.4.ii.c,A.4.ii.d,A.4.iii"),
        ("../../data/papers/2014_2015_CT420_1_1_2.PDF",
         "A,A.1,A.1.i,A.1.ii,A.1.iii,A.1.iv,A.2,A.2.i,A.2.ii,A.2.iii,A.3,A.3.i,A.3.ii,B,B.4,B.4.i,B.4.i.a,B.4.i.b,B.4.i.c,B.4.i.d,B.4.i.e,B.4.ii,B.4.iii,B.4.iv,B.5,B.5.i,B.5.ii,B.5.ii.a,B.5.ii.b,B.5.ii.c,B.5.iii,B.5.iv,B.6,B.6.i,B.6.ii,B.6.iii,B.6.iv"),
        ("../../data/papers/CT422-1-2014-2015-2-Autumn.pdf",
         "1,1.i,1.ii,1.iii,2,2.i,2.ii,2.iii,3,3.i,3.ii,3.iii,4,4.i,4.ii")
    ]
    for paper_path, expected in tests:
        full_paper_path = os.path.join(os.path.dirname(__file__), paper_path)
        with open(full_paper_path) as pdf:
            pages = slate.PDF(pdf)
            questions = Paper.parse_pages(pages[1:])
        for question in questions:
            print question
        index = index_string(questions)
        print "Paper: %s == '%s'" % (os.path.basename(paper_path), index)
        assert index == expected
        break

# def test_unparsable():
#     with raises(UnparseableException):
#         with open(os.path.join(os.path.dirname(__file__),
#                   "../../data/CT422-1-2013-2014-2-Autumn.pdf")) as pdf:
#             pages = slate.PDF(pdf)
#             Paper.parse_pages(pages[1:])

# def test_index():
#     Paper.index.parseString("1.")
#     Paper.index.parseString("(1)")
#     Paper.index.parseString("[1]")
#     Paper.index.parseString("(a)")
#     Paper.index.parseString("a.")
#     Paper.index.parseString("[a]")
#     assert Paper.index.parseString("i.")[0].i == 1
#     assert Paper.index.parseString("ii.")[0].i == 2
#     assert Paper.index.parseString("iv.")[0].i == 4

# def test_parser_section():
#     Paper.section.leaveWhitespace().parseString("Section 1 ")

# def test_index():
#     paper = session.query(Paper).filter(Paper.id == 3878).first()
#     Paper.PAPER_DIR = "/tmp"
#     print paper
#     paper.index()

# def test_get_question():
#     paper = session.query(Paper).filter(Paper.id == 3907).first()
#     print paper.get_question(0, 0)

# def test_get_questions():
#     paper = session.query(Paper).filter(Paper.id == 3907).first()
#     print paper.get_questions()
from paper import Paper

p = Paper('10.1.1.1.1577')
c = Crossref(mailto="*****@*****.**")
headers = {"Ocp-Apim-Subscription-Key": "ba7fae63586a4942bb49403fad4009d3"}
expr = "And(Composite(AA.AfN=='brock university'),Y=2018)"
r = requests.get(
    "https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate?expr="
    + expr + "&model=latest&count=5&offset=171&attributes=Id,E,J.JN,C.CN,"
    "RId,F.FN,Ti,Y,D,AA.AuN,AA.AuId,AA.AfN,AA.AfId",
    headers=headers)
data = r.json()['entities']
for entity in data:
    paper = Paper(entity)
    print(vars(paper))
    print("")
    paper.getReferencesDOI()
    if paper.references is None:
        paper.getReferencesNoDOI()
    if paper.references is not None:
        citationID = 0
        for reference in paper.references:
            time.sleep(2)
            #print(reference)
            #print("")
def test__file_id_to_location(self):
    location = Paper._file_id_to_location(self.id)
    self.assertEqual(location, '10/1/1/582/1/10.1.1.582.1')
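# The mapping this test pins down is recoverable from the assertion: the
# dotted id becomes a directory path of its components with the full id
# appended. A sketch of a function that would pass the test (the real
# method, a static/private member of Paper, may differ):
def _file_id_to_location(file_id):
    # '10.1.1.582.1' -> '10/1/1/582/1/10.1.1.582.1'
    return "/".join(file_id.split(".") + [file_id])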
def __init__(self):
    Paper.__init__(self)
    self.children = list()
# Index(['Authors', 'Author(s) ID', 'Title', 'Year', 'Source title', 'Volume',
#        'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by',
#        'DOI', 'Link', 'Affiliations', 'Authors with affiliations', 'Abstract',
#        'Author Keywords', 'Index Keywords', 'Molecular Sequence Numbers',
#        'Chemicals/CAS', 'Tradenames', 'Manufacturers', 'Funding Details',
#        'Funding Text 1', 'Funding Text 2', 'Funding Text 3', 'References',
#        'Correspondence Address', 'Editors', 'Sponsors', 'Publisher',
#        'Conference name', 'Conference date', 'Conference location',
#        'Conference code', 'ISSN', 'ISBN', 'CODEN', 'PubMed ID',
#        'Language of Original Document', 'Abbreviated Source Title',
#        'Document Type', 'Publication Stage', 'Access Type', 'Source', 'EID'],
#       dtype='object')
i = 0
for dataframe in list_files_csv:
    paper = Paper(dataframe)
    # Authors
    paper.str_covert(column_name="Authors")
    paper.convert(column_name="Authors", deli=",")
    # Author(s) ID
    paper.str_covert(column_name="Author(s) ID")
    paper.convert(column_name="Author(s) ID", deli=";")
    # Title
    paper.str_covert(column_name="Title")
    paper.convert(column_name="Title", deli=",")
    # Year
    pass
def draw(self, context, hints):
    Paper.draw(self, context)
    for child in sorted(self.children, key=lambda child: child.z):
        child.hints = hints  # TODO: Not here
        child.draw(context)
def test__xml_file_to_dict(self):
    xml_dict = Paper._xml_file_to_dict(self.id)
    self.assertEqual(type(xml_dict), OrderedDict)
    self.assertIsNotNone(xml_dict['document']['@id'])
def download(url, paper=None):
    """
    Main entry point for executing paperbot's primary function, paper
    fetching. The given url may be to a pdf file, which should be archived,
    or it may be to an academic publisher's website which points to a paper.
    The paper needs to be downloaded and the metadata should be stored.

    Returns a tuple of (paper, json_path, pdf_path, logpath).

    :param url: url to fetch and examine
    :type url: str
    """
    # store logs in tempfile
    (templogpath, loghandler) = loghijack()

    if paper is None:
        paper = Paper.create({})

    # clean up url if necessary
    url = run_url_fixers(url)

    # whether or not metadata has already been populated
    populated_metadata = False

    for (url2, response) in iterdownload(url, paper=paper):
        if is_response_pdf(response):
            log.debug("Got pdf.")
            pdfcontent = remove_watermarks(response.content)
            paper.pdf = pdfcontent
            store(paper)
            break

        paper.html = response.content

        # Was not pdf. Attempt to parse the HTML based on normal expected
        # HTML elements. The HTML elements may say that the actual pdf url
        # is something else. If this happens, then attempt to download that
        # pdf url instead and then break out of this loop.

        # no reason to get same metadata on every iteration of loop
        if not populated_metadata:
            tree = parse_html(response.content)

            # most publishers show paper metadata in html in the same way
            populate_metadata_from_tree(tree, paper)

            # TODO: better way to check if populate_metadata_from_tree did
            # anything useful?
            if paper.title in [None, ""]:
                log.debug("# TODO: parse metadata from html using plugins "
                          "here")
            else:
                populated_metadata = True

        # can't try anything else if the url is still bad
        if paper.pdf_url in [None, ""]:
            continue

        # Normalize the two urls. The url from the metadata on the page
        # might be different from the url that was originally passed in,
        # even though both urls might still refer to the same resource.
        if is_same_url(url, paper.pdf_url):
            # pdf_url is same as original url, no pdf found yet. This
            # happens when the pdf url is correct, but the publisher is
            # returning html instead. And the html happens to reference the
            # url that was originally requested in the first place. Argh.
            continue

        log.debug("Switching activity to pdf_url {}".format(paper.pdf_url))

        # paper pdf is stored at a different url. Attempt to fetch that
        # url now. Only do this if pdf_url != url because otherwise
        # this will be an endless loop.
        for (url3, response2) in iterdownload(paper.pdf_url, paper=paper):
            if is_response_pdf(response2):
                log.debug("Got pdf on second-level page.")
                # use the second-level response here, not the first one
                pdfcontent = remove_watermarks(response2.content)
                paper.pdf = pdfcontent
                store(paper)
                break
        else:
            log.debug("Couldn't download pdf from {}".format(paper.pdf_url))
        break

    # was pdf downloaded?
    if (hasattr(paper, "pdf") and paper.pdf not in [None, ""]) \
            or os.path.exists(paper.file_path_pdf):
        fetched = True
    else:
        fetched = False

    hasdoi = (paper.doi not in [None, ""])

    if hasdoi:
        # check if libgen has this paper already
        libgenhas = check_libgen_has_paper(paper.doi)
        if fetched and not libgenhas:
            # upload if libgen doesn't already have it
            upload_to_libgen(paper.file_path_pdf, paper.doi)
        elif not fetched and libgenhas:
            urldoi = make_libgen_doi_url(paper.doi)
            # get from libgen
            log.debug("Haven't yet fetched paper. Have doi. "
                      "Also, libgenhas.")
            log.debug("HTTP GET {}".format(urldoi))
            response = requests.get(urldoi, headers=DEFAULT_HEADERS)
            if is_pdf_response(response):
                log.debug("Got pdf from libgen.")
                # skip pdfparanoia because it's from libgen
                pdfcontent = response.content
                paper.pdf = pdfcontent
                store(paper)
                fetched = True
            else:
                log.debug("libgen lied about haspdf :(")
    else:
        log.debug("Don't know doi, can't check if libgen has this paper.")
        libgenhas = None

    # store(paper) usually handles json but in case of failure there needs
    # to be an explicit save of paper metadata.
    if not fetched:
        store_json(paper)

    # move logs into position
    logpath = store_logs(paper, templogpath)

    # remove loghandler from logger
    mainlogger = logging.getLogger("paperbot")
    mainlogger.handlers.remove(loghandler)

    return (paper, paper.file_path_json, paper.file_path_pdf, logpath)
class ArmSim(object):
    # ===== Paper location input for simulation =====
    Rp = Rrpy(pi/2, 0, 0)  # Roll-pitch-yaw rotation parameterization
    PAPER_BASE = np.identity(4)  # Paper frame rigid body transform
    PAPER_BASE[0:3, 0:3] = Rp  # Paper rotation matrix
    PAPER_BASE[0:3, 3] = np.asfarray([-Paper.X_SIZE/2, -50, 0])  # Paper origin

    # Arm location
    Ra = Rrpy(-pi/2 - 0.05, pi/2, 0)  # Arm roll-pitch-yaw base orientation
    ARM_BASE = np.identity(4)  # Arm fixed-base rigid body transform
    ARM_BASE[0:3, 3] = np.asfarray([250, -50, 0])  # Arm origin
    ARM_BASE[0:3, 0:3] = Ra  # Set rotation
    INITIAL_CONFIG = np.asfarray([pi/4, pi/4, 0])  # Initial arm joint configuration

    # ===== Set of waypoints on paper =====
    WAYPOINTS = [[[10, 0], [10, 30]], [[10, 15], [20, 15]], [[20, 0], [20, 30]]]

    def __init__(self):
        self.arm = Arm(self.ARM_BASE, 100, 160, 200, 1, -20)  # TODO: Initialize arm object
        #self.paperPlot = PaperPlot()
        #self.forcePlot = ForcePlot()
        self.paper = Paper(self.PAPER_BASE)  # Initialize paper
        self.armPlot = ArmPlot()  # Initialize the 3D plot
        self.armPlot.plotPaper(self.paper)
        self.armPlot.plotArm(self.arm)
        self.controller = DeltaController(self.arm, self.paper)

    # Run the simulation
    def run(self, strokes, initialConfig, minStep=100):
        initialConfig = np.asfarray(initialConfig)
        current = initialConfig  # Current configuration
        self.worldWaypoints = self.paper.strokesToWorld(strokes)

        # Loop over waypoints
        #for i in range(0, len(strokes)):
        '''
        ikConfig = np.append(self.arm.planarIK(strokes[i][0]), [50])  # Compute IK
        print str(ikConfig)
        nsteps = minStep
        configs = interpolateLinear(current, ikConfig, nsteps)  # Interpolate trajectory
        print configs.shape
        '''
        configs = self.controller.generateTrajectory(strokes)

        # Wait for replay.
        # Loop over interpolated configurations.
        while True:
            for k in range(0, len(configs)):
                print 'Step', k
                #print str(configs[k])
                # Update arm position
                self.arm.setConfiguration(self.rx64RoundConfig(configs[k]))
                self.armPlot.clear()
                self.armPlot.plotArm(self.arm)  # Plot
                self.armPlot.plotPaper(self.paper)  # Plot paper again
                self.armPlot.plotIdealPath(self.worldWaypoints)
                #print 'Arm Position', str(self.arm.eePosition())
                self.arm.printEEPosition()
                draw()
                self.armPlot.fig.show()
                sleep(0.0001)
            c = input('Enter some string to continue')
            #current = ikConfig

    # Round the configuration to RX64 angles
    def rx64RoundConfig(self, config, randomness=0):
        nbits = 10
        rx64Range = 5.0 * pi / 3.0
        dConfig = (config / rx64Range) * pow(2.0, nbits)
        #print 'Discrete configuration:', str(dConfig)
        dConfig = np.round(dConfig)
        roundedConfig = dConfig / pow(2.0, nbits) * rx64Range
        #print 'Rounded Configuration', str(roundedConfig)
        # Generate Gaussian
        return roundedConfig