def createManyCrystalsAndTrees(self):
    """Register snowflake/tree prototypes and return 100 cloned pairs."""
    keeper = PrototypeKeeper()
    keeper.addCloneable('snowflake', Paper('雪の結晶'))
    keeper.addCloneable('tree', Paper('もみの木'))
    clones = []
    for _ in range(100):
        clones.append(keeper.getClone('snowflake'))
        clones.append(keeper.getClone('tree'))
    return clones
def create_paper(self, paper_json):
    """Build a Paper from a Europe PMC-style JSON record.

    Returns None when the record has no 'pmid'. If self.get_date is set
    and a DOI is present, the issued date is resolved via the CrossRef
    API; otherwise it falls back to 'pubYear' (January 1st) when available.
    """
    # dict.has_key() is Python 2 only; use `in` instead.
    if 'pmid' not in paper_json:
        return None
    date = None
    if self.get_date and 'doi' in paper_json:
        res = requests.get(
            "http://api.crossref.org/works/%s" % paper_json['doi'])
        if res.status_code == 200:
            date = json.loads(
                res.content)['message']['issued']['date-parts'][0]
            # CrossRef may omit month and/or day; pad with 1 (Jan / 1st).
            while len(date) < 3:
                date.append(1)
            year, month, day = date
            date = str(datetime.date(year, month, day))
    if date is None and 'pubYear' in paper_json:
        date = str(datetime.date(int(paper_json['pubYear']), 1, 1))
    return Paper(
        api=self.api_name,
        # .get() instead of .setdefault(): don't mutate the caller's dict
        # just to read an optional field.
        title=paper_json.get('title'),
        authors=paper_json.get('authorString'),
        date=date,
        doi=paper_json.get('doi'),
        api_id="%s,%s" % (paper_json['source'], paper_json['pmid']),
        isOpenAccess=paper_json['isOpenAccess'] == "Y",
        global_citation_count=paper_json['citedByCount'],
        has_references=(paper_json['hasReferences'] == "Y"))
def paper_to_list():
    """Sentence-tokenize every file in PAPER_DIR and wrap each in a Paper."""
    papers = []
    # enumerate replaces the hand-rolled counter; the index doubles as
    # the paper id and as progress output.
    for idx, path in enumerate(iterate_folder(PAPER_DIR)):
        print(idx)
        sentences = sent_tokenize_file(path)
        papers.append(
            Paper(idx, sentences[0], sentences[1], sentences[2], sentences[3]))
    return papers

#csvimport()
#csvexport()
#print(paper_to_list()[0].cleared_paper)

# //store the sentences in a file
# writefile = io.open('S:\\VMs\\Shared\\Maindata.txt', 'w', encoding="utf-8-sig")
# for file in filelist:
#     i+=1
#     print(i)
#     for line in tokenize_file(file):
#         writefile.write(line + "\n")
#
# writefile.close()
def test_paper_can_be_written_to(self):
    """Text written to a Paper reads back verbatim."""
    message = "I'm better than papyrus"
    sheet = Paper()
    sheet.write(message)
    self.assertEqual(message, sheet.read())
def parse_file(self, filename):
    """Parse a reading-list file into (papers, sections).

    Section-header lines open a new Section; every subsequent paper line
    is attached to the most recently seen section.
    """
    papers, sections = [], []
    changed_titles = self.get_changed_papers_titles()
    current_section = None
    with codecs.open(filename, encoding='utf-8', mode='r',
                     buffering=1, errors='strict') as handle:
        for line in handle.read().splitlines():
            section_match = FileParser.check_all_regexes(
                self.section_regexes, line)
            if section_match:
                current_section = Section(section_match.groupdict()['name'])
                sections.append(current_section)
                continue
            paper_match = FileParser.check_all_regexes(
                self.paper_regexes, line)
            if not paper_match:
                continue
            fields = paper_match.groupdict()
            title = fields['title']
            papers.append(Paper({
                'title': title,
                'publisher': fields['publisher'],
                'url': fields['url'],
                'section': current_section,
                'notes_path': fields.get('notes_path'),
                'changed': title in changed_titles,
                'is_read': FileParser.is_read(fields['read']),
                'tags': fields.get('tags')}))
    return papers, sections
def getLinks(self, page):
    """Scrape IEEE Xplore search-result items into Paper objects.

    Fills in each paper's document link, title, authors and conference
    from the anchors inside every result <div>, appends it to
    self.papers, and returns the accumulated list.
    """
    soup = BeautifulSoup(page, 'html.parser')
    result_items = soup.find_all("div", attrs={"class": "result-item-align"})
    for item in result_items:
        paper = Paper('')
        for anchor in item.findChildren('a'):
            if 'href' not in anchor.attrs:
                continue
            href = anchor['href']
            if 'document' in href:
                paper.linkPaper = 'https://ieeexplore.ieee.org' + href
                # Only the first 'document' anchor supplies the title;
                # later ones must not overwrite it.
                if paper.titlePaper == '':
                    paper.titlePaper = anchor.text
            if 'author' in href:
                paper.author.append(anchor.text)
            if 'conhome' in href:
                paper.conference = anchor.text
        '''
        if 'class' in child.attrs:
            if 'media' in child['class']:
                urlIdeia = self.urlRoot + child['href']
                print(urlIdeia)
                self.papers.append(Ideia(urlIdeia))
        '''
        print(paper)
        self.papers.append(paper)
    return self.papers
def add_paper(filename_, bibtex_name, path=None):
    """Add a PDF to the bibliography `bibtex_name`.

    The arXiv code is taken from the filename when it looks like
    '<ref>.XXXX.YYYYY[.pdf]'; otherwise it is extracted from the PDF
    itself via get_arxiv_code(). On success the file is moved to done/,
    on failure to failed/.

    Returns:
        (status, ref) where status is 'ok', 'repeated' or 'fail'.
    """
    if not path:
        path = './'
    root = path
    done_path = os.path.join(root, 'done')
    failed_path = os.path.join(root, 'failed')
    parts = filename_.split(".")
    # Guard the part count: the original indexed parts[1]/parts[2]
    # unconditionally, raising IndexError for names like 'paper.pdf'.
    if len(parts) >= 3 and len(parts[1]) == 4 and len(parts[2]) == 5:
        ref = parts[0]
        code = parts[1] + "." + parts[2]
    else:
        code = get_arxiv_code(filename_, root)
        ref = 'ADD_REF'
    if code is not None:
        # Code came from the filename OR the pdf was downloaded from ArXiv.
        paper = Paper(code=code, bibtex_name=bibtex_name)
        if logger.check_ref(bibtex_name, ref):
            paper.add_bib(ref)
            paper.add_abstract(ref)
            shutil.move(os.path.join(root, filename_), done_path)
            print(filename_, " paper added to", bibtex_name)
            status = 'ok'
        else:
            print(filename_, " paper already exists in", bibtex_name)
            status = 'repeated'
    else:
        print(filename_, 'paper was not able to being added')
        shutil.move(os.path.join(root, filename_), failed_path)
        status = 'fail'
    return status, ref
def iterate(keys, node, year, month, country, publisher, source, out_path):
    """
    Iterate through the elements and store the details in the instance
    of Paper.

    Recurses until nodes with <paper> children are reached, then matches
    each paper's title/abstract against the keywords and writes matches
    out via Paper.save()/Paper.write().
    """
    if node is None:
        return
    if len(node.findall("paper")) == 0:
        # No <paper> children here: recurse into child nodes to search
        # for the paper element.
        for child in node:
            iterate(keys, child, year, month, country, publisher, source,
                    out_path)
        return
    for paper in node.findall("paper"):
        title_el = paper.find("title")
        # Guard: a <paper> without a <title> used to crash with
        # AttributeError when reading .text on None.
        if title_el is None:
            title = None
        elif title_el.find("fixed-case") is not None:
            # Recreate the title by stripping <fixed-case> wrappers from
            # the serialized XML and slicing out the <title> body.
            xmlstr = ET.tostring(title_el, encoding='utf8',
                                 method='xml').decode("utf-8")
            xmlstr = xmlstr.replace("<fixed-case>", "")
            xmlstr = xmlstr.replace("</fixed-case>", "")
            title = xmlstr[xmlstr.find("<title>") + 7:
                           xmlstr.find("</title>")]
        else:
            title = title_el.text
        # Search for each keyword in title or abstract.
        for key in keys:
            abstract = paper.find("abstract")
            if (title is not None and title.find(key) != -1) or (
                    abstract is not None and abstract.text is not None
                    and abstract.text.find(key) != -1):
                # findall() never returns None; kept for parity with the
                # original intent of "has authors".
                if paper.findall("author") is not None:
                    auth_list = []
                    for child in paper.findall("author"):
                        auth_list.append(child.find("first").text + " " +
                                         child.find("last").text)
                    paper_object = Paper()
                    paper_object.save(year, month, title, auth_list,
                                      country, source, publisher)
                    paper_object.write(out_path)
                # Once saved for one keyword, continue to the next paper.
                break
def csvimport():
    """Rebuild the preprocessed paper list from the exported CSV files.

    papers.csv rows:    (paper index, field1, field2)
    sentences.csv rows: (paper index, original sentence, cleared sentence)
    """
    preprocessed_paper_list = []
    # Context managers: the original left both file handles open.
    with open('./export/papers.csv') as papers_file, \
            open('./export/sentences.csv') as sentences_file:
        papers_data = unicodecsv.reader(
            (x.replace('\0', '') for x in papers_file),
            encoding='utf-8-sig', delimiter=';')
        sentences_data = unicodecsv.reader(
            (x.replace('\0', '') for x in sentences_file),
            encoding='utf-8-sig', delimiter=';')
        # Skip the header rows. The next() builtin works on Python 2
        # and 3, unlike the .next() method removed in Python 3.
        next(papers_data)
        next(sentences_data)
        # Read Papers
        for row in papers_data:
            preprocessed_paper_list.append(
                Paper(int(row[0]), [], [], row[1], row[2]))
        # Read sentences, attaching each to its paper by index.
        for row in sentences_data:
            idx = int(row[0])
            preprocessed_paper_list[idx].original_paper.append(row[1])
            preprocessed_paper_list[idx].cleared_paper.append(row[2])
    return preprocessed_paper_list
def createManyCrystals(self):
    """Prepare one snowflake prototype and return 100 clones of it."""
    prototype = Paper('雪の結晶')
    self.drawCrystal(prototype)
    self.cutAccordanceWithLine(prototype)
    return [prototype.createClone() for _ in range(100)]
def test_eraser_erases_the_next_occurence_of_text():
    """Each erase() call blanks out the next occurrence of the word."""
    paper = Paper("How much wood would a woodchuck chuck if a woodchuck could chuck wood?")
    eraser = Eraser()
    for _ in range(2):
        eraser.erase(paper, "chuck")
    assert paper.buffer == "How much wood would a woodchuck chuck if a wood could wood?"
def edit_paper():
    # Persist edits to an already-read paper in the database.
    params = request.form.to_dict()
    print('params:', params)
    # Checkboxes are simply absent from the form when unchecked.
    params['inQueue'] = 1 if 'inQueue' in params else 0
    Paper(**params).update()
    return redirect('/')
    # @TODO highlight paper in list after redirect.
def __init__(self):
    '''
    Creates an empty Harmonizer with
    @self nmarkers the number of markers
    @self markers  the markers
    @self paper    the paper
    @self data     the actual grades and the grades from markers
    '''
    self.nmarkers = 0     # number of markers registered so far
    self.markers = []     # the marker objects
    self.paper = Paper()  # the paper being harmonized
    self.data = []        # actual grades plus the grades from markers
def import_file(self, filename, is_parent=False):
    """Read papers from a '>>> NEW PAPER <<<'-delimited text file.

    Each attribute line has the form 'key >>> value' and is applied to
    the current Paper via setattr.

    Returns:
        (citations, read_all): dict mapping citation -> Paper, and a
        flag that is True when the '>>> ALL DONE <<<' sentinel was seen.
        A missing file yields ({}, False).
    """
    citations = dict()
    read_all = False
    try:
        # 'infile' avoids shadowing the builtin input().
        with open(filename, errors="backslashreplace") as infile:
            p = Paper()
            start = True
            for line in infile:
                line = line.replace("\n", "")
                if line == ">>> NEW PAPER <<<":
                    if start:
                        start = False
                    else:
                        self.add_paper(p, is_parent=is_parent)
                        citations[p.citation] = p
                        p = Paper()
                elif line == ">>> ALL DONE <<<":
                    read_all = True
                else:
                    items = line.split(">>>")
                    if len(items) < 3:
                        # Malformed line: report and skip it. The
                        # original fell through to items[2] and raised
                        # IndexError.
                        print("line, items:", line, items)
                        continue
                    setattr(p, items[1].strip(' '), items[2].strip(' '))
        # Flush the final paper if it actually has content.
        if p.title:
            self.add_paper(p, is_parent=is_parent)
            citations[p.citation] = p
    except FileNotFoundError:
        pass
    return citations, read_all
def __call__(self, fileName, *args, **kw):
    """Initializes :attr:`el <_Raphael.el>` and :attr:`fn <_Raphael.fn>`
    objects and creates :class:`Paper` instance to draw on.

    Parameters might be:

    :param str fileName: file name for saving
    :param width: width of the canvas
    :param height: height of the canvas
    :return: new :class:`Paper` instance

    .. code-block:: python

        paper = Drawing("fname1.svg",640,480)
        paper = Drawing("fname2.svg",640,480,backgroundColor='cyan')
        paper = Drawing("fname3.svg",width=640,height=480)

    or

    :param str fileName: file name for saving
    :param list|tuple attrs: first 4 elements in the list are equal to
        [x, y, width, height]. The rest are element descriptions in
        format {"type": type, <attributes>}
    :return: new :class:`Paper` instance

    .. code-block:: python

        paper = Drawing("fname4.svg",[0,0,640,480, {
            "type": "path",
            "path": "M100,200 l50,100",
            "stroke-width": 5,
            "stroke": "blue",
        }, {
            "type": "rect",
            "x": 100,
            "y": 300,
            "width": 300,
            "height": 50,
            "fill": "red",
            "stroke": "cyan",
        }, ])
    """
    # dict.items() works on both Python 2 and 3; the original used
    # iteritems(), which was removed in Python 3.
    for k, v in self.el.__dict__.items():
        setattr(RaphaelElement, k, v)
    for k, v in self.fn.__dict__.items():
        setattr(Paper, k, v)
    return Paper(fileName, *args, **kw)
def main():
    # Build a sample exam-paper specification and benchmark the genetic
    # paper-assembly algorithm against generated fake DB data.
    paper = Paper()
    paper.id = 1
    paper.total_score = 100
    paper.difficulty = 0.72
    # Ten knowledge points, each worth 10 marks.
    paper.points = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    paper.each_point_score = [10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
    # Three question types: 15 + 15 + 5 questions scoring 30/30/40.
    paper.each_type_count = [15, 15, 5]
    paper.each_type_score = [30, 30, 40]
    db = DB()
    db.generate_fake(paper)
    genetic = Genetic(paper, db)
    # NOTE: Python 2 code (print statement, u-prefix output below);
    # time.clock() was removed in Python 3.8.
    start = time.clock()
    genetic.test_run()
    end = time.clock()
    # Prints the total elapsed time in seconds.
    print u"总共用时:", end - start, " 秒"
def initialize_parents(self, p_file):
    """
    Add all the parent papers using their .txt reference files.
    Read in anything that's already been calculated and stored in
    parents.txt, then look up only the parents that are still missing.
    """
    self.parent_citations, read_all = self.import_file(p_file,
                                                       is_parent=True)
    print("Number of parents read from file:", len(self.all_papers))
    if read_all:
        print("All parents added from file; no lookup necessary")
    else:
        # Append any parents not yet recorded in p_file, looking each
        # one up and writing it back for next time.
        with open(p_file, 'a', errors='backslashreplace') as output:
            for i in range(len(self.bibs)):
                with open(self.bibs[i],
                          errors="backslashreplace") as input:
                    # First line looks like '[12] Some citation text';
                    # split off the bracketed reference number.
                    inputstr = input.readline().strip()
                    splits = re.split('\[[0-9]+\]', inputstr)
                    citation = splits[1].strip(' ')
                    if citation not in self.parent_citations:
                        print("\nlen(all_papers):", len(self.all_papers))
                        print("Add parent", i + 1, "of", len(self.bibs),
                              "file", self.bibs[i])
                        # Empty Paper + citation triggers a CrossRef
                        # lookup inside add_paper.
                        p = self.add_paper(Paper(), citation,
                                           is_parent=True)
                        self.parent_citations[p.citation] = p
                        p.file_loc = self.bibs[i]
                        self.write_paper(output, p)
                        print("new len(all_papers):",
                              len(self.all_papers))
                    else:
                        # Already known: just record where its bib lives.
                        self.parent_citations[
                            citation].file_loc = self.bibs[i]
def run_cso_classifier(paper, modules="both", enhancement="first", explanation=False):
    """Classify a paper against the CSO ontology.

    Args:
        paper: dict of the paper's text fields (e.g. title, abstract).
        modules: 'syntactic', 'semantic' or 'both' - which classifier(s)
            to run.
        enhancement: 'first', 'all' or 'no' - how far to climb the
            ontology when enhancing the result.
        explanation: whether to include an explanation in the result.

    Returns:
        dict with the classification result.

    Raises:
        ValueError: if any parameter has an invalid value.
    """
    if modules not in ("syntactic", "semantic", "both"):
        raise ValueError(
            "Error: Field modules must be 'syntactic', 'semantic' or 'both'")
    if enhancement not in ("first", "all", "no"):
        raise ValueError(
            "Error: Field enhances must be 'first', 'all' or 'no'")
    # isinstance() instead of a type() equality check.
    if not isinstance(explanation, bool):
        raise ValueError(
            "Error: Explanation must be set to either True or False")

    # Loading ontology and model
    cso = CSO()
    model = MODEL()
    t_paper = Paper(paper, modules)
    result = Result(explanation)

    # Passing parameters to the two classes (synt and sema) and
    # actioning classifiers
    if modules in ("syntactic", "both"):
        synt_module = synt(cso, t_paper)
        result.set_syntactic(synt_module.classify_syntactic())
        if explanation:
            result.dump_temporary_explanation(synt_module.get_explanation())
    if modules in ("semantic", "both"):
        sema_module = sema(model, cso, t_paper)
        result.set_semantic(sema_module.classify_semantic())
        if explanation:
            result.dump_temporary_explanation(sema_module.get_explanation())

    result.set_enhanced(
        cso.climb_ontology(getattr(result, "union"), enhancement))
    return result.get_dict()
def bibtex_entry_to_table(bibtex_entry):
    """Scrape a paper's IEEE Xplore reference list into json_data.

    Appends {'id': <paper id>, 'ref_id': <referenced id>} rows to the
    module-level json_data list. Entries without author or keywords are
    skipped.
    """
    myPaper = Paper(bibtex_entry)
    id = myPaper.getId()
    if not (myPaper.isPaper()):
        print('No author or keywords')
        return
    # Reference check
    wd = webdriver.PhantomJS(
        '/home/doyun/phantomjs-2.1.1-linux-x86_64/bin/phantomjs')
    try:
        url = 'http://ieeexplore.ieee.org/document/' + str(
            id) + '/?anchor=references'
        print(url)
        # Retry until the page loads; the remote end occasionally drops
        # the connection.
        while True:
            try:
                wd.get(url)
            except http.client.RemoteDisconnected:
                print('RemoteDisconnected Happened')
                continue
            break
        html_page = wd.page_source
        soup = BeautifulSoup(html_page, 'html.parser')
        ref_dom_list = soup.findAll(
            "a", {"class": "stats-reference-link-viewArticle"})
        # hrefs end in '/<numeric id>'; capture the trailing number.
        regex = re.compile(r'^\D+/(\d+)$')
        refs = [
            int(regex.search(ref_dom.get('href')).group(1))
            for ref_dom in ref_dom_list
        ]
        for ref in refs:
            json_data.append({'id': int(id), 'ref_id': ref})
            print('id = {}, ref_id = {}'.format(id, ref))
    finally:
        # Always release the browser, even when scraping raises — the
        # original leaked the PhantomJS process on any exception.
        wd.quit()
    return
# Smoke-test: construct a Paper from a CiteSeerX-style document id.
from paper import Paper

p = Paper('10.1.1.1.1577')
data_dir + community_filename) num_top_communities = 30 community_conf_counter_list = [] id_to_community = {} for communityId in range(num_top_communities): community_conf_counter_list.append(Counter()) for nodeId in community_member_list[communityId]: if nodeId not in id_to_community: id_to_community[nodeId] = [communityId] else: id_to_community[nodeId].append(communityId) # Sequentially parse every paper. with open(data_dir + data_filename, 'r') as f: paper = Paper() id_counter = 0 for line in f: line = line.strip("\n") # Write out when we have reached the end of a paper. # TODO: Remove this early-breaking line. # if id_counter == 30000: # break if len(line) == 0 or line[0] != '#': if id_counter % 10000 == 0: print "Parsed file", id_counter # Update conference counter for community. if id_counter in id_to_community: for communityId in id_to_community[id_counter]: community_conf_counter_list[communityId][paper.venue] += 1 paper = Paper()
new_message = message[:empty_space] message_size = len(new_message) else: new_message = message space = new_message.find(' ') ink_needed = space while space != -1 and ink_needed < self.ink_amount and space < \ empty_space: counter_of_spaces += 1 ink_needed -= counter_of_spaces space = new_message.find(' ', space + 1) if message_size > self.ink_amount: new_message = new_message[:self.ink_amount + counter_of_spaces] self.ink_amount = 0 paper.add_content(new_message) else: self.ink_amount -= message_size - counter_of_spaces paper.add_content(new_message) pen = Pen(10) paper = Paper(35) pen.write(" Hello, world!", paper) paper.show() # pen.write(" Hello, python!", paper) paper.show()
def add_paper(self, p, citation=None, is_parent=False):
    """ Add paper p to the database.

    If `citation` is given, p's fields are first fetched from CrossRef;
    otherwise string-encoded attributes loaded from file are converted
    back to their real types. Hash collisions between genuinely
    different papers are resolved by re-hashing one of them under its
    citation.
    """
    # Don't add an empty paper to the database
    is_empty = True
    empty_paper = Paper()
    for attr in ["citation", "title", "DOI", "year"]:
        if str(empty_paper.__dict__[attr]) != p.__dict__[attr]:
            is_empty = False
    if is_empty:
        print("DON'T ADD AN EMPTY PAPER, DUMMY")
        return p
    # Look up using CrossRef if given just the citation
    if citation:
        p.lookup(citation, self.base_url)
    # Unpack/convert attributes that shouldn't be just strings
    else:
        if p.checked == "False":
            p.checked = 0.0
        for attr in self.int_attrs:
            if p.__dict__[attr] != "None":
                setattr(p, attr, float(p.__dict__[attr]))
        # NOTE(review): eval() on strings read back from our own files —
        # acceptable for trusted local data, but unsafe on untrusted
        # input; consider ast.literal_eval.
        p.subject = eval(p.subject)
        p.item = eval(p.item)
        p.container_title = eval(p.container_title)
        """
        # Leftover from initially adding the journal titles as an attr
        if p.container_title == "None":
            try:
                p.container_title = p.item['container-title']
            except KeyError:
                pass
        """
    # Papers are keyed by (title, year).
    p.hash = (p.title, p.year)
    if is_parent:
        p.is_parent = True
    # Deal with the case where the paper hash is already in the database
    if p.hash in self.all_papers:
        # If it's a duplicate, just return the paper
        if p.is_duplicate(self.all_papers[p.hash]):
            return p
        # If it's not really a duplicate, re-hash one of them
        else:
            old_p = self.all_papers[p.hash]
            # Def'n of dup shouldn't let two non-dup papers both be verified
            if p.verified == 1 and old_p.verified == 1:
                raise ValueError(
                    "THEY CAN'T BE BOTH VERIFIED AND NON-DUPLICATE")
            # If the one already there is correct (and p therefore isn't),
            # keep it as is and hash p by citation
            if old_p.verified:
                p.hash = (p.citation, "CITATION ONLY")
                self.all_papers[p.hash] = p
                # I don't think this is doing anything and might need to move
                if is_parent:
                    old_p_kids = self.parents[old_p.hash]
                    self.parents[old_p.hash] = old_p_kids
                    self.parents[p.hash] = set()
            # Otherwise, re-hash the old one and put p in its spot
            else:
                old_p.hash = (old_p.citation, "CITATION ONLY")
                self.all_papers[old_p.hash] = old_p
                self.all_papers[p.hash] = p
                if is_parent:
                    old_p_kids = self.parents[p.hash]
                    self.parents[old_p.hash] = old_p_kids
                    self.parents[p.hash] = set()
    # If we don't already have the paper, simply add as usual
    else:
        self.all_papers[p.hash] = p
    # It's a defaultdict, but it's still nice to just put the hash in now
    if is_parent and p.hash not in self.parents:
        self.parents[p.hash] = set()
    return p
def initialize_children(self):
    """ Add all the references for each paper in the list of bib files.

    Children already cached in ref_lists/children_of_<bib> are reused;
    only citations not seen before are looked up and written back.
    """
    j = 0
    # Go through all the bibliography files
    for bib in self.bibs:
        j += 1
        with open(bib, errors="backslashreplace") as input:
            # Fetch or add the parent paper
            splits = re.split('\[[0-9]+\]', input.readline().strip())
            parent_citation = clean_citation(splits[1])
            parent = self.parent_citations[parent_citation]
            # Get the right filename for the child file
            bib0 = bib.replace('\\', '').replace('bibliographies', '')
            bib0 = bib0.strip('.').replace('/', '')
            childfile = "ref_lists/children_of_" + bib0
            old_size = len(self.all_papers)
            # Get any papers we've already written to the child file
            child_citations, read_all = self.import_file(childfile)
            for citation, p in child_citations.items():
                self.parents[parent.hash].add(p)
                p.file_loc = bib
            if read_all:
                print("(" + str(j) + " of " + str(len(self.bibs)) + ") " +
                      "Bib #" + bib[25:-4] + ": All children added from file")
            else:
                # Go through the rest of the file to look up papers
                with open(childfile, 'a',
                          errors='backslashreplace') as output:
                    inputstr = input.read()
                    splits = re.split('\[[0-9]+\]', inputstr.strip(' '))
                    print(
                        "(" + str(j) + " of " + str(len(self.bibs)) + ")",
                        len(splits) - 1, "children,", bib[17:])
                    #print("\t ", len(child_citations), "children read from file")
                    for i in range(1, len(splits)):
                        child_citation = clean_citation(splits[i])
                        #print(child_citation)
                        if child_citation not in child_citations:
                            print("\tAdd child", i, "of", len(splits) - 1)
                            print('\t\t' + child_citation[:50])
                            # Empty Paper + citation triggers a lookup
                            # inside add_paper.
                            p = self.add_paper(Paper(), child_citation)
                            p.file_loc = bib
                            self.write_paper(output, p)
                            self.parents[parent.hash].add(p)
            # Per-bib summary of how many children/new papers were added.
            num_children = len(self.parents[parent.hash])
            print(" Added", num_children, "children,",
                  len(self.all_papers) - old_size, "new papers")
def test_editing_replaces_text(initial_text, new_text, output):
    """Pencil.edit must rewrite the paper's buffer to the expected text."""
    sheet = Paper(initial_text)
    Pencil().edit(sheet, new_text)
    assert sheet.buffer == output
# Index(['Authors', 'Author(s) ID', 'Title', 'Year', 'Source title', 'Volume', # 'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by', # 'DOI', 'Link', 'Affiliations', 'Authors with affiliations', 'Abstract', # 'Author Keywords', 'Index Keywords', 'Molecular Sequence Numbers', # 'Chemicals/CAS', 'Tradenames', 'Manufacturers', 'Funding Details', # 'Funding Text 1', 'Funding Text 2', 'Funding Text 3', 'References', # 'Correspondence Address', 'Editors', 'Sponsors', 'Publisher', # 'Conference name', 'Conference date', 'Conference location', # 'Conference code', 'ISSN', 'ISBN', 'CODEN', 'PubMed ID', # 'Language of Original Document', 'Abbreviated Source Title', # 'Document Type', 'Publication Stage', 'Access Type', 'Source', 'EID'], # dtype='object') i = 0 for dataframe in list_files_csv: paper = Paper(dataframe) #Authors paper.str_covert(column_name="Authors") paper.convert(column_name="Authors", deli=",") #Author(s) ID paper.str_covert(column_name="Author(s) ID") paper.convert(column_name="Author(s) ID", deli=";") #Title paper.str_covert(column_name="Title") paper.convert(column_name="Title", deli=",") #Year pass
def test_eraser_can_run_out():
    """An eraser with limited durability only erases what it can afford."""
    sheet = Paper("Buffalo Bill")
    Eraser(3).erase(sheet, "Bill")
    assert sheet.buffer == "Buffalo B "
# Query the Microsoft Academic Graph for Brock University papers (2018)
# and resolve each returned paper's references.
c = Crossref(mailto="*****@*****.**")
# SECURITY NOTE(review): the API subscription key is hard-coded; move it
# to an environment variable or config file before sharing this code.
headers = {"Ocp-Apim-Subscription-Key": "ba7fae63586a4942bb49403fad4009d3"}
expr = "And(Composite(AA.AfN=='brock university'),Y=2018)"
r = requests.get("https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate?expr="+expr+"&model=latest&count=5&offset=171&attributes=Id,E,J.JN,C.CN,RId,F.FN,Ti,Y,D,AA.AuN,AA.AuId,AA.AfN,AA.AfId", headers=headers)
data = r.json()['entities']
for entity in data:
    paper = Paper(entity)
    print(vars(paper))
    print("")
    # Prefer the DOI-based reference lookup; fall back to the DOI-less
    # path when it yields nothing.
    paper.getReferencesDOI()
    if paper.references == None:
        paper.getReferencesNoDOI()
    if paper.references != None:
        citationID = 0
        for reference in paper.references:
            # Throttle to stay under the API rate limit.
            time.sleep(2)
            #print(reference)
            #print("")
class Shape():
    """A generic rectangular shape drawn on a shared Paper canvas."""

    # Static class variable removing the need to pass in a Paper object
    # to draw the shapes on
    paper = Paper()

    # Constructor for Shape
    def __init__(self, width=50, height=50, x=None, y=None, color="black"):
        """Creates a generic 'shape' which contains properties common to all
        shapes such as height, width, x y coordinates and colour.
        """
        # Set some attributes
        self.height = height
        self.width = width
        self.color = color
        # Put the shape in the centre if no xy coords were given
        if x is None:
            self.x = (self.paper.paper_width / 2) - (self.width / 2)
        else:
            self.x = x
        if y is None:
            self.y = (self.paper.paper_height / 2) - (self.height / 2)
        else:
            self.y = y

    # This is an internal method not meant to be called by users
    # (It has a _ before the method name to show this)
    def _location(self):
        """Internal method used by the class to get the location of the
        shape. This shouldn't be called by users, hence why its name begins
        with an underscore.
        """
        x1 = self.x
        y1 = self.y
        x2 = self.x + self.width
        y2 = self.y + self.height
        # Returns [left, top, right, bottom] bounding-box coordinates.
        return [x1, y1, x2, y2]

    # Randomly generate what the shape looks like
    def randomize(self, smallest=20, largest=200):
        """Randomly generates width, height, position and colour for a shape.
        You can specify the smallest and largest random size that will be
        generated. If not specified, the generated shape will default to a
        random size between 20 and 200.
        """
        self.width = random.randint(smallest, largest)
        self.height = random.randint(smallest, largest)
        # Constrain x/y so the shape stays fully on the paper.
        self.x = random.randint(0, self.paper.paper_width - self.width)
        self.y = random.randint(0, self.paper.paper_height - self.height)
        self.color = random.choice([
            "red", "yellow", "blue", "green", "gray", "white", "black",
            "cyan", "pink", "purple"
        ])

    # Getters and setters for Shape attributes
    def set_width(self, width):
        """Sets the width of the shape"""
        self.width = width

    def set_height(self, height):
        """Sets the height of the shape"""
        self.height = height

    def set_x(self, x):
        """Sets the x position of the shape"""
        self.x = x

    def set_y(self, y):
        """Sets the y position of the shape"""
        self.y = y

    def set_color(self, color):
        """Sets the colour of the shape"""
        self.color = color

    def get_color(self):
        """Returns the colour of the shape"""
        return self.color
region_name='us-west-2') table = dynamodb.Table('SamplePapers') response = table.scan(ProjectionExpression="id,authors,#p", ExpressionAttributeNames={'#p': 'partition'}) data = response['Items'] while 'LastEvaluatedKey' in response: response = table.scan(ProjectionExpression="id,authors,#p", ExpressionAttributeNames={'#p': 'partition'}, ExclusiveStartKey=response['LastEvaluatedKey']) data.extend(response['Items']) for item in data: paper = Paper(item) if paper.partition <= 1: paper.annotate_authors() #for a in paper.authors: #print(a.predicted_country + " " + a.predicted_gender) #g_score = gender_score(paper) #c_score = country_score(paper) g_dist = gender_distribution(paper) c_dist = country_distribution(paper) #print(c_dist) #print(g_dist) table.update_item( Key={ 'id': paper.id, 'partition': paper.partition },