Пример #1
0
 def createManyCrystalsAndTrees(self):
     """Register two prototype papers and clone 100 pairs of them.

     Returns a list of 200 clones, alternating snowflake and tree.
     """
     snowflake = Paper('雪の結晶')
     fir_tree = Paper('もみの木')
     registry = PrototypeKeeper()
     registry.addCloneable('snowflake', snowflake)
     registry.addCloneable('tree', fir_tree)
     sheets = []
     for _ in range(100):
         sheets.append(registry.getClone('snowflake'))
         sheets.append(registry.getClone('tree'))
     return sheets
Пример #2
0
    def create_paper(self, paper_json):
        """Build a Paper from a Europe-PMC-style JSON record.

        Returns None when the record carries no 'pmid' (the original
        fell through with an implicit None there too). When
        self.get_date is set and a DOI is present, the publication date
        is resolved through the CrossRef API; otherwise 'pubYear' is
        used with January 1st as a placeholder.

        Fixes vs. the original: Python-2-only dict.has_key() replaced
        with 'in', '== None' replaced with 'is None', and setdefault()
        (which mutated the caller's dict with None placeholders)
        replaced with get().
        """
        if 'pmid' not in paper_json:
            return None

        date = None
        # Optionally resolve the publication date through CrossRef.
        if self.get_date and 'doi' in paper_json:
            res = requests.get("http://api.crossref.org/works/%s" %
                               paper_json['doi'])
            if res.status_code == 200:
                date = json.loads(
                    res.content)['message']['issued']['date-parts'][0]
                # CrossRef may omit month and/or day; pad with 1.
                while len(date) < 3:
                    date.append(1)
                year, month, day = date
                date = str(datetime.date(year, month, day))
        if date is None and 'pubYear' in paper_json:
            # Fall back to January 1st of the publication year.
            date = str(datetime.date(int(paper_json['pubYear']), 1, 1))

        return Paper(api=self.api_name,
                     title=paper_json.get('title'),
                     authors=paper_json.get('authorString'),
                     date=date,
                     doi=paper_json.get('doi'),
                     api_id="%s,%s" %
                     (paper_json['source'], paper_json['pmid']),
                     isOpenAccess=paper_json['isOpenAccess'] == "Y",
                     global_citation_count=paper_json['citedByCount'],
                     has_references=(paper_json['hasReferences'] == "Y"))
Пример #3
0
def paper_to_list():
    """Tokenise every file in PAPER_DIR and wrap each one in a Paper.

    Returns a list of Paper objects numbered in iteration order.

    Bug fixed: the original's 'i += 1' sat OUTSIDE the for loop, so
    every Paper was created with index 0; enumerate() numbers them
    correctly.
    """
    filelist = iterate_folder(PAPER_DIR)

    preprocessed_paper_list = []

    # iterate through all main texts and collect their sentence lists
    for i, file in enumerate(filelist):
        print(i)
        sent_list = sent_tokenize_file(file)
        # The tokenizer returns (at least) four sentence lists per file.
        paper = Paper(i, sent_list[0], sent_list[1], sent_list[2],
                      sent_list[3])
        preprocessed_paper_list.append(paper)

    return preprocessed_paper_list


#csvimport()
#csvexport()
#print(paper_to_list()[0].cleared_paper)

# //store the sentences in a file
# writefile = io.open('S:\\VMs\\Shared\\Maindata.txt', 'w', encoding="utf-8-sig")
# for file in filelist:
#     i+=1
#     print(i)
#     for line in tokenize_file(file):
#         writefile.write(line + "\n")
#
# writefile.close()
Пример #4
0
    def test_paper_can_be_written_to(self):
        """Text written onto a fresh paper can be read back unchanged."""
        sheet = Paper()
        message = "I'm better than papyrus"

        sheet.write(message)

        self.assertEqual(message, sheet.read())
Пример #5
0
    def parse_file(self, filename):
        """Parse a reading-list file into (papers, sections).

        A line matching one of the section regexes opens a new Section;
        every subsequent line matching a paper regex becomes a Paper
        attached to the section currently open (None before the first
        section header).
        """
        with codecs.open(filename, encoding='utf-8', mode='r',
                         buffering=1, errors='strict') as handle:

            parsed_sections = []
            parsed_papers = []
            changed_titles = self.get_changed_papers_titles()
            open_section = None

            for row in handle.read().splitlines():
                header = FileParser.check_all_regexes(self.section_regexes, row)
                if header:
                    open_section = Section(header.groupdict()['name'])
                    parsed_sections.append(open_section)
                    continue

                match = FileParser.check_all_regexes(self.paper_regexes, row)
                if not match:
                    continue

                info = match.groupdict()
                title = info['title']
                parsed_papers.append(Paper({
                    'title':      title,
                    'publisher':  info['publisher'],
                    'url':        info['url'],
                    'section':    open_section,
                    'notes_path': info.get('notes_path'),
                    'changed':    title in changed_titles,
                    'is_read':    FileParser.is_read(info['read']),
                    'tags':       info.get('tags')}))

        return parsed_papers, parsed_sections
Пример #6
0
    def getLinks(self, page):
        """Scrape IEEE Xplore search results from *page* HTML.

        Every <div class="result-item-align"> yields one Paper whose
        link, title, authors and conference are filled from the anchors
        inside it. Papers are appended to self.papers, which is returned.
        """
        soup = BeautifulSoup(page, 'html.parser')
        result_divs = soup.find_all("div", attrs={"class": "result-item-align"})

        for result in result_divs:
            paper = Paper('')
            for anchor in result.findChildren('a'):
                if 'href' not in anchor.attrs:
                    continue
                href = anchor['href']
                if 'document' in href:
                    paper.linkPaper = 'https://ieeexplore.ieee.org' + href
                    # Only the first document link supplies the title;
                    # later document links must not overwrite it.
                    if paper.titlePaper == '':
                        paper.titlePaper = anchor.text
                if 'author' in href:
                    paper.author.append(anchor.text)
                if 'conhome' in href:
                    paper.conference = anchor.text
            print(paper)
            self.papers.append(paper)
        return self.papers
Пример #7
0
def add_paper(filename_, bibtex_name, path=None):
    """Add a PDF named like '<ref>.YYYY.NNNNN.pdf' to a bibtex file.

    The ArXiv code is taken from the filename when it matches the
    '<ref>.<4 digits>.<5 digits>' pattern, otherwise extracted from the
    PDF itself. On success the file is moved to <path>/done, on failure
    to <path>/failed.

    Returns (status, ref) where status is 'ok', 'repeated' or 'fail'.

    Bug fixed: the original indexed parts[1]/parts[2] without checking
    how many dot-separated pieces the filename actually has, raising
    IndexError for names such as 'paper.pdf'.
    """
    if not path:
        path = './'
    root = path
    done_path = os.path.join(root, 'done')
    failed_path = os.path.join(root, 'failed')

    parts = filename_.split(".")
    if len(parts) >= 3 and len(parts[1]) == 4 and len(parts[2]) == 5:
        # Filename itself encodes the ArXiv code: '<ref>.NNNN.NNNNN'.
        ref = parts[0]
        code = parts[1] + "." + parts[2]
    else:
        # Fall back to extracting the code from the PDF contents.
        code = get_arxiv_code(filename_, root)
        ref = 'ADD_REF'

    if code is not None:
        # If code is provided by filename OR pdf was downloaded from ArXiv:
        paper = Paper(code=code, bibtex_name=bibtex_name)

        add_new = logger.check_ref(bibtex_name, ref)
        if add_new:
            paper.add_bib(ref)
            paper.add_abstract(ref)
            shutil.move(os.path.join(root, filename_), done_path)
            print(filename_, " paper added to", bibtex_name)
            status = 'ok'
        else:
            print(filename_, " paper already exists in", bibtex_name)
            status = 'repeated'
    else:
        print(filename_, 'paper was not able to being added')
        shutil.move(os.path.join(root, filename_), failed_path)
        status = 'fail'
    return status, ref
Пример #8
0
def iterate(keys, node, year, month, country, publisher, source, out_path):
    """
    Iterate through the elements and store the details in the instance of Paper.

    Recurses until a node containing <paper> children is found; every
    paper whose title or abstract contains one of *keys* is saved and
    written to *out_path*.

    Fixes vs. the original: papers without a <title> element no longer
    crash (title becomes None); auth_list is always initialised before
    use (the 'findall(...) is not None' guard was always true, since
    findall returns a list); indentation normalised to 4 spaces.
    """
    if node is None:
        return
    if len(node.findall("paper")) == 0:
        # Recurse into child nodes to search for the paper elements.
        for child in node:
            iterate(keys, child, year, month, country, publisher, source, out_path)
        return

    for paper in node.findall("paper"):
        title_el = paper.find("title")
        if title_el is not None and title_el.find("fixed-case") is not None:
            # Recreate the title by stripping the <fixed-case> wrappers.
            xmlstr = ET.tostring(title_el, encoding='utf8', method='xml').decode("utf-8")
            xmlstr = xmlstr.replace("<fixed-case>", "")
            xmlstr = xmlstr.replace("</fixed-case>", "")
            title = xmlstr[xmlstr.find("<title>") + 7:xmlstr.find("</title>")]
        else:
            # Guard against papers that carry no <title> element at all.
            title = title_el.text if title_el is not None else None

        abstract_el = paper.find("abstract")
        abstract = abstract_el.text if abstract_el is not None else None

        # Search for each keyword.
        for key in keys:
            if (title is not None and title.find(key) != -1) or \
                    (abstract is not None and abstract.find(key) != -1):
                # Keyword found: collect authors and store the details.
                auth_list = []
                for child in paper.findall("author"):
                    auth_list.append(child.find("first").text + " " + child.find("last").text)
                paper_object = Paper()
                paper_object.save(year, month, title, auth_list, country, source, publisher)
                paper_object.write(out_path)
                # Once saved for one key, continue to the next paper.
                break
Пример #9
0
def csvimport():
    """Rebuild the preprocessed paper list from the exported CSV files.

    Reads ./export/papers.csv for the Paper records and
    ./export/sentences.csv for their original/cleared sentences. Both
    files carry a header row that is skipped; NUL bytes are stripped
    before parsing.

    Fixes vs. the original: both file handles were leaked (now closed
    by 'with'), and reader.next() was Python-2-only (now the built-in
    next(), which works on both interpreter lines).
    """
    preprocessed_paper_list = []

    # Read papers first so the sentence rows can index into the list.
    with open('./export/papers.csv') as papers:
        papers_data = unicodecsv.reader((x.replace('\0', '') for x in papers),
                                        encoding='utf-8-sig',
                                        delimiter=';')
        next(papers_data)  # skip header row

        for row in papers_data:
            paper = Paper(int(row[0]), [], [], row[1], row[2])
            preprocessed_paper_list.append(paper)

    # Attach each sentence row to its paper (row[0] is the paper index).
    with open('./export/sentences.csv') as myfile:
        sentences_data = unicodecsv.reader((x.replace('\0', '') for x in myfile),
                                           encoding='utf-8-sig',
                                           delimiter=';')
        next(sentences_data)  # skip header row

        for row in sentences_data:
            preprocessed_paper_list[int(row[0])].original_paper.append(row[1])
            preprocessed_paper_list[int(row[0])].cleared_paper.append(row[2])

    return preprocessed_paper_list
Пример #10
0
 def createManyCrystals(self):
     """Draw and cut one crystal template, then clone it 100 times."""
     template = Paper('雪の結晶')
     self.drawCrystal(template)
     self.cutAccordanceWithLine(template)
     return [template.createClone() for _ in range(100)]
Пример #11
0
def test_eraser_erases_the_next_occurence_of_text():
    """Each erase() call blanks out the next occurrence of the text."""
    source = "How much wood would a woodchuck chuck if a woodchuck could chuck wood?"
    target = "chuck"
    sheet = Paper(source)
    rubber = Eraser()
    for _ in range(2):
        rubber.erase(sheet, target)
    expected = "How much wood would a woodchuck chuck if a wood      could       wood?"
    assert sheet.buffer == expected
Пример #12
0
def edit_paper():
    """Persist an edited paper record from the form, then redirect home."""
    params = request.form.to_dict()
    print('params:', params)
    # Checkboxes are simply absent from the form data when unticked.
    params['inQueue'] = 1 if 'inQueue' in params else 0
    Paper(**params).update()
    return redirect('/')  # @TODO highlight paper in list after redirect.
Пример #13
0
    def __init__(self):
        """Create an empty Harmonizer.

        Attributes:
            nmarkers: number of markers
            markers: the marker objects
            paper: the Paper being harmonized
            data: the actual grades and the grades from markers
        """
        self.paper = Paper()
        self.markers = []
        self.data = []
        self.nmarkers = 0
Пример #14
0
    def import_file(self, filename, is_parent=False):
        """Read papers back from a previously written reference file.

        The file is a sequence of 'key >>> value' lines; papers are
        separated by '>>> NEW PAPER <<<' markers and a trailing
        '>>> ALL DONE <<<' marker records that the whole bibliography
        was processed.

        Returns (citations, read_all): a dict mapping citation -> Paper
        and a flag for the end marker. A missing file yields ({}, False).

        Bug fixed: a malformed line (fewer than three '>>>' fields) used
        to print a diagnostic and then crash on the missing indexes; it
        is now reported and skipped.
        """
        citations = dict()
        read_all = False

        try:
            with open(filename, errors="backslashreplace") as infile:

                p = Paper()
                start = True

                for line in infile:

                    line = line.replace("\n", "")

                    if line == ">>> NEW PAPER <<<":
                        if start:
                            start = False
                        else:
                            # Marker closes the previous paper: register it.
                            self.add_paper(p, is_parent=is_parent)
                            citations[p.citation] = p
                            p = Paper()
                    elif line == ">>> ALL DONE <<<":
                        read_all = True
                    else:
                        items = line.split(">>>")
                        if len(items) < 3:
                            # Malformed line: report and skip instead of
                            # raising IndexError on the missing fields.
                            print("line, items:", line, items)
                            continue
                        setattr(p, items[1].strip(' '), items[2].strip(' '))

                # Register the final paper if one was being built.
                if p.title:
                    self.add_paper(p, is_parent=is_parent)
                    citations[p.citation] = p

        except FileNotFoundError:
            pass

        return citations, read_all
Пример #15
0
    def __call__(self, fileName, *args, **kw):
        """Initialize el/fn helper attributes and create a :class:`Paper` to draw on.

        Copies every attribute from ``self.el`` onto :class:`RaphaelElement`
        and from ``self.fn`` onto :class:`Paper`, then constructs the Paper.

        :param str fileName: file name for saving
        :param args: either ``(width, height)`` of the canvas, or a single
            list/tuple whose first four elements are ``[x, y, width, height]``
            followed by element descriptions in the format
            ``{"type": type, <attributes>}``
        :param kw: extra Paper options, e.g. ``backgroundColor='cyan'``
        :return: new :class:`Paper` instance

        .. code-block:: python

            paper = Drawing("fname1.svg", 640, 480)
            paper = Drawing("fname2.svg", 640, 480, backgroundColor='cyan')
            paper = Drawing("fname3.svg", [0, 0, 640, 480,
                {"type": "path", "path": "M100,200 l50,100",
                 "stroke-width": 5, "stroke": "blue"}])
        """
        # dict.items() replaces the Python-2-only dict.iteritems() and
        # behaves the same on both interpreter lines.
        for k, v in self.el.__dict__.items():
            setattr(RaphaelElement, k, v)
        for k, v in self.fn.__dict__.items():
            setattr(Paper, k, v)
        return Paper(fileName, *args, **kw)
Пример #16
0
def main():
    # Demo driver: build a fixed exam-paper specification, generate fake
    # question data for it, and time one run of the genetic algorithm.
    # NOTE(review): this is Python-2-only code -- the bare print statement
    # below is a syntax error on Python 3, and time.clock() was removed
    # in Python 3.8.
    paper = Paper()

    # Target paper: 100 points total at difficulty 0.72, ten knowledge
    # points worth 10 each, three question types with 15/15/5 questions
    # scoring 30/30/40.
    paper.id = 1
    paper.total_score = 100
    paper.difficulty = 0.72
    paper.points = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    paper.each_point_score = [10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
    paper.each_type_count = [15, 15, 5]
    paper.each_type_score = [30, 30, 40]

    db = DB()
    db.generate_fake(paper)
    genetic = Genetic(paper, db)
    start = time.clock()
    genetic.test_run()
    end = time.clock()
    # Prints "total time: <seconds> s" (message is Chinese).
    print u"总共用时:", end - start, " 秒"
Пример #17
0
    def initialize_parents(self, p_file):
        """Add all the parent papers using their .txt reference files.

        Anything already calculated and stored in *p_file* (parents.txt)
        is read back first; only citations not found there are looked up
        and appended to the file as they are resolved.
        """

        # Reload whatever was persisted by a previous run.
        self.parent_citations, read_all = self.import_file(p_file,
                                                           is_parent=True)

        print("Number of parents read from file:", len(self.all_papers))

        if read_all:
            print("All parents added from file; no lookup necessary")

        else:
            # Append newly resolved parents to the same file as we go.
            with open(p_file, 'a', errors='backslashreplace') as output:

                for i in range(len(self.bibs)):

                    with open(self.bibs[i],
                              errors="backslashreplace") as input:

                        # First line of a bib file is '[n] <citation>'.
                        # NOTE(review): the pattern should be a raw string
                        # (r'\[[0-9]+\]') to avoid invalid-escape warnings.
                        inputstr = input.readline().strip()
                        splits = re.split('\[[0-9]+\]', inputstr)
                        citation = splits[1].strip(' ')

                        if citation not in self.parent_citations:

                            print("\nlen(all_papers):", len(self.all_papers))
                            print("Add parent", i + 1, "of", len(self.bibs),
                                  "file", self.bibs[i])

                            # Look the citation up and persist it immediately.
                            p = self.add_paper(Paper(),
                                               citation,
                                               is_parent=True)
                            self.parent_citations[p.citation] = p
                            p.file_loc = self.bibs[i]
                            self.write_paper(output, p)

                            print("new len(all_papers):", len(self.all_papers))

                        else:
                            # Already known: just record where its bib lives.
                            self.parent_citations[
                                citation].file_loc = self.bibs[i]
Пример #18
0
def run_cso_classifier(paper,
                       modules="both",
                       enhancement="first",
                       explanation=False):
    """Classify *paper* against the Computer Science Ontology.

    Args:
        paper: the document to classify (handed straight to Paper).
        modules: which classifiers to run: 'syntactic', 'semantic' or 'both'.
        enhancement: how far to climb the ontology: 'first', 'all' or 'no'.
        explanation: whether to collect explanation data in the result.

    Returns:
        The dict produced by Result.get_dict().

    Raises:
        ValueError: if any argument is outside its accepted values.

    Fixes vs. the original: isinstance() replaces the non-idiomatic
    'type(...) != bool' comparison, and the enhancement error message
    no longer refers to a non-existent 'enhances' field.
    """
    if modules not in ["syntactic", "semantic", "both"]:
        raise ValueError(
            "Error: Field modules must be 'syntactic', 'semantic' or 'both'")

    if enhancement not in ["first", "all", "no"]:
        raise ValueError(
            "Error: Field enhancement must be 'first', 'all' or 'no'")

    if not isinstance(explanation, bool):
        raise ValueError(
            "Error: Explanation must be set to either True or False")

    # Loading ontology and model
    cso = CSO()
    model = MODEL()
    t_paper = Paper(paper, modules)
    result = Result(explanation)

    # Run the requested classifiers, feeding both into the shared result.
    if modules in ('syntactic', 'both'):
        synt_module = synt(cso, t_paper)
        result.set_syntactic(synt_module.classify_syntactic())
        if explanation:
            result.dump_temporary_explanation(synt_module.get_explanation())
    if modules in ('semantic', 'both'):
        sema_module = sema(model, cso, t_paper)
        result.set_semantic(sema_module.classify_semantic())
        if explanation:
            result.dump_temporary_explanation(sema_module.get_explanation())

    # Enhance the union of the two outputs by climbing the ontology.
    result.set_enhanced(
        cso.climb_ontology(getattr(result, "union"), enhancement))

    return result.get_dict()
Пример #19
0
def bibtex_entry_to_table(bibtex_entry):
    # Resolve an IEEE paper's reference list through IEEE Xplore and
    # append {'id', 'ref_id'} pairs to the module-level json_data list.
    # NOTE(review): relies on module-level names json_data, webdriver,
    # http, re and BeautifulSoup being in scope.
    myPaper = Paper(bibtex_entry)
    id = myPaper.getId()  # shadows the builtin id()
    if not (myPaper.isPaper()):
        print('No author or keywords')
        return

    # Reference check
    # A headless browser is used because the references tab is rendered
    # client-side; the PhantomJS binary path is machine-specific.
    wd = webdriver.PhantomJS(
        '/home/doyun/phantomjs-2.1.1-linux-x86_64/bin/phantomjs')
    url = 'http://ieeexplore.ieee.org/document/' + str(
        id) + '/?anchor=references'
    print(url)
    # Retry until the page loads; the server occasionally drops the
    # connection mid-request.
    while True:
        try:
            wd.get(url)
        except http.client.RemoteDisconnected:
            print('RemoteDisconnected Happened')
            continue
        break
    html_page = wd.page_source
    soup = BeautifulSoup(html_page, 'html.parser')
    ref_dom_list = soup.findAll("a",
                                {"class": "stats-reference-link-viewArticle"})
    # Each reference link is expected to end in '/<numeric id>'.
    # NOTE(review): regex.search returns None for non-matching hrefs,
    # which would raise AttributeError below -- confirm all hrefs match.
    regex = re.compile(r'^\D+/(\d+)$')
    refs = [
        int(regex.search(ref_dom.get('href')).group(1))
        for ref_dom in ref_dom_list
    ]
    for ref in refs:
        json_data.append({'id': int(id), 'ref_id': ref})
        print('id = {}, ref_id = {}'.format(id, ref))

    wd.quit()

    return
Пример #20
0
from paper import Paper

# Smoke test: construct a Paper directly from a CiteSeerX-style document id.
p = Paper('10.1.1.1.1577')
Пример #21
0
    data_dir + community_filename)

# Build one conference counter per top community, plus an inverted index
# from node id to the list of communities that contain it.
num_top_communities = 30
community_conf_counter_list = [Counter() for _ in range(num_top_communities)]
id_to_community = {}
for communityId in range(num_top_communities):
    for nodeId in community_member_list[communityId]:
        id_to_community.setdefault(nodeId, []).append(communityId)

# Sequentially parse every paper.
with open(data_dir + data_filename, 'r') as f:
    paper = Paper()
    id_counter = 0
    for line in f:
        line = line.strip("\n")
        # Write out when we have reached the end of a paper.
        # TODO: Remove this early-breaking line.
        # if id_counter == 30000:
        #     break
        if len(line) == 0 or line[0] != '#':
            if id_counter % 10000 == 0:
                print "Parsed file", id_counter
            # Update conference counter for community.
            if id_counter in id_to_community:
                for communityId in id_to_community[id_counter]:
                    community_conf_counter_list[communityId][paper.venue] += 1
            paper = Paper()
Пример #22
0
            new_message = message[:empty_space]
            message_size = len(new_message)
        else:
            new_message = message

        space = new_message.find(' ')
        ink_needed = space

        while space != -1 and ink_needed < self.ink_amount and space < \
                empty_space:
            counter_of_spaces += 1
            ink_needed -= counter_of_spaces
            space = new_message.find(' ', space + 1)

        if message_size > self.ink_amount:
            new_message = new_message[:self.ink_amount + counter_of_spaces]
            self.ink_amount = 0
            paper.add_content(new_message)
        else:
            self.ink_amount -= message_size - counter_of_spaces
            paper.add_content(new_message)


# Demo: a pen with 10 units of ink writing onto a 35-character paper.
pen = Pen(10)
paper = Paper(35)

# The message starts with spaces, which the pen treats differently from
# ink-consuming characters.
pen.write("      Hello,  world!", paper)
paper.show()
# pen.write(" Hello, python!", paper)
paper.show()
Пример #23
0
    def add_paper(self, p, citation=None, is_parent=False):
        """Add paper p to the database (self.all_papers, keyed by hash).

        If *citation* is given, p is looked up through CrossRef first;
        otherwise p's attributes (read back as strings from a file) are
        converted to their real types. Papers are hashed by
        (title, year); collisions between genuinely different papers are
        resolved by re-hashing one of them under
        (citation, "CITATION ONLY").

        Returns p (unchanged when it was empty or a duplicate).
        """

        # Don't add an empty paper to the database
        is_empty = True
        empty_paper = Paper()
        for attr in ["citation", "title", "DOI", "year"]:
            if str(empty_paper.__dict__[attr]) != p.__dict__[attr]:
                is_empty = False
        if is_empty:
            print("DON'T ADD AN EMPTY PAPER, DUMMY")
            return p

        # Look up using CrossRef if given just the citation
        if citation:
            p.lookup(citation, self.base_url)

        # Unpack/convert attributes that shouldn't be just strings
        else:
            if p.checked == "False":
                p.checked = 0.0
            for attr in self.int_attrs:
                if p.__dict__[attr] != "None":
                    setattr(p, attr, float(p.__dict__[attr]))
            # SECURITY NOTE(review): eval() executes arbitrary code from
            # the stored file; prefer ast.literal_eval for these
            # list/dict fields if the files are not fully trusted.
            p.subject = eval(p.subject)
            p.item = eval(p.item)
            p.container_title = eval(p.container_title)
            """
			# Leftover from initially adding the journal titles as an attr
			if p.container_title == "None":
				try:
					p.container_title = p.item['container-title']
				except KeyError:
					pass
			"""

        # Papers are keyed by (title, year).
        p.hash = (p.title, p.year)
        if is_parent:
            p.is_parent = True

        # Deal with the case where the paper hash is already in the database
        if p.hash in self.all_papers:

            # If it's a duplicate, just return the paper
            if p.is_duplicate(self.all_papers[p.hash]):
                return p

            # If it's not really a duplicate, re-hash one of them
            else:
                old_p = self.all_papers[p.hash]

                # Def'n of dup shouldn't let two non-dup papers both be verified
                if p.verified == 1 and old_p.verified == 1:
                    raise ValueError(
                        "THEY CAN'T BE BOTH VERIFIED AND NON-DUPLICATE")

                # If the one already there is correct (and p therefore isn't),
                # keep it as is and hash p by citation
                if old_p.verified:
                    p.hash = (p.citation, "CITATION ONLY")
                    self.all_papers[p.hash] = p

                    # I don't think this is doing anything and might need to move
                    if is_parent:
                        old_p_kids = self.parents[old_p.hash]
                        self.parents[old_p.hash] = old_p_kids
                        self.parents[p.hash] = set()

                # Otherwise, re-hash the old one and put p in its spot
                else:
                    old_p.hash = (old_p.citation, "CITATION ONLY")
                    self.all_papers[old_p.hash] = old_p
                    self.all_papers[p.hash] = p

                    if is_parent:
                        old_p_kids = self.parents[p.hash]
                        self.parents[old_p.hash] = old_p_kids
                        self.parents[p.hash] = set()

        # If we don't already have the paper, simply add as usual
        else:
            self.all_papers[p.hash] = p

        # It's a defaultdict, but it's still nice to just put the hash in now
        if is_parent and p.hash not in self.parents:
            self.parents[p.hash] = set()

        return p
Пример #24
0
    def initialize_children(self):
        """Add all the references for each paper in the list of bib files.

        For every bibliography file, the first line identifies the
        parent paper; the remaining '[n] <citation>' entries are its
        children. Children already persisted in the corresponding
        ref_lists/children_of_* file are reloaded instead of re-looked-up.
        """
        j = 0

        # Go through all the bibliography files
        for bib in self.bibs:

            j += 1

            with open(bib, errors="backslashreplace") as input:

                # Fetch or add the parent paper
                # NOTE(review): the pattern should be a raw string
                # (r'\[[0-9]+\]') to avoid invalid-escape warnings.
                splits = re.split('\[[0-9]+\]', input.readline().strip())
                parent_citation = clean_citation(splits[1])
                parent = self.parent_citations[parent_citation]

                # Get the right filename for the child file
                bib0 = bib.replace('\\', '').replace('bibliographies', '')
                bib0 = bib0.strip('.').replace('/', '')
                childfile = "ref_lists/children_of_" + bib0

                old_size = len(self.all_papers)

                # Get any papers we've already written to the child file
                child_citations, read_all = self.import_file(childfile)
                for citation, p in child_citations.items():
                    self.parents[parent.hash].add(p)
                    p.file_loc = bib

                if read_all:
                    print("(" + str(j) + " of " + str(len(self.bibs)) + ") " +
                          "Bib #" + bib[25:-4] +
                          ": All children added from file")

                else:
                    # Go through the rest of the file to look up papers
                    with open(childfile, 'a',
                              errors='backslashreplace') as output:

                        inputstr = input.read()
                        splits = re.split('\[[0-9]+\]', inputstr.strip(' '))

                        print(
                            "(" + str(j) + " of " + str(len(self.bibs)) + ")",
                            len(splits) - 1, "children,", bib[17:])
                        #print("\t  ", len(child_citations), "children read from file")

                        for i in range(1, len(splits)):

                            child_citation = clean_citation(splits[i])
                            #print(child_citation)

                            # Only look up (and persist) children we have
                            # not already read back from the child file.
                            if child_citation not in child_citations:
                                print("\tAdd child", i, "of", len(splits) - 1)
                                print('\t\t' + child_citation[:50])

                                p = self.add_paper(Paper(), child_citation)
                                p.file_loc = bib
                                self.write_paper(output, p)
                                self.parents[parent.hash].add(p)

                        num_children = len(self.parents[parent.hash])
                        print("     Added", num_children, "children,",
                              len(self.all_papers) - old_size, "new papers")
Пример #25
0
def test_editing_replaces_text(initial_text, new_text, output):
    """Pencil.edit overwrites the paper buffer with the expected result."""
    sheet = Paper(initial_text)
    writer = Pencil()
    writer.edit(sheet, new_text)
    assert sheet.buffer == output
Пример #26
0
# Index(['Authors', 'Author(s) ID', 'Title', 'Year', 'Source title', 'Volume',
#        'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by',
#        'DOI', 'Link', 'Affiliations', 'Authors with affiliations', 'Abstract',
#        'Author Keywords', 'Index Keywords', 'Molecular Sequence Numbers',
#        'Chemicals/CAS', 'Tradenames', 'Manufacturers', 'Funding Details',
#        'Funding Text 1', 'Funding Text 2', 'Funding Text 3', 'References',
#        'Correspondence Address', 'Editors', 'Sponsors', 'Publisher',
#        'Conference name', 'Conference date', 'Conference location',
#        'Conference code', 'ISSN', 'ISBN', 'CODEN', 'PubMed ID',
#        'Language of Original Document', 'Abbreviated Source Title',
#        'Document Type', 'Publication Stage', 'Access Type', 'Source', 'EID'],
#       dtype='object')

# NOTE(review): 'i' is never incremented or read in the visible loop --
# confirm whether later (unseen) code uses it before removing.
i = 0
for dataframe in list_files_csv:
    # Wrap each Scopus export dataframe for column normalisation.
    paper = Paper(dataframe)

    # Authors: names are ','-separated in the raw export.
    paper.str_covert(column_name="Authors")
    paper.convert(column_name="Authors", deli=",")

    # Author(s) ID: IDs are ';'-separated.
    paper.str_covert(column_name="Author(s) ID")
    paper.convert(column_name="Author(s) ID", deli=";")

    # Title
    paper.str_covert(column_name="Title")
    paper.convert(column_name="Title", deli=",")

    # Year: no transformation implemented yet.
    pass
Пример #27
0
def test_eraser_can_run_out():
    """An eraser with limited charge stops erasing part-way through a word."""
    sheet = Paper("Buffalo Bill")
    rubber = Eraser(3)
    rubber.erase(sheet, "Bill")
    assert sheet.buffer == "Buffalo B   "
Пример #28
0
# CrossRef client, used for DOI lookups later in the script.
c = Crossref(mailto="*****@*****.**")


# SECURITY NOTE(review): hard-coded API subscription key -- move it to an
# environment variable or config file before publishing this script.
headers = {"Ocp-Apim-Subscription-Key":"ba7fae63586a4942bb49403fad4009d3"}
# Microsoft Academic query: Brock University publications from 2018.
expr="And(Composite(AA.AfN=='brock university'),Y=2018)"

r= requests.get("https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate?expr="+expr+"&model=latest&count=5&offset=171&attributes=Id,E,J.JN,C.CN,RId,F.FN,Ti,Y,D,AA.AuN,AA.AuId,AA.AfN,AA.AfId", headers=headers)

data = r.json()['entities']



for entity in data:

    paper = Paper(entity)

    print(vars(paper))
    print("")

    # Prefer DOI-based reference resolution; fall back to a DOI-less
    # lookup when no references were found.
    paper.getReferencesDOI()

    if paper.references == None:
        paper.getReferencesNoDOI()

    if paper.references != None:
        citationID = 0
        for reference in paper.references:
            time.sleep(2)  # rate-limit the follow-up requests
            #print(reference)
            #print("")
class Shape():
    """Base class for drawable shapes.

    Stores size, position and colour. All shapes share one static Paper
    instance, so callers never need to pass a canvas around.
    """

    # Static class variable removing the need to pass in a Paper object
    # to draw the shapes on
    paper = Paper()

    def __init__(self, width=50, height=50, x=None, y=None, color="black"):
        """Create a generic shape.

        Holds the properties common to all shapes: width, height, the
        x/y coordinates and the colour. A shape created without explicit
        coordinates is centred on the shared paper.
        """
        self.height = height
        self.width = width
        self.color = color

        # Centre the shape on the paper when no coordinates were given.
        self.x = x if x is not None else \
            (self.paper.paper_width / 2) - (self.width / 2)
        self.y = y if y is not None else \
            (self.paper.paper_height / 2) - (self.height / 2)

    def _location(self):
        """Return the bounding box [x1, y1, x2, y2] of the shape.

        Internal helper (leading underscore): not meant to be called by
        users of the class.
        """
        return [self.x,
                self.y,
                self.x + self.width,
                self.y + self.height]

    def randomize(self, smallest=20, largest=200):
        """Randomly generate size, position and colour for the shape.

        The width and height fall between *smallest* and *largest*
        (defaults 20 and 200); the position is chosen so the shape stays
        entirely on the paper.
        """
        self.width = random.randint(smallest, largest)
        self.height = random.randint(smallest, largest)

        self.x = random.randint(0, self.paper.paper_width - self.width)
        self.y = random.randint(0, self.paper.paper_height - self.height)

        self.color = random.choice([
            "red", "yellow", "blue", "green", "gray", "white", "black", "cyan",
            "pink", "purple"
        ])

    # Simple accessors kept for interface compatibility with existing callers.
    def set_width(self, width):
        """Sets the width of the shape"""
        self.width = width

    def set_height(self, height):
        """Sets the height of the shape"""
        self.height = height

    def set_x(self, x):
        """Sets the x position of the shape"""
        self.x = x

    def set_y(self, y):
        """Sets the y position of the shape"""
        self.y = y

    def set_color(self, color):
        """Sets the colour of the shape"""
        self.color = color

    def get_color(self):
        """Returns the colour of the shape"""
        return self.color
                              region_name='us-west-2')

    table = dynamodb.Table('SamplePapers')

    response = table.scan(ProjectionExpression="id,authors,#p",
                          ExpressionAttributeNames={'#p': 'partition'})
    data = response['Items']

    while 'LastEvaluatedKey' in response:
        response = table.scan(ProjectionExpression="id,authors,#p",
                              ExpressionAttributeNames={'#p': 'partition'},
                              ExclusiveStartKey=response['LastEvaluatedKey'])
        data.extend(response['Items'])

    for item in data:
        paper = Paper(item)
        if paper.partition <= 1:
            paper.annotate_authors()
            #for a in paper.authors:
            #print(a.predicted_country + " " + a.predicted_gender)
            #g_score = gender_score(paper)
            #c_score = country_score(paper)
            g_dist = gender_distribution(paper)
            c_dist = country_distribution(paper)
            #print(c_dist)
            #print(g_dist)
            table.update_item(
                Key={
                    'id': paper.id,
                    'partition': paper.partition
                },