示例#1
0
    def get_contact(self, response: Response) -> (dict, dict):
        """
        Extract the technology manager together with the contact record.

        :param response: the response object of the page being parsed
        :return: a tuple ``(manager, contact)``; ``contact`` is the same dict
                 object that is stored under ``manager['contact']``
        """
        contact = dict(email='', phone='', website=response.url, meet='')

        # manager
        manager_name = response.xpath("//dd[@class='manager']/a/text()").get()
        manager_link = response.xpath("//dd[@class='manager']/a/@href").get()
        manager = create_user()
        manager['name'] = manager_name
        manager['ref'] = manager_link

        raw_tags = response.xpath(
            "//dd[@class='manager']/div/em[1]/text()").get()
        if isinstance(raw_tags, str):
            pieces = raw_tags.split(', ')
            manager['tag'] = remove_empty_string_from_array(pieces)

        contact['phone'] = response.xpath(
            "//dd[@class='manager']/div/em[2]/text()").get()

        # The manager shares the contact dict, so the website written here is
        # also what the returned ``contact`` carries.
        manager['contact'] = contact
        manager['contact']['website'] = manager_link

        message = 'find manager {} with contact {}'.format(manager, contact)
        self.log(message, level=logging.DEBUG)
        return manager, contact
示例#2
0
    def add_inventors(self, response: Response) -> list:
        """
        Add inventors to the project.

        One user is created per ``node-inventor`` block on the page. The
        profile link is optional: an inventor without an anchor in the heading
        keeps whatever ``create_user`` provides for ``ref`` and the website.

        :param response: Response object
        :return: a list of inventors
        """
        inventors = []
        for row in response.xpath("//div[contains(@class, 'node-inventor')]"):
            name = row.xpath("string(h2)").get()
            href = row.xpath("h2//a/@href").get()
            department = row.xpath(
                "string(div[@class='content']/div[contains(@class, 'field-name-field-depa')])"
            ).get()
            title = row.xpath(
                "string(div[@class='content']/div[contains(@class, 'field-name-field-link-title')])"
            ).get()
            user = create_user()
            user['name'] = name
            user['exp']['exp']['company'] = self.name
            user['exp']['exp']['title'] = title
            # ``get()`` yields None when the heading has no link; concatenating
            # it with the site prefix would raise a TypeError.
            if href is not None:
                link = 'http://tlo.mit.edu' + href
                user['ref'] = link
                user['contact']['website'] = link
            user['abs'] = department
            inventors.append(user)
            self.log('Found inventor {}'.format(user['name']),
                     level=logging.DEBUG)
        return inventors
示例#3
0
    def get_contact_person(self, response: Response) -> dict:
        """
        Build a user for the contact person shown on the page.

        The name is the string value of the element with the id
        ``contact-person``; the company is this spider's ``name``.

        :param response: Response object
        :return: the contact person as a user
        """
        person = create_user()
        full_name = response.xpath("string(//*[@id='contact-person'])").get()
        person['name'] = full_name
        person['exp']['exp']['company'] = self.name
        message = 'Found contact person {}'.format(person['name'])
        self.log(message, level=logging.DEBUG)
        return person
示例#4
0
    def upload_author(self, author_impact: dict) -> dict:
        """
        Upload the unique authors to the database.

        Authors come from ``self.authors.all_elements()`` and are sent through
        ``add_record`` in batches of 1000. The resulting id mapping is also
        pickled to ``pubmed_author_ids.cp``.

        :param author_impact: author citation data, keyed like the authors
               themselves; each value provides ``'citation'`` and ``'keyword'``
        :return: a dictionary using the ``(name, abs)`` pair of each uploaded
                 author as key and its _id in database as value
        """
        batch_size = 1000
        user_ids = {}

        def upload_batch(batch: list) -> bool:
            """Send one batch, record the returned ids and report success."""
            response = add_record('entity', batch)
            if response['_status'] != 'OK':
                self.logger.error(
                    'fail to create users (batch of {})'.format(len(batch)))
                return False
            for u, r in zip(batch, response['_items']):
                user_ids[(u['name'], u['abs'])] = r['_id']
            return True

        # find unique author
        author_dict = self.authors.all_elements()

        # upload the user to the server
        users = []
        for a in author_dict:
            known = a in author_impact
            user = create_user()
            user['name'] = a[0]
            user['abs'] = a[1]
            user['ref'] = a[1]
            user['contact']['email'] = normalize_email(a[1])
            user['contact']['phone'] = normalize_phone(a[1])
            user['exp']['impact'] = author_impact[a]['citation'] if known else 0
            # The keywords used to be written to ['exp']['impact'] as well,
            # silently replacing the citation count stored just above.
            # NOTE(review): 'tag' is presumably the intended field (it is where
            # keywords are stored elsewhere in this code base) - confirm.
            user['tag'] = author_impact[a]['keyword'] if known else []
            user['onepage']['bg'] = json.dumps([u[1] for u in author_dict[a]])
            address = self.parse_address(a[1])
            if address is not None:
                user['addr'] = address
            else:
                user['addr']['city'] = 'Unknown'
                user['addr']['country'] = 'Unknown'
            users.append(user)
            # A failed batch is kept (as before) and sent again together with
            # the next author instead of being dropped.
            if len(users) >= batch_size and upload_batch(users):
                users = []
        if users:
            upload_batch(users)

        with open('pubmed_author_ids.cp', 'wb') as id_file:
            pickle.dump(user_ids, id_file)

        return user_ids
示例#5
0
    def _add_sponsor(data: dict) -> dict:
        """
        Build the user that represents a clinical trial sponsor.

        :param data: a dictionary with the sponsor data; ``'agency'`` is
                     required, ``'agency_class'`` is optional
        :return: the sponsor as a user whose ``type`` is 32
        """
        sponsor = create_user()
        sponsor['name'] = data['agency']
        sponsor['type'] = 32
        if 'agency_class' not in data:
            return sponsor
        sponsor['tag'] = data['agency_class']
        return sponsor
示例#6
0
 def add_inventors(self, response):
     """
     Collect the inventors listed in the ``dd.inventor`` entries of the page.

     :param response: Response object
     :return: a list of inventors
     """
     found = []
     for entry in response.xpath("//dd[@class='inventor']"):
         display_name = entry.xpath("a/text()").get()
         profile_url = entry.xpath("a/@href").get()
         summary = entry.xpath('string(div[1])').get()

         inventor = create_user()
         inventor['name'] = display_name
         inventor['abs'] = summary
         # the profile link doubles as reference and as website
         inventor['ref'] = profile_url
         inventor['contact']['website'] = profile_url
         inventor['exp']['exp']['company'] = self.name
         found.append(inventor)

         message = 'Found inventor {}'.format(inventor['name'])
         self.log(message, level=logging.DEBUG)
     return found
示例#7
0
    def add_inventors(self, response: Response) -> list:
        """
        Collect the inventors linked inside the ``inventors`` div.

        :param response: Response object
        :return: a list of inventors, one user per link text
        """
        link_texts = response.xpath(
            "//div[@class='inventors']/a/text()").getall()

        result = []
        for inventor_name in link_texts:
            inventor = create_user()
            inventor['name'] = inventor_name
            inventor['exp']['exp']['company'] = self.name
            result.append(inventor)
            message = 'Found inventor {}'.format(inventor['name'])
            self.log(message, level=logging.DEBUG)
        return result
示例#8
0
    def add_inventors(self, response: Response) -> list:
        """
        Collect the inventors from the list items of the inventors block.

        Empty text nodes are skipped.

        :param response: Response object
        :return: a list of inventors
        """
        item_texts = response.xpath(
            "//div[@class='ncd-data inventors display-block indented']/ul/li/text()"
        ).getall()

        result = []
        for text in item_texts:
            if len(text) >= 1:
                inventor = create_user()
                inventor['name'] = text
                inventor['exp']['exp']['company'] = self.name
                result.append(inventor)
                message = 'Found inventor {}'.format(inventor['name'])
                self.log(message, level=logging.DEBUG)
        return result
示例#9
0
    def get_contact(self, response: Response) -> dict:
        """
        Get contact information of the project.

        The name and e-mail come from the link inside the ``tech-manager``
        div, the phone number from the div's own text.

        :param response: Response object
        :return: the technology manager as a user
        """
        user = create_user()
        href = response.xpath(
            "//div[@class='tech-manager']/a/@href").get()
        name = response.xpath("//div[@class='tech-manager']/a/text()").get()
        user['name'] = name
        # ``get()`` returns None when the page has no manager link, so the
        # 'mailto:' prefix may only be cut off once a value is known to exist.
        user['contact']['email'] = (
            href[len('mailto:'):] if href is not None else '')
        # NOTE(review): the result of extract_phone is stored as returned;
        # confirm whether it is a single number or a collection of matches.
        phone = extract_phone(
            response.xpath("//div[@class='tech-manager']/text()").get())
        if len(phone) > 0:
            user['contact']['phone'] = phone
        return user
示例#10
0
def upload_user_to_server(file_name):
    """
    Convert pickled author data into users and write them to a JSON file.

    The pickle is expected to hold a mapping whose keys are indexable (element
    0 is used as name, element 1 as abstract/reference/contact source) and
    whose values provide ``'keyword'`` and ``'citation'``. The users are
    written to ``pubmed_author.json`` in the current working directory.

    :param file_name: path of the pickle file to read
    :return: None
    """
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    with open(file_name, 'rb') as source:
        data = pickle.load(source)
    # upload the user to the server
    users = []
    for a in data:
        user = create_user()
        user['name'] = a[0]
        user['abs'] = a[1]
        user['ref'] = a[1]
        user['contact']['email'] = normalize_email(a[1])
        user['contact']['phone'] = normalize_phone(a[1])
        user['tag'] = data[a]['keyword']
        user['onepage']['prod'] = data[a]['citation']
        # try to parse the address
        addr = parse_us_address(a[1])
        if addr is not None:
            user['addr'] = addr
        # Without this the dumped list stayed empty.
        users.append(user)
    # json.dump writes str, so the target has to be opened in text mode.
    with open('pubmed_author.json', 'w', encoding='utf-8') as target:
        json.dump(users, target)
示例#11
0
    def add_inventors(self, response: Response) -> list:
        """
        Add inventors to the project.

        Links without any text are skipped.

        :param response: Response object
        :return: a list of inventors
        """
        inventors = []
        for row in response.xpath(
                '//*[@id="formTechPub1"]/div/table/tr/td/table[1]/tr/td/a'):
            name = row.xpath("text()").get()
            # ``get()`` returns None for a link without a text node, which
            # ``len`` cannot handle; treat it like an empty name.
            if not name:
                continue
            user = create_user()
            user['name'] = name
            user['exp']['exp']['company'] = self.name
            inventors.append(user)
            self.log('Found inventor {}'.format(user['name']),
                     level=logging.DEBUG)
        return inventors
示例#12
0
    def add_inventors(self, response: Response) -> list:
        """
        Collect the inventors linked from the ``dd.inventor`` entries.

        :param response: Response object
        :return: a list of inventors; each link target is stored both as
                 reference and as website
        """
        result = []
        for anchor in response.xpath("//dd[@class='inventor']/a"):
            link_text = anchor.xpath("text()").get()
            target = anchor.xpath("@href").get()

            inventor = create_user()
            inventor['name'] = link_text
            inventor['ref'] = target
            inventor['contact']['website'] = target
            inventor['exp']['exp']['company'] = self.name
            result.append(inventor)

            message = 'Found inventor {}'.format(inventor['name'])
            self.log(message, level=logging.DEBUG)
        return result
示例#13
0
    def add_inventors(self, response: Response) -> list:
        """
        Collect the inventors from the side block headed 'Investigators:'.

        Side blocks with any other heading are ignored, as are empty list
        item texts.

        :param response: Response object
        :return: a list of inventors
        """
        result = []
        side_blocks = response.xpath(
            "//div[@class='side-bucket invention-side-block']")
        for block in side_blocks:
            heading = block.xpath("h4[@class='side-heading']/text()").get()
            if heading != 'Investigators:':
                continue
            for text in block.xpath("ul/li/text()").getall():
                if len(text) >= 1:
                    inventor = create_user()
                    inventor['name'] = text
                    inventor['exp']['exp']['company'] = self.name
                    result.append(inventor)
                    message = 'Found inventor {}'.format(inventor['name'])
                    self.log(message, level=logging.DEBUG)
        return result
示例#14
0
    def get_contact(self, response: Response) -> list:
        """
        Gets the contact information.

        One user is created per div inside an ``associate-item`` block.

        :param response: the response object
        :return: a list of contact
        """
        users = []
        for row in response.xpath("//div[@class='associate-item']/div"):
            user = create_user()
            user['ref'] = response.urljoin(row.xpath("a/@href").get())
            user['contact']['website'] = user['ref']
            user['logo'] = response.urljoin(row.xpath("a/img/@src").get())
            user['name'] = row.xpath("h4[@class='team-name']/a/text()").get()
            user['abs'] = row.xpath("strong[@class='team-position']/text()").get()
            user['exp']['exp']['title'] = user['abs']
            user['exp']['exp']['company'] = self.name
            # The paths below are relative, so they have to be evaluated on
            # the current row; evaluated on the response they are resolved
            # from the document root and never reach this row's items.
            # NOTE(review): assumes the <ul> is a direct child of the row div -
            # confirm against the page markup.
            user['contact']['email'] = row.xpath(
                "ul/li[@class='bottom-item bottom-email']/a/@href").get() or ''
            user['contact']['phone'] = row.xpath(
                "ul/li[@class='bottom-item bottom-phone']/a/text()").get() or ''
            users.append(user)
        return users
示例#15
0
    def get_contact(self, response: Response) -> dict:
        """
        Build the case manager's user from the ``case-manager`` div.

        :param response: the response object
        :return: the case manager as a user; the e-mail falls back to an
                 empty string and the phone is only set when one was extracted
        """
        manager = create_user()

        manager['name'] = response.xpath(
            "//div[@class='case-manager']/a/text()").get()

        relative_link = response.xpath(
            "//div[@class='case-manager']/a/@href").get()
        manager['ref'] = response.urljoin(relative_link)
        manager['contact']['website'] = manager['ref']

        address = response.xpath(
            "//div[@class='case-manager']/span/a/text()").get()
        manager['contact']['email'] = address
        if manager['contact']['email'] is None:
            manager['contact']['email'] = ''

        block_text = response.xpath(
            "string(//div[@class='case-manager'])").get()
        numbers = extract_phone(block_text)
        if len(numbers) > 0:
            manager['contact']['phone'] = numbers[0]
        return manager
示例#16
0
    def _add_user(data: dict) -> dict:
        """
        Add an user's contact.

        The name is assembled from the optional ``first_name``,
        ``middle_name`` and ``last_name`` entries, the phone from ``phone``
        and ``phone_ext``. ``affiliation`` takes precedence over
        ``organization`` for the company. A user without any name is named
        after the e-mail address, or 'Anonymous' when that is empty too.

        :param data: a dictionary contains user information
        :return: an user
        """
        user = create_user()
        name = [
            data[key]
            for key in ('first_name', 'middle_name', 'last_name')
            if key in data
        ]
        user['name'] = ' '.join(name)
        if 'role' in data:
            user['exp']['exp']['title'] = data['role']
        if 'affiliation' in data:
            user['abs'] = data['affiliation']
            user['exp']['exp']['company'] = data['affiliation']
        elif 'organization' in data:
            user['abs'] = data['organization']
            user['exp']['exp']['company'] = data['organization']
        phone = [data[key] for key in ('phone', 'phone_ext') if key in data]
        user['contact']['phone'] = '-'.join(phone)
        user['contact']['email'] = data.get('email', '')
        if 'degrees' in data:
            # NOTE(review): ``user`` is indexed like a dict everywhere else;
            # attribute access only works if create_user returns an object
            # exposing ``title`` - confirm, otherwise this raises.
            if not user.title:
                user['edu']['degree'] = data['degrees']
        # The previous ``len(...) < 0`` test could never be true, so nameless
        # users never received the fallback name.
        if not user['name']:
            user['name'] = user['contact']['email'] or 'Anonymous'
        return user
示例#17
0
    def add_inventors(self, response: Response) -> list:
        """
        Collect the researchers listed in the ``alt_toolbox`` div.

        The children of the div are scanned for an ``h2`` whose text is
        'Researchers'; only the element directly after that heading is read,
        and each of its non-empty list item texts becomes an inventor.

        :param response: Response object
        :return: a list of inventors
        """
        result = []
        heading_seen = False
        for node in response.xpath("//div[@id='alt_toolbox']/*"):
            if not heading_seen:
                # still looking for the heading that introduces the list
                heading_seen = (
                    node.xpath('name()').get() == 'h2'
                    and node.xpath('string()').get() == 'Researchers'
                )
                continue
            for text in node.xpath('li/text()').getall():
                if len(text) >= 1:
                    inventor = create_user()
                    inventor['name'] = text
                    inventor['exp']['exp']['company'] = self.get_name(response)
                    result.append(inventor)
                    message = 'Found inventor {}'.format(inventor['name'])
                    self.log(message, level=logging.DEBUG)
            break
        return result
示例#18
0
    def add_inventors(self, response: Response) -> list:
        """
        Add inventors to the project.

        The inventors are the links in the first paragraph of the body field;
        links without any text are skipped.

        :param response: Response object
        :return: a list of inventors
        """
        inventors = []
        for row in response.xpath(
                "//div[contains(@class,'field field-name-body field-type-text-with-summary')]/div/div/p[1]/a"
        ):
            name = row.xpath("text()").get()
            link = row.xpath("@href").get()
            # ``get()`` returns None for a link without a text node, which
            # ``len`` cannot handle; treat it like an empty name.
            if not name:
                continue
            user = create_user()
            user['name'] = name
            user['ref'] = link
            user['contact']['website'] = user['ref']
            user['exp']['exp']['company'] = self.name
            inventors.append(user)
            self.log('Found inventor {}'.format(user['name']),
                     level=logging.DEBUG)
        return inventors
示例#19
0
def parse_html(file):
    """
    Parse a saved trial registry page into a product and its two contacts.

    :param file: the source handed to ``etree.parse``
    :return: a dict with the keys ``'product'``, ``'applicant'`` and
             ``'principal_investigator'``
    """
    document = etree.parse(file, etree.HTMLParser())
    product = create_product()
    # NOTE(review): the language codes look swapped relative to the variable
    # names, yet English keys are read from ``data_english`` below, so the
    # argument presumably selects something other than the result language -
    # confirm against ``parse`` before touching this.
    data_english = parse(document, 'cn')
    data_chinese = parse(document, 'en')
    product['name'] = data_chinese[u'注册题目']
    product['abs'] = data_chinese[u'研究目的']
    product['asset']['stat'] = map_status(data_english['Recruiting status'])
    product['intro'] = data_chinese['药物成份或治疗方案详述']
    href = document.xpath("//body/div[4]/div[2]/a")
    product['ref'] = 'http://www.chictr.org.cn/' + (href[0].attrib['href']
                                                    if len(href) > 0 else '')

    # Every tag is added in both languages; the study type pair used to be
    # appended a second time by a duplicated pair of statements.
    for chinese_key, english_key in (
            (u'研究疾病', u'Target disease'),
            (u'研究疾病代码', u'Target disease code'),
            (u'研究类型', u'Study type'),
            (u'研究所处阶段', u'Study phase'),
    ):
        product['tag'].append(data_chinese[chinese_key])
        product['tag'].append(data_english[english_key])
    product['tag'] = remove_empty_string(product['tag'])

    product['asset']['lic'].append(data_chinese['研究课题代号(代码)'])
    product['asset']['lic'].append(data_chinese['注册号'])
    product['asset']['lic'].append(data_chinese['伦理委员会批件文号'])
    product['asset']['lic'] = remove_empty_string(product['asset']['lic'])

    product['asset']['type'] = 2
    for field, source_key in (
            ('created', 'Date of Registration'),
            ('updated', 'Date of Last Refreshed on'),
    ):
        try:
            product[field] = parser.parse(
                data_english[source_key]).strftime(
                    "%a, %d %b %Y %H:%M:%S GMT")
        except Exception:
            # Best effort: a missing or unparseable date leaves the field
            # untouched. Unlike a bare ``except`` this no longer swallows
            # KeyboardInterrupt or SystemExit.
            pass
    product['asset']['tech'] = dictionary_to_markdown(data_english, [
        'Study design', 'Inclusion criteria', 'Exclusion criteria',
        'Study execute time', 'Interventions',
        'Countries of recruitment and research settings', 'Outcomes',
        'Collecting sample(s) from participants', 'Participant age', 'Gender',
        'Randomization Procedure (please state who generates the random number sequence and by what method)',
        'Blinding', 'The time of sharing IPD',
        'The way of sharing IPD”(include metadata and protocol, If use web-based public database, please provide the url)',
        'Data collection and Management (A standard data collection and management system include a CRF and an electronic data capture',
        'Data Managemen Committee'
    ])
    product['asset']['tech'] += dictionary_to_markdown(data_chinese, [
        '研究设计', '纳入标准', '排除标准', '研究实施时间', '干预措施', '研究实施地点', '测量指标', '采集人体标本',
        '年龄范围', '性别', '随机方法(请说明由何人用什么方法产生随机序列)', '盲法', '原始数据公开时间',
        '共享原始数据的方式(说明:请填入公开原始数据日期和方式,如采用网络平台,需填该网络平台名称和网址)',
        '数据采集和管理(说明:数据采集和管理由两部分组成,一为病例记录表(Case Record Form, CRF),二为电子采集和管理系统(Electronic Data Capture, EDC),如ResMan即为一种基于互联网的EDC',
        '数据管理委员会'
    ])

    applicant = create_user()
    applicant['name'] = data_chinese[u'申请注册联系人']
    applicant['abs'] = 'Applicant'
    applicant['contact']['phone'] = data_chinese[u'申请注册联系人电话']
    applicant['contact']['email'] = data_chinese[u'申请注册联系人电子邮件']
    applicant['contact']['website'] = data_chinese[u'申请单位网址(自愿提供)']
    applicant['addr'] = parse_address(data_english[u'Applicant address'])
    applicant['addr']['zip'] = data_chinese[u'申请注册联系人邮政编码']
    applicant['exp']['exp']['company'] = data_chinese[u'申请人所在单位']
    principal_investigator = create_user()
    principal_investigator['name'] = data_chinese[u'研究负责人']
    principal_investigator['abs'] = 'Principal Investigator'
    principal_investigator['contact']['phone'] = data_chinese[u'研究负责人电话']
    principal_investigator['contact']['email'] = data_chinese[u'研究负责人电子邮件']
    principal_investigator['contact']['website'] = data_chinese[
        u'研究负责人网址(自愿提供)']
    principal_investigator['addr'] = parse_address(
        data_english[u"Study leader's address"])
    principal_investigator['addr']['zip'] = data_chinese[u'研究负责人邮政编码']

    # The product is located at the applicant's address; copy it so later
    # changes to one record do not leak into the other.
    product['addr'] = copy.deepcopy(applicant['addr'])
    return {
        'product': product,
        'applicant': applicant,
        'principal_investigator': principal_investigator
    }