Exemplo n.º 1
0
 def parse(self, response):
     """Archive the fetched page and write its product record as JSON.

     The raw body is saved to ``<work_directory>/<last URL segment>.html``
     and the assembled record to the matching ``.json`` file.  Tabular
     fields are read from ``self.data``, keyed by the response URL.
     """
     page_id = response.url.rsplit('/', 1)[-1]
     html_path = os.path.join(self.work_directory, page_id + '.html')
     with open(html_path, 'wb') as html_file:
         html_file.write(response.body)

     row = self.data[response.url]
     product = create_product()

     # Identity, tags and dates come straight from the tabular row.
     product['tag'] = [
         row['Category'] + ' Medicine',
         'EU',
         'Drug',
         row['Referral type'],
     ]
     product['abs'] = row[
         'International non-proprietary name (INN) / common name']
     product['name'] = row['Referral name']
     product['asset']['stat'] = self.status_code(row['Status of referral'])
     product['updated'] = row['Revision date']
     product['created'] = row['First published']
     product['website'] = response.url
     product['ref'] = response.url

     tech_fields = (
         'Reference number', 'Decision making model', 'Authorisation model',
         'PRAC recommendation', 'Procedure start date',
         'PRAC recommendation date', 'CHMP opinion / CMDh position date',
         'EC decision date', 'Associated name',
     )
     tech_table = {field: row[field] for field in tech_fields}
     product['asset']['tech'] = dictionary_to_markdown(tech_table)
     # The licence tags reuse the very same list object as the public tags.
     product['asset']['lic'] = product['tag']
     product['asset']['type'] = 1

     # The introduction is the text of the summary paragraphs on the page.
     summary_paragraphs = response.xpath(
         "//div[contains(@class, 'field-name-field-ema-web-summary')]/div/p/text()"
     ).getall()
     product['intro'] = '\n'.join(summary_paragraphs)
     market_table = self.extract_market(response)
     product['asset']['market'] = dictionary_to_markdown(market_table)
     product['addr']['city'] = 'Unknown'
     product['addr']['country'] = 'EU'

     json_path = os.path.join(self.work_directory, page_id + '.json')
     with open(json_path, 'w') as json_file:
         json.dump(product, json_file)
Exemplo n.º 2
0
    def parse(self, response):
        """Archive a medicine page and write product and applicant JSON.

        The raw body is saved as ``<last URL segment>.html`` in
        ``self.work_directory``; a ``{'product', 'applicant'}`` mapping is
        written next to it as ``.json``.  Tabular fields are read from
        ``self.data``, keyed by the response URL.
        """
        page_id = response.url.rsplit('/', 1)[-1]
        html_path = os.path.join(self.work_directory, page_id + '.html')
        with open(html_path, 'wb') as html_file:
            html_file.write(response.body)

        row = self.data[response.url]
        product = create_product()
        product['name'] = row['Medicine name']
        product['abs'] = row[
            'International non-proprietary name (INN) / common name']

        # Build the tag list: fixed labels first, then optional row flags.
        tags = [row['Category'] + ' Medicine', 'EU', 'Drug']
        product['tag'] = tags
        therapeutic_area = row['Therapeutic area']
        if therapeutic_area is not None:
            product['tag'].extend(therapeutic_area.split(", "))
        product['website'] = response.url
        product['ref'] = response.url
        if row['Patient safety'] == 'no':
            product['tag'].append('Patient Risk')
        if row['Orphan medicine'] == 'yes':
            product['tag'].append('Orphan medicine')
        species = row['Species']
        if species is not None and len(species) > 0:
            product['tag'].append('Species')

        product['updated'] = row['Revision date']
        product['created'] = row['First published']
        # The licence tags reuse the very same list object as the public tags.
        product['asset']['lic'] = product['tag']
        product['asset']['stat'] = 3
        product['asset']['type'] = 1

        summary_paragraphs = response.xpath(
            "//div[contains(@class, 'views-field-field-ema-web-summary')]/div/p/text()"
        ).getall()
        product['intro'] = '\n'.join(summary_paragraphs)
        market_table = self.extract_market(response)
        product['asset']['market'] = dictionary_to_markdown(market_table)
        tech_fields = ('Active substance', 'Type of withdrawal',
                       'Date of withdrawal')
        product['asset']['tech'] = dictionary_to_markdown(
            {field: row[field] for field in tech_fields})
        product['addr']['city'] = 'Unknown'
        product['addr']['country'] = 'EU'

        # The marketing authorisation holder becomes the applicant company.
        applicant = create_company()
        applicant['name'] = row['Marketing authorisation holder/company name']
        applicant['abs'] = 'A Medicine Company'
        applicant['addr']['city'] = 'Unknown'
        applicant['addr']['country'] = 'EU'
        document_link = response.xpath(
            "//div[contains(@class, 'views-field-view-2')]/span/li/a/@href"
        ).get()
        applicant['entr']['bp'] = document_link

        json_path = os.path.join(self.work_directory, page_id + '.json')
        with open(json_path, 'w') as json_file:
            json.dump({'product': product, 'applicant': applicant}, json_file)
Exemplo n.º 3
0
    def parse(self, response):
        """Archive a technology page and write the extracted records as JSON.

        The raw body is saved as ``<last URL segment>.html`` in
        ``self.work_directory``.  A mapping with the keys ``product``,
        ``inventors``, ``patents`` and ``publications`` is written next to it
        as ``<last URL segment>.json``.

        The description sections whose headings match 'Applications',
        'Advantages' and 'Technology' feed the abstract, market and tech
        fields respectively; the remaining sections become the introduction.
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        name = response.url.split('/')[-1]
        with open(os.path.join(self.work_directory, name + '.html'),
                  'wb') as fo:
            fo.write(response.body)
        product = create_product()
        product['name'] = response.xpath("//h1[@id='page-title']/text()").get()
        product['ref'] = response.url
        # NOTE(review): product['contact'] is replaced wholesale further down,
        # so this website value only survives if get_contact() sets it too.
        product['contact']['website'] = response.url
        product['addr'] = deepcopy(self.address)
        product['asset']['type'] = 3
        description = self.get_description(response)
        abstract = extract_dictionary(description, 'Applications')
        product['abs'] = '\n'.join(abstract.values())
        if not product['abs']:
            # Fall back to the first section; the default avoids StopIteration
            # when the page yielded no description at all.
            product['abs'] = next(iter(description.values()), '')
        if not product['abs']:
            product['abs'] = product['name']
        market = extract_dictionary(description, 'Advantages')
        product['asset']['market'] = '\n'.join(market.values())
        tech = extract_dictionary(description, 'Technology')
        product['asset']['tech'] = '\n'.join(tech.values())
        # A heading may match more than one filter, so drop keys tolerantly
        # instead of raising KeyError on the second removal.
        for consumed in (abstract, market, tech):
            for k in consumed:
                description.pop(k, None)
        product['intro'] = dictionary_to_markdown(description)
        product['tag'] = self.add_keywords(response)
        product['contact'] = self.get_contact(response)

        inventors = self.add_inventors(response)
        for user in inventors:
            if len(user['abs']) < 1:
                user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']

        patents = self.get_patents(response)
        publications = self.get_publications(response)
        # The extension needs its leading dot so the file pairs with the
        # archived '.html' file.
        with open(os.path.join(self.work_directory, name + '.json'),
                  'w') as fo:
            json.dump(
                {
                    'product': product,
                    'inventors': inventors,
                    'patents': patents,
                    'publications': publications
                }, fo)
Exemplo n.º 4
0
    def parse(self, response):
        """Archive a technology page and write product and inventor JSON.

        The file stem comes from ``self.parse_name_from_url``.  The raw body
        is saved as ``<stem>.html`` and a ``{'product', 'inventors'}``
        mapping as ``<stem>.json`` in ``self.work_directory``.
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        stem = self.parse_name_from_url(response.url)
        html_path = os.path.join(self.work_directory, stem + '.html')
        with open(html_path, 'wb') as html_file:
            html_file.write(response.body)

        record = create_product()
        record['ref'] = response.url
        record['contact']['website'] = response.url
        record['name'] = response.xpath("string(//h1[@class='title'])").get()

        # Sections matching 'Applications' go to the market field; the
        # remaining sections form the introduction.
        meta = self.get_meta(response)
        applications = extract_dictionary(meta, 'Applications')
        record['asset']['market'] = '\n'.join(applications.values())
        for key in applications:
            del meta[key]
        record['intro'] = dictionary_to_markdown(meta)

        # The abstract is the introduction up to and including the first
        # '. ' separator, or the whole introduction if that slice is empty.
        intro_text = record['intro']
        first_sentence = intro_text[:intro_text.find('. ') + 1]
        if len(first_sentence) < 1:
            first_sentence = record['intro']
        record['abs'] = first_sentence

        record['asset']['type'] = 3
        record['addr'] = deepcopy(self.address)

        people = self.add_inventors(response)
        for person in people:
            person['abs'] = 'Inventor of ' + record['name']
            person['addr'] = record['addr']
            person['tag'] = record['tag']
        record['contact'] = self.get_contact(response)

        json_path = os.path.join(self.work_directory, stem + '.json')
        with open(json_path, 'w') as json_file:
            json.dump({'product': record, 'inventors': people}, json_file)
Exemplo n.º 5
0
    def parse(self, response):
        """Archive a technology page and write product and inventor JSON.

        The raw body is saved as ``<last URL segment>.html`` and a
        ``{'product', 'inventors'}`` mapping as ``<last URL segment>.json``
        in ``self.work_directory``.
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        page_id = response.url.rsplit('/', 1)[-1]
        html_path = os.path.join(self.work_directory, page_id + '.html')
        with open(html_path, 'wb') as html_file:
            html_file.write(response.body)

        record = create_product()
        record['ref'] = response.url
        keywords = self.add_keywords(response)
        record['tag'] = remove_empty_string_from_array(keywords)
        record['asset']['type'] = 3
        record['addr'] = deepcopy(self.address)
        record['name'] = response.xpath("string(//h1)").get()

        # The first line of the abstract is the short summary; the rest is
        # the introduction.  An empty first line falls back to the file stem.
        meta = self.get_meta(response)
        lines = meta['abstract'].split('\n')
        record['abs'] = lines[0] if lines and lines[0] else page_id
        record['intro'] = '\n'.join(lines[1:])
        del meta['abstract']
        record['asset']['market'] = dictionary_to_markdown(meta)

        # get_contact() yields a pair; only the second element is stored.
        _manager, contact_details = self.get_contact(response)
        record['contact'] = contact_details
        record['contact']['website'] = response.url

        people = self.add_inventors(response)
        for person in people:
            person['abs'] = 'Inventor of ' + record['name']
            person['addr'] = record['addr']
            person['tag'] = record['tag']

        json_path = os.path.join(self.work_directory, page_id + '.json')
        with open(json_path, 'w') as json_file:
            json.dump({'product': record, 'inventors': people}, json_file)
Exemplo n.º 6
0
    def parse(self, response):
        """Archive a technology page and write product and inventor JSON.

        The file stem is the second-to-last URL segment.  The raw body is
        saved as ``<stem>.html`` and a ``{'product', 'inventors'}`` mapping
        as ``<stem>.json`` in ``self.work_directory``.
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        name = response.url.split('/')[-2]
        with open(os.path.join(self.work_directory, name + '.html'),
                  'wb') as fo:
            fo.write(response.body)
        product = create_product()
        product['ref'] = response.url
        # NOTE(review): product['contact'] is replaced wholesale further down,
        # so this website value only survives if get_contact() sets it too.
        product['contact']['website'] = response.url
        product['name'] = response.xpath("string(//h1)").get()
        meta = self.get_meta(response)
        abstract = extract_dictionary(meta, 'Advantage|advantage|Abstract')
        product['abs'] = '\n'.join(abstract.values())
        if not product['abs']:
            # Fall back to the first section; the default avoids StopIteration
            # when the page yielded no metadata at all.
            product['abs'] = next(iter(meta.values()), '')
        if not product['abs']:
            product['abs'] = product['name']
        product['asset']['tech'] = dictionary_to_markdown(
            meta, ('Technology', ))
        product['asset']['market'] = dictionary_to_markdown(
            meta, ('Value Proposition', 'Value proposition'))
        # Remove everything already consumed above so the introduction only
        # holds the remaining sections; missing keys are skipped.
        for k in abstract:
            meta.pop(k, None)
        for key in ('Value Proposition', 'Value proposition', 'Technology'):
            meta.pop(key, None)
        product['intro'] = dictionary_to_markdown(meta)
        product['asset']['type'] = 3
        product['addr'] = deepcopy(self.address)
        inventors = self.add_inventors(response)
        product['tag'] = self.add_tags(response)
        for user in inventors:
            user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']
            user['tag'] = product['tag']
        product['contact'] = self.get_contact(response)

        with open(os.path.join(self.work_directory, name + '.json'),
                  'w') as fo:
            json.dump({'product': product, 'inventors': inventors}, fo)
Exemplo n.º 7
0
 def parse(self, response):
     """Archive a herbal-substance page and write its product record as JSON.

     The raw body is saved as ``<last URL segment>.html`` and the record as
     ``<last URL segment>.json`` in ``self.work_directory``.  Tabular
     fields are read from ``self.data``, keyed by the response URL.
     """
     page_id = response.url.rsplit('/', 1)[-1]
     html_path = os.path.join(self.work_directory, page_id + '.html')
     with open(html_path, 'wb') as html_file:
         html_file.write(response.body)

     row = self.data[response.url]
     product = create_product()
     product['name'] = row['English common name of herbal substance']
     product['abs'] = row['Botanical name of plant']

     # Fixed labels first, then the optional uses and combination flag.
     product['tag'] = ["EU", 'Drug', 'Herbal']
     uses = row['Use']
     if uses is not None:
         product['tag'].extend(uses.split(', '))
     product['website'] = response.url
     product['ref'] = response.url
     if row['Combination'] == 'yes':
         product['tag'].append('Combination')

     product['asset']['stat'] = self.status_code(row['Status'])
     tech_fields = (
         'Latin name of herbal substance', 'Outcome',
         'Date added to the inventory', 'Date added to the priority list',
         'First published', 'Revision date',
     )
     tech_table = {field: row[field] for field in tech_fields}
     product['asset']['tech'] = dictionary_to_markdown(tech_table)
     product['updated'] = row['Revision date']
     product['created'] = row['First published']
     # The licence tags reuse the very same list object as the public tags.
     product['asset']['lic'] = product['tag']
     product['asset']['type'] = 1

     summary_paragraphs = response.xpath(
         "//div[contains(@class, 'field-name-field-ema-web-summary')]/div/div/p/text()"
     ).getall()
     product['intro'] = '\n'.join(summary_paragraphs)
     market_table = self.extract_market(response)
     product['asset']['market'] = dictionary_to_markdown(market_table)
     product['addr']['city'] = 'Unknown'
     product['addr']['country'] = 'EU'

     json_path = os.path.join(self.work_directory, page_id + '.json')
     with open(json_path, 'w') as json_file:
         json.dump(product, json_file)
Exemplo n.º 8
0
    def parse(self, response):
        """Archive a technology page and write product, inventor and contact JSON.

        The file stem comes from ``self.parse_name_from_url``.  Title XPath
        and the abstract/market/tech section filters are read from instance
        attributes.  The raw body is saved as ``<stem>.html`` and a
        ``{'product', 'inventors', 'contact'}`` mapping as ``<stem>.json``.
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        stem = self.parse_name_from_url(response.url)
        html_path = os.path.join(self.work_directory, stem + '.html')
        with open(html_path, 'wb') as html_file:
            html_file.write(response.body)

        record = create_product()
        record['ref'] = response.url
        record['contact']['website'] = response.url
        record['name'] = response.xpath(self.title_xpath).get()

        meta = self.get_meta(response)
        abstract_part = extract_dictionary(meta, self.abstract_filter)
        record['abs'] = '\n'.join(abstract_part.values())
        market_part = extract_dictionary(meta, self.market_filter)
        record['asset']['market'] = '\n'.join(market_part.values())
        tech_part = extract_dictionary(meta, self.tech_filter)
        record['asset']['tech'] = '\n'.join(tech_part.values())

        # Whatever was consumed above is removed (if still present) so the
        # introduction only holds the remaining sections.
        for consumed in (market_part, tech_part, abstract_part):
            for key in consumed:
                meta.pop(key, None)
        record['intro'] = dictionary_to_markdown(meta)

        record['asset']['type'] = 3
        record['addr'] = deepcopy(self.address)
        record['tag'] = self.add_tags(response)

        people = self.add_inventors(response)
        for person in people:
            person['abs'] = 'Inventor of ' + record['name']
            person['addr'] = record['addr']
            person['tag'] = record['tag']

        # The contact record gets the same abstract/address/tags as the
        # inventors; its nested 'contact' entry becomes the product contact.
        contact_record = self.get_contact(response)
        contact_record['abs'] = 'Inventor of ' + record['name']
        contact_record['addr'] = record['addr']
        contact_record['tag'] = record['tag']
        record['contact'] = contact_record['contact']

        payload = {
            'product': record,
            'inventors': people,
            'contact': contact_record
        }
        json_path = os.path.join(self.work_directory, stem + '.json')
        with open(json_path, 'w') as json_file:
            json.dump(payload, json_file)
Exemplo n.º 9
0
    def parse(self, response):
        """Archive a technology page and write the extracted records as JSON.

        The raw body is saved under the last URL segment as-is (no extension
        is added).  A mapping with the keys ``product``, ``contact``,
        ``inventors`` and ``patents`` is written next to it with a ``.json``
        extension.

        Description sections whose headings match 'brief' form the abstract,
        those matching 'full' form the introduction, and the rest is rendered
        into the market field.
        """
        self.log('Parse technology {}'.format(response.url), level=logging.INFO)
        name = response.url.split('/')[-1]
        with open(os.path.join(self.work_directory, name), 'wb') as fo:
            fo.write(response.body)
        product = create_product()
        product['name'] = response.xpath("//h1[@class='tech-heading tech-heading-main']/text()").get()
        product['ref'] = response.url
        # NOTE(review): product['contact'] is replaced wholesale further down,
        # so this website value only survives if get_contact() sets it too.
        product['contact']['website'] = response.url
        product['addr'] = deepcopy(self.address)
        product['asset']['type'] = 3
        description = self.get_description(response)
        abstract = extract_dictionary(description, 'brief|Brief|BRIEF')
        product['abs'] = '\n'.join(abstract.values())
        if not product['abs']:
            # Fall back to the first section; the default avoids StopIteration
            # when the page yielded no description at all.
            product['abs'] = next(iter(description.values()), '')
        if not product['abs']:
            product['abs'] = product['name']
        introduction = extract_dictionary(description, 'full|Full|FULL')
        product['intro'] = '\n'.join(introduction.values())
        # A heading may match both filters, so drop keys tolerantly instead of
        # raising KeyError on the second removal.
        for consumed in (abstract, introduction):
            for k in consumed:
                description.pop(k, None)
        product['asset']['market'] = dictionary_to_markdown(description)
        product['contact'] = self.get_contact(response)
        product['tag'] = self.add_keywords(response)

        contact_person = self.get_contact_person(response)
        contact_person['abs'] = 'Person of Contact for ' + product['name']
        contact_person['addr'] = product['addr']
        contact_person['contact'] = product['contact']
        contact_person['tag'] = product['tag']
        inventors = self.add_inventors(response)
        for user in inventors:
            user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']
            user['tag'] = product['tag']

        patents = self.get_patents(response)
        # Swap a trailing '.html' for '.json'; any other name simply gets
        # '.json' appended rather than losing its last four characters.
        stem = name[:-len('.html')] if name.endswith('.html') else name
        with open(os.path.join(self.work_directory, stem + '.json'), 'w') as fo:
            json.dump({'product': product, 'contact': contact_person, 'inventors': inventors, 'patents': patents}, fo)
Exemplo n.º 10
0
    def parse(self, response):
        """Archive a project page and write product and inventor JSON.

        The raw body is saved as ``<last URL segment>.html`` and a
        ``{'product', 'inventors'}`` mapping as ``<last URL segment>.json``
        in ``self.work_directory``.

        Metadata entries whose keys match 'brief'/'Short' form the abstract,
        those matching 'abstract' form the introduction, and the rest is
        rendered into the market field.
        """
        self.log('Parse technology {}'.format(response.url), level=logging.INFO)
        name = response.url.split('/')[-1]
        with open(os.path.join(self.work_directory, name + '.html'), 'wb') as fo:
            fo.write(response.body)
        product = create_product()
        product['ref'] = response.url
        # NOTE(review): product['contact'] is replaced wholesale further down,
        # so this website value only survives if get_contact() sets it too.
        product['contact']['website'] = response.url
        meta = self.get_meta(response)
        product['name'] = meta['Project Title']
        # The creation date is best-effort: a missing or unparseable value
        # leaves the default in place, but the failure is now logged and
        # interpreter-level exits are no longer swallowed.
        try:
            product['created'] = parse(meta['Posted Date']).strftime("%a, %d %b %Y %H:%M:%S GMT")
        except Exception as error:
            self.log('No usable posted date for {}: {!r}'.format(response.url, error),
                     level=logging.DEBUG)
        product['tag'] = meta['Tags']
        if len(meta['banner']) > 0:
            product['logo'] = meta['banner'][0]
        product['asset']['type'] = 3
        abstract = extract_dictionary(meta, 'brief|Brief|BRIEF|Short')
        product['abs'] = '\n'.join(abstract.values())
        if len(product['abs']) < 1:
            product['abs'] = next(iter(meta.values()), '')
        if len(product['abs']) < 1:
            product['abs'] = product['name']
        introduction = extract_dictionary(meta, 'abstract|Abstract')
        product['intro'] = '\n'.join(introduction.values())
        # A key such as "Brief Abstract" matches both filters, so drop keys
        # tolerantly instead of raising KeyError on the second removal.
        for consumed in (abstract, introduction):
            for k in consumed:
                meta.pop(k, None)
        product['asset']['market'] = dictionary_to_markdown(meta)
        product['contact'] = self.get_contact(response)
        product['addr'] = deepcopy(self.get_address(response))
        inventors = self.add_inventors(response)
        for user in inventors:
            user['abs'] = 'Inventor of ' + product['name']
            user['addr'] = product['addr']
            user['tag'] = product['tag']

        with open(os.path.join(self.work_directory, name + '.json'), 'w') as fo:
            json.dump({'product': product, 'inventors': inventors}, fo)
Exemplo n.º 11
0
def _pma_field(record, key, default=''):
    """Return ``record[key]``, falling back to the nested ``openfda`` mapping.

    A record without an ``openfda`` entry is treated as having an empty one,
    so a missing key yields *default* instead of raising ``KeyError``.
    """
    if key in record:
        return record[key]
    return record.get('openfda', {}).get(key, default)


def _pma_timestamp(record, key):
    """Format the date stored under *key* as a GMT timestamp string.

    Returns ``None`` when the value is missing or cannot be parsed.
    """
    try:
        return parser.parse(_pma_field(record, key, None)).strftime(
            "%a, %d %b %Y %H:%M:%S GMT")
    except (TypeError, ValueError, OverflowError):
        return None


def main():
    """Load the PMA JSON export and create product/applicant records.

    For every entry in ``result['results']`` a product and a company are
    built and sent to ``add_record('entity', ...)``; on success an
    'Applicant' relationship (type 7) linking the two is added as well.
    Start and end times are logged at critical level.
    """
    product_code = get_product_code()
    log = create_logger('PMA')
    with open('/home/jovyan/work/fda/device-pma-0001-of-0001.json',
              'r') as source:
        result = json.load(source)
    log.critical(datetime.datetime.now())
    for r in result['results']:
        #     if client.data.entity.find_one({'ref': r.get('pma_number', r['openfda'].get('pma_number', ''))}) is not None:
        #         log.info('{} already exists'.format(r.get('pma_number', r['openfda'].get('pma_number', ''))))
        #         continue
        p = create_product()
        p['ref'] = _pma_field(r, 'pma_number')
        p['name'] = _pma_field(r, 'trade_name')
        p['abs'] = _pma_field(r, 'generic_name', p['name'])
        if len(p['abs']) < 1:
            p['abs'] = p['name']
        p['addr']['line1'] = _pma_field(r, 'address_1')
        p['addr']['line2'] = _pma_field(r, 'address_2')
        p['addr']['city'] = _pma_field(r, 'city')
        p['addr']['state'] = _pma_field(r, 'state')
        p['addr']['zip'] = _pma_field(r, 'zip_code')
        p['addr']['country'] = _pma_field(r, 'country_code')
        p['asset']['type'] = 0

        advisory = _pma_field(r, 'advisory_committee_description')
        specialty = _pma_field(r, 'medical_specialty_description')
        device_code = _pma_field(r, 'product_code')
        p['tag'] = [advisory, specialty, 'FDA', 'Medical Device', 'PMA']
        # p['tag'] is used for tags readable to common users, p['lic'] is used for tags specified for product.
        p['asset']['lic'] = [
            'FDA', 'PMA', advisory, specialty, device_code,
            _pma_field(r, 'regulation_number'),
        ]
        p['asset']['lic'].extend(
            third_party(_pma_field(r, 'third_party_flag')))
        # NOTE(review): any non-empty flag value counts as expedited here;
        # confirm the export never carries an explicit negative value.
        if len(_pma_field(r, 'expedited_review_flag')) > 0:
            p['asset']['lic'].append('Expedited Review')

        submission_id = _pma_field(r, 'submission_type_id')
        if submission_id not in {'1', '2'}:
            submission = submission_type(submission_id)
            if submission is not None:
                p['asset']['lic'].append(submission)
                p['tag'].append(submission)

        code = product_code.get(device_code, None)
        if code is not None:
            p['asset']['lic'].extend([
                'Class ' + code['device_class'],
                # An exempt flag of 'N' means the device is NOT exempt.
                'GMP Required'
                if code['gmp_exempt_flag'] == 'N' else 'GMP Exempt',
            ])
            p['tag'].append('Class ' + code['device_class'])
            if code['implant_flag'] != 'N':
                p['asset']['lic'].append('Implant')
                p['tag'].append('Implant')
            if code['life_sustain_support_flag'] != 'N':
                p['asset']['lic'].append('Life Sustain Support')
                p['tag'].append('Life Sustain Support')
        p['asset']['stat'] = decision_code(_pma_field(r, 'decision_code'))

        # Dates are best-effort: the defaults from create_product() stay in
        # place when a date is missing or unparseable.
        created = _pma_timestamp(r, 'date_received')
        if created is not None:
            p['created'] = created
        updated = _pma_timestamp(r, 'decision_date')
        if updated is not None:
            p['updated'] = updated

        # NOTE(review): 'Summary' is filled from the supplement number and
        # only falls back to openfda's statement_or_summary; confirm that
        # this key mismatch is intended.
        p['intro'] = dictionary_to_markdown({
            'Summary':
            r.get('supplement_number',
                  r.get('openfda', {}).get('statement_or_summary', '')),
            'Supplement Reason':
            _pma_field(r, 'supplement_reason'),
            'Statement':
            _pma_field(r, 'ao_statement')
        })
        p['asset']['lic'] = remove_empty_string_from_array(p['asset']['lic'])
        p['tag'] = remove_empty_string_from_array(p['tag'])
        a = create_company()
        a['name'] = _pma_field(r, 'applicant')
        a['abs'] = 'A Medical Device Company'
        a['addr'] = p['addr']
        a['tag'] = p['tag']
        a['group']['parentId'] = '000000000000000000000000'
        # contact is just the name of contact

        response = add_record('entity', [p, a])
        if response['_status'] != 'OK':
            log.error('fail to create record for {}'.format(p['name']))
            continue
        # Items come back in submission order: index 0 is the product and
        # index 1 the applicant company.
        applicant_product = create_relationship(response['_items'][1]['_id'],
                                                response['_items'][0]['_id'])
        applicant_product['type'] = 7
        applicant_product['name'] = 'Applicant'
        applicant_product['abs'] = 'Applicant'
        response = add_record('relationship', [applicant_product])
        if response['_status'] != 'OK':
            log.error('fail to create relationship for {}'.format(p['name']))
        else:
            log.debug('added {} to the system'.format(p['name']))
    log.critical(datetime.datetime.now())
Exemplo n.º 12
0
def parse_html(file):
    """Parse a saved registry HTML page into product and contact records.

    Args:
        file: Path or file object accepted by ``etree.parse``.

    Returns:
        A dict with the keys ``product``, ``applicant`` and
        ``principal_investigator``.

    Raises:
        KeyError: if one of the required fields is missing from the parsed
            data (only the two date fields are optional).
    """
    document = etree.parse(file, etree.HTMLParser())
    product = create_product()
    # NOTE(review): the language codes look swapped relative to the variable
    # names; confirm against parse() whether the code selects or excludes.
    data_english = parse(document, 'cn')
    data_chinese = parse(document, 'en')
    product['name'] = data_chinese[u'注册题目']
    product['abs'] = data_chinese[u'研究目的']
    product['asset']['stat'] = map_status(data_english['Recruiting status'])
    product['intro'] = data_chinese['药物成份或治疗方案详述']
    href = document.xpath("//body/div[4]/div[2]/a")
    product['ref'] = 'http://www.chictr.org.cn/' + (href[0].attrib['href']
                                                    if len(href) > 0 else '')

    # Each attribute is tagged in both languages; the study type is listed
    # once only (it used to be appended twice).
    product['tag'].extend([
        data_chinese[u'研究疾病'],
        data_english[u'Target disease'],
        data_chinese[u'研究疾病代码'],
        data_english[u'Target disease code'],
        data_chinese[u'研究类型'],
        data_english[u'Study type'],
        data_chinese[u'研究所处阶段'],
        data_english[u'Study phase'],
    ])
    product['tag'] = remove_empty_string(product['tag'])

    product['asset']['lic'].append(data_chinese['研究课题代号(代码)'])
    product['asset']['lic'].append(data_chinese['注册号'])
    product['asset']['lic'].append(data_chinese['伦理委员会批件文号'])
    product['asset']['lic'] = remove_empty_string(product['asset']['lic'])

    product['asset']['type'] = 2
    # Dates are best-effort: a missing or unparseable value keeps whatever
    # create_product() provided.
    try:
        product['created'] = parser.parse(
            data_english['Date of Registration']).strftime(
                "%a, %d %b %Y %H:%M:%S GMT")
    except (KeyError, TypeError, ValueError, OverflowError):
        pass
    try:
        product['updated'] = parser.parse(
            data_english['Date of Last Refreshed on']).strftime(
                "%a, %d %b %Y %H:%M:%S GMT")
    except (KeyError, TypeError, ValueError, OverflowError):
        pass
    # The key lists below are used verbatim as lookup keys, so their
    # spelling and punctuation must not be "corrected".
    product['asset']['tech'] = dictionary_to_markdown(data_english, [
        'Study design', 'Inclusion criteria', 'Exclusion criteria',
        'Study execute time', 'Interventions',
        'Countries of recruitment and research settings', 'Outcomes',
        'Collecting sample(s) from participants', 'Participant age', 'Gender',
        'Randomization Procedure (please state who generates the random number sequence and by what method)',
        'Blinding', 'The time of sharing IPD',
        'The way of sharing IPD”(include metadata and protocol, If use web-based public database, please provide the url)',
        'Data collection and Management (A standard data collection and management system include a CRF and an electronic data capture',
        'Data Managemen Committee'
    ])
    product['asset']['tech'] += dictionary_to_markdown(data_chinese, [
        '研究设计', '纳入标准', '排除标准', '研究实施时间', '干预措施', '研究实施地点', '测量指标', '采集人体标本',
        '年龄范围', '性别', '随机方法(请说明由何人用什么方法产生随机序列)', '盲法', '原始数据公开时间',
        '共享原始数据的方式(说明:请填入公开原始数据日期和方式,如采用网络平台,需填该网络平台名称和网址)',
        '数据采集和管理(说明:数据采集和管理由两部分组成,一为病例记录表(Case Record Form, CRF),二为电子采集和管理系统(Electronic Data Capture, EDC),如ResMan即为一种基于互联网的EDC',
        '数据管理委员会'
    ])

    applicant = create_user()
    applicant['name'] = data_chinese[u'申请注册联系人']
    applicant['abs'] = 'Applicant'
    applicant['contact']['phone'] = data_chinese[u'申请注册联系人电话']
    applicant['contact']['email'] = data_chinese[u'申请注册联系人电子邮件']
    applicant['contact']['website'] = data_chinese[u'申请单位网址(自愿提供)']
    applicant['addr'] = parse_address(data_english[u'Applicant address'])
    applicant['addr']['zip'] = data_chinese[u'申请注册联系人邮政编码']
    applicant['exp']['exp']['company'] = data_chinese[u'申请人所在单位']
    principal_investigator = create_user()
    principal_investigator['name'] = data_chinese[u'研究负责人']
    principal_investigator['abs'] = 'Principal Investigator'
    principal_investigator['contact']['phone'] = data_chinese[u'研究负责人电话']
    principal_investigator['contact']['email'] = data_chinese[u'研究负责人电子邮件']
    principal_investigator['contact']['website'] = data_chinese[
        u'研究负责人网址(自愿提供)']
    principal_investigator['addr'] = parse_address(
        data_english[u"Study leader's address"])
    principal_investigator['addr']['zip'] = data_chinese[u'研究负责人邮政编码']

    # The product gets its own copy of the applicant's address.
    product['addr'] = copy.deepcopy(applicant['addr'])
    return {
        'product': product,
        'applicant': applicant,
        'principal_investigator': principal_investigator
    }
Exemplo n.º 13
0
    def parse(self, response):
        """Archive a medicine page and write product and applicant JSON.

        The raw body is saved as ``<last URL segment>.html`` and a
        ``{'product', 'applicant'}`` mapping as ``<last URL segment>.json``
        in ``self.work_directory``.  Tabular fields are read from
        ``self.data``, keyed by the response URL.
        """
        name = response.url.split('/')[-1]
        with open(os.path.join(self.work_directory, name + '.html'),
                  'wb') as fo:
            fo.write(response.body)
        data = self.data[response.url]
        p = create_product()
        p['name'] = data['Medicine name']
        p['abs'] = data[
            'International non-proprietary name (INN) / common name']
        p['tag'] = [data['Category'] + ' Medicine', 'EU', 'Drug']
        if data['Therapeutic area'] is not None:
            p['tag'].extend(data['Therapeutic area'].split(", "))
        p['contact']['website'] = response.url
        p['ref'] = response.url
        if data['Patient safety'] == 'no':
            p['tag'].append('Patient Risk')
        # Each of these columns becomes a tag of the same name when it is
        # 'yes'.  Every flag is listed once, so no tag is appended twice.
        yes_flags = ('Additional monitoring', 'Generic', 'Biosimilar',
                     'Conditional approval', 'Exceptional circumstances',
                     'Accelerated assessment', 'Orphan medicine')
        p['tag'].extend(flag for flag in yes_flags if data[flag] == 'yes')
        if data['Species'] is not None and len(data['Species']) > 0:
            p['tag'].append('Species')
        p['updated'] = data['Decision date']
        p['created'] = data['First published']
        # The licence tags reuse the very same list object as the public tags.
        p['asset']['lic'] = p['tag']
        p['asset']['stat'] = self.status_code(data['Authorisation status'])
        p['asset']['type'] = 1
        p['intro'] = '\n'.join(
            response.xpath(
                "//div[contains(@class, 'field-name-field-ema-web-summary')]/div/div/div/p/text()"
            ).getall())
        p['asset']['market'] = dictionary_to_markdown(
            self.extract_market(response))
        p['asset']['tech'] = dictionary_to_markdown({
            key: data[key]
            for key in ('Active substance', 'Revision number',
                        'ATCvet code', 'First published',
                        'Revision date', 'Decision date', 'Date of opinion',
                        'Marketing authorisation date',
                        'Date of refusal of marketing authorisation',
                        'ATC code', 'Product number',
                        'Human pharmacotherapeutic group',
                        'Vet pharmacotherapeutic group',
                        'Condition / indication')
        })

        # The marketing authorisation holder becomes the applicant company;
        # the product shares the applicant's address object.
        a = create_company()
        a['name'] = data['Marketing authorisation holder/company name']
        a['abs'] = 'A Medicine Company'
        a['addr'] = self.extract_address(response)
        a['entr']['bp'] = response.xpath(
            "//div[contains(@class, 'field-name-ema-medicine-all-author-pres-docs')]/div/div/div/section/div/div/div/div/ul/li/a/@href"
        ).get()
        p['addr'] = a['addr']
        with open(os.path.join(self.work_directory, name + '.json'),
                  'w') as fo:
            json.dump({'product': p, 'applicant': a}, fo)