Exemplo n.º 1
0
    def get_publications(self, response: Response) -> list:
        """
        Obtain the publications listed on the page.

        Every publication row is turned into a record created by
        ``create_product`` (asset type 8) whose name, abstract, link and tag
        are read from the fields of that row.

        :param response: response
        :return list of publication records, one per row found
        """
        publications = []
        for row in response.xpath(
                "//div[contains(@class, 'field-collection-item-field-publications')]/div[@class='content']"
        ):
            title = row.xpath(
                "string(div[contains(@class, 'field-name-field-link')])").get(
                )
            other = row.xpath(
                "string(div[contains(@class, 'field-name-field-date-and-other-info')])"
            ).get()
            link = row.xpath(
                "div[contains(@class, 'field-name-field-link')]//a/@href").get(
                )
            vendor = row.xpath(
                "string(div[contains(@class, 'field-name-field-publication')])"
            ).get()
            # A row without an anchor yields None; store an empty string
            # instead, the same way get_patents treats a missing link.
            link = link if link is not None else ''
            publication = create_product()
            publication['asset']['type'] = 8
            publication['ref'] = link
            publication['contact']['website'] = link
            publication['name'] = title
            publication['abs'] = other
            publication['tag'] = remove_empty_string_from_array([vendor])
            # Without this append the method always returned an empty list.
            publications.append(publication)
        return publications
Exemplo n.º 2
0
    def parse(self, response):
        """
        Scrape one technology page and store the outcome in the work directory.

        The raw body is written to ``<name>.html`` and the extracted product
        together with its inventors to ``<name>.json``, where ``<name>`` is the
        last path segment of the response URL.

        :param response: the response of the technology page
        """
        self.log('Parse technology {}'.format(response.url),
                 level=logging.INFO)
        name = response.url.split('/')[-1]
        html_path = os.path.join(self.work_directory, name + '.html')
        with open(html_path, 'wb') as html_file:
            html_file.write(response.body)

        product = create_product()
        product['ref'] = response.url
        product['tag'] = remove_empty_string_from_array(
            self.add_keywords(response))
        product['asset']['type'] = 3
        product['addr'] = deepcopy(self.address)
        product['name'] = response.xpath("string(//h1)").get()

        # The first line of the abstract is the summary, the rest the intro;
        # the page name stands in when the first line is empty.
        meta = self.get_meta(response)
        lines = meta['abstract'].split('\n')
        product['abs'] = lines[0] if lines and lines[0] else name
        product['intro'] = '\n'.join(lines[1:])
        # Everything except the abstract is rendered into the market field.
        del meta['abstract']
        product['asset']['market'] = dictionary_to_markdown(meta)

        _, product['contact'] = self.get_contact(response)
        product['contact']['website'] = response.url

        inventors = self.add_inventors(response)
        for inventor in inventors:
            inventor['abs'] = 'Inventor of ' + product['name']
            inventor['addr'] = product['addr']
            inventor['tag'] = product['tag']

        json_path = os.path.join(self.work_directory, name + '.json')
        with open(json_path, 'w') as json_file:
            json.dump({'product': product, 'inventors': inventors}, json_file)
Exemplo n.º 3
0
    def get_patents(self, response: Response) -> list:
        """
        Obtain the patents.

        Every intellectual-property row is turned into a record created by
        ``create_product`` (asset type 1) carrying the title, the type as tag
        and the link of the row; a row without a link gets an empty string.

        :param response: response
        :return list of patents, one per row found
        """
        patents = []
        for row in response.xpath(
                "//div[contains(@class, 'field-collection-item-field-ip-info')]/div[@class='content']"
        ):
            title = row.xpath(
                "string(div[contains(@class, 'field-name-field-ip-title')])"
            ).get()
            tag = row.xpath(
                "string(div[contains(@class, 'field-name-field-ip-type')])"
            ).get()
            link = row.xpath(
                "div[contains(@class, 'field-name-field-ip-number-pctwo') or contains(@class, 'field-name-field-ip-number-pat-pend')]//a/@href"
            ).get()
            link = link if link is not None else ''
            patent = create_product()
            patent['asset']['type'] = 1
            patent['ref'] = link
            patent['contact']['website'] = link
            patent['name'] = title
            patent['tag'] = remove_empty_string_from_array([tag])
            # Without this append the method always returned an empty list.
            patents.append(patent)
        return patents
Exemplo n.º 4
0
    def get_contact(self, response: Response) -> (dict, dict):
        """
        Gets the contact information.

        The manager's name, link, tags and phone number are read from the
        ``dd`` element with class ``manager``. The manager receives its own
        copy of the contact whose website is the manager's link, while the
        returned contact keeps the page URL as website.

        :param response: the response object
        :return: a tuple of two dict, one for an user and the other for the contact information
        """
        contact = {
            'email': '',
            'phone': '',
            'website': response.url,
            'meet': ''
        }

        # manager
        name = response.xpath("//dd[@class='manager']/a/text()").get()
        link = response.xpath("//dd[@class='manager']/a/@href").get()
        manager = create_user()
        manager['name'] = name
        manager['ref'] = link
        tag = response.xpath("//dd[@class='manager']/div/em[1]/text()").get()
        if isinstance(tag, str):
            manager['tag'] = remove_empty_string_from_array(tag.split(', '))
        contact['phone'] = response.xpath(
            "//dd[@class='manager']/div/em[2]/text()").get()
        # Copy instead of sharing the dict: with a shared dict the manager's
        # website overwrote the page URL in `contact`, and any later change a
        # caller made to `contact` silently changed the manager as well.
        manager['contact'] = dict(contact)
        manager['contact']['website'] = link
        self.log('find manager {} with contact {}'.format(manager, contact),
                 level=logging.DEBUG)
        return manager, contact
Exemplo n.º 5
0
def _format_cell_date(cell):
    """Format a date cell as an HTTP-style GMT string, or return None if it cannot be parsed."""
    date_format = "%a, %d %b %Y %H:%M:%S GMT"
    if isinstance(cell, datetime.datetime):
        return cell.strftime(date_format)
    try:
        return parser.parse(cell).strftime(date_format)
    except Exception:
        # Deliberately best-effort: an empty or malformed cell simply leaves
        # the date unset. Unlike a bare ``except`` this no longer swallows
        # KeyboardInterrupt / SystemExit.
        return None


def parse_device(cells):
    """
    Build a product and its company from one row of cells.

    Cells read by index: 0 company name, 1 updated date, 3 created date,
    4 product name, 5 and 6 tags, 9 market text, 10 review category code.

    :param cells: the values of one row, indexable up to at least index 10
    :return: a tuple (product, company); both share the same address and tags
    """
    # NOTE(review): '4' carries the same text as '3' - confirm against the
    # source of these category descriptions.
    review_category = {
        '1': 'Ophthalmology and otorhinolaryngology',
        '2': 'dentistry',
        '3': 'cerebral, cardiovascular, respiratory, psychiatric, and neurological field',
        '3-1': 'Intervention devices mainly in cerebral, cardiovascular, respiratory, psychiatric, and neurological field',
        '3-2': 'Non-intervention devices mainly in cerebral, cardiovascular, respiratory, psychiatric, and neurological field',
        '4': 'cerebral, cardiovascular, respiratory, psychiatric, and neurological field',
        '5': 'gastrointestinal and urinary systems, obstetrics and gynecology',
        '6': 'orthopedic/plastic surgery and dermatology',
        '7': 'laboratory tests, in vitro diagnostics',
        '8': 'multicategory medical devices, advanced electronic medical devices, and other uncategorized medical devices',
    }
    p = create_product()
    p['name'] = cells[4]

    # Dates are only set when the cell can be interpreted; otherwise the
    # value provided by create_product() is kept.
    created = _format_cell_date(cells[3])
    if created is not None:
        p['created'] = created
    updated = _format_cell_date(cells[1])
    if updated is not None:
        p['updated'] = updated

    p['tag'] = remove_empty_string_from_array(
        [cells[5], cells[6], 'Japan PMDA', 'Medical Device'])
    p['asset']['lic'] = p['tag']
    p['asset']['stat'] = 2
    # Unknown codes are kept verbatim; an empty or missing category falls
    # back to the product name (len() used to raise on a None cell).
    p['abs'] = review_category.get(cells[10], cells[10])
    if not p['abs']:
        p['abs'] = p['name']
    p['asset']['market'] = cells[9]
    p['addr']['country'] = 'Japan'
    p['addr']['city'] = 'Unknown'

    a = create_company()
    a['name'] = cells[0]
    a['abs'] = 'A Medical Device Company'
    a['addr'] = p['addr']
    a['tag'] = p['tag']
    return p, a
Exemplo n.º 6
0
    def add_tags(self, response: Response) -> list:
        """
        Collect the texts of the category links as tags of the project.

        :param response: Response object
        :return a list of tags, passed through ``remove_empty_string_from_array``
        """
        category_texts = response.xpath(
            "//div[@id='categoryLinks']/div/a/text()").getall()
        return remove_empty_string_from_array(category_texts)
Exemplo n.º 7
0
    def add_tags(self, response: Response) -> list:
        """
        Collect the link texts inside the label spans as tags of the project.

        :param response: Response object
        :return a list of tags, passed through ``remove_empty_string_from_array``
        """
        label_texts = response.xpath(
            "//span[contains(@class, 'label')]/a/text()").getall()
        return remove_empty_string_from_array(label_texts)
Exemplo n.º 8
0
    def add_tags(self, response: Response) -> list:
        """
        Collect the link texts of the fourth column of the form table as tags.

        :param response: Response object
        :return a list of tags, passed through ``remove_empty_string_from_array``
        """
        link_texts = response.xpath(
            '//*[@id="formTechPub1"]/div/table/tr/td[4]/div[1]/table/tr/td/a/text()'
        ).getall()
        return remove_empty_string_from_array(link_texts)
Exemplo n.º 9
0
    def add_tags(self, response: Response) -> list:
        """
        Collect the entries of the technology category list as tags.

        Each link text goes through ``remove_head_tail_white_space`` before
        the list is passed to ``remove_empty_string_from_array``.

        :param response: Response object
        :return a list of tags
        """
        category_texts = response.xpath(
            "//ul[@class='tech-category-list']//li/a/text()").getall()
        cleaned = list(map(remove_head_tail_white_space, category_texts))
        return remove_empty_string_from_array(cleaned)
Exemplo n.º 10
0
    def add_tags(self, response: Response) -> list:
        """
        Extract the categories of the project as tags.

        The first text node of the paragraph containing 'Categories' is
        used: everything after its last ':' is split on '|' and each piece
        goes through ``remove_head_tail_white_space``.

        :param response: Response object
        :return a list of tags; empty when the page has no such paragraph
        """
        raw = response.xpath(
            "//div[@class='bdp']/p[contains(text(), 'Categories')]/text()"
        ).get()
        if raw is None:
            # .get() yields None when nothing matches; calling .split on it
            # used to raise AttributeError for pages without categories.
            return []
        return remove_empty_string_from_array([
            remove_head_tail_white_space(t)
            for t in raw.split(':')[-1].split('|')
        ])
Exemplo n.º 11
0
 def patent_callback(data: List[str], result: dict) -> None:
     """
     Turn one row of patent values into a product and store it in result.

     The values are matched by position against ``self.PATENT_HEADER``; a
     row with a different number of values is logged as an error and
     skipped. The product is stored under its patent number.

     :param data: the values of one patent row
     :param result: mapping from patent number to product, updated in place
     """
     if len(data) != len(self.PATENT_HEADER):
         self.logger.error('fail to parse {}'.format(data))
         return
     fields = dict(zip(self.PATENT_HEADER, data))
     patent = create_product()
     tags = patent['tag']
     tags.append(fields['type'])
     number = fields['number']
     patent['ref'] = number
     patent['addr']['country'] = fields['country']
     patent['updated'] = format_datetime(fields['date'])
     patent['abs'] = fields['abstract']
     patent['name'] = fields['title']
     tags.append(fields['kind'])
     patent['tag'] = remove_empty_string_from_array(tags)
     result[number] = patent
Exemplo n.º 12
0
def parse_drug(cells):
    """
    Build a product and its company from one row of cells.

    Cells read by index: 0 company name (also stored as the asset's 'tech'),
    2 created date, 3 product name, 5 abstract, 6 review category code.

    :param cells: the values of one row, indexable up to at least index 6
    :return: a tuple (product, company); both share the same address and tags
    """
    review_category = {
        '1': 'Gastrointestinal drugs, dermatologic drugs, immunosuppressive drugs, and others (not classified as other categories)',
        '2': "Cardiovascular drugs, antiparkinsonian drugs, anti-Alzheimer's drugs",
        '3-1': 'Central/peripheral nervous system drugs (excluding anesthetic drugs)',
        '3-2': 'Anesthetic drugs, sensory organ drugs (excluding drugs for inflammatory diseases), narcotics',
        '4': 'Antibacterial drugs, antiviral drugs (excluding AIDS drugs), antifungal drugs, antiprotozoal drugs, anthelmintic drugs',
        '5': 'Reproductive system drugs, drugs for urogenital system, combination drugs',
        '6-1': 'Respiratory tract drugs, anti-allergy drugs (excluding dermatologic drugs), sensory organ drugs (drugs for inflammatory diseases)',
        '6-2': 'Hormone drugs, drugs for metabolic disorders (including diabetes mellitus, osteoporosis, gout, and inborn errors of metabolism)',
    }
    p = create_product()
    p['name'] = cells[3]
    if isinstance(cells[2], datetime):
        p['created'] = cells[2]
    else:
        try:
            p['created'] = parser.parse(cells[2])
        except Exception:
            # Deliberately best-effort: an empty or malformed cell leaves the
            # value from create_product() in place. Unlike a bare ``except``
            # this no longer swallows KeyboardInterrupt / SystemExit.
            pass
    # Unknown category codes are used verbatim as the tag.
    category = review_category.get(cells[6], cells[6])
    p['tag'] = remove_empty_string_from_array([category, 'Japan PMDA', 'Drug'])
    p['asset']['lic'] = p['tag']
    p['asset']['stat'] = 2
    p['asset']['tech'] = cells[0]
    # An empty or missing abstract falls back to the product name
    # (len() used to raise on a None cell).
    p['abs'] = cells[5]
    if not p['abs']:
        p['abs'] = p['name']
    p['addr']['country'] = 'Japan'
    p['addr']['city'] = 'Unknown'

    a = create_company()
    a['name'] = cells[0]
    a['abs'] = 'A Drug Company'
    a['addr'] = p['addr']
    a['tag'] = p['tag']
    return p, a
Exemplo n.º 13
0
def main():
    """
    Import FDA device records from a local openFDA JSON dump into the system.

    For every entry of ``results`` a product and its applicant company are
    built and stored through ``add_record('entity', ...)``; the two are then
    linked by an 'Applicant' relationship. A failed entity insert is logged
    and the record skipped; a failed relationship insert is logged only.
    """
    date_format = "%a, %d %b %Y %H:%M:%S GMT"

    def lookup(record, key, default=''):
        # Prefer the top-level field and fall back to the nested 'openfda'
        # object. A record without 'openfda' just has no fallback (indexing
        # record['openfda'] directly used to raise KeyError).
        return record.get(key, (record.get('openfda') or {}).get(key, default))

    def to_date_string(value):
        # Deliberately best-effort: a missing or malformed date yields None so
        # the caller keeps the value from create_product(). Unlike a bare
        # ``except`` this does not swallow KeyboardInterrupt / SystemExit.
        try:
            return parser.parse(value).strftime(date_format)
        except Exception:
            return None

    product_code = get_product_code()
    log = create_logger('510K')
    source = os.path.expanduser('~/work/fda/device-classification-0001-of-0001.json')
    # The context manager closes the file; the previous bare open() leaked it.
    with open(source, 'r') as fi:
        result = json.load(fi)
    # Start and end timestamps are logged at critical level.
    log.critical(datetime.datetime.now())
    for r in result['results']:
        p = create_product()
        p['name'] = lookup(r, 'device_name')
        p['ref'] = lookup(r, 'k_number')
        p['addr']['line1'] = lookup(r, 'address_1')
        p['addr']['line2'] = lookup(r, 'address_2')
        p['addr']['city'] = lookup(r, 'city')
        p['addr']['state'] = lookup(r, 'state')
        p['addr']['zip'] = lookup(r, 'zip_code')
        p['addr']['country'] = lookup(r, 'country_code')
        p['intro'] = lookup(r, 'statement_or_summary')
        p['asset']['type'] = 0
        advisory_committee = lookup(r, 'advisory_committee_description')
        medical_specialty = lookup(r, 'medical_specialty_description')
        p['tag'] = [
            advisory_committee,
            medical_specialty,
            'FDA',
            'Medical Device',
            '510K']
        # p['tag'] is used for tags readable to common users, p['lic'] is used for tags specified for product.
        p['asset']['lic'] = [
            'FDA',
            '510K',
            lookup(r, 'clearance_type'),
            advisory_committee,
            medical_specialty,
            lookup(r, 'product_code'),
            lookup(r, 'regulation_number'),
            lookup(r, 'decision_description'), ]
        p['asset']['lic'].extend(third_party(lookup(r, 'third_party_flag')))
        if lookup(r, 'expedited_review_flag'):
            p['asset']['lic'].append('Expedited Review')
        # Submission types '1' and '2' are not tagged; other ids are tagged
        # when submission_type() knows them.
        submission_id = lookup(r, 'submission_type_id')
        if submission_id not in {'1', '2'}:
            submission = submission_type(submission_id)
            if submission is not None:
                p['asset']['lic'].append(submission)
                p['tag'].append(submission)
        code = product_code.get(lookup(r, 'product_code'), None)
        if code is not None:
            p['abs'] = code['device_name']
            p['asset']['lic'].extend([
                'Class ' + code['device_class'],
                # 'N' means the device is NOT exempt, i.e. GMP is required;
                # the two labels used to be swapped.
                'GMP Required' if code['gmp_exempt_flag'] == 'N' else 'GMP Exempt',
            ])
            p['tag'].append('Class ' + code['device_class'])
            if code['implant_flag'] != 'N':
                p['asset']['lic'].append('Implant')
                p['tag'].append('Implant')
            if code['life_sustain_support_flag'] != 'N':
                p['asset']['lic'].append('Life Sustain Support')
                p['tag'].append('Life Sustain Support')
        else:
            p['abs'] = p['name']
        p['asset']['stat'] = map_status(lookup(r, 'decision_code'))
        created = to_date_string(lookup(r, 'date_received', None))
        if created is not None:
            p['created'] = created
        updated = to_date_string(lookup(r, 'decision_date', None))
        if updated is not None:
            p['updated'] = updated
        p['asset']['lic'] = remove_empty_string_from_array(p['asset']['lic'])
        p['tag'] = remove_empty_string_from_array(p['tag'])
        a = create_company()
        a['name'] = lookup(r, 'applicant')
        a['abs'] = 'A Medical Device Company'
        a['addr'] = p['addr']
        a['tag'] = p['tag']
        a['group']['parentId'] = '000000000000000000000000'
        # contact is just the name of contact

        response = add_record('entity', [p, a])
        if response['_status'] != 'OK':
            log.error('fail to create record for {}'.format(p['name']))
            continue
        # _items follow the order of the insert: [0] product, [1] company.
        applicant_product = create_relationship(response['_items'][1]['_id'], response['_items'][0]['_id'])
        applicant_product['type'] = 7
        applicant_product['name'] = 'Applicant'
        applicant_product['abs'] = 'Applicant'
        response = add_record('relationship', [applicant_product])
        if response['_status'] != 'OK':
            log.error('fail to create relationship for {}'.format(p['name']))
        else:
            log.debug('added {} to the system'.format(p['name']))
    log.critical(datetime.datetime.now())