Пример #1
0
  def cleaner(k, v):
    ''' Per-field callback for transform_dict: cleans a single key/value pair.

        Processing order:
          1. The key is looked up, lowercased, in RENAME_MAP and replaced by
             the mapped name when present; otherwise it is kept as given.
          2. A value of None is passed straight through as (key, None).
          3. String values go through convert_unicode and are stripped.
          4. For keys in DATES, an empty value makes the whole callback return
             None; a non-empty value is reformatted from MM/DD/YYYY to
             YYYYMMDD. A value that does not parse is logged and left as is.
          5. For keys in EXACT, a second pair named key + '_exact' is emitted
             alongside the original pair.

        Returns a (key, value) tuple, a list of two such tuples, or None.
    '''
    lowered = k.lower()
    key = RENAME_MAP[lowered] if lowered in RENAME_MAP else k

    # Missing values keep their (possibly renamed) key and skip all cleaning.
    if v is None:
      return (key, None)

    value = convert_unicode(v).strip() if isinstance(v, str) else v

    if key in DATES:
      # An empty date yields None instead of a pair.
      if not value:
        return None
      try:
        parsed = datetime.datetime.strptime(value, "%m/%d/%Y")
        value = parsed.strftime("%Y%m%d")
      except ValueError:
        # Not MM/DD/YYYY: keep the raw text and only record a warning.
        logging.warning('Unparseable date: ' + value)

    if key in EXACT:
      return [(key, value), (key + '_exact', value)]

    return (key, value)
Пример #2
0
   def _cleaner(k, v):
       ''' Rename a key through self.rename_map, discarding unwanted pairs.

           String values are stripped and then passed through
           common.convert_unicode. The pair is kept, under its mapped name,
           only when the key is present in self.rename_map and the value is
           neither None nor the empty string; otherwise None is returned.
       '''
       if isinstance(v, str):
           v = common.convert_unicode(v.strip())

       # Keys missing from the rename map are purged.
       if k not in self.rename_map:
           return None

       if v is not None and v != '':
           return (self.rename_map[k], v)

       # None / empty-string values are purged as well.
       return None
Пример #3
0
    def map(self, key, value, output):
        ''' Clean one input record and emit it under a submissions key.

            The record is filtered and renamed via common.transform_dict, its
            'type_id' is replaced by a 'type' looked up in self.doc_lookup,
            its date is reformatted, and the result is added to output under
            the key produced by build_submissions_key.

            key:    incoming key; not read, it is replaced by the computed key.
            value:  the record handed to common.transform_dict.
            output: collector; receives the result through output.add(key, doc).
        '''
        def _cleaner(k, v):
            ''' Rename keys via self.rename_map and drop everything else.

                Returns (new_key, value) for pairs worth keeping, or None for
                unmapped keys, None/empty values and a 'title' equal to '0'.
            '''
            if isinstance(v, str):
                v = v.strip()

            if k not in self.rename_map:
                return None

            if v is not None and v != '':
                renamed = self.rename_map[k]
                # A title of '0' is discarded rather than kept.
                if renamed == 'title' and v == '0':
                    return None
                return (renamed, v)

            return None

        doc = common.transform_dict(value, _cleaner)

        # Swap the numeric-style type reference for its looked-up value.
        type_id = doc['type_id']
        doc['type'] = self.doc_lookup[type_id]
        del doc['type_id']

        # Dates are stored as yyyymmdd, the format used throughout openFDA.
        parsed_date = arrow.get(doc['date'])
        doc['date'] = parsed_date.strftime("%Y%m%d")
        doc['url'] = common.convert_unicode(doc['url'])

        # The application number drives the key, since all three drugs@FDA
        # files can be joined on it. The key is built while the record still
        # holds the fields removed just below.
        key = build_submissions_key(doc['application_number'], doc)
        for consumed in ('application_number', 'submission_number',
                         'submission_type'):
            del doc[consumed]

        output.add(key, doc)
Пример #4
0
    def fix_utf8_issues(self, filename):
        """Rewrite ``filename`` in place after running it through
        ``common.convert_unicode``.

        The file is read as UTF-8 with undecodable bytes ignored, converted,
        and written back as UTF-8.

        Args:
            filename: Path of the file to rewrite in place.
        """
        # Context managers guarantee the handles are closed even if reading,
        # converting or writing raises.
        with open(filename, 'r', encoding='utf-8', errors='ignore') as source:
            data = source.read()

        # Convert before reopening: mode 'w' truncates the file immediately,
        # so a failing conversion must happen while the original is intact.
        converted = common.convert_unicode(data)

        with open(filename, 'w', encoding='utf-8') as target:
            target.write(converted)
Пример #5
0
    def fix_utf8_issues(self, filename):
        """Rewrite ``filename`` in place after running it through
        ``common.convert_unicode``.

        The file is opened in text mode with the interpreter's default
        encoding for both the read and the write.

        Args:
            filename: Path of the file to rewrite in place.
        """
        # Context managers guarantee the handles are closed even if reading,
        # converting or writing raises.
        with open(filename, 'r') as source:
            data = source.read()

        # Convert before reopening: mode 'w' truncates the file immediately,
        # so a failing conversion must happen while the original is intact.
        converted = common.convert_unicode(data)

        with open(filename, 'w') as target:
            target.write(converted)