Пример #1
0
def load_all_models_in_directory(dir_name,
    limit_extensions=True,
    recursive=False):
  """
  Collect every model file found in a directory.

  :param dir_name: directory to scan; must exist (checked with an assert)
  :param limit_extensions: if true, files for which guess_file_type() does
    not report "pdb" are skipped without being opened
  :param recursive: if true, sub-directories are scanned as well, with the
    same limit_extensions setting

  :returns: list of (full path, file_object) tuples, one for each file that
    any_file() identified as type "pdb"
  """
  from iotbx.file_reader import any_file, guess_file_type
  assert os.path.isdir(dir_name)
  collected = []
  for child in os.listdir(dir_name):
    child_path = os.path.join(dir_name, child)
    descend = recursive and os.path.isdir(child_path)
    if descend:
      collected += load_all_models_in_directory(dir_name=child_path,
        limit_extensions=limit_extensions,
        recursive=True)
    elif os.path.isfile(child_path):
      # The file type is only guessed up front when filtering was requested.
      wanted = (not limit_extensions) or (
        guess_file_type(child_path) == "pdb")
      if wanted:
        candidate = any_file(child_path,
          raise_sorry_if_not_expected_format=True)
        if candidate.file_type == "pdb":
          collected.append((child_path, candidate.file_object))
  return collected
Пример #2
0
def load_all_models_in_directory(dir_name,
    limit_extensions=True,
    recursive=False):
  """
  Scan a directory and load the model files it contains.

  :param dir_name: path of an existing directory (enforced by an assert)
  :param limit_extensions: when true, only files that guess_file_type()
    classifies as "pdb" are opened
  :param recursive: when true, sub-directories are scanned too

  :returns: a list of (full path, file_object) pairs for every file whose
    any_file() result has file_type "pdb"
  """
  from iotbx.file_reader import any_file, guess_file_type
  assert os.path.isdir(dir_name)
  models = []
  for entry in os.listdir(dir_name):
    path = os.path.join(dir_name, entry)
    if os.path.isdir(path):
      # Directories contribute only through recursion; otherwise ignore them.
      if recursive:
        models.extend(
          load_all_models_in_directory(dir_name=path,
            limit_extensions=limit_extensions,
            recursive=True))
      continue
    if not os.path.isfile(path):
      continue
    if limit_extensions and guess_file_type(path) != "pdb":
      continue
    parsed = any_file(path, raise_sorry_if_not_expected_format=True)
    if parsed.file_type == "pdb":
      models.append((path, parsed.file_object))
  return models
Пример #3
0
def fetch (id, data_type="pdb", format="pdb", mirror="rcsb", log=None,
    force_download=False,
    local_cache=None) :
  """
  Locate and open a data file for the specified PDB ID and format, either in a
  local cache directory, a local mirror, or online.

  Unless force_download is set, the local_cache directory (model files only)
  and the directories named by the PDB_MIRROR_PDB and
  PDB_MIRROR_STRUCTURE_FACTORS environment variables are tried before any
  remote URL is built.

  NOTE(review): the definition as shown here stops after the FASTA download;
  no download of model or structure-factor data and no final return of the
  downloaded handle are visible - TODO confirm against the complete source.

  :param id: 4-character PDB ID (e.g. '1hbb'); lower-cased before use
  :param data_type: type of content to download: pdb, xray, fasta, or seq
  :param format: format of data: cif, pdb, or xml
  :param mirror: remote site to use, either rcsb, pdbe, or pdbj
  :param log: file-like object for progress messages; null_out() if None
  :param force_download: if true, skip the local cache and the local mirrors
  :param local_cache: directory searched for an existing model file; Auto
    selects the current working directory, None disables the search

  :returns: a filehandle-like object (with read() method)
  """
  # Argument validation; these checks disappear when Python runs with -O.
  assert data_type in ["pdb", "xray", "fasta", "seq"]
  assert format in ["cif", "pdb", "xml"]
  assert mirror in ["rcsb", "pdbe", "pdbj"]
  validate_pdb_id(id)
  if (log is None) : log = null_out()

  # All comparisons and generated file names/URLs use the lower-case ID.
  id = id.lower()
  if (not force_download) :
    if (local_cache is not None) and (data_type == "pdb") :
      from iotbx.file_reader import guess_file_type
      if (local_cache is Auto) :
        local_cache = os.getcwd()
      cache_files = os.listdir(local_cache)
      for file_name in cache_files :
        if (len(file_name) > 4) :
          # Drop an optional leading "pdb" and take the next four characters
          # as the candidate ID (matches "1hbb.pdb" as well as "pdb1hbb.ent").
          file_id = re.sub("^pdb", "", file_name)[0:4]
          if (file_id.lower() == id) :
            if (guess_file_type(file_name) == "pdb") :
              file_name = os.path.join(local_cache, file_name)
              print >> log, "Reading from cache directory:"
              print >> log, "  " + file_name
              f = smart_open.for_reading(file_name)
              return f
    # try local mirror for PDB and X-ray data files first, if it exists
    # Mirror layout used below: <root>/<characters 2-3 of the ID>/<file>.
    # NOTE(review): this lookup does not test `format`, so the .ent.gz file
    # is returned for any requested format - confirm this is intended.
    if (data_type == "pdb") and ("PDB_MIRROR_PDB" in os.environ) :
      subdir = os.path.join(os.environ["PDB_MIRROR_PDB"], id[1:3])
      if (os.path.isdir(subdir)) :
        file_name = os.path.join(subdir, "pdb%s.ent.gz" % id)
        if (os.path.isfile(file_name)) :
          print >> log, "Reading from local mirror:"
          print >> log, "  " + file_name
          f = smart_open.for_reading(file_name)
          return f
    if ((data_type == "xray") and
        ("PDB_MIRROR_STRUCTURE_FACTORS" in os.environ)) :
      sf_dir = os.environ["PDB_MIRROR_STRUCTURE_FACTORS"]
      subdir = os.path.join(sf_dir, id[1:3])
      if (os.path.isdir(subdir)) :
        file_name = os.path.join(subdir, "r%ssf.ent.gz" % id)
        if (os.path.isfile(file_name)) :
          print >> log, "Reading from local mirror:"
          print >> log, "  " + file_name
          f = smart_open.for_reading(file_name)
          return f
  # No mirror found (or out of date), default to HTTP download
  # `url` stays None unless a branch below can build the complete address;
  # `compressed` records that the remote file is gzip-compressed.
  url = None
  compressed = False
  if (mirror == "rcsb") :
    url_base = "http://www.rcsb.org/pdb/files/"
    pdb_ext = ".pdb"
    sf_prefix = ""
    sf_ext = "-sf.cif"
  elif (mirror == "pdbe") :
    url_base = "http://www.ebi.ac.uk/pdbe-srv/view/files/"
    pdb_ext = ".ent"
    sf_prefix = "r"
    sf_ext = "sf.ent"
  elif (mirror == "pdbj") :
    # PDBj URLs are built in full here rather than from base/prefix/extension.
    url_base = "ftp://ftp.pdbj.org/pub/pdb/data/structures/divided/"
    if (data_type == "pdb") :
      compressed = True
      if (format == "pdb") :
        url = url_base + "pdb/%s/pdb%s.ent.gz" % (id[1:3], id)
      elif (format == "cif") :
        url = url_base + "mmCIF/%s/%s.cif.gz" % (id[1:3], id)
    elif (data_type == "xray") :
      compressed = True
      url = url_base + "structure_factors/%s/r%ssf.ent.gz" % (id[1:3], id)
    elif (data_type in ["fasta", "seq"]) :
      url = "http://pdbj.org/app//downloadFasta4PDBID?pdbid=%s" % id
    # Within this branch url can only still be None for data_type "pdb" with
    # a format other than pdb/cif (i.e. xml).
    if (url is None) and (data_type != "fasta") :
      raise Sorry("Can't determine PDBj download URL for this data/format "+
        "combination.")
  if (data_type in ["fasta", "seq"]) :
    # XXX the RCSB doesn't appear to have a simple URL for FASTA files
    # The RCSB address is the fallback whenever no mirror-specific URL was
    # set above, which includes mirror == "pdbe".
    if (url is None) : # TODO PDBe equivalent doesn't exist?
      url = "http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=FASTA&compression=NO&structureId=%s" % id
    try :
      data = libtbx.utils.urlopen(url)
    except urllib2.HTTPError, e :
      # Report a missing entry (HTTP 404) as RuntimeError; re-raise any other
      # HTTP error unchanged.
      if e.getcode() == 404 :
        raise RuntimeError("Couldn't download sequence for %s." % id)
      else :
        raise
Пример #4
0
def fetch(id,
          data_type="pdb",
          format="pdb",
          mirror="rcsb",
          log=None,
          force_download=False,
          local_cache=None):
    """
    Locate and open a data file for the specified PDB ID and format, either in
    a local cache directory, a local mirror, or online.

    Unless ``force_download`` is set, local sources are tried first: the
    ``local_cache`` directory (model files only), then the directories named
    by the PDB_MIRROR_PDB, PDB_MIRROR_MMCIF and PDB_MIRROR_STRUCTURE_FACTORS
    environment variables.  If none of them yields a file, the data are
    downloaded from the selected remote site.

    :param id: 4-character PDB ID (e.g. '1hbb'); lower-cased before use
    :param data_type: type of content to download: pdb, xray, fasta, or seq
      (seq is handled like fasta)
    :param format: format of data: cif, pdb, or xml (or cif_or_pdb)
    :param mirror: remote site to use, either rcsb, pdbe, pdbj or pdb-redo
    :param log: file-like object receiving progress messages; null_out() is
      used when None
    :param force_download: if true, skip the local cache and local mirrors
    :param local_cache: directory searched for an existing model file; Auto
      selects the current working directory, None disables the search

    :returns: a filehandle-like object (with read() method)
    :raises AssertionError: if data_type, format or mirror is not one of the
      accepted values
    :raises Sorry: if no PDBj URL can be determined for the requested
      data/format combination, or gzip is unavailable for compressed data
    :raises RuntimeError: if the remote site answers with HTTP 404
    """
    # Kept as asserts so callers that rely on AssertionError keep working.
    assert data_type in ["pdb", "xray", "fasta", "seq"]
    assert format in ["cif", "pdb", "xml", "cif_or_pdb"]
    assert mirror in ["rcsb", "pdbe", "pdbj", "pdb-redo"]
    validate_pdb_id(id)
    if log is None:
        log = null_out()

    id = id.lower()
    if not force_download:
        if (local_cache is not None) and (data_type == "pdb"):
            from iotbx.file_reader import guess_file_type
            if local_cache is Auto:
                local_cache = os.getcwd()
            for file_name in os.listdir(local_cache):
                if len(file_name) <= 4:
                    continue
                # Drop an optional leading "pdb" and take the next four
                # characters as the candidate ID.
                file_id = re.sub("^pdb", "", file_name)[0:4]
                if file_id.lower() != id:
                    continue
                if guess_file_type(file_name) == "pdb":
                    file_name = os.path.join(local_cache, file_name)
                    print("Reading from cache directory:", file=log)
                    print("  " + file_name, file=log)
                    return smart_open.for_reading(file_name)
        # Try the local mirrors next, in the same order as before: PDB-format
        # models, then mmCIF models, then structure factors.
        mirror_sources = []
        if data_type == "pdb":
            if format in ["pdb", "cif_or_pdb"]:
                mirror_sources.append(("PDB_MIRROR_PDB", "pdb%s.ent.gz"))
            if format in ["cif", "cif_or_pdb"]:
                mirror_sources.append(("PDB_MIRROR_MMCIF", "%s.cif.gz"))
        elif data_type == "xray":
            mirror_sources.append(
                ("PDB_MIRROR_STRUCTURE_FACTORS", "r%ssf.ent.gz"))
        for env_var, file_template in mirror_sources:
            f = _open_from_local_mirror(env_var, file_template, id, log)
            if f is not None:
                return f

    # No mirror found (or out of date), default to HTTP download.
    # `url` stays None unless a branch below can build the complete address;
    # otherwise it is assembled later from url_base and the extensions.
    url = None
    compressed = False
    if mirror == "rcsb":
        url_base = 'https://files.rcsb.org/download/'
        pdb_ext = ".pdb"
        sf_prefix = ""
        sf_ext = "-sf.cif"
    elif mirror == "pdbe":
        url_base = "https://www.ebi.ac.uk/pdbe-srv/view/files/"
        pdb_ext = ".ent"
        sf_prefix = "r"
        sf_ext = "sf.ent"
    elif mirror == "pdbj":
        url_base = "ftp://ftp.pdbj.org/pub/pdb/data/structures/divided/"
        if data_type == "pdb":
            compressed = True
            if format == "pdb":
                url = url_base + "pdb/%s/pdb%s.ent.gz" % (id[1:3], id)
            elif format in ["cif", "cif_or_pdb"]:
                url = url_base + "mmCIF/%s/%s.cif.gz" % (id[1:3], id)
        elif data_type == "xray":
            compressed = True
            url = url_base + "structure_factors/%s/r%ssf.ent.gz" % (id[1:3],
                                                                    id)
        else:
            # Only "fasta" and "seq" remain after the assert above.
            url = "https://pdbj.org/rest/downloadPDBfile?format=fasta&id=%s" % id
        # Only reachable for a model request in a format PDBj has no URL
        # pattern for here (xml).
        if url is None:
            raise Sorry(
                "Can't determine PDBj download URL for this data/format " +
                "combination.")
    elif mirror == "pdb-redo":
        url_base = "https://pdb-redo.eu/db/"
        pdb_ext = "_final.pdb"
        cif_ext = "_final.cif"
        sf_prefix = ""
        sf_ext = "_final.mtz"
        if data_type == 'pdb':
            if format == 'pdb':
                url = url_base + "{id}/{id}{format}".format(id=id,
                                                            format=pdb_ext)
            elif format in ['cif', 'cif_or_pdb']:
                url = url_base + "{id}/{id}{format}".format(id=id,
                                                            format=cif_ext)
        elif data_type == 'xray':
            url = url_base + "{id}/{id}{format}".format(id=id, format=sf_ext)

    if data_type in ["fasta", "seq"]:
        if url is None:  # TODO PDBe equivalent doesn't exist?
            # Seems that this url should be working:
            url = "https://www.rcsb.org/fasta/entry/%s" % id
        data = _download_or_raise(url, "sequence", id)
    elif data_type == "xray":
        if url is None:
            url = url_base + sf_prefix + id + sf_ext
        data = _download_or_raise(url, "structure factors", id)
    else:
        if url is None:
            if format == "pdb":
                url = url_base + id + pdb_ext
            elif format == "cif_or_pdb":
                # With no preference given, mmCIF is requested.
                url = url_base + id + "." + "cif"
            else:
                url = url_base + id + "." + format
        data = _download_or_raise(url, "model", id)

    if compressed:
        try:
            import gzip
        except ImportError:
            raise Sorry(
                "gzip module not available - please use an uncompressed " +
                "source of PDB data.")
        import io
        # The response object cannot be handed to GzipFile directly, so the
        # payload is buffered in memory first.  The buffer must be binary:
        # read() yields bytes on Python 3, which a text StringIO rejects.
        return gzip.GzipFile(fileobj=io.BytesIO(data.read()))
    return data


def _open_from_local_mirror(env_var, file_template, pdb_id, log):
    """
    Open <$env_var>/<pdb_id[1:3]>/<file_template % pdb_id> for reading.

    Returns None when the environment variable is unset or the file does not
    exist; otherwise logs the path and returns the opened handle.
    """
    if env_var not in os.environ:
        return None
    file_name = os.path.join(os.environ[env_var], pdb_id[1:3],
                             file_template % pdb_id)
    if not os.path.isfile(file_name):
        return None
    print("Reading from local mirror:", file=log)
    print("  " + file_name, file=log)
    return smart_open.for_reading(file_name)


def _download_or_raise(url, description, pdb_id):
    """
    Open `url`, turning an HTTP 404 into a RuntimeError naming the entry.

    Any other HTTPError is re-raised unchanged.
    """
    try:
        return libtbx.utils.urlopen(url)
    except HTTPError as e:
        if e.getcode() == 404:
            raise RuntimeError(
                "Couldn't download %s for %s." % (description, pdb_id))
        raise
Пример #5
0
def fetch(id,
          data_type="pdb",
          format="pdb",
          mirror="rcsb",
          log=None,
          force_download=False,
          local_cache=None):
    """
  Locate and open a data file for the specified PDB ID and format, either in a
  local mirror or online.

  :param id: 4-character PDB ID (e.g. '1hbb')
  :param data_type: type of content to download: pdb, xray, or fasta
  :param format: format of data: cif, pdb, or xml
  :param mirror: remote site to use, either rcsb, pdbe, pdbj or pdb-redo

  :returns: a filehandle-like object (with read() method)
  """
    assert data_type in ["pdb", "xray", "fasta", "seq"]
    assert format in ["cif", "pdb", "xml"]
    assert mirror in ["rcsb", "pdbe", "pdbj", "pdb-redo"]
    validate_pdb_id(id)
    if (log is None): log = null_out()

    id = id.lower()
    if (not force_download):
        if (local_cache is not None) and (data_type == "pdb"):
            from iotbx.file_reader import guess_file_type
            if (local_cache is Auto):
                local_cache = os.getcwd()
            cache_files = os.listdir(local_cache)
            for file_name in cache_files:
                if (len(file_name) > 4):
                    file_id = re.sub("^pdb", "", file_name)[0:4]
                    if (file_id.lower() == id):
                        if (guess_file_type(file_name) == "pdb"):
                            file_name = os.path.join(local_cache, file_name)
                            print >> log, "Reading from cache directory:"
                            print >> log, "  " + file_name
                            f = smart_open.for_reading(file_name)
                            return f
        # try local mirror for PDB and X-ray data files first, if it exists
        if (data_type == "pdb") and (format == "pdb") and \
               ("PDB_MIRROR_PDB" in os.environ) :
            subdir = os.path.join(os.environ["PDB_MIRROR_PDB"], id[1:3])
            if (os.path.isdir(subdir)):
                file_name = os.path.join(subdir, "pdb%s.ent.gz" % id)
                if (os.path.isfile(file_name)):
                    print >> log, "Reading from local mirror:"
                    print >> log, "  " + file_name
                    f = smart_open.for_reading(file_name)
                    return f
        if (data_type == "pdb") and (format == "cif") and \
               ("PDB_MIRROR_MMCIF" in os.environ) :
            subdir = os.path.join(os.environ["PDB_MIRROR_MMCIF"], id[1:3])
            if (os.path.isdir(subdir)):
                file_name = os.path.join(subdir, "%s.cif.gz" % id)
                if (os.path.isfile(file_name)):
                    print >> log, "Reading from local mirror:"
                    print >> log, "  " + file_name
                    f = smart_open.for_reading(file_name)
                    return f
        if ((data_type == "xray")
                and ("PDB_MIRROR_STRUCTURE_FACTORS" in os.environ)):
            sf_dir = os.environ["PDB_MIRROR_STRUCTURE_FACTORS"]
            subdir = os.path.join(sf_dir, id[1:3])
            if (os.path.isdir(subdir)):
                file_name = os.path.join(subdir, "r%ssf.ent.gz" % id)
                if (os.path.isfile(file_name)):
                    print >> log, "Reading from local mirror:"
                    print >> log, "  " + file_name
                    f = smart_open.for_reading(file_name)
                    return f
    # No mirror found (or out of date), default to HTTP download
    url = None
    compressed = False
    if (mirror == "rcsb"):
        url_base = 'https://files.rcsb.org/download/'
        pdb_ext = ".pdb"
        sf_prefix = ""
        sf_ext = "-sf.cif"
    elif (mirror == "pdbe"):
        url_base = "https://www.ebi.ac.uk/pdbe-srv/view/files/"
        pdb_ext = ".ent"
        sf_prefix = "r"
        sf_ext = "sf.ent"
    elif (mirror == "pdbj"):
        url_base = "ftp://ftp.pdbj.org/pub/pdb/data/structures/divided/"
        if (data_type == "pdb"):
            compressed = True
            if (format == "pdb"):
                url = url_base + "pdb/%s/pdb%s.ent.gz" % (id[1:3], id)
            elif (format == "cif"):
                url = url_base + "mmCIF/%s/%s.cif.gz" % (id[1:3], id)
        elif (data_type == "xray"):
            compressed = True
            url = url_base + "structure_factors/%s/r%ssf.ent.gz" % (id[1:3],
                                                                    id)
        elif (data_type in ["fasta", "seq"]):
            url = "https://pdbj.org/rest/downloadPDBfile?format=fasta&id=%s" % id
        if (url is None) and (data_type != "fasta"):
            raise Sorry(
                "Can't determine PDBj download URL for this data/format " +
                "combination.")
    elif mirror == "pdb-redo":
        url_base = "https://pdb-redo.eu/db/"
        pdb_ext = "_final.pdb"
        cif_ext = "_final.cif"
        sf_prefix = ""
        sf_ext = "_final.mtz"
        if (data_type == 'pdb'):
            if (format == 'pdb'):
                url = url_base + "{id}/{id}{format}".format(id=id,
                                                            format=pdb_ext)
            elif (format == 'cif'):
                url = url_base + "{id}/{id}{format}".format(id=id,
                                                            format=cif_ext)
        elif (data_type == 'xray'):
            url = url_base + "{id}/{id}{format}".format(id=id, format=sf_ext)
    if (data_type in ["fasta", "seq"]):
        # XXX the RCSB doesn't appear to have a simple URL for FASTA files
        if (url is None):  # TODO PDBe equivalent doesn't exist?
            url = "https://www.rcsb.org/pdb/download/downloadFastaFiles.do?structureIdList=%s&compressionType=uncompressed" % id
        try:
            data = libtbx.utils.urlopen(url)
        except urllib2.HTTPError, e:
            if e.getcode() == 404:
                raise RuntimeError("Couldn't download sequence for %s." % id)
            else:
                raise