Пример #1
0
def enumerate_set_ids(fh, progress_func=lambda *args, **kwargs: 0):
  """Return a list of the distinct set_ids read from a file handle.

  This function resets the file position to the start before reading.
  Assumes the input is (member_id, set_id) * N in binary, so every second
  integer in the stream is a set_id.

  Args:
    fh: file handle; it is rewound with seek(0) and then handed to
      fill_buffer one BUFFERSIZE chunk at a time.
    progress_func: callable invoked once per chunk as
      progress_func(offset, mb=100), where offset advances by BUFFERSIZE
      per chunk. The default accepts and ignores any arguments.

  Returns:
    A list of the unique set_ids (order is not defined, it comes from a set).
  """
  fh.seek(0)
  # Floor division keeps the expected count an int on Python 2 and 3 alike.
  ints_per_buffer = BUFFERSIZE // SIZEOFINT
  set_ids = set()
  for readbytes in itertools.count(start=0, step=BUFFERSIZE):
    ints = fill_buffer(fh, BUFFERSIZE)
    # set_ids sit at the odd positions (1, 3, 5, ...). Slicing also copes
    # with an odd-length buffer: a trailing member_id without a partner is
    # ignored instead of raising IndexError.
    # NOTE(review): relies on fill_buffer returning a sliceable sequence
    # (list, tuple, array) - confirm against its definition.
    set_ids.update(ints[1::2])
    progress_func(readbytes, mb=100)
    # A short (or empty) buffer means the end of the file was reached.
    if len(ints) != ints_per_buffer:
      return list(set_ids)
Пример #2
0
def enumerate_set_ids(fh, progress_func=lambda *args, **kwargs: 0):
    """Return a list of the distinct set_ids read from a file handle.

    This function resets the file position to the start before reading.
    Assumes the input is (member_id, set_id) * N in binary, so every second
    integer in the stream is a set_id.

    Args:
        fh: file handle; it is rewound with seek(0) and then handed to
            fill_buffer one BUFFERSIZE chunk at a time.
        progress_func: callable invoked once per chunk as
            progress_func(offset, mb=100), where offset advances by
            BUFFERSIZE per chunk. The default accepts and ignores any
            arguments.

    Returns:
        A list of the unique set_ids (order is not defined, it comes from
        a set).
    """
    fh.seek(0)
    # Floor division keeps the expected count an int on Python 2 and 3 alike.
    ints_per_buffer = BUFFERSIZE // SIZEOFINT
    set_ids = set()
    for readbytes in itertools.count(start=0, step=BUFFERSIZE):
        ints = fill_buffer(fh, BUFFERSIZE)
        # set_ids sit at the odd positions (1, 3, 5, ...). Slicing also
        # copes with an odd-length buffer: a trailing member_id without a
        # partner is ignored instead of raising IndexError.
        # NOTE(review): relies on fill_buffer returning a sliceable sequence
        # (list, tuple, array) - confirm against its definition.
        set_ids.update(ints[1::2])
        progress_func(readbytes, mb=100)
        # A short (or empty) buffer means the end of the file was reached.
        if len(ints) != ints_per_buffer:
            return list(set_ids)
Пример #3
0
def extract_membership(set_id_segment, membership_fh):
  """Collect the member_ids belonging to each set_id in set_id_segment.

  The membership file is rewound and scanned from start to end in
  BUFFERSIZE chunks; each chunk is turned into (member_id, set_id) pairs
  via in_pairs(fill_buffer(...)).

  Args:
    set_id_segment: iterable of hashable set_ids to collect members for.
      It is consumed exactly once, so a one-shot iterator works too.
    membership_fh: file handle holding the membership data; its position
      is reset with seek(0).

  Returns:
    A dict mapping every set_id from set_id_segment to the list of
    member_ids paired with it, in file order (empty list if none).

  Note:
    progress_func is not a parameter here; it is looked up in the
    enclosing scope and called as progress_func(offset, mb=100) once per
    chunk. Its definition is outside this block.
  """
  # The result dict doubles as the membership filter, so the argument is
  # iterated only once and no separate set has to be built.
  set_membership = {set_id: [] for set_id in set_id_segment}
  # Floor division keeps the expected count an int on Python 2 and 3 alike.
  pairs_per_buffer = BUFFERSIZE // SIZEOFINT // 2
  membership_fh.seek(0)  # reset file

  # read entire data file until we've hit EOF
  try:
    for readbytes in itertools.count(0, BUFFERSIZE):
      pairs = in_pairs(fill_buffer(membership_fh, BUFFERSIZE))
      for member_id, set_id in pairs:
        if set_id in set_membership:
          set_membership[set_id].append(member_id)
      progress_func(readbytes, mb=100)
      # A short (or empty) chunk means the end of the file was reached.
      if len(pairs) != pairs_per_buffer:
        break
  except EOFError:
    # The helpers are not visible here; should one of them signal
    # end-of-file by raising, treat it as a normal end of the scan.
    pass
  return set_membership
Пример #4
0
def extract_membership(set_id_segment, membership_fh):
    """Collect the member_ids belonging to each set_id in set_id_segment.

    The membership file is rewound and scanned from start to end in
    BUFFERSIZE chunks; each chunk is turned into (member_id, set_id) pairs
    via in_pairs(fill_buffer(...)).

    Args:
        set_id_segment: iterable of hashable set_ids to collect members
            for. It is consumed exactly once, so a one-shot iterator works
            too.
        membership_fh: file handle holding the membership data; its
            position is reset with seek(0).

    Returns:
        A dict mapping every set_id from set_id_segment to the list of
        member_ids paired with it, in file order (empty list if none).

    Note:
        progress_func is not a parameter here; it is looked up in the
        enclosing scope and called as progress_func(offset, mb=100) once
        per chunk. Its definition is outside this block.
    """
    # The result dict doubles as the membership filter, so the argument is
    # iterated only once and no separate set has to be built.
    set_membership = {set_id: [] for set_id in set_id_segment}
    # Floor division keeps the expected count an int on Python 2 and 3 alike.
    pairs_per_buffer = BUFFERSIZE // SIZEOFINT // 2
    membership_fh.seek(0)  # reset file

    # read entire data file until we've hit EOF
    try:
        for readbytes in itertools.count(0, BUFFERSIZE):
            pairs = in_pairs(fill_buffer(membership_fh, BUFFERSIZE))
            for member_id, set_id in pairs:
                if set_id in set_membership:
                    set_membership[set_id].append(member_id)
            progress_func(readbytes, mb=100)
            # A short (or empty) chunk means the end of the file was reached.
            if len(pairs) != pairs_per_buffer:
                break
    except EOFError:
        # The helpers are not visible here; should one of them signal
        # end-of-file by raising, treat it as a normal end of the scan.
        pass
    return set_membership