Пример #1
0
def find_merge_common_images(args):
    phil = iotbx.phil.process_command_line(args = args,
                                           master_string = master_phil).show()
    work_params = phil.work.extract()
    if ("--help" in args) :
        libtbx.phil.parse(master_phil.show())
        return

    if ((work_params.d_min is None) or
        (work_params.data is None) or
        ((work_params.model is None) and
         work_params.scaling.algorithm != "mark1")) :
        raise Usage("cxi.merge "
                    "d_min=4.0 "
                    "data=~/scratch/r0220/006/strong/ "
                    "model=3bz1_3bz2_core.pdb")
    if ((work_params.rescale_with_average_cell) and
        (not work_params.set_average_unit_cell)) :
        raise Usage(
            "If rescale_with_average_cell=True, you must also specify "+
            "set_average_unit_cell=True.")

    # Read Nat's reference model from an MTZ file.  XXX The observation
    # type is given as F, not I--should they be squared?  Check with Nat!
    log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                    work_params.scaling.algorithm), "w")
    out = multi_out()
    out.register("log", log, atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >> out, "Target unit cell and space group:"
    print >> out, "  ", work_params.target_unit_cell
    print >> out, "  ", work_params.target_space_group

    uc = work_params.target_unit_cell

    miller_set = symmetry(
        unit_cell=work_params.target_unit_cell,
        space_group_info=work_params.target_space_group
        ).build_miller_set(
        anomalous_flag=not work_params.merge_anomalous,
        d_min=work_params.d_min)
    print 'Miller set size: %d' % len(miller_set.indices())
    from xfel.cxi.merging.general_fcalc import random_structure
    i_model = random_structure(work_params)

    # ---- Augment this code with any special procedures for x scaling
    scaler = xscaling_manager(
        miller_set=miller_set,
        i_model=i_model,
        params=work_params,
        log=out)
    scaler.read_all()
    print "finished reading"
    sg = miller_set.space_group()
    pg = sg.build_derived_laue_group()
    miller_set.show_summary()

    hkl_asu = scaler.observations["hkl_id"]
    imageno = scaler.observations["frame_id"]
    intensi = scaler.observations["i"]
    sigma_i = scaler.observations["sigi"]
    
    lookup = scaler.millers["merged_asu_hkl"]    

    # construct table of start / end indices for frames: now using Python
    # range indexing

    starts = [0]
    ends = []
    
    for x in xrange(1, len(scaler.observations["hkl_id"])):
        if imageno[x] != imageno[x - 1]:
            ends.append(x)
            starts.append(x)
            
    ends.append(len(scaler.observations["hkl_id"]))

    keep_start = []
    keep_end = []

    def nint(a):
        return int(round(a))

    from collections import defaultdict
    i_scale = 0.1
    i_hist = defaultdict(int)

    for j, se in enumerate(zip(starts, ends)):
        s, e = se

        for i in intensi[s:e]:
            i_hist[nint(i_scale * i)] += 1
        
        isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
        dmin = 100.0
        for x in xrange(s, e):
            d = uc.d(lookup[hkl_asu[x]])
            if d < dmin:
                dmin = d
        if isig > 6.0 and dmin < 3.2:
            keep_start.append(s)
            keep_end.append(e)

    fout = open('i_hist.dat', 'w')
    for i in i_hist:
        fout.write('%.2f %d\n' % (i / i_scale, i_hist[i]))
    fout.close()

    starts = keep_start
    ends = keep_end

    print 'Keeping %d frames' % len(starts)

    frames = []

    odd = 0
    even = 0

    for s, e in zip(starts, ends):

        for x in range(s, e):
            hkl = lookup[hkl_asu[x]]
            
            if (hkl[0] + hkl[1] + hkl[2]) % 2 == 1:
                odd += 1
            else:
                even += 1
        
        indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
        intensities = intensi[s:e]
        sigmas = sigma_i[s:e]

        frames.append(Frame(uc, indices, intensities, sigmas))

    # pre-scale the data - first determine average ln(k), B; then apply

    kbs = [f.kb() for f in frames]

    mn_k = sum([kb[0] for kb in kbs]) / len(kbs)
    mn_B = sum([kb[1] for kb in kbs]) / len(kbs)

    n_lt_500 = 0
    n_gt_500 = 0

    for j, f in enumerate(frames):
        s_i = f.scale_to_kb(mn_k, mn_B)
        fout = open('frame-s-i-%05d.dat' % j, 'w')
        for s, i, si in s_i:
            fout.write('%f %f %f\n' % (s, i, si))
            if i < 500:
                n_lt_500 += 1
            else:
                n_gt_500 += 1
        fout.close()

    from collections import defaultdict

    hist = defaultdict(int)

    fout = open('kb.dat', 'w')

    for j, f in enumerate(frames):
        kb = f.kb()
        fout.write('%4d %6.3f %6.3f\n' % (j, kb[0], kb[1]))
        hist[int(round(kb[1]))] += 1

    fout.close()

    for b in sorted(hist):
        print b, hist[b]


    print odd, even
    print n_lt_500, n_gt_500
    

    return
Пример #2
0
def run(args):
  phil = iotbx.phil.process_command_line(
    args = args, master_string = master_phil)
  work_params = phil.work.extract()
  if ("--help" in args) :
    libtbx.phil.parse(master_phil.show())
    return

  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage("If rescale_with_average_cell=True, you must also specify "+
      "set_average_unit_cell=True.")
  
  miller_set = symmetry(
      unit_cell = work_params.target_unit_cell,
      space_group_info = work_params.target_space_group
    ).build_miller_set(
      anomalous_flag = not work_params.merge_anomalous,
      d_min = work_params.d_min)
  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

# ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set = miller_set,
    i_model = i_model,
    params = work_params)
  
  scaler.read_all()
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  rational_ops = []
  for symop in pg:
    rational_ops.append((matrix.sqr(symop.r().transpose().as_rational()),
                         symop.r().as_hkl()))

  # miller_set.show_summary()
    
  uc = work_params.target_unit_cell
    
  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]
  origH = scaler.observations["H"]
  origK = scaler.observations["K"]
  origL = scaler.observations["L"]

  from cctbx.miller import map_to_asu
  sgtype = miller_set.space_group_info().type()
  aflag = miller_set.anomalous_flag()
  from cctbx.array_family import flex

  # FIXME in here perform the mapping to ASU for both the original and other
  # index as an array-wise manipulation to make things a bunch faster...
  # however this also uses a big chunk of RAM... FIXME also in here use
  # cb_op.apply(indices) to get the indices reindexed...

  original_indices = flex.miller_index()
  for x in xrange(len(scaler.observations["hkl_id"])):
    original_indices.append(lookup[hkl_asu[x]])

  from cctbx.sgtbx import change_of_basis_op

  I23 = change_of_basis_op('k, -h, l')

  other_indices = I23.apply(original_indices)

  map_to_asu(sgtype, aflag, original_indices)
  map_to_asu(sgtype, aflag, other_indices)

  # FIXME would be useful in here to have a less expensive way of finding the
  # symmetry operation which gave the map to the ASU - perhaps best way is to
  # make a new C++ map_to_asu which records this.
  
  # FIXME in here recover the original frame structure of the data to
  # logical frame objetcs - N.B. the frame will need to be augmented to test
  # alternative indexings

  # construct table of start / end indices for frames: now using Python
  # range indexing

  starts = [0]
  ends = []
    
  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)
            
  ends.append(len(scaler.observations["hkl_id"]))
  
  keep_start = []
  keep_end = []
  
  for j, se in enumerate(zip(starts, ends)):
    print 'processing frame %d: %d to %d' % (j, se[0], se[1])
    s, e = se
    isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  # then start running the comparison code

  frames = []

  for s, e in zip(starts, ends):
    # FIXME need this from remap to ASU
    misym = [0 for x in range(s, e)]
    indices = [original_indices[x] for x in range(s, e)]
    other = [other_indices[x] for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]

    frames.append(Frame(uc, indices, other, intensities, sigmas))

  reference = FrameFromReferenceMTZ()

  fout = open('cc_reference.log', 'w')

  for j, f in enumerate(frames):
    _cc = reference.cc(f)
    _oo = reference.cc_other(f)
    print '%d %d %d %d %f %d %f' % (j, starts[j], ends[j], _cc[0], _cc[1],
                                    _oo[0], _oo[1])

    fout.write('%d %d %d %d %f %d %f\n' % (j, starts[j], ends[j],
                                           _cc[0], _cc[1], _oo[0], _oo[1]))

  fout.close()

  return
Пример #3
0
def run(args):
    phil = iotbx.phil.process_command_line(args=args,
                                           master_string=master_phil).show()
    work_params = phil.work.extract()
    log = open(
        "%s_%s_merging.log" %
        (work_params.output.prefix, work_params.scaling.algorithm), "w")
    out = multi_out()
    out.register("log", log, atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >> out, "Target unit cell and space group:"
    print >> out, "  ", work_params.target_unit_cell
    print >> out, "  ", work_params.target_space_group

    miller_set = symmetry(
        unit_cell=work_params.target_unit_cell,
        space_group_info=work_params.target_space_group).build_miller_set(
            anomalous_flag=not work_params.merge_anomalous,
            d_min=work_params.d_min)
    from xfel.merging.general_fcalc import random_structure
    i_model = random_structure(work_params)

    # ---- Augment this code with any special procedures for x scaling
    scaler = xscaling_manager(miller_set=miller_set,
                              i_model=i_model,
                              params=work_params,
                              log=out)
    scaler.read_all_mysql()
    print "finished reading the database"
    sg = miller_set.space_group()

    hkl_asu = scaler.observations["hkl_id"]
    imageno = scaler.observations["frame_id"]
    intensi = scaler.observations["i"]
    lookup = scaler.millers["merged_asu_hkl"]
    origH = scaler.observations["H"]
    origK = scaler.observations["K"]
    origL = scaler.observations["L"]

    from cctbx.array_family import flex

    print "# observations from the database", len(
        scaler.observations["hkl_id"])
    hkl = flex.miller_index(flex.select(lookup, hkl_asu))
    from cctbx import miller

    hkl_list = miller_set.customized_copy(indices=hkl)

    ARRAY = miller.array(miller_set=hkl_list, data=intensi)
    LATTICES = miller.array(miller_set=hkl_list, data=imageno)

    from cctbx.merging.brehm_diederichs import run_multiprocess, run

    L = (ARRAY, LATTICES)  # tuple(data,lattice_id)
    from libtbx import easy_pickle
    presort_file = work_params.output.prefix + "_intensities_presort.pickle"
    print "pickling these intensities to", presort_file
    easy_pickle.dump(presort_file, L)

    ######  INPUTS #######
    #       data = miller array: ASU miller index + intensity (sigmas not implemented yet)
    #       lattice_id = flex double: assignment of each miller index to a lattice number
    ######################
    if work_params.nproc < 5:
        print "Sorting the lattices with 1 processor"
        result = run(L, nproc=1, verbose=True)
    else:
        print "Sorting the lattices with %d processors" % work_params.nproc
        result = run_multiprocess(L, nproc=work_params.nproc, verbose=False)
    for key in result.keys():
        print key, len(result[key])

    # 2) pickle the postsort (reindexed) ARRAY, LATTICES XXX not done yet; not clear if needed

    reverse_lookup = {}
    frame_id_list = list(scaler.frames_mysql["frame_id"])
    for key in result.keys():
        for frame in result[key]:
            frame_idx = frame_id_list.index(frame)
            reverse_lookup[scaler.frames_mysql["unique_file_name"]
                           [frame_idx]] = key

    lookup_file = work_params.output.prefix + "_lookup.pickle"
    reverse_lookup_file = work_params.output.prefix + "_reverse_lookup.pickle"
    easy_pickle.dump(lookup_file, result)
    easy_pickle.dump(reverse_lookup_file, reverse_lookup)
Пример #4
0
def find_merge_common_images(args):
    phil = iotbx.phil.process_command_line(args = args,
                                           master_string = master_phil).show()
    work_params = phil.work.extract()
    if ("--help" in args) :
        libtbx.phil.parse(master_phil.show())
        return

    if ((work_params.d_min is None) or
        (work_params.data is None) or
        ((work_params.model is None) and
         work_params.scaling.algorithm != "mark1")) :
        raise Usage("cxi.merge "
                    "d_min=4.0 "
                    "data=~/scratch/r0220/006/strong/ "
                    "model=3bz1_3bz2_core.pdb")
    if ((work_params.rescale_with_average_cell) and
        (not work_params.set_average_unit_cell)) :
        raise Usage(
            "If rescale_with_average_cell=True, you must also specify "+
            "set_average_unit_cell=True.")

    # Read Nat's reference model from an MTZ file.  XXX The observation
    # type is given as F, not I--should they be squared?  Check with Nat!
    log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                    work_params.scaling.algorithm), "w")
    out = multi_out()
    out.register("log", log, atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >> out, "Target unit cell and space group:"
    print >> out, "  ", work_params.target_unit_cell
    print >> out, "  ", work_params.target_space_group

    uc = work_params.target_unit_cell

    miller_set = symmetry(
        unit_cell=work_params.target_unit_cell,
        space_group_info=work_params.target_space_group
        ).build_miller_set(
        anomalous_flag=not work_params.merge_anomalous,
        d_min=work_params.d_min)
    from xfel.cxi.merging.general_fcalc import random_structure
    i_model = random_structure(work_params)

    # ---- Augment this code with any special procedures for x scaling
    scaler = xscaling_manager(
        miller_set=miller_set,
        i_model=i_model,
        params=work_params,
        log=out)
    scaler.read_all()
    print "finished reading"
    sg = miller_set.space_group()
    pg = sg.build_derived_laue_group()
    miller_set.show_summary()

    hkl_asu = scaler.observations["hkl_id"]
    imageno = scaler.observations["frame_id"]
    intensi = scaler.observations["i"]
    sigma_i = scaler.observations["sigi"]
    
    lookup = scaler.millers["merged_asu_hkl"]    

    # construct table of start / end indices for frames: now using Python
    # range indexing

    starts = [0]
    ends = []
    
    for x in xrange(1, len(scaler.observations["hkl_id"])):
        if imageno[x] != imageno[x - 1]:
            ends.append(x)
            starts.append(x)
            
    ends.append(len(scaler.observations["hkl_id"]))

    keep_start = []
    keep_end = []

    for j, se in enumerate(zip(starts, ends)):
        s, e = se
        isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
        dmin = 100.0
        for x in xrange(s, e):
            d = uc.d(lookup[hkl_asu[x]])
            if d < dmin:
                dmin = d
        if isig > 6.0 and dmin < 3.2:
            keep_start.append(s)
            keep_end.append(e)

    starts = keep_start
    ends = keep_end

    print 'Keeping %d frames' % len(starts)

    frames = []

    for s, e in zip(starts, ends):
        indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
        intensities = intensi[s:e]
        sigmas = sigma_i[s:e]

        frames.append(Frame(uc, indices, intensities, sigmas))

    cycle = 0

    total_nref = sum([len(f.get_indices()) for f in frames])

    # pre-scale the data - first determine average ln(k), B; then apply

    kbs = [f.kb() for f in frames]

    mn_k = sum([kb[0] for kb in kbs]) / len(kbs)
    mn_B = sum([kb[1] for kb in kbs]) / len(kbs)

    for f in frames:
        f.scale_to_kb(mn_k, mn_B)
    
    while True:

        print 'Analysing %d frames' % len(frames)
        print 'Cycle %d' % cycle
        cycle += 1

        print 'Power spectrum'
        fn = frame_numbers(frames)
        for j in sorted(fn):
            print '%4d %4d' % (j, fn[j])
            
        nref_cycle = sum([len(f.get_indices()) for f in frames])
        assert(nref_cycle == total_nref)

        common_reflections = numpy.zeros((len(frames), len(frames)),
                                         dtype = numpy.short)

        obs = { } 

        from cctbx.sgtbx import rt_mx, change_of_basis_op
        oh = change_of_basis_op(rt_mx('h,l,k'))

        for j, f in enumerate(frames):
            indices = set(f.get_indices())
            for i in indices:
                _i = tuple(i)
                if not _i in obs:
                    obs[_i] = []
                obs[_i].append(j)

        # work through unique observations ignoring those which include no
        # hand information
 
        for hkl in obs:
            if hkl == oh.apply(hkl):
                continue
            obs[hkl].sort()
            for j, f1 in enumerate(obs[hkl][:-1]):
                for f2 in obs[hkl][j + 1:]:
                    common_reflections[(f1, f2)] += 1

        cmn_rfl_list = []

        for f1 in range(len(frames)):
            for f2 in range(f1 + 1, len(frames)):
                if common_reflections[(f1, f2)] > 20:
                    cmn_rfl_list.append((common_reflections[(f1, f2)], f1, f2))

        cmn_rfl_list.sort()
        cmn_rfl_list.reverse()
    
        joins = []
        used = []
    
        for n, f1, f2 in cmn_rfl_list:

            if f1 in used or f2 in used:
                continue
            
            _cc = frames[f1].cc(frames[f2])

            # really only need to worry about f2 which will get merged...
            # merging multiple files together should be OK provided they are
            # correctly sorted (though the order should not matter anyhow?)
            # anyhow they are sorted anyway... ah as f2 > f1 then just sorting
            # the list by f2 will make sure the data cascase correctly.

            # p-value very small for cc > 0.75 for > 20 observations - necessary
            # as will be correlated due to Wilson curves

            if _cc[0] > 20 and _cc[1] > 0.75:
                print '%4d %.3f' % _cc, f1, f2
                joins.append((f2, f1))
                # used.append(f1)
                used.append(f2)

        if not joins:
            print 'No pairs found'
            break

        joins.sort()
        joins.reverse()
        
        for j2, j1 in joins:
            rmerge = frames[j1].merge(frames[j2])
            if rmerge:
                print 'R: %4d %4d %6.3f' % (j1, j2, rmerge)
            else:
                print 'R: %4d %4d ------' % (j1, j2)
                
        continue

    frames.sort()

    print 'Biggest few: #frames; #unique refl'
    j = -1
    while frames[j].get_frames() > 1:
        print frames[j].get_frames(), frames[j].get_unique_indices()
        j -= 1

    return
Пример #5
0
def run(args):
  phil = iotbx.phil.process_command_line(
    args = args, master_string = master_phil)
  work_params = phil.work.extract()
  if ("--help" in args) :
    libtbx.phil.parse(master_phil.show())
    return

  if ((work_params.d_min is None) or
      (work_params.data is None) or
      ((work_params.model is None) and
       work_params.scaling.algorithm != "mark1")):
    raise Usage("cxi.merge "
                "d_min=4.0 "
                "data=~/scratch/r0220/006/strong/ "
                "model=3bz1_3bz2_core.pdb")
  
  if ((work_params.rescale_with_average_cell) and
      (not work_params.set_average_unit_cell)) :
    raise Usage("If rescale_with_average_cell=True, you must also specify "+
      "set_average_unit_cell=True.")
  
  miller_set = symmetry(
      unit_cell = work_params.target_unit_cell,
      space_group_info = work_params.target_space_group
    ).build_miller_set(
      anomalous_flag = not work_params.merge_anomalous,
      d_min = work_params.d_min)
  from xfel.cxi.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

# ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set = miller_set,
    i_model = i_model,
    params = work_params)
  
  scaler.read_all()
  sg = miller_set.space_group()
  pg = sg.build_derived_laue_group()
  rational_ops = []
  for symop in pg:
    rational_ops.append((matrix.sqr(symop.r().transpose().as_rational()),
                         symop.r().as_hkl()))

  # miller_set.show_summary()
    
  uc = work_params.target_unit_cell
    
  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  sigma_i = scaler.observations["sigi"]
  lookup = scaler.millers["merged_asu_hkl"]
  origH = scaler.observations["H"]
  origK = scaler.observations["K"]
  origL = scaler.observations["L"]

  from cctbx.miller import map_to_asu
  sgtype = miller_set.space_group_info().type()
  aflag = miller_set.anomalous_flag()
  from cctbx.array_family import flex

  # FIXME in here perform the mapping to ASU for both the original and other
  # index as an array-wise manipulation to make things a bunch faster...
  # however this also uses a big chunk of RAM... FIXME also in here use
  # cb_op.apply(indices) to get the indices reindexed...

  original_indices = flex.miller_index()
  for x in xrange(len(scaler.observations["hkl_id"])):
    original_indices.append(lookup[hkl_asu[x]])

  from cctbx.sgtbx import change_of_basis_op

  I23 = change_of_basis_op('k, -h, l')

  other_indices = I23.apply(original_indices)

  map_to_asu(sgtype, aflag, original_indices)
  map_to_asu(sgtype, aflag, other_indices)

  # FIXME would be useful in here to have a less expensive way of finding the
  # symmetry operation which gave the map to the ASU - perhaps best way is to
  # make a new C++ map_to_asu which records this.
  
  # FIXME in here recover the original frame structure of the data to
  # logical frame objetcs - N.B. the frame will need to be augmented to test
  # alternative indexings

  # construct table of start / end indices for frames: now using Python
  # range indexing

  starts = [0]
  ends = []
    
  for x in xrange(1, len(scaler.observations["hkl_id"])):
    if imageno[x] != imageno[x - 1]:
      ends.append(x)
      starts.append(x)
            
  ends.append(len(scaler.observations["hkl_id"]))
  
  keep_start = []
  keep_end = []
  
  for j, se in enumerate(zip(starts, ends)):
    print 'processing frame %d: %d to %d' % (j, se[0], se[1])
    s, e = se
    isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
    dmin = 100.0
    for x in xrange(s, e):
      d = uc.d(lookup[hkl_asu[x]])
      if d < dmin:
        dmin = d
    if isig > 6.0 and dmin < 3.2:
      keep_start.append(s)
      keep_end.append(e)

  starts = keep_start
  ends = keep_end

  print 'Keeping %d frames' % len(starts)

  # then start running the comparison code

  frames = []

  for s, e in zip(starts, ends):
    # FIXME need this from remap to ASU
    misym = [0 for x in range(s, e)]
    indices = [original_indices[x] for x in range(s, e)]
    other = [other_indices[x] for x in range(s, e)]
    intensities = intensi[s:e]
    sigmas = sigma_i[s:e]

    frames.append(Frame(uc, indices, other, intensities, sigmas))

  cycle = 0

  total_nref = sum([len(f.get_indices()) for f in frames])

  # pre-scale the data - first determine average ln(k), B; then apply

  kbs = [f.kb() for f in frames]

  mn_k = sum([kb[0] for kb in kbs]) / len(kbs)
  mn_B = sum([kb[1] for kb in kbs]) / len(kbs)

  for f in frames:
    f.scale_to_kb(mn_k, mn_B)
    
  while True:

    print 'Analysing %d frames' % len(frames)
    print 'Cycle %d' % cycle
    cycle += 1

    print 'Power spectrum'
    fn = frame_numbers(frames)
    for j in sorted(fn):
      print '%4d %4d' % (j, fn[j])
            
    nref_cycle = sum([len(f.get_indices()) for f in frames])
    assert(nref_cycle == total_nref)

    # first work on the original indices

    import numpy

    common_reflections = numpy.zeros((len(frames), len(frames)),
                                     dtype = numpy.short)
    
    obs = { } 

    # for other hand add -j

    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if not _i in obs:
          obs[_i] = []
        obs[_i].append(j)

    for hkl in obs:
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          if f1 * f2 > 0:
            common_reflections[(abs(f1), abs(f2))] += 1

    cmn_rfl_list = []

    for f1 in range(len(frames)):
      for f2 in range(f1 + 1, len(frames)):
        if common_reflections[(f1, f2)] > 10:
          cmn_rfl_list.append((common_reflections[(f1, f2)], f1, f2))

    cmn_rfl_list.sort()
    cmn_rfl_list.reverse()
    
    joins = []
    used = []
    
    for n, f1, f2 in cmn_rfl_list:
      
      if f1 in used or f2 in used:
        continue
            
      _cc = frames[f1].cc(frames[f2])

      # really only need to worry about f2 which will get merged...
      # merging multiple files together should be OK provided they are
      # correctly sorted (though the order should not matter anyhow?)
      # anyhow they are sorted anyway... ah as f2 > f1 then just sorting
      # the list by f2 will make sure the data cascase correctly.

      # p-value small (3% ish) for cc > 0.6 for > 10 observations -
      # necessary as will be correlated due to Wilson curves though
      # with B factor < 10 this is less of an issue

      if _cc[0] > 10 and _cc[1] > 0.6:
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f2, f1))
        used.append(f2)

    if not joins:
      print 'No pairs found'
      break

    joins.sort()
    joins.reverse()
        
    for j2, j1 in joins:
      rmerge = frames[j1].merge(frames[j2])
      if rmerge:
        print 'R: %4d %4d %6.3f' % (j1, j2, rmerge)
      else:
        print 'R: %4d %4d ------' % (j1, j2)

    all_joins = [j for j in joins]

    # then do the same for the alternative indices

    other_reflections = numpy.zeros((len(frames), len(frames)),
                                    dtype = numpy.short)

    obs = { } 

    # for other hand add -j

    for j, f in enumerate(frames):
      indices = set(f.get_indices())
      for i in indices:
        _i = tuple(i)
        if not _i in obs:
          obs[_i] = []
        obs[_i].append(j)

      indices = set(f.get_other())
      for i in indices:
        _i = tuple(i)
        if not _i in obs:
          obs[_i] = []
        obs[_i].append(-j)

    for hkl in obs:
      obs[hkl].sort()
      for j, f1 in enumerate(obs[hkl][:-1]):
        for f2 in obs[hkl][j + 1:]:
          if f1 * f2 < 0:
            other_reflections[(abs(f1), abs(f2))] += 1

    oth_rfl_list = []

    for f1 in range(len(frames)):
      for f2 in range(f1 + 1, len(frames)):
        if other_reflections[(f1, f2)] > 10:
          oth_rfl_list.append((other_reflections[(f1, f2)], f1, f2))
    
    joins = []

    oth_rfl_list.sort()
    oth_rfl_list.reverse()
        
    for n, f1, f2 in oth_rfl_list:
      
      if f1 in used or f2 in used:
        continue
            
      _cc = frames[f1].cc_other(frames[f2])

      # really only need to worry about f2 which will get merged...
      # merging multiple files together should be OK provided they are
      # correctly sorted (though the order should not matter anyhow?)
      # anyhow they are sorted anyway... ah as f2 > f1 then just sorting
      # the list by f2 will make sure the data cascase correctly.

      # p-value small (3% ish) for cc > 0.6 for > 10 observations -
      # necessary as will be correlated due to Wilson curves though
      # with B factor < 10 this is less of an issue

      if _cc[0] > 10 and _cc[1] > 0.6:
        print '%4d %.3f' % _cc, f1, f2
        joins.append((f2, f1))
        used.append(f2)

    all_joins += joins

    if not all_joins:
      break
      
    joins.sort()
    joins.reverse()
        
    for j2, j1 in joins:
      frames[j2].reindex()
      rmerge = frames[j1].merge(frames[j2])
      if rmerge:
        print 'R: %4d %4d %6.3f' % (j1, j2, rmerge)
      else:
        print 'R: %4d %4d ------' % (j1, j2)
        
    continue

  frames.sort()

  print 'Biggest few: #frames; #unique refl'
  j = -1
  while frames[j].get_frames() > 1:
    print frames[j].get_frames(), frames[j].get_unique_indices()
    frames[j].output_as_scalepack(sg, 'scalepack-%d.sca' % j)
    j -= 1

  return
Пример #6
0
def find_merge_common_images(args):
    phil = iotbx.phil.process_command_line(args = args,
                                           master_string = master_phil).show()
    work_params = phil.work.extract()
    if ("--help" in args) :
        libtbx.phil.parse(master_phil.show())
        return

    if ((work_params.d_min is None) or
        (work_params.data is None) or
        ((work_params.model is None) and
         work_params.scaling.algorithm != "mark1")) :
        raise Usage("cxi.merge "
                    "d_min=4.0 "
                    "data=~/scratch/r0220/006/strong/ "
                    "model=3bz1_3bz2_core.pdb")
    if ((work_params.rescale_with_average_cell) and
        (not work_params.set_average_unit_cell)) :
        raise Usage(
            "If rescale_with_average_cell=True, you must also specify "+
            "set_average_unit_cell=True.")

    # Read Nat's reference model from an MTZ file.  XXX The observation
    # type is given as F, not I--should they be squared?  Check with Nat!
    log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                    work_params.scaling.algorithm), "w")
    out = multi_out()
    out.register("log", log, atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >> out, "Target unit cell and space group:"
    print >> out, "  ", work_params.target_unit_cell
    print >> out, "  ", work_params.target_space_group

    uc = work_params.target_unit_cell

    miller_set = symmetry(
        unit_cell=work_params.target_unit_cell,
        space_group_info=work_params.target_space_group
        ).build_miller_set(
        anomalous_flag=not work_params.merge_anomalous,
        d_min=work_params.d_min)
    from xfel.cxi.merging.general_fcalc import random_structure
    i_model = random_structure(work_params)

    # ---- Augment this code with any special procedures for x scaling
    scaler = xscaling_manager(
        miller_set=miller_set,
        i_model=i_model,
        params=work_params,
        log=out)
    scaler.read_all()
    print "finished reading"
    sg = miller_set.space_group()
    pg = sg.build_derived_laue_group()
    miller_set.show_summary()

    hkl_asu = scaler.observations["hkl_id"]
    imageno = scaler.observations["frame_id"]
    intensi = scaler.observations["i"]
    sigma_i = scaler.observations["sigi"]
    
    lookup = scaler.millers["merged_asu_hkl"]    

    # construct table of start / end indices for frames: now using Python
    # range indexing

    starts = [0]
    ends = []
    
    for x in xrange(1, len(scaler.observations["hkl_id"])):
        if imageno[x] != imageno[x - 1]:
            ends.append(x)
            starts.append(x)
            
    ends.append(len(scaler.observations["hkl_id"]))

    keep_start = []
    keep_end = []

    for j, se in enumerate(zip(starts, ends)):
        s, e = se
        isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
        dmin = 100.0
        for x in xrange(s, e):
            d = uc.d(lookup[hkl_asu[x]])
            if d < dmin:
                dmin = d
        if isig > 6.0 and dmin < 3.2:
            keep_start.append(s)
            keep_end.append(e)

    starts = keep_start
    ends = keep_end

    print 'Keeping %d frames' % len(starts)

    frames = []

    for s, e in zip(starts, ends):
        indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
        intensities = intensi[s:e]
        sigmas = sigma_i[s:e]

        frames.append(Frame(uc, indices, intensities, sigmas))

    from collections import defaultdict

    hist = defaultdict(int)

    fout = open('common_oh.dat', 'w')

    for j, f in enumerate(frames):
        hp = f.hand_pairs()
        fout.write('%4d %d\n' % (j, hp))
        hist[int(hp)] += 1

    fout.close()

    for b in sorted(hist):
        print b, hist[b]


    

    return
Пример #7
0
def run(args):
  phil = iotbx.phil.process_command_line(args=args, master_string=master_phil).show()
  work_params = phil.work.extract()
  log = open("%s_%s_merging.log" % (work_params.output.prefix,work_params.scaling.algorithm), "w")
  out = multi_out()
  out.register("log", log, atexit_send_to=None)
  out.register("stdout", sys.stdout)

  print >> out, "Target unit cell and space group:"
  print >> out, "  ", work_params.target_unit_cell
  print >> out, "  ", work_params.target_space_group

  miller_set = symmetry(
      unit_cell=work_params.target_unit_cell,
      space_group_info=work_params.target_space_group
    ).build_miller_set(
      anomalous_flag=not work_params.merge_anomalous,
      d_min=work_params.d_min)
  from xfel.merging.general_fcalc import random_structure
  i_model = random_structure(work_params)

# ---- Augment this code with any special procedures for x scaling
  scaler = xscaling_manager(
    miller_set=miller_set,
    i_model=i_model,
    params=work_params,
    log=out)
  scaler.read_all_mysql()
  print "finished reading the database"
  sg = miller_set.space_group()

  hkl_asu = scaler.observations["hkl_id"]
  imageno = scaler.observations["frame_id"]
  intensi = scaler.observations["i"]
  lookup = scaler.millers["merged_asu_hkl"]
  origH = scaler.observations["H"]
  origK = scaler.observations["K"]
  origL = scaler.observations["L"]

  from cctbx.array_family import flex

  print "# observations from the database",len(scaler.observations["hkl_id"])
  hkl = flex.miller_index(flex.select(lookup,hkl_asu))
  from cctbx import miller

  hkl_list = miller_set.customized_copy(indices = hkl)

  ARRAY = miller.array(miller_set = hkl_list, data = intensi)
  LATTICES = miller.array(miller_set = hkl_list, data = imageno)

  from cctbx.merging.brehm_diederichs import run_multiprocess, run

  L = (ARRAY, LATTICES) # tuple(data,lattice_id)
  from libtbx import easy_pickle
  presort_file = work_params.output.prefix+"_intensities_presort.pickle"
  print "pickling these intensities to", presort_file
  easy_pickle.dump(presort_file,L)

    ######  INPUTS #######
    #       data = miller array: ASU miller index + intensity (sigmas not implemented yet)
    #       lattice_id = flex double: assignment of each miller index to a lattice number
    ######################
  if work_params.nproc < 5:
    print "Sorting the lattices with 1 processor"
    result = run(L,nproc=1,verbose=True)
  else:
    print "Sorting the lattices with %d processors"%work_params.nproc
    result = run_multiprocess(L,nproc=work_params.nproc, verbose=False)
  for key in result.keys():
    print key,len(result[key])

  # 2) pickle the postsort (reindexed) ARRAY, LATTICES XXX not done yet; not clear if needed

  reverse_lookup = {}
  frame_id_list = list(scaler.frames_mysql["frame_id"])
  for key in result.keys():
    for frame in result[key]:
      frame_idx = frame_id_list.index(frame)
      reverse_lookup[scaler.frames_mysql["unique_file_name"][frame_idx]] = key

  lookup_file = work_params.output.prefix+"_lookup.pickle"
  reverse_lookup_file = work_params.output.prefix+"_reverse_lookup.pickle"
  easy_pickle.dump(lookup_file, result)
  easy_pickle.dump(reverse_lookup_file, reverse_lookup)
Пример #8
0
def find_merge_common_images(args):
    phil = iotbx.phil.process_command_line(args = args,
                                           master_string = master_phil).show()
    work_params = phil.work.extract()
    if ("--help" in args) :
        libtbx.phil.parse(master_phil.show())
        return

    if ((work_params.d_min is None) or
        (work_params.data is None) or
        ((work_params.model is None) and
         work_params.scaling.algorithm != "mark1")) :
        raise Usage("cxi.merge "
                    "d_min=4.0 "
                    "data=~/scratch/r0220/006/strong/ "
                    "model=3bz1_3bz2_core.pdb")
    if ((work_params.rescale_with_average_cell) and
        (not work_params.set_average_unit_cell)) :
        raise Usage(
            "If rescale_with_average_cell=True, you must also specify "+
            "set_average_unit_cell=True.")

    # Read Nat's reference model from an MTZ file.  XXX The observation
    # type is given as F, not I--should they be squared?  Check with Nat!
    log = open("%s_%s_scale.log" % (work_params.output.prefix,
                                    work_params.scaling.algorithm), "w")
    out = multi_out()
    out.register("log", log, atexit_send_to=None)
    out.register("stdout", sys.stdout)

    print >> out, "Target unit cell and space group:"
    print >> out, "  ", work_params.target_unit_cell
    print >> out, "  ", work_params.target_space_group

    uc = work_params.target_unit_cell

    miller_set = symmetry(
        unit_cell=work_params.target_unit_cell,
        space_group_info=work_params.target_space_group
        ).build_miller_set(
        anomalous_flag=not work_params.merge_anomalous,
        d_min=work_params.d_min)
    from xfel.cxi.merging.general_fcalc import random_structure
    i_model = random_structure(work_params)

    # ---- Augment this code with any special procedures for x scaling
    scaler = xscaling_manager(
        miller_set=miller_set,
        i_model=i_model,
        params=work_params,
        log=out)
    scaler.read_all()
    print "finished reading"
    sg = miller_set.space_group()
    pg = sg.build_derived_laue_group()
    miller_set.show_summary()

    hkl_asu = scaler.observations["hkl_id"]
    imageno = scaler.observations["frame_id"]
    intensi = scaler.observations["i"]
    sigma_i = scaler.observations["sigi"]
    
    lookup = scaler.millers["merged_asu_hkl"]    

    # construct table of start / end indices for frames: now using Python
    # range indexing

    starts = [0]
    ends = []
    
    for x in xrange(1, len(scaler.observations["hkl_id"])):
        if imageno[x] != imageno[x - 1]:
            ends.append(x)
            starts.append(x)
            
    ends.append(len(scaler.observations["hkl_id"]))

    keep_start = []
    keep_end = []

    for j, se in enumerate(zip(starts, ends)):
        s, e = se
        isig = sum(i / s for i, s in zip(intensi[s:e], sigma_i[s:e])) / (e - s)
        dmin = 100.0
        for x in xrange(s, e):
            d = uc.d(lookup[hkl_asu[x]])
            if d < dmin:
                dmin = d
        if isig > 6.0 and dmin < 3.2:
            keep_start.append(s)
            keep_end.append(e)

    starts = keep_start
    ends = keep_end

    print 'Keeping %d frames' % len(starts)

    frames = []

    for s, e in zip(starts, ends):
        indices = [tuple(lookup[hkl_asu[x]]) for x in range(s, e)]
        intensities = intensi[s:e]
        sigmas = sigma_i[s:e]

        frames.append(Frame(indices, intensities, sigmas))

    while True:

        print 'Analysing %d frames' % len(frames)

        common_reflections = numpy.zeros((len(frames), len(frames)),
                                         dtype = numpy.short)

        obs = { } 

        for j, f in enumerate(frames):
            indices = set(f.get_indices())
            for i in indices:
                _i = tuple(i)
                if not _i in obs:
                    obs[_i] = []
                obs[_i].append(j)

        for hkl in obs:
            obs[hkl].sort()
            for j, f1 in enumerate(obs[hkl][:-1]):
                for f2 in obs[hkl][j + 1:]:
                    common_reflections[(f1, f2)] += 1

        cmn_rfl_list = []

        for f1 in range(len(frames)):
            for f2 in range(f1, len(frames)):
                cmn_rfl_list.append((common_reflections[(f1, f2)], f1, f2))

        cmn_rfl_list.sort()
        cmn_rfl_list.reverse()
    
        joins = []
    
        for n, f1, f2 in cmn_rfl_list:
            _cc = frames[f1].cc(frames[f2])

            if _cc[0] > 20 and _cc[1] > 0.75:
                # print '===> %d %d' % (n, frames[f1].common(frames[f2]))
                print '%4d %.3f' % _cc, f1, f2
                joins.append((f1, f2))

        if not joins:
            print 'No pairs found'
            break

        joins.sort()
        joins.reverse()
        
        for j1, j2 in joins:
            frames[j1].merge(frames[j2])
            frames.remove(frames[j2])
            
        continue

    

    return