示例#1
0
    def toCCDCStructure(self, pointer=None, stage=None, identifier=None):
        ''' Return a CCDC crystal read from the structure file at pointer.

            pointer    -- path of the structure file; when None it is looked
                          up with
                          self.getAttribute({stage: 'finalStructurePointer'})
            stage      -- name of the data dictionary to query; only
                          'crystalOptimiserData' and 'pcmData' are accepted
                          (presumably SetCSPStructures.calculation + 'Data'
                          -- TODO confirm)
            identifier -- when given, replaces the identifier of the returned
                          crystal (changes the name in the cif)

            Raises Exception for an unsupported stage and ValueError when the
            file format cannot be deduced from the last three characters of
            pointer. '''

        # temporary fail as code work in progress
        if stage not in ('crystalOptimiserData', 'pcmData'):
            raise Exception(
                'Implement other options to get structures if needed')

        #generalize this to be self.getAttribute(pointer = {stage: lowestStructurePointer}) if pointer is None
        if pointer is None:
            pointer = self.getAttribute({stage: 'finalStructurePointer'})

        # CrystalReader can be told explicitly which file format to look for
        suffix = pointer[-3:].lower()
        if suffix in ('res', '.16'):  #.16 is fort.16 from DMACRYS
            fileFormat = 'res'
        elif suffix == 'cif':
            fileFormat = 'cif'
        else:
            # previously fileFormat was left unbound here, which surfaced as an
            # UnboundLocalError with no hint about the offending file
            raise ValueError(
                'Cannot deduce structure file format from %r '
                '(expected a name ending in res, .16 or cif)' % (pointer, ))

        # imported only once the arguments are known to be usable
        from ccdc.io import CrystalReader

        ccdcCrystal = CrystalReader(pointer, format=fileFormat)[0]

        if identifier:
            ccdcCrystal.identifier = identifier

        return ccdcCrystal
示例#2
0
def main():
    """Write a relabelled/reordered copy of a res file, driven by sys.argv.

    Command-line arguments (all strings):
        oldResFile templateResFile newFilename [enforcement]

    The replacement dictionary is built from the first crystal of
    templateResFile and the first crystal of oldResFile, then passed to
    reorderedRes together with oldResFile. When the optional enforcement
    argument is 'y', reorderedRes is additionally called with
    enforceStandardListOrder=True and enforceLowestNumbering=True; any other
    value behaves like the three-argument form.

    Raises:
        Exception: with usefulMessage when the argument count is not 3 or 4.
    """
    args = sys.argv[1:]
    if len(args) not in (3, 4):
        raise Exception(usefulMessage)

    # names of files (all strings)
    oldResFile, templateResFile, newFilename = args[:3]
    enforce = len(args) == 4 and args[3] == 'y'

    # use the above subroutines to achieve the relabelling
    # uses ccdc molecule instances
    relabelling = replacementDict(
        CrystalReader(templateResFile)[0],
        CrystalReader(oldResFile)[0])
    if enforce:
        newRes = reorderedRes(oldResFile,
                              relabelling,
                              enforceStandardListOrder=True,
                              enforceLowestNumbering=True)
    else:
        newRes = reorderedRes(oldResFile, relabelling)

    # The output is opened only after all inputs have been read, so a failure
    # above no longer leaves a truncated file behind and newFilename may safely
    # be the same path as one of the inputs.
    with open(newFilename, 'w') as outf:
        outf.write(newRes)
示例#3
0
def cacl_density(doi):
    """Return the calculated density of the first CSD entry matching a DOI.

    The DOI is printed, then searched for with TextNumericSearch. The crystal
    of the first hit is fetched from the CSD and its calculated_density is
    formatted with three decimals. When the search returns no hits the
    placeholder density 1 is used, i.e. "1.000" is returned.

    :param doi: DOI passed to TextNumericSearch.add_doi
    :return: the density formatted as "%.3f"
    """
    print(doi)
    query = TextNumericSearch()
    query.add_doi(doi)
    ref_codes = [hit.identifier for hit in query.search()]

    if not ref_codes:
        # no hit: do not open the CSD at all, just return the placeholder
        density = 1
    else:
        density = CrystalReader('CSD').crystal(ref_codes[0]).calculated_density
    return "%.3f" % density
示例#4
0
    def calc_powder_from_pdb(self, pdbfile):
        crystal = CrystalReader(pdbfile)[0]
        print crystal.spacegroup_symbol
        print 'angles', crystal.cell_angles[0], crystal.cell_angles[1], crystal.cell_angles[2]
        print 'length', crystal.cell_lengths[0], crystal.cell_lengths[1], crystal.cell_lengths[2]
        print 'volume', crystal.cell_volume

        pattern = PowderPattern.from_crystal(crystal)
        pattern.write_xye_file(('./'+pdbfile.replace('.pdb','.xye')))
 def test_ccdcToASE(self):
     """Convert the first crystal of FPAMCA16.res to ASE and check the atom count."""
     from ioAndInterfaces import ccdcCrystalToASE
     from ccdc.io import CrystalReader

     # this is a Z'=4 30 atom molecule
     expectedNumberOfAtoms = 4 * 30

     sourceCrystal = CrystalReader('FPAMCA16.res')[0]
     converted = ccdcCrystalToASE(sourceCrystal)
     self.assertEqual(len(converted), expectedNumberOfAtoms)
示例#6
0
def main():
    """Print the result of simpleRelabelling for the res file named on the command line.

    Expects exactly two command-line arguments, oldString and resFile.
    NOTE: oldString is parsed but not used yet; a fixed definition string is
    relabelled against the first crystal read from resFile.

    Raises:
        Exception: with usefulMessage when the number of arguments is wrong.
    """
    try:
        oldString, resFile = sys.argv[1:]
    except ValueError:
        # unpacking fails with ValueError when the argument count is not two
        raise Exception(usefulMessage)

    definitions = "O1_C1_C2_C7 H1_O1_C1_C2 C1_C2_C7_N1 C7_N1_C8_C9 H6_N1_C8_C9 F1_C14_C12_C11 H6_N1_C8 H1_O1_C1"
    print(simpleRelabelling(definitions, CrystalReader(resFile)[0]))
示例#7
0
    def _get_crystal(obj):
        """
        Convert an object writable by MoleculeWriter to a crystal

        The object is written to a mol2 file inside a fresh temporary
        directory and read back with CrystalReader. The temporary directory is
        removed again before returning (best effort).

        :param `ccdc.molecule.Molecule` obj: molecule or protein
        :return: `ccdc.crystal.Crystal`
        """
        import shutil

        tmp = tempfile.mkdtemp()
        try:
            f = os.path.join(tmp, "obj.mol2")
            with MoleculeWriter(f) as w:
                w.write(obj)
            return CrystalReader(f)[0]
        finally:
            # previously every call leaked one temporary directory;
            # ignore_errors keeps a failed clean-up from masking the result
            shutil.rmtree(tmp, ignore_errors=True)
示例#8
0
    def test_SetCSPStructures(self):
        """Exercise SetCSPStructures end to end against the example data files.

        Covers, in order: building a set from a CrystalPredictor log, attaching
        CrystalOptimiser and free-energy data, subset selection and sorting via
        Attribute filters, conversion to a pandas DataFrame, matching against
        CCDC crystals, packing coefficient, minimum value lookup, CIF writing,
        initialisation from CSV, generic get/set of attributes and reading PCM
        data. The steps mutate shared objects, so their order matters.
        """
        from cspTracking import SetCSPStructures

        # build the set from the example CrystalPredictor log
        tempSet = SetCSPStructures.initFromCrystalPredictorLog('exampleCP.log')

        self.assertEqual(len(tempSet.structures), 27414)
        self.assertEqual(tempSet.structures[0].crystalPredictorData['spaceGroup'], 'P-1')
        self.assertEqual(tempSet.structures[5].idNumber, 6)

        # attach CrystalOptimiser data to the first structure from one summary file
        tempSet.structures[0].createCrystalOptimiserData('1-1/exampleCOSummary.out')
        self.assertEqual(tempSet.structures[0].crystalOptimiserData['rhoFinal'],
                         1.4749)

        #remove the data and add again with different method
        tempSet.structures[0].crystalOptimiserData = {}
        self.assertEqual(len(tempSet.structures[0].crystalOptimiserData.keys()), 0)
#        tempSet.addCrystalOptimiserInfoFromGlobString('1-1/*ummary.out')
        tempSet.addInfoFromGlobString('1-1/*ummary.out')
        self.assertEqual(tempSet.structures[0].crystalOptimiserData['rhoFinal'],
                         1.4749)
        self.assertTrue(tempSet.structures[0].crystalOptimiserComplete)

        # get free energy data from Jonas's log files
        # (the lambda returns the same log file name for every structure)
        tempSet.addFreeEnergyData(lambda x: 'free_energy.log')
        self.assertEqual(tempSet.structures[-1].getAttribute({'freeEnergyData': 'ZPE'}),
                         2.7239)


        #get a subset of data, and assert that it has a positive density
        from cspTracking import Attribute
        self.assertEqual(len(tempSet.subsetStructures([Attribute(pointer='crystalOptimiserComplete',
                                                                 value  = True)])),
                         len([x for x in tempSet.structures if x.crystalOptimiserComplete]))
        # only one structure has CrystalOptimiser data at this point
        self.assertEqual(len(tempSet.subsetStructures([Attribute(pointer   = {'crystalOptimiserData': 'rhoFinal'},
                                                                 minValue  = 0.)])),
                         1)
        self.assertEqual(len(tempSet.subsetStructures([Attribute(pointer   = {'crystalOptimiserData': 'rhoFinal'},
                                                                 maxValue  = 0.)])),
                         0)
        self.assertEqual(len(tempSet.subsetStructures([Attribute(pointer   = {'crystalOptimiserData': 'rhoFinal'},
                                                                 minValue  = 0.1,
                                                                 maxValue  = 0.4)])),
                         0)
#        tempSet.addCrystalOptimiserInfoFromGlobString('2-1/*ummary.out')
        # add a second structure's data so that range filters return two entries
        tempSet.addInfoFromGlobString('2-1/*ummary.out')
        self.assertEqual(len(tempSet.subsetStructures([Attribute(pointer   = {'crystalOptimiserData': 'rhoFinal'},
                                                                 minValue  = 1.1,
                                                                 maxValue  = 2.4)])),
                         2)
        #lowest one for rho has idNumber 2
        self.assertEqual([x.getAttribute('idNumber') for x in
                          tempSet.subsetStructures([Attribute(pointer   = {'crystalOptimiserData': 'rhoFinal'},
                                                              minValue  = 1.1,
                                                              maxValue  = 2.4)],
                                                   sortAttribute=Attribute(pointer   = {'crystalOptimiserData': 'rhoFinal'}))][0],
                         2)

        #turn data into a pandas DataFrame
        dataTable = tempSet.toPandasDataFrame()
        self.assertEqual(dataTable['SG'][0], 'P-1')

        #test matching
        from ccdc.io import CrystalReader
        from cspTracking import CSPStructure

        matchingSet = SetCSPStructures([CSPStructure(1, initCrystalOptimiserData='1-1/exampleCOSummary.out')])
        #match to itself just as an example
        self.assertTrue(matchingSet.structures[0].crystalOptimiserData['finalStructureFileExists'])
        matchDict = matchingSet.matchToStructureList([CrystalReader('1-1/lowest.res')[0]],
                                                     stage='crystalOptimiserData')
        # a structure matched against itself is expected to give 15 in the matches matrix
        np.testing.assert_array_equal(matchDict['matchesMatrix'],
                                      np.array([[15]]))

        self.assertEqual(matchDict['candidateList'][0].idNumber,
                         1)

        #do same match but add RMSD data
        matchDict = matchingSet.matchToStructureList([CrystalReader('1-1/lowest.res')[0]],
                                                     returnRMSD=True,
                                                     stage='crystalOptimiserData')
        np.testing.assert_array_equal(matchDict['rmsdMatrix'],
                                      np.array([[0.]]))

        #test packing coefficient (what method does CCDC use??)
        self.assertAlmostEqual(matchingSet.structures[0].packingCoefficient(stage='crystalOptimiserData'),
                               0.69370122631)

        #get minimum energy of list
        self.assertEqual(matchingSet.minValueInSet(Attribute(pointer = {'crystalOptimiserData': 'eTotalFinal'})),
                         -133.44284)

        #test writing a CIF - tear down data at the end
        nameOfCIF = 'tempCIFWrite.cif'
        # the file must not exist beforehand, otherwise the check after makeCIF proves nothing
        self.assertFalse(os.path.isfile(nameOfCIF))

        matchingSet.calculation = 'crystalOptimiser' # this should be known before matching really
        matchingSet.makeCIF(nameOfCIF)
        self.assertTrue(os.path.isfile(nameOfCIF))
        os.remove(nameOfCIF)

        #initiate via CSV then match
        csvStructures = SetCSPStructures.initFromCSV('example.csv',
                                                     idNumberIndex     = 0,
                                                     finalEnergyIndex  = 1,
                                                     finalDensityIndex = 3,
                                                     finalStructureDir = os.path.dirname(os.path.realpath(__file__)),
                                                     assertZPrime      = 1.234)
        self.assertEqual(csvStructures.structures[0].idNumber,
                         978)
        self.assertEqual(csvStructures.structures[0].getAttribute({'crystalOptimiserData': 'zPrime'}),
                         1.234)
        self.assertAlmostEqual(csvStructures.structures[0].getAttribute({'crystalOptimiserData': 'eTotalFinal'}),
                               -139.35473)
        self.assertAlmostEqual(csvStructures.structures[0].getAttribute({'crystalOptimiserData': 'rhoFinal'}),
                               1.26410)
        # NOTE(review): only a reminder is printed here; no match is performed for the CSV set
        print 'check match'

        #use general get and set attributes
        newStructureSet = SetCSPStructures([CSPStructure(i) for i in range(1,5)], calculation = 'crystalOptimiser')
        newStructureSet.structures[2].setAttribute(Attribute(pointer={'crystalOptimiserData': 'rhoFinal'},
                                                             value = 23.4))
        self.assertEqual(newStructureSet.structures[2].getAttribute({'crystalOptimiserData': 'rhoFinal'}),
                         23.4)
        # a global attribute overrides the value set on the single structure above
        newStructureSet.setGlobalAttributes([Attribute(pointer={'crystalOptimiserData': 'rhoFinal'},
                                                       value=1.0)])
        self.assertEqual(newStructureSet.structures[2].getAttribute({'crystalOptimiserData': 'rhoFinal'}),
                         1.)
        self.assertEqual(newStructureSet.calculation,
                         'crystalOptimiser')
#        print newStructureSet.structures[0].__dict__
        print newStructureSet.statusString()

        #read some PCM data
        newStructureSet.addInfoFromGlobString('1-1/PCM/1-1', calculation = 'pcm')
        self.assertAlmostEqual(newStructureSet.structures[0].pcmData['eInterFinal'],
                               -149.0522)
        self.assertAlmostEqual(newStructureSet.structures[0].pcmData['eIntraHartrees'],
                               -1043.20561970)
        self.assertTrue(newStructureSet.structures[0].pcmData['finalStructureFileExists'])

        # NOTE(review): this def is nested inside the test method, so it only
        # creates a local function that is never called; presumably it was
        # meant to be a class-level tearDown -- confirm and dedent if so.
        def tearDown(self):
            print 'tearing down', os.path.isfile('tempCIFWrite.cif')
示例#9
0
def rmsdtime(k, m):
    """Time crystal loading and pairwise RMSD comparison over repeated runs.

    Each run selects files with file_select(k), reads the first crystal of
    every file (processing time) and compares every pair, including each
    crystal with itself, through similarity_engine.compare (comparison time).
    The loop runs while the run counter is <= m, i.e. m + 1 times. The three
    timing series are plotted against the run number and saved to
    outputdir + "/RMSD_time_test.png".

    Args:
        k: argument handed to file_select; reported as the number of molecules.
        m: upper bound of the zero-based run counter.

    Returns:
        [mean processing time, mean comparison time, mean total time] in
        seconds.
    """
    rpt = 0
    xax = []

    process_time_list = []
    compare_time_list = []
    total_time_list = []

    while rpt <= m:

        file_extract = file_select(k)

        # bare file name with its first four and last four characters removed
        filename_list = [str(path.split("/")[-1][4:-4]) for path in file_extract]
        print("Files to be compared:")
        print(filename_list)

        a = time.perf_counter()
        crystals = [CrystalReader(c)[0] for c in file_extract]
        b = time.perf_counter()
        process_time = b - a

        process_time_list.append(process_time)

        # the original f-strings contained a literal "%s" and never formatted k
        print(f'Processing time (s) for {k} molecules:')
        print(process_time)
        print(process_time_list)

        c = time.perf_counter()
        for first, second in itertools.combinations_with_replacement(
                crystals, 2):
            similarity_engine.compare(first, second)
        d = time.perf_counter()
        compare_time = d - c

        print(f'Comparison time (s) for {k} molecules:')
        # previously printed (d - c) + (a - b), i.e. comparison minus processing
        print(compare_time)

        compare_time_list.append(compare_time)
        print(compare_time_list)

        total_time_list.append(process_time + compare_time)
        print(total_time_list)

        rpt += 1
        xax.append(rpt)

    plt.plot(xax, process_time_list, label='Processing Time')
    plt.plot(xax, compare_time_list, label='Comparison Time')
    # previously the comparison times were plotted a second time as "Total Time"
    plt.plot(xax, total_time_list, label='Total Time')
    plt.xlabel('Run number')
    plt.ylabel('Computation time')
    plt.title('RMSD Comparison')
    plt.legend()

    plt.savefig(outputdir + "/RMSD_time_test.png")
    plt.cla()

    averages = [
        sum(process_time_list) / len(process_time_list),
        sum(compare_time_list) / len(compare_time_list),
        sum(total_time_list) / len(total_time_list)
    ]

    print(f'Average rmsd processing time (s) for {k} molecules:')
    print(averages[0])
    print(f'Average rmsd pairwise comparison time for {k} molecules')
    print(averages[1])
    print(f'Total rmsd time for {k} molecules')
    print(averages[2])

    return averages
示例#10
0
def powdertime(k, m):
    """Time powder-pattern generation and pairwise pattern comparison over repeated runs.

    Each run selects files with file_select(k), reads the first crystal of
    every file and builds its powder pattern (processing time), then compares
    every pair of patterns, including each pattern with itself (comparison
    time). The loop runs while the run counter is <= m, i.e. m + 1 times. The
    three timing series are plotted against the run number and saved to
    outputdir + "/powder_time_test.png".

    Args:
        k: argument handed to file_select; reported as the number of molecules.
        m: upper bound of the zero-based run counter.

    Returns:
        [mean processing time, mean comparison time, mean total time] in
        seconds.
    """
    rpt = 0
    xax = []

    process_time_list = []
    compare_time_list = []
    total_time_list = []

    while rpt <= m:

        file_extract = file_select(k)

        a = time.perf_counter()
        crystals = [CrystalReader(c)[0] for c in file_extract]
        powders = [CD.PowderPattern.from_crystal(c) for c in crystals]
        b = time.perf_counter()
        process_time = b - a

        process_time_list.append(process_time)

        print(f'Processing time (s) for {k} molecules')
        print(process_time)

        c = time.perf_counter()
        for first, second in itertools.combinations_with_replacement(
                powders, 2):
            first.similarity(second)
        d = time.perf_counter()
        compare_time = d - c

        compare_time_list.append(compare_time)
        total_time_list.append(process_time + compare_time)

        # the label used to be printed without the value it announces
        print(f'Comparison time (s) for {k} molecules:')
        print(compare_time)

        rpt += 1
        xax.append(rpt)

    plt.plot(xax, process_time_list, label='Processing Time')
    plt.plot(xax, compare_time_list, label='Comparison Time')
    plt.plot(xax, total_time_list, label='Total Time')
    plt.xlabel('Run number')
    plt.ylabel('Computation time')
    plt.title('Powder Comparison')
    plt.legend()

    plt.savefig(outputdir + "/powder_time_test.png")
    plt.cla()

    averages = [
        sum(process_time_list) / len(process_time_list),
        sum(compare_time_list) / len(compare_time_list),
        sum(total_time_list) / len(total_time_list)
    ]

    # the original f-strings contained a literal "%s" and never formatted k
    print(f'Average powder processing time (s) for {k} molecules:')
    print(averages[0])
    print(f'Average powder pairwise comparison time for {k} molecules')
    print(averages[1])
    print(f'Total powder time for {k} molecules')
    print(averages[2])

    return averages
        try:
            return compareResult.nmatched_molecules
        except:
            return None


# Set up comparison object
ps = PackingSimilarity()
allowMolecularDifferences = True
clusterSize = 15
ps.settings.allow_molecular_differences = allowMolecularDifferences
ps.settings.packing_shell_size = clusterSize


def _readResOrCif(path):
    """Return the first crystal in path, read as res and, failing that, as cif."""
    try:
        return CrystalReader(path, format='res')[0]
    except Exception:
        # Exception rather than a bare except, so that Ctrl-C and SystemExit
        # are not mistaken for "not a res file"
        return CrystalReader(path, format='cif')[0]


# define crystals
crystal1 = _readResOrCif(sys.argv[1])
crystal2 = _readResOrCif(sys.argv[2])

matchingData = returnNmatched_molecules(ps,
                                        crystal1,
                                        crystal2,
                                        returnRMSD=True)

#print "Matching {1} molecules (out of {2}) with RMSD = {3} Angstroms".format(clusterSize,clusterSize,clusterSize)
print "Allowing molecular differences? ", allowMolecularDifferences
print "Matching %s molecules (out of %s) with RMSD = %s Angstroms" % (
link_list = []
print(st + e)

for file_path in files[st:e]:
    file_path_split = file_path.split("/")
    file_name = file_path_split[len(file_path_split) - 1]
    file_list.append(file_name)
    doc = cif.read(file_path)
    block = doc[0]

    for b in doc:
        if (b.find_loop('_atom_site_') != None):
            block = b

    print("Processed file --> " + file_name)
    crystal_reader = CrystalReader(file_path)
    crystal = crystal_reader[0]
    crystal.assign_bonds()
    packed_molecules = crystal.packing(box_dimensions=((0, 0, 0), (1, 1, 1)),
                                       inclusion='CentroidIncluded')
    packed_molecules.normalise_labels()

    adta_molecules = []
    cent = []
    cent_points = []
    for comp in packed_molecules.components:
        if (len(comp.atoms) > 1):
            adta_molecules.append(comp)
            cent.append(MD.atom_centroid(*list(a for a in comp.atoms)))

    for c in cent:
示例#13
0
from ase.build import niggli_reduce
from ase import Atoms
from ase.io import write as aseWrite
from ioAndInterfaces import ccdcCrystalToASE
from ccdc.io import CrystalReader

# Read a res file, Niggli-reduce its cell through ASE and write the result.
#example input and outputs to test this
inputRes, outputRes = 'testingScripts/new.res', 'temp070917.res'

myCell = CrystalReader(inputRes)[0]
myASECell = ccdcCrystalToASE(myCell)
# periodic boundary conditions are switched on before reducing the cell
myASECell.set_pbc(True)
niggli_reduce(myASECell)

aseWrite(outputRes, myASECell)

print('issue with this is that crystal optimiser wants labels etc, and probably fussy with res file format')
#could attach labels or something if can be bothered

# the original literal was broken across two lines, which is a syntax error
print('another issue is that the cell may not have the same convention, so would have to reset rather '
      'than just changing angles etc')
示例#14
0
                 for pair in MolecularDescriptors.MaximumCommonSubstructure().search(mol1, mol2)[0]])

def main():
    """Print the result of simpleRelabelling for the res file named on the command line.

    Expects exactly two command-line arguments, oldString and resFile.
    NOTE: oldString is parsed but not used yet; a fixed definition string is
    relabelled against the first crystal read from resFile.

    Raises:
        Exception: with usefulMessage when the number of arguments is wrong.
    """
    try:
        oldString, resFile = sys.argv[1:]
    except ValueError:
        # unpacking fails with ValueError when the argument count is not two
        raise Exception(usefulMessage)

    definitions = "O1_C1_C2_C7 H1_O1_C1_C2 C1_C2_C7_N1 C7_N1_C8_C9 H6_N1_C8_C9 F1_C14_C12_C11 H6_N1_C8 H1_O1_C1"
    print(simpleRelabelling(definitions, CrystalReader(resFile)[0]))
# Script entry point. The statements that used to follow the first exit() were
# unreachable and referenced undefined names (templateResFile, oldResFile,
# newFilename); the top-level exit() calls also terminated any interpreter that
# merely imported this module. Both have been removed.
if __name__ == '__main__':
    main()

    exit()
def linkcomp(start, end, pcomp=True, rcomp=True, lktest=False):
    """Compare every pair of files in files[start:end] and write CSV matrices.

    For each pair (i < j) the value is stored at [j][i], so only the lower
    triangle is filled; the diagonal and upper triangle stay 0. Rows and
    columns are labelled with the bare file name minus its first four and
    last four characters.

    Args:
        start, end: slice bounds into the module-level list files.
        pcomp: write powder-pattern similarity (rounded to 3 decimals, NaN
            when the similarity is None) to outputdir/powder_comparison.csv.
        rcomp: write the RMSD from similarity_engine.compare (rounded to 3
            decimals, NaN when the comparison is None) to
            outputdir/rmsd_comparison.csv.
        lktest: write the absolute difference of max(link_dic[...]) of the two
            files (rounded to 3 decimals, NaN on TypeError) to
            outputdir/link_comparison.csv.
    """
    file_extract = files[start:end]
    # bare file names; assumes link_dic is keyed by these, since the original
    # referenced first_file_name/second_file_name without ever defining them
    # -- TODO confirm against where link_dic is filled
    base_names = [path.split("/")[-1] for path in file_extract]
    filename_list = [str(name[4:-4]) for name in base_names]
    count = len(file_extract)

    if lktest:
        lkarray = np.zeros((count, count))

    if pcomp:
        powdarray = np.zeros((count, count))

    if rcomp:
        rmsdarray = np.zeros((count, count))

    # Each file is read once instead of once per pair.
    crystals = ([CrystalReader(path)[0] for path in file_extract]
                if (pcomp or rcomp) else [])
    powders = ([CD.PowderPattern.from_crystal(crystal) for crystal in crystals]
               if pcomp else [])

    # Positions are used directly: list.index() returned the first occurrence
    # and so mis-addressed duplicated paths, and slicing the sub-list with the
    # absolute "end" dropped entries for negative end values.
    for i in range(count):
        for j in range(i + 1, count):

            if lktest:
                try:
                    lkarray[j][i] = round(
                        abs(
                            max(link_dic[base_names[i]]) -
                            max(link_dic[base_names[j]])), 3)
                except TypeError:
                    lkarray[j][i] = np.nan

            if pcomp:
                powd = powders[i].similarity(powders[j])
                if powd is None:
                    powdarray[j][i] = np.nan
                else:
                    powdarray[j][i] = round(powd, 3)

            if rcomp:
                comp = similarity_engine.compare(crystals[i], crystals[j])
                if comp is None:
                    rmsdarray[j][i] = np.nan
                else:
                    rmsdarray[j][i] = round(comp.rmsd, 3)

    if lktest:
        lkframe = pd.DataFrame(lkarray,
                               index=filename_list,
                               columns=filename_list)
        lkframe.to_csv(outputdir + "/link_comparison.csv",
                       index=True,
                       header=True,
                       sep=',')

    if pcomp:
        powdframe = pd.DataFrame(powdarray,
                                 index=filename_list,
                                 columns=filename_list)
        powdframe.to_csv(outputdir + "/powder_comparison.csv",
                         index=True,
                         header=True,
                         sep=',')

    if rcomp:
        rmsdframe = pd.DataFrame(rmsdarray,
                                 index=filename_list,
                                 columns=filename_list)
        rmsdframe.to_csv(outputdir + "/rmsd_comparison.csv",
                         index=True,
                         header=True,
                         sep=',')
示例#16
0
def labelIncludingNumberMols(atom1Label, stoichiometry, molIndex):
    """Return atom1Label renumbered for the molecule at position molIndex.

    The label is split into its leading element part and its first run of
    digits; the number is then shifted by stoichiometry[element] * molIndex.
    Anything following the first number is dropped.

    Args:
        atom1Label: label such as 'C1'; it must contain digits.
        stoichiometry: mapping from the element part to its count per
            molecule (int or numeric string).
        molIndex: zero-based molecule index; 0 leaves the number unchanged.

    Raises:
        ValueError: when atom1Label contains no digits.
        KeyError: when the element part is missing from stoichiometry.
    """
    # raw string: '\d' in a normal literal is an invalid escape sequence
    element1, number1 = re.split(r'(\d+)', atom1Label)[:2]
    return element1 + str(
        int(number1) + int(stoichiometry[element1]) * molIndex)


# Header text printed at the top of the generated Z-matrix definition.
blurb = '''This file contains the Z-matrix definition for each of the molecules
in the asymmetric unit of the crystal to be minimised by CrystalOptimizer.

It is absolutely necessary to contain an 'introductory' line for the Z-matrix
of each molecule followed imidiately (no empty line) by the Z-matrix
definition. The introductory line should be 'Z-matrix for molecule'
followed by the enumeration of teh molecule. The Z-matrix definition
should be done using the atom labels in exactly the same way as they
appear in the .res file.'''

# sys.argv[1]: crystal structure file, sys.argv[2]: Z-matrix file for one molecule
crystal = CrystalReader(sys.argv[1])[0]
# element -> count, taken from the formula of the first molecule
stoichiometry = dict(
    re.split(r'(\d+)', x)[:2]
    for x in crystal.molecule.components[0].formula.split())
# the file used to be opened without ever being closed
with open(sys.argv[2], 'r') as zMatrixFile:
    zMatrixString = zMatrixFile.read()
print(blurb)  #zMatrixString
# the first line of the Z-matrix file is skipped for every molecule
zMatrixLines = zMatrixString.split('\n')[1:]
for i in range(len(crystal.molecule.components)):
    print("\nZ-matrix for molecule %s" % (i + 1))
    for l in zMatrixLines:
        print("    ".join(
            [labelIncludingNumberMols(x, stoichiometry, i) for x in l.split()]))
def molcomp(start,
            end,
            pcomp=True,
            rmsd_threshold=2,
            rounding=6,
            rcomp=True,
            molcomp=True):
    """Compare all crystals in files[start:end] pairwise and write CSV matrices.

    Every pair, including each crystal with itself, is compared with
    similarity_engine.compare and by powder-pattern similarity. The matrices
    are symmetric and labelled with the bare file name minus its first four
    and last four characters.

    Args:
        start, end: slice bounds into the module-level list files.
        pcomp: write powder similarities to outputdir/powder_comparison.csv;
            99 marks a similarity of None.
        rmsd_threshold: pairs with fewer matched molecules than this get the
            marker 88 in the RMSD matrix.
        rounding: number of decimals kept for similarities and RMSDs.
        rcomp: write RMSDs to outputdir/rmsd_comparison.csv; 99 marks a
            comparison of None.
        molcomp: write matched-molecule counts to
            outputdir/rmsd_molecules_matched.csv; missing counts are written
            as 'nan'.
    """
    # The original body read file_extract without defining it and never used
    # start/end; presumably this slice was intended -- TODO confirm that no
    # module-level file_extract was relied upon instead.
    file_extract = files[start:end]

    filename_list = [str(path.split("/")[-1][4:-4]) for path in file_extract]

    l = len(file_extract)

    if molcomp:
        molarray = np.zeros((l, l))

    if pcomp:
        powdarray = np.zeros((l, l))

    if rcomp:
        rmsdarray = np.zeros((l, l))

    crystals = [CrystalReader(c)[0] for c in file_extract]

    powders = [CD.PowderPattern.from_crystal(c) for c in crystals]

    for i, j in itertools.combinations_with_replacement(
            range(len(crystals)), 2):

        comp = similarity_engine.compare(crystals[i], crystals[j])

        powd = powders[i].similarity(powders[j])

        # Values are assigned rather than added: with i == j the two "+="
        # statements of the original doubled every diagonal entry.
        if molcomp:
            # a failed comparison (None) used to raise AttributeError here,
            # which the old "except TypeError" did not catch
            nmatched = None if comp is None else comp.nmatched_molecules
            molarray[i][j] = molarray[j][i] = (99 if nmatched is None else
                                               nmatched)

        if pcomp:
            # the original wrote to the undefined name "powfarray" here
            powdarray[i][j] = powdarray[j][i] = (99 if powd is None else
                                                 round(powd, rounding))

        if rcomp:
            if comp is None:
                rmsd_value = 99
            elif comp.nmatched_molecules < rmsd_threshold:
                rmsd_value = 88
            else:
                rmsd_value = round(comp.rmsd, rounding)
            rmsdarray[i][j] = rmsdarray[j][i] = rmsd_value

    if molcomp:
        molframe = pd.DataFrame(molarray,
                                index=filename_list,
                                columns=filename_list)

        # replace() returns a new frame; the result used to be thrown away
        molframe = molframe.replace(99, 'nan')
        molframe.to_csv(outputdir + "/rmsd_molecules_matched.csv",
                        index=True,
                        header=True,
                        sep=',')

    if pcomp:
        powdframe = pd.DataFrame(powdarray,
                                 index=filename_list,
                                 columns=filename_list)

        powdframe.to_csv(outputdir + "/powder_comparison.csv",
                         index=True,
                         header=True,
                         sep=',')

    if rcomp:
        rmsdframe = pd.DataFrame(rmsdarray,
                                 index=filename_list,
                                 columns=filename_list)

        rmsdframe.to_csv(outputdir + "/rmsd_comparison.csv",
                         index=True,
                         header=True,
                         sep=',')