Exemplo n.º 1
0
    def expand_minmax(self):
        """Expand compare blocks while copying ``self.destfor`` to ``self.destcmp``.

        Lines outside a compare block are copied verbatim.  A line containing
        ``KEYWORDS['compare']`` opens a block: the operator is parsed from its
        parenthesised argument, and every ``case <var>:`` ... ``endcase``
        section found before ``KEYWORDS['endcompare']`` is collected as
        ``variable name -> raw body text`` (in order of first appearance).
        The collected block is then handed to ``self.roll_out_compare``
        together with the operator and the destination file; the block's own
        lines are not copied.

        Raises:
            ValueError: if the input ends before an open compare or case
                block is closed (previously this looped forever).
        """
        compare_format = Keyword(
            KEYWORDS['compare']) + '(' + Regex(r'[^\s\(\)]*')('op') + ')'
        case_var_format = Word(alphas + "_", alphanums + "_" + ".")('var')
        case_format = Keyword(KEYWORDS['case']) + case_var_format + ":"

        # Context managers close both files even if parsing fails part-way.
        with open(self.destfor, 'r') as sfile, \
                open(self.destcmp, 'w') as dfile:
            for line in sfile:
                if KEYWORDS['compare'] not in line:
                    dfile.write(line)
                    continue

                op = compare_format.parseString(line).op
                varlist = OrderedDict()
                while True:
                    # next(..., '') advances the same iterator as the outer
                    # loop and yields '' once the file is exhausted.
                    inner = next(sfile, '')
                    if not inner:
                        raise ValueError(
                            'unexpected end of file inside a compare block')
                    if KEYWORDS['endcompare'] in inner:
                        break
                    if KEYWORDS['case'] not in inner:
                        # Anything else between cases is dropped.
                        continue

                    var = case_format.parseString(inner).var
                    body = []
                    case_line = next(sfile, '')
                    while KEYWORDS['endcase'] not in case_line:
                        if not case_line:
                            raise ValueError(
                                'unexpected end of file inside a case block')
                        body.append(case_line)
                        case_line = next(sfile, '')
                    varlist[var] = ''.join(body)
                self.roll_out_compare(varlist, op, dfile)
Exemplo n.º 2
0
    def expand_sync(self):
        """Replace sync/mirror sections while copying ``self.destbool`` to ``self.dest``.

        A line containing ``KEYWORDS['sync']`` or ``KEYWORDS['mirror']`` opens
        a section and is parsed as ``<keyword>(<header>.<field>, <number>)``;
        the matching counter (``self.sync_id`` / ``self.mirror_id``) is
        incremented.  Lines inside an open section are collected (stripped)
        instead of being copied.  The closing keyword passes the collected
        lines to ``self.write_sync_action`` / ``self.write_mirror_action`` and
        writes ``APPLY_SYNC_STRING`` / ``APPLY_MIRROR_STRING`` formatted with
        the closing line's indentation and the current id.  Every other line
        is copied verbatim.
        """

        def annotation_format(keyword):
            # Both annotations share the same argument syntax.
            return Keyword(keyword) + '(' + Regex(r'[_a-zA-Z]*')(
                "header_name") + "." + Regex(r'[^\s\(\),]*')(
                    "field") + "," + Word(nums)("val") + ')'

        def leading_whitespace(text):
            return text[:len(text) - len(text.lstrip())]

        sync_format = annotation_format(KEYWORDS['sync'])
        mirror_format = annotation_format(KEYWORDS['mirror'])

        active_sync, active_mirror = False, False
        fields, field_name, val = [], None, None

        # Context managers close both files even if parsing fails part-way.
        with open(self.destbool, 'r') as sfile, \
                open(self.dest, 'w') as dfile:
            for line in sfile:
                if KEYWORDS['sync'] in line:
                    res = sync_format.parseString(line)
                    field_name = res.header_name + '.' + res.field
                    val = res.val
                    active_sync = True
                    self.sync_id += 1

                elif KEYWORDS['mirror'] in line:
                    res = mirror_format.parseString(line)
                    field_name = res.header_name + '.' + res.field
                    val = res.val
                    active_mirror = True
                    self.mirror_id += 1

                elif KEYWORDS['endsync'] in line:
                    active_sync = False
                    self.write_sync_action(fields, self.sync_id, field_name,
                                           val)
                    fields = []
                    dfile.write(APPLY_SYNC_STRING % (leading_whitespace(line),
                                                     self.sync_id))

                elif KEYWORDS['endmirror'] in line:
                    active_mirror = False
                    self.write_mirror_action(fields, self.mirror_id,
                                             field_name, val)
                    fields = []
                    dfile.write(APPLY_MIRROR_STRING % (
                        leading_whitespace(line), self.mirror_id))

                elif active_sync or active_mirror:
                    fields.append(line.strip())

                else:
                    dfile.write(line)
Exemplo n.º 3
0
class DSLParser:
    """pyparsing grammar for programs of the form ``def run() { ... }``.

    The grammar is assembled once in ``__init__``; ``parse_string`` applies
    it to a complete program.
    """

    def __init__(self, n, colors, shapes, max_constant=5):
        """
        :param n: length of side of the grids
        :param colors: list of color names
        :param shapes: list of shape names
        :param max_constant: exclusive upper bound of the integer literals
            accepted by ``repeat(...)``; the accepted values are
            ``1 .. max_constant - 1``
        """
        # Terminals: a color/shape is one of the given names or the
        # corresponding marker getter.
        self.colors = Or([Keyword(name) for name in colors])
        self.colors ^= Keyword("getMarkerColor()")
        self.shapes = Or([Keyword(name) for name in shapes])
        self.shapes ^= Keyword("getMarkerShape()")
        self.positions = Or([Keyword(str(idx)) for idx in range(n)])
        self.constants = Or(
            [Keyword(str(value)) for value in range(1, max_constant)])

        # Actions: the two-argument move, then the parameterless calls in
        # priority order.
        actions = "move(" + self.positions + "," + self.positions + ")"
        for call in ("moveUp()", "moveDown()", "moveLeft()", "moveRight()",
                     "moveTop()", "moveBottom()", "moveLeftmost()",
                     "moveRightmost()", "moveToMovableMarker()",
                     "pickMarker()", "putMarker()", "fixMarker()"):
            actions = actions | call
        self.actions = actions

        # Conditions: equality tests first, then the predicate calls; a
        # condition may additionally be negated once with ``not``.
        plain = Group(self.shapes + "==" + self.shapes)
        plain = plain | Group(self.colors + "==" + self.colors)
        for predicate in ("markersPresent()", "movableMarkersPresent()",
                          "existMovableMarkers()", "upperBoundary()",
                          "lowerBoundary()", "leftBoundary()",
                          "rightBoundary()", "true"):
            plain = plain | predicate
        self.conditions = plain | Group(Keyword("not") + plain)

        # Statements are mutually recursive with blocks, hence the Forward.
        block = Forward()
        while_stmt = Group(
            Keyword("while") + "(" + self.conditions + ")" + "{" +
            Group(block) + "}")
        repeat_stmt = Group(
            Keyword("repeat") + "(" + self.constants + ")" + "{" +
            Group(block) + "}")
        if_stmt = Group(
            Keyword("if") + "(" + self.conditions + ")" + "{" +
            Group(block) + "}")
        ifelse_stmt = Group(
            Keyword("ifelse") + "(" + self.conditions + ")" + "{" +
            Group(block) + "}" + Keyword("else") + "{" +
            Group(block) + "}")
        action_stmt = Group(self.actions + ";")
        stmt = (while_stmt | repeat_stmt | if_stmt | ifelse_stmt
                | action_stmt)
        block << OneOrMore(stmt)

        self.statements = block
        self.program = (Keyword("def") + Keyword("run()") + "{"
                        + Group(self.statements) + "}")

    def parse_string(self, program):
        """Parse ``program`` in full and return the pyparsing result."""
        return self.program.parseString(program, parseAll=True)
Exemplo n.º 4
0
        molecule_name = os.path.basename(elem)
        molecule_name = os.path.splitext(molecule_name)[-2]

        # parse lines for data extraction
        for line in lines:

            # find out units of distance in file
            try:
                res1 = split_bohr.parseString(line)
                if res1[1] == 'Bohr':
                    unit_array.append(res1[1])
            except Exception:
                print('No units given in this line')
            try:
                res2 = split_angstrom.parseString(line)
                if res2[1] == 'Angstrom':
                    unit_array.append(res2[1])

            except Exception:
                print('No units given in this line')

            try:
                parsed_lines = clt_parser.parseString(line)
                list_conversion = list(parsed_lines)
                df = df.append(
                    {
                        'label': list_conversion[0][0],
                        'atomic_number': float(list_conversion[1]),
                        'x': float(list_conversion[2]),
                        'y': float(list_conversion[3]),
Exemplo n.º 5
0
def extract_moments(file_directory):
    """Collect per-atom multipole moments from every ``.mom`` file under a directory.

    The directory tree is walked recursively.  Each file is read line by
    line and three kinds of lines are recognised:

    * a ``! Based on DF-type :`` header (the DF type is recorded),
    * an atom line ``ATOM  X  Y  Z  Type <ATOM-TYPE>  Rank K``,
    * a line starting with floating point numbers (moment values).

    For every atom, in file order, the moment values are consumed in layers
    ``Q0 .. QK`` holding ``1, 3, 5, ..., 2K + 1`` values, and rendered as a
    table with one column per layer (short layers are padded with NaN).

    :param file_directory: root directory searched for ``.mom`` files
    :return: ``{'moments': [{<atom>: {'atom', 'scheme', 'type', 'rank',
        'moments', 'file'}}, ...]}`` with one entry per atom line;
        ``'moments'`` is the table rendered as a list of text lines,
        ``'scheme'`` the file name without extension and ``'rank'`` the rank
        token as read from the file (a string)
    :raises ValueError: if a file holds fewer moment values than its atom
        lines require
    """
    # collect the path of every .mom file in the hierarchy
    mom_files = []
    for root, _dirs, files in os.walk(file_directory):
        for file_name in files:
            if os.path.splitext(file_name)[1] == '.mom':
                mom_files.append(root + '/' + file_name)

    # define parsing grammar
    # find the df type from .mom file
    df_parser = Keyword("! Based on DF-type :") + Word(alphas)

    # parse floating point numbers
    float_parser = Combine(Optional('-') + Word(nums) + '.' + Word(nums))
    mom_parser = OneOrMore(float_parser)

    # ATOM  X   Y   Z   Type <ATOM-TYPE>   Rank K
    atom_line = Word(alphas + nums) + mom_parser + OneOrMore(Word(alphas)) + Word(nums)

    # Independent accumulators: the previous ``([],)*4`` bound all names to
    # one shared list.
    # NOTE(review): these two are collected but not part of the return value.
    error_array = []
    df_array = []

    json_result = {'moments': []}

    for elem in mom_files:

        with open(elem, 'r') as file_object:
            lines = file_object.readlines()

        mom_name = os.path.splitext(os.path.basename(elem))[0]

        # per-file state, so one file's atoms/values never leak into the next
        atoms = []    # (name, atom type, rank token) per atom line
        values = []   # every moment value of the file, in reading order

        for line in lines:

            # get df type first
            try:
                res1 = df_parser.parseString(line)
                df_array.append(res1[1])
            except Exception:
                print('No DF-Type is specified in file')
                error_array.append('No DF-Type is specified in file')

            # get information about each atom; the tuple is built before it
            # is stored so a malformed line leaves no partial record
            try:
                res2 = atom_line.parseString(line)
                atoms.append((res2[0], res2[5], res2[7]))
            except Exception:
                print('This is not a atom type line')

            # get moment values
            try:
                res3 = mom_parser.parseString(line)
                values.extend([float(token) for token in res3])
            except Exception:
                print('This is not a moment value')

        # hand each atom its share of the values, in file order
        cursor = 0
        for name, atom_type, rank in atoms:

            # layer l of the expansion holds 2l + 1 values (Q0 .. Q<rank>)
            layer_sizes = [2 * order + 1 for order in range(int(rank) + 1)]
            needed = sum(layer_sizes)
            if cursor + needed > len(values):
                raise ValueError(
                    f'{elem}: not enough moment values for atom {name} '
                    f'(rank {rank})'
                )

            layers = []
            for size in layer_sizes:
                layers.append(values[cursor:cursor + size])
                cursor += size

            # pad short layers with NaN so they form a rectangular table
            widest = layer_sizes[-1]
            padded = [layer + [np.nan] * (widest - len(layer)) for layer in layers]
            arr = np.asarray(padded).T

            # row labels: '0', then an s/c pair for every further layer
            indices = ['0']
            for order in range(1, len(layers)):
                indices.extend([f'{order}s', f'{order}c'])

            df_mom = pd.DataFrame(
                arr,
                columns=[f'Q{order}' for order in range(len(layers))],
                index=indices
            )

            mom_data = {
                'atom': name,
                'scheme': mom_name,
                'type': atom_type,
                'rank': rank,
                'moments': df_mom.to_string().splitlines(),
                'file': elem
            }
            json_result['moments'].append({name: mom_data})

    return json_result
Exemplo n.º 6
0
def extract_coordinates(file_directory):
    """Read atomic coordinates from the ``.clt`` / ``.xyz`` files of a directory.

    ``file_directory`` is used as a plain prefix for the glob patterns
    (``file_directory + '*.clt'``), so it is expected to end with a path
    separator.

    Rows of all ``.clt`` files are gathered in one data frame.  When exactly
    one unit line has been seen and it says ``Bohr``, the coordinates are
    divided by 1.89; when two unit lines have been seen the unit is reported
    as ``'Unknown'`` and an error is recorded.  ``.xyz`` files are parsed
    into one frame per file.

    NOTE(review): as before, only the ``.clt`` frame is part of the returned
    summary; the per-file frames (including all ``.xyz`` data) are collected
    in ``molecules_from_files`` but not returned.

    :param file_directory: glob prefix of the directory to scan
    :return: ``{'coordinates': {'file', 'data frame', 'errors', 'units',
        'coordinates'}}``; ``'units'`` is ``'Unknown'`` when no unit line was
        found (this used to raise ``IndexError``)
    """
    columns = ['label', 'atomic_number', 'x', 'y', 'z']
    # conversion factor applied to Bohr coordinates (value kept as-is)
    bohr_divisor = 1.89

    # list molecule files
    clt_files = glob.glob(file_directory + '*.clt')
    xyz_files = glob.glob(file_directory + '*.xyz')

    # dict to host one data frame per molecule file
    molecules_from_files = {}

    # definitions of all the parsing tools used
    clt_parser = Word(alphas + nums) + Word(nums + '.' + nums) + Word(printables + '.' + printables) + \
                 Word(printables + '.' + printables) + Word(printables + '.' + printables)

    xyz_parser = Word(alphas) + Word(printables + '.' + printables) + Word(printables + '.' + printables) + \
                 Word(printables + '.' + printables)

    bohr = Keyword('Bohr')
    angstrom = Keyword('Angstrom')
    word = ~bohr + Word(alphas)
    sentence = OneOrMore(word)
    split_bohr = sentence('unit') + bohr + sentence('degree')
    split_angstrom = Keyword('Units') + angstrom
    unit_array = []
    error_array = []

    # empty dataframe to host data
    df = pd.DataFrame(data={'label': [], 'atomic_number': [], 'x': [], 'y': [], 'z': []})

    # extract information from .clt files (a no-op when there are none, so
    # the former files / no-files branches collapse into one code path)
    for elem in clt_files:

        with open(elem, 'r') as file_object:
            lines = file_object.readlines()

        molecule_name = os.path.splitext(os.path.basename(elem))[-2]

        rows = []
        for line in lines:

            # find out units of distance in file
            try:
                res1 = split_bohr.parseString(line)
                if res1[1] == 'Bohr':
                    unit_array.append(res1[1])
            except Exception:
                print('invalid unit line')
            try:
                res2 = split_angstrom.parseString(line)
                if res2[1] == 'Angstrom':
                    unit_array.append(res2[1])
            except Exception:
                print('not valid data line')

            try:
                tokens = list(clt_parser.parseString(line))
                rows.append(
                    {'label': tokens[0][0], 'atomic_number': float(tokens[1]),
                     'x': float(tokens[2]), 'y': float(tokens[3]),
                     'z': float(tokens[4])})
                print('valid data line')
            except Exception:
                print('not valid data line')

        # DataFrame.append was removed in pandas 2.0: add the file's rows in
        # one step instead of one frame copy per row
        if rows:
            new_rows = pd.DataFrame(rows, columns=columns)
            if df.empty:
                df = new_rows
            else:
                df = pd.concat([df, new_rows], ignore_index=True)

        # perform necessary unit conversions
        if len(unit_array) == 1:
            if unit_array[0] == 'Angstrom':
                print('Units are Angstrom')
            elif unit_array[0] == 'Bohr':
                df['x'] = df['x'] / bohr_divisor
                df['y'] = df['y'] / bohr_divisor
                df['z'] = df['z'] / bohr_divisor
        # ambiguous case, just have to assume it's bohr but add line in summary file explaining ambiguity
        elif len(unit_array) == 2:
            print('ambiguous units')
            unit_array.clear()
            unit_array.append('Unknown')
            error_array.append('Ambiguous unit in file.')

        molecules_from_files[str(molecule_name)] = df

    # reported once, not once per file
    if len(clt_files) > 1:
        print('there are too many clt files, ambiguous')
        error_array.append('There are too many .clt files, see file origin above for file used.')

    # extract information from .xyz files
    for elem in xyz_files:

        with open(elem, 'r') as file_object:
            lines = file_object.readlines()

        molecule_name = os.path.splitext(os.path.basename(elem))[-2]

        rows = []
        for line in lines:
            try:
                tokens = list(xyz_parser.parseString(line))
                # look the atomic number up by element symbol
                matches = atom_database.database[atom_database.database['symbol'] == tokens[0]]
                atomic_number = matches.iloc[0][0]
                rows.append(
                    {'label': tokens[0], 'atomic_number': atomic_number,
                     'x': float(tokens[1]), 'y': float(tokens[2]),
                     'z': float(tokens[3])})
                print('valid data line')
            except Exception:
                print('not valid data line')

        if rows:
            df_2 = pd.DataFrame(rows, columns=columns)
        else:
            df_2 = pd.DataFrame(data={'label': [], 'atomic_number': [], 'x': [], 'y': [], 'z': []})
        molecules_from_files[str(molecule_name)] = df_2

    if len(error_array) == 0:
        error_array.append('No errors')

    # no unit line at all is reported like the ambiguous case instead of
    # failing on unit_array[0]
    units = unit_array[0] if unit_array else 'Unknown'

    # summary object describing the .clt coordinates
    coord_data = {

        'coordinates': {

            'file': file_directory,
            'data frame': df.to_json(),
            'errors': error_array,
            'units': units,
            'coordinates': df.to_string().splitlines()

        }

    }

    return coord_data
Exemplo n.º 7
0
# Parse every .mom file line by line with the pyparsing grammars, recording
# the DF type lines and one data-frame row per atom line.
for elem in mom_files:

    # read the whole file; the context manager closes the handle even when
    # reading fails (it was previously left open)
    with open(elem, 'r') as file_object:
        lines = file_object.readlines()

    # file name without directory or extension
    mom_name = os.path.basename(elem)
    mom_name = os.path.splitext(mom_name)[-2]

    # parse lines for data extraction
    for line in lines:

        # get df type first; any line that does not match is reported
        try:
            res1 = df_parser.parseString(line)
            df_array.append(res1[1])
        except Exception:
            print('No DF-Type is specified in file')
            error_array.append('No DF-Type is specified in file')

        # get information about each atom
        try:
            res2 = atom_line.parseString(line)
            atom.append(res2[0])
            # NOTE(review): ``type`` shadows the builtin for the rest of the
            # module; the name is kept because later code may rely on it.
            type = res2[5]
            rank = (res2[7])
            # NOTE(review): DataFrame.append was removed in pandas 2.0;
            # switch to pd.concat once the frame's definition is in view.
            df = df.append({'atom': atom[-1], 'type': type, 'rank': rank}, ignore_index=True)

        except Exception:
            print('This is not a atom type line')
Exemplo n.º 8
0
    # molecule name = file name without directory or extension
    molecule_name = os.path.basename(elem)
    molecule_name = os.path.splitext(molecule_name)[-2]

    # parse lines for data extraction
    for line in lines:

        # find out units of distance in file
        try:
            res1 = split_bohr.parseString(line)
            if res1[1] == 'Bohr':
                unit_array.append(res1[1])
        except Exception:
            print('invalid unit line')
        try:
            # was ``split_angrstom``: the resulting NameError was swallowed
            # by the handler below, so Angstrom was never detected
            res2 = split_angstrom.parseString(line)
            if res2[1] == 'Angstrom':
                unit_array.append(res2[1])

        except Exception:
            print('not valid data line')

        try:
            parsed_lines = clt_parser.parseString(line)
            list_conversion = list(parsed_lines)
            df = df.append(
                {
                    'label': list_conversion[0][0],
                    'atomic_number': float(list_conversion[1]),
                    'x': float(list_conversion[2]),
                    'y': float(list_conversion[3]),