Example #1
    def _position_values(self):
        csv_data = CsvObject(self.csv_path, [str, float, float, float])

        if csv_data.row_length != 4:
            msg = f"Csv file should contain phenotype, coefficient, lower bound, upper bound yet found" \
                  f" {csv_data.row_length} rows"
            raise IndexError(msg)

        # Normalise the values for the table plot with the axis target added so we know where to draw the axis
        numerical_values = flatten([row[1:] for row in csv_data.row_data])
        normalised_value_list = normalisation_min_max(numerical_values +
                                                      [self.axis_target])

        # Isolate the axis and normal array, then chunk the normal array back into the coefficient, lower bound and
        # upper bound
        x_axis_point = normalised_value_list[-1]
        normal_array = chunk_list(normalised_value_list[:-1], 3)

        # Format the rows so we have actual and positional values for each numeric entry
        formatted_rows = []
        for row, normalised in zip(csv_data.row_data, normal_array):
            formatted_rows.append(
                flatten([[row[0]]] + [[row[i + 1], normalised[i]]
                                      for i in range(3)]))
        return formatted_rows, x_axis_point
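The helpers used above (CsvObject, flatten, chunk_list, normalisation_min_max) come from the author's support libraries. A minimal sketch of the normalise-then-chunk data flow, using stand-in implementations that are assumptions about those helpers rather than the real code, is:

# Stand-ins assumed to mirror the support library's helpers (not the original implementations)
def flatten(nested):
    return [item for sub in nested for item in sub]

def chunk_list(values, chunk_size):
    return [values[i:i + chunk_size] for i in range(0, len(values), chunk_size)]

def normalisation_min_max(values):
    low, high = min(values), max(values)
    return [(value - low) / (high - low) for value in values]

# One csv row per phenotype: [name, coefficient, lower bound, upper bound] (values made up)
row_data = [["BMI", 0.2, 0.1, 0.3], ["Height", -0.4, -0.6, -0.2]]
axis_target = 0

numerical_values = flatten([row[1:] for row in row_data])
normalised_value_list = normalisation_min_max(numerical_values + [axis_target])

x_axis_point = normalised_value_list[-1]                  # where to draw the axis
normal_array = chunk_list(normalised_value_list[:-1], 3)  # back to [coef, lower, upper] per row
print(x_axis_point, normal_array)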
Example #2
def access_weighted(data_extraction_set, data_requested):
    """
    This will use a set of keys to extract data from a weightGIS json database so that it can be written out or used in
    applications or processes.

    Note
    -----
    This requires the json dict to follow the weightGIS v0.9 or above json format of the following for each entry:

    gid__place: {attribute1: {date1: value1 ... dateN: valueN} ... attributeN: {date1: value1 ... dateN: valueN}}

    :param data_extraction_set: The loaded json database
    :type data_extraction_set: dict

    :param data_requested: The attributes you would like to extract from each place
    :type data_requested: list

    """
    all_dates = []
    for place, attributes in data_extraction_set.items():
        place_dates = []

        # Isolate each element's keys, which should be the dates, as long as the data exists and is a dict
        for data in data_requested:
            if data in attributes and isinstance(attributes[data], dict):
                place_dates.append(list(attributes[data].keys()))

        all_dates.append(flatten(place_dates))

    common_dates = sorted(list(set(flatten(all_dates))))

    row_data = []
    for place, d_attr in data_extraction_set.items():

        # The place name will be a gid__place which we want to extract here
        place_names = place.split("__")
        gid, place = place_names[0], "_".join(place_names[1:])

        # Then for each date we setup our first two columns of place-date
        for date in common_dates:
            date_values = [gid, place, date]

            # And for each attribute requested, we extract the value if the date is valid, else NA
            for attr in data_requested:
                if attr in d_attr and isinstance(d_attr[attr], dict) and date in d_attr[attr]:
                    date_values.append(d_attr[attr][date])
                else:
                    date_values.append("NA")
            row_data.append(date_values)

    print(f"Retrieved Weighted Data {terminal_time()}")
    return row_data
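The Note above describes the expected database layout; a hypothetical entry following that gid__place schema (place names, attributes, and values are made up purely for illustration) looks like this:

# Hypothetical weightGIS-style database following the gid__place schema in the Note above
data_extraction_set = {
    "1__London": {
        "Population": {"1931": 1000, "1951": 1200},
        "Area": {"1931": 300, "1951": 300},
    },
    "2__York": {
        "Population": {"1931": 80},  # no 1951 entry for this attribute
        "Area": {"1931": 10, "1951": 10},
    },
}

# access_weighted(data_extraction_set, ["Population", "Area"]) would then return rows of
# [gid, place, date, Population, Area], with "NA" filled in where a date is missing, e.g.
# ["2", "York", "1951", "NA", 10]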
Example #3
    def construct_lookup(self, write_directory, write_name):
        """
        This will construct a geo-relation csv from a base shapefile relative to a list of other shapefiles based on
        intersection of geometry. For this to work your base shapefile must be the lowest level, otherwise you will end
        up with a large amount of ambiguity

        :param write_directory: Where to save this csv
        :param write_name: What to call this csv
        :return: Nothing, write file then stop
        :rtype: None
        """
        for i, (place,
                record) in enumerate(zip(self.base.polygons,
                                         self.base.records)):
            if i % 100 == 0:
                print(f"{i}/{len(self.base.records)}")

            # Set the place records via the first index as well as the area for the lowest order shape
            name_base = self._index_record(record, self.base_index, place)

            # Then do the same for all the other shapes that intersect with this shape
            match_names = [
                self._find_matches(place, match_shape,
                                   indexes) for match_shape, indexes in zip(
                                       self.others, self.other_indexes)
            ]

            self._place_data.append(flatten([name_base] + match_names))

        write_csv(write_directory, write_name, self.headers, self._place_data)
        print(f"Constructed GeoRelations {terminal_time()}")
Example #4
    def _sub_unit_weight(self, sub_unit_file, within_search_polygon, weight_index):
        """
        This configures the input file and then searches for underlying polygons within the current
        within_search_polygon. Returns the weight associated with this polygon and the weight set of polygons

        :param sub_unit_file: The external ShapeObject to read in and check, or a list of the polygons found within
            this external ShapeObject
        :type sub_unit_file: ShapeObject | list

        :param within_search_polygon: The current polygon to search within
        :type within_search_polygon: Polygon | MultiPolygon

        :param weight_index: For external data, the index position of the weights in the attribute table. Not required
            for the internal.
        :type weight_index: int | None

        :return: The weight associated with this polygon and the weight set of polygons
        :rtype: tuple[float, list[list[float, Polygon]]]
        """
        sub_units, record_list, sub_shape_polygons, current_shape = self._configure_sub_shapes(
            sub_unit_file, within_search_polygon, weight_index)

        weighted_set = [self._select_under(poly, sub_shape_polygons, record_list, sub_units) for poly in current_shape]
        weight = sum([area for area, _ in flatten(weighted_set)])
        return weight, weighted_set
Example #5
    def _sub_weight(self, base_shape, area_weight):
        """
        Calculates and returns the sub unit weight for each overlapping shape

        If the overlap percentage isn't 100 then the population weight can be calculated from the under-lapping shapes.
        First all the parts of the under-lapping shapes of the overlapping district are isolated to calculate the total
        value of the sub weight. Then, the parts of this shape that overlap the base_shape we are indexing to are
        isolated. The percentage weight is then just the value of (weight_of_base / weight_of_match) * 100.

        :param base_shape: The current shape in the base shapefile we are indexing to
        :type base_shape: Polygon | MultiPolygon

        :param area_weight: The currently calculated values from the area weights
        :type area_weight: list[list[int, str, float, Polygon | MultiPolygon]]

        :return: A list of lists, where each sub list is composed of the overlap id, name, area weight, and sub unit
            weight
        :rtype: list[list[int, str, float, float]]
        """
        reformatted_weights = []
        for place_key, place_name, overlap_percentage, overlap in area_weight:
            if int(overlap_percentage) != 100:
                weight_of_match, weighted_set = self._sub_unit_weight(self.sub_units, overlap, self._weight_index)
                weight_of_base, _ = self._sub_unit_weight(flatten(weighted_set), base_shape, None)

                sub_unit_weight = (weight_of_base / weight_of_match) * 100
                reformatted_weights.append([place_key, place_name, overlap_percentage, sub_unit_weight])

            else:
                reformatted_weights.append([place_key, place_name, overlap_percentage, 100.0])

        return reformatted_weights
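A quick worked example of the ratio above (numbers made up): if the sub units under the overlapping shape carry a total weight of 400, and the parts of those sub units that fall inside base_shape carry 100, the sub unit weight is 25 percent:

weight_of_match = 400.0  # total sub-unit weight under the overlapping shape (illustrative)
weight_of_base = 100.0   # the share of that weight falling inside base_shape (illustrative)
sub_unit_weight = (weight_of_base / weight_of_match) * 100
print(sub_unit_weight)   # 25.0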
Example #6
    def position_extract(self, position, merge_nested=False):
        """
        Sometimes you may wish to extract the most extreme position of a Polygon, such as the left, right, top or bottom
        most point of it. This will extract this point for each shape within the polygon.

        If merge_nested is set to True, then only the most extreme point of the list of extreme points will be
        returned

        :param position: One of "left", "right", "top" or "bottom"
        :param merge_nested: By default the extreme position of each shape is returned; if True, only the single most
            extreme point is returned

        :return: A list of Vector2D points if not set to merge, else a single Vector2D
        :rtype: list[Vector2D] | Vector2D
        """
        assert position in ("left", "right", "top", "bottom"), "position takes the value of left, right, top or bottom"

        # For each possible shape within our collection of shapes, extract the extreme position
        points_list = []
        if isinstance(self.polygon, GeometryCollection):
            for shape in self.polygon:
                points_list.append(self._extract_extreme_position(position, shape))

        else:
            points_list.append(self._extract_extreme_position(position, self.polygon))

        # Flatten the list, return it if not set to merge otherwise construct another ContourObject and get the extreme
        # from this list of points
        points_list = flatten(points_list)
        if merge_nested:
            return getattr(ContourObject(shapely_points_to_array(points_list)), position)
        else:
            return points_list
Example #7
    def _create_center_line(self):

        # Isolate the center boxes for the center line
        center_line = [box for box in self.box_names if box.split("-")[0] == "0"]

        # Select the center line objects and then duplicate and merge them to create the lines
        for line in center_line:
            obj = bpy.data.objects[line]
            obj.select_set(True)

        bpy.ops.object.duplicate_move(OBJECT_OT_duplicate={"linked": False, "mode": 'TRANSLATION'})
        bpy.ops.object.join()

        # Select the temp object, then chunk the y coords of the verts into 8s due to the inset face
        temp_obj = bpy.context.selected_objects[0]
        cords = chunk_list(sorted([v.co[1] for v in temp_obj.data.vertices]), 8)

        # Create the vert list
        vert_list = []
        for i, v in enumerate(cords):
            if i > 0:
                vert_list.append([(0 - (self.line_width / 2), max(cords[i - 1]), -self.line_width),
                                  (0 - (self.line_width / 2), min(v), -self.line_width),
                                  (0 + (self.line_width / 2), min(v), -self.line_width),
                                  (0 + (self.line_width / 2), max(cords[i - 1]), -self.line_width)])

        # Create the center line object, set the origin to the location of the temp object then delete it
        face_list = chunk_list([i for i in range(len(vert_list) * 4)], len(vert_list))
        box_obj, mesh = make_mesh("TestJoin", self.box_colour)
        mesh.from_pydata(flatten(vert_list), [], face_list)
        box_obj.location = temp_obj.location
        bpy.ops.object.delete()
Example #8
    def construct_reference(self,
                            base_weights_name="LookupBase.csv",
                            alternative_key="Unique"):
        """
        Construct a reference of every name for every place at every level within the Lookup Base

        :param base_weights_name: Name of the base weights file
        :type base_weights_name: str

        :param alternative_key: Key within files that contains alternative names
        :type alternative_key: str

        :return: Nothing, write out place reference csv then stop
        :rtype: None
        """

        # Load the lookup base
        base_relation = CsvObject(Path(self._working_dir, base_weights_name))

        # Load alternative files
        alt_files = [
            CsvObject(Path(self._working_dir, file), set_columns=True)
            for file in directory_iterator(self._working_dir)
            if alternative_key in file
        ]

        # Order them in the same manner as the headers
        order = [
            index for header in self._headers
            for index, file in enumerate(alt_files) if header in file.file_name
        ]
        alt_files = np.array(alt_files)[order].tolist()

        # Link each row to a unique list to create the reference place look up file
        rows = [
            flatten([[row[0]]] + [
                self._match_row(match, match_file)
                for match, match_file in zip(row[1:], alt_files)
            ]) for row in base_relation.row_data
        ]

        write_csv(self._working_dir, "PlaceReference",
                  ["GID"] + flatten([file.headers for file in alt_files]),
                  rows)
Example #9
def main():
    write_directory = r"I:\Work\Figures_and_tables\BIO-HGIS"
    frame_dict = load_json(r"I:\Work\BIO-HGIS\Releases\Json\GBHD.txt")

    attributes = sorted(list(set(flatten([[vv for vv in v.keys()] for v in frame_dict.values()]))))
    attributes = [attr for attr in attributes if attr != 'GID']

    colours = [(0.05, 0.05, 0.05, 1)] + [(0.15 + i / 10, 0.15 + i / 10, 0.15 + i / 10, 1) for i in range(8)] + [
        (1, 1, 1, 1)]
    colours = colours[::-1]

    dates = sorted(list(set(flatten(
        [flatten([v.keys() for k, v in value.items() if k != 'GID']) for value in frame_dict.values()]))))

    obj = bpy.context.scene.objects.get('Districts')
    obj.select_set(True)
    place_dict = {colour.name: colour.node_tree.nodes.get('Emission') for colour in obj.data.materials}

    for attr in attributes:
        print(attr)
        _make_directory(write_directory, attr)

        for d in dates:

            colour_dict, q_values = _create_colour_dict(frame_dict, attr, d, colours)
            if colour_dict:
                bpy.ops.object.select_all(action='DESELECT')

                for i, text in enumerate(q_values, 1):
                    _change_element_colour(f"Q{i}", colours[i - 1])
                    _change_element_colour(f"Q{i}T", colours[i - 1])
                    _change_text(f"Q{i}T", text)

                bpy.ops.object.select_all(action='DESELECT')
                for place, colour in colour_dict.items():
                    place_dict[place].inputs[0].default_value = colour

                bpy.context.scene.render.filepath = str(Path(write_directory, attr, f"{d}.png").absolute())
                bpy.context.scene.eevee.use_gtao = True
                bpy.context.scene.render.film_transparent = True
                bpy.ops.render.render(write_still=True)
Example #10
    def __init__(self, args):

        # Load the args from the yaml file
        self.args = load_yaml(args)
        self.write_dir = self.args["output_directory"]

        # Set the gen file info, set the output path for the memory files, and load the file reference
        self.gen_type = self.args["gen_type"]
        self.gen_directory = self.args["path_to_gen_files"]
        self.target_chromosome = self.args["target_chromosome"]
        self.file_name = f"{self.args['output_name']}_Chr{self.target_chromosome}"
        custom_meta_path(validate_path(self.args["memory_file_location"]))

        # Setup logger and system variables
        self.logger = FileOut(self.write_dir, self.file_name, "log", True)
        self.logger.write(f"Setup {terminal_time()}")
        self.iter_size = self.args["array_size"]
        self.start_index = self.args["start_index"]

        # Variable info, load the genetic reference, and sort both it and the external variables so they match on iid
        self.phenotype = self.args["phenotype"]
        self.covariant = self.args["covariant"]
        self.gen, self.df, self.genetic_iid = self._setup_variables()
        self.total_obs = len(self.df)
        self.logger.write(
            f"Set {self.gen.iid_count} in Genetic file and {len(self.df)} in variable file for "
            f"{self.phenotype}~{self.covariant}")

        # Check that we only have a single version of the phenotypic columns; if the file already contained one of
        # these names, this could be why we now have duplicates
        if len(self.df[f"{self.phenotype}RES"].shape) > 1:
            self.logger.write(
                f"Found a duplicated column for phenotypic residuals, removing"
            )
            self.df = self.df.loc[:, ~self.df.columns.duplicated()]

        # Set output file
        self.output = FileOut(validate_path(self.write_dir), self.file_name,
                              "csv")
        headers = [[
            f"M{i}_{h}" for h in [
                "coef", "std_err", "pvalue", "obs", "r2", "chi2tail",
                "95%lower", "95%upper"
            ]
        ] for i in range(1, 5)]
        self.output.write_from_list(["Snp"] + flatten(headers))

        # Start the validation GWAS
        self.residual_gwas()
        self.logger.write(f"Finished predefined {terminal_time()}")
Example #11
    def add_element(self, box_text, column, row):
        """
        Add an element to the Prisma Plot

        :param box_text: The text you want to add
        :type box_text: str

        :param column: The column index position you want to use
        :type column: int

        :param row: The row index position you want to use
        :type row: int

        :return: Nothing, append element to plot_dict then stop
        :rtype: None
        """

        # Split the text on line width, then add in custom breaks
        text_list = [
            textwrap.wrap(text, self.line_width)
            for text in box_text.split("\n")
        ]
        text_list = flatten(
            [text if len(text) > 0 else [""] for text in text_list])

        # Add a new line at the end of each element in the list, bar the last element, then add this to the plot dict
        out_text = "".join([
            f"{text}\n" if i != len(text_list) - 1 else text
            for i, text in enumerate(text_list)
        ])
        self.plot_dict[f"{column}-{row}"] = {
            "Text": out_text,
            "Col": column,
            "Row": row
        }

        if self._details_print:
            print(f"{out_text}\n{'-' * self.line_width}")
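The wrap-and-rejoin step above uses only the standard library's textwrap; a standalone sketch (flatten is a stand-in assumed to match the support library, and the text and line width are made up) is:

import textwrap

# Stand-in assumed to mirror the support library's flatten
def flatten(nested):
    return [item for sub in nested for item in sub]

line_width = 20
box_text = "Records identified through database searching\n\nRecords after duplicates removed"

# Wrap each manual line to line_width; empty lines survive as [""]
text_list = [textwrap.wrap(text, line_width) for text in box_text.split("\n")]
text_list = flatten([text if len(text) > 0 else [""] for text in text_list])

# Join with a newline after every element bar the last
out_text = "".join([f"{text}\n" if i != len(text_list) - 1 else text
                    for i, text in enumerate(text_list)])
print(out_text)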
Example #12
    def pgs_filter_snps(self, write=True):
        """
        Large numbers of snps and individuals can lead to significant memory issues. This will filter the snps in
        chunks, thus allowing it to run with less memory
        """

        # Construct the reference panel
        gen_file = self.construct_reference_panel()
        t0 = time.time()

        # Chunk the snps, freqs, and bp positions so we can load raw dosage data in a memory conscious way
        sm_dict = self.sm_dict_from_csv(
            self.summary_directory, f"Cleaned_{self.target_chromosome}.csv")
        snp_list, chunks = self.chunked_snp_names(sm_dict, chunk_return=True)
        bp_positions = np.array_split(
            mc.variant_array(self.bp_position.lower(),
                             sm_dict[self.sm_variants]), chunks)
        freqs = np.array_split(sm_dict[self.freq], chunks)

        # Filter each chunk to clean any snps that may be probabilistic
        accepted_snps = [
            self.filter_snp_chunk(gen_file, snps, f, bp, index, len(snp_list))
            for index, (
                snps, f,
                bp) in enumerate(zip(snp_list, freqs, bp_positions), start=1)
        ]

        mc.filter_array(sm_dict, flatten(accepted_snps), "Filter")
        print(
            f"Found {len(sm_dict[self.sm_variants])} Snps that passed filtering"
        )

        # Return the filter summary dict
        mc.error_dict_to_terminal(self._filter_error_dict, "PGS_Filter_Snps",
                                  t0)
        return self.write_summary_files(sm_dict, write, self.target_chromosome,
                                        "Filtered", self.filter_directory)
Example #13
    def _extract_usable_dates(self, attr, date_min, date_max, weight_places,
                              place_name):
        """
        Determine if all required dates are present and return the common dates between places. If the location does not
        contain common dates, save the location date errors to a separate json

        If we have multiple places, not all places may have the same dates. We cannot create a weighted value from
        multiple places if some of those places are missing a date. So in these cases, nothing is written to the dataset,
        and a line explaining what was missing is written out to a csv file. If we do have common dates for a given
        place, then we return these dates, which will be indexed to extract the values associated with them

        :param attr: The current attribute we are weighting values for
        :type attr: str

        :param date_min: The start date of this weight
        :type date_min: str | int

        :param date_max: The end date of this weight
        :type date_max: str | int

        :param weight_places: The places that are involved in weighting for this place between date_min and date_max
        :type weight_places: list

        :param place_name: The current name of the place we are constructing weights for
        :type place_name: str

        :return: All the common dates for the weight_places involved in this place
        :rtype: list
        """

        # Isolate all the dates for all the weight places in weight_places
        dates_list = flatten([
            list(self.extract_data(place)[attr].keys())
            for place in weight_places
        ])

        # Keep the dates within the time range we are looking for
        dates_list = [
            int(date) for date in dates_list
            if date_min <= int(date) < date_max
        ]

        # Count each occurrence of a date to ensure we have the same number in all places
        dates_dict = Counter(dates_list)

        # If we have any non_common dates, we can't use this date for weighting as we won't have data in all of the
        # places involved in the weight
        non_common_dates = [
            date for date in dates_dict
            if dates_dict[date] != len(weight_places)
        ]
        if len(non_common_dates) > 0:
            # Write out this information for users so they can fix their raw data
            self._non_common[place_name][attr] = {
                "Places": weight_places,
                "Target": len(weight_places),
                "Dates": {d: dates_dict[d]
                          for d in non_common_dates}
            }
            print(
                f"Warning: Non Common dates found for {attr} in {weight_places}"
            )

        # Return common dates list
        return sorted(
            [date for date in dates_dict if date not in non_common_dates])
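The Counter step can be illustrated on its own: a date is only kept if it occurs once per place involved in the weight (places and dates below are made up):

from collections import Counter

weight_places = ["place_a", "place_b"]

# Flattened dates found for the attribute across both places (illustrative)
dates_list = [1931, 1951, 1931]  # both places report 1931, only one reports 1951

dates_dict = Counter(dates_list)
non_common_dates = [date for date in dates_dict if dates_dict[date] != len(weight_places)]
common_dates = sorted(date for date in dates_dict if date not in non_common_dates)

print(common_dates)      # [1931]
print(non_common_dates)  # [1951]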
Example #14
    def _determine_relations_to_base(self, ambiguous, base_file, base_gid,
                                     base_indexes, level_indexes,
                                     level_shapefiles, year):
        """
        For each level of shapefile use the year to load the shapefile of a given level relevant to the base_file and
        then use this to look for matching relations. Standardise these relations in length, and then return them.

        :param ambiguous: Holder list for ambiguous relations
        :type ambiguous: list

        :param base_file: Base shapefile for the current 'year'
        :type base_file: ShapeObject

        :param base_gid: Index for GID in the base shape, defaults to zero in the calling method
        :type base_gid: int

        :param base_indexes: Indexes for constructing the name from the base shapefile
        :type base_indexes: list[int]

        :param level_indexes: Indexes for constructing names for each level in other shapefiles
        :type level_indexes: list[list[int]]

        :param level_shapefiles: A list of levels, where each level contains a list of shapefiles we want to determine
            relations of relative to this 'year'
        :type level_shapefiles: list[list[ShapeObject]]

        :param year: The year we wish to match to our lists of shapefiles to determine which one to load
        :type year: str | int

        :return: A list of standard length relations and the headers for the non base level part of the headers
        :rtype: (list, list)
        """
        relation_list = []
        other_headers = []
        for level, indexes, name in zip(level_shapefiles, level_indexes,
                                        self._level_names):
            # Select the match file for this
            match_file = self._set_match_file(level, year)

            # Set a match unit for each sub unit
            level_relations = [
                self._link_locations(place, rec, match_file, year,
                                     base_indexes, base_gid, indexes,
                                     ambiguous)
                for place, rec in zip(base_file.polygons, base_file.records)
            ]

            # Set the maximum number of rows so we can make a consistent length row
            relation_max = max([len(relation) for relation in level_relations])

            # Append the max - 2 (because of the GID and base name) number of level names to headers
            other_headers.append([name for _ in range(relation_max - 2)])

            # Reformat the level relations to all be of equal length
            reformat_on_length = []
            for relation in level_relations:
                if len(relation) != relation_max:
                    reformat_on_length.append(
                        relation +
                        ["" for _ in range(relation_max - len(relation))])
                else:
                    reformat_on_length.append(relation)

            relation_list.append(reformat_on_length)
        return relation_list, flatten(other_headers)
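The equal-length reformatting at the end pads each relation with empty strings up to the longest relation found at that level; in isolation (relations made up) the step behaves like this:

# Made-up relations of uneven length for one level
level_relations = [["GID1", "Base A", "County X"],
                   ["GID2", "Base B", "County X", "County Y"]]

relation_max = max([len(relation) for relation in level_relations])

reformat_on_length = []
for relation in level_relations:
    if len(relation) != relation_max:
        reformat_on_length.append(relation + ["" for _ in range(relation_max - len(relation))])
    else:
        reformat_on_length.append(relation)

print(reformat_on_length)  # both rows are now 4 entries long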
Example #15
    def combine(self, unique_id, data_start, root_directory, write_directory,
                write_name):
        """
        weightGIS expects each file to have a single date, so if you have lots of files of the same date that you want
        to process at the same time you will need to combine them

        :param unique_id: The unique id index
        :type unique_id: int

        :param data_start: The index from which the data starts
        :type data_start: int

        :param root_directory: The root directory of the csv files
        :type root_directory: Path | str

        :param write_directory: The output directory for the file
        :type write_directory: Path | str

        :param write_name: Name of the combined file
        :type write_name: str

        :return: Nothing, write file then stop
        :rtype: None
        """

        # Create the unique IDs
        unique_id_list = sorted(
            list(
                set(
                    flatten([
                        CsvObject(Path(root_directory, file),
                                  set_columns=True)[unique_id]
                        for file in directory_iterator(root_directory)
                    ]))))

        # For each unique ID
        out_list = []
        for count_i, ids in enumerate(unique_id_list):

            if count_i % 10 == 0:
                print(f"{count_i} / {len(unique_id_list)}")

            # Check each file for a matching row, and then isolate its values
            ids_list = []
            for index, file in enumerate(directory_iterator(root_directory)):

                # If it's the first index, take the full values
                if index == 0:
                    ids_list += self._isolate(root_directory, file, unique_id,
                                              ids)

                # Otherwise only take the values after the data start
                else:
                    ids_list += self._isolate(root_directory, file, unique_id,
                                              ids)[data_start:]

            out_list.append(ids_list)

        headers = []
        for index, file in enumerate(directory_iterator(root_directory)):
            if index == 0:
                headers += CsvObject(Path(root_directory, file)).headers
            else:
                headers += CsvObject(Path(root_directory,
                                          file)).headers[data_start:]

        write_csv(write_directory, write_name, headers, out_list)
Example #16
    def _isolate_column(self, index):
        """
        Isolate the non zero indexes from the current column as a list
        """
        return flatten(np.asarray((self.img.image[:, index]).nonzero()))
Example #17
    def _isolate_row(self, index):
        """
        Isolate the non zero indexes from the current row as a list
        """
        return flatten(np.asarray((self.img.image[index, :]).nonzero()))
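Both of these helpers lean on numpy's nonzero; a standalone sketch of what _isolate_column returns, using a toy image array and a stand-in flatten (both assumptions):

import numpy as np

# Stand-in assumed to mirror the support library's flatten
def flatten(nested):
    return [item for sub in nested for item in sub]

image = np.array([[0, 1, 0],
                  [0, 2, 0],
                  [3, 0, 0]])

index = 1
print(flatten(np.asarray((image[:, index]).nonzero())))  # rows 0 and 1 hold non-zero values in column 1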
Example #18
    def _header_len(self):
        """Target Length for headers"""
        return len(flatten(self._indexes)) + len(self._indexes) * 2
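As a quick worked example, with a hypothetical self._indexes of [[1, 2], [4]] the target length is len(flatten(indexes)) + len(indexes) * 2 = 3 + 4 = 7.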
Example #19
    def link_places_across_time(self,
                                lowest_level,
                                other_shapefile_levels,
                                record_indexes,
                                base_gid=0):
        """
        This will link two geo-levels together. Files must have a numeric component and each sub_unit must be matched
        with a match-unit file with the same numeric component

        Example
        --------
        District and county shapefiles must have the dates within their names and there must be a matching shapefile for
        each district. So if you have 1931 districts you must have a 1931 county. The actual names don't matter as
        long as the dates match. Whilst the default column indexes for gid, name and type for districts and name for
        county may work, you should check them against the column indexes you have in your download.

        :param lowest_level: A directory of shapefiles that are of the lowest level in terms of geo-levels
        :type lowest_level: str

        :param other_shapefile_levels: A list of lists, where each sub list is a list of shapefiles at a geo-level
        :type other_shapefile_levels: list[str]

        :param record_indexes: Indexes to slice names out of each level, base must be the first
        :type record_indexes: list[list[int]]

        :param base_gid: The gid index in the base shapefile, defaults to zero
        :type base_gid: int

        :return: Nothing, write the relations file and, if it exists, the ambiguity file, then stop
        :rtype: None
        """

        # Load the shapefiles, determine we have sufficient names
        base_shapefiles, other_shapefiles = self._setup_shapefiles(
            lowest_level, other_shapefile_levels)
        assert len(other_shapefiles) == len(self._level_names), \
            "Not all other shapefile levels have a name provided"

        # Get the name indexes from the list of record_indexes
        base_indexes = record_indexes[0]
        other_level_indexes = record_indexes[1:]

        ambiguous = []
        for base_file in base_shapefiles:
            print(f"\nProcessing {base_file}")

            # Determine the current year for this base unit
            year = re.sub(r"[\D]", "", base_file.file_name)

            # Determine the relations within this base file and set the headers of the output file
            relation_list, headers = self._determine_relations_to_base(
                ambiguous, base_file, base_gid, base_indexes,
                other_level_indexes, other_shapefiles, year)

            # Extract the base names from the first set of relations
            base_shape_names = [relation[:2] for relation in relation_list[0]]

            # Extract the relation names from all relations, then flip them so they are combined
            relation_names = [[relation[2:] for relation in relation_level]
                              for relation_level in relation_list]
            relation_names = flip_list(relation_names)

            # Join the base names and relations two together then write it out
            relation_data = [
                base + flatten(relation)
                for base, relation in zip(base_shape_names, relation_names)
            ]
            write_csv(self._working_dir, f"{year}_relation",
                      ["GID", self._base_name] + headers, relation_data)

        if len(ambiguous) > 0:
            write_csv(self._working_dir, "Ambiguous_Relations", [], ambiguous)
            print(
                f"Please validate the {len(ambiguous)} ambiguous relations before proceeding by creating a file "
                f"called 'SetAmbiguous.csv' where there is now only a single relation for each ambiguous link"
            )
        else:
            print(
                "No problems detected, please move to _write_linked_unique next but set ambiguity to False"
            )