示例#1
0
def test_parse_from_string():
    """Parse two in-memory GEDCOM records and verify the resulting element trees.

    The same parser instance is reused for both records; after each call to
    ``parse`` only the first root child element is inspected.
    """

    def as_encoded_lines(text):
        # The parser is handed a list of newline-terminated, encoded lines.
        return [(line + '\n').encode('utf-8-sig') for line in text.splitlines()]

    gedcom_parser = Parser()

    # Case 1: an individual record with NAME, SEX and BIRT sub-records.
    case_1 = """0 @I5@ INDI
1 NAME First /Last/
1 SEX M
1 BIRT
2 DATE 1 JAN 1900
2 PLAC Kirkland, King, Washington, USA
3 MAP
4 LATI N47.680663
4 LONG W122.234319
"""
    gedcom_parser.parse(as_encoded_lines(case_1))
    individual = gedcom_parser.get_root_child_elements()[0]
    assert isinstance(individual, IndividualElement)
    assert individual.get_tag() == 'INDI'
    assert individual.get_pointer() == '@I5@'
    individual_children = individual.get_child_elements()
    assert len(individual_children) == 3
    assert [child.get_tag() for child in individual_children] == [
        'NAME', 'SEX', 'BIRT'
    ]

    # Case 2: a family record with two spouses and three children.
    case_2 = """0 @F28@ FAM
1 HUSB @I80@
1 WIFE @I81@
1 CHIL @I9@
2 _FREL Natural
2 _MREL Natural
1 CHIL @I84@
2 _FREL Natural
2 _MREL Natural
1 CHIL @I85@
2 _FREL Natural
2 _MREL Natural
"""
    gedcom_parser.parse(as_encoded_lines(case_2))
    family = gedcom_parser.get_root_child_elements()[0]
    assert family.get_tag() == 'FAM'
    assert family.get_pointer() == '@F28@'
    family_children = family.get_child_elements()
    assert len(family_children) == 5
    assert [child.get_tag() for child in family_children[:3]] == [
        'HUSB', 'WIFE', 'CHIL'
    ]
    assert family_children[3].get_value() == '@I84@'
示例#2
0
 def setUp(self):
     """Parse the sample GEDCOM file and expose the parser and its root data.

     Stores the parser as ``self.parser``, its root element as ``self.root``
     and the root's child elements as ``self.child_elements``.
     """
     sample_parser = Parser()
     sample_parser.parse_file('tests/files/Musterstammbaum.ged')
     self.parser = sample_parser
     self.root = sample_parser.get_root_element()
     self.child_elements = sample_parser.get_root_child_elements()
示例#3
0
    def parse(self, gedcom_file_path: str):
        """Parse a GEDCOM file and dispatch its top-level records.

        Individual records are passed to ``self._parse_individual`` and
        family records to ``self._parse_family``; every other root-level
        element is ignored.
        """
        gedcom_parser = Parser()
        gedcom_parser.parse_file(gedcom_file_path)

        for record in gedcom_parser.get_root_child_elements():
            if isinstance(record, IndividualElement):
                self._parse_individual(record)
                continue
            if isinstance(record, FamilyElement):
                self._parse_family(record)
示例#4
0
    def handle(self, *args, **kwargs):
        """Import people and families from a GEDCOM file and report the totals.

        The file name is read from ``kwargs["file name"]``; its ``.suffix``
        attribute is used, so it is expected to be a path-like object. The
        file is looked up inside the fixed ``gedcom_files`` directory.

        The run totals are written to ``self.stdout`` and to
        ``ImportInfo.txt`` in the current working directory.

        Raises:
            CommandError: if the name does not end in ``.ged`` or the file
                does not exist in the expected directory.
        """
        filename = kwargs["file name"]

        # validate that the user gave file with extension ged
        if filename.suffix != ".ged":
            raise CommandError("Please specify GEDCOM file, ex: myGedcom.ged")

        # Check that the file is there
        path = Path(
            "mysite/familytree/management/commands/gedcom_files/"
        )  # @@TODO: update to take the whole path (so it doesn't need to be saved in a particular folder)
        path_plus_file = path.joinpath(filename)

        if not path_plus_file.is_file():
            raise CommandError(
                "That gedcom file does not exist in the expected directory"
            )

        gedcom_parser = Parser()
        gedcom_parser.parse_file(path_plus_file)
        root_child_elements = gedcom_parser.get_root_child_elements()

        # Find/add person records
        for element in root_child_elements:
            if isinstance(element, IndividualElement):
                self.handle_person(element)

        # Find/add family records (after person records exist, so we can look up parents)
        # also save intermediate dictionary: CHIL INDI - family INDI
        for element in root_child_elements:
            if isinstance(element, FamilyElement):
                self.handle_family(element)

        # now that we've saved all the people and families, populate orig_family on people records
        self.add_person_family_values(self.child_family_dict)

        # gather run results, one "label: value" line per counter
        counters = (
            ("gedcom_person_records", self.gedcom_person_records),
            ("gedcom_family_records", self.gedcom_family_records),
            ("person_added_count", self.person_added_count),
            ("person_skipped_count", self.person_skipped_count),
            ("family_added_count", self.family_added_count),
        )
        run_results = "".join(
            label + ": " + str(value) + "\n" for label, value in counters
        )

        # Display and log them
        self.stdout.write(self.style.SUCCESS("You passed filename: ") + str(filename))
        self.stdout.write(run_results)
        # The context manager closes the log file; the previous ``f.closed``
        # only read an attribute and left the handle open.
        with open("ImportInfo.txt", "w") as f:
            f.write(run_results)
示例#5
0
def test_parse_file():
    """Check element counts before and after parsing the sample GEDCOM file."""

    def count_individuals(elements):
        # Number of elements that are IndividualElement instances.
        return sum(1 for item in elements if isinstance(item, IndividualElement))

    parser = Parser()

    # Nothing has been parsed yet, so the root has no children.
    assert len(parser.get_root_child_elements()) == 0

    parser.parse_file('tests/files/Musterstammbaum.ged')

    assert len(parser.get_root_child_elements()) == 34

    # The same individuals are expected among the root children and in the
    # parser's complete element list.
    individuals_in_root_child_elements = count_individuals(
        parser.get_root_child_elements())
    individuals_in_element_list = count_individuals(parser.get_element_list())

    assert individuals_in_root_child_elements == 20
    assert individuals_in_element_list == 20
    def import_gedcom_file(self, gedcom_file_path):
        """Import the individuals and families of a GEDCOM file into the database.

        Individuals are parsed with ``self.parse_indi`` and families with
        ``self.parse_family``. Each parsed family is a tuple
        ``(husband, wife, married_date, place, children, note)`` in which
        ``husband``, ``wife`` and the items of ``children`` are keys into the
        individuals lookup; an empty string marks a missing partner.

        Raises:
            Exception: if a child is already assigned to another family.
            KeyError: if a family refers to an individual pointer that was
                not found among the root-level records.
        """
        gedcom_parser = Parser()
        gedcom_parser.parse_file(gedcom_file_path)

        # Parse all elements in the GEDCOM file, recording details from
        # individual and family elements.
        families = []
        # Lookup from gedcom individual pointer (e.g. "@I219") to api.Individual.
        individuals = {}
        for element in gedcom_parser.get_root_child_elements():
            if isinstance(element, IndividualElement):
                individuals[element.get_pointer()] = self.parse_indi(element)
            elif isinstance(element, FamilyElement):
                families.append(self.parse_family(element))

        # Note: in order to create relations in the DB, we need to commit the
        # Individuals to the DB first so they have valid PK's.
        for individual in individuals.values():
            individual.save()

        for (husband, wife, married_date, place, children, note) in families:
            family = Family(
                married_date=married_date,
                married_location=place,
                note=note,
            )
            family.save()
            for partner in (husband, wife):
                if partner != '':
                    individuals[partner].partner_in_families.add(family)
                    individuals[partner].save()
            # NOTE(review): this second save (and the partner saves above) look
            # redundant; kept because the model definitions are not visible here.
            family.save()

            for child in children:
                if individuals[child].child_in_family is not None:
                    raise Exception("Can't handle child {} being a child of two families!".format(child))
                individuals[child].child_in_family = family
                individuals[child].save()

        self.stdout.write(self.style.SUCCESS('Successfully parsed {} individuals {} families'.format(
            len(individuals), len(families))))
示例#7
0
# Output table for family records.
# NOTE(review): INDI_TABLE and file_path are used below, but their definitions
# are not visible in this excerpt — confirm they are created before this point.
FAM_TABLE = PrettyTable()
# Column headers of the individuals table.
INDI_TABLE.field_names = [
    "ID", "Name", "Gender", "Birthday", "Age", "Alive", "Death", "Child",
    "Spouse"
]
# Column headers of the families table.
FAM_TABLE.field_names = [
    "ID", "Married", "Divorced", "Husband ID", "Husband Name", "Wife ID",
    "Wife Name", "Children"
]

# Module-level parser; it is created and run at import time and is reused by
# processGedcom() further down.
gedcom_parser = Parser()
gedcom_parser.parse_file(file_path, False)  # Disable strict parsing

# Element list as returned by the parser for the file parsed above.
elements = gedcom_parser.get_element_list()

# Direct children of the parser's root element.
root_child_elements = gedcom_parser.get_root_child_elements()


def convertGedcomDate(datestring):
    """Parse a ``day month-abbreviation year`` string such as ``1 JAN 1900``.

    The string is handed to ``dt.strptime`` with the format ``%d %b %Y``, so
    anything that does not match that exact layout makes ``strptime`` raise.
    NOTE(review): ``dt`` is presumably ``datetime.datetime`` — confirm against
    the file's imports.
    """
    day_month_year = "%d %b %Y"
    parsed = dt.strptime(datestring, day_month_year)
    return parsed


def processGedcom(file_path):
    """Helper function for reading GEDCOM files when unit testing.

    Re-parses ``file_path`` with the module-level ``gedcom_parser`` (strict
    parsing disabled) and returns the first root-level ``IndividualElement``.
    The module-level ``elements`` and ``root_child_elements`` variables are
    not refreshed by this call.

    Returns:
        The first individual found among the root child elements, or ``None``
        when the file contains no individual record.
    """
    gedcom_parser.parse_file(file_path, False)

    for element in gedcom_parser.get_root_child_elements():
        if isinstance(element, IndividualElement):
            return element
    return None
示例#8
0
class GedcomManipulator:
    """Export the families of a GEDCOM file to a colour-coded workbook.

    The file is parsed once in the constructor; ``write_csv`` then writes one
    row per spouse and per child of every family record.
    """

    def __init__(self, file_path):
        """Parse ``file_path`` and keep the parser and its root child elements."""
        self.gedcom_parser = Parser()
        self.gedcom_parser.parse_file(file_path,
                                      False)  # Disable strict parsing
        self.root_child_elements = self.gedcom_parser.get_root_child_elements()
        # Result of the most recent get_full_name() call.
        self.fullName = []

    @staticmethod
    def first_name(el):
        """Return the first component of ``el.get_name()`` joined into a string."""
        return ''.join(el.get_name()[0])

    def get_full_name(self, el):
        """Return the name chain of ``el`` as a list.

        The list starts with the first name of ``el``, continues with the
        first name of each ancestor reached by repeatedly following the first
        parent reported by the parser, and ends with the second component of
        ``el.get_name()`` when that is non-empty. The list is also stored in
        ``self.fullName``.
        """
        self.fullName = [self.first_name(el)]
        current = el
        while current:
            parents = self.gedcom_parser.get_parents(current)
            if not parents:
                break
            # Only the first reported parent is followed.
            current = parents[0]
            self.fullName.append(self.first_name(current))

        last_part = el.get_name()[1]
        if last_part:
            self.fullName.append(last_part)

        return self.fullName

    def write_csv(self, output_file_path):
        """Write every family's spouses and children to ``output_file_path``.

        Despite its name, the method fills a ``Workbook`` and saves it with
        ``Workbook.save``. Rows start at row 2 and values at column 2:

        * HUSB / WIFE: the name chain from ``get_full_name`` in reverse order,
          one cell per part, filled ``66CCFF`` (HUSB) or ``FFCCFF`` (WIFE).
        * CHIL: the first name only, filled ``00CCCC``.

        One empty row separates consecutive families. Other sub-records of a
        family (events and the like) do not reference a person and are
        skipped.

        Raises:
            ValueError: if a HUSB/WIFE/CHIL reference points to a record that
                is not among the root child elements.
        """
        # The first root child is skipped, as in the original implementation
        # (presumably the header record).
        records = self.root_child_elements[1:]

        # Pointer -> record lookup. The first occurrence wins, which matches
        # the behaviour of the list.index() search this replaces.
        by_pointer = {}
        for record in records:
            pointer = record.get_pointer()
            if pointer:
                by_pointer.setdefault(pointer, record)

        spouse_fill = {'HUSB': "66CCFF", 'WIFE': "FFCCFF"}

        wb = Workbook()
        ws = wb.active
        row = 2
        for record in records:
            if record.get_tag() != 'FAM':
                continue  # only family records produce rows
            for child in record.get_child_elements():
                tag = child.get_tag()
                if tag != 'CHIL' and tag not in spouse_fill:
                    continue  # not a reference to a person
                reference = child.get_value()
                person = by_pointer.get(reference)
                if person is None:
                    raise ValueError(
                        "{} reference {!r} does not match any record".format(
                            tag, reference))

                if tag == 'CHIL':
                    cell = ws.cell(row=row, column=2)
                    cell.value = self.first_name(person)
                    cell.fill = PatternFill("solid", fgColor="00CCCC")
                else:
                    name_parts = self.get_full_name(person)[::-1]
                    for column, part in enumerate(name_parts, start=2):
                        cell = ws.cell(row=row, column=column)
                        cell.value = part
                        cell.fill = PatternFill("solid",
                                                fgColor=spouse_fill[tag])
                row += 1
            row += 1  # blank separator row after each family
        wb.save(output_file_path)
示例#9
0
def run_checker(file_path):
    """Report couples in a GEDCOM file that share an ancestor.

    For every individual, each family in which the individual is a spouse is
    inspected, and the individual is compared with the family members of type
    wife. The result is one string: a header, one ``<br />``-terminated
    comma-separated row per related couple, and a closing ``Total count``.
    """
    header = "Individual Name,Spouse Name,Shared Ancestor, # of Generations Removed from Individual, # of Generations Removed from Spouse<br />"

    gedcom_parser = Parser()
    gedcom_parser.parse_file(file_path, False)
    root_child_elements = gedcom_parser.get_root_child_elements()

    def display_name(person):
        # First and second name component separated by a single space.
        name_parts = person.get_name()
        return " ".join((name_parts[0], name_parts[1]))

    def collect_ancestors(person, level=0):
        # (ancestor, level) pairs: the direct parents first, then the
        # ancestors of each parent in turn, one level further up.
        direct = [(parent, level)
                  for parent in gedcom_parser.get_parents(person, "ALL")]
        collected = list(direct)
        for parent, _ in direct:
            collected += collect_ancestors(parent, level + 1)
        return collected

    def first_shared_ancestor(person_one, person_two):
        # First match in (ancestors of one) x (ancestors of two) order, as
        # (ancestor, level for person_one, level for person_two), else False.
        ancestors_one = collect_ancestors(person_one)
        ancestors_two = collect_ancestors(person_two)
        matches = ((one, level_one, level_two)
                   for one, level_one in ancestors_one
                   for two, level_two in ancestors_two
                   if one == two)
        return next(matches, False)

    rows = []
    for individual in root_child_elements:
        # Only IndividualElement instances offer get_name() and can be
        # looked up as spouses.
        if not isinstance(individual, IndividualElement):
            continue
        spouse_families = gedcom_parser.get_families(
            individual, gedcom.tags.GEDCOM_TAG_FAMILY_SPOUSE)
        for family in spouse_families:
            wives = gedcom_parser.get_family_members(
                family, members_type=gedcom.tags.GEDCOM_TAG_WIFE)
            for spouse in wives:
                shared = first_shared_ancestor(individual, spouse)
                if individual != spouse and shared:
                    ancestor, level_individual, level_spouse = shared
                    fields = [
                        display_name(individual),
                        display_name(spouse),
                        display_name(ancestor),
                        str(level_individual),
                        str(level_spouse),
                    ]
                    rows.append(",".join(fields) + "<br />")

    return header + "".join(rows) + 'Total count: ' + str(len(rows))
示例#10
0
class GedcomManipulator(object):
    """Query helper around a parsed GEDCOM file.

    Offers a cached name list plus three traversals: cousins at a given
    distance, the presumed Y-DNA line, and a branch export.

    NOTE(review): ``__init__`` stores a ``Parser`` in ``self.gedcom``, while
    the traversals index it (``self.gedcom[_id]``) and rely on ``.parents``,
    ``.father``, ``as_individual()``, ``gedcom.Spouse`` and
    ``gedcom.GedcomFile``. Confirm that the object held in ``self.gedcom``
    really offers that interface.
    """

    def __init__(self, filename):
        """Parse ``filename`` and prepare the lazily filled name cache."""
        self.filename = filename
        self.gedcom = Parser()
        self.gedcom.parse_file(self.filename)
        self.names = None  # filled on first access to ``namelist``

    @staticmethod
    def _as_list(value):
        """Normalise a tag lookup result (None, one element or a list) to a list."""
        if value is None:
            return []
        if isinstance(value, list):
            return value
        return [value]

    @property
    def namelist(self):
        """List of ``(pointer, "name parts joined by a space")`` per individual.

        Built on first access and cached in ``self.names``.
        """
        if self.names is None:
            # Assigned only once complete, so a failure midway cannot leave
            # a partial list in the cache.
            self.names = [
                (element.get_pointer(), " ".join(element.get_name()))
                for element in self.gedcom.get_root_child_elements()
                if isinstance(element, IndividualElement)
            ]

        return self.names

    def get_cousins(self, _id, level=2):
        """Find all cousins of given distance.

        Walks ``level`` generations up through ``.parents``, collects the
        siblings of those ancestors through their ``FAMC`` families, then
        walks ``level`` generations back down through ``FAMS`` families.

        Returns:
            A set of the individuals reached.
        """
        root = self.gedcom[_id]

        assert root is not None

        # Climb ``level`` generations.
        atlevel = 0
        generation = [root]
        while atlevel < level:
            generation = [
                parent for person in generation for parent in person.parents
            ]
            atlevel += 1

        # Siblings of the ancestors reached (the ancestor itself excluded).
        relatives = set()
        for person in generation:
            for family_ref in self._as_list(person['FAMC']):
                for child_ref in family_ref.as_individual().children:
                    sibling = child_ref.as_individual()
                    if sibling.id != person.id:
                        relatives.add(sibling)

        # Descend the same number of generations.
        while atlevel > 0:
            offspring = set()
            for person in relatives:
                for family_ref in self._as_list(person['FAMS']):
                    for child_ref in family_ref.as_individual().children:
                        offspring.add(child_ref.as_individual())
            relatives = offspring
            atlevel -= 1

        return relatives

    def get_ydna(self, _id):
        """Find all people that would/should have the same Y-DNA.

        Starting from ``_id``, repeatedly follows ``.father`` upwards and the
        male children of every ``FAMS`` family downwards.

        Returns:
            A set of every individual visited.
        """
        queue = [self.gedcom[_id]]
        outelements = set()

        while queue:
            cur = queue.pop(0)

            if cur is None or cur in outelements:
                continue

            fams = cur['FAMS']

            father = cur.father
            if father:
                queue.append(father)

            for family_ref in self._as_list(fams):
                for child_ref in family_ref.as_individual().children:
                    child = child_ref.as_individual()
                    if child.is_male:
                        queue.append(child)

            outelements.add(cur)

        return outelements

    def get_branch(self,
                   _id,
                   siblings=False,
                   descendants=False,
                   ancestors=True):
        """Collect the branch around ``_id`` into a new ``gedcom.GedcomFile``.

        Args:
            _id: key of the starting individual in ``self.gedcom``.
            siblings: also follow the children of the ``FAMC`` family.
            descendants: also follow spouses and children of ``FAMS`` families.
            ancestors: also follow ``.parents``.

        Returns:
            A ``gedcom.GedcomFile`` to which every collected element was
            added. The number of collected elements is printed as well.
        """
        queue = [self.gedcom[_id]]
        outelements = set()

        while queue:
            cur = queue.pop(0)

            if cur is None or cur in outelements:
                continue

            famc = cur['FAMC']
            fams = cur['FAMS']

            if ancestors and famc:
                # ``famc`` may be a list of references; adding the list
                # itself to a set raises TypeError, so add each reference.
                outelements.update(self._as_list(famc))
                queue.extend(cur.parents)

            if siblings and famc:
                if isinstance(famc, list):
                    # NOTE(review): this skips the rest of the iteration, so
                    # ``cur`` itself is never collected — kept as in the
                    # original; confirm that this is intended.
                    continue

                fam = famc.as_individual()
                if fam is not None:
                    outelements.add(fam)
                    for child_ref in fam.children:
                        queue.append(child_ref.as_individual())

            if descendants and fams:
                if isinstance(fams, list):
                    spouse_refs = fams
                elif isinstance(fams, gedcom.Spouse):
                    spouse_refs = [fams]
                else:
                    spouse_refs = []  # any other type is ignored

                for spouse_ref in spouse_refs:
                    fam = spouse_ref.as_individual()
                    if fam is None:
                        continue
                    # Add the individual reference. The list branch used to
                    # add the whole ``fams`` list, which is unhashable.
                    # NOTE(review): the siblings branch adds the resolved
                    # family instead of the reference — confirm which one
                    # the output file needs.
                    outelements.add(spouse_ref)
                    if fam.husband is not None:
                        outelements.add(fam.husband.as_individual())
                    if fam.wife is not None:
                        outelements.add(fam.wife.as_individual())
                    for child_ref in fam.children:
                        queue.append(child_ref.as_individual())

            outelements.add(cur)

        output = gedcom.GedcomFile()
        for element in outelements:
            output.add_element(element)
        print(len(outelements))
        return output