Пример #1
0
def prune_trees():
    """Run ``update_newick`` over every language tree and save the changed ones.

    Trees are visited in ``name`` order and all of them are checked against
    the same label queryset.  A tree is saved only when ``update_newick``
    returns a truthy value for it.

    Returns:
        int: the number of trees that were saved.
    """
    all_labels = LanguageTreeLabels.objects.all()
    changed = 0
    for tree in LanguageTree.objects.order_by('name').all():
        if not update_newick(tree, all_labels):
            continue
        changed += 1
        tree.save()
    return changed
Пример #2
0
def prune_trees():
    """Update the newick data of all language trees, saving those that changed.

    Every tree (ordered by ``name``) is passed to ``update_newick`` together
    with the queryset of all tree labels; a truthy return value means the
    tree is counted and saved.

    Returns:
        int: how many trees were counted and saved.
    """
    label_qs = LanguageTreeLabels.objects.all()
    ordered_trees = LanguageTree.objects.order_by('name').all()
    saved = 0
    for tree in ordered_trees:
        was_updated = update_newick(tree, label_qs)
        if was_updated:
            saved += 1
            tree.save()
    return saved
Пример #3
0
def trees_from_societies(request):
    """Return the serialized language trees that contain the requested societies.

    Society ids are taken from ``request.query_params``.  If several query
    parameters are supplied, only the values of the last one are used, because
    every iteration overwrites ``soc_ids`` and ``labels``.

    Each matching tree whose name does not contain ``'global'`` is handed to
    ``update_newick`` together with the labels and kept when that call returns
    a truthy value.  When a tree whose name contains ``'global'`` matches, its
    ``newick_string`` is rebuilt (in memory only; nothing is saved here) from
    the kept ``'glotto'`` trees plus the hard-coded isolates that occur among
    the selected labels, and that tree is appended last.

    Returns:
        Response: wraps ``LanguageTreeSerializer(language_trees, many=True).data``.
    """
    # TODO ask @Bibiko once the isolates are in the db under global.tree as
    # string: isol1,isol2,isol3,... -- then read them from
    # global_tree.newick_string.split(',') instead of this hard-coded list.
    known_isolates = (
        'alse1251', 'amas1236', 'bana1292', 'calu1239', 'chim1301',
        'chit1248', 'chon1248', 'coah1252', 'coos1249', 'furr1244',
        'gaga1251', 'guai1237', 'guat1253', 'hadz1240', 'high1242',
        'kara1289', 'karo1304', 'klam1254', 'kute1249', 'lara1258',
        'mull1237', 'natc1249', 'nort2938', 'paez1247', 'pume1238',
        'pura1257', 'pure1242', 'sali1253', 'sand1273', 'seri1257',
        'shom1245', 'sius1254', 'sout1439', 'take1257', 'ticu1245',
        'timu1245', 'tiwi1244', 'toll1241', 'trum1247', 'uruu1244',
        'wara1303', 'wash1253', 'yama1264', 'yuch1247', 'zuni1245',
    )

    language_trees = []
    # Bind every name used below before the loop, so a request without query
    # parameters yields an empty result rather than a NameError.
    soc_ids, labels = [], []
    for _key, values in request.query_params.lists():
        soc_ids = values
        labels = models.LanguageTreeLabels.objects.filter(
            societies__id__in=soc_ids).all()

    global_tree = None
    global_newick = []

    matching_trees = models.LanguageTree.objects\
        .filter(taxa__societies__id__in=soc_ids)\
        .prefetch_related(
            'taxa__languagetreelabelssequence_set__labels',
            'taxa__languagetreelabelssequence_set__society',
        )\
        .distinct()

    for t in matching_trees:
        if 'global' in t.name:
            global_tree = t
        elif update_newick(t, labels):
            language_trees.append(t)
            if 'glotto' in t.name:
                # remove last ; in order to be able to join the trees
                global_newick.append(t.newick_string[:-1])

    if global_tree:
        # A set makes the per-isolate membership test O(1).
        langs_in_tree = {str(label.label) for label in labels}
        # add isolates if present in current selection (each at most once,
        # even if more than one 'global' tree matched)
        global_newick.extend(
            '(' + isolate + ':1)'
            for isolate in known_isolates if isolate in langs_in_tree)
        # join all pruned glottolog trees into the global one
        global_tree.newick_string = '(' + ','.join(global_newick) + ');'
        language_trees.append(global_tree)

    return Response(
        serializers.LanguageTreeSerializer(language_trees, many=True).data)
Пример #4
0
def trees_from_societies(request):
    """Return the serialized language trees that contain the requested societies.

    Society ids come from ``request.query_params``; when several parameters
    are present the values of the last one win.  A matching tree is included
    only if ``update_newick`` returns a truthy value for it.

    Returns:
        Response: wraps ``LanguageTreeSerializer(language_trees, many=True).data``.
    """
    soc_ids = []
    labels = []
    for _param, values in request.query_params.lists():
        soc_ids = values
        labels = models.LanguageTreeLabels.objects.filter(societies__id__in=soc_ids).all()

    candidate_trees = (
        models.LanguageTree.objects
        .filter(taxa__societies__id__in=soc_ids)
        .prefetch_related(
            'taxa__languagetreelabelssequence_set__labels',
            'taxa__languagetreelabelssequence_set__society',
        )
        .distinct()
    )
    language_trees = [tree for tree in candidate_trees if update_newick(tree, labels)]

    serializer = serializers.LanguageTreeSerializer(language_trees, many=True)
    return Response(serializer.data)
Пример #5
0
def trees_from_societies(request):
    """Serialize the language trees covering the societies named in the request.

    The society ids are read from ``request.query_params`` (the last
    parameter's values are the ones used).  Trees for which ``update_newick``
    returns a falsy value are left out of the response.

    Returns:
        Response: wraps ``LanguageTreeSerializer(language_trees, many=True).data``.
    """
    labels = []
    soc_ids = []
    kept_trees = []

    for _name, ids in request.query_params.lists():
        soc_ids = ids
        label_manager = models.LanguageTreeLabels.objects
        labels = label_manager.filter(societies__id__in=soc_ids).all()

    tree_query = models.LanguageTree.objects.filter(
        taxa__societies__id__in=soc_ids)
    tree_query = tree_query.prefetch_related(
        'taxa__languagetreelabelssequence_set__labels',
        'taxa__languagetreelabelssequence_set__society',
    )
    for tree in tree_query.distinct():
        if not update_newick(tree, labels):
            continue
        kept_trees.append(tree)

    payload = serializers.LanguageTreeSerializer(kept_trees, many=True).data
    return Response(payload)
Пример #6
0
def result_set_from_query_dict(query_dict):
    """Build a ``SocietyResultSet`` for the search criteria in *query_dict*.

    Keys read from *query_dict* (all optional):

    * ``'l'``: language ids.
    * ``'c'``: dicts with a ``'variable'`` id plus either an ``'id'`` (code
      id) or, for variables whose ``data_type`` is ``'Continuous'``,
      ``'min'``/``'max'`` bounds.  A continuous criterion without ``'min'``
      selects the ``'NA'`` coded value.
    * ``'e'``: ``(variable id, operator, params)`` triples; the operators
      handled are ``'inrange'``, ``'outrange'``, ``'gt'`` and ``'lt'``.
    * ``'p'``: geographic region ids.

    The criteria are AND-ed into a single raw SQL query selecting society
    ids.  The matching societies, their selected cultural/environmental
    values and the language trees that contain them are then added to the
    result set.  Without any criteria no society is selected.

    Returns:
        The populated ``serializers.SocietyResultSet``.

    Raises:
        ValueError, TypeError: if an id or numeric bound cannot be converted
            by ``int()``/``float()`` or formatted as a float.
        KeyError: if a ``'c'`` criterion names a variable id that is not
            among the fetched ``CulturalVariable`` rows.
        AssertionError: if a criterion for a non-continuous variable has no
            ``'id'`` key.
    """
    from time import time
    _s = time()
    log.info('enter result_set_from_query_dict')

    result_set = serializers.SocietyResultSet()
    sql_joins, sql_where = [], []

    def id_array(l):
        # int() coercion keeps arbitrary text out of the SQL assembled below.
        return '(%s)' % ','.join('%s' % int(i) for i in l)

    if 'l' in query_dict:
        sql_joins.append(('language', 'l', 'l.id = s.language_id'))
        sql_where.append('l.id IN ' + id_array(query_dict['l']))
        for lang in models.Language.objects.filter(id__in=query_dict['l']):
            result_set.languages.add(lang)

    if 'c' in query_dict:
        # Map variable id -> variable, prefetching only the codes that were
        # actually requested.
        variables = {
            v.id: v for v in models.CulturalVariable.objects
            .filter(id__in=[x['variable'] for x in query_dict['c']])
            .prefetch_related(Prefetch(
                'codes',
                queryset=models.CulturalCodeDescription.objects
                .filter(id__in=[x.get('id') for x in query_dict['c']])))
        }

        # groupby() only groups adjacent items, hence the sort by the same key.
        for variable, codes in groupby(
            sorted(query_dict['c'], key=lambda c: c['variable']),
            key=lambda x: x['variable']
        ):
            variable = variables[variable]
            codes = list(codes)
            # One join (with its own alias) per variable.
            alias = 'cv%s' % variable.id
            sql_joins.append((
                "culturalvalue",
                alias,
                "{0}.society_id = s.id AND {0}.variable_id = {1}".format(alias, variable.id)
            ))

            if variable.data_type and variable.data_type == 'Continuous':
                # A criterion without 'min' stands for the 'NA' value.
                include_NA = not all('min' in c for c in codes)
                ors = [
                    "({0}.coded_value_float >= %(min)f AND {0}.coded_value_float <= %(max)f)".format(alias) % c
                    for c in codes if 'min' in c]
                if include_NA:
                    ors.append("%s.coded_value = 'NA'" % alias)
                sql_where.append("(%s)" % ' OR '.join(ors))
                if not include_NA:
                    sql_where.append("{0}.coded_value != 'NA'".format(alias))
            else:
                assert all('id' in c for c in codes)
                sql_where.append("{0}.code_id IN %s".format(alias) % id_array([x['id'] for x in codes]))

            result_set.variable_descriptions.add(serializers.VariableCode(variable.codes, variable))

    if 'e' in query_dict:
        # There can be multiple filters, so we must aggregate the results.
        for varid, criteria in groupby(
            sorted(query_dict['e'], key=lambda c: c[0]),
            key=lambda x: x[0]
        ):
            # Coerce once so that only an integer ever reaches the SQL text,
            # for the alias as well as for the join condition.
            var_id = int(varid)
            alias = 'ev%s' % var_id
            sql_joins.append((
                "environmentalvalue",
                alias,
                "{0}.society_id = s.id AND {0}.variable_id = {1}".format(alias, var_id)))

            for _varid, operator, params in criteria:
                # Build a list: on Python 3 map() returns an iterator, which
                # cannot be indexed as done below.
                params = [float(p) for p in params]
                if operator == 'inrange':
                    sql_where.append("{0}.value >= {1:f} AND {0}.value <= {2:f}".format(alias, params[0], params[1]))
                elif operator == 'outrange':
                    # NOTE(review): the bounds are swapped relative to
                    # 'inrange', so with params[0] < params[1] no value can
                    # match.  Possibly meant "< params[0] OR > params[1]" --
                    # confirm the intended semantics before changing.
                    sql_where.append("{0}.value >= {1:f} AND {0}.value <= {2:f}".format(alias, params[1], params[0]))
                elif operator == 'gt':
                    # Inclusive comparison.
                    sql_where.append("{0}.value >= {1:f}".format(alias, params[0]))
                elif operator == 'lt':
                    # Inclusive comparison.
                    sql_where.append("{0}.value <= {1:f}".format(alias, params[0]))

        for variable in models.EnvironmentalVariable.objects.filter(id__in=[x[0] for x in query_dict['e']]):
            result_set.environmental_variables.add(variable)

    if 'p' in query_dict:
        sql_joins.append(('geographicregion', 'r', 'r.id = s.region_id'))
        sql_where.append('r.id IN %s' % id_array(query_dict['p']))
        for region in models.GeographicRegion.objects.filter(id__in=query_dict['p']):
            result_set.geographic_regions.add(region)

    if sql_where:
        cursor = connection.cursor()
        # Every request-supplied value was pushed through int()/float
        # formatting above, so this statement is assembled from numbers and
        # fixed fragments only.
        sql = "select distinct s.id from dplace_app_society as s %s where %s" % (
            ' '.join('join dplace_app_%s as %s on %s' % t for t in sql_joins),
            ' AND '.join(sql_where))
        cursor.execute(sql)
        soc_ids = [r[0] for r in cursor.fetchall()]
    else:
        soc_ids = []

    soc_query = models.Society.objects.filter(id__in=soc_ids)\
        .select_related('source', 'language__family', 'language__iso_code', 'region')
    if result_set.geographic_regions:
        soc_query = soc_query.select_related('region')
    if result_set.variable_descriptions:
        soc_query = soc_query.prefetch_related(Prefetch(
            'culturalvalue_set',
            to_attr='selected_cvalues',
            queryset=models.CulturalValue.objects
            # FIXME: this selects possibly too many values, in case there are multiple
            # values for the same variable, not all of them matching the criteria.
            .filter(variable_id__in=[v.variable.id for v in result_set.variable_descriptions])
            .prefetch_related('references')))
    if result_set.environmental_variables:
        soc_query = soc_query.prefetch_related(Prefetch(
            'environmentalvalue_set',
            to_attr='selected_evalues',
            queryset=models.EnvironmentalValue.objects.filter(
                variable_id__in=[v.id for v in result_set.environmental_variables])))

    for soc in soc_query:
        soc_result = serializers.SocietyResult(soc)
        # The ``selected_*`` attributes exist only when the matching
        # Prefetch was attached above, hence the same guards here.
        if result_set.variable_descriptions:
            for cval in soc.selected_cvalues:
                soc_result.variable_coded_values.add(cval)
        if result_set.environmental_variables:
            for evalue in soc.selected_evalues:
                soc_result.environmental_values.add(evalue)
        result_set.societies.add(soc_result)

    log.info('mid 1: %s' % (time() - _s,))

    # search for language trees
    labels = models.LanguageTreeLabels.objects.filter(societies__id__in=soc_ids).all()
    log.info('mid 3: %s' % (time() - _s,))

    # TODO ask @Bibiko once the isolates are in the db under global.tree as
    # string: isol1,isol2,isol3,... -- then read them from
    # global_tree.newick_string.split(',') instead of this hard-coded list.
    known_isolates = (
        'alse1251', 'amas1236', 'bana1292', 'calu1239', 'chim1301',
        'chit1248', 'chon1248', 'coah1252', 'coos1249', 'furr1244',
        'gaga1251', 'guai1237', 'guat1253', 'hadz1240', 'high1242',
        'kara1289', 'karo1304', 'klam1254', 'kute1249', 'lara1258',
        'mull1237', 'natc1249', 'nort2938', 'paez1247', 'pume1238',
        'pura1257', 'pure1242', 'sali1253', 'sand1273', 'seri1257',
        'shom1245', 'sius1254', 'sout1439', 'take1257', 'ticu1245',
        'timu1245', 'tiwi1244', 'toll1241', 'trum1247', 'uruu1244',
        'wara1303', 'wash1253', 'yama1264', 'yuch1247', 'zuni1245',
    )

    global_tree = None
    global_newick = []

    for t in models.LanguageTree.objects\
            .filter(taxa__societies__id__in=soc_ids)\
            .prefetch_related(
                'taxa__languagetreelabelssequence_set__labels',
                'taxa__languagetreelabelssequence_set__society',
            )\
            .distinct():
        if 'global' in t.name:
            global_tree = t
        elif update_newick(t, labels):
            result_set.language_trees.add(t)
            if 'glotto' in t.name:
                # remove last ; in order to be able to join the trees
                global_newick.append(t.newick_string[:-1])

        log.info('mid 4: %s' % (time() - _s,))

    if global_tree:
        # A set makes the per-isolate membership test O(1).
        langs_in_tree = {str(label.label) for label in labels}
        # add isolates if present in current selection (each at most once,
        # even if more than one 'global' tree matched)
        global_newick.extend(
            '(' + isolate + ':1)'
            for isolate in known_isolates if isolate in langs_in_tree)
        # join all pruned glottolog trees into the global one
        global_tree.newick_string = '(' + ','.join(global_newick) + ');'
        result_set.language_trees.add(global_tree)

    return result_set
Пример #7
0
def result_set_from_query_dict(query_dict):
    """Build a ``SocietyResultSet`` for the search criteria in *query_dict*.

    Keys read from *query_dict* (all optional):

    * ``'l'``: language ids.
    * ``'c'``: dicts with a ``'variable'`` id plus either an ``'id'`` (code
      id) or, for variables whose ``data_type`` is ``'Continuous'``,
      ``'min'``/``'max'`` bounds.  A continuous criterion without ``'min'``
      selects the ``'NA'`` coded value.
    * ``'e'``: ``(variable id, operator, params)`` triples; the operators
      handled are ``'inrange'``, ``'outrange'``, ``'gt'`` and ``'lt'``.
    * ``'p'``: geographic region ids.

    The criteria are AND-ed into a single raw SQL query selecting society
    ids.  The matching societies, their selected cultural/environmental
    values and the language trees that contain them are then added to the
    result set.  Without any criteria no society is selected.

    Returns:
        The populated ``serializers.SocietyResultSet``.

    Raises:
        ValueError, TypeError: if an id or numeric bound cannot be converted
            by ``int()``/``float()`` or formatted as a float.
        KeyError: if a ``'c'`` criterion names a variable id that is not
            among the fetched ``CulturalVariable`` rows.
        AssertionError: if a criterion for a non-continuous variable has no
            ``'id'`` key.
    """
    from time import time
    _s = time()
    log.info('enter result_set_from_query_dict')

    result_set = serializers.SocietyResultSet()
    sql_joins, sql_where = [], []

    def id_array(l):
        # int() coercion keeps arbitrary text out of the SQL assembled below.
        return '(%s)' % ','.join('%s' % int(i) for i in l)

    if 'l' in query_dict:
        sql_joins.append(('language', 'l', 'l.id = s.language_id'))
        sql_where.append('l.id IN ' + id_array(query_dict['l']))
        for lang in models.Language.objects.filter(id__in=query_dict['l']):
            result_set.languages.add(lang)

    if 'c' in query_dict:
        # Map variable id -> variable, prefetching only the codes that were
        # actually requested.
        variables = {
            v.id: v
            for v in models.CulturalVariable.objects.filter(
                id__in=[x['variable'] for x in query_dict['c']]).
            prefetch_related(
                Prefetch(
                    'codes',
                    queryset=models.CulturalCodeDescription.objects.filter(
                        id__in=[x.get('id') for x in query_dict['c']])))
        }

        # groupby() only groups adjacent items, hence the sort by the same key.
        for variable, codes in groupby(sorted(query_dict['c'],
                                              key=lambda c: c['variable']),
                                       key=lambda x: x['variable']):
            variable = variables[variable]
            codes = list(codes)
            # One join (with its own alias) per variable.
            alias = 'cv%s' % variable.id
            sql_joins.append(
                ("culturalvalue", alias,
                 "{0}.society_id = s.id AND {0}.variable_id = {1}".format(
                     alias, variable.id)))

            if variable.data_type and variable.data_type == 'Continuous':
                # A criterion without 'min' stands for the 'NA' value.
                include_NA = not all('min' in c for c in codes)
                ors = [
                    "({0}.coded_value_float >= %(min)f AND {0}.coded_value_float <= %(max)f)"
                    .format(alias) % c for c in codes if 'min' in c
                ]
                if include_NA:
                    ors.append("%s.coded_value = 'NA'" % alias)
                sql_where.append("(%s)" % ' OR '.join(ors))
                if not include_NA:
                    sql_where.append("{0}.coded_value != 'NA'".format(alias))
            else:
                assert all('id' in c for c in codes)
                sql_where.append("{0}.code_id IN %s".format(alias) %
                                 id_array([x['id'] for x in codes]))

            result_set.variable_descriptions.add(
                serializers.VariableCode(variable.codes, variable))

    if 'e' in query_dict:
        # There can be multiple filters, so we must aggregate the results.
        for varid, criteria in groupby(sorted(query_dict['e'],
                                              key=lambda c: c[0]),
                                       key=lambda x: x[0]):
            # Coerce once so that only an integer ever reaches the SQL text,
            # for the alias as well as for the join condition.
            var_id = int(varid)
            alias = 'ev%s' % var_id
            sql_joins.append(
                ("environmentalvalue", alias,
                 "{0}.society_id = s.id AND {0}.variable_id = {1}".format(
                     alias, var_id)))

            for _varid, operator, params in criteria:
                # Build a list: on Python 3 map() returns an iterator, which
                # cannot be indexed as done below.
                params = [float(p) for p in params]
                if operator == 'inrange':
                    sql_where.append(
                        "{0}.value >= {1:f} AND {0}.value <= {2:f}".format(
                            alias, params[0], params[1]))
                elif operator == 'outrange':
                    # NOTE(review): the bounds are swapped relative to
                    # 'inrange', so with params[0] < params[1] no value can
                    # match.  Possibly meant "< params[0] OR > params[1]" --
                    # confirm the intended semantics before changing.
                    sql_where.append(
                        "{0}.value >= {1:f} AND {0}.value <= {2:f}".format(
                            alias, params[1], params[0]))
                elif operator == 'gt':
                    # Inclusive comparison.
                    sql_where.append("{0}.value >= {1:f}".format(
                        alias, params[0]))
                elif operator == 'lt':
                    # Inclusive comparison.
                    sql_where.append("{0}.value <= {1:f}".format(
                        alias, params[0]))

        for variable in models.EnvironmentalVariable.objects.filter(
                id__in=[x[0] for x in query_dict['e']]):
            result_set.environmental_variables.add(variable)

    if 'p' in query_dict:
        sql_joins.append(('geographicregion', 'r', 'r.id = s.region_id'))
        sql_where.append('r.id IN %s' % id_array(query_dict['p']))
        for region in models.GeographicRegion.objects.filter(
                id__in=query_dict['p']):
            result_set.geographic_regions.add(region)

    if sql_where:
        cursor = connection.cursor()
        # Every request-supplied value was pushed through int()/float
        # formatting above, so this statement is assembled from numbers and
        # fixed fragments only.
        sql = "select distinct s.id from dplace_app_society as s %s where %s" % (
            ' '.join('join dplace_app_%s as %s on %s' % t
                     for t in sql_joins), ' AND '.join(sql_where))
        cursor.execute(sql)
        soc_ids = [r[0] for r in cursor.fetchall()]
    else:
        soc_ids = []

    soc_query = models.Society.objects.filter(id__in=soc_ids)\
        .select_related('source', 'language__family', 'language__iso_code', 'region')
    if result_set.geographic_regions:
        soc_query = soc_query.select_related('region')
    if result_set.variable_descriptions:
        soc_query = soc_query.prefetch_related(
            Prefetch(
                'culturalvalue_set',
                to_attr='selected_cvalues',
                queryset=models.CulturalValue.objects
                # FIXME: this selects possibly too many values, in case there are multiple
                # values for the same variable, not all of them matching the criteria.
                .filter(variable_id__in=[
                    v.variable.id for v in result_set.variable_descriptions
                ]).prefetch_related('references')))
    if result_set.environmental_variables:
        soc_query = soc_query.prefetch_related(
            Prefetch('environmentalvalue_set',
                     to_attr='selected_evalues',
                     queryset=models.EnvironmentalValue.objects.filter(
                         variable_id__in=[
                             v.id for v in result_set.environmental_variables
                         ])))

    for soc in soc_query:
        soc_result = serializers.SocietyResult(soc)
        # The ``selected_*`` attributes exist only when the matching
        # Prefetch was attached above, hence the same guards here.
        if result_set.variable_descriptions:
            for cval in soc.selected_cvalues:
                soc_result.variable_coded_values.add(cval)
        if result_set.environmental_variables:
            for evalue in soc.selected_evalues:
                soc_result.environmental_values.add(evalue)
        result_set.societies.add(soc_result)

    log.info('mid 1: %s' % (time() - _s, ))

    # search for language trees
    labels = models.LanguageTreeLabels.objects.filter(
        societies__id__in=soc_ids).all()
    log.info('mid 3: %s' % (time() - _s, ))

    # TODO ask @Bibiko once the isolates are in the db under global.tree as
    # string: isol1,isol2,isol3,... -- then read them from
    # global_tree.newick_string.split(',') instead of this hard-coded list.
    known_isolates = (
        'alse1251', 'amas1236', 'bana1292', 'calu1239', 'chim1301',
        'chit1248', 'chon1248', 'coah1252', 'coos1249', 'furr1244',
        'gaga1251', 'guai1237', 'guat1253', 'hadz1240', 'high1242',
        'kara1289', 'karo1304', 'klam1254', 'kute1249', 'lara1258',
        'mull1237', 'natc1249', 'nort2938', 'paez1247', 'pume1238',
        'pura1257', 'pure1242', 'sali1253', 'sand1273', 'seri1257',
        'shom1245', 'sius1254', 'sout1439', 'take1257', 'ticu1245',
        'timu1245', 'tiwi1244', 'toll1241', 'trum1247', 'uruu1244',
        'wara1303', 'wash1253', 'yama1264', 'yuch1247', 'zuni1245',
    )

    global_tree = None
    global_newick = []

    for t in models.LanguageTree.objects\
            .filter(taxa__societies__id__in=soc_ids)\
            .prefetch_related(
                'taxa__languagetreelabelssequence_set__labels',
                'taxa__languagetreelabelssequence_set__society',
            )\
            .distinct():
        if 'global' in t.name:
            global_tree = t
        elif update_newick(t, labels):
            result_set.language_trees.add(t)
            if 'glotto' in t.name:
                # remove last ; in order to be able to join the trees
                global_newick.append(t.newick_string[:-1])

        log.info('mid 4: %s' % (time() - _s, ))

    if global_tree:
        # A set makes the per-isolate membership test O(1).
        langs_in_tree = {str(label.label) for label in labels}
        # add isolates if present in current selection (each at most once,
        # even if more than one 'global' tree matched)
        global_newick.extend(
            '(' + isolate + ':1)'
            for isolate in known_isolates if isolate in langs_in_tree)
        # join all pruned glottolog trees into the global one
        global_tree.newick_string = '(' + ','.join(global_newick) + ');'
        result_set.language_trees.add(global_tree)

    return result_set
Пример #8
0
def trees_from_societies(request):
    """Return the serialized language trees that contain the requested societies.

    Society ids are taken from ``request.query_params``.  If several query
    parameters are supplied, only the values of the last one are used, because
    every iteration overwrites ``soc_ids`` and ``labels``.

    Each matching tree whose name does not contain ``"global"`` is handed to
    ``update_newick`` together with the labels and kept when that call returns
    a truthy value.  When a tree whose name contains ``"global"`` matches, its
    ``newick_string`` is rebuilt (in memory only; nothing is saved here) from
    the kept ``"glotto"`` trees plus the hard-coded isolates that occur among
    the selected labels, and that tree is appended last.

    Returns:
        Response: wraps ``LanguageTreeSerializer(language_trees, many=True).data``.
    """
    # TODO ask @Bibiko once the isolates are in the db under global.tree as
    # string: isol1,isol2,isol3,... -- then read them from
    # global_tree.newick_string.split(',') instead of this hard-coded list.
    known_isolates = (
        "alse1251", "amas1236", "bana1292", "calu1239", "chim1301",
        "chit1248", "chon1248", "coah1252", "coos1249", "furr1244",
        "gaga1251", "guai1237", "guat1253", "hadz1240", "high1242",
        "kara1289", "karo1304", "klam1254", "kute1249", "lara1258",
        "mull1237", "natc1249", "nort2938", "paez1247", "pume1238",
        "pura1257", "pure1242", "sali1253", "sand1273", "seri1257",
        "shom1245", "sius1254", "sout1439", "take1257", "ticu1245",
        "timu1245", "tiwi1244", "toll1241", "trum1247", "uruu1244",
        "wara1303", "wash1253", "yama1264", "yuch1247", "zuni1245",
    )

    language_trees = []
    # Bind every name used below before the loop, so a request without query
    # parameters yields an empty result rather than a NameError.
    soc_ids, labels = [], []
    for _key, values in request.query_params.lists():
        soc_ids = values
        labels = models.LanguageTreeLabels.objects.filter(societies__id__in=soc_ids).all()

    global_tree = None
    global_newick = []

    matching_trees = (
        models.LanguageTree.objects.filter(taxa__societies__id__in=soc_ids)
        .prefetch_related(
            "taxa__languagetreelabelssequence_set__labels", "taxa__languagetreelabelssequence_set__society"
        )
        .distinct()
    )

    for t in matching_trees:
        if "global" in t.name:
            global_tree = t
        elif update_newick(t, labels):
            language_trees.append(t)
            if "glotto" in t.name:
                # remove last ; in order to be able to join the trees
                global_newick.append(t.newick_string[:-1])

    if global_tree:
        # A set makes the per-isolate membership test O(1).
        langs_in_tree = {str(label.label) for label in labels}
        # add isolates if present in current selection (each at most once,
        # even if more than one "global" tree matched)
        global_newick.extend(
            "(" + isolate + ":1)" for isolate in known_isolates if isolate in langs_in_tree
        )
        # join all pruned glottolog trees into the global one
        global_tree.newick_string = "(" + ",".join(global_newick) + ");"
        language_trees.append(global_tree)

    return Response(serializers.LanguageTreeSerializer(language_trees, many=True).data)