def prune_trees():
    """Run ``update_newick`` over every language tree and save the changed ones.

    All ``LanguageTreeLabels`` are handed to ``update_newick`` together with
    each ``LanguageTree`` (visited in ``name`` order). A tree is saved only
    when ``update_newick`` returns a truthy value for it.

    :return: number of trees that were saved.
    """
    all_labels = LanguageTreeLabels.objects.all()
    saved = 0
    for tree in LanguageTree.objects.order_by('name').all():
        if not update_newick(tree, all_labels):
            # Nothing reported for this tree, so leave the stored row alone.
            continue
        saved += 1
        tree.save()
    return saved
def trees_from_societies(request):
    """Return the serialized language trees for the societies in the request.

    Every query parameter is read as a list of society ids and handled in
    turn: the trees having taxa linked to those societies are fetched, and
    each tree for which ``update_newick`` returns a truthy value is
    collected. When a tree whose name contains ``'global'`` is among the
    fetched trees, its Newick string is replaced by the join of the collected
    trees whose name contains ``'glotto'`` plus one single-leaf clade for
    every known isolate that occurs among the labels, and it is collected
    last.

    :param request: request object exposing ``query_params.lists()``.
    :return: ``Response`` holding
        ``LanguageTreeSerializer(language_trees, many=True).data``.
    """
    # TODO ask @Bibiko once the isolates are in the db under global.tree as
    # string: isol1,isol2,isol3,... -- then this tuple can be replaced by
    # global_tree.newick_string.split(',').
    # Kept as a constant so that each isolate is emitted at most once, no
    # matter how many fetched trees have 'global' in their name.
    isolates = (
        'alse1251', 'amas1236', 'bana1292', 'calu1239', 'chim1301',
        'chit1248', 'chon1248', 'coah1252', 'coos1249', 'furr1244',
        'gaga1251', 'guai1237', 'guat1253', 'hadz1240', 'high1242',
        'kara1289', 'karo1304', 'klam1254', 'kute1249', 'lara1258',
        'mull1237', 'natc1249', 'nort2938', 'paez1247', 'pume1238',
        'pura1257', 'pure1242', 'sali1253', 'sand1273', 'seri1257',
        'shom1245', 'sius1254', 'sout1439', 'take1257', 'ticu1245',
        'timu1245', 'tiwi1244', 'toll1241', 'trum1247', 'uruu1244',
        'wara1303', 'wash1253', 'yama1264', 'yuch1247', 'zuni1245',
    )

    language_trees = []
    for _, soc_ids in request.query_params.lists():
        labels = models.LanguageTreeLabels.objects.filter(
            societies__id__in=soc_ids).all()

        global_tree = None
        global_newick = []

        for t in models.LanguageTree.objects\
                .filter(taxa__societies__id__in=soc_ids)\
                .prefetch_related(
                    'taxa__languagetreelabelssequence_set__labels',
                    'taxa__languagetreelabelssequence_set__society',
                )\
                .distinct():
            if 'global' in t.name:
                # The global tree is assembled after all other trees are seen.
                global_tree = t
                continue
            if update_newick(t, labels):
                language_trees.append(t)
                if 'glotto' in t.name:
                    # remove last ; in order to be able to join the trees
                    global_newick.append(t.newick_string[:-1])

        if global_tree:
            # A set gives constant-time membership tests for the isolates.
            langs_in_tree = {str(l.label) for l in labels}
            # add isolates if present in current selection
            global_newick.extend(
                '(' + isolate + ':1)'
                for isolate in isolates if isolate in langs_in_tree)
            # join all pruned glottolog trees into the global one
            global_tree.newick_string = '(' + ','.join(global_newick) + ');'
            language_trees.append(global_tree)

    return Response(
        serializers.LanguageTreeSerializer(language_trees, many=True).data)
def trees_from_societies(request):
    """Return the serialized language trees for the requested societies.

    The society ids are taken from the query parameters; each parameter
    overwrites the previous one, so the values of the last parameter are the
    ones used. Trees having taxa linked to those societies are passed to
    ``update_newick`` and kept when it returns a truthy value.

    :param request: request object exposing ``query_params.lists()``.
    :return: ``Response`` holding
        ``LanguageTreeSerializer(language_trees, many=True).data``.
    """
    soc_ids, labels = [], []
    for _key, values in request.query_params.lists():
        soc_ids = values
        labels = models.LanguageTreeLabels.objects.filter(societies__id__in=soc_ids).all()

    candidate_trees = (
        models.LanguageTree.objects
        .filter(taxa__societies__id__in=soc_ids)
        .prefetch_related(
            'taxa__languagetreelabelssequence_set__labels',
            'taxa__languagetreelabelssequence_set__society',
        )
        .distinct()
    )
    language_trees = [tree for tree in candidate_trees if update_newick(tree, labels)]
    return Response(serializers.LanguageTreeSerializer(language_trees, many=True).data)
def trees_from_societies(request):
    """Serialize the language trees that cover the societies in the request.

    Society ids come from the query parameters (a later parameter replaces
    an earlier one). Each tree with taxa linked to those societies is handed
    to ``update_newick`` along with the matching labels; only trees for
    which that call returns a truthy value end up in the response.

    :param request: request object exposing ``query_params.lists()``.
    :return: ``Response`` holding
        ``LanguageTreeSerializer(language_trees, many=True).data``.
    """
    soc_ids = []
    labels = []
    for _, ids in request.query_params.lists():
        soc_ids = ids
        labels = models.LanguageTreeLabels.objects.filter(
            societies__id__in=soc_ids).all()

    prefetch_paths = (
        'taxa__languagetreelabelssequence_set__labels',
        'taxa__languagetreelabelssequence_set__society',
    )
    matching = models.LanguageTree.objects.filter(
        taxa__societies__id__in=soc_ids)
    matching = matching.prefetch_related(*prefetch_paths).distinct()

    language_trees = []
    for tree in matching:
        if not update_newick(tree, labels):
            continue
        language_trees.append(tree)

    serializer = serializers.LanguageTreeSerializer(language_trees, many=True)
    return Response(serializer.data)
def result_set_from_query_dict(query_dict):
    """Build the society result set matching the criteria in ``query_dict``.

    Recognised keys (all optional):

    * ``'l'``: iterable of language ids.
    * ``'c'``: iterable of dicts, each with a ``'variable'`` id and either an
      ``'id'`` (code id) or, for variables whose ``data_type`` is
      ``'Continuous'``, a ``'min'``/``'max'`` pair.
    * ``'e'``: iterable of ``(variable_id, operator, params)`` triples with
      ``operator`` one of ``'inrange'``, ``'outrange'``, ``'gt'``, ``'lt'``.
    * ``'p'``: iterable of geographic region ids.

    All criteria are combined with ``AND`` into one raw SQL statement that
    selects society ids. The matching societies, their selected cultural and
    environmental values and the language trees covering them are added to
    the result set. When no criterion produces a condition, no society is
    selected.

    :param query_dict: mapping holding the search criteria described above.
    :return: the populated ``serializers.SocietyResultSet``.
    :raises ValueError: if an id or a range bound cannot be converted by
        ``int()`` / ``float()``.
    :raises KeyError: if a ``'c'`` criterion names a variable id that is not
        found in the database.
    :raises AssertionError: if a criterion for a non-continuous variable has
        no ``'id'``.
    """
    from time import time
    _s = time()
    log.info('enter result_set_from_query_dict')

    result_set = serializers.SocietyResultSet()
    sql_joins, sql_where = [], []

    def id_array(ids):
        # Every id goes through int(), so only digits reach the SQL text.
        return '(%s)' % ','.join('%s' % int(i) for i in ids)

    if 'l' in query_dict:
        sql_joins.append(('language', 'l', 'l.id = s.language_id'))
        sql_where.append('l.id IN ' + id_array(query_dict['l']))
        for lang in models.Language.objects.filter(id__in=query_dict['l']):
            result_set.languages.add(lang)

    if 'c' in query_dict:
        variables = {
            v.id: v for v in models.CulturalVariable.objects
            .filter(id__in=[x['variable'] for x in query_dict['c']])
            .prefetch_related(Prefetch(
                'codes',
                queryset=models.CulturalCodeDescription.objects
                .filter(id__in=[x.get('id') for x in query_dict['c']])))
        }

        # groupby() only merges adjacent items, hence the sort on the same key.
        for variable, codes in groupby(
            sorted(query_dict['c'], key=lambda c: c['variable']),
            key=lambda x: x['variable']
        ):
            variable = variables[variable]
            codes = list(codes)
            # One join (and alias) per variable so criteria on different
            # variables can be AND-ed together.
            alias = 'cv%s' % variable.id
            sql_joins.append((
                "culturalvalue",
                alias,
                "{0}.society_id = s.id AND {0}.variable_id = {1}".format(alias, variable.id)
            ))

            if variable.data_type == 'Continuous':
                # A criterion without 'min' means 'NA' values are wanted too.
                include_NA = not all('min' in c for c in codes)
                ors = [
                    "({0}.coded_value_float >= %(min)f AND {0}.coded_value_float <= %(max)f)".format(alias) % c
                    for c in codes if 'min' in c]
                if include_NA:
                    ors.append("%s.coded_value = 'NA'" % alias)
                sql_where.append("(%s)" % ' OR '.join(ors))
                if not include_NA:
                    sql_where.append("{0}.coded_value != 'NA'".format(alias))
            else:
                assert all('id' in c for c in codes)
                sql_where.append("{0}.code_id IN %s".format(alias) % id_array([x['id'] for x in codes]))
            result_set.variable_descriptions.add(serializers.VariableCode(variable.codes, variable))

    if 'e' in query_dict:
        # There can be multiple filters, so we must aggregate the results.
        for varid, criteria in groupby(
            sorted(query_dict['e'], key=lambda c: c[0]),
            key=lambda x: x[0]
        ):
            # The alias is embedded in the SQL text, so derive it from the
            # integer id rather than from the raw value.
            var_id = int(varid)
            alias = 'ev%d' % var_id
            sql_joins.append((
                "environmentalvalue",
                alias,
                "{0}.society_id = s.id AND {0}.variable_id = {1}".format(alias, var_id)))

            for _, operator, params in criteria:
                # Build a list: a map object cannot be indexed on Python 3.
                params = [float(p) for p in params]
                if operator == 'inrange':
                    sql_where.append("{0}.value >= {1:f} AND {0}.value <= {2:f}".format(alias, params[0], params[1]))
                elif operator == 'outrange':
                    # NOTE(review): this selects params[1] <= value <= params[0],
                    # which is empty when params[0] < params[1] -- confirm the
                    # intended meaning of 'outrange'.
                    sql_where.append("{0}.value >= {1:f} AND {0}.value <= {2:f}".format(alias, params[1], params[0]))
                elif operator == 'gt':
                    sql_where.append("{0}.value >= {1:f}".format(alias, params[0]))
                elif operator == 'lt':
                    sql_where.append("{0}.value <= {1:f}".format(alias, params[0]))

        for variable in models.EnvironmentalVariable.objects.filter(id__in=[x[0] for x in query_dict['e']]):
            result_set.environmental_variables.add(variable)

    if 'p' in query_dict:
        sql_joins.append(('geographicregion', 'r', 'r.id = s.region_id'))
        sql_where.append('r.id IN %s' % id_array(query_dict['p']))
        for region in models.GeographicRegion.objects.filter(id__in=query_dict['p']):
            result_set.geographic_regions.add(region)

    if sql_where:
        sql = "select distinct s.id from dplace_app_society as s %s where %s" % (
            ' '.join('join dplace_app_%s as %s on %s' % t for t in sql_joins),
            ' AND '.join(sql_where))
        # The context manager closes the cursor even if the query fails.
        with connection.cursor() as cursor:
            cursor.execute(sql)
            soc_ids = [r[0] for r in cursor.fetchall()]
    else:
        soc_ids = []

    # 'region' is always selected here, so no extra select_related() call is
    # needed when regions are part of the criteria.
    soc_query = models.Society.objects.filter(id__in=soc_ids)\
        .select_related('source', 'language__family', 'language__iso_code', 'region')
    if result_set.variable_descriptions:
        soc_query = soc_query.prefetch_related(Prefetch(
            'culturalvalue_set',
            to_attr='selected_cvalues',
            queryset=models.CulturalValue.objects
            # FIXME: this selects possibly too many values, in case there are multiple
            # values for the same variable, not all of them matching the criteria.
            .filter(variable_id__in=[v.variable.id for v in result_set.variable_descriptions])
            .prefetch_related('references')))
    if result_set.environmental_variables:
        soc_query = soc_query.prefetch_related(Prefetch(
            'environmentalvalue_set',
            to_attr='selected_evalues',
            queryset=models.EnvironmentalValue.objects.filter(
                variable_id__in=[v.id for v in result_set.environmental_variables])))

    for soc in soc_query:
        soc_result = serializers.SocietyResult(soc)
        if result_set.variable_descriptions:
            for cval in soc.selected_cvalues:
                soc_result.variable_coded_values.add(cval)
        if result_set.environmental_variables:
            for evalue in soc.selected_evalues:
                soc_result.environmental_values.add(evalue)
        result_set.societies.add(soc_result)

    log.info('mid 1: %s' % (time() - _s,))

    # search for language trees
    labels = models.LanguageTreeLabels.objects.filter(societies__id__in=soc_ids).all()
    log.info('mid 3: %s' % (time() - _s,))

    # TODO ask @Bibiko once the isolates are in the db under global.tree as
    # string: isol1,isol2,isol3,... -- then this tuple can be replaced by
    # global_tree.newick_string.split(',').
    # Kept as a constant so that each isolate is emitted at most once, no
    # matter how many fetched trees have 'global' in their name.
    isolates = (
        'alse1251', 'amas1236', 'bana1292', 'calu1239', 'chim1301',
        'chit1248', 'chon1248', 'coah1252', 'coos1249', 'furr1244',
        'gaga1251', 'guai1237', 'guat1253', 'hadz1240', 'high1242',
        'kara1289', 'karo1304', 'klam1254', 'kute1249', 'lara1258',
        'mull1237', 'natc1249', 'nort2938', 'paez1247', 'pume1238',
        'pura1257', 'pure1242', 'sali1253', 'sand1273', 'seri1257',
        'shom1245', 'sius1254', 'sout1439', 'take1257', 'ticu1245',
        'timu1245', 'tiwi1244', 'toll1241', 'trum1247', 'uruu1244',
        'wara1303', 'wash1253', 'yama1264', 'yuch1247', 'zuni1245',
    )

    global_tree = None
    global_newick = []
    for t in models.LanguageTree.objects\
            .filter(taxa__societies__id__in=soc_ids)\
            .prefetch_related(
                'taxa__languagetreelabelssequence_set__labels',
                'taxa__languagetreelabelssequence_set__society',
            )\
            .distinct():
        if 'global' in t.name:
            # The global tree is assembled after all other trees are seen.
            global_tree = t
            continue
        if update_newick(t, labels):
            result_set.language_trees.add(t)
            if 'glotto' in t.name:
                # remove last ; in order to be able to join the trees
                global_newick.append(t.newick_string[:-1])
    log.info('mid 4: %s' % (time() - _s,))

    if global_tree:
        # A set gives constant-time membership tests for the isolates.
        langs_in_tree = {str(l.label) for l in labels}
        # add isolates if present in current selection
        global_newick.extend(
            '(' + isolate + ':1)' for isolate in isolates if isolate in langs_in_tree)
        # join all pruned glottolog trees into the global one
        global_tree.newick_string = '(' + ','.join(global_newick) + ');'
        result_set.language_trees.add(global_tree)

    return result_set
def result_set_from_query_dict(query_dict):
    """Collect societies, values and language trees for a search query.

    ``query_dict`` may contain any of these keys:

    * ``'l'``: iterable of language ids.
    * ``'c'``: iterable of dicts, each with a ``'variable'`` id and either an
      ``'id'`` (code id) or, for variables whose ``data_type`` is
      ``'Continuous'``, a ``'min'``/``'max'`` pair.
    * ``'e'``: iterable of ``(variable_id, operator, params)`` triples with
      ``operator`` one of ``'inrange'``, ``'outrange'``, ``'gt'``, ``'lt'``.
    * ``'p'``: iterable of geographic region ids.

    The conditions derived from these keys are joined with ``AND`` in a
    single raw SQL statement returning society ids. Societies with those
    ids are added to the result set together with their selected cultural
    and environmental values, followed by the language trees that cover
    them. If no condition was produced, no society is selected.

    :param query_dict: mapping holding the search criteria described above.
    :return: the populated ``serializers.SocietyResultSet``.
    :raises ValueError: if an id or a range bound cannot be converted by
        ``int()`` / ``float()``.
    :raises KeyError: if a ``'c'`` criterion names a variable id that is not
        found in the database.
    :raises AssertionError: if a criterion for a non-continuous variable has
        no ``'id'``.
    """
    from time import time
    _s = time()
    log.info('enter result_set_from_query_dict')

    result_set = serializers.SocietyResultSet()
    sql_joins, sql_where = [], []

    def id_array(ids):
        # int() on every element keeps anything but digits out of the SQL.
        return '(%s)' % ','.join('%s' % int(i) for i in ids)

    if 'l' in query_dict:
        sql_joins.append(('language', 'l', 'l.id = s.language_id'))
        sql_where.append('l.id IN ' + id_array(query_dict['l']))
        for lang in models.Language.objects.filter(id__in=query_dict['l']):
            result_set.languages.add(lang)

    if 'c' in query_dict:
        variables = {
            v.id: v
            for v in models.CulturalVariable.objects.filter(
                id__in=[x['variable'] for x in query_dict['c']]).
            prefetch_related(
                Prefetch(
                    'codes',
                    queryset=models.CulturalCodeDescription.objects.filter(
                        id__in=[x.get('id') for x in query_dict['c']])))
        }

        # Sorting first is required: groupby() only groups adjacent items.
        for variable, codes in groupby(sorted(query_dict['c'],
                                              key=lambda c: c['variable']),
                                       key=lambda x: x['variable']):
            variable = variables[variable]
            codes = list(codes)
            # A separate join alias per variable lets the criteria for
            # different variables be AND-ed.
            alias = 'cv%s' % variable.id
            sql_joins.append(
                ("culturalvalue", alias,
                 "{0}.society_id = s.id AND {0}.variable_id = {1}".format(
                     alias, variable.id)))

            if variable.data_type == 'Continuous':
                # Any criterion lacking 'min' asks for the 'NA' values as well.
                include_NA = not all('min' in c for c in codes)
                ors = [
                    "({0}.coded_value_float >= %(min)f AND {0}.coded_value_float <= %(max)f)"
                    .format(alias) % c for c in codes if 'min' in c
                ]
                if include_NA:
                    ors.append("%s.coded_value = 'NA'" % alias)
                sql_where.append("(%s)" % ' OR '.join(ors))
                if not include_NA:
                    sql_where.append("{0}.coded_value != 'NA'".format(alias))
            else:
                assert all('id' in c for c in codes)
                sql_where.append("{0}.code_id IN %s".format(alias) %
                                 id_array([x['id'] for x in codes]))
            result_set.variable_descriptions.add(
                serializers.VariableCode(variable.codes, variable))

    if 'e' in query_dict:
        # There can be multiple filters, so we must aggregate the results.
        for varid, criteria in groupby(sorted(query_dict['e'],
                                              key=lambda c: c[0]),
                                       key=lambda x: x[0]):
            # The alias becomes part of the SQL text; build it from the
            # integer id, not from the raw value.
            var_id = int(varid)
            alias = 'ev%d' % var_id
            sql_joins.append(
                ("environmentalvalue", alias,
                 "{0}.society_id = s.id AND {0}.variable_id = {1}".format(
                     alias, var_id)))

            for _, operator, params in criteria:
                # A real list is needed: map objects are not indexable on
                # Python 3.
                params = [float(p) for p in params]
                if operator == 'inrange':
                    sql_where.append(
                        "{0}.value >= {1:f} AND {0}.value <= {2:f}".format(
                            alias, params[0], params[1]))
                elif operator == 'outrange':
                    # NOTE(review): this selects params[1] <= value <=
                    # params[0], which is empty when params[0] < params[1]
                    # -- confirm the intended meaning of 'outrange'.
                    sql_where.append(
                        "{0}.value >= {1:f} AND {0}.value <= {2:f}".format(
                            alias, params[1], params[0]))
                elif operator == 'gt':
                    sql_where.append("{0}.value >= {1:f}".format(
                        alias, params[0]))
                elif operator == 'lt':
                    sql_where.append("{0}.value <= {1:f}".format(
                        alias, params[0]))

        for variable in models.EnvironmentalVariable.objects.filter(
                id__in=[x[0] for x in query_dict['e']]):
            result_set.environmental_variables.add(variable)

    if 'p' in query_dict:
        sql_joins.append(('geographicregion', 'r', 'r.id = s.region_id'))
        sql_where.append('r.id IN %s' % id_array(query_dict['p']))
        for region in models.GeographicRegion.objects.filter(
                id__in=query_dict['p']):
            result_set.geographic_regions.add(region)

    if sql_where:
        sql = "select distinct s.id from dplace_app_society as s %s where %s" % (
            ' '.join('join dplace_app_%s as %s on %s' % t for t in sql_joins),
            ' AND '.join(sql_where))
        # Using the cursor as a context manager closes it even on errors.
        with connection.cursor() as cursor:
            cursor.execute(sql)
            soc_ids = [r[0] for r in cursor.fetchall()]
    else:
        soc_ids = []

    # 'region' is part of this select_related() call already, so it is not
    # requested a second time when regions are among the criteria.
    soc_query = models.Society.objects.filter(id__in=soc_ids)\
        .select_related('source', 'language__family', 'language__iso_code', 'region')
    if result_set.variable_descriptions:
        soc_query = soc_query.prefetch_related(
            Prefetch(
                'culturalvalue_set',
                to_attr='selected_cvalues',
                queryset=models.CulturalValue.objects
                # FIXME: this selects possibly too many values, in case there are multiple
                # values for the same variable, not all of them matching the criteria.
                .filter(variable_id__in=[
                    v.variable.id for v in result_set.variable_descriptions
                ]).prefetch_related('references')))
    if result_set.environmental_variables:
        soc_query = soc_query.prefetch_related(
            Prefetch('environmentalvalue_set',
                     to_attr='selected_evalues',
                     queryset=models.EnvironmentalValue.objects.filter(
                         variable_id__in=[
                             v.id for v in result_set.environmental_variables
                         ])))

    for soc in soc_query:
        soc_result = serializers.SocietyResult(soc)
        if result_set.variable_descriptions:
            for cval in soc.selected_cvalues:
                soc_result.variable_coded_values.add(cval)
        if result_set.environmental_variables:
            for evalue in soc.selected_evalues:
                soc_result.environmental_values.add(evalue)
        result_set.societies.add(soc_result)

    log.info('mid 1: %s' % (time() - _s, ))

    # search for language trees
    labels = models.LanguageTreeLabels.objects.filter(
        societies__id__in=soc_ids).all()
    log.info('mid 3: %s' % (time() - _s, ))

    # TODO ask @Bibiko once the isolates are in the db under global.tree as
    # string: isol1,isol2,isol3,... -- then this tuple can be replaced by
    # global_tree.newick_string.split(',').
    # Kept as a constant so that each isolate is emitted at most once, no
    # matter how many fetched trees have 'global' in their name.
    isolates = (
        'alse1251', 'amas1236', 'bana1292', 'calu1239', 'chim1301',
        'chit1248', 'chon1248', 'coah1252', 'coos1249', 'furr1244',
        'gaga1251', 'guai1237', 'guat1253', 'hadz1240', 'high1242',
        'kara1289', 'karo1304', 'klam1254', 'kute1249', 'lara1258',
        'mull1237', 'natc1249', 'nort2938', 'paez1247', 'pume1238',
        'pura1257', 'pure1242', 'sali1253', 'sand1273', 'seri1257',
        'shom1245', 'sius1254', 'sout1439', 'take1257', 'ticu1245',
        'timu1245', 'tiwi1244', 'toll1241', 'trum1247', 'uruu1244',
        'wara1303', 'wash1253', 'yama1264', 'yuch1247', 'zuni1245',
    )

    global_tree = None
    global_newick = []
    for t in models.LanguageTree.objects\
            .filter(taxa__societies__id__in=soc_ids)\
            .prefetch_related(
                'taxa__languagetreelabelssequence_set__labels',
                'taxa__languagetreelabelssequence_set__society',
            )\
            .distinct():
        if 'global' in t.name:
            # The global tree is put together once all other trees are seen.
            global_tree = t
            continue
        if update_newick(t, labels):
            result_set.language_trees.add(t)
            if 'glotto' in t.name:
                # remove last ; in order to be able to join the trees
                global_newick.append(t.newick_string[:-1])
    log.info('mid 4: %s' % (time() - _s, ))

    if global_tree:
        # A set gives constant-time membership tests for the isolates.
        langs_in_tree = {str(l.label) for l in labels}
        # add isolates if present in current selection
        global_newick.extend(
            '(' + isolate + ':1)'
            for isolate in isolates if isolate in langs_in_tree)
        # join all pruned glottolog trees into the global one
        global_tree.newick_string = '(' + ','.join(global_newick) + ');'
        result_set.language_trees.add(global_tree)

    return result_set
def trees_from_societies(request):
    """Return the serialized language trees for the societies in the request.

    Every query parameter is read as a list of society ids and handled in
    turn: the trees having taxa linked to those societies are fetched, and
    each tree for which ``update_newick`` returns a truthy value is
    collected. When a tree whose name contains ``"global"`` is among the
    fetched trees, its Newick string is replaced by the join of the collected
    trees whose name contains ``"glotto"`` plus one single-leaf clade for
    every known isolate that occurs among the labels, and it is collected
    last.

    :param request: request object exposing ``query_params.lists()``.
    :return: ``Response`` holding
        ``LanguageTreeSerializer(language_trees, many=True).data``.
    """
    # TODO ask @Bibiko once the isolates are in the db under global.tree as
    # string: isol1,isol2,isol3,... -- then this tuple can be replaced by
    # global_tree.newick_string.split(',').
    # Kept as a constant so that each isolate is emitted at most once, no
    # matter how many fetched trees have "global" in their name.
    isolates = (
        "alse1251",
        "amas1236",
        "bana1292",
        "calu1239",
        "chim1301",
        "chit1248",
        "chon1248",
        "coah1252",
        "coos1249",
        "furr1244",
        "gaga1251",
        "guai1237",
        "guat1253",
        "hadz1240",
        "high1242",
        "kara1289",
        "karo1304",
        "klam1254",
        "kute1249",
        "lara1258",
        "mull1237",
        "natc1249",
        "nort2938",
        "paez1247",
        "pume1238",
        "pura1257",
        "pure1242",
        "sali1253",
        "sand1273",
        "seri1257",
        "shom1245",
        "sius1254",
        "sout1439",
        "take1257",
        "ticu1245",
        "timu1245",
        "tiwi1244",
        "toll1241",
        "trum1247",
        "uruu1244",
        "wara1303",
        "wash1253",
        "yama1264",
        "yuch1247",
        "zuni1245",
    )

    language_trees = []
    for _, soc_ids in request.query_params.lists():
        labels = models.LanguageTreeLabels.objects.filter(societies__id__in=soc_ids).all()

        global_tree = None
        global_newick = []

        for t in (
            models.LanguageTree.objects.filter(taxa__societies__id__in=soc_ids)
            .prefetch_related(
                "taxa__languagetreelabelssequence_set__labels", "taxa__languagetreelabelssequence_set__society"
            )
            .distinct()
        ):
            if "global" in t.name:
                # The global tree is assembled after all other trees are seen.
                global_tree = t
                continue
            if update_newick(t, labels):
                language_trees.append(t)
                if "glotto" in t.name:
                    # remove last ; in order to be able to join the trees
                    global_newick.append(t.newick_string[:-1])

        if global_tree:
            # A set gives constant-time membership tests for the isolates.
            langs_in_tree = {str(l.label) for l in labels}
            # add isolates if present in current selection
            global_newick.extend("(" + isolate + ":1)" for isolate in isolates if isolate in langs_in_tree)
            # join all pruned glottolog trees into the global one
            global_tree.newick_string = "(" + ",".join(global_newick) + ");"
            language_trees.append(global_tree)

    return Response(serializers.LanguageTreeSerializer(language_trees, many=True).data)