Exemplo n.º 1
0
def add_signature(request):
    """Create a Signature and its Expression rows from an uploaded table.

    On POST the view reads the uploaded tab-separated file and derives
    optional metadata from a file name of the form
    ``name=...;tissue=...;diet=...``.  Values submitted with the form take
    precedence for the name and the diet; a tissue encoded in the file name
    takes precedence over the form's tissue selection.  A ``Signature`` is
    stored together with its tissues, followed by one ``Transcript`` and one
    ``Expression`` per data row.  Rows that cannot be converted are reported
    on stdout and skipped.

    Returns:
        A redirect to the signature list on success, a redirect back to the
        add form (with an error message) when required input is missing or
        unknown, and the rendered form when nothing was posted.
    """
    add_url = '/expressions/signature/add/'
    form = SignatureForm(request.POST or None, request.FILES or None)
    if request.POST:
        # Uploads arrive in request.FILES, never in request.POST.  Deriving a
        # signature from selected profiles is not implemented in this view,
        # so an uploaded file is required.
        upload = request.FILES.get('file')
        if upload is None:
            msg = "No file or profiles selected. Please provide either a signature "\
                  "file to upload or select profiles to derive a signature."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)
        filename = upload.name.replace('.txt', '')
        data = upload.read().replace('\r', '').split('\n')

        # Infer descriptive information from the file name.  ``info`` stays
        # empty when the name does not follow the ``key=value;...`` pattern.
        info = {}
        if filename.startswith('name='):
            for item in filename.split(';'):
                key, sep, value = item.partition('=')
                if sep:
                    info[key] = value

        if 'tissue' in info:
            # @ is unlikely to be used in a file name, so it serves as the
            # common separator for "a-b", "a, b" and "a and b".
            tissues = info['tissue'].replace('-', '@').replace(
                ', ', '@').replace(' and ', '@').split('@')
        else:
            tissues = request.POST.getlist('tissues')

        # Diet: the form value wins over the file name.
        regimen = None
        try:
            if request.POST.get('diet'):
                regimen = Regimen.objects.get(pk=request.POST['diet'])
            elif 'diet' in info:
                regimen = Regimen.objects.get(shortcut__exact=info['diet'])
        except (ValueError, Regimen.DoesNotExist):
            regimen = None
        if regimen is None:
            msg = "Diet not found in Denigma db. Please select a diet."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Species from form:
        try:
            species = Species.objects.get(pk=request.POST['species'])
        except (KeyError, ValueError, Species.DoesNotExist) as e:
            msg = "Species not found in Denigma db. %s. Please select a species." % e
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        name = request.POST.get('name') or info.get('name')
        if not name:
            msg = "No name given. Please provide a name for the signature."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Resolve every tissue before anything is written, so an unknown
        # tissue does not leave a half-created signature behind.
        tissue_objects = []
        for tissue_ref in tissues:
            try:
                # Primary key if it was selected in the form.
                tissue = Tissue.objects.get(pk=tissue_ref)
            except (TypeError, ValueError, Tissue.DoesNotExist):
                print("Did not found tissue by pk.")
                try:
                    # Name if it was inferred from the file name.
                    tissue = Tissue.objects.get(name__iexact=tissue_ref)
                except Tissue.DoesNotExist as e:
                    messages.add_message(
                        request, messages.ERROR,
                        ugettext("%s: %s" % (str(e)[:-1], tissue_ref)))
                    return redirect(add_url)
            tissue_objects.append(tissue)

        # Create signature:
        signature = Signature(name=name, diet=regimen, species=species)
        signature.save()
        for tissue in tissue_objects:
            signature.tissues.add(tissue)
        print("Tissues: %s" % (signature.tissues.all(),))

        # Map normalised column names to their position.  Any column whose
        # title contains "DR" is stored as "exp" and any containing "AL" as
        # "ctr"; the remaining titles are lower-cased and a few known aliases
        # are folded onto "symbol" and "seq_id".
        header = {}
        for index, column in enumerate(data[0].split('\t')):
            if "DR" in column:
                column = "exp"
            elif "AL" in column:
                column = "ctr"
            key = column.lower()
            key = key.replace('gene symbol', 'symbol')
            key = key.replace('gene_symbol', 'symbol')
            key = key.replace(' ', '_')
            key = key.replace('platform_cloneid', 'seq_id')
            key = key.replace('ensembl_gene', 'seq_id')
            header[key] = index

        rows = data[1:]
        print(len(rows))
        for line in rows:
            if not line:
                continue
            columns = line.split('\t')
            if len(columns) < len(header):
                continue
            try:
                seq_id = columns[header['seq_id']]
                symbol = columns[header['symbol']]
                if symbol == "None":
                    symbol = None
                ctr = float(columns[header['ctr']])
                exp = float(columns[header['exp']])
                if "ratio" in header:
                    ratio = float(columns[header['ratio']])
                else:
                    ratio = float(columns[header['fold_change']])
                # Ratios below one are reported as negative fold changes.
                fold_change = -(1 / ratio) if ratio < 1 else ratio

                # Replicate columns ("ctr..." / "exp..." other than the plain
                # "ctr" / "exp" columns) feed the two-sample test.
                ctr_values = []
                exp_values = []
                for k, v in header.items():
                    if k.startswith('ctr') and k != 'ctr':
                        ctr_values.append(float(columns[v]))
                    elif k.startswith('exp') and k != 'exp':
                        exp_values.append(float(columns[v]))

                # The effect size is not calculated for uploaded tables.
                es = None
                if exp_values != ctr_values:
                    pvalue = t_two_sample(ctr_values, exp_values)[1]
                else:
                    pvalue = 1

                transcript = Transcript(seq_id=seq_id,
                                        symbol=symbol,
                                        ratio=ratio,
                                        fold_change=fold_change,
                                        pvalue=pvalue,
                                        effect_size=es)
                transcript.save()
                Expression.objects.create(signature=signature,
                                          transcript=transcript,
                                          exp=exp,
                                          ctr=ctr,
                                          ratio=ratio,
                                          fold_change=fold_change,
                                          pvalue=pvalue,
                                          effect_size=es)
            except (ValueError, ZeroDivisionError) as e:
                # Report the offending row itself; the parsed values may not
                # exist yet when the conversion fails early.
                print("%s %s" % (e, line))

        msg = "Successfully integrated signature: %s" % signature.name
        messages.add_message(request, messages.SUCCESS, ugettext(msg))
        return redirect('/expressions/signatures/')

    ctx = {'form': form, 'action': 'Add'}
    return render_to_response('expressions/signature_form.html',
                              ctx,
                              context_instance=RequestContext(request))
Exemplo n.º 2
0
def create_signatures(request):
    """Generate one Signature per tissue combination from stored profiles.

    Profiles are grouped by the space-joined names of their tissues.  Within
    a group the profile whose diet shortcut is ``'DR'`` is treated as the
    experimental condition and any other profile as the control; when several
    profiles compete for the same slot the last one wins.  For every
    transcript present in both profiles the mean probe expressions, their
    ratio, fold change, effect size and p-value are stored as a
    ``Transcript`` plus an ``Expression``.

    Groups that lack one of the two profiles, transcripts without control
    probes and transcripts with a zero control mean or zero ratio are
    reported on stdout and skipped.

    Returns:
        A redirect to the signature list.
    """
    # Sort profiles according to tissues: {tissue names: [DR, control]}.
    pairs = {}
    for profile in Profile.objects.all():
        tissues = ' '.join([tissue.name for tissue in profile.tissue.all()])
        print("%s %s" % (tissues, profile.diet.shortcut))
        pair = pairs.setdefault(tissues, [None, None])
        pair[0 if profile.diet.shortcut == 'DR' else 1] = profile
    print(pairs)

    for tissues, pair in pairs.items():
        print("%s %s" % (tissues, pair))
        exp_profile, ctr_profile = pair
        if exp_profile is None or ctr_profile is None:
            # Nothing to compare against without both conditions.
            print("Skipping %s: DR or control profile missing." % tissues)
            continue

        signature = Signature(name=tissues,
                              species=exp_profile.species,
                              diet=exp_profile.diet)
        signature.save()
        for tissue in exp_profile.tissue.all():
            signature.tissues.add(tissue)

        # Collect probe expressions per transcript for both conditions.
        # Probes named "RANDOM..." are ignored; the text before the first
        # "P" of a probe name is used as the transcript identifier.
        grouped = []
        for profile in (exp_profile, ctr_profile):
            transcripts = {}
            for probe in Probe.objects.filter(profile=profile):
                if probe.name.startswith('RANDOM'):
                    continue
                transcript_name = probe.name.split('P')[0]
                transcripts.setdefault(transcript_name, []).append(
                    probe.expression)
            grouped.append(transcripts)
        exp_transcripts, ctr_transcripts = grouped

        for transcript_name, exp_expression in exp_transcripts.items():
            ctr_expression = ctr_transcripts.get(transcript_name)
            if not ctr_expression:
                print("Skipping %s: no control probes." % transcript_name)
                continue

            # NOTE(review): under Python 2 these divisions truncate if the
            # probe expressions are integers - confirm the field type.
            exp = sum(exp_expression) / len(exp_expression)
            ctr = sum(ctr_expression) / len(ctr_expression)
            ratio = exp / ctr if ctr else 0
            if not ratio:
                print("Skipping %s: zero expression." % transcript_name)
                continue
            # Ratios below one are reported as negative fold changes.
            fold_change = -(1 / ratio) if ratio < 1 else ratio

            if len(exp_expression) == 1 or len(ctr_expression) == 1:
                # A single probe gives no spread to test against.
                es = pvalue = None
            else:
                es = effect_size(exp_expression, ctr_expression)
                pvalue = t_two_sample(exp_expression, ctr_expression)[1]

            transcript = Transcript(seq_id=transcript_name,
                                    ratio=ratio,
                                    fold_change=fold_change,
                                    effect_size=es,
                                    pvalue=pvalue)
            transcript.save()
            Expression.objects.create(signature=signature,
                                      transcript=transcript,
                                      exp=exp,
                                      ctr=ctr,
                                      ratio=ratio,
                                      fold_change=fold_change,
                                      effect_size=es,
                                      pvalue=pvalue)
    print('Done')
    return redirect('/expressions/signatures/')
Exemplo n.º 3
0
 def recalculate(self):
     """Recompute the p-value, the condition means and their ratio.

     ``self.exp`` and ``self.ctr`` are handed to ``t_two_sample`` and are then
     each replaced by their ``mean``; ``self.ratio`` becomes the quotient of
     the two means.
     """
     exp_samples, ctr_samples = self.exp, self.ctr
     # NOTE(review): the complete return value of t_two_sample is stored
     # here - confirm whether only its p-value element is wanted.
     self.pvalue = t_two_sample(exp_samples, ctr_samples)
     self.exp = mean(exp_samples)
     self.ctr = mean(ctr_samples)
     self.ratio = self.exp / self.ctr
Exemplo n.º 4
0
def add_signature(request):
    """Create a Signature and its Expression rows from an uploaded table.

    On POST the view reads the uploaded tab-separated file and derives
    optional metadata from a file name of the form
    ``name=...;tissue=...;diet=...``.  Values submitted with the form take
    precedence for the name and the diet; a tissue encoded in the file name
    takes precedence over the form's tissue selection.  A ``Signature`` is
    stored together with its tissues, followed by one ``Transcript`` and one
    ``Expression`` per data row.  Rows that cannot be converted are reported
    on stdout and skipped.

    Returns:
        A redirect to the signature list on success, a redirect back to the
        add form (with an error message) when required input is missing or
        unknown, and the rendered form when nothing was posted.
    """
    add_url = '/expressions/signature/add/'
    form = SignatureForm(request.POST or None, request.FILES or None)
    if request.POST:
        # Uploads arrive in request.FILES, never in request.POST.  Deriving a
        # signature from selected profiles is not implemented in this view,
        # so an uploaded file is required.
        upload = request.FILES.get('file')
        if upload is None:
            msg = "No file or profiles selected. Please provide either a signature "\
                  "file to upload or select profiles to derive a signature."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)
        filename = upload.name.replace('.txt', '')
        data = upload.read().replace('\r', '').split('\n')

        # Infer descriptive information from the file name.  ``info`` stays
        # empty when the name does not follow the ``key=value;...`` pattern.
        info = {}
        if filename.startswith('name='):
            for item in filename.split(';'):
                key, sep, value = item.partition('=')
                if sep:
                    info[key] = value

        if 'tissue' in info:
            # @ is unlikely to be used in a file name, so it serves as the
            # common separator for "a-b", "a, b" and "a and b".
            tissues = info['tissue'].replace('-', '@').replace(
                ', ', '@').replace(' and ', '@').split('@')
        else:
            tissues = request.POST.getlist('tissues')

        # Diet: the form value wins over the file name.
        regimen = None
        try:
            if request.POST.get('diet'):
                regimen = Regimen.objects.get(pk=request.POST['diet'])
            elif 'diet' in info:
                regimen = Regimen.objects.get(shortcut__exact=info['diet'])
        except (ValueError, Regimen.DoesNotExist):
            regimen = None
        if regimen is None:
            msg = "Diet not found in Denigma db. Please select a diet."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Species from form:
        try:
            species = Species.objects.get(pk=request.POST['species'])
        except (KeyError, ValueError, Species.DoesNotExist) as e:
            msg = "Species not found in Denigma db. %s. Please select a species." % e
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        name = request.POST.get('name') or info.get('name')
        if not name:
            msg = "No name given. Please provide a name for the signature."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Resolve every tissue before anything is written, so an unknown
        # tissue does not leave a half-created signature behind.
        tissue_objects = []
        for tissue_ref in tissues:
            try:
                # Primary key if it was selected in the form.
                tissue = Tissue.objects.get(pk=tissue_ref)
            except (TypeError, ValueError, Tissue.DoesNotExist):
                print("Did not found tissue by pk.")
                try:
                    # Name if it was inferred from the file name.
                    tissue = Tissue.objects.get(name__iexact=tissue_ref)
                except Tissue.DoesNotExist as e:
                    messages.add_message(
                        request, messages.ERROR,
                        ugettext("%s: %s" % (str(e)[:-1], tissue_ref)))
                    return redirect(add_url)
            tissue_objects.append(tissue)

        # Create signature:
        signature = Signature(name=name, diet=regimen, species=species)
        signature.save()
        for tissue in tissue_objects:
            signature.tissues.add(tissue)
        print("Tissues: %s" % (signature.tissues.all(),))

        # Map normalised column names to their position.  Any column whose
        # title contains "DR" is stored as "exp" and any containing "AL" as
        # "ctr"; the remaining titles are lower-cased and a few known aliases
        # are folded onto "symbol" and "seq_id".
        header = {}
        for index, column in enumerate(data[0].split('\t')):
            if "DR" in column:
                column = "exp"
            elif "AL" in column:
                column = "ctr"
            key = column.lower()
            key = key.replace('gene symbol', 'symbol')
            key = key.replace('gene_symbol', 'symbol')
            key = key.replace(' ', '_')
            key = key.replace('platform_cloneid', 'seq_id')
            key = key.replace('ensembl_gene', 'seq_id')
            header[key] = index

        rows = data[1:]
        print(len(rows))
        for line in rows:
            if not line:
                continue
            columns = line.split('\t')
            if len(columns) < len(header):
                continue
            try:
                seq_id = columns[header['seq_id']]
                symbol = columns[header['symbol']]
                if symbol == "None":
                    symbol = None
                ctr = float(columns[header['ctr']])
                exp = float(columns[header['exp']])
                if "ratio" in header:
                    ratio = float(columns[header['ratio']])
                else:
                    ratio = float(columns[header['fold_change']])
                # Ratios below one are reported as negative fold changes.
                fold_change = -(1 / ratio) if ratio < 1 else ratio

                # Replicate columns ("ctr..." / "exp..." other than the plain
                # "ctr" / "exp" columns) feed the two-sample test.
                ctr_values = []
                exp_values = []
                for k, v in header.items():
                    if k.startswith('ctr') and k != 'ctr':
                        ctr_values.append(float(columns[v]))
                    elif k.startswith('exp') and k != 'exp':
                        exp_values.append(float(columns[v]))

                # The effect size is not calculated for uploaded tables.
                es = None
                if exp_values != ctr_values:
                    pvalue = t_two_sample(ctr_values, exp_values)[1]
                else:
                    pvalue = 1

                transcript = Transcript(seq_id=seq_id,
                                        symbol=symbol,
                                        ratio=ratio,
                                        fold_change=fold_change,
                                        pvalue=pvalue,
                                        effect_size=es)
                transcript.save()
                Expression.objects.create(signature=signature,
                                          transcript=transcript,
                                          exp=exp,
                                          ctr=ctr,
                                          ratio=ratio,
                                          fold_change=fold_change,
                                          pvalue=pvalue,
                                          effect_size=es)
            except (ValueError, ZeroDivisionError) as e:
                # Report the offending row itself; the parsed values may not
                # exist yet when the conversion fails early.
                print("%s %s" % (e, line))

        msg = "Successfully integrated signature: %s" % signature.name
        messages.add_message(request, messages.SUCCESS, ugettext(msg))
        return redirect('/expressions/signatures/')

    ctx = {'form': form, 'action': 'Add'}
    return render_to_response('expressions/signature_form.html', ctx,
        context_instance=RequestContext(request))
Exemplo n.º 5
0
def create_signatures(request):
    """Generate one Signature per tissue combination from stored profiles.

    Profiles are grouped by the space-joined names of their tissues.  Within
    a group the profile whose diet shortcut is ``'DR'`` is treated as the
    experimental condition and any other profile as the control; when several
    profiles compete for the same slot the last one wins.  For every
    transcript present in both profiles the mean probe expressions, their
    ratio, fold change, effect size and p-value are stored as a
    ``Transcript`` plus an ``Expression``.

    Groups that lack one of the two profiles, transcripts without control
    probes and transcripts with a zero control mean or zero ratio are
    reported on stdout and skipped.

    Returns:
        A redirect to the signature list.
    """
    # Sort profiles according to tissues: {tissue names: [DR, control]}.
    pairs = {}
    for profile in Profile.objects.all():
        tissues = ' '.join([tissue.name for tissue in profile.tissue.all()])
        print("%s %s" % (tissues, profile.diet.shortcut))
        pair = pairs.setdefault(tissues, [None, None])
        pair[0 if profile.diet.shortcut == 'DR' else 1] = profile
    print(pairs)

    for tissues, pair in pairs.items():
        print("%s %s" % (tissues, pair))
        exp_profile, ctr_profile = pair
        if exp_profile is None or ctr_profile is None:
            # Nothing to compare against without both conditions.
            print("Skipping %s: DR or control profile missing." % tissues)
            continue

        signature = Signature(name=tissues, species=exp_profile.species, diet=exp_profile.diet)
        signature.save()
        for tissue in exp_profile.tissue.all():
            signature.tissues.add(tissue)

        # Collect probe expressions per transcript for both conditions.
        # Probes named "RANDOM..." are ignored; the text before the first
        # "P" of a probe name is used as the transcript identifier.
        grouped = []
        for profile in (exp_profile, ctr_profile):
            transcripts = {}
            for probe in Probe.objects.filter(profile=profile):
                if probe.name.startswith('RANDOM'):
                    continue
                transcript_name = probe.name.split('P')[0]
                transcripts.setdefault(transcript_name, []).append(probe.expression)
            grouped.append(transcripts)
        exp_transcripts, ctr_transcripts = grouped

        for transcript_name, exp_expression in exp_transcripts.items():
            ctr_expression = ctr_transcripts.get(transcript_name)
            if not ctr_expression:
                print("Skipping %s: no control probes." % transcript_name)
                continue

            # NOTE(review): under Python 2 these divisions truncate if the
            # probe expressions are integers - confirm the field type.
            exp = sum(exp_expression)/len(exp_expression)
            ctr = sum(ctr_expression)/len(ctr_expression)
            ratio = exp/ctr if ctr else 0
            if not ratio:
                print("Skipping %s: zero expression." % transcript_name)
                continue
            # Ratios below one are reported as negative fold changes.
            fold_change = -(1/ratio) if ratio < 1 else ratio

            if len(exp_expression) == 1 or len(ctr_expression) == 1:
                # A single probe gives no spread to test against.
                es = pvalue = None
            else:
                es = effect_size(exp_expression, ctr_expression)
                pvalue = t_two_sample(exp_expression, ctr_expression)[1]

            transcript = Transcript(seq_id=transcript_name,
                                    ratio=ratio,
                                    fold_change=fold_change,
                                    effect_size=es,
                                    pvalue=pvalue)
            transcript.save()
            Expression.objects.create(signature=signature, transcript=transcript,
                                      exp=exp, ctr=ctr, ratio=ratio, fold_change=fold_change,
                                      effect_size=es, pvalue=pvalue)
    print('Done')
    return redirect('/expressions/signatures/')
Exemplo n.º 6
0
 def recalculate(self):
     """Recompute the p-value, the condition means and their ratio.

     ``self.exp`` and ``self.ctr`` are handed to ``t_two_sample`` and are then
     each replaced by their ``mean``; ``self.ratio`` becomes the quotient of
     the two means.
     """
     exp_samples, ctr_samples = self.exp, self.ctr
     # NOTE(review): the complete return value of t_two_sample is stored
     # here - confirm whether only its p-value element is wanted.
     self.pvalue = t_two_sample(exp_samples, ctr_samples)
     self.exp = mean(exp_samples)
     self.ctr = mean(ctr_samples)
     self.ratio = self.exp/self.ctr