def assign_jxn_potential(article):
    """Tag an article with its junction-potential correction status.

    Takes the article's first full-text record, extracts the methods section
    with ``getMethodsTag`` and tests each sentence against ``jxn_not_re``
    and, only if that does not match, ``jxn_re``.  For every label seen
    ('Corrected' and/or 'Not corrected') a 'JxnPotential' MetaData row is
    fetched or created and attached through ``update_amd_obj``.  Finally the
    full text's ArticleFullTextStat row is flagged ``methods_tag_found``.

    Nothing happens when the article has no full text.  When no methods
    section is found, the article's pmid, title and journal are printed and
    nothing is stored.
    """
    if article.articlefulltext_set.all().count() <= 0:
        return

    full_text_ob = article.articlefulltext_set.all()[0]
    methods_tag = getMethodsTag(full_text_ob.get_content(), article)
    if methods_tag is None:
        print (article.pmid, article.title, article.journal)
        return

    # Collapse every whitespace run so sentence splitting sees clean text.
    normalized = re.sub(r'\s+', ' ', methods_tag.text)
    labels_seen = set()
    for sentence in nltk.sent_tokenize(normalized):
        # The "not corrected" pattern takes precedence within a sentence.
        if jxn_not_re.findall(sentence):
            labels_seen.add('Not corrected')
        elif jxn_re.findall(sentence):
            labels_seen.add('Corrected')

    # Both labels may be attached when different sentences disagree.
    for label in ('Corrected', 'Not corrected'):
        if label in labels_seen:
            metadata_ob = m.MetaData.objects.get_or_create(name='JxnPotential', value=label)[0]
            update_amd_obj(article, metadata_ob)

    stat_ob = m.ArticleFullTextStat.objects.get_or_create(article_full_text=full_text_ob)[0]
    stat_ob.methods_tag_found = True
    stat_ob.save()
def assign_rec_temp(article):
    """Text-mine the recording temperature from an article's methods section.

    Every sentence of the methods section is checked against ``celsius_re``
    and, failing that, ``room_temp_re``.  Candidate temperatures are reduced
    to a single value by ``validate_temp_list``; the result is stored as a
    ContValue, wrapped in a 'RecTemp' MetaData row and attached to the
    article through ``update_amd_obj``.  The full text's ArticleFullTextStat
    row is flagged ``methods_tag_found`` only when a temperature was stored.

    Returns None in every case.  Nothing is stored when the article has no
    full text, when no methods section is found (the article's pmid, title
    and journal are printed instead) or when no valid temperature is mined.
    """
    # Guard against articles without full text, as the sibling assign_*
    # functions do; indexing the empty queryset would raise IndexError.
    if article.articlefulltext_set.all().count() == 0:
        return
    full_text_ob = article.articlefulltext_set.all()[0]
    methods_tag = getMethodsTag(full_text_ob.get_content(), article)
    if methods_tag is None:
        print (article.pmid, article.title, article.journal)
        return

    text = re.sub(r'\s+', ' ', methods_tag.text)
    temp_dict_list = []
    for s in nltk.sent_tokenize(text):
        if celsius_re.findall(s):
            # Parse only the (up to) 20 characters before the last u'°C' in
            # the sentence; the window ends just after the degree sign.  If
            # the literal u'°C' is absent (rfind returns -1) the window is
            # empty and resolveDataFloat receives an empty string.
            degree_ind = s.rfind(u'°C')
            degree_close_str = s[max(0, degree_ind - 20):min(len(s), degree_ind + 1)]
            retDict = resolveDataFloat(degree_close_str)
            if 'value' in retDict:
                temp_dict_list.append(retDict)
        elif room_temp_re.findall(s):
            # A room-temperature mention is recorded as 22 with a 20-24 range.
            temp_dict_list.append({'value': 22.0, 'maxRange': 24.0, 'minRange': 20.0})

    if not temp_dict_list:
        return
    temp_dict_fin = validate_temp_list(temp_dict_list)
    if not temp_dict_fin:
        return

    lookup = {
        'mean': temp_dict_fin['value'],
        'min_range': temp_dict_fin.get('minRange'),
        'max_range': temp_dict_fin.get('maxRange'),
        'stderr': temp_dict_fin.get('error'),
    }
    # Reuse an existing matching ContValue when there is one.  Slicing with
    # [:1] yields an empty list instead of raising IndexError on no match,
    # so the creation fallback below is actually reachable.
    existing = list(m.ContValue.objects.filter(**lookup)[:1])
    if existing:
        cont_value_ob = existing[0]
    else:
        cont_value_ob = m.ContValue.objects.get_or_create(**lookup)[0]

    metadata_ob = m.MetaData.objects.get_or_create(name='RecTemp', cont_value=cont_value_ob)[0]
    update_amd_obj(article, metadata_ob)
    aftStatOb = m.ArticleFullTextStat.objects.get_or_create(article_full_text=full_text_ob)[0]
    aftStatOb.methods_tag_found = True
    aftStatOb.save()
def assign_prep_type(article):
    """Tag an article with its preparation type(s).

    When the article has full text with a methods section, each sentence is
    tested against ``culture_re``, ``in_vitro_re``, ``in_vivo_re`` and
    ``model_re``.  A 'PrepType' MetaData row is attached via
    ``update_amd_obj`` for each of 'cell culture', 'in vitro' and 'in vivo'
    that was seen.  A text-mined 'model' hit is collected but not stored.
    The full text's ArticleFullTextStat row is flagged ``methods_tag_found``.

    If text mining attached nothing, the article's MeSH terms are used
    instead: ``culture_mesh`` gives 'cell culture' and ``in_silico_mesh``
    gives 'model'.
    """
    found_in_text = False
    if article.articlefulltext_set.all().count() > 0:
        full_text_ob = article.articlefulltext_set.all()[0]
        methods_tag = getMethodsTag(full_text_ob.get_content(), article)
        if methods_tag is not None:
            normalized = re.sub(r'\s+', ' ', methods_tag.text)
            sentences = nltk.sent_tokenize(normalized)
            detectors = (
                (culture_re, 'cell culture'),
                (in_vitro_re, 'in vitro'),
                (in_vivo_re, 'in vivo'),
                (model_re, 'model'),
            )
            prep_types = set()
            for sentence in sentences:
                # The patterns are independent; one sentence may match several.
                for pattern, label in detectors:
                    if pattern.findall(sentence):
                        prep_types.add(label)

            # 'model' is deliberately absent here: only these three labels
            # are written from text-mined evidence.
            for label in ('cell culture', 'in vitro', 'in vivo'):
                if label in prep_types:
                    metadata_ob = m.MetaData.objects.get_or_create(name='PrepType', value=label)[0]
                    update_amd_obj(article, metadata_ob)
                    found_in_text = True

            stat_ob = m.ArticleFullTextStat.objects.get_or_create(article_full_text=full_text_ob)[0]
            stat_ob.methods_tag_found = True
            stat_ob.save()
        else:
            print (article.pmid, article.title, article.journal)

    if not found_in_text:
        # Fall back to MeSH annotations when the text yielded nothing.
        mesh_terms = article.terms.all()
        for mesh_term, label in ((culture_mesh, 'cell culture'), (in_silico_mesh, 'model')):
            if mesh_term in mesh_terms:
                metadata_ob = m.MetaData.objects.get_or_create(name='PrepType', value=label)[0]
                update_amd_obj(article, metadata_ob)
def assign_electrode_type(article):
    """Tag an article with the electrode type(s) used for recording.

    When the article has full text with a methods section, each sentence is
    tested against ``whole_re`` (stored as 'Patch-clamp') and ``sharp_re``
    (stored as 'Sharp').  An 'ElectrodeType' MetaData row is attached via
    ``update_amd_obj`` for each label seen, and the full text's
    ArticleFullTextStat row is flagged ``methods_tag_found``.

    If text mining attached nothing, a ``patch_mesh`` entry among the
    article's MeSH terms results in 'Patch-clamp' being attached.
    """
    found_in_text = False
    if article.articlefulltext_set.all().count() > 0:
        full_text_ob = article.articlefulltext_set.all()[0]
        methods_tag = getMethodsTag(full_text_ob.get_content(), article)
        if methods_tag is not None:
            normalized = re.sub(r'\s+', ' ', methods_tag.text)
            electrodes = set()
            for sentence in nltk.sent_tokenize(normalized):
                # Both patterns are checked; a sentence can mention both.
                if whole_re.findall(sentence):
                    electrodes.add('Patch-clamp')
                if sharp_re.findall(sentence):
                    electrodes.add('Sharp')

            for label in ('Patch-clamp', 'Sharp'):
                if label in electrodes:
                    metadata_ob = m.MetaData.objects.get_or_create(name='ElectrodeType', value=label)[0]
                    update_amd_obj(article, metadata_ob)
                    found_in_text = True

            stat_ob = m.ArticleFullTextStat.objects.get_or_create(article_full_text=full_text_ob)[0]
            stat_ob.methods_tag_found = True
            stat_ob.save()
        else:
            print (article.pmid, article.title, article.journal)

    if not found_in_text:
        # Fall back to the MeSH annotation when the text yielded nothing.
        mesh_terms = article.terms.all()
        if patch_mesh in mesh_terms:
            metadata_ob = m.MetaData.objects.get_or_create(name='ElectrodeType', value='Patch-clamp')[0]
            update_amd_obj(article, metadata_ob)
def assign_solution_concs(article):
    """Check that an article has a methods section usable for solution mining.

    Returns:
        -1 if the article has no ArticleFullText record,
        -2 if getMethodsTag finds no methods section,
        -3 if the whitespace-normalized methods text is 100 characters or
           shorter,
         1 otherwise.

    NOTE(review): the unconditional ``return 1`` right after the length check
    makes everything below it unreachable, so no solution is currently
    recorded and no "FlagSoln" metadata is written.  It looks like a
    temporary switch-off of the mining pipeline -- confirm before removing.
    """
#     print "Textmining article: %s" % article.pk
    full_text_list = m.ArticleFullText.objects.filter(article = article.pk)
    
    if not full_text_list:
        return -1
    
    # Only the first full-text record is examined.
    full_text = full_text_list[0].get_content()
    methods_tag = getMethodsTag(full_text, article)
    
    if methods_tag is None:
        print "No methods tag found article id: %s, pmid: %s" % (article.pk, article.pmid)
        return -2
    
    # Collapse all whitespace runs to single spaces before measuring/splitting.
    article_text = re.sub('\s+', ' ', methods_tag.text)
    
    if len(article_text) <= 100:
        print "Methods section is too small. Article id: %s, pmid: %s" % (article.pk, article.pmid)
        return -3
    
    # NOTE(review): early exit -- all code from here to the end of the
    # function never runs.
    return 1
    
    sentences = nltk.sent_tokenize(article_text)
    list_of_solns = []
    wrap_soln_text = []
    
    # Consider a machine learning approach to get the weights, also assign higher score when compounds are in close proximity to avoid: 
    # "The calcium-free saline solution containing cobalt was composed of (in mM): 115 NaCl, 23 NaHCO3, 3.1 KCl, 1.15 CoCl2, 1.2 MgCl2, and 6 glucose."
    # "The extracellular solution to isolate calcium current utilizing Ba2+ as a charge carrier contained (mm): tetraethylammonium chloride 120, BaCl2 10, MgCl2 1, Hepes 10, and glucose 10, pH adjusted to 7.3 with Tris."
    
    # Score each sentence with hand-set weights (maximum 9); a sentence
    # scoring at least 7 is kept as a candidate solution description.
    for i, sentence in enumerate(sentences):
        matchScore = 0
        if conc_re.search(sentence):
            matchScore += 3
        if mgca_re.search(sentence):
            matchScore += 2
        if na_re.search(sentence):
            matchScore += 1
        if k_re.search(sentence):
            matchScore += 1
        if cl_re.search(sentence):
            matchScore += 2
            
        if matchScore >= 7:
            list_of_solns.append(sentence)
            # Context for the candidate: whatever get_preceeding_text returns
            # (presumably the sentences before index i -- verify against its
            # definition) plus the following sentence, or "" for the last one.
            if i < len(sentences) - 1:
                current_text_wrap = get_preceeding_text(sentences, i)
                current_text_wrap.append(sentences[i+1])
            else:
                current_text_wrap = get_preceeding_text(sentences, i)
                current_text_wrap.append("")
            wrap_soln_text.append(current_text_wrap)
    
    recording_solution_absent = True
    storage_solns = []
    unassigned_solns = []
    
    internalID = 0
    externalID = 0
    # Classify every candidate by scanning the candidate sentence itself
    # (j == -1) and then each of its context sentences until one matches.
    for i, soln in enumerate(list_of_solns):
        for j in range(-1, len(wrap_soln_text[i])):
            if j == -1:
                soln_id_text = soln
            else:
                soln_id_text = wrap_soln_text[i][j]
                
            if pipette_re.search(soln_id_text):
                # A pipette match that also matches other_re drops the
                # candidate: it is neither recorded nor marked unassigned.
                if other_re.search(soln_id_text):
                    break
                record_compounds(article, soln, wrap_soln_text[i], "internal_%s" % internalID)
                internalID += 1
                break
                
            elif record_re.search(soln_id_text):
                # other_re only vetoes once a recording solution has already
                # been stored; the first recording match is always kept.
                if other_re.search(soln_id_text) and not recording_solution_absent:
                    break
                recording_solution_absent = False
                record_compounds(article, soln, wrap_soln_text[i], "external_%s" % externalID)
                externalID += 1
                break
                
            elif cutstore_re.search(soln_id_text):
                storage_solns.append([soln, wrap_soln_text[i]])
                break
            
            # Last context sentence reached without any pattern matching.
            elif j == len(wrap_soln_text[i]) - 1:
                unassigned_solns.append([soln, wrap_soln_text[i]])
    
    # No recording solution found: use the most recently collected
    # cutting/storage solution as the external one.
    # NOTE(review): externalID is not incremented here, so the flag logic
    # below sees externalID == 0 in this case -- confirm that is intended.
    if recording_solution_absent and storage_solns:
        recording_solution_absent = False
        soln = storage_solns.pop()
        record_compounds(article, soln[0], soln[1], "external_%s" % externalID)

#     if recording_solution_absent and unassigned_solns:
#         recording_solution_absent = False
#         for soln in unassigned_solns:
#             record_compounds(article, soln, wrap_soln_text[i], "unassigned_%s" % externalID)
#             externalID += 1
    # Flag stored with the article:
    #   0 - exactly one external and one internal solution, none unassigned
    #   1 - one external, more than one internal, none unassigned
    #   2 - one external, more than one internal, some unassigned
    #   3 - any other combination
    flag_soln = 3
    
    if externalID == 1 and internalID == 1 and len(unassigned_solns) == 0:
        flag_soln = 0
    elif externalID == 1 and internalID > 1 and len(unassigned_solns) == 0:
        flag_soln = 1
    elif externalID == 1 and internalID > 1 and len(unassigned_solns) > 0:
        flag_soln = 2

    flag_soln_ob = m.ContValue.objects.get_or_create(mean = flag_soln, stderr = 0, stdev = 0)[0]
    flag_soln_meta_ob = m.MetaData.objects.get_or_create(name = "FlagSoln", cont_value = flag_soln_ob)[0]
    update_amd_obj(article, flag_soln_meta_ob)
        
    return 1
def _age_near_last_match(sentence, marker_pattern):
    """Parse a numeric age from the text around the last marker in a sentence.

    Finds the last match of ``marker_pattern`` in ``sentence``, takes the
    window from 15 characters before the match start to 15 characters after
    it, deletes all ASCII letters from that window and hands the remainder
    to ``resolveDataFloat``.

    Returns the dict from ``resolveDataFloat`` when it contains a 'value'
    key, otherwise None (also when the marker does not occur at all).
    """
    last_start = None
    for match in re.finditer(marker_pattern, sentence):
        last_start = match.start(0)
    if last_start is None:
        return None

    window = sentence[max(0, last_start - 15):min(len(sentence), last_start + 15)]
    # Drop letters (e.g. the "P" prefix or the word "day") so only digits
    # and punctuation are passed to the number parser.
    window = window.translate(dict((ord(c), u'') for c in string.ascii_letters)).strip()
    parsed = resolveDataFloat(window)
    if 'value' in parsed:
        return parsed
    return None


def assign_animal_age(article):
    """Text-mine the animal age from an article's methods section.

    Sentences matching ``p_age_re`` are parsed around their last "P<digit>"
    marker; otherwise sentences matching ``day_re`` are parsed around their
    last " day" marker.  Candidate ages are reduced to a single value by
    ``validate_age_list``; the result is stored as a ContValue, wrapped in
    an 'AnimalAge' MetaData row and attached to the article through
    ``update_amd_obj``.  The full text's ArticleFullTextStat row is flagged
    ``methods_tag_found`` only when an age was stored.

    Returns None in every case.  Nothing is stored when the article has no
    full text, when no methods section is found (the article's pmid, title
    and journal are printed instead) or when no valid age is mined.
    """
    # Guard against articles without full text, as the sibling assign_*
    # functions do; indexing the empty queryset would raise IndexError.
    if article.articlefulltext_set.all().count() == 0:
        return
    full_text_ob = article.articlefulltext_set.all()[0]
    methods_tag = getMethodsTag(full_text_ob.get_content(), article)
    if methods_tag is None:
        print (article.pmid, article.title, article.journal)
        return

    text = re.sub(r'\s+', ' ', methods_tag.text)
    age_dict_list = []
    for s in nltk.sent_tokenize(text):
        # A sentence matching p_age_re never falls through to the day-based
        # marker, even when no "P<digit>" marker is then located in it.
        if p_age_re.findall(s):
            marker_pattern = r'P\d'
        elif day_re.findall(s):
            marker_pattern = r'\sday'
        else:
            continue
        age_dict = _age_near_last_match(s, marker_pattern)
        if age_dict is not None:
            age_dict_list.append(age_dict)

    if not age_dict_list:
        return
    age_dict_fin = validate_age_list(age_dict_list)
    if not age_dict_fin:
        return

    cont_value_ob = m.ContValue.objects.get_or_create(
        mean=age_dict_fin['value'],
        min_range=age_dict_fin.get('minRange'),
        max_range=age_dict_fin.get('maxRange'),
        stderr=age_dict_fin.get('error'),
    )[0]
    metadata_ob = m.MetaData.objects.get_or_create(name='AnimalAge', cont_value=cont_value_ob)[0]
    update_amd_obj(article, metadata_ob)
    aftStatOb = m.ArticleFullTextStat.objects.get_or_create(article_full_text=full_text_ob)[0]
    aftStatOb.methods_tag_found = True
    aftStatOb.save()