Пример #1
0
def cfweka(input_dict, widget, name):
    from services.webservice import WebService
    wseval = WebService('http://vihar.ijs.si:8092/Evaluation?wsdl',
                        float(input_dict['timeout']))
    wsutil = WebService('http://vihar.ijs.si:8092/Utilities?wsdl',
                        float(input_dict['timeout']))
    somelearner = input_dict['learner']
    print somelearner
    data = input_dict['data']
    #    arffstr = toARFFstring(data).getvalue()
    #    #print arffstr
    #    wekaInstances = wsutil.client.arff_to_weka_instances(arff = arffstr, class_index = odt.domain.index(odt.domain.classVar))
    #    #print wekaInstances
    #    model = wseval.client.build_classifier(learner = somelearner, instances = wekaInstances['instances'])
    #    #return {}

    #    addMetaID(data)
    k = int(input_dict['k_folds'])
    noisyIndices = []
    selection = orange.MakeRandomIndicesCV(data, folds=k)
    count_noisy = [0] * k
    for test_fold in range(k):
        train_arffstr = toARFFstring(
            data.select(selection, test_fold, negate=1)).getvalue()
        train_data = wsutil.client.arff_to_weka_instances(
            arff=train_arffstr,
            class_index=data.domain.index(data.domain.classVar))['instances']

        test_inds = [
            i for i in range(len(selection)) if selection[i] == test_fold
        ]
        test_arffstr = toARFFstring(data.select(selection,
                                                test_fold)).getvalue()
        test_data = wsutil.client.arff_to_weka_instances(
            arff=test_arffstr,
            class_index=data.domain.index(data.domain.classVar))['instances']
        #print "\t\t", "Learned on", len(train_data), "examples"
        #file.flush()
        print "pred cl build"
        classifier = wseval.client.build_classifier(
            learner=somelearner, instances=train_data)['classifier']
        print "po cl build"
        eval_test_data = wseval.client.apply_classifier(classifier=classifier,
                                                        instances=test_data)
        print "po eval"
        for i in range(len(eval_test_data)):
            #print "Test data length:", len(test_data), "Test inds length:", len(test_inds), "Eval Test data length:", len(eval_test_data)
            print i, "v for zanki", eval_test_data[i]['classes'], data[
                test_inds[i]].getclass()
            if eval_test_data[i]['classes'] != unicode(
                    data[test_inds[i]].getclass()):
                # selection_filter[int(example[meta_id].value)] = 0
                noisyIndices.append(test_inds[i])
                count_noisy[test_fold] += 1
        # END test_data
        widget.progress = int((test_fold + 1) * 1.0 / k * 100)
        widget.save()
    # END test_fold
    return {'inds': sorted(noisyIndices), 'name': getWekaName(name)}
Пример #2
0
def call_webservice(input_dict):
    '''
    Calls one method of a WSDL web service with arguments taken from
    input_dict and returns whatever the service returns.

    Parameters:
        input_dict: mapping with the keys 'wsdl' (service address),
            'timeout' (converted to float), 'wsdl_method' (name of the
            method to call), 'sendemptystrings' ("true" to forward empty
            strings, anything else to drop them) and one entry per input
            of the selected method.

    Returns:
        The unmodified result of the remote call.

    Raises:
        KeyError: if the service offers no method called
            input_dict['wsdl_method'].
    '''
    from services.webservice import WebService
    ws = WebService(input_dict['wsdl'], float(input_dict['timeout']))

    wanted_name = input_dict['wsdl_method']
    selected_method = None
    for method in ws.methods:
        if method['name'] == wanted_name:
            selected_method = method
    if selected_method is None:
        # Fail with an explicit message instead of the opaque
        # KeyError: 'name' an empty placeholder dict would produce.
        raise KeyError('web service method %r not found' % (wanted_name,))

    function_to_call = getattr(ws.client, selected_method['name'])
    ws_dict = {}
    for param in selected_method['inputs']:
        key = param['name']
        try:
            ws_dict[key] = input_dict[key]
            if param['type'] == bool:
                # Checkbox values arrive as the strings "true"/"false".
                ws_dict[key] = 1 if input_dict[key] == "true" else 0
            if ws_dict[key] == '':
                # Empty strings are forwarded only on explicit request.
                if input_dict['sendemptystrings'] != "true":
                    ws_dict.pop(key)
        except Exception as e:
            # Deliberate best effort: report the problem and fall back to
            # an empty string for this argument.
            print(e)
            ws_dict[key] = ''
    return function_to_call(**ws_dict)
Пример #3
0
def nlp_def_extraction_terms(input_dict):
    '''
    Term-based definition extraction through the GlossaryExtractionByTerms
    web-service method.

    All settings are read from input_dict; only 'wsdl' is optional and
    defaults to 'http://vihar.ijs.si:8099'.  Returns a dict whose
    'sentences' entry is the 'candidates' field of the service response.
    '''
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8099')
    # Keyword names on the left are the ones the service expects; the
    # input_dict keys on the right are the widget's own names.
    request = dict(
        corpus=input_dict['annotations'],
        candidates=input_dict['term_candidates'],
        lang=input_dict['lang'],
        termsPerSent=input_dict['terms_per_sentence'],
        nominatives=input_dict['nominatives'],
        select=input_dict['threshold'],
        verb_two_terms=input_dict['verb_two_terms'],
        multiword_term=input_dict['multiword_term'],
        num_multiterms=input_dict['num_multiterms'],
        term_beginning=input_dict['term_beginning'],
    )
    client = WebService(endpoint, 60000).client
    reply = client.GlossaryExtractionByTerms(**request)
    return {'sentences': reply['candidates']}
Пример #4
0
def ilp_sdmaleph(input_dict):
    '''
    Runs the remote `sdmaleph` web-service method on the given examples.

    Parameters:
        input_dict: mapping that may contain
            'examples'  - an orange.ExampleTable (saved as .tab and sent as
                          text), a list (sent as JSON) or a str (sent as is);
            'mapping'   - forwarded unchanged;
            'ontology', 'relation' - iterables whose items are wrapped as
                          {'ontology': item} / {'relation': item}; a missing
                          or empty value is sent as an empty list;
            'posClassVal', 'cutoff', 'minPos', 'noise', 'clauseLen',
            'dataFormat' - forwarded, with '' replaced by None.

    Returns:
        dict with a single key 'theory' taken from the service response.

    Raises:
        Exception: if 'examples' is none of the supported types.
    '''
    import orange

    def blank_to_none(key):
        # An empty string means "not set" and is sent as None.
        value = input_dict.get(key)
        return None if value == '' else value

    ws = WebService('http://vihar.ijs.si:8097', 3600)
    data = input_dict.get('examples')
    if isinstance(data, orange.ExampleTable):
        # The table is written to a temporary .tab file by name and read
        # back through the still-open handle to obtain its text form.
        with tempfile.NamedTemporaryFile(suffix='.tab', delete=True) as f:
            data.save(f.name)
            examples = f.read()
    elif isinstance(data, list):
        examples = json.dumps(data)
    elif isinstance(data, str):
        examples = data
    else:
        # Implicit literal concatenation keeps the message free of the run
        # of spaces a backslash continuation would embed in it.
        raise Exception('Illegal examples format. '
                        'Supported formats: str, list or Orange')

    # Treat a missing 'ontology'/'relation' entry as "none given" rather
    # than failing while iterating over None.
    ontologies = [{'ontology': item}
                  for item in input_dict.get('ontology') or []]
    relations = [{'relation': item}
                 for item in input_dict.get('relation') or []]

    response = ws.client.sdmaleph(
        examples=examples,
        mapping=input_dict.get('mapping'),
        ontologies=ontologies,
        relations=relations,
        posClassVal=blank_to_none('posClassVal'),
        cutoff=blank_to_none('cutoff'),
        minPos=blank_to_none('minPos'),
        noise=blank_to_none('noise'),
        clauseLen=blank_to_none('clauseLen'),
        dataFormat=blank_to_none('dataFormat')
    )
    return {'theory': response['theory']}
Пример #5
0
def load_corpus(input_dict):
    '''
    Reads a file, base64-encodes its contents and sends them to the
    `parseFile` web-service method.

    Parameters:
        input_dict: mapping with 'file' (path handed to safeOpen) and an
            optional 'wsdl' (defaults to
            'http://vihar.ijs.si:8095/totale?wsdl').

    Returns:
        dict with a single key 'corpus' holding the 'parsedFile' field of
        the service response.
    '''
    path = input_dict['file']
    f = safeOpen(path)
    try:
        data = base64.b64encode(f.read())
    finally:
        # Release the handle even if reading fails.
        # NOTE(review): assumes safeOpen returns a file-like object with
        # close() -- confirm against its definition.
        f.close()
    fname = os.path.basename(path)
    wsdl = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')
    ws = WebService(wsdl, 60000)
    response = ws.client.parseFile(fileName=fname, inFile=data)
    return {'corpus': response['parsedFile']}
Пример #6
0
def nlp_def_extraction_wnet(input_dict):
    '''
    WordNet-based definition extraction through the
    GlossaryExtractionByWnet web-service method.

    Reads 'annotations' and 'lang' (required) and 'wsdl' (optional,
    defaults to 'http://vihar.ijs.si:8099') from input_dict.  Returns a
    dict whose 'sentences' entry is the 'candidates' field of the response.
    '''
    request = {
        'corpus': input_dict['annotations'],
        'lang': input_dict['lang'],
    }
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8099')
    client = WebService(endpoint, 60000).client
    reply = client.GlossaryExtractionByWnet(**request)
    return {'sentences': reply['candidates']}
Пример #7
0
def get_cross_validation_accuracy(arff, repeats=10, folds=5):
    '''
    Returns the mean accuracy of a J48 learner over repeated remote
    cross-validation runs.

    Parameters:
        arff: data set passed as `arff` to the arff_to_weka_instances
            service method.
        repeats: number of cross-validation runs to average (default 10).
        folds: number of folds per run (default 5).

    Returns:
        float, the arithmetic mean of the 'accuracy' values reported by
        the cross_validate service method.

    Raises:
        ValueError: if repeats is smaller than 1.
    '''
    if repeats < 1:
        raise ValueError('repeats must be at least 1')

    # The service clients do not depend on the iteration, so they are
    # created once and reused for every run.
    j48 = WebService("http://vihar.ijs.si:8092/Classification?wsdl")
    arff2weka = WebService("http://vihar.ijs.si:8092/Utilities?wsdl")
    cv = WebService("http://vihar.ijs.si:8092/Evaluation?wsdl", timeout=600)

    accuracies = []
    for _ in range(repeats):
        # The learner and instances are requested anew for each run.
        j48_learner = j48.client.J48(params="")['J48_learner']
        instances = arff2weka.client.arff_to_weka_instances(
            arff=arff, class_index="")['instances']
        cv_response = cv.client.cross_validate(
            learner=j48_learner, instances=instances, folds=folds)
        accuracies.append(float(cv_response['accuracy']))

    return sum(accuracies) * 1. / len(accuracies)
Пример #8
0
def nlp_term_extraction(input_dict):
    '''
    Extracts term candidates from totrtale annotations through the
    TermExtraction web-service method.

    Reads 'annotations' and 'lang' (required) and 'wsdl' (optional,
    defaults to 'http://vihar.ijs.si:8095/totale?wsdl') from input_dict.
    The request always uses threshold=0.  Returns a dict whose
    'candidates' entry is the 'candidates' field of the response.
    '''
    corpus = input_dict['annotations']
    language = input_dict['lang']
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')
    client = WebService(endpoint, 60000).client
    reply = client.TermExtraction(corpus=corpus, lang=language, threshold=0)
    return {'candidates': reply['candidates']}
Пример #9
0
def nlp_def_extraction_patterns(input_dict):
    '''
    Pattern-based definition extraction through the
    GlossaryExtractionByPatterns web-service method.

    Reads 'annotations', 'lang' and 'pattern' (required) and 'wsdl'
    (optional, defaults to 'http://vihar.ijs.si:8099') from input_dict.
    Returns a dict whose 'sentences' entry is the 'candidates' field of
    the response.
    '''
    corpus = input_dict['annotations']
    language = input_dict['lang']
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8099')
    service = WebService(endpoint, 60000)
    # 'pattern' is looked up after the service object has been created.
    request = {
        'corpus': corpus,
        'lang': language,
        'pattern': input_dict['pattern'],
    }
    reply = service.client.GlossaryExtractionByPatterns(**request)
    return {'sentences': reply['candidates']}
Пример #10
0
def nlp_term_extraction(input_dict):
    '''
    Extracts term candidates from totrtale annotations through the
    TermExtraction web-service method.

    Reads 'annotations' and 'lang' (required) and 'wsdl' (optional,
    defaults to 'http://vihar.ijs.si:8095/totale?wsdl') from input_dict.
    Annotations containing the TEI namespace root tag are first passed
    through XMLtoTEI.  The request always uses threshold=0.  Returns a
    dict whose 'candidates' entry is the 'candidates' field of the
    response.
    '''
    tei_marker = '<TEI xmlns="http://www.tei-c.org/ns/1.0">'

    corpus = input_dict['annotations']
    language = input_dict['lang']
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')

    # Convert only when the TEI root tag is present in the input.
    corpus = XMLtoTEI(corpus) if tei_marker in corpus else corpus

    client = WebService(endpoint, 60000).client
    reply = client.TermExtraction(corpus=corpus, lang=language, threshold=0)
    return {'candidates': reply['candidates']}
Пример #11
0
def call_webservice(input_dict):
    '''
    Calls one method of a WSDL web service with arguments taken from
    input_dict and normalises the result.

    Parameters:
        input_dict: mapping with the keys 'wsdl' (service address),
            'timeout' (converted to float), 'wsdl_method' (name of the
            method to call), 'sendemptystrings' ("true" to forward empty
            strings, anything else to drop them) and one entry per input
            of the selected method.

    Returns:
        - the result itself when it is a dict;
        - for a list result, a dict mapping every key found in the list's
          dict items to the list of values seen for that key (non-dict
          items are ignored);
        - any other result unchanged.

    Raises:
        KeyError: if the service offers no method called
            input_dict['wsdl_method'].
    '''
    from services.webservice import WebService
    ws = WebService(input_dict['wsdl'], float(input_dict['timeout']))

    wanted_name = input_dict['wsdl_method']
    selected_method = None
    for method in ws.methods:
        if method['name'] == wanted_name:
            selected_method = method
    if selected_method is None:
        # Fail with an explicit message instead of the opaque
        # KeyError: 'name' an empty placeholder dict would produce.
        raise KeyError('web service method %r not found' % (wanted_name,))

    function_to_call = getattr(ws.client, selected_method['name'])
    ws_dict = {}
    for param in selected_method['inputs']:
        key = param['name']
        try:
            ws_dict[key] = input_dict[key]
            if param['type'] == bool:
                # Checkbox values arrive as the strings "true"/"false".
                ws_dict[key] = 1 if input_dict[key] == "true" else 0
            if ws_dict[key] == '':
                # Empty strings are forwarded only on explicit request.
                if input_dict['sendemptystrings'] != "true":
                    ws_dict.pop(key)
        except Exception as e:
            # Deliberate best effort: report the problem and fall back to
            # an empty string for this argument.
            print(e)
            ws_dict[key] = ''

    results = function_to_call(**ws_dict)
    if isinstance(results, dict):
        return results
    if isinstance(results, list):
        # Turn a list of records into one dict of per-key value lists.
        # isinstance also accepts dict subclasses among the records.
        merged = {}
        for record in results:
            if isinstance(record, dict):
                for field, value in record.items():
                    merged.setdefault(field, []).append(value)
        return merged
    return results
Пример #12
0
def cfdecide(input_dict, widget):
    from pysimplesoap.client import SoapFault
    somelearner = input_dict['learner']
    print somelearner
    # SWITCH TO PROCESSING WITH WEKA CLASSIFIERS
    if type(somelearner) == unicode:
        from services.webservice import WebService
        wsutil = WebService('http://vihar.ijs.si:8092/Utilities?wsdl',
                            float(input_dict['timeout']))
        name = ""
        try:
            name = wsutil.client.print_model(
                model=somelearner)['model_as_string']
            print wsutil.client.print_model(model=somelearner), name
        except SoapFault:
            # TODO something
            print "Soap fault: unicode string is not a Weka classification learner/model."
            return {}
        return cfweka(input_dict, widget, name)
    else:
        return cforange(input_dict, widget)
Пример #13
0
def nlp_totrtale(input_dict):
    '''
    Annotates a corpus through the runTotale web-service method.

    Reads 'corpus' and 'lang' plus the flags 'xml', 'postprocess',
    'bohoricica' and 'antique' from input_dict; a flag is on exactly when
    its value equals the string 'true'.  'wsdl' is optional and defaults
    to 'http://vihar.ijs.si:8095/totale?wsdl'.  A non-empty 'error' field
    in the response is printed.  Returns a dict whose 'annotations' entry
    is the 'annotatedFile' field of the response.
    '''
    def is_on(key):
        # Flags are transported as the strings 'true' / anything else.
        return input_dict[key] == 'true'

    text = input_dict['corpus']
    language = input_dict['lang']
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')
    as_xml = is_on('xml')
    run_postprocessing = is_on('postprocess')
    use_bohoricica = is_on('bohoricica')
    is_antique = is_on('antique')

    client = WebService(endpoint, 60000).client
    reply = client.runTotale(
        inFile=text,
        language=language,
        postProcessing=run_postprocessing,
        bohoricica=use_bohoricica,
        antiqueSlovenian=is_antique,
        outputAsXML=as_xml,
    )
    problems = reply['error']
    if problems:
        print(problems)
    return {'annotations': reply['annotatedFile']}
Пример #14
0
 def import_webservice(self, request):
     '''
     Imports a WSDL web service as a new widget category.

     A category named after the service is created under the
     'WSDL Imports' category (a numeric suffix such as ' (1)' is added
     while the user already has a category with that name).  For every
     method of the service an abstract widget is created with two fixed
     parameters ('timeout' and 'sendemptystrings'), one input per method
     input and one output per method output.

     Returns an HttpResponse carrying JSON with the new category's id.
     '''
     wsdl = request.data.get('wsdl')
     service = WebService(wsdl)
     imports_root, _ = Category.objects.get_or_create(name='WSDL Imports')
     category = Category()

     # Find a category name this user does not have yet.
     candidate = service.name
     suffix = 0
     while request.user.categories.filter(name=candidate).count() > 0:
         suffix += 1
         candidate = service.name + ' (%d)' % suffix

     category.name = candidate
     category.user = request.user
     category.workflow = request.user.userprofile.active_workflow
     category.parent = imports_root
     category.save()

     # Parameters attached to every imported widget:
     # (name, short_name, variable, default, parameter_type)
     fixed_parameters = (
         ("Timeout", "to", "timeout", '60', 'text'),
         ("Send empty strings to webservices", "ses", "sendemptystrings",
          '', 'checkbox'),
     )

     for method in service.methods:
         widget = AbstractWidget()
         widget.name = method['name']
         widget.action = 'call_webservice'
         widget.wsdl = service.wsdl_url
         widget.wsdl_method = method['name']
         widget.description = method['documentation']
         widget.user = request.user
         widget.category = category
         widget.save()

         for label, short, variable, default, kind in fixed_parameters:
             parameter = AbstractInput()
             parameter.parameter = True
             parameter.widget = widget
             parameter.name = label
             parameter.short_name = short
             parameter.variable = variable
             parameter.default = default
             parameter.parameter_type = kind
             parameter.save()

         for spec in method['inputs']:
             widget_input = AbstractInput()
             widget_input.name = spec['name']
             widget_input.variable = spec['name']
             widget_input.short_name = spec['name'][:3]
             widget_input.description = ''
             widget_input.required = False
             widget_input.parameter = False
             # Boolean inputs become checkboxes, everything else a textarea.
             widget_input.parameter_type = (
                 'checkbox' if spec['type'] == bool else 'textarea')
             widget_input.default = ''
             widget_input.widget = widget
             widget_input.save()

         for spec in method['outputs']:
             widget_output = AbstractOutput()
             widget_output.name = spec['name']
             widget_output.variable = spec['name']
             widget_output.short_name = spec['name'][:3]
             widget_output.description = ''
             widget_output.widget = widget
             widget_output.save()

     payload = json.dumps({'category_id': category.id})
     return HttpResponse(payload, 'application/json')