Example #1
def bigdata_mse(request,input_dict,output_dict,widget):
    from discomll.utils import accuracy
    from disco.core import result_iterator
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_measures'
    tag = input_dict["predictions"]
    destination = MEDIA_ROOT+'/'+folder+"/"+tag[0][6:]+'.txt'
    ensure_dir(destination)

    if input_dict["dataset"].params["id_index"] == -1:
        input_dict["string"] = "ID index should be defined."
    elif not os.path.isfile(destination):  # file doesn't exist
        results = accuracy.measure(test_data=input_dict["dataset"],
                                   predictions=input_dict["predictions"],
                                   measure="mse")
        string = "Mean squared error\n"
        for k, v in result_iterator(results):
            string += str(v) + "\n"
        input_dict["string"] = string

        f = open(destination,'w')
        f.write(str(v))
        f.close()
        
    else:
        string = "Mean squared error\n"
        f = open(destination,'r')
        input_dict["string"] = string + str(f.readlines()[0])
        f.close()


    return render(request, 'visualizations/display_string.html',{'widget':widget,'input_dict':input_dict,'output_dict':output_dict})
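Note: every example in this listing calls ensure_dir(destination) from workflows.helpers before opening the destination file for writing. The helper's implementation is not shown here; a minimal sketch, assuming it only creates the missing parent directory of the given path, could look like this:

import os

def ensure_dir(path):
    # Hedged sketch, not the project's actual helper: create the directory
    # portion of `path` if it does not exist yet, so open(path, 'w') succeeds.
    directory = os.path.dirname(path)
    if directory and not os.path.exists(directory):
        os.makedirs(directory)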
Example #2
 def upload(self, request, pk=None):
     input = self.get_object()
     try:
         destination = settings.FILES_FOLDER + str(
             input.widget.workflow.id) + '/' + request.FILES['file'].name
         ensure_dir(destination)
         destination_file = open(destination, 'wb')
         for chunk in request.FILES['file'].chunks():
             destination_file.write(chunk)
         destination_file.close()
         input.value = destination
         input.save()
         input.widget.unfinish()
         data = json.dumps({
             'status': 'ok',
             'message': 'File successfully uploaded'
         })
     except Exception as e:
         data = json.dumps({
             'status':
             'error',
             'message':
             'Problem uploading file: {}'.format(str(e))
         })
     return HttpResponse(data, 'application/json')
Example #3
def model_view(request, input_dict, output_dict, widget):
    from discomll.utils import model_view
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_models'
    tag_name = input_dict["fitmodel_url"]
    tag = input_dict["fitmodel_url"].values()[0]

    destination = MEDIA_ROOT + '/' + folder + "/" + tag[0][6:] + '.txt'
    ensure_dir(destination)

    if not os.path.isfile(destination):  # file doesn't exist

        model = model_view.output_model(tag_name)
        f = open(destination, 'w')
        f.write(model)
        f.close()

    filename = folder + "/" + tag[0][6:] + '.txt'

    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
Example #4
def results_to_file(request,input_dict,output_dict,widget):
    from disco.core import result_iterator
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    tag = input_dict["string"]
    folder = 'discomll_results'
    add = "add" if input_dict["add_params"] == "true" else ""
    
    destination = MEDIA_ROOT+'/'+folder+"/"+tag[0][6:]+add+'.txt'
    
    ensure_dir(destination)
    
    if not os.path.isfile(destination):  # file doesn't exist
        
        f = open(destination,'w')
        if input_dict["add_params"] == "true":
            for k, v in result_iterator(tag):
                f.writelines(str(k) + " " + str(v) + "\n")
        else:
            for k, v in result_iterator(tag):
                f.writelines(str(k) + " " + str(v[0]) + "\n")
        f.close()
    filename = folder+"/"+tag[0][6:]+add+'.txt'

    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html',{'widget':widget,'input_dict':input_dict,'output_dict':output_dict})
Example #5
def results_to_file(request, input_dict, output_dict, widget):
    from disco.core import result_iterator
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    tag = input_dict["string"]
    folder = 'discomll_results'
    add = "add" if input_dict["add_params"] == "true" else ""

    destination = MEDIA_ROOT + '/' + folder + "/" + tag[0][6:] + add + '.txt'

    ensure_dir(destination)

    if not os.path.isfile(destination):  # file doesn't exist

        f = open(destination, 'w')
        if input_dict["add_params"] == "true":
            for k, v in result_iterator(tag):
                f.writelines(str(k) + " " + str(v) + "\n")
        else:
            for k, v in result_iterator(tag):
                f.writelines(str(k) + " " + str(v[0]) + "\n")
        f.close()
    filename = folder + "/" + tag[0][6:] + add + '.txt'

    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
Example #6
def model_view(request,input_dict,output_dict,widget):
    from discomll.utils import model_view
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_models'
    tag_name = input_dict["fitmodel_url"]
    tag = input_dict["fitmodel_url"].values()[0]
    
    
    destination = MEDIA_ROOT+'/'+folder+"/"+tag[0][6:]+'.txt'
    ensure_dir(destination)
    
    if not os.path.isfile(destination):  # file doesn't exist
        
        model = model_view.output_model(tag_name)
        f = open(destination,'w')
        f.write(model)
        f.close()

    filename = folder+"/"+tag[0][6:]+'.txt'
    
    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html',{'widget':widget,'input_dict':input_dict,'output_dict':output_dict})
Example #7
def adc_to_csv(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    destination = MEDIA_ROOT + '/' + str(request.user.id) + '/' + str(
        widget.id) + '.csv'
    ensure_dir(destination)
    f = open(destination, 'w')

    adc = input_dict['adc']
    ann = input_dict['ann']
    df = defaultdict(list)

    for doc in adc.documents:
        for annotation in ann.split('\n'):
            annotation = annotation.strip()
            df[annotation].extend(doc.get_annotation_texts(annotation))

    df = pd.DataFrame(df, columns=df.keys())
    df.to_csv(destination, sep='\t', encoding='utf-8')

    filename = str(request.user.id) + '/' + str(widget.id) + '.csv'
    output_dict['filename'] = filename
    return render(request, 'visualizations/adc_to_csv.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
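A side note on the adc_to_csv example above: pd.DataFrame(df, columns=df.keys()) only succeeds when every annotation list in the defaultdict has the same length; pandas raises a ValueError for ragged input. A small sketch with hypothetical data (not from the project) showing a construction that pads shorter columns with NaN instead:

from collections import defaultdict
import pandas as pd

# Hypothetical annotation texts standing in for doc.get_annotation_texts(...).
df = defaultdict(list)
df['person'].extend(['Alice', 'Bob'])
df['location'].extend(['Paris'])

# Wrapping each list in a Series tolerates unequal column lengths.
frame = pd.DataFrame({key: pd.Series(values) for key, values in df.items()})
frame.to_csv('annotations.csv', sep='\t', encoding='utf-8')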
Example #8
def bigdata_ca(request, input_dict, output_dict, widget):
    from discomll.utils import accuracy
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_measures'
    tag = input_dict["predictions"]
    destination = MEDIA_ROOT + '/' + folder + "/" + tag[0][6:] + '.txt'
    ensure_dir(destination)

    if input_dict["dataset"].params["id_index"] == -1:
        input_dict["string"] = "ID index should be defined."
    elif not os.path.isfile(destination):  # file doesn't exist
        measure, acc = accuracy.measure(test_data=input_dict["dataset"],
                                        predictions=input_dict["predictions"],
                                        measure="ca")
        string = "Classification Accuracy \n"
        score = str(measure) + " " + str(acc) + "\n"
        string += score
        input_dict["string"] = string

        f = open(destination, 'w')
        f.write(score)
        f.close()

    else:
        #ca results are cached
        string = "Classification Accuracy \n"
        f = open(destination, 'r')
        input_dict["string"] = string + str(f.readlines()[0])
        f.close()

    return render(request, 'visualizations/display_string.html',
                  {'widget': widget, 'input_dict': input_dict, 'output_dict': output_dict})
Example #9
def scikitAlgorithms_displayDecisionTree(request, input_dict, output_dict,
                                         widget):
    """Visualization displaying a decision tree"""

    import subprocess
    from sklearn import tree
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    # dot_data = StringIO.StringIO()
    filename = '/'.join(
        [str(request.user.id),
         'decisionTree-scikit-%d.dot' % widget.id])
    destination_dot = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_dot)
    tree.export_graphviz(input_dict['classifier'], out_file=destination_dot)

    filename = '/'.join(
        [str(request.user.id),
         'decisionTree-scikit-%d.png' % widget.id])
    destination_img = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_img)
    subprocess.call("dot -Tpng %s -o %s" % (destination_dot, destination_img),
                    shell=True)

    return render(
        request, 'visualizations/scikitAlgorithms_display_decision_tree.html',
        {
            'filename': filename,
            'widget': widget,
            'input_dict': input_dict
        })
Example #10
def odt_to_tab(request,input_dict,output_dict,widget):
    import Orange
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    destination = MEDIA_ROOT+'/'+str(request.user.id)+'/'+str(widget.id)+'.tab'
    ensure_dir(destination)
    input_dict['data'].save(destination)
    filename = str(request.user.id)+'/'+str(widget.id)+'.tab'
    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html',{'widget':widget,'input_dict':input_dict,'output_dict':output_dict})
Example #11
def MUSE_string_to_file_V3(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    basename = '/'.join([str(request.user.id), str(widget.id) + str(input_dict['fending'])])
    destination = '/'.join([MEDIA_ROOT, basename])
    ensure_dir(destination)
    with open(destination, 'w') as f:
        f.write(str(input_dict['data']))
    return render(request, 'visualizations/MUSE_string_to_file_v3.html', {'widget': widget, 'fileURL': basename})
Example #12
def string_to_file(request,input_dict,output_dict,widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    destination = MEDIA_ROOT+'/'+str(request.user.id)+'/'+str(widget.id)+'.txt'
    ensure_dir(destination)
    f = open(destination,'w')
    f.write(str(input_dict['string']))
    f.close()
    filename = str(request.user.id)+'/'+str(widget.id)+'.txt'
    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html',{'widget':widget,'input_dict':input_dict,'output_dict':output_dict})
Example #13
def bio3graph_biomine_visualizer(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    filename = os.path.join(str(request.user.id), str(widget.id) + '.bmg')
    destination = os.path.join(MEDIA_ROOT, filename)
    ensure_dir(destination)
    f = open(destination, 'w')
    f.write(str(input_dict['biomine_graph']))
    f.close()
    return render(request, 'visualizations/bio3graph_biomine_visualizer.html',
                  {'widget': widget, 'filename': filename})
Example #14
def MUSE_view_xml(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    filename = os.path.join(str(request.user.id), str(widget.id) + '.xml')
    destination = os.path.join(MEDIA_ROOT, filename)
    ensure_dir(destination)
    f = open(destination, 'w')
    f.write(str(input_dict['xml_data']))
    f.close()

    return render(request, 'visualizations/MUSE_view_xml.html',
                  {'widget': widget, 'filename': filename})
Example #15
def segmine_biomine_visualizer(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    filename = os.path.join(str(request.user.id), str(widget.id) + '.bmg')
    destination = os.path.join(MEDIA_ROOT, filename)
    ensure_dir(destination)
    f = open(destination, 'w')
    f.write(str(input_dict['graph']))
    f.close()
    return render(request, 'visualizations/segmine_biomine_visualizer.html', {
        'widget': widget,
        'filename': filename
    })
Example #16
def odt_to_arff(request, input_dict, output_dict, widget):
    import Orange
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    destination = MEDIA_ROOT + "/" + str(request.user.id) + "/" + str(widget.id) + ".arff"
    ensure_dir(destination)
    input_dict["data"].save(destination)
    filename = str(request.user.id) + "/" + str(widget.id) + ".arff"
    output_dict["filename"] = filename
    return render(
        request,
        "visualizations/string_to_file.html",
        {"widget": widget, "input_dict": input_dict, "output_dict": output_dict},
    )
Example #17
def MUSE_view_xml(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    filename = '/'.join([str(request.user.id), str(widget.id) + '.xml'])
    destination = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination)
    f = open(destination, 'w')
    f.write(str(input_dict['xml_data']))
    f.close()

    return render(request, 'visualizations/MUSE_view_xml.html', {
        'widget': widget,
        'filename': filename
    })
Example #18
def odt_to_arff(request, input_dict, output_dict, widget):
    import Orange
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    destination = MEDIA_ROOT + '/' + str(request.user.id) + '/' + str(
        widget.id) + '.arff'
    ensure_dir(destination)
    input_dict['data'].save(destination)
    filename = str(request.user.id) + '/' + str(widget.id) + '.arff'
    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
Example #19
def MUSE_virtual_environment_visualization(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    filename = os.path.join(str(request.user.id), str(widget.id) + '.txt')
    destination = os.path.join(MEDIA_ROOT, filename)
    ensure_dir(destination)
    f = open(destination, 'w')
    f.write(str(input_dict['NLP_data']))
    f.close()
    return render(request,
                  'visualizations/MUSE_view_3D_environment.html',
                  {'widget': widget,
                   'filename': filename,
                   'unitylink': input_dict['unitylink']
                   })
Example #20
def MUSE_string_to_file(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    basename = '/'.join(
        [str(request.user.id),
         str(widget.id) + str(input_dict['fending'])])
    destination = '/'.join([MEDIA_ROOT, basename])
    ensure_dir(destination)
    with open(destination, 'w') as f:
        f.write(str(input_dict['data']))
    return render(request, 'visualizations/MUSE_string_to_file.html', {
        'widget': widget,
        'fileURL': basename
    })
Example #21
def corpus_to_csv(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    destination = MEDIA_ROOT + '/' + str(request.user.id) + '/' + str(
        widget.id) + '.csv'
    ensure_dir(destination)
    df = input_dict['df']
    df.to_csv(destination, encoding='utf-8', sep=';', index=False)
    filename = str(request.user.id) + '/' + str(widget.id) + '.csv'
    output_dict['filename'] = filename

    return render(request, 'visualizations/string_to_file.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
Example #22
def MUSE_virtual_environment_visualization(request, input_dict, output_dict,
                                           widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    filename = os.path.join(str(request.user.id), str(widget.id) + '.txt')
    destination = os.path.join(MEDIA_ROOT, filename)
    ensure_dir(destination)
    f = open(destination, 'w')
    f.write(str(input_dict['NLP_data']))
    f.close()
    return render(
        request, 'visualizations/MUSE_view_3D_environment.html', {
            'widget': widget,
            'filename': filename,
            'unitylink': input_dict['unitylink']
        })
Example #23
def string_to_file(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    destination = MEDIA_ROOT + "/" + str(request.user.id) + "/" + str(widget.id) + ".txt"
    ensure_dir(destination)
    f = open(destination, "w")
    f.write(str(input_dict["string"]))
    f.close()
    filename = str(request.user.id) + "/" + str(widget.id) + ".txt"
    output_dict["filename"] = filename
    return render(
        request,
        "visualizations/string_to_file.html",
        {"widget": widget, "input_dict": input_dict, "output_dict": output_dict},
    )
Example #24
def string_to_file(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    destination = MEDIA_ROOT + '/' + str(request.user.id) + '/' + str(
        widget.id) + '.txt'
    ensure_dir(destination)
    f = open(destination, 'w')
    f.write(str(input_dict['string']))
    f.close()
    filename = str(request.user.id) + '/' + str(widget.id) + '.txt'
    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
Example #25
File: views.py Project: anzev/clowdflows
 def upload(self, request, pk=None):
     input = self.get_object()
     try:
         destination = FILES_FOLDER + str(input.widget.workflow.id) + '/' + request.FILES['file'].name
         ensure_dir(destination)
         destination_file = open(destination, 'wb')
         for chunk in request.FILES['file'].chunks():
             destination_file.write(chunk)
         destination_file.close()
         input.value = destination
         input.save()
         input.widget.unfinish()
         data = json.dumps(
             {'status': 'ok', 'message': 'File successfully uploaded'})
     except Exception as e:
         data = json.dumps(
             {'status': 'error', 'message': 'Problem uploading file: {}'.format(str(e))})
     return HttpResponse(data, 'application/json')
Example #26
def weka_local_display_decision_tree(request, input_dict, output_dict, widget):
    """Visualization displaying a decision tree"""

    import subprocess
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    img_type = 'svg'
    if input_dict['img_type'] == 'raster':
        img_type = 'png'

    classifier = common.deserialize_weka_object(input_dict['classifier'])
    dot_text = classifier.graph()

    filename = '/'.join(
        [str(request.user.id),
         'decisionTree-weka-%d.dot' % widget.id])
    destination_dot = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_dot)

    with open(destination_dot, 'w') as dot_file:
        dot_file.write(dot_text)

    # png/svg file
    filename = '/'.join([
        str(request.user.id),
        'decisionTree-weka-%d.%s' % (widget.id, img_type)
    ])
    destination_img = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_img)

    subprocess.call("dot -T%s %s -o %s" %
                    (img_type, destination_dot, destination_img),
                    shell=True)

    return render(request,
                  'visualizations/weka_local_display_decision_tree.html', {
                      'filename': filename,
                      'widget': widget,
                      'input_dict': input_dict
                  })
Example #27
def weka_local_display_decision_tree(request, input_dict, output_dict, widget):
    """Visualization displaying a decision tree"""

    import subprocess
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    img_type = 'svg'
    if input_dict['img_type'] == 'raster':
        img_type = 'png'

    classifier = common.deserialize_weka_object(input_dict['classifier'])
    dot_text = classifier.graph()

    filename = '/'.join([str(request.user.id), 'decisionTree-weka-%d.dot' % widget.id])
    destination_dot = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_dot)

    with open(destination_dot, 'w') as dot_file:
        dot_file.write(dot_text)


    # png/svg file
    filename = '/'.join([str(request.user.id),
                         'decisionTree-weka-%d.%s' % (widget.id, img_type)
                         ])
    destination_img = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_img)

    subprocess.call("dot -T%s %s -o %s" % (img_type, destination_dot, destination_img), shell=True)

    return render(request,
                  'visualizations/weka_local_display_decision_tree.html',
                  {'filename': filename,
                   'widget': widget,
                   'input_dict': input_dict})
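Examples #26 and #27 shell out to Graphviz with a formatted command string and shell=True. A sketch of the same conversion using an argument list, which avoids shell interpolation of the file paths (the paths below are hypothetical placeholders for destination_dot and destination_img):

import subprocess

# Hypothetical stand-ins for the variables computed in the examples above.
img_type = 'png'
destination_dot = '/tmp/decisionTree-weka-1.dot'
destination_img = '/tmp/decisionTree-weka-1.' + img_type

# Same dot -> image conversion, passed as an argument list instead of a shell string.
subprocess.call(['dot', '-T' + img_type, destination_dot, '-o', destination_img])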
Example #28
def bigdata_ca(request, input_dict, output_dict, widget):
    from discomll.utils import accuracy
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_measures'
    tag = input_dict["predictions"]
    destination = MEDIA_ROOT + '/' + folder + "/" + tag[0][6:] + '.txt'
    ensure_dir(destination)

    if input_dict["dataset"].params["id_index"] == -1:
        input_dict["string"] = "ID index should be defined."
    elif not os.path.isfile(destination):  # file doesn't exist
        measure, acc = accuracy.measure(test_data=input_dict["dataset"],
                                        predictions=input_dict["predictions"],
                                        measure="ca")
        string = "Classification Accuracy \n"
        score = str(measure) + " " + str(acc) + "\n"
        string += score
        input_dict["string"] = string

        f = open(destination, 'w')
        f.write(score)
        f.close()

    else:
        #ca results are cached
        string = "Classification Accuracy \n"
        f = open(destination, 'r')
        input_dict["string"] = string + str(f.readlines()[0])
        f.close()

    return render(request, 'visualizations/display_string.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
Example #29
def bigdata_mse(request, input_dict, output_dict, widget):
    from discomll.utils import accuracy
    from disco.core import result_iterator
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_measures'
    tag = input_dict["predictions"]
    destination = MEDIA_ROOT + '/' + folder + "/" + tag[0][6:] + '.txt'
    ensure_dir(destination)

    if input_dict["dataset"].params["id_index"] == -1:
        input_dict["string"] = "ID index should be defined."
    elif not os.path.isfile(destination):  # file doesn't exist
        results = accuracy.measure(test_data=input_dict["dataset"],
                                   predictions=input_dict["predictions"],
                                   measure="mse")
        string = "Mean squared error\n"
        for k, v in result_iterator(results):
            string += str(v) + "\n"
        input_dict["string"] = string

        f = open(destination, 'w')
        f.write(str(v))
        f.close()

    else:
        string = "Mean squared error\n"
        f = open(destination, 'r')
        input_dict["string"] = string + str(f.readlines()[0])
        f.close()

    return render(request, 'visualizations/display_string.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
Example #30
def cfrm_display_rrfile(request, input_dict, output_dict, widget):
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    output_content_all = input_dict['redescriptions']
    files = []
    count = 1
    for output_content in output_content_all:

        if output_content is None:
            output_content = 'Results missing :('
        filename = os.path.join(
            str(request.user.id),
            'redescriptions_{w_id}_{itCount}.rr'.format(w_id=widget.id,
                                                        itCount=count))
        count = count + 1
        files.append(filename)
        destination_rr = os.path.join(MEDIA_ROOT, filename)
        ensure_dir(destination_rr)
        with open(destination_rr, 'w') as f:
            f.write(output_content)

    #print 'filenames'
    #print files

    return render(
        request,
        'visualizations/cfrm_display_rrfile.html',
        {
            'files': files,
            #'content': "<br />".join(output_content.split("\n")),
            'contents': output_content_all,
            'random': int(random.random() * 10000000),
            'widget': widget,
            'input_dict': input_dict
        })
Example #31
def scikitAlgorithms_displayDecisionTree(request, input_dict, output_dict, widget):
    """Visualization displaying a decision tree"""

    import subprocess
    from sklearn import tree
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    # dot_data = StringIO.StringIO()
    filename = '/'.join([str(request.user.id), 'decisionTree-scikit-%d.dot' % widget.id])
    destination_dot = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_dot)
    tree.export_graphviz(input_dict['classifier'], out_file=destination_dot)

    filename = '/'.join([str(request.user.id), 'decisionTree-scikit-%d.png' % widget.id])
    destination_img = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_img)
    subprocess.call("dot -Tpng %s -o %s" % (destination_dot, destination_img), shell=True)

    return render(request,
                  'visualizations/scikitAlgorithms_display_decision_tree.html',
                  {'filename': filename,
                   'widget': widget,
                   'input_dict': input_dict})
Example #32
def clus_display_svg(request, input_dict, output_dict, widget):
    """Visualization displaying a decision tree"""

    import subprocess
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    img_type = 'svg'
    if input_dict['img_type'] == 'raster':
        img_type = 'png'

    dot_text = """digraph J48Tree {
    N0 [label="f8" ]
    N0->N1 [label="= +"]
    N1 [label="f99" ]
    N1->N2 [label="= +"]
    N2 [label="east (10.0/1.0)" shape=box style=filled ]
    N1->N3 [label="= -"]
    N3 [label="west (3.0/1.0)" shape=box style=filled ]
    N0->N4 [label="= -"]
    N4 [label="west (7.0)" shape=box style=filled ]
    }"""

    if type(input_dict['classifier']) == list:
        dot_text = ""
        starting_id = 0
        for cls in input_dict['classifier']:
            dot_representation, starting_id = clus_tree_to_dot(
                cls['representation'], starting_id)
            dot_text += dot_representation + "\n"
            # dot_text = dot_text + "digraph " + cls['name'] + " {\n" + \
            #           dot_representation + "}\n\n"
        dot_text = "digraph Tree {\n" + dot_text + "}"
    else:
        dot_text = "digraph Tree {\n" + clus_tree_to_dot(
            input_dict['classifier'], 0)[0] + "}"

    filename = '/'.join(
        [str(request.user.id),
         'decisionTree-clus-%d.dot' % widget.id])
    dotfile = filename
    destination_dot = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_dot)

    with open(destination_dot, 'w') as dot_file:
        dot_file.write(dot_text)

    # png/svg file
    filename = '/'.join([
        str(request.user.id),
        'decisionTree-clus-%d.%s' % (widget.id, img_type)
    ])
    destination_img = os.path.join(MEDIA_ROOT, filename)
    ensure_dir(destination_img)

    try:
        dot_path = settings.DOT_PATH
    except:
        dot_path = 'dot'

    subprocess.call(dot_path + " -T%s %s -o %s" %
                    (img_type, destination_dot, destination_img),
                    shell=True)

    return render(
        request, 'visualizations/cf_clus_display_svg_tree.html', {
            'filename': filename,
            'dotfile': dotfile,
            'random': int(random() * 10000000),
            'widget': widget,
            'input_dict': input_dict
        })
Example #33
def display_corpus_statistic(request,
                             input_dict,
                             output_dict,
                             widget,
                             narrow_doc='n'):

    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    corpus = input_dict['corpus']
    stat_type = input_dict['stat_type']
    allAnnotations = 0
    result_list = []
    n = int(input_dict['n_gram'])

    #get some general stats
    general_stats = {}
    general_stats['num_doc'] = len(corpus)
    doc_lengths = []
    all_tokens = set()
    for doc in corpus:
        try:
            doc = doc.split()
        except:
            doc = str(doc).split()
        doc_lengths.append(len(doc))
        for tok in doc:
            all_tokens.add(tok)
    general_stats['num_tokens'] = sum(doc_lengths)
    if general_stats['num_doc'] > 0:
        general_stats['avg_doc_length'] = float(
            general_stats['num_tokens']) / general_stats['num_doc']
    else:
        general_stats['avg_doc_length'] = 0
    if general_stats['num_tokens'] > 0:
        general_stats['ttr'] = len(all_tokens) / float(
            general_stats['num_tokens'])
    else:
        general_stats['ttr'] = 0
    if stat_type == 'frequency' or stat_type == 'dis_legomena' or stat_type == 'hapax_legomena':
        annotation_dict = {}
        for doc in corpus:
            try:
                doc.split()
            except:
                doc = str(doc)
            if doc.count('###') > 3:
                annotations = doc.split('###')
            else:
                annotations = doc.split()
            length = len(annotations)
            for i in range(0, length - n + 1):
                combo = ""
                for j in range(i, i + n):
                    value = annotations[j]
                    if j > i:
                        combo += " "
                    combo += value

                if len(combo) > 0:
                    allAnnotations += 1
                    if combo in annotation_dict:
                        annotation_dict[combo] = annotation_dict[combo] + 1
                    else:
                        annotation_dict[combo] = 1
        title = "N-gram"
        measure = 'Frequency'

        if stat_type == 'frequency':
            allAnnotations = float(allAnnotations)
            for pos, number in annotation_dict.items():
                try:
                    pos = pos.encode('utf8')
                    result_list.append(
                        (pos, number,
                         "{0:.4f}".format(float(number) / allAnnotations)))
                except:
                    continue

            result_list = sorted(result_list, key=lambda x: x[1], reverse=True)
            if len(result_list) > 100:
                result_list = result_list[:100]
        else:
            allAnnotations = float(allAnnotations)
            for pos, number in annotation_dict.items():
                if stat_type == 'dis_legomena':
                    if number == 2:
                        pos = pos.encode('utf8')
                        result_list.append(
                            (pos, number,
                             "{0:.4f}".format(float(number) / allAnnotations)))
                else:
                    if number == 1:
                        pos = pos.encode('utf8')
                        result_list.append(
                            (pos, number,
                             "{0:.4f}".format(float(number) / allAnnotations)))
            if len(result_list) > 300:
                result_list = result_list[:300]
    else:
        all_annotations = []
        for doc in corpus:
            if doc.count('###') > 3:
                annotations = doc.split('###')
            else:
                annotations = doc.split()
            all_annotations.extend(annotations)

        if stat_type == 'pmi_bigrams':
            bigram_measures = nltk.collocations.BigramAssocMeasures()
            finder = BigramCollocationFinder.from_words(all_annotations)
            best = sorted(finder.score_ngrams(bigram_measures.pmi),
                          key=lambda x: x[1],
                          reverse=True)
            if len(best) > 100:
                best = best[:100]
            for tags, score in best:
                tag1, tag2 = tags
                result_list.append(
                    (tag1 + "\t" + tag2, "{0:.4f}".format(score)))
            title = "Bigram collocations"

        elif stat_type == 'pmi_trigrams':
            trigram_measures = nltk.collocations.TrigramAssocMeasures()
            finder = TrigramCollocationFinder.from_words(all_annotations)
            best = sorted(finder.score_ngrams(trigram_measures.pmi),
                          key=lambda x: x[1],
                          reverse=True)
            if len(best) > 100:
                best = best[:100]
            for tags, score in best:
                tag1, tag2, tag3 = tags
                result_list.append(
                    (tag1 + " " + tag2 + " " + tag3, "{0:.4f}".format(score)))
            title = "Trigram collocations"
        measure = 'PMI score'

    if title == 'N-gram':
        columns = ['N-gram', 'Raw frequency', 'Frequency']
        df = pd.DataFrame(result_list, columns=columns)

    if title != 'N-gram':
        columns = [title, measure]
        df = pd.DataFrame(result_list, columns=columns)
    destination = MEDIA_ROOT + '/' + str(request.user.id) + '/' + str(
        widget.id) + '.csv'
    ensure_dir(destination)
    df.to_csv(destination, encoding='utf-8', sep=';', index=False)
    filename = str(request.user.id) + '/' + str(widget.id) + '.csv'
    output_dict['filename'] = filename

    return render(
        request, 'visualizations/corpus_statistics.html', {
            'widget': widget,
            'data': [result_list, title, measure, general_stats],
            'narrow_doc': narrow_doc,
            'output_dict': output_dict
        })
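The PMI branch of display_corpus_statistic relies on NLTK's collocation finders. A small self-contained sketch of that scoring step, with toy tokens in place of the corpus annotations (assumes the nltk package is installed):

from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder

# Toy token stream standing in for all_annotations in the example above.
tokens = "new york is a big city and new york never sleeps".split()

bigram_measures = BigramAssocMeasures()
finder = BigramCollocationFinder.from_words(tokens)

# score_ngrams returns ((w1, w2), score) pairs, highest PMI first.
for (w1, w2), score in finder.score_ngrams(bigram_measures.pmi)[:5]:
    print("%s %s\t%.4f" % (w1, w2, score))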