Пример #1
0
def prepare_data():
    """Download recent PHP-tagged Stack Overflow questions and pass them on.

    Calls ``data.clean()`` first, then requests one page (99 items, newest
    first) from the Stack Exchange API and forwards the response body to
    ``helper.prepare_data``.

    The body is gunzipped only when the server marks it with
    ``Content-Encoding: gzip``; otherwise it is forwarded as received.
    """
    data.clean()

    url = ("https://api.stackexchange.com/2.2/questions?page=1&pagesize=99"
           "&order=desc&sort=creation&tagged=php&site=stackoverflow")

    request = urllib2.Request(url)
    response = urllib2.urlopen(request)
    try:
        content_encoding = response.info().get('Content-Encoding')
        result = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    if content_encoding == 'gzip':
        # GzipFile needs a file-like object, so wrap the raw bytes in a buffer.
        buf = StringIO(result)
        result = gzip.GzipFile(fileobj=buf).read()

    helper.prepare_data(result)
Пример #2
0
def split_and_write(file1, data1, file2, data2):
    """Append the cleaned and encoded form of each text to its own file.

    Args:
        file1: Path of the file that receives ``data1``; opened in append mode.
        data1: First text; passed through ``clean`` and then ``encode``.
        file2: Path of the file that receives ``data2``; opened in append mode.
        data2: Second text; passed through ``clean`` and then ``encode``.
    """
    # NOTE: an earlier, disabled variant compared the texts with
    # ``match(data1, data2)`` and, above a 0.05 score, wrote the
    # ``clean_and_split`` output when both sides split into the same number
    # of parts. Only the plain clean-and-write path is active.
    with open(file1, "at") as first_out, open(file2, "at") as second_out:
        for handle, text in ((first_out, data1), (second_out, data2)):
            handle.write(encode(clean(text)))
Пример #3
0
def clean_up():
    """Return the system to a stable state after an unsafe shutdown.

    Changes the working directory to ``./secner/bin/``, asks the user for
    confirmation through ``helpers.query`` and, once granted, removes the
    generated files with ``rm -rf`` before calling
    ``data.clean(config.NEO4J_SERVER)``.

    This function never returns normally; every path ends in ``sys.exit``.

    Raises:
        SystemExit: When the user declines, when clearing the Neo4j server
            fails, and also on success (with a completion message).
    """
    logging.info('Cleaning up system.')
    logging.debug('Changing working directory.')
    os.chdir('./secner/bin/')
    logging.debug('Requesting permission to continue.')
    permission = helpers.query(
        'Note that running this proccess will result in deletion of potentially valuable content. Are you absolutely sure you would wish to proceed?'
    )
    if not permission:
        logging.debug('User did not permit running the service.')
        sys.exit('Process stopped by user.')
    logging.debug('Permission recieved. Continuing with process.')

    # Each entry: (path whose existence is checked, debug message, command).
    removals = (
        ('./indexes/', 'Clearing indexes.', 'rm -rf ./indexes/'),
        ('./tests/', 'Clearing tests.', 'rm -rf ./tests/*'),
        # NOTE(review): the existence check targets the hidden directory
        # './.secner-helpers/' while the command removes './secner-helpers/*'
        # (no leading dot). One of the two is probably a typo; confirm the
        # intended directory before changing either.
        ('./.secner-helpers/', 'Clearing secner-helpers.',
         'rm -rf ./secner-helpers/*'),
    )
    for checked_path, message, command in removals:
        if not os.path.exists(checked_path):
            continue
        logging.debug(message)
        status = os.system(command)
        # os.system returns the wait status on Unix; 256 is exit code 1,
        # which this script reports as a permission failure.
        if status == 256:
            error('Permission denied.')
        elif status != 0:
            error('Something went wrong.')

    try:
        logging.debug('Clearing Neo4j Server.')
        data.clean(config.NEO4J_SERVER)
    except Exception:
        # logging.exception records the message together with the active
        # traceback. The previous code concatenated sys.exc_type with a
        # string, which raised inside the handler and hid the real error.
        logging.exception('Something went wrong while clearing the Neo4j server.')
        sys.exit(
            'Something went wrong while clearing the Neo4j server. Check logs for details.'
        )
    logging.debug('Clearing complete.')
    sys.exit('System cleaned. Factory state returned.')
def process(data, url=None):
    """Extract original/modern line pairs from a page and pickle each pair.

    Every row of the ``noFear`` comparison table is printed and appended as
    one pickled dict to ``utterances.pickle``.

    Args:
        data: HTML markup of the page to parse.
        url: Optional source address stored alongside every record.
    """
    document = html.fromstring(data)
    rows = document.xpath(
        '//div[@id="noFear-comparison"]/table[@class="noFear"]/tr')

    for row in rows:
        # XPath result for the bold text in the left-hand cell.
        speaker = row.xpath('td[@class="noFear-left"]/b/text()')

        # Left column: unwrap links, then collect the text of each line div.
        original_parts = []
        left_cells = row.xpath(
            'td[@class="noFear-left"]/div[@class="original-line"]')
        for cell in left_cells:
            etree.strip_tags(cell, 'a')
            original_parts.append(' ' + cell.text_content())
        original_line = clean(''.join(original_parts))
        print("Shakespeare: {}".format(original_line))

        # Right column: drop <span> elements (keeping their tails) and unwrap
        # links before reading the text.
        modern_parts = []
        right_cells = row.xpath(
            'td[@class="noFear-right"]/div[@class="modern-line"]')
        for cell in right_cells:
            etree.strip_elements(cell, 'span', with_tail=False)
            etree.strip_tags(cell, 'a')
            modern_parts.append(' ' + cell.text_content())
        modern_line = clean(''.join(modern_parts))
        print("Modern: {}".format(modern_line))

        record = {
            "speaker": speaker,
            "shakespeare": original_line,
            "modern": modern_line,
            "url": url
        }
        # Append mode: each record becomes a separate pickle in the file.
        with open('utterances.pickle', 'a+b') as sink:
            pickle.dump(record, sink)
Пример #5
0
 def sample(self, step, temperature=1., init=None):
     """Generate one piano roll, store it and export it as a MIDI file.

     Args:
         step: Identifier used in the output name ``Samples/<step>.mid`` and
             passed to ``save_roll``.
         temperature: Forwarded to ``self.generate``.
         init: Forwarded to ``self.generate``.

     Returns:
         The cleaned roll, transposed and with a new leading axis of size 1.
     """
     samples_dir = 'Samples'
     if not os.path.isdir(samples_dir):
         os.mkdir(samples_dir)

     roll = clean(self.generate(temperature, init))
     save_roll(roll, step)

     piano_rolls_to_midi(roll).write('Samples/{}.mid'.format(step))
     tqdm.write('Saved to Samples/{}.mid'.format(step))

     return np.expand_dims(roll.T, axis=0)
Пример #6
0
def clean_up():
    """Return the system to a stable state after an unsafe shutdown.

    Changes the working directory to ``./secner/bin/``, asks the user for
    confirmation through ``helpers.query`` and, once granted, removes the
    generated files with ``rm -rf`` before calling
    ``data.clean(config.NEO4J_SERVER)``.

    This function never returns normally; every path ends in ``sys.exit``.

    Raises:
        SystemExit: When the user declines, when clearing the Neo4j server
            fails, and also on success (with a completion message).
    """
    logging.info('Cleaning up system.')
    logging.debug('Changing working directory.')
    os.chdir('./secner/bin/')
    logging.debug('Requesting permission to continue.')
    permission = helpers.query('Note that running this proccess will result in deletion of potentially valuable content. Are you absolutely sure you would wish to proceed?')
    if not permission:
        logging.debug('User did not permit running the service.')
        sys.exit('Process stopped by user.')
    logging.debug('Permission recieved. Continuing with process.')

    # Each entry: (path whose existence is checked, debug message, command).
    removals = (
        ('./indexes/', 'Clearing indexes.', 'rm -rf ./indexes/'),
        ('./tests/', 'Clearing tests.', 'rm -rf ./tests/*'),
        # NOTE(review): the existence check targets the hidden directory
        # './.secner-helpers/' while the command removes './secner-helpers/*'
        # (no leading dot). One of the two is probably a typo; confirm the
        # intended directory before changing either.
        ('./.secner-helpers/', 'Clearing secner-helpers.', 'rm -rf ./secner-helpers/*'),
    )
    for checked_path, message, command in removals:
        if not os.path.exists(checked_path):
            continue
        logging.debug(message)
        status = os.system(command)
        # os.system returns the wait status on Unix; 256 is exit code 1,
        # which this script reports as a permission failure.
        if status == 256:
            error('Permission denied.')
        elif status != 0:
            error('Something went wrong.')

    try:
        logging.debug('Clearing Neo4j Server.')
        data.clean(config.NEO4J_SERVER)
    except Exception:
        # logging.exception records the message together with the active
        # traceback. The previous code concatenated sys.exc_type with a
        # string, which raised inside the handler and hid the real error.
        logging.exception('Something went wrong while clearing the Neo4j server.')
        sys.exit('Something went wrong while clearing the Neo4j server. Check logs for details.')
    logging.debug('Clearing complete.')
    sys.exit('System cleaned. Factory state returned.')
Пример #7
0
def main():
    """Run the recorded-audio dialog, then build the report for the session.

    Sets the Google credentials path, clears the console, opens the dialog
    with the ``DEMO_START`` text and then alternates between recording audio
    and sending it for intent detection until ``poke`` returns a truthy
    value. Afterwards the subject information is fetched, cleaned and
    matched, and everything is handed to ``report.create``.
    """
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
        "./resources/credentials/e3682f457e02.json")
    os.system("cls")

    project_id = "recruitertest-dd3ab"
    session_id = uuid.uuid4().hex[:12]
    language_code = "en-US"
    input_file_path = "./resources/audio/subject_input.wav"

    # Two parallel lists that accumulate what each dialog turn returns.
    complete_transcript = [[], []]

    # NOTE: disabled alternatives to the audio loop below exist for
    # debugging: a scripted "quick dialog" sent in a single
    # detect_intent_texts call, typed input via input(), and a streaming
    # variant based on detect_intent_stream.
    detect_intent_texts(project_id, session_id, ["DEMO_START"], language_code)

    dialog_finished = False
    while not dialog_finished:
        audio.record(input_file_path)
        partial_transcript = detect_intent_audio(
            project_id, session_id, input_file_path, language_code)

        for part in (0, 1):
            complete_transcript[part] = (
                complete_transcript[part] + partial_transcript[part])

        dialog_finished = poke(project_id, session_id, language_code)

    subject_info = get_subject_info(project_id, session_id, language_code)
    clean_subject_info = data.clean(subject_info)
    # Match scores are computed from the uncleaned subject information.
    match_scores = data.match(subject_info)

    report.create(
        clean_subject_info, match_scores, complete_transcript, session_id)
Пример #8
0
 def sample(self, step, temperature=1., init=None, nonzero=None, diff=None,
            nonzero_diff=None, condition=None, length=2048):
     """Generate one piano roll, store it and export it as a MIDI file.

     Args:
         step: Identifier used in the output name ``Samples/<step>.mid`` and
             passed to ``save_roll``.
         temperature: Forwarded to ``self.generate``.
         init: Forwarded to ``self.generate``.
         nonzero: Forwarded to ``self.generate``.
         diff: Accepted but not used by this method.
         nonzero_diff: Forwarded to ``self.generate``.
         condition: Forwarded to ``self.generate``.
         length: Forwarded to ``self.generate``.

     Returns:
         The cleaned roll, transposed and with a new leading axis of size 1.
     """
     samples_dir = 'Samples'
     if not os.path.isdir(samples_dir):
         os.mkdir(samples_dir)

     generated = self.generate(temperature, init, nonzero, nonzero_diff,
                               condition, length)
     # Detach from the graph and move to host memory before converting.
     roll = clean(generated.detach().cpu().numpy())
     save_roll(roll, step)

     piano_rolls_to_midi(roll).write('Samples/{}.mid'.format(step))
     tqdm.write('Saved to Samples/{}.mid'.format(step))

     return np.expand_dims(roll.T, axis=0)
Пример #9
0
 def OnInit(self):
     """Load the data set named in ``swamp.txt`` and set up the shader and VBO.

     The first 8 characters of ``swamp.txt`` are used as the name passed to
     ``data.get``. The result goes through ``data.clean`` and ``coords`` and
     is converted with ``array``. The array is printed and the method waits
     for the user to press Enter before compiling the shaders.

     Sets ``self.shader`` to the compiled program and ``self.vbo`` to a
     vertex buffer built from the data.
     """
     # The context manager closes the file as soon as the name is read.
     with open('swamp.txt', 'r') as name_file:
         name = name_file.read(8)
     lake = data.get(name)
     laker = data.clean(lake)
     dat = coords(laker)
     datas = array(dat)
     # Parenthesised form prints the same on Python 2 and parses on Python 3.
     print(datas)
     raw_input("wait:")
     # Pass-through vertex shader using the fixed-function transform matrix.
     VERTEX_SHADER = shaders.compileShader(
         """
     void main() {
         gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
     }""", GL_VERTEX_SHADER)
     # Fragment shader that outputs a constant colour, vec4(0, 1, 0, 1).
     FRAGMENT_SHADER = shaders.compileShader(
         """
     void main() {
         gl_FragColor = vec4( 0, 1, 0, 1 );
     }""", GL_FRAGMENT_SHADER)
     self.shader = shaders.compileProgram(VERTEX_SHADER, FRAGMENT_SHADER)
     self.vbo = vbo.VBO(array(datas, 'f'))
Пример #10
0
 def OnInit( self):
     """Load the data set named in ``swamp.txt`` and set up the shader and VBO.

     The first 8 characters of ``swamp.txt`` are used as the name passed to
     ``data.get``. The result goes through ``data.clean`` and ``coords`` and
     is converted with ``array``. The array is printed and the method waits
     for the user to press Enter before compiling the shaders.

     Sets ``self.shader`` to the compiled program and ``self.vbo`` to a
     vertex buffer built from the data.
     """
     # The context manager closes the file as soon as the name is read.
     with open('swamp.txt','r') as name_file:
         name = name_file.read(8)
     lake = data.get(name)
     laker = data.clean(lake)
     dat = coords(laker)
     datas = array(dat)
     # Parenthesised form prints the same on Python 2 and parses on Python 3.
     print(datas)
     raw_input("wait:")
     # Pass-through vertex shader using the fixed-function transform matrix.
     VERTEX_SHADER = shaders.compileShader("""
     void main() {
         gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
     }""", GL_VERTEX_SHADER)
     # Fragment shader that outputs a constant colour, vec4(0, 1, 0, 1).
     FRAGMENT_SHADER = shaders.compileShader("""
     void main() {
         gl_FragColor = vec4( 0, 1, 0, 1 );
     }""", GL_FRAGMENT_SHADER)
     self.shader = shaders.compileProgram(VERTEX_SHADER,FRAGMENT_SHADER)
     self.vbo = vbo.VBO(
         array( datas,'f')
     )
import sys
sys.path.append('src')

from data import read_raw_data, clean, vectorize
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from visualisation import plot_precision_recall_vs_threshold
from sklearn.metrics import confusion_matrix

if __name__ == "__main__":
    # Load the raw training data and run it through the cleaning step.
    df = clean(read_raw_data('data\\raw\\A_training data.csv'))
    features, labels = df.name.values, df.name_generic.values

    # Stratified 80/20 split so both parts keep the label proportions.
    (train_features, test_features,
     train_labels, test_labels) = train_test_split(
         features, labels, stratify=labels, test_size=0.2)

    # Fit the vectorizer on the training part only and reuse it for the test part.
    train_features, vectorizer = vectorize(train_features)
    test_features = vectorizer.transform(test_features)

    # Ratio of label-0 to label-1 training samples, passed as scale_pos_weight.
    negatives = (train_labels == 0).sum()
    positives = (train_labels == 1).sum()
    weight = negatives / positives

    params = {
        'scale_pos_weight': weight,
        'max_depth': 9,
        'n_estimators': 300,
        'learning_rate': 0.35,
        'gamma': 0.08,
        'subsample': 0.7,
    }
    clf = XGBClassifier(**params)
    clf.fit(train_features, train_labels)

    predictions = clf.predict_proba(test_features)
Пример #12
0
        cont = False

# Ask whether the loaded file should be prepared; only "y"/"Y" continues.
pInput = string.upper(raw_input("File Loaded! Would you like to prepare it for rendering?(y/n)"))
if pInput == 'Y':
    # Keep prompting until the input parses as a float below 1. The previous
    # substring test ("0." in the text) accepted values such as "10.5" and
    # let float() crash on text such as "x0.".
    cont = False
    while not cont:
        numInput = str(raw_input("Please input point size as a float less than 1:"))
        try:
            sizeConv = float(numInput)
        except ValueError:
            sizeConv = None
        # NaN fails the comparison and is therefore rejected as well.
        if sizeConv is not None and sizeConv < 1:
            print("Size recognised")
            cont = True
        else:
            print("You didn't enter a float less than 1. Try Again")
    print("PDB Renderer will now prepare the data")
    darter = data.clean(dart)
    forge = extStrip(fInput)
    print(forge)
    # NOTE(review): the output file is opened for writing here and is not
    # closed anywhere in this section; confirm it is closed further on.
    heckler = open(forge, "w")
    # One print_normal call per axis direction: +y, -y, -x, +x, -z, +z.
    top = print_normal((0, 1, 0), heckler)
    bottom = print_normal((0, -1, 0), heckler)
    left = print_normal((-1, 0, 0), heckler)
    right = print_normal((1, 0, 0), heckler)
    front = print_normal((0, 0, -1), heckler)
    back = print_normal((0, 0, 1), heckler)
    for lline in darter:
        print(lline)
    coordData = render.coords(darter)
    noot = []
    for atom in coordData:
        print(atom)
Пример #13
0
def _run_paris(args):
    """Load and prepare both knowledge graphs, then run PARIS on them.

    Returns:
        tuple: ``(folder, out_path)``, the folder returned by
        ``data.load_kgs`` and the freshly recreated PARIS output directory.
    """
    triples1, triples2, folder = data.load_kgs(
        args.training_data, args.dataset_type, args.triples_type, args.dataset_ea)
    if args.seed:
        kg1_path, kg2_path = data.seed_kgs(
            args.dataset_type, triples1, triples2, folder, args.new_seed,
            args.dataset_division, args.fold_number, args.frac)
    else:
        kg1_path, kg2_path = data.add_point(triples1, triples2, folder)

    # Delete old PARIS directory and create again to be empty
    out_path = args.output + "PARIS/" + args.dataset_type + "/" + args.dataset_ea
    if os.path.exists(out_path):
        shutil.rmtree(out_path)
    # makedirs also creates missing parent directories; plain mkdir fails
    # on a first run when "<output>PARIS/<dataset_type>" does not exist yet.
    os.makedirs(out_path)

    # Run PARIS
    print("Running PARIS...")
    out_paris = os.popen(
        "java -jar ../resources/paris_0_3.jar {kg1_path} {kg2_path} {out_path}".format(
            kg1_path=kg1_path, kg2_path=kg2_path, out_path=out_path
        )
    ).read()
    print("PARIS output:\n", out_paris)
    return folder, out_path


def _clean_run(args, folder, out_path):
    """Call ``data.clean`` for one run, passing the fold details when a stored seed was used."""
    print("\nCleaning")
    if args.seed and not args.new_seed:
        data.clean(folder, args.dataset_type, out_path, args.dataset_division, args.fold_number)
    else:
        data.clean(folder, args.dataset_type, out_path)


def _print_metric_summary(label, results):
    """Print mean and standard deviation of precision, recall and f1 over all folds."""
    if not results:
        # Nothing was collected (metrics are only gathered for OpenEA
        # datasets); skip instead of printing nan with a NumPy warning.
        return
    for position, metric in enumerate(("precision", "recall", "f1")):
        values = np.array([result[position] for result in results])
        print("{} average {}:".format(label, metric), np.mean(values))
        print("{} std {}:".format(label, metric), np.std(values))


def main(args):
    """
    Main function to run experiment on PARIS

    When ``args.fold_number`` is ``"full"`` the experiment is repeated for
    folds 1 to 5 (``args.fold_number`` is overwritten with each fold number
    in turn) and the mean and standard deviation of precision, recall and f1
    are printed for the truth, test and validation results. Otherwise a
    single run is performed for the given fold.

    Args:
        args: Parsed command-line options. The attributes read here are
            ``fold_number``, ``training_data``, ``dataset_type``,
            ``triples_type``, ``dataset_ea``, ``seed``, ``new_seed``,
            ``dataset_division``, ``frac`` and ``output``.
    """
    if args.fold_number == "full":
        valid_list = []
        test_list = []
        truth_list = []
        for fold in range(1, 6):
            args.fold_number = str(fold)
            folder, out_path = _run_paris(args)

            print("\nComputing metrics")
            if args.dataset_type == "OpenEA_dataset":
                (prec_truth, rec_truth, f1_truth,
                 prec_test, rec_test, f1_test,
                 prec_valid, rec_valid, f1_valid) = check_result(
                     folder, args.dataset_type, out_path,
                     args.dataset_division, args.fold_number)
                truth_list.append((prec_truth, rec_truth, f1_truth))
                test_list.append((prec_test, rec_test, f1_test))
                valid_list.append((prec_valid, rec_valid, f1_valid))
            else:
                check_result(folder, args.dataset_type, out_path)

            _clean_run(args, folder, out_path)

        _print_metric_summary("Truth", truth_list)
        _print_metric_summary("Test", test_list)
        _print_metric_summary("Valid", valid_list)

    else:
        folder, out_path = _run_paris(args)

        print("\nComputing metrics")
        # Fold details are only passed for seeded runs on OpenEA datasets.
        if args.dataset_type == "OpenEA_dataset" and args.seed:
            check_result(folder, args.dataset_type, out_path, args.dataset_division, args.fold_number)
        else:
            check_result(folder, args.dataset_type, out_path)

        _clean_run(args, folder, out_path)
    print("\nNothing else to do! Closing")