Example #1
def save_as_csv(trace, file_name, directory="inspector_traces"):
    # Write (sample index, value) pairs as comma-separated integers.
    # The .txt extension matches the glob used by plot_traces_to_pdf
    # (Example #23).
    csv_content = []
    for sample, value in enumerate(trace):
        csv_content.append((sample, value))
    csv_content = np.asarray(csv_content)
    path = files.get_full_path(directory, file_name + ".txt")
    np.savetxt(path, csv_content, fmt="%d, %d")
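Every snippet on this page resolves file locations through a get_full_path helper (sometimes imported as files.get_full_path), called with either a single relative path or several path components, and in some projects with an extra file_type keyword. A minimal sketch of such a helper, assuming a module-level project root; the real implementations behind these examples may differ:

import os

# Hypothetical project root; real projects usually derive this from __file__
# or from configuration.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))

def get_full_path(*parts):
    # Join any number of path components onto the project root.
    return os.path.join(PROJECT_ROOT, *parts)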
Example #2
def load_log(filename):
    directory = "fourq_exec_logs"
    full_path = files.get_full_path(os.path.join(directory, filename))
    with open(full_path) as f:
        content = f.readlines()
    return [line.strip() for line in content]
Example #3
def init_tag():
    global mt
    if mt is None:
        # Lazily create the shared tagger instance on first use
        mt = MainTagger(
            get_full_path("resource/Lexicon.trn"),
            get_full_path("resource/Ngram.trn"),
            0,
            3,
            3,
            0,
            0,
            False,
            0.2,
            0,
            500.0,
            1,
        )
Example #4
 def getconfig(self):
     try:
         config = configparser.ConfigParser()
         config_path = get_full_path("config", "api.properties")
         if os.path.isfile(config_path):
             config.read(config_path)
         else:
             raise errors.Error("Error loading configuration file: " +
                                config_path)
         if config.has_section(self.configsection):
             return config[self.configsection]
         else:
             print("There is either no %s or %s in config file." %
                   (self.configsection, self.configname))
     except RuntimeError as err:
         print("Error reading configurations file: %s" % err)
Example #5
def load_target_trace(file_name="target_trace", directory="inspector_traces"):
    """
    Load the target trace from a file.
    :param file_name: name of the .npy file, without extension
    :param directory: directory that contains the trace file
    :return: the target trace as a numpy array
    """
    path = files.get_full_path(directory, file_name + ".npy")
    target_trace = np.load(path)
    return target_trace
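A hypothetical round trip with this loader and the matching save_target_trace from Example #13 below, assuming numpy is imported as np:

trace = np.random.rand(1000)      # stand-in for a captured power trace
save_target_trace(trace)          # writes inspector_traces/target_trace.npy
loaded = load_target_trace()      # reads the same array back
assert np.array_equal(trace, loaded)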
Example #6
def loadSoundPack(name):
    print(f'\n-- loading Sound Pack: {name}')

    # SOUNDS [LOAD]
    path = get_full_path(join(name, 'sound'), file_type='snd')
    for file in listdir(path):
        # splitext(...)[0] is the file name without its extension
        AudioManager.load_sound(path=join(path, file), name=splitext(file)[0])

    # SOUNDS [STREAM]
    path = get_full_path(join(name, 'music'), file_type='snd')
    for file in listdir(path):
        AudioManager.ambient_paths[splitext(file)[0]] = join(path, file)

    print(AudioManager.ambient_paths)
    print('-- Done.\n')
Example #7
 def screen_capture(self, file_name, format="PNG", directory="img"):
     """
     Capture the screen of the oscilloscope
     :param file_name: The file name of the screen capture
     :param format: The format in which the screen capture must be stored
     :param directory: The directory in which the capture is stored
     """
     aux_string = 'FORMAT,LANDSCAPE,BCKG,BLACK,AREA,FULLSCREEN'
     path = files.get_full_path(directory, file_name + "." + format)
     self.store_hardcopy_to_file(format, path, aux_string)
Example #8
 def __init__(self, toml_file):
     """Initialize setup class."""
     self.input_params = toml.load(toml_file)
     self.data = {}
     self.etc_folder = get_full_path('etc')
     self.nproc = self.input_params['main']['number_of_processors']
     self.ga_ini = configparser.ConfigParser(os.environ)
     self.ga_ini.read(os.path.join(self.etc_folder, 'gdock.ini'),
                      encoding='utf-8')
Example #9
 def __init__(self):
     super().__init__()
     self.db = DB()
     apiErrorsFile = get_full_path("app/apiErrors.json")
     with open(apiErrorsFile, "r") as f:
         self.apiErrors = json.load(f)
     self.statusRejected = 2
     self.statusAccepted = 1
     # JWT expiration time in minutes
     self.jwtExperationTime = 30
     # The exposure of the hard-coded JWT secret key is a concern...
     self.jwtSecret = '73869a97078920dd470b20a6f7487c81'
Example #10
 def save_panel_to_file(self, panel_file_name: str):
     """
     Save a panel obtained from the LeCroy to a file.
     :param panel_file_name: The file name of the file that will contain the saved panel.
     :type panel_file_name: str
     """
     panel = self._scope.GetPanel()
     directory = "lecroy_cfgs"
     full_path = files.get_full_path(directory, panel_file_name)
     with open(full_path, "wt") as file:
         file.write(panel)
Example #11
 def load_lecroy_cfg(self, load_configuration=False):
     """
     Load the CFG for the oscilloscope.
     We use channel C1 for the trigger and channel C2 for the power capture of the FPGA board.
     :param load_configuration: Whether to load the configuration file for the
     LeCroy WaveRunner 610Zi oscilloscope
     """
     from utils.files import get_full_path
     lecroy_cfg_file = "lecroy_ota_config.dat"
     lecroy_cfg_dir = "lecroy_cfgs"
     if load_configuration:
         self.load_panel_from_file(get_full_path(lecroy_cfg_dir, lecroy_cfg_file))
Example #12
def _store_trs_encoded_trace(trs_encoded_trace, file_name):
    """
    Store a TRS encoded trace
    :param trs_encoded_trace: the power trace encoded in the TRS format
    :param file_name: The file name to use when storing the file
    """
    directory = "inspector_traces"
    extension = ".trs"
    abs_path = files.get_full_path(directory, file_name + extension)
    os.makedirs(os.path.dirname(abs_path), exist_ok=True)
    with open(abs_path, "wb") as f:
        f.write(trs_encoded_trace)
Example #13
def save_target_trace(target_trace: np.ndarray,
                      file_name="target_trace",
                      directory="inspector_traces"):
    """
    Write the target trace to a file, for easy loading.
    :param target_trace: the trace to store
    :param file_name: name of the output file, without extension
    :param directory: directory in which to store the trace
    """
    # np.save appends the .npy extension to the path automatically
    path = files.get_full_path(directory, file_name)
    np.save(path, target_trace)
Example #14
def loadGLSL(path):
    with open(get_full_path(path, file_type='shd')) as file:
        code = file.readlines()
        for i, line in enumerate(code):
            # Directives are only processed in the leading '#' block
            if not line.startswith('#'):
                break

            tags = line.split()
            if tags[0] == '#constant':
                # '#constant <type> <name>' becomes '<type> <name> = <value>;'
                code[i] = f'{tags[1]} {tags[2]} = {getattr(Const, tags[2])};\n'

    return ''.join(code)
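For illustration, given a hypothetical Const.MAX_LIGHTS == 8, a shader file starting with

#constant float MAX_LIGHTS

is returned with that line rewritten to

float MAX_LIGHTS = 8;

while the remaining lines pass through unchanged.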
Example #15
def main(working_dir, number_of_models):

    from compssml.tf.utils import read_sets
    from utils.files import get_full_path
    from pycompss.api.api import compss_wait_on

    data_dir = get_full_path(FLAGS.data_dir)

    print("Data dir %s" % data_dir)
    train_data, test_data = read_sets(data_dir, one_hot=True)

    neurons = [1024, 1280, 1440, 1600]
    learning_rates = [1, 10, 25, 50, 75, 100, 125, 150]

    iter_test = 5
    task_iterations = FLAGS.task_batch

    parametrizations = list(product(neurons, learning_rates))
    parametrizations = parametrizations[:number_of_models]

    batch_size = FLAGS.batch_size
    max_training_iterations = FLAGS.max_iterations

    models = init_models(working_dir, parametrizations)

    print("Parametrizations to be explored: %s with %s iterations and %s loops per task"
          % (parametrizations, max_training_iterations, task_iterations))

    # Keep an active flag per model instead of removing entries from
    # parametrizations mid-iteration, which would skip items and misalign
    # the indices into models.
    active = [True] * len(parametrizations)

    for iteration in range(max_training_iterations):
        for (i, (neuron_number, learning_rate)) in enumerate(parametrizations):
            if not active[i]:
                continue
            print("[%s] - Queueing %s - %s" % (iteration, neuron_number, learning_rate))
            if compss_wait_on(continue_exploration(models[i])):
                models[i] = do_training_step(models[i], iteration, task_iterations,
                                             batch_size, train_data[0], train_data[1])
                if iteration % iter_test == 0:
                    models[i] = do_testing(models[i], test_data[0], test_data[1])
            else:
                active[i] = False

    test_accuracies = [do_testing(model, test_data[0], test_data[1])
                       for model in models]

    write_results(models, test_accuracies)
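For reference, the grid above spans 4 neuron counts × 8 learning rates = 32 parametrizations before truncation to number_of_models; a quick standalone check:

from itertools import product

neurons = [1024, 1280, 1440, 1600]
learning_rates = [1, 10, 25, 50, 75, 100, 125, 150]
assert len(list(product(neurons, learning_rates))) == 32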
Example #16
def loadTexturePack(name):
    print(f'\n-- loading Texture Pack: {name}')

    path = get_full_path(name, file_type='tex')
    directories = listdir(path)

    pack = []
    for dr in directories:
        # NOTE: hard-coded base path; assumes packs live under data/Textures/
        textures = listdir(f'data/Textures/{name}/{dr}')

        for tex in textures:
            # The leading-'r' check is passed through as a flag to load_file
            t = Gl.GlTexture.load_file(f'{dr}/{tex}', tex[0] == 'r')
            pack.append(t)

    print('-- Done.\n')
    return pack
Example #17
def test(task_id):
    video = []
    conn = create_connection("datafacebook/Kompas/" + str(task_id) + ".db")
    cursor = conn.execute(
        "SELECT comment_id, comment_content, like_count, love_count, wow_count, haha_count, sad_count, angry_count from Comments"
    )
    for row in cursor:
        video.append({
            "id": row[0],
            "message": row[1],
            "like": row[2],
            "love": row[3],
            "wow": row[4],
            "haha": row[5],
            "sad": row[6],
            "angry": row[7],
        })
    conn.close()

    abc = []

    joy_feel = read_dataset(get_full_path("dataset/cf/pp/filter/joy.txt"),
                            "joy")
    disgust_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/disgust.txt"), "disgust")
    sadness_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/sadness.txt"), "sadness")
    anger_feel = read_dataset(get_full_path("dataset/cf/pp/filter/anger.txt"),
                              "anger")
    fear_feel = read_dataset(get_full_path("dataset/cf/pp/filter/fear.txt"),
                             "fear")
    surprise_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/surpriseExtra.txt"), "surprise")

    dataku = []
    for (words, sentiment) in (joy_feel + disgust_feel + sadness_feel +
                               anger_feel + fear_feel + surprise_feel):
        dataku.append((words.rstrip(), sentiment))

    html_parser = HTMLParser()
    headlines = []
    labels = []
    for words, sentiment in dataku:
        headlines.append(html_parser.unescape(words))
        labels.append(sentiment)

    pipeline = Pipeline([
        (
            "count_vectorizer",
            CountVectorizer(
                ngram_range=(2, 3),
                min_df=1,
                max_df=0.8,
                stop_words=frozenset([
                    "saya",
                    "sedang",
                    "lagi",
                    "adalah",
                    "di",
                    "dari",
                    "karena",
                    "dan",
                    "dengan",
                    "ke",
                    "yang",
                    "untuk",
                    "itu",
                    "orang",
                ]),
            ),
        ),
        ("tfidf_transformer", TfidfTransformer()),
        ("classifier", MultinomialNB()),
    ])
    pipeline.fit(headlines, labels)
    angerx = 0
    joyx = 0
    surprisex = 0
    sadnessx = 0
    fearx = 0
    disgustx = 0

    for each_video in video:
        if each_video["message"] != "":
            # Initialise the POS tagger lazily (see init_tag in Example #3)
            init_tag()
            html_parser = HTMLParser()
            spell_check = jalanSpellCheck()
            koreksi_slang = slangWordCorrect()
            cucco = Cucco()

            # Unescape HTML entities and collapse whitespace
            kata = html_parser.unescape(each_video["message"])
            kata = " ".join(kata.split())

            # Remove emoji
            kata = cucco.replace_emojis(kata)

            normalizations = ["remove_extra_white_spaces"]

            # Remove extra whitespace
            kata = cucco.normalize(kata, normalizations)

            kata = kata.replace("/", " ")

            # Convert to lowercase
            kata = kata.lower()

            # Collapse characters repeated more than twice
            kata = re.sub(r"(.)\1+", r"\1\1", kata)

            # Collapse the doubled punctuation that remains
            kata = kata.replace("..", ".")
            kata = kata.replace(",,", ",")
            kata = kata.replace("!!", "!")
            kata = kata.replace("??", "?")

            # Add a space after a period that is not followed by one
            rx = r"\.(?=\S)"
            kata = re.sub(rx, ". ", kata)

            # Slang correction
            kata = koreksi_slang.jalan(kata)

            # Spellcheck error
            # tampung_kata_1 = []
            # tampung_1 = kata.split()
            # for word in tampung_1:
            #    tampung_kata_1.append(spell_check.correctSpelling(word))
            # kata = " ".join(tampung_kata_1)
            asdqwe = kata

            # Ensure the comment ends with sentence-final punctuation
            if re.match(r".*[^.?!]$", kata):
                kata = kata + " ."

            resultx = do_tag(kata)
            kata = " ".join(resultx)

            if kata != "":
                linesz = []
                linesz.append(kata)
                words = []

                for y in linesz:
                    lines = y.split()
                    for x in lines:
                        word = x.split("/")
                        # POS tags to drop (punctuation, conjunctions,
                        # determiners, modals, symbols, ...)
                        tags_to_remove = {
                            ",", "IN", "CC", "SC", "CDO", "CDC", "CDP",
                            "CDI", "DT", "MD", "OP", "CP", "SYM", ".",
                        }
                        if word[1] not in tags_to_remove:
                            words.append(word[0] + "_" + word[1])
                    resultx = "".join([" " + i for i in words]).strip()

                cobaa = []
                cobaa.append(resultx)

                for x in pipeline.predict(cobaa):
                    hasilx = x
                if hasilx == "anger":
                    angerx = angerx + 1
                elif hasilx == "joy":
                    joyx = joyx + 1
                elif hasilx == "sadness":
                    sadnessx = sadnessx + 1
                elif hasilx == "fear":
                    fearx = fearx + 1
                elif hasilx == "disgust":
                    disgustx = disgustx + 1
                elif hasilx == "surprise":
                    surprisex = surprisex + 1

                comments_data = {
                    "id": each_video["id"],
                    "komen": each_video["message"],
                    "asdqwe": asdqwe,
                    "komen_edit": resultx,
                    "prediksi": hasilx,
                    "like_count": each_video["like"],
                    "love_count": each_video["love"],
                    "wow_count": each_video["wow"],
                    "haha_count": each_video["haha"],
                    "sad_count": each_video["sad"],
                    "angry_count": each_video["angry"],
                }

                # Append inside the kata check so comments_data is always bound
                abc.append(comments_data)

    ctrku = {
        "anger": angerx,
        "joy": joyx,
        "sadness": sadnessx,
        "fear": fearx,
        "surprise": surprisex,
        "disgust": disgustx,
    }

    return jsonify({"tasks": abc}, {"ASD": ctrku})
Example #18
import configparser
import os
import subprocess  # nosec
import shlex
import multiprocessing
import pathlib
import numpy as np
import logging
from tempfile import NamedTemporaryFile
from utils.files import get_full_path

ga_log = logging.getLogger('ga_log')
etc_folder = get_full_path('etc')
ini = configparser.ConfigParser(os.environ)
ini.read(os.path.join(etc_folder, 'gdock.ini'), encoding='utf-8')
profit_exe = ini.get('third_party', 'profit_exe')


class Profit:
    """Wrapper for PROFIT."""

    def __init__(self, ref, mobi, nproc):
        self.reference = ref  # str
        self.mobi = mobi  # list
        self.exec = profit_exe
        self.nproc = int(nproc)
        self.izone = ''
        izone_f = pathlib.Path(ref.replace('.pdb', '.izone'))
        if not izone_f.exists():
            raise FileNotFoundError(f'{izone_f} not found')
        else:
Example #19
import unittest
import pathlib
import shutil
import glob
import os
import tempfile
import copy
from modules.setup import Setup
from modules.error import (DependencyNotDefinedError, DependencyNotFoundError,
                           SectionNotDefinedError)
from utils.files import get_full_path

data_folder = get_full_path('tests', 'test_data')


class TestSetup(unittest.TestCase):
    def setUp(self):

        toml_string = "[main]" + os.linesep
        toml_string += "identifier = 'setup'" + os.linesep
        toml_string += "number_of_processors = 1" + os.linesep
        toml_string += "" + os.linesep
        toml_string += "[restraints]" + os.linesep
        toml_string += "A = [39,40,41]" + os.linesep
        toml_string += "B = [4,5,6]" + os.linesep
        toml_string += "" + os.linesep
        toml_string += "[molecules]" + os.linesep
        toml_string += f"A = '{data_folder}/molA.pdb'" + os.linesep
        toml_string += f"B = '{data_folder}/molB.pdb'" + os.linesep

        with open(f'{data_folder}/setup.toml', 'w') as setup_f:
Example #20
 def __init__(self):
     self.lock = threading.Lock()
     self.basePath = files.get_full_path("finance_app/db/watchlists")
Example #21
 def __init__(self):
     super().__init__()
     dbFolder = "db"
     dbName = "alert.db"
     self.fullDBPath = get_full_path(dbFolder, dbName)
Example #22
def load_map(map_name):
    global objs
    with open(get_full_path(map_name + '.lvl', file_type='maps'), 'rb') as f:
        map_data = f.read()
    map_decom = decompress(map_data).decode('utf-8')
    content = [line.strip() for line in map_decom.splitlines()]
    for element in content:
        element_list = element.split()
        # Field layout: ID(0) TYPE(1) GROUP(2) POS_X(3) POS_Y(4) SIZE_W(5) SIZE_H(6)
        # Types: 1 - WorldRectangleRigid | 2 - WorldRectangleSensor
        #        3 - MetalCrate          | 4 - FireLight | 5 - LightSource

        if element_list[0][0] != "[":
            type_obj = 1
            if element_list[1] == "WorldRectangleRigid":
                type_obj = 1
            elif element_list[1] == "WorldRectangleSensor":
                type_obj = 2
            elif element_list[1] == "MetalCrate":
                type_obj = 3
            elif element_list[1] == "FireLight":
                type_obj = 4
            elif element_list[1] == "LightSource":
                type_obj = 5

            size_h = 0
            size_w = 0

            if type_obj != 3 and type_obj != 4 and type_obj != 5:
                size_h = int(element_list[6])
                size_w = int(element_list[5])

            pos_y = int(element_list[4])
            pos_x = int(element_list[3])

            group = core.screens.game.obstacles_gr  # Default

            if element_list[2] == "obstacles_gr":
                group = core.screens.game.obstacles_gr

            if type_obj == 1:
                obj = core.screens.game.WorldRectangleRigid(
                    group, pos=[pos_x, pos_y], size=[size_w, size_h])
            elif type_obj == 2:
                obj = core.screens.game.WorldRectangleSensor(
                    group,
                    pos=[pos_x, pos_y],
                    size=[size_w, size_h],
                    layer=int(element_list[7]))
            elif type_obj == 3:
                obj = core.screens.game.MetalCrate(group, pos=[pos_x, pos_y])
            else:
                # Types 4 and 5 are lights; they share the addLight call and
                # differ only in the light class.
                light_cls = (core.screens.game.FireLight if type_obj == 4
                             else core.screens.game.LightSource)
                args = [int(element_list[5]), element_list[6],
                        int(element_list[7])]
                if len(element_list) > 8:
                    args.append(element_list[8])
                obj = core.screens.game.addLight(light_cls, [pos_x, pos_y],
                                                 *args)
            objs.append(obj)
        else:
            # "[index:attr:value:type]" lines patch an attribute on an
            # object created earlier in the file.
            tmp = element_list[0].replace("[", "").replace("]", "")
            tmp_array = tmp.split(":")

            if tmp_array[3] == "float":
                setattr(objs[int(tmp_array[0])], tmp_array[1],
                        float(tmp_array[2]))
            elif tmp_array[3] == "str":
                setattr(objs[int(tmp_array[0])], tmp_array[1], tmp_array[2])
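For reference, a hypothetical decompressed .lvl line in the layout the parser expects (ID TYPE GROUP POS_X POS_Y SIZE_W SIZE_H):

3 WorldRectangleRigid obstacles_gr 100 200 64 32

This would append a 64x32 WorldRectangleRigid at (100, 200) to obstacles_gr, while a line such as [3:friction:0.5:float] would then patch objs[3].friction to 0.5.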
Example #23
def plot_traces_to_pdf(digit_col: str,
                       dir="inspector_traces",
                       overlap=False,
                       overlap_file_name="plot_templates_overlap"):
    """
    Plot a trace and store it as a PDF using Matplotlib.
    Relevant links are as follows:
    - https://stackoverflow.com/questions/42372617/how-to-plot-csv-data-using-myplotlib-and-pandas-in-python
    - Save as PDF: https://stackoverflow.com/questions/11328958/save-the-plots-into-a-pdf
    - Figure is blank: https://stackoverflow.com/questions/9012487/matplotlib-pyplot-savefig-outputs-blank-image
    :param digit_col: substring used to select the csv files of the current iteration
    :param dir: the directory that contains the csv files to plot
    :param overlap: Whether to overlap multiple waveform plots into a single plot
    :param overlap_file_name: base name of the combined PDF when overlap is set
    """
    import pandas as pd
    import matplotlib.pyplot as plt
    headers = ["Sample", "Volt"]

    # Change to the directory that contains the csv files
    os.chdir(files.get_full_path(dir))
    """
    Find the N most distincitve colors for a given color palette.
    Read more on these links:
    - https://stackoverflow.com/q/8389636
    - https://stats.stackexchange.com/q/118033
    """

    csv_files = glob.glob("*.txt")
    # Only plot csv files that are related to the current iteration of the algorithm
    if overlap:
        csv_files = [
            csv_file for csv_file in csv_files
            if digit_col in csv_file and not ("target" in csv_file)
        ]
    else:
        csv_files = [
            csv_file for csv_file in csv_files if digit_col in csv_file
        ]
    # At most 16 templates per iteration (plus possibly the target trace)
    assert len(csv_files) <= 17

    # Define the color map
    NUM_COLORS = len(csv_files)
    # See https://matplotlib.org/users/colormaps.html for the available color maps
    cm = plt.get_cmap('viridis')
    plt.gca().set_prop_cycle(
        color=[cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])

    # Loop through all the csv files in the directory and plot them
    for idx, csv_file in enumerate(csv_files):
        path = files.get_full_path(dir, csv_file)
        df = pd.read_csv(path, names=headers)

        x = df['Sample']
        y = df['Volt']
        almost_black = "#262626"
        if not overlap or idx == 0:
            plt.figure(figsize=(10, 3))
        if not overlap:
            plt.plot(x, y, linewidth=0.3, color="blue")
        else:

            plt.plot(x, y, linewidth=0.1, alpha=0.5, label="d{}".format(idx))

        # Remove top axes and right axes
        for spine in ["top", "right"]:
            plt.gca().spines[spine].set_visible(False)

        # For remaining spines, thin out their line and change the black to a slightly off-black dark grey
        spines_to_keep = ['bottom', 'left']
        for spine in spines_to_keep:
            plt.gca().spines[spine].set_linewidth(0.5)
            plt.gca().spines[spine].set_color(almost_black)
        # beautify the x-labels
        plt.gcf().autofmt_xdate()
        plt.tight_layout()
        csv_file_name = os.path.splitext(csv_file)[0]
        if not overlap:
            plt.savefig(csv_file_name + ".pdf", bbox_inches='tight')

    if overlap:
        # Place the legend outside the axes, to the right of the plot
        plt.gca().legend(loc="center left", bbox_to_anchor=(1, 0.5))
        # Thicken each legend line so the colors stay readable
        for line in plt.gca().get_legend().get_lines():
            line.set_linewidth(4.0)
        plt.savefig(overlap_file_name + ".pdf", bbox_inches='tight')

    plt.close("all")
Example #24
def get_tasks(task_id):
    abc = []

    graph = facebook.GraphAPI(access_token=token, version=3.1)
    node = "/%s" % task_id

    video = graph.request(
        node + "/comments?fields=id,message,comment_count,"
        "reactions.type(LIKE).limit(0).summary(total_count).as(like),"
        "reactions.type(LOVE).limit(0).summary(total_count).as(love),"
        "reactions.type(WOW).limit(0).summary(total_count).as(wow),"
        "reactions.type(HAHA).limit(0).summary(total_count).as(haha),"
        "reactions.type(SAD).limit(0).summary(total_count).as(sad),"
        "reactions.type(ANGRY).limit(0).summary(total_count).as(angry)")

    # Wrap this block in a while loop so we can keep paginating requests until
    # finished.

    # Read the emotion datasets
    joy_feel = read_dataset(get_full_path("dataset/cf/pp/filter/joy.txt"),
                            "joy")
    disgust_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/disgust.txt"), "disgust")
    sadness_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/sadness.txt"), "sadness")
    anger_feel = read_dataset(get_full_path("dataset/cf/pp/filter/anger.txt"),
                              "anger")
    fear_feel = read_dataset(get_full_path("dataset/cf/pp/filter/fear.txt"),
                             "fear")
    surprise_feel = read_dataset(
        get_full_path("dataset/cf/pp/filter/surpriseExtra.txt"), "surprise")

    # Strip each entry and pair it with its label to form the training data
    dataku = []
    for (words, sentiment) in (joy_feel + disgust_feel + sadness_feel +
                               anger_feel + fear_feel + surprise_feel):
        dataku.append((words.rstrip(), sentiment))

    html_parser = HTMLParser()
    headlines = []
    labels = []
    for words, sentiment in dataku:
        headlines.append(html_parser.unescape(words))
        labels.append(sentiment)

    pipeline = Pipeline([
        (
            "count_vectorizer",
            CountVectorizer(
                ngram_range=(2, 3),
                min_df=1,
                max_df=0.8,
                stop_words=frozenset([
                    "saya",
                    "sedang",
                    "lagi",
                    "adalah",
                    "di",
                    "dari",
                    "karena",
                    "dan",
                    "dengan",
                    "ke",
                    "yang",
                    "untuk",
                    "itu",
                    "orang",
                ]),
            ),
        ),
        ("tfidf_transformer", TfidfTransformer()),
        ("classifier", MultinomialNB()),
    ])
    pipeline.fit(headlines, labels)
    angerx = 0
    joyx = 0
    surprisex = 0
    sadnessx = 0
    fearx = 0
    disgustx = 0
    while True:
        try:
            # print("Get post comments data :")
            for each_video in video["data"]:
                if each_video["message"] != "":
                    # Initialise the POS tagger lazily (see init_tag in Example #3)
                    init_tag()
                    html_parser = HTMLParser()
                    spell_check = jalanSpellCheck()
                    koreksi_slang = slangWordCorrect()
                    cucco = Cucco()

                    # Unescape HTML entities and collapse whitespace
                    kata = html_parser.unescape(each_video["message"])
                    kata = " ".join(kata.split())

                    # Remove emoji
                    kata = cucco.replace_emojis(kata)

                    normalizations = ["remove_extra_white_spaces"]

                    # Remove extra whitespace
                    kata = cucco.normalize(kata, normalizations)

                    kata = kata.replace("/", " ")

                    # Convert to lowercase
                    kata = kata.lower()

                    # Collapse characters repeated more than twice
                    kata = re.sub(r"(.)\1+", r"\1\1", kata)

                    # Collapse the doubled punctuation that remains
                    kata = kata.replace("..", ".")
                    kata = kata.replace(",,", ",")
                    kata = kata.replace("!!", "!")
                    kata = kata.replace("??", "?")

                    # Add a space after a period that is not followed by one
                    rx = r"\.(?=\S)"
                    kata = re.sub(rx, ". ", kata)

                    # Slang correction
                    kata = koreksi_slang.jalan(kata)

                    # Spellcheck error
                    # tampung_kata_1 = []
                    # tampung_1 = kata.split()
                    # for word in tampung_1:
                    #    tampung_kata_1.append(spell_check.correctSpelling(word))
                    # kata = " ".join(tampung_kata_1)
                    asdqwe = kata

                    # Ensure the comment ends with sentence-final punctuation
                    if re.match(r".*[^.?!]$", kata):
                        kata = kata + " ."

                    resultx = do_tag(kata)
                    kata = " ".join(resultx)

                    if kata != "":
                        linesz = []
                        linesz.append(kata)
                        words = []
                        for y in linesz:
                            lines = y.split()
                            for x in lines:
                                word = x.split("/")
                                # POS tags to drop (punctuation, conjunctions,
                                # determiners, modals, symbols, ...)
                                tags_to_remove = {
                                    ",", "IN", "CC", "SC", "CDO", "CDC", "CDP",
                                    "CDI", "DT", "MD", "OP", "CP", "SYM", ".",
                                }
                                if word[1] not in tags_to_remove:
                                    words.append(word[0] + "_" + word[1])
                            resultx = "".join([" " + i for i in words]).strip()
                            # print(resultx)

                        cobaa = []
                        cobaa.append(resultx)
                        for x in pipeline.predict(cobaa):
                            hasilx = x
                        if hasilx == "anger":
                            angerx = angerx + 1
                        elif hasilx == "joy":
                            joyx = joyx + 1
                        elif hasilx == "sadness":
                            sadnessx = sadnessx + 1
                        elif hasilx == "fear":
                            fearx = fearx + 1
                        elif hasilx == "disgust":
                            disgustx = disgustx + 1
                        elif hasilx == "surprise":
                            surprisex = surprisex + 1

                        comments_data = {
                            "id": each_video["id"],
                            "komen": each_video["message"],
                            "asdqwe": asdqwe,
                            "komen_edit": resultx,
                            "prediksi": hasilx,
                            "like_count": each_video["like"]["summary"]["total_count"],
                            "love_count": each_video["love"]["summary"]["total_count"],
                            "wow_count": each_video["wow"]["summary"]["total_count"],
                            "haha_count": each_video["haha"]["summary"]["total_count"],
                            "sad_count": each_video["sad"]["summary"]["total_count"],
                            "angry_count": each_video["angry"]["summary"]["total_count"],
                        }

                        # Append inside the kata check so comments_data is always bound
                        abc.append(comments_data)
            # Attempt to make a request to the next page of data, if it exists.
            video = requests.get(video["paging"]["next"]).json()
        except KeyError:
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break

    ctrku = {
        "anger": angerx,
        "joy": joyx,
        "sadness": sadnessx,
        "fear": fearx,
        "surprise": surprisex,
        "disgust": disgustx,
    }

    return jsonify({"tasks": abc}, {"ASD": ctrku})
Example #25
#!/usr/bin/python3

import unittest
from unittest.mock import patch

from parameterized import parameterized

from parser.rss_channel import RssItemError
from parser.rss_parser import RssUrlParser, RssParserError
from utils.file_reader import JsonFileReader
from utils.files import get_full_path

TESTDATA_FILENAME_1 = get_full_path("tests", "test_data", "rss_sample.json")
TESTDATA_FILENAME_2 = get_full_path("tests", "test_data", "rss_sample_2.json")


class TestRssParsing(unittest.TestCase):
    @patch("feedparser.parse")
    def test_parse_rss_dataset_1(self, mock_response):
        # Given: an RSS feed URL to be parsed
        url = "https://www.fiercewireless.com/rss/xml"
        obj_under_test = RssUrlParser(url)

        file_reader = JsonFileReader(TESTDATA_FILENAME_1)
        mock_response.return_value = file_reader.read()

        # When: the URL is parsed
        feed_content = obj_under_test.parse()

        # Then: the channel details are returned
        self.assertIn("FierceWireless", feed_content)