Example #1
    def __init__(self, tickets, start='2019-08-01', interval='1h'):
        self.tickets = tickets
        self.start = start
        self.interval = interval

        self.dp = DataParser(tickets, start=self.start, interval=self.interval)

        self.dp.download_data()
        self.dp.parse_to_week_data()

        self.data = self.dp.week_data

        self.budget = 10000.0

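        # Bookkeeping fields, populated later.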
        self.start_budget = None
        self.initial_buy_date = None
        self.buy_n = None
        self.sell_n = None
        self.stop_budget = None

        self.market_first_price = None
        self.market_money = None
        self.market_stock_n = None
        self.market_last_price = None
        self.market_stop_budget = None

        self.buy_day = None
        self.sell_day = None
Example #2
    def Dataset_i(self, i):
        data_origin = DataParser().AmericanOrigination_i(i)
        data_monthly = DataParser().AmericanMonthly_i(i)
        dataset = FeatureExtraction().combine_data_and_origin(
            data_origin, data_monthly)
        assert isinstance(dataset, pd.DataFrame)
        return dataset
Example #3
    def run(self):
        """ Run the main execution. """
        end = False
        pop = 100
        nb_step = 5000

        # Get options
        options = get_options()

        # Get SUMO binary
        if options.nogui:
            sumoBinary = checkBinary('sumo')
        else:
            sumoBinary = checkBinary('sumo-gui')

        # Get number of steps
        if (options.nb_step is not None):
            nb_step = options.nb_step

        # Load the Qtable if wanted
        if options.loadqt:
            self.qlu.load_q_table()

        # Traci starts sumo as a subprocess and then this script connects and runs
        traci.start(
            [sumoBinary, "-c", "map.sumocfg", "--tripinfo-output", "data.xml"])

        # Create agents
        self.create_rand_agents(pop)

        traci.simulationStep()

        # Main loop
        for i in range(nb_step):
            print('Step #' + str(i))
            traci.simulationStep()

            self.maintain_rand_agents()

            if i > 99:
                if ((i - 100) % 14 == 0):
                    # Create an Agent
                    self.our_agents[Agent.ID] = OurAgent('e', self.qlu)
                elif ((i - 100) % 7 == 0):
                    # Create a dummy
                    self.dummy_agents[Agent.ID] = InterestingAgent('e')

            self.control_agents()

        traci.close()
        sys.stdout.flush()

        data_dum = DataParser("data.xml", list(self.dummy_agents.keys()))
        data_our = DataParser("data.xml", list(self.our_agents.keys()))

        self.qlu.save_q_table()
Example #4
    def Dataset_i_Reduced(self, i):
        data_origin = DataParser().AmericanOrigination_i(i)
        data_monthly = DataParser().AmericanMonthly_i(i)
        print("Origin : " + str(data_origin.shape))
        print("Monthly : " + str(data_monthly.shape))
        fe = FeatureExtraction()
        fe.TESTING = True
        dataset = fe.combine_data_and_origin(data_origin, data_monthly)
        assert isinstance(dataset, pd.DataFrame)
        return dataset
Example #5
    def Dataset(self):
        """str: Properties should be documented in their getter method."""

        data_origin = DataParser().AmericanOrigination
        data_monthly = DataParser().AmericanMonthly
        dataset = FeatureExtraction().combine_data_and_origin(
            data_origin, data_monthly)

        assert isinstance(dataset, pd.DataFrame)
        return dataset
Example #6
def main():
    print("parsing data... this could take a while...")
    dp = DataParser('posts_news')
    featuresToResultsAll = dp.getFeatureResultPairs()

    # calculate weights
    weights, testData = getWeightsAndTestData(featuresToResultsAll)

    i = 0

    totalError = 0.0
    print "\nPrinting Example Results:"
    for fv, target in testData:
        i += 1
        prediction = predict(weights, fv)

        if i % 20 == 0:
            printResults(prediction, target)

        #error = abs(len(str(prediction)) - len(str(target)))
        error = abs(prediction - target)
        totalError += error

    totalError /= i
    print "total Error as average difference between prediction and target: %s" % totalError

    dp.printMostProvocativeWords(50)
    dp.printMostProvocativeBigrams(50)
    dp.printMostProvocativeTrigrams(50)
Example #7
    def __parseHeaderKeyValueLine(self, fileLine, dict):
        key = ""
        values = []
        lastIdx = 0
        currIdx = 0
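        # Split the line on ';': the first field becomes the key, the remaining fields the values.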
        while currIdx < len(fileLine):
            item = fileLine[currIdx]
            if item == ";":
                if lastIdx < currIdx:
                    if lastIdx == 0:
                        key = fileLine[lastIdx:currIdx]
                    else:
                        values.append(fileLine[lastIdx:currIdx])
                    lastIdx = currIdx + 1

            currIdx += 1

        if lastIdx < currIdx:
            values.append(fileLine[lastIdx:currIdx])
        idx = 0
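        # Convert each value to int or float according to the header/footer key, then store the list under that key.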
        while len(values) > idx:
            if key == HeaderParser.Head_Key_Order:
                values[idx] = int(values[idx])
            elif key == HeaderParser.Head_Key_PrePressure:
                values[idx] = float(
                    DataParser.checkAndReplaceComma(self, values[idx]))
            elif key == HeaderParser.Head_Key_Furnace_Temp:
                values[idx] = int(values[idx])
            elif key == HeaderParser.Head_Key_Part_Weight:
                values[idx] = int(values[idx])

            elif key == HeaderParser.Foot_Key_Amount:
                values[idx] = int(values[idx])
            elif key == HeaderParser.Foot_Key_Order_Set:
                values[idx] = int(values[idx])
            elif key == HeaderParser.Foot_Key_Order_Cnt:
                values[idx] = int(values[idx])
            elif key == HeaderParser.Foot_Key_Charge_Set:
                values[idx] = int(values[idx])
            #elif key == HeaderParser.Foot_Key_Charge_Cnt:
            #values[idx] = int(values[idx])
            elif key == HeaderParser.Foot_Key_Cycle_Brutto:
                values[idx] = float(
                    DataParser.checkAndReplaceComma(self, values[idx]))
            elif key == HeaderParser.Foot_Key_Cycle_Netto:
                values[idx] = float(
                    DataParser.checkAndReplaceComma(self, values[idx]))

            dict[key] = values
            idx += 1
Example #8
    def __init__(self):
        # Instantiate the peer-level class
        self.parser = DataParser()

        # Create three URL queues: not yet downloaded, downloading, finished downloading
        self.beforedownloadset = set()
        self.beingdownloadset = set()
        self.afterdownloadset = set()
Example #9
    def run(self):
        # Connect to db, receive db connection Object
        db_connect = DBConnect()
        db_connect.connect()
        db = db_connect.get_connection()

        """
        #Start reading the file, receive results list with data
        fileReader = FileReader(self.url)
        fileReader.open_and_read_file()
        result = fileReader.getResults()
        """

        # Setting up a class object and connecting.
        unix_reader = UNIXReader("src/client/upload_stream.sock")
        unix_reader.connect()

        # Data parser class object to parse received data
        data_parser = DataParser()

        # Query class object getting ready to query database
        query_db = QueryDB(db)
        query_db.find_trip_id()

        print('Running main loop')
        while True:

            # Receive data from unix reader object
            data = unix_reader.revc_socket()

            json_data = json.loads(data)
            print(json_data)

            # Parse the result data to appropriate format
            if json_data:
                sorted_results = data_parser.parseToDict(json_data)
                if db_connect.check_connection():
                    # Send data to database
                    query_db.query(sorted_results)
                else:
                    db_connect.connect()
                    # Send data to database
                    query_db.query(sorted_results)
                    # Close connection to database
                    db_connect.disconnect()
Example #10
def main():
    # ======= COLLECT DATA =======
    # set to true if need to collect data
    if False:
        dp = DataParser()
        dp.readData()

    # ======= READ IN DATA =======
    df = pd.DataFrame()
    for i in range(0, 5):
        df = df.append(
            pd.read_csv("data/vancouver_data_{0}.csv".format(i),
                        sep="\t",
                        index_col=False))

    # ======= PROCESS DATA =======
    dl = DeepLearning(df.values)
    dl.process()
Example #11
    def __init__(self, ipAddress):

        self.hardData = DataParser("./Data.json")

        self.mqtt_sub = mqtt.Client("Listener-Composition")
        self.mqtt_pub = mqtt.Client("Writer-Composition")
        self.ipAddress = ipAddress

        self.stateVariables = StateVaribles()
        self.prevStateVariables = StateVaribles()
        self.actionVariables = ActionVariables()

        # MQTT
        self.mqtt_sub.on_message = self.on_context_message
        self.mqtt_sub.connect(self.ipAddress, 1883, 70)
        self.mqtt_sub.subscribe("Context/#", qos=2)
        self.mqtt_pub.connect(self.ipAddress, 1883, 70)
        self.mqtt_pub.loop_start()
Example #12
    def Dataset(self):
        """str: Properties should be documented in their getter method."""

        raw_data_set = DataParser().GermanCredit
        dataset = raw_data_set
        # dataset = self.format_data(raw_data_set)
        dataset = FeatureExtraction().apply_all(dataset)

        assert isinstance(dataset, pd.DataFrame)
        return dataset
Example #13
    def Dataset(self):
        """str: Properties should be documented in their getter method."""

        # raw_data_set = DataParser().GermanCredit
        # dataset = FeatureExtraction().apply_all(raw_data_set)

        dataset = DataParser().replicateDataLendingClubProcessed

        assert isinstance(dataset, pd.DataFrame)
        return dataset
Example #14
def main():
  if len(sys.argv) != 4:
    print('Missing file operands!')
    print('PerfDataViewer.py [program] [input_data_file] [output_data_file]')
    return

  program = str(sys.argv[1])
  inFileName = str(sys.argv[2])
  outFileName = str(sys.argv[3])
  
  #TODO: move these to configuration files
  eventsList = ['0x149','0x151','0x2a2','0x126','0x227','0x224','0x8a2','0x1b0','0x20f0','0x2f1','0x1f2','0x1b8','0x2b8','0x4b8','0x40cb']
  ppc_eventsList = ['0x3c046','0x2c048','0x2f080','0x26080','0x30881','0x26182','0x26880','0xd0a2','0xd0a0']
  arffHeader = ['@relation function_level_badfs_badma_good\n',\
                 '@attribute r0149 numeric\n','@attribute r0151 numeric\n','@attribute r02a2 numeric\n','@attribute r0126 numeric\n',\
                 '@attribute r0227 numeric\n','@attribute r0224 numeric\n','@attribute r08a2 numeric\n','@attribute r01b0 numeric\n',\
                 '@attribute r20f0 numeric\n','@attribute r02f1 numeric\n','@attribute r01f2 numeric\n','@attribute r01b8 numeric\n',\
                 '@attribute r02b8 numeric\n','@attribute r04b8 numeric\n','@attribute r40cb numeric\n','@attribute status {good, badfs, badma}\n',\
                 '@data\n']
  
  ppc_arffHeader = ['@relation function_level_badfs_badma_good\n',\
                    '@attribute r3c046 numeric\n', '@attribute r2c048 numeric\n', '@attribute r2f080 numeric\n',\
                    '@attribute r26080 numeric\n', '@attribute r30881 numeric\n', '@attribute r26182 numeric\n',\
                    '@attribute r26880 numeric\n', '@attribute rd0a2 numeric\n', '@attribute rd0a0 numeric\n',\
                    '@attribute status {good, badfs, badma}\n', '@data\n']
  
  perfData = PerfData()
  
  perfFileReader = FileReader(inFileName)
  dataParser = DataParser(perfFileReader, perfData, program)
  
  eventsHolder = EventsHolder(eventsList)
  eventsHolder.setInstructionCountRawEvent('0xc0')
  
  arffWriter = ArffWriter(outFileName,arffHeader)
  dataWriter = DataWriter(arffWriter, perfData, eventsHolder)
  
  dataParser.parse()
  print(perfData.getDataStore())
  dataWriter.writeToArffFile()

  print(outFileName + ' file was created successfully.')
Example #15
    def get_info_A(self):
        def get_data_i_FE2(i):
            print "Read" + str(i)
            return DataParser().AmericanCombo_i_FE2(i)

        def write_out_data(df, i):
            print "Writing out data"
            DataParser()._write_HDFStore_Combined_FE2(df, i)

        data_count = DataParser().number_of_datasets
        i = 0

        df_ALL = pd.DataFrame()
        df_ALL2 = pd.DataFrame()

        LOAN_COUNT = 0
        FICO_MEAN = 0
        FICO_MEDIAN = []
        BALANCE_MEAN = 0

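        # Sort each dataset by loan id and stack its first 20,000 rows (and, separately, its first 5,000 rows).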
        while i < data_count:

            df_X = get_data_i_FE2(i)

            df_X = df_X.sort_values("id_loan")
            df_X.reset_index(drop=True, inplace=True)
            n = 20000
            if len(df_X) < n:
                n = len(df_X)
            df_ALL = pd.concat([df_X[:n], df_ALL], axis=0)

            m = 5000
            if len(df_X) < m:
                m = len(df_X)
            df_ALL2 = pd.concat([df_X[:m], df_ALL2], axis=0)

            # LOAN_COUNT += len(np.unique(df_X['id_loan'].values))
            # FICO_MEAN += df_X['fico'].mean()
            # BALANCE_MEAN += df_X['orig_upb'].mean()
            # FICO_MEDIAN.append(df_X['fico'].median())
            # FICO_MEDIAN.append(df_X['fico'].median())
            i += 1

        FICO_MEAN = FICO_MEAN / i
        FICO_MEDIAN = np.median(FICO_MEDIAN)

        print "FICO_MEDIAN: " + str(FICO_MEDIAN)
        print "FICO_MEAN: " + str(FICO_MEAN)
        print "LOAN_COUNT: " + str(LOAN_COUNT)
        print "BALANCE_MEAN: " + str(BALANCE_MEAN)

        write_out_data(df_ALL, -1)
        write_out_data(df_ALL2, -2)
Example #16
def ComputePrecisionK(modelfile, testfile, K_list):

    maxParagraphLength = 10
    maxParagraphs = 4
    #nlabels=1001
    #vocabularySize=76391
    labels = 8
    vocabularySize = 244
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    print(testfile)
    testing.getDataFromfile(testfile)
    print("data loading done")
    print("no of test examples: " + str(testing.totalPages))

    model.load(modelfile)

    print("model loading done")

    batchSize = 1

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    precAtK = {}
    for itr in K_list:
        precAtK[itr] = 0

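    # Rank labels by predicted probability and count how many of the top-K are true labels for each example.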
    for i, v in enumerate(pred):
        temp = [(labId, labProb) for labId, labProb in enumerate(v)]
        #     print(temp)
        temp = sorted(temp, key=lambda x: x[1], reverse=True)
        for ele in K_list:
            pBag = 0
            for itr in range(ele):
                if truePre[i][0][temp[itr][0]] == 1:
                    pBag += 1
        #         print(float(pBag)/float(ele))
            precAtK[ele] += float(pBag) / float(ele)

    f = open("results/precAtK_model3_n", "w")
    for key in sorted(precAtK.keys()):
        #     print(key, precAtK[key]/len(pred))
        print(precAtK[key] / len(pred))
        f.write(str(key) + "\t" + str(precAtK[key] / len(pred)) + "\n")
    f.close()
Example #17
def ComputePrecisionK(modelfile, testfile, K_list):

    CURRENT_DIR = os.path.dirname(os.path.abspath("./WikiCategoryLabelling/"))
    sys.path.append(os.path.dirname(CURRENT_DIR + "/WikiCategoryLabelling/"))

    maxParagraphLength = 250
    maxParagraphs = 10
    labels = 1001
    vocabularySize = 76390
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)
    print("data loading done")
    print("no of test examples: " + str(testing.totalPages))

    model.load(modelfile)

    print("model loading done")

    batchSize = 10

    testing.restore()
    truePre = []
    pred = []
    for i in range(math.ceil(testing.totalPages / batchSize)):
        if i < testing.totalPages / batchSize:
            data = testing.nextBatch(batchSize)
        else:
            data = testing.nextBatch(testing.totalPages % batchSize)
        truePre.extend(data[0])
        pre = model.predict(data)
        pred.extend(pre[0].tolist())

    avgPrecK = [0] * len(K_list)
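    # precision@K: the fraction of the top-K ranked labels that are true, averaged over all test examples.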
    for i, p in enumerate(pred):
        sortedL = sorted(range(len(p)), key=p.__getitem__, reverse=True)
        for k, K in enumerate(K_list):
            labelK = sortedL[:K]
            precK = 0
            for l in labelK:
                if truePre[i][l] == 1:
                    precK += 1
            avgPrecK[k] += precK / float(K)
    avgPrecK = [float(a) / len(pred) for a in avgPrecK]

    for p in avgPrecK:
        print(str(p))
Example #18
    def __init__(self):
        # Instantiate the other functional modules
        self.urlgen = URLGenerator()
        self.downloader = Downloader()
        self.parser = DataParser()
        self.datastore = DataStore()

        # Create three URL queues: not yet downloaded, downloading, finished downloading
        self.beforedownloadset = set()
        self.beingdownloadset = set()
        self.afterdownloadset = set()
        # Set the seed URL
        self.seedurl = 'https://so.gushiwen.org/authors/'
Example #19
def main():
    """Parse and graph data provided in Data(Relevant).csv file."""
    # Take file name as raw string
    data_file = r'Data(Relevant).csv'

    # Regular expression that only accepts strings with valid DC
    # names 'I', 'A', and 'S'.
    my_regex = r'^[IAS]$'

    # List to store user-entered data center names
    dc_name_list = []

    print('Enter data centers to be graphed')

    while True:  # Loop until user enters valid Data center name
        user_input = input(">> ")
        # Checking if user_input matches defined regular expression
        if re.search(my_regex, user_input):
            dc_name_list.append(user_input)
        # Breaking from loop when user enters nothing.
        elif user_input == '':
            break
        else:
            print("Valid dc_name_list: 'I' 'A' 'S'")

    # Modifying data center name list to only have unique dc names
    dc_name_list = set(dc_name_list)

    # Creating data parser to parse data_file, returning records
    # corresponding to the data centers specified in the dc_name_list
    dc_data_parser = DataParser(data_file, dc_name_list)
    dc_dataset = dc_data_parser.get_dataset()

    # Creating a DatasetDisplay object for plotting and displaying
    # the data center dataset.
    dc_data_display = DatasetDisplay(dc_dataset, dc_name_list)
    dc_data_display.show_plot()
Example #20
def main():
    """Parse and graph data provided in Data(Relevant).csv file."""
    # Take file name as raw string
    data_file = r'Data(Relevant).csv'

    # Regular expression that only accepts strings with valid DC
    # names 'I', 'A', and 'S'.
    my_regex = r'^[IAS]$'

    # List to store user-entered data center names
    dc_name_list = []

    print('Enter data centers to be graphed')

    while True:  # Loop until user enters valid Data center name
        user_input = input(">> ")
        # Checking if user_input matches defined regular expression
        if re.search(my_regex, user_input):
            dc_name_list.append(user_input)
        # Breaking from loop when user enters nothing.
        elif user_input == '':
            break
        else:
            print("Valid dc_name_list: 'I' 'A' 'S'")

    # Modifying data center name list to only have unique dc names
    dc_name_list = set(dc_name_list)

    # Creating data parser to parse data_file, returning records
    # corresponding to the data centers specified in the dc_name_list
    dc_data_parser = DataParser(data_file, dc_name_list)
    dc_dataset = dc_data_parser.get_dataset()

    # Creating a DatasetDisplay object for plotting and displaying
    # the data center dataset.
    dc_data_display = DatasetDisplay(dc_dataset, dc_name_list)
    dc_data_display.show_plot()
Example #21
def parseKeyValueFile(path):

    # create the variables
    headerSection = False
    footerSection = False
    dataSection = False
    head = HeaderParser()
    foot = HeaderParser()
    data = DataParser()
    headerDict = {}
    footerDict = {}
    dataList = []

    # open the file
    file = open(path, 'r')

    # go through all lines
    for line in file:

        # remove CR/LF
        line = line[:-1]

        # Check for Header Section
        line, headerSection, footerSection, dataSection = checkSectionTrigger(
            line, headerSection, footerSection, dataSection)

        if headerSection == True:
            head.parseHeaderLine(line, headerDict)

        # Check for Footer Section
        elif footerSection == True:
            foot.parseHeaderLine(line, footerDict)

        elif dataSection == True:
            data.parseDataLine(line, dataList)

    return headerDict, footerDict, dataList
Example #22
def genAnalysis(modelfile,testfile,outputfile):
    maxParagraphLength = 20
    maxParagraphs = 5
    filterSizes = [1]
    num_filters = 64
    wordEmbeddingDimension = 30
    lrate = float(1e-3)
    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs,maxParagraphLength,labels,vocabularySize,filterSizes,num_filters,wordEmbeddingDimension,lrate)

    testing = DataParser(maxParagraphs,maxParagraphLength,labels,vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    batchSize = 1
    testing.restore()
    truePre=[]
    pred=[]
    for itr in range(testing.totalPages):
        data=testing.nextBatch(1)
        truePre.append(data[0])
        pre=model.predict(data)
        pred.append(pre[0])

    labelIDName = open("../labelId-labelName-full.txt").read().split("\n")
    labelIDName = [  [ int(x.split("\t")[0]) , x.split("\t")[1].rstrip() ] for x in labelIDName]
    # print(labelIDName)    

    #making it a dictionary
    labelName = dict(labelIDName)
    # print(labelName[9026])

    f = open(outputfile,"w")
    for i,v in enumerate(pred):
        temp = [(labId,labProb) for labId,labProb in enumerate(v) ]
        temp = sorted(temp,key=lambda x:x[1],reverse=True)  #sorting based on label probability to get top k
        predLabel = [0]*len(temp)

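        # Collect the names of the true labels that appear among the top-11 predictions for this example.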
        output = ""
        for itr in range(11):
            predLabel[temp[itr][0]] = 1
            if truePre[i][0][temp[itr][0]] == 1:
                output = output + "," + labelName[temp[itr][0]]
        f.write(str(i) + ","  + output + "\n")
    f.close()
Example #23
    def __init__(self):
        super(MyDlg, self).__init__()

        # Set up the user interface from Designer.
        self.ui = Ui_Dialog()
        self.ui.setupUi(self)

        self.setWindowTitle("Lambda(λ) debugger Tool by EricWang")

        view = CmderView()
        self.init_view(view, self.ui)
        self.m_ctrller = CmderCtrller(view)
        self.m_ctrller.m_dataParser = DataParser()
        self.m_ctrller.m_keyPassCtrlExt = KeyPassCtrlExt()

        extCmd = ExtendCmd()
        self.m_ctrller.m_extCmdColls = extCmd.get_cmd_colls()
        self.m_ctrller.bind_cmd_set()

        self.make_event()
Example #24
class Main():
    if __name__ == '__main__':
        # This handles Twitter authentication and the connection to Twitter API
        # The API Key Information are stored in data/keys.json
        with open('../data/keys.json') as json_file:
            data = json.load(json_file)
        auth = OAuthHandler(data['consumer_key'], data['consumer_secret'])
        auth.set_access_token(data['access_token'],
                              data['access_token_secret'])

        # Creates a new SearchAPI object and gets the previous tweets

        # searchAPI = SearchAPI()
        # searchAPI.getAccountsFollowers(auth, "KMbappe")
        dp = DataParser()
        nbFollowers = len(dp.getAllFollowers())
        allTweets = dp.getAllTweets()
        week = Week()
        week.fill_week(allTweets)
        week.get_days_activity(nbFollowers)

        week.tuesday.getActivityOfTheHours()
Example #25
    def __init__(self):
        self.dataParser = DataParser.getInstance()
        self.dataParser.LoadDataFile("inputData.xml")

        self.plants = []
        self.grazers = []
        self.predators = []
        self.obstacles = []

        dispatcher.connect(self.HandlePlantDeath,
                           signal=EventSignals.plantDeath,
                           sender=dispatcher.Any)
        dispatcher.connect(self.HandlePlantCreation,
                           signal=EventSignals.plantCreation,
                           sender=dispatcher.Any)
        dispatcher.connect(self.HandlePredatorDeath,
                           signal=EventSignals.predatorDeath,
                           sender=dispatcher.Any)
        dispatcher.connect(self.HandlePredatorCreation,
                           signal=EventSignals.predatorCreation,
                           sender=dispatcher.Any)
        dispatcher.connect(self.HandlePredatorSensing,
                           signal=EventSignals.predatorSensing,
                           sender=dispatcher.Any)
        dispatcher.connect(self.HandleOrganismDeath,
                           signal=EventSignals.organismDeath,
                           sender=dispatcher.Any)
        dispatcher.connect(self.HandleGrazerCreation,
                           signal=EventSignals.grazerCreation,
                           sender=dispatcher.Any)
        dispatcher.connect(self.HandleGrazerSensing,
                           signal=EventSignals.grazerSensing,
                           sender=dispatcher.Any)
        dispatcher.connect(self.HandleGrazerDeath,
                           signal=EventSignals.grazerDeath,
                           sender=dispatcher.Any)
Example #26
import pickle
from DataParser import DataParser
from CronsDataStructure import DataExplorer

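# Fetch cron data for every node, build a per-node schedule table, and pickle the combined result.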
data_obj = DataParser()
data_obj.fetch_cron_data()
data = data_obj.get_cron_data()
DataSet = {}
for node in data:
    DataSet[node] = {}
    dex = DataExplorer(node_name=node,
                       cron_data=data[node]['cron_data'],
                       duration_data=data[node]['duration'])
    dex.populate_schedule_table()
    node_schedule_table = dex.get_schedule_data()
    DataSet[node] = node_schedule_table

with open(
        "C:\\mydata\\Bits\\Courses\\4thSem\\Dissertation\\repository\\data.pickle",
        'wb') as f:
    pickle.dump(DataSet, f)
Example #27
def main(_):
    global dataParser, task_name, popped_name

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_device
    tf.logging.set_verbosity(tf.logging.INFO)

    tf.logging.info('model_name: %s' % FLAGS.model_name)
    tf.logging.info('data_dir: %s' % FLAGS.data_dir)
    tf.logging.info('model_dir: %s' % FLAGS.model_dir)
    tf.logging.info('task name: %s' % FLAGS.task_name)
    tf.logging.info('learning_rate: %s' % FLAGS.learning_rate)
    tf.logging.info('batch_size: %s' % FLAGS.batch_size)
    tf.logging.info('num_epochs: %s' % FLAGS.num_epochs)
    tf.logging.info('gpu_device: %s' % FLAGS.gpu_device)

    task_name = FLAGS.task_name
    if task_name == 'finish':
        popped_name = 'like'
    elif task_name == 'like':
        popped_name = 'finish'
    else:
        raise Exception('Only support finish or like task.')

    dataParser = DataParser(track_name=FLAGS.track_name,
                            data_dir=FLAGS.data_dir)
    dataParser.load_user_behavior()
    # dataParser.load_conversion_rate()

    print(
        "=========================== Feature Size: %d ============================"
        % dataParser.feature_length)
    params = None
    if FLAGS.model_name in ['deep_fm', 'xdeepfm']:
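        # Hyper-parameters and feature-field sizes passed to the deep_fm / xdeepfm model.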
        params = {
            'embedding_size': FLAGS.fm_embedding_size,
            'feature_field_size': dataParser.field_size,
            'feature_size': dataParser.feature_length,
            'hidden_units': [200, 100, 75, 50, 25],
            'cin_layer_size': [50, 50, 50, 50],
            'word_size': dataParser.word_size,
            'word_field_size': 35,
            'item_size': dataParser.feature_dict['item_id'],
            'item_field_size': 400,
            'author_size': dataParser.feature_dict['author_id'],
            'author_field_size': 400,
            'music_size': dataParser.feature_dict['music_id'] + 1,
            'music_field_size': 400,
            'item_city_size': dataParser.feature_dict['item_city'] + 1,
            'item_city_field_size': 400,
            'video_size': 128,
            'audio_size': 128,
            'video_field_size': 128,
            'audio_field_size': 128,
            'item_uid_size': dataParser.feature_dict['uid'],
            'item_uid_field_size': 150,
            'author_uid_size': dataParser.feature_dict['uid'],
            'author_uid_field_size': 500,
            'music_uid_size': dataParser.feature_dict['uid'],
            'music_uid_field_size': 500,
            'learning_rate': FLAGS.learning_rate,
            'dropout_rate': FLAGS.dropout_rate,
            'batch_size': FLAGS.batch_size,
        }
        tf.logging.info('deep_fm params: %s', params)

    if FLAGS.action == 'train':
        train(params)
    elif FLAGS.action == 'evaluate':
        evaluate(params)
    elif FLAGS.action == 'predict':
        tf.logging.info('predict_output_path: %s' % FLAGS.predict_output_path)
        predict(params)
    elif FLAGS.action == 'train_evaluate':
        train_and_evaluate(params)
    else:
        raise Exception(
            'The action %s is unsupported. Only support train, evaluate, predict, train_evaluate.'
            % FLAGS.action)
Example #28
#from DataParser_siml import DataParser_siml as DataParser
#from model2_siml import Model2_siml as Model
from DataParser import DataParser as DataParser
from model3 import Model3 as Model

maxParagraphLength = 100
maxParagraphs = 1
#nlabels=1001
#vocabularySize=76391
nlabels = 8
vocabularySize = 244
training = DataParser(maxParagraphLength, maxParagraphs, nlabels,
                      vocabularySize)
#training.getDataFromfile("data/wiki_fea_76390_Label_1000_train")
training.getDataFromfile(
    "C:/gitrepo/Wiki-Text-Categorization/Distant Supervision/Reuter_dataset/reuters_sparse_training.txt"
)

model = Model(maxParagraphLength, maxParagraphs, nlabels, vocabularySize)

batchSize = 64

epoch = 0
epochEnd = 105
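# Train for 105 epochs, accumulating batch costs and reporting the average cost per page after each epoch.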
for e in range(epoch, epochEnd):
    print('Epoch: ' + str(e + 1))
    cost = 0
    for itr in range(int(training.totalPages / batchSize)):
        cost += model.train(training.nextBatch(batchSize))
    print(str(cost / training.totalPages))
Example #29
    def __init__(self):
        # Instantiate the peer-level classes
        self.parser = DataParser()
        self.datastore = DataStore()
Example #30
class Downloader(object):
    # Set up class members
    def __init__(self):
        # Instantiate the peer-level classes
        self.parser = DataParser()
        self.datastore = DataStore()

    # 1. Generic web page request and fetch
    def get_html(self, url):
        try:
            # Use a random User-Agent
            ua = UserAgent()
            req_headers = {'User-Agent': ua.random}

            res = requests.get(url, headers=req_headers)
            if res.status_code == requests.codes.ok:
                html = res.text
                return html
            return ''
        except Exception as e:
            return e

    # 2. Download all works of the given author
    def downloadworks_oneauthor(self, start_url, authorinfotuple):
        # 1) Extract the author info and build the full request URL and output file name
        pagenum = 1
        authorid = authorinfotuple[0]
        authorname = authorinfotuple[1]

        # 2) Build the target page URL for crawling all of this author's works
        personalworks_hommeurl = start_url + 'page=%s&id=%s' % (str(pagenum),
                                                                authorid)

        # 3) Walk through all pages, download them and save the results to a file
        try:
            # i. Fetch the author's works homepage and extract the total page count
            works_html = self.get_html(personalworks_hommeurl)
            pagecount = self.parser.getpagecount(works_html)

            # ii. Create the output file and write the basic info
            totalinfo = u'\n作者:{name},页数:{pagecount}\r\n'.format(
                name=authorname, pagecount=pagecount)
            path = u'作品集'
            filename = path + '\\' + authorname + '.txt'
            self.datastore.createfile_oneauther(filename, path, totalinfo)

            # iii. Walk through all of the author's works pages, extract the texts and save them to the file
            for i in range(1, pagecount + 1):
                # Build the URL of each page
                page_url = start_url + 'page=%s&id=%s' % (str(i), authorid)
                # Fetch the current works page
                time.sleep(random.randint(3, 6))
                singlepageworks_html = self.get_html(page_url)
                if len(singlepageworks_html) > 0:
                    # Extract all works on the current page
                    titlelist, contentlist = self.parser.getworks_singlepage(
                        singlepageworks_html)
                    # Write them to the file
                    self.datastore.storeworks_singlepage(
                        filename, i, titlelist, contentlist)
            return 'finished'
        except Exception as e:
            return e
Example #31
def ComputeFscore(modelfile, testfile, outputfile):
    labels = 8
    vocabularySize = 244
    regLambda = float(sys.argv[1])

    model = Model(labels, vocabularySize, regLambda)

    testing = DataParser(labels, vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    labelsCount = {}
    ConfusionMa = {}
    fScr = {}

    thres = 0.5
    valid = int(
        len(truePre) * 0.5
    )  #using first 50% data for threshold tuning - we have merged test and cv files
    labelsCount = {}
    ConfusionMa = {}
    fScr = {}
    thresLab = {}
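    # For each label: tune the decision threshold on the first half of the data, then score F1 and the confusion matrix on the second half.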
    for la in range(labels):
        if la % 25 == 0:
            print("Current label", la)
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)

        t = []
        p = []
        for i in range(valid, len(truePre)):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])

        p = np.array(p)
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p > bestThre)
        thresLab[la] = bestThre

    f = open(outputfile, "a")
    output = sys.argv[5]

    sum_fscore = 0.0
    for i in range(labels):
        sum_fscore = sum_fscore + fScr[i]
        output = output + "," + str(fScr[i])
    output += "," + str(sum_fscore / float(labels - 1))
    print("Fscore at " + sys.argv[3] + " epochs: " +
          str(sum_fscore / float(labels - 1)))
    f.write(output + "\n")
    f.close()
Example #32
def analyse(modelfile,testfile,outputfile):
    maxParagraphLength = 20
    maxParagraphs = 10
    filterSizes = [2,3,4]
    num_filters = 64
    wordEmbeddingDimension = 100
    lrate = float(0.001)
    poolLength = 2
    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs,maxParagraphLength,labels,vocabularySize,\
                    filterSizes,num_filters,wordEmbeddingDimension,lrate,poolLength)

    testing = DataParser(maxParagraphs,maxParagraphLength,labels,vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    batchSize = 1
    testing.restore()
    truePre=[]
    pred=[]
    for itr in range(testing.totalPages):
        data=testing.nextBatch(1)
        truePre.append(data[0])
        pre=model.predict(data)
        pred.append(pre[0])

    labelids = open("../../dataset/sorted_labelid_sans5toplabels.txt","r").read().strip().split("\n")
    labelids = [ int(x) for x in labelids ]

    no_of_partition = 10
    partition_size = labels // no_of_partition
    rank1 = [0]*no_of_partition
    rank3 = [0]*no_of_partition
    rank5 = [0]*no_of_partition

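    # Count how the top-1/3/5 predicted labels fall into each partition of the sorted label-id list.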
    for i,v in enumerate(pred):
        temp = [(labId,labProb) for labId,labProb in enumerate(v) ]
        temp = sorted(temp,key=lambda x:x[1],reverse=True)  #sorting based on label probability to get top k
        rank1[labelids.index(temp[0][0]) // partition_size] += 1
        rank3[labelids.index(temp[0][0]) // partition_size] += 1
        rank5[labelids.index(temp[0][0]) // partition_size] += 1

        rank3[labelids.index(temp[1][0]) // partition_size] += 1
        rank5[labelids.index(temp[1][0]) // partition_size] += 1
        rank3[labelids.index(temp[2][0]) // partition_size] += 1
        rank5[labelids.index(temp[2][0]) // partition_size] += 1

        rank5[labelids.index(temp[3][0]) // partition_size] += 1
        rank5[labelids.index(temp[4][0]) // partition_size] += 1

    rank1 = [ ( float(x) /testing.totalPages )*100 for x in rank1  ]
    rank3 = [ ( float(x) /( 3 * testing.totalPages) )*100 for x in rank3  ]
    rank5 = [ ( float(x) /( 5 * testing.totalPages) )*100 for x in rank5  ]

    print( rank1)
    print( rank3) 
    print(rank5)

    filePtr = open( outputfile , "w")
    for i in rank1:
        filePtr.write( str(i) + "," )
    filePtr.write("\n")

    for i in rank3:
        filePtr.write( str(i) + "," )
    filePtr.write("\n")

    for i in rank5:
        filePtr.write( str(i) + "," )
    filePtr.close()
Example #33
def ComputePrecisionK(modelfile,testfile,outputfile):
    maxParagraphLength = int(sys.argv[1])
    maxParagraphs = int(sys.argv[2] )
    filterSizes = [int(i) for i in sys.argv[3].split("-")]
    num_filters = int(sys.argv[4])
    wordEmbeddingDimension = int(sys.argv[5])
    lrate = float(sys.argv[10])

    keep_prob = 1.0

    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs,maxParagraphLength,labels,vocabularySize,\
        filterSizes,num_filters,wordEmbeddingDimension,lrate, keep_prob)

    testing = DataParser(maxParagraphs,maxParagraphLength,labels,vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    print("Computing Prec@k")
    
    #check if batchsize needs to be taken by parameter

    batchSize = 1
    testing.restore()
    truePre=[]
    pred=[]
    for itr in range(testing.totalPages):
        data=testing.nextBatch(1)
        truePre.append(data[0])
        pre=model.predict(data)
        pred.append(pre[0])

    K_list = [1,3,5]     #prec@1 .....prec@NoofLabels
    precAtK = [0.0]*6	

    # #As need to get Prec only on last 50% of test data as first 50% is for cross validation
    # valid=int(len(truePre)*0.5)
    # pred = pred[valid:]
    # truePre = truePre[valid:]

    for i,v in enumerate(pred):
        temp = [(labId,labProb) for labId,labProb in enumerate(v) ]
        temp = sorted(temp,key=lambda x:x[1],reverse=True)  #sorting based on label probability to get top k
        for ele in K_list:        #1....No of Labels
            pBag = 0              #no of true positive for this instance 
            for itr in range(ele): #top k ie top ele
                if truePre[i][0][temp[itr][0]] == 1:
                    precAtK[ele] += 1
                    # pBag += 1
            # precAtK[ele] += float(pBag)/float(ele)

    f = open(outputfile,"a")
    output = sys.argv[9]

    for k in K_list:
        precAtK[k] /= (k * len(pred))
        print("Prec@" + str(k) + " = " + str(precAtK[k]))
        output = output + "," + "Prec@" + str(k) + "=," + str(precAtK[k])
    f.write(output + "\n")
    f.close()
Example #34
__author__ = 'jszheng'
import sys
from antlr4 import *
from antlr4.InputStream import InputStream
from DataLexer import DataLexer
from DataParser import DataParser

if __name__ == '__main__':
    if len(sys.argv) > 1:
        input_stream = FileStream(sys.argv[1])
    else:
        input_stream = InputStream(sys.stdin.read())

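    # Standard ANTLR 4 pipeline: lexer -> token stream -> parser -> parse tree, printed in LISP form.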
    lexer = DataLexer(input_stream)
    token_stream = CommonTokenStream(lexer)
    parser = DataParser(token_stream)
    tree = parser.top()

    lisp_tree_str = tree.toStringTree(recog=parser)
    print(lisp_tree_str)
Example #35
    def __init__(self, trainFile, targetFile):
        dp = DataParser(trainFile)
        self.ePtm, self.sPtm, self.initVect = dp.computeProbabilities()
        self.trueStates, self.emissions = self.parseTarget(targetFile)