Example No. 1
	def join():
		"""Join a network with the given name (SSID) and password"""
		print "SWIFT JOIN"

		print request.json
		#return json.dumps({"success": True})

		# First we need to fish that information out of the JSON that was sent along with this
		d = request.body.read()
		print d
		if len(d) == 0:
			return json.dumps({"success": False})
		try:
			dj = json.loads(d)
		except ValueError:  # json.loads raises ValueError on malformed input
			print "Bad JSON data, aborting..."
			return json.dumps({"success": False})
		if 'ssid' in dj:
			ssid_value = dj['ssid']
		else:
			print "Missing SSID"
			return json.dumps({"success": False})
		if 'password' in dj:
			passwd = dj['password']  # Salted, I hope
		else:
			print "Missing password"
			return json.dumps({"success": False})

		# And here we should be able to join the network maybe
		print "Joining %s..." % ssid_value
		joiner.join(ssid_value,passwd)
		return json.dumps({"success": True})
Example No. 2
    def join():
        """Join a network with the given name (SSID) and password"""
        print "SWIFT JOIN"

        print request.json
        #return json.dumps({"success": True})

        # First we need to fish that information out of the JSON that was sent along with this
        d = request.body.read()
        print d
        if len(d) == 0:
            return json.dumps({"success": False})
        try:
            dj = json.loads(d)
        except ValueError:  # json.loads raises ValueError on malformed input
            print "Bad JSON data, aborting..."
            return json.dumps({"success": False})
        if 'ssid' in dj:
            ssid_value = dj['ssid']
        else:
            print "Missing SSID"
            return json.dumps({"success": False})
        if 'password' in dj:
            passwd = dj['password']  # Salted, I hope
        else:
            print "Missing password"
            return json.dumps({"success": False})

        # And here we should be able to join the network maybe
        print "Joining %s..." % ssid_value
        joiner.join(ssid_value, passwd)
        return json.dumps({"success": True})
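A minimal sketch of how a client might call this handler, assuming (this is not stated in the example) that it is mounted as a POST route at /join on localhost:8080:

import json
import requests  # third-party HTTP client, assumed available

resp = requests.post(
    "http://localhost:8080/join",  # hypothetical host and route
    data=json.dumps({"ssid": "MyNetwork", "password": "hunter2"}),
    headers={"Content-Type": "application/json"},
)
print(resp.json())  # expect {"success": true} if joiner.join() succeeded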
Example No. 3
def harmari_craigslist_parsing(html_file, meta_file):
    print('harmari_craigslist_parsing', html_file, meta_file)

    join_file = meta_file + '_join.csv'

    if os.path.exists(join_file):
        print('file extracted:', join_file)
        return

    print('  1. start index..')
    if not os.path.exists(html_file + '_tag'):
        run_cpp('find_start.cpp', [html_file])

    print('  2. start extract..')
    mkdir('html')
    mkdir('otherAttributes')
    run_cpp('extract.cpp', [html_file])

    print('  3. count records..')
    n_records = run_cpp('lc.cpp', [meta_file], True).strip()

    print('  4. parse html..')
    to_parse = html_parse(','.join([html_file, n_records]))

    print('  5. join html and metadata..')
    join([html_file, meta_file, n_records])
    print('done')
def kclustering(top=100, pca=0):
    training = pd.read_csv(r'documents\csv\drunk\drunk labeling 1300.csv')
    test = pd.read_csv(r'documents\csv\drunk\drunkTEXT400U.csv')
    top = topwords(test, 'Clean tweet', top)
    main_domain = join(training, 'Clean tweet')
    main_domain1 = join(test, 'Clean tweet')
    main_domain.joinall(top.top, 1)
    main_domain1.joinall(top.top, 1)
    training = main_domain.df
    test = main_domain1.df

    cols = ['Clean tweet']

    try:
        for x in cols:
            del training[x]
            del test[x]
    except:
        pass

    print training['L']
    training.L = training.L.replace(['y', 'n'], [True, False])
    test.L = test.L.replace(['y', 'n'], [True, False])
    if pca == 1:

        dftraining, dftest = pcaf(training, test)
        training = dftraining.join(training["L"])
        test = dftest.join(test["L"])

    try:
        training = training.replace(['True', 'False'], [True, False])
        test = test.replace(['True', 'False'], [True, False])
    except:
        pass
    headers_names = list(training.columns.values)
    training = training.astype(np.float64)
    test = test.astype(np.float64)
    training['L'] = training['L'].astype(bool)
    test['L'] = test['L'].astype(bool)
    headers_names.remove('L')
    headers_names.append('L')

    pca = str(pca)
    test = test[headers_names]
    training = training[headers_names]
    TRAINING = training.as_matrix(columns=None)
    TEST = test.as_matrix(columns=None)
    print training.dtypes
    main_domain.df.to_csv(r'documents\csv\unsupervised\test.csv', index=False)
    arff.dump(r'documents\Arff\unsupervised' + r'\training' + pca + '.arff',
              TRAINING,
              relation="whatever",
              names=headers_names)
    arff.dump(r'documents\Arff\unsupervised' + r'\test' + pca + '.arff',
              TEST,
              relation="whatever",
              names=headers_names)
Example No. 5
def join_wrap():
	if len(sys.argv) < 4:
		print "Not enough arguments for join" #TODO add help
		return
	subn = sys.argv[2]
	serv = sys.argv[3]

	J.join(subn, serv)
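# Judging from the sys.argv indices above (argv[2] and argv[3]), join_wrap
# is presumably dispatched as a subcommand named by argv[1], e.g.
# "python tool.py join <subnet> <server>" (the script name is illustrative only).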
def kclustering(top=100,pca=0):
	training=pd.read_csv(r'documents\csv\drunk\drunk labeling 1300.csv')
	test=pd.read_csv(r'documents\csv\drunk\drunkTEXT400U.csv')
	top = topwords(test,'Clean tweet',top)
	main_domain = join(training,'Clean tweet')
	main_domain1 = join(test,'Clean tweet')
	main_domain.joinall(top.top,1)
	main_domain1.joinall(top.top,1)
	training=main_domain.df
	test=main_domain1.df


	cols=['Clean tweet']

	try:
		for x in cols:
			del training[x]
			del test[x]
	except:
		pass


	
	print training['L']
	training.L=training.L.replace(['y','n'], [True,False])
	test.L=test.L.replace(['y','n'], [True,False])
	if pca==1:

		dftraining, dftest=pcaf(training,test)
		training =dftraining.join(training["L"])
		test=dftest.join(test["L"])
	
	try:
		training=training.replace(['True','False'], [True,False])	
		test=test.replace(['True','False'], [True,False])
	except:
		pass
	headers_names=list(training.columns.values)
	training=training.astype(np.float64)
	test=test.astype(np.float64)
	training['L']=training['L'].astype(bool)
	test['L']=test['L'].astype(bool)
	headers_names.remove('L')
	headers_names.append('L')
	
	pca=str(pca)
	test = test[headers_names]
	training = training[headers_names]
	TRAINING=training.as_matrix(columns=None)
	TEST=test.as_matrix(columns=None)
	print training.dtypes
	main_domain.df.to_csv(r'documents\csv\unsupervised\test.csv',index=False)
	arff.dump(r'documents\Arff\unsupervised'+r'\training'+pca+'.arff',TRAINING, relation="whatever", names=headers_names)
	arff.dump(r'documents\Arff\unsupervised'+r'\test'+pca+'.arff',TEST, relation="whatever", names=headers_names)
def addbigrams(dft,dfte,df1,selector=0,n=50):
	
	
	
	top = topwords(df1,'Clean tweet',n)
	bigrams=ngrams(df1,'Clean tweet')
	
	
	bigramsw=bigrams.bigrams
	main_domain = join(dft,'Clean tweet')
	main_domain1 = join(dfte,'Clean tweet')
	main_domain.joinall(bigramsw,2)
	main_domain1.joinall(bigramsw,2)
	
	return main_domain.df, main_domain1.df
		def getmostcommon(df,df1,n=10):
				
				main_domain = join(df,'Clean tweet')
				main_domain1 = join(df1,'Clean tweet')
				top = topwords(self.df2,'Clean tweet',n)
				bigrams=ngrams(self.df2,'Clean tweet',n)
				
				topw=top.top
				bigramsw=bigrams.bigrams
				
				main_domain.joinall(topw,1)
				main_domain.joinall(bigramsw,2)
				main_domain1.joinall(topw,1)
				main_domain1.joinall(bigramsw,2)
				return main_domain.df,main_domain1.df,
Example No. 9
def main():
    warnings.warn = warn  # monkeypatch warnings.warn with the custom warn() defined elsewhere
    mk_dir()
    print('# split\n')
    split()
    print('# building dictionary\n')
    for file in tqdm(os.listdir('TEMP')):
        statinfo = os.stat('TEMP/'+file)

        #check if it is empty shell
        if statinfo.st_size>1000:
            try:
                df = build_dict(file)
                df = agg(df)
                
                # can't use `if df:` here (DataFrame truth value is ambiguous)
                if len(df.index)!=0:
                    df['size'] = statinfo.st_size
                    df['raw_label'] = file.replace('.pcap','')
                    df.to_csv('TEMP_CSV/'+file.replace('.pcap','.csv'),index=False)
                    
            except Exception as e:
                print(e)

    print('# collecting dataframe\n')
    r,c = join()
    print('\nGOT ',r,'nonzero records out of ',len(os.listdir('TEMP')),'files\n')
    print('df shape: ',r,'x',c)
    mk_dir()
Example No. 10
def blsSummaryPlot(clipList, num=None):
    mags = np.loadtxt("kees-c5.mags", delimiter="|")
    nPad = mags.shape[1]

#    epic, blsArray = gather.gatherValue(clipList, 'bls.convolved_bls')
    epic, blsArray = gather.gatherFunction(clipList, getBls)
    #Strip out occasional bls spectrum with non standard length
    lengths = np.array(map(lambda x: len(x), blsArray))
    typicalLength = int(np.median(lengths))
    idx = lengths == typicalLength

    epic = np.array(epic)[idx]
    blsArray = np.array(blsArray)[idx]
    obj = np.column_stack([epic, blsArray])


    obj2 = join(mags, 0, None, obj, 0, None, dtype=object)
    nPad += 1
#    print obj2.shape

    magCol = 3
    idx = np.argsort(obj2[:, magCol])
    obj2 = obj2[idx]

    mag = obj2[:,magCol]
    blsArray = np.vstack(obj2[:, -1])
    print blsArray.shape
#    return obj2

    mp.clf()
    mp.imshow(blsArray, interpolation="nearest", origin="lower",\
        aspect="auto", cmap=mp.cm.YlGnBu_r)
    mp.colorbar()
    return blsArray
Example No. 11
 def test_inner_join(self):
     inner = join(dogs, cats, 'inner', 'name')
     assert len(inner) == 2
     assert inner[0][0].name == 'gatsby'
     assert inner[0][1].meow == 'rowr'
     assert inner[0][0].weight == 16
     assert inner[0][1].weight == 15
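The join under test is not shown anywhere in this listing. A minimal sketch consistent with these assertions, assuming join(left, right, how, key) pairs objects whose key attribute matches and pads the unmatched side with None for 'left'/'right'/'outer' joins (the asserted row counts depend on the dogs/cats fixture data):

def join(left, right, how, key):
    # Pair objects from `left` and `right` whose `key` attribute matches.
    pairs, matched = [], set()
    for l in left:
        hit = False
        for i, r in enumerate(right):
            if getattr(l, key) == getattr(r, key):
                pairs.append((l, r))
                matched.add(i)
                hit = True
        if not hit and how in ('left', 'outer'):
            pairs.append((l, None))  # keep unmatched left rows
    if how in ('right', 'outer'):
        # keep unmatched right rows
        pairs.extend((None, r) for i, r in enumerate(right) if i not in matched)
    return pairs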
Example No. 12
def blsSummaryPlot(clipList, num=None):
    mags = np.loadtxt("kees-c5.mags", delimiter="|")
    nPad = mags.shape[1]

    #    epic, blsArray = gather.gatherValue(clipList, 'bls.convolved_bls')
    epic, blsArray = gather.gatherFunction(clipList, getBls)
    #Strip out occasional bls spectrum with non standard length
    lengths = np.array(map(lambda x: len(x), blsArray))
    typicalLength = int(np.median(lengths))
    idx = lengths == typicalLength

    epic = np.array(epic)[idx]
    blsArray = np.array(blsArray)[idx]
    obj = np.column_stack([epic, blsArray])

    obj2 = join(mags, 0, None, obj, 0, None, dtype=object)
    nPad += 1
    #    print obj2.shape

    magCol = 3
    idx = np.argsort(obj2[:, magCol])
    obj2 = obj2[idx]

    mag = obj2[:, magCol]
    blsArray = np.vstack(obj2[:, -1])
    print blsArray.shape
    #    return obj2

    mp.clf()
    mp.imshow(blsArray, interpolation="nearest", origin="lower",\
        aspect="auto", cmap=mp.cm.YlGnBu_r)
    mp.colorbar()
    return blsArray
Example No. 13
 def join(self,
          p_strLeftTable,
          p_strRightTable,
          p_lColumn,
          p_strType="inner"):
     return join.join(self.dTables[p_strLeftTable],
                      self.dTables[p_strRightTable], p_lColumn, p_strType)
def modelExperiment(insampleData,
                    outsampleData,
                    dataFolder,
                    componentList,
                    models,
                    modelNames,
                    tableFile,
                    plotFile,
                    buildFV=True):
    j1 = myJoin.join(insampleData, outsampleData, dataFolder)
    j1.setComponentList(componentList)
    if buildFV:
        j1.buildInsampleFV()
    else:
        j1.loadCachedInsampleFV()
    modelResults = []
    for (mod, modName) in zip(models, modelNames):
        j1.model = mod
        j1.modelName = modName
        precision, recall, runtime = j1.quickExperiment()
        modelResults.append([modName, precision, recall, runtime])

    # Write summary of results to csv table
    writeToCSV(dataFolder + tableFile, ['', 'Precision', 'Recall', 'Runtime'],
               modelResults)

    # Write summary of results to plot
    precisionList = [res[1] for res in modelResults]
    recallList = [res[2] for res in modelResults]
    runtimeList = [res[3] for res in modelResults]
    # create plot
    fig, ax = plt.subplots()
    index = np.arange(len(models))
    bar_width = 0.35
    opacity = 0.8

    rects1 = plt.bar(index,
                     tuple(precisionList),
                     bar_width,
                     alpha=opacity,
                     color='b',
                     label='Precision')

    rects2 = plt.bar(index + bar_width,
                     tuple(recallList),
                     bar_width,
                     alpha=opacity,
                     color='g',
                     label='Recall')

    plt.xlabel('Classifier')
    plt.ylabel('Scores')
    plt.title('Bootstrapped Precision and Recall Scores vs. Classifier')
    plt.xticks(index + bar_width, tuple(modelNames))
    plt.legend()
    plt.tight_layout()
    plt.savefig(dataFolder + plotFile)
def featureVectorExperiment(insampleData, outsampleData, dataFolder,
                            allComponents, model, modelName, tableFile,
                            plotFile):
    j1 = myJoin.join(insampleData, outsampleData, dataFolder)
    j1.model = model
    j1.modelName = modelName
    FVResults = []
    FVNames = []
    for componentList in allComponents:
        j1.setComponentList(componentList)
        j1.buildInsampleFV()
        precision, recall, runtime = j1.quickExperiment()
        FVNames.append(j1.FVDescription)
        FVResults.append([j1.FVDescription, precision, recall, runtime])

    # TODO this can be written to a function
    # Write summary of results to csv table
    wr = csv.writer(open(dataFolder + tableFile, 'wb'), quoting=csv.QUOTE_ALL)
    header = ['', 'Precision', 'Recall', 'Runtime']
    wr.writerow(header)
    for row in FVResults:
        wr.writerow(row)

    # Write summary of results to plot
    precisionList = [res[1] for res in FVResults]
    recallList = [res[2] for res in FVResults]
    runtimeList = [res[3] for res in FVResults]
    # create plot
    fig, ax = plt.subplots()
    index = np.arange(len(allComponents))
    bar_width = 0.35
    opacity = 0.8

    rects1 = plt.bar(index,
                     tuple(precisionList),
                     bar_width,
                     alpha=opacity,
                     color='b',
                     label='Precision')

    rects2 = plt.bar(index + bar_width,
                     tuple(recallList),
                     bar_width,
                     alpha=opacity,
                     color='g',
                     label='Recall')

    plt.xlabel('Feature Vector')
    plt.ylabel('Scores')
    plt.title(
        'Bootstrapped Precision and Recall Scores using %s vs. Feature Vector'
        % (modelName))
    plt.xticks(index + bar_width, tuple(FVNames))
    plt.legend()
    plt.tight_layout()
    plt.savefig(dataFolder + plotFile)
 def NLMexperiments():
     j2 = myJoin.join(nlmInsampleData, nlmOutsampleData, 'NLMdata/')
     j2.setComponentList(fullFV)
     j2.loadCachedInsampleFV()
     results = []
     for prop in np.arange(0.05, 0.25, 0.01):
         precision, recall, _, size = j2.classifyNIterations(
             subSampleProportion=prop)
         results.append([size, precision, recall])
     writeToCSV('NLMdata/sizeTest2.csv', ['Size', 'Precision', 'Recall'],
                results)
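A hypothetical invocation of the modelExperiment helper above; the classifiers, component list, and data folder are stitched together from SOexperiments in Example No. 21 below (RF(n_estimators=200), fullFV, 'stackoverflowdata/') and do not appear as a documented call in this listing:

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier as RF

modelExperiment(SOInsampleData, SOOutsampleData, 'stackoverflowdata/',
                componentList=fullFV,
                models=[LogisticRegression(), RF(n_estimators=200)],
                modelNames=['LogReg', 'RF'],
                tableFile='modelExperiment.csv',
                plotFile='modelExperiment.png')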
Example No. 17
def classifyAndPredict(insampleData, outsampleData, folderName, componentList):
    print len(insampleData[0])
    print len(outsampleData[1])
    # Declare instance of a join object with input arguments
    easyJoin = myJoin.join(insampleData, outsampleData, folderName)
    easyJoin.setComponentList(componentList)
    # Build feature vector
    easyJoin.buildInsampleFV()
    easyJoin.buildOutsampleFVReduced(0.01)
    # Classify and predict with logistic regression
    easyJoin.classify()
    easyJoin.classifyNIterations()
    easyJoin.predict()
def thresholdExperiment(insampleData, outsampleData, dataFolder, allComponents,
                        model, modelName, tableFile):
    thresholdRange = np.arange(0.0, 1.01, 0.01)
    j1 = myJoin.join(insampleData, outsampleData, dataFolder)
    j1.model = model
    j1.modelName = modelName
    j1.setComponentList(allComponents)
    j1.loadCachedInsampleFV()
    expResults = []
    for tHold in thresholdRange:
        precision, recall, runtime = j1.classifyNIterations(tHold)
        expResults.append([tHold, precision, recall, runtime])
        print tHold

    writeToCSV(dataFolder + tableFile,
               ['Threshold', 'Precision', 'Recall', 'Runtime'], expResults)
Example No. 19
def PARRES(tPAR0,tRED0):
    """

    """
    tPAR = copy.deepcopy(tPAR0)
    tRED = copy.deepcopy(tRED0)

    tPAR.set_primary_key('seed')
    tRED.set_primary_key('seed')
    tRED = join.join(tPAR,tRED)

    # Convert epochs back into input
    tRED.data['oepoch'] = np.remainder(tRED.oepoch,tRED.P)
    addbg(tRED)
    addFlag(tRED)
    return tRED
Example No. 20
def buildQuery(s, tables):
    tokens = nltk.word_tokenize(s)
    Tagged = nltk.pos_tag(tokens)

    subjects = 0
    # joined = False
    for item in Tagged:
        if item[1] == 'NNS':
            subjects += 1
    if subjects >= 2:
        # joined = True
        s = join(s, tables)
    s = Dict.DoReplacing(s)
    # s = 'Show me when both Mario and Sonic first came out'
    s = s.strip()  # Remove Leading and Trailing Whitespace
    s = ReplaceNotFirst(s, 'where', 'and')
    s = s.replace('and and', 'and')

    # Remove generic database references
    if s.lower().find("from database") != -1:
        s = Dict.Replace(s, [" from database"], '')

    tokens = nltk.word_tokenize(s)
    # if there is no "from"
    if s.find('from') == -1 and s.find('select') != -1:
        temp = tokens[tokens.index('select') + 1]
        temps = singularize(temp)
        tempp = pluralize(temps)
        s = s.replace('select ' + temp, 'select ' + temps + ' from ' + tempp)

        tokens = nltk.word_tokenize(s)
        if tokens[tokens.index('from') + 1] not in tables:
            s = noTableName(s, tables, tokens[tokens.index('from') + 1])

    if s[-1] != ';':
        s = s + ';'
    tokens = s.split(' ')

    s = manageStringVars(s, tokens)
    # above func takes in string and list, looks for operator and if the following
    # string is not a digit, then the function will add quotes to it

    return s
Example No. 21
		def getintersection(df,selector=0,n=50):
				
				main_domain = join(df,'Clean tweet')
				
				top = topwords(df,'Clean tweet',n)
				bigrams=ngrams(df,'Clean tweet')
				
				topw=top.top
				bigramsw=bigrams.bigrams
				
				main_domain.joinall(topw,1)
				mutualwordsu= mutualinfo(main_domain.df)
				main_domain.joinall(bigramsw,2)
				mutualwordsb= mutualinfo(main_domain.df)
				mutualwordsb=[e for e in mutualwordsb if e not in mutualwordsu]
				ratiov=ratio(main_domain.df,'L')
				ratios=ratiov.getoddratios(top.top)
				dratios=list(ratios.keys())
				return topw, bigramsw, dratios,mutualwordsu,mutualwordsb
    def SOexperiments():
        j1 = myJoin.join(SOInsampleData, SOOutsampleData, 'stackoverflowdata/')
        j1.setComponentList(fullFV)
        j1.buildInsampleFV()
        j1.model = RF(n_estimators=200)
        j1.modelName = 'RF'

        def threshHoldTest():
            singleThreshTest = j1.thresholdTest(np.arange(0.0, 1.01, 0.01))
            writeToCSV('stackoverflowdata/simpleThresholdTest1.csv',
                       ['Threshold', 'Precision', 'Recall'], singleThreshTest)
            print 'simple thing done'
            fiftyThreshTest = [
                j1.thresholdTest(np.arange(0.0, 1.01, 0.01)) for i in range(50)
            ]
            mean_values = np.mean(fiftyThreshTest, axis=0)
            writeToCSV('stackoverflowdata/fiftyThresholdTest1.csv',
                       ['Threshold', 'Precision', 'Recall'], mean_values)
            print 'fifty thing done'
            thresholdExperiment(SOInsampleData, SOOutsampleData,
                                'stackoverflowdata/', fullFV,
                                RF(n_estimators=200), 'RF',
                                'thresholdExperiment1.csv')
Example No. 23
def join(self, right, left_outer=False, right_outer=False, **keys):
    '''
    Join this Element with one or more right Elements on the given key
    columns; left_outer/right_outer keep unmatched rows from that side.
    '''
    from atom import Atom
    from join import join
    new = self
    for right in right if isinstance(right, list) else [right]:
        name = right.__name__
        left_index = None
        right_index = None
        mask = None
        index = join(self, right, keys, left_outer, right_outer)
        right = Element(source=right,
                        cnames=[x for x in right.__cnames__
                                if x not in keys.values()],
                        index=Atom(index.right.new_index[index.mask],
                                   mask=(index.right.new_index>=0)[index.mask]))
        new = Element(source = new,
                      index  = Atom(index.left.new_index[index.mask],
                                    mask=(index.left.new_index>=0)[index.mask]),
                      cnames = new.__cnames__ + [name])
        setattr(new, name, right)
    return new
Example No. 24
def join(self, right, left_outer=False, right_outer=False, **keys):
    '''
    Join this Element with one or more right Elements on the given key
    columns; left_outer/right_outer keep unmatched rows from that side.
    '''
    from atom import Atom
    from join import join
    new = self
    for right in right if isinstance(right, list) else [right]:
        name = right.__name__
        left_index = None
        right_index = None
        mask = None
        index = join(self, right, keys, left_outer, right_outer)
        right = Element(
            source=right,
            cnames=[x for x in right.__cnames__ if x not in keys.values()],
            index=Atom(index.right.new_index[index.mask],
                       mask=(index.right.new_index >= 0)[index.mask]))
        new = Element(source=new,
                      index=Atom(index.left.new_index[index.mask],
                                 mask=(index.left.new_index >= 0)[index.mask]),
                      cnames=new.__cnames__ + [name])
        setattr(new, name, right)
    return new
Example No. 25
def outer(self, right, **keys):
    '''
    '''
    return join(self, right, True, True, **keys)
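This one-line wrapper (together with inner and right_outer in Examples No. 31 and 30 below) maps join types onto the (left_outer, right_outer) flags of the join method from Examples No. 23 and 24: inner passes (False, False), outer passes (True, True), and right_outer passes (False, True). By analogy, a left_outer wrapper would presumably pass (True, False); none appears in this listing, so the sketch below only completes the pattern:

def left_outer(self, right, **keys):
    '''
    '''
    return join(self, right, True, False, **keys)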
Example No. 26
	infile.close()
	timepattern = r'(\d{2}):(\d{2}):(\d{2})'
	datepattern = r'(\d{4}):(\d{2}):(\d{2})'
	group1 = re.search(timepattern,temp[-1]).groups()
	group2 = re.search(datepattern,temp[-2]).groups()
	return group1, group2

def prefix(time,date,filename):
	"""
	Creates a new file extension if needed. Does not actually
	rename the file though!
	"""
	prefix = filename
	pattern = r'\d{4}_+\d{2}_+\d{2}_+\d{2}_+\d{2}_+\d{2}_+[img]_+\d+\.[jpg]'
	match = re.search(pattern,filename)
	if not match:
		prefix = join.join('_',date,time,filename)
	return prefix

tmp,gls =gettime(somefile)
bla =  prefix(tmp,gls,somefile)
print bla
#os.rename(somefile,bla)

'''
fredrik@fredrik-Aspire-V3-571:~/uio/inf3331/uke5$ python jpegrename.py tmp2.jpg
2002_05_19_18_10_03_tmp2.jpg
'''
Example No. 27
def measure_join():
    s = join.join(int(1e6))
    print("{}".format(len(s)))
Example No. 28
def parse_input(filename,tiles,globalfreevars):
	#tiles is a global list of actually known tiles
	# it is used to synchronise symbols between multiple automata
	# globalfreevars con
	contains=load_input(filename)
	preds={}
	if (re.search("^RootCall",contains[0])):
		# NEW version
		# parse the "RootCall"
		# first check whether there are existentially quantified parameters
		ex_params=[]
		if (re.search("^RootCall\\\\E",contains[0])):
			ex_params=re.sub('^RootCall\\\\E([^\.]*)\..*$','\\1',contains[0])
			ex_params=re.split(",",ex_params)
			contains[0]=re.sub("\\\\E([^\.]*)\.","",contains[0])

		# first check, whether the join operation is needed --- RootCall contains "*"
		rootcall=re.sub('^RootCall',"",contains[0])
		del contains[0]
		top_calls=[]
		pt_seq=0

		while (re.search("\\*",rootcall)):
			# first handle "->" predicate
			if re.search("^[^\*]*->",rootcall):
				lhs=re.sub("^([^-]*)->.*$","\\1",rootcall)
				rhs=re.sub("^[^-]*->([^\*]*)\*.*$","\\1",rootcall)
				rhs=re.split(",",rhs)
				# remove nil and duplicate occurrences from rhs
				rhs_not_nil=[]
				for x in rhs:
					if (not x=="nil") and (not x==lhs) and (not x in rhs_not_nil):
						rhs_not_nil.append(x)
				# create a unique predicate for the points-to
				pred_name=get_unique_name("pt%i"%pt_seq,contains)
				pt_seq=pt_seq+1
				top_calls.append((pred_name,[lhs]+rhs_not_nil))
				rule=(lhs,rhs,[],[])
				preds[pred_name]=([lhs]+rhs_not_nil,[rule])
				# do implicit quantification
				add_implicit_exists(ex_params,globalfreevars,[lhs]+rhs_not_nil)
			else:
				# store the predicate call into top_calls
				call=re.sub("^([^\(]*)\(.*$","\\1",rootcall)
				call_params=re.sub("^[^\(]*\(([^\)]*)\).*$","\\1",rootcall)
				call_params=re.split(",",call_params)
				top_calls.append((call,call_params))
				# do implicit quantification
				add_implicit_exists(ex_params,globalfreevars,call_params)
			# remove the call
			rootcall=re.sub("^[^\*]*\*","",rootcall)
		if re.search("^[^\*]*->",rootcall):
			lhs=re.sub("^([^-]*)->.*$","\\1",rootcall)
			rhs=re.sub("^[^-]*->([^\*]*)$","\\1",rootcall)
			rhs=re.split(",",rhs)
			# remove nil and duplicate occurrences from rhs
			rhs_not_nil=[]
			for x in rhs:
				#if not x=="nil":
				if (not x=="nil") and (not x==lhs) and (not x in rhs_not_nil):
					rhs_not_nil.append(x)
			# create a unique predicate for the points-to
			pred_name=get_unique_name("pt%i"%pt_seq,contains)
			pt_seq=pt_seq+1
			top_calls.append((pred_name,[lhs]+rhs_not_nil))
			rule=(lhs,rhs,[],[])
			preds[pred_name]=([lhs]+rhs_not_nil,[rule])
			# do implicit quantification
			add_implicit_exists(ex_params,globalfreevars,[lhs]+rhs_not_nil)
		else:
			call=re.sub("^([^\(]*)\(.*$","\\1",rootcall)
			call_params=re.sub("^[^\(]*\(([^\)]*)\).*$","\\1",rootcall)
			call_params=re.split(",",call_params)
			top_calls.append((call,call_params))
			# do implicit quantification
			add_implicit_exists(ex_params,globalfreevars,call_params)
		type=2
	else:
		# OLD version just for compatibility reasons
		# get parameters
		if not (re.search("^Params",contains[0])):
			raise InputError("No \"Params\" specified on 1st (nonempty) line of input")
		params=re.sub("^Params","",contains[0])
		params=re.split(",",params)
		del contains[0]
		# get root rule identifier
		if not (re.search("^Root",contains[0])):
			raise InputError("No \"Root\" specified on 2st (nonempty) line of input")
		root_rule=re.sub("^Root","",contains[0])
		del contains[0]
		type=0
	#Parse predicates
	empty_rule=0
	for x in contains:
		empty_rule=empty_rule+parse_predicate(x,preds)
	if empty_rule:
		# empty rules in the system of predicates -> inline them + create a formula for empty heap
		emptyheap_eq=inline_empty_rules(preds,top_calls)
	else:
		emptyheap_eq=[] # No empty heap defined by the system of predicates ---> false represented as []
	if type==2:
		# type==2: join operator on top level calls to translate into a single Rootcall
		(root_rule,params,emptyheap_eq)=join.join(preds,top_calls,emptyheap_eq,ex_params)
		# rename all variables in conflict between parameters and predicates
		rename_conflicts_with_params(preds,params)
		#track and eliminate all parameters
		for i in range(0,len(params)):
			ex_quantif=(params[i] in ex_params) # or (params[i]=="nil") # nil is always handled as existentially quantified variable
			root_rule=trackeliminate(preds,root_rule,0,params[i],ex_quantif)
		# remove unreachable predicates
		remove_unreachable_predicates(preds,root_rule)
		# remove "nil" from params
		new_params=[]
		for i in params:
			if i!="nil" and not (i in ex_params):
				new_params.append(i)
		params=new_params
		# remove ex_params from emptyheap_eq
		new_emptyheap_eq=[]
		for disj in emptyheap_eq:
			new_disj=[]
			for conj in disj:
				new_conj=[]
				for x in conj:
					if not x in ex_params:
						new_conj.append(x)
				if len(new_conj)>1:
					new_disj.append(new_conj)
			new_emptyheap_eq.append(new_disj)
		emptyheap_eq=new_emptyheap_eq
	else:
		# OLD version, just for compatibility reasons (type==0)
		if not emptyheap_eq==[]:
			emptyheap_eq=emptyheap_eq[0]

	sig=compute_signature(preds)

	aut,eq_edges=sl2ta(preds,sig,params,tiles,root_rule)
	#if eq_edges:
	#	print "WARNING: equality edges in use"
	return aut,emptyheap_eq,eq_edges
Example No. 29
def domain(document, crossvalidationundersampling,ArffL,A=0, undersampler=0,sentiment=0 ):
	test=pd.read_csv(r'documents\csv\drunk\drunkTEXT400.csv')
	test.L=test.L.replace(['y','n'], ['True','False'])
	df1=pd.read_csv(document+'.csv'  )
	df1.L=df1.L.replace(['y','n'], ['True','False'])
	joinc=joindocuments(df1,df1)
	top = topwords(df1,'Clean tweet',100)
	main_domain = join(df1,'Clean tweet')
	
	bigrams=ngrams(df1,'Clean tweet')
	print 'bigrams'
	print bigrams.bigrams
	main_domain.joinall(bigrams.bigrams,2)
	main_domain.joinall(top.top,1)
	
	
	
	main_domain.df.to_csv('prueba.csv',index=False)
	ratiov=ratio(main_domain.df,'L')
	ratios=ratiov.getoddratios(top.top)
	print 'ratios'
	print ratios		
	ds=list(ratios.keys())
	testobject = join(test,'Clean tweet')
	oddradiojoin=join(df1,'Clean tweet')
	oddradiojoin.joinall(ds,1)
	testobject.joinall(ds,1)
	oddradiojoin.joinall(bigrams.bigrams,2)
	testobject.joinall(bigrams.bigrams,2)
	test=testobject.df
	cols=['Clean tweet']
	if sentiment==1:
		cols=['Clean tweet','sentiment_polarity', 'sentiment_subjectivity', 'absPolarity']

	try:
		for x in cols:
			del oddradiojoin.df[x]
			del test[x]
	except:
		pass
	#training, test=joinc.gettrainingandtestp(oddradiojoin.df)
	print 'matrix of elements to reduce'
	print "saul,",oddradiojoin.df.shape
	#########################################################
	if undersampler==1:
	  print "saul,",oddradiojoin.df.shape
	  oddradiojoin.df=joinc.undersampling(oddradiojoin.df)
	  print oddradiojoin.df.shape
	if A==1:
		
		
		
		dftraining, dftest=pcaf(oddradiojoin.df,test)
		oddradiojoin.df =dftraining.join(oddradiojoin.df["L"])
		
		
		test=dftest.join(test["L"])

	
	print oddradiojoin.df.shape
	training=oddradiojoin.df
	
	training=training.replace(['True','False'], [True,False])	
	test=test.replace(['True','False'], [True,False])
	training=training.astype(np.float64)
	test=test.astype(np.float64)
	training['L']=training['L'].astype(bool)
	test['L']=test['L'].astype(bool)
	A=str(A)
	sentiment=str(sentiment)
	oddradiojoin.df.to_csv('crossvalidation.csv',index=False)
	#undersampleddf1.to_csv(str(crossvalidationundersampling) +'\undersampling'+A+'.csv',index=False)
	headers_names=list(training.columns.values)
	headers_names.remove('L')
	headers_names.append('L')
	headers_names1=list(test.columns.values)
	print headers_names,'headers test',headers_names1
	test = test[headers_names]
	training = training[headers_names]
	print 'training' +str(training.dtypes)
	test.to_csv(str(crossvalidationundersampling) + r'\test1'+A+'.csv',index=False)
	training.to_csv(str(crossvalidationundersampling) +r'\training1'+A+'.csv',index=False)
	TRAINING=training.as_matrix(columns=None)
	TEST=test.as_matrix(columns=None)
	print 'training'
	print training.dtypes
	
	arff.dump(ArffL +r'\trainingwu'+A+str(undersampler)+sentiment+'.arff',TRAINING, relation="whatever", names=headers_names)
	 
	arff.dump(ArffL +r'\testwu'+A+str(undersampler)+sentiment+'.arff',TEST, relation="whatever", names=headers_names)
Example No. 30
def right_outer(self, right, **keys):
    '''
    '''
    return join(self, right, False, True, **keys)
Example No. 31
def inner(self, right, **keys):
    '''
    '''
    return join(self, right, False, False, **keys)
Example No. 32
 def test_outer_join(self):
     outer = join(dogs, cats, 'outer', 'weight')
     assert len(outer) == 8
	def spectralcluster(self, A=1, varydocument=0, joineig=0, undersamplingv=False):
		# 1. join the documents and get the test sample
		def joind(df1,df2,size=0.1,undersamplingv=False, varydocument=0):
			df1.L=df1.L.replace(['y','Y','n','n '], ['True','True','False','False'])  
			df2.L=df2.L.replace(['y','Y','n','n '], ['True','True','False','False']) 
		    
			joindf=joindocuments(df1,df2)
			if varydocument==0:
				df1,otro=joindf.gettrainingandtestp(df1,size)
			if varydocument==1:
			    df2,otro=joindf.gettrainingandtestp(df2,size)
			joinc=joindocuments(df1,df2)
			if undersamplingv==True:
				
				df2=joinc.undersampling(df2)
				df1=joinc.undersampling(df1)
				
			undersampleddf1=joinc.join(df1,df2)
			
			return undersampleddf1
		undersampleddf1=joind(self.df1,self.df2,A, undersamplingv,varydocument)
		
		  #undersampleddf1, undertest=joinc.joinsourcetarget(A,varydocument)

		undertest=pd.read_csv(r'documents\csv\drunk\drunkTEXT400U.csv')
		undertest.L=undertest.L.replace(['y','Y','n','n '], [True,True,False,False]) 
		  #join the domain specific features to the training and sample
		
		laplacian= Laplacian_matrix(self.df1,self.df2,'Clean tweet')
		la,ds,di=laplacian.LAPLACE_NORMALIZED()
	
		self.domain_specific=ds #+['sentiment_polarity','sentiment_subjectivity','absPolarity']
		self.laplacian_matrix=la
		joiner=join(undersampleddf1,'Clean tweet')
		def getmostcommon(df,df1,n=10):
				
				main_domain = join(df,'Clean tweet')
				main_domain1 = join(df1,'Clean tweet')
				top = topwords(self.df2,'Clean tweet',n)
				bigrams=ngrams(self.df2,'Clean tweet',n)
				
				topw=top.top
				bigramsw=bigrams.bigrams
				
				main_domain.joinall(topw,1)
				main_domain.joinall(bigramsw,2)
				main_domain1.joinall(topw,1)
				main_domain1.joinall(bigramsw,2)
				return main_domain.df,main_domain1.df,
		tainingt, testt=getmostcommon(undersampleddf1,undertest,10)
		
		###################################
		#tainingt, testt=addbigrams(tainingt,testt,self.df2)
		
	
		self.matrixtodott,self.matrixtodotest=joiner.jointwodocuments(undersampleddf1,undertest,ds,1)
		
		if joineig==0:
		  trainingset,headerst=self.domain_specificbyeigenvector(tainingt)
		  testset,headerstest=self.domain_specificbyeigenvector(testt)
		elif (joineig==1) or (joineig==2):
			trainingset=tainingt
			testset=testt
			if joineig==2:
				trainingset,headerst=self.domain_specificbyeigenvector(tainingt)
				testset,headerstest=self.domain_specificbyeigenvector(testt)
				headerst=headerst+['L']
				trainingset=trainingset[headerst]
				testset=testset[headerst]
				
		
		
		

		return  trainingset , testset
Example No. 34
        
        while m3u8_info[0][i] <= end_flag:
            if i >= m3u8_num:
                break
            i+=1
        end = i
    
    # download the .ts segment files
    pro = ProgressBar(total = end+1-start) 
    starttime = datetime.datetime.now()
    for i in range(start,end+1):
        savepath_filename = path + '\\ts\\' + '%06d'%i + '.ts'
        downloader(m3u8_info[1][i] , savepath_filename)
        untilnowtime = datetime.datetime.now()
        interval = (untilnowtime - starttime).seconds
        pro.move()
        pro.log()
    
    # now merge the downloaded segment files
    fromdir = path + '\\ts\\'
    tofile = path + '\\' + title + '[' + str(start_flag) + '-' + str(end_flag) + ']' + '.ts'
    join(fromdir, tofile)
    print ('Files merged successfully!')
    
    shutil.rmtree(fromdir)
    os.mkdir(fromdir)
    print ('Temporary files cleaned up successfully!')

    endtime = datetime.datetime.now()
    interval = (endtime - starttime).seconds
    print ('Total time: ' + str(interval/60) + ' min (' + str(interval) + ' s )')
Example No. 35
def alltogether(A,varydocument=0,x=0): 
	df1=pd.read_csv(r'documents\csv\pregnancy\GOOD LABELING 170620151.csv')
	df2=pd.read_csv(r'documents\csv\drunk\drunk labeling 1300.csv')
	laplacian= Laplacian_matrix(df1,df2,'Clean tweet')
	la,ds,di=laplacian.LAPLACE_NORMALIZED()
	classesname=ds+di
	n=len(classesname)
	print 'titles'
	print n
	allclusters=[]
	
	print x
	prueba= cluster(la,x,classesname)
	lencluster, clusterd=prueba.cluster
	allclusters.append(clusterd)
	clusterslong=[]
	for x in allclusters:
		for y in x:
			if len(y)>1 and len(y)<100:
				clusterslong.append(y)
	print clusterslong
	clustersall=[]
	for i in clusterslong:
	  if i not in clustersall:
	    clustersall.append(i)
	print len(clustersall)
	
	print 'pass1'
	joinc=joindocuments(df1,df2)
	print 'pass1.a'
	undersampleddf1, undertest=joinc.joinsourcetarget(A,varydocument)
	joiner=join(undersampleddf1,'Clean tweet')
	print 'pas2'

	tainingt, testt=joiner.jointwodocuments(undersampleddf1,undertest,clustersall,4)

	print 'ta'
	


	ratiov=ratio(tainingt,'L')
	a=[ 'L', 'absPolarity', 'sentiment_polarity', 'sentiment_subjectivity']
	
	cols=['Clean tweet']
	try:
		for x in cols:
			del tainingt[x]
			del testt[x]
	except:
		pass
	headers_names=list(tainingt.columns.values)
	headers_names.remove('L')
	headers_names.append('L')
	tainingt=tainingt[headers_names] 
	testt=testt[headers_names]
	
	tainingt=tainingt.replace(['True','False'], [True,False])   
	testt=testt.replace(['True','False'], [True,False]) 
	TRAINING=tainingt.as_matrix(columns=None)
	A=str(A)	
	arff.dump( r'documents\Arff\cluster\trainning'+A+'.arff',TRAINING, relation="whatever", names=headers_names)
	TEST=testt.as_matrix(columns=None)	 
	arff.dump(r'documents\Arff\cluster\test'+A+'.arff',TEST, relation="whatever", names=headers_names)
Example No. 36
def domain(document,
           crossvalidationundersampling,
           ArffL,
           A=0,
           undersampler=0,
           sentiment=0):
    test = pd.read_csv(r'documents\csv\drunk\drunkTEXT400.csv')
    test.L = test.L.replace(['y', 'n'], ['True', 'False'])
    df1 = pd.read_csv(document + '.csv')
    df1.L = df1.L.replace(['y', 'n'], ['True', 'False'])
    joinc = joindocuments(df1, df1)
    top = topwords(df1, 'Clean tweet', 100)
    main_domain = join(df1, 'Clean tweet')

    bigrams = ngrams(df1, 'Clean tweet')
    print 'bigrams'
    print bigrams.bigrams
    main_domain.joinall(bigrams.bigrams, 2)
    main_domain.joinall(top.top, 1)

    main_domain.df.to_csv('prueba.csv', index=False)
    ratiov = ratio(main_domain.df, 'L')
    ratios = ratiov.getoddratios(top.top)
    print 'ratios'
    print ratios
    ds = list(ratios.keys())
    testobject = join(test, 'Clean tweet')
    oddradiojoin = join(df1, 'Clean tweet')
    oddradiojoin.joinall(ds, 1)
    testobject.joinall(ds, 1)
    oddradiojoin.joinall(bigrams.bigrams, 2)
    testobject.joinall(bigrams.bigrams, 2)
    test = testobject.df
    cols = ['Clean tweet']
    if sentiment == 1:
        cols = [
            'Clean tweet', 'sentiment_polarity', 'sentiment_subjectivity',
            'absPolarity'
        ]

    try:
        for x in cols:
            del oddradiojoin.df[x]
            del test[x]
    except:
        pass
    #training, test=joinc.gettrainingandtestp(oddradiojoin.df)
    print 'matrix of elements to reduce'
    print "saul,", oddradiojoin.df.shape
    #########################################################
    if undersampler == 1:
        print "saul,", oddradiojoin.df.shape
        oddradiojoin.df = joinc.undersampling(oddradiojoin.df)
        print oddradiojoin.df.shape
    if A == 1:

        dftraining, dftest = pcaf(oddradiojoin.df, test)
        oddradiojoin.df = dftraining.join(oddradiojoin.df["L"])

        test = dftest.join(test["L"])

    print oddradiojoin.df.shape
    training = oddradiojoin.df

    training = training.replace(['True', 'False'], [True, False])
    test = test.replace(['True', 'False'], [True, False])
    training = training.astype(np.float64)
    test = test.astype(np.float64)
    training['L'] = training['L'].astype(bool)
    test['L'] = test['L'].astype(bool)
    A = str(A)
    sentiment = str(sentiment)
    oddradiojoin.df.to_csv('crossvalidation.csv', index=False)
    #undersampleddf1.to_csv(str(crossvalidationundersampling) +'\undersampling'+A+'.csv',index=False)
    headers_names = list(training.columns.values)
    headers_names.remove('L')
    headers_names.append('L')
    headers_names1 = list(test.columns.values)
    print headers_names, 'headers test', headers_names1
    test = test[headers_names]
    training = training[headers_names]
    print 'training' + str(training.dtypes)
    test.to_csv(str(crossvalidationundersampling) + r'\test1' + A + '.csv',
                index=False)
    training.to_csv(str(crossvalidationundersampling) + r'\training1' + A +
                    '.csv',
                    index=False)
    TRAINING = training.as_matrix(columns=None)
    TEST = test.as_matrix(columns=None)
    print 'training'
    print training.dtypes

    arff.dump(ArffL + r'\trainingwu' + A + str(undersampler) + sentiment +
              '.arff',
              TRAINING,
              relation="whatever",
              names=headers_names)

    arff.dump(ArffL + r'\testwu' + A + str(undersampler) + sentiment + '.arff',
              TEST,
              relation="whatever",
              names=headers_names)
Example No. 37
def main():
    op = argparse.ArgumentParser(add_help = False)
    o = op.add_argument
    o("-?", "--help", action = "help"),
    o("-i", "--input", help = "input file")
    o("-c", "--cfile", help = "c output file")
    o("-C", "--comments", help = "keep comments", action = "store_true")
    o("-h", "--hfile", help = "h output file")
    o("-n", "--name", help = "module name")
    o("-p", "--prefix", help = "header guard prefix", default = "")
    o("-N", "--no-lines", action = "store_true",
        help = "don't generate #line directives")
    o("-s", "--sfile", help = "intermediate code output file")
    o("-j", "--join", nargs = "+", help = "files to join")
    o("-o", "--output", help = "source code output file")
    o("-t", "--ctypes", help = "ctypes output file")
    o("-T", "--terminal", action = "store_true")
    o("-d", "--dfile", help = "output api docs")
    o("-DT", "--debug-tokens")
    options = op.parse_args()

    p = None

    if options.terminal:
        terminal.run()
        exit(0)
    
    if options.join:
        if options.output:
            f = open(options.output, "wb")
        else:
            f = sys.stdout
        join.join(options.join, f)
    elif options.input:
        text = open(options.input, "rb").read().decode("utf8")

        p = Parser(options.input, text, comments = options.comments,
            options = options)
        try:
            p.parse()
        except MyError as e:
            print(e.value.encode("ascii", errors = "replace").decode("ascii"))
            exit(1)

        if not options.name:
            options.name = os.path.splitext(options.input)[0]
    else:
        print("No input file given with -i.")
        op.print_help()
        exit(1)

    if p and options.sfile:
        s = SWriter()
        code = s.generate(p)
        f = open(options.sfile, "wb")
        f.write(code.encode("utf8"))

    if p and options.dfile:
        d = dout.DWriter()
        code = d.generate(p, options.name)
        f = open(options.dfile, "wb")
        f.write(code.encode("utf8"))

    if p and (options.cfile or options.hfile):
        c = CWriter()
        try:
            code, header = c.generate(p, options.name, options.no_lines,
                options.prefix)
        except MyError as e:
            print(e)
            exit(1)
        if options.cfile:
            f = open(options.cfile, "wb")
            f.write(code.encode("utf8"))
        if options.hfile:
            f = open(options.hfile, "wb")
            f.write(header.encode("utf8"))

    if p and options.ctypes:
        w = CTypesWriter()
        code = w.generate(p)
        f = open(options.ctypes, "wb")
        f.write(code.encode("utf8"))
Example No. 38
def outer(self, right, **keys):
    '''
    '''
    return join(self, right, True, True, **keys)
Example No. 39
 def test_left_join(self):
     left = join(dogs, cats, 'left', 'name')
     assert len(left) == 4
Example No. 40
 def test_right_join(self):
     right = join(dogs, cats, 'right', 'name')
     assert len(right) == 6
Example No. 41
def right_outer(self, right, **keys):
    '''
    '''
    return join(self, right, False, True, **keys)
Example No. 42
def measure_join():
    s = join.join(int(1e6))
    print("{}".format(len(s)))
Example No. 43
print("retrieving nodes...\n")
circles = get_circlecontours(nodes)

rois, centers = get_maskedcontours(gray, circles)

print("labelling...\n")

labels = []
for roi in rois:
    x = retrieve_digits(roi)
    labels.append(x)

vertices = dict(zip(range(1, len(rois) + 1), labels))

print("vertices : ", vertices)

raw_adj = join(edges, centers, directed)
adj_matrix = []
for x in raw_adj:
    adj_matrix.append(
        (int(vertices[x]), [int(vertices[y]) for y in raw_adj[x]]))

print()
print("Printing Adjacency List....\n")
adj_matrix = dict(adj_matrix)
print(adj_matrix)

cv2.waitKey(0)
cv2.destroyAllWindows()
Example No. 44
def inner(self, right, **keys):
    '''
    '''
    return join(self, right, False, False, **keys)