Example #1
def df_sampling_ld(csv,n_samples):
    """
    Extract from the list a new list with n_samples domain names
    
    ARGS:
        - csv: path of csv where extract the samples
        - n_samples: number of samples to extract
    """
    
    #check the presence of file out
    if os.path.exists('sample_'+str(csv.split('/')[-1].split('.')[0])+'.csv'):
        print('removing old csv')
        os.remove('sample_'+str(csv.split('/')[-1].split('.')[0])+'.csv')
    else:
        print('no old csv exists')
      
    df = pd.read_csv(csv, low_memory=False,names=['domain'])
    print(df)
    df_out = df.sample(n_samples)
   
    df_out.to_csv('sample_'+str(csv.split('/')[-1].split('.')[0])+'.csv', header=None)
    
    df_final = pd.read_csv('sample_'+str(csv.split('/')[-1].split('.')[0])+'.csv', low_memory=False, names=['index','domain'], index_col=['index'])
    df_final_out = df_final.sort_values(by=['index'])
    print(df_final)
    df_final_out['domain'].to_csv('samplesort_'+str(csv.split('/')[-1].split('.')[0])+'.csv',header=None,index=False)
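A minimal usage sketch (the path 'data/domains.csv' is hypothetical; assumes os and pandas as pd are imported, and that the input is a headerless csv with one domain per line):

import os
import pandas as pd

df_sampling_ld('data/domains.csv', 1000)
# writes sample_domains.csv (the raw sample) and samplesort_domains.csv (sorted, domains only)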
Example #2
def sampling_ld(csv_path,n_samples):
    
    """
    Extract a new list of n_samples domain names from the input list.
    Don't use it: it dies with 'kill 9' from excessive memory usage.
    Use df_sampling_ld instead.
    
    ARGS:
        - csv_path: path of the csv file to extract the samples from
        - n_samples: number of samples to extract
    """
    
    #check the presence of file out
    if os.path.exists('sample_'+str(csv_path.split('/')[-1].split('.')[0])+'.csv'):
        print('removing old csv')
        os.remove('sample_'+str(csv_path.split('/')[-1].split('.')[0])+'.csv')
    else:
        print('no old csv exists')
    
    
    list_dom = extract_doml(csv_path)
    
    print('sampling...')
    new_list = random.sample(list_dom,n_samples)
    
    print('Writing..')
    # note: the path parameter must not be named 'csv', or it would shadow
    # the csv module used by csv.writer below
    with open('sample_'+str(csv_path.split('/')[-1].split('.')[0])+'.csv', mode='a') as csv_out:
        writer = csv.writer(csv_out)
        for x in new_list:
            writer.writerow([x])
Example #3
def lambda_handler(event, context):
    stream_name = "dsoaws-data-stream"
    s3 = boto3.client('s3')
    # create the Kinesis client used by put_record below
    kinesis = boto3.client('kinesis')
    response = s3.get_object(
        Bucket="sagemaker-us-east-1-806570384721",
        Key="data/amazon_reviews_us_Digital_Software_v1_00_noheader.csv")
    csv = str(response['Body'].read().decode('UTF-8'))
    lines = csv.split("\n")
    for line in lines:
        val = line.split(",")
        data = json.dumps(getRating(val[0], val[1]))
        kinesis.put_record(StreamName=stream_name,
                           Data=data,
                           PartitionKey="reviews")
    return "complete"
Example #4
 def deserialize(self, s):
     unseenRecords = set(self.fields)
     for (name, strValue) in zip(self.fields, csv.split(s)):
         setattr(self, name, self.converters[name](strValue))
         unseenRecords.remove(name)
     for name in unseenRecords:
         setattr(self, name, self.defaults[name])
Example #5
def get_yahoo_prices (exchange, ticker, start, end):
    yahoo_ticker = ticker.replace (".", "-")
    url = urltemplate  % (yahoo_ticker, exchange, start.day, start.month-1, start.year, end.day, end.month-1, end.year)
    try:
        csv = scraperwiki.scrape (url)
    except:
        print 'Cannot find %s:%s' % (exchange, ticker)
        return
    first = True
    alldata = []
    for line in csv.split("\n"):
        if first:
            headers = [h.replace (" ", "") for h in line.split (",")]
            first = False
        elif line == '':
            continue
        else:
            data = {}
            raw = line.split(",")
            data['Ticker'] = ticker
            data['Source'] = 'Yahoo'
            data['Exchange'] = exchange
            data['Date']     = convert_date_iso (raw[0])
            data['Open']     = float (raw[1])
            data['High']     = float (raw[2])
            data['Low']      = float (raw[3])
            data['Close']    = float (raw[4])
            data['Volume']   = float (raw[5])
            data['AdjClose'] = float (raw[6])
            alldata.append (data)
    scraperwiki.sqlite.save (unique_keys=["Ticker", "Source", "Exchange", "Date"], data=alldata)
Example #6
def parse_csv_string(csv):
    csv_list = csv.split('\n')
    parsed = []
    for line in csv_list:
        if line != '':
            parsed.append(line.split(','))
    return parsed
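A quick usage sketch:

text = "a,b,c\n1,2,3\n"
print(parse_csv_string(text))  # [['a', 'b', 'c'], ['1', '2', '3']]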
Example #7
def get_intended_ratio(csv_name):

    csv = os.path.basename(csv_name)
    #ratio = float(csv.split('-')[-1][0:3].replace('_','.'))
    ratio = csv.split('-')[-1][0:3].replace('_', '.')
    print(ratio)
    return ratio
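A usage sketch (the filename is hypothetical; the ratio is encoded in the last dash-separated field, with '_' standing in for the decimal point):

ratio = get_intended_ratio('/tmp/experiment-0_5.csv')  # prints '0.5', returns the string '0.5'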
Example #8
def get_data (exchange, ticker, start, end):
    yahoo_ticker = get_ticker (ticker)
    url = url_template % (yahoo_ticker, exchange, start.day, start.month-1, start.year, end.day, end.month-1, end.year)
    try:
        csv = scraperwiki.scrape (url)
    except:
        print 'Cannot find %s:%s' % (exchange, ticker)
        return
    first = True
    alldata = []
    for line in csv.split("\n"):
        if first:
            headers = [h.replace (" ", "") for h in line.split (",")]
            first = False
        elif line == '':
            continue
        else:
            tds = line.split(',')
            data = \
                {
                'Source':'Yahoo',
                'Type':'',
                'Currency': None,
                'Ticker':ticker,
                'DeclarationDate':None,
                'ExDivDate': convert_date (tds[0]),
                'RecordDate':None,
                'PayDate':None,
                'Amount':float(tds[1]),
                'Url':url
                }
            alldata.append (data)
    scraperwiki.sqlite.save (unique_keys=["Source", "Ticker", "ExDivDate", "Type"], data=alldata)
Example #9
File: dbi.py, Project: TingPing/Limnoria
 def deserialize(self, s):
     unseenRecords = set(self.fields)
     for (name, strValue) in zip(self.fields, csv.split(s)):
         setattr(self, name, self.converters[name](strValue))
         unseenRecords.remove(name)
     for name in unseenRecords:
         setattr(self, name, self.defaults[name])
Example #10
	def to_csv(self,data):
		email = '*****@*****.**'
		json = str(data)
		filename = 'json_csv'
		sys.stdout.write('Status: 200 OK\n')
		sys.stdout.write('Content-Type: text/csv; charset=utf-8\n')
		sys.stdout.write('Content-Disposition: attachment; filename=' + filename + '\n\n')
		url = 'https://json-csv.com/api/getcsv'
		post_fields = {'email': email, 'json': json}
		request = Request(url, urlencode(post_fields).encode())
		csv = urlopen(request).read().decode()
		for l in csv.split('\r\n'):
			print (l)
Example #11
 def CsvVerify(self, csv):
     separator = csv.split(".")
     for separator_field in separator:
         if separator_field == separator[(len(separator) - 1)]:
             if separator_field == "csv":
                 return csv
             else:
                 return False
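A behavior sketch (assuming checker is an instance of the enclosing class):

checker.CsvVerify('report.csv')  # returns 'report.csv'
checker.CsvVerify('report.txt')  # returns False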
Example #12
File: api.py, Project: Kawue/vaidra
def get_csvs(dataset_name):
    csv_list = []
    for csv in os.listdir(app.config["CSV_FOLDER"]):
        e_name = dataset_name.split(".h5")[0]
        ce_name = csv.split("_")[0]
        if (ce_name in e_name):
            csv_list.append(csv)
    return json.dumps(csv_list)
Example #13
def reName():
    path = "E://scripts//4//pick-up-sku//"
    csvList = os.listdir(path)
    for csv in csvList:
        l = csv.split('-')
        l[7] = "400" + l[7]
        newName = path + "-".join(l)
        oldName = path + csv
        os.rename(oldName, newName)
Example #14
def main(dir):
    dir = dir.replace("\\", "/")
    # convert the images to xml
    #convert_to_xml(dir)
    # extract information from the xml into csv
    convert_to_csv(dir)
    # process the csv to generate the attribute values needed for the new xml
    csv_paths = glob.glob(dir + "/*.csv")
    for csv in csv_paths:
        loc_list = deal_csv(csv)
        xml_origin_name = str(os.path.abspath(os.getcwd(
        ))) + '/xml_04_origin/' + csv.split("/")[-1][:-4] + '_origin.xml'
        xml_last_name = str(os.path.abspath(os.getcwd(
        ))) + '/xml_04_last/' + csv.split("/")[-1][:-4] + '_last.xml'
        try:
            generate_xml(xml_origin_name, xml_last_name, loc_list)
        except Exception as e:
            raise e
Example #15
def createcsv(csv_data, f):
    # note: the data parameter must not be named 'csv', or it would shadow
    # the csv module used by csv.writer below
    try:
        dataLines = csv_data.decode().split('\r\n')
    except AttributeError:  # already a str, not bytes
        dataLines = csv_data.split('\r\n')
    with f:
        writer = csv.writer(f)

        for row in dataLines:
            # split each line into fields; writerow on a raw string would
            # emit one character per column
            writer.writerow(row.split(','))
Example #16
    def analyseCSV(self):
        csvFiles = locate("*tif_RESULTS.csv",self.imgDir)
        for csv in csvFiles:
            (mean,stdev) = self.calcIntensity(csv)
            (bgMean,areaBG) = self.calcBGIntensity(csv)
            csvName = csv.split('/')[-1][:-12]
            print csvName, mean,bgMean

            ofile.write("\t".join([csv, csvName, str(mean),str(stdev),str(bgMean), str(areaBG),"\n"]))
            print mean, stdev, csvName
Example #17
def read_and_draw(list_csv, anno_path, read_path, save_path, type='bmp'):
    """
    Draw the predicted boxes and save the results.
    :param list_csv: list of csv file names
    :param anno_path: path of the annotation csvs; each line is
        [ 'img_name','left','top','x1','y1','x2','y2','catagory' ]
    :param read_path: path of the original images
    :param save_path: path where the processed images are saved
    :param type: image format
    :return: None
    """
    list_csv = [csv for csv in list_csv if csv]
    for csv in tqdm(list_csv):
        anno_lines = read_csv(
            os.path.join(anno_path,
                         csv.split(".")[0] + '.csv'))  # read the annotation csv
        im = cv2.imread(os.path.join(read_path,
                                     csv.split(".")[0] + '.' + type))
        name, show_img = draw(im, anno_lines)
        # save
        cv2.imwrite(os.path.join(save_path, str(name) + '.' + type), show_img)
Example #18
def plotVSWR(csv, fig=None, freq_range=None, expect_linear=False, **kwargs):
    '''
    Plots the VSWR from a csv (assuming the csv contains log magnitude data).
    If fig is given then it will plot on that figure with the predefined axis/title/etc.  If label
    is given as one of the optional kwargs then the plot will use this label, otherwise it will pull one from csv.
    Always plots in MHz, so ensure that if fig is given it accounts for this.

    Parameters
    ----------
    csv : str
        The path/filename of the csv to be plotted.  It should lead to a csv file that was made
        using the field fox in the Log Magnitude setting.
    fig : matplotlib.pyplot.figure(), optional
        The input figure to be plotted on.  If not given then a figure will be created.
    freq_range : tuple of floats
        The range for which to plot the frequencies, (min_freq, max_freq), given in MHz.
    expect_linear : bool
        If true then the csv is expected to contain linear magnitudes.
    **kwargs
        Should contain only additional plotting kwargs, such as label.

    Returns
    -------
    fig : matplotlib.pyplot.figure()
        The plotted figure object.
    '''
    if fig is None:
        new_plot = True
        fig = pylab.figure()
    else:
        new_plot = False
    if 'label' not in kwargs:
        kwargs['label'] = 'VSWR For ' + csv.split('/')[-1].replace('.csv', '')

    if expect_linear:
        freqs, lin = readerFieldFox(csv)
        VSWR = linToVSWR(lin)
    else:
        freqs, LM = readerFieldFox(csv)
        VSWR = linToVSWR(logMagToLin(LM))

    if freq_range is not None:
        cut = numpy.logical_and(freqs / 1e6 > freq_range[0],
                                freqs / 1e6 < freq_range[1])
    else:
        cut = numpy.ones_like(freqs, dtype=bool)

    pylab.plot(freqs[cut] / 1e6, VSWR[cut], **kwargs)

    if new_plot == True:
        pylab.legend()
        pylab.xlabel('Frequency (MHz)')
        pylab.ylabel('VSWR')

    return fig
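A usage sketch (file paths and labels are hypothetical; readerFieldFox, linToVSWR, and logMagToLin are assumed to come from the same module):

import pylab

fig = plotVSWR('data/antennaA_LM.csv', freq_range=(100.0, 1000.0), label='Antenna A')
plotVSWR('data/antennaB_LM.csv', fig=fig, freq_range=(100.0, 1000.0), label='Antenna B')
pylab.show()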
Example #19
def build_acq_date_list(in_csv_list):
    """Builds an array made up of julian dates paired with csv files."""
    print "Building acquisition date list..."
    acq_date_list = []
    for csv in in_csv_list:
        csv_filepath_list = csv.split('.')
        acq_date = csv_filepath_list[0][-7:-4]
        acq_year = csv_filepath_list[0][-11:-7]
        row = [acq_date, acq_year, csv]
        acq_date_list.append(row)
    return acq_date_list
Example #20
def qcewCreateDataRows(csv):
    dataRows = []
    try:
        dataLines = csv.decode().split('\r\n')
    except AttributeError:  # csv is already a str, not bytes
        dataLines = csv.split('\r\n')

    for row in dataLines:
        dataRows.append(
            list(map(lambda x: x.replace('\"', ''), row.split(','))))
    return dataRows
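A usage sketch (the URL is hypothetical; the function accepts either raw bytes or an already-decoded str):

import urllib.request

raw = urllib.request.urlopen('https://example.com/qcew/2020_q1.csv').read()
rows = qcewCreateDataRows(raw)
print(rows[0])  # header fields with the surrounding quotes stripped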
Example #21
def get_au_images(au):
    """
    Get au value for each frame for each user
    """
    au_path = "./data/bp4d/AU/Project/AU%d"%au
    csv_files = filter(lambda x: x.rsplit('.')[-1] == 'csv',  os.listdir(au_path))
    image_au_codes = dict()
    for csv in csv_files:
        meta, _ = csv.split('.')
        image_au_codes[meta] = get_frame_au_map( au_path + "/" + csv)
    return image_au_codes
Example #22
def build_julian_csv_array(in_csv_list):
    """Builds an array made up of julian dates paired with csv files."""
    print "Building acquisition date list..."
    julian_csv_array = []
    for csv in in_csv_list:
        csv_filepath_list = csv.split('.')
        acq_date = csv_filepath_list[0][-7:-4]
        acq_year = csv_filepath_list[0][-11:-7]
        row = [acq_date, acq_year, csv]
        julian_csv_array.append(row)
    return julian_csv_array
Example #23
def crawler():
    csvs = []

    # existing volume csv files by date
    volcsvs = list(
        set([
            volcsv.split('_')[0] for volcsv in listdir(DATA_DIR)
            if volcsv.endswith("csv")
        ]))

    headers = requests.utils.default_headers()
    headers.update({
        'User-Agent':
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
        # 'Referer': t.getRef()
    })

    try:
        r = requests.get(BASE_URL + VOLUME_URI, headers=headers,
                         verify=False)  # fetch the page from the web with requests
        # print(r.headers['Set-Cookie'])
        soup = BeautifulSoup(r.text, 'lxml')  # parse and extract elements (lxml)

        # csv file uri: collect every <a> element
        allcsv = []
        for table in soup.find_all('table'):
            for tr in table.find_all('tr'):
                allcsv.extend([
                    a['href'] for a in tr.find_all('a', href=True)
                    if a['href'].endswith('.csv')
                ])

        # download and save under the renamed file
        for csv in allcsv:
            csvfile = csv.split('/')[-1]
            if csvfile.split('_')[0] not in volcsvs:  # not downloaded yet
                print("sleeping 2s ...: " + BASE_URL + csv)
                sleep(2)
                response = requests.get(BASE_URL + csv)

                if response.ok:
                    csvpath = DATA_DIR + csvfile
                    csvs.append(csvpath)
                    with open(
                            csvpath,
                            mode='wb+') as f:  # write, binary, allow creation
                        f.write(response.content)

        return csvs

    except Exception as e:
        print("error: {0}".format(e), file=sys.stderr)
        exitCode = 2
Example #24
def insertdata(data, date):
    team1 = data[0]
    team1_id = get_team_id(team1)
    team1_players = data[1]
    team1_players_id = [str(i) for i in get_player_id(team1_players, data[13][:5])]
    
    team2 = data[2]
    team2_id = get_team_id(team2)
    team2_players = data[3]
    team2_players_id = [str(i) for i in get_player_id(team2_players, data[13][5:])]
    
    form = data[4]
    matchid = data[6]
    winner = data[7]
    
    #INSERT Match
    conn.execute("INSERT OR IGNORE INTO Match VALUES("+matchid+", '"+date+"', "+team1_id+", "+team2_id+", '"+winner.replace("'", "''")+"', '"+form+"', \
                  '"+team1_players_id[0]+"', '"+team1_players_id[1]+"', '"+team1_players_id[2]+"', '"+team1_players_id[3]+"', '"+team1_players_id[4]+"', \
                  '"+team2_players_id[0]+"', '"+team2_players_id[1]+"', '"+team2_players_id[2]+"', '"+team2_players_id[3]+"', '"+team2_players_id[4]+"', \
                    '"+create_string(data[9])+"', '"+create_string(data[10])+"','"+create_string(data[11])+"','"+create_string(data[12])+"')")

    for csv in get_file_names('C:\Users\QQ\Dropbox\csgo stats\database\csv'):
        #INSERT Round
        Map = csv.split("_")[1].split(".")[0]
        demo_data = read_csv('csv/'+csv)
        if len(demo_data[0]) < 10:
            continue
        
        demo_team = assign_players(demo_data[0], data[1]+data[3])
        demo_team1, demo_team2 = demo_team[:5], demo_team[5:]
        for row in demo_data[1]:
            if row[0] != '0' and int(row[5]) == 0 and int(row[6]) == 0:
                continue
            if row[26].lower() in demo_team1:
                team_won = team1_id
            elif row[26].lower() in demo_team2:
                team_won = team2_id
            else:
                print "UNABLE TO RECOGNIZE WINNING TEAM!"
                team_won = '-1'
            conn.execute("INSERT OR IGNORE INTO Round VALUES("+matchid+", '"+Map+"', '"+row[0]+"', '"+row[25]+"', "+team_won+",\
                        "+row[3]+", "+row[4]+", "+row[5]+", "+row[6]+", "+row[7]+", "+row[8]+", "+row[9]+", "+row[10]+","+row[11]+","+row[12]+", "+row[19]+","+row[20]+", '"+row[21]+"')") 
    
            #INSERT Playerstats
            pkills = [i.lower() for i in row[22].split(":")]
            pdeaths = [i.lower() for i in row[23].split(":")]
            passists = [i.lower() for i in row[24].split(":")]
            for p in range(5):
                conn.execute("INSERT OR IGNORE INTO Playerstats VALUES("+team1_players_id[p]+", "+matchid+", '"+Map+"', '"+row[0]+"', \
                            "+str(pkills.count( demo_team1[p] ))+","+str(pdeaths.count( demo_team1[p] ))+", "+str(passists.count( demo_team1[p] ))+")" ) 
                conn.execute("INSERT OR IGNORE INTO Playerstats VALUES("+team2_players_id[p]+", "+matchid+", '"+Map+"', '"+row[0]+"', \
                            "+str(pkills.count( demo_team2[p] ))+","+str(pdeaths.count( demo_team2[p] ))+", "+str(passists.count( demo_team2[p] ))+")" )
    conn.commit()
Example #25
 def __init__(self, csv):
     self.mapping = {
         'title': 6,
         'authors': 2,
         'year': 18,
         'abstract': 24,
         'doi': 11,
         'keywords': 10,
     }
     self.source = 'ACM'
     self.csv = csv.split('\r\n')
     self.author_delim = ' and '
     self.keyword_delim = ','
Example #26
def findLatestExport(courseExportDir):
    exportFiles = os.listdir(courseExportDir)
    exportList = []
    for csv in exportFiles:
        if 'export' in csv:
            csvDate = csv.split('_')
            exportList.append(csvDate[-1])
    sortedList = sorted(exportList, reverse=True)

    for csv_r in exportFiles:
        if "_export_{}".format(sortedList[0]) in csv_r:
            print(sortedList[0])
            return os.path.join(courseExportDir, csv_r)
    # only report failure after every file has been checked,
    # not on the first non-matching name
    print('Previous export cannot be found.')
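A usage sketch (the directory is hypothetical; export files are expected to carry names like 'course_export_20200101.csv'):

latest = findLatestExport('/data/course_exports')
print(latest)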
Example #27
 def __init__(self, csv):
     self.mapping = {
         'title': 0,
         'authors': 1,
         'year': 5,
         'abstract': 10,
         'doi': 13,
         'keywords': 16,
     }
     self.source = 'IEEE'
     from unidecode import unidecode
     csv = unidecode(unicode(csv, encoding="utf-8"))
     self.csv = csv.split('\n')
     self.author_delim = ';'
     self.keyword_delim = ';'
Example #28
def createMultiX(directory,listOfCSV,listOfComb):
    titleList = []
    rTable = 'Resistance Table.csv'
    rt = {'Filename':filename,
          'Ohmic': olist,
          'NonOhmic' : nolist,
          'Tasr' : tasr,
          'Area Corrected Ohmic' : acohmic,
          'Area Corrected Non Ohmic' : acnonohmic,
          'Area Corrected Tasr' : actasr}
    dt = pd.DataFrame(data=rt)
    dt.to_csv(rTable,index = False)
    
    for csv in listOfCSV:
        csvSplitList = csv.split('_')
        print(csvSplitList)
        for val in csvSplitList:
            try:
                # 'and' is required here; 'or' is always true, so every
                # value (including 'aging'/'preaging') would be tried as a temp
                if val != 'aging' and val != 'preaging':
                    intTemp = (int(val))
                   
            except ValueError:
                print ('Was Not a Temp')
        stringName=(str(intTemp))
        titleList.append(stringName)

    dirpath = os.chdir(directory)
    fileN = "Multi X-Axis Support.csv"
    '''
    dataf = { '1': listOfComb[0]}
    df = pd.DataFrame(data=dataf)
    df.to_csv(fileN, index = False)
    for i in range(len(listOfComb)):

        
        df = pd.read_csv(fileN)
        df['test',i] = listOfComb[i]
        df.to_csv(fileN)
    '''
   
    for i in range(len(listOfComb)):
        if i ==0:
            dataf = {titleList[0] : listOfComb[i]}
            df = pd.DataFrame(data=dataf)
        if i !=0:
            head = titleList[i]
            df[head] = listOfComb[i]
            df.to_csv(fileN, index = False)
Example #29
def parseCSV( csv ):
    data = []
    lines = csv.split( "\n" )
    keys = lines [ 0 ].split( ',' )

    for l in lines:
        values = l.split( ',' )
        d = {}
        for k,v in zip( keys, values ):
            d[k] = v
        data.append( d )

    # The next two lines are dependent on the report we are using
    del data[0] # This line takes out the keys line from the data set
    del data[len(data) -1] # This line takes out a blank data line at the end
    return data
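A quick usage sketch:

sample = "name,age\nalice,30\nbob,25\n"
print(parseCSV(sample))  # [{'name': 'alice', 'age': '30'}, {'name': 'bob', 'age': '25'}]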
Example #30
    def analyseCSV(self):
        csvFiles = locate("*tif_RESULTS.csv", self.imgDir)
        for csv in csvFiles:
            (mean, stdev) = self.calcIntensity(csv)
            (bgMean, areaBG) = self.calcBGIntensity(csv)
            csvName = csv.split('/')[-1][:-12]
            print csvName, mean, bgMean

            ofile.write("\t".join([
                csv, csvName,
                str(mean),
                str(stdev),
                str(bgMean),
                str(areaBG), "\n"
            ]))
            print mean, stdev, csvName
Example #31
 def file_finder(self, data):
     path = os.getcwd()
     self.result_path = os.path.join(path, data)
     reward_files = [files for files in glob.glob(os.path.join(path, data, 'result*'))]
     data_dict = {}
     self.modelnumber=None
     if len(reward_files)!=0:
         sorted_reward_files = []
         for i in range(len(reward_files)):
             csv = reward_files[i].split('result')[-1]
             sorted_reward_files.append(int(csv.split('.')[0]))
         sorted_reward_files.sort()
         data_dict['result'] =  'result' + str(sorted_reward_files[-1]) + '.csv'
         data_dict['traveltime']  = 'traveltime' + str(sorted_reward_files[-1]) + '.csv'
         #data_dict['algo_timer'] = 'algo_timer' + str(sorted_reward_files[-1]) + '.csv'
         self.modelnumber = sorted_reward_files[-1] + 10000
     print('MODEL NUMBER BEING USED IS: ', self.modelnumber)
     return data_dict, bool(data_dict)
Example #32
	def downloadData(self, symbol, range=None):
		""" 
		Return a csv object with symbol data.
		csv object is iterable by csv.reader
		"""

		#Holds the base Google finance URL
		base_data_url = "http://www.google.com/finance/historical?output=csv&q="
		#Create full URL for downloading desired data
		download_url = base_data_url + symbol

		if (range != None):
			download_url = download_url + "&startdate=" + range[0] + "&enddate=" + range[1]

		#Download
		csv = (requests.get(download_url)).content
		csv = csv.split() #format string to be iterable for csv.reader

		return csv
Example #33
def generateSheets(listOfCSV, fileName):
    fileName = sg.PopupGetText("enter a name for the combined Excel file", 'Excel File Name')
    writer = pd.ExcelWriter(fileName+'.xlsx', engine = 'xlsxwriter') 
    for csv in listOfCSV:
        csvSplitList = csv.split('_')
        print(csvSplitList)
        for val in csvSplitList:
            try:
                # 'and' is required here; 'or' is always true, so every
                # value (including 'aging'/'preaging') would be tried as a temp
                if val != 'aging' and val != 'preaging':
                    
                    intTemp = (int(val))

            except ValueError:
                print ('Was Not a Temp')
        stringName=(str(intTemp))
        #createMultiX()
        df = pd.read_csv(csv)
        df.to_excel(writer, sheet_name=stringName)
    writer.save() 
Example #34
def parseHeaderLine(aLine):

    tokens = aLine.split()
    outFilename = tokens[1] + ".tsv"
    fhOut = open(outFilename, 'w')

    i1 = aLine.find("(")
    i2 = aLine.find(")")

    csv = aLine[i1 + 1:i2]
    tokens = csv.split(", ")
    print(tokens)

    nTokens = len(tokens)

    fhOut.write('\t'.join(tokens))
    fhOut.write('\n')

    return (nTokens, tokens, fhOut)
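A usage sketch (the header line is hypothetical; the function creates '<second token>.tsv' in the working directory and returns its open handle):

nTokens, tokens, fhOut = parseHeaderLine("table sample1 (id, value, score)")
print(nTokens, tokens)  # 3 ['id', 'value', 'score']
fhOut.close()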
Example #35
 def convert_csvtojson(self, csv):
     try:
         array = csv.split('\n')
         header = array[0].split(',')
         ordered = []
         array.pop(0)
         i = 0
         for line in array:
             vals = line.split(',')
             ii=0
             orderedvals = OrderedDict()
             for val in vals:
                 orderedvals[header[ii]] = vals[ii]
                 ii += 1
             ordered.append(orderedvals)
             i += 1
         jsonstr = json.dumps(ordered) 
         return jsonstr
     except:
         return '{}'
Example #36
def Process(File_Name,Symbols):
    file = open(File_Name)
    fields = []
    entries = 0

    for line in file:
        current_line = line.strip()+"\n",

        if (current_line[0][0]=="_"):
            if (current_line[0].find(".")<0):

                #print current_line[0]
                csv = current_line[0].replace('\t',',').replace(' ',',').replace('\n','')
                field = csv.split(',')
                if field[0] in Symbols:
                    fields.append(field)
                    entries += 1
                #break
    #print entries, " Symbols Found"
    return fields
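A call sketch (the map-file path and symbol names are hypothetical; only lines that start with '_' and contain no '.' are parsed):

fields = Process('firmware.map', ['_main', '_start'])
print(len(fields), "symbols matched")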
Example #37
def Process(File_Name, Symbols):
    file = open(File_Name)
    fields = []
    entries = 0

    for line in file:
        current_line = (line.strip() + "\n",)

        if current_line[0][0] == "_":
            if current_line[0].find(".") < 0:

                # print current_line[0]
                csv = current_line[0].replace("\t", ",").replace(" ", ",").replace("\n", "")
                field = csv.split(",")
                if field[0] in Symbols:
                    fields.append(field)
                    entries += 1
                # break
    print entries, " Symbols Found"
    return fields
Example #38
def no_repeats(csv, sci):
    #print(type(csv))
    #print('\n')

    #print('csv',10*" ",csv)
    #print('sci',10*" ",sci)
    csv=csv.replace('\""', '')
    csv=csv.replace('  ', ' ')
    sci=sci.replace('\""', '')
    sci=sci.replace('  ', ' ')
    new_csv=[]
    new_sci=[]
    final_draft=[]
    if not csv:
        #print ('final 1',sci)
        return sci
    if not sci:
        #print ('final 2',sci)
        return csv
    if sci in csv:
        #print('passed')
        return False
    if (sci + '.') in csv:
        #print('passed')
        return False

    #FIXME: I haven't been able to separate properly if there are abbreviations.
    temp_csv=list(csv.split('. '))

    #print('csv 1',10*" ",temp_csv)
    #for ele in temp_csv:
        #if '.' in ele:
            #new_csv.append(ele.replace('.',''))
        #else:
            #new_csv.append(ele)

    temp_sci=list(sci.split ('. '))

    #for ele in temp_sci:
        #if '.' in ele:
            #new_sci.append(ele.replace('.',''))
        #else:
            #new_sci.append(ele)
    temp2_csv=[]
    for ele in temp_csv:
        temp2_csv.append(ele.lower())

    for ele1 in temp_sci:
        ele=ele1.lower()
        if ele not in temp2_csv:
            final_draft.append(ele1)

    final_draft=final_draft+temp_csv
    final_draft=str(final_draft).strip('[]')
    final_draft=final_draft.replace(',',', ')
    final_draft=final_draft.replace(',  ',', ')
    final_draft=final_draft.replace(r'\\','')#need two \\ to make it valid to read one
    final_draft=final_draft.replace(r'\\"', '')
    final_draft=final_draft.replace(r'\\\\', '')
    final_draft=final_draft.replace('.,','.')

    #comma.join(final_draft)
    #print(new_sci)
    final_draft=final_draft.replace('S, J., Costa, V., Psaroulis, V., Arzoglou, L,','S. J., Costa, V., Psaroulis, V., Arzoglou, L.')
    #print('final',10*" ",final_draft)

    return final_draft
Example #39
def csv_to_dictionary(csv):
    '''
    Transform input from "special" csv into a python dictionary with namedtuples
    as values. Note, "strings" of interest are hardcoded!

    Also, fix the re-definition of the function transform(). See
    <http://stackoverflow.com/q/30204197/1172302>
    '''
    # split input in rows
    rows = csv.split('\n')
    dictionary = {}  # empty dictionary
    fields = rows.pop(0).split('|')[1:]  # header

    strings = ('TIRS10', 'TIRS11')
    if any(string in fields for string in strings):

        def transform(row):
            '''
            Transform an input row in to a named tuple, then feed it in to a
            dictionary.
            '''
            # split row in elements
            elements = row.split('|')

            # key: 1st column, replace
            key = replace_dot_comma_space(elements[0])

            # namedtuple
            ect = namedtuple(key, [fields[0], fields[1]])

            # feed namedtuples
            ect.TIRS10 = is_number(elements[1])
            ect.TIRS11 = is_number(elements[2])

            # feed dictionary
            dictionary[key] = dictionary.get(key, ect)

    strings = ('b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7')
    if any(string in fields for string in strings):

        def transform(row):
            '''
            Transform an input row in to a named tuple, then feed it in to a
            dictionary.
            '''
            # split row in elements
            elements = row.split('|')

            # key: 1st column, replace
            key = replace_dot_comma_space(elements[0])

            # *** small modification for the CWV field ***
            fields[0] = 'cwv'

            # named tuples
            cwv = namedtuple(key,
                             [replace_dot_comma_space(fields[0]),
                              replace_dot_comma_space(fields[1]),
                              replace_dot_comma_space(fields[2]),
                              replace_dot_comma_space(fields[3]),
                              replace_dot_comma_space(fields[4]),
                              replace_dot_comma_space(fields[5]),
                              replace_dot_comma_space(fields[6]),
                              replace_dot_comma_space(fields[7]),
                              replace_dot_comma_space(fields[8]),
                              replace_dot_comma_space(fields[9])])

            # feed named tuples
            cwv.subrange = to_tuple(elements[1])
            cwv.b0 = is_number(elements[2])
            cwv.b1 = is_number(elements[3])
            cwv.b2 = is_number(elements[4])
            cwv.b3 = is_number(elements[5])
            cwv.b4 = is_number(elements[6])
            cwv.b5 = is_number(elements[7])
            cwv.b6 = is_number(elements[8])
            cwv.b7 = is_number(elements[9])
            cwv.rmse = is_number(elements[10])
            dictionary[key] = dictionary.get(key, cwv)  # feed dictionary

    # use an explicit loop: under Python 3, map() is lazy, so transform()
    # would never actually run on the rows
    for row in rows:
        transform(row)
    return dictionary
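A minimal input sketch (the pipe-delimited text is hypothetical; assumes the replace_dot_comma_space() and is_number() helpers referenced above are defined in the same module):

text = "Sensor|TIRS10|TIRS11\nLandsat8|0.1|0.2"
d = csv_to_dictionary(text)
print(d['Landsat8'].TIRS10)  # 0.1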
Example #40
    def post(self):
        if str(self.request.get('upload')) != 'True':
            check = ''
            filedisabled = 'disabled'
            inputdatadisabled = ''
            raw_data = self.request.get('inputdata')
            split_line = raw_data.split('\r\n')
            split_value = []
            for line in split_line:
                split_value.append(line.split(', '))
            data = []
            for line in split_value:
                new_line = []
                new_line.append(str(line[0]))
                new_line.append(float(line[1]))
                new_line.append(float(line[2]))
                data.append(new_line)
        else:
            check = 'checked'
            filedisabled = ''
            inputdatadisabled = 'disabled'
            csv = self.request.get('csv')
            split_line = csv.split('\r\n')[:-1]
            split_value = []
            for line in split_line:
                split_value.append(line.split(','))
            data = []
            for line in split_value:
                new_line = []
                new_line.append(line[0])
                new_line.append(float(line[1]))
                new_line.append(float(line[2]))
                data.append(new_line)
        self.response.out.write('''
<html>
    <head>
        <title>Graphing Results</title>
        <link href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.15/themes/ui-lightness/jquery-ui.css" rel="stylesheet" type="text/css"/>
        <script src="//ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js" type="text/javascript"></script>
        <script src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.15/jquery-ui.min.js"></script>
        <script type="text/javascript" src="https://www.google.com/jsapi"></script>
        <script type="text/javascript ">

        google.load('visualization', '1', {packages:['table']});
        google.load('visualization', '1', {packages:['corechart']});
        google.setOnLoadCallback(drawTable);
        google.setOnLoadCallback(drawColumnChart);

        var data = '''+str(data)+''';

        function drawTable() {
            var table = new google.visualization.DataTable();
            table.addColumn('string', 'Date');
            table.addColumn('number', 'Principal ($)');
            table.addColumn('number', 'Interest ($)');
            
            table.addRows(data.length);
            for (var i=0;i<data.length;i++) {
                table.setCell(i, 0, data[i][0]);
                table.setCell(i, 1, data[i][1]);
                table.setCell(i, 2, data[i][2]); }
            
            var formatter = new google.visualization.NumberFormat();
            formatter.format(table, 1);
            formatter.format(table, 2);
            
            var datatable = new google.visualization.Table(document.getElementById('table_div'));
            datatable.draw(table, {width:500}); }

        function drawColumnChart() {
            var coldata = new google.visualization.DataTable();
            coldata.addColumn('string', 'Date');
            coldata.addColumn('number', 'Principal');
            coldata.addColumn('number', 'Interest');
            
            coldata.addRows(data.length);
            for (var i=0;i<data.length;i++) {
                coldata.setValue(i, 0, data[i][0]);
                coldata.setValue(i, 1, data[i][1]);
                coldata.setValue(i, 2, data[i][2]); }
                
            var formatter = new google.visualization.NumberFormat({prefix:'$'});
            formatter.format(coldata, 1);
            formatter.format(coldata, 2);
            
            var columnchart = new google.visualization.ColumnChart(document.getElementById('columnchart_div'));
            columnchart.draw(coldata, {width:1500, height:700, title:'Cash Flow', isStacked:true,
                hAxis:{title:'Date'}, legend:'right', vAxis:{title:'Payment', format:'$#,###'}}); }

        function filecheck(){
            if (document.getElementById('uploadfield').checked == true) {
                document.getElementById('filefield').disabled = false;
                document.getElementById('inputdatafield').disabled = true; }
            else {
                document.getElementById('filefield').disabled = true;
                document.getElementById('inputdatafield').disabled = false; } }
        </script>
        <script>
            function home() {window.location="/"; }
            function logout() {location.replace("'''+users.create_logout_url("/")+'''"); }
            
            $(document).ready(function() {
                $("#tabs").tabs();
                $("button").button();
                $("#accordion1").accordion({active: false, collapsible: true});
                $("#accordion2").accordion({collapsible: true, autoHeight: false}); });
        </script>
    </head>
    <body style="font-size:62.5%;">
        <button type="button" onclick="home();");">Back to Home</button>
        <button type="button" onclick="logout();");">Sign Out</button>
        <div id="accordion1">
            <h3><a href="#">Cash Flow Grapher</a></h3>
            <div>
                <form action="/grapher" enctype="multipart/form-data" method="post">
                    <table style="text-align:center; font-size:12px;" align="center" cellpadding="5">
                        <tr><td>Input Data<br />(date, principal, interest)</td><td><textarea id="inputdatafield" name="inputdata" rows="10" cols="50" '''+inputdatadisabled+'''>'''+self.request.get('inputdata')+'''</textarea></td></tr>
                        <tr><td colspan="2"><p style="font-size:10px;">Every line requires all three values separated by commas. Enter 0 if there is no principal and/or interest.<br />
                        Make sure there is no extra empty line at end of dataset.</p></td></tr>
                        <tr><td>Upload CSV File <input id="uploadfield" onclick="filecheck();" type="checkbox" name="upload" value="True" '''+check+''' /></td>
                            <td><input id="filefield" type="file" name="csv" '''+filedisabled+''' /></td></tr>
                        <tr><td colspan="2"><p style="font-size:10px;">Must be a CSV file with three columns in this order: date, principal, interest.<br />
                        Remove all headers and extra data. Remove all number formatting.</p></td></tr>
                        <tr><td colspan="2"><button type="submit">Graph Cash Flow</button></td></tr>
                    </table>
                </form>
            </div>
        </div>
        <div id="accordion2">
            <h3><a href="#">Results</a></h3>
            <div>
                <div id="tabs">
                    <ul>
                        <li><a href="#table_div">Cash Flow</a></li>
                        <li><a href="#columnchart_div">Time Chart of Cash Flow</a></li>
                    </ul>
                    <div id="table_div" align="center"></div><br />
                    <div id="columnchart_div" align="center"></div>
                </div>
            </div>
        </div>
    </body>
</html>
        ''')
Example #41
    def post(self):
        if str(self.request.get('upload')) != 'True':
            check = ''
            filedisabled = 'disabled'
            inputdatadisabled = ''
            raw_data = self.request.get('inputdata')
            split_line = raw_data.split('\r\n')
            split_value = []
            for line in split_line:
                split_value.append(line.split(', '))
            data = []
            for line in split_value:
                new_line = []
                new_line.append(str(line[0]))
                new_line.append(float(line[1]))
                new_line.append(float(line[2]))
                data.append(new_line)
        else:
            check = 'checked'
            filedisabled = ''
            inputdatadisabled = 'disabled'
            csv = self.request.get('csv')
            split_line = csv.split('\r\n')[:-1]
            split_value = []
            for line in split_line:
                split_value.append(line.split(','))
            data = []
            for line in split_value:
                new_line = []
                new_line.append(line[0])
                new_line.append(float(line[1]))
                new_line.append(float(line[2]))
                data.append(new_line)
        self.response.out.write('''
        <html>
        <head>
        <title>Graphing Results</title>
        <style type="text/css">
        body {font-family:arial}
        </style>
        <script type="text/javascript" src="https://www.google.com/jsapi"></script>
        <script type="text/javascript ">

        google.load('visualization', '1', {packages:['table']});
        google.load('visualization', '1', {packages:['corechart']});
        google.setOnLoadCallback(drawTable);
        google.setOnLoadCallback(drawColumnChart);

        var data = '''+str(data)+''';

        function drawTable()
        {
            var table = new google.visualization.DataTable();
            table.addColumn('string', 'Date');
            table.addColumn('number', 'Principal ($)');
            table.addColumn('number', 'Interest ($)');
            table.addRows(data.length);
            for (var i=0;i<data.length;i++){
                table.setCell(i, 0, data[i][0]);
                table.setCell(i, 1, data[i][1]);
                table.setCell(i, 2, data[i][2]);}
            
            var formatter = new google.visualization.NumberFormat();
            formatter.format(table, 1);
            formatter.format(table, 2);
            var datatable = new google.visualization.Table(document.getElementById('table_div'));
            datatable.draw(table, {width:400, page:'enable', pageSize:60});
        }


        function drawColumnChart()
        {
            var coldata = new google.visualization.DataTable();
            coldata.addColumn('string', 'Date');
            coldata.addColumn('number', 'Principal');
            coldata.addColumn('number', 'Interest');
            coldata.addRows(data.length);
            for (var i=0;i<data.length;i++){
                coldata.setValue(i, 0, data[i][0]);
                coldata.setValue(i, 1, data[i][1]);
                coldata.setValue(i, 2, data[i][2]);}
                
            var formatter = new google.visualization.NumberFormat({prefix:'$'});
            formatter.format(coldata, 1);
            formatter.format(coldata, 2);
            var columnchart = new google.visualization.ColumnChart(document.getElementById('columnchart_div'));
            columnchart.draw(coldata, {width:1500, height:700, title:'Cash Flow', isStacked:true,
                                hAxis:{title:'Date'}, legend:'top', vAxis:{title:'Payment', format:'$#,###'}})
        }

        function filecheck(){
            if (document.getElementById('uploadfield').checked == true){
                document.getElementById('filefield').disabled = false;
                document.getElementById('inputdatafield').disabled = true;}
            else {
                document.getElementById('filefield').disabled = true;
                document.getElementById('inputdatafield').disabled = false;}}
        </script>
        </head>
        
        <body>
        <p><a href="/">Back to Home</a></p>
        <h1 style="text-align:center;">Cash Flow Grapher</h1>
        <form action="/grapher" enctype="multipart/form-data" method="post">
        <table align="center" cellpadding="10">
        <tr><th>Input Data<br />(date, principal, interest)</th><th><textarea id="inputdatafield" name="inputdata" rows="10" cols="50" '''+inputdatadisabled+'''>'''+self.request.get('inputdata')+'''</textarea></th></tr>
        <tr><td colspan="2"><p style="font-size:10px; text-align:center;">Every line requires all three values separated by commas. Enter 0 if there is no principal and/or interest.<br />
        Make sure there is no extra empty line at end of dataset.</p></td></tr>
        <tr><th>Upload CSV File <input id="uploadfield" onclick="filecheck();" type="checkbox" name="upload" value="True" '''+check+''' /></th>
            <th><input id="filefield" type="file" name="csv" '''+filedisabled+''' /></th></tr>
        <tr><td colspan="2"><p style="font-size:10px; text-align:center;">Must be a CSV file with three columns in this order: date, principal, interest.<br />
        Remove all headers and extra data from the file. Remove all number formatting.</p></td></tr>
        <tr><th colspan="2"><input type="submit" value="Graph Cash Flow" /></th></tr>
        </table>
        </form>
        <br /><hr />
        
        <h1>Results</h1>
        <a href="#outputdata">Output Data</a><br />
        <a href="#columnchart">Time Series Cash Flow Chart</a><br /><br />
        <a name="outputdata"><a href="#top">Back to Top</a></a>
        <h2>Cash Flow</h2>
        <div id="table_div"></div><br />
        <a name="columnchart"><a href="#top">Back to Top</a></a>
        <h2>Time Chart of Cash Flow</h2>
        <div id="columnchart_div"></div>
        </body>
        </html>
        ''')