示例#1
0
def write_full_link_data():
    """Reduce the raw trip / travel-time tables to the selected link columns.

    For each of the two source tables (``filenames['data_trips']`` and
    ``filenames['data_traveltimes']``) every CSV row is cut down to the
    columns named in ``filenames['full_link_ids']``, converted to ``float``
    (empty cells become NaN) and the list of rows is written as JSON to
    ``filenames['full_link_trips']`` / ``filenames['full_link_traveltimes']``.

    The current input line number is printed after each row as a progress
    indicator.

    Returns:
        None
    """
    from numpy import loadtxt
    from csv import reader as csvreader
    from json import dump
    from numpy import nan

    # ndmin=1 keeps the result iterable even when the id file holds one id.
    full_link_ids = loadtxt(filenames['full_link_ids'], dtype='int', ndmin=1)

    jobs = (
        ('data_trips', 'full_link_trips'),
        ('data_traveltimes', 'full_link_traveltimes'),
    )
    for source_key, target_key in jobs:
        rows = []
        # Text mode: csv.reader needs str lines on Python 3.
        with open(filenames[source_key], 'r') as readfile:
            reader = csvreader(readfile)
            for line in reader:
                # Link id i is read from column i - 1 of the row.
                # A real list (not a lazy map object) is needed for json.dump.
                rows.append([float(line[i - 1]) if line[i - 1] else nan
                             for i in full_link_ids])
                print(reader.line_num)
        # The context manager flushes and closes the output file.
        with open(filenames[target_key], 'w') as writefile:
            dump(rows, writefile)

    return None
示例#2
0
 def get_sheet_configuration(self, sheet_name):
     """Return the configuration cells from the first row of a sheet CSV.

     The file ``<self.input_name>/<sheet_name>.csv`` is opened and only its
     first row is read.  When that row starts with a ``#`` cell, the
     remaining cells of the row are returned; otherwise (including when the
     file is empty) an empty list is returned.
     """
     sheet_path = os.path.join(self.input_name, sheet_name + '.csv')
     # Compare the numeric major version; comparing sys.version as a string
     # is lexicographic and therefore fragile.
     if sys.version_info[0] >= 3:
         with open(sheet_path, encoding=self.encoding) as main_sheet_file:
             r = csvreader(main_sheet_file)
             # Default of [] so an empty file does not raise StopIteration.
             heading_row = next(r, [])
     else:  # Python 2
         with open(sheet_path) as main_sheet_file:
             # NOTE(review): this branch passes ``encoding`` to the reader, so
             # csvreader is presumably an encoding-aware reader here - confirm.
             r = csvreader(main_sheet_file, encoding=self.encoding)
             heading_row = next(r, [])
     if len(heading_row) > 0 and heading_row[0] == '#':
         return heading_row[1:]
     return []
示例#3
0
def main(argv):
    """Augment a raw capture CSV with current, voltage, power and time offset.

    ``argv[1]`` names a comma-separated file whose first three columns are
    a timestamp, channel A and channel B.  For every row the channels are
    converted with ``channelToCurrent`` / ``channelToVoltage``, the power is
    computed as their product, and one comma-separated line is printed:
    ``time, channelA, channelB, current, voltage, power, offsettime`` where
    ``offsettime`` is the timestamp minus the first row's timestamp.

    Prints a usage line and returns when no file name is given.
    """
    if len(argv) < 2:
        print("Usage: {0} <csv file>".format(argv[0]))
        return

    filename = argv[1]
    mintime = None

    # ``with`` closes the file even if a row fails to convert.
    with open(filename, "r") as csvfile:
        for line in csvreader(csvfile, delimiter=','):
            time = line[0]
            channelA = line[1]
            channelB = line[2]

            current = channelToCurrent(channelA)
            voltage = channelToVoltage(channelB)
            power = current * voltage

            timestamp = int(time)
            # ``is None`` rather than truthiness: a first timestamp of 0 must
            # stay the reference instead of being replaced on the next row.
            if mintime is None:
                mintime = timestamp
            offsettime = timestamp - mintime

            output = [
                time, channelA, channelB, current, voltage, power, offsettime
            ]

            print(','.join([str(x) for x in output]))
示例#4
0
def read_file(csvfilename):
    """
    Load trips from a CSV file in which every data row is one leg.

    A row whose first cell is ``RECORD`` is taken as the header for the rows
    that follow it.  Every other non-empty row is turned into a
    ``{column: value}`` dict and added to the ``Trip`` that shares its
    RECORD id, creating that ``Trip`` on first sight.

    Returns a dict mapping RECORD id to ``Trip``.
    """
    trips = {}
    header = None
    with open(csvfilename) as csvfile:
        for row in csvreader(csvfile):
            if len(row) == 0:
                continue
            if row[0] == 'RECORD':
                header = row
                continue
            # Any other row is one leg of a trip.
            leg = dict(zip(header, row))
            record = leg['RECORD']
            if record not in trips:
                trips[record] = Trip(record)
            trips[record].add_leg(leg)
    return trips
示例#5
0
def Phase2(link_id_list, trips=TRIPS):
    """Decompose the trend of each requested link against the basis W.

    Loads W from ``filenames['W_trips']``, then walks the rows of
    ``filenames['data_trips_transpose']`` in ascending link-id order.  For
    every link whose row is not entirely empty it plots a 48-sample slice of
    the trend, calls ``find_decomposition(trend, W)``, appends the error to
    ``./Phase2_results.txt`` and collects the coefficients.  Finally it
    writes ``Errors.csv`` with the link ids on the first line and the
    collected errors on the second.

    Returns a tuple ``(array(HT).T, red_dots)``: the transposed coefficient
    matrix and the plot handle of the last plotted link.

    NOTE(review): ``trips`` is not used anywhere in the body.
    NOTE(review): ``red_dots`` is unbound if every link is skipped.
    NOTE(review): ``Errors.csv`` lists every requested link id, but ``E`` only
    holds errors for links that were not skipped, so columns may not line up.
    """
    from numpy import array, nan, loadtxt, isnan
    from csv import reader as csvreader
    W = loadtxt(filenames['W_trips'])    # W = 8760 x 50
    print 'W read', W.shape
    HT = []
    # Ascending order is required: the reader below can only move forward.
    link_id_list = sorted(link_id_list)
    # NOTE(review): this file handle is never closed explicitly.
    reader = csvreader(open(filenames['data_trips_transpose'], 'rb'))
    link_id_old = 0
    E = []
    for link_id in link_id_list:
        # Advance (link_id - link_id_old) rows so that `trend` ends up being
        # row number link_id (counting from 1) of the file.
        for skip in range(link_id_old,link_id):
            trend = reader.next()
        link_id_old = link_id
        # Empty cells become NaN, everything else is parsed as float.
        trend = [float(entry) if bool(entry) else nan for entry in trend]
        # Skip links that have no numeric value at all.
        if sum(~isnan(trend)) == 0:
            continue
        # trend should have length 8760 with NaNs
        # Plot the 48 samples at indices 576..623 of the trend.
        red_dots, = plot(range(1,49),trend[0+24*24:48+24*24],'ro')
        coeffs, error = find_decomposition(trend, W)
        E.append(error)
        print link_id,'\t', error
        # Append this link's error to the running results file.
	with open('./Phase2_results.txt','ab') as writefile:
	    writefile.write(str(link_id)+'\t'+str(error)+'\n')
        HT.append(coeffs)
    with open('Errors.csv','wb') as Errorfile:
        Errorfile.write(','.join(map(str,link_id_list)))
        Errorfile.write('\n')
        Errorfile.write(','.join(map(str,E)))
    return array(HT).T, red_dots # This is H for link_id_list
示例#6
0
def applyTyp(val: str, typ: type) -> object:
    """Convert ``val`` according to ``typ`` and return the result.

    ``typ`` may be a module (its ``__name__`` is used), a key of
    ``TYPE_ALIASES``, one of ``bytes`` / ``bool`` / ``str``, the strings
    ``'json'`` / ``'csv'``, or any other callable, which is applied to
    ``val`` directly.
    """
    if ismodule(typ):
        typ = typ.__name__
    typ = TYPE_ALIASES.get(typ, typ)

    if typ is bytes:
        return val

    if typ is bool:
        return val == b'True'

    if typ is str:
        text = val.decode() if type(val) in (bytes, bytearray) else val
        # Strip one pair of matching surrounding quotes, if present.
        if len(text):
            first, last = text[0], text[-1]
            if (first == '"' and last == '"') or (first == "'" and last == "'"):
                text = text[1:-1]
        return text

    if typ == 'json':
        return json.loads(val)

    if typ == 'csv':
        text = val.decode() if type(val) is bytes else val
        if len(text) == 1:
            return []  # val is empty
        rows = list(csvreader(text.split('\n')))
        # A single parsed row is returned flat rather than nested.
        return rows[0] if len(rows) == 1 else rows

    return typ(val)
示例#7
0
def get_dataset_csv(csv_to_path, casc_path, min_face_dim=(200, 200)):
    """Collect cropped face images and labels from a ``|``-delimited listing.

    Every row must hold exactly two cells, an image path and an integer
    label; any other row raises ``ParserExecption``.  Each image is loaded,
    converted to grayscale and passed to a ``FaceDetector``.  When exactly one
    face is found, the cropped face is stored, shown briefly in a window and
    its label recorded; otherwise a warning is written to stderr and the row
    is skipped.

    Returns a tuple ``(pictures, labels)``.
    """
    detector = FaceDetector(casc_path, min_face_dim=min_face_dim)
    pictures, labels = [], []
    with open(csv_to_path, 'r') as csvfile:
        for row in csvreader(csvfile, delimiter="|"):
            if len(row) != 2:
                raise ParserExecption("Your csv seems to be uncorrect.")

            gray = cvtColor(imread(row[0]), COLOR_RGB2GRAY)
            found = detector.detect(gray)
            if len(found) != 1:
                warning = "Warning: Invalid detection on {}, {} faces detected"
                print(warning.format(row[0], len(found)), file=stderr)
                continue

            x, y, w, h = found[0]
            pictures.append(gray[y:y + h, x:x + w])
            imshow("Adding faces to traning set...", gray[y:y + h, x:x + w])
            waitKey(50)
            labels.append(int(row[1]))
    destroyAllWindows()
    return pictures, labels
示例#8
0
def the_loop(proc, args):
    """Poll the workers until the summed operation count exceeds ``args['o']``.

    Args:
        proc: iterable of objects with a ``get_nowait()`` method returning
            either ``None`` or a comma-separated line of numbers.
        args: mapping with ``'n'`` (number of data slots) and ``'o'`` (the
            operation threshold; values ``<= 0`` disable the exit check).

    Each line received from worker ``i`` replaces ``data[i]`` with its
    values as floats.  When a pass over all workers yields nothing the loop
    sleeps 0.6 s.  Otherwise, if the threshold is enabled, the fifth value
    of every slot that has at least five values is summed and the function
    returns once that sum exceeds the threshold.
    """
    from time import sleep
    from csv import reader as csvreader
    try:  # Python 2
        from StringIO import StringIO
    except ImportError:  # Python 3
        from io import StringIO

    # A real list is required: slots are replaced by index below.
    data = [[0, 0, 0, 0] for _ in range(args['n'])]

    while True:
        got_something = False
        for i, p in enumerate(proc):
            l = p.get_nowait()
            if l is None:
                continue
            for row in csvreader(StringIO(l), delimiter=','):
                data[i] = [float(s) for s in row]
            got_something = True

        if not got_something:
            sleep(0.6)
        elif args['o'] > 0:
            # Slots still holding the 4-element placeholder contribute 0.
            operations = sum(0 if len(l) < 5 else l[4] for l in data)
            if operations > args['o']:
                print('Reached %i operations, exiting...' % args['o'])
                return
示例#9
0
def _get_clearpond():
    """Load CLEARPOND neighbourhood densities into two word-keyed dicts.

    The docstring of the original notes that this function is
    :func:`~.utils.memoized` because it is called so often.

    Returns
    -------
    dict
        `Dict` with the keys `orthographic` and `phonological`.  Each maps a
        lower-cased word (column 0 of the file) to an integer density:
        `orthographic` uses column 5 (CLEARPOND's `OTAN`), `phonological`
        uses column 29 (CLEARPOND's `PTAN`).

    Raises
    ------
    Exception
        If a word appears more than once in the file.

    """

    logger.debug('Loading Clearpond data')

    orthographic, phonological = {}, {}
    with open(settings.CLEARPOND, encoding='iso-8859-2') as csvfile:
        for fields in csvreader(csvfile, delimiter='\t'):
            word = fields[0].lower()
            if word in phonological:
                message = ("'{}' is already is Clearpond phonological "
                           'dictionary')
                raise Exception(message.format(word))
            if word in orthographic:
                message = ("'{}' is already is Clearpond orthographic "
                           'dictionary')
                raise Exception(message.format(word))
            orthographic[word] = int(fields[5])
            phonological[word] = int(fields[29])

    result = {}
    result['orthographic'] = orthographic
    result['phonological'] = phonological
    return result
示例#10
0
def the_loop(proc, args):
    """Collect CSV lines from the workers until enough operations are seen.

    Args:
        proc: iterable of objects whose ``get_nowait()`` returns ``None``
            or one comma-separated line of numeric fields.
        args: mapping providing ``'n'`` (how many result slots to keep) and
            ``'o'`` (operation count that ends the loop; ``<= 0`` means the
            loop never exits on its own).

    The latest line of worker ``i`` is stored, converted to floats, in slot
    ``i``.  A pass that receives nothing sleeps for 0.6 s.  After a pass
    that received something, the fifth field of every slot with at least
    five fields is summed; the function prints a message and returns when
    that sum is greater than ``args['o']``.
    """
    from time import sleep
    from csv import reader as csvreader
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    # Built as a list so the per-worker slots can be reassigned by index.
    data = [[0, 0, 0, 0] for _ in range(args['n'])]

    while True:
        got_something = False
        for i, p in enumerate(proc):
            l = p.get_nowait()
            if l is not None:
                for row in csvreader(StringIO(l), delimiter=','):
                    data[i] = [float(s) for s in row]
                got_something = True

        if not got_something:
            sleep(0.6)
            continue

        if args['o'] > 0:
            # Placeholder slots have only four fields and count as zero.
            total = sum(slot[4] for slot in data if len(slot) >= 5)
            if total > args['o']:
                print('Reached %i operations, exiting...' % args['o'])
                return
示例#11
0
def applyTyp(val: str, typ: type) -> object:
    """Return ``val`` converted as described by ``typ``.

    ``typ`` is first looked up in ``TYPE_ALIASES``.  ``bytes`` returns the
    value untouched, ``bool`` compares it against ``b'True'``, ``str``
    decodes bytes and strips one pair of surrounding quotes, ``'json'``
    parses JSON, ``'csv'`` parses comma-separated lines, and anything else
    is called with ``val``.
    """
    typ = TYPE_ALIASES.get(typ, typ)

    if typ is bytes:
        return val

    if typ is bool:
        # bool() of any non-empty value is True, hence the explicit compare.
        return val == b'True'

    if typ is str:
        if type(val) in (bytes, bytearray):
            val = val.decode()
        # Drop matching surrounding quotes (either "..." or '...').
        if len(val):
            head, tail = val[0], val[-1]
            double_quoted = head == '"' and tail == '"'
            single_quoted = head == "'" and tail == "'"
            if double_quoted or single_quoted:
                val = val[1:-1]
        return val

    if typ == 'json':
        return json.loads(val)

    if typ == 'csv':
        if type(val) is bytes:
            val = val.decode()
        if len(val) == 1:
            return []  # val is empty
        parsed = list(csvreader(val.split('\n')))
        if len(parsed) == 1:
            return parsed[0]
        return parsed

    return typ(val)
示例#12
0
def find_catids_early(datafile_path):
    """Scan the CSV file to learn what cat IDs it contains.

    The second column of every row is matched, case-insensitively, against
    ``M`` or ``F`` followed by one or more digits.  Rows with fewer than two
    columns are ignored.

    Returns the distinct matching IDs in order of first appearance, or
    ``False`` when none were found.
    """
    # Raw string so ``\d`` is a regex escape, not a string escape.
    catid_regex = re.compile(r'^(M|F)[\d]+$', re.I)

    seen = set()
    catids = []
    # Text mode: csv.reader requires str lines on Python 3.
    with open(datafile_path, 'rt') as datafile:
        for csvrow in csvreader(datafile):
            try:
                catid = csvrow[1]
            except IndexError:
                continue
            if catid in seen:
                continue
            if catid_regex.match(catid):
                catids.append(catid)
                seen.add(catid)

    # Callers receive False (not an empty list) when nothing matched.
    if len(catids) > 0:
        return catids
    return False
示例#13
0
        def get_data():
            """
            Ask the user for the openPO report file and return its rows.

            A file dialog is shown; the chosen file is passed to
            ``copy_file`` and then parsed as tab-delimited text.

            Returns a list of rows (each a list of strings), or an empty
            list when the dialog is cancelled.
            """
            path = filedialog.askopenfilename(
                title="Location of openPO",
                initialdir=r"%USER%\Desktop",
                filetypes=[
                    ("Plain Text", "*.txt"),
                    ("CSV", "*.csv"),
                ],
            )

            # A cancelled dialog returns an empty value; opening that would
            # raise, so report "no data" instead.
            if not path:
                return []

            copy_file(path)

            with open(path, "r") as csvfile:
                return list(csvreader(csvfile, delimiter="\t"))
示例#14
0
 def testversion(self):
     """открыть файл на чтение и найти там строку ;FILE_FORMAT=1, ;FILE_FORMAT=2
     если ни одна строка не будет найдена, проверить следующие три предположения
     1. может быть файл пуст
     2. в файле нет программы
     3. файл содержит некорректный формат
     """
     try:
         self.format = None
         file = open(self.path2prg, 'r')
         for line in file:
             if ";FILE_FORMAT=1" in line:
                 self.format = "v1"
             elif ";FILE_FORMAT=2" in line:
                 self.format = "v2"
         file.close()
         if self.format==None: #возможно, это prg v1 без заголовка
             reader = csvreader(open(self.path2prg, 'r'), delimiter=" ", skipinitialspace=True)
             row = reader.__next__()
             if (len(row)>=5):
                 print("prg v1?")
                 self.format="v1"
     except OSError as err:
         print(err)
     return self.format
示例#15
0
def find_catids_early(datafile_path):
    """Scan the CSV file to learn what cat IDs it contains.

    The column selected by ``cfg_data_column_catid`` is matched,
    case-insensitively, against ``M`` or ``F`` followed by one or more
    digits.  Rows too short to contain that column are ignored.

    Returns the distinct matching IDs in order of first appearance, or
    ``False`` when none were found.
    """
    # Raw string so ``\d`` is a regex escape, not a string escape.
    catid_regex = re.compile(r'^(M|F)[\d]+$', re.I)
    catid_column = int(cfg_data_column_catid)

    seen = set()
    catids = []
    with open(datafile_path, 'rt') as datafile:
        for csvrow in csvreader(datafile):
            # The lookup must sit inside the try: it is the only statement
            # that can raise IndexError for a short row.
            try:
                temp_catid = csvrow[catid_column]
            except IndexError:
                continue
            if temp_catid in seen:
                continue
            if catid_regex.match(temp_catid):
                catids.append(temp_catid)
                seen.add(temp_catid)

    # Callers receive False (not an empty list) when nothing matched.
    if len(catids) > 0:
        return catids
    return False
示例#16
0
def ReadObstaclesGeomsFile(csv_file):
    """Read obstacle outlines from a CSV file and build one polygon per name.

    The first row is skipped.  Each following row supplies, in its first
    four cells, a name, a corner number (unused), and the x / y of one
    vertex.  Vertices are grouped by name in file order, the first vertex
    is repeated at the end of each group, and each group is stored as an
    array under ``'vertices'`` and as a ``Polygon`` under ``'polygon'``.

    Returns a dict mapping name to ``{'vertices': ..., 'polygon': ...}``.
    """
    geoms = dict()
    # Python 2 : with open(csv_file, 'rb') as csvfile
    with open(csv_file, 'r') as csvfile:
        rows = csvreader(csvfile, delimiter=',')
        next(rows)  # skip the first row of the CSV file
        for row in rows:
            name, _corner_num, x, y = row[:4]
            vertex = [float(x), float(y)]
            geoms.setdefault(name, {'vertices': []})['vertices'].append(vertex)

        for geom in geoms.values():
            outline = geom['vertices']
            outline.append(outline[0])  # add a copy of the first point
            geom['vertices'] = array(outline)
            geom['polygon'] = Polygon(geom['vertices'])
    return geoms
示例#17
0
    def scheduleParser(self):
        """Parse the schedule CSV into a per-id list of shifts.

        The file named by ``self.ui.fileLocation_2`` is read, skipping its
        first row.  For every row, columns 1 and 2 are parsed as times and
        reduced to seconds since midnight (hours and minutes only), and
        columns 3 and 4 are split on commas.

        Returns a dict mapping each id from column 3 (as ``str``) to a list
        of ``(column 0, start seconds, end seconds, day)`` tuples, one per
        entry of column 4.
        """
        shifts = []
        with open(self.ui.fileLocation_2.text(), "r") as f:
            rows = csvreader(f)
            next(rows)
            for row in rows:
                begins = parser.parse(row[1])
                start_seconds = (begins.hour * 3600) + (begins.minute * 60)

                ends = parser.parse(row[2])
                end_seconds = (ends.hour * 3600) + (ends.minute * 60)

                shifts.append((row[0], start_seconds, end_seconds,
                               row[3].split(","), row[4].split(",")))

        schedule = {}
        for shift_id, start_seconds, end_seconds, ids, days in shifts:
            for pid in ids:
                entries = schedule.setdefault(str(pid), [])
                entries.extend(
                    (shift_id, start_seconds, end_seconds, day) for day in days)

        return schedule
示例#18
0
def get_mp_data():
    """Return the rows of ``antimp.csv`` with a trailing ``0`` added to each."""
    with open("antimp.csv") as fp:
        return [row + [0] for row in csvreader(fp)]
示例#19
0
 def testversion(self):
     """открыть файл на чтение и найти там строку ;FILE_FORMAT=1, ;FILE_FORMAT=2
     если ни одна строка не будет найдена, проверить следующие три предположения
     1. может быть файл пуст
     2. в файле нет программы
     3. файл содержит некорректный формат
     """
     try:
         self.format = None
         file = open(self.path2prg, 'r')
         for line in file:
             if ";FILE_FORMAT=1" in line:
                 self.format = "v1"
             elif ";FILE_FORMAT=2" in line:
                 self.format = "v2"
         file.close()
         if self.format == None:  #возможно, это prg v1 без заголовка
             reader = csvreader(open(self.path2prg, 'r'),
                                delimiter=" ",
                                skipinitialspace=True)
             row = reader.__next__()
             if (len(row) >= 5):
                 print("prg v1?")
                 self.format = "v1"
     except OSError as err:
         print(err)
     return self.format
示例#20
0
def read_cac_scores(filename):
    """Map the second column of each data row to its fourth-from-last column.

    The first row of the CSV file is treated as a header and skipped.  Keys
    are stripped strings, values are floats.
    """
    with open(filename) as csvfile:
        rows = csvreader(csvfile)
        next(rows, None)  # skip first row = header
        return {row[1].strip(): float(row[-4].strip()) for row in rows}
示例#21
0
 def nameFromSymbol(symbol):
     """Return the second column of the first row whose first column is ``symbol``.

     Rows come from ``data/fullCompanyList.csv``; ``None`` is returned when
     no row matches.
     """
     with open("data/fullCompanyList.csv", "r") as f:
         matches = (entry[1] for entry in csvreader(f) if entry[0] == symbol)
         return next(matches, None)
示例#22
0
def load_element_data():
    """Return the rows of ``<ROOT_DIR>data/element.csv`` as lists of strings.

    The file is parsed as tab-delimited text.
    """
    # Text mode: csv.reader needs str lines on Python 3.  The ``with``
    # block closes the file even if parsing fails.
    with open(ROOT_DIR + 'data/element.csv', 'r') as element_file:
        reader = csvreader(element_file, delimiter='\t')
        return [list(row) for row in reader]
示例#23
0
def load_element_data():
    """Load ``<ROOT_DIR>data/element.csv`` (tab-delimited) into a list of rows.

    Each row is returned as a list of the cell strings.
    """
    # Using a context manager guarantees the file is closed on every path,
    # including when the reader raises part-way through.
    with open(ROOT_DIR + 'data/element.csv', 'r') as element_file:
        return [list(row) for row in csvreader(element_file, delimiter='\t')]
示例#24
0
 def get_sheet_configuration(self, sheet_name):
     """Return the configuration cells from the first row of a sheet CSV.

     Only the first row of ``<self.input_name>/<sheet_name>.csv`` is read,
     using ``self.encoding``.  If its first cell is ``#``, the rest of the
     row is returned; in every other case, including an empty file, the
     result is an empty list.
     """
     sheet_path = os.path.join(self.input_name, sheet_name + ".csv")
     with open(sheet_path, encoding=self.encoding) as main_sheet_file:
         # Default of [] so an empty file does not raise StopIteration.
         heading_row = next(csvreader(main_sheet_file), [])
     if len(heading_row) > 0 and heading_row[0] == "#":
         return heading_row[1:]
     return []
    def f_training_data(self):
        """Build the shuffled training set from ``data/training_tennis.csv``.

        The total number of rows is printed.  The first 800 rows supply the
        data: column 0 is the sentiment and column 1 the tweet.  The tweets
        are passed through ``self.f_process_tweets`` and paired with their
        sentiment.

        Returns a shuffled list of ``(processed_tweet, sentiment)`` tuples,
        i.e. the ``[([words], positive), ([words], positive)]`` format.
        """
        max_rows = 800
        sentiment = []
        tweets = []

        # One pass both counts the rows and collects the first ``max_rows``;
        # the context manager closes the file, which the two bare open()
        # calls used previously never did.
        length = 0
        with open('data/training_tennis.csv', encoding='latin-1') as csvfile:
            for row in csvreader(csvfile, delimiter=","):
                if length < max_rows:
                    tweets.append(row[1])
                    sentiment.append(row[0])
                length += 1
        print(length)

        processed_tweets = self.f_process_tweets(tweets)

        training_data = list(zip(processed_tweets, sentiment))
        random.shuffle(training_data)
        return training_data
示例#26
0
    def test_group_sample_ids(self):
        """The sample WQP response groups into one sample id holding three rows."""
        expected_id = 'nwisaz.01.00000154'
        fixture_path = join('tests', 'data', 'WQP', 'webservice.csv.as.txt')
        with open(fixture_path, 'r') as fixture:
            grouped = self.patient._group_rows_by_id(csvreader(fixture))

            self.assertEqual(len(list(grouped.keys())), 1)
            self.assertEqual(len(grouped[expected_id]), 3)
            self.assertTrue(expected_id in grouped)
示例#27
0
def csv_to_list(csv_file, delimiter=','):
    """
    Reads in a CSV file and returns the contents as list,
    where every row is stored as a sublist, and each element
    in the sublist represents 1 cell in the table.

    Args:
        csv_file: path of the file to read.
        delimiter: single-character cell separator (default ``','``).

    Returns:
        A list of rows, each a list of cell strings.
    """
    # Text mode: on Python 3 csv.reader rejects the bytes lines produced by
    # a file opened with 'rb'.
    with open(csv_file, 'r') as csv_con:
        reader = csvreader(csv_con, delimiter=delimiter)
        return list(reader)
示例#28
0
def read_input(inputfile):
    '''
    Load a CSV file as a transposed NumPy array of floats.

    The last column of every row is dropped (it is assumed to hold the
    classification) and the remaining cells are converted to float.
    '''
    with open(inputfile) as f:
        records = csvreader(f, delimiter=',', quotechar='"')
        values = [[float(cell) for cell in record[:-1]] for record in records]
    return np.array(values).T
    def f_testing_data(self):
        """Build the testing set from ``data/training_tennis.csv``.

        With ``length`` defined as the number of rows minus 800 (printed for
        reference), up to 100 rows are taken starting right after the first
        ``length`` rows; if the file has fewer than 800 rows the selection
        starts at the first row.  Column 0 is the sentiment and column 1 the
        tweet.  Tweets are passed through ``self.f_process_tweets``.

        Returns a list of ``(processed_tweet, sentiment)`` tuples in file
        order, i.e. the ``[([words], positive), ([words], positive)]`` format.
        """
        tail_rows = 800
        max_rows = 100

        # Read once inside a context manager so the file is closed.
        with open('data/training_tennis.csv', encoding='latin-1') as csvfile:
            rows = list(csvreader(csvfile, delimiter=","))

        length = len(rows) - tail_rows
        print(length)

        # Select by record position.  The reader's ``line_num`` counts
        # physical lines, which differs from the record count as soon as a
        # quoted field spans several lines.
        start = max(length, 0)
        selected = rows[start:start + max_rows]
        tweets = [row[1] for row in selected]
        sentiment = [row[0] for row in selected]

        processed_tweets = self.f_process_tweets(tweets)

        return list(zip(processed_tweets, sentiment))
示例#30
0
    def scheduleChecker(self):
        """Validate the schedule CSV and return an error report.

        The file named by ``self.ui.fileLocation_2`` is read, skipping its
        first row.  Columns 1 to 5 of every following row are passed, with
        a ``"Row: R Column: C"`` label, to ``verifyShiftID``,
        ``verifyStartTime``, ``verifyEndTime``, ``verifyPorterID`` and
        ``verifyDays`` respectively, and whatever text they return is
        collected.  A validator that raises (or a missing column) adds a
        generic syntax message for that cell.  Checking stops after the
        first row that produced any error text.

        Returns an empty string when no errors were found, otherwise the
        collected messages prefixed with a banner line.
        """
        # Validator for each column, in column order.  They are looked up
        # by name inside the try so a failing lookup is reported like any
        # other validation failure.
        validator_names = (
            "verifyShiftID",
            "verifyStartTime",
            "verifyEndTime",
            "verifyPorterID",
            "verifyDays",
        )

        strError = ""
        with open(self.ui.fileLocation_2.text(), "r") as f:
            reader = csvreader(f)

            next(reader)

            # Data rows are numbered from 2 because row 1 is the skipped one.
            for i, row in enumerate(reader, start=2):
                for column, name in enumerate(validator_names, start=1):
                    cell = "Row: " + str(i) + " Column: " + str(column)
                    try:
                        strError = strError + getattr(self, name)(row[column - 1], cell)
                    except Exception:
                        # Not a bare ``except``: KeyboardInterrupt and
                        # SystemExit must still propagate.
                        strError = strError + "Incorrect Syntax in Schedule File. Check " + cell + "\n"

                if strError != "":
                    break

        if strError == "":
            return strError
        return "*****SCHEDULE DATA FILE ERROR*****\n" + strError
示例#31
0
def read_input(infile):
    '''
    Split a CSV file into a float feature array and a label array.

    Empty rows are skipped.  For every other row, all cells but the last
    are converted to float and the last cell is kept as the label.  Returns
    the transposed feature array together with the array of labels.
    '''
    with open(infile) as f:
        records = [rec for rec in csvreader(f, delimiter=',', quotechar='"')
                   if rec]
    features = [[float(cell) for cell in rec[:-1]] for rec in records]
    labels = [rec[-1] for rec in records]
    return np.array(features).T, np.array(labels)
示例#32
0
def read_uikparams():
    """Yield ``(uik, params)`` for every well-formed row of ``uikparams.csv``.

    The first row is skipped as a header.  The leading columns of each
    following row are mapped onto the field names listed below; ``params``
    is a dict of those fields in which ``koib`` is replaced by ``'KOIB'``
    when the cell is not ``'0'`` and by ``''`` otherwise.

    Rows with too few columns are printed and skipped.
    """
    Row = namedtuple('row', 'tik, raion, uik, voters, mn_in, mn_out, koib, addr_vote, place, doma, phone, url, uikpage, addr_komissii, phone_k')

    # The context manager closes the file once the generator is exhausted
    # or closed.
    with open('uikparams.csv') as csvfile:
        data = csvreader(csvfile, delimiter=',')
        next(data, None)  # skip the headers

        for row in data:
            try:
                x = Row(*row[:len(Row._fields)])
            except TypeError:
                # Too few columns for the field list.  Skip the row rather
                # than fall through and yield the previous row again.
                print(row)
                continue
            yield x.uik, dict(x._asdict(), koib='KOIB' if x.koib != '0' else '')
示例#33
0
def loadcsv(filename):
    """Load a CSV file of numbers as a list of float rows.

    Returns:
        A list of rows, each a list of floats.  If the file cannot be
        opened or a cell is not numeric, ``main()`` is called instead and
        ``None`` is returned.
    """
    try:
        with open(filename, 'r') as fp:
            return [[float(x) for x in row] for row in csvreader(fp)]
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # interpreter exit are not turned into another call to main().
        main()
示例#34
0
def doodle(doodle):
    """Read the file at ``doodle`` row by row and build a ``Day`` from it.

    Every parsed CSV row is converted to its string representation and the
    fields are then extracted by slicing that string and splitting on ``;``.
    Row 1 supplies the name passed to ``Day``, rows 4 and 5 are combined
    into ``Training`` entries, and rows from 6 onwards add names to the
    attributes of those entries until a row starting with ``Count`` is met.
    Rows 2 and 3 are ignored.

    Returns the ``Day`` object created from row 1.

    NOTE(review): the slice offsets presumably assume a ';'-separated export
    that the comma-based reader returns as single-cell rows - confirm.
    NOTE(review): ``Trainingsday`` is unbound if the file has no rows.
    """
    name = ""
    tmpdates = list()
    with open(doodle, 'r') as csvdoodle:
        csvcontent = csvreader(csvdoodle)
        rownum = 1
        for row in csvcontent:
            # From here on ``row`` is the repr of the parsed list, so the
            # slices below also skip the list brackets and quote characters.
            row = str(row)
            if rownum == 1:  # read in name of doodle
                name = row.split(";")[0][8:-1]
                Trainingsday = Day(name)
            elif rownum == 4:  # read in dates of that doodle
                rowsplit = row[3:-2].split(";")
                last = ""
                for training in rowsplit:
                    if not training == "":
                        # Keep only the text before the first space.
                        training = training.split(" ")[0]
                    else:
                        # An empty cell repeats the previous value.
                        training = last
                    tmpdates.append(training)
                    last = training
            elif rownum == 5:
                # Pair every cell of this row with the value stored for the
                # same position from row 4, joined by a dot.
                rowsplit = row[3:-2].split(";")
                for day in range(len(rowsplit)):
                    month = tmpdates[day]
                    Trainingsday.trainings.append(
                        Training(str(rowsplit[day] + "." + month)))
            elif rownum >= 6:  # read in TL and their preferences
                if row[2:7] == 'Count':
                    break
                else:
                    # Text before the first space, colons removed; split on
                    # '/' below into one or more codes.
                    discpart = row[2:-2].split(" ")[0].replace(':', '')
                    TLname = row[(3 + len(discpart)):-2].split(";")[0]
                    TLdiscs = discpart.split("/")
                    # Names listed with several codes get a '*' suffix.
                    if len(TLdiscs) > 1:
                        TLname = TLname + "*"
                    for i in range(len(Trainingsday.trainings)):
                        # Field i + 1 of the row belongs to training i.
                        wish = row[2:-2].split(";")[i + 1]
                        if wish != "":
                            for attr, value in Trainingsday.trainings[
                                    i].__dict__.items():
                                for dis in TLdiscs:
                                    dis = dis.upper()
                                    # "OK" adds the name to the attribute
                                    # named like the upper-cased code.
                                    if attr == dis and wish == "OK":
                                        value.append(TLname + ",\n")
                                    # "(OK)" adds the name, prefixed with
                                    # all codes, to the 'CAN' attribute.
                                    elif attr == 'CAN' and wish == "(OK)":
                                        disz = ""
                                        for d in TLdiscs:
                                            disz = disz + d
                                        value.append(
                                            (disz + " " + str(TLname) + ",\n"))
            rownum += 1
    return Trainingsday
def load_raw_data_from_csv(filename):
    """Return the data rows of a tab-delimited file under ``data/``.

    ``filename`` is used as given when it already starts with ``data/``;
    otherwise that prefix is added.  The first row is skipped as the header.
    Per the original comment the columns are: date, price_o, price_h,
    price_l, price_c, volume, mkt_cap.
    """
    if not filename.startswith('data/'):
        filename = 'data/{}'.format(filename)
    with open(filename, 'r') as handle:
        rows = csvreader(handle, delimiter='\t')
        next(rows, None)  # header
        return list(rows)
示例#36
0
    def get_csv(url):
        """Fetch ``url`` and return a CSV reader over the response text.

        Raises whatever ``response.raise_for_status()`` raises for an
        unsuccessful response.  Timing and count headers are printed on a
        best-effort basis.
        """
        response = get(url)
        response.raise_for_status()

        # The diagnostics are optional: a response without the count headers
        # must not fail the request.  ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt / SystemExit still propagate.
        try:
            print('query completed in {}'.format(response.elapsed))
            print('new sites found {}'.format(response.headers['total-site-count']))
            print('new results found {}'.format(response.headers['total-result-count']))
        except Exception:
            pass

        return csvreader(response.text.splitlines())
示例#37
0
def main(argv):
    """Print a sliding-window median of the samples in a CSV file.

    ``argv[1]`` is the CSV file name and ``argv[2]`` the window size. Every
    row must have at least seven columns; columns 3, 4 and 6 are read as
    current, voltage and offset time. Once the window holds ``window_size``
    samples, each further row prints one comma-separated line with
    ``median(time), median(current), median(voltage)`` and the product of
    the two latter medians, after which the oldest sample is dropped.

    Prints a usage message and returns when too few arguments are given;
    prints a hint and returns when a row has fewer than seven columns.
    """
    if len(argv) < 3:
        print('Usage: {0} <filename> <window size>'.format(argv[0]))
        return

    filename = argv[1]
    window_size = int(argv[2])

    current_win = []
    voltage_win = []
    time_win = []

    # NOTE(review): binary mode matches the Python 2 csv module this script
    # was written for; Python 3's csv module needs a text-mode file.
    # The ``with`` block closes the file on the early return below as well.
    with open(filename, 'rb') as csvfile:
        for line in csvreader(csvfile, delimiter=','):
            if len(line) < 7:
                print('use add_power.py before')
                return

            # Columns 0-2 (time, channel A, channel B) and 5 (power) are not
            # used by the filter; they are still converted so a malformed
            # value raises ValueError as it always did.
            int(line[0])
            int(line[1])
            int(line[2])
            current = float(line[3])
            voltage = float(line[4])
            float(line[5])
            offsettime = int(line[6])

            if len(current_win) >= window_size:
                current_med = median(current_win)
                voltage_med = median(voltage_win)
                time_med = median(time_win)
                power_med = current_med * voltage_med

                output = [time_med, current_med, voltage_med, power_med]
                print(','.join(str(x) for x in output))

                # Slide the window: drop the oldest sample of each series.
                current_win = current_win[1:]
                voltage_win = voltage_win[1:]
                time_win = time_win[1:]

            current_win.append(current)
            voltage_win.append(voltage)
            time_win.append(offsettime)
    def f_gar_sentiment(self, training_featured_words):
        """Classify the first 500 lines of ``data/garbi_tweets.csv``.

        Prints the number of CSV rows in the file, runs the first 500 raw
        lines through ``self.f_process_tweets``, builds feature sets with
        ``self.f_feature_set`` and classifies each one with
        ``self.voted_classifier``. The tallies are stored on
        ``self.gloop_count``, ``self.gpos``, ``self.gneg`` and
        ``self.gneut`` and printed at the end.

        :param training_featured_words: passed through unchanged to
            ``self.f_feature_set``.
        """
        source_path = 'data/garbi_tweets.csv'
        max_tweets = 500

        # Total number of CSV rows in the file (informational only).
        with open(source_path, encoding='latin-1') as handle:
            length = sum(1 for _ in csvreader(handle))
        print(length)

        # Raw lines of the file, capped at max_tweets.
        # NOTE(review): this read uses the platform default encoding while
        # the row count above uses latin-1 -- confirm which one is intended.
        with open(source_path) as handle:
            gtweets = handle.read().split('\n')[:max_tweets]
        gsentiment = ['None'] * len(gtweets)

        gar_tweets = self.f_process_tweets(gtweets)
        # Keep only tweets that are non-empty after processing.
        gar_data = [(tweet, gsentiment[i])
                    for i, tweet in enumerate(gar_tweets) if tweet]

        # The result is not used here; the call is kept in case the helpers
        # have side effects on the instance.
        self.f_feature_word(self.f_specific_all_words(gar_tweets))

        gar_set = self.f_feature_set(gar_data, training_featured_words)

        self.gloop_count = 0
        self.gpos = 0
        self.gneg = 0
        self.gneut = 0
        for entry in gar_set:
            self.gloop_count += 1
            # Classify once per item instead of once per comparison.
            label = self.voted_classifier.f_classify(entry[0])
            if label == 'positive':
                self.gpos += 1
            elif label == 'negative':
                self.gneg += 1
            elif label == 'neutral':
                self.gneut += 1
        print(self.gloop_count, self.gpos, self.gneg, self.gneut)
    def f_novak_sentiment(self, training_featured_words):
        """Classify the first 1000 lines of ``data/novak_tweets.csv``.

        Prints the number of CSV rows in the file, runs the first 1000 raw
        lines through ``self.f_process_tweets``, builds feature sets with
        ``self.f_feature_set`` and classifies each one with
        ``self.voted_classifier``. The tallies are stored on
        ``self.nloop_count``, ``self.npos``, ``self.nneg`` and
        ``self.nneut`` and printed at the end.

        :param training_featured_words: passed through unchanged to
            ``self.f_feature_set``.
        """
        source_path = 'data/novak_tweets.csv'
        max_tweets = 1000

        # Total number of CSV rows in the file (informational only).
        with open(source_path, encoding='latin-1') as handle:
            length = sum(1 for _ in csvreader(handle))
        print(length)

        # Raw lines of the file, capped at max_tweets.
        # NOTE(review): this read uses the platform default encoding while
        # the row count above uses latin-1 -- confirm which one is intended.
        with open(source_path) as handle:
            ntweets = handle.read().split('\n')[:max_tweets]
        nsentiment = ['None'] * len(ntweets)

        novak_tweets = self.f_process_tweets(ntweets)
        # Keep only tweets that are non-empty after processing.
        novak_data = [(tweet, nsentiment[i])
                      for i, tweet in enumerate(novak_tweets) if tweet]

        # The result is not used here; the call is kept in case the helpers
        # have side effects on the instance.
        self.f_feature_word(self.f_specific_all_words(novak_tweets))

        novak_set = self.f_feature_set(novak_data, training_featured_words)

        self.nloop_count = 0
        self.npos = 0
        self.nneg = 0
        self.nneut = 0
        for entry in novak_set:
            self.nloop_count += 1
            # Classify once per item instead of once per comparison.
            label = self.voted_classifier.f_classify(entry[0])
            if label == 'positive':
                self.npos += 1
            elif label == 'negative':
                self.nneg += 1
            elif label == 'neutral':
                self.nneut += 1
        print(self.nloop_count, self.npos, self.nneg, self.nneut)
示例#40
0
def download_data(link):
    """Download a CSV file and return it as a list of rows.

    The resource at ``link`` is read line by line, decoded as UTF-8 and
    parsed with the csv reader. A line that starts with a comma gets a
    single space prepended so that its first field is ``" "`` rather than
    an empty string.

    :param link: URL of the CSV file.
    :return: list of rows, each a list of strings.
    """
    # The context manager closes the response as soon as it has been read.
    with request.urlopen(link) as response:
        lines = [raw.decode('utf-8') for raw in response.readlines()]

    # Add blank space for missing cities to prevent dropping columns
    lines = [" " + line if line.startswith(",") else line for line in lines]

    # Split each row into a list of data
    return list(csvreader(lines))
示例#41
0
 def _import(self, f):
     """Add the (first, second) column pairs of CSV file ``f`` to the storage.

     The storage is looked up with ``queryUtility(IShortURLStorage)``.
     Rows with fewer than two columns are skipped. As soon as a first
     column does not match ``SHORTURLRE`` the import stops and a translated
     error message is returned; rows added before that point stay added.
     Returns ``None`` when every row was accepted.
     """
     storage = queryUtility(IShortURLStorage)
     for record in csvreader(f):
         # Too few columns; this also covers empty rows.
         if len(record) >= 2:
             short, target = record[0], record[1]
             if SHORTURLRE.match(short) is None:
                 # Ignore funny characters
                 return _(u'Your upload contains invalid characters.')
             storage.add(short, target)
     return None
示例#42
0
def open_csv(bus, fn):
    """Build a ``DbusRootObject`` from the CSV file ``fn``.

    The first field of the first row is used as the bus name. Each
    following row supplies ``path, typ, value`` in its first three columns
    and becomes a ``DbusPathObject`` stored under ``path``. A value that is
    empty or whitespace-only is replaced by an empty ``dbus.Array`` with
    signature ``u``.

    :param bus: bus object handed to ``dbus.service.BusName``.
    :param fn: path of the CSV file.
    :return: ``DbusRootObject`` wrapping all objects read from the file.
    """
    service = {}
    # NOTE(review): binary mode matches the Python 2 csv module; the file is
    # now closed as soon as it has been read.
    with open(fn, 'rb') as csvfile:
        reader = csvreader(csvfile)
        name = next(reader)[0]
        busName = dbus.service.BusName(name, bus)
        for row in reader:
            path, typ, value = row[:3]
            if not value.strip():
                # Invalid
                value = dbus.Array([],
                                   signature=dbus.Signature('u'),
                                   variant_level=1)
            service[path] = DbusPathObject(busName, path, wrap(typ, value))
    return DbusRootObject(busName, service)
示例#43
0
def load_element_data():
    """Load ``data/element.csv`` from the ``mathics_scanner`` package.

    The file is read as tab-separated values.

    :return: list of rows (each a list of strings), or ``None`` when
        ``mathics_scanner`` cannot be imported or the file cannot be
        opened; in that case a "not found" message is printed.
    """
    # Fallback for the message when the package directory is unknown.
    element_path = os.path.join('data', 'element.csv')
    try:
        import mathics_scanner
        datadir = os.path.dirname(mathics_scanner.__file__)
        element_path = os.path.join(datadir, 'data/element.csv')
        element_file = open(element_path, 'r')
    except (ImportError, OSError):
        # Previously the handler referenced ``datadir`` even when the
        # import itself had failed, raising NameError instead of reporting.
        print(element_path, "  not found.")
        return None
    with element_file:
        reader = csvreader(element_file, delimiter='\t')
        return [list(row) for row in reader]
示例#44
0
def get_type_parts(part_type):
    """
    Returns a list of part numbers based on part_type.

    The numbers are the first column of ``parts_in_sp/all<part_type>.csv``.
    Blank lines in the file are ignored.

    :param part_type: String indicating 'HDD', 'MEM', or 'CPU'
    :return: List of part numbers from csv file
    """
    path = pathjoin("parts_in_sp", "all" + part_type + ".csv")
    with open(path, "r") as csvfile:
        # The csv reader yields [] for a blank line; skip those instead of
        # failing with IndexError on row[0].
        return [row[0] for row in csvreader(csvfile) if row]
示例#45
0
    def get_sheet_headings(self, sheet_name):
        """Return the heading row of ``<input_name>/<sheet_name>.csv``.

        The heading is the row at index ``skipRows`` (from the effective
        configuration, default 0), shifted by one when the sheet has its
        own configuration. The effective configuration is the sheet's own
        one, else ``self.base_configuration``, and is empty when
        ``self.use_configuration`` is false.

        Returns ``[]`` when the configuration has a truthy ``ignore`` entry
        and ``None`` when the file has no row at the heading index.
        """
        own_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
        if own_configuration:
            configuration_line = 1
            effective = own_configuration
        else:
            configuration_line = 0
            effective = self.base_configuration
        if not self.use_configuration:
            effective = {}

        skip_rows = effective.get("skipRows", 0)
        if effective.get("ignore"):
            # returning empty headers is a proxy for no data in the sheet.
            return []

        heading_index = skip_rows + configuration_line
        csv_path = os.path.join(self.input_name, sheet_name+'.csv')

        if sys.version > '3':  # If Python 3 or greater
            with open(csv_path, encoding=self.encoding) as main_sheet_file:
                for position, cells in enumerate(csvreader(main_sheet_file)):
                    if position == heading_index:
                        return cells
        else:  # If Python 2
            with open(csv_path) as main_sheet_file:
                rows = csvreader(main_sheet_file, encoding=self.encoding)
                for position, cells in enumerate(rows):
                    if position == heading_index:
                        return cells
        return None
示例#46
0
 def readCsvFile(self, fileName, delimiter=","):
     """
     Reads csv file and returns a List with each non-empty row as an element

     :param fileName: path of the CSV file
     :param delimiter: field delimiter handed to the csv reader
     :return: ``(rows, True)`` on success, ``(error message, False)`` when
         the file does not exist or reading it fails
     """
     if not os.path.exists(fileName):
         return "ERROR: File \"%s\" does not exists." % fileName, False
     try:
         # Mode "rU" is rejected by Python 3.11+; on Python 3 plain "r"
         # already gives universal-newline handling. The ``with`` block
         # closes the file even when parsing fails.
         with open(fileName, "r") as filehandle:
             reader = csvreader(filehandle, delimiter=delimiter)
             return [row for row in reader if row], True
     except Exception:
         return "ERROR: %s" % str(format_exc()), False
0
 def _down(self,indelimeter):
     """скачивает данные в заданном формате"""
     try:
         reader = csvreader(open(self.path2prg, 'r'), delimiter=indelimeter, skipinitialspace=True)
         self.program = list()
         self.title = list()
         for row in reader:
             #команда
             if (len(row)>=5):
                 line=row[0]
                 if (line[0]!=";"):
                     self.program.append(row)
             #заголовок или комментарий
             if len(row)>0 and ";" in row[0]:
                 self.title.append(row)
     except OSError as err:
         print(err)
         self.program = None
         self.title = None
 def test_csv_export(self):
     """Check the header and the first two rows of the generated CSV export.

     The export view is called on ``self.portal.container``, the resulting
     file is parsed with ``;`` as delimiter and removed once all
     assertions have passed.
     """
     output = self.portal.container.unrestrictedTraverse('@@collective.excelexportcsv')()
     generated_path = self._get_generated_filepath(output, 'test.csv')
     # Close the file before it is removed below: deleting a file that is
     # still open fails on some platforms.
     with open(generated_path) as generated_file:
         lines = csvreader(generated_file, dialect='excel', delimiter=';')
         headers_row = next(lines)
         self.assertEqual(headers_row, ['Name', 'Biography',
                                        'Birth date', 'subscription',
                                        'amount', 'Languages', 'Photo', 'Related Items'])
         row1 = next(lines)
         self.assertEqual(row1, ['John Doe',
                                 'Longtemps, je me suis couch\xe9 de bonne heure',
                                 '1980/07/24', 'silver', '100',
                                 'English\nFran\xe7ais', 'logoplone.png', ''])
         row2 = next(lines)
         self.assertEqual(row2, ['John Smith',
                                 "Je forme une entreprise qui n'eut jamais d'exem...",
                                 '1981/07/24', '', '100',
                                 'English\nEspa\xf1ol', '', 'John Doe'])
     os.remove(generated_path)
示例#49
0
def the_loop(proc, args):
    """Poll the entries of ``proc`` for CSV lines until an operation limit is hit.

    Each pass asks every entry of ``proc`` for a line via ``get_nowait()``;
    a non-None line is parsed as CSV and its values, converted to float,
    replace that entry's slot in ``data``. Roughly every five seconds a
    load value is picked at random between ``args.min[0]`` and
    ``args.max[0]`` and pushed to every entry with ``set_load``.

    The function returns only when ``args.o[0]`` is positive and the sum of
    the fifth value (index 4) over all slots holding at least five values
    exceeds it; otherwise it loops forever.

    NOTE(review): Python 2 only (``Queue``/``StringIO`` modules, print
    statements, ``map`` results used as lists).
    """
    from time import sleep, time
    # One slot per process, args.n[0] slots in total.
    data = list(map(lambda a: [0,0,0,0], range(args.n[0])))
    t = time()  # time of the last load change

    while True:
        i = 0  # index of the current entry of proc, used as slot in data
        got_something = False
        for p in proc:
            # NOTE(review): Empty is imported but not referenced in this
            # function -- presumably get_nowait() returns None rather than
            # raising it; confirm.
            from Queue import Empty
            
            l = p.get_nowait()
            if l != None:
                from StringIO import StringIO
                from csv import reader as csvreader
                f = StringIO(l)
                r = csvreader(f, delimiter=',')
                # If the text holds several rows, the last one wins.
                for row in r:
                    data[i] = map(lambda s: float(s), row)
                    #print "From process %i:" % i, data[i]
                got_something = True
            i += 1
        # More than five seconds since the last change: pick a new load.
        if time() > t + 5:
            from random import randint
            t = time()
            r = randint(0,1)
            l = args.min[0]
            if r == 1:
                l = args.max[0]
            for p in proc:
                p.set_load(l)
            print "Set load to ", l
        if not got_something:
            # Nothing arrived in this pass: wait before polling again.
            sleep(1.0)
        else:
            #print map(lambda l: (0,0,0,0,0) if len(l) < 5 else (l[2], l[2] - l[3], l[4]), data)
            # A limit of zero or less disables the exit condition.
            if args.o[0] > 0:
                # Slots with fewer than five values count as 0.
                if sum(map(lambda l: 0 if len(l) < 5 else l[4], data)) > args.o[0]:
                    print 'Reached %i operations, exiting...' % args.o[0]
                    return
示例#50
0
文件: views.py 项目: tompecina/legal
def insbatchform(request):
    """Handle the batch-import form for insolvency proceedings.

    On a POST with the ``load`` button and an uploaded file, the file is
    decoded and parsed as CSV. Each non-empty row is expected to hold a
    description, a positive number, a year (2008 or later) and ``ano`` or
    ``ne`` for the ``detailed`` flag. Valid rows are stored with
    ``Insolvency.objects.update_or_create`` keyed on the user and the
    description; invalid rows are collected as ``(row number, message)``
    pairs. The result page is rendered with the count and the errors.

    In every other case (GET, missing file, unreadable file) the form page
    is rendered, with ``err_message`` set when something went wrong.
    """

    LOGGER.debug('Proceedings import page accessed using method {}'.format(request.method), request)

    err_message = ''
    uid = request.user.id
    uname = request.user.username

    if request.method == 'POST':
        button = getbutton(request)

        if button == 'load':
            infile = request.FILES.get('load')
            if not infile:
                err_message = 'Nejprve zvolte soubor k načtení'
            else:
                errors = []
                try:
                    count = 0
                    with infile:
                        rows = csvreader(StringIO(infile.read().decode()))
                        # idx counts every CSV row, including skipped ones.
                        for idx, line in enumerate(rows, 1):
                            if not line:
                                continue
                            desc = line[0].strip()
                            if not desc:
                                errors.append((idx, 'Prázdný popis'))
                                continue
                            if len(desc) > 255:
                                errors.append((idx, 'Příliš dlouhý popis'))
                                continue
                            # Explicit checks instead of ``assert``, which is
                            # stripped when Python runs with -O.
                            try:
                                number = int(line[1])
                                if number <= 0:
                                    raise ValueError(number)
                            except (IndexError, ValueError):
                                errors.append((idx, 'Chybné běžné číslo'))
                                continue
                            try:
                                year = int(line[2])
                                if year < 2008:
                                    raise ValueError(year)
                            except (IndexError, ValueError):
                                errors.append((idx, 'Chybný ročník'))
                                continue
                            # A row without a fourth field used to raise
                            # IndexError here and abort the whole import;
                            # it is now reported like any other bad value.
                            detailed = line[3].strip() if len(line) > 3 else ''
                            if detailed == 'ano':
                                detailed = True
                            elif detailed == 'ne':
                                detailed = False
                            else:
                                errors.append((idx, 'Chybný údaj pro pole Vše'))
                                continue

                            try:
                                Insolvency.objects.update_or_create(
                                    uid_id=uid,
                                    desc=desc,
                                    defaults={
                                        'number': number,
                                        'year': year,
                                        'detailed': detailed}
                                )
                            except Exception:
                                # NOTE(review): the message assumes the
                                # failure is an ambiguous description;
                                # confirm which exceptions can occur here.
                                errors.append((idx, 'Popisu "{}" odpovídá více než jedno řízení'.format(desc)))
                                continue
                            count += 1
                    LOGGER.info('User "{}" ({:d}) imported {:d} proceedings'.format(uname, uid, count), request)
                    return render(
                        request,
                        'sir_insbatchresult.xhtml',
                        {'app': APP,
                         'page_title': 'Import řízení ze souboru',
                         'count': count,
                         'errors': errors})

                except Exception:  # pragma: no cover
                    LOGGER.error('Error reading file', request)
                    err_message = 'Chyba při načtení souboru'

    return render(
        request,
        'sir_insbatchform.xhtml',
        {'app': APP,
         'page_title': 'Import řízení ze souboru',
         'err_message': err_message})
示例#51
0
文件: views.py 项目: tompecina/legal
def partybatchform(request):
    """Handle the batch-import form for parties.

    On a POST with the ``load`` button and an uploaded file, the file is
    decoded and parsed as CSV. Only the first field of each non-empty row
    is used: ``<party>`` or ``<party>:<option>``, the option defaulting to
    ``*``. Rows whose party length or option abbreviation is invalid are
    collected as ``(row number, message)`` pairs; the others are stored
    with ``Party.objects.update_or_create``. The result page is rendered
    with the count and the errors.

    A POST with any other button sets ``INERR`` as the error message. In
    every case that does not end on the result page, the form page is
    rendered.
    """

    LOGGER.debug('Party import page accessed using method {}'.format(request.method), request)

    uid = request.user.id
    uname = request.user.username
    err_message = ''

    if request.method == 'POST':
        if getbutton(request) != 'load':
            LOGGER.debug('Invalid form', request)
            err_message = INERR
        else:
            infile = request.FILES.get('load')
            if infile:
                errors = []
                try:
                    count = 0
                    with infile:
                        content = StringIO(infile.read().decode())
                        # idx counts every CSV row, including skipped ones.
                        for idx, fields in enumerate(csvreader(content), 1):
                            if not fields:
                                continue
                            party, separator, party_opt = fields[0].strip().partition(':')
                            if not separator:
                                party_opt = '*'
                            if not between(MIN_LENGTH, len(party), MAX_LENGTH):
                                errors.append((idx, 'Chybná délka řetězce'))
                                continue
                            if party_opt not in TEXT_OPTS_ABBR:
                                errors.append((idx, 'Chybná zkratka pro posici'))
                                continue
                            try:
                                Party.objects.update_or_create(
                                    uid_id=uid,
                                    party=party,
                                    defaults={'party_opt': TEXT_OPTS_AI[party_opt]}
                                )
                            except:
                                errors.append((idx, 'Řetězci "{}" odpovídá více než jeden účastník'.format(party)))
                                continue
                            count += 1
                    LOGGER.info('User "{}" ({:d}) imported {} party/ies'.format(uname, uid, count), request)
                    return render(
                        request,
                        'sur_partybatchresult.xhtml',
                        {'app': APP,
                         'page_title': 'Import účastníků řízení ze souboru',
                         'count': count,
                         'errors': errors})

                except:  # pragma: no cover
                    LOGGER.error('Error reading file', request)
                    err_message = 'Chyba při načtení souboru'
            else:
                err_message = 'Nejprve zvolte soubor k načtení'

    return render(
        request,
        'sur_partybatchform.xhtml',
        {'app': APP,
         'page_title': 'Import účastníků řízení ze souboru',
         'err_message': err_message,
         'min_length': MIN_LENGTH,
         'max_length': MAX_LENGTH})
示例#52
0
# Column alignment, keyed by column index
# (presumably format-spec alignment characters -- confirm where they are used)
aligns = {0: '<'}		# first column: padding on the right (left-aligned)
default_align = '>'		# other columns: padding on the left (right-aligned)
head_aligns = {0: '^'}	# first header cell: centered
default_head_align = '>'

# Keep only the command-line arguments that name an existing file.
files = [arg for arg in argv[1:] if isfile(arg)]
if not files:
	print('No file specified')
	exit(1)

# Only the first file is read.
cfile = csvreader(open(files[0]), delimiter=';', quotechar='"')

# rows: copy of every row; maxs: longest field seen so far in each column.
rows = []
maxs = []
for row in cfile:
	rows.append(list(row))
	if not maxs:
		maxs = [len(field) for field in row]
		continue
	missing = len(row) - len(maxs)
	if missing > 0:
		# This row has more columns than any row before it.
		maxs.extend([0] * missing)
	for i, field in enumerate(row):
		if len(field) > maxs[i]:
			maxs[i] = len(field)

# Header format and show
示例#53
0
    def process_file(self, file, rewrite=False):
        """Run ``self.process_line`` over every non-blank row of ``file``.

        The processor is re-initialised with ``self.csv`` first. A copy of
        ``csv.settings`` is prepared for the run: the ``add`` entries are
        replaced by ``addByMethod`` (with methods resolved through
        ``csv.guesses.get_methods_from``), ``chosen_cols`` is dropped when
        it covers all fields, and ``dialect`` / ``target_file`` are filled
        in from ``csv``.

        While reading, ``csv.line_count`` is advanced and
        ``csv.informer.sout_info()`` is called whenever it reaches
        ``csv.line_sout``; ``csv.velocity`` is adapted from the time
        elapsed since the previous report. A KeyboardInterrupt opens a
        dialogue offering to retry the row, skip it, debug or quit.

        Afterwards the descriptors are closed and, when ``csv.is_split`` is
        set, created files without an attachment are registered as new
        attachments.

        :param file: path of the source CSV file.
        :param rewrite: passed through to ``__init__``.
        """
        csv = self.csv
        self.__init__(csv, rewrite=rewrite)
        settings = csv.settings.copy()

        # convert settings["add"] to lambdas
        # [("netname", 20, [lambda x, lambda x...]), ...]
        adds = [(it[0], it[1], self.csv.guesses.get_methods_from(it[0], it[2], it[3]))
                for it in settings["add"]]
        del settings["add"]
        settings["addByMethod"] = adds

        if len(settings["chosen_cols"]) == len(csv.fields):
            del settings["chosen_cols"]

        if not settings["dialect"]:
            settings["dialect"] = csv.dialect

        settings["target_file"] = csv.target_file
        with open(file, "r") as sourceF:
            reader = csvreader(sourceF, dialect=csv.dialect)
            if csv.has_header:  # skip header
                # The default keeps an empty file from raising StopIteration.
                next(reader, None)
            for row in reader:
                if not row:  # skip blank
                    continue
                csv.line_count += 1
                if csv.line_count == csv.line_sout:
                    now = datetime.datetime.now()
                    delta = (now - csv.time_last).total_seconds()
                    csv.time_last = now
                    if delta < 1 or delta > 2:
                        newVel = ceil(csv.velocity / delta) + 1
                        if abs(newVel - csv.velocity) > 100 and csv.velocity < newVel:
                            # smaller accelerating of velocity (decelerating is alright)
                            csv.velocity += 100
                        else:
                            csv.velocity = newVel
                    csv.line_sout = csv.line_count + 1 + csv.velocity
                    csv.informer.sout_info()
                try:
                    self.process_line(csv, row, settings)
                except BdbQuit:  # not sure if working, may be deleted
                    print("BdbQuit called")
                    raise
                except KeyboardInterrupt:
                    print("Keyboard interrupting")
                    try:
                        # NOTE(review): ``ip`` is not defined in this method;
                        # unless it is a module-level name this print never
                        # succeeds.
                        print("{} line number, {} ip".format(csv.line_count, ip))
                    except Exception:
                        pass
                    o = Dialogue.ask(
                        "Catched keyboard interrupt. Options: continue (default, do the line again), [s]kip the line, [d]ebug, [q]uit: ")
                    if o == "d":
                        print(
                            "Maybe you should hit n multiple times because pdb takes you to the wrong scope.")  # I dont know why.
                        ipdb.set_trace()
                    elif o == "s":
                        continue  # skip to the next line
                    elif o == "q":
                        # Close before exiting: with quit() first, the close
                        # call was unreachable.
                        self._close_descriptors()
                        quit()
                    else:  # continue from last row
                        csv.line_count -= 1  # let's pretend we didn't just do this row before and give it a second chance
                        self.process_line(csv, row, settings)
        self._close_descriptors()

        if self.csv.is_split:
            attch = {at.path for at in self.csv.attachments}
            for f in self.files_created:
                if f not in attch and f != Config.INVALID_NAME:
                    # set that a mail with this attachment have not yet been sent
                    self.csv.attachments.append(Attachment(None, None, f))
            Attachment.refresh_attachment_stats(self.csv)
示例#54
0
文件: readers.py 项目: CCLab/RS_ref
 def __init__(self, reader, delim=";", quote='"'):
     """Wrap ``reader`` in a csv reader and start with empty buffers.

     :param reader: source handed to the csv reader
     :param delim: field delimiter
     :param quote: quote character
     """
     self.buffer = ""
     self.rows = deque()
     self.reader = csvreader(reader, quotechar=quote, delimiter=delim)
    template = argv[1]
    csv_files = argv[2:]

    print '-- Converting CSV file to templatized HTML --'
    print '-- Using template: %s' % (template)
    print '-- Operating on CSV files: %s' % (csv_files)

    counter = 0

    with open(template, 'rb') as template:
        template = template.read()
        template = Template(template)
        for f in csv_files:
            with open(f, 'rb') as csvfile:
                csvdata = csvreader(csvfile)
                keys = None

                for row in csvdata:
                    if not keys: keys = row; continue

                    stats = OrderedDict(zip(keys,row))
                    laptop_name = '%s %s' % (stats['Manufacturer'],
                                             stats['Model #'])
                    OS = row[keys.index('Installed OS')] 
                    username,password = stats['Username-Password'].split('-')

                    del stats['Notes']
                    del stats['Username-Password']

                    with open(OUTFNAME_TEMPLATE % (counter), 'wb') as out:
示例#56
0
)

# Using the argument parser to do a lot of work:
# * Prefer command line arguments
# * Fall back on config file values
# * Convert to appropriate types
# * Check validity of arguments
args = argman.parse_args()

# Make sure integer arguments are in a reasonable range.
args.dot_size = catcm.constrain_integer(args.dot_size, 2, 100)
args.perimeter_resolution = catcm.constrain_integer(args.perimeter_resolution, 1, 120)

# Open and process the data file
with open(args.datafile_path, 'rt') as datafile:
	csvrows = csvreader(datafile)

	# Limit to certain cats, if requested
	if args.catids:
		csvrows = [csvrow for csvrow in csvrows if csvrow[int(catcm.cfg_data_column_catid)] in args.catids]

	# If no rows were retrieved, warn user that cat is not represented in the current data
	if not csvrows:
		sys.exit('No CSV data was found after checking cat ids. Cat ids were {}.'.format(','.join(args.catids)))

	# Create a new DataPool object to work with
	datapool = catst.STDataPool(args.dot_size, args.perimeter_resolution)

	# For every row, create a Fix object and add it to the DataPool
	for csvrow in csvrows:
		try:
示例#57
0
文件: views.py 项目: tompecina/legal
def procbatchform(request):
    """Handle the batch-import form for court proceedings.

    On a POST with the ``load`` button and an uploaded file, the file is
    decoded and parsed as CSV. Each non-empty row must have exactly three
    fields: a description, a court id and a file reference, which is
    decomposed into senate, register, number and year by ``decomposeref``.
    Invalid rows are collected as ``(row number, message)`` pairs. For a
    valid row, an identical existing record is left alone; otherwise the
    record is created or updated, passed to ``updateproc`` and saved. The
    result page is rendered with the count and the errors.

    In every other case (GET, missing file, unreadable file) the form page
    is rendered, with ``err_message`` set when something went wrong.
    """

    LOGGER.debug('Proceedings import page accessed using method {}'.format(request.method), request)

    err_message = ''
    uid = request.user.id
    uname = request.user.username
    today = date.today()

    if request.method == 'POST':
        button = getbutton(request)

        if button == 'load':
            infile = request.FILES.get('load')
            if not infile:
                err_message = 'Nejprve zvolte soubor k načtení'
            else:
                errors = []
                try:
                    count = 0
                    with infile:
                        rows = csvreader(StringIO(infile.read().decode()))
                        # idx counts every CSV row, including skipped ones.
                        for idx, line in enumerate(rows, 1):
                            if not line:
                                continue
                            if len(line) != 3:
                                errors.append((idx, 'Chybný formát'))
                                continue
                            desc = line[0].strip()
                            if not desc:
                                errors.append((idx, 'Prázdný popis'))
                                continue
                            if len(desc) > 255:
                                errors.append((idx, 'Příliš dlouhý popis'))
                                continue
                            # Explicit checks instead of ``assert``, which is
                            # stripped when Python runs with -O (that also
                            # removed the court lookup altogether).
                            try:
                                court = line[1]
                                if not Court.objects.get(id=court):
                                    raise ValueError(court)
                            except Exception:
                                errors.append((idx, 'Chybná zkratka soudu'))
                                continue
                            try:
                                senate, register, number, year = decomposeref(line[2])
                                if not (senate >= 0
                                        and register in REGISTERS
                                        and number > 0
                                        and 1990 <= year <= today.year):
                                    raise ValueError(line[2])
                            except Exception:
                                errors.append((idx, 'Chybná spisová značka'))
                                continue

                            proc = Proceedings.objects.filter(
                                uid_id=uid,
                                desc=desc,
                                court=court,
                                senate=senate,
                                register=register,
                                number=number,
                                year=year)
                            if not proc.exists():
                                try:
                                    proc = Proceedings.objects.update_or_create(
                                        uid_id=uid,
                                        desc=desc,
                                        defaults={
                                            'court_id': court,
                                            'senate': senate,
                                            'register': register,
                                            'number': number,
                                            'year': year,
                                            'changed': None,
                                            'updated': None,
                                            'hash': '',
                                            'auxid': 0,
                                            'notify': False})[0]
                                    updateproc(proc)
                                    proc.save()
                                except Exception:
                                    # NOTE(review): the message assumes the
                                    # failure is an ambiguous description;
                                    # confirm which exceptions can occur.
                                    errors.append((idx, 'Popisu "{}" odpovídá více než jedno řízení'.format(desc)))
                                    continue
                            # Rows matching an existing record count too.
                            count += 1
                    LOGGER.info('User "{}" ({:d}) imported {:d} proceedings'.format(uname, uid, count), request)
                    return render(
                        request,
                        'szr_procbatchresult.xhtml',
                        {'app': APP,
                         'page_title': 'Import řízení ze souboru',
                         'count': count,
                         'errors': errors})

                except Exception:  # pragma: no cover
                    LOGGER.error('Error reading file', request)
                    err_message = 'Chyba při načtení souboru'

    return render(
        request,
        'szr_procbatchform.xhtml',
        {'app': APP,
         'page_title': 'Import řízení ze souboru',
         'err_message': err_message})
def import_data(mode='training'):
    """Load a delimited data file into a NumPy array, plus its header row.

    The file path is chosen as follows:
      * mode == 'training': ``sys.argv[1]`` if given, else the module-level
        ``default_trn_path``.
      * any other mode: ``sys.argv[2]`` if given, else the module-level
        ``default_tst_path``.

    The first row of the file is read to record the column count in the
    module globals ``trn_cols`` / ``tst_cols`` and, when the matching
    ``heads_in_trn_file`` / ``heads_in_tst_file`` flag is truthy, it is kept
    as the header row and skipped during the numeric import.

    Args:
        mode: 'training' selects the training file and settings; the
            converter column list uses the test settings only for 'test'
            and 'predict'.

    Returns:
        A ``(data, heads)`` tuple: ``data`` is the result of
        ``np.genfromtxt`` on the file, ``heads`` is the header row as a list
        of strings (empty when the file has no header row).

    Raises:
        SystemExit: if the selected path does not exist.
    """
    global trn_cols, tst_cols

    # Pick the input file (features and labels). Falling back to the default
    # test path also covers the case where only the training path was passed
    # on the command line, which previously left `fname` unassigned.
    if mode == 'training':
        fname = sys.argv[1] if len(sys.argv) > 1 else default_trn_path
    else:
        fname = sys.argv[2] if len(sys.argv) > 2 else default_tst_path

    # NOTE: each print below takes one pre-formatted string so the output is
    # the same as the former Python 2 print statements while also being valid
    # Python 3 syntax.
    if not os.path.exists(fname):
        print("usage: " + os.path.split(sys.argv[0])[1]
              + " [default_trn_path] [default_tst_path]")
        print("Valid file paths must be provided as an arg or global varables")
        sys.exit("invalid input")

    # Read only the first row to learn the column count and (optionally) the
    # header names; the context manager closes the file right away.
    with open(fname, 'rb') as infile:
        first_row = next(csvreader(infile), None)

    start_row, heads = 0, []
    if first_row is not None:  # an empty file leaves the globals untouched
        if mode == 'training':
            trn_cols = len(first_row)
            has_heads = heads_in_trn_file
        else:
            tst_cols = len(first_row)
            has_heads = heads_in_tst_file
        if has_heads:
            heads = first_row
            start_row = 1

    # Map every non-excluded column to the converter function `conv`.
    # NOTE(review): this test ('test'/'predict') differs from the
    # mode == 'training' test used above; the two agree only for the modes
    # 'training', 'test' and 'predict' - confirm no other mode is passed.
    if mode not in ['test', 'predict']:
        n_cols, excluded = trn_cols, excl_trn_cols
    else:
        n_cols, excluded = tst_cols, excl_tst_cols
    cols = [i for i in range(n_cols) if i not in excluded]
    conv_dict = {c: conv for c in cols}

    if verbose:
        print('\nData import: %s | cols: %s \n' % (mode, cols))

    # Import all columns; excluded ones are simply read without a converter
    # (per the original note, they are dropped later in feature_prep).
    data = np.genfromtxt(fname, delimiter=delim, converters=conv_dict,
                         skip_header=start_row)

    if verbose:
        print('all heads:\n%s \n' % ', '.join(heads))
        print('shape of data: %s' % (np.shape(data),))
        print(data)

    return data, heads