Пример #1
0
	def __init__(self, **kwargs):
		"""Populate ``self.fields`` from raw CSV text or from ready-made fields.

		Keyword arguments:
			text: CSV text; when truthy it is parsed with
				``parse_csv(text, col_names)`` and takes precedence over
				``fields``.
			col_names: forwarded to ``parse_csv`` as its second argument
				(``None`` when omitted); only consulted together with ``text``.
			fields: already-built fields, stored as-is when ``text`` is
				missing or empty.

		Raises:
			ValueError: if neither ``text`` nor ``fields`` holds a truthy value.
		"""
		text = kwargs.get('text')
		fields = kwargs.get('fields')
		if text:
			self.fields = parse_csv(text, kwargs.get('col_names'))
		elif fields:
			self.fields = fields
		else:
			# ValueError is still an Exception, so existing handlers keep
			# working, but the failure now explains itself.
			raise ValueError("either a non-empty 'text' or 'fields' keyword argument is required")
Пример #2
0
def executor():
    """Chain the helper calls that make up one run.

    In order: read the arguments with ``parse_input``, create a
    ``DbHandler``, let ``service_args_handler`` act on both, resolve the
    source with ``source_path_handler``, parse it with ``parse_csv`` and hand
    the parsed data to ``create_struct``.
    """
    cli_args = parse_input()
    db_tables = DbHandler()

    service_args_handler(cli_args, db_tables)

    # The source is resolved first, then parsed, then stored.
    csv_rows = parse_csv(source_path_handler(cli_args))
    create_struct(csv_rows, db_tables, cli_args)
Пример #3
0
def parse_csv_or_die(args, parse_row, errors=None, skip=1, **kwargs):
    """Parse the CSV file or print the errors and exit.

    Args:
        args: mapping that must contain the path of the file under the
            ``"<file>"`` key.
        parse_row: forwarded to ``parser.parse_csv`` as its second argument.
        errors: optional list of error messages collected by the caller.
            New messages are appended to it in place. When omitted, a fresh
            list is used for every call.
        skip: forwarded to ``parser.parse_csv`` as ``skip``.
        **kwargs: forwarded unchanged to ``parser.parse_csv``.

    Returns:
        Whatever ``parser.parse_csv`` returns.

    Exits:
        Prints every collected message and calls ``sys.exit(1)`` when the
        error list is non-empty after the parse attempt. This includes
        messages the caller passed in through ``errors``.
    """
    # A list literal as the default would be shared between calls and keep
    # the messages of earlier invocations.
    if errors is None:
        errors = []

    try:
        with open(args["<file>"], "r") as f:
            people = parser.parse_csv(f, parse_row, skip=skip, **kwargs)
    except KeyError:
        errors.append("<file> is required")
    except FileNotFoundError:
        errors.append(f"'{args['<file>']}' doesn't exist")
    except parser.CSVException as e:
        errors.extend(e.errors)

    if errors:
        for message in errors:
            print(message)
        sys.exit(1)

    return people
def bvgFeatureExtractor(bvg_file, index=None, 
						intersections=False, fuzzy_grid=False, 
						vein_angles=False, avg_angles=False, rishi_angles=False, 
						up_down=False, midpoint_veins=False,
						print_features=False, print_advanced_features=False):
	"""Run the requested vein feature extractors on one BVG file.

	The file is parsed with ``parser.parse_csv``, its dimensions are read with
	``parser.get_dimensions``, the veins are drawn onto a blank image, and
	then every extractor whose flag is set is executed.

	Parameters:
		bvg_file: passed to ``parser.parse_csv`` and ``parser.get_dimensions``.
		index: when not None, used to build the integer ``grid`` at the end
			of the visible code.
		intersections, fuzzy_grid, vein_angles: enable the basic extractors.
		avg_angles, rishi_angles, up_down, midpoint_veins: enable the
			extractors whose results are stored in ``advanced_vein_features``.
		print_features, print_advanced_features: print the collected features.

	Notes:
		``vein_features``, ``advanced_vein_features`` and
		``fuzz_cell_dimension`` are not defined inside this function; they
		must already exist at module level.
		No value is returned in the code visible here.
		The body uses Python 2 syntax (print statement, ``dict.iteritems``).
	"""
	veins = parser.parse_csv(bvg_file)
	dims = parser.get_dimensions(bvg_file)
	# Grid cell scale is derived from dims[1], the same value used as the
	# image's first (row) dimension below.
	fuzz_cell_scale = dims[1] / fuzz_cell_dimension
	# Blank 3-channel uint8 canvas with dims[1] rows and dims[0] columns.
	out_img = np.zeros((dims[1], dims[0], 3), np.uint8)
	drawVeins(veins, out_img)
	if intersections:
		extractIntersections(veins, out_img)
		# Called without arguments - presumably works on state left behind by
		# extractIntersections; TODO confirm.
		extractIntersectionDistances()
	if fuzzy_grid:
		extractGrid(veins, out_img, fuzz_cell_scale, drawLines=True)
	if vein_angles:
		extractVeinAngles(veins)
	
	# The advanced extractors store their results in advanced_vein_features.
	if avg_angles:
		# Reads vein_features['veins'] - presumably filled by extractVeinAngles,
		# so this flag likely requires vein_angles as well; TODO confirm.
		advanced_vein_features['avg_angles'] = avgVeinAngles(vein_features['veins'], buckets=True)
	if rishi_angles:
		advanced_vein_features['rishi_angles'] = completeVeinAngles(veins, buckets=True)
	if up_down:
		# Reads vein_features['fuzzy_grid_heatmap'] - presumably filled by
		# extractGrid, so this flag likely requires fuzzy_grid; TODO confirm.
		advanced_vein_features['up_down'] = upDowns(vein_features['fuzzy_grid_heatmap'])
	if midpoint_veins:
		advanced_vein_features['midpoint_veins'] = midpointVeins(veins, buckets=True)

	if print_features:
		print 'PRINTING BASIC VEIN FEATURES:\n=====================\n'
		pprint(vein_features)
	if print_advanced_features:
		print 'PRINTING ADVANCED VEIN FEATURES:\n=====================\n'
		# Each value is iterated and its items are joined with ', '.
		for key, val in advanced_vein_features.iteritems():
			print 'Key: %s : %s' % (key, ', '.join(str(i) for i in val))
	if index is not None:
		# Builds an integer from the digits '22' followed by index (e.g. 221).
		# NOTE(review): grid is not used in the visible code - the function
		# looks truncated here; presumably a subplot position, confirm.
		grid = int('22' + str(index))
Пример #5
0
 def test_parse_csv(self):
     """Parsing 'small_csv.txt' must leave exactly one csv_items entry with id 402983319863."""
     parse_csv('small_csv.txt')
     matching_items = self.db.csv_items.find({'id': 402983319863})
     assert matching_items.count() == 1
Пример #6
0
def main():
  '''
  Interactive entry point of the Page Rank program.

  Asks whether the input is a csv or a snap file, then for the file name,
  builds the graph with the matching parser, runs page rank on it and prints
  the timing of both steps and the number of iterations.

  Ex:
    > python lab3.py [-w]
    CSC 466: Lab 3 - Page Rank & Link Analysis
    Parse:
      1) csv
      2) snap
    (User enters 1 or 2)
    File name: (User enters file name here)

  The optional flag '-w' (first command-line argument) selects the weighted
  csv parser, which is intended for the Football csv. It has no effect on
  snap files.

  Any menu answer other than '1' or '2' prints a message and returns without
  parsing anything.

  Note: -w doesn't quite work at the moment. Please ignore it for now.
  '''

  # '-w' is only recognised as the first command-line argument.
  is_weighted = len(sys.argv) > 1 and sys.argv[1] == '-w'

  # Menu
  print('CSC 466: Lab 3 - PageRank & Link Analysis')
  parse_menu = raw_input('Parse:\n' +
                         '1) csv\n' +
                         '2) snap\n'
                        )
  file_name = raw_input('File name: ')

  # The algorithm is the same for both formats; only the parser differs.
  if parse_menu == '1':
    parse = parser.parse_weighted_csv if is_weighted else parser.parse_csv
  elif parse_menu == '2':
    parse = parser.parse_snap
  else:
    print('Invalid input - exiting')
    return

  # PARSING - every parser returns a tuple (list, dictionary, dictionary)
  print('Parsing/Creating Graph...')
  start = time.time()    # Tracking time
  (nodes, out_degrees, in_degrees) = parse(file_name)
  end = time.time()
  print('Parse/Graph Set-up Time: ' + str(end - start) + ' seconds')

  # Sets up page rank structures
  pagerank.set_up(nodes, out_degrees, in_degrees)

  # PAGE RANKING
  print('Page Ranking...')
  start = time.time()
  num_iters = pagerank.page_rank(0)  # Stores # of page rank iterations
  end = time.time()

  # Statistics
  print('Page Rank Time: ' + str(end - start) + ' seconds')
  print('Page Rank Iterations: ' + str(num_iters))
Пример #7
0
    legend_html = '''
<div style="position: fixed;
            bottom: 50px; left: 50px;
            border:2px solid grey; z-index:9999; font-size:14px; background:white;
            ">&nbsp; The Population Markers <br>
              &nbsp; < 10.000.000  &nbsp; <i class="fa fa-circle fa-2x" style="color:green"></i><br>
              &nbsp; < 20.000.000  &nbsp; <i class="fa fa-circle fa-2x" style="color:purple"></i><br>
              &nbsp; > 10.000.000  &nbsp; <i class="fa fa-circle fa-2x" style="color:orange"></i>
</div>
'''

    map.add_child(mov_density)
    map.add_child(movies_loc)
    map.add_child(population)
    map.get_root().html.add_child(folium.Element(legend_html))

    map.add_child(folium.LayerControl())

    map.save('{}.html'.format(year))


if __name__ == "__main__":
    # Script entry point: gather the inputs, then build and save the map.
    year = get_year()
    locations = parse_csv("locations.csv")
    # NOTE(review): `places` is not used again in this block, and the same
    # to_dict(locations, year) call is repeated for `mov_locations` below -
    # one of the two looks redundant; confirm to_dict has no side effects
    # before removing it.
    places = to_dict(locations, year)
    usr_loc = get_user_location()
    mov_locations = to_dict(locations, year)
    filtered = to_sorted(locations, year)
    map_creator(usr_loc, mov_locations, filtered, year)
Пример #8
0
def main():
  '''
  Interactive entry point of the Page Rank program (native back ends).

  Asks whether the input is a csv or a snap file, for the file name and for
  the back end to use, builds the graph with the matching parser and sends
  its edges to the selected external executable.

  Ex:
    > python lab3.py [-w, -c]
    CSC 466: Lab 3 - Page Rank & Link Analysis
    Parse:
      1) csv
      2) snap
    (User enters 1 or 2)
    File name: (User enters file name here)
    Xeon Phi, Cuda or Both? (x/c/b): (User enters x, c or b)

  The optional flag '-w' (first command-line argument) selects the weighted
  csv parser, which is intended for the Football csv. It has no effect on
  snap files.

  The iteration, node and edge counts passed to the executable are looked up
  in the table returned by specs(), keyed by the file name without its
  directory part. An unknown file name raises KeyError.

  Any menu answer other than '1' or '2' prints a message and returns without
  parsing anything.

  Note: -w doesn't quite work at the moment. Please ignore it for now.
  '''

  fileSpecs = specs()

  # '-w' is only recognised as the first command-line argument.
  is_weighted = len(sys.argv) > 1 and sys.argv[1] == '-w'

  # Menu
  print('CSC 466: Lab 3 - PageRank & Link Analysis')
  parse_menu = raw_input('Parse:\n' +
                         '1) csv\n' +
                         '2) snap\n'
                        )
  file_name = raw_input('File name: ')
  version = raw_input('Xeon Phi, Cuda or Both? (x/c/b): ')

  # The algorithm is the same for both formats; only the parser differs.
  if parse_menu == '1':
    parse = parser.parse_weighted_csv if is_weighted else parser.parse_csv
  elif parse_menu == '2':
    parse = parser.parse_snap
  else:
    print('Invalid input - exiting')
    return

  # PARSING - every parser returns (list, dictionary, dictionary, names)
  print('Parsing/Creating Graph...')
  start = time.time()    # Tracking time
  (nodes, out_degrees, in_degrees, names) = parse(file_name)
  end = time.time()
  print('Parse/Graph Set-up Time: ' + str(end - start) + ' seconds')

  # fileSpecs is keyed by the bare file name. rfind() returns the int -1 when
  # there is no '/', which makes the slice start at 0 and keep the whole name,
  # so no separate check is needed.
  file_name = file_name[file_name.rfind('/') + 1:]

  (numIterations, numNodes, numEdges) = fileSpecs[file_name]
  print('%s %s %s' % (numIterations, numNodes, numEdges))
  print(out_degrees.get(12028))

  # Call C program (Xeon Phi build)
  if version == 'x' or version == 'b':
    p = subprocess.Popen(['./pr_phi', str(numNodes), str(numEdges), str(numIterations)],
                         stdout=subprocess.PIPE, stdin=subprocess.PIPE)

    # One "<node> <in-neighbour>" line per incoming edge.
    edge_lines = []
    for node in nodes:
      sources = in_degrees.get(node)
      if sources is not None:
        for source in sources:
          edge_lines.append('%d %d\n' % (int(node), int(source)))

    # communicate() feeds stdin and drains stdout at the same time. Writing
    # everything to p.stdin first can block forever once the child fills its
    # stdout pipe.
    output = p.communicate(''.join(edge_lines))[0]
    output = output[:-1]   # drop the last character of the output

    useNames = parse_menu == '1' or file_name == "wiki-Vote.txt"
    print(str(useNames))
    printPageRankValues(output, useNames, names)

  # Call C program (CUDA build)
  if version == 'c' or version == 'b':
    # NOTE(review): cuda_command is not defined in this function and must
    # exist at module level. The process is started but receives no input and
    # its output is never read - this branch looks unfinished.
    p = subprocess.Popen(cuda_command, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
Пример #9
0
def demo(opt):
    """Recognise the text of every image in ``opt.image_folder`` and export it.

    For each image one line is written to ``<opt.output_folder>result.csv``:
    the ``x,y`` pairs of its bounding box followed by the predicted text. The
    boxes come from ``parse_csv(opt.input_image, opt.boxescsv)`` and are
    matched to images by position, so they must be listed in the order in
    which the (unshuffled) data loader yields the images. Afterwards the CSV
    is copied to ``/input/output`` and that directory is archived as
    ``per_word_visual.zip``.

    Args:
        opt: option namespace. The attributes read here are ``input_image``,
            ``boxescsv``, ``Prediction``, ``character``, ``rgb``,
            ``saved_model``, ``imgH``, ``imgW``, ``PAD``, ``image_folder``,
            ``batch_size``, ``workers``, ``batch_max_length``,
            ``output_folder`` and the model hyper-parameters that are
            printed. ``num_class`` (and ``input_channel`` when ``rgb`` is
            set) are written back to it.
    """
    bboxes = parse_csv(opt.input_image, opt.boxescsv)

    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    result_path = f'{opt.output_folder}result.csv'

    # predict
    model.eval()
    # The result file is opened once for the whole run. Reopening it with 'w'
    # for every batch would keep only the rows of the last batch.
    with torch.no_grad(), open(result_path, 'w') as log:
        first_index = 0  # dataset index of the first image of the current batch
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            # Select max probability (greedy decoding), then decode the
            # indices to characters.
            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, preds_size)
            else:
                preds = model(image, text_for_pred, is_train=False)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            # No confidence score is computed: it was never written anywhere,
            # and taking the last element of an empty cumulative product
            # failed for empty predictions.
            for img_index, pred in enumerate(preds_str):
                if 'Attn' in opt.Prediction:
                    # prune after "end of sentence" token ([s])
                    pred = pred[:pred.find('[s]')]

                # Boxes are indexed over the whole dataset, not per batch.
                for x, y in bboxes[first_index + img_index]:
                    log.write(f'{x},{y},')
                log.write(f'{pred}\n')

            first_index += batch_size

    # Copy the finished log to the local output folder and archive that
    # folder. shutil.copy avoids building a shell command from the path.
    shutil.copy(result_path, '/input/output')
    shutil.make_archive('per_word_visual', 'zip', '/input/output')
Пример #10
0
def _run_pagerank_binary(executable, nodes, in_degrees, out_degrees,
                         numNodes, numEdges, numIterations):
    """Send the graph to one native PageRank executable and return its output.

    The executable is started with the node, edge and iteration counts as
    arguments. Its standard input receives one line per incoming edge holding
    the in-neighbour, followed by one ``"<node> <in count> <out count>"`` line
    per node. The captured standard output is returned without its last
    character.
    """
    p = subprocess.Popen(
        [executable, str(numNodes), str(numEdges), str(numIterations)],
        stdout=subprocess.PIPE,
        stdin=subprocess.PIPE,
    )

    lines = []
    for node in nodes:
        sources = in_degrees.get(node)
        if sources is not None:
            lines.extend("%d\n" % int(source) for source in sources)

    for node in nodes:
        sources = in_degrees.get(node)
        in_count = len(sources) if sources is not None else 0
        out_count = out_degrees.get(node)
        if out_count is None:
            out_count = 0
        lines.append("%d %d %d\n" % (int(node), int(in_count), int(out_count)))

    # communicate() feeds stdin and drains stdout at the same time. Writing
    # everything to p.stdin first can block forever once the child fills its
    # stdout pipe.
    output = p.communicate("".join(lines))[0]
    return output[:-1]


def main():
    """
    Command-line entry point of the Page Rank program (native back ends).

    Ex:
      > python lab3.py <file> <phi|cuda|both> [-w]

    The first argument is the graph file: a name ending in "csv" is parsed as
    csv, a name ending in "txt" as a SNAP file. Any other name is rejected.
    The second argument selects the executable(s) to run: "phi" (./pr_test),
    "cuda" (./pr_cuda) or "both".

    The optional flag '-w' (anywhere on the command line) selects the
    weighted csv parser, which is intended for the Football csv.

    The iteration, node and edge counts passed to the executables are looked
    up in the table returned by specs(), keyed by the file name without its
    directory part. An unknown file name raises KeyError.

    Note: -w doesn't quite work at the moment. Please ignore it for now.
    """

    fileSpecs = specs()

    is_weighted = "-w" in sys.argv  # Used for '-w' flag

    print("CSC 466: Lab 3 - PageRank & Link Analysis")

    if len(sys.argv) < 3:
        # Fail with a readable message instead of an IndexError.
        sys.exit("Usage: %s <file.csv|file.txt> <phi|cuda|both> [-w]" % sys.argv[0])
    file_name = sys.argv[1]
    version = sys.argv[2]

    if file_name.endswith("csv"):
        parse_menu = "1"
    elif file_name.endswith("txt"):
        parse_menu = "2"
    else:
        # Stop here: carrying on would parse an unsupported file as csv.
        print("Format not supported")
        print("Invalid input - exiting")
        return

    # PARSING - the algorithm is the same, just parsing is different.
    # Every parser returns (list, dictionary, dictionary, names).
    if parse_menu == "2":
        parse = parser.parse_snap
    elif is_weighted:
        parse = parser.parse_weighted_csv
    else:
        parse = parser.parse_csv

    print("Parsing/Creating Graph...")
    start = time.time()  # Tracking time
    (nodes, out_degrees, in_degrees, names) = parse(file_name)
    end = time.time()
    print("Parse/Graph Set-up Time: " + str(end - start) + " seconds")

    # fileSpecs is keyed by the bare file name. rfind() returns the int -1
    # when there is no "/", which makes the slice start at 0 and keep the
    # whole name, so no separate check is needed.
    file_name = file_name[file_name.rfind("/") + 1:]

    (numIterations, numNodes, numEdges) = fileSpecs[file_name]
    print("%s %s %s" % (numIterations, numNodes, numEdges))

    useNames = parse_menu == "1" or file_name == "wiki-Vote.txt"

    # Call C program(s)
    if version == "phi" or version == "both":
        output = _run_pagerank_binary(
            "./pr_test", nodes, in_degrees, out_degrees,
            numNodes, numEdges, numIterations,
        )
        printPageRankValues(output, useNames, names)

    if version == "cuda" or version == "both":
        output = _run_pagerank_binary(
            "./pr_cuda", nodes, in_degrees, out_degrees,
            numNodes, numEdges, numIterations,
        )
        print(str(useNames))
        printPageRankValues(output, useNames, names)