def handle(self, *args, **options):
    """Compare per-function SLOC between two CSV files and log the overlap.

    Rows whose first column contains ``'Function'`` are read from both
    files; column 1 is used as the function name, column 2 as its file and
    column 3 as its SLOC (parsed with ``int``).  The number of functions in
    the secondary file that also appear in the primary file with an equal
    SLOC is logged at INFO level.

    Options:
        primary: path to the first CSV file (``~`` is expanded).
        secondary: path to the second CSV file (``~`` is expanded).

    Raises:
        Exception: if either path is empty or does not exist.
    """

    def _resolve(path):
        # Validate and open the *same* (expanded) path; previously the
        # expanded path was checked but the raw one was opened, so a
        # ``~/file.csv`` argument passed validation and then failed.
        expanded = os.path.expanduser(path) if path else None
        if expanded is None or not os.path.exists(expanded):
            raise Exception('{0} is not a valid CSV path'.format(path))
        return expanded

    def _load(path, label):
        # Map each function found in ``path`` to its SLOC; the first
        # occurrence wins and later ones are only logged.
        # NOTE(review): membership relies on Function's __eq__/__hash__,
        # which are defined outside this block -- confirm they compare
        # by name and file.
        dataset = dict()
        with open(path) as stream:
            for row in csv.reader(stream):
                # csv.reader yields an empty list for blank lines.
                if not row or 'Function' not in row[0]:
                    continue
                function = Function()
                function.name = row[1]
                function.file = row[2]
                if function not in dataset:
                    dataset[function] = int(row[3])
                else:
                    logger.debug(
                        '{0} duplicate in {1}'.format(function, label)
                    )
        return dataset

    primary = options['primary']
    secondary = options['secondary']

    # Both paths are validated before either file is read.
    primary_path = _resolve(primary)
    secondary_path = _resolve(secondary)

    primary_dataset = _load(primary_path, primary)
    secondary_dataset = _load(secondary_path, secondary)

    match = sum(
        1
        for item, sloc in secondary_dataset.items()
        if item in primary_dataset and primary_dataset[item] == sloc
    )

    logger.info(
        '{0}/{1} having matching SLOC'.format(
            match, len(secondary_dataset)
        )
    )
def handle(self, *args, **options):
    """Load functions from a CSV file that are not yet in the database.

    Rows whose first column contains ``'Function'`` are read; column 1 is
    used as the function name, column 2 as its file and column 3 as its
    SLOC (parsed with ``int``).  A row is skipped when a ``Function`` with
    the same name and file already exists in the database.  Functions that
    occur more than once in the CSV are not inserted at all; every
    occurrence is logged at DEBUG level instead.

    Options:
        source: path to the CSV file (``~`` is expanded).

    Raises:
        Exception: if the path is empty or does not exist.
        ValueError: if a SLOC column cannot be parsed as an integer.
    """
    source = options['source']

    # Validate and open the *same* (expanded) path; previously the expanded
    # path was checked but the raw one was opened.
    path = os.path.expanduser(source) if source else None
    if path is None or not os.path.exists(path):
        raise Exception('{0} is not a valid CSV path'.format(source))

    # NOTE(review): the containers below rely on Function's __eq__/__hash__,
    # which are defined outside this block -- confirm they compare by name
    # and file.
    pending = dict()    # candidate -> stored instance, awaiting insertion
    ambiguous = set()   # functions already seen more than once
    duplicates = list()

    with open(path) as stream:
        for row in csv.reader(stream):
            # csv.reader yields an empty list for blank lines.
            if not row or 'Function' not in row[0]:
                continue

            name = row[1]
            filename = row[2]
            sloc = int(row[3])

            if Function.objects.filter(name=name, file=filename).exists():
                continue

            function = Function()
            function.name = name
            function.file = filename
            function.sloc = sloc

            if function in ambiguous:
                # Third or later occurrence: keep it out of the insert set
                # (it used to be re-added once the earlier pair had been
                # removed).
                duplicates.append(function)
            elif function in pending:
                duplicates.append(function)
                duplicates.append(pending.pop(function))
                ambiguous.add(function)
            else:
                pending[function] = function

    functions = list(pending.values())
    if functions:
        logger.debug('Adding {0} functions.'.format(len(functions)))
        Function.objects.bulk_create(functions)

    for function in duplicates:
        logger.debug(
            'Duplicate {0} in {1} with {2} SLOC'.format(
                function.name, function.file, function.sloc
            )
        )

    logger.info('Appended {0} functions.'.format(len(functions)))
def handle(self, *args, **options):
    """Load function and file SLOC records from a CSV file.

    Rows whose first column contains ``'Function'`` become ``Function``
    objects (column 1: name, column 2: file, column 3: SLOC); otherwise,
    rows whose first column contains ``'File'`` become ``File`` objects
    (column 2: name, column 3: SLOC).  A record equal to one already
    collected is handed to ``self._update`` together with the collection,
    and the returned collection replaces the current one.  The collected
    records are then bulk-created.

    Options:
        source: path to the CSV file (``~`` is expanded).

    Raises:
        Exception: if the path is empty or does not exist.
        ValueError: if a SLOC column cannot be parsed as an integer.
    """
    source = options['source']

    # Validate and open the *same* (expanded) path; previously the expanded
    # path was checked but the raw one was opened.
    path = os.path.expanduser(source) if source else None
    if path is None or not os.path.exists(path):
        raise Exception('{0} is not a valid CSV path'.format(source))

    # NOTE(review): membership relies on the models' __eq__/__hash__, and
    # duplicate resolution on self._update; both are defined outside this
    # block.
    functions = set()
    files = set()

    with open(path) as stream:
        for row in csv.reader(stream):
            # csv.reader yields an empty list for blank lines.
            if not row:
                continue

            kind = row[0]
            if 'Function' in kind:
                name = row[1]
                filename = row[2]
                sloc = int(row[3])

                function = Function()
                function.name = name
                function.file = filename
                function.sloc = sloc

                if function not in functions:
                    functions.add(function)
                else:
                    functions = self._update(function, functions)
            elif 'File' in kind:
                name = row[2]
                sloc = int(row[3])

                # Named ``record`` so the open file handle is not shadowed.
                record = File()
                record.name = name
                record.sloc = sloc

                if record not in files:
                    files.add(record)
                else:
                    files = self._update(record, files)

    if functions:
        logger.debug('Adding {0} functions.'.format(len(functions)))
        Function.objects.bulk_create(functions)
    logger.info('Loaded {0} functions.'.format(len(functions)))

    if files:
        logger.debug('Adding {0} files.'.format(len(files)))
        File.objects.bulk_create(files)
    logger.info('Loaded {0} files.'.format(len(files)))