def import_file(filename):
    """Create a Survey row for every document id listed in *filename*.

    A file whose name ends in ".json" is parsed with ``json.load`` and the
    parsed value is iterated; any other file is read line by line. Each
    entry is stripped of surrounding whitespace, blank entries are skipped,
    and a new Survey is saved only when no Survey with that id exists yet.

    Args:
        filename: Path of the file to read.
    """
    # Imported inside the function, as the original did.
    from charsiu.charsiu_extras.models import Survey

    # The context manager closes the file even when parsing raises.
    with open(filename, "r") as infile:
        if filename.endswith(".json"):
            records = json.load(infile)
        else:
            records = list(infile)

    for raw in records:
        doc_id = raw.strip()
        if not doc_id:
            continue
        # exists() asks the database a yes/no question instead of loading
        # every matching row just to count them.
        if not Survey.objects.filter(id=doc_id).exists():
            survey = Survey()
            survey.id = doc_id
            survey.save()
def handle(self, *args, **options):
    """Collect randomly searched documents and save them as Surveys.

    One search query is built per entry in ``options["terms"]`` (a single
    empty term when none are given). Each query is the space-joined
    combination of an optional ``agency:<name>`` clause, the fixed
    ``type:public_submission`` clause and the term itself, which is wrapped
    in double quotes when it contains a space.

    Searches are then sampled at random. Every hit is fetched from the
    Docket Wrench document API and kept only if the document and all of its
    attachments have at least one view. The loop ends once ``count``
    documents were collected or every search is exhausted.

    Args:
        *args: Optional; ``args[0]`` is the number of documents to collect
            (defaults to 5).
        **options: Reads the keys ``"terms"``, ``"agency"`` and
            ``"practice"``. When ``options["practice"]`` is truthy nothing
            is saved.
    """
    count = int(args[0]) if args else 5
    terms = options["terms"] if options["terms"] else [""]

    # The agency clause does not depend on the term, so build it once.
    agency_clause = "agency:%s" % options["agency"] if options["agency"] else False

    queries = []
    for term in terms:
        quoted_term = '"%s"' % term if " " in term else term
        clauses = [agency_clause, "type:public_submission", quoted_term]
        # Falsy clauses (no agency, empty term) are dropped from the query.
        queries.append(" ".join(clause for clause in clauses if clause))

    # Ids and docket prefixes of the Surveys that already exist; handed to
    # every Search (presumably so known documents are not proposed again --
    # confirm against the Search class).
    docs = set()
    dockets = set()
    for survey in Survey.objects.all():
        docs.add(survey.id)
        dockets.add(survey.id.rsplit("-", 1)[0])

    searches = [Search(query, docs, dockets) for query in queries]
    results = []

    # main search loop
    while True:
        search = random.choice(searches)
        candidate = search.next()
        if not candidate:
            # This search has nothing more to offer.
            searches.remove(search)
        else:
            try:
                response = urllib2.urlopen(
                    "http://docketwrench.sunlightfoundation.com/api/1.0/document/%s?format=json"
                    % candidate["_id"]
                )
                try:
                    document = json.load(response)
                finally:
                    # Release the connection whether or not parsing worked.
                    response.close()
            except Exception:
                # Best effort: a document that cannot be fetched or parsed is
                # skipped. Catching Exception rather than everything lets
                # KeyboardInterrupt and SystemExit stop the command.
                continue

            # if it doesn't have any views, or has any attachments that don't
            # have any views, skip it
            if len(document["views"]) == 0 or any(
                len(attachment["views"]) == 0 for attachment in document["attachments"]
            ):
                # Single-argument parenthesised print behaves exactly like the
                # print statement on Python 2.
                print("Skipping %s because it's broken." % candidate["_id"])
            else:
                results.append(candidate)
                print('On search "%s", adding document %s' % (search.query, candidate["_id"]))

        if len(results) >= count or len(searches) == 0:
            break

    if options["practice"]:
        print("Practice mode; doing nothing.")
    else:
        for result in results:
            survey = Survey()
            survey.id = result["_id"]
            survey.save()