def make_worldwide_question_table(header):
    global worldwide
    worldwide_question_table = collections.OrderedDict()
    dictionaries.append(worldwide_question_table)

    question_table_start = 0
    if worldwide_q:
        header["worldwide_question_offset"] = offset_count()
        if file_type == "v":
            question_table_start = len(country_language[country_code]) * national

    if file_type == "v":
        question_table_count = len(country_language[country_code])
    elif file_type == "q":
        question_table_count = 9

    for q in question_keys:
        if is_worldwide(q):
            worldwide_question_table["poll_id_%s" % num()] = u32(q)
            worldwide_question_table["poll_category_1_%s" % num()] = u8(get_category(q))
            worldwide_question_table["poll_category_2_%s" % num()] = u8(categories[get_category(q)])
            worldwide_question_table["opening_timestamp_%s" % num()] = u32(get_timestamp(1, "w", get_date(q)))
            worldwide_question_table["closing_timestamp_%s" % num()] = u32(get_timestamp(2, "w", get_date(q)))
            worldwide_question_table["question_table_count_%s" % num()] = u8(question_table_count)
            worldwide_question_table["question_table_start_%s" % num()] = u32(question_table_start)
            question_table_count += 1

    return worldwide_question_table
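# The u8/u16/u32 and s16 helpers used throughout these builders are not shown
# in this section. A minimal sketch of what they might look like, assuming the
# tables are packed big-endian (the Wii's native byte order) with Python's
# struct module:

import struct

def u8(value):
    """Pack an int into 1 unsigned byte."""
    return struct.pack(">B", value)

def u16(value):
    """Pack an int into 2 unsigned big-endian bytes."""
    return struct.pack(">H", value)

def u32(value):
    """Pack an int into 4 unsigned big-endian bytes."""
    return struct.pack(">I", value)

def s16(value):
    """Pack an int into 2 signed big-endian bytes."""
    return struct.pack(">h", value)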
def make_national_question_table(header):
    global national
    national_question_table = collections.OrderedDict()
    dictionaries.append(national_question_table)

    question_table_count = 0
    if national_q:
        header["national_question_offset"] = offset_count()

    for q in question_keys:
        if not is_worldwide(q):
            national_question_table["poll_id_%s" % num()] = u32(q)
            national_question_table["poll_category_1_%s" % num()] = u8(get_category(q))
            national_question_table["poll_category_2_%s" % num()] = u8(categories[get_category(q)])
            national_question_table["opening_timestamp_%s" % num()] = u32(get_timestamp(1, "n", get_date(q)))
            national_question_table["closing_timestamp_%s" % num()] = u32(get_timestamp(2, "n", get_date(q)))
            national_question_table["question_table_count_%s" % num()] = u8(len(country_language[country_code]))
            national_question_table["question_table_start_%s" % num()] = u32(question_table_count)
            question_table_count += len(country_language[country_code])

    return national_question_table
def make_national_result_table(header):
    table = collections.OrderedDict()
    dictionaries.append(table)

    national_result_detailed_number_count = 0
    national_result_detailed_number_tables = region_number[country_code]

    header["national_result_offset"] = offset_count()

    for i in results:
        if results[i][8] == "n":
            country_index = country_codes.index(country_code)
            table["poll_id_%s" % num()] = u32(i)
            table["male_voters_response_1_num_%s" % num()] = u32(results[i][0][country_index])
            table["male_voters_response_2_num_%s" % num()] = u32(results[i][2][country_index])
            table["female_voters_response_1_num_%s" % num()] = u32(results[i][1][country_index])
            table["female_voters_response_2_num_%s" % num()] = u32(results[i][3][country_index])
            table["predictors_response_1_num_%s" % num()] = u32(results[i][4][country_index])
            table["predictors_response_2_num_%s" % num()] = u32(results[i][5][country_index])
            table["show_voter_number_flag_%s" % num()] = u8(1)
            table["detailed_results_flag_%s" % num()] = u8(1)
            table["national_result_detailed_number_number_%s" % num()] = u8(national_result_detailed_number_tables)
            table["starting_national_result_detailed_number_table_number_%s" % num()] = u32(national_result_detailed_number_count)
            national_result_detailed_number_count += national_result_detailed_number_tables

    return table
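# offset_count() is also not defined in this section. The builders follow a
# two-pass pattern: offset fields are first written as u32(0) placeholders in
# the header, and each make_* function records offset_count() just before its
# table is emitted. One plausible implementation (a sketch, not the project's
# actual helper), assuming `dictionaries` holds the already-built tables in
# file order and every value is a bytes object:

def offset_count():
    """Return the current byte offset: the total size of every value packed
    into the dictionaries built so far."""
    return sum(
        len(value)
        for dictionary in dictionaries
        for value in dictionary.values()
    )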
def make_source_pictures(source_table, data):
    source_pictures = {}
    dictionaries.append(source_pictures)
    source_articles = []
    # These are the news sources which will use a custom JPG for the logo.
    sources = [
        "ANP",
        "AP",
        "dpa",
        "Reuters",
        "SID",
        "NU.nl",
    ]
    for article in list(data.values()):
        if article[8] not in source_articles:
            if article[8] in sources:
                source_articles.append(article[8])
                source_table["pictures_offset_%s" % article[8]] = offset_count()
                with open("./Channels/News_Channel/logos/%s.jpg" % article[8], "rb") as source_file:
                    image = source_pictures["logo_%s" % article[8]] = source_file.read()
                source_table["pictures_size_%s" % article[8]] = u32(len(image))
                if source_table["source_picture_%s" % article[8]] != u8(0):
                    source_table["source_picture_%s" % article[8]] = u8(0)
    return source_pictures
def make_source_table(header, articles_table, source, data):
    source_table = {}
    dictionaries.append(source_table)
    header["source_offset"] = offset_count()  # Offset for the source table.
    source_articles = []
    numbers = 0
    numbers_article = 0
    for article in list(data.values()):
        if article[8] not in source_articles:
            source_articles.append(article[8])
            source_table["source_picture_%s" % article[8]] = u8(source["picture"])  # Picture for the source.
            source_table["source_position_%s" % article[8]] = u8(source["position"])  # Position for the source.
            source_table["padding_%s" % article[8]] = u16(0)  # Padding.
            source_table["pictures_size_%s" % article[8]] = u32(0)  # Size of the source picture.
            source_table["pictures_offset_%s" % article[8]] = u32(0)  # Offset for the source picture.
            source_table["name_size_%s" % article[8]] = u32(0)  # Size of the source name.
            source_table["name_offset_%s" % article[8]] = u32(0)  # Offset for the source name.
            source_table["copyright_size_%s" % article[8]] = u32(0)  # Size of the copyright.
            source_table["copyright_offset_%s" % article[8]] = u32(0)  # Offset for the copyright.
            numbers += 1
    for article in list(data.values()):
        numbers_article += 1
        articles_table["source_%s_number" % numbers_article] = u32(source_articles.index(article[8]))  # Number for the source.
    header["source_number"] = u32(numbers)  # Number of entries for the source table.
    return source_table
def make_header():
    header = collections.OrderedDict()
    dictionaries.append(header)
    header["timestamp"] = u32(get_timestamp(0, None, None))
    header["country_code"] = u8(country_code)
    header["publicity_flag"] = u8(0)
    header["question_version"] = u8(0 if file_type == "r" else 1)
    header["result_version"] = u8(1 if file_type == "r" else 0)
    header["national_question_number"] = u8(national)
    header["national_question_offset"] = u32(0)
    header["worldwide_question_number"] = u8(worldwide)
    header["worldwide_question_offset"] = u32(0)
    header["question_number"] = u8(questions * len(country_language[country_code]))
    header["question_offset"] = u32(0)
    header["national_result_entry"] = u8(national_results)
    header["national_result_offset"] = u32(0)
    header["national_result_detailed_number"] = u16(national_results * region_number[country_code])
    header["national_result_detailed_offset"] = u32(0)
    header["position_number"] = u16(
        0 if file_type == "q" or national_results == 0
        else 22 if country_code == 77
        else len(position_table[country_code]) if country_code in position_table.keys()
        else 0
    )
    header["position_offset"] = u32(0)
    header["worldwide_result_number"] = u8(worldwide_results)
    header["worldwide_result_offset"] = u32(0)
    header["worldwide_result_detailed_number"] = u16(0)
    header["worldwide_result_detailed_offset"] = u32(0)
    header["country_name_number"] = u16(
        len(countries) * 7 if file_type == "r" and nw == "w"
        else 0 if file_type == "q" or file_type == "r"
        else len(countries) * 7
    )
    header["country_name_offset"] = u32(0)
    return header
def make_worldwide_result_table(header):
    table = collections.OrderedDict()
    dictionaries.append(table)
    worldwide_detailed_table_count_all = 0
    header["worldwide_result_offset"] = offset_count()
    for i in results:
        if results[i][8] == "w":
            worldwide_detailed_table_count = 0
            for j in range(len(countries)):  # 33 countries in total.
                total = 0
                for voters in range(0, 4):
                    total += results[i][voters][j]
                if total > 0:
                    worldwide_detailed_table_count += 1
            table["poll_id_%s" % num()] = u32(i)
            table["male_voters_response_1_num_%s" % num()] = u32(sum(results[i][0]))
            table["male_voters_response_2_num_%s" % num()] = u32(sum(results[i][2]))
            table["female_voters_response_1_num_%s" % num()] = u32(sum(results[i][1]))
            table["female_voters_response_2_num_%s" % num()] = u32(sum(results[i][3]))
            table["predictors_response_1_num_%s" % num()] = u32(sum(results[i][4]))
            table["predictors_response_2_num_%s" % num()] = u32(sum(results[i][5]))
            table["total_worldwide_detailed_tables_%s" % num()] = u8(worldwide_detailed_table_count)
            table["starting_worldwide_detailed_table_number_%s" % num()] = u32(worldwide_detailed_table_count_all)
            worldwide_detailed_table_count_all += worldwide_detailed_table_count
    return table
def make_question_text_table(header):
    global questions
    question_text_table = collections.OrderedDict()
    dictionaries.append(question_text_table)
    header["question_offset"] = offset_count()
    for q in question_keys:
        if not is_worldwide(q):
            language_list = country_language[country_code]
        else:
            if file_type == "v":
                language_list = country_language[country_code]
            elif file_type == "q":
                language_list = range(1, 9)
        for language_code in language_list:
            if get_question(q, language_code) is not None:
                num = question_keys.index(q)
                question_text_table["language_code_%s_%s" % (num, language_code)] = u8(language_code)
                question_text_table["question_offset_%s_%s" % (num, language_code)] = u32(0)
                question_text_table["response_1_offset_%s_%s" % (num, language_code)] = u32(0)
                question_text_table["response_2_offset_%s_%s" % (num, language_code)] = u32(0)
    return question_text_table
def make_pictures(pictures_table, data):
    pictures = {}
    dictionaries.append(pictures)
    numbers = 0
    for article in list(data.values()):
        numbers += 1
        if article[4] is not None:
            if "pictures_%s_offset" % numbers in pictures_table:
                pictures_table["pictures_%s_offset" % numbers] = offset_count()  # Offset for the pictures.
                pictures["pictures_%s_read" % numbers] = article[4]  # Read the pictures.
                pictures["nullbyte_%s_pictures" % numbers] = u8(0)  # Null byte for the pictures.
        for types in ["captions", "credits"]:
            if pictures_table["%s_%s_offset" % (types, numbers)] != u32(0) and pictures_table["%s_%s_size" % (types, numbers)] == u32(0):
                pictures_table["%s_%s_offset" % (types, numbers)] = u32(0)
    return pictures
def make_header(data):
    header = collections.OrderedDict()
    dictionaries.append(header)
    header["updated_timestamp_1"] = get_timestamp(1)  # Updated time.
    header["term_timestamp"] = get_timestamp(2)  # Timestamp for the term.
    header["country_code"] = u32_littleendian(country_code)  # Wii country code.
    header["updated_timestamp_2"] = get_timestamp(1)  # Third timestamp.

    # List of languages that appear on the language select screen.
    numbers = 0
    for language in languages:
        numbers += 1
        header["language_select_%s" % numbers] = u8(language)
    # Fill the rest of the language slots with null.
    while numbers < 16:
        numbers += 1
        header["language_select_%s" % numbers] = u8(255)

    header["language_code"] = u8(language_code)  # Wii language code.
    header["goo_flag"] = u8(0)  # Flag to make the globe display "Powered by Goo".
    header["language_select_screen_flag"] = u8(0)  # Flag to bring up the language select screen.
    header["download_interval"] = u8(30)  # Interval in minutes to check for new articles to display on the Wii Menu.
    header["message_offset"] = u32(0)  # Offset for a message.
    header["topics_number"] = u32(0)  # Number of entries for the topics table.
    header["topics_offset"] = u32(0)  # Offset for the topics table.
    header["articles_number"] = u32(0)  # Number of entries for the articles table.
    header["articles_offset"] = u32(0)  # Offset for the articles table.
    header["source_number"] = u32(0)  # Number of entries for the source table.
    header["source_offset"] = u32(0)  # Offset for the source table.
    header["locations_number"] = u32(0)  # Number of entries for the locations table.
    header["locations_offset"] = u32(0)  # Offset for the locations table.
    header["pictures_number"] = u32(0)  # Number of entries for the pictures table.
    header["pictures_offset"] = u32(0)  # Offset for the pictures table.
    header["count"] = u16(480)  # Count value.
    header["unknown"] = u16(0)  # Unknown.
    header["wiimenu_articles_number"] = u32(0)  # Number of Wii Menu article entries.
    header["wiimenu_articles_offset"] = offset_count()  # Offset for the Wii Menu article table.

    # Reserve size/offset slots for up to 11 unique headlines,
    # comparing headlines with newlines stripped.
    numbers = 0
    headlines = []
    for article in list(data.values()):
        if numbers < 11:
            headline = article[3].replace(b"\n", b"")
            if headline not in headlines:
                numbers += 1
                headlines.append(headline)
                header["headline_%s_size" % numbers] = u32(0)  # Size of the headline.
                header["headline_%s_offset" % numbers] = u32(0)  # Offset for the headline.
    return header
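# Unlike the big-endian fields elsewhere, make_header() above packs the
# country code with u32_littleendian(). A minimal sketch of that helper,
# assuming it simply mirrors u32() with reversed byte order:

import struct

def u32_littleendian(value):
    """Pack an int into 4 unsigned little-endian bytes."""
    return struct.pack("<I", value)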
def make_country_name_table(header):
    global countries
    country_name_table = collections.OrderedDict()
    dictionaries.append(country_name_table)
    header["country_name_offset"] = offset_count()
    # dict_keys has no .index() in Python 3, so enumerate the keys instead.
    for num, k in enumerate(countries.keys()):
        for i in range(len(languages)):
            country_name_table["language_code_%s_%s" % (num, i)] = u8(i)
            country_name_table["text_offset_%s_%s" % (num, i)] = u32(0)
    return country_name_table
def make_national_result_detailed_table(header):
    table = collections.OrderedDict()
    dictionaries.append(table)
    header["national_result_detailed_offset"] = offset_count()
    for i in results:
        if results[i][8] == "n":
            for j in range(region_number[country_code]):
                country_index = country_codes.index(country_code)
                table["voters_response_1_num_%s" % num()] = u32(results[i][6][country_index][j])
                table["voters_response_2_num_%s" % num()] = u32(results[i][7][country_index][j])
                table["position_entry_table_count_%s" % num()] = u8(
                    0 if (results[i][6][country_index][j] == 0 and results[i][7][country_index][j] == 0)
                    or country_code not in position_table.keys()
                    else position_table[country_code][j]
                )
                table["starting_position_entry_table_%s" % num()] = u32(
                    sum(position_table[country_code][:j]) if country_code in position_table.keys() else 0
                )
    return table
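# Once every make_* function has run, the finished file is presumably the
# concatenation of every packed value, in the order the tables were appended
# to `dictionaries`. A hypothetical write-out step under that assumption:

def write_file(path):
    """Concatenate all built tables and write them out (illustrative only)."""
    with open(path, "wb") as f:
        for dictionary in dictionaries:
            for value in dictionary.values():
                f.write(value)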
def locations_download(language_code, data):
    # Using the Google Maps API is much better than what Nintendo used for, say, AP News.
    locations = {}
    gmaps = googlemaps.Client(key=config["google_maps_api_key"])

    # Corresponds to the Wii's language codes.
    languages = {
        0: "ja",
        1: "en",
        2: "de",
        3: "fr",
        4: "es",
        5: "it",
        6: "nl",
    }

    for keys, values in list(data.items()):
        location = values[7]
        if location and location != "":
            if location not in locations:
                locations[location] = [None, None, []]
            locations[location][2].append(keys)

    for name in list(locations.keys()):
        if name == "":
            continue
        # If unidecode is used with Japanese, it transliterates all the characters to English.
        uni_name = name if languages[language_code] == "ja" else unidecode(name)
        print(uni_name)
        coordinates = None
        if name not in cities:
            try:
                read = gmaps.geocode(uni_name, language=languages[language_code])
                loc_name = read[0]["address_components"][0]["long_name"]
                if languages[language_code] == "ja":
                    loc_name = enc(loc_name)
                else:
                    loc_name = enc(unidecode(loc_name))

                """Not doing anything with these."""
                country = u8(0)
                region = u8(0)
                location = u16(0)
                # Nintendo used a zoom factor of 3 for states and countries, but we
                # probably don't have any articles that are just states or countries.
                zoom_factor = u32_littleendian(6)

                # Latitude and longitude are divided by 360 (degrees of a full circle)
                # split across the range of a 16-bit integer.
                coordinates = (
                    s16(int(read[0]["geometry"]["location"]["lat"] / (360 / 65536)))
                    + s16(int(read[0]["geometry"]["location"]["lng"] / (360 / 65536)))
                    + country + region + location + zoom_factor
                )
            except Exception as e:
                ex = "There was an error downloading the location data - line {}: {}".format(
                    sys.exc_info()[-1].tb_lineno, str(e))
                print(ex)
                log(ex, "INFO")
        else:
            coordinates = binascii.unhexlify(cities[name][0] + "0000000006000000")
            loc_name = enc(cities[name][1])

        if locations[name][0] is None and coordinates is not None:
            locations[name][0] = coordinates
        else:
            del locations[name]
            continue
        if locations[name][1] is None:
            locations[name][1] = loc_name

    return locations
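# A sketch of how this variant's return value might be consumed; each entry
# maps a location name to [packed coordinates, encoded display name, list of
# article keys]. The `articles` variable below is illustrative, not from the
# original code:

locations = locations_download(1, articles)  # 1 = English
for place, (coordinates, encoded_name, article_keys) in locations.items():
    print("%s -> %d article(s)" % (place, len(article_keys)))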
    return extracted_idioms


if __name__ == '__main__':
    print 'Hello! Time is {0}'.format(config.TIME)

    # Create working directory if it doesn't exist
    if not os.path.isdir(config.WORK_DIR):
        os.mkdir(config.WORK_DIR)

    # Read in corpus as list of documents
    if config.CORPUS_TYPE == 'plain':
        documents = process_corpus.plain_text(config.CORPUS, config.NO_SPLIT)
        print 'First sentence of corpus: {0}\nLast sentence of corpus: {1}'.format(
            u8(documents[0][0]), u8(documents[-1][-1]))
    elif config.CORPUS_TYPE[0:3] == 'bnc':
        cache_path = os.path.join(
            config.WORK_DIR, '{0}_parsed_xml.json'.format(config.CORPUS_TYPE))
        documents = process_corpus.bnc(config.CORPUS, config.CORPUS_TYPE, cache_path)
        print 'First sentence of corpus: {0}\nLast sentence of corpus: {1}'.format(
            u8(documents[0][0]['sentence']), u8(documents[-1][-1]['sentence']))

    # Get idioms from dictionary
    idioms = get_idiom_list(case_sensitive=config.CASE_SENSITIVE)
    print "Found {4} idioms ranging from '{0}', '{1}' to '{2}', '{3}'".format(
        u8(idioms[0]), u8(idioms[1]), u8(idioms[-2]), u8(idioms[-1]), len(idioms))

    # Extract idioms
extracted_idioms_1 = []
with open(args.extracted_1, 'r') as csvfile:
    csvreader = csv.reader(csvfile, delimiter='\t', quoting=csv.QUOTE_MINIMAL, quotechar='"')
    for csvrow in csvreader:
        extracted_idioms_1.append({'document_id': csvrow[4], 'sentence_number': csvrow[5],
            'idiom': csvrow[0], 'context': unicode(csvrow[3], 'utf-8'), 'start': csvrow[1],
            'end': csvrow[2], 'bnc_start': csvrow[6], 'bnc_end': csvrow[7]})

extracted_idioms_2 = []
with open(args.extracted_2, 'r') as csvfile:
    csvreader = csv.reader(csvfile, delimiter='\t', quoting=csv.QUOTE_MINIMAL, quotechar='"')
    for csvrow in csvreader:
        extracted_idioms_2.append({'document_id': csvrow[4], 'sentence_number': csvrow[5],
            'idiom': csvrow[0], 'context': unicode(csvrow[3], 'utf-8'), 'start': csvrow[1],
            'end': csvrow[2], 'bnc_start': csvrow[6], 'bnc_end': csvrow[7]})

# Combine two sets of extractions
combined_idioms = copy.deepcopy(extracted_idioms_1)
for extracted_idiom_2 in extracted_idioms_2:
    matched = False
    for extracted_idiom_1 in extracted_idioms_1:
        if (extracted_idiom_2['idiom'].lower() == extracted_idiom_1['idiom'].lower() and
                extracted_idiom_2['document_id'] == extracted_idiom_1['document_id'] and
                extracted_idiom_2['sentence_number'] == extracted_idiom_1['sentence_number']):
            matched = True
            break
    if not matched:
        combined_idioms.append(extracted_idiom_2)

# Output to file
with open(args.combined, 'w') as of:
    writer = csv.writer(of, delimiter='\t', quoting=csv.QUOTE_MINIMAL, quotechar='"')
    for extracted_idiom in combined_idioms:
        output_row = [u8(extracted_idiom['idiom']), extracted_idiom['start'],
            extracted_idiom['end'], u8(extracted_idiom['context']),
            u8(extracted_idiom['document_id']), u8(extracted_idiom['sentence_number']),
            extracted_idiom['bnc_start'], extracted_idiom['bnc_end']]
        writer.writerow(output_row)
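# The matching loop above is quadratic: every row of the second extraction
# rescans the full first extraction. An equivalent linear-time alternative
# (a sketch, not part of the original script) keys each extraction on
# (idiom, document_id, sentence_number):

seen = set((e['idiom'].lower(), e['document_id'], e['sentence_number'])
           for e in extracted_idioms_1)
combined_fast = copy.deepcopy(extracted_idioms_1)
for e in extracted_idioms_2:
    if (e['idiom'].lower(), e['document_id'], e['sentence_number']) not in seen:
        combined_fast.append(e)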
def locations_download(language_code, data):
    locations = collections.OrderedDict()
    locations_return = collections.OrderedDict()
    gmaps = googlemaps.Client(key=config["google_maps_api_key"])

    """This dictionary is used to determine languages."""
    languages = {
        0: "ja",
        1: "en",
        2: "de",
        3: "fr",
        4: "es",
        5: "it",
        6: "nl",
    }

    for keys, values in data.items():
        location = values[7]
        if location is not None:
            if location not in locations:
                locations[location] = []
            locations[location].append(keys)

    for name in locations.keys():
        read = None
        if name == "":
            continue
        uni_name = name if languages[language_code] == "ja" else unidecode(name)
        print uni_name
        if name not in cities:
            try:
                read = gmaps.geocode(uni_name, language=languages[language_code])
            except Exception:
                log("There was an error downloading the location data.", "INFO")
        if read is None and name in cities:
            coordinates = binascii.unhexlify(cities[name][0] + "0000000006000000")
            new_name = enc(cities[name][1])
            for filenames in locations[name]:
                if new_name not in locations_return:
                    locations_return[new_name] = [coordinates, []]
                locations_return[new_name][1].append(filenames)
        elif read is not None:
            try:
                new_name = read[0]["address_components"][0]["long_name"].encode("utf-16be")

                """Not doing anything with these at this time."""
                country = u8(0)
                region = u8(0)
                location = u16(0)
                zoom_factor = u32_littleendian(6)

                # 0.0054931640625 is 360 / 65536: degrees per step of a 16-bit value.
                coordinates = (u16(int(read[0]["geometry"]["location"]["lat"] / 0.0054931640625) & 0xFFFF)
                               + u16(int(read[0]["geometry"]["location"]["lng"] / 0.0054931640625) & 0xFFFF)
                               + country + region + location + zoom_factor)
                for filenames in locations[name]:
                    if new_name not in locations_return:
                        locations_return[new_name] = [coordinates, []]
                    locations_return[new_name][1].append(filenames)
            except Exception:
                log("There was an error downloading the location data.", "INFO")

    return locations_return
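# The magic constant 0.0054931640625 above is 360 / 65536: the degrees of a
# full circle spread across the range of a 16-bit value, so one unit is about
# 0.0055 degrees. A quick check of the quantization using Tokyo's coordinates
# (illustrative values, not from the original code):

lat, lng = 35.6895, 139.6917
print(int(lat / 0.0054931640625) & 0xFFFF)  # 6497
print(int(lng / 0.0054931640625) & 0xFFFF)  # 25430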
        extracted_idioms.append(extracted_idiom)
        previously_matched_indices = matched_indices

    return extracted_idioms


if __name__ == '__main__':
    print('Hello! Time is {0}'.format(config.TIME))

    # Create working directory if it doesn't exist
    if not os.path.isdir(config.WORK_DIR):
        os.mkdir(config.WORK_DIR)

    # Read in corpus as list of documents
    if config.CORPUS_TYPE == 'plain':
        documents = process_corpus.plain_text(config.CORPUS, config.NO_SPLIT)
        print('First sentence of corpus: {0}\nLast sentence of corpus: {1}'.format(
            u8(documents[0][0]), u8(documents[-1][-1])))
    elif config.CORPUS_TYPE[0:3] == 'bnc':
        cache_path = os.path.join(config.WORK_DIR, '{0}_parsed_xml.json'.format(config.CORPUS_TYPE))
        documents = process_corpus.bnc(config.CORPUS, config.CORPUS_TYPE, cache_path)
        print('First sentence of corpus: {0}\nLast sentence of corpus: {1}'.format(
            u8(documents[0][0]['sentence']), u8(documents[-1][-1]['sentence'])))

    # Get idioms from dictionary
    idioms = get_idiom_list(case_sensitive=config.CASE_SENSITIVE)
    print("Found {4} idioms ranging from '{0}', '{1}' to '{2}', '{3}'".format(
        u8(idioms[0]), u8(idioms[1]), u8(idioms[-2]), u8(idioms[-1]), len(idioms)))

    # Extract idioms
    extraction_start = time.time()
    if config.METHOD == 'exact':
        extracted_idioms = string_match(idioms, documents, fuzzy=False, inflect=False,
                                        case_sensitive=config.CASE_SENSITIVE)
    elif config.METHOD == 'fuzzy':
        extracted_idioms = string_match(idioms, documents, fuzzy=True, inflect=False,
                                        case_sensitive=config.CASE_SENSITIVE)
        extracted_idioms.append(extracted_idiom)
        previously_matched_indices = matched_indices

    return extracted_idioms


if __name__ == '__main__':
    # Create working directory if it doesn't exist
    if not os.path.isdir(config.WORK_DIR):
        os.mkdir(config.WORK_DIR)

    # Read in corpus as list of documents
    if config.CORPUS_TYPE == 'plain':
        documents = train_dataset.plain_text(config.CORPUS, config.NO_SPLIT)
        print('First sentence of corpus: {0}\nLast sentence of corpus: {1}'.format(
            u8(documents[0][0]), u8(documents[-1][-1])))

    # Get idioms from dictionary
    idioms = get_idiom_list(case_sensitive=config.CASE_SENSITIVE)
    print("Found {4} idioms ranging from '{0}', '{1}' to '{2}', '{3}'".format(
        u8(idioms[0]), u8(idioms[1]), u8(idioms[-2]), u8(idioms[-1]), len(idioms)))

    # Extract idioms
    extraction_start = time.time()
    if config.METHOD == 'exact':
        extracted_idioms = string_match(idioms, documents, fuzzy=False, inflect=False,
                                        case_sensitive=config.CASE_SENSITIVE)
    elif config.METHOD == 'fuzzy':
def locations_download(language_code, data):
    locations = collections.OrderedDict()
    gmaps = googlemaps.Client(key=config["google_maps_api_key"])

    """This dictionary is used to determine languages."""
    languages = {
        0: "ja",
        1: "en",
        2: "de",
        3: "fr",
        4: "es",
        5: "it",
        6: "nl",
    }

    for keys, values in list(data.items()):
        location = values[7]
        if location is not None:
            if location not in locations:
                locations[location] = [None, None, []]
            locations[location][2].append(keys)

    for name in list(locations.keys()):
        if name == "":
            continue
        # If using unidecode with Japanese, it'll transliterate all the characters to English.
        uni_name = name if languages[language_code] == "ja" else unidecode(name)
        print(uni_name)
        coordinates = None
        loc_name = None
        if name not in cities:
            try:
                read = gmaps.geocode(uni_name, language=languages[language_code])
                loc_name = read[0]["address_components"][0]["long_name"]
                if languages[language_code] == "ja":
                    loc_name = enc(loc_name)
                else:
                    loc_name = enc(unidecode(loc_name))

                """Not doing anything with these."""
                country = u8(0)
                region = u8(0)
                location = u16(0)
                zoom_factor = u32_littleendian(6)

                coordinates = s16(int(read[0]["geometry"]["location"]["lat"] / (360 / 65536))) + \
                    s16(int(read[0]["geometry"]["location"]["lng"] / (360 / 65536))) + \
                    country + region + location + zoom_factor
            except Exception:
                log("There was an error downloading the location data.", "INFO")
        else:
            coordinates = binascii.unhexlify(cities[name][0] + "0000000006000000")
            loc_name = enc(cities[name][1])

        if locations[name][0] is None and coordinates is not None:
            locations[name][0] = coordinates
        if locations[name][1] is None:
            locations[name][1] = loc_name

    return locations
def make_source_table(header, articles_table, data):
    source_table = collections.OrderedDict()
    dictionaries.append(source_table)
    header["source_offset"] = offset_count()  # Offset for the source table.
    source_articles = []

    """These are the picture and position values."""
    source_nums = {
        "AP": [0, 1],
        "Reuters": [0, 4],
        "AFP": [4, 4],
        "AFP_French": [4, 4],
        "ANP": [0, 5],
        "ANSA": [6, 6],
        "dpa": [0, 4],
        "SID": [0, 4],
        "NU.nl": [0, 5],
        "Reuters_Japanese": [0, 4],
    }

    numbers = 0
    numbers_article = 0
    for article in data.values():
        if article[8] not in source_articles:
            source_articles.append(article[8])
            source = source_nums[article[8]]
            source_table["source_picture_%s" % article[8]] = u8(source[0])  # Picture for the source.
            source_table["source_position_%s" % article[8]] = u8(source[1])  # Position for the source.
            source_table["padding_%s" % article[8]] = u16(0)  # Padding.
            source_table["pictures_size_%s" % article[8]] = u32(0)  # Size of the source picture.
            source_table["pictures_offset_%s" % article[8]] = u32(0)  # Offset for the source picture.
            source_table["name_size_%s" % article[8]] = u32(0)  # Size of the source name.
            source_table["name_offset_%s" % article[8]] = u32(0)  # Offset for the source name.
            source_table["copyright_size_%s" % article[8]] = u32(0)  # Size of the copyright.
            source_table["copyright_offset_%s" % article[8]] = u32(0)  # Offset for the copyright.
            numbers += 1
    for article in data.values():
        numbers_article += 1
        articles_table["source_%s_number" % numbers_article] = u32(source_articles.index(article[8]))  # Number for the source.
    header["source_number"] = u32(numbers)  # Number of entries for the source table.
    return source_table
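# source_nums maps each provider to its [picture, position] pair, so the
# per-article loop only needs one lookup, and supporting a new provider means
# adding one entry (plus a logo JPG if it also appears in the custom-logo list
# in make_source_pictures). Illustrative lookup, repeating the dict since it
# is local to the function above:

source_nums = {"AP": [0, 1], "Reuters": [0, 4]}
picture, position = source_nums["AP"]
assert (picture, position) == (0, 1)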