Пример #1
0
def Create_Input_Files():
    """Build the preprocessed COCO caption inputs (images, captions, word map).

    Fix: the original image_folder literal was '/Volumes/Yuening\\ Passport/...'.
    The backslash is a shell-escape artifact; Python keeps it verbatim in the
    string, so the macOS volume path would never resolve. The path uses a
    literal space instead.
    """
    create_input_files(
        dataset='coco',
        karpathy_json_path=
        '/Users/tangruixiang/Desktop/Fairness_Dataset/code/COCO/Karpathy_split/coco/dataset.json',
        image_folder='/Volumes/Yuening Passport/rxtang/coco/images',
        captions_per_image=5,
        min_word_freq=5,
        output_folder='processed_data',
        max_len=50)
Пример #2
0
def run_model(input_summary_file,
              output_summary_file,
              out_crop_path,
              result_output,
              gl_epochs,
              crop_type=None,
              file_name=None,
              run_path_absolute=r'C:\DSSAT47',
              glue_flag=1,
              simulation_model='B'):
    """Generate DSSAT input/experiment files and run the model on each crop file.

    :param input_summary_file: source summary passed to utils.create_input_files.
    :param output_summary_file: directory that receives generated files;
        'xfile.json' is written inside it.
    :param out_crop_path: directory scanned for crop files to execute.
    :param result_output: forwarded to the DSSAT runner call.
    :param gl_epochs: forwarded to the DSSAT runner call.
    :param crop_type: forwarded to utils.create_xfile (optional).
    :param file_name: forwarded to utils.create_xfile (optional).
    :param run_path_absolute: DSSAT installation directory.
    :param glue_flag: forwarded to the DSSAT runner call.
    :param simulation_model: forwarded to the DSSAT runner call.
    """
    utils.create_input_files(input_summary_file, output_summary_file)
    utils.create_xfile(os.path.join(output_summary_file, 'xfile.json'),
                       out_crop_path, crop_type, file_name)
    # Build the recognised-suffix set once; the original re-created
    # list(SUFFIXES.values()) on every loop iteration, and a set gives O(1)
    # membership tests.
    valid_suffixes = set(SUFFIXES.values())
    for fn in os.listdir(out_crop_path):
        if os.path.splitext(fn)[-1] in valid_suffixes:
            dssat = DSSAT(os.path.join(out_crop_path, fn), run_path_absolute)
            dssat(result_output, gl_epochs, glue_flag, simulation_model)
Пример #3
0
from utils import create_input_files

if __name__ == '__main__':
    # Build the packed caption/image files and the word map for Flickr8k.
    config = {
        'dataset': 'flickr8k',
        'karpathy_json_path': '/scratch/nikolai/dataset_flickr8k.json',
        'image_folder': '/scratch/nikolai/Flickr8K',
        'captions_per_image': 5,
        'min_word_freq': 5,
        'output_folder': '/scratch/nikolai/out_data',
        'max_len': 50,
    }
    create_input_files(**config)
from utils import create_input_files

if __name__ == '__main__':
    # Create input files along with the word map.
    # Cleanup: the explicit '\' line continuations were redundant inside the
    # call's parentheses, and PEP 8 forbids spaces around '=' in keyword
    # arguments.
    create_input_files(dataset='coco',
                       karpathy_json_path='./caption_datasets/dataset_coco.json',
                       image_folder='./images',
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder='./data',
                       max_len=50)
Пример #5
0
from utils import create_input_files

if __name__ == '__main__':
    # Preprocess the COCO-2015 split into model-ready input files + word map.
    split_json = '../splits/dataset_coco.json'
    image_root = '/datasets/COCO-2015'
    out_dir = '/datasets/home/50/650/agokhale/285project/a-PyTorch-Tutorial-to-Image-Captioning/data_generated'
    create_input_files(dataset='coco',
                       karpathy_json_path=split_json,
                       image_folder=image_root,
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder=out_dir,
                       max_len=50)
from utils import create_input_files

if __name__ == '__main__':
    # Pack the COCO captions/images into training-ready files plus a word map.
    options = dict(dataset='coco',
                   karpathy_json_path='caption_data/dataset_coco.json',
                   image_folder='/caption data/',
                   captions_per_image=5,
                   min_word_freq=5,
                   output_folder='caption_data/',
                   max_len=50)
    create_input_files(**options)
Пример #7
0
from utils import create_input_files

if __name__ == '__main__':
    # Prepare the Chinese Flickr8k dataset; char_based=False selects
    # word-level rather than character-level processing.
    kwargs = {
        'dataset': 'flickr8kzh',
        'karpathy_json_path': '../data/flickr8kzh.json',
        'image_folder': '../data/flickr8k_images/',
        'captions_per_image': 5,
        'min_word_freq': 5,
        'output_folder': '../prepared_data/',
        'max_len': 50,
        'char_based': False,
    }
    create_input_files(**kwargs)
Пример #8
0
from utils import create_input_files, train_word2vec_model

if __name__ == '__main__':
    # Build document-classification inputs from the CSVs, then train embeddings.
    # Cleanup: removed the commented-out alternative parameter values that were
    # interleaved with the live call.
    # NOTE(review): per the original note, the CSV columns are 'news',
    # 'is_relevant', 'Armed Assault', 'Bombing/Explosion', 'Kidnapping',
    # 'Other'; label_columns selects columns 1-5 — confirm against the data.
    create_input_files(csv_folder='./data',
                       output_folder='./outdata',
                       sentence_limit=40,
                       word_limit=200,
                       min_word_count=3,
                       label_columns=[1, 2, 3, 4, 5])

    train_word2vec_model(data_folder='./outdata', algorithm='skipgram')
Пример #9
0
from utils import txt_2_json, create_input_files
from config import *
import os.path

if __name__ == '__main__':
    # One-time step: convert the custom txt captions into Karpathy JSON format
    # if the converted file does not exist yet.
    json_already_built = os.path.isfile(caption_json_path)
    if not json_already_built:
        txt_2_json()

    # Build the input files and the word map; all settings come from config.
    create_input_files()
Пример #10
0
from utils import create_input_files

if __name__ == '__main__':
    # Turn the crawler output into training-ready caption files + word map.
    crawler_json = './crawlers/output/JSON.json'
    create_input_files(dataset='coco',
                       karpathy_json_path=crawler_json,
                       image_folder='',
                       captions_per_image=5,
                       min_word_freq=3,
                       output_folder='./output',
                       max_len=50)
        # NOTE(review): this example is truncated by the scrape — the
        # ArgumentParser construction and the opening of the '--dataset'
        # add_argument call are missing above this line.
        help="caption datasets type,i.e.,'coco', 'flickr8k', 'flickr30k' ")
    # NOTE(review): required=True makes the 'default=' values below dead —
    # argparse always demands these flags on the command line.
    parser.add_argument("--karpathy_json_path",
                        default='data/dataset_coco.json',
                        type=str,
                        required=True,
                        help="annotation json file' ")
    parser.add_argument(
        "--image_folder",
        default='data/',
        type=str,
        required=True,
        help="the directory containing the train2014 and val2014 image folders"
    )
    parser.add_argument("--output_folder",
                        default='data/',
                        type=str,
                        required=True,
                        help="path for saving processed data")
    parser.add_argument("--captions_per_image", default=5, type=int)
    parser.add_argument("--min_word_freq", default=5, type=int)
    parser.add_argument("--max_len", default=50, type=int)
    args = parser.parse_args()

    # Forward the parsed CLI options to the preprocessing routine.
    create_input_files(dataset=args.dataset,
                       karpathy_json_path=args.karpathy_json_path,
                       image_folder=args.image_folder,
                       captions_per_image=args.captions_per_image,
                       min_word_freq=args.min_word_freq,
                       output_folder=args.output_folder,
                       max_len=args.max_len)


from utils import create_input_files

if __name__ == '__main__':
    # Preprocess the COCO split stored under scratch/project/.
    settings = dict(dataset='coco',
                    karpathy_json_path='scratch/project/dataset_coco.json',
                    image_folder='scratch/project/',
                    captions_per_image=5,
                    min_word_freq=5,
                    output_folder='scratch/project/preprocessed_data/',
                    max_len=50)
    create_input_files(**settings)

Пример #13
0
from utils import create_input_files, train_word2vec_model

if __name__ == '__main__':
    # Stage 1: split the Yahoo Answers CSVs into model-ready inputs + word map.
    prepared_dir = './data'
    create_input_files(csv_folder='./yahoo_answers_csv',
                       output_folder=prepared_dir,
                       sentence_limit=15,
                       word_limit=20,
                       min_word_count=5)

    # Stage 2: fit skip-gram word2vec embeddings on the prepared corpus.
    train_word2vec_model(data_folder=prepared_dir, algorithm='skipgram')
from utils import create_input_files
import _config

if __name__ == '__main__':
    # All preprocessing settings live in _config; this script only forwards
    # them to create_input_files.
    cfg = _config
    # NOTE(review): 'image_foldr_path' mirrors the (misspelt) attribute name
    # declared in _config — confirm before renaming it there.
    create_input_files(dataset=cfg.dataset,
                       karpathy_json_path=cfg.karpathy_json_path,
                       image_folder=cfg.image_foldr_path,
                       captions_per_image=cfg.captions_per_image,
                       min_word_freq=cfg.min_word_freq,
                       output_folder=cfg.output_folder,
                       max_len=cfg.max_sentence_length)
Пример #15
0
from utils import create_input_files

if __name__ == '__main__':
    # 'address' is a relocatable prefix for every dataset path
    # (empty string = current working directory).
    address = ''
    split_json = address + 'karpathy/dataset_coco.json'
    out_dir = address + 'out'
    create_input_files(dataset='coco',
                       karpathy_json_path=split_json,
                       image_folder=address,
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder=out_dir,
                       max_len=50)
Пример #16
0
from utils import create_input_files
from imageio import imread

if __name__ == '__main__':
    # Sanity-check read kept from the original (disabled):
    # img = imread('./datasets/Flicker8k_Dataset/2513260012_03d33305cf.jpg')

    # Build the Flickr8k input files; the first three arguments are positional
    # (dataset name, Karpathy split JSON, image directory).
    dataset_root = './datasets'
    create_input_files('flickr8k',
                       dataset_root + '/dataset_flickr8k.json',
                       dataset_root + '/Flicker8k_Dataset',
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder=dataset_root + '/caption_data',
                       max_len=50)
Пример #17
0
from utils import create_input_files

if __name__ == '__main__':
    # Create input files (along with word map).
    # Cleanup: removed the unused locals trainCaptionPath, valCaptionPath and
    # valImagePath — only the Karpathy split JSON and the image root are used.
    trainImagePath = "../datasets/coco2014/"
    captionPath = "../scratch/dataset_coco.json"

    create_input_files(dataset='coco',
                       karpathy_json_path=captionPath,
                       image_folder=trainImagePath,
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder='../scratch/caption data/',
                       max_len=50)
Пример #18
0
from utils import create_input_files

if __name__ == '__main__':
    # Pack COCO captions and images into input files and build the word map.
    data_root = '/home/work/jiangshuai/image_caption/data/'
    create_input_files(dataset='coco',
                       karpathy_json_path=data_root + 'dataset_coco.json',
                       image_folder=data_root,
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder='./data/',
                       max_len=50)
Пример #19
0
from utils import create_input_files

# Module-level constant: maximum caption length in tokens.
max_caption_length = 50

if __name__ == '__main__':
    # Build BERT-tokenised ('bert-base-cased') caption inputs for Flickr8k.
    flickr_root = 'data/Flicker8k_Dataset/'
    create_input_files(dataset='flickr8k',
                       karpathy_json_path=flickr_root + 'karpathy_flickr8k.json',
                       image_folder=flickr_root + 'Flicker8k_images/',
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder='data/',
                       max_len=max_caption_length,
                       bert_model_name='bert-base-cased')
from utils import create_input_files
import os

# Shorthand for environment-variable lookups (kept from the original).
envget = os.environ.get

if __name__ == '__main__':
    # All caption data lives under $HOME/data/captiondata/; resolve the
    # directory once and reuse it for every path.
    caption_dir = os.path.join(envget('HOME'), 'data/captiondata/')
    create_input_files(dataset='coco',
                       karpathy_json_path=os.path.join(caption_dir,
                                                       'dataset_coco.json'),
                       image_folder=caption_dir,
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder=caption_dir,
                       max_len=50)
Пример #21
0
    # NOTE(review): truncated fragment — the enclosing function/'__main__'
    # header and the bindings for pd, os, c, rd, create_input_files and
    # train_word2vec_model are outside this view.
    #train = rd.read_input_file(c.TRAIN_PATH, c.EMOTION_HEADER, c.ATTRIBUTES, c.CLEAN)
    #test = rd.read_input_file(c.TEST_PATH, c.EMOTION_HEADER, c.ATTRIBUTES, c.CLEAN)
    # Load the pre-split train/test CSVs and drop rows with missing values.
    train = pd.read_csv("./data/nlp_train.csv")
    test = pd.read_csv("./data/nlp_test.csv")
    train = train.dropna()
    test = test.dropna()
    # Keep only the 'anger' label column and the 'body' text column.
    train_temp = train.loc[:, ['anger', 'body']]
    test_temp = test.loc[:, ['anger', 'body']]

    # Write headerless CSVs — presumably the layout create_input_files
    # expects; verify against its implementation.
    train_temp.to_csv('./data/train.csv', index=False, header=False)
    test_temp.to_csv('./data/test.csv', index=False, header=False)

    create_input_files(csv_folder='./data',
                       output_folder='./outdata',
                       # sentence_limit=15,
                       # word_limit=20,
                       # min_word_count=5)
                       sentence_limit=30,
                       word_limit=100,
                       min_word_count=10)

    train_word2vec_model(data_folder='./outdata', algorithm='skipgram')
    # NOTE(review): file1 is opened for append and never closed in this view;
    # file2 is opened and immediately closed. Consider 'with open(...)'.
    file1 = open("label.txt", "a")
    file2 = open("result.txt", "a")
    file2.close()

    # For each label: record it, then run training and evaluation as
    # subprocess shell commands.
    for i in c.LABELS:
        print(i)
        file1.write(i)
        file1.write('\n')
        os.system('python3 train.py')
        os.system('python3 eval.py')
from utils import create_input_files

if __name__ == '__main__':
    # Build the Flickr8k input files and word map.
    # Fix: the keyword was misspelt 'ouput_folder', which would raise
    # TypeError (unexpected keyword argument) at runtime — every sibling
    # example uses 'output_folder'.
    create_input_files(dataset='flickr8k',
                       karpathy_json_path='../../Datasets/train_valid_test_splits/dataset_flickr8k.json',
                       image_folder='../../Datasets/Flickr8K/Flicker8k_Dataset/',
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder='./data')  # This will be in /netscratch/deshmukh
Пример #23
0
from utils import create_input_files
import os

if __name__ == '__main__':
    # Create input files (along with word map).
    # Cleanup: single-argument os.path.join(...) calls are no-ops, so the
    # plain string literals are equivalent and clearer.
    create_input_files(dataset='flickr8k',
                       karpathy_json_path="dataset_flickr8k.json",
                       image_folder="Flicker8k_Dataset",
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder="Flicker8k_Dataset",
                       max_len=50)
Пример #24
0
from utils import create_input_files

if __name__ == '__main__':
    # Locations of the Flickr8k captions JSON, raw images, and output folder.
    paths = {
        'karpathy_json_path': '/home/jithin/datasets/imageCaptioning/captions/dataset_flickr8k.json',
        'image_folder': '/home/jithin/datasets/imageCaptioning/flicker8k/Flicker8k_Dataset',
        'output_folder': './pre_processed',
    }
    # Build the preprocessed files and the word map.
    create_input_files(dataset='flickr8k',
                       captions_per_image=5,
                       min_word_freq=5,
                       max_len=50,
                       **paths)
Пример #25
0
from utils import create_input_files

from macro import IMAGE_FOLDER, OUTPUT_FOLDER

if __name__ == '__main__':
    # Build the Flickr30k input files; image and output locations come from
    # the project's macro module.
    split_json = 'karpathy_split_json_path/dataset_flickr30k.json'
    create_input_files(dataset='flickr30k',
                       karpathy_json_path=split_json,  # where the data split is defined
                       image_folder=IMAGE_FOLDER,
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder=OUTPUT_FOLDER,
                       max_len=50)
Пример #26
0
from utils import create_input_files

if __name__ == '__main__':
    # Preprocess Flickr30k: pack images/captions and emit the word map.
    settings = dict(
        dataset='flickr30k',
        karpathy_json_path='../data/dataset_flickr30k.json',
        image_folder='../data/flickr30k_images/flickr30k_images/',
        captions_per_image=5,
        min_word_freq=5,
        output_folder='../data',
        max_len=50,
    )
    create_input_files(**settings)
Пример #27
0
from utils import create_input_files

if __name__ == '__main__':
    # Build input files for the custom 'atlas' dataset (one caption per image,
    # min word frequency of 1).
    dataset_dir = '../../dataset/'
    create_input_files(dataset='atlas',
                       karpathy_json_path=dataset_dir + 'atlas_dataset.json',
                       image_folder=dataset_dir,
                       captions_per_image=1,
                       min_word_freq=1,
                       output_folder='output',
                       max_len=50)
from utils import create_input_files

if __name__ == '__main__':
    # Prepare the 'gaussian_0.13' noise variant of the Flickr8k images.
    noise_variant = 'gaussian_0.13'
    create_input_files(dataset='flickr8k',
                       karpathy_json_path='dataset_flickr8k.json',
                       image_folder='Flickr8k_Dataset/' + noise_variant,
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder='dataset_' + noise_variant,
                       max_len=50)
from utils import create_input_files
import argparse
import os

if __name__ == '__main__':
    # Command-line front end for the preprocessing step.
    cli = argparse.ArgumentParser(description='Create input files')
    cli.add_argument('--dataset', '-d', default='coco', help='dataset')
    cli.add_argument('--img_folder', '-i', default='image_folder',
                     help='path to image')
    cli.add_argument('--caption_folder', '-cf', default='caption_datasets',
                     help='path to captions')
    opts = cli.parse_args()

    # The split JSON is named after the dataset,
    # e.g. caption_datasets/dataset_coco.json.
    split_json = os.path.join(opts.caption_folder,
                              'dataset_{:s}.json'.format(opts.dataset))

    # Create input files (along with word map); output lands in a
    # per-dataset folder such as coco_folder.
    create_input_files(dataset=opts.dataset,
                       karpathy_json_path=split_json,
                       image_folder=opts.img_folder,
                       captions_per_image=5,
                       min_word_freq=5,
                       output_folder='{:s}_folder'.format(opts.dataset),
                       max_len=50)
Пример #30
0
from utils import create_input_files

if __name__ == '__main__':
    # Build input files for the Face2Text dataset (two captions per image).
    caption_root = './caption data/'
    create_input_files(dataset='face2text',
                       karpathy_json_path=caption_root + 'dataset_face2text.json',
                       image_folder=caption_root + 'dataset-face2text/',
                       captions_per_image=2,
                       min_word_freq=2,
                       output_folder=caption_root,
                       max_len=50)