| import os |
| import random |
| import numpy as np |
| import torch |
| import json |
|
|
| from tqdm import tqdm |
| from pathlib import Path |
| from textgames import GAME_NAMES, LEVEL_IDS, new_game, game_filename |
|
|
|
|
def set_seed(seed):
    """Seed the random generators of ``random``, NumPy and PyTorch.

    Args:
        seed: Value passed unchanged to ``random.seed``, ``np.random.seed``,
            ``torch.manual_seed`` and ``torch.cuda.manual_seed``.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
|
|
|
|
| |
def main():
    """Generate unique game prompts per game/level and save them as JSON.

    For every name in ``GAME_NAMES`` and every level in ``"1"``, ``"2"``,
    ``"3"``, new games are created until each session id has a prompt that
    has not yet been produced for that game (uniqueness is tracked across all
    levels of the same game). Each level's ``session id -> prompt`` mapping is
    written to ``<outdir>/<game_filename>_<level>.json``.

    Environment variables read:
        TEXTGAMES_LOADGAME_DIR: output directory (default ``"problemsets"``).
        TEXTGAMES_GENERATE_N: number of sessions per game and level
            (default ``1000``).

    Environment variables written:
        TEXTGAMES_NEWGAME_ERRFILE: set to ``<outdir>/<game_filename>_<level>.err``
            before the games of each level are created.

    Raises:
        FileExistsError: If the output directory already exists.
        ValueError: If ``TEXTGAMES_GENERATE_N`` is not a valid integer.
    """
    outdir = Path(os.getenv("TEXTGAMES_LOADGAME_DIR", "problemsets"))
    # exist_ok=False: abort instead of writing into an existing directory.
    os.makedirs(outdir, exist_ok=False)
    set_seed(42)

    level_ids = ["1", "2", "3"]
    # Environment values are always strings, while range() requires an int.
    n_sessions = int(os.getenv("TEXTGAMES_GENERATE_N", "1000"))
    session_ids = [f"session_{sid:04}" for sid in range(n_sessions)]

    count_duplicate = 0
    for game_name in GAME_NAMES:
        # Prompts already handed out for this game, shared by all its levels.
        seen_prompts = set()
        for level_id in level_ids:
            basename = f"{game_filename(game_name)}_{level_id}"
            os.environ["TEXTGAMES_NEWGAME_ERRFILE"] = f"{outdir}/{basename}.err"

            prompts_by_session = {}
            for sid in tqdm(session_ids, desc=f"{game_name}_{level_id}"):
                # Re-draw until the prompt is new for this game.
                # NOTE(review): this never terminates if the game cannot
                # produce enough distinct prompts -- confirm that is acceptable.
                while True:
                    cur_game = new_game(game_name, level_id)
                    prompt = cur_game._get_prompt()
                    if prompt not in seen_prompts:
                        break
                    count_duplicate += 1
                seen_prompts.add(prompt)
                prompts_by_session[sid] = prompt
            # The counter is cumulative over all games and levels so far.
            print(f"[{game_name}_{level_id}] Duplicate #: {count_duplicate:-4}")

            with open(outdir / f"{basename}.json", "w", encoding="utf-8") as outfile:
                json.dump(prompts_by_session, outfile, indent=4)

    print(f"duplicates:{count_duplicate}")


if __name__ == '__main__':
    main()
|
|