From c4af01fa75dc0feddacfaa04afc4ee9bc0eef08d Mon Sep 17 00:00:00 2001 From: Mehdi Mirza Date: Wed, 29 Jul 2020 16:08:08 +0000 Subject: [PATCH] Add board games to physics_planning_games. PiperOrigin-RevId: 323799306 --- memo/README.md | 22 +++ memo/load_memo_data.ipynb | 335 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 357 insertions(+) create mode 100644 memo/README.md create mode 100644 memo/load_memo_data.ipynb diff --git a/memo/README.md b/memo/README.md new file mode 100644 index 0000000..1294d15 --- /dev/null +++ b/memo/README.md @@ -0,0 +1,22 @@ +# MEMO: A Deep Network For Flexible Combination Of Episodic Memories. + +This package contains a [Colaboratory notebook](https://colab.research.google.com/github/deepmind/deepmind_research/blob/master/memo/load_memo_data.ipynb) +that loads a version a version of the dataset for the Paired associative +inference task (length 3 and 4) presented in the ICLR 2020 submission (also on +[arXiv](https://arxiv.org/abs/2001.10913)). + +If you use the dataset, please cite: + +``` +@inproceedings{ + banino2020memo:, + title={MEMO: A Deep Network for Flexible Combination of Episodic Memories}, + author={Andrea Banino and Adrià Puigdomènech Badia and + Raphael Köster and Martin J. Chadwick and Vinicius Zambaldi and + Demis Hassabis and Caswell Barry and Matthew Botvinick and + Dharshan Kumaran and Charles Blundell}, + booktitle={International Conference on Learning Representations}, + year={2020}, + url={https://openreview.net/forum?id=rJxlc0EtDr} +} +``` diff --git a/memo/load_memo_data.ipynb b/memo/load_memo_data.ipynb new file mode 100644 index 0000000..58b3f38 --- /dev/null +++ b/memo/load_memo_data.ipynb @@ -0,0 +1,335 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "mP9QIqyCf6G4" + }, + "source": [ + "Copyright 2020 DeepMind Technologies Limited.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", + "\n", + "https://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ng3jUYyQgSjB" + }, + "source": [ + "# The dataset used for the Paired associate inference task\n", + "\n", + "This is the dataset used for the paired associated inference task in\n", + "[\"MEMO: A Deep Network for Flexible Combination of Episodic Memories\n", + "\"](https://arxiv.org/abs/2001.10913)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "2Nd8cdyccWld" + }, + "outputs": [], + "source": [ + "from __future__ import absolute_import\n", + "from __future__ import division\n", + "from __future__ import print_function\n", + "\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "import collections\n", + "import os\n", + "\n", + "from google.colab import auth\n", + "auth.authenticate_user()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "GP0u6GCUF_6R" + }, + "outputs": [], + "source": [ + "#@title Choices about the dataset you want to load.\n", + "# Make choices about the dataset here.\n", + "chain_length = 3 #@param {type:\"slider\", min:3, max:4, step:1}\n", + "mode = 'valid' #@param ['train', 'test', 'valid']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "JUWNNIwziHyC" + }, + "source": [ + "**If you choose chain_length 3 the data will look like this:**\n", + "\n", + "* trials shape: (48, 3, 1000); 48 trials x the target picture, left and right option x picture dimensions.\n", + "* correct answer: (48); whether the left or right picture is correct.\n", + "* difficulty (48); How far apart are the target picture and the two options.(e.g. AB are 0 steps apart, AC is 1)\n", + "* trial type (48); See below.\n", + "* memory shape (32, 2, 1000); Content of memory store, 32 pairs of images.\n", + "\n", + "Trial types:\n", + "* 1: AB\n", + "* 2: BC\n", + "* 3: AC\n", + "\n", + "\n", + "**If you choose chain_length 4 the data will look like this:**\n", + "* trials: (96, 3, 1000)\n", + "* correct answer: (96)\n", + "* difficulty: (96)\n", + "* trial type: (96)\n", + "* memory shape: (48, 2, 1000)\n", + "\n", + "Trial types:\n", + "* 1: AB\n", + "* 2: BC\n", + "* 3: AC\n", + "* 4: CD\n", + "* 5: BD\n", + "* 6: AD" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "0QETPFeEgr5d" + }, + "outputs": [], + "source": [ + "# Train has 500 shards, valid 150, test 100.\n", + "if mode == 'train':\n", + " num_shards = 500\n", + "elif mode == 'test':\n", + " num_shards = 100\n", + "elif mode == 'valid':\n", + " num_shards = 150" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "jE3_9k8DOMyZ" + }, + "outputs": [], + "source": [ + "DatasetInfo = collections.namedtuple(\n", + " 'DatasetInfo',\n", + " ['basepath', 'size', 'chain_length']\n", + ")\n", + "\n", + "_DATASETS = dict(\n", + " memo=DatasetInfo(\n", + " basepath=mode,\n", + " size=num_shards,\n", + " chain_length=chain_length)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "N3D11lxl3kjF" + }, + "outputs": [], + "source": [ + "def _get_dataset_files(dataset_info, root):\n", + " \"\"\"Generates lists of files for a given dataset version.\"\"\"\n", + " basepath = dataset_info.basepath\n", + " base = os.path.join(root, basepath)\n", + " num_files = dataset_info.size\n", + " length = len(str(num_files))\n", + " template = 'trials-{:0%d}-of-{:0%d}' % (5, 5)\n", + " return [os.path.join(base, template.format(i, num_files))\n", + " for i in range(num_files)]" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "6yqLJYAnsyZF" + }, + "outputs": [], + "source": [ + "def parser_tf_examples(raw_data, chain_length=chain_length):\n", + " if chain_length == 3:\n", + " feature_map = {\n", + " 'trials' : tf.io.FixedLenFeature(\n", + " shape=[48, 3, 1000],\n", + " dtype=tf.float32),\n", + " 'correct_answer': tf.io.FixedLenFeature(\n", + " shape=[48],\n", + " dtype=tf.int64),\n", + " 'difficulty': tf.io.FixedLenFeature(\n", + " shape=[48],\n", + " dtype=tf.int64),\n", + " 'trial_type': tf.io.FixedLenFeature(\n", + " shape=[48],\n", + " dtype=tf.int64),\n", + " 'memory': tf.io.FixedLenFeature(\n", + " shape=[32, 2, 1000],\n", + " dtype=tf.float32),\n", + " }\n", + " elif chain_length == 4: \n", + " feature_map = {\n", + " 'trials' : tf.io.FixedLenFeature(\n", + " shape=[96, 3, 1000],\n", + " dtype=tf.float32),\n", + " 'correct_answer': tf.io.FixedLenFeature(\n", + " shape=[96],\n", + " dtype=tf.int64),\n", + " 'difficulty': tf.io.FixedLenFeature(\n", + " shape=[96],\n", + " dtype=tf.int64),\n", + " 'trial_type': tf.io.FixedLenFeature(\n", + " shape=[96],\n", + " dtype=tf.int64),\n", + " 'memory': tf.io.FixedLenFeature(\n", + " shape=[48, 2, 1000],\n", + " dtype=tf.float32),\n", + " }\n", + " example = tf.io.parse_example(raw_data, feature_map)\n", + " batch = [example[\"trials\"],\n", + " example[\"correct_answer\"],\n", + " example[\"difficulty\"],\n", + " example[\"trial_type\"],\n", + " example[\"memory\"]]\n", + " return batch" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nXMhOoHWj0oP" + }, + "source": [ + "## Load the data." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "rXIOBlWKyMY0" + }, + "outputs": [], + "source": [ + "dataset_info = 'memo'\n", + "root = 'gs://deepmind-memo/length' + str(chain_length) + '/'\n", + "num_epochs = 100\n", + "shuffle_buffer_size = 150\n", + "num_readers = 4\n", + "dataset_info = _DATASETS['memo']\n", + "filenames = _get_dataset_files(dataset_info, root)\n", + "num_map_threads = 4\n", + "batch_size = 10" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "e2G5MSVf9Hpm" + }, + "outputs": [], + "source": [ + "data = tf.data.Dataset.from_tensor_slices(filenames)\n", + "data = data.repeat(num_epochs)\n", + "data = data.shuffle(shuffle_buffer_size)\n", + "data = data.interleave(tf.data.TFRecordDataset,\n", + " cycle_length=num_readers, block_length=1)\n", + "data = data.shuffle(shuffle_buffer_size)\n", + "data = data.map(parser_tf_examples, num_parallel_calls=num_map_threads)\n", + "data = data.batch(batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "1z3dsDNqkBHD" + }, + "source": [ + "# Looking at what we loaded." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ibVadDeeAU4Q" + }, + "outputs": [], + "source": [ + "iterator = data.__iter__()\n", + "element = iterator.get_next()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Lh7-f08nAeGq" + }, + "outputs": [], + "source": [ + "print(element[0].shape) # trials\n", + "print(element[1].shape) # correct answer\n", + "print(element[2].shape) # difficulty\n", + "print(element[3].shape) # trialtype\n", + "print(element[4].shape) # memory" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "load_memo_data.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}