From d1ae6a456fa891e9590a5c398c2a94df7f26bc87 Mon Sep 17 00:00:00 2001
From: Sergio Gomez <sergomez@google.com>
Date: Fri, 26 Feb 2021 10:18:49 +0000
Subject: [PATCH] Add colab example of R2D2 agent training on DeepMind Lab RL
 Unplugged dataset

PiperOrigin-RevId: 359716790
---
 rl_unplugged/dmlab_r2d2.ipynb | 672 ++++++++++++++++++++++++++++++++++
 1 file changed, 672 insertions(+)
 create mode 100644 rl_unplugged/dmlab_r2d2.ipynb

diff --git a/rl_unplugged/dmlab_r2d2.ipynb b/rl_unplugged/dmlab_r2d2.ipynb
new file mode 100644
index 0000000..24b9aef
--- /dev/null
+++ b/rl_unplugged/dmlab_r2d2.ipynb
@@ -0,0 +1,672 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "pdgOfM42e7in"
+      },
+      "source": [
+        "Copyright 2021 DeepMind Technologies Limited.\n",
+        "\n",
+        "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use\n",
+        "this file except in compliance with the License. You may obtain a copy of the\n",
+        "License at\n",
+        "\n",
+        "[https://www.apache.org/licenses/LICENSE-2.0](https://www.apache.org/licenses/LICENSE-2.0)\n",
+        "\n",
+        "Unless required by applicable law or agreed to in writing, software distributed\n",
+        "under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR\n",
+        "CONDITIONS OF ANY KIND, either express or implied. See the License for the\n",
+        "specific language governing permissions and limitations under the License."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WOzmAie8e-NK"
+      },
+      "source": [
+        "# RL Unplugged: Offline R2D2 - DeepMind Lab\n",
+        "\n",
+        "## A Colab example of an Acme R2D2 agent on DeepMind Lab data.\n",
+        "# \u003ca href=\"https://colab.research.google.com/github/deepmind/deepmind_research/blob/master/rl_unplugged/dmlab_r2d2.ipynb\" target=\"_parent\"\u003e\u003cimg src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/\u003e\u003c/a\u003e\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "tr2MoADAQepq"
+      },
+      "source": [
+        "## Installation\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "SvWeEWGd5Nx_"
+      },
+      "source": [
+        "### External dependencies"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "fTuqZxDv4v0y"
+      },
+      "outputs": [],
+      "source": [
+        "!apt-get install libsdl2-dev\n",
+        "!apt-get install libosmesa6-dev\n",
+        "!apt-get install libffi-dev\n",
+        "!apt-get install gettext\n",
+        "!apt-get install python3-numpy-dev python3-dev"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ewPoBUDd04xh"
+      },
+      "source": [
+        "### Bazel"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ewVV3-Oh0sBm"
+      },
+      "outputs": [],
+      "source": [
+        "BAZEL_VERSION = '3.6.0'\n",
+        "!wget https://github.com/bazelbuild/bazel/releases/download/{BAZEL_VERSION}/bazel-{BAZEL_VERSION}-installer-linux-x86_64.sh\n",
+        "!chmod +x bazel-{BAZEL_VERSION}-installer-linux-x86_64.sh\n",
+        "!./bazel-{BAZEL_VERSION}-installer-linux-x86_64.sh\n",
+        "!bazel --version"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "fwdmJWW3KB7g"
+      },
+      "source": [
+        "### DeepMind Lab"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Ng9xopirzVYA"
+      },
+      "outputs": [],
+      "source": [
+        "!git clone https://github.com/deepmind/lab.git"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "NeO57QYqDG-L"
+      },
+      "outputs": [],
+      "source": [
+        "%%writefile lab/bazel/python.BUILD\n",
+        "\n",
+        "# Description:\n",
+        "#   Build rule for Python and Numpy.\n",
+        "#   This rule works for Debian and Ubuntu. Other platforms might keep the\n",
+        "#   headers in different places, cf. 'How to build DeepMind Lab' in build.md.\n",
+        "\n",
+        "cc_library(\n",
+        "    name = \"python\",\n",
+        "    hdrs = select(\n",
+        "        {\n",
+        "            \"@bazel_tools//tools/python:PY3\": glob([\n",
+        "                \"usr/include/python3.6m/*.h\",\n",
+        "                \"usr/local/lib/python3.6/dist-packages/numpy/core/include/numpy/*.h\",\n",
+        "            ]),\n",
+        "        },\n",
+        "        no_match_error = \"Internal error, Python version should be one of PY2 or PY3\",\n",
+        "    ),\n",
+        "    includes = select(\n",
+        "        {\n",
+        "            \"@bazel_tools//tools/python:PY3\": [\n",
+        "                \"usr/include/python3.6m\",\n",
+        "                \"usr/local/lib/python3.6/dist-packages/numpy/core/include\",\n",
+        "            ],\n",
+        "        },\n",
+        "        no_match_error = \"Internal error, Python version should be one of PY2 or PY3\",\n",
+        "    ),\n",
+        "    visibility = [\"//visibility:public\"],\n",
+        ")\n",
+        "\n",
+        "alias(\n",
+        "    name = \"python_headers\",\n",
+        "    actual = \":python\",\n",
+        "    visibility = [\"//visibility:public\"],\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "pRuLCRzpzX8E"
+      },
+      "outputs": [],
+      "source": [
+        "!cd lab \u0026\u0026 bazel build -c opt --python_version=PY3 //python/pip_package:build_pip_package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Oen2E99T0E58"
+      },
+      "outputs": [],
+      "source": [
+        "!cd lab \u0026\u0026 ./bazel-bin/python/pip_package/build_pip_package /tmp/dmlab_pkg"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "okrzzmrC0H_O"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install /tmp/dmlab_pkg/deepmind_lab-1.0-py3-none-any.whl --force-reinstall"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hq-VGgvbRKSI"
+      },
+      "source": [
+        "### Python dependencies"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "8Fme7zxOKejg"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install dm_env\n",
+        "!pip install dm-acme[reverb]\n",
+        "!pip install dm-acme[tf]\n",
+        "!pip install dm-sonnet"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Rfd4jQGFt-HB"
+      },
+      "outputs": [],
+      "source": [
+        "# Upgrade to recent commit for latest R2D2 learner.\n",
+        "!pip install --upgrade git+https://github.com/deepmind/acme.git@3dfda9d392312d948906e6c567c7f56d8c911de5"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "DvicrJPBqemz"
+      },
+      "source": [
+        "## Imports and Utils"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "_8qxA0KLU468"
+      },
+      "outputs": [],
+      "source": [
+        "# @title Imports\n",
+        "import copy\n",
+        "import functools\n",
+        "\n",
+        "from acme import environment_loop\n",
+        "from acme import specs\n",
+        "from acme.adders import reverb as acme_reverb\n",
+        "from acme.agents.tf import actors\n",
+        "from acme.agents.tf.r2d2 import learning as r2d2\n",
+        "from acme.tf import utils as tf_utils\n",
+        "from acme.tf import networks\n",
+        "from acme.utils import loggers\n",
+        "from acme.wrappers import observation_action_reward\n",
+        "import tree\n",
+        "\n",
+        "import deepmind_lab\n",
+        "import dm_env\n",
+        "import numpy as np\n",
+        "import reverb\n",
+        "import sonnet as snt\n",
+        "import tensorflow as tf\n",
+        "import trfl"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "becmQVMMuCRU"
+      },
+      "outputs": [],
+      "source": [
+        "# @title Environment\n",
+        "\n",
+        "_ACTION_MAP = {\n",
+        "    0: (0, 0, 0, 1, 0, 0, 0),\n",
+        "    1: (0, 0, 0, -1, 0, 0, 0),\n",
+        "    2: (0, 0, -1, 0, 0, 0, 0),\n",
+        "    3: (0, 0, 1, 0, 0, 0, 0),\n",
+        "    4: (-10, 0, 0, 0, 0, 0, 0),\n",
+        "    5: (10, 0, 0, 0, 0, 0, 0),\n",
+        "    6: (-60, 0, 0, 0, 0, 0, 0),\n",
+        "    7: (60, 0, 0, 0, 0, 0, 0),\n",
+        "    8: (0, 10, 0, 0, 0, 0, 0),\n",
+        "    9: (0, -10, 0, 0, 0, 0, 0),\n",
+        "    10: (-10, 0, 0, 1, 0, 0, 0),\n",
+        "    11: (10, 0, 0, 1, 0, 0, 0),\n",
+        "    12: (-60, 0, 0, 1, 0, 0, 0),\n",
+        "    13: (60, 0, 0, 1, 0, 0, 0),\n",
+        "    14: (0, 0, 0, 0, 1, 0, 0),\n",
+        "}\n",
+        "\n",
+        "class DeepMindLabEnvironment(dm_env.Environment):\n",
+        "  \"\"\"DeepMind Lab environment.\"\"\"\n",
+        "\n",
+        "  def __init__(self, level_name: str, action_repeats: int = 4):\n",
+        "    \"\"\"Construct environment.\n",
+        "\n",
+        "    Args:\n",
+        "      level_name: DeepMind lab level name (e.g. 'rooms_watermaze').\n",
+        "      action_repeats: Number of times the same action is repeated on every\n",
+        "        step().\n",
+        "    \"\"\"\n",
+        "    config = dict(fps='30',\n",
+        "                  height='72',\n",
+        "                  width='96',\n",
+        "                  maxAltCameraHeight='1',\n",
+        "                  maxAltCameraWidth='1',\n",
+        "                  hasAltCameras='false')\n",
+        "\n",
+        "    # seekavoid_arena_01 is not part of dmlab30.\n",
+        "    if level_name != 'seekavoid_arena_01':\n",
+        "      level_name = 'contributed/dmlab30/{}'.format(level_name)\n",
+        "\n",
+        "    self._lab = deepmind_lab.Lab(level_name, ['RGB_INTERLEAVED'], config)\n",
+        "    self._action_repeats = action_repeats\n",
+        "    self._reward = 0\n",
+        "\n",
+        "  def _observation(self):\n",
+        "    last_action = getattr(self, '_action', 0)\n",
+        "    last_reward = getattr(self, '_reward', 0)\n",
+        "    self._last_observation = observation_action_reward.OAR(\n",
+        "        observation=self._lab.observations()['RGB_INTERLEAVED'],\n",
+        "        action=np.array(last_action, dtype=np.int64),\n",
+        "        reward=np.array(last_reward, dtype=np.float32))\n",
+        "    return self._last_observation\n",
+        "\n",
+        "  def reset(self):\n",
+        "    self._lab.reset()\n",
+        "    return dm_env.restart(self._observation())\n",
+        "\n",
+        "  def step(self, action):\n",
+        "    if not self._lab.is_running():\n",
+        "      return dm_env.restart(self.reset())\n",
+        "\n",
+        "    self._action = action.item()\n",
+        "    if self._action not in _ACTION_MAP:\n",
+        "      raise ValueError('Action not available')\n",
+        "    lab_action = np.array(_ACTION_MAP[self._action], dtype=np.intc)\n",
+        "    self._reward = self._lab.step(lab_action, num_steps=self._action_repeats)\n",
+        "\n",
+        "    if self._lab.is_running():\n",
+        "      return dm_env.transition(self._reward, self._observation())\n",
+        "    return dm_env.termination(self._reward, self._last_observation)\n",
+        "\n",
+        "  def observation_spec(self):\n",
+        "    return observation_action_reward.OAR(\n",
+        "        observation=dm_env.specs.Array(shape=(72, 96, 3), dtype=np.uint8),\n",
+        "        action=dm_env.specs.Array(shape=(), dtype=np.int64),\n",
+        "        reward=dm_env.specs.Array(shape=(), dtype=np.float32))\n",
+        "\n",
+        "  def action_spec(self):\n",
+        "    return dm_env.specs.DiscreteArray(num_values=15, dtype=np.int64)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "4ms1TBjDSXr0"
+      },
+      "outputs": [],
+      "source": [
+        "# @title Dataset\n",
+        "\n",
+        "def _decode_images(pngs):\n",
+        "  \"\"\"Decode tensor of PNGs.\"\"\"\n",
+        "  decode_rgb_png = functools.partial(tf.io.decode_png, channels=3)\n",
+        "  images = tf.map_fn(decode_rgb_png, pngs, dtype=tf.uint8,\n",
+        "                     parallel_iterations=10)\n",
+        "  # [N, 72, 96, 3]\n",
+        "  images.set_shape((pngs.shape[0], 72, 96, 3))\n",
+        "  return images\n",
+        "\n",
+        "def _tf_example_to_step_ds(tf_example: tf.train.Example,\n",
+        "                           episode_length: int) -\u003e reverb.ReplaySample:\n",
+        "  \"\"\"Create a Reverb replay sample from a TF example.\"\"\"\n",
+        "\n",
+        "  # Parse tf.Example.\n",
+        "  def sequence_feature(shape, dtype=tf.float32):\n",
+        "    return tf.io.FixedLenFeature(shape=[episode_length] + shape, dtype=dtype)\n",
+        "\n",
+        "  feature_description = {\n",
+        "      'episode_id': tf.io.FixedLenFeature([], tf.int64),\n",
+        "      'start_idx': tf.io.FixedLenFeature([], tf.int64),\n",
+        "      'episode_return': tf.io.FixedLenFeature([], tf.float32),\n",
+        "      'observations_pixels': sequence_feature([], tf.string),\n",
+        "      'observations_reward': sequence_feature([]),\n",
+        "      # actions are one-hot arrays.\n",
+        "      'observations_action': sequence_feature([15]),\n",
+        "      'actions': sequence_feature([], tf.int64),\n",
+        "      'rewards': sequence_feature([]),\n",
+        "      'discounted_rewards': sequence_feature([]),\n",
+        "      'discounts': sequence_feature([]),\n",
+        "  }\n",
+        "\n",
+        "  data = tf.io.parse_single_example(tf_example, feature_description)\n",
+        "  pixels = _decode_images(data['observations_pixels'])\n",
+        "\n",
+        "  observation = observation_action_reward.OAR(\n",
+        "      observation=pixels,\n",
+        "      action=tf.argmax(data['observations_action'],\n",
+        "                       axis=1, output_type=tf.int64),\n",
+        "      reward=data['observations_reward'])\n",
+        "\n",
+        "  data = acme_reverb.Step(\n",
+        "      observation=observation,\n",
+        "      action=data['actions'],\n",
+        "      reward=data['rewards'],\n",
+        "      discount=data['discounts'],\n",
+        "      start_of_episode=tf.zeros((episode_length,), tf.bool),\n",
+        "      extras={})\n",
+        "\n",
+        "  # Keys are all zero and probabilities are all one.\n",
+        "  info = reverb.SampleInfo(key=tf.zeros((episode_length,), tf.int64),\n",
+        "                           probability=tf.ones((episode_length,), tf.float32),\n",
+        "                           table_size=tf.zeros((episode_length,), tf.int64),\n",
+        "                           priority=tf.ones((episode_length,), tf.float32))\n",
+        "  sample = reverb.ReplaySample(info=info, data=data)\n",
+        "  return tf.data.Dataset.from_tensor_slices(sample)\n",
+        "\n",
+        "def subsequences(step_ds: tf.data.Dataset,\n",
+        "                 length: int, shift: int = 1\n",
+        "                 ) -\u003e tf.data.Dataset:\n",
+        "  \"\"\"Dataset of subsequences from a dataset of episode steps.\"\"\"\n",
+        "  window_ds = step_ds.window(length, shift=shift, stride=1)\n",
+        "  return window_ds.interleave(_nest_ds).batch(length, drop_remainder=True)\n",
+        "\n",
+        "\n",
+        "def _nest_ds(nested_ds: tf.data.Dataset) -\u003e tf.data.Dataset:\n",
+        "  \"\"\"Produces a dataset of nests from a nest of datasets of the same size.\"\"\"\n",
+        "  flattened_ds = tuple(tree.flatten(nested_ds))\n",
+        "  zipped_ds = tf.data.Dataset.zip(flattened_ds)\n",
+        "  return zipped_ds.map(lambda *x: tree.unflatten_as(nested_ds, x))\n",
+        "\n",
+        "\n",
+        "def make_dataset(path: str,\n",
+        "                 episode_length: int,\n",
+        "                 sequence_length: int,\n",
+        "                 sequence_shift: int,\n",
+        "                 num_shards: int = 500) -\u003e tf.data.Dataset:\n",
+        "  \"\"\"Create dataset of DeepMind Lab sequences.\"\"\"\n",
+        "\n",
+        "  filenames = [f'{path}/tfrecord-{i:05d}-of-{num_shards:05d}'\n",
+        "               for i in range(num_shards)]\n",
+        "  file_ds = tf.data.Dataset.from_tensor_slices(filenames)\n",
+        "  file_ds = file_ds.repeat().shuffle(num_shards)\n",
+        "  tfrecord_dataset = functools.partial(tf.data.TFRecordDataset,\n",
+        "                                       compression_type='GZIP')\n",
+        "\n",
+        "  # Dataset of tf.Examples containing full episodes.\n",
+        "  example_ds = file_ds.interleave(tfrecord_dataset)\n",
+        "\n",
+        "  # Dataset of episodes, each represented as a dataset of steps.\n",
+        "  _tf_example_to_step_ds_with_length = functools.partial(\n",
+        "      _tf_example_to_step_ds, episode_length=episode_length)\n",
+        "  episode_ds = example_ds.map(_tf_example_to_step_ds_with_length,\n",
+        "                              num_parallel_calls=tf.data.experimental.AUTOTUNE)\n",
+        "\n",
+        "  # Dataset of sequences.\n",
+        "  training_sequences = functools.partial(subsequences, length=sequence_length,\n",
+        "                                         shift=sequence_shift)\n",
+        "  return episode_ds.interleave(training_sequences)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "sV2vXWAsU5Zg"
+      },
+      "source": [
+        "## Experiment"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0F-l-4LolX1c"
+      },
+      "outputs": [],
+      "source": [
+        "# task                            | episode length | run\n",
+        "# ----------------------------------------------------------------------------\n",
+        "# seekavoid_arena_01              | 301            | training_{0..2}\n",
+        "# seekavoid_arena_01              | 301            | snapshot_{0..1}_eps_0.0\n",
+        "# seekavoid_arena_01              | 301            | snapshot_{0..1}_eps_0.01\n",
+        "# seekavoid_arena_01              | 301            | snapshot_{0..1}_eps_0.1\n",
+        "# seekavoid_arena_01              | 301            | snapshot_{0..1}_eps_0.25\n",
+        "# explore_object_rewards_few      | 1351           | training_{0..2}\n",
+        "# explore_object_rewards_many     | 1801           | training_{0..2}\n",
+        "# rooms_select_nonmatching_object | 181            | training_{0..2}\n",
+        "# rooms_watermaze                 | 1801           | training_{0..2}\n",
+        "\n",
+        "TASK = 'seekavoid_arena_01'\n",
+        "RUN = 'training_0'\n",
+        "EPISODE_LENGTH = 301\n",
+        "BATCH_SIZE = 1\n",
+        "DATASET_PATH = f'gs://rl_unplugged/dmlab/{TASK}/{RUN}'"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "H7YN_qwDVPqQ"
+      },
+      "outputs": [],
+      "source": [
+        "environment = DeepMindLabEnvironment(TASK, action_repeats=2)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "-B25Lcgt8JD4"
+      },
+      "outputs": [],
+      "source": [
+        "dataset = make_dataset(DATASET_PATH, num_shards=500,\n",
+        "                       episode_length=EPISODE_LENGTH,\n",
+        "                       sequence_length=120,\n",
+        "                       sequence_shift=40)\n",
+        "dataset = dataset.padded_batch(BATCH_SIZE, drop_remainder=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gTO61WolqkzG"
+      },
+      "source": [
+        "### Learning"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "cBFmIYxTtBg4"
+      },
+      "outputs": [],
+      "source": [
+        "# Create network.\n",
+        "def process_observations(x):\n",
+        "  return x._replace(observation=tf.image.convert_image_dtype(x.observation, tf.float32))\n",
+        "\n",
+        "environment_spec = specs.make_environment_spec(environment)\n",
+        "num_actions = environment_spec.actions.maximum + 1\n",
+        "network = snt.DeepRNN([\n",
+        "    process_observations,\n",
+        "    networks.R2D2AtariNetwork(num_actions=num_actions)\n",
+        "])\n",
+        "tf_utils.create_variables(network, [environment_spec.observations])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "QLoAU2zwwi3X"
+      },
+      "outputs": [],
+      "source": [
+        "# Create a logger.\n",
+        "logger = loggers.TerminalLogger(label='learner', time_delta=1.)\n",
+        "\n",
+        "# Create the R2D2 learner.\n",
+        "learner = r2d2.R2D2Learner(\n",
+        "    environment_spec=environment_spec,\n",
+        "    network=network,\n",
+        "    target_network=copy.deepcopy(network),\n",
+        "    discount=0.99,\n",
+        "    learning_rate=1e-4,\n",
+        "    importance_sampling_exponent=0.2,\n",
+        "    target_update_period=100,\n",
+        "    burn_in_length=0,\n",
+        "    sequence_length=120,\n",
+        "    store_lstm_state=False,\n",
+        "    dataset=dataset,\n",
+        "    logger=logger)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "lqMgZS9UWfWl"
+      },
+      "outputs": [],
+      "source": [
+        "for _ in range(5):\n",
+        "  learner.step()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "eMpO7eBeqmZn"
+      },
+      "source": [
+        "### Evaluation"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "orUDJVmpA0lU"
+      },
+      "outputs": [],
+      "source": [
+        "# Create a logger.\n",
+        "logger = loggers.TerminalLogger(label='evaluator', time_delta=1.)\n",
+        "\n",
+        "# Create evaluation loop.\n",
+        "eval_network = snt.DeepRNN([\n",
+        "    network,\n",
+        "    lambda q: trfl.epsilon_greedy(q, epsilon=0.4**8).sample(),\n",
+        "])\n",
+        "eval_loop = environment_loop.EnvironmentLoop(\n",
+        "    environment=environment,\n",
+        "    actor=actors.RecurrentActor(policy_network=eval_network),\n",
+        "    logger=logger)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "6FDsfWVXCcYZ"
+      },
+      "outputs": [],
+      "source": [
+        "eval_loop.run(2)"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "collapsed_sections": [
+        "tr2MoADAQepq",
+        "SvWeEWGd5Nx_",
+        "ewPoBUDd04xh",
+        "fwdmJWW3KB7g"
+      ],
+      "name": "RL Unplugged: Offline R2D2 - DeepMind Lab",
+      "provenance": [
+        {
+          "file_id": "1vgfEtkThYTNWHhi3pisuRFxgmoMniuQz",
+          "timestamp": 1605722818242
+        },
+        {
+          "file_id": "/v2/external/notebooks/intro.ipynb",
+          "timestamp": 1602763830869
+        }
+      ]
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}