Open-source release of Unsupervised Adversarial Training (UAT)

PiperOrigin-RevId: 267603150
2026-05-10 05:17:46 +08:00 · 2019-09-06 16:33:14 +01:00
parent 451d296490
commit ac3276abf1
6 changed files with 200348 additions and 0 deletions
@@ -0,0 +1,68 @@
+# Unsupervised Adversarial Training (UAT)
+
+This repository contains the trained model and dataset used for Unsupervised
+Adversarial Training (UAT) from the paper
+[Are Labels Required for Improving Adversarial Robustness?](https://arxiv.org/abs/1905.13725)
+
+This is not an official Google product.
+
+## Contents
+
+This repo serves two primary functions:
+
+* Data release: We share indices for the 80 Million Tiny Images Dataset subset
+used in our experiments, and a utility for loading the data.
+* Model release: We have released our top-performing model on TF-Hub, and
+include an example demonstrating how to use it.
+
+## Running the code
+
+### Using the model
+
+Our model is available via
+[TF-Hub](https://tfhub.dev/deepmind/unsupervised-adversarial-training/cifar10/wrn_106/1).
+For example usage, refer to `quick_eval_cifar.py`. The preferred method of
+running this script is through `run.sh`, which will set up a virtual
+environment, install the dependencies, and run the evaluation script, which
+will print the adversarial accuracy of the model.
+
+```bash
+cd /path/to/deepmind_research
+unsupervised_adversarial_training/run.sh
+```
+
+### Viewing the dataset
+
+First, download the 80 Million Tiny Images Dataset image binary from the
+official web page: http://horatio.cs.nyu.edu/mit/tiny/data/index.html
+
+Note that this file is very large and requires 227 GB of disk space.
+
+The file `tiny_200K_idxs.txt` indicates which images from the dataset form the
+80M@200K training set used in the paper. For example usage, refer to
+`save_example_images.py`.
+
+To view example images from this dataset, use the command:
+
+```bash
+cd /path/to/deepmind_research
+python -m unsupervised_adversarial_training.save_example_images \
+  --data_bin_path=/path/to/tiny_images.bin
+```
+
+This will save the first 100 images to the directory
+`unsupervised_adversarial_training/images`.
+
+## Citing this work
+
+If you use this code in your work, please cite the accompanying paper:
+
+```
+@article{uat2019,
+  title={Are Labels Required for Improving Adversarial Robustness?},
+  author={Uesato, Jonathan and Alayrac, Jean-Baptiste and Huang, Po-Sen and
+  Stanforth, Robert and Fawzi, Alhussein and Kohli, Pushmeet},
+  journal={arXiv preprint arXiv:1905.13725},
+  year={2019}
+}
+```
@@ -0,0 +1,170 @@
+# coding=utf-8
+# Copyright 2019 Deepmind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Single file script for doing a quick evaluation of a model.
+
+This script is called by run.sh.
+Usage:
+  user@host:/path/to/deepmind_research$ unsupervised_adversarial_training/run.sh
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+from absl import app
+from absl import flags
+import cleverhans
+from cleverhans import attacks
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import math_grad
+import tensorflow_hub as hub
+
+# TF-Hub handle of the released UAT model; loaded by make_classifier().
+UAT_HUB_URL = ('https://tfhub.dev/deepmind/unsupervised-adversarial-training/'
+               'cifar10/wrn_106/1')
+
+# Command-line flags controlling the attack and the evaluation loop.
+FLAGS = flags.FLAGS
+# NOTE: the 'fgsm' value selects the iterative `attacks.MadryEtAl` attack in
+# eval_cifar() (run for `num_steps` iterations), not a single-step FGSM.
+flags.DEFINE_enum('attack_fn_name', 'fgsm', ['fgsm', 'none'],
+                  'Name of the attack method to use.')
+flags.DEFINE_float('epsilon_attack', 8.0 / 255,
+                   'Maximum allowable perturbation size, between 0 and 1.')
+flags.DEFINE_integer('num_steps', 20, 'Number of attack iterations.')
+flags.DEFINE_integer('num_batches', 100, 'Number of batches to evaluate.')
+flags.DEFINE_integer('batch_size', 32, 'Batch size.')
+flags.DEFINE_integer('skip_batches', 0,
+                     'Controls index of start image. This can be used to '
+                     'evaluate the model on different subsets of the test set.')
+# Passed to the attack in eval_cifar() as its per-iteration step size
+# (`eps_iter`).
+flags.DEFINE_float('learning_rate', 0.003, 'Attack optimizer learning rate.')
+
+
+def _top_1_accuracy(logits, labels):
+  return tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, labels, 1), tf.float32))
+
+
+def make_classifier():
+  model = hub.Module(UAT_HUB_URL)
+
+  def classifier(x):
+    x = _cifar_meanstd_normalize(x)
+    model_input = dict(x=x, decay_rate=0.1, prefix='default')
+    return model(model_input)
+
+  return classifier
+
+
+def eval_cifar():
+  """Evaluate an adversarially trained model."""
+  attack_fn_name = FLAGS.attack_fn_name
+  total_batches = FLAGS.num_batches
+  batch_size = FLAGS.batch_size
+
+  # Note that a `classifier` is a function mapping [0,1]-scaled image Tensors
+  # to a logit Tensor. In particular, it includes *both* the preprocessing
+  # function, and the neural network.
+  classifier = make_classifier()
+  cleverhans_model = cleverhans.model.CallableModelWrapper(classifier, 'logits')
+
+  _, data_test = tf.keras.datasets.cifar10.load_data()
+  data = _build_dataset(data_test, batch_size=batch_size, shuffle=False)
+
+  # Necessary for backwards-compatibility
+  # Earlier versions of TF don't have a registered gradient for the AddV2 op
+  tf.RegisterGradient('AddV2')(math_grad._AddGrad)  # pylint: disable=protected-access
+
+  # Generate adversarial images.
+  if attack_fn_name == 'fgsm':
+    attack = attacks.MadryEtAl(cleverhans_model)
+    num_cifar_classes = 10
+    adv_x = attack.generate(data.image,
+                            eps=FLAGS.epsilon_attack,
+                            eps_iter=FLAGS.learning_rate,
+                            nb_iter=FLAGS.num_steps,
+                            y=tf.one_hot(data.label, depth=num_cifar_classes))
+  elif attack_fn_name == 'none':
+    adv_x = data.image
+
+  logits = classifier(adv_x)
+  probs = tf.nn.softmax(logits)
+  adv_acc = _top_1_accuracy(logits, data.label)
+
+  with tf.train.SingularMonitoredSession() as sess:
+    total_acc = 0.
+    for _ in range(FLAGS.skip_batches):
+      sess.run(data.image)
+    for _ in range(total_batches):
+      _, _, adv_acc_val = sess.run([probs, data.label, adv_acc])
+      total_acc += adv_acc_val
+      print('Batch accuracy: {}'.format(adv_acc_val))
+    print('Total accuracy against {}: {}'.format(
+        FLAGS.attack_fn_name, total_acc / total_batches))
+
+
+##########    Utilities    ##########
+
+
+# Defines a dataset sample.
+Sample = collections.namedtuple('Sample', ['image', 'label'])
+
+
+def _build_dataset(raw_data, batch_size=32, shuffle=False):
+  """Builds a dataset from raw NumPy tensors.
+
+  Args:
+    raw_data: Pair (images, labels) of numpy arrays. `images` should have shape
+      (N, H, W, C) with values in [0, 255], and `labels` should have shape
+      (N,) or (N, 1) indicating class indices.
+    batch_size: int, batch size
+    shuffle: bool, whether to shuffle the data (default: True).
+
+  Returns:
+    (image_tensor, label_tensor), which iterate over the dataset, which are
+      (batch_size, H, W, C) tf.float32 and (batch_size,) tf.int32 Tensors
+      respectively
+  """
+  images, labels = raw_data
+  labels = np.squeeze(labels)
+  samples = Sample(images.astype(np.float32) / 255., labels.astype(np.int64))
+  data = tf.data.Dataset.from_tensor_slices(samples)
+  if shuffle:
+    data = data.shuffle(1000)
+  return data.repeat().batch(batch_size).make_one_shot_iterator().get_next()
+
+
+def _cifar_meanstd_normalize(image):
+  """Mean + stddev whitening for CIFAR-10 used in ResNets.
+
+  Args:
+    image: Numpy array or TF Tensor, with values in [0, 255]
+
+  Returns:
+    image: Numpy array or TF Tensor, shifted and scaled by mean/stdev on
+      CIFAR-10 dataset.
+  """
+  # Channel-wise means and std devs calculated from the CIFAR-10 training set
+  cifar_means = [125.3, 123.0, 113.9]
+  cifar_devs = [63.0, 62.1, 66.7]
+  rescaled_means = [x / 255. for x in cifar_means]
+  rescaled_devs = [x / 255. for x in cifar_devs]
+  image = (image - rescaled_means) / rescaled_devs
+  return image
+
+
+def main(unused_argv):
+  eval_cifar()
+
+if __name__ == '__main__':
+  app.run(main)
@@ -0,0 +1,7 @@
+absl-py>=0.7.0
+cleverhans>=3.0.1
+numpy>=1.16.4
+pillow>=4.3.0
+tensorflow>=1.14
+tensorflow-gpu  >= 1.11.0  # GPU version of TensorFlow.
+tensorflow-hub>=0.5.0
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Copyright 2019 Deepmind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage:
+# user@host:/path/to/deepmind_research$ unsupervised_adversarial_training/run.sh
+
+# Abort on the first failing step, so the evaluation never runs outside the
+# virtual environment or without its dependencies installed.
+set -e
+
+# Sets up virtual environment, installs dependencies, and runs evaluation script
+python3 -m venv uat_venv
+# `source` is a bash builtin, hence the bash shebang on the first line.
+source uat_venv/bin/activate
+pip install -r unsupervised_adversarial_training/requirements.txt
+
+python -m unsupervised_adversarial_training.quick_eval_cifar \
+  --attack_fn_name=fgsm
@@ -0,0 +1,77 @@
+# coding=utf-8
+# Copyright 2019 Deepmind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+r"""Loads images from the 80M@200K training set and saves them in PNG format.
+
+Usage:
+    cd /path/to/deepmind_research
+    python -m unsupervised_adversarial_training.save_example_images \
+        --data_bin_path=/path/to/tiny_images.bin
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from absl import app
+from absl import flags
+import numpy as np
+from PIL import Image
+
+# Directory containing this module; the default flag paths below live in it.
+DIR_NAME = os.path.dirname(__file__)
+FLAGS = flags.FLAGS
+flags.DEFINE_string('data_bin_path', None,
+                    'path to 80M Tiny Images data binary')
+flags.DEFINE_string('idxs_path', os.path.join(DIR_NAME, 'tiny_200K_idxs.txt'),
+                    'path to file of indices indicating subset of 80M dataset')
+flags.DEFINE_string('output_dir', os.path.join(DIR_NAME, 'images'),
+                    'path to output directory for images')
+flags.mark_flag_as_required('data_bin_path')
+
+# Class names, indexed by the integer label read from the index file.
+CIFAR_LABEL_IDX_TO_NAME = ['airplane', 'automobile', 'bird', 'cat', 'deer',
+                           'dog', 'frog', 'horse', 'ship', 'truck']
+# Number of images the data binary is mapped as holding (first dimension of
+# the memory-mapped array in _load_dataset_as_array).
+DATASET_SIZE = 79302017
+
+
+def _load_dataset_as_array(ds_path):
+  dataset = np.memmap(filename=ds_path, dtype=np.uint8, mode='r',
+                      shape=(DATASET_SIZE, 3, 32, 32))
+  return dataset.transpose([0, 3, 2, 1])
+
+
+def main(unused_argv):
+  dataset = _load_dataset_as_array(FLAGS.data_bin_path)
+
+  # Load the indices and labels of the 80M@200K training set
+  data_idxs, data_labels = np.loadtxt(
+      FLAGS.idxs_path,
+      delimiter=',',
+      dtype=[('index', np.uint64), ('label', np.uint8)],
+      unpack=True)
+
+  # Save images as PNG files
+  if not os.path.exists(FLAGS.output_dir):
+    os.makedirs(FLAGS.output_dir)
+  for i in range(100):
+    class_name = CIFAR_LABEL_IDX_TO_NAME[data_labels[i]]
+    file_name = 'im{}_{}.png'.format(i, class_name)
+    file_path = os.path.join(FLAGS.output_dir, file_name)
+    img = dataset[data_idxs[i]]
+    Image.fromarray(img).save(file_path)
+
+
+if __name__ == '__main__':
+  app.run(main)