Open-source release of Unsupervised Adversarial Training (UAT)

PiperOrigin-RevId: 267603150
2026-06-03 21:37:40 +08:00 · 2019-09-06 16:33:14 +01:00
parent 451d296490
commit ac3276abf1
6 changed files with 200348 additions and 0 deletions
@@ -0,0 +1,68 @@
 # Unsupervised Adversarial Training (UAT)
 This repository contains the trained model and dataset used for Unsupervised
 Adversarial Training (UAT) from the paper
 [Are Labels Required for Improving Adversarial Robustness?](https://arxiv.org/abs/1905.13725).
 This is not an official Google product.
 ## Contents
 This repo serves two primary functions:
 * Data release: We share indices for the 80 Million Tiny Images Dataset subset
 used in our experiments, and a utility for loading the data.
 * Model release: We have released our top-performing model on TF-Hub, and
 include an example demonstrating how to use it.
 ## Running the code
 ### Using the model
 Our model is available via
 [TF-Hub](https://tfhub.dev/deepmind/unsupervised-adversarial-training/cifar10/wrn_106/1).
 For example usage, refer to `quick_eval_cifar.py`. The preferred method of
 running this script is through `run.sh`, which will set up a virtual
 environment, install the dependencies, and run the evaluation script, which
 will print the adversarial accuracy of the model.
 ```bash
 cd /path/to/deepmind_research
 unsupervised_adversarial_training/run.sh
 ```
 ### Viewing the dataset
 First, download the 80 Million Tiny Images Dataset image binary from the
 official web page: http://horatio.cs.nyu.edu/mit/tiny/data/index.html
 Note this file is very large, and requires 227 GB of disk space.
 The file `tiny_200K_idxs.txt` indicates which images from the dataset form the
 80M@200K training set used in the paper. For example usage, refer to
 `save_example_images.py`.
 To view example images from this dataset, use the command:
 ```bash
 cd /path/to/deepmind_research
 python -m unsupervised_adversarial_training.save_example_images \
  --data_bin_path=/path/to/tiny_images.bin
 ```
 This will save the first 100 images to the directory
 `unsupervised_adversarial_training/images`.
 ## Citing this work
 If you use this code in your work, please cite the accompanying paper:
 ```
@article{uat2019,
  title={Are Labels Required for Improving Adversarial Robustness?},
  author={Uesato, Jonathan and Alayrac, Jean-Baptiste and Huang, Po-Sen and
  Stanforth, Robert and Fawzi, Alhussein and Kohli, Pushmeet},
  journal={arXiv preprint arXiv:1905.13725},
  year={2019}
 }
 ```
@@ -0,0 +1,170 @@
 # coding=utf-8
 # Copyright 2019 Deepmind Technologies Limited.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Single file script for doing a quick evaluation of a model.
 This script is called by run.sh.
 Usage:
  user@host:/path/to/deepmind_research$ unsupervised_adversarial_training/run.sh
 """
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import collections
 from absl import app
 from absl import flags
 import cleverhans
 from cleverhans import attacks
 import numpy as np
 import tensorflow as tf
 from tensorflow.python.ops import math_grad
 import tensorflow_hub as hub
 # TF-Hub handle of the released UAT model; loaded by `make_classifier`.
 UAT_HUB_URL = ('https://tfhub.dev/deepmind/unsupervised-adversarial-training/'
               'cifar10/wrn_106/1')
 FLAGS = flags.FLAGS
 # NOTE: despite its name, 'fgsm' makes `eval_cifar` build the cleverhans
 # `attacks.MadryEtAl` attack, which is run for `num_steps` iterations. 'none'
 # evaluates the model on the unperturbed images.
 flags.DEFINE_enum('attack_fn_name', 'fgsm', ['fgsm', 'none'],
                  'Name of the attack method to use.')
 # Passed to the attack as `eps`. Images are scaled to [0, 1] by
 # `_build_dataset`, hence the division by 255.
 flags.DEFINE_float('epsilon_attack', 8.0 / 255,
                   'Maximum allowable perturbation size, between 0 and 1.')
 # Passed to the attack as `nb_iter`.
 flags.DEFINE_integer('num_steps', 20, 'Number of attack iterations.')
 flags.DEFINE_integer('num_batches', 100, 'Number of batches to evaluate.')
 flags.DEFINE_integer('batch_size', 32, 'Batch size.')
 # `eval_cifar` pulls this many batches from the (unshuffled) test iterator
 # and discards them before it starts accumulating accuracy.
 flags.DEFINE_integer('skip_batches', 0,
                     'Controls index of start image. This can be used to '
                     'evaluate the model on different subsets of the test set.')
 # Passed to the attack as `eps_iter`, i.e. the per-iteration step size.
 flags.DEFINE_float('learning_rate', 0.003, 'Attack optimizer learning rate.')
 def _top_1_accuracy(logits, labels):
  """Returns the mean top-1 accuracy of `logits` against `labels`."""
  is_correct = tf.nn.in_top_k(logits, labels, 1)
  return tf.reduce_mean(tf.cast(is_correct, tf.float32))
 def make_classifier():
  """Builds the classifier function backed by the UAT TF-Hub module.

  Returns:
    A callable that normalizes its input with `_cifar_meanstd_normalize`
    and returns the output of the hub module for it.
  """
  hub_module = hub.Module(UAT_HUB_URL)

  def classifier(x):
    normalized = _cifar_meanstd_normalize(x)
    return hub_module({'x': normalized, 'decay_rate': 0.1, 'prefix': 'default'})

  return classifier
 def eval_cifar():
  """Evaluates the released model on CIFAR-10 test batches.

  Builds the classifier, optionally perturbs each test batch with the attack
  selected by `--attack_fn_name`, and prints the per-batch accuracy followed
  by the accuracy averaged over `--num_batches` batches. The first
  `--skip_batches` batches of the test set are consumed and discarded.

  Raises:
    ValueError: If `--num_batches` is not positive, or the attack name is not
      one of the supported values.
  """
  attack_fn_name = FLAGS.attack_fn_name
  total_batches = FLAGS.num_batches
  batch_size = FLAGS.batch_size
  # Fail early instead of dividing by zero after the whole graph has been
  # built and the model downloaded.
  if total_batches <= 0:
    raise ValueError('--num_batches must be a positive integer.')
  # Note that a `classifier` is a function mapping [0,1]-scaled image Tensors
  # to a logit Tensor. In particular, it includes *both* the preprocessing
  # function, and the neural network.
  classifier = make_classifier()
  cleverhans_model = cleverhans.model.CallableModelWrapper(classifier, 'logits')
  _, data_test = tf.keras.datasets.cifar10.load_data()
  data = _build_dataset(data_test, batch_size=batch_size, shuffle=False)
  # Necessary for backwards-compatibility
  # Earlier versions of TF don't have a registered gradient for the AddV2 op.
  # Versions that already register it raise KeyError on a second
  # registration; in that case the built-in gradient is the one to use.
  try:
    tf.RegisterGradient('AddV2')(math_grad._AddGrad)  # pylint: disable=protected-access
  except KeyError:
    pass
  # Generate adversarial images.
  if attack_fn_name == 'fgsm':
    # NOTE: the flag value is 'fgsm', but the attack built here is the
    # cleverhans `MadryEtAl` attack, run for `--num_steps` iterations.
    attack = attacks.MadryEtAl(cleverhans_model)
    num_cifar_classes = 10
    adv_x = attack.generate(data.image,
                            eps=FLAGS.epsilon_attack,
                            eps_iter=FLAGS.learning_rate,
                            nb_iter=FLAGS.num_steps,
                            y=tf.one_hot(data.label, depth=num_cifar_classes))
  elif attack_fn_name == 'none':
    adv_x = data.image
  else:
    # Unreachable through the enum flag; guards against `adv_x` being unbound
    # if the list of flag choices is ever extended without updating this code.
    raise ValueError('Unknown attack_fn_name: {}'.format(attack_fn_name))
  logits = classifier(adv_x)
  adv_acc = _top_1_accuracy(logits, data.label)
  with tf.train.SingularMonitoredSession() as sess:
    total_acc = 0.
    # Each run of `data.image` advances the dataset iterator by one batch.
    for _ in range(FLAGS.skip_batches):
      sess.run(data.image)
    for _ in range(total_batches):
      adv_acc_val = sess.run(adv_acc)
      total_acc += adv_acc_val
      print('Batch accuracy: {}'.format(adv_acc_val))
    print('Total accuracy against {}: {}'.format(
        FLAGS.attack_fn_name, total_acc / total_batches))
 ##########    Utilities    ##########
 # Container pairing an image (or image batch) with its label(s).
 Sample = collections.namedtuple('Sample', ('image', 'label'))
 def _build_dataset(raw_data, batch_size=32, shuffle=False):
  """Builds a dataset from raw NumPy tensors.

  Args:
    raw_data: Pair (images, labels) of numpy arrays. `images` should have shape
      (N, H, W, C) with values in [0, 255], and `labels` should have shape
      (N,) or (N, 1) indicating class indices.
    batch_size: int, batch size
    shuffle: bool, whether to shuffle the data (default: False).

  Returns:
    A `Sample` whose `image` and `label` fields are the tensors produced by a
    one-shot iterator over the repeated, batched dataset. Images are
    tf.float32 rescaled to [0, 1]; labels are tf.int64.
  """
  images, labels = raw_data
  # Flatten explicitly rather than squeezing: `np.squeeze` would turn the
  # labels of a single-example dataset, shape (1, 1), into a 0-d scalar that
  # cannot be sliced alongside the images.
  labels = np.reshape(labels, [-1])
  samples = Sample(images.astype(np.float32) / 255., labels.astype(np.int64))
  data = tf.data.Dataset.from_tensor_slices(samples)
  if shuffle:
    data = data.shuffle(1000)
  return data.repeat().batch(batch_size).make_one_shot_iterator().get_next()
 def _cifar_meanstd_normalize(image):
  """Mean + stddev whitening for CIFAR-10 used in ResNets.

  Args:
    image: Numpy array or TF Tensor with values in [0, 1] (the channel
      statistics below are divided by 255 before use) and the three colour
      channels on the last axis.

  Returns:
    image: Numpy array or TF Tensor, shifted and scaled by mean/stdev on
      CIFAR-10 dataset.
  """
  # Channel-wise means and std devs calculated from the CIFAR-10 training set,
  # expressed on the [0, 255] pixel scale.
  cifar_means = [125.3, 123.0, 113.9]
  cifar_devs = [63.0, 62.1, 66.7]
  # Bring the statistics onto the [0, 1] scale of the incoming images.
  rescaled_means = [x / 255. for x in cifar_means]
  rescaled_devs = [x / 255. for x in cifar_devs]
  # The length-3 lists broadcast against the trailing channel axis.
  image = (image - rescaled_means) / rescaled_devs
  return image
 def main(unused_argv):
  """Entry point for `app.run`; runs the CIFAR-10 evaluation."""
  del unused_argv  # Command-line arguments are read through FLAGS instead.
  eval_cifar()
 if __name__ == '__main__':
  app.run(main)
@@ -0,0 +1,7 @@
 absl-py>=0.7.0
 cleverhans>=3.0.1
 numpy>=1.16.4
 pillow>=4.3.0
 tensorflow>=1.14
 tensorflow-gpu  >= 1.11.0  # GPU version of TensorFlow.
 tensorflow-hub>=0.5.0
@@ -0,0 +1,26 @@
 #!/bin/sh
 # Copyright 2019 Deepmind Technologies Limited.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Usage:
 # user@host:/path/to/deepmind_research$ unsupervised_adversarial_training/run.sh
 # Sets up a virtual environment, installs dependencies, and runs the
 # evaluation script. Run it from the deepmind_research root: the paths below
 # are relative to that directory.
 # Stop at the first failing step, so that a failed venv creation does not
 # lead to installing packages into the ambient Python environment.
 set -e
 python3 -m venv uat_venv
 # `.` is the POSIX spelling of bash's `source`, which plain /bin/sh may lack.
 . uat_venv/bin/activate
 pip install -r unsupervised_adversarial_training/requirements.txt
 python -m unsupervised_adversarial_training.quick_eval_cifar \
  --attack_fn_name=fgsm
@@ -0,0 +1,77 @@
 # coding=utf-8
 # Copyright 2019 Deepmind Technologies Limited.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 r"""Loads images from the 80M@200K training set and saves them in PNG format.
 Usage:
    cd /path/to/deepmind_research
    python -m unsupervised_adversarial_training.save_example_images \
        --data_bin_path=/path/to/tiny_images.bin
 """
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import os
 from absl import app
 from absl import flags
 import numpy as np
 from PIL import Image
 # Directory containing this script; the default index file and output
 # directory are resolved relative to it.
 DIR_NAME = os.path.dirname(__file__)
 FLAGS = flags.FLAGS
 flags.DEFINE_string('data_bin_path', None,
                    'path to 80M Tiny Images data binary')
 # Read by `main` with `np.loadtxt` as comma-separated (index, label) rows.
 flags.DEFINE_string('idxs_path', os.path.join(DIR_NAME, 'tiny_200K_idxs.txt'),
                    'path to file of indices indicating subset of 80M dataset')
 flags.DEFINE_string('output_dir', os.path.join(DIR_NAME, 'images'),
                    'path to output directory for images')
 flags.mark_flag_as_required('data_bin_path')
 # Maps the integer label read from the index file to the class name used in
 # the output file names.
 CIFAR_LABEL_IDX_TO_NAME = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                           'dog', 'frog', 'horse', 'ship', 'truck']
 # Number of images in the data binary; used as the leading dimension when
 # memory-mapping it as (DATASET_SIZE, 3, 32, 32) uint8.
 DATASET_SIZE = 79302017
 def _load_dataset_as_array(ds_path):
  """Memory-maps the data binary and returns a channels-last view of it.

  Args:
    ds_path: Path to the 80M Tiny Images data binary.

  Returns:
    A read-only uint8 view of shape (DATASET_SIZE, 32, 32, 3), obtained by
    reversing the order of the three per-image axes of the on-disk layout.
  """
  on_disk_shape = (DATASET_SIZE, 3, 32, 32)
  mapped = np.memmap(
      filename=ds_path, dtype=np.uint8, mode='r', shape=on_disk_shape)
  return mapped.transpose([0, 3, 2, 1])
 def main(unused_argv):
  """Saves the first images of the 80M@200K training set as PNG files.

  Reads (index, label) rows from `--idxs_path`, looks each index up in the
  memory-mapped binary at `--data_bin_path`, and writes up to 100 images to
  `--output_dir` as `im<i>_<class name>.png`.

  Args:
    unused_argv: Positional command-line arguments from `app.run`; unused.
  """
  del unused_argv
  num_examples = 100
  dataset = _load_dataset_as_array(FLAGS.data_bin_path)
  # Load the indices and labels of the 80M@200K training set
  data_idxs, data_labels = np.loadtxt(
      FLAGS.idxs_path,
      delimiter=',',
      dtype=[('index', np.uint64), ('label', np.uint8)],
      unpack=True)
  # A single-row index file yields 0-d arrays; promote them so they can be
  # sliced. Slicing (rather than indexing up to a fixed count) also copes with
  # index files that list fewer than `num_examples` images.
  data_idxs = np.atleast_1d(data_idxs)[:num_examples]
  data_labels = np.atleast_1d(data_labels)[:num_examples]
  # Save images as PNG files
  if not os.path.exists(FLAGS.output_dir):
    os.makedirs(FLAGS.output_dir)
  for i, (idx, label) in enumerate(zip(data_idxs, data_labels)):
    class_name = CIFAR_LABEL_IDX_TO_NAME[label]
    file_name = 'im{}_{}.png'.format(i, class_name)
    file_path = os.path.join(FLAGS.output_dir, file_name)
    img = dataset[idx]
    Image.fromarray(img).save(file_path)
 if __name__ == '__main__':
  app.run(main)