diff --git a/README.md b/README.md index 04f93ae..82c90f2 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ https://deepmind.com/research/publications/ ## Projects +* [Computer-Aided Design as Language](cadl) * [Encoders and ensembles for continual learning](continual_learning) * [Towards mental time travel: a hierarchical memory for reinforcement learning agents](hierarchical_transformer_memory) * [Perceiver IO: A General Architecture for Structured Inputs & Outputs](perceiver) diff --git a/cadl/README.md b/cadl/README.md new file mode 100644 index 0000000..621de43 --- /dev/null +++ b/cadl/README.md @@ -0,0 +1,74 @@ +# A dataset of CAD sketches + +

+ +

+ +## Overview + +This repository contains the dataset used in ["Computer-Aided Design as Language"](https://arxiv.org/abs/2105.02769). +We provide the following splits: + * Training (`4,656,607` sketches) + * Validation (`50,000` sketches) + * Test (`50,000` sketches) + +## Quickstart + +First, download the dataset files: +```shell +bash download_dataset.sh +``` +This will place the splits under the `data` subfolder. + +In order to read the data, you will need the [protocol buffer](https://developers.google.com/protocol-buffers) +compiler and [TensorFlow](https://www.tensorflow.org/): +```shell +apt install -y protobuf-compiler +virtualenv --python=python3.6 "${ENV}" +source "${ENV}/bin/activate" +pip install tensorflow +``` + +Next, you need to compile the `.proto` files that define the layout of entries in +the dataset: +```shell +protoc --python_out=. *.proto +``` + +Finally, you can use the generated classes to access the examples. The following +`python` snippet reads and prints the first 5 elements from the training split: +```python +import tensorflow as tf + +import example_pb2 + +dataset = tf.data.TFRecordDataset("data/train.tfrecord") + +for raw_record in dataset.take(5).as_numpy_iterator(): + example = example_pb2.Example() + example.ParseFromString(raw_record) + print(example, "\n") +``` + +Please refer to `example.proto` for details on the data layout. + +## Citation + +If you use this dataset in your research, please cite: +``` +@article{ganin2021computer, + title={Computer-aided design as language}, + author={Ganin, Yaroslav and Bartunov, Sergey and Li, Yujia and Keller, Ethan and Saliceti, Stefano}, + journal={arXiv preprint arXiv:2105.02769}, + year={2021} +} +``` + +## License + +The code is licensed under the [Apache 2.0 License](https://www.apache.org/licenses/LICENSE-2.0). +The dataset is licensed under a [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/). 
+
+## Disclaimer
+
+This is not an official Google product.
diff --git a/cadl/common.proto b/cadl/common.proto
new file mode 100644
index 0000000..fa87f16
--- /dev/null
+++ b/cadl/common.proto
@@ -0,0 +1,36 @@
+// Copyright 2021 Deepmind Technologies Limited.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package deepmind.cad.protos;
+
+import "google/protobuf/descriptor.proto";
+
+// Custom field-level options used to annotate the message definitions of
+// this dataset.
+message NCFieldOptions {
+  // NOTE(review): presumably marks a field whose value is an index referring
+  // to another element of the sketch rather than a quantity -- confirm.
+  bool is_pointer = 1;
+  // NOTE(review): presumably the minimum number of elements expected in a
+  // repeated field -- confirm. Protobuf itself does not enforce options.
+  uint32 at_least = 2;
+}
+
+// Exposes NCFieldOptions on any field as the `(nc_field_options)` option.
+extend google.protobuf.FieldOptions {
+  // 50000-99999 is the range protobuf sets aside for in-house custom options.
+  NCFieldOptions nc_field_options = 50000;
+}
diff --git a/cadl/constraints.proto b/cadl/constraints.proto
new file mode 100644
index 0000000..1d670b1
--- /dev/null
+++ b/cadl/constraints.proto
@@ -0,0 +1,145 @@
+// Copyright 2021 Deepmind Technologies Limited.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package deepmind.cad.protos;
+
+import "common.proto";
+
+// Constraint messages. The `(nc_field_options)` annotations on their fields
+// are the custom options declared in common.proto.
+message FixConstraint {
+  repeated uint32 entities = 1
+      [(nc_field_options).is_pointer = true, (nc_field_options).at_least = 1];
+}
+
+message CoincidentConstraint {
+  repeated uint32 entities = 1
+      [(nc_field_options).is_pointer = true, (nc_field_options).at_least = 2];
+}
+
+message ConcentricConstraint {
+  repeated uint32 entities = 1
+      [(nc_field_options).is_pointer = true, (nc_field_options).at_least = 2];
+}
+
+message EqualConstraint {
+  repeated uint32 entities = 1
+      [(nc_field_options).is_pointer = true, (nc_field_options).at_least = 2];
+}
+
+message ParallelConstraint {
+  repeated uint32 entities = 1
+      [(nc_field_options).is_pointer = true, (nc_field_options).at_least = 2];
+}
+
+message TangentConstraint {
+  uint32 first = 1 [(nc_field_options).is_pointer = true];
+  uint32 second = 2 [(nc_field_options).is_pointer = true];
+}
+
+message PerpendicularConstraint {
+  uint32 first = 1 [(nc_field_options).is_pointer = true];
+  uint32 second = 2 [(nc_field_options).is_pointer = true];
+}
+
+message MirrorConstraint {
+  uint32 mirror = 1 [(nc_field_options).is_pointer = true];
+  message MirroredPair {
+    uint32 first = 1 [(nc_field_options).is_pointer = true];
+    uint32 second = 2 [(nc_field_options).is_pointer = true];
+  }
+  repeated MirroredPair mirrored_pairs = 2 [(nc_field_options).at_least = 1];
+}
+
+message DistanceConstraint {
+  uint32 first = 1 [(nc_field_options).is_pointer = true];
+  uint32 second = 2 [(nc_field_options).is_pointer = true];
+  // HORIZONTAL is the zero value, i.e. what proto3 reports when `direction`
+  // is unset; the two cases cannot be told apart.
+  enum Direction {
+    HORIZONTAL = 0;
+    VERTICAL = 1;
+    MINIMUM = 2;
+  }
+  Direction direction = 3;
+  double length = 4;
+  enum Alignment {
+    ALIGNED = 0;
+    ANTI_ALIGNED = 1;
+  }
+  enum HalfSpace {
+    NOT_AVAILABLE = 0;
+    LEFT = 1;
+    RIGHT = 2;
+  }
+  message HalfSpaceParams {
+    HalfSpace half_space_first = 1;
+    HalfSpace half_space_second = 2;
+  }
+  oneof additional_params {
+    Alignment alignment = 5;
+    HalfSpaceParams half_space_params = 6;
+  }
+}
+
+message LengthConstraint {
+  uint32 entity = 1 [(nc_field_options).is_pointer = true];
+  double length = 2;
+}
+
+message DiameterConstraint {
+  uint32 entity = 1 [(nc_field_options).is_pointer = true];
+  // NOTE(review): presumably the diameter, despite the field name -- confirm.
+  double length = 2;
+}
+
+message RadiusConstraint {
+  uint32 entity = 1 [(nc_field_options).is_pointer = true];
+  // NOTE(review): presumably the radius, despite the field name -- confirm.
+  double length = 2;
+}
+
+message AngleConstraint {
+  uint32 first = 1 [(nc_field_options).is_pointer = true];
+  uint32 second = 2 [(nc_field_options).is_pointer = true];
+  // NOTE(review): units (radians vs. degrees) are not stated here -- confirm.
+  double angle = 3;
+}
+
+message HorizontalConstraint {
+  repeated uint32 entities = 1
+      [(nc_field_options).is_pointer = true, (nc_field_options).at_least = 1];
+}
+
+message VerticalConstraint {
+  // NOTE(review): field number 2 here, whereas the `entities` field of every
+  // other constraint in this file uses 1. Field numbers identify data on the
+  // wire, so do not renumber without checking the published records.
+  repeated uint32 entities = 2
+      [(nc_field_options).is_pointer = true, (nc_field_options).at_least = 1];
+}
+
+message MidpointConstraint {
+  uint32 midpoint = 1 [(nc_field_options).is_pointer = true];
+  message Endpoints {
+    uint32 first = 1 [(nc_field_options).is_pointer = true];
+    uint32 second = 2 [(nc_field_options).is_pointer = true];
+  }
+  oneof additional_params {
+    Endpoints endpoints = 2;
+    uint32 entity = 3 [(nc_field_options).is_pointer = true];
+  }
+}
diff --git a/cadl/download_dataset.sh b/cadl/download_dataset.sh
new file mode 100644
index 0000000..b928125
--- /dev/null
+++ b/cadl/download_dataset.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright 2021 Deepmind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+
+OUTPUT_DIR="data"
+BASE_URL="https://storage.googleapis.com/cadl/"
+
+mkdir -p "${OUTPUT_DIR}"
+for file in train.tfrecord valid.tfrecord test.tfrecord
+do
+  wget -O "${OUTPUT_DIR}/${file}" "${BASE_URL}${file}"
+done
diff --git a/cadl/entities.proto b/cadl/entities.proto
new file mode 100644
index 0000000..9450e66
--- /dev/null
+++ b/cadl/entities.proto
@@ -0,0 +1,89 @@
+// Copyright 2021 Deepmind Technologies Limited.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package deepmind.cad.protos;
+
+import "google/protobuf/empty.proto";
+import "common.proto";
+
+// A pair of double-precision coordinates.
+message Vector {
+  double x = 1;
+  double y = 2;
+}
+
+message PointEntity {
+  bool is_construction = 1;
+  Vector point = 2;
+}
+
+message LineEntity {
+  bool is_construction = 1;
+  Vector start = 2;
+  Vector end = 3;
+}
+
+// NOTE(review): not referenced by the `Entity` message in example.proto,
+// which uses CircleArcEntityV2 for its `circle_arc_entity` field.
+message CircleArcEntity {
+  bool is_construction = 1;
+  Vector center = 2;
+  double radius = 3;
+  message ArcParams {
+    Vector direction = 1;
+    bool is_clockwise = 2;
+    double start_angle = 3;
+    double end_angle = 4;
+  }
+  oneof additional_params {
+    google.protobuf.Empty circle_params = 4;
+    ArcParams arc_params = 5;
+  }
+}
+
+// A circle or an arc; the `additional_params` oneof selects which one.
+message CircleArcEntityV2 {
+  bool is_construction = 1;
+  Vector center = 2;
+  message CircleParams {
+    double radius = 1;
+  }
+  message ArcParams {
+    Vector start = 1;
+    Vector end = 2;
+    bool is_clockwise = 3;
+  }
+  oneof additional_params {
+    CircleParams circle_params = 3;
+    ArcParams arc_params = 4;
+  }
+}
+
+message InterpolatedSplineEntity {
+  bool is_construction = 1;
+  bool is_periodic = 2;
+  repeated Vector interpolation_points = 3 [(nc_field_options).at_least = 2];
+  Vector start_derivative = 4;
+  Vector end_derivative = 5;
+  message TrimmedParams {
+    double start_phi = 1;
+    double end_phi = 2;
+  }
+  oneof additional_params {
+    google.protobuf.Empty untrimmed_params = 6;
+    TrimmedParams trimmed_params = 7;
+  }
+}
diff --git a/cadl/example.proto b/cadl/example.proto
new file mode 100644
index 0000000..ac110d2
--- /dev/null
+++ b/cadl/example.proto
@@ -0,0 +1,66 @@
+// Copyright 2021 Deepmind Technologies Limited.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package deepmind.cad.protos;
+
+import "constraints.proto";
+import "entities.proto";
+
+// A single sketch primitive; the `kind` oneof selects which type is set.
+message Entity {
+  oneof kind {
+    PointEntity point_entity = 1;
+    LineEntity line_entity = 2;
+    CircleArcEntityV2 circle_arc_entity = 3;
+    InterpolatedSplineEntity interpolated_spline_entity = 4;
+  }
+}
+
+// A single constraint; the `kind` oneof selects which type is set.
+message Constraint {
+  oneof kind {
+    FixConstraint fix_constraint = 1;
+    CoincidentConstraint coincident_constraint = 2;
+    ConcentricConstraint concentric_constraint = 3;
+    EqualConstraint equal_constraint = 4;
+    ParallelConstraint parallel_constraint = 5;
+    TangentConstraint tangent_constraint = 6;
+    PerpendicularConstraint perpendicular_constraint = 7;
+    MirrorConstraint mirror_constraint = 8;
+    DistanceConstraint distance_constraint = 9;
+    LengthConstraint length_constraint = 10;
+    DiameterConstraint diameter_constraint = 11;
+    RadiusConstraint radius_constraint = 12;
+    AngleConstraint angle_constraint = 13;
+    HorizontalConstraint horizontal_constraint = 14;
+    VerticalConstraint vertical_constraint = 15;
+    MidpointConstraint midpoint_constraint = 16;
+  }
+}
+
+message EntitySequence {
+  repeated Entity entities = 1;
+}
+
+message ConstraintSequence {
+  repeated Constraint constraints = 1;
+}
+
+// One record of the dataset: an entity sequence and a constraint sequence.
+message Example {
+  EntitySequence entity_sequence = 1;
+  ConstraintSequence constraint_sequence = 2;
+}
diff --git a/cadl/media/sketch_data.gif b/cadl/media/sketch_data.gif
new file mode 100644
index 0000000..111f604
Binary files /dev/null and b/cadl/media/sketch_data.gif differ