Unverified commit d56ad039 authored by Alex Dadgar, committed by GitHub

Merge pull request #5231 from hashicorp/d-devices

Document devices
parents f059e9a9 6f22fc02
Showing with 731 additions and 48 deletions
@@ -18,4 +18,4 @@ config {
The valid configuration options are:
* `ignored_gpu_ids` (`list(string)`: `[]`): list of GPU UUID strings that should not be exposed to Nomad
* `fingerprint_period` (`string`: `"5s"`): The interval to repeat fingerprint process to identify possible changes.
* `fingerprint_period` (`string`: `"1m"`): interval to repeat the fingerprint process to identify possible changes.
@@ -9,6 +9,7 @@ import (
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/devices/gpu/nvidia/nvml"
"github.com/hashicorp/nomad/helper/pluginutils/loader"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/device"
"github.com/hashicorp/nomad/plugins/shared/hclspec"
@@ -35,6 +36,19 @@ const (
)
var (
// PluginID is the nvidia plugin metadata registered in the plugin
// catalog.
PluginID = loader.PluginID{
Name: pluginName,
PluginType: base.PluginTypeDevice,
}
// PluginConfig is the nvidia factory function registered in the
// plugin catalog.
PluginConfig = &loader.InternalPluginConfig{
Factory: func(l log.Logger) interface{} { return NewNvidiaDevice(l) },
}
// pluginInfo describes the plugin
pluginInfo = &base.PluginInfoResponse{
Type: base.PluginTypeDevice,
......
@@ -135,7 +135,7 @@ func (d *Driver) buildFingerprint() *drivers.Fingerprint {
}
sort.Strings(runtimeNames)
fp.Attributes["runtimes"] = pstructs.NewStringAttribute(
fp.Attributes["driver.docker.runtimes"] = pstructs.NewStringAttribute(
strings.Join(runtimeNames, ","))
}
......
package catalog
import "github.com/hashicorp/nomad/drivers/rkt"
import (
"github.com/hashicorp/nomad/devices/gpu/nvidia"
"github.com/hashicorp/nomad/drivers/rkt"
)
// This file is where all builtin plugins should be registered in the catalog.
// Plugins with build restrictions should be placed in the appropriate
// register_XXX.go file.
func init() {
RegisterDeferredConfig(rkt.PluginID, rkt.PluginConfig, rkt.PluginLoader)
Register(nvidia.PluginID, nvidia.PluginConfig)
}
@@ -235,14 +235,14 @@ func TestParse(t *testing.T) {
Count: helper.Uint64ToPtr(10),
Constraints: []*api.Constraint{
{
LTarget: "${driver.attr.memory}",
LTarget: "${device.attr.memory}",
RTarget: "2GB",
Operand: ">",
},
},
Affinities: []*api.Affinity{
{
LTarget: "${driver.model}",
LTarget: "${device.model}",
RTarget: "1080ti",
Operand: "=",
Weight: 50,
......
@@ -200,13 +200,13 @@ job "binstore-storagelocker" {
device "nvidia/gpu" {
count = 10
constraint {
attribute = "${driver.attr.memory}"
attribute = "${device.attr.memory}"
value = "2GB"
operator = ">"
}
affinity {
attribute = "${driver.model}"
attribute = "${device.model}"
value = "1080ti"
weight = 50
}
......
@@ -161,7 +161,7 @@ func TestDeviceAllocator_Allocate_Constraints(t *testing.T) {
Name: "gpu",
Constraints: []*structs.Constraint{
{
LTarget: "${driver.attr.cuda_cores}",
LTarget: "${device.attr.cuda_cores}",
Operand: ">",
RTarget: "4000",
},
@@ -172,7 +172,7 @@ func TestDeviceAllocator_Allocate_Constraints(t *testing.T) {
Name: "gpu",
Constraints: []*structs.Constraint{
{
LTarget: "${driver.attr.cuda_cores}",
LTarget: "${device.attr.cuda_cores}",
Operand: "<",
RTarget: "4000",
},
@@ -184,17 +184,17 @@ func TestDeviceAllocator_Allocate_Constraints(t *testing.T) {
Constraints: []*structs.Constraint{
// First two are shared across both devices
{
LTarget: "${driver.attr.memory_bandwidth}",
LTarget: "${device.attr.memory_bandwidth}",
Operand: ">",
RTarget: "10 GB/s",
},
{
LTarget: "${driver.attr.memory}",
LTarget: "${device.attr.memory}",
Operand: "is",
RTarget: "11264 MiB",
},
{
LTarget: "${driver.attr.graphics_clock}",
LTarget: "${device.attr.graphics_clock}",
Operand: ">",
RTarget: "1.4 GHz",
},
@@ -209,18 +209,18 @@ func TestDeviceAllocator_Allocate_Constraints(t *testing.T) {
Name: "nvidia/gpu",
Constraints: []*structs.Constraint{
{
LTarget: "${driver.attr.memory_bandwidth}",
LTarget: "${device.attr.memory_bandwidth}",
Operand: ">",
RTarget: "10 GB/s",
},
{
LTarget: "${driver.attr.memory}",
LTarget: "${device.attr.memory}",
Operand: "is",
RTarget: "11264 MiB",
},
// Rules both out
{
LTarget: "${driver.attr.graphics_clock}",
LTarget: "${device.attr.graphics_clock}",
Operand: ">",
RTarget: "2.4 GHz",
},
@@ -271,7 +271,7 @@ func TestDeviceAllocator_Allocate_Affinities(t *testing.T) {
Name: "gpu",
Affinities: []*structs.Affinity{
{
LTarget: "${driver.attr.cuda_cores}",
LTarget: "${device.attr.cuda_cores}",
Operand: ">",
RTarget: "4000",
Weight: 0.6,
@@ -283,7 +283,7 @@ func TestDeviceAllocator_Allocate_Affinities(t *testing.T) {
Name: "gpu",
Affinities: []*structs.Affinity{
{
LTarget: "${driver.attr.cuda_cores}",
LTarget: "${device.attr.cuda_cores}",
Operand: "<",
RTarget: "4000",
Weight: 0.1,
@@ -295,7 +295,7 @@ func TestDeviceAllocator_Allocate_Affinities(t *testing.T) {
Name: "gpu",
Affinities: []*structs.Affinity{
{
LTarget: "${driver.attr.cuda_cores}",
LTarget: "${device.attr.cuda_cores}",
Operand: ">",
RTarget: "4000",
Weight: -0.2,
@@ -309,19 +309,19 @@ func TestDeviceAllocator_Allocate_Affinities(t *testing.T) {
Affinities: []*structs.Affinity{
// First two are shared across both devices
{
LTarget: "${driver.attr.memory_bandwidth}",
LTarget: "${device.attr.memory_bandwidth}",
Operand: ">",
RTarget: "10 GB/s",
Weight: 0.2,
},
{
LTarget: "${driver.attr.memory}",
LTarget: "${device.attr.memory}",
Operand: "is",
RTarget: "11264 MiB",
Weight: 0.2,
},
{
LTarget: "${driver.attr.graphics_clock}",
LTarget: "${device.attr.graphics_clock}",
Operand: ">",
RTarget: "1.4 GHz",
Weight: 0.9,
......
@@ -953,17 +953,17 @@ func resolveDeviceTarget(target string, d *structs.NodeDeviceResource) (*psstruc
// Handle the interpolations
switch {
case "${driver.model}" == target:
case "${device.model}" == target:
return psstructs.NewStringAttribute(d.Name), true
case "${driver.vendor}" == target:
case "${device.vendor}" == target:
return psstructs.NewStringAttribute(d.Vendor), true
case "${driver.type}" == target:
case "${device.type}" == target:
return psstructs.NewStringAttribute(d.Type), true
case strings.HasPrefix(target, "${driver.attr."):
attr := strings.TrimPrefix(target, "${driver.attr.")
case strings.HasPrefix(target, "${device.attr."):
attr := strings.TrimPrefix(target, "${device.attr.")
attr = strings.TrimSuffix(attr, "}")
val, ok := d.Attributes[attr]
return val, ok
......
@@ -1862,22 +1862,22 @@ func TestDeviceChecker(t *testing.T) {
Constraints: []*structs.Constraint{
{
Operand: "=",
LTarget: "${driver.model}",
LTarget: "${device.model}",
RTarget: "1080ti",
},
{
Operand: ">",
LTarget: "${driver.attr.memory}",
LTarget: "${device.attr.memory}",
RTarget: "1320.5 MB",
},
{
Operand: "<=",
LTarget: "${driver.attr.pci_bandwidth}",
LTarget: "${device.attr.pci_bandwidth}",
RTarget: ".98 GiB/s",
},
{
Operand: "=",
LTarget: "${driver.attr.cores_clock}",
LTarget: "${device.attr.cores_clock}",
RTarget: "800MHz",
},
},
@@ -1895,22 +1895,22 @@ func TestDeviceChecker(t *testing.T) {
Constraints: []*structs.Constraint{
{
Operand: "=",
LTarget: "${driver.model}",
LTarget: "${device.model}",
RTarget: "1080ti",
},
{
Operand: ">",
LTarget: "${driver.attr.memory}",
LTarget: "${device.attr.memory}",
RTarget: "1320.5 MB",
},
{
Operand: "<=",
LTarget: "${driver.attr.pci_bandwidth}",
LTarget: "${device.attr.pci_bandwidth}",
RTarget: ".98 GiB/s",
},
{
Operand: "=",
LTarget: "${driver.attr.cores_clock}",
LTarget: "${device.attr.cores_clock}",
RTarget: "800MHz",
},
},
@@ -1928,22 +1928,22 @@ func TestDeviceChecker(t *testing.T) {
Constraints: []*structs.Constraint{
{
Operand: "=",
LTarget: "${driver.model}",
LTarget: "${device.model}",
RTarget: "1080ti",
},
{
Operand: ">",
LTarget: "${driver.attr.memory}",
LTarget: "${device.attr.memory}",
RTarget: "1320.5 MB",
},
{
Operand: "<=",
LTarget: "${driver.attr.pci_bandwidth}",
LTarget: "${device.attr.pci_bandwidth}",
RTarget: ".98 GiB/s",
},
{
Operand: "=",
LTarget: "${driver.attr.cores_clock}",
LTarget: "${device.attr.cores_clock}",
RTarget: "800MHz",
},
},
@@ -1961,22 +1961,22 @@ func TestDeviceChecker(t *testing.T) {
Constraints: []*structs.Constraint{
{
Operand: "=",
LTarget: "${driver.model}",
LTarget: "${device.model}",
RTarget: "2080ti",
},
{
Operand: ">",
LTarget: "${driver.attr.memory}",
LTarget: "${device.attr.memory}",
RTarget: "1320.5 MB",
},
{
Operand: "<=",
LTarget: "${driver.attr.pci_bandwidth}",
LTarget: "${device.attr.pci_bandwidth}",
RTarget: ".98 GiB/s",
},
{
Operand: "=",
LTarget: "${driver.attr.cores_clock}",
LTarget: "${device.attr.cores_clock}",
RTarget: "800MHz",
},
},
@@ -1994,22 +1994,22 @@ func TestDeviceChecker(t *testing.T) {
Constraints: []*structs.Constraint{
{
Operand: "=",
LTarget: "${driver.model}",
LTarget: "${device.model}",
RTarget: "1080ti",
},
{
Operand: "<",
LTarget: "${driver.attr.memory}",
LTarget: "${device.attr.memory}",
RTarget: "1320.5 MB",
},
{
Operand: "<=",
LTarget: "${driver.attr.pci_bandwidth}",
LTarget: "${device.attr.pci_bandwidth}",
RTarget: ".98 GiB/s",
},
{
Operand: "=",
LTarget: "${driver.attr.cores_clock}",
LTarget: "${device.attr.cores_clock}",
RTarget: "800MHz",
},
},
......
@@ -584,7 +584,7 @@ func TestBinPackIterator_Devices(t *testing.T) {
Count: 1,
Affinities: []*structs.Affinity{
{
LTarget: "${driver.attr.graphics_clock}",
LTarget: "${device.attr.graphics_clock}",
Operand: ">",
RTarget: "1.4 GHz",
Weight: 0.9,
......
@@ -525,6 +525,8 @@ The `Resources` object supports the following keys:
- `Networks` - A list of network objects.
- `Devices` - A list of device objects.
The Network object supports the following keys:
- `MBits` - The number of MBits in bandwidth required.
@@ -538,6 +540,30 @@ ports. A network object allows the user to specify a list of `DynamicPorts` and
- `Label` - The label to annotate a port so that it can be referred in the
service discovery block or environment variables.
The Device object supports the following keys:
- `Name` - Specifies the device required. The following inputs are valid:
* `<device_type>`: If a single value is given, it is assumed to be the device
type, such as "gpu" or "fpga".
* `<vendor>/<device_type>`: If two values are given separated by a `/`, the
given device type will be selected, constraining on the provided vendor.
Examples include "nvidia/gpu" or "amd/gpu".
* `<vendor>/<device_type>/<model>`: If three values are given separated by a `/`, the
given device type will be selected, constraining on the provided vendor, and
model name. Examples include "nvidia/gpu/1080ti" or "nvidia/gpu/2080ti".
- `Count` - The count of devices being requested per task. Defaults to 1.
- `Constraints` - A list to define constraints on which device can satisfy the
request. See the constraint reference for more details.
- `Affinities` - A list to define preferences for which device should be
chosen. See the affinity reference for more details.
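As a rough sketch, the jobspec fragment below maps onto these keys: `Name` is `nvidia/gpu`, `Count` is 2, and the `constraint` block becomes one entry in `Constraints` (the threshold value here is hypothetical):

```hcl
resources {
  device "nvidia/gpu" {
    count = 2

    # Becomes one entry in the Constraints list
    constraint {
      attribute = "${device.attr.memory}"
      operator  = ">="
      value     = "4 GiB"
    }
  }
}
```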
<a id="ephemeral_disk"></a>
### Ephemeral Disk
......
---
layout: "docs"
page_title: "Device Plugins: Community Supported"
sidebar_current: "docs-devices-community"
description: |-
A list of community supported Device Plugins.
---
# Community Supported
If you have authored a device plugin that you believe will be useful to the
broader Nomad community and you are committed to maintaining the plugin, please
file a PR to add your plugin to this page.
## Authoring Device Plugins
Nomad has a plugin system for defining device drivers. External device plugins
have the same user experience as built-in drivers. For details on authoring a
device plugin, please refer to the plugin authoring guide.
---
layout: "docs"
page_title: "Device Plugins"
sidebar_current: "docs-devices"
description: |-
Device Plugins are used to expose devices to tasks in Nomad.
---
# Device Plugins
Device plugins are used to detect and make devices available to tasks in Nomad.
Devices are physical hardware that exists on a node, such as a GPU or an FPGA. By
having extensible device plugins, Nomad has the flexibility to support a broad
set of devices and allows the community to build additional device plugins as
needed.
The list of supported device plugins is provided on the left of this page.
Each device plugin documents its configuration and installation requirements,
the attributes it fingerprints, and the environment variables it exposes to
tasks.
---
layout: "docs"
page_title: "Device Plugins: Nvidia"
sidebar_current: "docs-devices-nvidia"
description: |-
The Nvidia Device Plugin detects and makes Nvidia devices available to tasks.
---
# Nvidia GPU Device Plugin
Name: `nvidia-gpu`
The Nvidia device plugin is used to expose Nvidia GPUs to Nomad. The Nvidia
plugin is built into Nomad and does not need to be downloaded separately.
## Fingerprinted Attributes
<table class="table table-bordered table-striped">
<tr>
<th>Attribute</th>
<th>Unit</th>
</tr>
<tr>
<td><tt>memory</tt></td>
<td>MiB</td>
</tr>
<tr>
<td><tt>power</tt></td>
<td>W (Watt)</td>
</tr>
<tr>
<td><tt>bar1</tt></td>
<td>MiB</td>
</tr>
<tr>
<td><tt>driver_version</tt></td>
<td>string</td>
</tr>
<tr>
<td><tt>cores_clock</tt></td>
<td>MHz</td>
</tr>
<tr>
<td><tt>memory_clock</tt></td>
<td>MHz</td>
</tr>
<tr>
<td><tt>pci_bandwidth</tt></td>
<td>MB/s</td>
</tr>
<tr>
<td><tt>display_state</tt></td>
<td>string</td>
</tr>
<tr>
<td><tt>persistence_mode</tt></td>
<td>string</td>
</tr>
</table>
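These attributes can be referenced from `constraint` and `affinity` stanzas in a
job's device request. As a sketch (the threshold is hypothetical), a job could
restrict placement to GPUs with a sufficiently fast core clock:

```hcl
device "nvidia/gpu" {
  # cores_clock is fingerprinted in MHz; Nomad converts between MHz and GHz
  constraint {
    attribute = "${device.attr.cores_clock}"
    operator  = ">="
    value     = "1 GHz"
  }
}
```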
## Runtime Environment
The `nvidia-gpu` device plugin exposes the following environment variables:
* `NVIDIA_VISIBLE_DEVICES` - List of Nvidia GPU IDs available to the task.
### Additional Task Configurations
Additional environment variables can be set by the task to influence the runtime
environment. See [Nvidia's
documentation](https://github.com/NVIDIA/nvidia-container-runtime#environment-variables-oci-spec).
## Installation Requirements
In order to use the `nvidia-gpu` device plugin, the following prerequisites must be met:
1. GNU/Linux x86_64 with kernel version > 3.10
2. NVIDIA GPU with Architecture > Fermi (2.1)
3. NVIDIA drivers >= 340.29 with binary `nvidia-smi`
### Docker Driver Requirements
In order to use the Nvidia device plugin with the Docker driver, please follow
the installation instructions for
[`nvidia-docker`](https://github.com/NVIDIA/nvidia-docker/wiki/Installation-\(version-1.0\)).
## Plugin Configuration
```hcl
plugin "nvidia-gpu" {
ignored_gpu_ids = ["GPU-fef8089b", "GPU-ac81e44d"]
fingerprint_period = "1m"
}
```
The `nvidia-gpu` device plugin supports the following configuration in the agent
config:
* `ignored_gpu_ids` `(array<string>: [])` - Specifies the set of GPU UUIDs that
should be ignored when fingerprinting.
* `fingerprint_period` `(string: "1m")` - The period in which to fingerprint for
device changes.
## Restrictions
The Nvidia integration only works with drivers that natively integrate with
Nvidia's [container runtime
library](https://github.com/NVIDIA/libnvidia-container).
Nomad has tested support with the [`docker` driver][docker-driver] and plans to
bring support to the built-in [`exec`][exec-driver] and [`java`][java-driver]
drivers. Support for [`lxc`][lxc-driver] should be possible by installing the
[Nvidia hook](https://github.com/lxc/lxc/blob/master/hooks/nvidia) but is not
tested or documented by Nomad.
## Examples
Inspect a node with a GPU:
```sh
$ nomad node status 4d46e59f
ID = 4d46e59f
Name = nomad
Class = <none>
DC = dc1
Drain = false
Eligibility = eligible
Status = ready
Uptime = 19m43s
Driver Status = docker,mock_driver,raw_exec
Node Events
Time Subsystem Message
2019-01-23T18:25:18Z Cluster Node registered
Allocated Resources
CPU Memory Disk
0/15576 MHz 0 B/55 GiB 0 B/28 GiB
Allocation Resource Utilization
CPU Memory
0/15576 MHz 0 B/55 GiB
Host Resource Utilization
CPU Memory Disk
2674/15576 MHz 1.5 GiB/55 GiB 3.0 GiB/31 GiB
Device Resource Utilization
nvidia/gpu/Tesla K80[GPU-e1f6f4f1-1ea5-7b9d-5f03-338a9dc32416] 0 / 11441 MiB
Allocations
No allocations placed
```
Display detailed statistics on a node with a GPU:
```sh
$ nomad node status -stats 4d46e59f
ID = 4d46e59f
Name = nomad
Class = <none>
DC = dc1
Drain = false
Eligibility = eligible
Status = ready
Uptime = 19m59s
Driver Status = docker,mock_driver,raw_exec
Node Events
Time Subsystem Message
2019-01-23T18:25:18Z Cluster Node registered
Allocated Resources
CPU Memory Disk
0/15576 MHz 0 B/55 GiB 0 B/28 GiB
Allocation Resource Utilization
CPU Memory
0/15576 MHz 0 B/55 GiB
Host Resource Utilization
CPU Memory Disk
2673/15576 MHz 1.5 GiB/55 GiB 3.0 GiB/31 GiB
Device Resource Utilization
nvidia/gpu/Tesla K80[GPU-e1f6f4f1-1ea5-7b9d-5f03-338a9dc32416] 0 / 11441 MiB
// ...TRUNCATED...
Device Stats
Device = nvidia/gpu/Tesla K80[GPU-e1f6f4f1-1ea5-7b9d-5f03-338a9dc32416]
BAR1 buffer state = 2 / 16384 MiB
Decoder utilization = 0 %
ECC L1 errors = 0
ECC L2 errors = 0
ECC memory errors = 0
Encoder utilization = 0 %
GPU utilization = 0 %
Memory state = 0 / 11441 MiB
Memory utilization = 0 %
Power usage = 37 / 149 W
Temperature = 34 C
Allocations
No allocations placed
```
Run the following example job to see that the GPU was mounted in the
container:
```hcl
job "gpu-test" {
datacenters = ["dc1"]
type = "batch"
group "smi" {
task "smi" {
driver = "docker"
config {
image = "nvidia/cuda:9.0-base"
command = "nvidia-smi"
}
resources {
device "nvidia/gpu" {
count = 1
# Add an affinity for a particular model
affinity {
attribute = "${device.model}"
value = "Tesla K80"
}
}
}
}
}
}
```
```sh
$ nomad run example.nomad
==> Monitoring evaluation "21bd7584"
Evaluation triggered by job "gpu-test"
Allocation "d250baed" created: node "4d46e59f", group "smi"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "21bd7584" finished with status "complete"
$ nomad alloc status d250baed
ID = d250baed
Eval ID = 21bd7584
Name = gpu-test.smi[0]
Node ID = 4d46e59f
Job ID = example
Job Version = 0
Client Status = complete
Client Description = All tasks have completed
Desired Status = run
Desired Description = <none>
Created = 7s ago
Modified = 2s ago
Task "smi" is "dead"
Task Resources
CPU Memory Disk Addresses
0/100 MHz 0 B/300 MiB 300 MiB
Device Stats
nvidia/gpu/Tesla K80[GPU-e1f6f4f1-1ea5-7b9d-5f03-338a9dc32416] 0 / 11441 MiB
Task Events:
Started At = 2019-01-23T18:25:32Z
Finished At = 2019-01-23T18:25:34Z
Total Restarts = 0
Last Restart = N/A
Recent Events:
Time Type Description
2019-01-23T18:25:34Z Terminated Exit Code: 0
2019-01-23T18:25:32Z Started Task started by client
2019-01-23T18:25:29Z Task Setup Building Task Directory
2019-01-23T18:25:29Z Received Task received by client
$ nomad alloc logs d250baed
Wed Jan 23 18:25:32 2019
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.48 Driver Version: 410.48 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 Tesla K80 On | 00004477:00:00.0 Off | 0 |
| N/A 33C P8 37W / 149W | 0MiB / 11441MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
[docker-driver]: /docs/drivers/docker.html "Nomad docker Driver"
[exec-driver]: /docs/drivers/exec.html "Nomad exec Driver"
[java-driver]: /docs/drivers/java.html "Nomad java Driver"
[lxc-driver]: /docs/drivers/lxc.html "Nomad lxc Driver"
---
layout: "docs"
page_title: "device Stanza - Job Specification"
sidebar_current: "docs-job-specification-device"
description: |-
The "device" stanza is used to require a certain device be made available
to the task.
---
# `device` Stanza
<table class="table table-bordered table-striped">
<tr>
<th width="120">Placement</th>
<td>
<code>job -> group -> task -> resources -> **device**</code>
</td>
</tr>
</table>
The `device` stanza is used to create both a scheduling and runtime requirement
that the given task has access to the specified devices. A device is a hardware
device that is attached to the node and may be made available to the task.
Examples are GPUs, FPGAs, and TPUs.
When a `device` stanza is added, Nomad will schedule the task onto a node that
contains the set of device(s) that meet the specified requirements. The `device` stanza
allows the operator to specify as little as the type of device required, such as
`gpu`, all the way up to arbitrary constraints and affinities.
Once the scheduler has placed the allocation on a suitable node, the Nomad
Client will invoke the device plugin to retrieve information on how to mount the
device and what environment variables to expose. For more information on the
runtime environment, please consult the individual device plugin's documentation.
See the [device plugin's documentation][devices] for a list of supported devices.
```hcl
job "docs" {
group "example" {
task "server" {
resources {
device "nvidia/gpu" {
count = 2
constraint {
attribute = "${driver.attr.memory}"
operator = ">="
value = "2 GiB"
}
affinity {
attribute = "${driver.attr.memory}"
operator = ">="
value = "4 GiB"
weight = 75
}
}
}
}
}
}
```
In the above example, the task is requesting two GPUs from the Nvidia vendor
without specifying a particular model. Instead, it places a hard constraint
that the device has at least 2 GiB of memory and a preference for GPUs that
have at least 4 GiB. This example shows how expressive the `device` stanza can
be.
~> Device support is currently limited to Linux and container-based drivers,
due to their ability to isolate devices to specific tasks.
## `device` Parameters
- `name` `(string: "")` - Specifies the device required. The following inputs
are valid:
* `<device_type>`: If a single value is given, it is assumed to be the device
type, such as "gpu" or "fpga".
* `<vendor>/<device_type>`: If two values are given separated by a `/`, the
given device type will be selected, constraining on the provided vendor.
Examples include "nvidia/gpu" or "amd/gpu".
* `<vendor>/<device_type>/<model>`: If three values are given separated by a `/`, the
given device type will be selected, constraining on the provided vendor, and
model name. Examples include "nvidia/gpu/1080ti" or "nvidia/gpu/2080ti".
- `count` `(int: 1)` - Specifies the number of instances of the given device
that are required.
- `constraint` <code>([Constraint][]: nil)</code> - Constraints to restrict
which devices are eligible. This can be provided multiple times to define
additional constraints. See below for available attributes.
- `affinity` <code>([Affinity][]: nil)</code> - Affinity to specify a preference
for which devices get selected. This can be provided multiple times to define
additional affinities. See below for available attributes.
## `device` Constraint and Affinity Attributes
The set of attributes available for use in a `constraint` or `affinity` is as
follows:
<table class="table table-bordered table-striped">
<tr>
<th>Variable</th>
<th>Description</th>
<th>Example Value</th>
</tr>
<tr>
<td><tt>${device.type}</tt></td>
<td>The type of device</td>
<td><tt>"gpu", "tpu", "fpga"</tt></td>
</tr>
<tr>
<td><tt>${device.vendor}</tt></td>
<td>The device's vendor</td>
<td><tt>"amd", "nvidia", "intel"</tt></td>
</tr>
<tr>
<td><tt>${device.model}</tt></td>
<td>The device's model</td>
<td><tt>"1080ti"</tt></td>
</tr>
<tr>
<td><tt>${device.attr.&lt;property&gt;}</tt></td>
<td>Property of the device</td>
<td><tt>${device.attr.memory} => 8 GiB</tt></td>
</tr>
</table>
For the set of attributes available, please see the individual [device plugin's
documentation][devices].
### Attribute Units and Conversions
Devices report their attributes with strict types and can also provide unit
information. For example, when a GPU is reporting its memory, it can report that
it is "4096 MiB". Since Nomad has the associated unit information, a constraint
that requires greater than "3.5 GiB" can match since Nomad can convert between
these units.
The units Nomad supports are as follows:
<table class="table table-bordered table-striped">
<tr>
<th>Base Unit</th>
<th>Values</th>
</tr>
<tr>
<td><tt>Byte</tt></td>
<td><tt>**Base 2**: KiB, MiB, GiB, TiB, PiB, EiB<br>**Base 10**: kB, KB (equivalent to kB), MB, GB, TB, PB, EB</tt></td>
</tr>
<tr>
<td><tt>Byte Rates</tt></td>
<td><tt>**Base 2**: KiB/s, MiB/s, GiB/s, TiB/s, PiB/s, EiB/s<br>**Base 10**: kB/s, KB/s (equivalent to kB/s), MB/s, GB/s, TB/s, PB/s, EB/s</tt></td>
</tr>
<tr>
<td><tt>Hertz</tt></td>
<td><tt>MHz, GHz</tt></td>
</tr>
<tr>
<td><tt>Watts</tt></td>
<td><tt>mW, W, kW, MW, GW</tt></td>
</tr>
</table>
Conversion is only possible within the same base unit.
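As a minimal sketch of the memory example above, a GPU fingerprinted with
"4096 MiB" of memory would satisfy the following constraint, since 3.5 GiB
converts to 3584 MiB:

```hcl
device "gpu" {
  # A device reporting memory = "4096 MiB" matches, as 4096 MiB > 3584 MiB
  constraint {
    attribute = "${device.attr.memory}"
    operator  = ">"
    value     = "3.5 GiB"
  }
}
```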
## `device` Examples
The following examples only show the `device` stanzas. Remember that the
`device` stanza is only valid in the placements listed above.
### Single Nvidia GPU
This example schedules a task with a single Nvidia GPU made available.
```hcl
device "nvidia/gpu" {}
```
### Multiple Nvidia GPUs
This example schedules a task with two Nvidia GPUs made available.
```hcl
device "nvidia/gpu" {
count = 2
}
```
### Single Nvidia GPU with Specific Model
This example schedules a task with a single Nvidia GPU made available and uses
the name to specify the exact model to be used.
```hcl
device "nvidia/gpu/1080ti" {}
```
This is a simplification of the following:
```hcl
device "gpu" {
count = 1
constraint {
attribute = "${device.vendor}"
value = "nvidia"
}
constraint {
attribute = "${device.model}"
value = "1080ti"
}
}
```
### Affinity with Unit Conversion
This example uses an affinity to tell the scheduler it would prefer if the GPU
had at least 1.5 GiB of memory. The following are both equivalent as Nomad can
do unit conversions.
Specified in `GiB`:
```hcl
device "nvidia/gpu" {
affinity {
attribute = "${device.attr.memory}"
operator = ">="
value = "1.5 GiB"
weight = 75
}
}
```
Specified in `MiB`:
```hcl
device "nvidia/gpu" {
affinity {
attribute = "${device.attr.memory}"
operator = ">="
value = "1500 MiB"
weight = 75
}
}
```
[affinity]: /docs/job-specification/affinity.html "Nomad affinity Job Specification"
[constraint]: /docs/job-specification/constraint.html "Nomad constraint Job Specification"
[devices]: /docs/devices/index.html "Nomad Device Plugins"
@@ -36,6 +36,10 @@ job "docs" {
static = 22
}
}
device "nvidia/gpu" {
count = 2
}
}
}
}
@@ -48,9 +52,12 @@ job "docs" {
- `memory` `(int: 300)` - Specifies the memory required in MB
- `network` <code>([Network][]: <required>)</code> - Specifies the network
- `network` <code>([Network][]: &lt;optional&gt;)</code> - Specifies the network
requirements, including static and dynamic port allocations.
- `device` <code>([Device][]: &lt;optional&gt;)</code> - Specifies the device
requirements. This may be repeated to request multiple device types.
## `resources` Examples
The following examples only show the `resources` stanzas. Remember that the
@@ -86,4 +93,18 @@ resources {
}
```
### Devices
This example shows a device constraint as specified in the [device][] stanza,
requiring two Nvidia GPUs to be made available:
```hcl
resources {
device "nvidia/gpu" {
count = 2
}
}
```
[network]: /docs/job-specification/network.html "Nomad network Job Specification"
[device]: /docs/job-specification/device.html "Nomad device Job Specification"
@@ -333,6 +333,9 @@
<li<%= sidebar_current("docs-job-specification-constraint")%>>
<a href="/docs/job-specification/constraint.html">constraint</a>
</li>
<li<%= sidebar_current("docs-job-specification-device")%>>
<a href="/docs/job-specification/device.html">device</a>
</li>
<li<%= sidebar_current("docs-job-specification-dispatch-payload")%>>
<a href="/docs/job-specification/dispatch_payload.html">dispatch_payload</a>
</li>
@@ -434,6 +437,19 @@
</ul>
</li>
<li<%= sidebar_current("docs-devices") %>>
<a href="/docs/devices/index.html">Device Plugins</a>
<ul class="nav">
<li<%= sidebar_current("docs-devices-nvidia") %>>
<a href="/docs/devices/nvidia.html">Nvidia</a>
</li>
<li<%= sidebar_current("docs-devices-community") %>>
<a href="/docs/devices/community.html">Community Supported</a>
</li>
</ul>
</li>
<li<%= sidebar_current("docs-schedulers") %>>
<a href="/docs/schedulers.html">Schedulers</a>
</li>
......