Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Leap:15.5:Update
docker-runc.15108
bsc1168481-0001-cgroup-devices-major-cleanups-a...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File bsc1168481-0001-cgroup-devices-major-cleanups-and-minimal-transition.patch of Package docker-runc.15108
From e37ef513e9566731a25c68a687f60fd566d60feb Mon Sep 17 00:00:00 2001 From: Aleksa Sarai <asarai@suse.de> Date: Sun, 10 May 2020 02:40:25 +1000 Subject: [PATCH] cgroup: devices: major cleanups and minimal transition rules This is a backport of [1], which resolves many issues with the implementation of the runc devices cgroup code. Among other things, it removes some of the disruptive aspects of "runc update" which customers have run into. See [1] for more details. Rather than reproducing all of the commits, this is a squashed version of the following upstream commits: * 19a3e9bf8053 ("contrib: recvtty: add --no-stdin flag") * 11736767d2c5 ("cgroups: add GetFreezerState() helper to Manager") * 4d363ccbd9cc ("cgroup: devices: eradicate the Allow/Deny lists") * 5baa34c9134b ("specconv: remove default /dev/console access") * 8ed0c4c03641 ("configs: use different types for .Devices and .Resources.Devices") * 2ef5f417d923 ("cgroups: implement a devices cgroupv1 emulator") * b70aacc28f43 ("cgroupv1: devices: use minimal transition rules with devices.Emulator") * 410aef3ef858 ("cgroups: systemd: make use of Device*= properties") [1]: https://github.com/opencontainers/runc/pull/2391 SUSE-Bugs: bsc#1168481 Signed-off-by: Aleksa Sarai <asarai@suse.de> --- contrib/cmd/recvtty/recvtty.go | 19 +- libcontainer/README.md | 5 +- libcontainer/cgroups/cgroups.go | 3 + .../cgroups/devices/devices_emulator.go | 355 ++++++ .../cgroups/devices/devices_emulator_test.go | 1096 +++++++++++++++++ libcontainer/cgroups/fs/apply_raw.go | 39 +- libcontainer/cgroups/fs/devices.go | 87 +- libcontainer/cgroups/fs/devices_test.go | 91 +- libcontainer/cgroups/fs/freezer.go | 34 +- libcontainer/cgroups/fs/freezer_v2.go | 21 + libcontainer/cgroups/systemd/apply_systemd.go | 229 +++- .../cgroups/systemd/unified_hierarchy.go | 18 + libcontainer/configs/cgroup_linux.go | 11 +- libcontainer/configs/device.go | 169 ++- libcontainer/configs/device_defaults.go | 111 -- libcontainer/container_linux.go | 22 +- libcontainer/container_linux_test.go | 4 + libcontainer/devices/devices.go | 36 +- libcontainer/integration/template_test.go | 11 +- libcontainer/rootfs_linux.go | 19 +- libcontainer/specconv/spec_linux.go | 557 +++++---- 21 files changed, 2342 insertions(+), 595 deletions(-) create mode 100644 libcontainer/cgroups/devices/devices_emulator.go create mode 100644 libcontainer/cgroups/devices/devices_emulator_test.go delete mode 100644 libcontainer/configs/device_defaults.go diff --git a/contrib/cmd/recvtty/recvtty.go b/contrib/cmd/recvtty/recvtty.go index a658b8d20202..00b30e1c39cd 100644 --- a/contrib/cmd/recvtty/recvtty.go +++ b/contrib/cmd/recvtty/recvtty.go @@ -65,7 +65,7 @@ func bail(err error) { os.Exit(1) } -func handleSingle(path string) error { +func handleSingle(path string, noStdin bool) error { // Open a socket. ln, err := net.Listen("unix", path) if err != nil { @@ -113,10 +113,12 @@ func handleSingle(path string) error { io.Copy(os.Stdout, c) quitChan <- struct{}{} }() - go func() { - io.Copy(c, os.Stdin) - quitChan <- struct{}{} - }() + if !noStdin { + go func() { + io.Copy(c, os.Stdin) + quitChan <- struct{}{} + }() + } // Only close the master fd once we've stopped copying. <-quitChan @@ -201,6 +203,10 @@ func main() { Value: "", Usage: "Path to write daemon process ID to", }, + cli.BoolFlag{ + Name: "no-stdin", + Usage: "Disable stdin handling (no-op for null mode)", + }, } app.Action = func(ctx *cli.Context) error { @@ -218,9 +224,10 @@ func main() { } } + noStdin := ctx.Bool("no-stdin") switch ctx.String("mode") { case "single": - if err := handleSingle(path); err != nil { + if err := handleSingle(path, noStdin); err != nil { return err } case "null": diff --git a/libcontainer/README.md b/libcontainer/README.md index a791ca2d2494..6803ef56c5ba 100644 --- a/libcontainer/README.md +++ b/libcontainer/README.md @@ -155,8 +155,7 @@ config := &configs.Config{ Parent: "system", Resources: &configs.Resources{ MemorySwappiness: nil, - AllowAllDevices: nil, - AllowedDevices: configs.DefaultAllowedDevices, + Devices: specconv.AllowedDevices, }, }, MaskPaths: []string{ @@ -166,7 +165,7 @@ config := &configs.Config{ ReadonlyPaths: []string{ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", }, - Devices: configs.DefaultAutoCreatedDevices, + Devices: specconv.AllowedDevices, Hostname: "testing", Mounts: []*configs.Mount{ { diff --git a/libcontainer/cgroups/cgroups.go b/libcontainer/cgroups/cgroups.go index 25ff5158933a..f16f9bfc0d19 100644 --- a/libcontainer/cgroups/cgroups.go +++ b/libcontainer/cgroups/cgroups.go @@ -39,6 +39,9 @@ type Manager interface { // Sets the cgroup as configured. Set(container *configs.Config) error + + // GetFreezerState retrieves the current FreezerState of the cgroup. + GetFreezerState() (configs.FreezerState, error) } type NotFoundError struct { diff --git a/libcontainer/cgroups/devices/devices_emulator.go b/libcontainer/cgroups/devices/devices_emulator.go new file mode 100644 index 000000000000..cfb535bd4d8b --- /dev/null +++ b/libcontainer/cgroups/devices/devices_emulator.go @@ -0,0 +1,355 @@ +// +build linux + +package devices + +import ( + "bufio" + "io" + "regexp" + "sort" + "strconv" + + "github.com/opencontainers/runc/libcontainer/configs" + + "github.com/pkg/errors" +) + +// deviceMeta is a DeviceRule without the Allow or Permissions fields, and no +// wildcard-type support. It's effectively the "match" portion of a metadata +// rule, for the purposes of our emulation. +type deviceMeta struct { + node configs.DeviceType + major int64 + minor int64 +} + +// deviceRule is effectively the tuple (deviceMeta, DevicePermissions). +type deviceRule struct { + meta deviceMeta + perms configs.DevicePermissions +} + +// deviceRules is a mapping of device metadata rules to the associated +// permissions in the ruleset. +type deviceRules map[deviceMeta]configs.DevicePermissions + +func (r deviceRules) orderedEntries() []deviceRule { + var rules []deviceRule + for meta, perms := range r { + rules = append(rules, deviceRule{meta: meta, perms: perms}) + } + sort.Slice(rules, func(i, j int) bool { + // Sort by (major, minor, type). + a, b := rules[i].meta, rules[j].meta + return a.major < b.major || + (a.major == b.major && a.minor < b.minor) || + (a.major == b.major && a.minor == b.minor && a.node < b.node) + }) + return rules +} + +type Emulator struct { + defaultAllow bool + rules deviceRules +} + +func (e *Emulator) IsBlacklist() bool { + return e.defaultAllow +} + +func (e *Emulator) IsAllowAll() bool { + return e.IsBlacklist() && len(e.rules) == 0 +} + +var devicesListRegexp = regexp.MustCompile(`^([abc])\s+(\d+|\*):(\d+|\*)\s+([rwm]+)$`) + +func parseLine(line string) (*deviceRule, error) { + matches := devicesListRegexp.FindStringSubmatch(line) + if matches == nil { + return nil, errors.Errorf("line doesn't match devices.list format") + } + var ( + rule deviceRule + node = matches[1] + major = matches[2] + minor = matches[3] + perms = matches[4] + ) + + // Parse the node type. + switch node { + case "a": + // Super-special case -- "a" always means every device with every + // access mode. In fact, for devices.list this actually indicates that + // the cgroup is in black-list mode. + // TODO: Double-check that the entire file is "a *:* rwm". + return nil, nil + case "b": + rule.meta.node = configs.BlockDevice + case "c": + rule.meta.node = configs.CharDevice + default: + // Should never happen! + return nil, errors.Errorf("unknown device type %q", node) + } + + // Parse the major number. + if major == "*" { + rule.meta.major = configs.Wildcard + } else { + val, err := strconv.ParseUint(major, 10, 32) + if err != nil { + return nil, errors.Wrap(err, "parse major number") + } + rule.meta.major = int64(val) + } + + // Parse the minor number. + if minor == "*" { + rule.meta.minor = configs.Wildcard + } else { + val, err := strconv.ParseUint(minor, 10, 32) + if err != nil { + return nil, errors.Wrap(err, "parse minor number") + } + rule.meta.minor = int64(val) + } + + // Parse the access permissions. + rule.perms = configs.DevicePermissions(perms) + if !rule.perms.IsValid() || rule.perms.IsEmpty() { + // Should never happen! + return nil, errors.Errorf("parse access mode: contained unknown modes or is empty: %q", perms) + } + return &rule, nil +} + +func (e *Emulator) addRule(rule deviceRule) error { + if e.rules == nil { + e.rules = make(map[deviceMeta]configs.DevicePermissions) + } + + // Merge with any pre-existing permissions. + oldPerms := e.rules[rule.meta] + newPerms := rule.perms.Union(oldPerms) + e.rules[rule.meta] = newPerms + return nil +} + +func (e *Emulator) rmRule(rule deviceRule) error { + // Give an error if any of the permissions requested to be removed are + // present in a partially-matching wildcard rule, because such rules will + // be ignored by cgroupv1. + // + // This is a diversion from cgroupv1, but is necessary to avoid leading + // users into a false sense of security. cgroupv1 will silently(!) ignore + // requests to remove partial exceptions, but we really shouldn't do that. + // + // It may seem like we could just "split" wildcard rules which hit this + // issue, but unfortunately there are 2^32 possible major and minor + // numbers, which would exhaust kernel memory quickly if we did this. Not + // to mention it'd be really slow (the kernel side is implemented as a + // linked-list of exceptions). + for _, partialMeta := range []deviceMeta{ + {node: rule.meta.node, major: configs.Wildcard, minor: rule.meta.minor}, + {node: rule.meta.node, major: rule.meta.major, minor: configs.Wildcard}, + {node: rule.meta.node, major: configs.Wildcard, minor: configs.Wildcard}, + } { + // This wildcard rule is equivalent to the requested rule, so skip it. + if rule.meta == partialMeta { + continue + } + // Only give an error if the set of permissions overlap. + partialPerms := e.rules[partialMeta] + if !partialPerms.Intersection(rule.perms).IsEmpty() { + return errors.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms) + } + } + + // Subtract all of the permissions listed from the full match rule. If the + // rule didn't exist, all of this is a no-op. + newPerms := e.rules[rule.meta].Difference(rule.perms) + if newPerms.IsEmpty() { + delete(e.rules, rule.meta) + } else { + e.rules[rule.meta] = newPerms + } + // TODO: The actual cgroup code doesn't care if an exception didn't exist + // during removal, so not erroring out here is /accurate/ but quite + // worrying. Maybe we should do additional validation, but again we + // have to worry about backwards-compatibility. + return nil +} + +func (e *Emulator) allow(rule *deviceRule) error { + // This cgroup is configured as a black-list. Reset the entire emulator, + // and put is into black-list mode. + if rule == nil || rule.meta.node == configs.WildcardDevice { + *e = Emulator{ + defaultAllow: true, + rules: nil, + } + return nil + } + + var err error + if e.defaultAllow { + err = errors.Wrap(e.rmRule(*rule), "remove 'deny' exception") + } else { + err = errors.Wrap(e.addRule(*rule), "add 'allow' exception") + } + return err +} + +func (e *Emulator) deny(rule *deviceRule) error { + // This cgroup is configured as a white-list. Reset the entire emulator, + // and put is into white-list mode. + if rule == nil || rule.meta.node == configs.WildcardDevice { + *e = Emulator{ + defaultAllow: false, + rules: nil, + } + return nil + } + + var err error + if e.defaultAllow { + err = errors.Wrap(e.addRule(*rule), "add 'deny' exception") + } else { + err = errors.Wrap(e.rmRule(*rule), "remove 'allow' exception") + } + return err +} + +func (e *Emulator) Apply(rule configs.DeviceRule) error { + if !rule.Type.CanCgroup() { + return errors.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type) + } + + innerRule := &deviceRule{ + meta: deviceMeta{ + node: rule.Type, + major: rule.Major, + minor: rule.Minor, + }, + perms: rule.Permissions, + } + if innerRule.meta.node == configs.WildcardDevice { + innerRule = nil + } + + if rule.Allow { + return e.allow(innerRule) + } else { + return e.deny(innerRule) + } +} + +// EmulatorFromList takes a reader to a "devices.list"-like source, and returns +// a new Emulator that represents the state of the devices cgroup. Note that +// black-list devices cgroups cannot be fully reconstructed, due to limitations +// in the devices cgroup API. Instead, such cgroups are always treated as +// "allow all" cgroups. +func EmulatorFromList(list io.Reader) (*Emulator, error) { + // Normally cgroups are in black-list mode by default, but the way we + // figure out the current mode is whether or not devices.list has an + // allow-all rule. So we default to a white-list, and the existence of an + // "a *:* rwm" entry will tell us otherwise. + e := &Emulator{ + defaultAllow: false, + } + + // Parse the "devices.list". + s := bufio.NewScanner(list) + for s.Scan() { + line := s.Text() + deviceRule, err := parseLine(line) + if err != nil { + return nil, errors.Wrapf(err, "parsing line %q", line) + } + // "devices.list" is an allow list. Note that this means that in + // black-list mode, we have no idea what rules are in play. As a + // result, we need to be very careful in Transition(). + if err := e.allow(deviceRule); err != nil { + return nil, errors.Wrapf(err, "adding devices.list rule") + } + } + if err := s.Err(); err != nil { + return nil, errors.Wrap(err, "reading devices.list lines") + } + return e, nil +} + +// Transition calculates what is the minimally-disruptive set of rules need to +// be applied to a devices cgroup in order to transition to the given target. +// This means that any already-existing rules will not be applied, and +// disruptive rules (like denying all device access) will only be applied if +// necessary. +// +// This function is the sole reason for all of Emulator -- to allow us +// to figure out how to update a containers' cgroups without causing spurrious +// device errors (if possible). +func (source *Emulator) Transition(target *Emulator) ([]*configs.DeviceRule, error) { + var transitionRules []*configs.DeviceRule + oldRules := source.rules + + // If the default policy doesn't match, we need to include a "disruptive" + // rule (either allow-all or deny-all) in order to switch the cgroup to the + // correct default policy. + // + // However, due to a limitation in "devices.list" we cannot be sure what + // deny rules are in place in a black-list cgroup. Thus if the source is a + // black-list we also have to include a disruptive rule. + if source.IsBlacklist() || source.defaultAllow != target.defaultAllow { + transitionRules = append(transitionRules, &configs.DeviceRule{ + Type: 'a', + Major: -1, + Minor: -1, + Permissions: configs.DevicePermissions("rwm"), + Allow: target.defaultAllow, + }) + // The old rules are only relevant if we aren't starting out with a + // disruptive rule. + oldRules = nil + } + + // NOTE: We traverse through the rules in a sorted order so we always write + // the same set of rules (this is to aid testing). + + // First, we create inverse rules for any old rules not in the new set. + // This includes partial-inverse rules for specific permissions. This is a + // no-op if we added a disruptive rule, since oldRules will be empty. + for _, rule := range oldRules.orderedEntries() { + meta, oldPerms := rule.meta, rule.perms + newPerms := target.rules[meta] + droppedPerms := oldPerms.Difference(newPerms) + if !droppedPerms.IsEmpty() { + transitionRules = append(transitionRules, &configs.DeviceRule{ + Type: meta.node, + Major: meta.major, + Minor: meta.minor, + Permissions: droppedPerms, + Allow: target.defaultAllow, + }) + } + } + + // Add any additional rules which weren't in the old set. We happen to + // filter out rules which are present in both sets, though this isn't + // strictly necessary. + for _, rule := range target.rules.orderedEntries() { + meta, newPerms := rule.meta, rule.perms + oldPerms := oldRules[meta] + gainedPerms := newPerms.Difference(oldPerms) + if !gainedPerms.IsEmpty() { + transitionRules = append(transitionRules, &configs.DeviceRule{ + Type: meta.node, + Major: meta.major, + Minor: meta.minor, + Permissions: gainedPerms, + Allow: !target.defaultAllow, + }) + } + } + return transitionRules, nil +} diff --git a/libcontainer/cgroups/devices/devices_emulator_test.go b/libcontainer/cgroups/devices/devices_emulator_test.go new file mode 100644 index 000000000000..f813accef239 --- /dev/null +++ b/libcontainer/cgroups/devices/devices_emulator_test.go @@ -0,0 +1,1096 @@ +package devices + +import ( + "bytes" + "reflect" + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +func TestDeviceEmulatorLoad(t *testing.T) { + tests := []struct { + name, list string + expected *Emulator + }{ + { + name: "BlacklistMode", + list: `a *:* rwm`, + expected: &Emulator{ + defaultAllow: true, + }, + }, + { + name: "WhitelistBasic", + list: `c 4:2 rw`, + expected: &Emulator{ + defaultAllow: false, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 4, + minor: 2, + }: configs.DevicePermissions("rw"), + }, + }, + }, + { + name: "WhitelistWildcard", + list: `b 0:* m`, + expected: &Emulator{ + defaultAllow: false, + rules: deviceRules{ + { + node: configs.BlockDevice, + major: 0, + minor: configs.Wildcard, + }: configs.DevicePermissions("m"), + }, + }, + }, + { + name: "WhitelistDuplicate", + list: `c *:* rwm +c 1:1 r`, + expected: &Emulator{ + defaultAllow: false, + rules: deviceRules{ + { + node: configs.CharDevice, + major: configs.Wildcard, + minor: configs.Wildcard, + }: configs.DevicePermissions("rwm"), + // To match the kernel, we allow redundant rules. + { + node: configs.CharDevice, + major: 1, + minor: 1, + }: configs.DevicePermissions("r"), + }, + }, + }, + { + name: "WhitelistComplicated", + list: `c *:* m +b *:* m +c 1:3 rwm +c 1:5 rwm +c 1:7 rwm +c 1:8 rwm +c 1:9 rwm +c 5:0 rwm +c 5:2 rwm +c 136:* rwm +c 10:200 rwm`, + expected: &Emulator{ + defaultAllow: false, + rules: deviceRules{ + { + node: configs.CharDevice, + major: configs.Wildcard, + minor: configs.Wildcard, + }: configs.DevicePermissions("m"), + { + node: configs.BlockDevice, + major: configs.Wildcard, + minor: configs.Wildcard, + }: configs.DevicePermissions("m"), + { + node: configs.CharDevice, + major: 1, + minor: 3, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 1, + minor: 7, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 1, + minor: 8, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 1, + minor: 9, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 5, + minor: 0, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 5, + minor: 2, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 136, + minor: configs.Wildcard, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 10, + minor: 200, + }: configs.DevicePermissions("rwm"), + }, + }, + }, + // Some invalid lists. + { + name: "InvalidFieldNumber", + list: `b 1:0`, + expected: nil, + }, + { + name: "InvalidDeviceType", + list: `p *:* rwm`, + expected: nil, + }, + { + name: "InvalidMajorNumber1", + list: `p -1:3 rwm`, + expected: nil, + }, + { + name: "InvalidMajorNumber2", + list: `c foo:27 rwm`, + expected: nil, + }, + { + name: "InvalidMinorNumber1", + list: `b 1:-4 rwm`, + expected: nil, + }, + { + name: "InvalidMinorNumber2", + list: `b 1:foo rwm`, + expected: nil, + }, + { + name: "InvalidPermissions", + list: `b 1:7 rwk`, + expected: nil, + }, + } + + for _, test := range tests { + test := test // capture range variable + t.Run(test.name, func(t *testing.T) { + list := bytes.NewBufferString(test.list) + emu, err := EmulatorFromList(list) + if err != nil && test.expected != nil { + t.Fatalf("unexpected failure when creating emulator: %v", err) + } else if err == nil && test.expected == nil { + t.Fatalf("unexpected success when creating emulator: %#v", emu) + } + + if !reflect.DeepEqual(emu, test.expected) { + t.Errorf("final emulator state mismatch: %#v != %#v", emu, test.expected) + } + }) + } +} + +func testDeviceEmulatorApply(t *testing.T, baseDefaultAllow bool) { + tests := []struct { + name string + rule configs.DeviceRule + base, expected *Emulator + }{ + // Switch between default modes. + { + name: "SwitchToOtherMode", + rule: configs.DeviceRule{ + Type: configs.WildcardDevice, + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: configs.DevicePermissions("rwm"), + Allow: !baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: configs.Wildcard, + minor: configs.Wildcard, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 1, + minor: 1, + }: configs.DevicePermissions("r"), + }, + }, + expected: &Emulator{ + defaultAllow: !baseDefaultAllow, + rules: nil, + }, + }, + { + name: "SwitchToSameModeNoop", + rule: configs.DeviceRule{ + Type: configs.WildcardDevice, + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: configs.DevicePermissions("rwm"), + Allow: baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: nil, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: nil, + }, + }, + { + name: "SwitchToSameMode", + rule: configs.DeviceRule{ + Type: configs.WildcardDevice, + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: configs.DevicePermissions("rwm"), + Allow: baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: configs.Wildcard, + minor: configs.Wildcard, + }: configs.DevicePermissions("rwm"), + { + node: configs.CharDevice, + major: 1, + minor: 1, + }: configs.DevicePermissions("r"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: nil, + }, + }, + // Rule addition logic. + { + name: "RuleAdditionBasic", + rule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("rm"), + Allow: !baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 2, + minor: 1, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 2, + minor: 1, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("rm"), + }, + }, + }, + { + name: "RuleAdditionBasicDuplicate", + rule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("rm"), + Allow: !baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: configs.Wildcard, + }: configs.DevicePermissions("rwm"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: configs.Wildcard, + }: configs.DevicePermissions("rwm"), + // To match the kernel, we allow redundant rules. + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("rm"), + }, + }, + }, + { + name: "RuleAdditionBasicDuplicateNoop", + rule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("rm"), + Allow: !baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("rm"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("rm"), + }, + }, + }, + { + name: "RuleAdditionMerge", + rule: configs.DeviceRule{ + Type: configs.BlockDevice, + Major: 5, + Minor: 12, + Permissions: configs.DevicePermissions("rm"), + Allow: !baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 2, + minor: 1, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 5, + minor: 12, + }: configs.DevicePermissions("rw"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 2, + minor: 1, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 5, + minor: 12, + }: configs.DevicePermissions("rwm"), + }, + }, + }, + { + name: "RuleAdditionMergeWildcard", + rule: configs.DeviceRule{ + Type: configs.BlockDevice, + Major: 5, + Minor: configs.Wildcard, + Permissions: configs.DevicePermissions("rm"), + Allow: !baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 2, + minor: 1, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 5, + minor: configs.Wildcard, + }: configs.DevicePermissions("rw"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 2, + minor: 1, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 5, + minor: configs.Wildcard, + }: configs.DevicePermissions("rwm"), + }, + }, + }, + { + name: "RuleAdditionMergeNoop", + rule: configs.DeviceRule{ + Type: configs.BlockDevice, + Major: 5, + Minor: 12, + Permissions: configs.DevicePermissions("r"), + Allow: !baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 2, + minor: 1, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 5, + minor: 12, + }: configs.DevicePermissions("rw"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 2, + minor: 1, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 5, + minor: 12, + }: configs.DevicePermissions("rw"), + }, + }, + }, + // Rule removal logic. + { + name: "RuleRemovalBasic", + rule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("rm"), + Allow: baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("rm"), + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + }, + }, + }, + { + name: "RuleRemovalNonexistent", + rule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 4, + Minor: 1, + Permissions: configs.DevicePermissions("rw"), + Allow: baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + }, + }, + }, + { + name: "RuleRemovalFull", + rule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("rw"), + Allow: baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("w"), + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + }, + }, + }, + { + name: "RuleRemovalPartial", + rule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("r"), + Allow: baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("rm"), + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("m"), + { + node: configs.BlockDevice, + major: 1, + minor: 5, + }: configs.DevicePermissions("r"), + }, + }, + }, + // Check our non-canonical behaviour when it comes to try to "punch + // out" holes in a wildcard rule. + { + name: "RuleRemovalWildcardPunchoutImpossible", + rule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("r"), + Allow: baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: configs.Wildcard, + }: configs.DevicePermissions("rm"), + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("r"), + }, + }, + expected: nil, + }, + { + name: "RuleRemovalWildcardPunchoutPossible", + rule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("r"), + Allow: baseDefaultAllow, + }, + base: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: configs.Wildcard, + }: configs.DevicePermissions("wm"), + { + node: configs.CharDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("r"), + }, + }, + expected: &Emulator{ + defaultAllow: baseDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: configs.Wildcard, + }: configs.DevicePermissions("wm"), + }, + }, + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + err := test.base.Apply(test.rule) + if err != nil && test.expected != nil { + t.Fatalf("unexpected failure when applying apply rule: %v", err) + } else if err == nil && test.expected == nil { + t.Fatalf("unexpected success when applying apply rule: %#v", test.base) + } + + if test.expected != nil && !reflect.DeepEqual(test.base, test.expected) { + t.Errorf("final emulator state mismatch: %#v != %#v", test.base, test.expected) + } + }) + } +} + +func TestDeviceEmulatorWhitelistApply(t *testing.T) { + testDeviceEmulatorApply(t, false) +} + +func TestDeviceEmulatorBlacklistApply(t *testing.T) { + testDeviceEmulatorApply(t, true) +} + +func testDeviceEmulatorTransition(t *testing.T, sourceDefaultAllow bool) { + tests := []struct { + name string + source, target *Emulator + expected []*configs.DeviceRule + }{ + // No-op changes. + { + name: "Noop", + source: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: configs.Wildcard, + }: configs.DevicePermissions("wm"), + }, + }, + target: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 42, + minor: configs.Wildcard, + }: configs.DevicePermissions("wm"), + }, + }, + // Identical white-lists produce no extra rules. + expected: nil, + }, + // Switching modes. + { + name: "SwitchToOtherMode", + source: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rwm"), + }, + }, + target: &Emulator{ + defaultAllow: !sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.BlockDevice, + major: 42, + minor: configs.Wildcard, + }: configs.DevicePermissions("wm"), + }, + }, + expected: []*configs.DeviceRule{ + // Clear-all rule. + { + Type: configs.WildcardDevice, + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: configs.DevicePermissions("rwm"), + Allow: !sourceDefaultAllow, + }, + // The actual rule-set. + { + Type: configs.BlockDevice, + Major: 42, + Minor: configs.Wildcard, + Permissions: configs.DevicePermissions("wm"), + Allow: sourceDefaultAllow, + }, + }, + }, + // Rule changes. + { + name: "RuleAddition", + source: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rwm"), + }, + }, + target: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("rwm"), + }, + }, + expected: []*configs.DeviceRule{ + { + Type: configs.BlockDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("rwm"), + Allow: !sourceDefaultAllow, + }, + }, + }, + { + name: "RuleRemoval", + source: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 42, + minor: 1337, + }: configs.DevicePermissions("rwm"), + }, + }, + target: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rwm"), + }, + }, + expected: []*configs.DeviceRule{ + { + Type: configs.BlockDevice, + Major: 42, + Minor: 1337, + Permissions: configs.DevicePermissions("rwm"), + Allow: sourceDefaultAllow, + }, + }, + }, + { + name: "RuleMultipleAdditionRemoval", + source: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rwm"), + { + node: configs.BlockDevice, + major: 3, + minor: 9, + }: configs.DevicePermissions("rw"), + }, + }, + target: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rwm"), + }, + }, + expected: []*configs.DeviceRule{ + { + Type: configs.BlockDevice, + Major: 3, + Minor: 9, + Permissions: configs.DevicePermissions("rw"), + Allow: sourceDefaultAllow, + }, + }, + }, + // Modifying the access permissions. + { + name: "RulePartialAddition", + source: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("r"), + }, + }, + target: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rwm"), + }, + }, + expected: []*configs.DeviceRule{ + { + Type: configs.CharDevice, + Major: 1, + Minor: 2, + Permissions: configs.DevicePermissions("wm"), + Allow: !sourceDefaultAllow, + }, + }, + }, + { + name: "RulePartialRemoval", + source: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rw"), + }, + }, + target: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("w"), + }, + }, + expected: []*configs.DeviceRule{ + { + Type: configs.CharDevice, + Major: 1, + Minor: 2, + Permissions: configs.DevicePermissions("r"), + Allow: sourceDefaultAllow, + }, + }, + }, + { + name: "RulePartialBoth", + source: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rw"), + }, + }, + target: &Emulator{ + defaultAllow: sourceDefaultAllow, + rules: deviceRules{ + { + node: configs.CharDevice, + major: 1, + minor: 2, + }: configs.DevicePermissions("rm"), + }, + }, + expected: []*configs.DeviceRule{ + { + Type: configs.CharDevice, + Major: 1, + Minor: 2, + Permissions: configs.DevicePermissions("w"), + Allow: sourceDefaultAllow, + }, + { + Type: configs.CharDevice, + Major: 1, + Minor: 2, + Permissions: configs.DevicePermissions("m"), + Allow: !sourceDefaultAllow, + }, + }, + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + // If we are in black-list mode, we need to prepend the relevant + // clear-all rule (the expected rule lists are written with + // white-list mode in mind), and then make a full copy of the + // target rules. + if sourceDefaultAllow && test.source.defaultAllow == test.target.defaultAllow { + test.expected = []*configs.DeviceRule{{ + Type: configs.WildcardDevice, + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: configs.DevicePermissions("rwm"), + Allow: test.target.defaultAllow, + }} + for _, rule := range test.target.rules.orderedEntries() { + test.expected = append(test.expected, &configs.DeviceRule{ + Type: rule.meta.node, + Major: rule.meta.major, + Minor: rule.meta.minor, + Permissions: rule.perms, + Allow: !test.target.defaultAllow, + }) + } + } + + rules, err := test.source.Transition(test.target) + if err != nil { + t.Fatalf("unexpected error while calculating transition rules: %#v", err) + } + + if !reflect.DeepEqual(rules, test.expected) { + t.Errorf("rules don't match expected set: %#v != %#v", rules, test.expected) + } + + // Apply the rules to the source to see if it actually transitions + // correctly. This is all emulated but it's a good thing to + // double-check. + for _, rule := range rules { + if err := test.source.Apply(*rule); err != nil { + t.Fatalf("error while applying transition rule [%#v]: %v", rule, err) + } + } + if !reflect.DeepEqual(test.source, test.target) { + t.Errorf("transition incomplete after applying all rules: %#v != %#v", test.source, test.target) + } + }) + } +} + +func TestDeviceEmulatorTransitionFromBlacklist(t *testing.T) { + testDeviceEmulatorTransition(t, true) +} + +func TestDeviceEmulatorTransitionFromWhitelist(t *testing.T) { + testDeviceEmulatorTransition(t, false) +} diff --git a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/apply_raw.go index 512fd700104b..79887c6f0f3f 100644 --- a/libcontainer/cgroups/fs/apply_raw.go +++ b/libcontainer/cgroups/fs/apply_raw.go @@ -174,9 +174,6 @@ func (m *Manager) Apply(pid int) (err error) { } for _, sys := range m.getSubsystems() { - // TODO: Apply should, ideally, be reentrant or be broken up into a separate - // create and join phase so that the cgroup hierarchy for a container can be - // created then join consists of writing the process pids to cgroup.procs p, err := d.path(sys.Name()) if err != nil { // The non-presence of the devices subsystem is @@ -189,10 +186,10 @@ func (m *Manager) Apply(pid int) (err error) { m.Paths[sys.Name()] = p if err := sys.Apply(d); err != nil { - // In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't - // been set, we don't bail on error in case of permission problems. - // Cases where limits have been set (and we couldn't create our own - // cgroup) are handled by Set. + // In the case of rootless (including euid=0 in userns), where an + // explicit cgroup path hasn't been set, we don't bail on error in + // case of permission problems. Cases where limits have been set + // (and we couldn't create our own cgroup) are handled by Set. if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" { delete(m.Paths, sys.Name()) continue @@ -280,22 +277,26 @@ func (m *Manager) Set(container *configs.Config) error { // Freeze toggles the container's freezer cgroup depending on the state // provided -func (m *Manager) Freeze(state configs.FreezerState) error { - if m.Cgroups == nil { +func (m *Manager) Freeze(state configs.FreezerState) (Err error) { + path := m.GetPaths()["freezer"] + if m.Cgroups == nil || path == "" { return errors.New("cannot toggle freezer: cgroups not configured for container") } - paths := m.GetPaths() - dir := paths["freezer"] prevState := m.Cgroups.Resources.Freezer m.Cgroups.Resources.Freezer = state + defer func() { + if Err != nil { + m.Cgroups.Resources.Freezer = prevState + } + }() + freezer, err := m.getSubsystems().Get("freezer") if err != nil { return err } - err = freezer.Set(dir, m.Cgroups) + err = freezer.Set(path, m.Cgroups) if err != nil { - m.Cgroups.Resources.Freezer = prevState return err } return nil @@ -430,3 +431,15 @@ func CheckCpushares(path string, c uint64) error { return nil } + +func (m *Manager) GetFreezerState() (configs.FreezerState, error) { + paths := m.GetPaths() + dir := paths["freezer"] + freezer, err := m.getSubsystems().Get("freezer") + + // If the container doesn't have the freezer cgroup, say it's undefined. + if err != nil || dir == "" { + return configs.Undefined, nil + } + return freezer.(*FreezerGroup).GetState(dir) +} diff --git a/libcontainer/cgroups/fs/devices.go b/libcontainer/cgroups/fs/devices.go index 0ac5b4ed7003..31f8b953d2f4 100644 --- a/libcontainer/cgroups/fs/devices.go +++ b/libcontainer/cgroups/fs/devices.go @@ -3,12 +3,18 @@ package fs import ( + "bytes" + "fmt" + "reflect" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/devices" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" ) type DevicesGroup struct { + testingSkipFinalCheck bool } func (s *DevicesGroup) Name() string { @@ -25,49 +31,74 @@ func (s *DevicesGroup) Apply(d *cgroupData) error { return nil } +func loadEmulator(path string) (*devices.Emulator, error) { + list, err := readFile(path, "devices.list") + if err != nil { + return nil, err + } + return devices.EmulatorFromList(bytes.NewBufferString(list)) +} + +func buildEmulator(rules []*configs.DeviceRule) (*devices.Emulator, error) { + // This defaults to a white-list -- which is what we want! + emu := &devices.Emulator{} + for _, rule := range rules { + if err := emu.Apply(*rule); err != nil { + return nil, err + } + } + return emu, nil +} + func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { if system.RunningInUserNS() { return nil } - devices := cgroup.Resources.Devices - if len(devices) > 0 { - for _, dev := range devices { - file := "devices.deny" - if dev.Allow { - file = "devices.allow" - } - if err := writeFile(path, file, dev.CgroupString()); err != nil { - return err - } - } - return nil + // Generate two emulators, one for the current state of the cgroup and one + // for the requested state by the user. + current, err := loadEmulator(path) + if err != nil { + return err + } + target, err := buildEmulator(cgroup.Resources.Devices) + if err != nil { + return err } - if cgroup.Resources.AllowAllDevices != nil { - if *cgroup.Resources.AllowAllDevices == false { - if err := writeFile(path, "devices.deny", "a"); err != nil { - return err - } - for _, dev := range cgroup.Resources.AllowedDevices { - if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { - return err - } - } - return nil + // Compute the minimal set of transition rules needed to achieve the + // requested state. + transitionRules, err := current.Transition(target) + if err != nil { + return err + } + for _, rule := range transitionRules { + file := "devices.deny" + if rule.Allow { + file = "devices.allow" } - - if err := writeFile(path, "devices.allow", "a"); err != nil { + if err := writeFile(path, file, rule.CgroupString()); err != nil { return err } } - for _, dev := range cgroup.Resources.DeniedDevices { - if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil { + // Final safety check -- ensure that the resulting state is what was + // requested. This is only really correct for white-lists, but for + // black-lists we can at least check that the cgroup is in the right mode. + // + // This safety-check is skipped for the unit tests because we cannot + // currently mock devices.list correctly. + if !s.testingSkipFinalCheck { + currentAfter, err := loadEmulator(path) + if err != nil { return err } + if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) { + return fmt.Errorf("resulting devices cgroup doesn't precisely match target") + } else if target.IsBlacklist() != currentAfter.IsBlacklist() { + return fmt.Errorf("resulting devices cgroup doesn't match target mode") + } } - return nil } diff --git a/libcontainer/cgroups/fs/devices_test.go b/libcontainer/cgroups/fs/devices_test.go index fc635b990f56..fdfc37a7e170 100644 --- a/libcontainer/cgroups/fs/devices_test.go +++ b/libcontainer/cgroups/fs/devices_test.go @@ -8,91 +8,44 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" ) -var ( - allowedDevices = []*configs.Device{ - { - Path: "/dev/zero", - Type: 'c', - Major: 1, - Minor: 5, - Permissions: "rwm", - FileMode: 0666, - }, - } - allowedList = "c 1:5 rwm" - deniedDevices = []*configs.Device{ - { - Path: "/dev/null", - Type: 'c', - Major: 1, - Minor: 3, - Permissions: "rwm", - FileMode: 0666, - }, - } - deniedList = "c 1:3 rwm" -) - func TestDevicesSetAllow(t *testing.T) { helper := NewCgroupTestUtil("devices", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ - "devices.deny": "a", + "devices.allow": "", + "devices.deny": "", + "devices.list": "a *:* rwm", }) - allowAllDevices := false - helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices - helper.CgroupData.config.Resources.AllowedDevices = allowedDevices - devices := &DevicesGroup{} - if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { - t.Fatal(err) - } - - value, err := getCgroupParamString(helper.CgroupPath, "devices.allow") - if err != nil { - t.Fatalf("Failed to parse devices.allow - %s", err) - } - - if value != allowedList { - t.Fatal("Got the wrong value, set devices.allow failed.") - } - // When AllowAllDevices is nil, devices.allow file should not be modified. - helper.CgroupData.config.Resources.AllowAllDevices = nil - if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { - t.Fatal(err) - } - value, err = getCgroupParamString(helper.CgroupPath, "devices.allow") - if err != nil { - t.Fatalf("Failed to parse devices.allow - %s", err) - } - if value != allowedList { - t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.") + helper.CgroupData.config.Resources.Devices = []*configs.DeviceRule{ + { + Type: configs.CharDevice, + Major: 1, + Minor: 5, + Permissions: configs.DevicePermissions("rwm"), + Allow: true, + }, } -} - -func TestDevicesSetDeny(t *testing.T) { - helper := NewCgroupTestUtil("devices", t) - defer helper.cleanup() - helper.writeFileContents(map[string]string{ - "devices.allow": "a", - }) - - allowAllDevices := true - helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices - helper.CgroupData.config.Resources.DeniedDevices = deniedDevices - devices := &DevicesGroup{} + devices := &DevicesGroup{testingSkipFinalCheck: true} if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } + // The default deny rule must be written. value, err := getCgroupParamString(helper.CgroupPath, "devices.deny") if err != nil { - t.Fatalf("Failed to parse devices.deny - %s", err) + t.Fatalf("Failed to parse devices.deny: %s", err) + } + if value[0] != 'a' { + t.Errorf("Got the wrong value (%q), set devices.deny failed.", value) } - if value != deniedList { - t.Fatal("Got the wrong value, set devices.deny failed.") + // Permitted rule must be written. + if value, err := getCgroupParamString(helper.CgroupPath, "devices.allow"); err != nil { + t.Fatalf("Failed to parse devices.allow: %s", err) + } else if value != "c 1:5 rwm" { + t.Errorf("Got the wrong value (%q), set devices.allow failed.", value) } } diff --git a/libcontainer/cgroups/fs/freezer.go b/libcontainer/cgroups/fs/freezer.go index 4b19f8a970d7..2f9d91bb5097 100644 --- a/libcontainer/cgroups/fs/freezer.go +++ b/libcontainer/cgroups/fs/freezer.go @@ -4,11 +4,14 @@ package fs import ( "fmt" + "os" "strings" "time" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" + "golang.org/x/sys/unix" ) type FreezerGroup struct { @@ -38,11 +41,11 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { return err } - state, err := readFile(path, "freezer.state") + state, err := s.GetState(path) if err != nil { return err } - if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) { + if state == cgroup.Resources.Freezer { break } @@ -64,3 +67,30 @@ func (s *FreezerGroup) Remove(d *cgroupData) error { func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } + +func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) { + for { + state, err := readFile(path, "freezer.state") + if err != nil { + // If the kernel is too old, then we just treat the freezer as + // being in an "undefined" state. + if os.IsNotExist(err) || errors.Cause(err) == unix.ENODEV { + err = nil + } + return configs.Undefined, err + } + switch strings.TrimSpace(state) { + case "THAWED": + return configs.Thawed, nil + case "FROZEN": + return configs.Frozen, nil + case "FREEZING": + // Make sure we get a stable freezer state, so retry if the cgroup + // is still undergoing freezing. This should be a temporary delay. + time.Sleep(1 * time.Millisecond) + continue + default: + return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state) + } + } +} diff --git a/libcontainer/cgroups/fs/freezer_v2.go b/libcontainer/cgroups/fs/freezer_v2.go index 186de9ab41fd..ad0a962b91c4 100644 --- a/libcontainer/cgroups/fs/freezer_v2.go +++ b/libcontainer/cgroups/fs/freezer_v2.go @@ -4,6 +4,7 @@ package fs import ( "fmt" + "os" "strings" "time" @@ -72,3 +73,23 @@ func (s *FreezerGroupV2) Remove(d *cgroupData) error { func (s *FreezerGroupV2) GetStats(path string, stats *cgroups.Stats) error { return nil } + +func (s *FreezerGroupV2) GetState(path string) (configs.FreezerState, error) { + state, err := readFile(path, "cgroup.freeze") + if err != nil { + // If the kernel is too old, then we just treat the freezer as being in + // an "undefined" state. + if os.IsNotExist(err) { + err = nil + } + return configs.Undefined, err + } + switch strings.TrimSpace(state) { + case "0": + return configs.Thawed, nil + case "1": + return configs.Frozen, nil + default: + return configs.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, state) + } +} diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go index f274a49a3c1a..b28a19bb033f 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/apply_systemd.go @@ -3,7 +3,8 @@ package systemd import ( - "errors" + "bufio" + stdErrors "errors" "fmt" "io/ioutil" "math" @@ -16,8 +17,10 @@ import ( systemdDbus "github.com/coreos/go-systemd/dbus" "github.com/godbus/dbus" "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/devices" "github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -36,7 +39,7 @@ type subsystem interface { Set(path string, cgroup *configs.Cgroup) error } -var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") +var errSubsystemDoesNotExist = stdErrors.New("cgroup: subsystem does not exist") type subsystemSet []subsystem @@ -70,6 +73,209 @@ const ( testSliceWait = 4 ) +func groupPrefix(ruleType configs.DeviceType) (string, error) { + switch ruleType { + case configs.BlockDevice: + return "block-", nil + case configs.CharDevice: + return "char-", nil + default: + return "", errors.Errorf("device type %v has no group prefix", ruleType) + } +} + +// findDeviceGroup tries to find the device group name (as listed in +// /proc/devices) with the type prefixed as requried for DeviceAllow, for a +// given (type, major) combination. If more than one device group exists, an +// arbitrary one is chosen. +func findDeviceGroup(ruleType configs.DeviceType, ruleMajor int64) (string, error) { + fh, err := os.Open("/proc/devices") + if err != nil { + return "", err + } + defer fh.Close() + + prefix, err := groupPrefix(ruleType) + if err != nil { + return "", err + } + + scanner := bufio.NewScanner(fh) + var currentType configs.DeviceType + for scanner.Scan() { + // We need to strip spaces because the first number is column-aligned. + line := strings.TrimSpace(scanner.Text()) + + // Handle the "header" lines. + switch line { + case "Block devices:": + currentType = configs.BlockDevice + continue + case "Character devices:": + currentType = configs.CharDevice + continue + case "": + continue + } + + // Skip lines unrelated to our type. + if currentType != ruleType { + continue + } + + // Parse out the (major, name). + var ( + currMajor int64 + currName string + ) + if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 { + if err == nil { + err = errors.Errorf("wrong number of fields") + } + return "", errors.Wrapf(err, "scan /proc/devices line %q", line) + } + + if currMajor == ruleMajor { + return prefix + currName, nil + } + } + if err := scanner.Err(); err != nil { + return "", errors.Wrap(err, "reading /proc/devices") + } + // Couldn't find the device group. + return "", nil +} + +// generateDeviceProperties takes the configured device rules and generates a +// corresponding set of systemd properties to configure the devices correctly. +func generateDeviceProperties(rules []*configs.DeviceRule) ([]systemdDbus.Property, error) { + // DeviceAllow is the type "a(ss)" which means we need a temporary struct + // to represent it in Go. + type deviceAllowEntry struct { + Path string + Perms string + } + + properties := []systemdDbus.Property{ + // Always run in the strictest white-list mode. + newProp("DevicePolicy", "strict"), + // Empty the DevicesAllow array before filling it. + newProp("DeviceAllow", []deviceAllowEntry{}), + } + + // Figure out the set of rules. + configEmu := &devices.Emulator{} + for _, rule := range rules { + if err := configEmu.Apply(*rule); err != nil { + return nil, errors.Wrap(err, "apply rule for systemd") + } + } + // systemd doesn't support blacklists. So we log a warning, and tell + // systemd to act as a deny-all whitelist. This ruleset will be replaced + // with our normal fallback code. This may result in spurrious errors, but + // the only other option is to error out here. + if configEmu.IsBlacklist() { + // However, if we're dealing with an allow-all rule then we can do it. + if configEmu.IsAllowAll() { + return []systemdDbus.Property{ + // Run in white-list mode by setting to "auto" and removing all + // DeviceAllow rules. + newProp("DevicePolicy", "auto"), + newProp("DeviceAllow", []deviceAllowEntry{}), + }, nil + } + logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule") + return properties, nil + } + + // Now generate the set of rules we actually need to apply. Unlike the + // normal devices cgroup, in "strict" mode systemd defaults to a deny-all + // whitelist which is the default for devices.Emulator. + baseEmu := &devices.Emulator{} + finalRules, err := baseEmu.Transition(configEmu) + if err != nil { + return nil, errors.Wrap(err, "get simplified rules for systemd") + } + var deviceAllowList []deviceAllowEntry + for _, rule := range finalRules { + if !rule.Allow { + // Should never happen. + return nil, errors.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule) + } + switch rule.Type { + case configs.BlockDevice, configs.CharDevice: + default: + // Should never happen. + return nil, errors.Errorf("invalid device type for DeviceAllow: %v", rule.Type) + } + + entry := deviceAllowEntry{ + Perms: string(rule.Permissions), + } + + // systemd has a fairly odd (though understandable) syntax here, and + // because of the OCI configuration format we have to do quite a bit of + // trickery to convert things: + // + // * Concrete rules with non-wildcard major/minor numbers have to use + // /dev/{block,char} paths. This is slightly odd because it means + // that we cannot add whitelist rules for devices that don't exist, + // but there's not too much we can do about that. + // + // However, path globbing is not support for path-based rules so we + // need to handle wildcards in some other manner. + // + // * Wildcard-minor rules have to specify a "device group name" (the + // second column in /proc/devices). + // + // * Wildcard (major and minor) rules can just specify a glob with the + // type ("char-*" or "block-*"). + // + // The only type of rule we can't handle is wildcard-major rules, and + // so we'll give a warning in that case (note that the fallback code + // will insert any rules systemd couldn't handle). What amazing fun. + + if rule.Major == configs.Wildcard { + // "_ *:n _" rules aren't supported by systemd. + if rule.Minor != configs.Wildcard { + logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule) + continue + } + + // "_ *:* _" rules just wildcard everything. + prefix, err := groupPrefix(rule.Type) + if err != nil { + return nil, err + } + entry.Path = prefix + "*" + } else if rule.Minor == configs.Wildcard { + // "_ n:* _" rules require a device group from /proc/devices. + group, err := findDeviceGroup(rule.Type, rule.Major) + if err != nil { + return nil, errors.Wrapf(err, "find device '%v/%d'", rule.Type, rule.Major) + } + if group == "" { + // Couldn't find a group. + logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule) + continue + } + entry.Path = group + } else { + // "_ n:m _" rules are just a path in /dev/{block,char}/. + switch rule.Type { + case configs.BlockDevice: + entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor) + case configs.CharDevice: + entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor) + } + } + deviceAllowList = append(deviceAllowList, entry) + } + + properties = append(properties, newProp("DeviceAllow", deviceAllowList)) + return properties, nil +} + var ( connLock sync.Mutex theConn *systemdDbus.Conn @@ -196,6 +402,12 @@ func (m *LegacyManager) Apply(pid int) error { properties = append(properties, newProp("DefaultDependencies", false)) + deviceProperties, err := generateDeviceProperties(c.Resources.Devices) + if err != nil { + return err + } + properties = append(properties, deviceProperties...) + if c.Resources.Memory != 0 { properties = append(properties, newProp("MemoryLimit", uint64(c.Resources.Memory))) @@ -472,7 +684,6 @@ func (m *LegacyManager) Set(container *configs.Config) error { if err != nil && !cgroups.IsNotFound(err) { return err } - if err := sys.Set(path, container.Cgroups); err != nil { return err } @@ -524,3 +735,15 @@ func isUnitExists(err error) bool { } return false } + +func (m *LegacyManager) GetFreezerState() (configs.FreezerState, error) { + path, err := getSubsystemPath(m.Cgroups, "freezer") + if err != nil && !cgroups.IsNotFound(err) { + return configs.Undefined, err + } + freezer, err := legacySubsystems.Get("freezer") + if err != nil { + return configs.Undefined, err + } + return freezer.(*fs.FreezerGroup).GetState(path) +} diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/unified_hierarchy.go index d394e3a5a8f7..5221db485225 100644 --- a/libcontainer/cgroups/systemd/unified_hierarchy.go +++ b/libcontainer/cgroups/systemd/unified_hierarchy.go @@ -95,6 +95,12 @@ func (m *UnifiedManager) Apply(pid int) error { properties = append(properties, newProp("DefaultDependencies", false)) + deviceProperties, err := generateDeviceProperties(c.Resources.Devices) + if err != nil { + return err + } + properties = append(properties, deviceProperties...) + if c.Resources.Memory != 0 { properties = append(properties, newProp("MemoryLimit", uint64(c.Resources.Memory))) @@ -327,3 +333,15 @@ func (m *UnifiedManager) Set(container *configs.Config) error { } return nil } + +func (m *UnifiedManager) GetFreezerState() (configs.FreezerState, error) { + path, err := getSubsystemPath(m.Cgroups, "freezer") + if err != nil { + return configs.Undefined, err + } + freezer, err := unifiedSubsystems.Get("freezer") + if err != nil { + return configs.Undefined, err + } + return freezer.(*fs.FreezerGroupV2).GetState(path) +} diff --git a/libcontainer/configs/cgroup_linux.go b/libcontainer/configs/cgroup_linux.go index 58ed19c9e78e..e83bb72dc334 100644 --- a/libcontainer/configs/cgroup_linux.go +++ b/libcontainer/configs/cgroup_linux.go @@ -32,15 +32,8 @@ type Cgroup struct { } type Resources struct { - // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. - // Deprecated - AllowAllDevices *bool `json:"allow_all_devices,omitempty"` - // Deprecated - AllowedDevices []*Device `json:"allowed_devices,omitempty"` - // Deprecated - DeniedDevices []*Device `json:"denied_devices,omitempty"` - - Devices []*Device `json:"devices"` + // Devices is the set of access rules for devices in the container. + Devices []*DeviceRule `json:"devices"` // Memory limit (in bytes) Memory int64 `json:"memory"` diff --git a/libcontainer/configs/device.go b/libcontainer/configs/device.go index 8701bb212d7b..24c5bbfa6ae8 100644 --- a/libcontainer/configs/device.go +++ b/libcontainer/configs/device.go @@ -1,8 +1,12 @@ package configs import ( + "errors" "fmt" "os" + "strconv" + + "golang.org/x/sys/unix" ) const ( @@ -12,21 +16,11 @@ const ( // TODO Windows: This can be factored out in the future type Device struct { - // Device type, block, char, etc. - Type rune `json:"type"` + DeviceRule // Path to the device. Path string `json:"path"` - // Major is the device's major number. - Major int64 `json:"major"` - - // Minor is the device's minor number. - Minor int64 `json:"minor"` - - // Cgroup permissions format, rwm. - Permissions string `json:"permissions"` - // FileMode permission bits for the device. FileMode os.FileMode `json:"file_mode"` @@ -35,23 +29,154 @@ type Device struct { // Gid of the device. Gid uint32 `json:"gid"` +} - // Write the file to the allowed list - Allow bool `json:"allow"` +// DevicePermissions is a cgroupv1-style string to represent device access. It +// has to be a string for backward compatibility reasons, hence why it has +// methods to do set operations. +type DevicePermissions string + +const ( + deviceRead uint = (1 << iota) + deviceWrite + deviceMknod +) + +func (p DevicePermissions) toSet() uint { + var set uint + for _, perm := range p { + switch perm { + case 'r': + set |= deviceRead + case 'w': + set |= deviceWrite + case 'm': + set |= deviceMknod + } + } + return set +} + +func fromSet(set uint) DevicePermissions { + var perm string + if set&deviceRead == deviceRead { + perm += "r" + } + if set&deviceWrite == deviceWrite { + perm += "w" + } + if set&deviceMknod == deviceMknod { + perm += "m" + } + return DevicePermissions(perm) +} + +// Union returns the union of the two sets of DevicePermissions. +func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions { + lhs := p.toSet() + rhs := o.toSet() + return fromSet(lhs | rhs) +} + +// Difference returns the set difference of the two sets of DevicePermissions. +// In set notation, A.Difference(B) gives you A\B. +func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions { + lhs := p.toSet() + rhs := o.toSet() + return fromSet(lhs &^ rhs) +} + +// Intersection computes the intersection of the two sets of DevicePermissions. +func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions { + lhs := p.toSet() + rhs := o.toSet() + return fromSet(lhs & rhs) +} + +// IsEmpty returns whether the set of permissions in a DevicePermissions is +// empty. +func (p DevicePermissions) IsEmpty() bool { + return p == DevicePermissions("") +} + +// IsValid returns whether the set of permissions is a subset of valid +// permissions (namely, {r,w,m}). +func (p DevicePermissions) IsValid() bool { + return p == fromSet(p.toSet()) +} + +type DeviceType rune + +const ( + WildcardDevice DeviceType = 'a' + BlockDevice DeviceType = 'b' + CharDevice DeviceType = 'c' // or 'u' + FifoDevice DeviceType = 'p' +) + +func (t DeviceType) IsValid() bool { + switch t { + case WildcardDevice, BlockDevice, CharDevice, FifoDevice: + return true + default: + return false + } } -func (d *Device) CgroupString() string { - return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) +func (t DeviceType) CanMknod() bool { + switch t { + case BlockDevice, CharDevice, FifoDevice: + return true + default: + return false + } +} + +func (t DeviceType) CanCgroup() bool { + switch t { + case WildcardDevice, BlockDevice, CharDevice: + return true + default: + return false + } } -func (d *Device) Mkdev() int { - return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) +type DeviceRule struct { + // Type of device ('c' for char, 'b' for block). If set to 'a', this rule + // acts as a wildcard and all fields other than Allow are ignored. + Type DeviceType `json:"type"` + + // Major is the device's major number. + Major int64 `json:"major"` + + // Minor is the device's minor number. + Minor int64 `json:"minor"` + + // Permissions is the set of permissions that this rule applies to (in the + // cgroupv1 format -- any combination of "rwm"). + Permissions DevicePermissions `json:"permissions"` + + // Allow specifies whether this rule is allowed. + Allow bool `json:"allow"` +} + +func (d *DeviceRule) CgroupString() string { + var ( + major = strconv.FormatInt(d.Major, 10) + minor = strconv.FormatInt(d.Minor, 10) + ) + if d.Major == Wildcard { + major = "*" + } + if d.Minor == Wildcard { + minor = "*" + } + return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions) } -// deviceNumberString converts the device number to a string return result. -func deviceNumberString(number int64) string { - if number == Wildcard { - return "*" +func (d *DeviceRule) Mkdev() (uint64, error) { + if d.Major == Wildcard || d.Minor == Wildcard { + return 0, errors.New("cannot mkdev() device with wildcards") } - return fmt.Sprint(number) + return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil } diff --git a/libcontainer/configs/device_defaults.go b/libcontainer/configs/device_defaults.go deleted file mode 100644 index e4f423c523ff..000000000000 --- a/libcontainer/configs/device_defaults.go +++ /dev/null @@ -1,111 +0,0 @@ -// +build linux - -package configs - -var ( - // DefaultSimpleDevices are devices that are to be both allowed and created. - DefaultSimpleDevices = []*Device{ - // /dev/null and zero - { - Path: "/dev/null", - Type: 'c', - Major: 1, - Minor: 3, - Permissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/zero", - Type: 'c', - Major: 1, - Minor: 5, - Permissions: "rwm", - FileMode: 0666, - }, - - { - Path: "/dev/full", - Type: 'c', - Major: 1, - Minor: 7, - Permissions: "rwm", - FileMode: 0666, - }, - - // consoles and ttys - { - Path: "/dev/tty", - Type: 'c', - Major: 5, - Minor: 0, - Permissions: "rwm", - FileMode: 0666, - }, - - // /dev/urandom,/dev/random - { - Path: "/dev/urandom", - Type: 'c', - Major: 1, - Minor: 9, - Permissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/random", - Type: 'c', - Major: 1, - Minor: 8, - Permissions: "rwm", - FileMode: 0666, - }, - } - DefaultAllowedDevices = append([]*Device{ - // allow mknod for any device - { - Type: 'c', - Major: Wildcard, - Minor: Wildcard, - Permissions: "m", - }, - { - Type: 'b', - Major: Wildcard, - Minor: Wildcard, - Permissions: "m", - }, - - { - Path: "/dev/console", - Type: 'c', - Major: 5, - Minor: 1, - Permissions: "rwm", - }, - // /dev/pts/ - pts namespaces are "coming soon" - { - Path: "", - Type: 'c', - Major: 136, - Minor: Wildcard, - Permissions: "rwm", - }, - { - Path: "", - Type: 'c', - Major: 5, - Minor: 2, - Permissions: "rwm", - }, - - // tuntap - { - Path: "", - Type: 'c', - Major: 10, - Minor: 200, - Permissions: "rwm", - }, - }, DefaultSimpleDevices...) - DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...) -) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 6ff4d96a5f55..c247b7876c0e 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -1809,27 +1809,11 @@ func (c *linuxContainer) runType() (Status, error) { } func (c *linuxContainer) isPaused() (bool, error) { - fcg := c.cgroupManager.GetPaths()["freezer"] - if fcg == "" { - // A container doesn't have a freezer cgroup - return false, nil - } - pausedState := "FROZEN" - filename := "freezer.state" - if cgroups.IsCgroup2UnifiedMode() { - filename = "cgroup.freeze" - pausedState = "1" - } - - data, err := ioutil.ReadFile(filepath.Join(fcg, filename)) + state, err := c.cgroupManager.GetFreezerState() if err != nil { - // If freezer cgroup is not mounted, the container would just be not paused. - if os.IsNotExist(err) { - return false, nil - } - return false, newSystemErrorWithCause(err, "checking if container is paused") + return false, err } - return bytes.Equal(bytes.TrimSpace(data), []byte(pausedState)), nil + return state == configs.Frozen, nil } func (c *linuxContainer) currentState() (*State, error) { diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go index 0bf8803d3412..c97ed8e4d3ca 100644 --- a/libcontainer/container_linux_test.go +++ b/libcontainer/container_linux_test.go @@ -58,6 +58,10 @@ func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { return nil } +func (m *mockCgroupManager) GetFreezerState() (configs.FreezerState, error) { + return configs.Thawed, nil +} + func (m *mockIntelRdtManager) Apply(pid int) error { return nil } diff --git a/libcontainer/devices/devices.go b/libcontainer/devices/devices.go index 5dabe06cef5b..702f913ec942 100644 --- a/libcontainer/devices/devices.go +++ b/libcontainer/devices/devices.go @@ -31,33 +31,33 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) { } var ( + devType configs.DeviceType + mode = stat.Mode devNumber = uint64(stat.Rdev) major = unix.Major(devNumber) minor = unix.Minor(devNumber) ) - if major == 0 { - return nil, ErrNotADevice - } - - var ( - devType rune - mode = stat.Mode - ) switch { case mode&unix.S_IFBLK == unix.S_IFBLK: - devType = 'b' + devType = configs.BlockDevice case mode&unix.S_IFCHR == unix.S_IFCHR: - devType = 'c' + devType = configs.CharDevice + case mode&unix.S_IFIFO == unix.S_IFIFO: + devType = configs.FifoDevice + default: + return nil, ErrNotADevice } return &configs.Device{ - Type: devType, - Path: path, - Major: int64(major), - Minor: int64(minor), - Permissions: permissions, - FileMode: os.FileMode(mode), - Uid: stat.Uid, - Gid: stat.Gid, + DeviceRule: configs.DeviceRule{ + Type: devType, + Major: int64(major), + Minor: int64(minor), + Permissions: configs.DevicePermissions(permissions), + }, + Path: path, + FileMode: os.FileMode(mode), + Uid: stat.Uid, + Gid: stat.Gid, }, nil } diff --git a/libcontainer/integration/template_test.go b/libcontainer/integration/template_test.go index 5f7cab53b67c..30503a98bb99 100644 --- a/libcontainer/integration/template_test.go +++ b/libcontainer/integration/template_test.go @@ -2,6 +2,7 @@ package integration import ( "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/specconv" "golang.org/x/sys/unix" ) @@ -20,7 +21,10 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV // it uses a network strategy of just setting a loopback interface // and the default setup for devices func newTemplateConfig(rootfs string) *configs.Config { - allowAllDevices := false + var allowedDevices []*configs.DeviceRule + for _, device := range specconv.AllowedDevices { + allowedDevices = append(allowedDevices, &device.DeviceRule) + } return &configs.Config{ Rootfs: rootfs, Capabilities: &configs.Capabilities{ @@ -116,8 +120,7 @@ func newTemplateConfig(rootfs string) *configs.Config { Path: "integration/test", Resources: &configs.Resources{ MemorySwappiness: nil, - AllowAllDevices: &allowAllDevices, - AllowedDevices: configs.DefaultAllowedDevices, + Devices: allowedDevices, }, }, MaskPaths: []string{ @@ -127,7 +130,7 @@ func newTemplateConfig(rootfs string) *configs.Config { ReadonlyPaths: []string{ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", }, - Devices: configs.DefaultAutoCreatedDevices, + Devices: specconv.AllowedDevices, Hostname: "integration", Mounts: []*configs.Mount{ { diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 10888b499beb..465fdd21cae4 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -606,11 +606,14 @@ func bindMountDeviceNode(dest string, node *configs.Device) error { // Creates the device node in the rootfs of the container. func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { + if node.Path == "" { + // The node only exists for cgroup reasons, ignore it here. + return nil + } dest := filepath.Join(rootfs, node.Path) if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { return err } - if bind { return bindMountDeviceNode(dest, node) } @@ -628,16 +631,20 @@ func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { func mknodDevice(dest string, node *configs.Device) error { fileMode := node.FileMode switch node.Type { - case 'c', 'u': - fileMode |= unix.S_IFCHR - case 'b': + case configs.BlockDevice: fileMode |= unix.S_IFBLK - case 'p': + case configs.CharDevice: + fileMode |= unix.S_IFCHR + case configs.FifoDevice: fileMode |= unix.S_IFIFO default: return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) } - if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil { + dev, err := node.Mkdev() + if err != nil { + return err + } + if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil { return err } return unix.Chown(dest, int(node.Uid), int(node.Gid)) diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index d98444ad6c72..7338debcb186 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -43,103 +43,147 @@ var mountPropagationMapping = map[string]int{ "": 0, } -var allowedDevices = []*configs.Device{ +// AllowedDevices is the set of devices which are automatically included for +// all containers. +// +// XXX (cyphar) +// This behaviour is at the very least "questionable" (if not outright +// wrong) according to the runtime-spec. +// +// Yes, we have to include certain devices other than the ones the user +// specifies, but several devices listed here are not part of the spec +// (including "mknod for any device"?!). In addition, these rules are +// appended to the user-provided set which means that users *cannot disable +// this behaviour*. +// +// ... unfortunately I'm too scared to change this now because who knows how +// many people depend on this (incorrect and arguably insecure) behaviour. +var AllowedDevices = []*configs.Device{ // allow mknod for any device { - Type: 'c', - Major: wildcard, - Minor: wildcard, - Permissions: "m", - Allow: true, - }, - { - Type: 'b', - Major: wildcard, - Minor: wildcard, - Permissions: "m", - Allow: true, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: "m", + Allow: true, + }, }, { - Type: 'c', - Path: "/dev/null", - Major: 1, - Minor: 3, - Permissions: "rwm", - Allow: true, + DeviceRule: configs.DeviceRule{ + Type: configs.BlockDevice, + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: "m", + Allow: true, + }, }, { - Type: 'c', - Path: "/dev/random", - Major: 1, - Minor: 8, - Permissions: "rwm", - Allow: true, + Path: "/dev/null", + FileMode: 0666, + Uid: 0, + Gid: 0, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 1, + Minor: 3, + Permissions: "rwm", + Allow: true, + }, }, { - Type: 'c', - Path: "/dev/full", - Major: 1, - Minor: 7, - Permissions: "rwm", - Allow: true, + Path: "/dev/random", + FileMode: 0666, + Uid: 0, + Gid: 0, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 1, + Minor: 8, + Permissions: "rwm", + Allow: true, + }, }, { - Type: 'c', - Path: "/dev/tty", - Major: 5, - Minor: 0, - Permissions: "rwm", - Allow: true, + Path: "/dev/full", + FileMode: 0666, + Uid: 0, + Gid: 0, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 1, + Minor: 7, + Permissions: "rwm", + Allow: true, + }, }, { - Type: 'c', - Path: "/dev/zero", - Major: 1, - Minor: 5, - Permissions: "rwm", - Allow: true, + Path: "/dev/tty", + FileMode: 0666, + Uid: 0, + Gid: 0, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 5, + Minor: 0, + Permissions: "rwm", + Allow: true, + }, }, { - Type: 'c', - Path: "/dev/urandom", - Major: 1, - Minor: 9, - Permissions: "rwm", - Allow: true, + Path: "/dev/zero", + FileMode: 0666, + Uid: 0, + Gid: 0, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 1, + Minor: 5, + Permissions: "rwm", + Allow: true, + }, }, { - Path: "/dev/console", - Type: 'c', - Major: 5, - Minor: 1, - Permissions: "rwm", - Allow: true, + Path: "/dev/urandom", + FileMode: 0666, + Uid: 0, + Gid: 0, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 1, + Minor: 9, + Permissions: "rwm", + Allow: true, + }, }, // /dev/pts/ - pts namespaces are "coming soon" { - Path: "", - Type: 'c', - Major: 136, - Minor: wildcard, - Permissions: "rwm", - Allow: true, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 136, + Minor: configs.Wildcard, + Permissions: "rwm", + Allow: true, + }, }, { - Path: "", - Type: 'c', - Major: 5, - Minor: 2, - Permissions: "rwm", - Allow: true, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 5, + Minor: 2, + Permissions: "rwm", + Allow: true, + }, }, // tuntap { - Path: "", - Type: 'c', - Major: 10, - Minor: 200, - Permissions: "rwm", - Allow: true, + DeviceRule: configs.DeviceRule{ + Type: configs.CharDevice, + Major: 10, + Minor: 200, + Permissions: "rwm", + Allow: true, + }, }, } @@ -341,251 +385,198 @@ func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { // In rootless containers, any attempt to make cgroup changes is likely to fail. // libcontainer will validate this but ignores the error. - c.Resources.AllowedDevices = allowedDevices if spec.Linux != nil { r := spec.Linux.Resources - if r == nil { - return c, nil - } - for i, d := range spec.Linux.Resources.Devices { - var ( - t = "a" - major = int64(-1) - minor = int64(-1) - ) - if d.Type != "" { - t = d.Type - } - if d.Major != nil { - major = *d.Major - } - if d.Minor != nil { - minor = *d.Minor - } - if d.Access == "" { - return nil, fmt.Errorf("device access at %d field cannot be empty", i) - } - dt, err := stringToCgroupDeviceRune(t) - if err != nil { - return nil, err - } - dd := &configs.Device{ - Type: dt, - Major: major, - Minor: minor, - Permissions: d.Access, - Allow: d.Allow, - } - c.Resources.Devices = append(c.Resources.Devices, dd) - } - if r.Memory != nil { - if r.Memory.Limit != nil { - c.Resources.Memory = *r.Memory.Limit - } - if r.Memory.Reservation != nil { - c.Resources.MemoryReservation = *r.Memory.Reservation - } - if r.Memory.Swap != nil { - c.Resources.MemorySwap = *r.Memory.Swap - } - if r.Memory.Kernel != nil { - c.Resources.KernelMemory = *r.Memory.Kernel - } - if r.Memory.KernelTCP != nil { - c.Resources.KernelMemoryTCP = *r.Memory.KernelTCP - } - if r.Memory.Swappiness != nil { - c.Resources.MemorySwappiness = r.Memory.Swappiness - } - if r.Memory.DisableOOMKiller != nil { - c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller - } - } - if r.CPU != nil { - if r.CPU.Shares != nil { - c.Resources.CpuShares = *r.CPU.Shares - } - if r.CPU.Quota != nil { - c.Resources.CpuQuota = *r.CPU.Quota - } - if r.CPU.Period != nil { - c.Resources.CpuPeriod = *r.CPU.Period - } - if r.CPU.RealtimeRuntime != nil { - c.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime - } - if r.CPU.RealtimePeriod != nil { - c.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod - } - if r.CPU.Cpus != "" { - c.Resources.CpusetCpus = r.CPU.Cpus + if r != nil { + for i, d := range spec.Linux.Resources.Devices { + var ( + t = "a" + major = int64(-1) + minor = int64(-1) + ) + if d.Type != "" { + t = d.Type + } + if d.Major != nil { + major = *d.Major + } + if d.Minor != nil { + minor = *d.Minor + } + if d.Access == "" { + return nil, fmt.Errorf("device access at %d field cannot be empty", i) + } + dt, err := stringToCgroupDeviceRune(t) + if err != nil { + return nil, err + } + c.Resources.Devices = append(c.Resources.Devices, &configs.DeviceRule{ + Type: dt, + Major: major, + Minor: minor, + Permissions: configs.DevicePermissions(d.Access), + Allow: d.Allow, + }) } - if r.CPU.Mems != "" { - c.Resources.CpusetMems = r.CPU.Mems + if r.Memory != nil { + if r.Memory.Limit != nil { + c.Resources.Memory = *r.Memory.Limit + } + if r.Memory.Reservation != nil { + c.Resources.MemoryReservation = *r.Memory.Reservation + } + if r.Memory.Swap != nil { + c.Resources.MemorySwap = *r.Memory.Swap + } + if r.Memory.Kernel != nil { + c.Resources.KernelMemory = *r.Memory.Kernel + } + if r.Memory.KernelTCP != nil { + c.Resources.KernelMemoryTCP = *r.Memory.KernelTCP + } + if r.Memory.Swappiness != nil { + c.Resources.MemorySwappiness = r.Memory.Swappiness + } + if r.Memory.DisableOOMKiller != nil { + c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller + } } - } - if r.Pids != nil { - c.Resources.PidsLimit = r.Pids.Limit - } - if r.BlockIO != nil { - if r.BlockIO.Weight != nil { - c.Resources.BlkioWeight = *r.BlockIO.Weight + if r.CPU != nil { + if r.CPU.Shares != nil { + c.Resources.CpuShares = *r.CPU.Shares + } + if r.CPU.Quota != nil { + c.Resources.CpuQuota = *r.CPU.Quota + } + if r.CPU.Period != nil { + c.Resources.CpuPeriod = *r.CPU.Period + } + if r.CPU.RealtimeRuntime != nil { + c.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime + } + if r.CPU.RealtimePeriod != nil { + c.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod + } + if r.CPU.Cpus != "" { + c.Resources.CpusetCpus = r.CPU.Cpus + } + if r.CPU.Mems != "" { + c.Resources.CpusetMems = r.CPU.Mems + } } - if r.BlockIO.LeafWeight != nil { - c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight + if r.Pids != nil { + c.Resources.PidsLimit = r.Pids.Limit } - if r.BlockIO.WeightDevice != nil { - for _, wd := range r.BlockIO.WeightDevice { - var weight, leafWeight uint16 - if wd.Weight != nil { - weight = *wd.Weight - } - if wd.LeafWeight != nil { - leafWeight = *wd.LeafWeight + if r.BlockIO != nil { + if r.BlockIO.Weight != nil { + c.Resources.BlkioWeight = *r.BlockIO.Weight + } + if r.BlockIO.LeafWeight != nil { + c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight + } + if r.BlockIO.WeightDevice != nil { + for _, wd := range r.BlockIO.WeightDevice { + var weight, leafWeight uint16 + if wd.Weight != nil { + weight = *wd.Weight + } + if wd.LeafWeight != nil { + leafWeight = *wd.LeafWeight + } + weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight) + c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice) } - weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight) - c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice) } - } - if r.BlockIO.ThrottleReadBpsDevice != nil { - for _, td := range r.BlockIO.ThrottleReadBpsDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice) + if r.BlockIO.ThrottleReadBpsDevice != nil { + for _, td := range r.BlockIO.ThrottleReadBpsDevice { + rate := td.Rate + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) + c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice) + } } - } - if r.BlockIO.ThrottleWriteBpsDevice != nil { - for _, td := range r.BlockIO.ThrottleWriteBpsDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice) + if r.BlockIO.ThrottleWriteBpsDevice != nil { + for _, td := range r.BlockIO.ThrottleWriteBpsDevice { + rate := td.Rate + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) + c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice) + } } - } - if r.BlockIO.ThrottleReadIOPSDevice != nil { - for _, td := range r.BlockIO.ThrottleReadIOPSDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice) + if r.BlockIO.ThrottleReadIOPSDevice != nil { + for _, td := range r.BlockIO.ThrottleReadIOPSDevice { + rate := td.Rate + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) + c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice) + } } - } - if r.BlockIO.ThrottleWriteIOPSDevice != nil { - for _, td := range r.BlockIO.ThrottleWriteIOPSDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice) + if r.BlockIO.ThrottleWriteIOPSDevice != nil { + for _, td := range r.BlockIO.ThrottleWriteIOPSDevice { + rate := td.Rate + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) + c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice) + } } } - } - for _, l := range r.HugepageLimits { - c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{ - Pagesize: l.Pagesize, - Limit: l.Limit, - }) - } - if r.Network != nil { - if r.Network.ClassID != nil { - c.Resources.NetClsClassid = *r.Network.ClassID - } - for _, m := range r.Network.Priorities { - c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{ - Interface: m.Name, - Priority: int64(m.Priority), + for _, l := range r.HugepageLimits { + c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{ + Pagesize: l.Pagesize, + Limit: l.Limit, }) } + if r.Network != nil { + if r.Network.ClassID != nil { + c.Resources.NetClsClassid = *r.Network.ClassID + } + for _, m := range r.Network.Priorities { + c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{ + Interface: m.Name, + Priority: int64(m.Priority), + }) + } + } } } - // append the default allowed devices to the end of the list - c.Resources.Devices = append(c.Resources.Devices, allowedDevices...) + // Append the default allowed devices to the end of the list. + // XXX: Really this should be prefixed... + for _, device := range AllowedDevices { + c.Resources.Devices = append(c.Resources.Devices, &device.DeviceRule) + } return c, nil } -func stringToCgroupDeviceRune(s string) (rune, error) { +func stringToCgroupDeviceRune(s string) (configs.DeviceType, error) { switch s { case "a": - return 'a', nil + return configs.WildcardDevice, nil case "b": - return 'b', nil + return configs.BlockDevice, nil case "c": - return 'c', nil + return configs.CharDevice, nil default: return 0, fmt.Errorf("invalid cgroup device type %q", s) } } -func stringToDeviceRune(s string) (rune, error) { +func stringToDeviceRune(s string) (configs.DeviceType, error) { switch s { case "p": - return 'p', nil - case "u": - return 'u', nil + return configs.FifoDevice, nil + case "u", "c": + return configs.CharDevice, nil case "b": - return 'b', nil - case "c": - return 'c', nil + return configs.BlockDevice, nil default: return 0, fmt.Errorf("invalid device type %q", s) } } func createDevices(spec *specs.Spec, config *configs.Config) error { - // add whitelisted devices - config.Devices = []*configs.Device{ - { - Type: 'c', - Path: "/dev/null", - Major: 1, - Minor: 3, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/random", - Major: 1, - Minor: 8, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/full", - Major: 1, - Minor: 7, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/tty", - Major: 5, - Minor: 0, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/zero", - Major: 1, - Minor: 5, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, - { - Type: 'c', - Path: "/dev/urandom", - Major: 1, - Minor: 9, - FileMode: 0666, - Uid: 0, - Gid: 0, - }, + // Add default set of devices. + for _, device := range AllowedDevices { + if device.Path != "" { + config.Devices = append(config.Devices, device) + } } - // merge in additional devices from the spec + // Merge in additional devices from the spec. if spec.Linux != nil { for _, d := range spec.Linux.Devices { var uid, gid uint32 @@ -605,10 +596,12 @@ func createDevices(spec *specs.Spec, config *configs.Config) error { filemode = *d.FileMode } device := &configs.Device{ - Type: dt, + DeviceRule: configs.DeviceRule{ + Type: dt, + Major: d.Major, + Minor: d.Minor, + }, Path: d.Path, - Major: d.Major, - Minor: d.Minor, FileMode: filemode, Uid: uid, Gid: gid, -- 2.26.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor