Merge pull request #10066 from hashicorp/b-nomad

vendor: patch Nomad for cross compilability
Mitchell Hashimoto 2016-11-11 13:54:57 -08:00 committed by GitHub
commit f4cf443368
189 changed files with 0 additions and 31392 deletions

View File

@@ -1,579 +0,0 @@
package client
import (
"fmt"
"log"
"os"
"path/filepath"
"sync"
"time"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver"
"github.com/hashicorp/nomad/nomad/structs"
cstructs "github.com/hashicorp/nomad/client/structs"
)
const (
// taskReceivedSyncLimit is how long the client will wait before sending
// that a task was received to the server. The client does not immediately
// send that the task was received to the server because another transition
// to running or failed is likely to occur immediately after and a single
// update will transfer all past state information. If no other transition
// has occurred by this limit, we send the update to the server.
taskReceivedSyncLimit = 30 * time.Second
)
// AllocStateUpdater is used to update the status of an allocation
type AllocStateUpdater func(alloc *structs.Allocation)
type AllocStatsReporter interface {
LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
}
// AllocRunner is used to wrap an allocation and provide the execution context.
type AllocRunner struct {
config *config.Config
updater AllocStateUpdater
logger *log.Logger
alloc *structs.Allocation
allocClientStatus string // Explicit status of allocation. Set when there are failures
allocClientDescription string
allocLock sync.Mutex
dirtyCh chan struct{}
ctx *driver.ExecContext
ctxLock sync.Mutex
tasks map[string]*TaskRunner
taskStates map[string]*structs.TaskState
restored map[string]struct{}
taskLock sync.RWMutex
taskStatusLock sync.RWMutex
updateCh chan *structs.Allocation
destroy bool
destroyCh chan struct{}
destroyLock sync.Mutex
waitCh chan struct{}
}
// allocRunnerState is used to snapshot the state of the alloc runner
type allocRunnerState struct {
Version string
Alloc *structs.Allocation
AllocClientStatus string
AllocClientDescription string
TaskStates map[string]*structs.TaskState
Context *driver.ExecContext
}
// NewAllocRunner is used to create a new allocation runner
func NewAllocRunner(logger *log.Logger, config *config.Config, updater AllocStateUpdater,
alloc *structs.Allocation) *AllocRunner {
ar := &AllocRunner{
config: config,
updater: updater,
logger: logger,
alloc: alloc,
dirtyCh: make(chan struct{}, 1),
tasks: make(map[string]*TaskRunner),
taskStates: copyTaskStates(alloc.TaskStates),
restored: make(map[string]struct{}),
updateCh: make(chan *structs.Allocation, 64),
destroyCh: make(chan struct{}),
waitCh: make(chan struct{}),
}
return ar
}
// stateFilePath returns the path to our state file
func (r *AllocRunner) stateFilePath() string {
r.allocLock.Lock()
defer r.allocLock.Unlock()
path := filepath.Join(r.config.StateDir, "alloc", r.alloc.ID, "state.json")
return path
}
// RestoreState is used to restore the state of the alloc runner
func (r *AllocRunner) RestoreState() error {
// Load the snapshot
var snap allocRunnerState
if err := restoreState(r.stateFilePath(), &snap); err != nil {
return err
}
// Restore fields
r.alloc = snap.Alloc
r.ctx = snap.Context
r.allocClientStatus = snap.AllocClientStatus
r.allocClientDescription = snap.AllocClientDescription
r.taskStates = snap.TaskStates
var snapshotErrors multierror.Error
if r.alloc == nil {
snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation"))
}
if r.ctx == nil {
snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil context"))
}
if e := snapshotErrors.ErrorOrNil(); e != nil {
return e
}
// Restore the task runners
var mErr multierror.Error
for name, state := range r.taskStates {
// Mark the task as restored.
r.restored[name] = struct{}{}
task := &structs.Task{Name: name}
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(),
task)
r.tasks[name] = tr
// Skip tasks in terminal states.
if state.State == structs.TaskStateDead {
continue
}
if err := tr.RestoreState(); err != nil {
r.logger.Printf("[ERR] client: failed to restore state for alloc %s task '%s': %v", r.alloc.ID, name, err)
mErr.Errors = append(mErr.Errors, err)
} else if !r.alloc.TerminalStatus() {
// Only start if the alloc isn't in a terminal status.
go tr.Run()
}
}
return mErr.ErrorOrNil()
}
// SaveState is used to snapshot the state of the alloc runner: it persists
// the Alloc Runner's own state and then the state of each Task Runner
// associated with the Alloc.
func (r *AllocRunner) SaveState() error {
if err := r.saveAllocRunnerState(); err != nil {
return err
}
// Save state for each task
runners := r.getTaskRunners()
var mErr multierror.Error
for _, tr := range runners {
if err := r.saveTaskRunnerState(tr); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
}
return mErr.ErrorOrNil()
}
func (r *AllocRunner) saveAllocRunnerState() error {
// Create the snapshot.
r.taskStatusLock.RLock()
states := copyTaskStates(r.taskStates)
r.taskStatusLock.RUnlock()
alloc := r.Alloc()
r.allocLock.Lock()
allocClientStatus := r.allocClientStatus
allocClientDescription := r.allocClientDescription
r.allocLock.Unlock()
r.ctxLock.Lock()
ctx := r.ctx
r.ctxLock.Unlock()
snap := allocRunnerState{
Version: r.config.Version,
Alloc: alloc,
Context: ctx,
AllocClientStatus: allocClientStatus,
AllocClientDescription: allocClientDescription,
TaskStates: states,
}
return persistState(r.stateFilePath(), &snap)
}
func (r *AllocRunner) saveTaskRunnerState(tr *TaskRunner) error {
if err := tr.SaveState(); err != nil {
return fmt.Errorf("failed to save state for alloc %s task '%s': %v",
r.alloc.ID, tr.task.Name, err)
}
return nil
}
// DestroyState is used to clean up after ourselves
func (r *AllocRunner) DestroyState() error {
return os.RemoveAll(filepath.Dir(r.stateFilePath()))
}
// DestroyContext is used to destroy the context
func (r *AllocRunner) DestroyContext() error {
return r.ctx.AllocDir.Destroy()
}
// copyTaskStates returns a copy of the passed task states.
func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState {
copy := make(map[string]*structs.TaskState, len(states))
for task, state := range states {
copy[task] = state.Copy()
}
return copy
}
// Alloc returns the associated allocation
func (r *AllocRunner) Alloc() *structs.Allocation {
r.allocLock.Lock()
alloc := r.alloc.Copy()
// The status has explicitly been set.
if r.allocClientStatus != "" || r.allocClientDescription != "" {
alloc.ClientStatus = r.allocClientStatus
alloc.ClientDescription = r.allocClientDescription
r.allocLock.Unlock()
return alloc
}
r.allocLock.Unlock()
// Scan the task states to determine the status of the alloc
var pending, running, dead, failed bool
r.taskStatusLock.RLock()
alloc.TaskStates = copyTaskStates(r.taskStates)
for _, state := range r.taskStates {
switch state.State {
case structs.TaskStateRunning:
running = true
case structs.TaskStatePending:
pending = true
case structs.TaskStateDead:
if state.Failed() {
failed = true
} else {
dead = true
}
}
}
r.taskStatusLock.RUnlock()
// Determine the alloc status
if failed {
alloc.ClientStatus = structs.AllocClientStatusFailed
} else if running {
alloc.ClientStatus = structs.AllocClientStatusRunning
} else if pending {
alloc.ClientStatus = structs.AllocClientStatusPending
} else if dead {
alloc.ClientStatus = structs.AllocClientStatusComplete
}
return alloc
}
// dirtySyncState watches for the state being marked dirty and triggers a sync
func (r *AllocRunner) dirtySyncState() {
for {
select {
case <-r.dirtyCh:
r.syncStatus()
case <-r.destroyCh:
return
}
}
}
// syncStatus is used to push the current status to the server and persist it to disk when it changes
func (r *AllocRunner) syncStatus() error {
// Get a copy of our alloc, update status server side and sync to disk
alloc := r.Alloc()
r.updater(alloc)
return r.saveAllocRunnerState()
}
// setStatus is used to update the allocation status
func (r *AllocRunner) setStatus(status, desc string) {
r.allocLock.Lock()
r.allocClientStatus = status
r.allocClientDescription = desc
r.allocLock.Unlock()
select {
case r.dirtyCh <- struct{}{}:
default:
}
}
// setTaskState is used to set the status of a task
func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent) {
r.taskStatusLock.Lock()
defer r.taskStatusLock.Unlock()
taskState, ok := r.taskStates[taskName]
if !ok {
taskState = &structs.TaskState{}
r.taskStates[taskName] = taskState
}
// Set the tasks state.
taskState.State = state
r.appendTaskEvent(taskState, event)
// If the task failed, we should kill all the other tasks in the task group.
if state == structs.TaskStateDead && taskState.Failed() {
var destroyingTasks []string
for task, tr := range r.tasks {
if task != taskName {
destroyingTasks = append(destroyingTasks, task)
tr.Destroy()
}
}
if len(destroyingTasks) > 0 {
r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, destroyingTasks)
}
}
select {
case r.dirtyCh <- struct{}{}:
default:
}
}
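// Editorial note (not part of the original vendored file): the select with an
// empty default above is a coalescing-notification idiom. Because dirtyCh is
// buffered with capacity 1, many rapid state changes collapse into a single
// pending "dirty" signal and the writer never blocks. A minimal standalone
// sketch of the same idea, using only the standard library:
//
//	package main
//
//	import (
//		"fmt"
//		"time"
//	)
//
//	func main() {
//		dirty := make(chan struct{}, 1)
//		markDirty := func() {
//			select {
//			case dirty <- struct{}{}:
//			default: // a sync is already pending; coalesce this signal
//			}
//		}
//		go func() {
//			for range dirty {
//				fmt.Println("syncing state")
//			}
//		}()
//		for i := 0; i < 5; i++ {
//			markDirty() // five updates, but at most one queued sync at a time
//		}
//		time.Sleep(50 * time.Millisecond)
//	}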
// appendTaskEvent updates the task status by appending the new event.
func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) {
capacity := 10
if state.Events == nil {
state.Events = make([]*structs.TaskEvent, 0, capacity)
}
// If we hit capacity, then shift it.
if len(state.Events) == capacity {
old := state.Events
state.Events = make([]*structs.TaskEvent, 0, capacity)
state.Events = append(state.Events, old[1:]...)
}
state.Events = append(state.Events, event)
}
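// Editorial note (not part of the original vendored file): appendTaskEvent
// keeps at most 10 events per task, dropping the oldest entry once the
// capacity is hit. A minimal standalone sketch of that bounded-buffer
// behavior with plain strings:
//
//	package main
//
//	import "fmt"
//
//	func appendBounded(events []string, event string, capacity int) []string {
//		if len(events) == capacity {
//			// Shift off the oldest entry to make room for the new one.
//			events = append(events[:0:0], events[1:]...)
//		}
//		return append(events, event)
//	}
//
//	func main() {
//		var events []string
//		for i := 1; i <= 12; i++ {
//			events = appendBounded(events, fmt.Sprintf("event-%d", i), 10)
//		}
//		fmt.Println(events) // [event-3 ... event-12]: the first two were dropped
//	}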
// Run is a long running goroutine used to manage an allocation
func (r *AllocRunner) Run() {
defer close(r.waitCh)
go r.dirtySyncState()
// Find the task group to run in the allocation
alloc := r.alloc
tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
if tg == nil {
r.logger.Printf("[ERR] client: alloc '%s' for missing task group '%s'", alloc.ID, alloc.TaskGroup)
r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup))
return
}
// Create the execution context
r.ctxLock.Lock()
if r.ctx == nil {
allocDir := allocdir.NewAllocDir(filepath.Join(r.config.AllocDir, r.alloc.ID))
if err := allocDir.Build(tg.Tasks); err != nil {
r.logger.Printf("[WARN] client: failed to build task directories: %v", err)
r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
r.ctxLock.Unlock()
return
}
r.ctx = driver.NewExecContext(allocDir, r.alloc.ID)
}
r.ctxLock.Unlock()
// Check if the allocation is in a terminal status. In this case, we don't
// start any of the task runners and directly wait for the destroy signal to
// clean up the allocation.
if alloc.TerminalStatus() {
r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.alloc.ID)
r.handleDestroy()
r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.alloc.ID)
return
}
// Start the task runners
r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.alloc.ID)
r.taskLock.Lock()
for _, task := range tg.Tasks {
if _, ok := r.restored[task.Name]; ok {
continue
}
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(),
task.Copy())
r.tasks[task.Name] = tr
tr.MarkReceived()
go tr.Run()
}
r.taskLock.Unlock()
OUTER:
// Wait for updates
for {
select {
case update := <-r.updateCh:
// Store the updated allocation.
r.allocLock.Lock()
r.alloc = update
r.allocLock.Unlock()
// Check if we're in a terminal status
if update.TerminalStatus() {
break OUTER
}
// Update the task groups
runners := r.getTaskRunners()
for _, tr := range runners {
tr.Update(update)
}
case <-r.destroyCh:
break OUTER
}
}
// Destroy each sub-task
runners := r.getTaskRunners()
for _, tr := range runners {
tr.Destroy()
}
// Wait for termination of the task runners
for _, tr := range runners {
<-tr.WaitCh()
}
// Final state sync
r.syncStatus()
// Block until we should destroy the state of the alloc
r.handleDestroy()
r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.alloc.ID)
}
// handleDestroy blocks till the AllocRunner should be destroyed and does the
// necessary cleanup.
func (r *AllocRunner) handleDestroy() {
select {
case <-r.destroyCh:
if err := r.DestroyContext(); err != nil {
r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v",
r.alloc.ID, err)
}
if err := r.DestroyState(); err != nil {
r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v",
r.alloc.ID, err)
}
}
}
// Update is used to update the allocation being run by this runner
func (r *AllocRunner) Update(update *structs.Allocation) {
select {
case r.updateCh <- update:
default:
r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID)
}
}
// StatsReporter returns an interface to query resource usage statistics of an
// allocation
func (r *AllocRunner) StatsReporter() AllocStatsReporter {
return r
}
// getTaskRunners is a helper that returns a copy of the task runners list using
// the taskLock.
func (r *AllocRunner) getTaskRunners() []*TaskRunner {
// Get the task runners
r.taskLock.RLock()
defer r.taskLock.RUnlock()
runners := make([]*TaskRunner, 0, len(r.tasks))
for _, tr := range r.tasks {
runners = append(runners, tr)
}
return runners
}
// LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set
// the allocation stats will only include the given task.
func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
astat := &cstructs.AllocResourceUsage{
Tasks: make(map[string]*cstructs.TaskResourceUsage),
}
var flat []*cstructs.TaskResourceUsage
if taskFilter != "" {
r.taskLock.RLock()
tr, ok := r.tasks[taskFilter]
r.taskLock.RUnlock()
if !ok {
return nil, fmt.Errorf("allocation %q has no task %q", r.alloc.ID, taskFilter)
}
l := tr.LatestResourceUsage()
if l != nil {
astat.Tasks[taskFilter] = l
flat = []*cstructs.TaskResourceUsage{l}
astat.Timestamp = l.Timestamp
}
} else {
// Get the task runners
runners := r.getTaskRunners()
for _, tr := range runners {
l := tr.LatestResourceUsage()
if l != nil {
astat.Tasks[tr.task.Name] = l
flat = append(flat, l)
if l.Timestamp > astat.Timestamp {
astat.Timestamp = l.Timestamp
}
}
}
}
astat.ResourceUsage = sumTaskResourceUsage(flat)
return astat, nil
}
// sumTaskResourceUsage takes a set of task resource usages and sums them
func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage {
summed := &cstructs.ResourceUsage{
MemoryStats: &cstructs.MemoryStats{},
CpuStats: &cstructs.CpuStats{},
}
for _, usage := range usages {
summed.Add(usage.ResourceUsage)
}
return summed
}
// shouldUpdate takes the AllocModifyIndex of an allocation sent from the server and
// checks if the current running allocation is behind and should be updated.
func (r *AllocRunner) shouldUpdate(serverIndex uint64) bool {
r.allocLock.Lock()
defer r.allocLock.Unlock()
return r.alloc.AllocModifyIndex < serverIndex
}
// Destroy is used to indicate that the allocation context should be destroyed
func (r *AllocRunner) Destroy() {
r.destroyLock.Lock()
defer r.destroyLock.Unlock()
if r.destroy {
return
}
r.destroy = true
close(r.destroyCh)
}
// WaitCh returns a channel to wait for termination
func (r *AllocRunner) WaitCh() <-chan struct{} {
return r.waitCh
}
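// Editorial note (not part of the original vendored file): Destroy and WaitCh
// above implement a common Go shutdown idiom: an idempotent, mutex-guarded
// close(destroyCh) signals shutdown, and waitCh is closed when Run returns so
// callers can block on completion. A minimal standalone sketch:
//
//	package main
//
//	import (
//		"fmt"
//		"sync"
//	)
//
//	type runner struct {
//		destroy     bool
//		destroyCh   chan struct{}
//		destroyLock sync.Mutex
//		waitCh      chan struct{}
//	}
//
//	func (r *runner) Run() {
//		defer close(r.waitCh) // signal completion to WaitCh readers
//		<-r.destroyCh         // block until Destroy is called
//		fmt.Println("cleaning up")
//	}
//
//	func (r *runner) Destroy() {
//		r.destroyLock.Lock()
//		defer r.destroyLock.Unlock()
//		if r.destroy {
//			return // already destroyed; closing twice would panic
//		}
//		r.destroy = true
//		close(r.destroyCh)
//	}
//
//	func main() {
//		r := &runner{destroyCh: make(chan struct{}), waitCh: make(chan struct{})}
//		go r.Run()
//		r.Destroy()
//		r.Destroy() // safe: idempotent
//		<-r.waitCh
//	}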

View File

@@ -1,399 +0,0 @@
package allocdir
import (
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"time"
"gopkg.in/tomb.v1"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hpcloud/tail/watch"
)
var (
// The name of the directory that is shared across tasks in a task group.
SharedAllocName = "alloc"
// Name of the directory where logs of Tasks are written
LogDirName = "logs"
// The set of directories that exist inside each shared alloc directory.
SharedAllocDirs = []string{LogDirName, "tmp", "data"}
// The name of the directory that exists inside each task directory
// regardless of driver.
TaskLocal = "local"
// TaskDirs is the set of directories created in each task's directory.
TaskDirs = []string{"tmp"}
)
type AllocDir struct {
// AllocDir is the directory used for storing any state
// of this allocation. It will be purged on alloc destroy.
AllocDir string
// The shared directory is available to all tasks within the same task
// group.
SharedDir string
// TaskDirs is a mapping of task names to their non-shared directory.
TaskDirs map[string]string
}
// AllocFileInfo holds information about a file inside the AllocDir
type AllocFileInfo struct {
Name string
IsDir bool
Size int64
FileMode string
ModTime time.Time
}
// AllocDirFS exposes file operations on the alloc dir
type AllocDirFS interface {
List(path string) ([]*AllocFileInfo, error)
Stat(path string) (*AllocFileInfo, error)
ReadAt(path string, offset int64) (io.ReadCloser, error)
BlockUntilExists(path string, t *tomb.Tomb) chan error
ChangeEvents(path string, curOffset int64, t *tomb.Tomb) (*watch.FileChanges, error)
}
func NewAllocDir(allocDir string) *AllocDir {
d := &AllocDir{AllocDir: allocDir, TaskDirs: make(map[string]string)}
d.SharedDir = filepath.Join(d.AllocDir, SharedAllocName)
return d
}
// Tears down the previously built directory structure.
func (d *AllocDir) Destroy() error {
// Unmount all mounted shared alloc dirs.
var mErr multierror.Error
if err := d.UnmountAll(); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
if err := os.RemoveAll(d.AllocDir); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
return mErr.ErrorOrNil()
}
func (d *AllocDir) UnmountAll() error {
var mErr multierror.Error
for _, dir := range d.TaskDirs {
// Check if the directory has the shared alloc mounted.
taskAlloc := filepath.Join(dir, SharedAllocName)
if d.pathExists(taskAlloc) {
if err := d.unmountSharedDir(taskAlloc); err != nil {
mErr.Errors = append(mErr.Errors,
fmt.Errorf("failed to unmount shared alloc dir %q: %v", taskAlloc, err))
} else if err := os.RemoveAll(taskAlloc); err != nil {
mErr.Errors = append(mErr.Errors,
fmt.Errorf("failed to delete shared alloc dir %q: %v", taskAlloc, err))
}
}
// Unmount dev/ and proc/ if they have been mounted.
d.unmountSpecialDirs(dir)
}
return mErr.ErrorOrNil()
}
// Given a list of tasks, build the correct alloc directory structure.
func (d *AllocDir) Build(tasks []*structs.Task) error {
// Make the alloc directory, owned by the nomad process.
if err := os.MkdirAll(d.AllocDir, 0755); err != nil {
return fmt.Errorf("Failed to make the alloc directory %v: %v", d.AllocDir, err)
}
// Make the shared directory and make it available to all users/groups.
if err := os.MkdirAll(d.SharedDir, 0777); err != nil {
return err
}
// Make the shared directory have non-root permissions.
if err := d.dropDirPermissions(d.SharedDir); err != nil {
return err
}
for _, dir := range SharedAllocDirs {
p := filepath.Join(d.SharedDir, dir)
if err := os.MkdirAll(p, 0777); err != nil {
return err
}
if err := d.dropDirPermissions(p); err != nil {
return err
}
}
// Make the task directories.
for _, t := range tasks {
taskDir := filepath.Join(d.AllocDir, t.Name)
if err := os.MkdirAll(taskDir, 0777); err != nil {
return err
}
// Make the task directory have non-root permissions.
if err := d.dropDirPermissions(taskDir); err != nil {
return err
}
// Create a local directory that each task can use.
local := filepath.Join(taskDir, TaskLocal)
if err := os.MkdirAll(local, 0777); err != nil {
return err
}
if err := d.dropDirPermissions(local); err != nil {
return err
}
d.TaskDirs[t.Name] = taskDir
// Create the directories that should be in every task.
for _, dir := range TaskDirs {
local := filepath.Join(taskDir, dir)
if err := os.MkdirAll(local, 0777); err != nil {
return err
}
if err := d.dropDirPermissions(local); err != nil {
return err
}
}
}
return nil
}
// Embed takes a mapping of absolute directory or file paths on the host to
// their intended, relative location within the task directory. Embed attempts
// to hardlink and falls back to copying. If the path exists on the host but
// can't be embedded, an error is returned.
func (d *AllocDir) Embed(task string, entries map[string]string) error {
taskdir, ok := d.TaskDirs[task]
if !ok {
return fmt.Errorf("Task directory doesn't exist for task %v", task)
}
subdirs := make(map[string]string)
for source, dest := range entries {
// Check to see if directory exists on host.
s, err := os.Stat(source)
if os.IsNotExist(err) {
continue
}
// Embedding a single file
if !s.IsDir() {
destDir := filepath.Join(taskdir, filepath.Dir(dest))
if err := os.MkdirAll(destDir, s.Mode().Perm()); err != nil {
return fmt.Errorf("Couldn't create destination directory %v: %v", destDir, err)
}
// Copy the file.
taskEntry := filepath.Join(destDir, filepath.Base(dest))
if err := d.linkOrCopy(source, taskEntry, s.Mode().Perm()); err != nil {
return err
}
continue
}
// Create destination directory.
destDir := filepath.Join(taskdir, dest)
if err := os.MkdirAll(destDir, s.Mode().Perm()); err != nil {
return fmt.Errorf("Couldn't create destination directory %v: %v", destDir, err)
}
// Enumerate the files in source.
dirEntries, err := ioutil.ReadDir(source)
if err != nil {
return fmt.Errorf("Couldn't read directory %v: %v", source, err)
}
for _, entry := range dirEntries {
hostEntry := filepath.Join(source, entry.Name())
taskEntry := filepath.Join(destDir, filepath.Base(hostEntry))
if entry.IsDir() {
subdirs[hostEntry] = filepath.Join(dest, filepath.Base(hostEntry))
continue
}
// Check if entry exists. This can happen if restarting a failed
// task.
if _, err := os.Lstat(taskEntry); err == nil {
continue
}
if !entry.Mode().IsRegular() {
// If it is a symlink we can create it, otherwise we skip it.
if entry.Mode()&os.ModeSymlink == 0 {
continue
}
link, err := os.Readlink(hostEntry)
if err != nil {
return fmt.Errorf("Couldn't resolve symlink for %v: %v", source, err)
}
if err := os.Symlink(link, taskEntry); err != nil {
// Symlinking twice
if err.(*os.LinkError).Err.Error() != "file exists" {
return fmt.Errorf("Couldn't create symlink: %v", err)
}
}
continue
}
if err := d.linkOrCopy(hostEntry, taskEntry, entry.Mode().Perm()); err != nil {
return err
}
}
}
// Recurse on self to copy subdirectories.
if len(subdirs) != 0 {
return d.Embed(task, subdirs)
}
return nil
}
// MountSharedDir mounts the shared directory into the specified task's
// directory. Mount is documented at an OS level in their respective
// implementation files.
func (d *AllocDir) MountSharedDir(task string) error {
taskDir, ok := d.TaskDirs[task]
if !ok {
return fmt.Errorf("No task directory exists for %v", task)
}
taskLoc := filepath.Join(taskDir, SharedAllocName)
if err := d.mountSharedDir(taskLoc); err != nil {
return fmt.Errorf("Failed to mount shared directory for task %v: %v", task, err)
}
return nil
}
// LogDir returns the log dir in the current allocation directory
func (d *AllocDir) LogDir() string {
return filepath.Join(d.AllocDir, SharedAllocName, LogDirName)
}
// List returns the list of files at a path relative to the alloc dir
func (d *AllocDir) List(path string) ([]*AllocFileInfo, error) {
p := filepath.Join(d.AllocDir, path)
finfos, err := ioutil.ReadDir(p)
if err != nil {
return []*AllocFileInfo{}, err
}
files := make([]*AllocFileInfo, len(finfos))
for idx, info := range finfos {
files[idx] = &AllocFileInfo{
Name: info.Name(),
IsDir: info.IsDir(),
Size: info.Size(),
FileMode: info.Mode().String(),
ModTime: info.ModTime(),
}
}
return files, err
}
// Stat returns information about the file at a path relative to the alloc dir
func (d *AllocDir) Stat(path string) (*AllocFileInfo, error) {
p := filepath.Join(d.AllocDir, path)
info, err := os.Stat(p)
if err != nil {
return nil, err
}
return &AllocFileInfo{
Size: info.Size(),
Name: info.Name(),
IsDir: info.IsDir(),
FileMode: info.Mode().String(),
ModTime: info.ModTime(),
}, nil
}
// ReadAt returns a reader for a file at the path relative to the alloc dir
func (d *AllocDir) ReadAt(path string, offset int64) (io.ReadCloser, error) {
p := filepath.Join(d.AllocDir, path)
f, err := os.Open(p)
if err != nil {
return nil, err
}
if _, err := f.Seek(offset, 0); err != nil {
return nil, fmt.Errorf("can't seek to offset %q: %v", offset, err)
}
return f, nil
}
// BlockUntilExists blocks until the passed file, relative to the allocation
// directory, exists. The block can be cancelled with the passed tomb.
func (d *AllocDir) BlockUntilExists(path string, t *tomb.Tomb) chan error {
// Get the path relative to the alloc directory
p := filepath.Join(d.AllocDir, path)
watcher := getFileWatcher(p)
returnCh := make(chan error, 1)
go func() {
returnCh <- watcher.BlockUntilExists(t)
close(returnCh)
}()
return returnCh
}
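// Editorial note (not part of the original vendored file): BlockUntilExists
// returns a buffered channel that a goroutine fills in later, so the caller
// can select on it alongside other events such as a timeout. A minimal
// standalone sketch of that async-result-channel idiom, polling with os.Stat
// instead of the tail watcher used above:
//
//	package main
//
//	import (
//		"fmt"
//		"os"
//		"time"
//	)
//
//	func waitForFile(path string) chan error {
//		result := make(chan error, 1) // buffered: the sender never blocks
//		go func() {
//			defer close(result)
//			for {
//				if _, err := os.Stat(path); err == nil {
//					result <- nil
//					return
//				}
//				time.Sleep(100 * time.Millisecond)
//			}
//		}()
//		return result
//	}
//
//	func main() {
//		select {
//		case err := <-waitForFile("/etc/hosts"):
//			fmt.Println("file exists, err:", err)
//		case <-time.After(2 * time.Second):
//			fmt.Println("timed out waiting for file")
//		}
//	}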
// ChangeEvents watches for changes to the passed path relative to the
// allocation directory. The offset should be the last read offset. The tomb is
// used to clean up the watch.
func (d *AllocDir) ChangeEvents(path string, curOffset int64, t *tomb.Tomb) (*watch.FileChanges, error) {
// Get the path relative to the alloc directory
p := filepath.Join(d.AllocDir, path)
watcher := getFileWatcher(p)
return watcher.ChangeEvents(t, curOffset)
}
// getFileWatcher returns a FileWatcher for the given path.
func getFileWatcher(path string) watch.FileWatcher {
return watch.NewPollingFileWatcher(path)
}
func fileCopy(src, dst string, perm os.FileMode) error {
// Do a simple copy.
srcFile, err := os.Open(src)
if err != nil {
return fmt.Errorf("Couldn't open src file %v: %v", src, err)
}
// Close both files when the copy finishes so file descriptors aren't leaked.
defer srcFile.Close()
dstFile, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE, perm)
if err != nil {
return fmt.Errorf("Couldn't create destination file %v: %v", dst, err)
}
defer dstFile.Close()
if _, err := io.Copy(dstFile, srcFile); err != nil {
return fmt.Errorf("Couldn't copy %v to %v: %v", src, dst, err)
}
return nil
}
// pathExists is a helper function to check if the path exists.
func (d *AllocDir) pathExists(path string) bool {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
return false
}
}
return true
}

View File

@@ -1,26 +0,0 @@
package allocdir
import (
"syscall"
)
// Hardlinks the shared directory. As a consequence, the shared directory and
// the task directory must be on the same filesystem.
func (d *AllocDir) mountSharedDir(dir string) error {
return syscall.Link(d.SharedDir, dir)
}
func (d *AllocDir) unmountSharedDir(dir string) error {
return syscall.Unlink(dir)
}
// MountSpecialDirs mounts the dev and proc file system on the chroot of the
// task. It's a no-op on darwin.
func (d *AllocDir) MountSpecialDirs(taskDir string) error {
return nil
}
// unmountSpecialDirs unmounts the dev and proc file system from the chroot
func (d *AllocDir) unmountSpecialDirs(taskDir string) error {
return nil
}

View File

@@ -1,26 +0,0 @@
package allocdir
import (
"syscall"
)
// Hardlinks the shared directory. As a consequence, the shared directory and
// the task directory must be on the same filesystem.
func (d *AllocDir) mountSharedDir(dir string) error {
return syscall.Link(d.SharedDir, dir)
}
func (d *AllocDir) unmountSharedDir(dir string) error {
return syscall.Unlink(dir)
}
// MountSpecialDirs mounts the dev and proc file system on the chroot of the
// task. It's a no-op on FreeBSD right now.
func (d *AllocDir) MountSpecialDirs(taskDir string) error {
return nil
}
// unmountSpecialDirs unmounts the dev and proc file system from the chroot
func (d *AllocDir) unmountSpecialDirs(taskDir string) error {
return nil
}

View File

@@ -1,79 +0,0 @@
package allocdir
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/hashicorp/go-multierror"
)
// Bind mounts the shared directory into the task directory. Must be root to
// run.
func (d *AllocDir) mountSharedDir(taskDir string) error {
if err := os.MkdirAll(taskDir, 0777); err != nil {
return err
}
return syscall.Mount(d.SharedDir, taskDir, "", syscall.MS_BIND, "")
}
func (d *AllocDir) unmountSharedDir(dir string) error {
return syscall.Unmount(dir, 0)
}
// MountSpecialDirs mounts the dev and proc file system from the host to the
// chroot
func (d *AllocDir) MountSpecialDirs(taskDir string) error {
// Mount dev
dev := filepath.Join(taskDir, "dev")
if !d.pathExists(dev) {
if err := os.MkdirAll(dev, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", dev, err)
}
if err := syscall.Mount("none", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err)
}
}
// Mount proc
proc := filepath.Join(taskDir, "proc")
if !d.pathExists(proc) {
if err := os.MkdirAll(proc, 0777); err != nil {
return fmt.Errorf("Mkdir(%v) failed: %v", proc, err)
}
if err := syscall.Mount("none", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err)
}
}
return nil
}
// unmountSpecialDirs unmounts the dev and proc file system from the chroot
func (d *AllocDir) unmountSpecialDirs(taskDir string) error {
errs := new(multierror.Error)
dev := filepath.Join(taskDir, "dev")
if d.pathExists(dev) {
if err := syscall.Unmount(dev, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err))
} else if err := os.RemoveAll(dev); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to delete dev directory (%v): %v", dev, err))
}
}
// Unmount proc.
proc := filepath.Join(taskDir, "proc")
if d.pathExists(proc) {
if err := syscall.Unmount(proc, 0); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err))
} else if err := os.RemoveAll(proc); err != nil {
errs = multierror.Append(errs, fmt.Errorf("Failed to delete proc directory (%v): %v", dev, err))
}
}
return errs.ErrorOrNil()
}
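// Editorial note (not part of the original vendored file): unmountSpecialDirs
// keeps going after individual failures and reports them all at once via
// go-multierror. A minimal standalone sketch of the same accumulate-errors
// pattern using errors.Join from the standard library (Go 1.20+):
//
//	package main
//
//	import (
//		"errors"
//		"fmt"
//		"os"
//	)
//
//	func removeAllOf(paths ...string) error {
//		var errs []error
//		for _, p := range paths {
//			if err := os.RemoveAll(p); err != nil {
//				// Record the failure and keep cleaning up the rest.
//				errs = append(errs, fmt.Errorf("failed to remove %q: %w", p, err))
//			}
//		}
//		return errors.Join(errs...) // nil when every removal succeeded
//	}
//
//	func main() {
//		if err := removeAllOf(os.TempDir()+"/a", os.TempDir()+"/b"); err != nil {
//			fmt.Println(err)
//		}
//	}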

View File

@@ -1,81 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
// Functions shared between the Unix-like platforms listed in the build tag above.
package allocdir
import (
"fmt"
"os"
"os/user"
"path/filepath"
"strconv"
"golang.org/x/sys/unix"
)
var (
// Path inside container for mounted directory shared across tasks in a task group.
SharedAllocContainerPath = filepath.Join("/", SharedAllocName)
// Path inside container for mounted directory for local storage.
TaskLocalContainerPath = filepath.Join("/", TaskLocal)
)
func (d *AllocDir) linkOrCopy(src, dst string, perm os.FileMode) error {
// Attempt to hardlink.
if err := os.Link(src, dst); err == nil {
return nil
}
return fileCopy(src, dst, perm)
}
func (d *AllocDir) dropDirPermissions(path string) error {
// Can't do anything if not root.
if unix.Geteuid() != 0 {
return nil
}
u, err := user.Lookup("nobody")
if err != nil {
return err
}
uid, err := getUid(u)
if err != nil {
return err
}
gid, err := getGid(u)
if err != nil {
return err
}
if err := os.Chown(path, uid, gid); err != nil {
return fmt.Errorf("Couldn't change owner/group of %v to (uid: %v, gid: %v): %v", path, uid, gid, err)
}
if err := os.Chmod(path, 0777); err != nil {
return fmt.Errorf("Chmod(%v) failed: %v", path, err)
}
return nil
}
func getUid(u *user.User) (int, error) {
uid, err := strconv.Atoi(u.Uid)
if err != nil {
return 0, fmt.Errorf("Unable to convert Uid to an int: %v", err)
}
return uid, nil
}
func getGid(u *user.User) (int, error) {
gid, err := strconv.Atoi(u.Gid)
if err != nil {
return 0, fmt.Errorf("Unable to convert Gid to an int: %v", err)
}
return gid, nil
}
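// Editorial note (not part of the original vendored file): linkOrCopy above
// tries a hardlink first and silently falls back to a byte-for-byte copy,
// since hardlinks only work within a single filesystem. A minimal standalone
// sketch of that fallback, with the copy closing its files:
//
//	package main
//
//	import (
//		"io"
//		"log"
//		"os"
//	)
//
//	func linkOrCopy(src, dst string, perm os.FileMode) error {
//		if err := os.Link(src, dst); err == nil {
//			return nil // hardlink succeeded; nothing to copy
//		}
//		in, err := os.Open(src)
//		if err != nil {
//			return err
//		}
//		defer in.Close()
//		out, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
//		if err != nil {
//			return err
//		}
//		defer out.Close()
//		_, err = io.Copy(out, in)
//		return err
//	}
//
//	func main() {
//		if err := linkOrCopy("/etc/hostname", os.TempDir()+"/hostname-copy", 0644); err != nil {
//			log.Fatal(err)
//		}
//	}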

View File

@@ -1,45 +0,0 @@
package allocdir
import (
"errors"
"os"
"path/filepath"
)
var (
// Path inside container for mounted directory that is shared across tasks in a task group.
SharedAllocContainerPath = filepath.Join("c:\\", SharedAllocName)
// Path inside container for mounted directory for local storage.
TaskLocalContainerPath = filepath.Join("c:\\", TaskLocal)
)
func (d *AllocDir) linkOrCopy(src, dst string, perm os.FileMode) error {
return fileCopy(src, dst, perm)
}
// Mounting the shared directory is not supported on Windows.
func (d *AllocDir) mountSharedDir(dir string) error {
return errors.New("Mount on Windows not supported.")
}
// The windows version does nothing currently.
func (d *AllocDir) dropDirPermissions(path string) error {
return nil
}
// The windows version does nothing currently.
func (d *AllocDir) unmountSharedDir(dir string) error {
return nil
}
// MountSpecialDirs mounts the dev and proc file system on the chroot of the
// task. It's a no-op on windows.
func (d *AllocDir) MountSpecialDirs(taskDir string) error {
return nil
}
// unmountSpecialDirs unmounts the dev and proc file system from the chroot
func (d *AllocDir) unmountSpecialDirs(taskDir string) error {
return nil
}

File diff suppressed because it is too large

View File

@@ -1,221 +0,0 @@
package config
import (
"fmt"
"io"
"os"
"strconv"
"strings"
"time"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
)
var (
// DefaultEnvBlacklist is the default set of environment variables that are
// filtered when passing the environment variables of the host to a task.
DefaultEnvBlacklist = strings.Join([]string{
"CONSUL_TOKEN",
"VAULT_TOKEN",
"ATLAS_TOKEN",
"AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN",
"GOOGLE_APPLICATION_CREDENTIALS",
}, ",")
// DefaultUserBlacklist is the default set of users that tasks are not
// allowed to run as when using a driver in "user.checked_drivers"
DefaultUserBlacklist = strings.Join([]string{
"root",
"Administrator",
}, ",")
// DefaultUserCheckedDrivers is the set of drivers we apply the user
// blacklist onto. For virtualized drivers it often doesn't make sense to
// make this stipulation so by default they are ignored.
DefaultUserCheckedDrivers = strings.Join([]string{
"exec",
"qemu",
"java",
}, ",")
)
// RPCHandler can be provided to the Client if there is a local server
// to avoid going over the network. If not provided, the Client will
// maintain a connection pool to the servers
type RPCHandler interface {
RPC(method string, args interface{}, reply interface{}) error
}
// Config is used to parameterize and configure the behavior of the client
type Config struct {
// DevMode controls if we are in a development mode which
// avoids persistent storage.
DevMode bool
// StateDir is where we store our state
StateDir string
// AllocDir is where we store data for allocations
AllocDir string
// LogOutput is the destination for logs
LogOutput io.Writer
// Region is the client's region
Region string
// Network interface to be used in network fingerprinting
NetworkInterface string
// Network speed is the default speed of network interfaces if they can not
// be determined dynamically.
NetworkSpeed int
// MaxKillTimeout allows capping the user-specifiable KillTimeout. If the
// task's KillTimeout is greater than the MaxKillTimeout, MaxKillTimeout is
// used.
MaxKillTimeout time.Duration
// Servers is a list of known server addresses. These are as "host:port"
Servers []string
// RPCHandler can be provided to avoid network traffic if the
// server is running locally.
RPCHandler RPCHandler
// Node provides the base node
Node *structs.Node
// ClientMaxPort is the upper range of the ports that the client uses for
// communicating with plugin subsystems over loopback
ClientMaxPort uint
// ClientMinPort is the lower range of the ports that the client uses for
// communicating with plugin subsystems over loopback
ClientMinPort uint
// GloballyReservedPorts are ports that are reserved across all network
// devices and IPs.
GloballyReservedPorts []int
// A mapping of directories on the host OS to attempt to embed inside each
// task's chroot.
ChrootEnv map[string]string
// Options provides arbitrary key-value configuration for nomad internals,
// like fingerprinters and drivers. The format is:
//
// namespace.option = value
Options map[string]string
// Version is the version of the Nomad client
Version string
// Revision is the commit number of the Nomad client
Revision string
// ConsulConfig is this Agent's Consul configuration
ConsulConfig *config.ConsulConfig
// StatsCollectionInterval is the interval at which the Nomad client
// collects resource usage stats
StatsCollectionInterval time.Duration
// PublishNodeMetrics determines whether nomad is going to publish node
// level metrics to remote Telemetry sinks
PublishNodeMetrics bool
// PublishAllocationMetrics determines whether nomad is going to publish
// allocation metrics to remote Telemetry sinks
PublishAllocationMetrics bool
}
func (c *Config) Copy() *Config {
nc := new(Config)
*nc = *c
nc.Node = nc.Node.Copy()
nc.Servers = structs.CopySliceString(nc.Servers)
nc.Options = structs.CopyMapStringString(nc.Options)
return nc
}
// DefaultConfig returns the default configuration
func DefaultConfig() *Config {
return &Config{
ConsulConfig: config.DefaultConsulConfig(),
LogOutput: os.Stderr,
Region: "global",
StatsCollectionInterval: 1 * time.Second,
}
}
// Read returns the specified configuration value or "".
func (c *Config) Read(id string) string {
return c.Options[id]
}
// ReadDefault returns the specified configuration value, or the specified
// default value if none is set.
func (c *Config) ReadDefault(id string, defaultValue string) string {
val, ok := c.Options[id]
if !ok {
return defaultValue
}
return val
}
// ReadBool parses the specified option as a boolean.
func (c *Config) ReadBool(id string) (bool, error) {
val, ok := c.Options[id]
if !ok {
return false, fmt.Errorf("Specified config is missing from options")
}
bval, err := strconv.ParseBool(val)
if err != nil {
return false, fmt.Errorf("Failed to parse %s as bool: %s", val, err)
}
return bval, nil
}
// ReadBoolDefault tries to parse the specified option as a boolean. If there is
// an error in parsing, the default option is returned.
func (c *Config) ReadBoolDefault(id string, defaultValue bool) bool {
val, err := c.ReadBool(id)
if err != nil {
return defaultValue
}
return val
}
// ReadStringListToMap tries to parse the specified option as a comma separated list.
// If there is an error in parsing, an empty list is returned.
func (c *Config) ReadStringListToMap(key string) map[string]struct{} {
s := strings.TrimSpace(c.Read(key))
list := make(map[string]struct{})
if s != "" {
for _, e := range strings.Split(s, ",") {
trimmed := strings.TrimSpace(e)
list[trimmed] = struct{}{}
}
}
return list
}
// ReadStringListToMapDefault tries to parse the specified option as a comma
// separated list. If the option is not set, the default value is parsed instead.
func (c *Config) ReadStringListToMapDefault(key, defaultValue string) map[string]struct{} {
val, ok := c.Options[key]
if !ok {
val = defaultValue
}
list := make(map[string]struct{})
if val != "" {
for _, e := range strings.Split(val, ",") {
trimmed := strings.TrimSpace(e)
list[trimmed] = struct{}{}
}
}
return list
}
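// Editorial note (not part of the original vendored file): the Read* helpers
// above all follow the same shape: look up a key in the free-form Options
// map, parse it, and fall back to a default on a miss or a parse error. A
// minimal standalone sketch of that pattern (the option names are just
// illustrative):
//
//	package main
//
//	import (
//		"fmt"
//		"strconv"
//		"strings"
//	)
//
//	type options map[string]string
//
//	func (o options) boolDefault(key string, def bool) bool {
//		v, ok := o[key]
//		if !ok {
//			return def
//		}
//		b, err := strconv.ParseBool(v)
//		if err != nil {
//			return def // unparsable values fall back to the default
//		}
//		return b
//	}
//
//	func (o options) listToSet(key string) map[string]struct{} {
//		set := make(map[string]struct{})
//		for _, e := range strings.Split(o[key], ",") {
//			if trimmed := strings.TrimSpace(e); trimmed != "" {
//				set[trimmed] = struct{}{}
//			}
//		}
//		return set
//	}
//
//	func main() {
//		opts := options{"driver.raw_exec.enable": "true", "env.blacklist": "A, B ,C"}
//		fmt.Println(opts.boolDefault("driver.raw_exec.enable", false)) // true
//		fmt.Println(opts.listToSet("env.blacklist"))                   // map[A:{} B:{} C:{}]
//	}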

File diff suppressed because it is too large

View File

@@ -1,14 +0,0 @@
// +build !windows
package driver
import docker "github.com/fsouza/go-dockerclient"
const (
// The default network mode for containers on non-Windows hosts is bridge
defaultNetworkMode = "bridge"
)
func getPortBinding(ip string, port string) []docker.PortBinding {
return []docker.PortBinding{{HostIP: ip, HostPort: port}}
}

View File

@@ -1,13 +0,0 @@
package driver
import docker "github.com/fsouza/go-dockerclient"
const (
// Default network mode for windows containers is nat
defaultNetworkMode = "nat"
)
// Windows containers don't currently support specifying a host IP in port bindings.
func getPortBinding(ip string, port string) []docker.PortBinding {
return []docker.PortBinding{{HostIP: "", HostPort: port}}
}

View File

@@ -1,192 +0,0 @@
package driver
import (
"fmt"
"log"
"path/filepath"
"sync"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/env"
"github.com/hashicorp/nomad/client/fingerprint"
"github.com/hashicorp/nomad/nomad/structs"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
cstructs "github.com/hashicorp/nomad/client/structs"
)
// BuiltinDrivers contains the built in registered drivers
// which are available for allocation handling
var BuiltinDrivers = map[string]Factory{
"docker": NewDockerDriver,
"exec": NewExecDriver,
"raw_exec": NewRawExecDriver,
"java": NewJavaDriver,
"qemu": NewQemuDriver,
"rkt": NewRktDriver,
}
// NewDriver is used to instantiate and return a new driver
// given the name and a logger
func NewDriver(name string, ctx *DriverContext) (Driver, error) {
// Lookup the factory function
factory, ok := BuiltinDrivers[name]
if !ok {
return nil, fmt.Errorf("unknown driver '%s'", name)
}
// Instantiate the driver
f := factory(ctx)
return f, nil
}
// Factory is used to instantiate a new Driver
type Factory func(*DriverContext) Driver
// Driver is used for execution of tasks. This allows Nomad
// to support many pluggable implementations of task drivers.
// Examples could include LXC, Docker, Qemu, etc.
type Driver interface {
// Drivers must support the fingerprint interface for detection
fingerprint.Fingerprint
// Start is used to begin task execution
Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error)
// Open is used to re-open a handle to a task
Open(ctx *ExecContext, handleID string) (DriverHandle, error)
// Drivers must validate their configuration
Validate(map[string]interface{}) error
}
// DriverContext is a means to inject dependencies such as loggers, configs, and
// node attributes into a Driver without having to change the Driver interface
// each time we do it. Used in conjunction with Factory, above.
type DriverContext struct {
taskName string
config *config.Config
logger *log.Logger
node *structs.Node
taskEnv *env.TaskEnvironment
}
// NewEmptyDriverContext returns a DriverContext with all fields set to their
// zero value.
func NewEmptyDriverContext() *DriverContext {
return &DriverContext{
taskName: "",
config: nil,
node: nil,
logger: nil,
taskEnv: nil,
}
}
// NewDriverContext initializes a new DriverContext with the specified fields.
// This enables other packages to create DriverContexts but keeps the fields
// private to the driver. If we want to change this later we can gorename all of
// the fields in DriverContext.
func NewDriverContext(taskName string, config *config.Config, node *structs.Node,
logger *log.Logger, taskEnv *env.TaskEnvironment) *DriverContext {
return &DriverContext{
taskName: taskName,
config: config,
node: node,
logger: logger,
taskEnv: taskEnv,
}
}
// DriverHandle is an opaque handle into a driver used for task
// manipulation
type DriverHandle interface {
// Returns an opaque handle that can be used to re-open the handle
ID() string
// WaitCh is used to return a channel used to wait for task completion
WaitCh() chan *dstructs.WaitResult
// Update is used to update the task if possible and update task related
// configurations.
Update(task *structs.Task) error
// Kill is used to stop the task
Kill() error
// Stats returns aggregated stats of the driver
Stats() (*cstructs.TaskResourceUsage, error)
}
// ExecContext is shared between drivers within an allocation
type ExecContext struct {
sync.Mutex
// AllocDir contains information about the alloc directory structure.
AllocDir *allocdir.AllocDir
// Alloc ID
AllocID string
}
// NewExecContext is used to create a new execution context
func NewExecContext(alloc *allocdir.AllocDir, allocID string) *ExecContext {
return &ExecContext{AllocDir: alloc, AllocID: allocID}
}
// GetTaskEnv converts the alloc dir, the node, task and alloc into a
// TaskEnvironment.
func GetTaskEnv(allocDir *allocdir.AllocDir, node *structs.Node,
task *structs.Task, alloc *structs.Allocation) (*env.TaskEnvironment, error) {
tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
env := env.NewTaskEnvironment(node).
SetTaskMeta(task.Meta).
SetTaskGroupMeta(tg.Meta).
SetJobMeta(alloc.Job.Meta).
SetEnvvars(task.Env).
SetTaskName(task.Name)
if allocDir != nil {
env.SetAllocDir(allocDir.SharedDir)
taskdir, ok := allocDir.TaskDirs[task.Name]
if !ok {
return nil, fmt.Errorf("failed to get task directory for task %q", task.Name)
}
env.SetTaskLocalDir(filepath.Join(taskdir, allocdir.TaskLocal))
}
if task.Resources != nil {
env.SetMemLimit(task.Resources.MemoryMB).
SetCpuLimit(task.Resources.CPU).
SetNetworks(task.Resources.Networks)
}
if alloc != nil {
env.SetAlloc(alloc)
}
return env.Build(), nil
}
func mapMergeStrInt(maps ...map[string]int) map[string]int {
out := map[string]int{}
for _, in := range maps {
for key, val := range in {
out[key] = val
}
}
return out
}
func mapMergeStrStr(maps ...map[string]string) map[string]string {
out := map[string]string{}
for _, in := range maps {
for key, val := range in {
out[key] = val
}
}
return out
}
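// Editorial note (not part of the original vendored file): BuiltinDrivers and
// NewDriver above form a simple name-to-factory registry, which is how the
// client turns the "driver" string from a job file into a concrete
// implementation. A minimal standalone sketch of that registry pattern:
//
//	package main
//
//	import (
//		"fmt"
//		"log"
//	)
//
//	type Driver interface {
//		Start(cmd string) error
//	}
//
//	type echoDriver struct{}
//
//	func (echoDriver) Start(cmd string) error {
//		fmt.Println("would run:", cmd)
//		return nil
//	}
//
//	type Factory func() Driver
//
//	var builtin = map[string]Factory{
//		"echo": func() Driver { return echoDriver{} },
//	}
//
//	func newDriver(name string) (Driver, error) {
//		factory, ok := builtin[name]
//		if !ok {
//			return nil, fmt.Errorf("unknown driver %q", name)
//		}
//		return factory(), nil
//	}
//
//	func main() {
//		d, err := newDriver("echo")
//		if err != nil {
//			log.Fatal(err)
//		}
//		d.Start("uptime")
//	}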

View File

@@ -1,430 +0,0 @@
package env
import (
"fmt"
"os"
"strconv"
"strings"
hargs "github.com/hashicorp/nomad/helper/args"
"github.com/hashicorp/nomad/nomad/structs"
)
// A set of environment variables that are exported by each driver.
const (
// AllocDir is the environment variable with the path to the alloc directory
// that is shared across tasks within a task group.
AllocDir = "NOMAD_ALLOC_DIR"
// TaskLocalDir is the environment variable with the path to the task's local
// directory, where it can store data that is persisted until the alloc is
// removed.
TaskLocalDir = "NOMAD_TASK_DIR"
// MemLimit is the environment variable with the task's memory limit in MBs.
MemLimit = "NOMAD_MEMORY_LIMIT"
// CpuLimit is the environment variable with the task's CPU limit in MHz.
CpuLimit = "NOMAD_CPU_LIMIT"
// AllocID is the environment variable for passing the allocation ID.
AllocID = "NOMAD_ALLOC_ID"
// AllocName is the environment variable for passing the allocation name.
AllocName = "NOMAD_ALLOC_NAME"
// TaskName is the environment variable for passing the task name.
TaskName = "NOMAD_TASK_NAME"
// AllocIndex is the environment variable for passing the allocation index.
AllocIndex = "NOMAD_ALLOC_INDEX"
// AddrPrefix is the prefix for passing both dynamic and static port
// allocations to tasks.
// E.g. $NOMAD_ADDR_http=127.0.0.1:80
AddrPrefix = "NOMAD_ADDR_"
// IpPrefix is the prefix for passing the IP of a port allocation to a task.
IpPrefix = "NOMAD_IP_"
// PortPrefix is the prefix for passing the port allocation to a task.
PortPrefix = "NOMAD_PORT_"
// HostPortPrefix is the prefix for passing the host port when a portmap is
// specified.
HostPortPrefix = "NOMAD_HOST_PORT_"
// MetaPrefix is the prefix for passing task meta data.
MetaPrefix = "NOMAD_META_"
)
// The node values that can be interpreted.
const (
nodeIdKey = "node.unique.id"
nodeDcKey = "node.datacenter"
nodeNameKey = "node.unique.name"
nodeClassKey = "node.class"
// Prefixes used for lookups.
nodeAttributePrefix = "attr."
nodeMetaPrefix = "meta."
)
// TaskEnvironment is used to expose information to a task via environment
// variables and provide interpolation of Nomad variables.
type TaskEnvironment struct {
Env map[string]string
TaskMeta map[string]string
TaskGroupMeta map[string]string
JobMeta map[string]string
AllocDir string
TaskDir string
CpuLimit int
MemLimit int
TaskName string
AllocIndex int
AllocId string
AllocName string
Node *structs.Node
Networks []*structs.NetworkResource
PortMap map[string]int
// TaskEnv holds the variables that will be set in the task's environment
TaskEnv map[string]string
// NodeValues holds the values from the node that are allowed for
// interpolation.
NodeValues map[string]string
}
func NewTaskEnvironment(node *structs.Node) *TaskEnvironment {
return &TaskEnvironment{Node: node, AllocIndex: -1}
}
// ParseAndReplace takes the user-supplied args and replaces any instance of an
// environment variable or Nomad variable in the args with the actual value.
func (t *TaskEnvironment) ParseAndReplace(args []string) []string {
replaced := make([]string, len(args))
for i, arg := range args {
replaced[i] = hargs.ReplaceEnv(arg, t.TaskEnv, t.NodeValues)
}
return replaced
}
// ReplaceEnv takes an arg and replaces all occurrences of environment variables
// and nomad variables. If the variable is found in the passed map it is
// replaced, otherwise the original string is returned.
func (t *TaskEnvironment) ReplaceEnv(arg string) string {
return hargs.ReplaceEnv(arg, t.TaskEnv, t.NodeValues)
}
// Build must be called after all of the task's environment values have been set.
func (t *TaskEnvironment) Build() *TaskEnvironment {
t.NodeValues = make(map[string]string)
t.TaskEnv = make(map[string]string)
// Build the meta with the following precedence: task, task group, job.
for _, meta := range []map[string]string{t.JobMeta, t.TaskGroupMeta, t.TaskMeta} {
for k, v := range meta {
t.TaskEnv[fmt.Sprintf("%s%s", MetaPrefix, strings.ToUpper(k))] = v
}
}
// Build the ports
for _, network := range t.Networks {
for label, value := range network.MapLabelToValues(nil) {
t.TaskEnv[fmt.Sprintf("%s%s", IpPrefix, label)] = network.IP
t.TaskEnv[fmt.Sprintf("%s%s", HostPortPrefix, label)] = strconv.Itoa(value)
if forwardedPort, ok := t.PortMap[label]; ok {
value = forwardedPort
}
t.TaskEnv[fmt.Sprintf("%s%s", PortPrefix, label)] = fmt.Sprintf("%d", value)
IPPort := fmt.Sprintf("%s:%d", network.IP, value)
t.TaskEnv[fmt.Sprintf("%s%s", AddrPrefix, label)] = IPPort
}
}
// Build the directories
if t.AllocDir != "" {
t.TaskEnv[AllocDir] = t.AllocDir
}
if t.TaskDir != "" {
t.TaskEnv[TaskLocalDir] = t.TaskDir
}
// Build the resource limits
if t.MemLimit != 0 {
t.TaskEnv[MemLimit] = strconv.Itoa(t.MemLimit)
}
if t.CpuLimit != 0 {
t.TaskEnv[CpuLimit] = strconv.Itoa(t.CpuLimit)
}
// Build the tasks ids
if t.AllocId != "" {
t.TaskEnv[AllocID] = t.AllocId
}
if t.AllocName != "" {
t.TaskEnv[AllocName] = t.AllocName
}
if t.AllocIndex != -1 {
t.TaskEnv[AllocIndex] = strconv.Itoa(t.AllocIndex)
}
if t.TaskName != "" {
t.TaskEnv[TaskName] = t.TaskName
}
// Build the node
if t.Node != nil {
// Set up the node values.
t.NodeValues[nodeIdKey] = t.Node.ID
t.NodeValues[nodeDcKey] = t.Node.Datacenter
t.NodeValues[nodeNameKey] = t.Node.Name
t.NodeValues[nodeClassKey] = t.Node.NodeClass
// Set up the attributes.
for k, v := range t.Node.Attributes {
t.NodeValues[fmt.Sprintf("%s%s", nodeAttributePrefix, k)] = v
}
// Set up the meta.
for k, v := range t.Node.Meta {
t.NodeValues[fmt.Sprintf("%s%s", nodeMetaPrefix, k)] = v
}
}
// Interpret the environment variables
interpreted := make(map[string]string, len(t.Env))
for k, v := range t.Env {
interpreted[k] = hargs.ReplaceEnv(v, t.NodeValues, t.TaskEnv)
}
for k, v := range interpreted {
t.TaskEnv[k] = v
}
return t
}
// EnvList returns a list of strings with NAME=value pairs.
func (t *TaskEnvironment) EnvList() []string {
env := []string{}
for k, v := range t.TaskEnv {
env = append(env, fmt.Sprintf("%s=%s", k, v))
}
return env
}
// EnvMap returns a copy of the tasks environment variables.
func (t *TaskEnvironment) EnvMap() map[string]string {
m := make(map[string]string, len(t.TaskEnv))
for k, v := range t.TaskEnv {
m[k] = v
}
return m
}
// Builder methods to build the TaskEnvironment
func (t *TaskEnvironment) SetAllocDir(dir string) *TaskEnvironment {
t.AllocDir = dir
return t
}
func (t *TaskEnvironment) ClearAllocDir() *TaskEnvironment {
t.AllocDir = ""
return t
}
func (t *TaskEnvironment) SetTaskLocalDir(dir string) *TaskEnvironment {
t.TaskDir = dir
return t
}
func (t *TaskEnvironment) ClearTaskLocalDir() *TaskEnvironment {
t.TaskDir = ""
return t
}
func (t *TaskEnvironment) SetMemLimit(limit int) *TaskEnvironment {
t.MemLimit = limit
return t
}
func (t *TaskEnvironment) ClearMemLimit() *TaskEnvironment {
t.MemLimit = 0
return t
}
func (t *TaskEnvironment) SetCpuLimit(limit int) *TaskEnvironment {
t.CpuLimit = limit
return t
}
func (t *TaskEnvironment) ClearCpuLimit() *TaskEnvironment {
t.CpuLimit = 0
return t
}
func (t *TaskEnvironment) SetNetworks(networks []*structs.NetworkResource) *TaskEnvironment {
t.Networks = networks
return t
}
func (t *TaskEnvironment) clearNetworks() *TaskEnvironment {
t.Networks = nil
return t
}
func (t *TaskEnvironment) SetPortMap(portMap map[string]int) *TaskEnvironment {
t.PortMap = portMap
return t
}
func (t *TaskEnvironment) clearPortMap() *TaskEnvironment {
t.PortMap = nil
return t
}
// Takes a map of meta values to be passed to the task. The keys are
// capitalized when the environment variable is set.
func (t *TaskEnvironment) SetTaskMeta(m map[string]string) *TaskEnvironment {
t.TaskMeta = m
return t
}
func (t *TaskEnvironment) ClearTaskMeta() *TaskEnvironment {
t.TaskMeta = nil
return t
}
func (t *TaskEnvironment) SetTaskGroupMeta(m map[string]string) *TaskEnvironment {
t.TaskGroupMeta = m
return t
}
func (t *TaskEnvironment) ClearTaskGroupMeta() *TaskEnvironment {
t.TaskGroupMeta = nil
return t
}
func (t *TaskEnvironment) SetJobMeta(m map[string]string) *TaskEnvironment {
t.JobMeta = m
return t
}
func (t *TaskEnvironment) ClearJobMeta() *TaskEnvironment {
t.JobMeta = nil
return t
}
func (t *TaskEnvironment) SetEnvvars(m map[string]string) *TaskEnvironment {
t.Env = m
return t
}
// Appends the given environment variables.
func (t *TaskEnvironment) AppendEnvvars(m map[string]string) *TaskEnvironment {
if t.Env == nil {
t.Env = make(map[string]string, len(m))
}
for k, v := range m {
t.Env[k] = v
}
return t
}
// AppendHostEnvvars adds the host's environment variables to the task's. The
// filter parameter can be used to prevent specific host environment variables
// from entering the task's environment.
func (t *TaskEnvironment) AppendHostEnvvars(filter []string) *TaskEnvironment {
hostEnv := os.Environ()
if t.Env == nil {
t.Env = make(map[string]string, len(hostEnv))
}
// Index the filtered environment variables.
index := make(map[string]struct{}, len(filter))
for _, f := range filter {
index[f] = struct{}{}
}
for _, e := range hostEnv {
parts := strings.SplitN(e, "=", 2)
key, value := parts[0], parts[1]
// Skip filtered environment variables
if _, filtered := index[key]; filtered {
continue
}
// Don't override the tasks environment variables.
if _, existing := t.Env[key]; !existing {
t.Env[key] = value
}
}
return t
}
func (t *TaskEnvironment) ClearEnvvars() *TaskEnvironment {
t.Env = nil
return t
}
// Helper method for setting all fields from an allocation.
func (t *TaskEnvironment) SetAlloc(alloc *structs.Allocation) *TaskEnvironment {
t.AllocId = alloc.ID
t.AllocName = alloc.Name
t.AllocIndex = alloc.Index()
return t
}
// Helper method for clearing all fields from an allocation.
func (t *TaskEnvironment) ClearAlloc(alloc *structs.Allocation) *TaskEnvironment {
return t.ClearAllocId().ClearAllocName().ClearAllocIndex()
}
func (t *TaskEnvironment) SetAllocIndex(index int) *TaskEnvironment {
t.AllocIndex = index
return t
}
func (t *TaskEnvironment) ClearAllocIndex() *TaskEnvironment {
t.AllocIndex = -1
return t
}
func (t *TaskEnvironment) SetAllocId(id string) *TaskEnvironment {
t.AllocId = id
return t
}
func (t *TaskEnvironment) ClearAllocId() *TaskEnvironment {
t.AllocId = ""
return t
}
func (t *TaskEnvironment) SetAllocName(name string) *TaskEnvironment {
t.AllocName = name
return t
}
func (t *TaskEnvironment) ClearAllocName() *TaskEnvironment {
t.AllocName = ""
return t
}
func (t *TaskEnvironment) SetTaskName(name string) *TaskEnvironment {
t.TaskName = name
return t
}
func (t *TaskEnvironment) ClearTaskName() *TaskEnvironment {
t.TaskName = ""
return t
}
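// Editorial note (not part of the original vendored file): Build above merges
// meta values with a fixed precedence (job, then task group, then task, so
// later maps win) and exposes them as NOMAD_META_* variables. A minimal
// standalone sketch of that precedence-merge-and-prefix step:
//
//	package main
//
//	import (
//		"fmt"
//		"strings"
//	)
//
//	func buildMetaEnv(jobMeta, groupMeta, taskMeta map[string]string) map[string]string {
//		env := make(map[string]string)
//		// Later maps override earlier ones: task beats group beats job.
//		for _, meta := range []map[string]string{jobMeta, groupMeta, taskMeta} {
//			for k, v := range meta {
//				env["NOMAD_META_"+strings.ToUpper(k)] = v
//			}
//		}
//		return env
//	}
//
//	func main() {
//		env := buildMetaEnv(
//			map[string]string{"owner": "platform", "tier": "free"},
//			map[string]string{"tier": "standard"},
//			map[string]string{"tier": "premium"},
//		)
//		fmt.Println(env["NOMAD_META_OWNER"]) // platform
//		fmt.Println(env["NOMAD_META_TIER"])  // premium (the task-level value wins)
//	}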

View File

@@ -1,319 +0,0 @@
package driver
import (
"encoding/json"
"fmt"
"log"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/executor"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/discover"
"github.com/hashicorp/nomad/helper/fields"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
)
const (
// The key populated in Node Attributes to indicate the presence of the Exec
// driver
execDriverAttr = "driver.exec"
)
// ExecDriver fork/execs tasks using as many of the underlying OS's isolation
// features as possible.
type ExecDriver struct {
DriverContext
}
type ExecDriverConfig struct {
Command string `mapstructure:"command"`
Args []string `mapstructure:"args"`
}
// execHandle is returned from Start/Open as a handle to the PID
type execHandle struct {
pluginClient *plugin.Client
executor executor.Executor
isolationConfig *dstructs.IsolationConfig
userPid int
allocDir *allocdir.AllocDir
killTimeout time.Duration
maxKillTimeout time.Duration
logger *log.Logger
waitCh chan *dstructs.WaitResult
doneCh chan struct{}
version string
}
// NewExecDriver is used to create a new exec driver
func NewExecDriver(ctx *DriverContext) Driver {
return &ExecDriver{DriverContext: *ctx}
}
// Validate is used to validate the driver configuration
func (d *ExecDriver) Validate(config map[string]interface{}) error {
fd := &fields.FieldData{
Raw: config,
Schema: map[string]*fields.FieldSchema{
"command": &fields.FieldSchema{
Type: fields.TypeString,
Required: true,
},
"args": &fields.FieldSchema{
Type: fields.TypeArray,
},
},
}
if err := fd.Validate(); err != nil {
return err
}
return nil
}
func (d *ExecDriver) Periodic() (bool, time.Duration) {
return true, 15 * time.Second
}
func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
var driverConfig ExecDriverConfig
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
return nil, err
}
// Get the command to be run
command := driverConfig.Command
if err := validateCommand(command, "args"); err != nil {
return nil, err
}
// Set the host environment variables.
filter := strings.Split(d.config.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",")
d.taskEnv.AppendHostEnvvars(filter)
// Get the task directory for storing the executor logs.
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
bin, err := discover.NomadExecutable()
if err != nil {
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
}
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
pluginConfig := &plugin.ClientConfig{
Cmd: exec.Command(bin, "executor", pluginLogFile),
}
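// The executor runs out-of-process: the Nomad binary re-invokes itself as
// "nomad executor" under go-plugin, and the executor's own output is written
// to the <task>-executor.out file in the task directory.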
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
Driver: "exec",
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
ChrootEnv: d.config.ChrootEnv,
Task: task,
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{
Cmd: command,
Args: driverConfig.Args,
FSIsolation: true,
ResourceLimits: true,
User: getExecutorUser(task),
}, executorCtx)
if err != nil {
pluginClient.Kill()
return nil, err
}
d.logger.Printf("[DEBUG] driver.exec: started process via plugin with pid: %v", ps.Pid)
// Return a driver handle
maxKill := d.DriverContext.config.MaxKillTimeout
h := &execHandle{
pluginClient: pluginClient,
userPid: ps.Pid,
executor: exec,
allocDir: ctx.AllocDir,
isolationConfig: ps.IsolationConfig,
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
maxKillTimeout: maxKill,
logger: d.logger,
version: d.config.Version,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := exec.SyncServices(consulContext(d.config, "")); err != nil {
d.logger.Printf("[ERR] driver.exec: error registering services with consul for task: %q: %v", task.Name, err)
}
go h.run()
return h, nil
}
type execId struct {
Version string
KillTimeout time.Duration
MaxKillTimeout time.Duration
UserPid int
TaskDir string
AllocDir *allocdir.AllocDir
IsolationConfig *dstructs.IsolationConfig
PluginConfig *PluginReattachConfig
}
func (d *ExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
id := &execId{}
if err := json.Unmarshal([]byte(handleID), id); err != nil {
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
}
pluginConfig := &plugin.ClientConfig{
Reattach: id.PluginConfig.PluginConfig(),
}
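// Instead of launching a new executor, reattach to the plugin process recorded
// in the persisted handle so the running user process is not disturbed.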
exec, client, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
merrs := new(multierror.Error)
merrs.Errors = append(merrs.Errors, err)
d.logger.Println("[ERR] driver.exec: error connecting to plugin so destroying plugin pid and user pid")
if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil {
merrs.Errors = append(merrs.Errors, fmt.Errorf("error destroying plugin and userpid: %v", e))
}
if id.IsolationConfig != nil {
ePid := pluginConfig.Reattach.Pid
if e := executor.ClientCleanup(id.IsolationConfig, ePid); e != nil {
merrs.Errors = append(merrs.Errors, fmt.Errorf("destroying cgroup failed: %v", e))
}
}
if e := ctx.AllocDir.UnmountAll(); e != nil {
merrs.Errors = append(merrs.Errors, e)
}
return nil, fmt.Errorf("error connecting to plugin: %v", merrs.ErrorOrNil())
}
ver, _ := exec.Version()
d.logger.Printf("[DEBUG] driver.exec : version of executor: %v", ver.Version)
// Return a driver handle
h := &execHandle{
pluginClient: client,
executor: exec,
userPid: id.UserPid,
allocDir: id.AllocDir,
isolationConfig: id.IsolationConfig,
logger: d.logger,
version: id.Version,
killTimeout: id.KillTimeout,
maxKillTimeout: id.MaxKillTimeout,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := exec.SyncServices(consulContext(d.config, "")); err != nil {
d.logger.Printf("[ERR] driver.exec: error registering services with consul: %v", err)
}
go h.run()
return h, nil
}
func (h *execHandle) ID() string {
id := execId{
Version: h.version,
KillTimeout: h.killTimeout,
MaxKillTimeout: h.maxKillTimeout,
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
UserPid: h.userPid,
AllocDir: h.allocDir,
IsolationConfig: h.isolationConfig,
}
data, err := json.Marshal(id)
if err != nil {
h.logger.Printf("[ERR] driver.exec: failed to marshal ID to JSON: %s", err)
}
return string(data)
}
func (h *execHandle) WaitCh() chan *dstructs.WaitResult {
return h.waitCh
}
func (h *execHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateTask(task)
// Beyond this, updating a running task is not possible
return nil
}
func (h *execHandle) Kill() error {
if err := h.executor.ShutDown(); err != nil {
if h.pluginClient.Exited() {
return nil
}
return fmt.Errorf("executor Shutdown failed: %v", err)
}
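// Wait for the process to exit on its own after the interrupt; if the kill
// timeout elapses first (or the plugin already exited), force the executor to
// exit.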
select {
case <-h.doneCh:
return nil
case <-time.After(h.killTimeout):
if h.pluginClient.Exited() {
return nil
}
if err := h.executor.Exit(); err != nil {
return fmt.Errorf("executor Exit failed: %v", err)
}
return nil
}
}
func (h *execHandle) Stats() (*cstructs.TaskResourceUsage, error) {
return h.executor.Stats()
}
func (h *execHandle) run() {
ps, err := h.executor.Wait()
close(h.doneCh)
// If the exit code is 0 and we had an error, the plugin didn't connect and
// doesn't know the state of the user process, so we kill the user process.
// That way, when a new executor is created after a restart, the new user
// process doesn't collide with resources that the older user pid might still
// be holding onto.
if ps.ExitCode == 0 && err != nil {
if h.isolationConfig != nil {
ePid := h.pluginClient.ReattachConfig().Pid
if e := executor.ClientCleanup(h.isolationConfig, ePid); e != nil {
h.logger.Printf("[ERR] driver.exec: destroying resource container failed: %v", e)
}
}
if e := h.allocDir.UnmountAll(); e != nil {
h.logger.Printf("[ERR] driver.exec: unmounting dev,proc and alloc dirs failed: %v", e)
}
}
h.waitCh <- dstructs.NewWaitResult(ps.ExitCode, ps.Signal, err)
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.exec: failed to deregister services: %v", err)
}
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.exec: error destroying executor: %v", err)
}
h.pluginClient.Kill()
}

View File

@ -1,12 +0,0 @@
// +build darwin dragonfly freebsd netbsd openbsd solaris windows

package driver
import (
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
return false, nil
}

View File

@ -1,34 +0,0 @@
package driver
import (
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
"golang.org/x/sys/unix"
)
func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Get the current status so that we can log any debug messages only if the
// state changes
_, currentlyEnabled := node.Attributes[execDriverAttr]
// Only enable if cgroups are available and we are root
if _, ok := node.Attributes["unique.cgroup.mountpoint"]; !ok {
if currentlyEnabled {
d.logger.Printf("[DEBUG] driver.exec: cgroups unavailable, disabling")
}
delete(node.Attributes, execDriverAttr)
return false, nil
} else if unix.Geteuid() != 0 {
if currentlyEnabled {
d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling")
}
delete(node.Attributes, execDriverAttr)
return false, nil
}
if !currentlyEnabled {
d.logger.Printf("[DEBUG] driver.exec: exec driver is enabled")
}
node.Attributes[execDriverAttr] = "1"
return true, nil
}

View File

@ -1,206 +0,0 @@
package executor
import (
"fmt"
"log"
"os/exec"
"sync"
"syscall"
"time"
"github.com/armon/circbuf"
docker "github.com/fsouza/go-dockerclient"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
)
var (
// We store the client globally to cache the connection to the docker daemon.
createClient sync.Once
client *docker.Client
)
const (
// The default check timeout
defaultCheckTimeout = 30 * time.Second
)
// DockerScriptCheck runs Nagios-compatible scripts in a docker container and
// provides the check result
type DockerScriptCheck struct {
id string // id of the check
interval time.Duration // interval of the check
timeout time.Duration // timeout of the check
containerID string // container id in which the check will be invoked
logger *log.Logger
cmd string // check command
args []string // check command arguments
dockerEndpoint string // docker endpoint
tlsCert string // path to tls certificate
tlsCa string // path to tls ca
tlsKey string // path to tls key
}
// dockerClient creates the client to interact with the docker daemon
func (d *DockerScriptCheck) dockerClient() (*docker.Client, error) {
if client != nil {
return client, nil
}
var err error
createClient.Do(func() {
if d.dockerEndpoint != "" {
if d.tlsCert+d.tlsKey+d.tlsCa != "" {
d.logger.Printf("[DEBUG] executor.checks: using TLS client connection to %s", d.dockerEndpoint)
client, err = docker.NewTLSClient(d.dockerEndpoint, d.tlsCert, d.tlsKey, d.tlsCa)
} else {
d.logger.Printf("[DEBUG] executor.checks: using standard client connection to %s", d.dockerEndpoint)
client, err = docker.NewClient(d.dockerEndpoint)
}
return
}
d.logger.Println("[DEBUG] executor.checks: using client connection initialized from environment")
client, err = docker.NewClientFromEnv()
})
return client, err
}
// Run runs a script check inside a docker container
func (d *DockerScriptCheck) Run() *cstructs.CheckResult {
var (
exec *docker.Exec
err error
execRes *docker.ExecInspect
ts = time.Now()
)
if client, err = d.dockerClient(); err != nil {
return &cstructs.CheckResult{Err: err}
}
execOpts := docker.CreateExecOptions{
AttachStdin: false,
AttachStdout: true,
AttachStderr: true,
Tty: false,
Cmd: append([]string{d.cmd}, d.args...),
Container: d.containerID,
}
if exec, err = client.CreateExec(execOpts); err != nil {
return &cstructs.CheckResult{Err: err}
}
output, _ := circbuf.NewBuffer(int64(cstructs.CheckBufSize))
startOpts := docker.StartExecOptions{
Detach: false,
Tty: false,
OutputStream: output,
ErrorStream: output,
}
if err = client.StartExec(exec.ID, startOpts); err != nil {
return &cstructs.CheckResult{Err: err}
}
if execRes, err = client.InspectExec(exec.ID); err != nil {
return &cstructs.CheckResult{Err: err}
}
return &cstructs.CheckResult{
ExitCode: execRes.ExitCode,
Output: string(output.Bytes()),
Timestamp: ts,
}
}
// ID returns the check id
func (d *DockerScriptCheck) ID() string {
return d.id
}
// Interval returns the interval at which the check has to run
func (d *DockerScriptCheck) Interval() time.Duration {
return d.interval
}
// Timeout returns the duration after which a check is timed out.
func (d *DockerScriptCheck) Timeout() time.Duration {
if d.timeout == 0 {
return defaultCheckTimeout
}
return d.timeout
}
// ExecScriptCheck runs a Nagios-compatible script and returns the check result
type ExecScriptCheck struct {
id string // id of the script check
interval time.Duration // interval at which the check is invoked
timeout time.Duration // timeout duration of the check
cmd string // command of the check
args []string // args passed to the check
taskDir string // the root directory of the check
FSIsolation bool // indicates whether the check has to be run within a chroot
}
// Run runs an exec script check
func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
buf, _ := circbuf.NewBuffer(int64(cstructs.CheckBufSize))
cmd := exec.Command(e.cmd, e.args...)
cmd.Stdout = buf
cmd.Stderr = buf
e.setChroot(cmd)
ts := time.Now()
if err := cmd.Start(); err != nil {
return &cstructs.CheckResult{Err: err}
}
errCh := make(chan error, 2)
go func() {
errCh <- cmd.Wait()
}()
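// Block until the check command finishes or the timeout fires. On timeout an
// error is pushed onto errCh so the next loop iteration reports the check as
// failed.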
for {
select {
case err := <-errCh:
endTime := time.Now()
if err == nil {
return &cstructs.CheckResult{
ExitCode: 0,
Output: string(buf.Bytes()),
Timestamp: ts,
}
}
exitCode := 1
if exitErr, ok := err.(*exec.ExitError); ok {
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
exitCode = status.ExitStatus()
}
}
return &cstructs.CheckResult{
ExitCode: exitCode,
Output: string(buf.Bytes()),
Timestamp: ts,
Duration: endTime.Sub(ts),
}
case <-time.After(e.Timeout()):
errCh <- fmt.Errorf("timed out after waiting %v", e.Timeout())
}
}
return nil
}
// ID returns the check id
func (e *ExecScriptCheck) ID() string {
return e.id
}
// Interval returns the interval at which the check has to run
func (e *ExecScriptCheck) Interval() time.Duration {
return e.interval
}
// Timeout returns the duration after which a check is timed out.
func (e *ExecScriptCheck) Timeout() time.Duration {
if e.timeout == 0 {
return defaultCheckTimeout
}
return e.timeout
}

View File

@ -1,18 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris

package executor
import (
"os/exec"
"syscall"
)
func (e *ExecScriptCheck) setChroot(cmd *exec.Cmd) {
if e.FSIsolation {
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.SysProcAttr.Chroot = e.taskDir
}
cmd.Dir = "/"
}

View File

@ -1,8 +0,0 @@
// +build windows

package executor
import "os/exec"
func (e *ExecScriptCheck) setChroot(cmd *exec.Cmd) {
}

View File

@ -1,856 +0,0 @@
package executor
import (
"fmt"
"io/ioutil"
"log"
"net"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/hashicorp/go-multierror"
"github.com/mitchellh/go-ps"
"github.com/shirou/gopsutil/process"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/driver/env"
"github.com/hashicorp/nomad/client/driver/logging"
"github.com/hashicorp/nomad/client/stats"
"github.com/hashicorp/nomad/command/agent/consul"
shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
cstructs "github.com/hashicorp/nomad/client/structs"
)
const (
// pidScanInterval is the interval at which the executor scans the process
// tree for finding out the pids that the executor and its child processes
// have forked
pidScanInterval = 5 * time.Second
)
var (
// The statistics the basic executor exposes
ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
)
// Executor is the interface which allows a driver to launch and supervise
// a process
type Executor interface {
LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error)
LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error)
Wait() (*ProcessState, error)
ShutDown() error
Exit() error
UpdateLogConfig(logConfig *structs.LogConfig) error
UpdateTask(task *structs.Task) error
SyncServices(ctx *ConsulContext) error
DeregisterServices() error
Version() (*ExecutorVersion, error)
Stats() (*cstructs.TaskResourceUsage, error)
}
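// Typical driver usage, as seen in the exec driver above: LaunchCmd starts the
// user process, Wait blocks until it exits, ShutDown requests a graceful stop,
// and Exit tears down loggers, Consul services, and any resource containers.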
// ConsulContext holds context to configure the Consul client and run checks
type ConsulContext struct {
// ConsulConfig contains the configuration information for talking
// with this Nomad Agent's Consul Agent.
ConsulConfig *config.ConsulConfig
// ContainerID is the ID of the container
ContainerID string
// TLSCert is the cert which the docker client uses while interacting with the docker
// daemon over TLS
TLSCert string
// TLSCa is the CA which the docker client uses while interacting with the docker
// daemon over TLS
TLSCa string
// TLSKey is the TLS key which the docker client uses while interacting with
// the docker daemon
TLSKey string
// DockerEndpoint is the endpoint of the docker daemon
DockerEndpoint string
}
// ExecutorContext holds context to configure the command user
// wants to run and isolate it
type ExecutorContext struct {
// TaskEnv holds information about the environment of a Task
TaskEnv *env.TaskEnvironment
// AllocDir is the handle to do operations on the alloc dir of
// the task
AllocDir *allocdir.AllocDir
// Task is the task whose executor is being launched
Task *structs.Task
// AllocID is the allocation id to which the task belongs
AllocID string
// A mapping of directories on the host OS to attempt to embed inside each
// task's chroot.
ChrootEnv map[string]string
// Driver is the name of the driver that invoked the executor
Driver string
// PortUpperBound is the upper bound of the ports that we can use to start
// the syslog server
PortUpperBound uint
// PortLowerBound is the lower bound of the ports that we can use to start
// the syslog server
PortLowerBound uint
}
// ExecCommand holds the user command, args, and other isolation-related
// settings.
type ExecCommand struct {
// Cmd is the command that the user wants to run.
Cmd string
// Args is the args of the command that the user wants to run.
Args []string
// FSIsolation determines whether the command would be run in a chroot.
FSIsolation bool
// User is the user which the executor uses to run the command.
User string
// ResourceLimits determines whether resource limits are enforced by the
// executor.
ResourceLimits bool
}
// ProcessState holds information about the state of a user process.
type ProcessState struct {
Pid int
ExitCode int
Signal int
IsolationConfig *dstructs.IsolationConfig
Time time.Time
}
// nomadPid holds a pid and its cpu percentage calculators
type nomadPid struct {
pid int
cpuStatsTotal *stats.CpuStats
cpuStatsUser *stats.CpuStats
cpuStatsSys *stats.CpuStats
}
// SyslogServerState holds the address and isolation information of a launched
// syslog server
type SyslogServerState struct {
IsolationConfig *dstructs.IsolationConfig
Addr string
}
// ExecutorVersion is the version of the executor
type ExecutorVersion struct {
Version string
}
func (v *ExecutorVersion) GoString() string {
return v.Version
}
// UniversalExecutor is an implementation of the Executor which launches and
// supervises processes. In addition to process supervision it provides resource
// and file system isolation
type UniversalExecutor struct {
cmd exec.Cmd
ctx *ExecutorContext
command *ExecCommand
pids map[int]*nomadPid
pidLock sync.RWMutex
taskDir string
exitState *ProcessState
processExited chan interface{}
fsIsolationEnforced bool
lre *logging.FileRotator
lro *logging.FileRotator
rotatorLock sync.Mutex
shutdownCh chan struct{}
syslogServer *logging.SyslogServer
syslogChan chan *logging.SyslogMessage
resConCtx resourceContainerContext
consulSyncer *consul.Syncer
consulCtx *ConsulContext
totalCpuStats *stats.CpuStats
userCpuStats *stats.CpuStats
systemCpuStats *stats.CpuStats
logger *log.Logger
}
// NewExecutor returns an Executor
func NewExecutor(logger *log.Logger) Executor {
if err := shelpers.Init(); err != nil {
logger.Printf("[FATAL] executor: unable to initialize stats: %v", err)
return nil
}
exec := &UniversalExecutor{
logger: logger,
processExited: make(chan interface{}),
totalCpuStats: stats.NewCpuStats(),
userCpuStats: stats.NewCpuStats(),
systemCpuStats: stats.NewCpuStats(),
pids: make(map[int]*nomadPid),
}
return exec
}
// Version returns the api version of the executor
func (e *UniversalExecutor) Version() (*ExecutorVersion, error) {
return &ExecutorVersion{Version: "1.0.0"}, nil
}
// LaunchCmd launches a process and returns its state. It also configures and
// applies isolation on certain platforms.
func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error) {
e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " "))
e.ctx = ctx
e.command = command
// setting the user of the process
if command.User != "" {
e.logger.Printf("[DEBUG] executor: running command as %s", command.User)
if err := e.runAs(command.User); err != nil {
return nil, err
}
}
// configuring the task dir
if err := e.configureTaskDir(); err != nil {
return nil, err
}
e.ctx.TaskEnv.Build()
// configure the chroot and the resource container, and start the plugin
// process in the chroot.
if err := e.configureIsolation(); err != nil {
return nil, err
}
// Apply ourselves into the resource container. The executor MUST be in
// the resource container before the user task is started, otherwise we
// are subject to a fork attack in which a process escapes isolation by
// immediately forking.
if err := e.applyLimits(os.Getpid()); err != nil {
return nil, err
}
// Setup the loggers
if err := e.configureLoggers(); err != nil {
return nil, err
}
e.cmd.Stdout = e.lro
e.cmd.Stderr = e.lre
// Look up the binary path and make it executable
absPath, err := e.lookupBin(ctx.TaskEnv.ReplaceEnv(command.Cmd))
if err != nil {
return nil, err
}
if err := e.makeExecutable(absPath); err != nil {
return nil, err
}
path := absPath
// Determine the path to run as it may have to be relative to the chroot.
if e.fsIsolationEnforced {
rel, err := filepath.Rel(e.taskDir, path)
if err != nil {
return nil, err
}
path = rel
}
// Set the commands arguments
e.cmd.Path = path
e.cmd.Args = append([]string{e.cmd.Path}, ctx.TaskEnv.ParseAndReplace(command.Args)...)
e.cmd.Env = ctx.TaskEnv.EnvList()
// Start the process
if err := e.cmd.Start(); err != nil {
return nil, err
}
go e.collectPids()
go e.wait()
ic := e.resConCtx.getIsolationConfig()
return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil
}
// configureLoggers sets up the standard out/error file rotators
func (e *UniversalExecutor) configureLoggers() error {
e.rotatorLock.Lock()
defer e.rotatorLock.Unlock()
logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024)
if e.lro == nil {
lro, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", e.ctx.Task.Name),
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return err
}
e.lro = lro
}
if e.lre == nil {
lre, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", e.ctx.Task.Name),
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return err
}
e.lre = lre
}
return nil
}
// Wait waits until a process has exited and returns its exit code and errors
func (e *UniversalExecutor) Wait() (*ProcessState, error) {
<-e.processExited
return e.exitState, nil
}
// COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist.
// UpdateLogConfig updates the log configuration
func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error {
e.ctx.Task.LogConfig = logConfig
if e.lro == nil {
return fmt.Errorf("log rotator for stdout doesn't exist")
}
e.lro.MaxFiles = logConfig.MaxFiles
e.lro.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
if e.lre == nil {
return fmt.Errorf("log rotator for stderr doesn't exist")
}
e.lre.MaxFiles = logConfig.MaxFiles
e.lre.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
return nil
}
func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
e.ctx.Task = task
// Updating Log Config
fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024)
e.lro.MaxFiles = task.LogConfig.MaxFiles
e.lro.FileSize = fileSize
e.lre.MaxFiles = task.LogConfig.MaxFiles
e.lre.FileSize = fileSize
// Re-syncing task with Consul agent
if e.consulSyncer != nil {
e.interpolateServices(e.ctx.Task)
domain := consul.NewExecutorDomain(e.ctx.AllocID, task.Name)
serviceMap := generateServiceKeys(e.ctx.AllocID, task.Services)
e.consulSyncer.SetServices(domain, serviceMap)
}
return nil
}
// generateServiceKeys takes a list of interpolated Nomad Services and returns a map
// of ServiceKeys to Nomad Services.
func generateServiceKeys(allocID string, services []*structs.Service) map[consul.ServiceKey]*structs.Service {
keys := make(map[consul.ServiceKey]*structs.Service, len(services))
for _, service := range services {
key := consul.GenerateServiceKey(service)
keys[key] = service
}
return keys
}
func (e *UniversalExecutor) wait() {
defer close(e.processExited)
err := e.cmd.Wait()
ic := e.resConCtx.getIsolationConfig()
if err == nil {
e.exitState = &ProcessState{Pid: 0, ExitCode: 0, IsolationConfig: ic, Time: time.Now()}
return
}
exitCode := 1
var signal int
if exitErr, ok := err.(*exec.ExitError); ok {
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
exitCode = status.ExitStatus()
if status.Signaled() {
// bash(1) uses the lower 7 bits of a uint8
// to indicate normal program failure (see
// <sysexits.h>). If a process terminates due
// to a signal, encode the signal number to
// indicate which signal caused the process
// to terminate. Mirror this exit code
// encoding scheme.
const exitSignalBase = 128
signal = int(status.Signal())
exitCode = exitSignalBase + signal
}
}
} else {
e.logger.Printf("[DEBUG] executor: unexpected Wait() error type: %v", err)
}
e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Signal: signal, IsolationConfig: ic, Time: time.Now()}
}
var (
// finishedErr is the error message received when trying to kill an already
// exited process.
finishedErr = "os: process already finished"
)
// ClientCleanup is the cleanup routine that a Nomad Client uses to remove the
// remnants of a child UniversalExecutor.
func ClientCleanup(ic *dstructs.IsolationConfig, pid int) error {
return clientCleanup(ic, pid)
}
// Exit cleans up the alloc directory, destroys resource container and kills the
// user process
func (e *UniversalExecutor) Exit() error {
var merr multierror.Error
if e.syslogServer != nil {
e.syslogServer.Shutdown()
}
e.lre.Close()
e.lro.Close()
if e.consulSyncer != nil {
e.consulSyncer.Shutdown()
}
// If the executor did not launch a process, return.
if e.command == nil {
return nil
}
// Prefer killing the process via the resource container.
if e.cmd.Process != nil && !e.command.ResourceLimits {
proc, err := os.FindProcess(e.cmd.Process.Pid)
if err != nil {
e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v",
e.cmd.Process.Pid, err)
} else if err := proc.Kill(); err != nil && err.Error() != finishedErr {
merr.Errors = append(merr.Errors,
fmt.Errorf("can't kill process with pid: %v, err: %v", e.cmd.Process.Pid, err))
}
}
if e.command.ResourceLimits {
if err := e.resConCtx.executorCleanup(); err != nil {
merr.Errors = append(merr.Errors, err)
}
}
if e.command.FSIsolation {
if err := e.removeChrootMounts(); err != nil {
merr.Errors = append(merr.Errors, err)
}
}
return merr.ErrorOrNil()
}
// ShutDown sends an interrupt signal to the user process
func (e *UniversalExecutor) ShutDown() error {
if e.cmd.Process == nil {
return fmt.Errorf("executor.shutdown error: no process found")
}
proc, err := os.FindProcess(e.cmd.Process.Pid)
if err != nil {
return fmt.Errorf("executor.shutdown failed to find process: %v", err)
}
if runtime.GOOS == "windows" {
if err := proc.Kill(); err != nil && err.Error() != finishedErr {
return err
}
return nil
}
if err = proc.Signal(os.Interrupt); err != nil && err.Error() != finishedErr {
return fmt.Errorf("executor.shutdown error: %v", err)
}
return nil
}
// SyncServices syncs the services of the task that the executor is running with
// Consul
func (e *UniversalExecutor) SyncServices(ctx *ConsulContext) error {
e.logger.Printf("[INFO] executor: registering services")
e.consulCtx = ctx
if e.consulSyncer == nil {
cs, err := consul.NewSyncer(ctx.ConsulConfig, e.shutdownCh, e.logger)
if err != nil {
return err
}
e.consulSyncer = cs
go e.consulSyncer.Run()
}
e.interpolateServices(e.ctx.Task)
e.consulSyncer.SetDelegatedChecks(e.createCheckMap(), e.createCheck)
e.consulSyncer.SetAddrFinder(e.ctx.Task.FindHostAndPortFor)
domain := consul.NewExecutorDomain(e.ctx.AllocID, e.ctx.Task.Name)
serviceMap := generateServiceKeys(e.ctx.AllocID, e.ctx.Task.Services)
e.consulSyncer.SetServices(domain, serviceMap)
return nil
}
// DeregisterServices removes the services of the task that the executor is
// running from Consul
func (e *UniversalExecutor) DeregisterServices() error {
e.logger.Printf("[INFO] executor: de-registering services and shutting down consul service")
if e.consulSyncer != nil {
return e.consulSyncer.Shutdown()
}
return nil
}
// pidStats returns the resource usage stats per pid
func (e *UniversalExecutor) pidStats() (map[string]*cstructs.ResourceUsage, error) {
stats := make(map[string]*cstructs.ResourceUsage)
e.pidLock.RLock()
pids := make(map[int]*nomadPid, len(e.pids))
for k, v := range e.pids {
pids[k] = v
}
e.pidLock.RUnlock()
for pid, np := range pids {
p, err := process.NewProcess(int32(pid))
if err != nil {
e.logger.Printf("[DEBUG] executor: unable to create new process with pid: %v", pid)
continue
}
ms := &cstructs.MemoryStats{}
if memInfo, err := p.MemoryInfo(); err == nil {
ms.RSS = memInfo.RSS
ms.Swap = memInfo.Swap
ms.Measured = ExecutorBasicMeasuredMemStats
}
cs := &cstructs.CpuStats{}
if cpuStats, err := p.Times(); err == nil {
cs.SystemMode = np.cpuStatsSys.Percent(cpuStats.System * float64(time.Second))
cs.UserMode = np.cpuStatsUser.Percent(cpuStats.User * float64(time.Second))
cs.Measured = ExecutorBasicMeasuredCpuStats
// calculate cpu usage percent
cs.Percent = np.cpuStatsTotal.Percent(cpuStats.Total() * float64(time.Second))
}
stats[strconv.Itoa(pid)] = &cstructs.ResourceUsage{MemoryStats: ms, CpuStats: cs}
}
return stats, nil
}
// configureTaskDir sets the task dir in the executor
func (e *UniversalExecutor) configureTaskDir() error {
taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.Task.Name]
e.taskDir = taskDir
if !ok {
return fmt.Errorf("couldn't find task directory for task %v", e.ctx.Task.Name)
}
e.cmd.Dir = taskDir
return nil
}
// lookupBin looks up the path of the binary to run by checking the following
// locations, in order: the task's local/ directory, the task directory itself,
// and finally the host $PATH. The returned path is absolute.
func (e *UniversalExecutor) lookupBin(bin string) (string, error) {
// Check in the local directory
local := filepath.Join(e.taskDir, allocdir.TaskLocal, bin)
if _, err := os.Stat(local); err == nil {
return local, nil
}
// Check at the root of the task's directory
root := filepath.Join(e.taskDir, bin)
if _, err := os.Stat(root); err == nil {
return root, nil
}
// Check the $PATH
if host, err := exec.LookPath(bin); err == nil {
return host, nil
}
return "", fmt.Errorf("binary %q could not be found", bin)
}
// makeExecutable makes the given file executable for root, group, and others.
func (e *UniversalExecutor) makeExecutable(binPath string) error {
if runtime.GOOS == "windows" {
return nil
}
fi, err := os.Stat(binPath)
if err != nil {
if os.IsNotExist(err) {
return fmt.Errorf("binary %q does not exist", binPath)
}
return fmt.Errorf("specified binary is invalid: %v", err)
}
// If it is not executable, make it so.
perm := fi.Mode().Perm()
req := os.FileMode(0555)
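// Example: a file with mode 0640 is missing the 0555 bits, so it is chmod'ed
// to 0640|0555 = 0755; a file that is already 0755 is left untouched.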
if perm&req != req {
if err := os.Chmod(binPath, perm|req); err != nil {
return fmt.Errorf("error making %q executable: %s", binPath, err)
}
}
return nil
}
// getListener returns a listener for the syslog server: on Windows a TCP
// listener on a free port between the lower and upper bounds, elsewhere a Unix
// domain socket.
func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) {
if runtime.GOOS == "windows" {
return e.listenerTCP(lowerBound, upperBound)
}
return e.listenerUnix()
}
// listenerTCP creates a TCP listener using an unused port between an upper and
// lower bound
func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) {
for i := lowerBound; i <= upperBound; i++ {
addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i))
if err != nil {
return nil, err
}
l, err := net.ListenTCP("tcp", addr)
if err != nil {
continue
}
return l, nil
}
return nil, fmt.Errorf("No free port found")
}
// listenerUnix creates a Unix domain socket
func (e *UniversalExecutor) listenerUnix() (net.Listener, error) {
f, err := ioutil.TempFile("", "plugin")
if err != nil {
return nil, err
}
path := f.Name()
if err := f.Close(); err != nil {
return nil, err
}
if err := os.Remove(path); err != nil {
return nil, err
}
return net.Listen("unix", path)
}
// createCheckMap creates a map of checks that the executor will handle on its
// own
func (e *UniversalExecutor) createCheckMap() map[string]struct{} {
checks := map[string]struct{}{
"script": struct{}{},
}
return checks
}
// createCheck creates NomadCheck from a ServiceCheck
func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID string) (consul.Check, error) {
if check.Type == structs.ServiceCheckScript && e.ctx.Driver == "docker" {
return &DockerScriptCheck{
id: checkID,
interval: check.Interval,
timeout: check.Timeout,
containerID: e.consulCtx.ContainerID,
logger: e.logger,
cmd: check.Command,
args: check.Args,
}, nil
}
if check.Type == structs.ServiceCheckScript && (e.ctx.Driver == "exec" ||
e.ctx.Driver == "raw_exec" || e.ctx.Driver == "java") {
return &ExecScriptCheck{
id: checkID,
interval: check.Interval,
timeout: check.Timeout,
cmd: check.Command,
args: check.Args,
taskDir: e.taskDir,
FSIsolation: e.command.FSIsolation,
}, nil
}
return nil, fmt.Errorf("couldn't create check for %v", check.Name)
}
// interpolateServices interpolates tags in a service and checks with values from the
// task's environment.
func (e *UniversalExecutor) interpolateServices(task *structs.Task) {
e.ctx.TaskEnv.Build()
for _, service := range task.Services {
for _, check := range service.Checks {
if check.Type == structs.ServiceCheckScript {
check.Name = e.ctx.TaskEnv.ReplaceEnv(check.Name)
check.Command = e.ctx.TaskEnv.ReplaceEnv(check.Command)
check.Args = e.ctx.TaskEnv.ParseAndReplace(check.Args)
check.Path = e.ctx.TaskEnv.ReplaceEnv(check.Path)
check.Protocol = e.ctx.TaskEnv.ReplaceEnv(check.Protocol)
}
}
service.Name = e.ctx.TaskEnv.ReplaceEnv(service.Name)
service.Tags = e.ctx.TaskEnv.ParseAndReplace(service.Tags)
}
}
// collectPids collects, every pidScanInterval (5 seconds), the pids of the
// child processes that the executor is running
func (e *UniversalExecutor) collectPids() {
// Fire the timer right away when the executor starts; from then on the pids
// are collected every scan interval
timer := time.NewTimer(0)
defer timer.Stop()
for {
select {
case <-timer.C:
pids, err := e.getAllPids()
if err != nil {
e.logger.Printf("[DEBUG] executor: error collecting pids: %v", err)
}
e.pidLock.Lock()
// Adding pids which are not being tracked
for pid, np := range pids {
if _, ok := e.pids[pid]; !ok {
e.pids[pid] = np
}
}
// Removing pids which are no longer present
for pid := range e.pids {
if _, ok := pids[pid]; !ok {
delete(e.pids, pid)
}
}
e.pidLock.Unlock()
timer.Reset(pidScanInterval)
case <-e.processExited:
return
}
}
}
// scanPids scans all the pids on the machine running the current executor and
// returns the child processes of the executor.
func (e *UniversalExecutor) scanPids(parentPid int, allPids []ps.Process) (map[int]*nomadPid, error) {
processFamily := make(map[int]struct{})
processFamily[parentPid] = struct{}{}
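// Repeatedly sweep the pid list, adding any process whose parent is already in
// processFamily; the sweep restarts until a full pass finds no new children,
// which yields the transitive closure of the executor's descendants.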
// A buffer for holding pids which haven't matched with any parent pid
var pidsRemaining []ps.Process
for {
// flag to indicate if we have found a match
foundNewPid := false
for _, pid := range allPids {
_, childPid := processFamily[pid.PPid()]
// checking if the pid is a child of any of the parents
if childPid {
processFamily[pid.Pid()] = struct{}{}
foundNewPid = true
} else {
// if it is not, then we add the pid to the buffer
pidsRemaining = append(pidsRemaining, pid)
}
// scan only the pids which are left in the buffer
allPids = pidsRemaining
}
// stop scanning once a full pass couldn't find a single new match
if !foundNewPid {
break
}
}
res := make(map[int]*nomadPid)
for pid := range processFamily {
np := nomadPid{
pid: pid,
cpuStatsTotal: stats.NewCpuStats(),
cpuStatsUser: stats.NewCpuStats(),
cpuStatsSys: stats.NewCpuStats(),
}
res[pid] = &np
}
return res, nil
}
// aggregatedResourceUsage aggregates the resource usage of all the pids and
// returns a TaskResourceUsage data point
func (e *UniversalExecutor) aggregatedResourceUsage(pidStats map[string]*cstructs.ResourceUsage) *cstructs.TaskResourceUsage {
ts := time.Now().UTC().UnixNano()
var (
systemModeCPU, userModeCPU, percent float64
totalRSS, totalSwap uint64
)
for _, pidStat := range pidStats {
systemModeCPU += pidStat.CpuStats.SystemMode
userModeCPU += pidStat.CpuStats.UserMode
percent += pidStat.CpuStats.Percent
totalRSS += pidStat.MemoryStats.RSS
totalSwap += pidStat.MemoryStats.Swap
}
totalCPU := &cstructs.CpuStats{
SystemMode: systemModeCPU,
UserMode: userModeCPU,
Percent: percent,
Measured: ExecutorBasicMeasuredCpuStats,
TotalTicks: e.systemCpuStats.TicksConsumed(percent),
}
totalMemory := &cstructs.MemoryStats{
RSS: totalRSS,
Swap: totalSwap,
Measured: ExecutorBasicMeasuredMemStats,
}
resourceUsage := cstructs.ResourceUsage{
MemoryStats: totalMemory,
CpuStats: totalCPU,
}
return &cstructs.TaskResourceUsage{
ResourceUsage: &resourceUsage,
Timestamp: ts,
Pids: pidStats,
}
}

View File

@ -1,46 +0,0 @@
// +build darwin dragonfly freebsd netbsd openbsd solaris windows

package executor
import (
"os"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/mitchellh/go-ps"
)
func (e *UniversalExecutor) configureChroot() error {
return nil
}
func (e *UniversalExecutor) removeChrootMounts() error {
return nil
}
func (e *UniversalExecutor) runAs(userid string) error {
return nil
}
func (e *UniversalExecutor) applyLimits(pid int) error {
return nil
}
func (e *UniversalExecutor) configureIsolation() error {
return nil
}
func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
pidStats, err := e.pidStats()
if err != nil {
return nil, err
}
return e.aggregatedResourceUsage(pidStats), nil
}
func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
allProcesses, err := ps.Processes()
if err != nil {
return nil, err
}
return e.scanPids(os.Getpid(), allProcesses)
}

View File

@ -1,373 +0,0 @@
package executor
import (
"fmt"
"os"
"os/user"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"github.com/hashicorp/go-multierror"
"github.com/mitchellh/go-ps"
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/stats"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
var (
// A mapping of directories on the host OS to attempt to embed inside each
// task's chroot.
chrootEnv = map[string]string{
"/bin": "/bin",
"/etc": "/etc",
"/lib": "/lib",
"/lib32": "/lib32",
"/lib64": "/lib64",
"/run/resolvconf": "/run/resolvconf",
"/sbin": "/sbin",
"/usr": "/usr",
}
// clockTicks is the number of clock ticks per second on the machine
clockTicks = uint64(system.GetClockTicks())
// The statistics the executor exposes when using cgroups
ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
)
// configureIsolation configures chroot and creates cgroups
func (e *UniversalExecutor) configureIsolation() error {
if e.command.FSIsolation {
if err := e.configureChroot(); err != nil {
return err
}
}
if e.command.ResourceLimits {
if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
return fmt.Errorf("error creating cgroups: %v", err)
}
}
return nil
}
// applyLimits puts a process in a pre-configured cgroup
func (e *UniversalExecutor) applyLimits(pid int) error {
if !e.command.ResourceLimits {
return nil
}
// Entering the process in the cgroup
manager := getCgroupManager(e.resConCtx.groups, nil)
if err := manager.Apply(pid); err != nil {
e.logger.Printf("[ERR] executor: error applying pid to cgroup: %v", err)
if er := e.removeChrootMounts(); er != nil {
e.logger.Printf("[ERR] executor: error removing chroot: %v", er)
}
return err
}
e.resConCtx.cgPaths = manager.GetPaths()
cgConfig := cgroupConfig.Config{Cgroups: e.resConCtx.groups}
if err := manager.Set(&cgConfig); err != nil {
e.logger.Printf("[ERR] executor: error setting cgroup config: %v", err)
if er := DestroyCgroup(e.resConCtx.groups, e.resConCtx.cgPaths, os.Getpid()); er != nil {
e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er)
}
if er := e.removeChrootMounts(); er != nil {
e.logger.Printf("[ERR] executor: error removing chroot: %v", er)
}
return err
}
return nil
}
// configureCgroups converts a Nomad Resources specification into the equivalent
// cgroup configuration. It returns an error if the resources are invalid.
func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
e.resConCtx.groups = &cgroupConfig.Cgroup{}
e.resConCtx.groups.Resources = &cgroupConfig.Resources{}
cgroupName := structs.GenerateUUID()
e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)
// TODO: verify this is needed for things like network access
e.resConCtx.groups.Resources.AllowAllDevices = true
if resources.MemoryMB > 0 {
// Total amount of memory allowed to consume
e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
// Disable swap to avoid issues on the machine
e.resConCtx.groups.Resources.MemorySwap = int64(-1)
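// Example: resources.MemoryMB = 256 yields a 268435456-byte (256 MiB) hard
// limit with swap disabled.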
}
if resources.CPU < 2 {
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
}
// Set the relative CPU shares for this cgroup.
e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU)
if resources.IOPS != 0 {
// Validate it is in an acceptable range.
if resources.IOPS < 10 || resources.IOPS > 1000 {
return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
}
e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS)
}
return nil
}
// Stats reports the resource utilization of the cgroup. If there is no resource
// isolation we aggregate the resource utilization of all the pids launched by
// the executor.
func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
if !e.command.ResourceLimits {
pidStats, err := e.pidStats()
if err != nil {
return nil, err
}
return e.aggregatedResourceUsage(pidStats), nil
}
ts := time.Now()
manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
stats, err := manager.GetStats()
if err != nil {
return nil, err
}
// Memory Related Stats
swap := stats.MemoryStats.SwapUsage
maxUsage := stats.MemoryStats.Usage.MaxUsage
rss := stats.MemoryStats.Stats["rss"]
cache := stats.MemoryStats.Stats["cache"]
ms := &cstructs.MemoryStats{
RSS: rss,
Cache: cache,
Swap: swap.Usage,
MaxUsage: maxUsage,
KernelUsage: stats.MemoryStats.KernelUsage.Usage,
KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
Measured: ExecutorCgroupMeasuredMemStats,
}
// CPU Related Stats
totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage)
userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode)
kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode)
totalPercent := e.totalCpuStats.Percent(totalProcessCPUUsage)
cs := &cstructs.CpuStats{
SystemMode: e.systemCpuStats.Percent(kernelModeTime),
UserMode: e.userCpuStats.Percent(userModeTime),
Percent: totalPercent,
ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
ThrottledTime: stats.CpuStats.ThrottlingData.ThrottledTime,
TotalTicks: e.systemCpuStats.TicksConsumed(totalPercent),
Measured: ExecutorCgroupMeasuredCpuStats,
}
taskResUsage := cstructs.TaskResourceUsage{
ResourceUsage: &cstructs.ResourceUsage{
MemoryStats: ms,
CpuStats: cs,
},
Timestamp: ts.UTC().UnixNano(),
}
if pidStats, err := e.pidStats(); err == nil {
taskResUsage.Pids = pidStats
}
return &taskResUsage, nil
}
// runAs takes a user id as a string, looks up the user, and sets the command
// to execute as that user.
func (e *UniversalExecutor) runAs(userid string) error {
u, err := user.Lookup(userid)
if err != nil {
return fmt.Errorf("Failed to identify user %v: %v", userid, err)
}
// Convert the uid and gid
uid, err := strconv.ParseUint(u.Uid, 10, 32)
if err != nil {
return fmt.Errorf("Unable to convert userid to uint32: %s", err)
}
gid, err := strconv.ParseUint(u.Gid, 10, 32)
if err != nil {
return fmt.Errorf("Unable to convert groupid to uint32: %s", err)
}
// Set the command to run as that user and group.
if e.cmd.SysProcAttr == nil {
e.cmd.SysProcAttr = &syscall.SysProcAttr{}
}
if e.cmd.SysProcAttr.Credential == nil {
e.cmd.SysProcAttr.Credential = &syscall.Credential{}
}
e.cmd.SysProcAttr.Credential.Uid = uint32(uid)
e.cmd.SysProcAttr.Credential.Gid = uint32(gid)
return nil
}
// configureChroot configures a chroot
func (e *UniversalExecutor) configureChroot() error {
allocDir := e.ctx.AllocDir
if err := allocDir.MountSharedDir(e.ctx.Task.Name); err != nil {
return err
}
chroot := chrootEnv
if len(e.ctx.ChrootEnv) > 0 {
chroot = e.ctx.ChrootEnv
}
if err := allocDir.Embed(e.ctx.Task.Name, chroot); err != nil {
return err
}
// Set the tasks AllocDir environment variable.
e.ctx.TaskEnv.
SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)).
SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)).
Build()
if e.cmd.SysProcAttr == nil {
e.cmd.SysProcAttr = &syscall.SysProcAttr{}
}
e.cmd.SysProcAttr.Chroot = e.taskDir
e.cmd.Dir = "/"
if err := allocDir.MountSpecialDirs(e.taskDir); err != nil {
return err
}
e.fsIsolationEnforced = true
return nil
}
// removeChrootMounts is an idempotent operation that cleans up the chroot
// mounts of the task directory and should be called when tearing down the task.
func (e *UniversalExecutor) removeChrootMounts() error {
// Prevent a race between Wait/ForceStop
e.resConCtx.cgLock.Lock()
defer e.resConCtx.cgLock.Unlock()
return e.ctx.AllocDir.UnmountAll()
}
// getAllPids returns the pids of all the processes spun up by the executor. We
// use the libcontainer APIs to get the pids when the user is using cgroup
// isolation, and we scan the entire process table if the user is not using any
// isolation.
func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
if e.command.ResourceLimits {
manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
pids, err := manager.GetAllPids()
if err != nil {
return nil, err
}
np := make(map[int]*nomadPid, len(pids))
for _, pid := range pids {
np[pid] = &nomadPid{
pid: pid,
cpuStatsTotal: stats.NewCpuStats(),
cpuStatsSys: stats.NewCpuStats(),
cpuStatsUser: stats.NewCpuStats(),
}
}
return np, nil
}
allProcesses, err := ps.Processes()
if err != nil {
return nil, err
}
return e.scanPids(os.Getpid(), allProcesses)
}
// DestroyCgroup kills all processes in the cgroup and removes the cgroup
// configuration from the host. This function is idempotent.
func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error {
mErrs := new(multierror.Error)
if groups == nil {
return fmt.Errorf("Can't destroy: cgroup configuration empty")
}
// Move the executor into the global cgroup so that the task specific
// cgroup can be destroyed.
nilGroup := &cgroupConfig.Cgroup{}
nilGroup.Path = "/"
nilGroup.Resources = groups.Resources
nilManager := getCgroupManager(nilGroup, nil)
err := nilManager.Apply(executorPid)
if err != nil && !strings.Contains(err.Error(), "no such process") {
return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err)
}
// Freeze the Cgroup so that it cannot continue to fork/exec.
manager := getCgroupManager(groups, cgPaths)
err = manager.Freeze(cgroupConfig.Frozen)
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
return fmt.Errorf("failed to freeze cgroup: %v", err)
}
var procs []*os.Process
pids, err := manager.GetAllPids()
if err != nil {
multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err))
// Unfreeze the cgroup.
err = manager.Freeze(cgroupConfig.Thawed)
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
}
return mErrs.ErrorOrNil()
}
// Kill the processes in the cgroup
for _, pid := range pids {
proc, err := os.FindProcess(pid)
if err != nil {
multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err))
continue
}
procs = append(procs, proc)
if e := proc.Kill(); e != nil {
multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e))
}
}
// Unfreeze the cgroup so we can wait.
err = manager.Freeze(cgroupConfig.Thawed)
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
}
// Wait on the killed processes to ensure they are cleaned up.
for _, proc := range procs {
// Don't capture the error because we expect this to fail for
// processes we didn't fork.
proc.Wait()
}
// Remove the cgroup.
if err := manager.Destroy(); err != nil {
multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err))
}
return mErrs.ErrorOrNil()
}
// getCgroupManager returns the correct libcontainer cgroup manager.
func getCgroupManager(groups *cgroupConfig.Cgroup, paths map[string]string) cgroups.Manager {
return &cgroupFs.Manager{Cgroups: groups, Paths: paths}
}

View File

@ -1,50 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris

package executor
import (
"fmt"
"io"
"log/syslog"
"github.com/hashicorp/nomad/client/driver/logging"
)
func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
e.ctx = ctx
// configuring the task dir
if err := e.configureTaskDir(); err != nil {
return nil, err
}
e.syslogChan = make(chan *logging.SyslogMessage, 2048)
l, err := e.getListener(e.ctx.PortLowerBound, e.ctx.PortUpperBound)
if err != nil {
return nil, err
}
e.logger.Printf("[DEBUG] sylog-server: launching syslog server on addr: %v", l.Addr().String())
if err := e.configureLoggers(); err != nil {
return nil, err
}
e.syslogServer = logging.NewSyslogServer(l, e.syslogChan, e.logger)
go e.syslogServer.Start()
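// collectLogs drains syslogChan and routes LOG_ERR messages to the stderr
// rotator and everything else to the stdout rotator.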
go e.collectLogs(e.lre, e.lro)
syslogAddr := fmt.Sprintf("%s://%s", l.Addr().Network(), l.Addr().String())
return &SyslogServerState{Addr: syslogAddr}, nil
}
func (e *UniversalExecutor) collectLogs(we io.Writer, wo io.Writer) {
for logParts := range e.syslogChan {
// If the severity of the log line is err then we write to stderr
// otherwise all messages go to stdout
if logParts.Severity == syslog.LOG_ERR {
e.lre.Write(logParts.Message)
e.lre.Write([]byte{'\n'})
} else {
e.lro.Write(logParts.Message)
e.lro.Write([]byte{'\n'})
}
}
}

View File

@ -1,5 +0,0 @@
package executor
func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
return nil, nil
}

View File

@ -1,24 +0,0 @@
// +build darwin dragonfly freebsd netbsd openbsd solaris windows

package executor
import (
dstructs "github.com/hashicorp/nomad/client/driver/structs"
)
// resourceContainerContext is a platform-specific struct for managing a
// resource container.
type resourceContainerContext struct {
}
func clientCleanup(ic *dstructs.IsolationConfig, pid int) error {
return nil
}
func (rc *resourceContainerContext) executorCleanup() error {
return nil
}
func (rc *resourceContainerContext) getIsolationConfig() *dstructs.IsolationConfig {
return nil
}

View File

@ -1,42 +0,0 @@
package executor
import (
"os"
"sync"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)
// resourceContainerContext is a platform-specific struct for managing a
// resource container. In the case of Linux, this is used to control Cgroups.
type resourceContainerContext struct {
groups *cgroupConfig.Cgroup
cgPaths map[string]string
cgLock sync.Mutex
}
// clientCleanup removes this host's Cgroup from the Nomad Client's context
func clientCleanup(ic *dstructs.IsolationConfig, pid int) error {
if err := DestroyCgroup(ic.Cgroup, ic.CgroupPaths, pid); err != nil {
return err
}
return nil
}
// cleanup removes this host's Cgroup from within an Executor's context
func (rc *resourceContainerContext) executorCleanup() error {
rc.cgLock.Lock()
defer rc.cgLock.Unlock()
if err := DestroyCgroup(rc.groups, rc.cgPaths, os.Getpid()); err != nil {
return err
}
return nil
}
func (rc *resourceContainerContext) getIsolationConfig() *dstructs.IsolationConfig {
return &dstructs.IsolationConfig{
Cgroup: rc.groups,
CgroupPaths: rc.cgPaths,
}
}

View File

@ -1,181 +0,0 @@
package driver
import (
"encoding/gob"
"log"
"net/rpc"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/client/driver/executor"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
// Registering these types since we have to serialize and de-serialize the Task
// structs over the wire between drivers and the executor.
func init() {
gob.Register([]interface{}{})
gob.Register(map[string]interface{}{})
gob.Register([]map[string]string{})
gob.Register([]map[string]int{})
}
type ExecutorRPC struct {
client *rpc.Client
logger *log.Logger
}
// LaunchCmdArgs wraps a user command and the args for the purposes of RPC
type LaunchCmdArgs struct {
Cmd *executor.ExecCommand
Ctx *executor.ExecutorContext
}
// LaunchSyslogServerArgs wraps the executor context for the purposes of RPC
type LaunchSyslogServerArgs struct {
Ctx *executor.ExecutorContext
}
// SyncServicesArgs wraps the consul context for the purposes of RPC
type SyncServicesArgs struct {
Ctx *executor.ConsulContext
}
func (e *ExecutorRPC) LaunchCmd(cmd *executor.ExecCommand, ctx *executor.ExecutorContext) (*executor.ProcessState, error) {
var ps *executor.ProcessState
err := e.client.Call("Plugin.LaunchCmd", LaunchCmdArgs{Cmd: cmd, Ctx: ctx}, &ps)
return ps, err
}
func (e *ExecutorRPC) LaunchSyslogServer(ctx *executor.ExecutorContext) (*executor.SyslogServerState, error) {
var ss *executor.SyslogServerState
err := e.client.Call("Plugin.LaunchSyslogServer", LaunchSyslogServerArgs{Ctx: ctx}, &ss)
return ss, err
}
func (e *ExecutorRPC) Wait() (*executor.ProcessState, error) {
var ps executor.ProcessState
err := e.client.Call("Plugin.Wait", new(interface{}), &ps)
return &ps, err
}
func (e *ExecutorRPC) ShutDown() error {
return e.client.Call("Plugin.ShutDown", new(interface{}), new(interface{}))
}
func (e *ExecutorRPC) Exit() error {
return e.client.Call("Plugin.Exit", new(interface{}), new(interface{}))
}
func (e *ExecutorRPC) UpdateLogConfig(logConfig *structs.LogConfig) error {
return e.client.Call("Plugin.UpdateLogConfig", logConfig, new(interface{}))
}
func (e *ExecutorRPC) UpdateTask(task *structs.Task) error {
return e.client.Call("Plugin.UpdateTask", task, new(interface{}))
}
func (e *ExecutorRPC) SyncServices(ctx *executor.ConsulContext) error {
return e.client.Call("Plugin.SyncServices", SyncServicesArgs{Ctx: ctx}, new(interface{}))
}
func (e *ExecutorRPC) DeregisterServices() error {
return e.client.Call("Plugin.DeregisterServices", new(interface{}), new(interface{}))
}
func (e *ExecutorRPC) Version() (*executor.ExecutorVersion, error) {
var version executor.ExecutorVersion
err := e.client.Call("Plugin.Version", new(interface{}), &version)
return &version, err
}
func (e *ExecutorRPC) Stats() (*cstructs.TaskResourceUsage, error) {
var resourceUsage cstructs.TaskResourceUsage
err := e.client.Call("Plugin.Stats", new(interface{}), &resourceUsage)
return &resourceUsage, err
}
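// ExecutorRPCServer is the plugin-side counterpart of ExecutorRPC: each RPC
// method above maps onto one of the handlers below, which delegate to the real
// executor.Executor implementation.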
type ExecutorRPCServer struct {
Impl executor.Executor
logger *log.Logger
}
func (e *ExecutorRPCServer) LaunchCmd(args LaunchCmdArgs, ps *executor.ProcessState) error {
state, err := e.Impl.LaunchCmd(args.Cmd, args.Ctx)
if state != nil {
*ps = *state
}
return err
}
func (e *ExecutorRPCServer) LaunchSyslogServer(args LaunchSyslogServerArgs, ss *executor.SyslogServerState) error {
state, err := e.Impl.LaunchSyslogServer(args.Ctx)
if state != nil {
*ss = *state
}
return err
}
func (e *ExecutorRPCServer) Wait(args interface{}, ps *executor.ProcessState) error {
state, err := e.Impl.Wait()
if state != nil {
*ps = *state
}
return err
}
func (e *ExecutorRPCServer) ShutDown(args interface{}, resp *interface{}) error {
return e.Impl.ShutDown()
}
func (e *ExecutorRPCServer) Exit(args interface{}, resp *interface{}) error {
return e.Impl.Exit()
}
func (e *ExecutorRPCServer) UpdateLogConfig(args *structs.LogConfig, resp *interface{}) error {
return e.Impl.UpdateLogConfig(args)
}
func (e *ExecutorRPCServer) UpdateTask(args *structs.Task, resp *interface{}) error {
return e.Impl.UpdateTask(args)
}
func (e *ExecutorRPCServer) SyncServices(args SyncServicesArgs, resp *interface{}) error {
return e.Impl.SyncServices(args.Ctx)
}
func (e *ExecutorRPCServer) DeregisterServices(args interface{}, resp *interface{}) error {
return e.Impl.DeregisterServices()
}
func (e *ExecutorRPCServer) Version(args interface{}, version *executor.ExecutorVersion) error {
ver, err := e.Impl.Version()
if ver != nil {
*version = *ver
}
return err
}
func (e *ExecutorRPCServer) Stats(args interface{}, resourceUsage *cstructs.TaskResourceUsage) error {
ru, err := e.Impl.Stats()
if ru != nil {
*resourceUsage = *ru
}
return err
}
type ExecutorPlugin struct {
logger *log.Logger
Impl *ExecutorRPCServer
}
func (p *ExecutorPlugin) Server(*plugin.MuxBroker) (interface{}, error) {
if p.Impl == nil {
p.Impl = &ExecutorRPCServer{Impl: executor.NewExecutor(p.logger), logger: p.logger}
}
return p.Impl, nil
}
func (p *ExecutorPlugin) Client(b *plugin.MuxBroker, c *rpc.Client) (interface{}, error) {
return &ExecutorRPC{client: c, logger: p.logger}, nil
}

View File

@ -1,416 +0,0 @@
package driver
import (
"bytes"
"encoding/json"
"fmt"
"log"
"os/exec"
"path/filepath"
"runtime"
"strings"
"syscall"
"time"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-plugin"
"github.com/mitchellh/mapstructure"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/executor"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/client/fingerprint"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/discover"
"github.com/hashicorp/nomad/helper/fields"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// The key populated in Node Attributes to indicate presence of the Java
// driver
javaDriverAttr = "driver.java"
)
// JavaDriver is a simple driver to execute applications packaged in Jars.
// It literally just fork/execs tasks with the java command.
type JavaDriver struct {
DriverContext
fingerprint.StaticFingerprinter
}
type JavaDriverConfig struct {
JarPath string `mapstructure:"jar_path"`
JvmOpts []string `mapstructure:"jvm_options"`
Args []string `mapstructure:"args"`
}
// javaHandle is returned from Start/Open as a handle to the PID
type javaHandle struct {
pluginClient *plugin.Client
userPid int
executor executor.Executor
isolationConfig *dstructs.IsolationConfig
taskDir string
allocDir *allocdir.AllocDir
killTimeout time.Duration
maxKillTimeout time.Duration
version string
logger *log.Logger
waitCh chan *dstructs.WaitResult
doneCh chan struct{}
}
// NewJavaDriver is used to create a new exec driver
func NewJavaDriver(ctx *DriverContext) Driver {
return &JavaDriver{DriverContext: *ctx}
}
// Validate is used to validate the driver configuration
func (d *JavaDriver) Validate(config map[string]interface{}) error {
fd := &fields.FieldData{
Raw: config,
Schema: map[string]*fields.FieldSchema{
"jar_path": &fields.FieldSchema{
Type: fields.TypeString,
Required: true,
},
"jvm_options": &fields.FieldSchema{
Type: fields.TypeArray,
},
"args": &fields.FieldSchema{
Type: fields.TypeArray,
},
},
}
if err := fd.Validate(); err != nil {
return err
}
return nil
}
func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Get the current status so that we can log any debug messages only if the
// state changes
_, currentlyEnabled := node.Attributes[javaDriverAttr]
// Only enable if we are root and cgroups are mounted when running on linux systems.
if runtime.GOOS == "linux" && (syscall.Geteuid() != 0 || !d.cgroupsMounted(node)) {
if currentlyEnabled {
d.logger.Printf("[DEBUG] driver.java: root priviledges and mounted cgroups required on linux, disabling")
}
delete(node.Attributes, "driver.java")
return false, nil
}
// Find java version
var out bytes.Buffer
var erOut bytes.Buffer
cmd := exec.Command("java", "-version")
cmd.Stdout = &out
cmd.Stderr = &erOut
err := cmd.Run()
if err != nil {
// assume Java wasn't found
delete(node.Attributes, javaDriverAttr)
return false, nil
}
// 'java -version' returns output on Stderr typically.
// Check stdout, but it's probably empty
var infoString string
if out.String() != "" {
infoString = out.String()
}
if erOut.String() != "" {
infoString = erOut.String()
}
if infoString == "" {
if currentlyEnabled {
d.logger.Println("[WARN] driver.java: error parsing Java version information, aborting")
}
delete(node.Attributes, javaDriverAttr)
return false, nil
}
// Assume 'java -version' returns 3 lines:
// java version "1.6.0_36"
// OpenJDK Runtime Environment (IcedTea6 1.13.8) (6b36-1.13.8-0ubuntu1~12.04)
// OpenJDK 64-Bit Server VM (build 23.25-b01, mixed mode)
// Each line is terminated by \n
info := strings.Split(infoString, "\n")
versionString := info[0]
versionString = strings.TrimPrefix(versionString, "java version ")
versionString = strings.Trim(versionString, "\"")
node.Attributes[javaDriverAttr] = "1"
node.Attributes["driver.java.version"] = versionString
node.Attributes["driver.java.runtime"] = info[1]
node.Attributes["driver.java.vm"] = info[2]
return true, nil
}
func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
var driverConfig JavaDriverConfig
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
return nil, err
}
// Set the host environment variables.
filter := strings.Split(d.config.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",")
d.taskEnv.AppendHostEnvvars(filter)
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
if driverConfig.JarPath == "" {
return nil, fmt.Errorf("jar_path must be specified")
}
args := []string{}
// Look for jvm options
if len(driverConfig.JvmOpts) != 0 {
d.logger.Printf("[DEBUG] driver.java: found JVM options: %s", driverConfig.JvmOpts)
args = append(args, driverConfig.JvmOpts...)
}
// Build the argument list.
args = append(args, "-jar", driverConfig.JarPath)
if len(driverConfig.Args) != 0 {
args = append(args, driverConfig.Args...)
}
bin, err := discover.NomadExecutable()
if err != nil {
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
}
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
pluginConfig := &plugin.ClientConfig{
Cmd: exec.Command(bin, "executor", pluginLogFile),
}
execIntf, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
Driver: "java",
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
ChrootEnv: d.config.ChrootEnv,
Task: task,
}
absPath, err := GetAbsolutePath("java")
if err != nil {
return nil, err
}
ps, err := execIntf.LaunchCmd(&executor.ExecCommand{
Cmd: absPath,
Args: args,
FSIsolation: true,
ResourceLimits: true,
User: getExecutorUser(task),
}, executorCtx)
if err != nil {
pluginClient.Kill()
return nil, err
}
d.logger.Printf("[DEBUG] driver.java: started process with pid: %v", ps.Pid)
// Return a driver handle
maxKill := d.DriverContext.config.MaxKillTimeout
h := &javaHandle{
pluginClient: pluginClient,
executor: execIntf,
userPid: ps.Pid,
isolationConfig: ps.IsolationConfig,
taskDir: taskDir,
allocDir: ctx.AllocDir,
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
maxKillTimeout: maxKill,
version: d.config.Version,
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
d.logger.Printf("[ERR] driver.java: error registering services with consul for task: %q: %v", task.Name, err)
}
go h.run()
return h, nil
}
// cgroupsMounted returns true if the cgroups are mounted on a system otherwise
// returns false
func (d *JavaDriver) cgroupsMounted(node *structs.Node) bool {
_, ok := node.Attributes["unique.cgroup.mountpoint"]
return ok
}
type javaId struct {
Version string
KillTimeout time.Duration
MaxKillTimeout time.Duration
PluginConfig *PluginReattachConfig
IsolationConfig *dstructs.IsolationConfig
TaskDir string
AllocDir *allocdir.AllocDir
UserPid int
}
func (d *JavaDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
id := &javaId{}
if err := json.Unmarshal([]byte(handleID), id); err != nil {
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
}
pluginConfig := &plugin.ClientConfig{
Reattach: id.PluginConfig.PluginConfig(),
}
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
merrs := new(multierror.Error)
merrs.Errors = append(merrs.Errors, err)
d.logger.Println("[ERR] driver.java: error connecting to plugin so destroying plugin pid and user pid")
if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil {
merrs.Errors = append(merrs.Errors, fmt.Errorf("error destroying plugin and userpid: %v", e))
}
if id.IsolationConfig != nil {
ePid := pluginConfig.Reattach.Pid
if e := executor.ClientCleanup(id.IsolationConfig, ePid); e != nil {
merrs.Errors = append(merrs.Errors, fmt.Errorf("destroying resource container failed: %v", e))
}
}
if e := ctx.AllocDir.UnmountAll(); e != nil {
merrs.Errors = append(merrs.Errors, e)
}
return nil, fmt.Errorf("error connecting to plugin: %v", merrs.ErrorOrNil())
}
ver, _ := exec.Version()
d.logger.Printf("[DEBUG] driver.java: version of executor: %v", ver.Version)
// Return a driver handle
h := &javaHandle{
pluginClient: pluginClient,
executor: exec,
userPid: id.UserPid,
isolationConfig: id.IsolationConfig,
taskDir: id.TaskDir,
allocDir: id.AllocDir,
logger: d.logger,
version: id.Version,
killTimeout: id.KillTimeout,
maxKillTimeout: id.MaxKillTimeout,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
d.logger.Printf("[ERR] driver.java: error registering services with consul: %v", err)
}
go h.run()
return h, nil
}
func (h *javaHandle) ID() string {
id := javaId{
Version: h.version,
KillTimeout: h.killTimeout,
MaxKillTimeout: h.maxKillTimeout,
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
UserPid: h.userPid,
TaskDir: h.taskDir,
AllocDir: h.allocDir,
IsolationConfig: h.isolationConfig,
}
data, err := json.Marshal(id)
if err != nil {
h.logger.Printf("[ERR] driver.java: failed to marshal ID to JSON: %s", err)
}
return string(data)
}
func (h *javaHandle) WaitCh() chan *dstructs.WaitResult {
return h.waitCh
}
func (h *javaHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateTask(task)
// Update is not possible
return nil
}
func (h *javaHandle) Kill() error {
if err := h.executor.ShutDown(); err != nil {
if h.pluginClient.Exited() {
return nil
}
return fmt.Errorf("executor Shutdown failed: %v", err)
}
select {
case <-h.doneCh:
return nil
case <-time.After(h.killTimeout):
if h.pluginClient.Exited() {
return nil
}
if err := h.executor.Exit(); err != nil {
return fmt.Errorf("executor Exit failed: %v", err)
}
return nil
}
}
func (h *javaHandle) Stats() (*cstructs.TaskResourceUsage, error) {
return h.executor.Stats()
}
func (h *javaHandle) run() {
ps, err := h.executor.Wait()
close(h.doneCh)
if ps.ExitCode == 0 && err != nil {
if h.isolationConfig != nil {
ePid := h.pluginClient.ReattachConfig().Pid
if e := executor.ClientCleanup(h.isolationConfig, ePid); e != nil {
h.logger.Printf("[ERR] driver.java: destroying resource container failed: %v", e)
}
} else {
if e := killProcess(h.userPid); e != nil {
h.logger.Printf("[ERR] driver.java: error killing user process: %v", e)
}
}
if e := h.allocDir.UnmountAll(); e != nil {
h.logger.Printf("[ERR] driver.java: unmounting dev,proc and alloc dirs failed: %v", e)
}
}
h.waitCh <- &dstructs.WaitResult{ExitCode: ps.ExitCode, Signal: ps.Signal, Err: err}
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.java: failed to kill the deregister services: %v", err)
}
h.executor.Exit()
h.pluginClient.Kill()
}
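
For reference, a task configuration that satisfies the Validate schema above and decodes into JavaDriverConfig might look like the following sketch (all values are illustrative):

taskConfig := map[string]interface{}{
	// Required: the jar passed to "java -jar".
	"jar_path": "local/hello.jar",
	// Optional JVM flags, placed before -jar.
	"jvm_options": []string{"-Xmx256m", "-Xms128m"},
	// Optional application arguments, appended after the jar path.
	"args": []string{"--port", "8080"},
}

var cfg JavaDriverConfig
if err := mapstructure.WeakDecode(taskConfig, &cfg); err != nil {
	log.Fatalf("decode failed: %v", err)
}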

View File

@ -1,71 +0,0 @@
package logging
import (
"log"
"github.com/hashicorp/nomad/client/allocdir"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
// LogCollectorContext holds context to configure the syslog server
type LogCollectorContext struct {
// TaskName is the name of the Task
TaskName string
// AllocDir is the handle to do operations on the alloc dir of
// the task
AllocDir *allocdir.AllocDir
// LogConfig provides configuration related to log rotation
LogConfig *structs.LogConfig
// PortUpperBound is the upper bound of the ports that we can use to start
// the syslog server
PortUpperBound uint
// PortLowerBound is the lower bound of the ports that we can use to start
// the syslog server
PortLowerBound uint
}
// SyslogCollectorState holds the address and isolation information of a launched
// syslog server
type SyslogCollectorState struct {
IsolationConfig *cstructs.IsolationConfig
Addr string
}
// LogCollector is an interface which allows a driver to launch a log server
// and update log configuration
type LogCollector interface {
LaunchCollector(ctx *LogCollectorContext) (*SyslogCollectorState, error)
Exit() error
UpdateLogConfig(logConfig *structs.LogConfig) error
}
// SyslogCollector is a LogCollector which starts a syslog server and does
// rotation to incoming stream
type SyslogCollector struct {
}
// NewSyslogCollector returns an implementation of the SyslogCollector
func NewSyslogCollector(logger *log.Logger) *SyslogCollector {
return &SyslogCollector{}
}
// LaunchCollector launches a new syslog server and starts writing log lines to
// files and rotates them
func (s *SyslogCollector) LaunchCollector(ctx *LogCollectorContext) (*SyslogCollectorState, error) {
return nil, nil
}
// Exit kills the syslog server
func (s *SyslogCollector) Exit() error {
return nil
}
// UpdateLogConfig updates the log configuration
func (s *SyslogCollector) UpdateLogConfig(logConfig *structs.LogConfig) error {
return nil
}

View File

@ -1,285 +0,0 @@
package logging
import (
"bufio"
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"time"
)
const (
bufSize = 32768
flushDur = 100 * time.Millisecond
)
// FileRotator writes bytes to a rotated set of files
type FileRotator struct {
MaxFiles int // MaxFiles is the maximum number of rotated files allowed in a path
FileSize int64 // FileSize is the size a rotated file is allowed to grow
path string // path is the path on the file system where the rotated set of files are opened
baseFileName string // baseFileName is the base file name of the rotated files
logFileIdx int // logFileIdx is the current index of the rotated files
oldestLogFileIdx int // oldestLogFileIdx is the index of the oldest log file in a path
currentFile *os.File // currentFile is the file that is currently getting written
currentWr int64 // currentWr is the number of bytes written to the current file
bufw *bufio.Writer
bufLock sync.Mutex
flushTicker *time.Ticker
logger *log.Logger
purgeCh chan struct{}
doneCh chan struct{}
closed bool
closedLock sync.Mutex
}
// NewFileRotator returns a new file rotator
func NewFileRotator(path string, baseFile string, maxFiles int,
fileSize int64, logger *log.Logger) (*FileRotator, error) {
rotator := &FileRotator{
MaxFiles: maxFiles,
FileSize: fileSize,
path: path,
baseFileName: baseFile,
flushTicker: time.NewTicker(flushDur),
logger: logger,
purgeCh: make(chan struct{}, 1),
doneCh: make(chan struct{}, 1),
}
if err := rotator.lastFile(); err != nil {
return nil, err
}
go rotator.purgeOldFiles()
go rotator.flushPeriodically()
return rotator, nil
}
// Write writes a byte array to a file and rotates the file once its size
// reaches the maximum size the user has configured.
func (f *FileRotator) Write(p []byte) (n int, err error) {
n = 0
var nw int
for n < len(p) {
// Check if we still have space in the current file, otherwise close and
// open the next file
if f.currentWr >= f.FileSize {
f.flushBuffer()
f.currentFile.Close()
if err := f.nextFile(); err != nil {
f.logger.Printf("[ERROR] driver.rotator: error creating next file: %v", err)
return 0, err
}
}
// Calculate the remaining size on this file
remainingSize := f.FileSize - f.currentWr
// Check if the number of bytes that we have to write is less than the
// remaining size of the file
if remainingSize < int64(len(p[n:])) {
// Write the number of bytes that we can write on the current file
li := int64(n) + remainingSize
nw, err = f.writeToBuffer(p[n:li])
} else {
// Write all the bytes in the current file
nw, err = f.writeToBuffer(p[n:])
}
// Increment the number of bytes written so far in this method
// invocation
n += nw
// Increment the total number of bytes written to the current file
f.currentWr += int64(nw)
if err != nil {
f.logger.Printf("[ERROR] driver.rotator: error writing to file: %v", err)
return
}
}
return
}
// nextFile opens the next file and purges older files if the number of rotated
// files is larger than the maximum files configured by the user
func (f *FileRotator) nextFile() error {
nextFileIdx := f.logFileIdx
for {
nextFileIdx += 1
logFileName := filepath.Join(f.path, fmt.Sprintf("%s.%d", f.baseFileName, nextFileIdx))
if fi, err := os.Stat(logFileName); err == nil {
if fi.IsDir() || fi.Size() >= f.FileSize {
continue
}
}
f.logFileIdx = nextFileIdx
if err := f.createFile(); err != nil {
return err
}
break
}
// Purge old files if we have more files than MaxFiles
f.closedLock.Lock()
defer f.closedLock.Unlock()
if f.logFileIdx-f.oldestLogFileIdx >= f.MaxFiles && !f.closed {
select {
case f.purgeCh <- struct{}{}:
default:
}
}
return nil
}
// lastFile finds out the rotated file with the largest index in a path.
func (f *FileRotator) lastFile() error {
finfos, err := ioutil.ReadDir(f.path)
if err != nil {
return err
}
prefix := fmt.Sprintf("%s.", f.baseFileName)
for _, fi := range finfos {
if fi.IsDir() {
continue
}
if strings.HasPrefix(fi.Name(), prefix) {
fileIdx := strings.TrimPrefix(fi.Name(), prefix)
n, err := strconv.Atoi(fileIdx)
if err != nil {
continue
}
if n > f.logFileIdx {
f.logFileIdx = n
}
}
}
if err := f.createFile(); err != nil {
return err
}
return nil
}
// createFile opens a new or existing file for writing
func (f *FileRotator) createFile() error {
logFileName := filepath.Join(f.path, fmt.Sprintf("%s.%d", f.baseFileName, f.logFileIdx))
cFile, err := os.OpenFile(logFileName, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
if err != nil {
return err
}
f.currentFile = cFile
fi, err := f.currentFile.Stat()
if err != nil {
return err
}
f.currentWr = fi.Size()
f.createOrResetBuffer()
return nil
}
// flushPeriodically flushes the buffered writer every 100ms to the underlying
// file
func (f *FileRotator) flushPeriodically() {
for range f.flushTicker.C {
f.flushBuffer()
}
}
func (f *FileRotator) Close() {
f.closedLock.Lock()
defer f.closedLock.Unlock()
// Stop the ticker and flush for one last time
f.flushTicker.Stop()
f.flushBuffer()
// Stop the purge go routine
if !f.closed {
f.doneCh <- struct{}{}
close(f.purgeCh)
f.closed = true
}
}
// purgeOldFiles removes older files and keeps only the last N files rotated for
// a file
func (f *FileRotator) purgeOldFiles() {
for {
select {
case <-f.purgeCh:
var fIndexes []int
files, err := ioutil.ReadDir(f.path)
if err != nil {
return
}
// Inserting all the rotated files in a slice
for _, fi := range files {
if strings.HasPrefix(fi.Name(), f.baseFileName) {
fileIdx := strings.TrimPrefix(fi.Name(), fmt.Sprintf("%s.", f.baseFileName))
n, err := strconv.Atoi(fileIdx)
if err != nil {
continue
}
fIndexes = append(fIndexes, n)
}
}
// Skip purging when the number of rotated files does not exceed
// MaxFiles
if len(fIndexes) <= f.MaxFiles {
continue
}
// Sorting the file indexes so that we can purge the older files and keep
// only the number of files as configured by the user
sort.Sort(sort.IntSlice(fIndexes))
toDelete := fIndexes[0 : len(fIndexes)-f.MaxFiles]
for _, fIndex := range toDelete {
fname := filepath.Join(f.path, fmt.Sprintf("%s.%d", f.baseFileName, fIndex))
os.RemoveAll(fname)
}
f.oldestLogFileIdx = fIndexes[0]
case <-f.doneCh:
return
}
}
}
// flushBuffer flushes the buffer
func (f *FileRotator) flushBuffer() error {
f.bufLock.Lock()
defer f.bufLock.Unlock()
if f.bufw != nil {
return f.bufw.Flush()
}
return nil
}
// writeToBuffer writes the byte array to buffer
func (f *FileRotator) writeToBuffer(p []byte) (int, error) {
f.bufLock.Lock()
defer f.bufLock.Unlock()
return f.bufw.Write(p)
}
// createOrResetBuffer creates a new buffer if we don't have one otherwise
// resets the buffer
func (f *FileRotator) createOrResetBuffer() {
f.bufLock.Lock()
defer f.bufLock.Unlock()
if f.bufw == nil {
f.bufw = bufio.NewWriterSize(f.currentFile, bufSize)
} else {
f.bufw.Reset(f.currentFile)
}
}
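
A minimal usage sketch for the rotator above, assuming the target directory already exists (path, base name and sizes are arbitrary):

logger := log.New(os.Stderr, "", log.LstdFlags)

// Keep at most 3 rotated files of 1 MiB each under /tmp/rotator-demo,
// named demo.stdout.0, demo.stdout.1, and so on.
rotator, err := NewFileRotator("/tmp/rotator-demo", "demo.stdout", 3, 1024*1024, logger)
if err != nil {
	logger.Fatalf("failed to create rotator: %v", err)
}
defer rotator.Close()

if _, err := rotator.Write([]byte("hello from the rotator\n")); err != nil {
	logger.Printf("write failed: %v", err)
}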

View File

@ -1,158 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package logging
import (
"fmt"
"log"
"log/syslog"
"strconv"
)
// Errors related to parsing priority
var (
ErrPriorityNoStart = fmt.Errorf("No start char found for priority")
ErrPriorityEmpty = fmt.Errorf("Priority field empty")
ErrPriorityNoEnd = fmt.Errorf("No end char found for priority")
ErrPriorityTooShort = fmt.Errorf("Priority field too short")
ErrPriorityTooLong = fmt.Errorf("Priority field too long")
ErrPriorityNonDigit = fmt.Errorf("Non digit found in priority")
)
// Priority header and ending characters
const (
PRI_PART_START = '<'
PRI_PART_END = '>'
)
// SyslogMessage represents a log line received
type SyslogMessage struct {
Message []byte
Severity syslog.Priority
}
// Priority holds all the priority bits in a syslog log line
type Priority struct {
Pri int
Facility syslog.Priority
Severity syslog.Priority
}
// DockerLogParser parses a line of log message that the docker daemon ships
type DockerLogParser struct {
logger *log.Logger
}
// NewDockerLogParser creates a new DockerLogParser
func NewDockerLogParser(logger *log.Logger) *DockerLogParser {
return &DockerLogParser{logger: logger}
}
// Parse parses a syslog log line
func (d *DockerLogParser) Parse(line []byte) *SyslogMessage {
pri, _, _ := d.parsePriority(line)
msgIdx := d.logContentIndex(line)
// Create a copy of the line so that subsequent Scans do not override the
// message
lineCopy := make([]byte, len(line[msgIdx:]))
copy(lineCopy, line[msgIdx:])
return &SyslogMessage{
Severity: pri.Severity,
Message: lineCopy,
}
}
// logContentIndex finds out the index of the start index of the content in a
// syslog line
func (d *DockerLogParser) logContentIndex(line []byte) int {
cursor := 0
numSpace := 0
numColons := 0
// First look for at least 2 colons; the second one lands inside the date, which has no more spaces in it.
// A DefaultFormatter log line looks like: '<30>2016-07-06T15:13:11Z00:00 hostname docker/9648c64f5037[16200]'
// A UnixFormatter log line looks like: '<30>Jul 6 15:13:11 docker/9648c64f5037[16200]'
for i := 0; i < len(line); i++ {
if line[i] == ':' {
numColons += 1
if numColons == 2 {
cursor = i
break
}
}
}
// then look for the next space
for i := cursor; i < len(line); i++ {
if line[i] == ' ' {
numSpace += 1
if numSpace == 1 {
cursor = i
break
}
}
}
// then find the colon followed by a space, which separates the tag from the message
for i := cursor; i < len(line); i++ {
if line[i] == ':' && i+1 < len(line) && line[i+1] == ' ' {
cursor = i + 1
break
}
}
// return the cursor to the next character
return cursor + 1
}
// parsePriority parses the priority in a syslog message
func (d *DockerLogParser) parsePriority(line []byte) (Priority, int, error) {
cursor := 0
pri := d.newPriority(0)
if len(line) <= 0 {
return pri, cursor, ErrPriorityEmpty
}
if line[cursor] != PRI_PART_START {
return pri, cursor, ErrPriorityNoStart
}
i := 1
priDigit := 0
for i < len(line) {
if i >= 5 {
return pri, cursor, ErrPriorityTooLong
}
c := line[i]
if c == PRI_PART_END {
if i == 1 {
return pri, cursor, ErrPriorityTooShort
}
cursor = i + 1
return d.newPriority(priDigit), cursor, nil
}
if d.isDigit(c) {
v, e := strconv.Atoi(string(c))
if e != nil {
return pri, cursor, e
}
priDigit = (priDigit * 10) + v
} else {
return pri, cursor, ErrPriorityNonDigit
}
i++
}
return pri, cursor, ErrPriorityNoEnd
}
// isDigit checks if a byte is a numeric char
func (d *DockerLogParser) isDigit(c byte) bool {
return c >= '0' && c <= '9'
}
// newPriority creates a new default priority
func (d *DockerLogParser) newPriority(p int) Priority {
// The Priority value is calculated by first multiplying the Facility
// number by 8 and then adding the numerical value of the Severity.
return Priority{
Pri: p,
Facility: syslog.Priority(p / 8),
Severity: syslog.Priority(p % 8),
}
}
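
A small sketch of the parser in use, with a log line shaped like the DefaultFormatter example in the comments above (the exact line is illustrative):

logger := log.New(os.Stderr, "", log.LstdFlags)
parser := NewDockerLogParser(logger)

// <11> encodes facility 1, severity 3 (LOG_ERR); Parse strips the priority,
// timestamp, hostname and tag and keeps only the message.
line := []byte("<11>2016-07-06T15:13:11Z00:00 hostname docker/9648c64f5037[16200]: something went wrong")
msg := parser.Parse(line)

if msg.Severity == syslog.LOG_ERR {
	logger.Printf("stderr line: %s", msg.Message)
}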

View File

@ -1,86 +0,0 @@
// +build !windows
package logging
import (
"bufio"
"log"
"net"
"sync"
)
// SyslogServer is a server which listens to syslog messages and parses them
type SyslogServer struct {
listener net.Listener
messages chan *SyslogMessage
parser *DockerLogParser
doneCh chan interface{}
done bool
doneLock sync.Mutex
logger *log.Logger
}
// NewSyslogServer creates a new syslog server
func NewSyslogServer(l net.Listener, messages chan *SyslogMessage, logger *log.Logger) *SyslogServer {
parser := NewDockerLogParser(logger)
return &SyslogServer{
listener: l,
messages: messages,
parser: parser,
logger: logger,
doneCh: make(chan interface{}),
}
}
// Start starts accepting syslog connections
func (s *SyslogServer) Start() {
for {
select {
case <-s.doneCh:
s.listener.Close()
return
default:
connection, err := s.listener.Accept()
if err != nil {
s.logger.Printf("[ERR] logcollector.server: error in accepting connection: %v", err)
continue
}
go s.read(connection)
}
}
}
// read reads the bytes from a connection
func (s *SyslogServer) read(connection net.Conn) {
defer connection.Close()
scanner := bufio.NewScanner(bufio.NewReader(connection))
for {
select {
case <-s.doneCh:
return
default:
}
if scanner.Scan() {
b := scanner.Bytes()
msg := s.parser.Parse(b)
s.messages <- msg
} else {
return
}
}
}
// Shutdown shuts down the syslog server
func (s *SyslogServer) Shutdown() {
s.doneLock.Lock()
defer s.doneLock.Unlock()
if !s.done {
close(s.doneCh)
close(s.messages)
s.done = true
}
}
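
A sketch of wiring the server to a listener and a consumer, roughly what the syslog collector below does (the socket path is arbitrary):

logger := log.New(os.Stderr, "", log.LstdFlags)
messages := make(chan *SyslogMessage, 1024)

l, err := net.Listen("unix", "/tmp/nomad-syslog-demo.sock")
if err != nil {
	logger.Fatalf("listen failed: %v", err)
}

server := NewSyslogServer(l, messages, logger)
go server.Start()
defer server.Shutdown()

// Drain parsed messages; the collector routes these to the stdout/stderr
// rotators based on severity.
go func() {
	for msg := range messages {
		logger.Printf("severity=%d message=%s", msg.Severity, msg.Message)
	}
}()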

View File

@ -1,10 +0,0 @@
package logging
type SyslogServer struct {
}
func (s *SyslogServer) Shutdown() {
}
type SyslogMessage struct {
}

View File

@ -1,207 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package logging
import (
"fmt"
"io"
"io/ioutil"
"log"
"log/syslog"
"net"
"os"
"runtime"
"github.com/hashicorp/nomad/client/allocdir"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
// LogCollectorContext holds context to configure the syslog server
type LogCollectorContext struct {
// TaskName is the name of the Task
TaskName string
// AllocDir is the handle to do operations on the alloc dir of
// the task
AllocDir *allocdir.AllocDir
// LogConfig provides configuration related to log rotation
LogConfig *structs.LogConfig
// PortUpperBound is the upper bound of the ports that we can use to start
// the syslog server
PortUpperBound uint
// PortLowerBound is the lower bound of the ports that we can use to start
// the syslog server
PortLowerBound uint
}
// SyslogCollectorState holds the address and isolation information of a launched
// syslog server
type SyslogCollectorState struct {
IsolationConfig *cstructs.IsolationConfig
Addr string
}
// LogCollector is an interface which allows a driver to launch a log server
// and update log configuration
type LogCollector interface {
LaunchCollector(ctx *LogCollectorContext) (*SyslogCollectorState, error)
Exit() error
UpdateLogConfig(logConfig *structs.LogConfig) error
}
// SyslogCollector is a LogCollector which starts a syslog server and does
// rotation to incoming stream
type SyslogCollector struct {
addr net.Addr
logConfig *structs.LogConfig
ctx *LogCollectorContext
lro *FileRotator
lre *FileRotator
server *SyslogServer
syslogChan chan *SyslogMessage
taskDir string
logger *log.Logger
}
// NewSyslogCollector returns an implementation of the SyslogCollector
func NewSyslogCollector(logger *log.Logger) *SyslogCollector {
return &SyslogCollector{logger: logger, syslogChan: make(chan *SyslogMessage, 2048)}
}
// LaunchCollector launches a new syslog server and starts writing log lines to
// files and rotates them
func (s *SyslogCollector) LaunchCollector(ctx *LogCollectorContext) (*SyslogCollectorState, error) {
l, err := s.getListener(ctx.PortLowerBound, ctx.PortUpperBound)
if err != nil {
return nil, err
}
s.logger.Printf("[DEBUG] sylog-server: launching syslog server on addr: %v", l.Addr().String())
s.ctx = ctx
// configuring the task dir
if err := s.configureTaskDir(); err != nil {
return nil, err
}
s.server = NewSyslogServer(l, s.syslogChan, s.logger)
go s.server.Start()
logFileSize := int64(ctx.LogConfig.MaxFileSizeMB * 1024 * 1024)
lro, err := NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", ctx.TaskName),
ctx.LogConfig.MaxFiles, logFileSize, s.logger)
if err != nil {
return nil, err
}
s.lro = lro
lre, err := NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", ctx.TaskName),
ctx.LogConfig.MaxFiles, logFileSize, s.logger)
if err != nil {
return nil, err
}
s.lre = lre
go s.collectLogs(lre, lro)
syslogAddr := fmt.Sprintf("%s://%s", l.Addr().Network(), l.Addr().String())
return &SyslogCollectorState{Addr: syslogAddr}, nil
}
func (s *SyslogCollector) collectLogs(we io.Writer, wo io.Writer) {
for logParts := range s.syslogChan {
// If the severity of the log line is err then we write to stderr
// otherwise all messages go to stdout
if logParts.Severity == syslog.LOG_ERR {
s.lre.Write(logParts.Message)
s.lre.Write([]byte{'\n'})
} else {
s.lro.Write(logParts.Message)
s.lro.Write([]byte{'\n'})
}
}
}
// Exit kills the syslog server
func (s *SyslogCollector) Exit() error {
s.server.Shutdown()
s.lre.Close()
s.lro.Close()
return nil
}
// UpdateLogConfig updates the log configuration
func (s *SyslogCollector) UpdateLogConfig(logConfig *structs.LogConfig) error {
s.ctx.LogConfig = logConfig
if s.lro == nil {
return fmt.Errorf("log rotator for stdout doesn't exist")
}
s.lro.MaxFiles = logConfig.MaxFiles
s.lro.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
if s.lre == nil {
return fmt.Errorf("log rotator for stderr doesn't exist")
}
s.lre.MaxFiles = logConfig.MaxFiles
s.lre.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
return nil
}
// configureTaskDir sets the task dir in the SyslogCollector
func (s *SyslogCollector) configureTaskDir() error {
taskDir, ok := s.ctx.AllocDir.TaskDirs[s.ctx.TaskName]
if !ok {
return fmt.Errorf("couldn't find task directory for task %v", s.ctx.TaskName)
}
s.taskDir = taskDir
return nil
}
// getListener returns a listener for the syslog server: a TCP listener on a
// free port between the lower and upper bounds on Windows, and a Unix domain
// socket elsewhere
func (s *SyslogCollector) getListener(lowerBound uint, upperBound uint) (net.Listener, error) {
if runtime.GOOS == "windows" {
return s.listenerTCP(lowerBound, upperBound)
}
return s.listenerUnix()
}
// listenerTCP creates a TCP listener using an unused port between an upper and
// lower bound
func (s *SyslogCollector) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) {
for i := lowerBound; i <= upperBound; i++ {
addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i))
if err != nil {
return nil, err
}
l, err := net.ListenTCP("tcp", addr)
if err != nil {
continue
}
return l, nil
}
return nil, fmt.Errorf("No free port found")
}
// listenerUnix creates a Unix domain socket
func (s *SyslogCollector) listenerUnix() (net.Listener, error) {
f, err := ioutil.TempFile("", "plugin")
if err != nil {
return nil, err
}
path := f.Name()
if err := f.Close(); err != nil {
return nil, err
}
if err := os.Remove(path); err != nil {
return nil, err
}
return net.Listen("unix", path)
}

View File

@ -1,51 +0,0 @@
package driver
import (
"io"
"log"
"net"
"github.com/hashicorp/go-plugin"
)
var HandshakeConfig = plugin.HandshakeConfig{
ProtocolVersion: 1,
MagicCookieKey: "NOMAD_PLUGIN_MAGIC_COOKIE",
MagicCookieValue: "e4327c2e01eabfd75a8a67adb114fb34a757d57eee7728d857a8cec6e91a7255",
}
func GetPluginMap(w io.Writer) map[string]plugin.Plugin {
e := new(ExecutorPlugin)
e.logger = log.New(w, "", log.LstdFlags)
s := new(SyslogCollectorPlugin)
s.logger = log.New(w, "", log.LstdFlags)
return map[string]plugin.Plugin{
"executor": e,
"syslogcollector": s,
}
}
// PluginReattachConfig is the config that we serialize and de-serialize and
// store on disk
type PluginReattachConfig struct {
Pid int
AddrNet string
AddrName string
}
// PluginConfig returns a plugin.ReattachConfig from a PluginReattachConfig
func (c *PluginReattachConfig) PluginConfig() *plugin.ReattachConfig {
var addr net.Addr
switch c.AddrNet {
case "unix", "unixgram", "unixpacket":
addr, _ = net.ResolveUnixAddr(c.AddrNet, c.AddrName)
case "tcp", "tcp4", "tcp6":
addr, _ = net.ResolveTCPAddr(c.AddrNet, c.AddrName)
}
return &plugin.ReattachConfig{Pid: c.Pid, Addr: addr}
}
func NewPluginReattachConfig(c *plugin.ReattachConfig) *PluginReattachConfig {
return &PluginReattachConfig{Pid: c.Pid, AddrNet: c.Addr.Network(), AddrName: c.Addr.String()}
}
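
The driver handles in this change persist this struct inside their JSON handle so the client can reattach to a live executor after an agent restart. A small round-trip sketch, with an arbitrary pid and address:

addr, _ := net.ResolveTCPAddr("tcp", "127.0.0.1:4200")
original := &plugin.ReattachConfig{Pid: 4321, Addr: addr}

// Convert to the serializable form and back, as ID() and Open() do via JSON.
stored := NewPluginReattachConfig(original)
data, _ := json.Marshal(stored)

restored := &PluginReattachConfig{}
if err := json.Unmarshal(data, restored); err != nil {
	log.Fatalf("unmarshal failed: %v", err)
}

// The restored config can be handed straight to a go-plugin client.
client := plugin.NewClient(&plugin.ClientConfig{Reattach: restored.PluginConfig()})
defer client.Kill()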

View File

@ -1,412 +0,0 @@
package driver
import (
"encoding/json"
"fmt"
"log"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strings"
"time"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/executor"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/client/fingerprint"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/discover"
"github.com/hashicorp/nomad/helper/fields"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
)
var (
reQemuVersion = regexp.MustCompile(`version (\d[\.\d+]+)`)
)
const (
// The key populated in Node Attributes to indicate presence of the Qemu
// driver
qemuDriverAttr = "driver.qemu"
)
// QemuDriver is a driver for running images via Qemu
// We attempt to choose sane defaults for now, with more configuration options
// planned for the future
type QemuDriver struct {
DriverContext
fingerprint.StaticFingerprinter
}
type QemuDriverConfig struct {
ImagePath string `mapstructure:"image_path"`
Accelerator string `mapstructure:"accelerator"`
PortMap []map[string]int `mapstructure:"port_map"` // A map of host port labels and to guest ports.
Args []string `mapstructure:"args"` // extra arguments to qemu executable
}
// qemuHandle is returned from Start/Open as a handle to the PID
type qemuHandle struct {
pluginClient *plugin.Client
userPid int
executor executor.Executor
allocDir *allocdir.AllocDir
killTimeout time.Duration
maxKillTimeout time.Duration
logger *log.Logger
version string
waitCh chan *dstructs.WaitResult
doneCh chan struct{}
}
// NewQemuDriver is used to create a new exec driver
func NewQemuDriver(ctx *DriverContext) Driver {
return &QemuDriver{DriverContext: *ctx}
}
// Validate is used to validate the driver configuration
func (d *QemuDriver) Validate(config map[string]interface{}) error {
fd := &fields.FieldData{
Raw: config,
Schema: map[string]*fields.FieldSchema{
"image_path": &fields.FieldSchema{
Type: fields.TypeString,
Required: true,
},
"accelerator": &fields.FieldSchema{
Type: fields.TypeString,
},
"port_map": &fields.FieldSchema{
Type: fields.TypeArray,
},
"args": &fields.FieldSchema{
Type: fields.TypeArray,
},
},
}
if err := fd.Validate(); err != nil {
return err
}
return nil
}
func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Get the current status so that we can log any debug messages only if the
// state changes
_, currentlyEnabled := node.Attributes[qemuDriverAttr]
bin := "qemu-system-x86_64"
if runtime.GOOS == "windows" {
// On windows, the "qemu-system-x86_64" command does not respond to the
// version flag.
bin = "qemu-img"
}
outBytes, err := exec.Command(bin, "--version").Output()
if err != nil {
delete(node.Attributes, qemuDriverAttr)
return false, nil
}
out := strings.TrimSpace(string(outBytes))
matches := reQemuVersion.FindStringSubmatch(out)
if len(matches) != 2 {
delete(node.Attributes, qemuDriverAttr)
return false, fmt.Errorf("Unable to parse Qemu version string: %#v", matches)
}
if !currentlyEnabled {
d.logger.Printf("[DEBUG] driver.qemu: enabling driver")
}
node.Attributes[qemuDriverAttr] = "1"
node.Attributes["driver.qemu.version"] = matches[1]
return true, nil
}
// Run an existing Qemu image. Start() will pull down an existing, valid Qemu
// image and save it to the Driver's Allocation Dir
func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
var driverConfig QemuDriverConfig
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
return nil, err
}
if len(driverConfig.PortMap) > 1 {
return nil, fmt.Errorf("Only one port_map block is allowed in the qemu driver config")
}
// Get the image source
vmPath := driverConfig.ImagePath
if vmPath == "" {
return nil, fmt.Errorf("image_path must be set")
}
vmID := filepath.Base(vmPath)
// Get the tasks local directory.
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
// Parse configuration arguments
// Create the base arguments
accelerator := "tcg"
if driverConfig.Accelerator != "" {
accelerator = driverConfig.Accelerator
}
// TODO: Check a lower bounds, e.g. the default 128 of Qemu
mem := fmt.Sprintf("%dM", task.Resources.MemoryMB)
absPath, err := GetAbsolutePath("qemu-system-x86_64")
if err != nil {
return nil, err
}
args := []string{
absPath,
"-machine", "type=pc,accel=" + accelerator,
"-name", vmID,
"-m", mem,
"-drive", "file=" + vmPath,
"-nographic",
}
// Add pass through arguments to qemu executable. A user can specify
// these arguments in driver task configuration. These arguments are
// passed directly to the qemu driver as command line options.
// For example, args = [ "-nodefconfig", "-nodefaults" ]
// This will allow a VM with embedded configuration to boot successfully.
args = append(args, driverConfig.Args...)
// Check the Resources required Networks to add port mappings. If no resources
// are required, we assume the VM is a purely compute job and does not require
// the outside world to be able to reach it. VMs ran without port mappings can
// still reach out to the world, but without port mappings it is effectively
// firewalled
protocols := []string{"udp", "tcp"}
if len(task.Resources.Networks) > 0 && len(driverConfig.PortMap) == 1 {
// Loop through the port map and construct the hostfwd string, to map
// reserved ports to the ports listening in the VM
// Ex: hostfwd=tcp::22000-:22,hostfwd=tcp::80-:8080
var forwarding []string
taskPorts := task.Resources.Networks[0].MapLabelToValues(nil)
for label, guest := range driverConfig.PortMap[0] {
host, ok := taskPorts[label]
if !ok {
return nil, fmt.Errorf("Unknown port label %q", label)
}
for _, p := range protocols {
forwarding = append(forwarding, fmt.Sprintf("hostfwd=%s::%d-:%d", p, host, guest))
}
}
if len(forwarding) != 0 {
args = append(args,
"-netdev",
fmt.Sprintf("user,id=user.0,%s", strings.Join(forwarding, ",")),
"-device", "virtio-net,netdev=user.0",
)
}
}
// If using KVM, add optimization args
if accelerator == "kvm" {
args = append(args,
"-enable-kvm",
"-cpu", "host",
// Do we have cores information available to the Driver?
// "-smp", fmt.Sprintf("%d", cores),
)
}
d.logger.Printf("[DEBUG] Starting QemuVM command: %q", strings.Join(args, " "))
bin, err := discover.NomadExecutable()
if err != nil {
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
}
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
pluginConfig := &plugin.ClientConfig{
Cmd: exec.Command(bin, "executor", pluginLogFile),
}
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
Driver: "qemu",
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
Task: task,
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{
Cmd: args[0],
Args: args[1:],
User: task.User,
}, executorCtx)
if err != nil {
pluginClient.Kill()
return nil, err
}
d.logger.Printf("[INFO] Started new QemuVM: %s", vmID)
// Create and Return Handle
maxKill := d.DriverContext.config.MaxKillTimeout
h := &qemuHandle{
pluginClient: pluginClient,
executor: exec,
userPid: ps.Pid,
allocDir: ctx.AllocDir,
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
maxKillTimeout: maxKill,
version: d.config.Version,
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
h.logger.Printf("[ERR] driver.qemu: error registering services for task: %q: %v", task.Name, err)
}
go h.run()
return h, nil
}
type qemuId struct {
Version string
KillTimeout time.Duration
MaxKillTimeout time.Duration
UserPid int
PluginConfig *PluginReattachConfig
AllocDir *allocdir.AllocDir
}
func (d *QemuDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
id := &qemuId{}
if err := json.Unmarshal([]byte(handleID), id); err != nil {
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
}
pluginConfig := &plugin.ClientConfig{
Reattach: id.PluginConfig.PluginConfig(),
}
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
d.logger.Println("[ERR] driver.qemu: error connecting to plugin so destroying plugin pid and user pid")
if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil {
d.logger.Printf("[ERR] driver.qemu: error destroying plugin and userpid: %v", e)
}
return nil, fmt.Errorf("error connecting to plugin: %v", err)
}
ver, _ := exec.Version()
d.logger.Printf("[DEBUG] driver.qemu: version of executor: %v", ver.Version)
// Return a driver handle
h := &qemuHandle{
pluginClient: pluginClient,
executor: exec,
userPid: id.UserPid,
allocDir: id.AllocDir,
logger: d.logger,
killTimeout: id.KillTimeout,
maxKillTimeout: id.MaxKillTimeout,
version: id.Version,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
h.logger.Printf("[ERR] driver.qemu: error registering services: %v", err)
}
go h.run()
return h, nil
}
func (h *qemuHandle) ID() string {
id := qemuId{
Version: h.version,
KillTimeout: h.killTimeout,
MaxKillTimeout: h.maxKillTimeout,
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
UserPid: h.userPid,
AllocDir: h.allocDir,
}
data, err := json.Marshal(id)
if err != nil {
h.logger.Printf("[ERR] driver.qemu: failed to marshal ID to JSON: %s", err)
}
return string(data)
}
func (h *qemuHandle) WaitCh() chan *dstructs.WaitResult {
return h.waitCh
}
func (h *qemuHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateTask(task)
// Update is not possible
return nil
}
// TODO: allow a 'shutdown_command' that can be executed over a ssh connection
// to the VM
func (h *qemuHandle) Kill() error {
if err := h.executor.ShutDown(); err != nil {
if h.pluginClient.Exited() {
return nil
}
return fmt.Errorf("executor Shutdown failed: %v", err)
}
select {
case <-h.doneCh:
return nil
case <-time.After(h.killTimeout):
if h.pluginClient.Exited() {
return nil
}
if err := h.executor.Exit(); err != nil {
return fmt.Errorf("executor Exit failed: %v", err)
}
return nil
}
}
func (h *qemuHandle) Stats() (*cstructs.TaskResourceUsage, error) {
return h.executor.Stats()
}
func (h *qemuHandle) run() {
ps, err := h.executor.Wait()
if ps.ExitCode == 0 && err != nil {
if e := killProcess(h.userPid); e != nil {
h.logger.Printf("[ERR] driver.qemu: error killing user process: %v", e)
}
if e := h.allocDir.UnmountAll(); e != nil {
h.logger.Printf("[ERR] driver.qemu: unmounting dev,proc and alloc dirs failed: %v", e)
}
}
close(h.doneCh)
h.waitCh <- &dstructs.WaitResult{ExitCode: ps.ExitCode, Signal: ps.Signal, Err: err}
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.qemu: failed to deregister services: %v", err)
}
h.executor.Exit()
h.pluginClient.Kill()
}
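
For reference, a task configuration that passes the qemu Validate schema above and decodes into QemuDriverConfig could look like this sketch (image path, labels and ports are illustrative):

taskConfig := map[string]interface{}{
	// Required: the image handed to -drive file=...
	"image_path": "local/linux.img",
	// Optional: defaults to "tcg" when unset; "kvm" adds the optimization args.
	"accelerator": "kvm",
	// Optional: at most one port_map block, mapping host port labels to guest
	// ports; these become hostfwd rules on the -netdev argument.
	"port_map": []map[string]int{
		{"ssh": 22, "web": 8080},
	},
	// Optional: extra arguments passed straight through to qemu-system-x86_64.
	"args": []string{"-nodefconfig", "-nodefaults"},
}

var cfg QemuDriverConfig
if err := mapstructure.WeakDecode(taskConfig, &cfg); err != nil {
	log.Fatalf("decode failed: %v", err)
}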

View File

@ -1,307 +0,0 @@
package driver
import (
"encoding/json"
"fmt"
"log"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/executor"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/client/fingerprint"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/discover"
"github.com/hashicorp/nomad/helper/fields"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
)
const (
// The option that enables this driver in the Config.Options map.
rawExecConfigOption = "driver.raw_exec.enable"
// The key populated in Node Attributes to indicate presence of the Raw Exec
// driver
rawExecDriverAttr = "driver.raw_exec"
)
// The RawExecDriver is a privileged version of the exec driver. It provides no
// resource isolation and just fork/execs. The Exec driver should be preferred
// and this should only be used when explicitly needed.
type RawExecDriver struct {
DriverContext
fingerprint.StaticFingerprinter
}
// rawExecHandle is returned from Start/Open as a handle to the PID
type rawExecHandle struct {
version string
pluginClient *plugin.Client
userPid int
executor executor.Executor
killTimeout time.Duration
maxKillTimeout time.Duration
allocDir *allocdir.AllocDir
logger *log.Logger
waitCh chan *dstructs.WaitResult
doneCh chan struct{}
}
// NewRawExecDriver is used to create a new raw exec driver
func NewRawExecDriver(ctx *DriverContext) Driver {
return &RawExecDriver{DriverContext: *ctx}
}
// Validate is used to validate the driver configuration
func (d *RawExecDriver) Validate(config map[string]interface{}) error {
fd := &fields.FieldData{
Raw: config,
Schema: map[string]*fields.FieldSchema{
"command": &fields.FieldSchema{
Type: fields.TypeString,
Required: true,
},
"args": &fields.FieldSchema{
Type: fields.TypeArray,
},
},
}
if err := fd.Validate(); err != nil {
return err
}
return nil
}
func (d *RawExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Get the current status so that we can log any debug messages only if the
// state changes
_, currentlyEnabled := node.Attributes[rawExecDriverAttr]
// Check that the user has explicitly enabled this executor.
enabled := cfg.ReadBoolDefault(rawExecConfigOption, false)
if enabled {
if currentlyEnabled {
d.logger.Printf("[WARN] driver.raw_exec: raw exec is enabled. Only enable if needed")
}
node.Attributes[rawExecDriverAttr] = "1"
return true, nil
}
delete(node.Attributes, rawExecDriverAttr)
return false, nil
}
func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
var driverConfig ExecDriverConfig
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
return nil, err
}
// Get the tasks local directory.
taskName := d.DriverContext.taskName
taskDir, ok := ctx.AllocDir.TaskDirs[taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
// Get the command to be ran
command := driverConfig.Command
if err := validateCommand(command, "args"); err != nil {
return nil, err
}
// Set the host environment variables.
filter := strings.Split(d.config.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",")
d.taskEnv.AppendHostEnvvars(filter)
bin, err := discover.NomadExecutable()
if err != nil {
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
}
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
pluginConfig := &plugin.ClientConfig{
Cmd: exec.Command(bin, "executor", pluginLogFile),
}
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
Driver: "raw_exec",
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
Task: task,
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{
Cmd: command,
Args: driverConfig.Args,
User: task.User,
}, executorCtx)
if err != nil {
pluginClient.Kill()
return nil, err
}
d.logger.Printf("[DEBUG] driver.raw_exec: started process with pid: %v", ps.Pid)
// Return a driver handle
maxKill := d.DriverContext.config.MaxKillTimeout
h := &rawExecHandle{
pluginClient: pluginClient,
executor: exec,
userPid: ps.Pid,
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
maxKillTimeout: maxKill,
allocDir: ctx.AllocDir,
version: d.config.Version,
logger: d.logger,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
h.logger.Printf("[ERR] driver.raw_exec: error registering services with consul for task: %q: %v", task.Name, err)
}
go h.run()
return h, nil
}
type rawExecId struct {
Version string
KillTimeout time.Duration
MaxKillTimeout time.Duration
UserPid int
PluginConfig *PluginReattachConfig
AllocDir *allocdir.AllocDir
}
func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
id := &rawExecId{}
if err := json.Unmarshal([]byte(handleID), id); err != nil {
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
}
pluginConfig := &plugin.ClientConfig{
Reattach: id.PluginConfig.PluginConfig(),
}
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
d.logger.Println("[ERR] driver.raw_exec: error connecting to plugin so destroying plugin pid and user pid")
if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil {
d.logger.Printf("[ERR] driver.raw_exec: error destroying plugin and userpid: %v", e)
}
return nil, fmt.Errorf("error connecting to plugin: %v", err)
}
ver, _ := exec.Version()
d.logger.Printf("[DEBUG] driver.raw_exec: version of executor: %v", ver.Version)
// Return a driver handle
h := &rawExecHandle{
pluginClient: pluginClient,
executor: exec,
userPid: id.UserPid,
logger: d.logger,
killTimeout: id.KillTimeout,
maxKillTimeout: id.MaxKillTimeout,
allocDir: id.AllocDir,
version: id.Version,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
h.logger.Printf("[ERR] driver.raw_exec: error registering services with consul: %v", err)
}
go h.run()
return h, nil
}
func (h *rawExecHandle) ID() string {
id := rawExecId{
Version: h.version,
KillTimeout: h.killTimeout,
MaxKillTimeout: h.maxKillTimeout,
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
UserPid: h.userPid,
AllocDir: h.allocDir,
}
data, err := json.Marshal(id)
if err != nil {
h.logger.Printf("[ERR] driver.raw_exec: failed to marshal ID to JSON: %s", err)
}
return string(data)
}
func (h *rawExecHandle) WaitCh() chan *dstructs.WaitResult {
return h.waitCh
}
func (h *rawExecHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateTask(task)
// Update is not possible
return nil
}
func (h *rawExecHandle) Kill() error {
if err := h.executor.ShutDown(); err != nil {
if h.pluginClient.Exited() {
return nil
}
return fmt.Errorf("executor Shutdown failed: %v", err)
}
select {
case <-h.doneCh:
return nil
case <-time.After(h.killTimeout):
if h.pluginClient.Exited() {
return nil
}
if err := h.executor.Exit(); err != nil {
return fmt.Errorf("executor Exit failed: %v", err)
}
return nil
}
}
func (h *rawExecHandle) Stats() (*cstructs.TaskResourceUsage, error) {
return h.executor.Stats()
}
func (h *rawExecHandle) run() {
ps, err := h.executor.Wait()
close(h.doneCh)
if ps.ExitCode == 0 && err != nil {
if e := killProcess(h.userPid); e != nil {
h.logger.Printf("[ERR] driver.raw_exec: error killing user process: %v", e)
}
if e := h.allocDir.UnmountAll(); e != nil {
h.logger.Printf("[ERR] driver.raw_exec: unmounting dev,proc and alloc dirs failed: %v", e)
}
}
h.waitCh <- &dstructs.WaitResult{ExitCode: ps.ExitCode, Signal: ps.Signal, Err: err}
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.raw_exec: failed to deregister services: %v", err)
}
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.raw_exec: error killing executor: %v", err)
}
h.pluginClient.Kill()
}
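
Because raw_exec is opt-in, it only fingerprints when the client option checked by ReadBoolDefault is set; the task then supplies the command and args decoded in Start. A short sketch of both pieces (values are illustrative):

// Client-side option that enables the driver.
clientOptions := map[string]string{
	"driver.raw_exec.enable": "true",
}

// Task-side configuration decoded in Start (command is required).
taskConfig := map[string]interface{}{
	"command": "/bin/sleep",
	"args":    []string{"30"},
}

log.Printf("raw_exec enabled via %v, task config %v", clientOptions, taskConfig)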

View File

@ -1,436 +0,0 @@
package driver
import (
"bytes"
"encoding/json"
"fmt"
"log"
"net"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strings"
"syscall"
"time"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/go-version"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/executor"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/client/fingerprint"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/discover"
"github.com/hashicorp/nomad/helper/fields"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
)
var (
reRktVersion = regexp.MustCompile(`rkt [vV]ersion[:]? (\d[.\d]+)`)
reAppcVersion = regexp.MustCompile(`appc [vV]ersion[:]? (\d[.\d]+)`)
)
const (
// minRktVersion is the earliest supported version of rkt. rkt added support
// for CPU and memory isolators in 0.14.0. We cannot support an earlier
// version to maintain a uniform interface across all drivers
minRktVersion = "0.14.0"
// The key populated in the Node Attributes to indicate the presence of the
// Rkt driver
rktDriverAttr = "driver.rkt"
)
// RktDriver is a driver for running images via Rkt
// We attempt to choose sane defaults for now, with more configuration options
// planned for the future
type RktDriver struct {
DriverContext
fingerprint.StaticFingerprinter
}
type RktDriverConfig struct {
ImageName string `mapstructure:"image"`
Command string `mapstructure:"command"`
Args []string `mapstructure:"args"`
TrustPrefix string `mapstructure:"trust_prefix"`
DNSServers []string `mapstructure:"dns_servers"` // DNS Server for containers
DNSSearchDomains []string `mapstructure:"dns_search_domains"` // DNS Search domains for containers
Debug bool `mapstructure:"debug"` // Enable debug option for rkt command
}
// rktHandle is returned from Start/Open as a handle to the PID
type rktHandle struct {
pluginClient *plugin.Client
executorPid int
executor executor.Executor
allocDir *allocdir.AllocDir
logger *log.Logger
killTimeout time.Duration
maxKillTimeout time.Duration
waitCh chan *dstructs.WaitResult
doneCh chan struct{}
}
// rktPID is a struct to map the pid running the process to the vm image on
// disk
type rktPID struct {
PluginConfig *PluginReattachConfig
AllocDir *allocdir.AllocDir
ExecutorPid int
KillTimeout time.Duration
MaxKillTimeout time.Duration
}
// NewRktDriver is used to create a new exec driver
func NewRktDriver(ctx *DriverContext) Driver {
return &RktDriver{DriverContext: *ctx}
}
// Validate is used to validate the driver configuration
func (d *RktDriver) Validate(config map[string]interface{}) error {
fd := &fields.FieldData{
Raw: config,
Schema: map[string]*fields.FieldSchema{
"image": &fields.FieldSchema{
Type: fields.TypeString,
Required: true,
},
"command": &fields.FieldSchema{
Type: fields.TypeString,
},
"args": &fields.FieldSchema{
Type: fields.TypeArray,
},
"trust_prefix": &fields.FieldSchema{
Type: fields.TypeString,
},
"dns_servers": &fields.FieldSchema{
Type: fields.TypeArray,
},
"dns_search_domains": &fields.FieldSchema{
Type: fields.TypeArray,
},
"debug": &fields.FieldSchema{
Type: fields.TypeBool,
},
},
}
if err := fd.Validate(); err != nil {
return err
}
return nil
}
func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Get the current status so that we can log any debug messages only if the
// state changes
_, currentlyEnabled := node.Attributes[rktDriverAttr]
// Only enable if we are root when running on non-windows systems.
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
if currentlyEnabled {
d.logger.Printf("[DEBUG] driver.rkt: must run as root user, disabling")
}
delete(node.Attributes, rktDriverAttr)
return false, nil
}
outBytes, err := exec.Command("rkt", "version").Output()
if err != nil {
delete(node.Attributes, rktDriverAttr)
return false, nil
}
out := strings.TrimSpace(string(outBytes))
rktMatches := reRktVersion.FindStringSubmatch(out)
appcMatches := reAppcVersion.FindStringSubmatch(out)
if len(rktMatches) != 2 || len(appcMatches) != 2 {
delete(node.Attributes, rktDriverAttr)
return false, fmt.Errorf("Unable to parse Rkt version string: %#v", rktMatches)
}
node.Attributes[rktDriverAttr] = "1"
node.Attributes["driver.rkt.version"] = rktMatches[1]
node.Attributes["driver.rkt.appc.version"] = appcMatches[1]
minVersion, _ := version.NewVersion(minRktVersion)
currentVersion, _ := version.NewVersion(node.Attributes["driver.rkt.version"])
if currentVersion.LessThan(minVersion) {
// Do not allow rkt < 0.14.0
d.logger.Printf("[WARN] driver.rkt: please upgrade rkt to a version >= %s", minVersion)
node.Attributes[rktDriverAttr] = "0"
}
return true, nil
}
// Run an existing Rkt image.
func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
var driverConfig RktDriverConfig
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
return nil, err
}
// ACI image
img := driverConfig.ImageName
// Get the tasks local directory.
taskName := d.DriverContext.taskName
taskDir, ok := ctx.AllocDir.TaskDirs[taskName]
if !ok {
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
}
// Build the command.
var cmdArgs []string
// Add debug option to rkt command.
debug := driverConfig.Debug
// Add the given trust prefix
trustPrefix := driverConfig.TrustPrefix
insecure := false
if trustPrefix != "" {
var outBuf, errBuf bytes.Buffer
cmd := exec.Command("rkt", "trust", "--skip-fingerprint-review=true", fmt.Sprintf("--prefix=%s", trustPrefix), fmt.Sprintf("--debug=%t", debug))
cmd.Stdout = &outBuf
cmd.Stderr = &errBuf
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("Error running rkt trust: %s\n\nOutput: %s\n\nError: %s",
err, outBuf.String(), errBuf.String())
}
d.logger.Printf("[DEBUG] driver.rkt: added trust prefix: %q", trustPrefix)
} else {
// Disable signature verification if the trust command was not run.
insecure = true
}
cmdArgs = append(cmdArgs, "run")
cmdArgs = append(cmdArgs, fmt.Sprintf("--volume=%s,kind=host,source=%s", task.Name, ctx.AllocDir.SharedDir))
cmdArgs = append(cmdArgs, fmt.Sprintf("--mount=volume=%s,target=%s", task.Name, ctx.AllocDir.SharedDir))
cmdArgs = append(cmdArgs, img)
if insecure {
cmdArgs = append(cmdArgs, "--insecure-options=all")
}
cmdArgs = append(cmdArgs, fmt.Sprintf("--debug=%t", debug))
// Inject environment variables
for k, v := range d.taskEnv.EnvMap() {
cmdArgs = append(cmdArgs, fmt.Sprintf("--set-env=%v=%v", k, v))
}
// Check if the user has overridden the exec command.
if driverConfig.Command != "" {
cmdArgs = append(cmdArgs, fmt.Sprintf("--exec=%v", driverConfig.Command))
}
// Add memory isolator
cmdArgs = append(cmdArgs, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB)))
// Add CPU isolator
cmdArgs = append(cmdArgs, fmt.Sprintf("--cpu=%vm", int64(task.Resources.CPU)))
// Add DNS servers
for _, ip := range driverConfig.DNSServers {
// net.ParseIP returns nil for an invalid address, so name the result for what it is.
if parsed := net.ParseIP(ip); parsed == nil {
msg := fmt.Errorf("invalid ip address for container dns server %q", ip)
d.logger.Printf("[DEBUG] driver.rkt: %v", msg)
return nil, msg
}
cmdArgs = append(cmdArgs, fmt.Sprintf("--dns=%s", ip))
}
// set DNS search domains
for _, domain := range driverConfig.DNSSearchDomains {
cmdArgs = append(cmdArgs, fmt.Sprintf("--dns-search=%s", domain))
}
// Add user passed arguments.
if len(driverConfig.Args) != 0 {
parsed := d.taskEnv.ParseAndReplace(driverConfig.Args)
// Need to start arguments with "--"
if len(parsed) > 0 {
cmdArgs = append(cmdArgs, "--")
}
for _, arg := range parsed {
cmdArgs = append(cmdArgs, fmt.Sprintf("%v", arg))
}
}
// Set the host environment variables.
filter := strings.Split(d.config.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",")
d.taskEnv.AppendHostEnvvars(filter)
bin, err := discover.NomadExecutable()
if err != nil {
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
}
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
pluginConfig := &plugin.ClientConfig{
Cmd: exec.Command(bin, "executor", pluginLogFile),
}
execIntf, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
Driver: "rkt",
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
Task: task,
}
absPath, err := GetAbsolutePath("rkt")
if err != nil {
return nil, err
}
ps, err := execIntf.LaunchCmd(&executor.ExecCommand{
Cmd: absPath,
Args: cmdArgs,
User: task.User,
}, executorCtx)
if err != nil {
pluginClient.Kill()
return nil, err
}
d.logger.Printf("[DEBUG] driver.rkt: started ACI %q with: %v", img, cmdArgs)
maxKill := d.DriverContext.config.MaxKillTimeout
h := &rktHandle{
pluginClient: pluginClient,
executor: execIntf,
executorPid: ps.Pid,
allocDir: ctx.AllocDir,
logger: d.logger,
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
maxKillTimeout: maxKill,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
h.logger.Printf("[ERR] driver.rkt: error registering services for task: %q: %v", task.Name, err)
}
go h.run()
return h, nil
}
func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
// Parse the handle
pidBytes := []byte(strings.TrimPrefix(handleID, "Rkt:"))
id := &rktPID{}
if err := json.Unmarshal(pidBytes, id); err != nil {
return nil, fmt.Errorf("failed to parse Rkt handle '%s': %v", handleID, err)
}
pluginConfig := &plugin.ClientConfig{
Reattach: id.PluginConfig.PluginConfig(),
}
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
d.logger.Println("[ERROR] driver.rkt: error connecting to plugin so destroying plugin pid and user pid")
if e := destroyPlugin(id.PluginConfig.Pid, id.ExecutorPid); e != nil {
d.logger.Printf("[ERROR] driver.rkt: error destroying plugin and executor pid: %v", e)
}
return nil, fmt.Errorf("error connecting to plugin: %v", err)
}
ver, _ := exec.Version()
d.logger.Printf("[DEBUG] driver.rkt: version of executor: %v", ver.Version)
// Return a driver handle
h := &rktHandle{
pluginClient: pluginClient,
executorPid: id.ExecutorPid,
allocDir: id.AllocDir,
executor: exec,
logger: d.logger,
killTimeout: id.KillTimeout,
maxKillTimeout: id.MaxKillTimeout,
doneCh: make(chan struct{}),
waitCh: make(chan *dstructs.WaitResult, 1),
}
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
h.logger.Printf("[ERR] driver.rkt: error registering services: %v", err)
}
go h.run()
return h, nil
}
func (h *rktHandle) ID() string {
// Return a handle to the PID
pid := &rktPID{
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
KillTimeout: h.killTimeout,
MaxKillTimeout: h.maxKillTimeout,
ExecutorPid: h.executorPid,
AllocDir: h.allocDir,
}
data, err := json.Marshal(pid)
if err != nil {
h.logger.Printf("[ERR] driver.rkt: failed to marshal rkt PID to JSON: %s", err)
}
return fmt.Sprintf("Rkt:%s", string(data))
}
func (h *rktHandle) WaitCh() chan *dstructs.WaitResult {
return h.waitCh
}
func (h *rktHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateTask(task)
// Updates to other fields are not supported while the task is running.
return nil
}
// Kill is used to terminate the task. We send an interrupt and then
// wait for the configured kill timeout before forcing the executor to exit.
func (h *rktHandle) Kill() error {
h.executor.ShutDown()
select {
case <-h.doneCh:
return nil
case <-time.After(h.killTimeout):
return h.executor.Exit()
}
}
func (h *rktHandle) Stats() (*cstructs.TaskResourceUsage, error) {
return nil, fmt.Errorf("stats not implemented for rkt")
}
func (h *rktHandle) run() {
ps, err := h.executor.Wait()
close(h.doneCh)
if ps.ExitCode == 0 && err != nil {
if e := killProcess(h.executorPid); e != nil {
h.logger.Printf("[ERROR] driver.rkt: error killing user process: %v", e)
}
if e := h.allocDir.UnmountAll(); e != nil {
h.logger.Printf("[ERROR] driver.rkt: unmounting dev,proc and alloc dirs failed: %v", e)
}
}
h.waitCh <- dstructs.NewWaitResult(ps.ExitCode, 0, err)
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.rkt: failed to deregister services: %v", err)
}
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.rkt: error killing executor: %v", err)
}
h.pluginClient.Kill()
}
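For reference, a minimal sketch of the handle round-trip performed by ID and Open above, using a simplified payload struct in place of rktPID (the real struct also carries the plugin reattach config and the alloc dir); this is illustrative only, not part of the driver:

package main

import (
    "encoding/json"
    "fmt"
    "strings"
    "time"
)

// examplePID stands in for rktPID with only the plain fields kept.
type examplePID struct {
    ExecutorPid int
    KillTimeout time.Duration
}

func main() {
    // Encode, as ID() does: a "Rkt:" prefix followed by the JSON payload.
    data, err := json.Marshal(&examplePID{ExecutorPid: 4321, KillTimeout: 5 * time.Second})
    if err != nil {
        panic(err)
    }
    handleID := fmt.Sprintf("Rkt:%s", data)

    // Decode, as Open() does: strip the prefix and unmarshal the JSON.
    var restored examplePID
    if err := json.Unmarshal([]byte(strings.TrimPrefix(handleID, "Rkt:")), &restored); err != nil {
        panic(err)
    }
    fmt.Println(handleID, restored.ExecutorPid, restored.KillTimeout)
}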

View File

@ -1,77 +0,0 @@
package structs
import (
"fmt"
"time"
)
const (
// The default user that the executor uses to run tasks
DefaultUnpriviledgedUser = "nobody"
// CheckBufSize is the size of the check output result
CheckBufSize = 4 * 1024
)
// WaitResult stores the result of a Wait operation.
type WaitResult struct {
ExitCode int
Signal int
Err error
}
func NewWaitResult(code, signal int, err error) *WaitResult {
return &WaitResult{
ExitCode: code,
Signal: signal,
Err: err,
}
}
func (r *WaitResult) Successful() bool {
return r.ExitCode == 0 && r.Signal == 0 && r.Err == nil
}
func (r *WaitResult) String() string {
return fmt.Sprintf("Wait returned exit code %v, signal %v, and error %v",
r.ExitCode, r.Signal, r.Err)
}
// RecoverableError wraps an error and marks whether it is recoverable and could
// be retried or it is fatal.
type RecoverableError struct {
Err error
Recoverable bool
}
// NewRecoverableError is used to wrap an error and mark it as recoverable or
// not.
func NewRecoverableError(e error, recoverable bool) *RecoverableError {
return &RecoverableError{
Err: e,
Recoverable: recoverable,
}
}
func (r *RecoverableError) Error() string {
return r.Err.Error()
}
// CheckResult encapsulates the result of a check
type CheckResult struct {
// ExitCode is the exit code of the check
ExitCode int
// Output is the output of the check script
Output string
// Timestamp is the time at which the check was executed
Timestamp time.Time
// Duration is the time it took the check to run
Duration time.Duration
// Err is the error that a check returned
Err error
}
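A minimal sketch of how a caller might consume WaitResult and RecoverableError, assuming this package is importable at its usual vendored path:

package main

import (
    "errors"
    "fmt"

    cstructs "github.com/hashicorp/nomad/client/driver/structs"
)

func main() {
    // A zero exit code, no signal and no error counts as success.
    res := cstructs.NewWaitResult(0, 0, nil)
    fmt.Println(res.Successful(), res.String())

    // Wrap a transient failure so the caller knows a retry may help.
    rerr := cstructs.NewRecoverableError(errors.New("connection reset"), true)
    if rerr.Recoverable {
        fmt.Println("retrying after:", rerr.Error())
    }
}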

View File

@ -1,12 +0,0 @@
// +build darwin dragonfly freebsd netbsd openbsd solaris windows
package structs
// IsolationConfig has information about the isolation mechanism the executor
// uses to put resource constraints and isolation on the user process. The
// default implementation is empty. Platforms that support resource isolation
// (e.g. Linux's Cgroups) should build their own platform-specific copy. This
// information is transmitted via RPC so it is not permissible to change the
// API.
type IsolationConfig struct {
}

View File

@ -1,10 +0,0 @@
package structs
import cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
// IsolationConfig has information about the isolation mechanism the executor
// uses to put resource constraints and isolation on the user process
type IsolationConfig struct {
Cgroup *cgroupConfig.Cgroup
CgroupPaths map[string]string
}

View File

@ -1,69 +0,0 @@
package driver
import (
"log"
"net/rpc"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/client/driver/logging"
"github.com/hashicorp/nomad/nomad/structs"
)
type SyslogCollectorRPC struct {
client *rpc.Client
}
type LaunchCollectorArgs struct {
Ctx *logging.LogCollectorContext
}
func (e *SyslogCollectorRPC) LaunchCollector(ctx *logging.LogCollectorContext) (*logging.SyslogCollectorState, error) {
var ss *logging.SyslogCollectorState
err := e.client.Call("Plugin.LaunchCollector", LaunchCollectorArgs{Ctx: ctx}, &ss)
return ss, err
}
func (e *SyslogCollectorRPC) Exit() error {
return e.client.Call("Plugin.Exit", new(interface{}), new(interface{}))
}
func (e *SyslogCollectorRPC) UpdateLogConfig(logConfig *structs.LogConfig) error {
return e.client.Call("Plugin.UpdateLogConfig", logConfig, new(interface{}))
}
type SyslogCollectorRPCServer struct {
Impl logging.LogCollector
}
func (s *SyslogCollectorRPCServer) LaunchCollector(args LaunchCollectorArgs,
resp *logging.SyslogCollectorState) error {
ss, err := s.Impl.LaunchCollector(args.Ctx)
if ss != nil {
*resp = *ss
}
return err
}
func (s *SyslogCollectorRPCServer) Exit(args interface{}, resp *interface{}) error {
return s.Impl.Exit()
}
func (s *SyslogCollectorRPCServer) UpdateLogConfig(logConfig *structs.LogConfig, resp *interface{}) error {
return s.Impl.UpdateLogConfig(logConfig)
}
type SyslogCollectorPlugin struct {
logger *log.Logger
Impl *SyslogCollectorRPCServer
}
func (p *SyslogCollectorPlugin) Server(*plugin.MuxBroker) (interface{}, error) {
if p.Impl == nil {
p.Impl = &SyslogCollectorRPCServer{Impl: logging.NewSyslogCollector(p.logger)}
}
return p.Impl, nil
}
func (p *SyslogCollectorPlugin) Client(b *plugin.MuxBroker, c *rpc.Client) (interface{}, error) {
return &SyslogCollectorRPC{client: c}, nil
}

View File

@ -1,170 +0,0 @@
package driver
import (
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/executor"
"github.com/hashicorp/nomad/client/driver/logging"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
// createExecutor launches an executor plugin and returns an instance of the
// Executor interface
func createExecutor(config *plugin.ClientConfig, w io.Writer,
clientConfig *config.Config) (executor.Executor, *plugin.Client, error) {
config.HandshakeConfig = HandshakeConfig
config.Plugins = GetPluginMap(w)
config.MaxPort = clientConfig.ClientMaxPort
config.MinPort = clientConfig.ClientMinPort
// Set the setsid flag on the plugin process so that it doesn't receive signals
// sent to the nomad client.
if config.Cmd != nil {
isolateCommand(config.Cmd)
}
executorClient := plugin.NewClient(config)
rpcClient, err := executorClient.Client()
if err != nil {
return nil, nil, fmt.Errorf("error creating rpc client for executor plugin: %v", err)
}
raw, err := rpcClient.Dispense("executor")
if err != nil {
return nil, nil, fmt.Errorf("unable to dispense the executor plugin: %v", err)
}
executorPlugin := raw.(executor.Executor)
return executorPlugin, executorClient, nil
}
func createLogCollector(config *plugin.ClientConfig, w io.Writer,
clientConfig *config.Config) (logging.LogCollector, *plugin.Client, error) {
config.HandshakeConfig = HandshakeConfig
config.Plugins = GetPluginMap(w)
config.MaxPort = clientConfig.ClientMaxPort
config.MinPort = clientConfig.ClientMinPort
if config.Cmd != nil {
isolateCommand(config.Cmd)
}
syslogClient := plugin.NewClient(config)
rpcClient, err := syslogClient.Client()
if err != nil {
return nil, nil, fmt.Errorf("error creating rpc client for syslog plugin: %v", err)
}
raw, err := rpcClient.Dispense("syslogcollector")
if err != nil {
return nil, nil, fmt.Errorf("unable to dispense the syslog plugin: %v", err)
}
logCollector := raw.(logging.LogCollector)
return logCollector, syslogClient, nil
}
func consulContext(clientConfig *config.Config, containerID string) *executor.ConsulContext {
return &executor.ConsulContext{
ConsulConfig: clientConfig.ConsulConfig,
ContainerID: containerID,
DockerEndpoint: clientConfig.Read("docker.endpoint"),
TLSCa: clientConfig.Read("docker.tls.ca"),
TLSCert: clientConfig.Read("docker.tls.cert"),
TLSKey: clientConfig.Read("docker.tls.key"),
}
}
// killProcess kills a process with the given pid
func killProcess(pid int) error {
proc, err := os.FindProcess(pid)
if err != nil {
return err
}
return proc.Kill()
}
// destroyPlugin kills the plugin with the given pid and also kills the user
// process
func destroyPlugin(pluginPid int, userPid int) error {
var merr error
if err := killProcess(pluginPid); err != nil {
merr = multierror.Append(merr, err)
}
if err := killProcess(userPid); err != nil {
merr = multierror.Append(merr, err)
}
return merr
}
// validateCommand validates that the command only has a single value and
// returns a user-friendly error message telling them to use the passed
// argField.
func validateCommand(command, argField string) error {
trimmed := strings.TrimSpace(command)
if len(trimmed) == 0 {
return fmt.Errorf("command empty: %q", command)
}
if len(trimmed) != len(command) {
return fmt.Errorf("command contains extra white space: %q", command)
}
split := strings.Split(trimmed, " ")
if len(split) != 1 {
return fmt.Errorf("command contained more than one input. Use %q field to pass arguments", argField)
}
return nil
}
// GetKillTimeout returns the kill timeout to use given the tasks desired kill
// timeout and the operator configured max kill timeout.
func GetKillTimeout(desired, max time.Duration) time.Duration {
maxNanos := max.Nanoseconds()
desiredNanos := desired.Nanoseconds()
// Make the minimum time between signal and kill, 1 second.
if desiredNanos <= 0 {
desiredNanos = (1 * time.Second).Nanoseconds()
}
// Protect against max not being set properly.
if maxNanos <= 0 {
maxNanos = (10 * time.Second).Nanoseconds()
}
if desiredNanos < maxNanos {
return time.Duration(desiredNanos)
}
return max
}
// GetAbsolutePath returns the absolute path of the passed binary by resolving
// it in the path and following symlinks.
func GetAbsolutePath(bin string) (string, error) {
lp, err := exec.LookPath(bin)
if err != nil {
return "", fmt.Errorf("failed to resolve path to %q executable: %v", bin, err)
}
return filepath.EvalSymlinks(lp)
}
// getExecutorUser returns the user of the task, defaulting to
// cstructs.DefaultUnprivilegedUser if none was given.
func getExecutorUser(task *structs.Task) string {
if task.User == "" {
return cstructs.DefaultUnpriviledgedUser
}
return task.User
}
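A minimal sketch of GetKillTimeout's clamping behavior, assuming the driver package above is importable at its vendored path; the durations below are illustrative values, not defaults taken from the codebase:

package main

import (
    "fmt"
    "time"

    "github.com/hashicorp/nomad/client/driver"
)

func main() {
    max := 30 * time.Second

    fmt.Println(driver.GetKillTimeout(5*time.Second, max)) // below the max: stays 5s
    fmt.Println(driver.GetKillTimeout(2*time.Minute, max)) // above the max: clamped to 30s
    fmt.Println(driver.GetKillTimeout(0, max))             // unset: floored at 1s
}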

View File

@ -1,18 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package driver
import (
"os/exec"
"syscall"
)
// isolateCommand sets the setsid flag in exec.Cmd to true so that the process
// becomes the process leader in a new session and doesn't receive signals that
// are sent to the parent process.
func isolateCommand(cmd *exec.Cmd) {
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.SysProcAttr.Setsid = true
}

View File

@ -1,9 +0,0 @@
package driver
import (
"os/exec"
)
// TODO Figure out if this is needed on Windows
func isolateCommand(cmd *exec.Cmd) {
}

View File

@ -1,26 +0,0 @@
package fingerprint
import (
"log"
"runtime"
client "github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
// ArchFingerprint is used to fingerprint the architecture
type ArchFingerprint struct {
StaticFingerprinter
logger *log.Logger
}
// NewArchFingerprint is used to create an arch fingerprint
func NewArchFingerprint(logger *log.Logger) Fingerprint {
f := &ArchFingerprint{logger: logger}
return f
}
func (f *ArchFingerprint) Fingerprint(config *client.Config, node *structs.Node) (bool, error) {
node.Attributes["arch"] = runtime.GOARCH
return true, nil
}

View File

@ -1,59 +0,0 @@
// +build linux
package fingerprint
import (
"log"
"time"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
cgroupAvailable = "available"
cgroupUnavailable = "unavailable"
interval = 15
)
type CGroupFingerprint struct {
logger *log.Logger
lastState string
mountPointDetector MountPointDetector
}
// An interface to isolate calls to the cgroup library
// This facilitates testing where we can implement
// fake mount points to test various code paths
type MountPointDetector interface {
MountPoint() (string, error)
}
// Implements the interface detector which calls the cgroups library directly
type DefaultMountPointDetector struct {
}
// Call out to the default cgroup library
func (b *DefaultMountPointDetector) MountPoint() (string, error) {
return FindCgroupMountpointDir()
}
// NewCGroupFingerprint returns a new cgroup fingerprinter
func NewCGroupFingerprint(logger *log.Logger) Fingerprint {
f := &CGroupFingerprint{
logger: logger,
lastState: cgroupUnavailable,
mountPointDetector: &DefaultMountPointDetector{},
}
return f
}
// clearCGroupAttributes clears any node attributes related to cgroups that might
// have been set in a previous fingerprint run.
func (f *CGroupFingerprint) clearCGroupAttributes(n *structs.Node) {
delete(n.Attributes, "unique.cgroup.mountpoint")
}
// Periodic determines the interval at which the periodic fingerprinter will run.
func (f *CGroupFingerprint) Periodic() (bool, time.Duration) {
return true, interval * time.Second
}

View File

@ -1,57 +0,0 @@
// +build linux
package fingerprint
import (
"fmt"
client "github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
// FindCgroupMountpointDir is used to find the cgroup mount point on a Linux
// system.
func FindCgroupMountpointDir() (string, error) {
mount, err := cgroups.FindCgroupMountpointDir()
if err != nil {
switch e := err.(type) {
case *cgroups.NotFoundError:
// It's okay if the mount point is not discovered
return "", nil
default:
// All other errors are passed back as is
return "", e
}
}
return mount, nil
}
// Fingerprint tries to find a valid cgroup mount point
func (f *CGroupFingerprint) Fingerprint(cfg *client.Config, node *structs.Node) (bool, error) {
mount, err := f.mountPointDetector.MountPoint()
if err != nil {
f.clearCGroupAttributes(node)
return false, fmt.Errorf("Failed to discover cgroup mount point: %s", err)
}
// Check if a cgroup mount point was found
if mount == "" {
// Clear any attributes from the previous fingerprint.
f.clearCGroupAttributes(node)
if f.lastState == cgroupAvailable {
f.logger.Printf("[INFO] fingerprint.cgroups: cgroups are unavailable")
}
f.lastState = cgroupUnavailable
return true, nil
}
node.Attributes["unique.cgroup.mountpoint"] = mount
if f.lastState == cgroupUnavailable {
f.logger.Printf("[INFO] fingerprint.cgroups: cgroups are available")
}
f.lastState = cgroupAvailable
return true, nil
}

View File

@ -1,100 +0,0 @@
package fingerprint
import (
"fmt"
"log"
"strconv"
"time"
consul "github.com/hashicorp/consul/api"
client "github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
consulAvailable = "available"
consulUnavailable = "unavailable"
)
// ConsulFingerprint is used to fingerprint the presence and version of the local Consul agent
type ConsulFingerprint struct {
logger *log.Logger
client *consul.Client
lastState string
}
// NewConsulFingerprint is used to create a Consul fingerprint
func NewConsulFingerprint(logger *log.Logger) Fingerprint {
return &ConsulFingerprint{logger: logger, lastState: consulUnavailable}
}
func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Node) (bool, error) {
// Guard against uninitialized Links
if node.Links == nil {
node.Links = map[string]string{}
}
// Only create the client once to avoid creating too many connections to
// Consul.
if f.client == nil {
consulConfig, err := config.ConsulConfig.ApiConfig()
if err != nil {
return false, fmt.Errorf("Failed to initialize the Consul client config: %v", err)
}
f.client, err = consul.NewClient(consulConfig)
if err != nil {
return false, fmt.Errorf("Failed to initialize consul client: %s", err)
}
}
// We'll try to detect consul by making a query to the agent's self API.
// If we can't hit this URL consul is probably not running on this machine.
info, err := f.client.Agent().Self()
if err != nil {
// Clear any attributes set by a previous fingerprint.
f.clearConsulAttributes(node)
// Print a message indicating that the Consul Agent is not available
// anymore
if f.lastState == consulAvailable {
f.logger.Printf("[INFO] fingerprint.consul: consul agent is unavailable")
}
f.lastState = consulUnavailable
return false, nil
}
node.Attributes["consul.server"] = strconv.FormatBool(info["Config"]["Server"].(bool))
node.Attributes["consul.version"] = info["Config"]["Version"].(string)
node.Attributes["consul.revision"] = info["Config"]["Revision"].(string)
node.Attributes["unique.consul.name"] = info["Config"]["NodeName"].(string)
node.Attributes["consul.datacenter"] = info["Config"]["Datacenter"].(string)
node.Links["consul"] = fmt.Sprintf("%s.%s",
node.Attributes["consul.datacenter"],
node.Attributes["unique.consul.name"])
// If the Consul Agent was previously unavailable print a message to
// indicate the Agent is available now
if f.lastState == consulUnavailable {
f.logger.Printf("[INFO] fingerprint.consul: consul agent is available")
}
f.lastState = consulAvailable
return true, nil
}
// clearConsulAttributes removes consul attributes and links from the passed
// Node.
func (f *ConsulFingerprint) clearConsulAttributes(n *structs.Node) {
delete(n.Attributes, "consul.server")
delete(n.Attributes, "consul.version")
delete(n.Attributes, "consul.revision")
delete(n.Attributes, "unique.consul.name")
delete(n.Attributes, "consul.datacenter")
delete(n.Links, "consul")
}
func (f *ConsulFingerprint) Periodic() (bool, time.Duration) {
return true, 15 * time.Second
}

View File

@ -1,52 +0,0 @@
package fingerprint
import (
"fmt"
"log"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/nomad/structs"
)
// CPUFingerprint is used to fingerprint the CPU
type CPUFingerprint struct {
StaticFingerprinter
logger *log.Logger
}
// NewCPUFingerprint is used to create a CPU fingerprint
func NewCPUFingerprint(logger *log.Logger) Fingerprint {
f := &CPUFingerprint{logger: logger}
return f
}
func (f *CPUFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
if err := stats.Init(); err != nil {
return false, fmt.Errorf("Unable to obtain CPU information: %v", err)
}
modelName := stats.CPUModelName()
if modelName != "" {
node.Attributes["cpu.modelname"] = modelName
}
mhz := stats.CPUMHzPerCore()
node.Attributes["cpu.frequency"] = fmt.Sprintf("%.0f", mhz)
f.logger.Printf("[DEBUG] fingerprint.cpu: frequency: %.0f MHz", mhz)
numCores := stats.CPUNumCores()
node.Attributes["cpu.numcores"] = fmt.Sprintf("%d", numCores)
f.logger.Printf("[DEBUG] fingerprint.cpu: core count: %d", numCores)
tt := stats.TotalTicksAvailable()
node.Attributes["cpu.totalcompute"] = fmt.Sprintf("%.0f", tt)
if node.Resources == nil {
node.Resources = &structs.Resources{}
}
node.Resources.CPU = int(tt)
return true, nil
}

View File

@ -1,250 +0,0 @@
package fingerprint
import (
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"regexp"
"strings"
"time"
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
// This is where the AWS metadata server normally resides. We hardcode the
// "instance" path as well since it's the only one we access here.
const DEFAULT_AWS_URL = "http://169.254.169.254/latest/meta-data/"
// map of instance type to approximate speed, in Mbits/s
// http://serverfault.com/questions/324883/aws-bandwidth-and-content-delivery/326797#326797
// which itself cites these sources:
// - http://blog.rightscale.com/2007/10/28/network-performance-within-amazon-ec2-and-to-amazon-s3/
// - http://www.soc.napier.ac.uk/~bill/chris_p.pdf
//
// This data is meant as a loose approximation
var ec2InstanceSpeedMap = map[string]int{
"m4.large": 80,
"m3.medium": 80,
"m3.large": 80,
"c4.large": 80,
"c3.large": 80,
"c3.xlarge": 80,
"r3.large": 80,
"r3.xlarge": 80,
"i2.xlarge": 80,
"d2.xlarge": 80,
"t2.micro": 16,
"t2.small": 16,
"t2.medium": 16,
"t2.large": 16,
"m4.xlarge": 760,
"m4.2xlarge": 760,
"m4.4xlarge": 760,
"m3.xlarge": 760,
"m3.2xlarge": 760,
"c4.xlarge": 760,
"c4.2xlarge": 760,
"c4.4xlarge": 760,
"c3.2xlarge": 760,
"c3.4xlarge": 760,
"g2.2xlarge": 760,
"r3.2xlarge": 760,
"r3.4xlarge": 760,
"i2.2xlarge": 760,
"i2.4xlarge": 760,
"d2.2xlarge": 760,
"d2.4xlarge": 760,
"m4.10xlarge": 10000,
"c4.8xlarge": 10000,
"c3.8xlarge": 10000,
"g2.8xlarge": 10000,
"r3.8xlarge": 10000,
"i2.8xlarge": 10000,
"d2.8xlarge": 10000,
}
// EnvAWSFingerprint is used to fingerprint AWS metadata
type EnvAWSFingerprint struct {
StaticFingerprinter
logger *log.Logger
}
// NewEnvAWSFingerprint is used to create a fingerprint from AWS metadata
func NewEnvAWSFingerprint(logger *log.Logger) Fingerprint {
f := &EnvAWSFingerprint{logger: logger}
return f
}
func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
if !f.isAWS() {
return false, nil
}
// newNetwork is populated and added to the Node's resources
newNetwork := &structs.NetworkResource{
Device: "eth0",
}
if node.Links == nil {
node.Links = make(map[string]string)
}
metadataURL := os.Getenv("AWS_ENV_URL")
if metadataURL == "" {
metadataURL = DEFAULT_AWS_URL
}
// assume 2 seconds is enough time for inside AWS network
client := &http.Client{
Timeout: 2 * time.Second,
Transport: cleanhttp.DefaultTransport(),
}
// Keys and whether they should be namespaced as unique. Any key whose value
// uniquely identifies a node, such as ip, should be marked as unique. When
// marked as unique, the key isn't included in the computed node class.
keys := map[string]bool{
"ami-id": true,
"hostname": true,
"instance-id": true,
"instance-type": false,
"local-hostname": true,
"local-ipv4": true,
"public-hostname": true,
"public-ipv4": true,
"placement/availability-zone": false,
}
for k, unique := range keys {
res, err := client.Get(metadataURL + k)
if err != nil {
// if it's a URL error, assume we're not in an AWS environment
// TODO: better way to detect AWS? Check xen virtualization?
if _, ok := err.(*url.Error); ok {
return false, nil
}
// not sure what other errors it would return
return false, err
}
// Check the error before the status code so a failed request never dereferences a nil response.
if res.StatusCode != http.StatusOK {
res.Body.Close()
f.logger.Printf("[WARN]: fingerprint.env_aws: Could not read value for attribute %q", k)
continue
}
resp, err := ioutil.ReadAll(res.Body)
res.Body.Close()
if err != nil {
f.logger.Printf("[ERR]: fingerprint.env_aws: Error reading response body for AWS %s", k)
}
// assume we want blank entries
key := "platform.aws." + strings.Replace(k, "/", ".", -1)
if unique {
key = structs.UniqueNamespace(key)
}
node.Attributes[key] = strings.Trim(string(resp), "\n")
}
// copy over network specific information
if val := node.Attributes["unique.platform.aws.local-ipv4"]; val != "" {
node.Attributes["unique.network.ip-address"] = val
newNetwork.IP = val
newNetwork.CIDR = newNetwork.IP + "/32"
}
// find LinkSpeed from lookup
if throughput := f.linkSpeed(); throughput > 0 {
newNetwork.MBits = throughput
}
if node.Resources == nil {
node.Resources = &structs.Resources{}
}
node.Resources.Networks = append(node.Resources.Networks, newNetwork)
// populate Node Network Resources
// populate Links
node.Links["aws.ec2"] = fmt.Sprintf("%s.%s",
node.Attributes["platform.aws.placement.availability-zone"],
node.Attributes["unique.platform.aws.instance-id"])
return true, nil
}
func (f *EnvAWSFingerprint) isAWS() bool {
// Read the internal metadata URL from the environment, allowing test files to
// provide their own
metadataURL := os.Getenv("AWS_ENV_URL")
if metadataURL == "" {
metadataURL = DEFAULT_AWS_URL
}
// assume 2 seconds is enough time for inside AWS network
client := &http.Client{
Timeout: 2 * time.Second,
Transport: cleanhttp.DefaultTransport(),
}
// Query the metadata url for the ami-id, to verify we're on AWS
resp, err := client.Get(metadataURL + "ami-id")
if err != nil {
f.logger.Printf("[DEBUG] fingerprint.env_aws: Error querying AWS Metadata URL, skipping")
return false
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
// URL not found, which indicates that this isn't AWS
return false
}
instanceID, err := ioutil.ReadAll(resp.Body)
if err != nil {
f.logger.Printf("[DEBUG] fingerprint.env_aws: Error reading AWS Instance ID, skipping")
return false
}
match, err := regexp.MatchString("ami-*", string(instanceID))
if err != nil || !match {
return false
}
return true
}
// linkSpeed uses a lookup table to approximate network speeds based on the EC2 instance type
func (f *EnvAWSFingerprint) linkSpeed() int {
// Query the API for the instance type, and use the table above to approximate
// the network speed
metadataURL := os.Getenv("AWS_ENV_URL")
if metadataURL == "" {
metadataURL = DEFAULT_AWS_URL
}
// assume 2 seconds is enough time for inside AWS network
client := &http.Client{
Timeout: 2 * time.Second,
Transport: cleanhttp.DefaultTransport(),
}
res, err := client.Get(metadataURL + "instance-type")
if err != nil {
// Check the request error before touching the response to avoid a nil dereference.
f.logger.Printf("[ERR]: fingerprint.env_aws: Error querying instance-type: %v", err)
return 0
}
body, err := ioutil.ReadAll(res.Body)
res.Body.Close()
if err != nil {
f.logger.Printf("[ERR]: fingerprint.env_aws: Error reading response body for instance-type")
return 0
}
key := strings.Trim(string(body), "\n")
v, ok := ec2InstanceSpeedMap[key]
if !ok {
return 0
}
return v
}

View File

@ -1,270 +0,0 @@
package fingerprint
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"regexp"
"strconv"
"strings"
"time"
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
// This is where the GCE metadata server normally resides. We hardcode the
// "instance" path as well since it's the only one we access here.
const DEFAULT_GCE_URL = "http://169.254.169.254/computeMetadata/v1/instance/"
type GCEMetadataNetworkInterface struct {
AccessConfigs []struct {
ExternalIp string
Type string
}
ForwardedIps []string
Ip string
Network string
}
type ReqError struct {
StatusCode int
}
func (e ReqError) Error() string {
return http.StatusText(e.StatusCode)
}
func lastToken(s string) string {
index := strings.LastIndex(s, "/")
return s[index+1:]
}
// EnvGCEFingerprint is used to fingerprint GCE metadata
type EnvGCEFingerprint struct {
StaticFingerprinter
client *http.Client
logger *log.Logger
metadataURL string
}
// NewEnvGCEFingerprint is used to create a fingerprint from GCE metadata
func NewEnvGCEFingerprint(logger *log.Logger) Fingerprint {
// Read the internal metadata URL from the environment, allowing test files to
// provide their own
metadataURL := os.Getenv("GCE_ENV_URL")
if metadataURL == "" {
metadataURL = DEFAULT_GCE_URL
}
// assume 2 seconds is enough time for inside GCE network
client := &http.Client{
Timeout: 2 * time.Second,
Transport: cleanhttp.DefaultTransport(),
}
return &EnvGCEFingerprint{
client: client,
logger: logger,
metadataURL: metadataURL,
}
}
func (f *EnvGCEFingerprint) Get(attribute string, recursive bool) (string, error) {
reqUrl := f.metadataURL + attribute
if recursive {
reqUrl = reqUrl + "?recursive=true"
}
parsedUrl, err := url.Parse(reqUrl)
if err != nil {
return "", err
}
req := &http.Request{
Method: "GET",
URL: parsedUrl,
Header: http.Header{
"Metadata-Flavor": []string{"Google"},
},
}
res, err := f.client.Do(req)
if err != nil || res.StatusCode != http.StatusOK {
f.logger.Printf("[DEBUG] fingerprint.env_gce: Could not read value for attribute %q", attribute)
return "", err
}
resp, err := ioutil.ReadAll(res.Body)
res.Body.Close()
if err != nil {
f.logger.Printf("[ERR] fingerprint.env_gce: Error reading response body for GCE %s", attribute)
return "", err
}
if res.StatusCode >= 400 {
return "", ReqError{res.StatusCode}
}
return string(resp), nil
}
func checkError(err error, logger *log.Logger, desc string) error {
// If it's a URL error, assume we're not actually in an GCE environment.
// To the outer layers, this isn't an error so return nil.
if _, ok := err.(*url.Error); ok {
logger.Printf("[DEBUG] fingerprint.env_gce: Error querying GCE " + desc + ", skipping")
return nil
}
// Otherwise pass the error through.
return err
}
func (f *EnvGCEFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
if !f.isGCE() {
return false, nil
}
if node.Links == nil {
node.Links = make(map[string]string)
}
// Keys and whether they should be namespaced as unique. Any key whose value
// uniquely identifies a node, such as ip, should be marked as unique. When
// marked as unique, the key isn't included in the computed node class.
keys := map[string]bool{
"hostname": true,
"id": true,
"cpu-platform": false,
"scheduling/automatic-restart": false,
"scheduling/on-host-maintenance": false,
}
for k, unique := range keys {
value, err := f.Get(k, false)
if err != nil {
return false, checkError(err, f.logger, k)
}
// assume we want blank entries
key := "platform.gce." + strings.Replace(k, "/", ".", -1)
if unique {
key = structs.UniqueNamespace(key)
}
node.Attributes[key] = strings.Trim(string(value), "\n")
}
// These keys need everything before the final slash removed to be usable.
keys = map[string]bool{
"machine-type": false,
"zone": false,
}
for k, unique := range keys {
value, err := f.Get(k, false)
if err != nil {
return false, checkError(err, f.logger, k)
}
key := "platform.gce." + k
if unique {
key = structs.UniqueNamespace(key)
}
node.Attributes[key] = strings.Trim(lastToken(value), "\n")
}
// Get internal and external IPs (if they exist)
value, err := f.Get("network-interfaces/", true)
var interfaces []GCEMetadataNetworkInterface
if err := json.Unmarshal([]byte(value), &interfaces); err != nil {
f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding network interface information: %s", err.Error())
}
for _, intf := range interfaces {
prefix := "platform.gce.network." + lastToken(intf.Network)
uniquePrefix := "unique." + prefix
node.Attributes[prefix] = "true"
node.Attributes[uniquePrefix+".ip"] = strings.Trim(intf.Ip, "\n")
for index, accessConfig := range intf.AccessConfigs {
node.Attributes[uniquePrefix+".external-ip."+strconv.Itoa(index)] = accessConfig.ExternalIp
}
}
var tagList []string
value, err = f.Get("tags", false)
if err != nil {
return false, checkError(err, f.logger, "tags")
}
if err := json.Unmarshal([]byte(value), &tagList); err != nil {
f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding instance tags: %s", err.Error())
}
for _, tag := range tagList {
attr := "platform.gce.tag."
var key string
// If the tag is namespaced as unique, we strip it from the tag and
// prepend to the whole attribute.
if structs.IsUniqueNamespace(tag) {
tag = strings.TrimPrefix(tag, structs.NodeUniqueNamespace)
key = fmt.Sprintf("%s%s%s", structs.NodeUniqueNamespace, attr, tag)
} else {
key = fmt.Sprintf("%s%s", attr, tag)
}
node.Attributes[key] = "true"
}
var attrDict map[string]string
value, err = f.Get("attributes/", true)
if err != nil {
return false, checkError(err, f.logger, "attributes/")
}
if err := json.Unmarshal([]byte(value), &attrDict); err != nil {
f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding instance attributes: %s", err.Error())
}
for k, v := range attrDict {
attr := "platform.gce.attr."
var key string
// If the key is namespaced as unique, we strip it from the
// key and prepend to the whole attribute.
if structs.IsUniqueNamespace(k) {
k = strings.TrimPrefix(k, structs.NodeUniqueNamespace)
key = fmt.Sprintf("%s%s%s", structs.NodeUniqueNamespace, attr, k)
} else {
key = fmt.Sprintf("%s%s", attr, k)
}
node.Attributes[key] = strings.Trim(v, "\n")
}
// populate Links
node.Links["gce"] = node.Attributes["unique.platform.gce.id"]
return true, nil
}
func (f *EnvGCEFingerprint) isGCE() bool {
// TODO: better way to detect GCE?
// Query the metadata url for the machine type, to verify we're on GCE
machineType, err := f.Get("machine-type", false)
if err != nil {
if re, ok := err.(ReqError); !ok || re.StatusCode != 404 {
// If it wasn't a 404 error, print an error message.
f.logger.Printf("[DEBUG] fingerprint.env_gce: Error querying GCE Metadata URL, skipping")
}
return false
}
match, err := regexp.MatchString("projects/.+/machineTypes/.+", machineType)
if err != nil || !match {
return false
}
return true
}

View File

@ -1,87 +0,0 @@
package fingerprint
import (
"fmt"
"log"
"sort"
"time"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
// EmptyDuration is to be used by fingerprinters that are not periodic.
const (
EmptyDuration = time.Duration(0)
)
func init() {
builtinFingerprintMap["arch"] = NewArchFingerprint
builtinFingerprintMap["cpu"] = NewCPUFingerprint
builtinFingerprintMap["env_aws"] = NewEnvAWSFingerprint
builtinFingerprintMap["env_gce"] = NewEnvGCEFingerprint
builtinFingerprintMap["host"] = NewHostFingerprint
builtinFingerprintMap["memory"] = NewMemoryFingerprint
builtinFingerprintMap["network"] = NewNetworkFingerprint
builtinFingerprintMap["nomad"] = NewNomadFingerprint
builtinFingerprintMap["storage"] = NewStorageFingerprint
// Initialize the list of available fingerprinters per platform. Each
// platform defines its own list of available fingerprinters.
initPlatformFingerprints(builtinFingerprintMap)
}
// builtinFingerprintMap contains the built in registered fingerprints which are
// available for a given platform.
var builtinFingerprintMap = make(map[string]Factory, 16)
// BuiltinFingerprints returns a sorted slice of the key names of all registered
// fingerprints, to provide an ordered iteration
func BuiltinFingerprints() []string {
fingerprints := make([]string, 0, len(builtinFingerprintMap))
for k := range builtinFingerprintMap {
fingerprints = append(fingerprints, k)
}
sort.Strings(fingerprints)
return fingerprints
}
// NewFingerprint is used to instantiate and return a new fingerprint
// given the name and a logger
func NewFingerprint(name string, logger *log.Logger) (Fingerprint, error) {
// Lookup the factory function
factory, ok := builtinFingerprintMap[name]
if !ok {
return nil, fmt.Errorf("unknown fingerprint '%s'", name)
}
// Instantiate the fingerprint
f := factory(logger)
return f, nil
}
// Factory is used to instantiate a new Fingerprint
type Factory func(*log.Logger) Fingerprint
// Fingerprint is used for doing "fingerprinting" of the
// host to automatically determine attributes, resources,
// and metadata about it. Each of these is a heuristic, and
// many of them can be applied on a particular host.
type Fingerprint interface {
// Fingerprint is used to update properties of the Node,
// and returns if the fingerprint was applicable and a potential error.
Fingerprint(*config.Config, *structs.Node) (bool, error)
// Periodic is a mechanism for the fingerprinter to indicate that it should
// be run periodically. The return value is a boolean indicating if it
// should be periodic, and if true, a duration.
Periodic() (bool, time.Duration)
}
// StaticFingerprinter can be embedded in a struct that has a Fingerprint method
// to make it non-periodic.
type StaticFingerprinter struct{}
func (s *StaticFingerprinter) Periodic() (bool, time.Duration) {
return false, EmptyDuration
}
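A minimal sketch of instantiating and running a registered fingerprinter by name through NewFingerprint, assuming the client config and fingerprint packages are importable at their vendored paths; the empty config is sufficient here only because the arch fingerprinter does not read it:

package main

import (
    "log"
    "os"

    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/client/fingerprint"
    "github.com/hashicorp/nomad/nomad/structs"
)

func main() {
    logger := log.New(os.Stderr, "", log.LstdFlags)

    // Look up the "arch" factory registered in init() and instantiate it.
    fp, err := fingerprint.NewFingerprint("arch", logger)
    if err != nil {
        log.Fatal(err)
    }

    // Run it against a node with an initialized attribute map.
    node := &structs.Node{Attributes: map[string]string{}}
    applied, err := fp.Fingerprint(&config.Config{}, node)
    if err != nil {
        log.Fatal(err)
    }
    logger.Printf("applied=%v arch=%s", applied, node.Attributes["arch"])
}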

View File

@ -1,6 +0,0 @@
// +build darwin dragonfly freebsd netbsd openbsd solaris windows
package fingerprint
func initPlatformFingerprints(fps map[string]Factory) {
}

View File

@ -1,5 +0,0 @@
package fingerprint
func initPlatformFingerprints(fps map[string]Factory) {
fps["cgroup"] = NewCGroupFingerprint
}

View File

@ -1,51 +0,0 @@
package fingerprint
import (
"fmt"
"log"
"os/exec"
"runtime"
"strings"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shirou/gopsutil/host"
)
// HostFingerprint is used to fingerprint the host
type HostFingerprint struct {
StaticFingerprinter
logger *log.Logger
}
// NewHostFingerprint is used to create a Host fingerprint
func NewHostFingerprint(logger *log.Logger) Fingerprint {
f := &HostFingerprint{logger: logger}
return f
}
func (f *HostFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
hostInfo, err := host.Info()
if err != nil {
f.logger.Println("[WARN] Error retrieving host information: ", err)
return false, err
}
node.Attributes["os.name"] = hostInfo.Platform
node.Attributes["os.version"] = hostInfo.PlatformVersion
node.Attributes["kernel.name"] = runtime.GOOS
node.Attributes["kernel.version"] = ""
if runtime.GOOS != "windows" {
out, err := exec.Command("uname", "-r").Output()
if err != nil {
return false, fmt.Errorf("Failed to run uname: %s", err)
}
node.Attributes["kernel.version"] = strings.Trim(string(out), "\n")
}
node.Attributes["unique.hostname"] = hostInfo.Hostname
return true, nil
}

View File

@ -1,43 +0,0 @@
package fingerprint
import (
"fmt"
"log"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shirou/gopsutil/mem"
)
// MemoryFingerprint is used to fingerprint the available memory on the node
type MemoryFingerprint struct {
StaticFingerprinter
logger *log.Logger
}
// NewMemoryFingerprint is used to create a Memory fingerprint
func NewMemoryFingerprint(logger *log.Logger) Fingerprint {
f := &MemoryFingerprint{
logger: logger,
}
return f
}
func (f *MemoryFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
memInfo, err := mem.VirtualMemory()
if err != nil {
f.logger.Printf("[WARN] Error reading memory information: %s", err)
return false, err
}
if memInfo.Total > 0 {
node.Attributes["memory.totalbytes"] = fmt.Sprintf("%d", memInfo.Total)
if node.Resources == nil {
node.Resources = &structs.Resources{}
}
node.Resources.MemoryMB = int(memInfo.Total / 1024 / 1024)
}
return true, nil
}

View File

@ -1,167 +0,0 @@
package fingerprint
import (
"errors"
"fmt"
"log"
"net"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
// NetworkFingerprint is used to fingerprint the Network capabilities of a node
type NetworkFingerprint struct {
StaticFingerprinter
logger *log.Logger
interfaceDetector NetworkInterfaceDetector
}
// NetworkInterfaceDetector is an interface to isolate calls to various APIs in
// the net package. This facilitates testing where we can implement fake
// interfaces and addresses to test various code paths.
type NetworkInterfaceDetector interface {
Interfaces() ([]net.Interface, error)
InterfaceByName(name string) (*net.Interface, error)
Addrs(intf *net.Interface) ([]net.Addr, error)
}
// Implements the interface detector which calls net directly
type DefaultNetworkInterfaceDetector struct {
}
func (b *DefaultNetworkInterfaceDetector) Interfaces() ([]net.Interface, error) {
return net.Interfaces()
}
func (b *DefaultNetworkInterfaceDetector) InterfaceByName(name string) (*net.Interface, error) {
return net.InterfaceByName(name)
}
func (b *DefaultNetworkInterfaceDetector) Addrs(intf *net.Interface) ([]net.Addr, error) {
return intf.Addrs()
}
// NewNetworkFingerprint returns a new NetworkFingerprinter with the given
// logger
func NewNetworkFingerprint(logger *log.Logger) Fingerprint {
f := &NetworkFingerprint{logger: logger, interfaceDetector: &DefaultNetworkInterfaceDetector{}}
return f
}
func (f *NetworkFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// newNetwork is populated and added to the Node's resources
newNetwork := &structs.NetworkResource{}
var ip string
intf, err := f.findInterface(cfg.NetworkInterface)
switch {
case err != nil:
return false, fmt.Errorf("Error while detecting network interface during fingerprinting: %v", err)
case intf == nil:
// No interface could be found
return false, nil
}
if ip, err = f.ipAddress(intf); err != nil {
return false, fmt.Errorf("Unable to find IP address of interface: %s, err: %v", intf.Name, err)
}
newNetwork.Device = intf.Name
node.Attributes["unique.network.ip-address"] = ip
newNetwork.IP = ip
newNetwork.CIDR = newNetwork.IP + "/32"
f.logger.Printf("[DEBUG] fingerprint.network: Detected interface %v with IP %v during fingerprinting", intf.Name, ip)
if throughput := f.linkSpeed(intf.Name); throughput > 0 {
newNetwork.MBits = throughput
f.logger.Printf("[DEBUG] fingerprint.network: link speed for %v set to %v", intf.Name, newNetwork.MBits)
} else {
f.logger.Printf("[DEBUG] fingerprint.network: Unable to read link speed; setting to default %v", cfg.NetworkSpeed)
newNetwork.MBits = cfg.NetworkSpeed
}
if node.Resources == nil {
node.Resources = &structs.Resources{}
}
node.Resources.Networks = append(node.Resources.Networks, newNetwork)
// return true, because we have a network connection
return true, nil
}
// Gets the ipv4 addr for a network interface
func (f *NetworkFingerprint) ipAddress(intf *net.Interface) (string, error) {
var addrs []net.Addr
var err error
if addrs, err = f.interfaceDetector.Addrs(intf); err != nil {
return "", err
}
if len(addrs) == 0 {
return "", errors.New(fmt.Sprintf("Interface %s has no IP address", intf.Name))
}
for _, addr := range addrs {
var ip net.IP
switch v := (addr).(type) {
case *net.IPNet:
ip = v.IP
case *net.IPAddr:
ip = v.IP
}
if ip.To4() != nil {
return ip.String(), nil
}
}
return "", fmt.Errorf("Couldn't parse IP address for interface %s", intf.Name)
}
// Checks if the device is marked UP by the operator
func (f *NetworkFingerprint) isDeviceEnabled(intf *net.Interface) bool {
return intf.Flags&net.FlagUp != 0
}
// Checks if the device has any IP address configured
func (f *NetworkFingerprint) deviceHasIpAddress(intf *net.Interface) bool {
_, err := f.ipAddress(intf)
return err == nil
}
func (n *NetworkFingerprint) isDeviceLoopBackOrPointToPoint(intf *net.Interface) bool {
return intf.Flags&(net.FlagLoopback|net.FlagPointToPoint) != 0
}
// findInterface returns the interface with the name passed by the user. If the
// name is blank, it iterates through all the devices and finds one which is
// routable and marked as UP. It excludes PPP and loopback devices unless they
// are specifically asked for.
func (f *NetworkFingerprint) findInterface(deviceName string) (*net.Interface, error) {
var interfaces []net.Interface
var err error
if deviceName != "" {
return f.interfaceDetector.InterfaceByName(deviceName)
}
var intfs []net.Interface
if intfs, err = f.interfaceDetector.Interfaces(); err != nil {
return nil, err
}
for _, intf := range intfs {
if f.isDeviceEnabled(&intf) && !f.isDeviceLoopBackOrPointToPoint(&intf) && f.deviceHasIpAddress(&intf) {
interfaces = append(interfaces, intf)
}
}
if len(interfaces) == 0 {
return nil, nil
}
return &interfaces[0], nil
}

View File

@ -1,8 +0,0 @@
// +build !linux,!windows
package fingerprint
// linkSpeed returns the default link speed
func (f *NetworkFingerprint) linkSpeed(device string) int {
return 0
}

View File

@ -1,78 +0,0 @@
package fingerprint
import (
"fmt"
"io/ioutil"
"os/exec"
"regexp"
"strconv"
"strings"
)
// linkSpeedSys parses link speed in Mb/s from /sys.
func (f *NetworkFingerprint) linkSpeedSys(device string) int {
path := fmt.Sprintf("/sys/class/net/%s/speed", device)
// Read contents of the device/speed file
content, err := ioutil.ReadFile(path)
if err != nil {
f.logger.Printf("[DEBUG] fingerprint.network: Unable to read link speed from %s", path)
return 0
}
lines := strings.Split(string(content), "\n")
mbs, err := strconv.Atoi(lines[0])
if err != nil || mbs <= 0 {
f.logger.Printf("[DEBUG] fingerprint.network: Unable to parse link speed from %s", path)
return 0
}
return mbs
}
// linkSpeed returns link speed in Mb/s, or 0 when unable to determine it.
func (f *NetworkFingerprint) linkSpeed(device string) int {
// Use LookPath to find the ethtool in the systems $PATH
// If it's not found or otherwise errors, LookPath returns an empty string
// and an error we can ignore for our purposes
ethtoolPath, _ := exec.LookPath("ethtool")
if ethtoolPath != "" {
if speed := f.linkSpeedEthtool(ethtoolPath, device); speed > 0 {
return speed
}
}
// Fall back on checking a system file for link speed.
return f.linkSpeedSys(device)
}
// linkSpeedEthtool determines link speed in Mb/s with 'ethtool'.
func (f *NetworkFingerprint) linkSpeedEthtool(path, device string) int {
outBytes, err := exec.Command(path, device).Output()
if err != nil {
f.logger.Printf("[WARN] fingerprint.network: Error calling ethtool (%s %s): %v", path, device, err)
return 0
}
output := strings.TrimSpace(string(outBytes))
re := regexp.MustCompile("Speed: [0-9]+[a-zA-Z]+/s")
m := re.FindString(output)
if m == "" {
// no matches found, output may be in a different format
f.logger.Printf("[WARN] fingerprint.network: Unable to parse Speed in output of '%s %s'", path, device)
return 0
}
// Split and trim the Mb/s unit from the string output
args := strings.Split(m, ": ")
raw := strings.TrimSuffix(args[1], "Mb/s")
// convert the numeric portion to an int; the value is already in Mb/s
mbs, err := strconv.Atoi(raw)
if err != nil || mbs <= 0 {
f.logger.Printf("[WARN] fingerprint.network: Unable to parse Mb/s in output of '%s %s'", path, device)
return 0
}
return mbs
}
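A standalone sketch of the ethtool "Speed:" parsing above, run against sample output rather than invoking ethtool; the sample text is illustrative:

package main

import (
    "fmt"
    "regexp"
    "strconv"
    "strings"
)

func main() {
    // Fabricated ethtool-style output for demonstration.
    output := "Settings for eth0:\n\tSpeed: 1000Mb/s\n\tDuplex: Full"

    // Same pattern and trimming steps as linkSpeedEthtool above.
    re := regexp.MustCompile("Speed: [0-9]+[a-zA-Z]+/s")
    m := re.FindString(output)
    if m == "" {
        fmt.Println("no speed found")
        return
    }
    raw := strings.TrimSuffix(strings.Split(m, ": ")[1], "Mb/s")
    mbs, err := strconv.Atoi(raw)
    fmt.Println(mbs, err) // 1000 <nil>
}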

View File

@ -1,52 +0,0 @@
package fingerprint
import (
"fmt"
"os/exec"
"strconv"
"strings"
)
// linkSpeed returns link speed in Mb/s, or 0 when unable to determine it.
func (f *NetworkFingerprint) linkSpeed(device string) int {
command := fmt.Sprintf("Get-NetAdapter -IncludeHidden | Where name -eq '%s' | Select -ExpandProperty LinkSpeed", device)
path := "powershell.exe"
outBytes, err := exec.Command(path, command).Output()
if err != nil {
f.logger.Printf("[WARN] fingerprint.network: Error calling %s (%s): %v", path, command, err)
return 0
}
output := strings.TrimSpace(string(outBytes))
return f.parseLinkSpeed(output)
}
func (f *NetworkFingerprint) parseLinkSpeed(commandOutput string) int {
args := strings.Split(commandOutput, " ")
if len(args) != 2 {
f.logger.Printf("[WARN] fingerprint.network: Couldn't split LinkSpeed (%s)", commandOutput)
return 0
}
unit := strings.Replace(args[1], "\r\n", "", -1)
value, err := strconv.Atoi(args[0])
if err != nil {
f.logger.Printf("[WARN] fingerprint.network: Unable to parse LinkSpeed value (%s)", commandOutput)
return 0
}
switch unit {
case "Mbps":
return value
case "Kbps":
return value / 1000
case "Gbps":
return value * 1000
case "bps":
return value / 1000000
}
return 0
}

View File

@ -1,26 +0,0 @@
package fingerprint
import (
"log"
client "github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
// NomadFingerprint is used to fingerprint the Nomad version
type NomadFingerprint struct {
StaticFingerprinter
logger *log.Logger
}
// NewNomadFingerprint is used to create a Nomad fingerprint
func NewNomadFingerprint(logger *log.Logger) Fingerprint {
f := &NomadFingerprint{logger: logger}
return f
}
func (f *NomadFingerprint) Fingerprint(config *client.Config, node *structs.Node) (bool, error) {
node.Attributes["nomad.version"] = config.Version
node.Attributes["nomad.revision"] = config.Revision
return true, nil
}

View File

@ -1,59 +0,0 @@
package fingerprint
import (
"fmt"
"log"
"os"
"strconv"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs"
)
const bytesPerMegabyte = 1024 * 1024
// StorageFingerprint is used to measure the amount of free storage available to
// applications that the Nomad agent will run on this machine.
type StorageFingerprint struct {
StaticFingerprinter
logger *log.Logger
}
func NewStorageFingerprint(logger *log.Logger) Fingerprint {
fp := &StorageFingerprint{logger: logger}
return fp
}
func (f *StorageFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Initialize these to empty defaults
node.Attributes["unique.storage.volume"] = ""
node.Attributes["unique.storage.bytestotal"] = ""
node.Attributes["unique.storage.bytesfree"] = ""
if node.Resources == nil {
node.Resources = &structs.Resources{}
}
// Guard against unset AllocDir
storageDir := cfg.AllocDir
if storageDir == "" {
var err error
storageDir, err = os.Getwd()
if err != nil {
return false, fmt.Errorf("unable to get CWD from filesystem: %s", err)
}
}
volume, total, free, err := f.diskFree(storageDir)
if err != nil {
return false, fmt.Errorf("failed to determine disk space for %s: %v", storageDir, err)
}
node.Attributes["unique.storage.volume"] = volume
node.Attributes["unique.storage.bytestotal"] = strconv.FormatUint(total, 10)
node.Attributes["unique.storage.bytesfree"] = strconv.FormatUint(free, 10)
node.Resources.DiskMB = int(free / bytesPerMegabyte)
return true, nil
}

View File

@ -1,64 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package fingerprint
import (
"fmt"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
)
// diskFree inspects the filesystem for path and returns the volume name and
// the total and free bytes available on the file system.
func (f *StorageFingerprint) diskFree(path string) (volume string, total, free uint64, err error) {
absPath, err := filepath.Abs(path)
if err != nil {
return "", 0, 0, fmt.Errorf("failed to determine absolute path for %s", path)
}
// Use -k to standardize the output values between darwin and linux
var dfArgs string
if runtime.GOOS == "linux" {
// df on linux needs the -P option to prevent linebreaks on long filesystem paths
dfArgs = "-kP"
} else {
dfArgs = "-k"
}
mountOutput, err := exec.Command("df", dfArgs, absPath).Output()
if err != nil {
return "", 0, 0, fmt.Errorf("failed to determine mount point for %s", absPath)
}
// Output looks something like:
// Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on
// /dev/disk1 487385240 423722532 63406708 87% 105994631 15851677 87% /
// [0] volume [1] capacity [2] SKIP [3] free
lines := strings.Split(string(mountOutput), "\n")
if len(lines) < 2 {
return "", 0, 0, fmt.Errorf("failed to parse `df` output; expected at least 2 lines")
}
fields := strings.Fields(lines[1])
if len(fields) < 4 {
return "", 0, 0, fmt.Errorf("failed to parse `df` output; expected at least 4 columns")
}
volume = fields[0]
total, err = strconv.ParseUint(fields[1], 10, 64)
if err != nil {
return "", 0, 0, fmt.Errorf("failed to parse storage.bytestotal size in kilobytes")
}
// convert to bytes
total *= 1024
free, err = strconv.ParseUint(fields[3], 10, 64)
if err != nil {
return "", 0, 0, fmt.Errorf("failed to parse storage.bytesfree size in kilobytes")
}
// convert to bytes
free *= 1024
return volume, total, free, nil
}
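/*
Worked example (sketch) against the sample `df -k` output shown above:
  fields = ["/dev/disk1", "487385240", "423722532", "63406708", ...]
  volume = "/dev/disk1"
  total  = 487385240 KB * 1024 = 499,082,485,760 bytes
  free   =  63406708 KB * 1024 =  64,928,468,992 bytes
*/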

View File

@ -1,33 +0,0 @@
package fingerprint
import (
"fmt"
"path/filepath"
"syscall"
)
//go:generate go run $GOROOT/src/syscall/mksyscall_windows.go -output zstorage_windows.go storage_windows.go
//sys getDiskFreeSpaceEx(dirName *uint16, availableFreeBytes *uint64, totalBytes *uint64, totalFreeBytes *uint64) (err error) = kernel32.GetDiskFreeSpaceExW
// diskFree inspects the filesystem for path and returns the volume name and
// the total and free bytes available on the file system.
func (f *StorageFingerprint) diskFree(path string) (volume string, total, free uint64, err error) {
absPath, err := filepath.Abs(path)
if err != nil {
return "", 0, 0, fmt.Errorf("failed to determine absolute path for %s", path)
}
volume = filepath.VolumeName(absPath)
absPathp, err := syscall.UTF16PtrFromString(absPath)
if err != nil {
return "", 0, 0, fmt.Errorf("failed to convert \"%s\" to UTF16: %v", absPath, err)
}
if err := getDiskFreeSpaceEx(absPathp, nil, &total, &free); err != nil {
return "", 0, 0, fmt.Errorf("failed to get free disk space for %s: %v", absPath, err)
}
return volume, total, free, nil
}

View File

@ -1,26 +0,0 @@
// MACHINE GENERATED BY 'go generate' COMMAND; DO NOT EDIT
package fingerprint
import "unsafe"
import "syscall"
var _ unsafe.Pointer
var (
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
procGetDiskFreeSpaceExW = modkernel32.NewProc("GetDiskFreeSpaceExW")
)
func getDiskFreeSpaceEx(dirName *uint16, availableFreeBytes *uint64, totalBytes *uint64, totalFreeBytes *uint64) (err error) {
r1, _, e1 := syscall.Syscall6(procGetDiskFreeSpaceExW.Addr(), 4, uintptr(unsafe.Pointer(dirName)), uintptr(unsafe.Pointer(availableFreeBytes)), uintptr(unsafe.Pointer(totalBytes)), uintptr(unsafe.Pointer(totalFreeBytes)), 0, 0)
if r1 == 0 {
if e1 != 0 {
err = error(e1)
} else {
err = syscall.EINVAL
}
}
return
}

View File

@ -1,78 +0,0 @@
package getter
import (
"fmt"
"net/url"
"path/filepath"
"sync"
gg "github.com/hashicorp/go-getter"
"github.com/hashicorp/nomad/client/driver/env"
"github.com/hashicorp/nomad/nomad/structs"
)
var (
// getters is the map of getters suitable for Nomad. It is initialized once
// and the lock is used to guard access to it.
getters map[string]gg.Getter
lock sync.Mutex
// supported is the set of download schemes supported by Nomad
supported = []string{"http", "https", "s3"}
)
// getClient returns a client that is suitable for Nomad to download artifacts.
func getClient(src, dst string) *gg.Client {
lock.Lock()
defer lock.Unlock()
// Initialize the getters map once, on first use
if getters == nil {
getters = make(map[string]gg.Getter, len(supported))
for _, getter := range supported {
if impl, ok := gg.Getters[getter]; ok {
getters[getter] = impl
}
}
}
return &gg.Client{
Src: src,
Dst: dst,
Mode: gg.ClientModeAny,
Getters: getters,
}
}
// getGetterUrl returns the go-getter URL to download the artifact.
func getGetterUrl(taskEnv *env.TaskEnvironment, artifact *structs.TaskArtifact) (string, error) {
taskEnv.Build()
u, err := url.Parse(taskEnv.ReplaceEnv(artifact.GetterSource))
if err != nil {
return "", fmt.Errorf("failed to parse source URL %q: %v", artifact.GetterSource, err)
}
// Build the url
q := u.Query()
for k, v := range artifact.GetterOptions {
q.Add(k, taskEnv.ReplaceEnv(v))
}
u.RawQuery = q.Encode()
return u.String(), nil
}
// GetArtifact downloads an artifact into the specified task directory.
func GetArtifact(taskEnv *env.TaskEnvironment, artifact *structs.TaskArtifact, taskDir string) error {
url, err := getGetterUrl(taskEnv, artifact)
if err != nil {
return err
}
// Download the artifact
dest := filepath.Join(taskDir, artifact.RelativeDest)
if err := getClient(url, dest).Get(); err != nil {
return fmt.Errorf("GET error: %v", err)
}
return nil
}
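/*
Hedged usage sketch (the artifact values and task directory below are assumptions,
not taken from this repository):

	artifact := &structs.TaskArtifact{
		GetterSource:  "https://example.com/app.tar.gz",
		GetterOptions: map[string]string{"checksum": "md5:abc123"},
		RelativeDest:  "local/",
	}
	if err := GetArtifact(taskEnv, artifact, "/var/nomad/alloc/example/web"); err != nil {
		log.Printf("artifact download failed: %v", err)
	}

getGetterUrl folds GetterOptions into the query string, so the URL handed to
go-getter becomes https://example.com/app.tar.gz?checksum=md5%3Aabc123.
*/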

View File

@ -1 +0,0 @@
hello world

View File

@ -1 +0,0 @@
hello world

View File

@ -1 +0,0 @@
sleep 1

View File

@ -1 +0,0 @@
sleep 1

View File

@ -1,199 +0,0 @@
package client
import (
"fmt"
"math/rand"
"sync"
"time"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// jitter is the percent of jitter added to restart delays.
jitter = 0.25
ReasonNoRestartsAllowed = "Policy allows no restarts"
ReasonUnrecoverableErrror = "Error was unrecoverable"
ReasonWithinPolicy = "Restart within policy"
ReasonDelay = "Exceeded allowed attempts, applying a delay"
)
func newRestartTracker(policy *structs.RestartPolicy, jobType string) *RestartTracker {
onSuccess := true
if jobType == structs.JobTypeBatch {
onSuccess = false
}
return &RestartTracker{
startTime: time.Now(),
onSuccess: onSuccess,
policy: policy,
rand: rand.New(rand.NewSource(time.Now().Unix())),
}
}
type RestartTracker struct {
waitRes *cstructs.WaitResult
startErr error
count int // Current number of attempts.
onSuccess bool // Whether to restart on successful exit code.
startTime time.Time // When the interval began
reason string // The reason for the last state
policy *structs.RestartPolicy
rand *rand.Rand
lock sync.Mutex
}
// SetPolicy updates the policy used to determine restarts.
func (r *RestartTracker) SetPolicy(policy *structs.RestartPolicy) {
r.lock.Lock()
defer r.lock.Unlock()
r.policy = policy
}
// SetStartError is used to mark the most recent start error. If starting was
// successful the error should be nil.
func (r *RestartTracker) SetStartError(err error) *RestartTracker {
r.lock.Lock()
defer r.lock.Unlock()
r.startErr = err
return r
}
// SetWaitResult is used to mark the most recent wait result.
func (r *RestartTracker) SetWaitResult(res *cstructs.WaitResult) *RestartTracker {
r.lock.Lock()
defer r.lock.Unlock()
r.waitRes = res
return r
}
// GetReason returns a human-readable description for the last state returned by
// GetState.
func (r *RestartTracker) GetReason() string {
r.lock.Lock()
defer r.lock.Unlock()
return r.reason
}
// GetState returns the task's next state given the set exit code and start
// error. One of the following states is returned:
// * TaskRestarting - Task should be restarted
// * TaskNotRestarting - Task should not be restarted and has exceeded its
// restart policy.
// * TaskTerminated - Task has terminated successfully and does not need a
// restart.
//
// If TaskRestarting is returned, the duration is how long to wait until
// starting the task again.
func (r *RestartTracker) GetState() (string, time.Duration) {
r.lock.Lock()
defer r.lock.Unlock()
// Hot path if no attempts are expected
if r.policy.Attempts == 0 {
r.reason = ReasonNoRestartsAllowed
if r.waitRes != nil && r.waitRes.Successful() {
return structs.TaskTerminated, 0
}
return structs.TaskNotRestarting, 0
}
r.count++
// Check if we have entered a new interval.
end := r.startTime.Add(r.policy.Interval)
now := time.Now()
if now.After(end) {
r.count = 0
r.startTime = now
}
if r.startErr != nil {
return r.handleStartError()
} else if r.waitRes != nil {
return r.handleWaitResult()
} else {
return "", 0
}
}
// handleStartError returns the new state and potential wait duration for
// restarting the task after it was not successfully started. On start errors,
// the restart policy is always treated as fail mode to ensure we don't
// infinitely try to start a task.
func (r *RestartTracker) handleStartError() (string, time.Duration) {
// If the error is not recoverable, do not restart.
if rerr, ok := r.startErr.(*cstructs.RecoverableError); !(ok && rerr.Recoverable) {
r.reason = ReasonUnrecoverableErrror
return structs.TaskNotRestarting, 0
}
if r.count > r.policy.Attempts {
if r.policy.Mode == structs.RestartPolicyModeFail {
r.reason = fmt.Sprintf(
`Exceeded allowed attempts %d in interval %v and mode is "fail"`,
r.policy.Attempts, r.policy.Interval)
return structs.TaskNotRestarting, 0
} else {
r.reason = ReasonDelay
return structs.TaskRestarting, r.getDelay()
}
}
r.reason = ReasonWithinPolicy
return structs.TaskRestarting, r.jitter()
}
// handleWaitResult returns the new state and potential wait duration for
// restarting the task after it has exited.
func (r *RestartTracker) handleWaitResult() (string, time.Duration) {
// If the task started successfully and restart on success isn't specified,
// don't restart but don't mark as failed.
if r.waitRes.Successful() && !r.onSuccess {
r.reason = "Restart unnecessary as task terminated successfully"
return structs.TaskTerminated, 0
}
if r.count > r.policy.Attempts {
if r.policy.Mode == structs.RestartPolicyModeFail {
r.reason = fmt.Sprintf(
`Exceeded allowed attempts %d in interval %v and mode is "fail"`,
r.policy.Attempts, r.policy.Interval)
return structs.TaskNotRestarting, 0
} else {
r.reason = ReasonDelay
return structs.TaskRestarting, r.getDelay()
}
}
r.reason = ReasonWithinPolicy
return structs.TaskRestarting, r.jitter()
}
// getDelay returns the delay time to enter the next interval.
func (r *RestartTracker) getDelay() time.Duration {
end := r.startTime.Add(r.policy.Interval)
now := time.Now()
return end.Sub(now)
}
// jitter returns the delay time plus a jitter.
func (r *RestartTracker) jitter() time.Duration {
// Get the delay and ensure it is valid.
d := r.policy.Delay.Nanoseconds()
if d == 0 {
d = 1
}
j := float64(r.rand.Int63n(d)) * jitter
return time.Duration(d + int64(j))
}
// Returns a tracker that never restarts.
func noRestartsTracker() *RestartTracker {
policy := &structs.RestartPolicy{Attempts: 0, Mode: structs.RestartPolicyModeFail}
return newRestartTracker(policy, structs.JobTypeBatch)
}
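/*
Illustrative lifecycle sketch (policy values are assumptions, and it assumes
cstructs.NewWaitResult(exitCode, signal, err) is the wait-result constructor):

	policy := &structs.RestartPolicy{
		Attempts: 2,
		Interval: 5 * time.Minute,
		Delay:    15 * time.Second,
		Mode:     structs.RestartPolicyModeFail,
	}
	rt := newRestartTracker(policy, structs.JobTypeService)

	// A failed exit within policy yields a restart after Delay plus up to 25% jitter.
	state, wait := rt.SetWaitResult(cstructs.NewWaitResult(1, 0, nil)).GetState()
	// state == structs.TaskRestarting, wait is roughly 15s-18.75s
*/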

View File

@ -1,779 +0,0 @@
// Package rpcproxy provides a proxy interface to Nomad Servers. The
// RPCProxy periodically shuffles which server a Nomad Client communicates
// with in order to redistribute load across Nomad Servers. Nomad Servers
// that fail an RPC request are automatically cycled to the end of the list
// until the server list is reshuffled.
//
// The rpcproxy package does not provide any external API guarantees and
// should be called only by `hashicorp/nomad`.
package rpcproxy
import (
"fmt"
"log"
"math/rand"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// clientRPCJitterFraction determines the amount of jitter added to
// clientRPCMinReuseDuration before a connection is expired and a new
// connection is established in order to rebalance load across Nomad
// servers. The cluster-wide number of connections per second from
// rebalancing is applied after this jitter to ensure the CPU impact
// is always finite. See newRebalanceConnsPerSecPerServer's comment
// for additional commentary.
//
// For example, in a 10K Nomad cluster with 5x servers, this default
// averages out to ~13 new connections from rebalancing per server
// per second.
clientRPCJitterFraction = 2
// clientRPCMinReuseDuration controls the minimum amount of time RPC
// queries are sent over an established connection to a single server
clientRPCMinReuseDuration = 600 * time.Second
// Limit the number of new connections a server receives per second
// for connection rebalancing. This limit caps the load caused by
// continual rebalancing efforts when a cluster is in equilibrium. A
// lower value comes at the cost of increased recovery time after a
// partition. This parameter begins to take effect when there are
// more than ~48K clients querying 5x servers or at lower server
// counts when there is a partition.
//
// For example, in a 100K Nomad cluster with 5x servers, it will take
// ~5min for all servers to rebalance their connections. If 99,995
// agents are in the minority talking to only one server, it will
// take ~26min for all servers to rebalance. A 10K cluster in the
// same scenario will take ~2.6min to rebalance.
newRebalanceConnsPerSecPerServer = 64
// rpcAPIMismatchLogRate determines the rate at which log entries are
// emitted when the client and server's API versions are mismatched.
rpcAPIMismatchLogRate = 3 * time.Hour
)
// NomadConfigInfo is an interface wrapper around this Nomad Agent's
// configuration to prevent a cyclic import dependency.
type NomadConfigInfo interface {
Datacenter() string
RPCMajorVersion() int
RPCMinorVersion() int
Region() string
}
// Pinger is an interface wrapping client.ConnPool to prevent a
// cyclic import dependency
type Pinger interface {
PingNomadServer(region string, apiMajorVersion int, s *ServerEndpoint) (bool, error)
}
// serverList is an array of Nomad Servers. The first server in the list is
// the active server.
//
// NOTE(sean@): We are explicitly relying on the fact that serverList will be
// copied onto the stack by atomic.Value. Please keep this structure light.
type serverList struct {
L []*ServerEndpoint
}
// RPCProxy is the manager type responsible for returning and managing Nomad
// addresses.
type RPCProxy struct {
// activatedList manages the list of Nomad Servers that are eligible
// to be queried by the Client agent.
activatedList atomic.Value
activatedListLock sync.Mutex
// primaryServers is a list of servers found in the last heartbeat.
// primaryServers are periodically reshuffled. Covered by
// serverListLock.
primaryServers serverList
// backupServers is a list of fallback servers. These servers are
// appended to the RPCProxy's serverList, but are never shuffled with
// the list of servers discovered via the Nomad heartbeat. Covered
// by serverListLock.
backupServers serverList
// serverListLock covers both backupServers and primaryServers. If
// it is necessary to hold serverListLock and listLock, obtain an
// exclusive lock on serverListLock before listLock.
serverListLock sync.RWMutex
leaderAddr string
numNodes int
// rebalanceTimer controls the duration of the rebalance interval
rebalanceTimer *time.Timer
// shutdownCh is a copy of the channel in nomad.Client
shutdownCh chan struct{}
logger *log.Logger
configInfo NomadConfigInfo
// rpcAPIMismatchThrottle regulates the rate at which warning
// messages are emitted in the event of an API mismatch between the
// clients and servers.
rpcAPIMismatchThrottle map[string]time.Time
// connPoolPinger is used to test the health of a server in the
// connection pool. Pinger is an interface that wraps
// client.ConnPool.
connPoolPinger Pinger
}
// NewRPCProxy is the only way to safely create a new RPCProxy.
func NewRPCProxy(logger *log.Logger, shutdownCh chan struct{}, configInfo NomadConfigInfo, connPoolPinger Pinger) *RPCProxy {
p := &RPCProxy{
logger: logger,
configInfo: configInfo, // can't pass *nomad.Client: import cycle
connPoolPinger: connPoolPinger, // can't pass *nomad.ConnPool: import cycle
rebalanceTimer: time.NewTimer(clientRPCMinReuseDuration),
shutdownCh: shutdownCh,
}
l := serverList{}
l.L = make([]*ServerEndpoint, 0)
p.saveServerList(l)
return p
}
// activateEndpoint adds an endpoint to the RPCProxy's active serverList.
// Returns true if the server was added, returns false if the server already
// existed in the RPCProxy's serverList.
func (p *RPCProxy) activateEndpoint(s *ServerEndpoint) bool {
l := p.getServerList()
// Check if this server is known
found := false
for idx, existing := range l.L {
if existing.Name == s.Name {
newServers := make([]*ServerEndpoint, len(l.L))
copy(newServers, l.L)
// Overwrite the existing server details in order to
// possibly update metadata (e.g. server version)
newServers[idx] = s
l.L = newServers
found = true
break
}
}
// Add to the list if not known
if !found {
newServers := make([]*ServerEndpoint, len(l.L), len(l.L)+1)
copy(newServers, l.L)
newServers = append(newServers, s)
l.L = newServers
}
p.saveServerList(l)
return !found
}
// SetBackupServers sets a list of Nomad Servers to be used in the event that
// the Nomad Agent loses contact with the list of Nomad Servers provided via
// the Nomad Agent's heartbeat. If available, the backup servers are
// populated via Consul.
func (p *RPCProxy) SetBackupServers(addrs []string) error {
l := make([]*ServerEndpoint, 0, len(addrs))
for _, s := range addrs {
s, err := NewServerEndpoint(s)
if err != nil {
p.logger.Printf("[WARN] client.rpcproxy: unable to create backup server %+q: %v", s, err)
return fmt.Errorf("unable to create new backup server from %+q: %v", s, err)
}
l = append(l, s)
}
p.serverListLock.Lock()
p.backupServers.L = l
p.serverListLock.Unlock()
p.activatedListLock.Lock()
defer p.activatedListLock.Unlock()
for _, s := range l {
p.activateEndpoint(s)
}
return nil
}
// AddPrimaryServer takes the RPC address of a Nomad server, creates a new
// endpoint, and adds it to both the primaryServers list and the active
// serverList used in the RPC Proxy. If the endpoint is not known by the
// RPCProxy, appends the endpoint to the list. The new endpoint will begin
// seeing use after the rebalance timer fires (or enough servers fail
// organically). Any values in the primary server list are overridden by the
// next successful heartbeat.
func (p *RPCProxy) AddPrimaryServer(rpcAddr string) *ServerEndpoint {
s, err := NewServerEndpoint(rpcAddr)
if err != nil {
p.logger.Printf("[WARN] client.rpcproxy: unable to create new primary server from endpoint %+q: %v", rpcAddr, err)
return nil
}
k := s.Key()
p.serverListLock.Lock()
if serverExists := p.primaryServers.serverExistByKey(k); serverExists {
p.serverListLock.Unlock()
return s
}
p.primaryServers.L = append(p.primaryServers.L, s)
p.serverListLock.Unlock()
p.activatedListLock.Lock()
p.activateEndpoint(s)
p.activatedListLock.Unlock()
return s
}
// cycleServer returns a new list of servers that has dequeued the first
// server and enqueued it at the end of the list. cycleServer assumes the
// caller is holding the listLock. cycleServer does not test or ping
// the next server inline. cycleServer may be called when the environment
// has just entered an unhealthy situation and blocking on a server test is
// less desirable than just returning the next server in the firing line. If
// the next server fails, it will fail fast enough and cycleServer will be
// called again.
func (l *serverList) cycleServer() (servers []*ServerEndpoint) {
numServers := len(l.L)
if numServers < 2 {
return servers // No action required
}
newServers := make([]*ServerEndpoint, 0, numServers)
newServers = append(newServers, l.L[1:]...)
newServers = append(newServers, l.L[0])
return newServers
}
// serverExistByKey performs a search to see if a server exists in the
// serverList. Assumes the caller is holding at least a read lock.
func (l *serverList) serverExistByKey(targetKey *EndpointKey) bool {
var found bool
for _, server := range l.L {
if targetKey.Equal(server.Key()) {
found = true
}
}
return found
}
// removeServerByKey performs an inline removal of the first matching server
func (l *serverList) removeServerByKey(targetKey *EndpointKey) {
for i, s := range l.L {
if targetKey.Equal(s.Key()) {
copy(l.L[i:], l.L[i+1:])
l.L[len(l.L)-1] = nil
l.L = l.L[:len(l.L)-1]
return
}
}
}
// shuffleServers shuffles the server list in place
func (l *serverList) shuffleServers() {
for i := len(l.L) - 1; i > 0; i-- {
j := rand.Int31n(int32(i + 1))
l.L[i], l.L[j] = l.L[j], l.L[i]
}
}
// String returns a string representation of serverList
func (l *serverList) String() string {
if len(l.L) == 0 {
return "empty server list"
}
serverStrs := make([]string, 0, len(l.L))
for _, server := range l.L {
serverStrs = append(serverStrs, server.String())
}
return fmt.Sprintf("[%s]", strings.Join(serverStrs, ", "))
}
// FindServer takes out an internal "read lock" and searches through the list
// of servers to find a "healthy" server. If the server is actually
// unhealthy, we rely on heartbeats to detect this and remove the node from
// the server list. If the server at the front of the list has failed or
// fails during an RPC call, it is rotated to the end of the list. If there
// are no servers available, return nil.
func (p *RPCProxy) FindServer() *ServerEndpoint {
l := p.getServerList()
numServers := len(l.L)
if numServers == 0 {
p.logger.Printf("[WARN] client.rpcproxy: No servers available")
return nil
}
// Return whatever is at the front of the list because it is
// assumed to be the oldest in the server list (unless -
// hypothetically - the server list was rotated right after a
// server was added).
return l.L[0]
}
// getServerList is a convenience method which hides the locking semantics
// of atomic.Value from the caller.
func (p *RPCProxy) getServerList() serverList {
return p.activatedList.Load().(serverList)
}
// saveServerList is a convenience method which hides the locking semantics
// of atomic.Value from the caller.
func (p *RPCProxy) saveServerList(l serverList) {
p.activatedList.Store(l)
}
// LeaderAddr returns the current leader address. If an empty string, then
// the Nomad Server for this Nomad Agent is in the minority or the Nomad
// Servers are in the middle of an election.
func (p *RPCProxy) LeaderAddr() string {
p.activatedListLock.Lock()
defer p.activatedListLock.Unlock()
return p.leaderAddr
}
// NotifyFailedServer marks the passed in server as "failed" by rotating it
// to the end of the server list.
func (p *RPCProxy) NotifyFailedServer(s *ServerEndpoint) {
l := p.getServerList()
// If the server being failed is not the first server on the list,
// this is a noop. If, however, the server is failed and first on
// the list, acquire the lock, retest, and take the penalty of moving
// the server to the end of the list.
// Only rotate the server list when there is more than one server
if len(l.L) > 1 && l.L[0] == s {
// Grab a lock, retest, and take the hit of cycling the first
// server to the end.
p.activatedListLock.Lock()
defer p.activatedListLock.Unlock()
l = p.getServerList()
if len(l.L) > 1 && l.L[0] == s {
l.L = l.cycleServer()
p.saveServerList(l)
}
}
}
// NumNodes returns the estimated number of nodes according to the last Nomad
// Heartbeat.
func (p *RPCProxy) NumNodes() int {
return p.numNodes
}
// NumServers takes out an internal "read lock" and returns the number of
// servers. numServers includes both healthy and unhealthy servers.
func (p *RPCProxy) NumServers() int {
l := p.getServerList()
return len(l.L)
}
// RebalanceServers shuffles the list of servers on this agent. The server
// at the front of the list is selected for the next RPC. RPC calls that
// fail for a particular server are rotated to the end of the list. This
// method reshuffles the list periodically in order to redistribute work
// across all known Nomad servers (i.e. guarantee that the order of servers
// in the server list is not positively correlated with the age of a server
// in the Nomad cluster). Periodically shuffling the server list prevents
// long-lived clients from fixating on long-lived servers.
//
// Unhealthy servers are removed from the server list during the next client
// heartbeat. Before the newly shuffled server list is saved, the new remote
// endpoint is tested to ensure it is responsive.
func (p *RPCProxy) RebalanceServers() {
var serverListLocked bool
p.serverListLock.Lock()
serverListLocked = true
defer func() {
if serverListLocked {
p.serverListLock.Unlock()
}
}()
// Early abort if there is nothing to shuffle
if (len(p.primaryServers.L) + len(p.backupServers.L)) < 2 {
return
}
// Shuffle server lists independently
p.primaryServers.shuffleServers()
p.backupServers.shuffleServers()
// Create a new merged serverList
type targetServer struct {
server *ServerEndpoint
// 'p' == Primary Server
// 's' == Secondary/Backup Server
// 'b' == Both
state byte
}
mergedList := make(map[EndpointKey]*targetServer, len(p.primaryServers.L)+len(p.backupServers.L))
for _, s := range p.primaryServers.L {
mergedList[*s.Key()] = &targetServer{server: s, state: 'p'}
}
for _, s := range p.backupServers.L {
k := s.Key()
_, found := mergedList[*k]
if found {
mergedList[*k].state = 'b'
} else {
mergedList[*k] = &targetServer{server: s, state: 's'}
}
}
l := &serverList{L: make([]*ServerEndpoint, 0, len(mergedList))}
for _, s := range p.primaryServers.L {
l.L = append(l.L, s)
}
for _, v := range mergedList {
if v.state != 's' {
continue
}
l.L = append(l.L, v.server)
}
// Release the lock before we begin transition to operations on the
// network timescale and attempt to ping servers. A copy of the
// servers has been made at this point.
p.serverListLock.Unlock()
serverListLocked = false
// Iterate through the shuffled server list to find an assumed
// healthy server. NOTE: Do not iterate on the list directly because
// this loop mutates the server list in-place.
var foundHealthyServer bool
for i := 0; i < len(l.L); i++ {
// Always test the first server. Failed servers are cycled
// and eventually removed from the list when Nomad heartbeats
// detect the failed node.
selectedServer := l.L[0]
ok, err := p.connPoolPinger.PingNomadServer(p.configInfo.Region(), p.configInfo.RPCMajorVersion(), selectedServer)
if ok {
foundHealthyServer = true
break
}
p.logger.Printf(`[DEBUG] client.rpcproxy: pinging server "%s" failed: %s`, selectedServer.String(), err)
l.cycleServer()
}
// If no healthy servers were found, sleep and wait for the admin to
// join this node to a server and begin receiving heartbeats with an
// updated list of Nomad servers. Or Consul will begin advertising a
// new server in the nomad service (Nomad server service).
if !foundHealthyServer {
p.logger.Printf("[DEBUG] client.rpcproxy: No healthy servers during rebalance, aborting")
return
}
// Verify that all servers are present. Reconcile will save the
// final serverList.
if p.reconcileServerList(l) {
p.logger.Printf("[TRACE] client.rpcproxy: Rebalanced %d servers, next active server is %s", len(l.L), l.L[0].String())
} else {
// reconcileServerList failed because Nomad removed the
// server that was at the front of the list that had
// successfully been Ping'ed. Between the Ping and
// reconcile, a Nomad heartbeat removed the node.
//
// Instead of doing any heroics, "freeze in place" and
// continue to use the existing connection until the next
// rebalance occurs.
}
return
}
// reconcileServerList returns true when the first server in serverList
// (l) exists in the receiver's serverList (p). If true, the merged
// serverList (l) is stored as the receiver's serverList (p). Returns
// false if the first server in p does not exist in the passed in list (l)
// (i.e. was removed by Nomad during a PingNomadServer() call). Newly added
// servers are appended to the list and other missing servers are removed
// from the list.
func (p *RPCProxy) reconcileServerList(l *serverList) bool {
p.activatedListLock.Lock()
defer p.activatedListLock.Unlock()
// newServerList is a serverList that has been kept up-to-date with
// join and leave events.
newServerList := p.getServerList()
// If a Nomad heartbeat removed all nodes, or there is no selected
// server (zero nodes in serverList), abort early.
if len(newServerList.L) == 0 || len(l.L) == 0 {
return false
}
type targetServer struct {
server *ServerEndpoint
// 'b' == both
// 'o' == original
// 'n' == new
state byte
}
mergedList := make(map[EndpointKey]*targetServer, len(l.L))
for _, s := range l.L {
mergedList[*s.Key()] = &targetServer{server: s, state: 'o'}
}
for _, s := range newServerList.L {
k := s.Key()
_, found := mergedList[*k]
if found {
mergedList[*k].state = 'b'
} else {
mergedList[*k] = &targetServer{server: s, state: 'n'}
}
}
// Ensure the selected server has not been removed by a heartbeat
selectedServerKey := l.L[0].Key()
if v, found := mergedList[*selectedServerKey]; found && v.state == 'o' {
return false
}
// Append any new servers and remove any old servers
for k, v := range mergedList {
switch v.state {
case 'b':
// Do nothing, server exists in both
case 'o':
// Server has been removed
l.removeServerByKey(&k)
case 'n':
// Server added
l.L = append(l.L, v.server)
default:
panic("unknown merge list state")
}
}
p.saveServerList(*l)
return true
}
// RemoveServer takes out an internal write lock and removes a server from
// the activated server list.
func (p *RPCProxy) RemoveServer(s *ServerEndpoint) {
// Lock hierarchy protocol dictates serverListLock is acquired first.
p.serverListLock.Lock()
defer p.serverListLock.Unlock()
p.activatedListLock.Lock()
defer p.activatedListLock.Unlock()
l := p.getServerList()
k := s.Key()
l.removeServerByKey(k)
p.saveServerList(l)
p.primaryServers.removeServerByKey(k)
p.backupServers.removeServerByKey(k)
}
// refreshServerRebalanceTimer is only called once p.rebalanceTimer expires.
func (p *RPCProxy) refreshServerRebalanceTimer() time.Duration {
l := p.getServerList()
numServers := len(l.L)
// Limit this connection's life based on the size (and health) of the
// cluster. Never rebalance a connection more frequently than
// connReuseLowWatermarkDuration, and make sure we never exceed
// clusterWideRebalanceConnsPerSec operations/s across numLANMembers.
clusterWideRebalanceConnsPerSec := float64(numServers * newRebalanceConnsPerSecPerServer)
connReuseLowWatermarkDuration := clientRPCMinReuseDuration + lib.RandomStagger(clientRPCMinReuseDuration/clientRPCJitterFraction)
numLANMembers := p.numNodes
connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, connReuseLowWatermarkDuration, numLANMembers)
p.rebalanceTimer.Reset(connRebalanceTimeout)
return connRebalanceTimeout
}
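/*
Worked example using the constants above (numbers approximate): with 5 known
servers, clusterWideRebalanceConnsPerSec = 5 * 64 = 320 conns/s.
connReuseLowWatermarkDuration is 600s plus a random stagger of up to 300s
(clientRPCMinReuseDuration / clientRPCJitterFraction). For a 10,000 node
cluster, 10,000 / 320 ≈ 31s is well under that watermark, so the timer resets
to the watermark duration and each client keeps its connection for roughly
10-15 minutes before rebalancing.
*/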
// ResetRebalanceTimer resets the rebalance timer. This method exists for
// testing and should not be used directly.
func (p *RPCProxy) ResetRebalanceTimer() {
p.activatedListLock.Lock()
defer p.activatedListLock.Unlock()
p.rebalanceTimer.Reset(clientRPCMinReuseDuration)
}
// ServerRPCAddrs returns one RPC Address per server
func (p *RPCProxy) ServerRPCAddrs() []string {
l := p.getServerList()
serverAddrs := make([]string, 0, len(l.L))
for _, s := range l.L {
serverAddrs = append(serverAddrs, s.Addr.String())
}
return serverAddrs
}
// Run is used to start and manage the task of automatically shuffling and
// rebalancing the list of Nomad servers. This maintenance only happens
// periodically based on the expiration of the timer. Failed servers are
// automatically cycled to the end of the list. New servers are appended to
// the list. The order of the server list must be shuffled periodically to
// distribute load across all known and available Nomad servers.
func (p *RPCProxy) Run() {
for {
select {
case <-p.rebalanceTimer.C:
p.RebalanceServers()
p.refreshServerRebalanceTimer()
case <-p.shutdownCh:
p.logger.Printf("[INFO] client.rpcproxy: shutting down")
return
}
}
}
// RefreshServerLists is called when the Client receives an update from a
// Nomad Server. The response from Nomad Client Heartbeats contains a list of
// Nomad Servers that the Nomad Client should use for RPC requests.
// RefreshServerLists does not rebalance its serverLists (that is handled
// elsewhere via a periodic timer). New Nomad Servers learned via the
// heartbeat are appended to the RPCProxy's activated serverList. Servers
// that are no longer present in the Heartbeat are removed immediately from
// all server lists. Nomad Servers speaking a newer major or minor API
// version are filtered from the serverList.
func (p *RPCProxy) RefreshServerLists(servers []*structs.NodeServerInfo, numNodes int32, leaderRPCAddr string) error {
// Merge all servers found in the response. Servers in the response
// with newer API versions are filtered from the list. If the list
// is missing an address found in the RPCProxy's server list, remove
// it from the RPCProxy.
p.serverListLock.Lock()
defer p.serverListLock.Unlock()
// Clear the backup server list when a heartbeat contains at least
// one server.
if len(servers) > 0 && len(p.backupServers.L) > 0 {
p.backupServers.L = make([]*ServerEndpoint, 0, len(servers))
}
// 1) Create a map to reconcile the difference between
// p.primaryServers and servers.
type targetServer struct {
server *ServerEndpoint
// 'b' == both
// 'o' == original
// 'n' == new
state byte
}
mergedPrimaryMap := make(map[EndpointKey]*targetServer, len(p.primaryServers.L)+len(servers))
numOldServers := 0
for _, s := range p.primaryServers.L {
mergedPrimaryMap[*s.Key()] = &targetServer{server: s, state: 'o'}
numOldServers++
}
numBothServers := 0
var newServers bool
for _, s := range servers {
// Filter out servers using a newer API version. Prevent
// spamming the logs every heartbeat.
//
// TODO(sean@): Move the logging throttle logic into a
// dedicated logging package so RPCProxy does not have to
// perform this accounting.
if int32(p.configInfo.RPCMajorVersion()) < s.RPCMajorVersion ||
(int32(p.configInfo.RPCMajorVersion()) == s.RPCMajorVersion &&
int32(p.configInfo.RPCMinorVersion()) < s.RPCMinorVersion) {
now := time.Now()
t, ok := p.rpcAPIMismatchThrottle[s.RPCAdvertiseAddr]
if ok && t.After(now) {
continue
}
p.logger.Printf("[WARN] client.rpcproxy: API mismatch between client version (v%d.%d) and server version (v%d.%d), ignoring server %+q", p.configInfo.RPCMajorVersion(), p.configInfo.RPCMinorVersion(), s.RPCMajorVersion, s.RPCMinorVersion, s.RPCAdvertiseAddr)
p.rpcAPIMismatchThrottle[s.RPCAdvertiseAddr] = now.Add(rpcAPIMismatchLogRate)
continue
}
server, err := NewServerEndpoint(s.RPCAdvertiseAddr)
if err != nil {
p.logger.Printf("[WARN] client.rpcproxy: Unable to create a server from %+q: %v", s.RPCAdvertiseAddr, err)
continue
}
// Nomad servers in different datacenters are automatically
// added to the backup server list.
if s.Datacenter != p.configInfo.Datacenter() {
p.backupServers.L = append(p.backupServers.L, server)
continue
}
k := server.Key()
_, found := mergedPrimaryMap[*k]
if found {
mergedPrimaryMap[*k].state = 'b'
numBothServers++
} else {
mergedPrimaryMap[*k] = &targetServer{server: server, state: 'n'}
newServers = true
}
}
// Short-circuit acquiring listLock if nothing changed
if !newServers && numOldServers == numBothServers {
return nil
}
p.activatedListLock.Lock()
defer p.activatedListLock.Unlock()
newServerCfg := p.getServerList()
for k, v := range mergedPrimaryMap {
switch v.state {
case 'b':
// Do nothing, server exists in both
case 'o':
// Server has been removed
// TODO(sean@): Teach Nomad servers how to remove
// themselves from their heartbeat in order to
// gracefully drain their clients over the next
// cluster's max rebalanceTimer duration. Without
// this enhancement, if a server is being shut down and
// it is the first in serverList, the client will
// fail its next RPC connection.
p.primaryServers.removeServerByKey(&k)
newServerCfg.removeServerByKey(&k)
case 'n':
// Server added. Append it to both lists
// immediately. The server should only go into
// active use in the event of a failure or after a
// rebalance occurs.
p.primaryServers.L = append(p.primaryServers.L, v.server)
newServerCfg.L = append(newServerCfg.L, v.server)
default:
panic("unknown merge list state")
}
}
p.numNodes = int(numNodes)
p.leaderAddr = leaderRPCAddr
p.saveServerList(newServerCfg)
return nil
}
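/*
Minimal construction sketch (the stub types below are hypothetical and exist only
to satisfy the NomadConfigInfo and Pinger interfaces defined above):

	type staticInfo struct{ dc, region string }

	func (s staticInfo) Datacenter() string   { return s.dc }
	func (s staticInfo) RPCMajorVersion() int { return 1 }
	func (s staticInfo) RPCMinorVersion() int { return 0 }
	func (s staticInfo) Region() string       { return s.region }

	type noopPinger struct{}

	func (noopPinger) PingNomadServer(region string, apiMajorVersion int, s *ServerEndpoint) (bool, error) {
		return true, nil
	}

	shutdownCh := make(chan struct{})
	logger := log.New(os.Stderr, "", log.LstdFlags)
	p := NewRPCProxy(logger, shutdownCh, staticInfo{"dc1", "global"}, noopPinger{})
	p.AddPrimaryServer("10.0.0.1:4647")
	go p.Run()
*/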

View File

@ -1,84 +0,0 @@
package rpcproxy
import (
"fmt"
"net"
"strings"
)
const (
defaultNomadRPCPort = "4647"
)
// EndpointKey is used in maps and for equality tests. A key is based on endpoints.
type EndpointKey struct {
name string
}
// Equal compares two EndpointKey objects
func (k *EndpointKey) Equal(x *EndpointKey) bool {
return k.name == x.name
}
// ServerEndpoint contains the address information needed to connect to a Nomad
// server.
//
// TODO(sean@): Server is stubbed out so that in the future it can hold a
// reference to Node (and ultimately Node.ID).
type ServerEndpoint struct {
// Name is the unique lookup key for a Server instance
Name string
Host string
Port string
Addr net.Addr
}
// Key returns the corresponding Key
func (s *ServerEndpoint) Key() *EndpointKey {
return &EndpointKey{
name: s.Name,
}
}
// NewServerEndpoint creates a new Server instance with a resolvable
// endpoint. `name` can be either an IP address or a DNS name. If `name` is
// a DNS name, it must be resolvable to an IP address (most inputs are IP
// addresses, not DNS names, but both work equally well when the name is
// resolvable).
func NewServerEndpoint(name string) (*ServerEndpoint, error) {
s := &ServerEndpoint{
Name: name,
}
var host, port string
var err error
host, port, err = net.SplitHostPort(name)
if err == nil {
s.Host = host
s.Port = port
} else {
if strings.Contains(err.Error(), "missing port") {
s.Host = name
s.Port = defaultNomadRPCPort
} else {
return nil, err
}
}
if s.Addr, err = net.ResolveTCPAddr("tcp", net.JoinHostPort(s.Host, s.Port)); err != nil {
return nil, err
}
return s, err
}
// String returns a string representation of Server
func (s *ServerEndpoint) String() string {
var addrStr, networkStr string
if s.Addr != nil {
addrStr = s.Addr.String()
networkStr = s.Addr.Network()
}
return fmt.Sprintf("%s (%s:%s)", s.Name, networkStr, addrStr)
}
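/*
Usage sketch (addresses are examples): a bare host falls back to
defaultNomadRPCPort, while an explicit host:port is used as given.

	s1, _ := NewServerEndpoint("10.0.0.1")      // resolves to 10.0.0.1:4647
	s2, _ := NewServerEndpoint("10.0.0.2:4747") // keeps the explicit port
	fmt.Println(s1.String(), s2.String())       // 10.0.0.1 (tcp:10.0.0.1:4647) 10.0.0.2 (tcp:10.0.0.2:4747)
*/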

View File

@ -1,62 +0,0 @@
package stats
import (
"runtime"
"time"
shelpers "github.com/hashicorp/nomad/helper/stats"
)
// CpuStats calculates cpu usage percentage
type CpuStats struct {
prevCpuTime float64
prevTime time.Time
clkSpeed float64
totalCpus int
}
// NewCpuStats returns a cpu stats calculator
func NewCpuStats() *CpuStats {
numCpus := runtime.NumCPU()
cpuStats := &CpuStats{
totalCpus: numCpus,
}
return cpuStats
}
// Percent calculates the cpu usage percentage based on the current cpu usage
// and the previous cpu usage where usage is given as time in nanoseconds spent
// in the cpu
func (c *CpuStats) Percent(cpuTime float64) float64 {
now := time.Now()
if c.prevCpuTime == 0.0 {
// invoked first time
c.prevCpuTime = cpuTime
c.prevTime = now
return 0.0
}
timeDelta := now.Sub(c.prevTime).Nanoseconds()
ret := c.calculatePercent(c.prevCpuTime, cpuTime, timeDelta)
c.prevCpuTime = cpuTime
c.prevTime = now
return ret
}
// TicksConsumed calculates the total ticks consumed by the process across all
// cpu cores
func (c *CpuStats) TicksConsumed(percent float64) float64 {
return (percent / 100) * shelpers.TotalTicksAvailable() / float64(c.totalCpus)
}
func (c *CpuStats) calculatePercent(t1, t2 float64, timeDelta int64) float64 {
vDelta := t2 - t1
if timeDelta <= 0 || vDelta <= 0.0 {
return 0.0
}
overallPercent := (vDelta / float64(timeDelta)) * 100.0
return overallPercent
}
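/*
Worked example (illustrative numbers): if the process consumed 2e9 ns of CPU time
since the previous sample and 4e9 ns of wall-clock time elapsed, Percent reports
(2e9 / 4e9) * 100 = 50%.
*/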

View File

@ -1,187 +0,0 @@
package stats
import (
"math"
"runtime"
"time"
"github.com/shirou/gopsutil/cpu"
"github.com/shirou/gopsutil/disk"
"github.com/shirou/gopsutil/host"
"github.com/shirou/gopsutil/mem"
shelpers "github.com/hashicorp/nomad/helper/stats"
)
// HostStats represents resource usage stats of the host running a Nomad client
type HostStats struct {
Memory *MemoryStats
CPU []*CPUStats
DiskStats []*DiskStats
Uptime uint64
Timestamp int64
CPUTicksConsumed float64
}
// MemoryStats represents stats related to virtual memory usage
type MemoryStats struct {
Total uint64
Available uint64
Used uint64
Free uint64
}
// CPUStats represents stats related to cpu usage
type CPUStats struct {
CPU string
User float64
System float64
Idle float64
Total float64
}
// DiskStats represents stats related to disk usage
type DiskStats struct {
Device string
Mountpoint string
Size uint64
Used uint64
Available uint64
UsedPercent float64
InodesUsedPercent float64
}
// HostStatsCollector collects host resource usage stats
type HostStatsCollector struct {
clkSpeed float64
numCores int
statsCalculator map[string]*HostCpuStatsCalculator
}
// NewHostStatsCollector returns a HostStatsCollector
func NewHostStatsCollector() *HostStatsCollector {
numCores := runtime.NumCPU()
statsCalculator := make(map[string]*HostCpuStatsCalculator)
collector := &HostStatsCollector{
statsCalculator: statsCalculator,
numCores: numCores,
}
return collector
}
// Collect collects stats related to resource usage of a host
func (h *HostStatsCollector) Collect() (*HostStats, error) {
hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
memStats, err := mem.VirtualMemory()
if err != nil {
return nil, err
}
hs.Memory = &MemoryStats{
Total: memStats.Total,
Available: memStats.Available,
Used: memStats.Used,
Free: memStats.Free,
}
ticksConsumed := 0.0
cpuStats, err := cpu.Times(true)
if err != nil {
return nil, err
}
cs := make([]*CPUStats, len(cpuStats))
for idx, cpuStat := range cpuStats {
percentCalculator, ok := h.statsCalculator[cpuStat.CPU]
if !ok {
percentCalculator = NewHostCpuStatsCalculator()
h.statsCalculator[cpuStat.CPU] = percentCalculator
}
idle, user, system, total := percentCalculator.Calculate(cpuStat)
cs[idx] = &CPUStats{
CPU: cpuStat.CPU,
User: user,
System: system,
Idle: idle,
Total: total,
}
ticksConsumed += (total / 100) * (shelpers.TotalTicksAvailable() / float64(len(cpuStats)))
}
hs.CPU = cs
hs.CPUTicksConsumed = ticksConsumed
partitions, err := disk.Partitions(false)
if err != nil {
return nil, err
}
var diskStats []*DiskStats
for _, partition := range partitions {
usage, err := disk.Usage(partition.Mountpoint)
if err != nil {
return nil, err
}
ds := DiskStats{
Device: partition.Device,
Mountpoint: partition.Mountpoint,
Size: usage.Total,
Used: usage.Used,
Available: usage.Free,
UsedPercent: usage.UsedPercent,
InodesUsedPercent: usage.InodesUsedPercent,
}
if math.IsNaN(ds.UsedPercent) {
ds.UsedPercent = 0.0
}
if math.IsNaN(ds.InodesUsedPercent) {
ds.InodesUsedPercent = 0.0
}
diskStats = append(diskStats, &ds)
}
hs.DiskStats = diskStats
uptime, err := host.Uptime()
if err != nil {
return nil, err
}
hs.Uptime = uptime
return hs, nil
}
// HostCpuStatsCalculator calculates cpu usage percentages
type HostCpuStatsCalculator struct {
prevIdle float64
prevUser float64
prevSystem float64
prevBusy float64
prevTotal float64
}
// NewHostCpuStatsCalculator returns a HostCpuStatsCalculator
func NewHostCpuStatsCalculator() *HostCpuStatsCalculator {
return &HostCpuStatsCalculator{}
}
// Calculate calculates the current cpu usage percentages
func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, user float64, system float64, total float64) {
currentIdle := times.Idle
currentUser := times.User
currentSystem := times.System
currentTotal := times.Total()
deltaTotal := currentTotal - h.prevTotal
idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
user = ((currentUser - h.prevUser) / deltaTotal) * 100
system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
times.Softirq + times.Steal + times.Guest + times.GuestNice + times.Stolen
total = ((currentBusy - h.prevBusy) / deltaTotal) * 100
h.prevIdle = currentIdle
h.prevUser = currentUser
h.prevSystem = currentSystem
h.prevTotal = currentTotal
h.prevBusy = currentBusy
return
}
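/*
Worked example (illustrative numbers): if deltaTotal is 10s of CPU time since the
last sample, during which the idle counter advanced 7s, user 2s, and system 1s,
Calculate reports idle = 70%, user = 20%, system = 10%, and total (busy) = 30%.
*/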

View File

@ -1,97 +0,0 @@
package structs
// MemoryStats holds memory usage related stats
type MemoryStats struct {
RSS uint64
Cache uint64
Swap uint64
MaxUsage uint64
KernelUsage uint64
KernelMaxUsage uint64
// A list of fields whose values were actually sampled
Measured []string
}
func (ms *MemoryStats) Add(other *MemoryStats) {
ms.RSS += other.RSS
ms.Cache += other.Cache
ms.Swap += other.Swap
ms.MaxUsage += other.MaxUsage
ms.KernelUsage += other.KernelUsage
ms.KernelMaxUsage += other.KernelMaxUsage
ms.Measured = joinStringSet(ms.Measured, other.Measured)
}
// CpuStats holds cpu usage related stats
type CpuStats struct {
SystemMode float64
UserMode float64
TotalTicks float64
ThrottledPeriods uint64
ThrottledTime uint64
Percent float64
// A list of fields whose values were actually sampled
Measured []string
}
func (cs *CpuStats) Add(other *CpuStats) {
cs.SystemMode += other.SystemMode
cs.UserMode += other.UserMode
cs.TotalTicks += other.TotalTicks
cs.ThrottledPeriods += other.ThrottledPeriods
cs.ThrottledTime += other.ThrottledTime
cs.Percent += other.Percent
cs.Measured = joinStringSet(cs.Measured, other.Measured)
}
// ResourceUsage holds information related to cpu and memory stats
type ResourceUsage struct {
MemoryStats *MemoryStats
CpuStats *CpuStats
}
func (ru *ResourceUsage) Add(other *ResourceUsage) {
ru.MemoryStats.Add(other.MemoryStats)
ru.CpuStats.Add(other.CpuStats)
}
// TaskResourceUsage holds aggregated resource usage of all processes in a Task
// and the resource usage of the individual pids
type TaskResourceUsage struct {
ResourceUsage *ResourceUsage
Timestamp int64
Pids map[string]*ResourceUsage
}
// AllocResourceUsage holds the aggregated task resource usage of the
// allocation.
type AllocResourceUsage struct {
// ResourceUsage is the summation of the task resources
ResourceUsage *ResourceUsage
// Tasks contains the resource usage of each task
Tasks map[string]*TaskResourceUsage
// The max timestamp of all the Tasks
Timestamp int64
}
// joinStringSet takes two slices of strings and joins them
func joinStringSet(s1, s2 []string) []string {
lookup := make(map[string]struct{}, len(s1))
j := make([]string, 0, len(s1))
for _, s := range s1 {
j = append(j, s)
lookup[s] = struct{}{}
}
for _, s := range s2 {
if _, ok := lookup[s]; !ok {
j = append(j, s)
}
}
return j
}
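/*
Aggregation sketch (values assumed): Add sums the counters and joinStringSet
deduplicates the Measured fields.

	a := &MemoryStats{RSS: 100, Measured: []string{"RSS"}}
	b := &MemoryStats{RSS: 50, Cache: 10, Measured: []string{"RSS", "Cache"}}
	a.Add(b)
	// a.RSS == 150, a.Cache == 10, a.Measured == []string{"RSS", "Cache"}
*/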

View File

@ -1,651 +0,0 @@
package client
import (
"crypto/md5"
"encoding/hex"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver"
"github.com/hashicorp/nomad/client/getter"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/client/driver/env"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
cstructs "github.com/hashicorp/nomad/client/structs"
)
const (
// killBackoffBaseline is the baseline time for exponential backoff while
// killing a task.
killBackoffBaseline = 5 * time.Second
// killBackoffLimit is the limit of the exponential backoff for killing
// the task.
killBackoffLimit = 2 * time.Minute
// killFailureLimit is how many times we will attempt to kill a task before
// giving up and potentially leaking resources.
killFailureLimit = 5
)
// TaskRunner is used to wrap a task within an allocation and provide the execution context.
type TaskRunner struct {
config *config.Config
updater TaskStateUpdater
logger *log.Logger
ctx *driver.ExecContext
alloc *structs.Allocation
restartTracker *RestartTracker
// running marks whether the task is running
running bool
runningLock sync.Mutex
resourceUsage *cstructs.TaskResourceUsage
resourceUsageLock sync.RWMutex
task *structs.Task
taskEnv *env.TaskEnvironment
updateCh chan *structs.Allocation
handle driver.DriverHandle
handleLock sync.Mutex
// artifactsDownloaded tracks whether the task's artifacts have been
// downloaded
artifactsDownloaded bool
destroy bool
destroyCh chan struct{}
destroyLock sync.Mutex
waitCh chan struct{}
}
// taskRunnerState is used to snapshot the state of the task runner
type taskRunnerState struct {
Version string
Task *structs.Task
HandleID string
ArtifactDownloaded bool
}
// TaskStateUpdater is used to signal that a task's state has changed.
type TaskStateUpdater func(taskName, state string, event *structs.TaskEvent)
// NewTaskRunner is used to create a new task context
func NewTaskRunner(logger *log.Logger, config *config.Config,
updater TaskStateUpdater, ctx *driver.ExecContext,
alloc *structs.Allocation, task *structs.Task) *TaskRunner {
// Merge in the task resources
task.Resources = alloc.TaskResources[task.Name]
// Build the restart tracker.
tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
if tg == nil {
logger.Printf("[ERR] client: alloc '%s' for missing task group '%s'", alloc.ID, alloc.TaskGroup)
return nil
}
restartTracker := newRestartTracker(tg.RestartPolicy, alloc.Job.Type)
tc := &TaskRunner{
config: config,
updater: updater,
logger: logger,
restartTracker: restartTracker,
ctx: ctx,
alloc: alloc,
task: task,
updateCh: make(chan *structs.Allocation, 64),
destroyCh: make(chan struct{}),
waitCh: make(chan struct{}),
}
return tc
}
// MarkReceived marks the task as received.
func (r *TaskRunner) MarkReceived() {
r.updater(r.task.Name, structs.TaskStatePending, structs.NewTaskEvent(structs.TaskReceived))
}
// WaitCh returns a channel to wait for termination
func (r *TaskRunner) WaitCh() <-chan struct{} {
return r.waitCh
}
// stateFilePath returns the path to our state file
func (r *TaskRunner) stateFilePath() string {
// Get the MD5 of the task name
hashVal := md5.Sum([]byte(r.task.Name))
hashHex := hex.EncodeToString(hashVal[:])
dirName := fmt.Sprintf("task-%s", hashHex)
// Generate the path
path := filepath.Join(r.config.StateDir, "alloc", r.alloc.ID,
dirName, "state.json")
return path
}
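/*
Example layout (paths are illustrative): for a task named "web" in allocation
"abc123", the task name is MD5-hashed and hex-encoded, so state is written to
roughly <state_dir>/alloc/abc123/task-<md5hex("web")>/state.json.
*/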
// RestoreState is used to restore our state
func (r *TaskRunner) RestoreState() error {
// Load the snapshot
var snap taskRunnerState
if err := restoreState(r.stateFilePath(), &snap); err != nil {
return err
}
// Restore fields
if snap.Task == nil {
return fmt.Errorf("task runner snapshot includes a nil Task")
}
r.task = snap.Task
r.artifactsDownloaded = snap.ArtifactDownloaded
if err := r.setTaskEnv(); err != nil {
return fmt.Errorf("client: failed to create task environment for task %q in allocation %q: %v",
r.task.Name, r.alloc.ID, err)
}
// Restore the driver
if snap.HandleID != "" {
driver, err := r.createDriver()
if err != nil {
return err
}
handle, err := driver.Open(r.ctx, snap.HandleID)
// In the case it fails, we relaunch the task in the Run() method.
if err != nil {
r.logger.Printf("[ERR] client: failed to open handle to task '%s' for alloc '%s': %v",
r.task.Name, r.alloc.ID, err)
return nil
}
r.handleLock.Lock()
r.handle = handle
r.handleLock.Unlock()
r.runningLock.Lock()
r.running = true
r.runningLock.Unlock()
}
return nil
}
// SaveState is used to snapshot our state
func (r *TaskRunner) SaveState() error {
snap := taskRunnerState{
Task: r.task,
Version: r.config.Version,
ArtifactDownloaded: r.artifactsDownloaded,
}
r.handleLock.Lock()
if r.handle != nil {
snap.HandleID = r.handle.ID()
}
r.handleLock.Unlock()
return persistState(r.stateFilePath(), &snap)
}
// DestroyState is used to cleanup after ourselves
func (r *TaskRunner) DestroyState() error {
return os.RemoveAll(r.stateFilePath())
}
// setState is used to update the state of the task runner
func (r *TaskRunner) setState(state string, event *structs.TaskEvent) {
// Persist our state to disk.
if err := r.SaveState(); err != nil {
r.logger.Printf("[ERR] client: failed to save state of Task Runner for task %q: %v", r.task.Name, err)
}
// Indicate the task has been updated.
r.updater(r.task.Name, state, event)
}
// setTaskEnv sets the task environment. It returns an error if it could not be
// created.
func (r *TaskRunner) setTaskEnv() error {
taskEnv, err := driver.GetTaskEnv(r.ctx.AllocDir, r.config.Node, r.task.Copy(), r.alloc)
if err != nil {
return err
}
r.taskEnv = taskEnv
return nil
}
// createDriver makes a driver for the task
func (r *TaskRunner) createDriver() (driver.Driver, error) {
if r.taskEnv == nil {
return nil, fmt.Errorf("task environment not made for task %q in allocation %q", r.task.Name, r.alloc.ID)
}
driverCtx := driver.NewDriverContext(r.task.Name, r.config, r.config.Node, r.logger, r.taskEnv)
driver, err := driver.NewDriver(r.task.Driver, driverCtx)
if err != nil {
return nil, fmt.Errorf("failed to create driver '%s' for alloc %s: %v",
r.task.Driver, r.alloc.ID, err)
}
return driver, err
}
// Run is a long running routine used to manage the task
func (r *TaskRunner) Run() {
defer close(r.waitCh)
r.logger.Printf("[DEBUG] client: starting task context for '%s' (alloc '%s')",
r.task.Name, r.alloc.ID)
if err := r.validateTask(); err != nil {
r.setState(
structs.TaskStateDead,
structs.NewTaskEvent(structs.TaskFailedValidation).SetValidationError(err))
return
}
if err := r.setTaskEnv(); err != nil {
r.setState(
structs.TaskStateDead,
structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(err))
return
}
r.run()
return
}
// validateTask validates the fields of the task and returns an error if the
// task is invalid.
func (r *TaskRunner) validateTask() error {
var mErr multierror.Error
// Validate the user.
unallowedUsers := r.config.ReadStringListToMapDefault("user.blacklist", config.DefaultUserBlacklist)
checkDrivers := r.config.ReadStringListToMapDefault("user.checked_drivers", config.DefaultUserCheckedDrivers)
if _, driverMatch := checkDrivers[r.task.Driver]; driverMatch {
if _, unallowed := unallowedUsers[r.task.User]; unallowed {
mErr.Errors = append(mErr.Errors, fmt.Errorf("running as user %q is disallowed", r.task.User))
}
}
// Validate the artifacts
for i, artifact := range r.task.Artifacts {
// Verify the artifact doesn't escape the task directory.
if err := artifact.Validate(); err != nil {
// If this error occurs there is potentially a server bug or
// malicious server spoofing.
r.logger.Printf("[ERR] client: allocation %q, task %v, artifact %#v (%v) fails validation: %v",
r.alloc.ID, r.task.Name, artifact, i, err)
mErr.Errors = append(mErr.Errors, fmt.Errorf("artifact (%d) failed validation: %v", i, err))
}
}
if len(mErr.Errors) == 1 {
return mErr.Errors[0]
}
return mErr.ErrorOrNil()
}
func (r *TaskRunner) run() {
// Predeclare things so we can jump to the RESTART
var handleEmpty bool
var stopCollection chan struct{}
for {
// Download the task's artifacts
if !r.artifactsDownloaded && len(r.task.Artifacts) > 0 {
r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskDownloadingArtifacts))
taskDir, ok := r.ctx.AllocDir.TaskDirs[r.task.Name]
if !ok {
err := fmt.Errorf("task directory couldn't be found")
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(err))
r.logger.Printf("[ERR] client: task directory for alloc %q task %q couldn't be found", r.alloc.ID, r.task.Name)
r.restartTracker.SetStartError(err)
goto RESTART
}
for _, artifact := range r.task.Artifacts {
if err := getter.GetArtifact(r.taskEnv, artifact, taskDir); err != nil {
r.setState(structs.TaskStateDead,
structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(err))
r.restartTracker.SetStartError(dstructs.NewRecoverableError(err, true))
goto RESTART
}
}
r.artifactsDownloaded = true
}
// Start the task if not yet started or it is being forced. This logic
// is necessary because in the case of a restore the handle already
// exists.
r.handleLock.Lock()
handleEmpty = r.handle == nil
r.handleLock.Unlock()
if handleEmpty {
startErr := r.startTask()
r.restartTracker.SetStartError(startErr)
if startErr != nil {
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(startErr))
goto RESTART
}
// Mark the task as started
r.setState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
r.runningLock.Lock()
r.running = true
r.runningLock.Unlock()
}
if stopCollection == nil {
stopCollection = make(chan struct{})
go r.collectResourceUsageStats(stopCollection)
}
// Wait for updates
WAIT:
for {
select {
case waitRes := <-r.handle.WaitCh():
if waitRes == nil {
panic("nil wait")
}
r.runningLock.Lock()
r.running = false
r.runningLock.Unlock()
// Stop collection of the task's resource usage
close(stopCollection)
// Log whether the task was successful or not.
r.restartTracker.SetWaitResult(waitRes)
r.setState(structs.TaskStateDead, r.waitErrorToEvent(waitRes))
if !waitRes.Successful() {
r.logger.Printf("[INFO] client: task %q for alloc %q failed: %v", r.task.Name, r.alloc.ID, waitRes)
} else {
r.logger.Printf("[INFO] client: task %q for alloc %q completed successfully", r.task.Name, r.alloc.ID)
}
break WAIT
case update := <-r.updateCh:
if err := r.handleUpdate(update); err != nil {
r.logger.Printf("[ERR] client: update to task %q failed: %v", r.task.Name, err)
}
case <-r.destroyCh:
// Mark that we received the kill event
timeout := driver.GetKillTimeout(r.task.KillTimeout, r.config.MaxKillTimeout)
r.setState(structs.TaskStateRunning,
structs.NewTaskEvent(structs.TaskKilling).SetKillTimeout(timeout))
// Kill the task using an exponential backoff in case of failures.
destroySuccess, err := r.handleDestroy()
if !destroySuccess {
// We couldn't successfully destroy the resource created.
r.logger.Printf("[ERR] client: failed to kill task %q. Resources may have been leaked: %v", r.task.Name, err)
}
// Stop collection of the task's resource usage
close(stopCollection)
// Store that the task has been destroyed and any associated error.
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled).SetKillError(err))
r.runningLock.Lock()
r.running = false
r.runningLock.Unlock()
return
}
}
RESTART:
state, when := r.restartTracker.GetState()
r.restartTracker.SetStartError(nil).SetWaitResult(nil)
reason := r.restartTracker.GetReason()
switch state {
case structs.TaskNotRestarting, structs.TaskTerminated:
r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.alloc.ID)
if state == structs.TaskNotRestarting {
r.setState(structs.TaskStateDead,
structs.NewTaskEvent(structs.TaskNotRestarting).
SetRestartReason(reason))
}
return
case structs.TaskRestarting:
r.logger.Printf("[INFO] client: Restarting task %q for alloc %q in %v", r.task.Name, r.alloc.ID, when)
r.setState(structs.TaskStatePending,
structs.NewTaskEvent(structs.TaskRestarting).
SetRestartDelay(when).
SetRestartReason(reason))
default:
r.logger.Printf("[ERR] client: restart tracker returned unknown state: %q", state)
return
}
// Sleep but watch for destroy events.
select {
case <-time.After(when):
case <-r.destroyCh:
}
// Destroyed while we were waiting to restart, so abort.
r.destroyLock.Lock()
destroyed := r.destroy
r.destroyLock.Unlock()
if destroyed {
r.logger.Printf("[DEBUG] client: Not restarting task: %v because it's destroyed by user", r.task.Name)
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled))
return
}
// Clear the handle so a new driver will be created.
r.handleLock.Lock()
r.handle = nil
stopCollection = nil
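// With stopCollection cleared, a fresh stats collector is started on the
// next pass through the loop.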
r.handleLock.Unlock()
}
}
// startTask creates the driver and start the task.
func (r *TaskRunner) startTask() error {
// Create a driver
driver, err := r.createDriver()
if err != nil {
return fmt.Errorf("failed to create driver of task '%s' for alloc '%s': %v",
r.task.Name, r.alloc.ID, err)
}
// Start the job
handle, err := driver.Start(r.ctx, r.task)
if err != nil {
return fmt.Errorf("failed to start task '%s' for alloc '%s': %v",
r.task.Name, r.alloc.ID, err)
}
r.handleLock.Lock()
r.handle = handle
r.handleLock.Unlock()
return nil
}
// collectResourceUsageStats starts collecting resource usage stats of a Task.
// Collection ends when the passed channel is closed
func (r *TaskRunner) collectResourceUsageStats(stopCollection <-chan struct{}) {
// start collecting the stats right away and then start collecting every
// collection interval
next := time.NewTimer(0)
defer next.Stop()
for {
select {
case <-next.C:
ru, err := r.handle.Stats()
next.Reset(r.config.StatsCollectionInterval)
if err != nil {
// We do not log when the plugin is shut down as this is simply a
// race between the stopCollection channel being closed and calling
// Stats on the handle.
if !strings.Contains(err.Error(), "connection is shut down") {
r.logger.Printf("[WARN] client: error fetching stats of task %v: %v", r.task.Name, err)
}
continue
}
r.resourceUsageLock.Lock()
r.resourceUsage = ru
r.resourceUsageLock.Unlock()
r.emitStats(ru)
case <-stopCollection:
return
}
}
}
// LatestResourceUsage returns the last resource utilization datapoint collected
func (r *TaskRunner) LatestResourceUsage() *cstructs.TaskResourceUsage {
r.resourceUsageLock.RLock()
defer r.resourceUsageLock.RUnlock()
r.runningLock.Lock()
defer r.runningLock.Unlock()
// If the task is not running there can be no latest resource
if !r.running {
return nil
}
return r.resourceUsage
}
// handleUpdate takes an updated allocation and updates internal state to
// reflect the new config for the task.
func (r *TaskRunner) handleUpdate(update *structs.Allocation) error {
// Extract the task group from the alloc.
tg := update.Job.LookupTaskGroup(update.TaskGroup)
if tg == nil {
return fmt.Errorf("alloc '%s' missing task group '%s'", update.ID, update.TaskGroup)
}
// Extract the task.
var updatedTask *structs.Task
for _, t := range tg.Tasks {
if t.Name == r.task.Name {
updatedTask = t
}
}
if updatedTask == nil {
return fmt.Errorf("task group %q doesn't contain task %q", tg.Name, r.task.Name)
}
// Merge in the task resources
updatedTask.Resources = update.TaskResources[updatedTask.Name]
// Update will update resources and store the new kill timeout.
var mErr multierror.Error
r.handleLock.Lock()
if r.handle != nil {
if err := r.handle.Update(updatedTask); err != nil {
mErr.Errors = append(mErr.Errors, fmt.Errorf("updating task resources failed: %v", err))
}
}
r.handleLock.Unlock()
// Update the restart policy.
if r.restartTracker != nil {
r.restartTracker.SetPolicy(tg.RestartPolicy)
}
// Store the updated alloc.
r.alloc = update
r.task = updatedTask
return mErr.ErrorOrNil()
}
// handleDestroy kills the task handle. In the case that killing fails,
// handleDestroy will retry with an exponential backoff and will give up at a
// given limit. It returns whether the task was destroyed and the error
// associated with the last kill attempt.
func (r *TaskRunner) handleDestroy() (destroyed bool, err error) {
// Cap the number of times we attempt to kill the task.
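// Each failed attempt sleeps for killBackoffBaseline * 4^i, capped at
// killBackoffLimit below.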
for i := 0; i < killFailureLimit; i++ {
if err = r.handle.Kill(); err != nil {
// Calculate the new backoff
backoff := (1 << (2 * uint64(i))) * killBackoffBaseline
if backoff > killBackoffLimit {
backoff = killBackoffLimit
}
r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc %q. Retrying in %v: %v",
r.task.Name, r.alloc.ID, backoff, err)
time.Sleep(time.Duration(backoff))
} else {
// Kill was successful
return true, nil
}
}
return
}
// Helper function for converting a WaitResult into a TaskTerminated event.
func (r *TaskRunner) waitErrorToEvent(res *dstructs.WaitResult) *structs.TaskEvent {
return structs.NewTaskEvent(structs.TaskTerminated).
SetExitCode(res.ExitCode).
SetSignal(res.Signal).
SetExitMessage(res.Err)
}
// Update is used to update the task of the context
func (r *TaskRunner) Update(update *structs.Allocation) {
select {
case r.updateCh <- update:
default:
r.logger.Printf("[ERR] client: dropping task update '%s' (alloc '%s')",
r.task.Name, r.alloc.ID)
}
}
// Destroy is used to indicate that the task context should be destroyed
func (r *TaskRunner) Destroy() {
r.destroyLock.Lock()
defer r.destroyLock.Unlock()
if r.destroy {
return
}
r.destroy = true
close(r.destroyCh)
}
// emitStats emits resource usage stats of tasks to remote metrics collector
// sinks
func (r *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
if ru.ResourceUsage.MemoryStats != nil && r.config.PublishAllocationMetrics {
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage))
}
if ru.ResourceUsage.CpuStats != nil && r.config.PublishAllocationMetrics {
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "total_percent"}, float32(ru.ResourceUsage.CpuStats.Percent))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods))
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "total_ticks"}, float32(ru.ResourceUsage.CpuStats.TotalTicks))
}
}

View File

@ -1,25 +0,0 @@
package testutil
import (
docker "github.com/fsouza/go-dockerclient"
"testing"
)
// DockerIsConnected checks to see if a docker daemon is available (local or remote)
func DockerIsConnected(t *testing.T) bool {
client, err := docker.NewClientFromEnv()
if err != nil {
return false
}
// Creating a client doesn't actually connect, so make sure we do something
// like call Version() on it.
env, err := client.Version()
if err != nil {
t.Logf("Failed to connect to docker daemon: %s", err)
return false
}
t.Logf("Successfully connected to docker daemon running version %s", env.Get("Version"))
return true
}

View File

@ -1,53 +0,0 @@
package testutil
import (
"os/exec"
"runtime"
"syscall"
"testing"
)
func ExecCompatible(t *testing.T) {
if runtime.GOOS != "linux" || syscall.Geteuid() != 0 {
t.Skip("Test only available running as root on linux")
}
}
func JavaCompatible(t *testing.T) {
if runtime.GOOS == "linux" && syscall.Geteuid() != 0 {
t.Skip("Test only available when running as root on linux")
}
}
func QemuCompatible(t *testing.T) {
// Check if qemu exists
bin := "qemu-system-x86_64"
if runtime.GOOS == "windows" {
bin = "qemu-img"
}
_, err := exec.Command(bin, "--version").CombinedOutput()
if err != nil {
t.Skip("Must have Qemu installed for Qemu specific tests to run")
}
}
func RktCompatible(t *testing.T) {
if runtime.GOOS == "windows" || syscall.Geteuid() != 0 {
t.Skip("Must be root on non-windows environments to run test")
}
// else see if rkt exists
_, err := exec.Command("rkt", "version").CombinedOutput()
if err != nil {
t.Skip("Must have rkt installed for rkt specific tests to run")
}
}
func MountCompatible(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("Windows does not support mount")
}
if syscall.Geteuid() != 0 {
t.Skip("Must be root to run test")
}
}

View File

@ -1,111 +0,0 @@
package client
import (
"encoding/json"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"github.com/hashicorp/nomad/nomad/structs"
)
type allocTuple struct {
exist, updated *structs.Allocation
}
// diffResult is used to return the sets that result from a diff
type diffResult struct {
added []*structs.Allocation
removed []*structs.Allocation
updated []allocTuple
ignore []*structs.Allocation
}
func (d *diffResult) GoString() string {
return fmt.Sprintf("allocs: (added %d) (removed %d) (updated %d) (ignore %d)",
len(d.added), len(d.removed), len(d.updated), len(d.ignore))
}
// diffAllocs is used to diff the existing and updated allocations
// to see what has happened.
func diffAllocs(existing []*structs.Allocation, allocs *allocUpdates) *diffResult {
// Scan the existing allocations
result := &diffResult{}
existIdx := make(map[string]struct{})
for _, exist := range existing {
// Mark this as existing
existIdx[exist.ID] = struct{}{}
// Check if the alloc was updated or filtered because an update wasn't
// needed.
alloc, pulled := allocs.pulled[exist.ID]
_, filtered := allocs.filtered[exist.ID]
// If not updated or filtered, removed
if !pulled && !filtered {
result.removed = append(result.removed, exist)
continue
}
// Check for an update
if pulled && alloc.AllocModifyIndex > exist.AllocModifyIndex {
result.updated = append(result.updated, allocTuple{exist, alloc})
continue
}
// Ignore this
result.ignore = append(result.ignore, exist)
}
// Scan the updated allocations for any that are new
for id, pulled := range allocs.pulled {
if _, ok := existIdx[id]; !ok {
result.added = append(result.added, pulled)
}
}
return result
}
// shuffleStrings randomly shuffles the list of strings in place (Fisher-Yates)
func shuffleStrings(list []string) {
for i := range list {
j := rand.Intn(i + 1)
list[i], list[j] = list[j], list[i]
}
}
// persistState saves state by writing it to a temporary file and renaming it
// into place, so a crash cannot leave a partially written state file
func persistState(path string, data interface{}) error {
buf, err := json.Marshal(data)
if err != nil {
return fmt.Errorf("failed to encode state: %v", err)
}
if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
return fmt.Errorf("failed to make dirs for %s: %v", path, err)
}
tmpPath := path + ".tmp"
if err := ioutil.WriteFile(tmpPath, buf, 0600); err != nil {
return fmt.Errorf("failed to save state to tmp: %v", err)
}
if err := os.Rename(tmpPath, path); err != nil {
return fmt.Errorf("failed to rename tmp to path: %v", err)
}
return nil
}
// restoreState is used to read back in the persisted state
func restoreState(path string, data interface{}) error {
buf, err := ioutil.ReadFile(path)
if err != nil {
if os.IsNotExist(err) {
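// A missing state file just means there is nothing to restore.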
return nil
}
return fmt.Errorf("failed to read state: %v", err)
}
if err := json.Unmarshal(buf, data); err != nil {
return fmt.Errorf("failed to decode state: %v", err)
}
return nil
}

View File

@ -1,67 +0,0 @@
package stats
import (
"fmt"
"math"
"sync"
"github.com/shirou/gopsutil/cpu"
)
var (
cpuMhzPerCore float64
cpuModelName string
cpuNumCores int
cpuTotalTicks float64
onceLer sync.Once
)
func Init() error {
var err error
onceLer.Do(func() {
if cpuNumCores, err = cpu.Counts(true); err != nil {
err = fmt.Errorf("Unable to determine the number of CPU cores available: %v", err)
return
}
var cpuInfo []cpu.InfoStat
if cpuInfo, err = cpu.Info(); err != nil {
err = fmt.Errorf("Unable to obtain CPU information: %v", err)
return
}
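// Only the first reported entry is inspected; all cores are assumed to be identical.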
for _, cpu := range cpuInfo {
cpuModelName = cpu.ModelName
cpuMhzPerCore = cpu.Mhz
break
}
// Floor all of the values so that small differences don't cause the
// node to fall into a unique computed node class
cpuMhzPerCore = math.Floor(cpuMhzPerCore)
cpuTotalTicks = math.Floor(float64(cpuNumCores) * cpuMhzPerCore)
})
return err
}
// CPUNumCores returns the number of CPU cores available
func CPUNumCores() int {
return cpuNumCores
}
// CPUMHzPerCore returns the MHz per CPU core
func CPUMHzPerCore() float64 {
return cpuMhzPerCore
}
// CPUModelName returns the model name of the CPU
func CPUModelName() string {
return cpuModelName
}
// TotalTicksAvailable calculates the total frequency available across all
// cores
func TotalTicksAvailable() float64 {
return cpuTotalTicks
}

View File

@ -13,7 +13,6 @@ import (
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/hcl"
"github.com/hashicorp/hcl/hcl/ast"
"github.com/hashicorp/nomad/client/driver"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
)
@ -538,22 +537,6 @@ func parseTasks(jobName string, taskGroupName string, result *[]*structs.Task, l
return err
}
}
// Instantiate a driver to validate the configuration
d, err := driver.NewDriver(
t.Driver,
driver.NewEmptyDriverContext(),
)
if err != nil {
return multierror.Prefix(err,
fmt.Sprintf("'%s', config ->", n))
}
if err := d.Validate(t.Config); err != nil {
return multierror.Prefix(err,
fmt.Sprintf("'%s', config ->", n))
}
}
// Parse constraints

View File

@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2014 Mitchell Hashimoto
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -1,34 +0,0 @@
# Process List Library for Go
go-ps is a library for Go that implements OS-specific APIs to list and
manipulate processes in a platform-safe way. The library can find and
list processes on Linux, Mac OS X, and Windows.
If you're new to Go, this library has a good amount of advanced Go educational
value as well. It uses some advanced features of Go: build tags, accessing
DLL methods for Windows, cgo for Darwin, etc.
How it works:
* **Darwin** uses the `sysctl` syscall to retrieve the process table.
* **Unix** uses the procfs at `/proc` to inspect the process tree.
* **Windows** uses the Windows API, and methods such as
`CreateToolhelp32Snapshot` to get a point-in-time snapshot of
the process table.
## Installation
Install using standard `go get`:
```
$ go get github.com/mitchellh/go-ps
...
```
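A minimal usage sketch, assuming only the `Processes` function and the
`Process` accessors defined by this package:
```
package main

import (
	"fmt"
	"log"

	ps "github.com/mitchellh/go-ps"
)

func main() {
	// List every visible process and print pid, parent pid, and executable name.
	procs, err := ps.Processes()
	if err != nil {
		log.Fatal(err)
	}
	for _, p := range procs {
		fmt.Printf("%d\t%d\t%s\n", p.Pid(), p.PPid(), p.Executable())
	}
}
```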
## TODO
Want to contribute? Here is a short TODO list of things that aren't
implemented for this library that would be nice:
* FreeBSD support
* Plan9 support

View File

@ -1,43 +0,0 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.box = "chef/ubuntu-12.04"
config.vm.provision "shell", inline: $script
["vmware_fusion", "vmware_workstation"].each do |p|
config.vm.provider p do |v|
v.vmx["memsize"] = "1024"
v.vmx["numvcpus"] = "2"
v.vmx["cpuid.coresPerSocket"] = "1"
end
end
end
$script = <<SCRIPT
SRCROOT="/opt/go"
# Install Go
sudo apt-get update
sudo apt-get install -y build-essential mercurial
sudo hg clone -u release https://code.google.com/p/go ${SRCROOT}
cd ${SRCROOT}/src
sudo ./all.bash
# Setup the GOPATH
sudo mkdir -p /opt/gopath
cat <<EOF >/tmp/gopath.sh
export GOPATH="/opt/gopath"
export PATH="/opt/go/bin:\$GOPATH/bin:\$PATH"
EOF
sudo mv /tmp/gopath.sh /etc/profile.d/gopath.sh
sudo chmod 0755 /etc/profile.d/gopath.sh
# Make sure the gopath is usable by vagrant
sudo chown -R vagrant:vagrant $SRCROOT
sudo chown -R vagrant:vagrant /opt/gopath
SCRIPT

View File

@ -1,40 +0,0 @@
// ps provides an API for finding and listing processes in a platform-agnostic
// way.
//
// NOTE: If you're reading these docs online via GoDocs or some other system,
// you might only see the Unix docs. This project makes heavy use of
// platform-specific implementations. We recommend reading the source if you
// are interested.
package ps
// Process is the generic interface that is implemented on every platform
// and provides common operations for processes.
type Process interface {
// Pid is the process ID for this process.
Pid() int
// PPid is the parent process ID for this process.
PPid() int
// Executable name running this process. This is not a path to the
// executable.
Executable() string
}
// Processes returns all processes.
//
// This of course will be a point-in-time snapshot of when this method was
// called. Some operating systems don't provide snapshot capability of the
// process table, in which case the process table returned might contain
// ephemeral entities that happened to be running when this was called.
func Processes() ([]Process, error) {
return processes()
}
// FindProcess looks up a single process by pid.
//
// Process will be nil and error will be nil if a matching process is
// not found.
func FindProcess(pid int) (Process, error) {
return findProcess(pid)
}

View File

@ -1,138 +0,0 @@
// +build darwin
package ps
import (
"bytes"
"encoding/binary"
"syscall"
"unsafe"
)
type DarwinProcess struct {
pid int
ppid int
binary string
}
func (p *DarwinProcess) Pid() int {
return p.pid
}
func (p *DarwinProcess) PPid() int {
return p.ppid
}
func (p *DarwinProcess) Executable() string {
return p.binary
}
func findProcess(pid int) (Process, error) {
ps, err := processes()
if err != nil {
return nil, err
}
for _, p := range ps {
if p.Pid() == pid {
return p, nil
}
}
return nil, nil
}
func processes() ([]Process, error) {
buf, err := darwinSyscall()
if err != nil {
return nil, err
}
procs := make([]*kinfoProc, 0, 50)
k := 0
for i := _KINFO_STRUCT_SIZE; i < buf.Len(); i += _KINFO_STRUCT_SIZE {
proc := &kinfoProc{}
err = binary.Read(bytes.NewBuffer(buf.Bytes()[k:i]), binary.LittleEndian, proc)
if err != nil {
return nil, err
}
k = i
procs = append(procs, proc)
}
darwinProcs := make([]Process, len(procs))
for i, p := range procs {
darwinProcs[i] = &DarwinProcess{
pid: int(p.Pid),
ppid: int(p.PPid),
binary: darwinCstring(p.Comm),
}
}
return darwinProcs, nil
}
func darwinCstring(s [16]byte) string {
i := 0
for _, b := range s {
if b != 0 {
i++
} else {
break
}
}
return string(s[:i])
}
func darwinSyscall() (*bytes.Buffer, error) {
mib := [4]int32{_CTRL_KERN, _KERN_PROC, _KERN_PROC_ALL, 0}
size := uintptr(0)
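// The first sysctl call passes a nil buffer so the kernel reports the
// required size; the second call fills the allocated buffer.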
_, _, errno := syscall.Syscall6(
syscall.SYS___SYSCTL,
uintptr(unsafe.Pointer(&mib[0])),
4,
0,
uintptr(unsafe.Pointer(&size)),
0,
0)
if errno != 0 {
return nil, errno
}
bs := make([]byte, size)
_, _, errno = syscall.Syscall6(
syscall.SYS___SYSCTL,
uintptr(unsafe.Pointer(&mib[0])),
4,
uintptr(unsafe.Pointer(&bs[0])),
uintptr(unsafe.Pointer(&size)),
0,
0)
if errno != 0 {
return nil, errno
}
return bytes.NewBuffer(bs[0:size]), nil
}
const (
_CTRL_KERN = 1
_KERN_PROC = 14
_KERN_PROC_ALL = 0
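// _KINFO_STRUCT_SIZE is the size of struct kinfo_proc on 64-bit darwin;
// kinfoProc below names only the fields that are actually read.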
_KINFO_STRUCT_SIZE = 648
)
type kinfoProc struct {
_ [40]byte
Pid int32
_ [199]byte
Comm [16]byte
_ [301]byte
PPid int32
_ [84]byte
}

View File

@ -1,260 +0,0 @@
// +build freebsd,amd64
package ps
import (
"bytes"
"encoding/binary"
"syscall"
"unsafe"
)
// copied from sys/sysctl.h
const (
CTL_KERN = 1 // "high kernel": proc, limits
KERN_PROC = 14 // struct: process entries
KERN_PROC_PID = 1 // by process id
KERN_PROC_PROC = 8 // only return procs
KERN_PROC_PATHNAME = 12 // path to executable
)
// copied from sys/user.h
type Kinfo_proc struct {
Ki_structsize int32
Ki_layout int32
Ki_args int64
Ki_paddr int64
Ki_addr int64
Ki_tracep int64
Ki_textvp int64
Ki_fd int64
Ki_vmspace int64
Ki_wchan int64
Ki_pid int32
Ki_ppid int32
Ki_pgid int32
Ki_tpgid int32
Ki_sid int32
Ki_tsid int32
Ki_jobc [2]byte
Ki_spare_short1 [2]byte
Ki_tdev int32
Ki_siglist [16]byte
Ki_sigmask [16]byte
Ki_sigignore [16]byte
Ki_sigcatch [16]byte
Ki_uid int32
Ki_ruid int32
Ki_svuid int32
Ki_rgid int32
Ki_svgid int32
Ki_ngroups [2]byte
Ki_spare_short2 [2]byte
Ki_groups [64]byte
Ki_size int64
Ki_rssize int64
Ki_swrss int64
Ki_tsize int64
Ki_dsize int64
Ki_ssize int64
Ki_xstat [2]byte
Ki_acflag [2]byte
Ki_pctcpu int32
Ki_estcpu int32
Ki_slptime int32
Ki_swtime int32
Ki_cow int32
Ki_runtime int64
Ki_start [16]byte
Ki_childtime [16]byte
Ki_flag int64
Ki_kiflag int64
Ki_traceflag int32
Ki_stat [1]byte
Ki_nice [1]byte
Ki_lock [1]byte
Ki_rqindex [1]byte
Ki_oncpu [1]byte
Ki_lastcpu [1]byte
Ki_ocomm [17]byte
Ki_wmesg [9]byte
Ki_login [18]byte
Ki_lockname [9]byte
Ki_comm [20]byte
Ki_emul [17]byte
Ki_sparestrings [68]byte
Ki_spareints [36]byte
Ki_cr_flags int32
Ki_jid int32
Ki_numthreads int32
Ki_tid int32
Ki_pri int32
Ki_rusage [144]byte
Ki_rusage_ch [144]byte
Ki_pcb int64
Ki_kstack int64
Ki_udata int64
Ki_tdaddr int64
Ki_spareptrs [48]byte
Ki_spareint64s [96]byte
Ki_sflag int64
Ki_tdflags int64
}
// UnixProcess is an implementation of Process that contains Unix-specific
// fields and information.
type UnixProcess struct {
pid int
ppid int
state rune
pgrp int
sid int
binary string
}
func (p *UnixProcess) Pid() int {
return p.pid
}
func (p *UnixProcess) PPid() int {
return p.ppid
}
func (p *UnixProcess) Executable() string {
return p.binary
}
// Refresh reloads all the data associated with this process.
func (p *UnixProcess) Refresh() error {
mib := []int32{CTL_KERN, KERN_PROC, KERN_PROC_PID, int32(p.pid)}
buf, length, err := call_syscall(mib)
if err != nil {
return err
}
proc_k := Kinfo_proc{}
if length != uint64(unsafe.Sizeof(proc_k)) {
// call_syscall returned a nil error here, so report the size mismatch
// explicitly instead of silently returning nil.
return syscall.EINVAL
}
k, err := parse_kinfo_proc(buf)
if err != nil {
return err
}
p.ppid, p.pgrp, p.sid, p.binary = copy_params(&k)
return nil
}
func copy_params(k *Kinfo_proc) (int, int, int, string) {
n := -1
for i, b := range k.Ki_comm {
if b == 0 {
break
}
n = i + 1
}
comm := string(k.Ki_comm[:n])
return int(k.Ki_ppid), int(k.Ki_pgid), int(k.Ki_sid), comm
}
func findProcess(pid int) (Process, error) {
mib := []int32{CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, int32(pid)}
_, _, err := call_syscall(mib)
if err != nil {
return nil, err
}
return newUnixProcess(pid)
}
func processes() ([]Process, error) {
results := make([]Process, 0, 50)
mib := []int32{CTL_KERN, KERN_PROC, KERN_PROC_PROC, 0}
buf, length, err := call_syscall(mib)
if err != nil {
return results, err
}
// get kinfo_proc size
k := Kinfo_proc{}
procinfo_len := int(unsafe.Sizeof(k))
count := int(length / uint64(procinfo_len))
// parse buf to procs
for i := 0; i < count; i++ {
b := buf[i*procinfo_len : i*procinfo_len+procinfo_len]
k, err := parse_kinfo_proc(b)
if err != nil {
continue
}
p, err := newUnixProcess(int(k.Ki_pid))
if err != nil {
continue
}
p.ppid, p.pgrp, p.sid, p.binary = copy_params(&k)
results = append(results, p)
}
return results, nil
}
func parse_kinfo_proc(buf []byte) (Kinfo_proc, error) {
var k Kinfo_proc
br := bytes.NewReader(buf)
err := binary.Read(br, binary.LittleEndian, &k)
if err != nil {
return k, err
}
return k, nil
}
func call_syscall(mib []int32) ([]byte, uint64, error) {
miblen := uint64(len(mib))
// get required buffer size
length := uint64(0)
_, _, err := syscall.RawSyscall6(
syscall.SYS___SYSCTL,
uintptr(unsafe.Pointer(&mib[0])),
uintptr(miblen),
0,
uintptr(unsafe.Pointer(&length)),
0,
0)
if err != 0 {
b := make([]byte, 0)
return b, length, err
}
if length == 0 {
b := make([]byte, 0)
return b, length, err
}
// get proc info itself
buf := make([]byte, length)
_, _, err = syscall.RawSyscall6(
syscall.SYS___SYSCTL,
uintptr(unsafe.Pointer(&mib[0])),
uintptr(miblen),
uintptr(unsafe.Pointer(&buf[0])),
uintptr(unsafe.Pointer(&length)),
0,
0)
if err != 0 {
return buf, length, err
}
return buf, length, nil
}
func newUnixProcess(pid int) (*UnixProcess, error) {
p := &UnixProcess{pid: pid}
return p, p.Refresh()
}

View File

@ -1,129 +0,0 @@
// +build linux
package ps
import (
"fmt"
"io"
"io/ioutil"
"os"
"strconv"
"strings"
)
// UnixProcess is an implementation of Process that contains Unix-specific
// fields and information.
type UnixProcess struct {
pid int
ppid int
state rune
pgrp int
sid int
binary string
}
func (p *UnixProcess) Pid() int {
return p.pid
}
func (p *UnixProcess) PPid() int {
return p.ppid
}
func (p *UnixProcess) Executable() string {
return p.binary
}
// Refresh reloads all the data associated with this process.
func (p *UnixProcess) Refresh() error {
statPath := fmt.Sprintf("/proc/%d/stat", p.pid)
dataBytes, err := ioutil.ReadFile(statPath)
if err != nil {
return err
}
// First, parse out the image name
data := string(dataBytes)
binStart := strings.IndexRune(data, '(') + 1
binEnd := strings.IndexRune(data[binStart:], ')')
p.binary = data[binStart : binStart+binEnd]
// Move past the image name and start parsing the rest
data = data[binStart+binEnd+2:]
_, err = fmt.Sscanf(data,
"%c %d %d %d",
&p.state,
&p.ppid,
&p.pgrp,
&p.sid)
return err
}
func findProcess(pid int) (Process, error) {
dir := fmt.Sprintf("/proc/%d", pid)
_, err := os.Stat(dir)
if err != nil {
if os.IsNotExist(err) {
return nil, nil
}
return nil, err
}
return newUnixProcess(pid)
}
func processes() ([]Process, error) {
d, err := os.Open("/proc")
if err != nil {
return nil, err
}
defer d.Close()
results := make([]Process, 0, 50)
for {
fis, err := d.Readdir(10)
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
for _, fi := range fis {
// We only care about directories, since all pids are dirs
if !fi.IsDir() {
continue
}
// We only care if the name starts with a numeric
name := fi.Name()
if name[0] < '0' || name[0] > '9' {
continue
}
// From this point forward, we just ignore any errors, because the
// process might simply no longer exist.
pid, err := strconv.ParseInt(name, 10, 0)
if err != nil {
continue
}
p, err := newUnixProcess(int(pid))
if err != nil {
continue
}
results = append(results, p)
}
}
return results, nil
}
func newUnixProcess(pid int) (*UnixProcess, error) {
p := &UnixProcess{pid: pid}
return p, p.Refresh()
}

View File

@ -1,119 +0,0 @@
// +build windows
package ps
import (
"fmt"
"syscall"
"unsafe"
)
// Windows API functions
var (
modKernel32 = syscall.NewLazyDLL("kernel32.dll")
procCloseHandle = modKernel32.NewProc("CloseHandle")
procCreateToolhelp32Snapshot = modKernel32.NewProc("CreateToolhelp32Snapshot")
procProcess32First = modKernel32.NewProc("Process32FirstW")
procProcess32Next = modKernel32.NewProc("Process32NextW")
)
// Some constants from the Windows API
const (
ERROR_NO_MORE_FILES = 0x12
MAX_PATH = 260
)
// PROCESSENTRY32 is the Windows API structure that contains a process's
// information.
type PROCESSENTRY32 struct {
Size uint32
CntUsage uint32
ProcessID uint32
DefaultHeapID uintptr
ModuleID uint32
CntThreads uint32
ParentProcessID uint32
PriorityClassBase int32
Flags uint32
ExeFile [MAX_PATH]uint16
}
// WindowsProcess is an implementation of Process for Windows.
type WindowsProcess struct {
pid int
ppid int
exe string
}
func (p *WindowsProcess) Pid() int {
return p.pid
}
func (p *WindowsProcess) PPid() int {
return p.ppid
}
func (p *WindowsProcess) Executable() string {
return p.exe
}
func newWindowsProcess(e *PROCESSENTRY32) *WindowsProcess {
// Find when the string ends for decoding
end := 0
for {
if e.ExeFile[end] == 0 {
break
}
end++
}
return &WindowsProcess{
pid: int(e.ProcessID),
ppid: int(e.ParentProcessID),
exe: syscall.UTF16ToString(e.ExeFile[:end]),
}
}
func findProcess(pid int) (Process, error) {
ps, err := processes()
if err != nil {
return nil, err
}
for _, p := range ps {
if p.Pid() == pid {
return p, nil
}
}
return nil, nil
}
func processes() ([]Process, error) {
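// 0x00000002 is TH32CS_SNAPPROCESS: snapshot all processes in the system.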
handle, _, _ := procCreateToolhelp32Snapshot.Call(
0x00000002,
0)
if handle == uintptr(syscall.InvalidHandle) {
return nil, syscall.GetLastError()
}
defer procCloseHandle.Call(handle)
var entry PROCESSENTRY32
entry.Size = uint32(unsafe.Sizeof(entry))
ret, _, _ := procProcess32First.Call(handle, uintptr(unsafe.Pointer(&entry)))
if ret == 0 {
return nil, fmt.Errorf("Error retrieving process info.")
}
results := make([]Process, 0, 50)
for {
results = append(results, newWindowsProcess(&entry))
ret, _, _ := procProcess32Next.Call(handle, uintptr(unsafe.Pointer(&entry)))
if ret == 0 {
break
}
}
return results, nil
}

View File

@ -1,61 +0,0 @@
gopsutil is distributed under BSD license reproduced below.
Copyright (c) 2014, WAKAYAMA Shirou
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the gopsutil authors nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------
internal/common/binary.go in the gopsutil is copied and modified from golang/encoding/binary.go.
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,93 +0,0 @@
package cpu
import (
"encoding/json"
"runtime"
"strconv"
"strings"
"sync"
"github.com/shirou/gopsutil/internal/common"
)
type TimesStat struct {
CPU string `json:"cpu"`
User float64 `json:"user"`
System float64 `json:"system"`
Idle float64 `json:"idle"`
Nice float64 `json:"nice"`
Iowait float64 `json:"iowait"`
Irq float64 `json:"irq"`
Softirq float64 `json:"softirq"`
Steal float64 `json:"steal"`
Guest float64 `json:"guest"`
GuestNice float64 `json:"guestNice"`
Stolen float64 `json:"stolen"`
}
type InfoStat struct {
CPU int32 `json:"cpu"`
VendorID string `json:"vendorId"`
Family string `json:"family"`
Model string `json:"model"`
Stepping int32 `json:"stepping"`
PhysicalID string `json:"physicalId"`
CoreID string `json:"coreId"`
Cores int32 `json:"cores"`
ModelName string `json:"modelName"`
Mhz float64 `json:"mhz"`
CacheSize int32 `json:"cacheSize"`
Flags []string `json:"flags"`
}
type lastPercent struct {
sync.Mutex
lastCPUTimes []TimesStat
lastPerCPUTimes []TimesStat
}
var lastCPUPercent lastPercent
var invoke common.Invoker
func init() {
invoke = common.Invoke{}
lastCPUPercent.Lock()
lastCPUPercent.lastCPUTimes, _ = Times(false)
lastCPUPercent.lastPerCPUTimes, _ = Times(true)
lastCPUPercent.Unlock()
}
func Counts(logical bool) (int, error) {
return runtime.NumCPU(), nil
}
func (c TimesStat) String() string {
v := []string{
`"cpu":"` + c.CPU + `"`,
`"user":` + strconv.FormatFloat(c.User, 'f', 1, 64),
`"system":` + strconv.FormatFloat(c.System, 'f', 1, 64),
`"idle":` + strconv.FormatFloat(c.Idle, 'f', 1, 64),
`"nice":` + strconv.FormatFloat(c.Nice, 'f', 1, 64),
`"iowait":` + strconv.FormatFloat(c.Iowait, 'f', 1, 64),
`"irq":` + strconv.FormatFloat(c.Irq, 'f', 1, 64),
`"softirq":` + strconv.FormatFloat(c.Softirq, 'f', 1, 64),
`"steal":` + strconv.FormatFloat(c.Steal, 'f', 1, 64),
`"guest":` + strconv.FormatFloat(c.Guest, 'f', 1, 64),
`"guestNice":` + strconv.FormatFloat(c.GuestNice, 'f', 1, 64),
`"stolen":` + strconv.FormatFloat(c.Stolen, 'f', 1, 64),
}
return `{` + strings.Join(v, ",") + `}`
}
// Total returns the total number of seconds in a TimesStat
func (c TimesStat) Total() float64 {
total := c.User + c.System + c.Nice + c.Iowait + c.Irq + c.Softirq + c.Steal +
c.Guest + c.GuestNice + c.Idle + c.Stolen
return total
}
func (c InfoStat) String() string {
s, _ := json.Marshal(c)
return string(s)
}

View File

@ -1,106 +0,0 @@
// +build darwin
package cpu
import (
"os/exec"
"strconv"
"strings"
)
// sys/resource.h
const (
CPUser = 0
CPNice = 1
CPSys = 2
CPIntr = 3
CPIdle = 4
CPUStates = 5
)
// default value. from time.h
var ClocksPerSec = float64(128)
func Times(percpu bool) ([]TimesStat, error) {
if percpu {
return perCPUTimes()
}
return allCPUTimes()
}
// Info returns only one InfoStat on Darwin
func Info() ([]InfoStat, error) {
var ret []InfoStat
sysctl, err := exec.LookPath("/usr/sbin/sysctl")
if err != nil {
return ret, err
}
out, err := invoke.Command(sysctl, "machdep.cpu")
if err != nil {
return ret, err
}
c := InfoStat{}
for _, line := range strings.Split(string(out), "\n") {
values := strings.Fields(line)
if len(values) < 1 {
continue
}
t, err := strconv.ParseInt(values[1], 10, 64)
// err is intentionally not checked here because some values are strings.
if strings.HasPrefix(line, "machdep.cpu.brand_string") {
c.ModelName = strings.Join(values[1:], " ")
} else if strings.HasPrefix(line, "machdep.cpu.family") {
c.Family = values[1]
} else if strings.HasPrefix(line, "machdep.cpu.model") {
c.Model = values[1]
} else if strings.HasPrefix(line, "machdep.cpu.stepping") {
if err != nil {
return ret, err
}
c.Stepping = int32(t)
} else if strings.HasPrefix(line, "machdep.cpu.features") {
for _, v := range values[1:] {
c.Flags = append(c.Flags, strings.ToLower(v))
}
} else if strings.HasPrefix(line, "machdep.cpu.leaf7_features") {
for _, v := range values[1:] {
c.Flags = append(c.Flags, strings.ToLower(v))
}
} else if strings.HasPrefix(line, "machdep.cpu.extfeatures") {
for _, v := range values[1:] {
c.Flags = append(c.Flags, strings.ToLower(v))
}
} else if strings.HasPrefix(line, "machdep.cpu.core_count") {
if err != nil {
return ret, err
}
c.Cores = int32(t)
} else if strings.HasPrefix(line, "machdep.cpu.cache.size") {
if err != nil {
return ret, err
}
c.CacheSize = int32(t)
} else if strings.HasPrefix(line, "machdep.cpu.vendor") {
c.VendorID = values[1]
}
}
// Use the rated frequency of the CPU. This is a static value and does not
// account for low power or Turbo Boost modes.
out, err = invoke.Command(sysctl, "hw.cpufrequency")
if err != nil {
return ret, err
}
values := strings.Fields(string(out))
mhz, err := strconv.ParseFloat(values[1], 64)
if err != nil {
return ret, err
}
c.Mhz = mhz / 1000000.0
return append(ret, c), nil
}

View File

@ -1,109 +0,0 @@
// +build darwin
// +build cgo
package cpu
/*
#include <stdlib.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <mach/mach_init.h>
#include <mach/mach_host.h>
#include <mach/host_info.h>
#if TARGET_OS_MAC
#include <libproc.h>
#endif
#include <mach/processor_info.h>
#include <mach/vm_map.h>
*/
import "C"
import (
"bytes"
"encoding/binary"
"fmt"
"unsafe"
)
// These CPU times for darwin are borrowed from influxdb/telegraf.
func perCPUTimes() ([]TimesStat, error) {
var (
count C.mach_msg_type_number_t
cpuload *C.processor_cpu_load_info_data_t
ncpu C.natural_t
)
status := C.host_processor_info(C.host_t(C.mach_host_self()),
C.PROCESSOR_CPU_LOAD_INFO,
&ncpu,
(*C.processor_info_array_t)(unsafe.Pointer(&cpuload)),
&count)
if status != C.KERN_SUCCESS {
return nil, fmt.Errorf("host_processor_info error=%d", status)
}
// jump through some cgo casting hoops and ensure we properly free
// the memory that cpuload points to
target := C.vm_map_t(C.mach_task_self_)
address := C.vm_address_t(uintptr(unsafe.Pointer(cpuload)))
defer C.vm_deallocate(target, address, C.vm_size_t(ncpu))
// the body of struct processor_cpu_load_info
// aka processor_cpu_load_info_data_t
var cpu_ticks [C.CPU_STATE_MAX]uint32
// copy the cpuload array to a []byte buffer
// where we can binary.Read the data
size := int(ncpu) * binary.Size(cpu_ticks)
buf := C.GoBytes(unsafe.Pointer(cpuload), C.int(size))
bbuf := bytes.NewBuffer(buf)
var ret []TimesStat
for i := 0; i < int(ncpu); i++ {
err := binary.Read(bbuf, binary.LittleEndian, &cpu_ticks)
if err != nil {
return nil, err
}
c := TimesStat{
CPU: fmt.Sprintf("cpu%d", i),
User: float64(cpu_ticks[C.CPU_STATE_USER]) / ClocksPerSec,
System: float64(cpu_ticks[C.CPU_STATE_SYSTEM]) / ClocksPerSec,
Nice: float64(cpu_ticks[C.CPU_STATE_NICE]) / ClocksPerSec,
Idle: float64(cpu_ticks[C.CPU_STATE_IDLE]) / ClocksPerSec,
}
ret = append(ret, c)
}
return ret, nil
}
func allCPUTimes() ([]TimesStat, error) {
var count C.mach_msg_type_number_t = C.HOST_CPU_LOAD_INFO_COUNT
var cpuload C.host_cpu_load_info_data_t
status := C.host_statistics(C.host_t(C.mach_host_self()),
C.HOST_CPU_LOAD_INFO,
C.host_info_t(unsafe.Pointer(&cpuload)),
&count)
if status != C.KERN_SUCCESS {
return nil, fmt.Errorf("host_statistics error=%d", status)
}
c := TimesStat{
CPU: "cpu-total",
User: float64(cpuload.cpu_ticks[C.CPU_STATE_USER]) / ClocksPerSec,
System: float64(cpuload.cpu_ticks[C.CPU_STATE_SYSTEM]) / ClocksPerSec,
Nice: float64(cpuload.cpu_ticks[C.CPU_STATE_NICE]) / ClocksPerSec,
Idle: float64(cpuload.cpu_ticks[C.CPU_STATE_IDLE]) / ClocksPerSec,
}
return []TimesStat{c}, nil
}

View File

@ -1,14 +0,0 @@
// +build darwin
// +build !cgo
package cpu
import "github.com/shirou/gopsutil/internal/common"
func perCPUTimes() ([]TimesStat, error) {
return []TimesStat{}, common.ErrNotImplementedError
}
func allCPUTimes() ([]TimesStat, error) {
return []TimesStat{}, common.ErrNotImplementedError
}

View File

@ -1,154 +0,0 @@
package cpu
import (
"fmt"
"os/exec"
"regexp"
"strconv"
"strings"
"github.com/shirou/gopsutil/internal/common"
)
// sys/resource.h
const (
CPUser = 0
CPNice = 1
CPSys = 2
CPIntr = 3
CPIdle = 4
CPUStates = 5
)
var ClocksPerSec = float64(128)
func init() {
getconf, err := exec.LookPath("/usr/bin/getconf")
if err != nil {
return
}
out, err := invoke.Command(getconf, "CLK_TCK")
// ignore errors
if err == nil {
i, err := strconv.ParseFloat(strings.TrimSpace(string(out)), 64)
if err == nil {
ClocksPerSec = float64(i)
}
}
}
func Times(percpu bool) ([]TimesStat, error) {
var ret []TimesStat
var sysctlCall string
var ncpu int
if percpu {
sysctlCall = "kern.cp_times"
ncpu, _ = Counts(true)
} else {
sysctlCall = "kern.cp_time"
ncpu = 1
}
cpuTimes, err := common.DoSysctrl(sysctlCall)
if err != nil {
return ret, err
}
for i := 0; i < ncpu; i++ {
offset := CPUStates * i
user, err := strconv.ParseFloat(cpuTimes[CPUser+offset], 64)
if err != nil {
return ret, err
}
nice, err := strconv.ParseFloat(cpuTimes[CPNice+offset], 64)
if err != nil {
return ret, err
}
sys, err := strconv.ParseFloat(cpuTimes[CPSys+offset], 64)
if err != nil {
return ret, err
}
idle, err := strconv.ParseFloat(cpuTimes[CPIdle+offset], 64)
if err != nil {
return ret, err
}
intr, err := strconv.ParseFloat(cpuTimes[CPIntr+offset], 64)
if err != nil {
return ret, err
}
c := TimesStat{
User: float64(user / ClocksPerSec),
Nice: float64(nice / ClocksPerSec),
System: float64(sys / ClocksPerSec),
Idle: float64(idle / ClocksPerSec),
Irq: float64(intr / ClocksPerSec),
}
if !percpu {
c.CPU = "cpu-total"
} else {
c.CPU = fmt.Sprintf("cpu%d", i)
}
ret = append(ret, c)
}
return ret, nil
}
// Info returns only one InfoStat on FreeBSD. The core count is accurate, and
// all other InfoStat attributes are assumed to be the same across CPUs.
func Info() ([]InfoStat, error) {
const dmesgBoot = "/var/run/dmesg.boot"
lines, _ := common.ReadLines(dmesgBoot)
c := InfoStat{}
var vals []string
var err error
if vals, err = common.DoSysctrl("hw.clockrate"); err != nil {
return nil, err
}
if c.Mhz, err = strconv.ParseFloat(vals[0], 64); err != nil {
return nil, fmt.Errorf("Unable to parse FreeBSD CPU clock rate: %v", err)
}
c.CPU = int32(c.Mhz)
if vals, err = common.DoSysctrl("hw.ncpu"); err != nil {
return nil, err
}
var i64 int64
if i64, err = strconv.ParseInt(vals[0], 10, 32); err != nil {
return nil, fmt.Errorf("Unable to parse FreeBSD cores: %v", err)
}
c.Cores = int32(i64)
if vals, err = common.DoSysctrl("hw.model"); err != nil {
return nil, err
}
c.ModelName = strings.Join(vals, " ")
for _, line := range lines {
if matches := regexp.MustCompile(`Origin\s*=\s*"(.+)"\s+Id\s*=\s*(.+)\s+Family\s*=\s*(.+)\s+Model\s*=\s*(.+)\s+Stepping\s*=\s*(.+)`).FindStringSubmatch(line); matches != nil {
c.VendorID = matches[1]
c.Family = matches[3]
c.Model = matches[4]
t, err := strconv.ParseInt(matches[5], 10, 32)
if err != nil {
return nil, fmt.Errorf("Unable to parse FreeBSD CPU stepping information from %q: %v", line, err)
}
c.Stepping = int32(t)
} else if matches := regexp.MustCompile(`Features=.+<(.+)>`).FindStringSubmatch(line); matches != nil {
for _, v := range strings.Split(matches[1], ",") {
c.Flags = append(c.Flags, strings.ToLower(v))
}
} else if matches := regexp.MustCompile(`Features2=[a-f\dx]+<(.+)>`).FindStringSubmatch(line); matches != nil {
for _, v := range strings.Split(matches[1], ",") {
c.Flags = append(c.Flags, strings.ToLower(v))
}
}
}
return []InfoStat{c}, nil
}

View File

@ -1,244 +0,0 @@
// +build linux
package cpu
import (
"errors"
"fmt"
"os/exec"
"strconv"
"strings"
"github.com/shirou/gopsutil/internal/common"
)
var cpu_tick = float64(100)
func init() {
getconf, err := exec.LookPath("/usr/bin/getconf")
if err != nil {
return
}
out, err := invoke.Command(getconf, "CLK_TCK")
// ignore errors
if err == nil {
i, err := strconv.ParseFloat(strings.TrimSpace(string(out)), 64)
if err == nil {
cpu_tick = float64(i)
}
}
}
func Times(percpu bool) ([]TimesStat, error) {
filename := common.HostProc("stat")
var lines = []string{}
if percpu {
var startIdx uint = 1
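// Index 0 of /proc/stat is the aggregate "cpu" line; read per-CPU lines
// starting at index 1 until the prefix stops matching.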
for {
linen, _ := common.ReadLinesOffsetN(filename, startIdx, 1)
line := linen[0]
if !strings.HasPrefix(line, "cpu") {
break
}
lines = append(lines, line)
startIdx++
}
} else {
lines, _ = common.ReadLinesOffsetN(filename, 0, 1)
}
ret := make([]TimesStat, 0, len(lines))
for _, line := range lines {
ct, err := parseStatLine(line)
if err != nil {
continue
}
ret = append(ret, *ct)
}
return ret, nil
}
func sysCPUPath(cpu int32, relPath string) string {
return common.HostSys(fmt.Sprintf("devices/system/cpu/cpu%d", cpu), relPath)
}
func finishCPUInfo(c *InfoStat) error {
if c.Mhz == 0 {
lines, err := common.ReadLines(sysCPUPath(c.CPU, "cpufreq/cpuinfo_max_freq"))
if err == nil {
value, err := strconv.ParseFloat(lines[0], 64)
if err != nil {
return err
}
c.Mhz = value
}
}
if len(c.CoreID) == 0 {
lines, err := common.ReadLines(sysCPUPath(c.CPU, "topology/core_id"))
if err == nil {
c.CoreID = lines[0]
}
}
return nil
}
// Info on Linux returns one InfoStat per hardware thread.
//
// CPUs have three levels of counting: sockets, cores, threads.
// Cores with HyperThreading count as having 2 threads per core.
// Sockets often come with many physical CPU cores.
// For example a single socket board with two cores each with HT will
// return 4 CPUInfoStat structs on Linux and the "Cores" field set to 1.
func Info() ([]InfoStat, error) {
filename := common.HostProc("cpuinfo")
lines, _ := common.ReadLines(filename)
var ret []InfoStat
c := InfoStat{CPU: -1, Cores: 1}
for _, line := range lines {
fields := strings.Split(line, ":")
if len(fields) < 2 {
continue
}
key := strings.TrimSpace(fields[0])
value := strings.TrimSpace(fields[1])
switch key {
case "processor":
if c.CPU >= 0 {
err := finishCPUInfo(&c)
if err != nil {
return ret, err
}
ret = append(ret, c)
}
c = InfoStat{Cores: 1}
t, err := strconv.ParseInt(value, 10, 64)
if err != nil {
return ret, err
}
c.CPU = int32(t)
case "vendorId", "vendor_id":
c.VendorID = value
case "cpu family":
c.Family = value
case "model":
c.Model = value
case "model name":
c.ModelName = value
case "stepping":
t, err := strconv.ParseInt(value, 10, 64)
if err != nil {
return ret, err
}
c.Stepping = int32(t)
case "cpu MHz":
t, err := strconv.ParseFloat(value, 64)
if err != nil {
return ret, err
}
c.Mhz = t
case "cache size":
t, err := strconv.ParseInt(strings.Replace(value, " KB", "", 1), 10, 64)
if err != nil {
return ret, err
}
c.CacheSize = int32(t)
case "physical id":
c.PhysicalID = value
case "core id":
c.CoreID = value
case "flags", "Features":
c.Flags = strings.FieldsFunc(value, func(r rune) bool {
return r == ',' || r == ' '
})
}
}
if c.CPU >= 0 {
err := finishCPUInfo(&c)
if err != nil {
return ret, err
}
ret = append(ret, c)
}
return ret, nil
}
func parseStatLine(line string) (*TimesStat, error) {
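// /proc/stat reports cumulative clock ticks per state; dividing by cpu_tick
// (CLK_TCK) converts them to seconds.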
fields := strings.Fields(line)
if !strings.HasPrefix(fields[0], "cpu") {
return nil, errors.New("stat line does not start with cpu")
}
cpu := fields[0]
if cpu == "cpu" {
cpu = "cpu-total"
}
user, err := strconv.ParseFloat(fields[1], 64)
if err != nil {
return nil, err
}
nice, err := strconv.ParseFloat(fields[2], 64)
if err != nil {
return nil, err
}
system, err := strconv.ParseFloat(fields[3], 64)
if err != nil {
return nil, err
}
idle, err := strconv.ParseFloat(fields[4], 64)
if err != nil {
return nil, err
}
iowait, err := strconv.ParseFloat(fields[5], 64)
if err != nil {
return nil, err
}
irq, err := strconv.ParseFloat(fields[6], 64)
if err != nil {
return nil, err
}
softirq, err := strconv.ParseFloat(fields[7], 64)
if err != nil {
return nil, err
}
ct := &TimesStat{
CPU: cpu,
User: float64(user) / cpu_tick,
Nice: float64(nice) / cpu_tick,
System: float64(system) / cpu_tick,
Idle: float64(idle) / cpu_tick,
Iowait: float64(iowait) / cpu_tick,
Irq: float64(irq) / cpu_tick,
Softirq: float64(softirq) / cpu_tick,
}
if len(fields) > 8 { // Linux >= 2.6.11
steal, err := strconv.ParseFloat(fields[8], 64)
if err != nil {
return nil, err
}
ct.Steal = float64(steal) / cpu_tick
}
if len(fields) > 9 { // Linux >= 2.6.24
guest, err := strconv.ParseFloat(fields[9], 64)
if err != nil {
return nil, err
}
ct.Guest = float64(guest) / cpu_tick
}
if len(fields) > 10 { // Linux >= 3.2.0
guestNice, err := strconv.ParseFloat(fields[10], 64)
if err != nil {
return nil, err
}
ct.GuestNice = float64(guestNice) / cpu_tick
}
return ct, nil
}

Some files were not shown because too many files have changed in this diff