Merge pull request #10066 from hashicorp/b-nomad

vendor: patch Nomad for cross compilability

Commit: f4cf443368
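The deleted vendored files below include several per-platform variants of the same packages (darwin, freebsd, linux, unix, windows). Go selects among such files at compile time through filename suffixes and build constraints, which is the mechanism that lets a patched vendor tree cross compile without runtime OS checks. A minimal sketch of that pattern, using hypothetical file and identifier names that are not taken from this PR:

// devnull_windows.go, built only when GOOS=windows (filename suffix).
package example

func devNull() string { return "NUL" }

// devnull_other.go, built for every other GOOS via the constraint below.
//+build !windows

package example

func devNull() string { return "/dev/null" }

Callers just use devNull(); the toolchain compiles whichever file matches the target GOOS, so cross building (for example GOOS=windows GOARCH=amd64 go build) needs no conditional code.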
@@ -1,579 +0,0 @@
package client
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
)
|
||||
|
||||
const (
|
||||
// taskReceivedSyncLimit is how long the client will wait before sending
|
||||
// that a task was received to the server. The client does not immediately
|
||||
// send that the task was received to the server because another transition
|
||||
// to running or failed is likely to occur immediately after and a single
|
||||
// update will transfer all past state information. If no other transition
|
||||
// has occurred up to this limit, we will send to the server.
|
||||
taskReceivedSyncLimit = 30 * time.Second
|
||||
)
|
||||
|
||||
// AllocStateUpdater is used to update the status of an allocation
|
||||
type AllocStateUpdater func(alloc *structs.Allocation)
|
||||
|
||||
type AllocStatsReporter interface {
|
||||
LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
|
||||
}
|
||||
|
||||
// AllocRunner is used to wrap an allocation and provide the execution context.
|
||||
type AllocRunner struct {
|
||||
config *config.Config
|
||||
updater AllocStateUpdater
|
||||
logger *log.Logger
|
||||
|
||||
alloc *structs.Allocation
|
||||
allocClientStatus string // Explicit status of allocation. Set when there are failures
|
||||
allocClientDescription string
|
||||
allocLock sync.Mutex
|
||||
|
||||
dirtyCh chan struct{}
|
||||
|
||||
ctx *driver.ExecContext
|
||||
ctxLock sync.Mutex
|
||||
tasks map[string]*TaskRunner
|
||||
taskStates map[string]*structs.TaskState
|
||||
restored map[string]struct{}
|
||||
taskLock sync.RWMutex
|
||||
|
||||
taskStatusLock sync.RWMutex
|
||||
|
||||
updateCh chan *structs.Allocation
|
||||
|
||||
destroy bool
|
||||
destroyCh chan struct{}
|
||||
destroyLock sync.Mutex
|
||||
waitCh chan struct{}
|
||||
}
|
||||
|
||||
// allocRunnerState is used to snapshot the state of the alloc runner
|
||||
type allocRunnerState struct {
|
||||
Version string
|
||||
Alloc *structs.Allocation
|
||||
AllocClientStatus string
|
||||
AllocClientDescription string
|
||||
TaskStates map[string]*structs.TaskState
|
||||
Context *driver.ExecContext
|
||||
}
|
||||
|
||||
// NewAllocRunner is used to create a new allocation context
|
||||
func NewAllocRunner(logger *log.Logger, config *config.Config, updater AllocStateUpdater,
|
||||
alloc *structs.Allocation) *AllocRunner {
|
||||
ar := &AllocRunner{
|
||||
config: config,
|
||||
updater: updater,
|
||||
logger: logger,
|
||||
alloc: alloc,
|
||||
dirtyCh: make(chan struct{}, 1),
|
||||
tasks: make(map[string]*TaskRunner),
|
||||
taskStates: copyTaskStates(alloc.TaskStates),
|
||||
restored: make(map[string]struct{}),
|
||||
updateCh: make(chan *structs.Allocation, 64),
|
||||
destroyCh: make(chan struct{}),
|
||||
waitCh: make(chan struct{}),
|
||||
}
|
||||
return ar
|
||||
}
|
||||
|
||||
// stateFilePath returns the path to our state file
|
||||
func (r *AllocRunner) stateFilePath() string {
|
||||
r.allocLock.Lock()
|
||||
defer r.allocLock.Unlock()
|
||||
path := filepath.Join(r.config.StateDir, "alloc", r.alloc.ID, "state.json")
|
||||
return path
|
||||
}
|
||||
|
||||
// RestoreState is used to restore the state of the alloc runner
|
||||
func (r *AllocRunner) RestoreState() error {
|
||||
// Load the snapshot
|
||||
var snap allocRunnerState
|
||||
if err := restoreState(r.stateFilePath(), &snap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Restore fields
|
||||
r.alloc = snap.Alloc
|
||||
r.ctx = snap.Context
|
||||
r.allocClientStatus = snap.AllocClientStatus
|
||||
r.allocClientDescription = snap.AllocClientDescription
|
||||
r.taskStates = snap.TaskStates
|
||||
|
||||
var snapshotErrors multierror.Error
|
||||
if r.alloc == nil {
|
||||
snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation"))
|
||||
}
|
||||
if r.ctx == nil {
|
||||
snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil context"))
|
||||
}
|
||||
if e := snapshotErrors.ErrorOrNil(); e != nil {
|
||||
return e
|
||||
}
|
||||
|
||||
// Restore the task runners
|
||||
var mErr multierror.Error
|
||||
for name, state := range r.taskStates {
|
||||
// Mark the task as restored.
|
||||
r.restored[name] = struct{}{}
|
||||
|
||||
task := &structs.Task{Name: name}
|
||||
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(),
|
||||
task)
|
||||
r.tasks[name] = tr
|
||||
|
||||
// Skip tasks in terminal states.
|
||||
if state.State == structs.TaskStateDead {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := tr.RestoreState(); err != nil {
|
||||
r.logger.Printf("[ERR] client: failed to restore state for alloc %s task '%s': %v", r.alloc.ID, name, err)
|
||||
mErr.Errors = append(mErr.Errors, err)
|
||||
} else if !r.alloc.TerminalStatus() {
|
||||
// Only start if the alloc isn't in a terminal status.
|
||||
go tr.Run()
|
||||
}
|
||||
}
|
||||
return mErr.ErrorOrNil()
|
||||
}
|
||||
|
||||
// SaveState is used to snapshot the state of the alloc runner
|
||||
// if the fullSync is marked as false only the state of the Alloc Runner
|
||||
// is snapshotted. If fullSync is marked as true, we snapshot
|
||||
// all the Task Runners associated with the Alloc
|
||||
func (r *AllocRunner) SaveState() error {
|
||||
if err := r.saveAllocRunnerState(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Save state for each task
|
||||
runners := r.getTaskRunners()
|
||||
var mErr multierror.Error
|
||||
for _, tr := range runners {
|
||||
if err := r.saveTaskRunnerState(tr); err != nil {
|
||||
mErr.Errors = append(mErr.Errors, err)
|
||||
}
|
||||
}
|
||||
return mErr.ErrorOrNil()
|
||||
}
|
||||
|
||||
func (r *AllocRunner) saveAllocRunnerState() error {
|
||||
// Create the snapshot.
|
||||
r.taskStatusLock.RLock()
|
||||
states := copyTaskStates(r.taskStates)
|
||||
r.taskStatusLock.RUnlock()
|
||||
|
||||
alloc := r.Alloc()
|
||||
r.allocLock.Lock()
|
||||
allocClientStatus := r.allocClientStatus
|
||||
allocClientDescription := r.allocClientDescription
|
||||
r.allocLock.Unlock()
|
||||
|
||||
r.ctxLock.Lock()
|
||||
ctx := r.ctx
|
||||
r.ctxLock.Unlock()
|
||||
|
||||
snap := allocRunnerState{
|
||||
Version: r.config.Version,
|
||||
Alloc: alloc,
|
||||
Context: ctx,
|
||||
AllocClientStatus: allocClientStatus,
|
||||
AllocClientDescription: allocClientDescription,
|
||||
TaskStates: states,
|
||||
}
|
||||
return persistState(r.stateFilePath(), &snap)
|
||||
}
|
||||
|
||||
func (r *AllocRunner) saveTaskRunnerState(tr *TaskRunner) error {
|
||||
if err := tr.SaveState(); err != nil {
|
||||
return fmt.Errorf("failed to save state for alloc %s task '%s': %v",
|
||||
r.alloc.ID, tr.task.Name, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DestroyState is used to cleanup after ourselves
|
||||
func (r *AllocRunner) DestroyState() error {
|
||||
return os.RemoveAll(filepath.Dir(r.stateFilePath()))
|
||||
}
|
||||
|
||||
// DestroyContext is used to destroy the context
|
||||
func (r *AllocRunner) DestroyContext() error {
|
||||
return r.ctx.AllocDir.Destroy()
|
||||
}
|
||||
|
||||
// copyTaskStates returns a copy of the passed task states.
|
||||
func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState {
|
||||
copy := make(map[string]*structs.TaskState, len(states))
|
||||
for task, state := range states {
|
||||
copy[task] = state.Copy()
|
||||
}
|
||||
return copy
|
||||
}
|
||||
|
||||
// Alloc returns the associated allocation
|
||||
func (r *AllocRunner) Alloc() *structs.Allocation {
|
||||
r.allocLock.Lock()
|
||||
alloc := r.alloc.Copy()
|
||||
|
||||
// The status has explicitly been set.
|
||||
if r.allocClientStatus != "" || r.allocClientDescription != "" {
|
||||
alloc.ClientStatus = r.allocClientStatus
|
||||
alloc.ClientDescription = r.allocClientDescription
|
||||
r.allocLock.Unlock()
|
||||
return alloc
|
||||
}
|
||||
r.allocLock.Unlock()
|
||||
|
||||
// Scan the task states to determine the status of the alloc
|
||||
var pending, running, dead, failed bool
|
||||
r.taskStatusLock.RLock()
|
||||
alloc.TaskStates = copyTaskStates(r.taskStates)
|
||||
for _, state := range r.taskStates {
|
||||
switch state.State {
|
||||
case structs.TaskStateRunning:
|
||||
running = true
|
||||
case structs.TaskStatePending:
|
||||
pending = true
|
||||
case structs.TaskStateDead:
|
||||
if state.Failed() {
|
||||
failed = true
|
||||
} else {
|
||||
dead = true
|
||||
}
|
||||
}
|
||||
}
|
||||
r.taskStatusLock.RUnlock()
|
||||
|
||||
// Determine the alloc status
|
||||
if failed {
|
||||
alloc.ClientStatus = structs.AllocClientStatusFailed
|
||||
} else if running {
|
||||
alloc.ClientStatus = structs.AllocClientStatusRunning
|
||||
} else if pending {
|
||||
alloc.ClientStatus = structs.AllocClientStatusPending
|
||||
} else if dead {
|
||||
alloc.ClientStatus = structs.AllocClientStatusComplete
|
||||
}
|
||||
|
||||
return alloc
|
||||
}
|
||||
|
||||
// dirtySyncState is used to watch for state being marked dirty to sync
|
||||
func (r *AllocRunner) dirtySyncState() {
|
||||
for {
|
||||
select {
|
||||
case <-r.dirtyCh:
|
||||
r.syncStatus()
|
||||
case <-r.destroyCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// syncStatus is used to run and sync the status when it changes
|
||||
func (r *AllocRunner) syncStatus() error {
|
||||
// Get a copy of our alloc, update status server side and sync to disk
|
||||
alloc := r.Alloc()
|
||||
r.updater(alloc)
|
||||
return r.saveAllocRunnerState()
|
||||
}
|
||||
|
||||
// setStatus is used to update the allocation status
|
||||
func (r *AllocRunner) setStatus(status, desc string) {
|
||||
r.allocLock.Lock()
|
||||
r.allocClientStatus = status
|
||||
r.allocClientDescription = desc
|
||||
r.allocLock.Unlock()
|
||||
select {
|
||||
case r.dirtyCh <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// setTaskState is used to set the status of a task
|
||||
func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent) {
|
||||
r.taskStatusLock.Lock()
|
||||
defer r.taskStatusLock.Unlock()
|
||||
taskState, ok := r.taskStates[taskName]
|
||||
if !ok {
|
||||
taskState = &structs.TaskState{}
|
||||
r.taskStates[taskName] = taskState
|
||||
}
|
||||
|
||||
// Set the tasks state.
|
||||
taskState.State = state
|
||||
r.appendTaskEvent(taskState, event)
|
||||
|
||||
// If the task failed, we should kill all the other tasks in the task group.
|
||||
if state == structs.TaskStateDead && taskState.Failed() {
|
||||
var destroyingTasks []string
|
||||
for task, tr := range r.tasks {
|
||||
if task != taskName {
|
||||
destroyingTasks = append(destroyingTasks, task)
|
||||
tr.Destroy()
|
||||
}
|
||||
}
|
||||
if len(destroyingTasks) > 0 {
|
||||
r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, destroyingTasks)
|
||||
}
|
||||
}
|
||||
|
||||
select {
|
||||
case r.dirtyCh <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// appendTaskEvent updates the task status by appending the new event.
|
||||
func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) {
|
||||
capacity := 10
|
||||
if state.Events == nil {
|
||||
state.Events = make([]*structs.TaskEvent, 0, capacity)
|
||||
}
|
||||
|
||||
// If we hit capacity, then shift it.
|
||||
if len(state.Events) == capacity {
|
||||
old := state.Events
|
||||
state.Events = make([]*structs.TaskEvent, 0, capacity)
|
||||
state.Events = append(state.Events, old[1:]...)
|
||||
}
|
||||
|
||||
state.Events = append(state.Events, event)
|
||||
}
|
||||
|
||||
// Run is a long running goroutine used to manage an allocation
|
||||
func (r *AllocRunner) Run() {
|
||||
defer close(r.waitCh)
|
||||
go r.dirtySyncState()
|
||||
|
||||
// Find the task group to run in the allocation
|
||||
alloc := r.alloc
|
||||
tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
|
||||
if tg == nil {
|
||||
r.logger.Printf("[ERR] client: alloc '%s' for missing task group '%s'", alloc.ID, alloc.TaskGroup)
|
||||
r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup))
|
||||
return
|
||||
}
|
||||
|
||||
// Create the execution context
|
||||
r.ctxLock.Lock()
|
||||
if r.ctx == nil {
|
||||
allocDir := allocdir.NewAllocDir(filepath.Join(r.config.AllocDir, r.alloc.ID))
|
||||
if err := allocDir.Build(tg.Tasks); err != nil {
|
||||
r.logger.Printf("[WARN] client: failed to build task directories: %v", err)
|
||||
r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
|
||||
r.ctxLock.Unlock()
|
||||
return
|
||||
}
|
||||
r.ctx = driver.NewExecContext(allocDir, r.alloc.ID)
|
||||
}
|
||||
r.ctxLock.Unlock()
|
||||
|
||||
// Check if the allocation is in a terminal status. In this case, we don't
|
||||
// start any of the task runners and directly wait for the destroy signal to
|
||||
// clean up the allocation.
|
||||
if alloc.TerminalStatus() {
|
||||
r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.alloc.ID)
|
||||
r.handleDestroy()
|
||||
r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.alloc.ID)
|
||||
return
|
||||
}
|
||||
|
||||
// Start the task runners
|
||||
r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.alloc.ID)
|
||||
r.taskLock.Lock()
|
||||
for _, task := range tg.Tasks {
|
||||
if _, ok := r.restored[task.Name]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(),
|
||||
task.Copy())
|
||||
r.tasks[task.Name] = tr
|
||||
tr.MarkReceived()
|
||||
go tr.Run()
|
||||
}
|
||||
r.taskLock.Unlock()
|
||||
|
||||
OUTER:
|
||||
// Wait for updates
|
||||
for {
|
||||
select {
|
||||
case update := <-r.updateCh:
|
||||
// Store the updated allocation.
|
||||
r.allocLock.Lock()
|
||||
r.alloc = update
|
||||
r.allocLock.Unlock()
|
||||
|
||||
// Check if we're in a terminal status
|
||||
if update.TerminalStatus() {
|
||||
break OUTER
|
||||
}
|
||||
|
||||
// Update the task groups
|
||||
runners := r.getTaskRunners()
|
||||
for _, tr := range runners {
|
||||
tr.Update(update)
|
||||
}
|
||||
case <-r.destroyCh:
|
||||
break OUTER
|
||||
}
|
||||
}
|
||||
|
||||
// Destroy each sub-task
|
||||
runners := r.getTaskRunners()
|
||||
for _, tr := range runners {
|
||||
tr.Destroy()
|
||||
}
|
||||
|
||||
// Wait for termination of the task runners
|
||||
for _, tr := range runners {
|
||||
<-tr.WaitCh()
|
||||
}
|
||||
|
||||
// Final state sync
|
||||
r.syncStatus()
|
||||
|
||||
// Block until we should destroy the state of the alloc
|
||||
r.handleDestroy()
|
||||
r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.alloc.ID)
|
||||
}
|
||||
|
||||
// handleDestroy blocks till the AllocRunner should be destroyed and does the
|
||||
// necessary cleanup.
|
||||
func (r *AllocRunner) handleDestroy() {
|
||||
select {
|
||||
case <-r.destroyCh:
|
||||
if err := r.DestroyContext(); err != nil {
|
||||
r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v",
|
||||
r.alloc.ID, err)
|
||||
}
|
||||
if err := r.DestroyState(); err != nil {
|
||||
r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v",
|
||||
r.alloc.ID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update is used to update the allocation of the context
|
||||
func (r *AllocRunner) Update(update *structs.Allocation) {
|
||||
select {
|
||||
case r.updateCh <- update:
|
||||
default:
|
||||
r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// StatsReporter returns an interface to query resource usage statistics of an
|
||||
// allocation
|
||||
func (r *AllocRunner) StatsReporter() AllocStatsReporter {
|
||||
return r
|
||||
}
|
||||
|
||||
// getTaskRunners is a helper that returns a copy of the task runners list using
|
||||
// the taskLock.
|
||||
func (r *AllocRunner) getTaskRunners() []*TaskRunner {
|
||||
// Get the task runners
|
||||
r.taskLock.RLock()
|
||||
defer r.taskLock.RUnlock()
|
||||
runners := make([]*TaskRunner, 0, len(r.tasks))
|
||||
for _, tr := range r.tasks {
|
||||
runners = append(runners, tr)
|
||||
}
|
||||
return runners
|
||||
}
|
||||
|
||||
// LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set
|
||||
// the allocation stats will only include the given task.
|
||||
func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
|
||||
astat := &cstructs.AllocResourceUsage{
|
||||
Tasks: make(map[string]*cstructs.TaskResourceUsage),
|
||||
}
|
||||
|
||||
var flat []*cstructs.TaskResourceUsage
|
||||
if taskFilter != "" {
|
||||
r.taskLock.RLock()
|
||||
tr, ok := r.tasks[taskFilter]
|
||||
r.taskLock.RUnlock()
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("allocation %q has no task %q", r.alloc.ID, taskFilter)
|
||||
}
|
||||
l := tr.LatestResourceUsage()
|
||||
if l != nil {
|
||||
astat.Tasks[taskFilter] = l
|
||||
flat = []*cstructs.TaskResourceUsage{l}
|
||||
astat.Timestamp = l.Timestamp
|
||||
}
|
||||
} else {
|
||||
// Get the task runners
|
||||
runners := r.getTaskRunners()
|
||||
for _, tr := range runners {
|
||||
l := tr.LatestResourceUsage()
|
||||
if l != nil {
|
||||
astat.Tasks[tr.task.Name] = l
|
||||
flat = append(flat, l)
|
||||
if l.Timestamp > astat.Timestamp {
|
||||
astat.Timestamp = l.Timestamp
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
astat.ResourceUsage = sumTaskResourceUsage(flat)
|
||||
return astat, nil
|
||||
}
|
||||
|
||||
// sumTaskResourceUsage takes a set of task resources and sums their resources
|
||||
func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage {
|
||||
summed := &cstructs.ResourceUsage{
|
||||
MemoryStats: &cstructs.MemoryStats{},
|
||||
CpuStats: &cstructs.CpuStats{},
|
||||
}
|
||||
for _, usage := range usages {
|
||||
summed.Add(usage.ResourceUsage)
|
||||
}
|
||||
return summed
|
||||
}
|
||||
|
||||
// shouldUpdate takes the AllocModifyIndex of an allocation sent from the server and
|
||||
// checks if the current running allocation is behind and should be updated.
|
||||
func (r *AllocRunner) shouldUpdate(serverIndex uint64) bool {
|
||||
r.allocLock.Lock()
|
||||
defer r.allocLock.Unlock()
|
||||
return r.alloc.AllocModifyIndex < serverIndex
|
||||
}
|
||||
|
||||
// Destroy is used to indicate that the allocation context should be destroyed
|
||||
func (r *AllocRunner) Destroy() {
|
||||
r.destroyLock.Lock()
|
||||
defer r.destroyLock.Unlock()
|
||||
|
||||
if r.destroy {
|
||||
return
|
||||
}
|
||||
r.destroy = true
|
||||
close(r.destroyCh)
|
||||
}
|
||||
|
||||
// WaitCh returns a channel to wait for termination
|
||||
func (r *AllocRunner) WaitCh() <-chan struct{} {
|
||||
return r.waitCh
|
||||
}
|
|
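As a reading aid, a minimal sketch of how a client would drive the AllocRunner defined above. This is not code from the PR; the logger, config, updater callback, and allocation values are placeholders.

// Hypothetical call site inside the Nomad client.
logger := log.New(os.Stderr, "", log.LstdFlags)
conf := config.DefaultConfig()
updater := func(a *structs.Allocation) { /* push the status back to the server */ }

ar := NewAllocRunner(logger, conf, updater, alloc)
go ar.Run()         // starts the task runners and the dirty-state sync loop

ar.Update(newAlloc) // non-blocking; the update is dropped (and logged) if the buffer is full
ar.Destroy()        // idempotent; closes destroyCh so Run can tear everything down
<-ar.WaitCh()       // returns once the runner has fully terminated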
@@ -1,399 +0,0 @@
|
|||
package allocdir
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"gopkg.in/tomb.v1"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hpcloud/tail/watch"
|
||||
)
|
||||
|
||||
var (
|
||||
// The name of the directory that is shared across tasks in a task group.
|
||||
SharedAllocName = "alloc"
|
||||
|
||||
// Name of the directory where logs of Tasks are written
|
||||
LogDirName = "logs"
|
||||
|
||||
// The set of directories that exist inside each shared alloc directory.
|
||||
SharedAllocDirs = []string{LogDirName, "tmp", "data"}
|
||||
|
||||
// The name of the directory that exists inside each task directory
|
||||
// regardless of driver.
|
||||
TaskLocal = "local"
|
||||
|
||||
// TaskDirs is the set of directories created in each tasks directory.
|
||||
TaskDirs = []string{"tmp"}
|
||||
)
|
||||
|
||||
type AllocDir struct {
|
||||
// AllocDir is the directory used for storing any state
|
||||
// of this allocation. It will be purged on alloc destroy.
|
||||
AllocDir string
|
||||
|
||||
// The shared directory is available to all tasks within the same task
|
||||
// group.
|
||||
SharedDir string
|
||||
|
||||
// TaskDirs is a mapping of task names to their non-shared directory.
|
||||
TaskDirs map[string]string
|
||||
}
|
||||
|
||||
// AllocFileInfo holds information about a file inside the AllocDir
|
||||
type AllocFileInfo struct {
|
||||
Name string
|
||||
IsDir bool
|
||||
Size int64
|
||||
FileMode string
|
||||
ModTime time.Time
|
||||
}
|
||||
|
||||
// AllocDirFS exposes file operations on the alloc dir
|
||||
type AllocDirFS interface {
|
||||
List(path string) ([]*AllocFileInfo, error)
|
||||
Stat(path string) (*AllocFileInfo, error)
|
||||
ReadAt(path string, offset int64) (io.ReadCloser, error)
|
||||
BlockUntilExists(path string, t *tomb.Tomb) chan error
|
||||
ChangeEvents(path string, curOffset int64, t *tomb.Tomb) (*watch.FileChanges, error)
|
||||
}
|
||||
|
||||
func NewAllocDir(allocDir string) *AllocDir {
|
||||
d := &AllocDir{AllocDir: allocDir, TaskDirs: make(map[string]string)}
|
||||
d.SharedDir = filepath.Join(d.AllocDir, SharedAllocName)
|
||||
return d
|
||||
}
|
||||
|
||||
// Tears down the previously built directory structure.
|
||||
func (d *AllocDir) Destroy() error {
|
||||
// Unmount all mounted shared alloc dirs.
|
||||
var mErr multierror.Error
|
||||
if err := d.UnmountAll(); err != nil {
|
||||
mErr.Errors = append(mErr.Errors, err)
|
||||
}
|
||||
|
||||
if err := os.RemoveAll(d.AllocDir); err != nil {
|
||||
mErr.Errors = append(mErr.Errors, err)
|
||||
}
|
||||
|
||||
return mErr.ErrorOrNil()
|
||||
}
|
||||
|
||||
func (d *AllocDir) UnmountAll() error {
|
||||
var mErr multierror.Error
|
||||
for _, dir := range d.TaskDirs {
|
||||
// Check if the directory has the shared alloc mounted.
|
||||
taskAlloc := filepath.Join(dir, SharedAllocName)
|
||||
if d.pathExists(taskAlloc) {
|
||||
if err := d.unmountSharedDir(taskAlloc); err != nil {
|
||||
mErr.Errors = append(mErr.Errors,
|
||||
fmt.Errorf("failed to unmount shared alloc dir %q: %v", taskAlloc, err))
|
||||
} else if err := os.RemoveAll(taskAlloc); err != nil {
|
||||
mErr.Errors = append(mErr.Errors,
|
||||
fmt.Errorf("failed to delete shared alloc dir %q: %v", taskAlloc, err))
|
||||
}
|
||||
}
|
||||
|
||||
// Unmount dev/ and proc/ if they have been mounted.
|
||||
d.unmountSpecialDirs(dir)
|
||||
}
|
||||
|
||||
return mErr.ErrorOrNil()
|
||||
}
|
||||
|
||||
// Given a list of tasks, build the correct alloc structure.
|
||||
func (d *AllocDir) Build(tasks []*structs.Task) error {
|
||||
// Make the alloc directory, owned by the nomad process.
|
||||
if err := os.MkdirAll(d.AllocDir, 0755); err != nil {
|
||||
return fmt.Errorf("Failed to make the alloc directory %v: %v", d.AllocDir, err)
|
||||
}
|
||||
|
||||
// Make the shared directory and make it available to all user/groups.
|
||||
if err := os.MkdirAll(d.SharedDir, 0777); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make the shared directory have non-root permissions.
|
||||
if err := d.dropDirPermissions(d.SharedDir); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dir := range SharedAllocDirs {
|
||||
p := filepath.Join(d.SharedDir, dir)
|
||||
if err := os.MkdirAll(p, 0777); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := d.dropDirPermissions(p); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Make the task directories.
|
||||
for _, t := range tasks {
|
||||
taskDir := filepath.Join(d.AllocDir, t.Name)
|
||||
if err := os.MkdirAll(taskDir, 0777); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make the task directory have non-root permissions.
|
||||
if err := d.dropDirPermissions(taskDir); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create a local directory that each task can use.
|
||||
local := filepath.Join(taskDir, TaskLocal)
|
||||
if err := os.MkdirAll(local, 0777); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := d.dropDirPermissions(local); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
d.TaskDirs[t.Name] = taskDir
|
||||
|
||||
// Create the directories that should be in every task.
|
||||
for _, dir := range TaskDirs {
|
||||
local := filepath.Join(taskDir, dir)
|
||||
if err := os.MkdirAll(local, 0777); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := d.dropDirPermissions(local); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Embed takes a mapping of absolute directory or file paths on the host to
|
||||
// their intended, relative location within the task directory. Embed attempts
|
||||
// a hardlink and then defaults to copying. If the path exists on the host and
|
||||
// can't be embedded an error is returned.
|
||||
func (d *AllocDir) Embed(task string, entries map[string]string) error {
|
||||
taskdir, ok := d.TaskDirs[task]
|
||||
if !ok {
|
||||
return fmt.Errorf("Task directory doesn't exist for task %v", task)
|
||||
}
|
||||
|
||||
subdirs := make(map[string]string)
|
||||
for source, dest := range entries {
|
||||
// Check to see if directory exists on host.
|
||||
s, err := os.Stat(source)
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Embedding a single file
|
||||
if !s.IsDir() {
|
||||
destDir := filepath.Join(taskdir, filepath.Dir(dest))
|
||||
if err := os.MkdirAll(destDir, s.Mode().Perm()); err != nil {
|
||||
return fmt.Errorf("Couldn't create destination directory %v: %v", destDir, err)
|
||||
}
|
||||
|
||||
// Copy the file.
|
||||
taskEntry := filepath.Join(destDir, filepath.Base(dest))
|
||||
if err := d.linkOrCopy(source, taskEntry, s.Mode().Perm()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// Create destination directory.
|
||||
destDir := filepath.Join(taskdir, dest)
|
||||
if err := os.MkdirAll(destDir, s.Mode().Perm()); err != nil {
|
||||
return fmt.Errorf("Couldn't create destination directory %v: %v", destDir, err)
|
||||
}
|
||||
|
||||
// Enumerate the files in source.
|
||||
dirEntries, err := ioutil.ReadDir(source)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Couldn't read directory %v: %v", source, err)
|
||||
}
|
||||
|
||||
for _, entry := range dirEntries {
|
||||
hostEntry := filepath.Join(source, entry.Name())
|
||||
taskEntry := filepath.Join(destDir, filepath.Base(hostEntry))
|
||||
if entry.IsDir() {
|
||||
subdirs[hostEntry] = filepath.Join(dest, filepath.Base(hostEntry))
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if entry exists. This can happen if restarting a failed
|
||||
// task.
|
||||
if _, err := os.Lstat(taskEntry); err == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if !entry.Mode().IsRegular() {
|
||||
// If it is a symlink we can create it, otherwise we skip it.
|
||||
if entry.Mode()&os.ModeSymlink == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
link, err := os.Readlink(hostEntry)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Couldn't resolve symlink for %v: %v", source, err)
|
||||
}
|
||||
|
||||
if err := os.Symlink(link, taskEntry); err != nil {
|
||||
// Symlinking twice
|
||||
if err.(*os.LinkError).Err.Error() != "file exists" {
|
||||
return fmt.Errorf("Couldn't create symlink: %v", err)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if err := d.linkOrCopy(hostEntry, taskEntry, entry.Mode().Perm()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recurse on self to copy subdirectories.
|
||||
if len(subdirs) != 0 {
|
||||
return d.Embed(task, subdirs)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// MountSharedDir mounts the shared directory into the specified task's
|
||||
// directory. Mount is documented at an OS level in their respective
|
||||
// implementation files.
|
||||
func (d *AllocDir) MountSharedDir(task string) error {
|
||||
taskDir, ok := d.TaskDirs[task]
|
||||
if !ok {
|
||||
return fmt.Errorf("No task directory exists for %v", task)
|
||||
}
|
||||
|
||||
taskLoc := filepath.Join(taskDir, SharedAllocName)
|
||||
if err := d.mountSharedDir(taskLoc); err != nil {
|
||||
return fmt.Errorf("Failed to mount shared directory for task %v: %v", task, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// LogDir returns the log dir in the current allocation directory
|
||||
func (d *AllocDir) LogDir() string {
|
||||
return filepath.Join(d.AllocDir, SharedAllocName, LogDirName)
|
||||
}
|
||||
|
||||
// List returns the list of files at a path relative to the alloc dir
|
||||
func (d *AllocDir) List(path string) ([]*AllocFileInfo, error) {
|
||||
p := filepath.Join(d.AllocDir, path)
|
||||
finfos, err := ioutil.ReadDir(p)
|
||||
if err != nil {
|
||||
return []*AllocFileInfo{}, err
|
||||
}
|
||||
files := make([]*AllocFileInfo, len(finfos))
|
||||
for idx, info := range finfos {
|
||||
files[idx] = &AllocFileInfo{
|
||||
Name: info.Name(),
|
||||
IsDir: info.IsDir(),
|
||||
Size: info.Size(),
|
||||
FileMode: info.Mode().String(),
|
||||
ModTime: info.ModTime(),
|
||||
}
|
||||
}
|
||||
return files, err
|
||||
}
|
||||
|
||||
// Stat returns information about the file at a path relative to the alloc dir
|
||||
func (d *AllocDir) Stat(path string) (*AllocFileInfo, error) {
|
||||
p := filepath.Join(d.AllocDir, path)
|
||||
info, err := os.Stat(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &AllocFileInfo{
|
||||
Size: info.Size(),
|
||||
Name: info.Name(),
|
||||
IsDir: info.IsDir(),
|
||||
FileMode: info.Mode().String(),
|
||||
ModTime: info.ModTime(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ReadAt returns a reader for a file at the path relative to the alloc dir
|
||||
func (d *AllocDir) ReadAt(path string, offset int64) (io.ReadCloser, error) {
|
||||
p := filepath.Join(d.AllocDir, path)
|
||||
f, err := os.Open(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := f.Seek(offset, 0); err != nil {
|
||||
return nil, fmt.Errorf("can't seek to offset %q: %v", offset, err)
|
||||
}
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// BlockUntilExists blocks until the passed file relative the allocation
|
||||
// directory exists. The block can be cancelled with the passed tomb.
|
||||
func (d *AllocDir) BlockUntilExists(path string, t *tomb.Tomb) chan error {
|
||||
// Get the path relative to the alloc directory
|
||||
p := filepath.Join(d.AllocDir, path)
|
||||
watcher := getFileWatcher(p)
|
||||
returnCh := make(chan error, 1)
|
||||
go func() {
|
||||
returnCh <- watcher.BlockUntilExists(t)
|
||||
close(returnCh)
|
||||
}()
|
||||
return returnCh
|
||||
}
|
||||
|
||||
// ChangeEvents watches for changes to the passed path relative to the
|
||||
// allocation directory. The offset should be the last read offset. The tomb is
|
||||
// used to clean up the watch.
|
||||
func (d *AllocDir) ChangeEvents(path string, curOffset int64, t *tomb.Tomb) (*watch.FileChanges, error) {
|
||||
// Get the path relative to the alloc directory
|
||||
p := filepath.Join(d.AllocDir, path)
|
||||
watcher := getFileWatcher(p)
|
||||
return watcher.ChangeEvents(t, curOffset)
|
||||
}
|
||||
|
||||
// getFileWatcher returns a FileWatcher for the given path.
|
||||
func getFileWatcher(path string) watch.FileWatcher {
|
||||
return watch.NewPollingFileWatcher(path)
|
||||
}
|
||||
|
||||
func fileCopy(src, dst string, perm os.FileMode) error {
|
||||
// Do a simple copy.
|
||||
srcFile, err := os.Open(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Couldn't open src file %v: %v", src, err)
|
||||
}
|
||||
|
||||
dstFile, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE, perm)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Couldn't create destination file %v: %v", dst, err)
|
||||
}
|
||||
|
||||
if _, err := io.Copy(dstFile, srcFile); err != nil {
|
||||
return fmt.Errorf("Couldn't copy %v to %v: %v", src, dst, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// pathExists is a helper function to check if the path exists.
|
||||
func (d *AllocDir) pathExists(path string) bool {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
|
@@ -1,26 +0,0 @@
package allocdir

import (
	"syscall"
)

// Hardlinks the shared directory. As a side-effect the shared directory and
// task directory must be on the same filesystem.
func (d *AllocDir) mountSharedDir(dir string) error {
	return syscall.Link(d.SharedDir, dir)
}

func (d *AllocDir) unmountSharedDir(dir string) error {
	return syscall.Unlink(dir)
}

// MountSpecialDirs mounts the dev and proc file system on the chroot of the
// task. It's a no-op on darwin.
func (d *AllocDir) MountSpecialDirs(taskDir string) error {
	return nil
}

// unmountSpecialDirs unmounts the dev and proc file system from the chroot
func (d *AllocDir) unmountSpecialDirs(taskDir string) error {
	return nil
}
@@ -1,26 +0,0 @@
package allocdir

import (
	"syscall"
)

// Hardlinks the shared directory. As a side-effect the shared directory and
// task directory must be on the same filesystem.
func (d *AllocDir) mountSharedDir(dir string) error {
	return syscall.Link(d.SharedDir, dir)
}

func (d *AllocDir) unmountSharedDir(dir string) error {
	return syscall.Unlink(dir)
}

// MountSpecialDirs mounts the dev and proc file system on the chroot of the
// task. It's a no-op on FreeBSD right now.
func (d *AllocDir) MountSpecialDirs(taskDir string) error {
	return nil
}

// unmountSpecialDirs unmounts the dev and proc file system from the chroot
func (d *AllocDir) unmountSpecialDirs(taskDir string) error {
	return nil
}
@@ -1,79 +0,0 @@
package allocdir

import (
	"fmt"
	"os"
	"path/filepath"
	"syscall"

	"github.com/hashicorp/go-multierror"
)

// Bind mounts the shared directory into the task directory. Must be root to
// run.
func (d *AllocDir) mountSharedDir(taskDir string) error {
	if err := os.MkdirAll(taskDir, 0777); err != nil {
		return err
	}

	return syscall.Mount(d.SharedDir, taskDir, "", syscall.MS_BIND, "")
}

func (d *AllocDir) unmountSharedDir(dir string) error {
	return syscall.Unmount(dir, 0)
}

// MountSpecialDirs mounts the dev and proc file system from the host to the
// chroot
func (d *AllocDir) MountSpecialDirs(taskDir string) error {
	// Mount dev
	dev := filepath.Join(taskDir, "dev")
	if !d.pathExists(dev) {
		if err := os.MkdirAll(dev, 0777); err != nil {
			return fmt.Errorf("Mkdir(%v) failed: %v", dev, err)
		}

		if err := syscall.Mount("none", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil {
			return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err)
		}
	}

	// Mount proc
	proc := filepath.Join(taskDir, "proc")
	if !d.pathExists(proc) {
		if err := os.MkdirAll(proc, 0777); err != nil {
			return fmt.Errorf("Mkdir(%v) failed: %v", proc, err)
		}

		if err := syscall.Mount("none", proc, "proc", syscall.MS_RDONLY, ""); err != nil {
			return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err)
		}
	}

	return nil
}

// unmountSpecialDirs unmounts the dev and proc file system from the chroot
func (d *AllocDir) unmountSpecialDirs(taskDir string) error {
	errs := new(multierror.Error)
	dev := filepath.Join(taskDir, "dev")
	if d.pathExists(dev) {
		if err := syscall.Unmount(dev, 0); err != nil {
			errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err))
		} else if err := os.RemoveAll(dev); err != nil {
			errs = multierror.Append(errs, fmt.Errorf("Failed to delete dev directory (%v): %v", dev, err))
		}
	}

	// Unmount proc.
	proc := filepath.Join(taskDir, "proc")
	if d.pathExists(proc) {
		if err := syscall.Unmount(proc, 0); err != nil {
			errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err))
		} else if err := os.RemoveAll(proc); err != nil {
			errs = multierror.Append(errs, fmt.Errorf("Failed to delete proc directory (%v): %v", proc, err))
		}
	}

	return errs.ErrorOrNil()
}
@@ -1,81 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris

// Functions shared between linux/darwin.
package allocdir

import (
	"fmt"
	"os"
	"os/user"
	"path/filepath"
	"strconv"

	"golang.org/x/sys/unix"
)

var (
	// Path inside container for mounted directory shared across tasks in a task group.
	SharedAllocContainerPath = filepath.Join("/", SharedAllocName)

	// Path inside container for mounted directory for local storage.
	TaskLocalContainerPath = filepath.Join("/", TaskLocal)
)

func (d *AllocDir) linkOrCopy(src, dst string, perm os.FileMode) error {
	// Attempt to hardlink.
	if err := os.Link(src, dst); err == nil {
		return nil
	}

	return fileCopy(src, dst, perm)
}

func (d *AllocDir) dropDirPermissions(path string) error {
	// Can't do anything if not root.
	if unix.Geteuid() != 0 {
		return nil
	}

	u, err := user.Lookup("nobody")
	if err != nil {
		return err
	}

	uid, err := getUid(u)
	if err != nil {
		return err
	}

	gid, err := getGid(u)
	if err != nil {
		return err
	}

	if err := os.Chown(path, uid, gid); err != nil {
		return fmt.Errorf("Couldn't change owner/group of %v to (uid: %v, gid: %v): %v", path, uid, gid, err)
	}

	if err := os.Chmod(path, 0777); err != nil {
		return fmt.Errorf("Chmod(%v) failed: %v", path, err)
	}

	return nil
}

func getUid(u *user.User) (int, error) {
	uid, err := strconv.Atoi(u.Uid)
	if err != nil {
		return 0, fmt.Errorf("Unable to convert Uid to an int: %v", err)
	}

	return uid, nil
}

func getGid(u *user.User) (int, error) {
	gid, err := strconv.Atoi(u.Gid)
	if err != nil {
		return 0, fmt.Errorf("Unable to convert Gid to an int: %v", err)
	}

	return gid, nil
}
@@ -1,45 +0,0 @@
package allocdir

import (
	"errors"
	"os"
	"path/filepath"
)

var (
	// Path inside container for mounted directory that is shared across tasks in a task group.
	SharedAllocContainerPath = filepath.Join("c:\\", SharedAllocName)

	// Path inside container for mounted directory for local storage.
	TaskLocalContainerPath = filepath.Join("c:\\", TaskLocal)
)

func (d *AllocDir) linkOrCopy(src, dst string, perm os.FileMode) error {
	return fileCopy(src, dst, perm)
}

// The windows version does nothing currently.
func (d *AllocDir) mountSharedDir(dir string) error {
	return errors.New("Mount on Windows not supported.")
}

// The windows version does nothing currently.
func (d *AllocDir) dropDirPermissions(path string) error {
	return nil
}

// The windows version does nothing currently.
func (d *AllocDir) unmountSharedDir(dir string) error {
	return nil
}

// MountSpecialDirs mounts the dev and proc file system on the chroot of the
// task. It's a no-op on windows.
func (d *AllocDir) MountSpecialDirs(taskDir string) error {
	return nil
}

// unmountSpecialDirs unmounts the dev and proc file system from the chroot
func (d *AllocDir) unmountSpecialDirs(taskDir string) error {
	return nil
}
File diff suppressed because it is too large
@@ -1,221 +0,0 @@
|
|||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs/config"
|
||||
)
|
||||
|
||||
var (
|
||||
// DefaultEnvBlacklist is the default set of environment variables that are
|
||||
// filtered when passing the environment variables of the host to a task.
|
||||
DefaultEnvBlacklist = strings.Join([]string{
|
||||
"CONSUL_TOKEN",
|
||||
"VAULT_TOKEN",
|
||||
"ATLAS_TOKEN",
|
||||
"AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN",
|
||||
"GOOGLE_APPLICATION_CREDENTIALS",
|
||||
}, ",")
|
||||
|
||||
// DefaultUserBlacklist is the default set of users that tasks are not
|
||||
// allowed to run as when using a driver in "user.checked_drivers"
|
||||
DefaultUserBlacklist = strings.Join([]string{
|
||||
"root",
|
||||
"Administrator",
|
||||
}, ",")
|
||||
|
||||
// DefaultUserCheckedDrivers is the set of drivers we apply the user
|
||||
// blacklist onto. For virtualized drivers it often doesn't make sense to
|
||||
// make this stipulation so by default they are ignored.
|
||||
DefaultUserCheckedDrivers = strings.Join([]string{
|
||||
"exec",
|
||||
"qemu",
|
||||
"java",
|
||||
}, ",")
|
||||
)
|
||||
|
||||
// RPCHandler can be provided to the Client if there is a local server
|
||||
// to avoid going over the network. If not provided, the Client will
|
||||
// maintain a connection pool to the servers
|
||||
type RPCHandler interface {
|
||||
RPC(method string, args interface{}, reply interface{}) error
|
||||
}
|
||||
|
||||
// Config is used to parameterize and configure the behavior of the client
|
||||
type Config struct {
|
||||
// DevMode controls if we are in a development mode which
|
||||
// avoids persistent storage.
|
||||
DevMode bool
|
||||
|
||||
// StateDir is where we store our state
|
||||
StateDir string
|
||||
|
||||
// AllocDir is where we store data for allocations
|
||||
AllocDir string
|
||||
|
||||
// LogOutput is the destination for logs
|
||||
LogOutput io.Writer
|
||||
|
||||
// Region is the clients region
|
||||
Region string
|
||||
|
||||
// Network interface to be used in network fingerprinting
|
||||
NetworkInterface string
|
||||
|
||||
// Network speed is the default speed of network interfaces if they can not
|
||||
// be determined dynamically.
|
||||
NetworkSpeed int
|
||||
|
||||
// MaxKillTimeout allows capping the user-specifiable KillTimeout. If the
|
||||
// task's KillTimeout is greater than the MaxKillTimeout, MaxKillTimeout is
|
||||
// used.
|
||||
MaxKillTimeout time.Duration
|
||||
|
||||
// Servers is a list of known server addresses. These are as "host:port"
|
||||
Servers []string
|
||||
|
||||
// RPCHandler can be provided to avoid network traffic if the
|
||||
// server is running locally.
|
||||
RPCHandler RPCHandler
|
||||
|
||||
// Node provides the base node
|
||||
Node *structs.Node
|
||||
|
||||
// ClientMaxPort is the upper range of the ports that the client uses for
|
||||
// communicating with plugin subsystems over loopback
|
||||
ClientMaxPort uint
|
||||
|
||||
// ClientMinPort is the lower range of the ports that the client uses for
|
||||
// communicating with plugin subsystems over loopback
|
||||
ClientMinPort uint
|
||||
|
||||
// GloballyReservedPorts are ports that are reserved across all network
|
||||
// devices and IPs.
|
||||
GloballyReservedPorts []int
|
||||
|
||||
// A mapping of directories on the host OS to attempt to embed inside each
|
||||
// task's chroot.
|
||||
ChrootEnv map[string]string
|
||||
|
||||
// Options provides arbitrary key-value configuration for nomad internals,
|
||||
// like fingerprinters and drivers. The format is:
|
||||
//
|
||||
// namespace.option = value
|
||||
Options map[string]string
|
||||
|
||||
// Version is the version of the Nomad client
|
||||
Version string
|
||||
|
||||
// Revision is the commit number of the Nomad client
|
||||
Revision string
|
||||
|
||||
// ConsulConfig is this Agent's Consul configuration
|
||||
ConsulConfig *config.ConsulConfig
|
||||
|
||||
// StatsCollectionInterval is the interval at which the Nomad client
|
||||
// collects resource usage stats
|
||||
StatsCollectionInterval time.Duration
|
||||
|
||||
// PublishNodeMetrics determines whether nomad is going to publish node
|
||||
// level metrics to remote Telemetry sinks
|
||||
PublishNodeMetrics bool
|
||||
|
||||
// PublishAllocationMetrics determines whether nomad is going to publish
|
||||
// allocation metrics to remote Telemetry sinks
|
||||
PublishAllocationMetrics bool
|
||||
}
|
||||
|
||||
func (c *Config) Copy() *Config {
|
||||
nc := new(Config)
|
||||
*nc = *c
|
||||
nc.Node = nc.Node.Copy()
|
||||
nc.Servers = structs.CopySliceString(nc.Servers)
|
||||
nc.Options = structs.CopyMapStringString(nc.Options)
|
||||
return nc
|
||||
}
|
||||
|
||||
// DefaultConfig returns the default configuration
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
ConsulConfig: config.DefaultConsulConfig(),
|
||||
LogOutput: os.Stderr,
|
||||
Region: "global",
|
||||
StatsCollectionInterval: 1 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
// Read returns the specified configuration value or "".
|
||||
func (c *Config) Read(id string) string {
|
||||
return c.Options[id]
|
||||
}
|
||||
|
||||
// ReadDefault returns the specified configuration value, or the specified
|
||||
// default value if none is set.
|
||||
func (c *Config) ReadDefault(id string, defaultValue string) string {
|
||||
val, ok := c.Options[id]
|
||||
if !ok {
|
||||
return defaultValue
|
||||
}
|
||||
return val
|
||||
}
|
||||
|
||||
// ReadBool parses the specified option as a boolean.
|
||||
func (c *Config) ReadBool(id string) (bool, error) {
|
||||
val, ok := c.Options[id]
|
||||
if !ok {
|
||||
return false, fmt.Errorf("Specified config is missing from options")
|
||||
}
|
||||
bval, err := strconv.ParseBool(val)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("Failed to parse %s as bool: %s", val, err)
|
||||
}
|
||||
return bval, nil
|
||||
}
|
||||
|
||||
// ReadBoolDefault tries to parse the specified option as a boolean. If there is
|
||||
// an error in parsing, the default option is returned.
|
||||
func (c *Config) ReadBoolDefault(id string, defaultValue bool) bool {
|
||||
val, err := c.ReadBool(id)
|
||||
if err != nil {
|
||||
return defaultValue
|
||||
}
|
||||
return val
|
||||
}
|
||||
|
||||
// ReadStringListToMap tries to parse the specified option as a comma separated list.
|
||||
// If there is an error in parsing, an empty list is returned.
|
||||
func (c *Config) ReadStringListToMap(key string) map[string]struct{} {
|
||||
s := strings.TrimSpace(c.Read(key))
|
||||
list := make(map[string]struct{})
|
||||
if s != "" {
|
||||
for _, e := range strings.Split(s, ",") {
|
||||
trimmed := strings.TrimSpace(e)
|
||||
list[trimmed] = struct{}{}
|
||||
}
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
// ReadStringListToMapDefault tries to parse the specified option as a comma separated list.
|
||||
// If there is an error in parsing, an empty list is returned.
|
||||
func (c *Config) ReadStringListToMapDefault(key, defaultValue string) map[string]struct{} {
|
||||
val, ok := c.Options[key]
|
||||
if !ok {
|
||||
val = defaultValue
|
||||
}
|
||||
|
||||
list := make(map[string]struct{})
|
||||
if val != "" {
|
||||
for _, e := range strings.Split(val, ",") {
|
||||
trimmed := strings.TrimSpace(e)
|
||||
list[trimmed] = struct{}{}
|
||||
}
|
||||
}
|
||||
return list
|
||||
}
|
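For orientation, a small sketch showing how the option helpers above combine with the package-level defaults. The option keys here are illustrative; only "user.checked_drivers" is named in the comments above, and the exact keys Nomad uses are not shown in this diff.

conf := DefaultConfig()
conf.Options = map[string]string{"user.checked_drivers": "exec,java"}

// Fall back to DefaultUserBlacklist when the operator did not set an override.
blacklist := conf.ReadStringListToMapDefault("user.blacklist", DefaultUserBlacklist)
if _, denied := blacklist["root"]; denied {
	// running as root would be rejected for the checked drivers
}

// A missing or unparsable boolean option falls back to the supplied default.
verbose := conf.ReadBoolDefault("driver.verbose", false) // hypothetical option key
_ = verbose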
File diff suppressed because it is too large
@@ -1,14 +0,0 @@
//+build !windows

package driver

import docker "github.com/fsouza/go-dockerclient"

const (
	// Setting default network mode for non-windows OS as bridge
	defaultNetworkMode = "bridge"
)

func getPortBinding(ip string, port string) []docker.PortBinding {
	return []docker.PortBinding{docker.PortBinding{HostIP: ip, HostPort: port}}
}
@@ -1,13 +0,0 @@
package driver

import docker "github.com/fsouza/go-dockerclient"

const (
	// Default network mode for windows containers is nat
	defaultNetworkMode = "nat"
)

// Currently Windows containers don't support host ip in port binding.
func getPortBinding(ip string, port string) []docker.PortBinding {
	return []docker.PortBinding{docker.PortBinding{HostIP: "", HostPort: port}}
}
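A short sketch of how the two per-OS helpers above are typically consumed when building a container's host configuration. The HostConfig fields follow the go-dockerclient API; the actual call site in the Docker driver is not part of this diff.

hostConfig := &docker.HostConfig{
	NetworkMode:  defaultNetworkMode, // "bridge" everywhere except Windows, where it is "nat"
	PortBindings: map[docker.Port][]docker.PortBinding{},
}
// On Windows the helper ignores the IP, since host IPs in port bindings are unsupported there.
hostConfig.PortBindings[docker.Port("8080/tcp")] = getPortBinding("127.0.0.1", "8080")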
@@ -1,192 +0,0 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/env"
|
||||
"github.com/hashicorp/nomad/client/fingerprint"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
|
||||
dstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
)
|
||||
|
||||
// BuiltinDrivers contains the built in registered drivers
|
||||
// which are available for allocation handling
|
||||
var BuiltinDrivers = map[string]Factory{
|
||||
"docker": NewDockerDriver,
|
||||
"exec": NewExecDriver,
|
||||
"raw_exec": NewRawExecDriver,
|
||||
"java": NewJavaDriver,
|
||||
"qemu": NewQemuDriver,
|
||||
"rkt": NewRktDriver,
|
||||
}
|
||||
|
||||
// NewDriver is used to instantiate and return a new driver
|
||||
// given the name and a logger
|
||||
func NewDriver(name string, ctx *DriverContext) (Driver, error) {
|
||||
// Lookup the factory function
|
||||
factory, ok := BuiltinDrivers[name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown driver '%s'", name)
|
||||
}
|
||||
|
||||
// Instantiate the driver
|
||||
f := factory(ctx)
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// Factory is used to instantiate a new Driver
|
||||
type Factory func(*DriverContext) Driver
|
||||
|
||||
// Driver is used for execution of tasks. This allows Nomad
|
||||
// to support many pluggable implementations of task drivers.
|
||||
// Examples could include LXC, Docker, Qemu, etc.
|
||||
type Driver interface {
|
||||
// Drivers must support the fingerprint interface for detection
|
||||
fingerprint.Fingerprint
|
||||
|
||||
// Start is used to begin task execution
|
||||
Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error)
|
||||
|
||||
// Open is used to re-open a handle to a task
|
||||
Open(ctx *ExecContext, handleID string) (DriverHandle, error)
|
||||
|
||||
// Drivers must validate their configuration
|
||||
Validate(map[string]interface{}) error
|
||||
}
|
||||
|
||||
// DriverContext is a means to inject dependencies such as loggers, configs, and
|
||||
// node attributes into a Driver without having to change the Driver interface
|
||||
// each time we do it. Used in conjunction with Factory, above.
|
||||
type DriverContext struct {
|
||||
taskName string
|
||||
config *config.Config
|
||||
logger *log.Logger
|
||||
node *structs.Node
|
||||
taskEnv *env.TaskEnvironment
|
||||
}
|
||||
|
||||
// NewEmptyDriverContext returns a DriverContext with all fields set to their
|
||||
// zero value.
|
||||
func NewEmptyDriverContext() *DriverContext {
|
||||
return &DriverContext{
|
||||
taskName: "",
|
||||
config: nil,
|
||||
node: nil,
|
||||
logger: nil,
|
||||
taskEnv: nil,
|
||||
}
|
||||
}
|
||||
|
||||
// NewDriverContext initializes a new DriverContext with the specified fields.
|
||||
// This enables other packages to create DriverContexts but keeps the fields
|
||||
// private to the driver. If we want to change this later we can gorename all of
|
||||
// the fields in DriverContext.
|
||||
func NewDriverContext(taskName string, config *config.Config, node *structs.Node,
|
||||
logger *log.Logger, taskEnv *env.TaskEnvironment) *DriverContext {
|
||||
return &DriverContext{
|
||||
taskName: taskName,
|
||||
config: config,
|
||||
node: node,
|
||||
logger: logger,
|
||||
taskEnv: taskEnv,
|
||||
}
|
||||
}
|
||||
|
||||
// DriverHandle is an opaque handle into a driver used for task
|
||||
// manipulation
|
||||
type DriverHandle interface {
|
||||
// Returns an opaque handle that can be used to re-open the handle
|
||||
ID() string
|
||||
|
||||
// WaitCh is used to return a channel used to wait for task completion
|
||||
WaitCh() chan *dstructs.WaitResult
|
||||
|
||||
// Update is used to update the task if possible and update task related
|
||||
// configurations.
|
||||
Update(task *structs.Task) error
|
||||
|
||||
// Kill is used to stop the task
|
||||
Kill() error
|
||||
|
||||
// Stats returns aggregated stats of the driver
|
||||
Stats() (*cstructs.TaskResourceUsage, error)
|
||||
}
|
||||
|
||||
// ExecContext is shared between drivers within an allocation
|
||||
type ExecContext struct {
|
||||
sync.Mutex
|
||||
|
||||
// AllocDir contains information about the alloc directory structure.
|
||||
AllocDir *allocdir.AllocDir
|
||||
|
||||
// Alloc ID
|
||||
AllocID string
|
||||
}
|
||||
|
||||
// NewExecContext is used to create a new execution context
|
||||
func NewExecContext(alloc *allocdir.AllocDir, allocID string) *ExecContext {
|
||||
return &ExecContext{AllocDir: alloc, AllocID: allocID}
|
||||
}
|
||||
|
||||
// GetTaskEnv converts the alloc dir, the node, task and alloc into a
|
||||
// TaskEnvironment.
|
||||
func GetTaskEnv(allocDir *allocdir.AllocDir, node *structs.Node,
|
||||
task *structs.Task, alloc *structs.Allocation) (*env.TaskEnvironment, error) {
|
||||
|
||||
tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
|
||||
env := env.NewTaskEnvironment(node).
|
||||
SetTaskMeta(task.Meta).
|
||||
SetTaskGroupMeta(tg.Meta).
|
||||
SetJobMeta(alloc.Job.Meta).
|
||||
SetEnvvars(task.Env).
|
||||
SetTaskName(task.Name)
|
||||
|
||||
if allocDir != nil {
|
||||
env.SetAllocDir(allocDir.SharedDir)
|
||||
taskdir, ok := allocDir.TaskDirs[task.Name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("failed to get task directory for task %q", task.Name)
|
||||
}
|
||||
|
||||
env.SetTaskLocalDir(filepath.Join(taskdir, allocdir.TaskLocal))
|
||||
}
|
||||
|
||||
if task.Resources != nil {
|
||||
env.SetMemLimit(task.Resources.MemoryMB).
|
||||
SetCpuLimit(task.Resources.CPU).
|
||||
SetNetworks(task.Resources.Networks)
|
||||
}
|
||||
|
||||
if alloc != nil {
|
||||
env.SetAlloc(alloc)
|
||||
}
|
||||
|
||||
return env.Build(), nil
|
||||
}
|
||||
|
||||
func mapMergeStrInt(maps ...map[string]int) map[string]int {
|
||||
out := map[string]int{}
|
||||
for _, in := range maps {
|
||||
for key, val := range in {
|
||||
out[key] = val
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func mapMergeStrStr(maps ...map[string]string) map[string]string {
|
||||
out := map[string]string{}
|
||||
for _, in := range maps {
|
||||
for key, val := range in {
|
||||
out[key] = val
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
|
@@ -1,430 +0,0 @@
|
|||
package env
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
hargs "github.com/hashicorp/nomad/helper/args"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// A set of environment variables that are exported by each driver.
|
||||
const (
|
||||
// AllocDir is the environment variable with the path to the alloc directory
|
||||
// that is shared across tasks within a task group.
|
||||
AllocDir = "NOMAD_ALLOC_DIR"
|
||||
|
||||
// TaskLocalDir is the environment variable with the path to the tasks local
|
||||
// directory where it can store data that is persisted until the alloc is
|
||||
// removed.
|
||||
TaskLocalDir = "NOMAD_TASK_DIR"
|
||||
|
||||
// MemLimit is the environment variable with the task's memory limit in MB.
|
||||
MemLimit = "NOMAD_MEMORY_LIMIT"
|
||||
|
||||
// CpuLimit is the environment variable with the task's CPU limit in MHz.
|
||||
CpuLimit = "NOMAD_CPU_LIMIT"
|
||||
|
||||
// AllocID is the environment variable for passing the allocation ID.
|
||||
AllocID = "NOMAD_ALLOC_ID"
|
||||
|
||||
// AllocName is the environment variable for passing the allocation name.
|
||||
AllocName = "NOMAD_ALLOC_NAME"
|
||||
|
||||
// TaskName is the environment variable for passing the task name.
|
||||
TaskName = "NOMAD_TASK_NAME"
|
||||
|
||||
// AllocIndex is the environment variable for passing the allocation index.
|
||||
AllocIndex = "NOMAD_ALLOC_INDEX"
|
||||
|
||||
// AddrPrefix is the prefix for passing both dynamic and static port
|
||||
// allocations to tasks.
|
||||
// E.g. $NOMAD_ADDR_http=127.0.0.1:80
|
||||
AddrPrefix = "NOMAD_ADDR_"
|
||||
|
||||
// IpPrefix is the prefix for passing the IP of a port allocation to a task.
|
||||
IpPrefix = "NOMAD_IP_"
|
||||
|
||||
// PortPrefix is the prefix for passing the port allocation to a task.
|
||||
PortPrefix = "NOMAD_PORT_"
|
||||
|
||||
// HostPortPrefix is the prefix for passing the host port when a portmap is
|
||||
// specified.
|
||||
HostPortPrefix = "NOMAD_HOST_PORT_"
|
||||
|
||||
// MetaPrefix is the prefix for passing task meta data.
|
||||
MetaPrefix = "NOMAD_META_"
|
||||
)
|
||||
|
||||
// The node values that can be interpreted.
|
||||
const (
|
||||
nodeIdKey = "node.unique.id"
|
||||
nodeDcKey = "node.datacenter"
|
||||
nodeNameKey = "node.unique.name"
|
||||
nodeClassKey = "node.class"
|
||||
|
||||
// Prefixes used for lookups.
|
||||
nodeAttributePrefix = "attr."
|
||||
nodeMetaPrefix = "meta."
|
||||
)
|
||||
|
||||
// TaskEnvironment is used to expose information to a task via environment
|
||||
// variables and provide interpolation of Nomad variables.
|
||||
type TaskEnvironment struct {
|
||||
Env map[string]string
|
||||
TaskMeta map[string]string
|
||||
TaskGroupMeta map[string]string
|
||||
JobMeta map[string]string
|
||||
AllocDir string
|
||||
TaskDir string
|
||||
CpuLimit int
|
||||
MemLimit int
|
||||
TaskName string
|
||||
AllocIndex int
|
||||
AllocId string
|
||||
AllocName string
|
||||
Node *structs.Node
|
||||
Networks []*structs.NetworkResource
|
||||
PortMap map[string]int
|
||||
|
||||
// TaskEnv is the map of variables that will be set in the task's environment
|
||||
TaskEnv map[string]string
|
||||
|
||||
// NodeValues is the map of values that are available for interpolation from
// the node.
|
||||
NodeValues map[string]string
|
||||
}
|
||||
|
||||
func NewTaskEnvironment(node *structs.Node) *TaskEnvironment {
|
||||
return &TaskEnvironment{Node: node, AllocIndex: -1}
|
||||
}
|
||||
|
||||
// ParseAndReplace takes the user-supplied args and replaces any instance of an
// environment variable or Nomad variable in the args with the actual value.
|
||||
func (t *TaskEnvironment) ParseAndReplace(args []string) []string {
|
||||
replaced := make([]string, len(args))
|
||||
for i, arg := range args {
|
||||
replaced[i] = hargs.ReplaceEnv(arg, t.TaskEnv, t.NodeValues)
|
||||
}
|
||||
|
||||
return replaced
|
||||
}
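// Editor's note: an illustrative sketch, not part of the original source. It
// assumes that the helper/args package resolves "${...}"-style references;
// with that assumption, a built environment rewrites task arguments like so:
//
//	t := NewTaskEnvironment(node).SetTaskName("web").Build()
//	t.ParseAndReplace([]string{"--name", "${NOMAD_TASK_NAME}"})
//	// returns []string{"--name", "web"}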
|
||||
|
||||
// ReplaceEnv takes an arg and replaces all occurrences of environment variables
|
||||
// and nomad variables. If the variable is found in the passed map it is
|
||||
// replaced, otherwise the original string is returned.
|
||||
func (t *TaskEnvironment) ReplaceEnv(arg string) string {
|
||||
return hargs.ReplaceEnv(arg, t.TaskEnv, t.NodeValues)
|
||||
}
|
||||
|
||||
// Build must be called after all the tasks environment values have been set.
|
||||
func (t *TaskEnvironment) Build() *TaskEnvironment {
|
||||
t.NodeValues = make(map[string]string)
|
||||
t.TaskEnv = make(map[string]string)
|
||||
|
||||
// Build the meta with the following precedence: task, task group, job.
|
||||
for _, meta := range []map[string]string{t.JobMeta, t.TaskGroupMeta, t.TaskMeta} {
|
||||
for k, v := range meta {
|
||||
t.TaskEnv[fmt.Sprintf("%s%s", MetaPrefix, strings.ToUpper(k))] = v
|
||||
}
|
||||
}
|
||||
|
||||
// Build the ports
|
||||
for _, network := range t.Networks {
|
||||
for label, value := range network.MapLabelToValues(nil) {
|
||||
t.TaskEnv[fmt.Sprintf("%s%s", IpPrefix, label)] = network.IP
|
||||
t.TaskEnv[fmt.Sprintf("%s%s", HostPortPrefix, label)] = strconv.Itoa(value)
|
||||
if forwardedPort, ok := t.PortMap[label]; ok {
|
||||
value = forwardedPort
|
||||
}
|
||||
t.TaskEnv[fmt.Sprintf("%s%s", PortPrefix, label)] = fmt.Sprintf("%d", value)
|
||||
IPPort := fmt.Sprintf("%s:%d", network.IP, value)
|
||||
t.TaskEnv[fmt.Sprintf("%s%s", AddrPrefix, label)] = IPPort
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Build the directories
|
||||
if t.AllocDir != "" {
|
||||
t.TaskEnv[AllocDir] = t.AllocDir
|
||||
}
|
||||
if t.TaskDir != "" {
|
||||
t.TaskEnv[TaskLocalDir] = t.TaskDir
|
||||
}
|
||||
|
||||
// Build the resource limits
|
||||
if t.MemLimit != 0 {
|
||||
t.TaskEnv[MemLimit] = strconv.Itoa(t.MemLimit)
|
||||
}
|
||||
if t.CpuLimit != 0 {
|
||||
t.TaskEnv[CpuLimit] = strconv.Itoa(t.CpuLimit)
|
||||
}
|
||||
|
||||
// Build the task's ids
|
||||
if t.AllocId != "" {
|
||||
t.TaskEnv[AllocID] = t.AllocId
|
||||
}
|
||||
if t.AllocName != "" {
|
||||
t.TaskEnv[AllocName] = t.AllocName
|
||||
}
|
||||
if t.AllocIndex != -1 {
|
||||
t.TaskEnv[AllocIndex] = strconv.Itoa(t.AllocIndex)
|
||||
}
|
||||
if t.TaskName != "" {
|
||||
t.TaskEnv[TaskName] = t.TaskName
|
||||
}
|
||||
|
||||
// Build the node
|
||||
if t.Node != nil {
|
||||
// Set up the node values.
|
||||
t.NodeValues[nodeIdKey] = t.Node.ID
|
||||
t.NodeValues[nodeDcKey] = t.Node.Datacenter
|
||||
t.NodeValues[nodeNameKey] = t.Node.Name
|
||||
t.NodeValues[nodeClassKey] = t.Node.NodeClass
|
||||
|
||||
// Set up the attributes.
|
||||
for k, v := range t.Node.Attributes {
|
||||
t.NodeValues[fmt.Sprintf("%s%s", nodeAttributePrefix, k)] = v
|
||||
}
|
||||
|
||||
// Set up the meta.
|
||||
for k, v := range t.Node.Meta {
|
||||
t.NodeValues[fmt.Sprintf("%s%s", nodeMetaPrefix, k)] = v
|
||||
}
|
||||
}
|
||||
|
||||
// Interpret the environment variables
|
||||
interpreted := make(map[string]string, len(t.Env))
|
||||
for k, v := range t.Env {
|
||||
interpreted[k] = hargs.ReplaceEnv(v, t.NodeValues, t.TaskEnv)
|
||||
}
|
||||
|
||||
for k, v := range interpreted {
|
||||
t.TaskEnv[k] = v
|
||||
}
|
||||
|
||||
return t
|
||||
}
|
||||
|
||||
// EnvList returns a list of strings with NAME=value pairs.
|
||||
func (t *TaskEnvironment) EnvList() []string {
|
||||
env := []string{}
|
||||
for k, v := range t.TaskEnv {
|
||||
env = append(env, fmt.Sprintf("%s=%s", k, v))
|
||||
}
|
||||
|
||||
return env
|
||||
}
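// Editor's note: an illustrative, hypothetical helper that is not part of the
// original source; it shows the intended builder flow using the setters
// defined later in this file: construct a TaskEnvironment, chain the setters,
// call Build, then read the final variables with EnvList.
func exampleBuildEnv(node *structs.Node) []string {
	env := NewTaskEnvironment(node).
		SetTaskName("web").
		SetMemLimit(256).
		SetCpuLimit(500).
		SetEnvvars(map[string]string{"PORT": "8080"}).
		Build()
	// Yields entries such as NOMAD_TASK_NAME=web, NOMAD_MEMORY_LIMIT=256,
	// NOMAD_CPU_LIMIT=500 and PORT=8080, in no particular order.
	return env.EnvList()
}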
|
||||
|
||||
// EnvMap returns a copy of the tasks environment variables.
|
||||
func (t *TaskEnvironment) EnvMap() map[string]string {
|
||||
m := make(map[string]string, len(t.TaskEnv))
|
||||
for k, v := range t.TaskEnv {
|
||||
m[k] = v
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
// Builder methods to build the TaskEnvironment
|
||||
func (t *TaskEnvironment) SetAllocDir(dir string) *TaskEnvironment {
|
||||
t.AllocDir = dir
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearAllocDir() *TaskEnvironment {
|
||||
t.AllocDir = ""
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetTaskLocalDir(dir string) *TaskEnvironment {
|
||||
t.TaskDir = dir
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearTaskLocalDir() *TaskEnvironment {
|
||||
t.TaskDir = ""
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetMemLimit(limit int) *TaskEnvironment {
|
||||
t.MemLimit = limit
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearMemLimit() *TaskEnvironment {
|
||||
t.MemLimit = 0
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetCpuLimit(limit int) *TaskEnvironment {
|
||||
t.CpuLimit = limit
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearCpuLimit() *TaskEnvironment {
|
||||
t.CpuLimit = 0
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetNetworks(networks []*structs.NetworkResource) *TaskEnvironment {
|
||||
t.Networks = networks
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) clearNetworks() *TaskEnvironment {
|
||||
t.Networks = nil
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetPortMap(portMap map[string]int) *TaskEnvironment {
|
||||
t.PortMap = portMap
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) clearPortMap() *TaskEnvironment {
|
||||
t.PortMap = nil
|
||||
return t
|
||||
}
|
||||
|
||||
// SetTaskMeta takes a map of meta values to be passed to the task. The keys
// are capitalized when the environment variable is set.
|
||||
func (t *TaskEnvironment) SetTaskMeta(m map[string]string) *TaskEnvironment {
|
||||
t.TaskMeta = m
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearTaskMeta() *TaskEnvironment {
|
||||
t.TaskMeta = nil
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetTaskGroupMeta(m map[string]string) *TaskEnvironment {
|
||||
t.TaskGroupMeta = m
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearTaskGroupMeta() *TaskEnvironment {
|
||||
t.TaskGroupMeta = nil
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetJobMeta(m map[string]string) *TaskEnvironment {
|
||||
t.JobMeta = m
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearJobMeta() *TaskEnvironment {
|
||||
t.JobMeta = nil
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetEnvvars(m map[string]string) *TaskEnvironment {
|
||||
t.Env = m
|
||||
return t
|
||||
}
|
||||
|
||||
// AppendEnvvars appends the given environment variables.
|
||||
func (t *TaskEnvironment) AppendEnvvars(m map[string]string) *TaskEnvironment {
|
||||
if t.Env == nil {
|
||||
t.Env = make(map[string]string, len(m))
|
||||
}
|
||||
|
||||
for k, v := range m {
|
||||
t.Env[k] = v
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// AppendHostEnvvars adds the host's environment variables to the task's. The
// filter parameter can be used to prevent specific host environment variables
// from entering the task's environment.
|
||||
func (t *TaskEnvironment) AppendHostEnvvars(filter []string) *TaskEnvironment {
|
||||
hostEnv := os.Environ()
|
||||
if t.Env == nil {
|
||||
t.Env = make(map[string]string, len(hostEnv))
|
||||
}
|
||||
|
||||
// Index the filtered environment variables.
|
||||
index := make(map[string]struct{}, len(filter))
|
||||
for _, f := range filter {
|
||||
index[f] = struct{}{}
|
||||
}
|
||||
|
||||
for _, e := range hostEnv {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
key, value := parts[0], parts[1]
|
||||
|
||||
// Skip filtered environment variables
|
||||
if _, filtered := index[key]; filtered {
|
||||
continue
|
||||
}
|
||||
|
||||
// Don't override the tasks environment variables.
|
||||
if _, existing := t.Env[key]; !existing {
|
||||
t.Env[key] = value
|
||||
}
|
||||
}
|
||||
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearEnvvars() *TaskEnvironment {
|
||||
t.Env = nil
|
||||
return t
|
||||
}
|
||||
|
||||
// Helper method for setting all fields from an allocation.
|
||||
func (t *TaskEnvironment) SetAlloc(alloc *structs.Allocation) *TaskEnvironment {
|
||||
t.AllocId = alloc.ID
|
||||
t.AllocName = alloc.Name
|
||||
t.AllocIndex = alloc.Index()
|
||||
return t
|
||||
}
|
||||
|
||||
// Helper method for clearing all fields from an allocation.
|
||||
func (t *TaskEnvironment) ClearAlloc(alloc *structs.Allocation) *TaskEnvironment {
|
||||
return t.ClearAllocId().ClearAllocName().ClearAllocIndex()
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetAllocIndex(index int) *TaskEnvironment {
|
||||
t.AllocIndex = index
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearAllocIndex() *TaskEnvironment {
|
||||
t.AllocIndex = -1
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetAllocId(id string) *TaskEnvironment {
|
||||
t.AllocId = id
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearAllocId() *TaskEnvironment {
|
||||
t.AllocId = ""
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetAllocName(name string) *TaskEnvironment {
|
||||
t.AllocName = name
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearAllocName() *TaskEnvironment {
|
||||
t.AllocName = ""
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) SetTaskName(name string) *TaskEnvironment {
|
||||
t.TaskName = name
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *TaskEnvironment) ClearTaskName() *TaskEnvironment {
|
||||
t.TaskName = ""
|
||||
return t
|
||||
}
|
|
@ -1,319 +0,0 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/go-plugin"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/executor"
|
||||
dstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/helper/discover"
|
||||
"github.com/hashicorp/nomad/helper/fields"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
)
|
||||
|
||||
const (
|
||||
// The key populated in Node Attributes to indicate the presence of the Exec
|
||||
// driver
|
||||
execDriverAttr = "driver.exec"
|
||||
)
|
||||
|
||||
// ExecDriver fork/execs tasks using as many of the underlying OS's isolation
// features as possible.
|
||||
type ExecDriver struct {
|
||||
DriverContext
|
||||
}
|
||||
|
||||
type ExecDriverConfig struct {
|
||||
Command string `mapstructure:"command"`
|
||||
Args []string `mapstructure:"args"`
|
||||
}
|
||||
|
||||
// execHandle is returned from Start/Open as a handle to the PID
|
||||
type execHandle struct {
|
||||
pluginClient *plugin.Client
|
||||
executor executor.Executor
|
||||
isolationConfig *dstructs.IsolationConfig
|
||||
userPid int
|
||||
allocDir *allocdir.AllocDir
|
||||
killTimeout time.Duration
|
||||
maxKillTimeout time.Duration
|
||||
logger *log.Logger
|
||||
waitCh chan *dstructs.WaitResult
|
||||
doneCh chan struct{}
|
||||
version string
|
||||
}
|
||||
|
||||
// NewExecDriver is used to create a new exec driver
|
||||
func NewExecDriver(ctx *DriverContext) Driver {
|
||||
return &ExecDriver{DriverContext: *ctx}
|
||||
}
|
||||
|
||||
// Validate is used to validate the driver configuration
|
||||
func (d *ExecDriver) Validate(config map[string]interface{}) error {
|
||||
fd := &fields.FieldData{
|
||||
Raw: config,
|
||||
Schema: map[string]*fields.FieldSchema{
|
||||
"command": &fields.FieldSchema{
|
||||
Type: fields.TypeString,
|
||||
Required: true,
|
||||
},
|
||||
"args": &fields.FieldSchema{
|
||||
Type: fields.TypeArray,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if err := fd.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
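// Editor's note: an illustrative, hypothetical task config that is not part of
// the original source; it satisfies the schema validated above, where
// "command" is required and "args" is an optional array.
var exampleExecConfig = map[string]interface{}{
	"command": "/bin/sleep",
	"args":    []string{"30"},
}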
|
||||
|
||||
func (d *ExecDriver) Periodic() (bool, time.Duration) {
|
||||
return true, 15 * time.Second
|
||||
}
|
||||
|
||||
func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
|
||||
var driverConfig ExecDriverConfig
|
||||
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Get the command to be run
|
||||
command := driverConfig.Command
|
||||
if err := validateCommand(command, "args"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Set the host environment variables.
|
||||
filter := strings.Split(d.config.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",")
|
||||
d.taskEnv.AppendHostEnvvars(filter)
|
||||
|
||||
// Get the task directory for storing the executor logs.
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
|
||||
}
|
||||
|
||||
bin, err := discover.NomadExecutable()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
|
||||
}
|
||||
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Cmd: exec.Command(bin, "executor", pluginLogFile),
|
||||
}
|
||||
|
||||
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
executorCtx := &executor.ExecutorContext{
|
||||
TaskEnv: d.taskEnv,
|
||||
Driver: "exec",
|
||||
AllocDir: ctx.AllocDir,
|
||||
AllocID: ctx.AllocID,
|
||||
ChrootEnv: d.config.ChrootEnv,
|
||||
Task: task,
|
||||
}
|
||||
|
||||
ps, err := exec.LaunchCmd(&executor.ExecCommand{
|
||||
Cmd: command,
|
||||
Args: driverConfig.Args,
|
||||
FSIsolation: true,
|
||||
ResourceLimits: true,
|
||||
User: getExecutorUser(task),
|
||||
}, executorCtx)
|
||||
if err != nil {
|
||||
pluginClient.Kill()
|
||||
return nil, err
|
||||
}
|
||||
d.logger.Printf("[DEBUG] driver.exec: started process via plugin with pid: %v", ps.Pid)
|
||||
|
||||
// Return a driver handle
|
||||
maxKill := d.DriverContext.config.MaxKillTimeout
|
||||
h := &execHandle{
|
||||
pluginClient: pluginClient,
|
||||
userPid: ps.Pid,
|
||||
executor: exec,
|
||||
allocDir: ctx.AllocDir,
|
||||
isolationConfig: ps.IsolationConfig,
|
||||
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
|
||||
maxKillTimeout: maxKill,
|
||||
logger: d.logger,
|
||||
version: d.config.Version,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
if err := exec.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
d.logger.Printf("[ERR] driver.exec: error registering services with consul for task: %q: %v", task.Name, err)
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
type execId struct {
|
||||
Version string
|
||||
KillTimeout time.Duration
|
||||
MaxKillTimeout time.Duration
|
||||
UserPid int
|
||||
TaskDir string
|
||||
AllocDir *allocdir.AllocDir
|
||||
IsolationConfig *dstructs.IsolationConfig
|
||||
PluginConfig *PluginReattachConfig
|
||||
}
|
||||
|
||||
func (d *ExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
|
||||
id := &execId{}
|
||||
if err := json.Unmarshal([]byte(handleID), id); err != nil {
|
||||
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
|
||||
}
|
||||
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Reattach: id.PluginConfig.PluginConfig(),
|
||||
}
|
||||
exec, client, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
merrs := new(multierror.Error)
|
||||
merrs.Errors = append(merrs.Errors, err)
|
||||
d.logger.Println("[ERR] driver.exec: error connecting to plugin so destroying plugin pid and user pid")
|
||||
if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil {
|
||||
merrs.Errors = append(merrs.Errors, fmt.Errorf("error destroying plugin and userpid: %v", e))
|
||||
}
|
||||
if id.IsolationConfig != nil {
|
||||
ePid := pluginConfig.Reattach.Pid
|
||||
if e := executor.ClientCleanup(id.IsolationConfig, ePid); e != nil {
|
||||
merrs.Errors = append(merrs.Errors, fmt.Errorf("destroying cgroup failed: %v", e))
|
||||
}
|
||||
}
|
||||
if e := ctx.AllocDir.UnmountAll(); e != nil {
|
||||
merrs.Errors = append(merrs.Errors, e)
|
||||
}
|
||||
return nil, fmt.Errorf("error connecting to plugin: %v", merrs.ErrorOrNil())
|
||||
}
|
||||
|
||||
ver, _ := exec.Version()
|
||||
d.logger.Printf("[DEBUG] driver.exec : version of executor: %v", ver.Version)
|
||||
// Return a driver handle
|
||||
h := &execHandle{
|
||||
pluginClient: client,
|
||||
executor: exec,
|
||||
userPid: id.UserPid,
|
||||
allocDir: id.AllocDir,
|
||||
isolationConfig: id.IsolationConfig,
|
||||
logger: d.logger,
|
||||
version: id.Version,
|
||||
killTimeout: id.KillTimeout,
|
||||
maxKillTimeout: id.MaxKillTimeout,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
if err := exec.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
d.logger.Printf("[ERR] driver.exec: error registering services with consul: %v", err)
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
func (h *execHandle) ID() string {
|
||||
id := execId{
|
||||
Version: h.version,
|
||||
KillTimeout: h.killTimeout,
|
||||
MaxKillTimeout: h.maxKillTimeout,
|
||||
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
|
||||
UserPid: h.userPid,
|
||||
AllocDir: h.allocDir,
|
||||
IsolationConfig: h.isolationConfig,
|
||||
}
|
||||
|
||||
data, err := json.Marshal(id)
|
||||
if err != nil {
|
||||
h.logger.Printf("[ERR] driver.exec: failed to marshal ID to JSON: %s", err)
|
||||
}
|
||||
return string(data)
|
||||
}
|
||||
|
||||
func (h *execHandle) WaitCh() chan *dstructs.WaitResult {
|
||||
return h.waitCh
|
||||
}
|
||||
|
||||
func (h *execHandle) Update(task *structs.Task) error {
|
||||
// Store the updated kill timeout.
|
||||
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
|
||||
h.executor.UpdateTask(task)
|
||||
|
||||
// Update is not possible
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *execHandle) Kill() error {
|
||||
if err := h.executor.ShutDown(); err != nil {
|
||||
if h.pluginClient.Exited() {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("executor Shutdown failed: %v", err)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-h.doneCh:
|
||||
return nil
|
||||
case <-time.After(h.killTimeout):
|
||||
if h.pluginClient.Exited() {
|
||||
return nil
|
||||
}
|
||||
if err := h.executor.Exit(); err != nil {
|
||||
return fmt.Errorf("executor Exit failed: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (h *execHandle) Stats() (*cstructs.TaskResourceUsage, error) {
|
||||
return h.executor.Stats()
|
||||
}
|
||||
|
||||
func (h *execHandle) run() {
|
||||
ps, err := h.executor.Wait()
|
||||
close(h.doneCh)
|
||||
|
||||
// If the exit code is 0 and we had an error, the plugin didn't connect and
// doesn't know the state of the user process, so we kill the user process.
// That way, when a new executor is created on restart, the new user process
// doesn't collide with resources that the older user pid might still be
// holding onto.
|
||||
if ps.ExitCode == 0 && err != nil {
|
||||
if h.isolationConfig != nil {
|
||||
ePid := h.pluginClient.ReattachConfig().Pid
|
||||
if e := executor.ClientCleanup(h.isolationConfig, ePid); e != nil {
|
||||
h.logger.Printf("[ERR] driver.exec: destroying resource container failed: %v", e)
|
||||
}
|
||||
}
|
||||
if e := h.allocDir.UnmountAll(); e != nil {
|
||||
h.logger.Printf("[ERR] driver.exec: unmounting dev,proc and alloc dirs failed: %v", e)
|
||||
}
|
||||
}
|
||||
h.waitCh <- dstructs.NewWaitResult(ps.ExitCode, ps.Signal, err)
|
||||
close(h.waitCh)
|
||||
// Remove services
|
||||
if err := h.executor.DeregisterServices(); err != nil {
|
||||
h.logger.Printf("[ERR] driver.exec: failed to deregister services: %v", err)
|
||||
}
|
||||
|
||||
if err := h.executor.Exit(); err != nil {
|
||||
h.logger.Printf("[ERR] driver.exec: error destroying executor: %v", err)
|
||||
}
|
||||
h.pluginClient.Kill()
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
//+build darwin dragonfly freebsd netbsd openbsd solaris windows
|
||||
|
||||
package driver
|
||||
|
||||
import (
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
return false, nil
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func (d *ExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// Get the current status so that we can log any debug messages only if the
|
||||
// state changes
|
||||
_, currentlyEnabled := node.Attributes[execDriverAttr]
|
||||
|
||||
// Only enable if cgroups are available and we are root
|
||||
if _, ok := node.Attributes["unique.cgroup.mountpoint"]; !ok {
|
||||
if currentlyEnabled {
|
||||
d.logger.Printf("[DEBUG] driver.exec: cgroups unavailable, disabling")
|
||||
}
|
||||
delete(node.Attributes, execDriverAttr)
|
||||
return false, nil
|
||||
} else if unix.Geteuid() != 0 {
|
||||
if currentlyEnabled {
|
||||
d.logger.Printf("[DEBUG] driver.exec: must run as root user, disabling")
|
||||
}
|
||||
delete(node.Attributes, execDriverAttr)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if !currentlyEnabled {
|
||||
d.logger.Printf("[DEBUG] driver.exec: exec driver is enabled")
|
||||
}
|
||||
node.Attributes[execDriverAttr] = "1"
|
||||
return true, nil
|
||||
}
|
|
@ -1,206 +0,0 @@
|
|||
package executor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/armon/circbuf"
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
cstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
)
|
||||
|
||||
var (
|
||||
// We store the client globally to cache the connection to the docker daemon.
|
||||
createClient sync.Once
|
||||
client *docker.Client
|
||||
)
|
||||
|
||||
const (
|
||||
// The default check timeout
|
||||
defaultCheckTimeout = 30 * time.Second
|
||||
)
|
||||
|
||||
// DockerScriptCheck runs nagios compatible scripts in a docker container and
|
||||
// provides the check result
|
||||
type DockerScriptCheck struct {
|
||||
id string // id of the check
|
||||
interval time.Duration // interval of the check
|
||||
timeout time.Duration // timeout of the check
|
||||
containerID string // container id in which the check will be invoked
|
||||
logger *log.Logger
|
||||
cmd string // check command
|
||||
args []string // check command arguments
|
||||
|
||||
dockerEndpoint string // docker endpoint
|
||||
tlsCert string // path to tls certificate
|
||||
tlsCa string // path to tls ca
|
||||
tlsKey string // path to tls key
|
||||
}
|
||||
|
||||
// dockerClient creates the client to interact with the docker daemon
|
||||
func (d *DockerScriptCheck) dockerClient() (*docker.Client, error) {
|
||||
if client != nil {
|
||||
return client, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
createClient.Do(func() {
|
||||
if d.dockerEndpoint != "" {
|
||||
if d.tlsCert+d.tlsKey+d.tlsCa != "" {
|
||||
d.logger.Printf("[DEBUG] executor.checks: using TLS client connection to %s", d.dockerEndpoint)
|
||||
client, err = docker.NewTLSClient(d.dockerEndpoint, d.tlsCert, d.tlsKey, d.tlsCa)
|
||||
} else {
|
||||
d.logger.Printf("[DEBUG] executor.checks: using standard client connection to %s", d.dockerEndpoint)
|
||||
client, err = docker.NewClient(d.dockerEndpoint)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
d.logger.Println("[DEBUG] executor.checks: using client connection initialized from environment")
|
||||
client, err = docker.NewClientFromEnv()
|
||||
})
|
||||
return client, err
|
||||
}
|
||||
|
||||
// Run runs a script check inside a docker container
|
||||
func (d *DockerScriptCheck) Run() *cstructs.CheckResult {
|
||||
var (
|
||||
exec *docker.Exec
|
||||
err error
|
||||
execRes *docker.ExecInspect
|
||||
time = time.Now()
|
||||
)
|
||||
|
||||
if client, err = d.dockerClient(); err != nil {
|
||||
return &cstructs.CheckResult{Err: err}
|
||||
}
|
||||
client = client
|
||||
execOpts := docker.CreateExecOptions{
|
||||
AttachStdin: false,
|
||||
AttachStdout: true,
|
||||
AttachStderr: true,
|
||||
Tty: false,
|
||||
Cmd: append([]string{d.cmd}, d.args...),
|
||||
Container: d.containerID,
|
||||
}
|
||||
if exec, err = client.CreateExec(execOpts); err != nil {
|
||||
return &cstructs.CheckResult{Err: err}
|
||||
}
|
||||
|
||||
output, _ := circbuf.NewBuffer(int64(cstructs.CheckBufSize))
|
||||
startOpts := docker.StartExecOptions{
|
||||
Detach: false,
|
||||
Tty: false,
|
||||
OutputStream: output,
|
||||
ErrorStream: output,
|
||||
}
|
||||
|
||||
if err = client.StartExec(exec.ID, startOpts); err != nil {
|
||||
return &cstructs.CheckResult{Err: err}
|
||||
}
|
||||
if execRes, err = client.InspectExec(exec.ID); err != nil {
|
||||
return &cstructs.CheckResult{Err: err}
|
||||
}
|
||||
return &cstructs.CheckResult{
|
||||
ExitCode: execRes.ExitCode,
|
||||
Output: string(output.Bytes()),
|
||||
Timestamp: time,
|
||||
}
|
||||
}
|
||||
|
||||
// ID returns the check id
|
||||
func (d *DockerScriptCheck) ID() string {
|
||||
return d.id
|
||||
}
|
||||
|
||||
// Interval returns the interval at which the check has to run
|
||||
func (d *DockerScriptCheck) Interval() time.Duration {
|
||||
return d.interval
|
||||
}
|
||||
|
||||
// Timeout returns the duration after which a check is timed out.
|
||||
func (d *DockerScriptCheck) Timeout() time.Duration {
|
||||
if d.timeout == 0 {
|
||||
return defaultCheckTimeout
|
||||
}
|
||||
return d.timeout
|
||||
}
|
||||
|
||||
// ExecScriptCheck runs a nagios compatible script and returns the check result
|
||||
type ExecScriptCheck struct {
|
||||
id string // id of the script check
|
||||
interval time.Duration // interval at which the check is invoked
|
||||
timeout time.Duration // timeout duration of the check
|
||||
cmd string // command of the check
|
||||
args []string // args passed to the check
|
||||
taskDir string // the root directory of the check
|
||||
|
||||
FSIsolation bool // indicates whether the check has to be run within a chroot
|
||||
}
|
||||
|
||||
// Run runs an exec script check
|
||||
func (e *ExecScriptCheck) Run() *cstructs.CheckResult {
|
||||
buf, _ := circbuf.NewBuffer(int64(cstructs.CheckBufSize))
|
||||
cmd := exec.Command(e.cmd, e.args...)
|
||||
cmd.Stdout = buf
|
||||
cmd.Stderr = buf
|
||||
e.setChroot(cmd)
|
||||
ts := time.Now()
|
||||
if err := cmd.Start(); err != nil {
|
||||
return &cstructs.CheckResult{Err: err}
|
||||
}
|
||||
errCh := make(chan error, 2)
|
||||
go func() {
|
||||
errCh <- cmd.Wait()
|
||||
}()
|
||||
for {
|
||||
select {
|
||||
case err := <-errCh:
|
||||
endTime := time.Now()
|
||||
if err == nil {
|
||||
return &cstructs.CheckResult{
|
||||
ExitCode: 0,
|
||||
Output: string(buf.Bytes()),
|
||||
Timestamp: ts,
|
||||
}
|
||||
}
|
||||
exitCode := 1
|
||||
if exitErr, ok := err.(*exec.ExitError); ok {
|
||||
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
|
||||
exitCode = status.ExitStatus()
|
||||
}
|
||||
}
|
||||
return &cstructs.CheckResult{
|
||||
ExitCode: exitCode,
|
||||
Output: string(buf.Bytes()),
|
||||
Timestamp: ts,
|
||||
Duration: endTime.Sub(ts),
|
||||
}
|
||||
case <-time.After(e.Timeout()):
|
||||
errCh <- fmt.Errorf("timed out after waiting 30s")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ID returns the check id
|
||||
func (e *ExecScriptCheck) ID() string {
|
||||
return e.id
|
||||
}
|
||||
|
||||
// Interval returns the interval at which the check has to run
|
||||
func (e *ExecScriptCheck) Interval() time.Duration {
|
||||
return e.interval
|
||||
}
|
||||
|
||||
// Timeout returns the duration after which a check is timed out.
|
||||
func (e *ExecScriptCheck) Timeout() time.Duration {
|
||||
if e.timeout == 0 {
|
||||
return defaultCheckTimeout
|
||||
}
|
||||
return e.timeout
|
||||
}
|
|
@ -1,18 +0,0 @@
|
|||
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
|
||||
|
||||
package executor
|
||||
|
||||
import (
|
||||
"os/exec"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
func (e *ExecScriptCheck) setChroot(cmd *exec.Cmd) {
|
||||
if e.FSIsolation {
|
||||
if cmd.SysProcAttr == nil {
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
}
|
||||
cmd.SysProcAttr.Chroot = e.taskDir
|
||||
}
|
||||
cmd.Dir = "/"
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
// +build windows
|
||||
|
||||
package executor
|
||||
|
||||
import "os/exec"
|
||||
|
||||
func (e *ExecScriptCheck) setChroot(cmd *exec.Cmd) {
|
||||
}
|
|
@ -1,856 +0,0 @@
|
|||
package executor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/mitchellh/go-ps"
|
||||
"github.com/shirou/gopsutil/process"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/driver/env"
|
||||
"github.com/hashicorp/nomad/client/driver/logging"
|
||||
"github.com/hashicorp/nomad/client/stats"
|
||||
"github.com/hashicorp/nomad/command/agent/consul"
|
||||
shelpers "github.com/hashicorp/nomad/helper/stats"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs/config"
|
||||
|
||||
dstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
)
|
||||
|
||||
const (
|
||||
// pidScanInterval is the interval at which the executor scans the process
// tree to find the pids that the executor and its child processes have
// forked
|
||||
pidScanInterval = 5 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
// The statistics the basic executor exposes
|
||||
ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
|
||||
ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
|
||||
)
|
||||
|
||||
// Executor is the interface which allows a driver to launch and supervise
|
||||
// a process
|
||||
type Executor interface {
|
||||
LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error)
|
||||
LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error)
|
||||
Wait() (*ProcessState, error)
|
||||
ShutDown() error
|
||||
Exit() error
|
||||
UpdateLogConfig(logConfig *structs.LogConfig) error
|
||||
UpdateTask(task *structs.Task) error
|
||||
SyncServices(ctx *ConsulContext) error
|
||||
DeregisterServices() error
|
||||
Version() (*ExecutorVersion, error)
|
||||
Stats() (*cstructs.TaskResourceUsage, error)
|
||||
}
|
||||
|
||||
// ConsulContext holds context to configure the Consul client and run checks
|
||||
type ConsulContext struct {
|
||||
// ConsulConfig contains the configuration information for talking
|
||||
// with this Nomad Agent's Consul Agent.
|
||||
ConsulConfig *config.ConsulConfig
|
||||
|
||||
// ContainerID is the ID of the container
|
||||
ContainerID string
|
||||
|
||||
// TLSCert is the cert which the docker client uses while interacting with the
// docker daemon over TLS
|
||||
TLSCert string
|
||||
|
||||
// TLSCa is the CA which the docker client uses while interacting with the
// docker daemon over TLS
|
||||
TLSCa string
|
||||
|
||||
// TLSKey is the TLS key which the docker client uses while interacting with
|
||||
// the docker daemon
|
||||
TLSKey string
|
||||
|
||||
// DockerEndpoint is the endpoint of the docker daemon
|
||||
DockerEndpoint string
|
||||
}
|
||||
|
||||
// ExecutorContext holds context to configure the command user
|
||||
// wants to run and isolate it
|
||||
type ExecutorContext struct {
|
||||
// TaskEnv holds information about the environment of a Task
|
||||
TaskEnv *env.TaskEnvironment
|
||||
|
||||
// AllocDir is the handle to do operations on the alloc dir of
|
||||
// the task
|
||||
AllocDir *allocdir.AllocDir
|
||||
|
||||
// Task is the task whose executor is being launched
|
||||
Task *structs.Task
|
||||
|
||||
// AllocID is the allocation id to which the task belongs
|
||||
AllocID string
|
||||
|
||||
// A mapping of directories on the host OS to attempt to embed inside each
|
||||
// task's chroot.
|
||||
ChrootEnv map[string]string
|
||||
|
||||
// Driver is the name of the driver that invoked the executor
|
||||
Driver string
|
||||
|
||||
// PortUpperBound is the upper bound of the ports that we can use to start
|
||||
// the syslog server
|
||||
PortUpperBound uint
|
||||
|
||||
// PortLowerBound is the lower bound of the ports that we can use to start
|
||||
// the syslog server
|
||||
PortLowerBound uint
|
||||
}
|
||||
|
||||
// ExecCommand holds the user command, args, and other isolation related
|
||||
// settings.
|
||||
type ExecCommand struct {
|
||||
// Cmd is the command that the user wants to run.
|
||||
Cmd string
|
||||
|
||||
// Args is the args of the command that the user wants to run.
|
||||
Args []string
|
||||
|
||||
// FSIsolation determines whether the command would be run in a chroot.
|
||||
FSIsolation bool
|
||||
|
||||
// User is the user which the executor uses to run the command.
|
||||
User string
|
||||
|
||||
// ResourceLimits determines whether resource limits are enforced by the
|
||||
// executor.
|
||||
ResourceLimits bool
|
||||
}
|
||||
|
||||
// ProcessState holds information about the state of a user process.
|
||||
type ProcessState struct {
|
||||
Pid int
|
||||
ExitCode int
|
||||
Signal int
|
||||
IsolationConfig *dstructs.IsolationConfig
|
||||
Time time.Time
|
||||
}
|
||||
|
||||
// nomadPid holds a pid and its cpu percentage calculators
|
||||
type nomadPid struct {
|
||||
pid int
|
||||
cpuStatsTotal *stats.CpuStats
|
||||
cpuStatsUser *stats.CpuStats
|
||||
cpuStatsSys *stats.CpuStats
|
||||
}
|
||||
|
||||
// SyslogServerState holds the address and isolation information of a launched
|
||||
// syslog server
|
||||
type SyslogServerState struct {
|
||||
IsolationConfig *dstructs.IsolationConfig
|
||||
Addr string
|
||||
}
|
||||
|
||||
// ExecutorVersion is the version of the executor
|
||||
type ExecutorVersion struct {
|
||||
Version string
|
||||
}
|
||||
|
||||
func (v *ExecutorVersion) GoString() string {
|
||||
return v.Version
|
||||
}
|
||||
|
||||
// UniversalExecutor is an implementation of the Executor which launches and
|
||||
// supervises processes. In addition to process supervision it provides resource
|
||||
// and file system isolation
|
||||
type UniversalExecutor struct {
|
||||
cmd exec.Cmd
|
||||
ctx *ExecutorContext
|
||||
command *ExecCommand
|
||||
|
||||
pids map[int]*nomadPid
|
||||
pidLock sync.RWMutex
|
||||
taskDir string
|
||||
exitState *ProcessState
|
||||
processExited chan interface{}
|
||||
fsIsolationEnforced bool
|
||||
|
||||
lre *logging.FileRotator
|
||||
lro *logging.FileRotator
|
||||
rotatorLock sync.Mutex
|
||||
|
||||
shutdownCh chan struct{}
|
||||
|
||||
syslogServer *logging.SyslogServer
|
||||
syslogChan chan *logging.SyslogMessage
|
||||
|
||||
resConCtx resourceContainerContext
|
||||
|
||||
consulSyncer *consul.Syncer
|
||||
consulCtx *ConsulContext
|
||||
totalCpuStats *stats.CpuStats
|
||||
userCpuStats *stats.CpuStats
|
||||
systemCpuStats *stats.CpuStats
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// NewExecutor returns an Executor
|
||||
func NewExecutor(logger *log.Logger) Executor {
|
||||
if err := shelpers.Init(); err != nil {
|
||||
logger.Printf("[FATAL] executor: unable to initialize stats: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
exec := &UniversalExecutor{
|
||||
logger: logger,
|
||||
processExited: make(chan interface{}),
|
||||
totalCpuStats: stats.NewCpuStats(),
|
||||
userCpuStats: stats.NewCpuStats(),
|
||||
systemCpuStats: stats.NewCpuStats(),
|
||||
pids: make(map[int]*nomadPid),
|
||||
}
|
||||
|
||||
return exec
|
||||
}
|
||||
|
||||
// Version returns the api version of the executor
|
||||
func (e *UniversalExecutor) Version() (*ExecutorVersion, error) {
|
||||
return &ExecutorVersion{Version: "1.0.0"}, nil
|
||||
}
|
||||
|
||||
// LaunchCmd launches a process and returns its state. It also configures and
// applies isolation on certain platforms.
|
||||
func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error) {
|
||||
e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " "))
|
||||
|
||||
e.ctx = ctx
|
||||
e.command = command
|
||||
|
||||
// setting the user of the process
|
||||
if command.User != "" {
|
||||
e.logger.Printf("[DEBUG] executor: running command as %s", command.User)
|
||||
if err := e.runAs(command.User); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// configuring the task dir
|
||||
if err := e.configureTaskDir(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
e.ctx.TaskEnv.Build()
|
||||
// configure the chroot and resource container, and start the plugin
// process in the chroot.
|
||||
if err := e.configureIsolation(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Apply ourselves into the resource container. The executor MUST be in
|
||||
// the resource container before the user task is started, otherwise we
|
||||
// are subject to a fork attack in which a process escapes isolation by
|
||||
// immediately forking.
|
||||
if err := e.applyLimits(os.Getpid()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Setup the loggers
|
||||
if err := e.configureLoggers(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
e.cmd.Stdout = e.lro
|
||||
e.cmd.Stderr = e.lre
|
||||
|
||||
// Look up the binary path and make it executable
|
||||
absPath, err := e.lookupBin(ctx.TaskEnv.ReplaceEnv(command.Cmd))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := e.makeExecutable(absPath); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
path := absPath
|
||||
|
||||
// Determine the path to run as it may have to be relative to the chroot.
|
||||
if e.fsIsolationEnforced {
|
||||
rel, err := filepath.Rel(e.taskDir, path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path = rel
|
||||
}
|
||||
|
||||
// Set the commands arguments
|
||||
e.cmd.Path = path
|
||||
e.cmd.Args = append([]string{e.cmd.Path}, ctx.TaskEnv.ParseAndReplace(command.Args)...)
|
||||
e.cmd.Env = ctx.TaskEnv.EnvList()
|
||||
|
||||
// Start the process
|
||||
if err := e.cmd.Start(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
go e.collectPids()
|
||||
go e.wait()
|
||||
ic := e.resConCtx.getIsolationConfig()
|
||||
return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil
|
||||
}
|
||||
|
||||
// configureLoggers sets up the standard out/error file rotators
|
||||
func (e *UniversalExecutor) configureLoggers() error {
|
||||
e.rotatorLock.Lock()
|
||||
defer e.rotatorLock.Unlock()
|
||||
|
||||
logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024)
|
||||
if e.lro == nil {
|
||||
lro, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", e.ctx.Task.Name),
|
||||
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e.lro = lro
|
||||
}
|
||||
|
||||
if e.lre == nil {
|
||||
lre, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", e.ctx.Task.Name),
|
||||
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e.lre = lre
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Wait waits until a process has exited and returns its exit code and errors
|
||||
func (e *UniversalExecutor) Wait() (*ProcessState, error) {
|
||||
<-e.processExited
|
||||
return e.exitState, nil
|
||||
}
|
||||
|
||||
// COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist.
|
||||
// UpdateLogConfig updates the log configuration
|
||||
func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error {
|
||||
e.ctx.Task.LogConfig = logConfig
|
||||
if e.lro == nil {
|
||||
return fmt.Errorf("log rotator for stdout doesn't exist")
|
||||
}
|
||||
e.lro.MaxFiles = logConfig.MaxFiles
|
||||
e.lro.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
|
||||
|
||||
if e.lre == nil {
|
||||
return fmt.Errorf("log rotator for stderr doesn't exist")
|
||||
}
|
||||
e.lre.MaxFiles = logConfig.MaxFiles
|
||||
e.lre.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
|
||||
e.ctx.Task = task
|
||||
|
||||
// Updating Log Config
|
||||
fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024)
|
||||
e.lro.MaxFiles = task.LogConfig.MaxFiles
|
||||
e.lro.FileSize = fileSize
|
||||
e.lre.MaxFiles = task.LogConfig.MaxFiles
|
||||
e.lre.FileSize = fileSize
|
||||
|
||||
// Re-syncing task with Consul agent
|
||||
if e.consulSyncer != nil {
|
||||
e.interpolateServices(e.ctx.Task)
|
||||
domain := consul.NewExecutorDomain(e.ctx.AllocID, task.Name)
|
||||
serviceMap := generateServiceKeys(e.ctx.AllocID, task.Services)
|
||||
e.consulSyncer.SetServices(domain, serviceMap)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateServiceKeys takes a list of interpolated Nomad Services and returns a map
|
||||
// of ServiceKeys to Nomad Services.
|
||||
func generateServiceKeys(allocID string, services []*structs.Service) map[consul.ServiceKey]*structs.Service {
|
||||
keys := make(map[consul.ServiceKey]*structs.Service, len(services))
|
||||
for _, service := range services {
|
||||
key := consul.GenerateServiceKey(service)
|
||||
keys[key] = service
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
func (e *UniversalExecutor) wait() {
|
||||
defer close(e.processExited)
|
||||
err := e.cmd.Wait()
|
||||
ic := e.resConCtx.getIsolationConfig()
|
||||
if err == nil {
|
||||
e.exitState = &ProcessState{Pid: 0, ExitCode: 0, IsolationConfig: ic, Time: time.Now()}
|
||||
return
|
||||
}
|
||||
exitCode := 1
|
||||
var signal int
|
||||
if exitErr, ok := err.(*exec.ExitError); ok {
|
||||
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
|
||||
exitCode = status.ExitStatus()
|
||||
if status.Signaled() {
|
||||
// bash(1) uses the lower 7 bits of a uint8
|
||||
// to indicate normal program failure (see
|
||||
// <sysexits.h>). If a process terminates due
|
||||
// to a signal, encode the signal number to
|
||||
// indicate which signal caused the process
|
||||
// to terminate. Mirror this exit code
|
||||
// encoding scheme.
|
||||
const exitSignalBase = 128
|
||||
signal = int(status.Signal())
|
||||
exitCode = exitSignalBase + signal
|
||||
}
|
||||
}
|
||||
} else {
|
||||
e.logger.Printf("[DEBUG] executor: unexpected Wait() error type: %v", err)
|
||||
}
|
||||
|
||||
e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Signal: signal, IsolationConfig: ic, Time: time.Now()}
|
||||
}
|
||||
|
||||
var (
|
||||
// finishedErr is the error message received when trying to kill an already
// exited process.
|
||||
finishedErr = "os: process already finished"
|
||||
)
|
||||
|
||||
// ClientCleanup is the cleanup routine that a Nomad Client uses to remove the
|
||||
// remnants of a child UniversalExecutor.
|
||||
func ClientCleanup(ic *dstructs.IsolationConfig, pid int) error {
|
||||
return clientCleanup(ic, pid)
|
||||
}
|
||||
|
||||
// Exit cleans up the alloc directory, destroys resource container and kills the
|
||||
// user process
|
||||
func (e *UniversalExecutor) Exit() error {
|
||||
var merr multierror.Error
|
||||
if e.syslogServer != nil {
|
||||
e.syslogServer.Shutdown()
|
||||
}
|
||||
e.lre.Close()
|
||||
e.lro.Close()
|
||||
|
||||
if e.consulSyncer != nil {
|
||||
e.consulSyncer.Shutdown()
|
||||
}
|
||||
|
||||
// If the executor did not launch a process, return.
|
||||
if e.command == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Prefer killing the process via the resource container.
|
||||
if e.cmd.Process != nil && !e.command.ResourceLimits {
|
||||
proc, err := os.FindProcess(e.cmd.Process.Pid)
|
||||
if err != nil {
|
||||
e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v",
|
||||
e.cmd.Process.Pid, err)
|
||||
} else if err := proc.Kill(); err != nil && err.Error() != finishedErr {
|
||||
merr.Errors = append(merr.Errors,
|
||||
fmt.Errorf("can't kill process with pid: %v, err: %v", e.cmd.Process.Pid, err))
|
||||
}
|
||||
}
|
||||
|
||||
if e.command.ResourceLimits {
|
||||
if err := e.resConCtx.executorCleanup(); err != nil {
|
||||
merr.Errors = append(merr.Errors, err)
|
||||
}
|
||||
}
|
||||
|
||||
if e.command.FSIsolation {
|
||||
if err := e.removeChrootMounts(); err != nil {
|
||||
merr.Errors = append(merr.Errors, err)
|
||||
}
|
||||
}
|
||||
return merr.ErrorOrNil()
|
||||
}
|
||||
|
||||
// Shutdown sends an interrupt signal to the user process
|
||||
func (e *UniversalExecutor) ShutDown() error {
|
||||
if e.cmd.Process == nil {
|
||||
return fmt.Errorf("executor.shutdown error: no process found")
|
||||
}
|
||||
proc, err := os.FindProcess(e.cmd.Process.Pid)
|
||||
if err != nil {
|
||||
return fmt.Errorf("executor.shutdown failed to find process: %v", err)
|
||||
}
|
||||
if runtime.GOOS == "windows" {
|
||||
if err := proc.Kill(); err != nil && err.Error() != finishedErr {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if err = proc.Signal(os.Interrupt); err != nil && err.Error() != finishedErr {
|
||||
return fmt.Errorf("executor.shutdown error: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SyncServices syncs the services of the task that the executor is running with
|
||||
// Consul
|
||||
func (e *UniversalExecutor) SyncServices(ctx *ConsulContext) error {
|
||||
e.logger.Printf("[INFO] executor: registering services")
|
||||
e.consulCtx = ctx
|
||||
if e.consulSyncer == nil {
|
||||
cs, err := consul.NewSyncer(ctx.ConsulConfig, e.shutdownCh, e.logger)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e.consulSyncer = cs
|
||||
go e.consulSyncer.Run()
|
||||
}
|
||||
e.interpolateServices(e.ctx.Task)
|
||||
e.consulSyncer.SetDelegatedChecks(e.createCheckMap(), e.createCheck)
|
||||
e.consulSyncer.SetAddrFinder(e.ctx.Task.FindHostAndPortFor)
|
||||
domain := consul.NewExecutorDomain(e.ctx.AllocID, e.ctx.Task.Name)
|
||||
serviceMap := generateServiceKeys(e.ctx.AllocID, e.ctx.Task.Services)
|
||||
e.consulSyncer.SetServices(domain, serviceMap)
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeregisterServices removes the services of the task that the executor is
|
||||
// running from Consul
|
||||
func (e *UniversalExecutor) DeregisterServices() error {
|
||||
e.logger.Printf("[INFO] executor: de-registering services and shutting down consul service")
|
||||
if e.consulSyncer != nil {
|
||||
return e.consulSyncer.Shutdown()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// pidStats returns the resource usage stats per pid
|
||||
func (e *UniversalExecutor) pidStats() (map[string]*cstructs.ResourceUsage, error) {
|
||||
stats := make(map[string]*cstructs.ResourceUsage)
|
||||
e.pidLock.RLock()
|
||||
pids := make(map[int]*nomadPid, len(e.pids))
|
||||
for k, v := range e.pids {
|
||||
pids[k] = v
|
||||
}
|
||||
e.pidLock.RUnlock()
|
||||
for pid, np := range pids {
|
||||
p, err := process.NewProcess(int32(pid))
|
||||
if err != nil {
|
||||
e.logger.Printf("[DEBUG] executor: unable to create new process with pid: %v", pid)
|
||||
continue
|
||||
}
|
||||
ms := &cstructs.MemoryStats{}
|
||||
if memInfo, err := p.MemoryInfo(); err == nil {
|
||||
ms.RSS = memInfo.RSS
|
||||
ms.Swap = memInfo.Swap
|
||||
ms.Measured = ExecutorBasicMeasuredMemStats
|
||||
}
|
||||
|
||||
cs := &cstructs.CpuStats{}
|
||||
if cpuStats, err := p.Times(); err == nil {
|
||||
cs.SystemMode = np.cpuStatsSys.Percent(cpuStats.System * float64(time.Second))
|
||||
cs.UserMode = np.cpuStatsUser.Percent(cpuStats.User * float64(time.Second))
|
||||
cs.Measured = ExecutorBasicMeasuredCpuStats
|
||||
|
||||
// calculate cpu usage percent
|
||||
cs.Percent = np.cpuStatsTotal.Percent(cpuStats.Total() * float64(time.Second))
|
||||
}
|
||||
stats[strconv.Itoa(pid)] = &cstructs.ResourceUsage{MemoryStats: ms, CpuStats: cs}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// configureTaskDir sets the task dir in the executor
|
||||
func (e *UniversalExecutor) configureTaskDir() error {
|
||||
taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.Task.Name]
|
||||
e.taskDir = taskDir
|
||||
if !ok {
|
||||
return fmt.Errorf("couldn't find task directory for task %v", e.ctx.Task.Name)
|
||||
}
|
||||
e.cmd.Dir = taskDir
|
||||
return nil
|
||||
}
|
||||
|
||||
// lookupBin looks for path to the binary to run by looking for the binary in
|
||||
// the following locations, in-order: task/local/, task/, based on host $PATH.
|
||||
// The return path is absolute.
|
||||
func (e *UniversalExecutor) lookupBin(bin string) (string, error) {
|
||||
// Check in the local directory
|
||||
local := filepath.Join(e.taskDir, allocdir.TaskLocal, bin)
|
||||
if _, err := os.Stat(local); err == nil {
|
||||
return local, nil
|
||||
}
|
||||
|
||||
// Check at the root of the task's directory
|
||||
root := filepath.Join(e.taskDir, bin)
|
||||
if _, err := os.Stat(root); err == nil {
|
||||
return root, nil
|
||||
}
|
||||
|
||||
// Check the $PATH
|
||||
if host, err := exec.LookPath(bin); err == nil {
|
||||
return host, nil
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("binary %q could not be found", bin)
|
||||
}
|
||||
|
||||
// makeExecutable makes the given file executable for root,group,others.
|
||||
func (e *UniversalExecutor) makeExecutable(binPath string) error {
|
||||
if runtime.GOOS == "windows" {
|
||||
return nil
|
||||
}
|
||||
|
||||
fi, err := os.Stat(binPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return fmt.Errorf("binary %q does not exist", binPath)
|
||||
}
|
||||
return fmt.Errorf("specified binary is invalid: %v", err)
|
||||
}
|
||||
|
||||
// If it is not executable, make it so.
|
||||
perm := fi.Mode().Perm()
|
||||
req := os.FileMode(0555)
|
||||
if perm&req != req {
|
||||
if err := os.Chmod(binPath, perm|req); err != nil {
|
||||
return fmt.Errorf("error making %q executable: %s", binPath, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
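// Editor's note: an illustrative, hypothetical helper that is not part of the
// original source; it spells out the permission check used above. 0555 is the
// read+execute mask for owner, group and others, so a file missing any of
// those bits fails perm&req != req and is chmod'ed to perm|req.
func exampleNeedsExecBits(perm os.FileMode) bool {
	req := os.FileMode(0555)
	// e.g. perm 0700 returns true (group/other bits missing); 0755 returns false.
	return perm&req != req
}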
|
||||
|
||||
// getListener returns a listener on a free port between the lower and upper
// bounds
|
||||
func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) {
|
||||
if runtime.GOOS == "windows" {
|
||||
return e.listenerTCP(lowerBound, upperBound)
|
||||
}
|
||||
|
||||
return e.listenerUnix()
|
||||
}
|
||||
|
||||
// listenerTCP creates a TCP listener using an unused port between an upper and
|
||||
// lower bound
|
||||
func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) {
|
||||
for i := lowerBound; i <= upperBound; i++ {
|
||||
addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
l, err := net.ListenTCP("tcp", addr)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
return l, nil
|
||||
}
|
||||
return nil, fmt.Errorf("No free port found")
|
||||
}
|
||||
|
||||
// listenerUnix creates a Unix domain socket
|
||||
func (e *UniversalExecutor) listenerUnix() (net.Listener, error) {
|
||||
f, err := ioutil.TempFile("", "plugin")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path := f.Name()
|
||||
|
||||
if err := f.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := os.Remove(path); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return net.Listen("unix", path)
|
||||
}
|
||||
|
||||
// createCheckMap creates a map of checks that the executor will handle on its
// own
|
||||
func (e *UniversalExecutor) createCheckMap() map[string]struct{} {
|
||||
checks := map[string]struct{}{
|
||||
"script": struct{}{},
|
||||
}
|
||||
return checks
|
||||
}
|
||||
|
||||
// createCheck creates NomadCheck from a ServiceCheck
|
||||
func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID string) (consul.Check, error) {
|
||||
if check.Type == structs.ServiceCheckScript && e.ctx.Driver == "docker" {
|
||||
return &DockerScriptCheck{
|
||||
id: checkID,
|
||||
interval: check.Interval,
|
||||
timeout: check.Timeout,
|
||||
containerID: e.consulCtx.ContainerID,
|
||||
logger: e.logger,
|
||||
cmd: check.Command,
|
||||
args: check.Args,
|
||||
}, nil
|
||||
}
|
||||
|
||||
if check.Type == structs.ServiceCheckScript && (e.ctx.Driver == "exec" ||
|
||||
e.ctx.Driver == "raw_exec" || e.ctx.Driver == "java") {
|
||||
return &ExecScriptCheck{
|
||||
id: checkID,
|
||||
interval: check.Interval,
|
||||
timeout: check.Timeout,
|
||||
cmd: check.Command,
|
||||
args: check.Args,
|
||||
taskDir: e.taskDir,
|
||||
FSIsolation: e.command.FSIsolation,
|
||||
}, nil
|
||||
|
||||
}
|
||||
return nil, fmt.Errorf("couldn't create check for %v", check.Name)
|
||||
}
|
||||
|
||||
// interpolateServices interpolates tags in a service and checks with values from the
|
||||
// task's environment.
|
||||
func (e *UniversalExecutor) interpolateServices(task *structs.Task) {
|
||||
e.ctx.TaskEnv.Build()
|
||||
for _, service := range task.Services {
|
||||
for _, check := range service.Checks {
|
||||
if check.Type == structs.ServiceCheckScript {
|
||||
check.Name = e.ctx.TaskEnv.ReplaceEnv(check.Name)
|
||||
check.Command = e.ctx.TaskEnv.ReplaceEnv(check.Command)
|
||||
check.Args = e.ctx.TaskEnv.ParseAndReplace(check.Args)
|
||||
check.Path = e.ctx.TaskEnv.ReplaceEnv(check.Path)
|
||||
check.Protocol = e.ctx.TaskEnv.ReplaceEnv(check.Protocol)
|
||||
}
|
||||
}
|
||||
service.Name = e.ctx.TaskEnv.ReplaceEnv(service.Name)
|
||||
service.Tags = e.ctx.TaskEnv.ParseAndReplace(service.Tags)
|
||||
}
|
||||
}
|
||||
|
||||
// collectPids collects the pids of the child processes that the executor is
|
||||
// running every 5 seconds
|
||||
func (e *UniversalExecutor) collectPids() {
|
||||
// Fire the timer right away when the executor starts from there on the pids
|
||||
// are collected every scan interval
|
||||
timer := time.NewTimer(0)
|
||||
defer timer.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-timer.C:
|
||||
pids, err := e.getAllPids()
|
||||
if err != nil {
|
||||
e.logger.Printf("[DEBUG] executor: error collecting pids: %v", err)
|
||||
}
|
||||
e.pidLock.Lock()
|
||||
|
||||
// Adding pids which are not being tracked
|
||||
for pid, np := range pids {
|
||||
if _, ok := e.pids[pid]; !ok {
|
||||
e.pids[pid] = np
|
||||
}
|
||||
}
|
||||
// Removing pids which are no longer present
|
||||
for pid := range e.pids {
|
||||
if _, ok := pids[pid]; !ok {
|
||||
delete(e.pids, pid)
|
||||
}
|
||||
}
|
||||
e.pidLock.Unlock()
|
||||
timer.Reset(pidScanInterval)
|
||||
case <-e.processExited:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// scanPids scans all the pids on the machine running the current executor and
|
||||
// returns the child processes of the executor.
|
||||
func (e *UniversalExecutor) scanPids(parentPid int, allPids []ps.Process) (map[int]*nomadPid, error) {
|
||||
processFamily := make(map[int]struct{})
|
||||
processFamily[parentPid] = struct{}{}
|
||||
|
||||
// A buffer for holding pids which haven't matched with any parent pid
|
||||
var pidsRemaining []ps.Process
|
||||
for {
|
||||
// flag to indicate if we have found a match
|
||||
foundNewPid := false
|
||||
|
||||
for _, pid := range allPids {
|
||||
_, childPid := processFamily[pid.PPid()]
|
||||
|
||||
// checking if the pid is a child of any of the parents
|
||||
if childPid {
|
||||
processFamily[pid.Pid()] = struct{}{}
|
||||
foundNewPid = true
|
||||
} else {
|
||||
// if it is not, then we add the pid to the buffer
|
||||
pidsRemaining = append(pidsRemaining, pid)
|
||||
}
|
||||
// scan only the pids which are left in the buffer
|
||||
allPids = pidsRemaining
|
||||
}
|
||||
|
||||
// not scanning anymore if we couldn't find a single match
|
||||
if !foundNewPid {
|
||||
break
|
||||
}
|
||||
}
|
||||
res := make(map[int]*nomadPid)
|
||||
for pid := range processFamily {
|
||||
np := nomadPid{
|
||||
pid: pid,
|
||||
cpuStatsTotal: stats.NewCpuStats(),
|
||||
cpuStatsUser: stats.NewCpuStats(),
|
||||
cpuStatsSys: stats.NewCpuStats(),
|
||||
}
|
||||
res[pid] = &np
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// aggregatedResourceUsage aggregates the resource usage of all the pids and
|
||||
// returns a TaskResourceUsage data point
|
||||
func (e *UniversalExecutor) aggregatedResourceUsage(pidStats map[string]*cstructs.ResourceUsage) *cstructs.TaskResourceUsage {
|
||||
ts := time.Now().UTC().UnixNano()
|
||||
var (
|
||||
systemModeCPU, userModeCPU, percent float64
|
||||
totalRSS, totalSwap uint64
|
||||
)
|
||||
|
||||
for _, pidStat := range pidStats {
|
||||
systemModeCPU += pidStat.CpuStats.SystemMode
|
||||
userModeCPU += pidStat.CpuStats.UserMode
|
||||
percent += pidStat.CpuStats.Percent
|
||||
|
||||
totalRSS += pidStat.MemoryStats.RSS
|
||||
totalSwap += pidStat.MemoryStats.Swap
|
||||
}
|
||||
|
||||
totalCPU := &cstructs.CpuStats{
|
||||
SystemMode: systemModeCPU,
|
||||
UserMode: userModeCPU,
|
||||
Percent: percent,
|
||||
Measured: ExecutorBasicMeasuredCpuStats,
|
||||
TotalTicks: e.systemCpuStats.TicksConsumed(percent),
|
||||
}
|
||||
|
||||
totalMemory := &cstructs.MemoryStats{
|
||||
RSS: totalRSS,
|
||||
Swap: totalSwap,
|
||||
Measured: ExecutorBasicMeasuredMemStats,
|
||||
}
|
||||
|
||||
resourceUsage := cstructs.ResourceUsage{
|
||||
MemoryStats: totalMemory,
|
||||
CpuStats: totalCPU,
|
||||
}
|
||||
return &cstructs.TaskResourceUsage{
|
||||
ResourceUsage: &resourceUsage,
|
||||
Timestamp: ts,
|
||||
Pids: pidStats,
|
||||
}
|
||||
}
@@ -1,46 +0,0 @@
// +build darwin dragonfly freebsd netbsd openbsd solaris windows

package executor

import (
	"os"

	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/mitchellh/go-ps"
)

func (e *UniversalExecutor) configureChroot() error {
	return nil
}

func (e *UniversalExecutor) removeChrootMounts() error {
	return nil
}

func (e *UniversalExecutor) runAs(userid string) error {
	return nil
}

func (e *UniversalExecutor) applyLimits(pid int) error {
	return nil
}

func (e *UniversalExecutor) configureIsolation() error {
	return nil
}

func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
	pidStats, err := e.pidStats()
	if err != nil {
		return nil, err
	}
	return e.aggregatedResourceUsage(pidStats), nil
}

func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
	allProcesses, err := ps.Processes()
	if err != nil {
		return nil, err
	}
	return e.scanPids(os.Getpid(), allProcesses)
}
@@ -1,373 +0,0 @@
package executor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/user"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/mitchellh/go-ps"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/stats"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
var (
|
||||
// A mapping of directories on the host OS to attempt to embed inside each
|
||||
// task's chroot.
|
||||
chrootEnv = map[string]string{
|
||||
"/bin": "/bin",
|
||||
"/etc": "/etc",
|
||||
"/lib": "/lib",
|
||||
"/lib32": "/lib32",
|
||||
"/lib64": "/lib64",
|
||||
"/run/resolvconf": "/run/resolvconf",
|
||||
"/sbin": "/sbin",
|
||||
"/usr": "/usr",
|
||||
}
|
||||
|
||||
// clockTicks is the clocks per second of the machine
|
||||
clockTicks = uint64(system.GetClockTicks())
|
||||
|
||||
// The statistics the executor exposes when using cgroups
|
||||
ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
|
||||
ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
|
||||
)
|
||||
|
||||
// configureIsolation configures chroot and creates cgroups
|
||||
func (e *UniversalExecutor) configureIsolation() error {
|
||||
if e.command.FSIsolation {
|
||||
if err := e.configureChroot(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if e.command.ResourceLimits {
|
||||
if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
|
||||
return fmt.Errorf("error creating cgroups: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// applyLimits puts a process in a pre-configured cgroup
|
||||
func (e *UniversalExecutor) applyLimits(pid int) error {
|
||||
if !e.command.ResourceLimits {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Entering the process in the cgroup
|
||||
manager := getCgroupManager(e.resConCtx.groups, nil)
|
||||
if err := manager.Apply(pid); err != nil {
|
||||
e.logger.Printf("[ERR] executor: error applying pid to cgroup: %v", err)
|
||||
if er := e.removeChrootMounts(); er != nil {
|
||||
e.logger.Printf("[ERR] executor: error removing chroot: %v", er)
|
||||
}
|
||||
return err
|
||||
}
|
||||
e.resConCtx.cgPaths = manager.GetPaths()
|
||||
cgConfig := cgroupConfig.Config{Cgroups: e.resConCtx.groups}
|
||||
if err := manager.Set(&cgConfig); err != nil {
|
||||
e.logger.Printf("[ERR] executor: error setting cgroup config: %v", err)
|
||||
if er := DestroyCgroup(e.resConCtx.groups, e.resConCtx.cgPaths, os.Getpid()); er != nil {
|
||||
e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er)
|
||||
}
|
||||
if er := e.removeChrootMounts(); er != nil {
|
||||
e.logger.Printf("[ERR] executor: error removing chroot: %v", er)
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// configureCgroups converts a Nomad Resources specification into the equivalent
|
||||
// cgroup configuration. It returns an error if the resources are invalid.
|
||||
func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
|
||||
e.resConCtx.groups = &cgroupConfig.Cgroup{}
|
||||
e.resConCtx.groups.Resources = &cgroupConfig.Resources{}
|
||||
cgroupName := structs.GenerateUUID()
|
||||
e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)
|
||||
|
||||
// TODO: verify this is needed for things like network access
|
||||
e.resConCtx.groups.Resources.AllowAllDevices = true
|
||||
|
||||
if resources.MemoryMB > 0 {
|
||||
// Total amount of memory allowed to consume
|
||||
e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
|
||||
// Disable swap to avoid issues on the machine
|
||||
e.resConCtx.groups.Resources.MemorySwap = int64(-1)
|
||||
}
|
||||
|
||||
if resources.CPU < 2 {
|
||||
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
|
||||
}
|
||||
|
||||
// Set the relative CPU shares for this cgroup.
|
||||
e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU)
|
||||
|
||||
if resources.IOPS != 0 {
|
||||
// Validate it is in an acceptable range.
|
||||
if resources.IOPS < 10 || resources.IOPS > 1000 {
|
||||
return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
|
||||
}
|
||||
|
||||
e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stats reports the resource utilization of the cgroup. If there is no resource
|
||||
// isolation we aggregate the resource utilization of all the pids launched by
|
||||
// the executor.
|
||||
func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
|
||||
if !e.command.ResourceLimits {
|
||||
pidStats, err := e.pidStats()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return e.aggregatedResourceUsage(pidStats), nil
|
||||
}
|
||||
ts := time.Now()
|
||||
manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
|
||||
stats, err := manager.GetStats()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Memory Related Stats
|
||||
swap := stats.MemoryStats.SwapUsage
|
||||
maxUsage := stats.MemoryStats.Usage.MaxUsage
|
||||
rss := stats.MemoryStats.Stats["rss"]
|
||||
cache := stats.MemoryStats.Stats["cache"]
|
||||
ms := &cstructs.MemoryStats{
|
||||
RSS: rss,
|
||||
Cache: cache,
|
||||
Swap: swap.Usage,
|
||||
MaxUsage: maxUsage,
|
||||
KernelUsage: stats.MemoryStats.KernelUsage.Usage,
|
||||
KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
|
||||
Measured: ExecutorCgroupMeasuredMemStats,
|
||||
}
|
||||
|
||||
// CPU Related Stats
|
||||
totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage)
|
||||
userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode)
|
||||
kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode)
|
||||
|
||||
totalPercent := e.totalCpuStats.Percent(totalProcessCPUUsage)
|
||||
cs := &cstructs.CpuStats{
|
||||
SystemMode: e.systemCpuStats.Percent(kernelModeTime),
|
||||
UserMode: e.userCpuStats.Percent(userModeTime),
|
||||
Percent: totalPercent,
|
||||
ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
|
||||
ThrottledTime: stats.CpuStats.ThrottlingData.ThrottledTime,
|
||||
TotalTicks: e.systemCpuStats.TicksConsumed(totalPercent),
|
||||
Measured: ExecutorCgroupMeasuredCpuStats,
|
||||
}
|
||||
taskResUsage := cstructs.TaskResourceUsage{
|
||||
ResourceUsage: &cstructs.ResourceUsage{
|
||||
MemoryStats: ms,
|
||||
CpuStats: cs,
|
||||
},
|
||||
Timestamp: ts.UTC().UnixNano(),
|
||||
}
|
||||
if pidStats, err := e.pidStats(); err == nil {
|
||||
taskResUsage.Pids = pidStats
|
||||
}
|
||||
return &taskResUsage, nil
|
||||
}
|
||||
|
||||
// runAs takes a user id as a string and looks up the user, and sets the command
|
||||
// to execute as that user.
|
||||
func (e *UniversalExecutor) runAs(userid string) error {
|
||||
u, err := user.Lookup(userid)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to identify user %v: %v", userid, err)
|
||||
}
|
||||
|
||||
// Convert the uid and gid
|
||||
uid, err := strconv.ParseUint(u.Uid, 10, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Unable to convert userid to uint32: %s", err)
|
||||
}
|
||||
gid, err := strconv.ParseUint(u.Gid, 10, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Unable to convert groupid to uint32: %s", err)
|
||||
}
|
||||
|
||||
// Set the command to run as that user and group.
|
||||
if e.cmd.SysProcAttr == nil {
|
||||
e.cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
}
|
||||
if e.cmd.SysProcAttr.Credential == nil {
|
||||
e.cmd.SysProcAttr.Credential = &syscall.Credential{}
|
||||
}
|
||||
e.cmd.SysProcAttr.Credential.Uid = uint32(uid)
|
||||
e.cmd.SysProcAttr.Credential.Gid = uint32(gid)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// configureChroot configures a chroot
|
||||
func (e *UniversalExecutor) configureChroot() error {
|
||||
allocDir := e.ctx.AllocDir
|
||||
if err := allocDir.MountSharedDir(e.ctx.Task.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
chroot := chrootEnv
|
||||
if len(e.ctx.ChrootEnv) > 0 {
|
||||
chroot = e.ctx.ChrootEnv
|
||||
}
|
||||
|
||||
if err := allocDir.Embed(e.ctx.Task.Name, chroot); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set the tasks AllocDir environment variable.
|
||||
e.ctx.TaskEnv.
|
||||
SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)).
|
||||
SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)).
|
||||
Build()
|
||||
|
||||
if e.cmd.SysProcAttr == nil {
|
||||
e.cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
}
|
||||
e.cmd.SysProcAttr.Chroot = e.taskDir
|
||||
e.cmd.Dir = "/"
|
||||
|
||||
if err := allocDir.MountSpecialDirs(e.taskDir); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
e.fsIsolationEnforced = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanTaskDir is an idempotent operation to clean the task directory and
|
||||
// should be called when tearing down the task.
|
||||
func (e *UniversalExecutor) removeChrootMounts() error {
|
||||
// Prevent a race between Wait/ForceStop
|
||||
e.resConCtx.cgLock.Lock()
|
||||
defer e.resConCtx.cgLock.Unlock()
|
||||
return e.ctx.AllocDir.UnmountAll()
|
||||
}
|
||||
|
||||
// getAllPids returns the pids of all the processes spun up by the executor. We
|
||||
// use the libcontainer apis to get the pids when the user is using cgroup
|
||||
// isolation and we scan the entire process table if the user is not using any
|
||||
// isolation
|
||||
func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
|
||||
if e.command.ResourceLimits {
|
||||
manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
|
||||
pids, err := manager.GetAllPids()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
np := make(map[int]*nomadPid, len(pids))
|
||||
for _, pid := range pids {
|
||||
np[pid] = &nomadPid{
|
||||
pid: pid,
|
||||
cpuStatsTotal: stats.NewCpuStats(),
|
||||
cpuStatsSys: stats.NewCpuStats(),
|
||||
cpuStatsUser: stats.NewCpuStats(),
|
||||
}
|
||||
}
|
||||
return np, nil
|
||||
}
|
||||
allProcesses, err := ps.Processes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return e.scanPids(os.Getpid(), allProcesses)
|
||||
}
|
||||
|
||||
// destroyCgroup kills all processes in the cgroup and removes the cgroup
|
||||
// configuration from the host. This function is idempotent.
|
||||
func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error {
|
||||
mErrs := new(multierror.Error)
|
||||
if groups == nil {
|
||||
return fmt.Errorf("Can't destroy: cgroup configuration empty")
|
||||
}
|
||||
|
||||
// Move the executor into the global cgroup so that the task specific
|
||||
// cgroup can be destroyed.
|
||||
nilGroup := &cgroupConfig.Cgroup{}
|
||||
nilGroup.Path = "/"
|
||||
nilGroup.Resources = groups.Resources
|
||||
nilManager := getCgroupManager(nilGroup, nil)
|
||||
err := nilManager.Apply(executorPid)
|
||||
if err != nil && !strings.Contains(err.Error(), "no such process") {
|
||||
return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err)
|
||||
}
|
||||
|
||||
// Freeze the Cgroup so that it can not continue to fork/exec.
|
||||
manager := getCgroupManager(groups, cgPaths)
|
||||
err = manager.Freeze(cgroupConfig.Frozen)
|
||||
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
|
||||
return fmt.Errorf("failed to freeze cgroup: %v", err)
|
||||
}
|
||||
|
||||
var procs []*os.Process
|
||||
pids, err := manager.GetAllPids()
|
||||
if err != nil {
|
||||
multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err))
|
||||
|
||||
// Unfreeze the cgroup.
|
||||
err = manager.Freeze(cgroupConfig.Thawed)
|
||||
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
|
||||
multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
|
||||
}
|
||||
return mErrs.ErrorOrNil()
|
||||
}
|
||||
|
||||
// Kill the processes in the cgroup
|
||||
for _, pid := range pids {
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err))
|
||||
continue
|
||||
}
|
||||
|
||||
procs = append(procs, proc)
|
||||
if e := proc.Kill(); e != nil {
|
||||
multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e))
|
||||
}
|
||||
}
|
||||
|
||||
// Unfreeze the cgroug so we can wait.
|
||||
err = manager.Freeze(cgroupConfig.Thawed)
|
||||
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
|
||||
multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
|
||||
}
|
||||
|
||||
// Wait on the killed processes to ensure they are cleaned up.
|
||||
for _, proc := range procs {
|
||||
// Don't capture the error because we expect this to fail for
|
||||
// processes we didn't fork.
|
||||
proc.Wait()
|
||||
}
|
||||
|
||||
// Remove the cgroup.
|
||||
if err := manager.Destroy(); err != nil {
|
||||
multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err))
|
||||
}
|
||||
return mErrs.ErrorOrNil()
|
||||
}
|
||||
|
||||
// getCgroupManager returns the correct libcontainer cgroup manager.
|
||||
func getCgroupManager(groups *cgroupConfig.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &cgroupFs.Manager{Cgroups: groups, Paths: paths}
|
||||
}
@@ -1,50 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris

package executor

import (
	"fmt"
	"io"
	"log/syslog"

	"github.com/hashicorp/nomad/client/driver/logging"
)

func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
	e.ctx = ctx

	// configuring the task dir
	if err := e.configureTaskDir(); err != nil {
		return nil, err
	}

	e.syslogChan = make(chan *logging.SyslogMessage, 2048)
	l, err := e.getListener(e.ctx.PortLowerBound, e.ctx.PortUpperBound)
	if err != nil {
		return nil, err
	}
	e.logger.Printf("[DEBUG] sylog-server: launching syslog server on addr: %v", l.Addr().String())
	if err := e.configureLoggers(); err != nil {
		return nil, err
	}

	e.syslogServer = logging.NewSyslogServer(l, e.syslogChan, e.logger)
	go e.syslogServer.Start()
	go e.collectLogs(e.lre, e.lro)
	syslogAddr := fmt.Sprintf("%s://%s", l.Addr().Network(), l.Addr().String())
	return &SyslogServerState{Addr: syslogAddr}, nil
}

func (e *UniversalExecutor) collectLogs(we io.Writer, wo io.Writer) {
	for logParts := range e.syslogChan {
		// If the severity of the log line is err then we write to stderr
		// otherwise all messages go to stdout
		if logParts.Severity == syslog.LOG_ERR {
			e.lre.Write(logParts.Message)
			e.lre.Write([]byte{'\n'})
		} else {
			e.lro.Write(logParts.Message)
			e.lro.Write([]byte{'\n'})
		}
	}
}
@@ -1,5 +0,0 @@
package executor

func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
	return nil, nil
}
vendor/github.com/hashicorp/nomad/client/driver/executor/resource_container_default.go (generated, vendored)
@@ -1,24 +0,0 @@
// +build darwin dragonfly freebsd netbsd openbsd solaris windows

package executor

import (
	dstructs "github.com/hashicorp/nomad/client/driver/structs"
)

// resourceContainerContext is a platform-specific struct for managing a
// resource container.
type resourceContainerContext struct {
}

func clientCleanup(ic *dstructs.IsolationConfig, pid int) error {
	return nil
}

func (rc *resourceContainerContext) executorCleanup() error {
	return nil
}

func (rc *resourceContainerContext) getIsolationConfig() *dstructs.IsolationConfig {
	return nil
}
vendor/github.com/hashicorp/nomad/client/driver/executor/resource_container_linux.go (generated, vendored)
@@ -1,42 +0,0 @@
package executor

import (
	"os"
	"sync"

	dstructs "github.com/hashicorp/nomad/client/driver/structs"
	cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)

// resourceContainerContext is a platform-specific struct for managing a
// resource container. In the case of Linux, this is used to control Cgroups.
type resourceContainerContext struct {
	groups  *cgroupConfig.Cgroup
	cgPaths map[string]string
	cgLock  sync.Mutex
}

// clientCleanup removes this host's Cgroup from the Nomad Client's context
func clientCleanup(ic *dstructs.IsolationConfig, pid int) error {
	if err := DestroyCgroup(ic.Cgroup, ic.CgroupPaths, pid); err != nil {
		return err
	}
	return nil
}

// executorCleanup removes this host's Cgroup from within an Executor's context
func (rc *resourceContainerContext) executorCleanup() error {
	rc.cgLock.Lock()
	defer rc.cgLock.Unlock()
	if err := DestroyCgroup(rc.groups, rc.cgPaths, os.Getpid()); err != nil {
		return err
	}
	return nil
}

func (rc *resourceContainerContext) getIsolationConfig() *dstructs.IsolationConfig {
	return &dstructs.IsolationConfig{
		Cgroup:      rc.groups,
		CgroupPaths: rc.cgPaths,
	}
}
@@ -1,181 +0,0 @@
package driver
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"log"
|
||||
"net/rpc"
|
||||
|
||||
"github.com/hashicorp/go-plugin"
|
||||
"github.com/hashicorp/nomad/client/driver/executor"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// Registering these types since we have to serialize and de-serialize the Task
|
||||
// structs over the wire between drivers and the executor.
|
||||
func init() {
|
||||
gob.Register([]interface{}{})
|
||||
gob.Register(map[string]interface{}{})
|
||||
gob.Register([]map[string]string{})
|
||||
gob.Register([]map[string]int{})
|
||||
}
|
||||
|
||||
type ExecutorRPC struct {
|
||||
client *rpc.Client
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// LaunchCmdArgs wraps a user command and the args for the purposes of RPC
|
||||
type LaunchCmdArgs struct {
|
||||
Cmd *executor.ExecCommand
|
||||
Ctx *executor.ExecutorContext
|
||||
}
|
||||
|
||||
// LaunchSyslogServerArgs wraps the executor context for the purposes of RPC
|
||||
type LaunchSyslogServerArgs struct {
|
||||
Ctx *executor.ExecutorContext
|
||||
}
|
||||
|
||||
// SyncServicesArgs wraps the consul context for the purposes of RPC
|
||||
type SyncServicesArgs struct {
|
||||
Ctx *executor.ConsulContext
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) LaunchCmd(cmd *executor.ExecCommand, ctx *executor.ExecutorContext) (*executor.ProcessState, error) {
|
||||
var ps *executor.ProcessState
|
||||
err := e.client.Call("Plugin.LaunchCmd", LaunchCmdArgs{Cmd: cmd, Ctx: ctx}, &ps)
|
||||
return ps, err
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) LaunchSyslogServer(ctx *executor.ExecutorContext) (*executor.SyslogServerState, error) {
|
||||
var ss *executor.SyslogServerState
|
||||
err := e.client.Call("Plugin.LaunchSyslogServer", LaunchSyslogServerArgs{Ctx: ctx}, &ss)
|
||||
return ss, err
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) Wait() (*executor.ProcessState, error) {
|
||||
var ps executor.ProcessState
|
||||
err := e.client.Call("Plugin.Wait", new(interface{}), &ps)
|
||||
return &ps, err
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) ShutDown() error {
|
||||
return e.client.Call("Plugin.ShutDown", new(interface{}), new(interface{}))
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) Exit() error {
|
||||
return e.client.Call("Plugin.Exit", new(interface{}), new(interface{}))
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) UpdateLogConfig(logConfig *structs.LogConfig) error {
|
||||
return e.client.Call("Plugin.UpdateLogConfig", logConfig, new(interface{}))
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) UpdateTask(task *structs.Task) error {
|
||||
return e.client.Call("Plugin.UpdateTask", task, new(interface{}))
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) SyncServices(ctx *executor.ConsulContext) error {
|
||||
return e.client.Call("Plugin.SyncServices", SyncServicesArgs{Ctx: ctx}, new(interface{}))
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) DeregisterServices() error {
|
||||
return e.client.Call("Plugin.DeregisterServices", new(interface{}), new(interface{}))
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) Version() (*executor.ExecutorVersion, error) {
|
||||
var version executor.ExecutorVersion
|
||||
err := e.client.Call("Plugin.Version", new(interface{}), &version)
|
||||
return &version, err
|
||||
}
|
||||
|
||||
func (e *ExecutorRPC) Stats() (*cstructs.TaskResourceUsage, error) {
|
||||
var resourceUsage cstructs.TaskResourceUsage
|
||||
err := e.client.Call("Plugin.Stats", new(interface{}), &resourceUsage)
|
||||
return &resourceUsage, err
|
||||
}
|
||||
|
||||
type ExecutorRPCServer struct {
|
||||
Impl executor.Executor
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) LaunchCmd(args LaunchCmdArgs, ps *executor.ProcessState) error {
|
||||
state, err := e.Impl.LaunchCmd(args.Cmd, args.Ctx)
|
||||
if state != nil {
|
||||
*ps = *state
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) LaunchSyslogServer(args LaunchSyslogServerArgs, ss *executor.SyslogServerState) error {
|
||||
state, err := e.Impl.LaunchSyslogServer(args.Ctx)
|
||||
if state != nil {
|
||||
*ss = *state
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) Wait(args interface{}, ps *executor.ProcessState) error {
|
||||
state, err := e.Impl.Wait()
|
||||
if state != nil {
|
||||
*ps = *state
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) ShutDown(args interface{}, resp *interface{}) error {
|
||||
return e.Impl.ShutDown()
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) Exit(args interface{}, resp *interface{}) error {
|
||||
return e.Impl.Exit()
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) UpdateLogConfig(args *structs.LogConfig, resp *interface{}) error {
|
||||
return e.Impl.UpdateLogConfig(args)
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) UpdateTask(args *structs.Task, resp *interface{}) error {
|
||||
return e.Impl.UpdateTask(args)
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) SyncServices(args SyncServicesArgs, resp *interface{}) error {
|
||||
return e.Impl.SyncServices(args.Ctx)
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) DeregisterServices(args interface{}, resp *interface{}) error {
|
||||
return e.Impl.DeregisterServices()
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) Version(args interface{}, version *executor.ExecutorVersion) error {
|
||||
ver, err := e.Impl.Version()
|
||||
if ver != nil {
|
||||
*version = *ver
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *ExecutorRPCServer) Stats(args interface{}, resourceUsage *cstructs.TaskResourceUsage) error {
|
||||
ru, err := e.Impl.Stats()
|
||||
if ru != nil {
|
||||
*resourceUsage = *ru
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
type ExecutorPlugin struct {
|
||||
logger *log.Logger
|
||||
Impl *ExecutorRPCServer
|
||||
}
|
||||
|
||||
func (p *ExecutorPlugin) Server(*plugin.MuxBroker) (interface{}, error) {
|
||||
if p.Impl == nil {
|
||||
p.Impl = &ExecutorRPCServer{Impl: executor.NewExecutor(p.logger), logger: p.logger}
|
||||
}
|
||||
return p.Impl, nil
|
||||
}
|
||||
|
||||
func (p *ExecutorPlugin) Client(b *plugin.MuxBroker, c *rpc.Client) (interface{}, error) {
|
||||
return &ExecutorRPC{client: c, logger: p.logger}, nil
|
||||
}
@@ -1,416 +0,0 @@
package driver
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/go-plugin"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/executor"
|
||||
dstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
"github.com/hashicorp/nomad/client/fingerprint"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/helper/discover"
|
||||
"github.com/hashicorp/nomad/helper/fields"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
const (
|
||||
// The key populated in Node Attributes to indicate presence of the Java
|
||||
// driver
|
||||
javaDriverAttr = "driver.java"
|
||||
)
|
||||
|
||||
// JavaDriver is a simple driver to execute applications packaged in Jars.
|
||||
// It literally just fork/execs tasks with the java command.
|
||||
type JavaDriver struct {
|
||||
DriverContext
|
||||
fingerprint.StaticFingerprinter
|
||||
}
|
||||
|
||||
type JavaDriverConfig struct {
|
||||
JarPath string `mapstructure:"jar_path"`
|
||||
JvmOpts []string `mapstructure:"jvm_options"`
|
||||
Args []string `mapstructure:"args"`
|
||||
}
|
||||
|
||||
// javaHandle is returned from Start/Open as a handle to the PID
|
||||
type javaHandle struct {
|
||||
pluginClient *plugin.Client
|
||||
userPid int
|
||||
executor executor.Executor
|
||||
isolationConfig *dstructs.IsolationConfig
|
||||
|
||||
taskDir string
|
||||
allocDir *allocdir.AllocDir
|
||||
killTimeout time.Duration
|
||||
maxKillTimeout time.Duration
|
||||
version string
|
||||
logger *log.Logger
|
||||
waitCh chan *dstructs.WaitResult
|
||||
doneCh chan struct{}
|
||||
}
|
||||
|
||||
// NewJavaDriver is used to create a new exec driver
|
||||
func NewJavaDriver(ctx *DriverContext) Driver {
|
||||
return &JavaDriver{DriverContext: *ctx}
|
||||
}
|
||||
|
||||
// Validate is used to validate the driver configuration
|
||||
func (d *JavaDriver) Validate(config map[string]interface{}) error {
|
||||
fd := &fields.FieldData{
|
||||
Raw: config,
|
||||
Schema: map[string]*fields.FieldSchema{
|
||||
"jar_path": &fields.FieldSchema{
|
||||
Type: fields.TypeString,
|
||||
Required: true,
|
||||
},
|
||||
"jvm_options": &fields.FieldSchema{
|
||||
Type: fields.TypeArray,
|
||||
},
|
||||
"args": &fields.FieldSchema{
|
||||
Type: fields.TypeArray,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if err := fd.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *JavaDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// Get the current status so that we can log any debug messages only if the
|
||||
// state changes
|
||||
_, currentlyEnabled := node.Attributes[javaDriverAttr]
|
||||
|
||||
// Only enable if we are root and cgroups are mounted when running on linux systems.
|
||||
if runtime.GOOS == "linux" && (syscall.Geteuid() != 0 || !d.cgroupsMounted(node)) {
|
||||
if currentlyEnabled {
|
||||
d.logger.Printf("[DEBUG] driver.java: root priviledges and mounted cgroups required on linux, disabling")
|
||||
}
|
||||
delete(node.Attributes, "driver.java")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Find java version
|
||||
var out bytes.Buffer
|
||||
var erOut bytes.Buffer
|
||||
cmd := exec.Command("java", "-version")
|
||||
cmd.Stdout = &out
|
||||
cmd.Stderr = &erOut
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
// assume Java wasn't found
|
||||
delete(node.Attributes, javaDriverAttr)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// 'java -version' returns output on Stderr typically.
|
||||
// Check stdout, but it's probably empty
|
||||
var infoString string
|
||||
if out.String() != "" {
|
||||
infoString = out.String()
|
||||
}
|
||||
|
||||
if erOut.String() != "" {
|
||||
infoString = erOut.String()
|
||||
}
|
||||
|
||||
if infoString == "" {
|
||||
if currentlyEnabled {
|
||||
d.logger.Println("[WARN] driver.java: error parsing Java version information, aborting")
|
||||
}
|
||||
delete(node.Attributes, javaDriverAttr)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Assume 'java -version' returns 3 lines:
|
||||
// java version "1.6.0_36"
|
||||
// OpenJDK Runtime Environment (IcedTea6 1.13.8) (6b36-1.13.8-0ubuntu1~12.04)
|
||||
// OpenJDK 64-Bit Server VM (build 23.25-b01, mixed mode)
|
||||
// Each line is terminated by \n
|
||||
info := strings.Split(infoString, "\n")
|
||||
versionString := info[0]
|
||||
versionString = strings.TrimPrefix(versionString, "java version ")
|
||||
versionString = strings.Trim(versionString, "\"")
|
||||
node.Attributes[javaDriverAttr] = "1"
|
||||
node.Attributes["driver.java.version"] = versionString
|
||||
node.Attributes["driver.java.runtime"] = info[1]
|
||||
node.Attributes["driver.java.vm"] = info[2]
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
|
||||
var driverConfig JavaDriverConfig
|
||||
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Set the host environment variables.
|
||||
filter := strings.Split(d.config.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",")
|
||||
d.taskEnv.AppendHostEnvvars(filter)
|
||||
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
|
||||
}
|
||||
|
||||
if driverConfig.JarPath == "" {
|
||||
return nil, fmt.Errorf("jar_path must be specified")
|
||||
}
|
||||
|
||||
args := []string{}
|
||||
// Look for jvm options
|
||||
if len(driverConfig.JvmOpts) != 0 {
|
||||
d.logger.Printf("[DEBUG] driver.java: found JVM options: %s", driverConfig.JvmOpts)
|
||||
args = append(args, driverConfig.JvmOpts...)
|
||||
}
|
||||
|
||||
// Build the argument list.
|
||||
args = append(args, "-jar", driverConfig.JarPath)
|
||||
if len(driverConfig.Args) != 0 {
|
||||
args = append(args, driverConfig.Args...)
|
||||
}
|
||||
|
||||
bin, err := discover.NomadExecutable()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
|
||||
}
|
||||
|
||||
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Cmd: exec.Command(bin, "executor", pluginLogFile),
|
||||
}
|
||||
|
||||
execIntf, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
executorCtx := &executor.ExecutorContext{
|
||||
TaskEnv: d.taskEnv,
|
||||
Driver: "java",
|
||||
AllocDir: ctx.AllocDir,
|
||||
AllocID: ctx.AllocID,
|
||||
ChrootEnv: d.config.ChrootEnv,
|
||||
Task: task,
|
||||
}
|
||||
|
||||
absPath, err := GetAbsolutePath("java")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ps, err := execIntf.LaunchCmd(&executor.ExecCommand{
|
||||
Cmd: absPath,
|
||||
Args: args,
|
||||
FSIsolation: true,
|
||||
ResourceLimits: true,
|
||||
User: getExecutorUser(task),
|
||||
}, executorCtx)
|
||||
if err != nil {
|
||||
pluginClient.Kill()
|
||||
return nil, err
|
||||
}
|
||||
d.logger.Printf("[DEBUG] driver.java: started process with pid: %v", ps.Pid)
|
||||
|
||||
// Return a driver handle
|
||||
maxKill := d.DriverContext.config.MaxKillTimeout
|
||||
h := &javaHandle{
|
||||
pluginClient: pluginClient,
|
||||
executor: execIntf,
|
||||
userPid: ps.Pid,
|
||||
isolationConfig: ps.IsolationConfig,
|
||||
taskDir: taskDir,
|
||||
allocDir: ctx.AllocDir,
|
||||
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
|
||||
maxKillTimeout: maxKill,
|
||||
version: d.config.Version,
|
||||
logger: d.logger,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
d.logger.Printf("[ERR] driver.java: error registering services with consul for task: %q: %v", task.Name, err)
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
// cgroupsMounted returns true if the cgroups are mounted on a system otherwise
|
||||
// returns false
|
||||
func (d *JavaDriver) cgroupsMounted(node *structs.Node) bool {
|
||||
_, ok := node.Attributes["unique.cgroup.mountpoint"]
|
||||
return ok
|
||||
}
|
||||
|
||||
type javaId struct {
|
||||
Version string
|
||||
KillTimeout time.Duration
|
||||
MaxKillTimeout time.Duration
|
||||
PluginConfig *PluginReattachConfig
|
||||
IsolationConfig *dstructs.IsolationConfig
|
||||
TaskDir string
|
||||
AllocDir *allocdir.AllocDir
|
||||
UserPid int
|
||||
}
|
||||
|
||||
func (d *JavaDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
|
||||
id := &javaId{}
|
||||
if err := json.Unmarshal([]byte(handleID), id); err != nil {
|
||||
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
|
||||
}
|
||||
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Reattach: id.PluginConfig.PluginConfig(),
|
||||
}
|
||||
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
merrs := new(multierror.Error)
|
||||
merrs.Errors = append(merrs.Errors, err)
|
||||
d.logger.Println("[ERR] driver.java: error connecting to plugin so destroying plugin pid and user pid")
|
||||
if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil {
|
||||
merrs.Errors = append(merrs.Errors, fmt.Errorf("error destroying plugin and userpid: %v", e))
|
||||
}
|
||||
if id.IsolationConfig != nil {
|
||||
ePid := pluginConfig.Reattach.Pid
|
||||
if e := executor.ClientCleanup(id.IsolationConfig, ePid); e != nil {
|
||||
merrs.Errors = append(merrs.Errors, fmt.Errorf("destroying resource container failed: %v", e))
|
||||
}
|
||||
}
|
||||
if e := ctx.AllocDir.UnmountAll(); e != nil {
|
||||
merrs.Errors = append(merrs.Errors, e)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("error connecting to plugin: %v", merrs.ErrorOrNil())
|
||||
}
|
||||
|
||||
ver, _ := exec.Version()
|
||||
d.logger.Printf("[DEBUG] driver.java: version of executor: %v", ver.Version)
|
||||
|
||||
// Return a driver handle
|
||||
h := &javaHandle{
|
||||
pluginClient: pluginClient,
|
||||
executor: exec,
|
||||
userPid: id.UserPid,
|
||||
isolationConfig: id.IsolationConfig,
|
||||
taskDir: id.TaskDir,
|
||||
allocDir: id.AllocDir,
|
||||
logger: d.logger,
|
||||
version: id.Version,
|
||||
killTimeout: id.KillTimeout,
|
||||
maxKillTimeout: id.MaxKillTimeout,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
d.logger.Printf("[ERR] driver.java: error registering services with consul: %v", err)
|
||||
}
|
||||
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
func (h *javaHandle) ID() string {
|
||||
id := javaId{
|
||||
Version: h.version,
|
||||
KillTimeout: h.killTimeout,
|
||||
MaxKillTimeout: h.maxKillTimeout,
|
||||
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
|
||||
UserPid: h.userPid,
|
||||
TaskDir: h.taskDir,
|
||||
AllocDir: h.allocDir,
|
||||
IsolationConfig: h.isolationConfig,
|
||||
}
|
||||
|
||||
data, err := json.Marshal(id)
|
||||
if err != nil {
|
||||
h.logger.Printf("[ERR] driver.java: failed to marshal ID to JSON: %s", err)
|
||||
}
|
||||
return string(data)
|
||||
}
|
||||
|
||||
func (h *javaHandle) WaitCh() chan *dstructs.WaitResult {
|
||||
return h.waitCh
|
||||
}
|
||||
|
||||
func (h *javaHandle) Update(task *structs.Task) error {
|
||||
// Store the updated kill timeout.
|
||||
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
|
||||
h.executor.UpdateTask(task)
|
||||
|
||||
// Update is not possible
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *javaHandle) Kill() error {
|
||||
if err := h.executor.ShutDown(); err != nil {
|
||||
if h.pluginClient.Exited() {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("executor Shutdown failed: %v", err)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-h.doneCh:
|
||||
return nil
|
||||
case <-time.After(h.killTimeout):
|
||||
if h.pluginClient.Exited() {
|
||||
return nil
|
||||
}
|
||||
if err := h.executor.Exit(); err != nil {
|
||||
return fmt.Errorf("executor Exit failed: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (h *javaHandle) Stats() (*cstructs.TaskResourceUsage, error) {
|
||||
return h.executor.Stats()
|
||||
}
|
||||
|
||||
func (h *javaHandle) run() {
|
||||
ps, err := h.executor.Wait()
|
||||
close(h.doneCh)
|
||||
if ps.ExitCode == 0 && err != nil {
|
||||
if h.isolationConfig != nil {
|
||||
ePid := h.pluginClient.ReattachConfig().Pid
|
||||
if e := executor.ClientCleanup(h.isolationConfig, ePid); e != nil {
|
||||
h.logger.Printf("[ERR] driver.java: destroying resource container failed: %v", e)
|
||||
}
|
||||
} else {
|
||||
if e := killProcess(h.userPid); e != nil {
|
||||
h.logger.Printf("[ERR] driver.java: error killing user process: %v", e)
|
||||
}
|
||||
}
|
||||
if e := h.allocDir.UnmountAll(); e != nil {
|
||||
h.logger.Printf("[ERR] driver.java: unmounting dev,proc and alloc dirs failed: %v", e)
|
||||
}
|
||||
}
|
||||
h.waitCh <- &dstructs.WaitResult{ExitCode: ps.ExitCode, Signal: ps.Signal, Err: err}
|
||||
close(h.waitCh)
|
||||
|
||||
// Remove services
|
||||
if err := h.executor.DeregisterServices(); err != nil {
|
||||
h.logger.Printf("[ERR] driver.java: failed to kill the deregister services: %v", err)
|
||||
}
|
||||
|
||||
h.executor.Exit()
|
||||
h.pluginClient.Kill()
|
||||
}
@@ -1,71 +0,0 @@
package logging
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
cstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// LogCollectorContext holds context to configure the syslog server
|
||||
type LogCollectorContext struct {
|
||||
// TaskName is the name of the Task
|
||||
TaskName string
|
||||
|
||||
// AllocDir is the handle to do operations on the alloc dir of
|
||||
// the task
|
||||
AllocDir *allocdir.AllocDir
|
||||
|
||||
// LogConfig provides configuration related to log rotation
|
||||
LogConfig *structs.LogConfig
|
||||
|
||||
// PortUpperBound is the upper bound of the ports that we can use to start
|
||||
// the syslog server
|
||||
PortUpperBound uint
|
||||
|
||||
// PortLowerBound is the lower bound of the ports that we can use to start
|
||||
// the syslog server
|
||||
PortLowerBound uint
|
||||
}
|
||||
|
||||
// SyslogCollectorState holds the address and isolation information of a launched
|
||||
// syslog server
|
||||
type SyslogCollectorState struct {
|
||||
IsolationConfig *cstructs.IsolationConfig
|
||||
Addr string
|
||||
}
|
||||
|
||||
// LogCollector is an interface which allows a driver to launch a log server
|
||||
// and update log configuration
|
||||
type LogCollector interface {
|
||||
LaunchCollector(ctx *LogCollectorContext) (*SyslogCollectorState, error)
|
||||
Exit() error
|
||||
UpdateLogConfig(logConfig *structs.LogConfig) error
|
||||
}
|
||||
|
||||
// SyslogCollector is a LogCollector which starts a syslog server and does
|
||||
// rotation to incoming stream
|
||||
type SyslogCollector struct {
|
||||
}
|
||||
|
||||
// NewSyslogCollector returns an implementation of the SyslogCollector
|
||||
func NewSyslogCollector(logger *log.Logger) *SyslogCollector {
|
||||
return &SyslogCollector{}
|
||||
}
|
||||
|
||||
// LaunchCollector launches a new syslog server and starts writing log lines to
|
||||
// files and rotates them
|
||||
func (s *SyslogCollector) LaunchCollector(ctx *LogCollectorContext) (*SyslogCollectorState, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Exit kills the syslog server
|
||||
func (s *SyslogCollector) Exit() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateLogConfig updates the log configuration
|
||||
func (s *SyslogCollector) UpdateLogConfig(logConfig *structs.LogConfig) error {
|
||||
return nil
|
||||
}
@@ -1,285 +0,0 @@
package logging
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
bufSize = 32768
|
||||
flushDur = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
// FileRotator writes bytes to a rotated set of files
|
||||
type FileRotator struct {
|
||||
MaxFiles int // MaxFiles is the maximum number of rotated files allowed in a path
|
||||
FileSize int64 // FileSize is the size a rotated file is allowed to grow
|
||||
|
||||
path string // path is the path on the file system where the rotated set of files are opened
|
||||
baseFileName string // baseFileName is the base file name of the rotated files
|
||||
logFileIdx int // logFileIdx is the current index of the rotated files
|
||||
oldestLogFileIdx int // oldestLogFileIdx is the index of the oldest log file in a path
|
||||
|
||||
currentFile *os.File // currentFile is the file that is currently getting written
|
||||
currentWr int64 // currentWr is the number of bytes written to the current file
|
||||
bufw *bufio.Writer
|
||||
bufLock sync.Mutex
|
||||
|
||||
flushTicker *time.Ticker
|
||||
logger *log.Logger
|
||||
purgeCh chan struct{}
|
||||
doneCh chan struct{}
|
||||
|
||||
closed bool
|
||||
closedLock sync.Mutex
|
||||
}
|
||||
|
||||
// NewFileRotator returns a new file rotator
|
||||
func NewFileRotator(path string, baseFile string, maxFiles int,
|
||||
fileSize int64, logger *log.Logger) (*FileRotator, error) {
|
||||
rotator := &FileRotator{
|
||||
MaxFiles: maxFiles,
|
||||
FileSize: fileSize,
|
||||
|
||||
path: path,
|
||||
baseFileName: baseFile,
|
||||
|
||||
flushTicker: time.NewTicker(flushDur),
|
||||
logger: logger,
|
||||
purgeCh: make(chan struct{}, 1),
|
||||
doneCh: make(chan struct{}, 1),
|
||||
}
|
||||
if err := rotator.lastFile(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
go rotator.purgeOldFiles()
|
||||
go rotator.flushPeriodically()
|
||||
return rotator, nil
|
||||
}
|
||||
|
||||
// Write writes a byte array to a file and rotates the file if its size becomes
|
||||
// equal to the maximum size the user has defined.
|
||||
func (f *FileRotator) Write(p []byte) (n int, err error) {
|
||||
n = 0
|
||||
var nw int
|
||||
|
||||
for n < len(p) {
|
||||
// Check if we still have space in the current file, otherwise close and
|
||||
// open the next file
|
||||
if f.currentWr >= f.FileSize {
|
||||
f.flushBuffer()
|
||||
f.currentFile.Close()
|
||||
if err := f.nextFile(); err != nil {
|
||||
f.logger.Printf("[ERROR] driver.rotator: error creating next file: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
// Calculate the remaining size on this file
|
||||
remainingSize := f.FileSize - f.currentWr
|
||||
|
||||
// Check if the number of bytes that we have to write is less than the
|
||||
// remaining size of the file
|
||||
if remainingSize < int64(len(p[n:])) {
|
||||
// Write the number of bytes that we can write on the current file
|
||||
li := int64(n) + remainingSize
|
||||
nw, err = f.writeToBuffer(p[n:li])
|
||||
} else {
|
||||
// Write all the bytes in the current file
|
||||
nw, err = f.writeToBuffer(p[n:])
|
||||
}
|
||||
|
||||
// Increment the number of bytes written so far in this method
|
||||
// invocation
|
||||
n += nw
|
||||
|
||||
// Increment the total number of bytes in the file
|
||||
f.currentWr += int64(n)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERROR] driver.rotator: error writing to file: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// nextFile opens the next file and purges older files if the number of rotated
|
||||
// files is larger than the maximum files configured by the user
|
||||
func (f *FileRotator) nextFile() error {
|
||||
nextFileIdx := f.logFileIdx
|
||||
for {
|
||||
nextFileIdx += 1
|
||||
logFileName := filepath.Join(f.path, fmt.Sprintf("%s.%d", f.baseFileName, nextFileIdx))
|
||||
if fi, err := os.Stat(logFileName); err == nil {
|
||||
if fi.IsDir() || fi.Size() >= f.FileSize {
|
||||
continue
|
||||
}
|
||||
}
|
||||
f.logFileIdx = nextFileIdx
|
||||
if err := f.createFile(); err != nil {
|
||||
return err
|
||||
}
|
||||
break
|
||||
}
|
||||
// Purge old files if we have more files than MaxFiles
|
||||
f.closedLock.Lock()
|
||||
defer f.closedLock.Unlock()
|
||||
if f.logFileIdx-f.oldestLogFileIdx >= f.MaxFiles && !f.closed {
|
||||
select {
|
||||
case f.purgeCh <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// lastFile finds out the rotated file with the largest index in a path.
|
||||
func (f *FileRotator) lastFile() error {
|
||||
finfos, err := ioutil.ReadDir(f.path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
prefix := fmt.Sprintf("%s.", f.baseFileName)
|
||||
for _, fi := range finfos {
|
||||
if fi.IsDir() {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(fi.Name(), prefix) {
|
||||
fileIdx := strings.TrimPrefix(fi.Name(), prefix)
|
||||
n, err := strconv.Atoi(fileIdx)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if n > f.logFileIdx {
|
||||
f.logFileIdx = n
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := f.createFile(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// createFile opens a new or existing file for writing
|
||||
func (f *FileRotator) createFile() error {
|
||||
logFileName := filepath.Join(f.path, fmt.Sprintf("%s.%d", f.baseFileName, f.logFileIdx))
|
||||
cFile, err := os.OpenFile(logFileName, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.currentFile = cFile
|
||||
fi, err := f.currentFile.Stat()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.currentWr = fi.Size()
|
||||
f.createOrResetBuffer()
|
||||
return nil
|
||||
}
|
||||
|
||||
// flushPeriodically flushes the buffered writer every 100ms to the underlying
|
||||
// file
|
||||
func (f *FileRotator) flushPeriodically() {
|
||||
for _ = range f.flushTicker.C {
|
||||
f.flushBuffer()
|
||||
}
|
||||
}
|
||||
|
||||
func (f *FileRotator) Close() {
|
||||
f.closedLock.Lock()
|
||||
defer f.closedLock.Unlock()
|
||||
|
||||
// Stop the ticker and flush for one last time
|
||||
f.flushTicker.Stop()
|
||||
f.flushBuffer()
|
||||
|
||||
// Stop the purge go routine
|
||||
if !f.closed {
|
||||
f.doneCh <- struct{}{}
|
||||
close(f.purgeCh)
|
||||
f.closed = true
|
||||
}
|
||||
}
|
||||
|
||||
// purgeOldFiles removes older files and keeps only the last N files rotated for
|
||||
// a file
|
||||
func (f *FileRotator) purgeOldFiles() {
|
||||
for {
|
||||
select {
|
||||
case <-f.purgeCh:
|
||||
var fIndexes []int
|
||||
files, err := ioutil.ReadDir(f.path)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// Inserting all the rotated files in a slice
|
||||
for _, fi := range files {
|
||||
if strings.HasPrefix(fi.Name(), f.baseFileName) {
|
||||
fileIdx := strings.TrimPrefix(fi.Name(), fmt.Sprintf("%s.", f.baseFileName))
|
||||
n, err := strconv.Atoi(fileIdx)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
fIndexes = append(fIndexes, n)
|
||||
}
|
||||
}
|
||||
|
||||
// Not continuing to delete files if the number of files is not more
|
||||
// than MaxFiles
|
||||
if len(fIndexes) <= f.MaxFiles {
|
||||
continue
|
||||
}
|
||||
|
||||
// Sorting the file indexes so that we can purge the older files and keep
|
||||
// only the number of files as configured by the user
|
||||
sort.Sort(sort.IntSlice(fIndexes))
|
||||
toDelete := fIndexes[0 : len(fIndexes)-f.MaxFiles]
|
||||
for _, fIndex := range toDelete {
|
||||
fname := filepath.Join(f.path, fmt.Sprintf("%s.%d", f.baseFileName, fIndex))
|
||||
os.RemoveAll(fname)
|
||||
}
|
||||
f.oldestLogFileIdx = fIndexes[0]
|
||||
case <-f.doneCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flushBuffer flushes the buffer
|
||||
func (f *FileRotator) flushBuffer() error {
|
||||
f.bufLock.Lock()
|
||||
defer f.bufLock.Unlock()
|
||||
if f.bufw != nil {
|
||||
return f.bufw.Flush()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeToBuffer writes the byte array to buffer
|
||||
func (f *FileRotator) writeToBuffer(p []byte) (int, error) {
|
||||
f.bufLock.Lock()
|
||||
defer f.bufLock.Unlock()
|
||||
return f.bufw.Write(p)
|
||||
}
|
||||
|
||||
// createOrResetBuffer creates a new buffer if we don't have one otherwise
|
||||
// resets the buffer
|
||||
func (f *FileRotator) createOrResetBuffer() {
|
||||
f.bufLock.Lock()
|
||||
defer f.bufLock.Unlock()
|
||||
if f.bufw == nil {
|
||||
f.bufw = bufio.NewWriterSize(f.currentFile, bufSize)
|
||||
} else {
|
||||
f.bufw.Reset(f.currentFile)
|
||||
}
|
||||
}
vendor/github.com/hashicorp/nomad/client/driver/logging/syslog_parser_unix.go (generated, vendored)
@@ -1,158 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
|
||||
|
||||
package logging
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"log/syslog"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// Errors related to parsing priority
|
||||
var (
|
||||
ErrPriorityNoStart = fmt.Errorf("No start char found for priority")
|
||||
ErrPriorityEmpty = fmt.Errorf("Priority field empty")
|
||||
ErrPriorityNoEnd = fmt.Errorf("No end char found for priority")
|
||||
ErrPriorityTooShort = fmt.Errorf("Priority field too short")
|
||||
ErrPriorityTooLong = fmt.Errorf("Priority field too long")
|
||||
ErrPriorityNonDigit = fmt.Errorf("Non digit found in priority")
|
||||
)
|
||||
|
||||
// Priority header and ending characters
|
||||
const (
|
||||
PRI_PART_START = '<'
|
||||
PRI_PART_END = '>'
|
||||
)
|
||||
|
||||
// SyslogMessage represents a log line received
|
||||
type SyslogMessage struct {
|
||||
Message []byte
|
||||
Severity syslog.Priority
|
||||
}
|
||||
|
||||
// Priority holds all the priority bits in a syslog log line
|
||||
type Priority struct {
|
||||
Pri int
|
||||
Facility syslog.Priority
|
||||
Severity syslog.Priority
|
||||
}
|
||||
|
||||
// DockerLogParser parses a line of log message that the docker daemon ships
|
||||
type DockerLogParser struct {
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// NewDockerLogParser creates a new DockerLogParser
|
||||
func NewDockerLogParser(logger *log.Logger) *DockerLogParser {
|
||||
return &DockerLogParser{logger: logger}
|
||||
}
|
||||
|
||||
// Parse parses a syslog log line
|
||||
func (d *DockerLogParser) Parse(line []byte) *SyslogMessage {
|
||||
pri, _, _ := d.parsePriority(line)
|
||||
msgIdx := d.logContentIndex(line)
|
||||
|
||||
// Create a copy of the line so that subsequent Scans do not override the
|
||||
// message
|
||||
lineCopy := make([]byte, len(line[msgIdx:]))
|
||||
copy(lineCopy, line[msgIdx:])
|
||||
|
||||
return &SyslogMessage{
|
||||
Severity: pri.Severity,
|
||||
Message: lineCopy,
|
||||
}
|
||||
}
|
||||
|
||||
// logContentIndex finds out the index of the start index of the content in a
|
||||
// syslog line
|
||||
func (d *DockerLogParser) logContentIndex(line []byte) int {
|
||||
cursor := 0
|
||||
numSpace := 0
|
||||
numColons := 0
|
||||
// first look for at least 2 colons; this moves the cursor into the timestamp, which contains no more spaces
|
||||
// DefaultFormatter log line look: '<30>2016-07-06T15:13:11Z00:00 hostname docker/9648c64f5037[16200]'
|
||||
// UnixFormatter log line look: '<30>Jul 6 15:13:11 docker/9648c64f5037[16200]'
|
||||
for i := 0; i < len(line); i++ {
|
||||
if line[i] == ':' {
|
||||
numColons += 1
|
||||
if numColons == 2 {
|
||||
cursor = i
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// then look for the next space
|
||||
for i := cursor; i < len(line); i++ {
|
||||
if line[i] == ' ' {
|
||||
numSpace += 1
|
||||
if numSpace == 1 {
|
||||
cursor = i
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// then a colon followed by a space is what separates the header from the message
|
||||
for i := cursor; i < len(line); i++ {
|
||||
if line[i] == ':' && i+1 < len(line) && line[i+1] == ' ' {
|
||||
cursor = i + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
// return the cursor to the next character
|
||||
return cursor + 1
|
||||
}
|
||||
|
||||
// parsePriority parses the priority in a syslog message
|
||||
func (d *DockerLogParser) parsePriority(line []byte) (Priority, int, error) {
|
||||
cursor := 0
|
||||
pri := d.newPriority(0)
|
||||
if len(line) <= 0 {
|
||||
return pri, cursor, ErrPriorityEmpty
|
||||
}
|
||||
if line[cursor] != PRI_PART_START {
|
||||
return pri, cursor, ErrPriorityNoStart
|
||||
}
|
||||
i := 1
|
||||
priDigit := 0
|
||||
for i < len(line) {
|
||||
if i >= 5 {
|
||||
return pri, cursor, ErrPriorityTooLong
|
||||
}
|
||||
c := line[i]
|
||||
if c == PRI_PART_END {
|
||||
if i == 1 {
|
||||
return pri, cursor, ErrPriorityTooShort
|
||||
}
|
||||
cursor = i + 1
|
||||
return d.newPriority(priDigit), cursor, nil
|
||||
}
|
||||
if d.isDigit(c) {
|
||||
v, e := strconv.Atoi(string(c))
|
||||
if e != nil {
|
||||
return pri, cursor, e
|
||||
}
|
||||
priDigit = (priDigit * 10) + v
|
||||
} else {
|
||||
return pri, cursor, ErrPriorityNonDigit
|
||||
}
|
||||
i++
|
||||
}
|
||||
return pri, cursor, ErrPriorityNoEnd
|
||||
}
|
||||
|
||||
// isDigit checks if a byte is a numeric char
|
||||
func (d *DockerLogParser) isDigit(c byte) bool {
|
||||
return c >= '0' && c <= '9'
|
||||
}
|
||||
|
||||
// newPriority creates a new default priority
|
||||
func (d *DockerLogParser) newPriority(p int) Priority {
|
||||
// The Priority value is calculated by first multiplying the Facility
|
||||
// number by 8 and then adding the numerical value of the Severity.
|
||||
return Priority{
|
||||
Pri: p,
|
||||
Facility: syslog.Priority(p / 8),
|
||||
Severity: syslog.Priority(p % 8),
|
||||
}
|
||||
}
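The comment above describes the standard syslog encoding (priority = facility*8 + severity). A minimal illustrative sketch, not part of the vendored file, decoding the `<30>` priority used in the earlier sample lines:

package main

import (
	"fmt"
	"log/syslog" // Unix-only, matching this file's build tags
)

func main() {
	// "<30>" encodes priority 30: facility 30/8 = 3 (daemon), severity 30%8 = 6 (info).
	p := 30
	severity := syslog.Priority(p % 8)
	fmt.Println(p/8, severity, severity == syslog.LOG_INFO) // 3 6 true
}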
|
|
@ -1,86 +0,0 @@
|
|||
// +build !windows
|
||||
|
||||
package logging
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"log"
|
||||
"net"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// SyslogServer is a server which listens to syslog messages and parses them
|
||||
type SyslogServer struct {
|
||||
listener net.Listener
|
||||
messages chan *SyslogMessage
|
||||
parser *DockerLogParser
|
||||
|
||||
doneCh chan interface{}
|
||||
done bool
|
||||
doneLock sync.Mutex
|
||||
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// NewSyslogServer creates a new syslog server
|
||||
func NewSyslogServer(l net.Listener, messages chan *SyslogMessage, logger *log.Logger) *SyslogServer {
|
||||
parser := NewDockerLogParser(logger)
|
||||
return &SyslogServer{
|
||||
listener: l,
|
||||
messages: messages,
|
||||
parser: parser,
|
||||
logger: logger,
|
||||
doneCh: make(chan interface{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Start starts accepting syslog connections
|
||||
func (s *SyslogServer) Start() {
|
||||
for {
|
||||
select {
|
||||
case <-s.doneCh:
|
||||
s.listener.Close()
|
||||
return
|
||||
default:
|
||||
connection, err := s.listener.Accept()
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] logcollector.server: error in accepting connection: %v", err)
|
||||
continue
|
||||
}
|
||||
go s.read(connection)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// read reads the bytes from a connection
|
||||
func (s *SyslogServer) read(connection net.Conn) {
|
||||
defer connection.Close()
|
||||
scanner := bufio.NewScanner(bufio.NewReader(connection))
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.doneCh:
|
||||
return
|
||||
default:
|
||||
}
|
||||
if scanner.Scan() {
|
||||
b := scanner.Bytes()
|
||||
msg := s.parser.Parse(b)
|
||||
s.messages <- msg
|
||||
} else {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown shuts down the syslog server
|
||||
func (s *SyslogServer) Shutdown() {
|
||||
s.doneLock.Lock()
|
||||
defer s.doneLock.Unlock()
|
||||
|
||||
if !s.done {
|
||||
close(s.doneCh)
|
||||
close(s.messages)
|
||||
s.done = true
|
||||
}
|
||||
}
|
10
vendor/github.com/hashicorp/nomad/client/driver/logging/syslog_server_windows.go
generated
vendored
|
@ -1,10 +0,0 @@
|
|||
package logging
|
||||
|
||||
type SyslogServer struct {
|
||||
}
|
||||
|
||||
func (s *SyslogServer) Shutdown() {
|
||||
}
|
||||
|
||||
type SyslogMessage struct {
|
||||
}
|
207
vendor/github.com/hashicorp/nomad/client/driver/logging/universal_collector_unix.go
generated
vendored
|
@ -1,207 +0,0 @@
|
|||
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
|
||||
|
||||
package logging
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"log/syslog"
|
||||
"net"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
cstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// LogCollectorContext holds context to configure the syslog server
|
||||
type LogCollectorContext struct {
|
||||
// TaskName is the name of the Task
|
||||
TaskName string
|
||||
|
||||
// AllocDir is the handle to do operations on the alloc dir of
|
||||
// the task
|
||||
AllocDir *allocdir.AllocDir
|
||||
|
||||
// LogConfig provides configuration related to log rotation
|
||||
LogConfig *structs.LogConfig
|
||||
|
||||
// PortUpperBound is the upper bound of the ports that we can use to start
|
||||
// the syslog server
|
||||
PortUpperBound uint
|
||||
|
||||
// PortLowerBound is the lower bound of the ports that we can use to start
|
||||
// the syslog server
|
||||
PortLowerBound uint
|
||||
}
|
||||
|
||||
// SyslogCollectorState holds the address and isolation information of a launched
|
||||
// syslog server
|
||||
type SyslogCollectorState struct {
|
||||
IsolationConfig *cstructs.IsolationConfig
|
||||
Addr string
|
||||
}
|
||||
|
||||
// LogCollector is an interface which allows a driver to launch a log server
|
||||
// and update log configuration
|
||||
type LogCollector interface {
|
||||
LaunchCollector(ctx *LogCollectorContext) (*SyslogCollectorState, error)
|
||||
Exit() error
|
||||
UpdateLogConfig(logConfig *structs.LogConfig) error
|
||||
}
|
||||
|
||||
// SyslogCollector is a LogCollector which starts a syslog server and does
|
||||
// rotation to incoming stream
|
||||
type SyslogCollector struct {
|
||||
addr net.Addr
|
||||
logConfig *structs.LogConfig
|
||||
ctx *LogCollectorContext
|
||||
|
||||
lro *FileRotator
|
||||
lre *FileRotator
|
||||
server *SyslogServer
|
||||
syslogChan chan *SyslogMessage
|
||||
taskDir string
|
||||
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// NewSyslogCollector returns an implementation of the SyslogCollector
|
||||
func NewSyslogCollector(logger *log.Logger) *SyslogCollector {
|
||||
return &SyslogCollector{logger: logger, syslogChan: make(chan *SyslogMessage, 2048)}
|
||||
}
|
||||
|
||||
// LaunchCollector launches a new syslog server and starts writing log lines to
|
||||
// files and rotates them
|
||||
func (s *SyslogCollector) LaunchCollector(ctx *LogCollectorContext) (*SyslogCollectorState, error) {
|
||||
l, err := s.getListener(ctx.PortLowerBound, ctx.PortUpperBound)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.logger.Printf("[DEBUG] sylog-server: launching syslog server on addr: %v", l.Addr().String())
|
||||
s.ctx = ctx
|
||||
// configuring the task dir
|
||||
if err := s.configureTaskDir(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s.server = NewSyslogServer(l, s.syslogChan, s.logger)
|
||||
go s.server.Start()
|
||||
logFileSize := int64(ctx.LogConfig.MaxFileSizeMB * 1024 * 1024)
|
||||
|
||||
lro, err := NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", ctx.TaskName),
|
||||
ctx.LogConfig.MaxFiles, logFileSize, s.logger)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.lro = lro
|
||||
|
||||
lre, err := NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", ctx.TaskName),
|
||||
ctx.LogConfig.MaxFiles, logFileSize, s.logger)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.lre = lre
|
||||
|
||||
go s.collectLogs(lre, lro)
|
||||
syslogAddr := fmt.Sprintf("%s://%s", l.Addr().Network(), l.Addr().String())
|
||||
return &SyslogCollectorState{Addr: syslogAddr}, nil
|
||||
}
|
||||
|
||||
func (s *SyslogCollector) collectLogs(we io.Writer, wo io.Writer) {
|
||||
for logParts := range s.syslogChan {
|
||||
// If the severity of the log line is err then we write to stderr
|
||||
// otherwise all messages go to stdout
|
||||
if logParts.Severity == syslog.LOG_ERR {
|
||||
s.lre.Write(logParts.Message)
|
||||
s.lre.Write([]byte{'\n'})
|
||||
} else {
|
||||
s.lro.Write(logParts.Message)
|
||||
s.lro.Write([]byte{'\n'})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Exit kills the syslog server
|
||||
func (s *SyslogCollector) Exit() error {
|
||||
s.server.Shutdown()
|
||||
s.lre.Close()
|
||||
s.lro.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateLogConfig updates the log configuration
|
||||
func (s *SyslogCollector) UpdateLogConfig(logConfig *structs.LogConfig) error {
|
||||
s.ctx.LogConfig = logConfig
|
||||
if s.lro == nil {
|
||||
return fmt.Errorf("log rotator for stdout doesn't exist")
|
||||
}
|
||||
s.lro.MaxFiles = logConfig.MaxFiles
|
||||
s.lro.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
|
||||
|
||||
if s.lre == nil {
|
||||
return fmt.Errorf("log rotator for stderr doesn't exist")
|
||||
}
|
||||
s.lre.MaxFiles = logConfig.MaxFiles
|
||||
s.lre.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
|
||||
return nil
|
||||
}
|
||||
|
||||
// configureTaskDir sets the task dir in the SyslogCollector
|
||||
func (s *SyslogCollector) configureTaskDir() error {
|
||||
taskDir, ok := s.ctx.AllocDir.TaskDirs[s.ctx.TaskName]
|
||||
if !ok {
|
||||
return fmt.Errorf("couldn't find task directory for task %v", s.ctx.TaskName)
|
||||
}
|
||||
s.taskDir = taskDir
|
||||
return nil
|
||||
}
|
||||
|
||||
// getListener returns a listener on a free port between the upper and
|
||||
// lower bounds
|
||||
func (s *SyslogCollector) getListener(lowerBound uint, upperBound uint) (net.Listener, error) {
|
||||
if runtime.GOOS == "windows" {
|
||||
return s.listenerTCP(lowerBound, upperBound)
|
||||
}
|
||||
|
||||
return s.listenerUnix()
|
||||
}
|
||||
|
||||
// listenerTCP creates a TCP listener using an unused port between an upper and
|
||||
// lower bound
|
||||
func (s *SyslogCollector) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) {
|
||||
for i := lowerBound; i <= upperBound; i++ {
|
||||
addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
l, err := net.ListenTCP("tcp", addr)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
return l, nil
|
||||
}
|
||||
return nil, fmt.Errorf("No free port found")
|
||||
}
|
||||
|
||||
// listenerUnix creates a Unix domain socket
|
||||
func (s *SyslogCollector) listenerUnix() (net.Listener, error) {
|
||||
f, err := ioutil.TempFile("", "plugin")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path := f.Name()
|
||||
|
||||
if err := f.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := os.Remove(path); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return net.Listen("unix", path)
|
||||
}
|
|
@ -1,51 +0,0 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
|
||||
"github.com/hashicorp/go-plugin"
|
||||
)
|
||||
|
||||
var HandshakeConfig = plugin.HandshakeConfig{
|
||||
ProtocolVersion: 1,
|
||||
MagicCookieKey: "NOMAD_PLUGIN_MAGIC_COOKIE",
|
||||
MagicCookieValue: "e4327c2e01eabfd75a8a67adb114fb34a757d57eee7728d857a8cec6e91a7255",
|
||||
}
|
||||
|
||||
func GetPluginMap(w io.Writer) map[string]plugin.Plugin {
|
||||
e := new(ExecutorPlugin)
|
||||
e.logger = log.New(w, "", log.LstdFlags)
|
||||
|
||||
s := new(SyslogCollectorPlugin)
|
||||
s.logger = log.New(w, "", log.LstdFlags)
|
||||
return map[string]plugin.Plugin{
|
||||
"executor": e,
|
||||
"syslogcollector": s,
|
||||
}
|
||||
}
|
||||
|
||||
// PluginReattachConfig is the config that we serialize and de-serialize and
|
||||
// store on disk
|
||||
type PluginReattachConfig struct {
|
||||
Pid int
|
||||
AddrNet string
|
||||
AddrName string
|
||||
}
|
||||
|
||||
// PluginConfig returns a plugin.ReattachConfig built from a PluginReattachConfig
|
||||
func (c *PluginReattachConfig) PluginConfig() *plugin.ReattachConfig {
|
||||
var addr net.Addr
|
||||
switch c.AddrNet {
|
||||
case "unix", "unixgram", "unixpacket":
|
||||
addr, _ = net.ResolveUnixAddr(c.AddrNet, c.AddrName)
|
||||
case "tcp", "tcp4", "tcp6":
|
||||
addr, _ = net.ResolveTCPAddr(c.AddrNet, c.AddrName)
|
||||
}
|
||||
return &plugin.ReattachConfig{Pid: c.Pid, Addr: addr}
|
||||
}
|
||||
|
||||
func NewPluginReattachConfig(c *plugin.ReattachConfig) *PluginReattachConfig {
|
||||
return &PluginReattachConfig{Pid: c.Pid, AddrNet: c.Addr.Network(), AddrName: c.Addr.String()}
|
||||
}
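A minimal sketch (not part of the vendored file, assuming the client/driver package shown above is importable) of the round trip this type exists for: flatten a plugin.ReattachConfig to JSON when the handle ID is stored, and rebuild it after a client restart. The address and PID values are made up.

package main

import (
	"encoding/json"
	"fmt"
	"net"

	"github.com/hashicorp/go-plugin"
	"github.com/hashicorp/nomad/client/driver"
)

func main() {
	// Hypothetical reattach details for a running executor plugin.
	addr, _ := net.ResolveTCPAddr("tcp", "127.0.0.1:4200")
	rc := &plugin.ReattachConfig{Pid: 4321, Addr: addr}

	// Serialize the flattened form, as the driver handles do when building an ID.
	data, _ := json.Marshal(driver.NewPluginReattachConfig(rc))

	// After a restart, deserialize and rebuild the plugin.ReattachConfig.
	var restored driver.PluginReattachConfig
	_ = json.Unmarshal(data, &restored)
	fmt.Println(restored.PluginConfig().Pid, restored.PluginConfig().Addr)
}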
|
|
@ -1,412 +0,0 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-plugin"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/executor"
|
||||
dstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
"github.com/hashicorp/nomad/client/fingerprint"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/helper/discover"
|
||||
"github.com/hashicorp/nomad/helper/fields"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
)
|
||||
|
||||
var (
|
||||
reQemuVersion = regexp.MustCompile(`version (\d[\.\d+]+)`)
|
||||
)
|
||||
|
||||
const (
|
||||
// The key populated in Node Attributes to indicate presence of the Qemu
|
||||
// driver
|
||||
qemuDriverAttr = "driver.qemu"
|
||||
)
|
||||
|
||||
// QemuDriver is a driver for running images via Qemu
|
||||
// We attempt to choose sane defaults for now, with more configuration available
|
||||
// planned in the future
|
||||
type QemuDriver struct {
|
||||
DriverContext
|
||||
fingerprint.StaticFingerprinter
|
||||
}
|
||||
|
||||
type QemuDriverConfig struct {
|
||||
ImagePath string `mapstructure:"image_path"`
|
||||
Accelerator string `mapstructure:"accelerator"`
|
||||
PortMap []map[string]int `mapstructure:"port_map"` // A map of host port labels and to guest ports.
|
||||
Args []string `mapstructure:"args"` // extra arguments to qemu executable
|
||||
}
|
||||
|
||||
// qemuHandle is returned from Start/Open as a handle to the PID
|
||||
type qemuHandle struct {
|
||||
pluginClient *plugin.Client
|
||||
userPid int
|
||||
executor executor.Executor
|
||||
allocDir *allocdir.AllocDir
|
||||
killTimeout time.Duration
|
||||
maxKillTimeout time.Duration
|
||||
logger *log.Logger
|
||||
version string
|
||||
waitCh chan *dstructs.WaitResult
|
||||
doneCh chan struct{}
|
||||
}
|
||||
|
||||
// NewQemuDriver is used to create a new Qemu driver
|
||||
func NewQemuDriver(ctx *DriverContext) Driver {
|
||||
return &QemuDriver{DriverContext: *ctx}
|
||||
}
|
||||
|
||||
// Validate is used to validate the driver configuration
|
||||
func (d *QemuDriver) Validate(config map[string]interface{}) error {
|
||||
fd := &fields.FieldData{
|
||||
Raw: config,
|
||||
Schema: map[string]*fields.FieldSchema{
|
||||
"image_path": &fields.FieldSchema{
|
||||
Type: fields.TypeString,
|
||||
Required: true,
|
||||
},
|
||||
"accelerator": &fields.FieldSchema{
|
||||
Type: fields.TypeString,
|
||||
},
|
||||
"port_map": &fields.FieldSchema{
|
||||
Type: fields.TypeArray,
|
||||
},
|
||||
"args": &fields.FieldSchema{
|
||||
Type: fields.TypeArray,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if err := fd.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// Get the current status so that we can log any debug messages only if the
|
||||
// state changes
|
||||
_, currentlyEnabled := node.Attributes[qemuDriverAttr]
|
||||
|
||||
bin := "qemu-system-x86_64"
|
||||
if runtime.GOOS == "windows" {
|
||||
// On windows, the "qemu-system-x86_64" command does not respond to the
|
||||
// version flag.
|
||||
bin = "qemu-img"
|
||||
}
|
||||
outBytes, err := exec.Command(bin, "--version").Output()
|
||||
if err != nil {
|
||||
delete(node.Attributes, qemuDriverAttr)
|
||||
return false, nil
|
||||
}
|
||||
out := strings.TrimSpace(string(outBytes))
|
||||
|
||||
matches := reQemuVersion.FindStringSubmatch(out)
|
||||
if len(matches) != 2 {
|
||||
delete(node.Attributes, qemuDriverAttr)
|
||||
return false, fmt.Errorf("Unable to parse Qemu version string: %#v", matches)
|
||||
}
|
||||
|
||||
if !currentlyEnabled {
|
||||
d.logger.Printf("[DEBUG] driver.qemu: enabling driver")
|
||||
}
|
||||
node.Attributes[qemuDriverAttr] = "1"
|
||||
node.Attributes["driver.qemu.version"] = matches[1]
|
||||
return true, nil
|
||||
}
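For reference, a small standalone sketch (not part of the vendored file) of how reQemuVersion extracts the version attribute from typical `--version` output; the sample output string is illustrative:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	reQemuVersion := regexp.MustCompile(`version (\d[\.\d+]+)`)
	out := "QEMU emulator version 2.5.0, Copyright (c) 2003-2008 Fabrice Bellard"
	matches := reQemuVersion.FindStringSubmatch(out)
	if len(matches) == 2 {
		fmt.Println(matches[1]) // 2.5.0 -> stored as driver.qemu.version
	}
}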
|
||||
|
||||
// Run an existing Qemu image. Start() will pull down an existing, valid Qemu
|
||||
// image and save it to the Driver's Allocation Dir
|
||||
func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
|
||||
var driverConfig QemuDriverConfig
|
||||
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(driverConfig.PortMap) > 1 {
|
||||
return nil, fmt.Errorf("Only one port_map block is allowed in the qemu driver config")
|
||||
}
|
||||
|
||||
// Get the image source
|
||||
vmPath := driverConfig.ImagePath
|
||||
if vmPath == "" {
|
||||
return nil, fmt.Errorf("image_path must be set")
|
||||
}
|
||||
vmID := filepath.Base(vmPath)
|
||||
|
||||
// Get the tasks local directory.
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
|
||||
}
|
||||
|
||||
// Parse configuration arguments
|
||||
// Create the base arguments
|
||||
accelerator := "tcg"
|
||||
if driverConfig.Accelerator != "" {
|
||||
accelerator = driverConfig.Accelerator
|
||||
}
|
||||
// TODO: Check a lower bounds, e.g. the default 128 of Qemu
|
||||
mem := fmt.Sprintf("%dM", task.Resources.MemoryMB)
|
||||
|
||||
absPath, err := GetAbsolutePath("qemu-system-x86_64")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
args := []string{
|
||||
absPath,
|
||||
"-machine", "type=pc,accel=" + accelerator,
|
||||
"-name", vmID,
|
||||
"-m", mem,
|
||||
"-drive", "file=" + vmPath,
|
||||
"-nographic",
|
||||
}
|
||||
|
||||
// Add pass through arguments to qemu executable. A user can specify
|
||||
// these arguments in driver task configuration. These arguments are
|
||||
// passed directly to the qemu driver as command line options.
|
||||
// For example, args = [ "-nodefconfig", "-nodefaults" ]
|
||||
// This will allow a VM with embedded configuration to boot successfully.
|
||||
args = append(args, driverConfig.Args...)
|
||||
|
||||
// Check the Resources required Networks to add port mappings. If no resources
|
||||
// are required, we assume the VM is a purely compute job and does not require
|
||||
// the outside world to be able to reach it. VMs run without port mappings can
|
||||
// still reach out to the world, but without port mappings it is effectively
|
||||
// firewalled
|
||||
protocols := []string{"udp", "tcp"}
|
||||
if len(task.Resources.Networks) > 0 && len(driverConfig.PortMap) == 1 {
|
||||
// Loop through the port map and construct the hostfwd string, to map
|
||||
// reserved ports to the ports listening in the VM
|
||||
// Ex: hostfwd=tcp::22000-:22,hostfwd=tcp::80-:8080
|
||||
var forwarding []string
|
||||
taskPorts := task.Resources.Networks[0].MapLabelToValues(nil)
|
||||
for label, guest := range driverConfig.PortMap[0] {
|
||||
host, ok := taskPorts[label]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Unknown port label %q", label)
|
||||
}
|
||||
|
||||
for _, p := range protocols {
|
||||
forwarding = append(forwarding, fmt.Sprintf("hostfwd=%s::%d-:%d", p, host, guest))
|
||||
}
|
||||
}
|
||||
|
||||
if len(forwarding) != 0 {
|
||||
args = append(args,
|
||||
"-netdev",
|
||||
fmt.Sprintf("user,id=user.0,%s", strings.Join(forwarding, ",")),
|
||||
"-device", "virtio-net,netdev=user.0",
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// If using KVM, add optimization args
|
||||
if accelerator == "kvm" {
|
||||
args = append(args,
|
||||
"-enable-kvm",
|
||||
"-cpu", "host",
|
||||
// Do we have cores information available to the Driver?
|
||||
// "-smp", fmt.Sprintf("%d", cores),
|
||||
)
|
||||
}
|
||||
|
||||
d.logger.Printf("[DEBUG] Starting QemuVM command: %q", strings.Join(args, " "))
|
||||
bin, err := discover.NomadExecutable()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
|
||||
}
|
||||
|
||||
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Cmd: exec.Command(bin, "executor", pluginLogFile),
|
||||
}
|
||||
|
||||
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
executorCtx := &executor.ExecutorContext{
|
||||
TaskEnv: d.taskEnv,
|
||||
Driver: "qemu",
|
||||
AllocDir: ctx.AllocDir,
|
||||
AllocID: ctx.AllocID,
|
||||
Task: task,
|
||||
}
|
||||
ps, err := exec.LaunchCmd(&executor.ExecCommand{
|
||||
Cmd: args[0],
|
||||
Args: args[1:],
|
||||
User: task.User,
|
||||
}, executorCtx)
|
||||
if err != nil {
|
||||
pluginClient.Kill()
|
||||
return nil, err
|
||||
}
|
||||
d.logger.Printf("[INFO] Started new QemuVM: %s", vmID)
|
||||
|
||||
// Create and Return Handle
|
||||
maxKill := d.DriverContext.config.MaxKillTimeout
|
||||
h := &qemuHandle{
|
||||
pluginClient: pluginClient,
|
||||
executor: exec,
|
||||
userPid: ps.Pid,
|
||||
allocDir: ctx.AllocDir,
|
||||
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
|
||||
maxKillTimeout: maxKill,
|
||||
version: d.config.Version,
|
||||
logger: d.logger,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
|
||||
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
h.logger.Printf("[ERR] driver.qemu: error registering services for task: %q: %v", task.Name, err)
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
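To illustrate the port-forwarding arguments built above, here is a self-contained sketch (not part of the vendored file) that turns a hypothetical port_map and the host ports allocated for those labels into the hostfwd string passed to -netdev:

package main

import (
	"fmt"
	"strings"
)

func main() {
	portMap := map[string]int{"ssh": 22, "web": 8080}    // label -> guest port (from port_map)
	taskPorts := map[string]int{"ssh": 22000, "web": 80} // label -> host port (allocated by Nomad)

	var forwarding []string
	for label, guest := range portMap {
		host := taskPorts[label]
		for _, p := range []string{"udp", "tcp"} {
			forwarding = append(forwarding, fmt.Sprintf("hostfwd=%s::%d-:%d", p, host, guest))
		}
	}
	// e.g. user,id=user.0,hostfwd=udp::22000-:22,hostfwd=tcp::22000-:22,...
	fmt.Println("user,id=user.0," + strings.Join(forwarding, ","))
}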
|
||||
|
||||
type qemuId struct {
|
||||
Version string
|
||||
KillTimeout time.Duration
|
||||
MaxKillTimeout time.Duration
|
||||
UserPid int
|
||||
PluginConfig *PluginReattachConfig
|
||||
AllocDir *allocdir.AllocDir
|
||||
}
|
||||
|
||||
func (d *QemuDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
|
||||
id := &qemuId{}
|
||||
if err := json.Unmarshal([]byte(handleID), id); err != nil {
|
||||
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
|
||||
}
|
||||
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Reattach: id.PluginConfig.PluginConfig(),
|
||||
}
|
||||
|
||||
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
d.logger.Println("[ERR] driver.qemu: error connecting to plugin so destroying plugin pid and user pid")
|
||||
if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil {
|
||||
d.logger.Printf("[ERR] driver.qemu: error destroying plugin and userpid: %v", e)
|
||||
}
|
||||
return nil, fmt.Errorf("error connecting to plugin: %v", err)
|
||||
}
|
||||
|
||||
ver, _ := exec.Version()
|
||||
d.logger.Printf("[DEBUG] driver.qemu: version of executor: %v", ver.Version)
|
||||
// Return a driver handle
|
||||
h := &qemuHandle{
|
||||
pluginClient: pluginClient,
|
||||
executor: exec,
|
||||
userPid: id.UserPid,
|
||||
allocDir: id.AllocDir,
|
||||
logger: d.logger,
|
||||
killTimeout: id.KillTimeout,
|
||||
maxKillTimeout: id.MaxKillTimeout,
|
||||
version: id.Version,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
h.logger.Printf("[ERR] driver.qemu: error registering services: %v", err)
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
func (h *qemuHandle) ID() string {
|
||||
id := qemuId{
|
||||
Version: h.version,
|
||||
KillTimeout: h.killTimeout,
|
||||
MaxKillTimeout: h.maxKillTimeout,
|
||||
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
|
||||
UserPid: h.userPid,
|
||||
AllocDir: h.allocDir,
|
||||
}
|
||||
|
||||
data, err := json.Marshal(id)
|
||||
if err != nil {
|
||||
h.logger.Printf("[ERR] driver.qemu: failed to marshal ID to JSON: %s", err)
|
||||
}
|
||||
return string(data)
|
||||
}
|
||||
|
||||
func (h *qemuHandle) WaitCh() chan *dstructs.WaitResult {
|
||||
return h.waitCh
|
||||
}
|
||||
|
||||
func (h *qemuHandle) Update(task *structs.Task) error {
|
||||
// Store the updated kill timeout.
|
||||
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
|
||||
h.executor.UpdateTask(task)
|
||||
|
||||
// Update is not possible
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: allow a 'shutdown_command' that can be executed over a ssh connection
|
||||
// to the VM
|
||||
func (h *qemuHandle) Kill() error {
|
||||
if err := h.executor.ShutDown(); err != nil {
|
||||
if h.pluginClient.Exited() {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("executor Shutdown failed: %v", err)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-h.doneCh:
|
||||
return nil
|
||||
case <-time.After(h.killTimeout):
|
||||
if h.pluginClient.Exited() {
|
||||
return nil
|
||||
}
|
||||
if err := h.executor.Exit(); err != nil {
|
||||
return fmt.Errorf("executor Exit failed: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (h *qemuHandle) Stats() (*cstructs.TaskResourceUsage, error) {
|
||||
return h.executor.Stats()
|
||||
}
|
||||
|
||||
func (h *qemuHandle) run() {
|
||||
ps, err := h.executor.Wait()
|
||||
if ps.ExitCode == 0 && err != nil {
|
||||
if e := killProcess(h.userPid); e != nil {
|
||||
h.logger.Printf("[ERR] driver.qemu: error killing user process: %v", e)
|
||||
}
|
||||
if e := h.allocDir.UnmountAll(); e != nil {
|
||||
h.logger.Printf("[ERR] driver.qemu: unmounting dev,proc and alloc dirs failed: %v", e)
|
||||
}
|
||||
}
|
||||
close(h.doneCh)
|
||||
h.waitCh <- &dstructs.WaitResult{ExitCode: ps.ExitCode, Signal: ps.Signal, Err: err}
|
||||
close(h.waitCh)
|
||||
// Remove services
|
||||
if err := h.executor.DeregisterServices(); err != nil {
|
||||
h.logger.Printf("[ERR] driver.qemu: failed to deregister services: %v", err)
|
||||
}
|
||||
|
||||
h.executor.Exit()
|
||||
h.pluginClient.Kill()
|
||||
}
|
|
@ -1,307 +0,0 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-plugin"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/executor"
|
||||
dstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
"github.com/hashicorp/nomad/client/fingerprint"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/helper/discover"
|
||||
"github.com/hashicorp/nomad/helper/fields"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
)
|
||||
|
||||
const (
|
||||
// The option that enables this driver in the Config.Options map.
|
||||
rawExecConfigOption = "driver.raw_exec.enable"
|
||||
|
||||
// The key populated in Node Attributes to indicate presence of the Raw Exec
|
||||
// driver
|
||||
rawExecDriverAttr = "driver.raw_exec"
|
||||
)
|
||||
|
||||
// The RawExecDriver is a privileged version of the exec driver. It provides no
|
||||
// resource isolation and just fork/execs. The Exec driver should be preferred
|
||||
// and this should only be used when explicitly needed.
|
||||
type RawExecDriver struct {
|
||||
DriverContext
|
||||
fingerprint.StaticFingerprinter
|
||||
}
|
||||
|
||||
// rawExecHandle is returned from Start/Open as a handle to the PID
|
||||
type rawExecHandle struct {
|
||||
version string
|
||||
pluginClient *plugin.Client
|
||||
userPid int
|
||||
executor executor.Executor
|
||||
killTimeout time.Duration
|
||||
maxKillTimeout time.Duration
|
||||
allocDir *allocdir.AllocDir
|
||||
logger *log.Logger
|
||||
waitCh chan *dstructs.WaitResult
|
||||
doneCh chan struct{}
|
||||
}
|
||||
|
||||
// NewRawExecDriver is used to create a new raw exec driver
|
||||
func NewRawExecDriver(ctx *DriverContext) Driver {
|
||||
return &RawExecDriver{DriverContext: *ctx}
|
||||
}
|
||||
|
||||
// Validate is used to validate the driver configuration
|
||||
func (d *RawExecDriver) Validate(config map[string]interface{}) error {
|
||||
fd := &fields.FieldData{
|
||||
Raw: config,
|
||||
Schema: map[string]*fields.FieldSchema{
|
||||
"command": &fields.FieldSchema{
|
||||
Type: fields.TypeString,
|
||||
Required: true,
|
||||
},
|
||||
"args": &fields.FieldSchema{
|
||||
Type: fields.TypeArray,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if err := fd.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *RawExecDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// Get the current status so that we can log any debug messages only if the
|
||||
// state changes
|
||||
_, currentlyEnabled := node.Attributes[rawExecDriverAttr]
|
||||
|
||||
// Check that the user has explicitly enabled this executor.
|
||||
enabled := cfg.ReadBoolDefault(rawExecConfigOption, false)
|
||||
|
||||
if enabled {
|
||||
if currentlyEnabled {
|
||||
d.logger.Printf("[WARN] driver.raw_exec: raw exec is enabled. Only enable if needed")
|
||||
}
|
||||
node.Attributes[rawExecDriverAttr] = "1"
|
||||
return true, nil
|
||||
}
|
||||
|
||||
delete(node.Attributes, rawExecDriverAttr)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
|
||||
var driverConfig ExecDriverConfig
|
||||
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Get the tasks local directory.
|
||||
taskName := d.DriverContext.taskName
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[taskName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
|
||||
}
|
||||
|
||||
// Get the command to be run
|
||||
command := driverConfig.Command
|
||||
if err := validateCommand(command, "args"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Set the host environment variables.
|
||||
filter := strings.Split(d.config.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",")
|
||||
d.taskEnv.AppendHostEnvvars(filter)
|
||||
|
||||
bin, err := discover.NomadExecutable()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
|
||||
}
|
||||
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Cmd: exec.Command(bin, "executor", pluginLogFile),
|
||||
}
|
||||
|
||||
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
executorCtx := &executor.ExecutorContext{
|
||||
TaskEnv: d.taskEnv,
|
||||
Driver: "raw_exec",
|
||||
AllocDir: ctx.AllocDir,
|
||||
AllocID: ctx.AllocID,
|
||||
Task: task,
|
||||
}
|
||||
|
||||
ps, err := exec.LaunchCmd(&executor.ExecCommand{
|
||||
Cmd: command,
|
||||
Args: driverConfig.Args,
|
||||
User: task.User,
|
||||
}, executorCtx)
|
||||
if err != nil {
|
||||
pluginClient.Kill()
|
||||
return nil, err
|
||||
}
|
||||
d.logger.Printf("[DEBUG] driver.raw_exec: started process with pid: %v", ps.Pid)
|
||||
|
||||
// Return a driver handle
|
||||
maxKill := d.DriverContext.config.MaxKillTimeout
|
||||
h := &rawExecHandle{
|
||||
pluginClient: pluginClient,
|
||||
executor: exec,
|
||||
userPid: ps.Pid,
|
||||
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
|
||||
maxKillTimeout: maxKill,
|
||||
allocDir: ctx.AllocDir,
|
||||
version: d.config.Version,
|
||||
logger: d.logger,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
h.logger.Printf("[ERR] driver.raw_exec: error registering services with consul for task: %q: %v", task.Name, err)
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
type rawExecId struct {
|
||||
Version string
|
||||
KillTimeout time.Duration
|
||||
MaxKillTimeout time.Duration
|
||||
UserPid int
|
||||
PluginConfig *PluginReattachConfig
|
||||
AllocDir *allocdir.AllocDir
|
||||
}
|
||||
|
||||
func (d *RawExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
|
||||
id := &rawExecId{}
|
||||
if err := json.Unmarshal([]byte(handleID), id); err != nil {
|
||||
return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
|
||||
}
|
||||
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Reattach: id.PluginConfig.PluginConfig(),
|
||||
}
|
||||
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
d.logger.Println("[ERR] driver.raw_exec: error connecting to plugin so destroying plugin pid and user pid")
|
||||
if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil {
|
||||
d.logger.Printf("[ERR] driver.raw_exec: error destroying plugin and userpid: %v", e)
|
||||
}
|
||||
return nil, fmt.Errorf("error connecting to plugin: %v", err)
|
||||
}
|
||||
|
||||
ver, _ := exec.Version()
|
||||
d.logger.Printf("[DEBUG] driver.raw_exec: version of executor: %v", ver.Version)
|
||||
|
||||
// Return a driver handle
|
||||
h := &rawExecHandle{
|
||||
pluginClient: pluginClient,
|
||||
executor: exec,
|
||||
userPid: id.UserPid,
|
||||
logger: d.logger,
|
||||
killTimeout: id.KillTimeout,
|
||||
maxKillTimeout: id.MaxKillTimeout,
|
||||
allocDir: id.AllocDir,
|
||||
version: id.Version,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
h.logger.Printf("[ERR] driver.raw_exec: error registering services with consul: %v", err)
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) ID() string {
|
||||
id := rawExecId{
|
||||
Version: h.version,
|
||||
KillTimeout: h.killTimeout,
|
||||
MaxKillTimeout: h.maxKillTimeout,
|
||||
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
|
||||
UserPid: h.userPid,
|
||||
AllocDir: h.allocDir,
|
||||
}
|
||||
|
||||
data, err := json.Marshal(id)
|
||||
if err != nil {
|
||||
h.logger.Printf("[ERR] driver.raw_exec: failed to marshal ID to JSON: %s", err)
|
||||
}
|
||||
return string(data)
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) WaitCh() chan *dstructs.WaitResult {
|
||||
return h.waitCh
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) Update(task *structs.Task) error {
|
||||
// Store the updated kill timeout.
|
||||
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
|
||||
h.executor.UpdateTask(task)
|
||||
|
||||
// Update is not possible
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) Kill() error {
|
||||
if err := h.executor.ShutDown(); err != nil {
|
||||
if h.pluginClient.Exited() {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("executor Shutdown failed: %v", err)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-h.doneCh:
|
||||
return nil
|
||||
case <-time.After(h.killTimeout):
|
||||
if h.pluginClient.Exited() {
|
||||
return nil
|
||||
}
|
||||
if err := h.executor.Exit(); err != nil {
|
||||
return fmt.Errorf("executor Exit failed: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) Stats() (*cstructs.TaskResourceUsage, error) {
|
||||
return h.executor.Stats()
|
||||
}
|
||||
|
||||
func (h *rawExecHandle) run() {
|
||||
ps, err := h.executor.Wait()
|
||||
close(h.doneCh)
|
||||
if ps.ExitCode == 0 && err != nil {
|
||||
if e := killProcess(h.userPid); e != nil {
|
||||
h.logger.Printf("[ERR] driver.raw_exec: error killing user process: %v", e)
|
||||
}
|
||||
if e := h.allocDir.UnmountAll(); e != nil {
|
||||
h.logger.Printf("[ERR] driver.raw_exec: unmounting dev,proc and alloc dirs failed: %v", e)
|
||||
}
|
||||
}
|
||||
h.waitCh <- &dstructs.WaitResult{ExitCode: ps.ExitCode, Signal: ps.Signal, Err: err}
|
||||
close(h.waitCh)
|
||||
// Remove services
|
||||
if err := h.executor.DeregisterServices(); err != nil {
|
||||
h.logger.Printf("[ERR] driver.raw_exec: failed to deregister services: %v", err)
|
||||
}
|
||||
|
||||
if err := h.executor.Exit(); err != nil {
|
||||
h.logger.Printf("[ERR] driver.raw_exec: error killing executor: %v", err)
|
||||
}
|
||||
h.pluginClient.Kill()
|
||||
}
|
|
@ -1,436 +0,0 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-plugin"
|
||||
"github.com/hashicorp/go-version"
|
||||
"github.com/hashicorp/nomad/client/allocdir"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver/executor"
|
||||
dstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
"github.com/hashicorp/nomad/client/fingerprint"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/helper/discover"
|
||||
"github.com/hashicorp/nomad/helper/fields"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
)
|
||||
|
||||
var (
|
||||
reRktVersion = regexp.MustCompile(`rkt [vV]ersion[:]? (\d[.\d]+)`)
|
||||
reAppcVersion = regexp.MustCompile(`appc [vV]ersion[:]? (\d[.\d]+)`)
|
||||
)
|
||||
|
||||
const (
|
||||
// minRktVersion is the earliest supported version of rkt. rkt added support
|
||||
// for CPU and memory isolators in 0.14.0. We cannot support an earlier
|
||||
// version to maintain a uniform interface across all drivers
|
||||
minRktVersion = "0.14.0"
|
||||
|
||||
// The key populated in the Node Attributes to indicate the presence of the
|
||||
// Rkt driver
|
||||
rktDriverAttr = "driver.rkt"
|
||||
)
|
||||
|
||||
// RktDriver is a driver for running images via Rkt
|
||||
// We attempt to choose sane defaults for now, with more configuration available
|
||||
// planned in the future
|
||||
type RktDriver struct {
|
||||
DriverContext
|
||||
fingerprint.StaticFingerprinter
|
||||
}
|
||||
|
||||
type RktDriverConfig struct {
|
||||
ImageName string `mapstructure:"image"`
|
||||
Command string `mapstructure:"command"`
|
||||
Args []string `mapstructure:"args"`
|
||||
TrustPrefix string `mapstructure:"trust_prefix"`
|
||||
DNSServers []string `mapstructure:"dns_servers"` // DNS Server for containers
|
||||
DNSSearchDomains []string `mapstructure:"dns_search_domains"` // DNS Search domains for containers
|
||||
Debug bool `mapstructure:"debug"` // Enable debug option for rkt command
|
||||
}
|
||||
|
||||
// rktHandle is returned from Start/Open as a handle to the PID
|
||||
type rktHandle struct {
|
||||
pluginClient *plugin.Client
|
||||
executorPid int
|
||||
executor executor.Executor
|
||||
allocDir *allocdir.AllocDir
|
||||
logger *log.Logger
|
||||
killTimeout time.Duration
|
||||
maxKillTimeout time.Duration
|
||||
waitCh chan *dstructs.WaitResult
|
||||
doneCh chan struct{}
|
||||
}
|
||||
|
||||
// rktPID is a struct to map the pid running the process to the vm image on
|
||||
// disk
|
||||
type rktPID struct {
|
||||
PluginConfig *PluginReattachConfig
|
||||
AllocDir *allocdir.AllocDir
|
||||
ExecutorPid int
|
||||
KillTimeout time.Duration
|
||||
MaxKillTimeout time.Duration
|
||||
}
|
||||
|
||||
// NewRktDriver is used to create a new rkt driver
|
||||
func NewRktDriver(ctx *DriverContext) Driver {
|
||||
return &RktDriver{DriverContext: *ctx}
|
||||
}
|
||||
|
||||
// Validate is used to validate the driver configuration
|
||||
func (d *RktDriver) Validate(config map[string]interface{}) error {
|
||||
fd := &fields.FieldData{
|
||||
Raw: config,
|
||||
Schema: map[string]*fields.FieldSchema{
|
||||
"image": &fields.FieldSchema{
|
||||
Type: fields.TypeString,
|
||||
Required: true,
|
||||
},
|
||||
"command": &fields.FieldSchema{
|
||||
Type: fields.TypeString,
|
||||
},
|
||||
"args": &fields.FieldSchema{
|
||||
Type: fields.TypeArray,
|
||||
},
|
||||
"trust_prefix": &fields.FieldSchema{
|
||||
Type: fields.TypeString,
|
||||
},
|
||||
"dns_servers": &fields.FieldSchema{
|
||||
Type: fields.TypeArray,
|
||||
},
|
||||
"dns_search_domains": &fields.FieldSchema{
|
||||
Type: fields.TypeArray,
|
||||
},
|
||||
"debug": &fields.FieldSchema{
|
||||
Type: fields.TypeBool,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if err := fd.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *RktDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
|
||||
// Get the current status so that we can log any debug messages only if the
|
||||
// state changes
|
||||
_, currentlyEnabled := node.Attributes[rktDriverAttr]
|
||||
|
||||
// Only enable if we are root when running on non-windows systems.
|
||||
if runtime.GOOS != "windows" && syscall.Geteuid() != 0 {
|
||||
if currentlyEnabled {
|
||||
d.logger.Printf("[DEBUG] driver.rkt: must run as root user, disabling")
|
||||
}
|
||||
delete(node.Attributes, rktDriverAttr)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
outBytes, err := exec.Command("rkt", "version").Output()
|
||||
if err != nil {
|
||||
delete(node.Attributes, rktDriverAttr)
|
||||
return false, nil
|
||||
}
|
||||
out := strings.TrimSpace(string(outBytes))
|
||||
|
||||
rktMatches := reRktVersion.FindStringSubmatch(out)
|
||||
appcMatches := reAppcVersion.FindStringSubmatch(out)
|
||||
if len(rktMatches) != 2 || len(appcMatches) != 2 {
|
||||
delete(node.Attributes, rktDriverAttr)
|
||||
return false, fmt.Errorf("Unable to parse Rkt version string: %#v", rktMatches)
|
||||
}
|
||||
|
||||
node.Attributes[rktDriverAttr] = "1"
|
||||
node.Attributes["driver.rkt.version"] = rktMatches[1]
|
||||
node.Attributes["driver.rkt.appc.version"] = appcMatches[1]
|
||||
|
||||
minVersion, _ := version.NewVersion(minRktVersion)
|
||||
currentVersion, _ := version.NewVersion(node.Attributes["driver.rkt.version"])
|
||||
if currentVersion.LessThan(minVersion) {
|
||||
// Do not allow rkt < 0.14.0
|
||||
d.logger.Printf("[WARN] driver.rkt: please upgrade rkt to a version >= %s", minVersion)
|
||||
node.Attributes[rktDriverAttr] = "0"
|
||||
}
|
||||
return true, nil
|
||||
}
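A minimal sketch (not part of the vendored file) of the version gate applied above, using go-version; the detected rkt version is made up:

package main

import (
	"fmt"

	"github.com/hashicorp/go-version"
)

func main() {
	minVersion, _ := version.NewVersion("0.14.0")
	current, _ := version.NewVersion("0.12.0") // hypothetical detected rkt version
	if current.LessThan(minVersion) {
		fmt.Printf("rkt %s is too old; the driver requires >= %s\n", current, minVersion)
	}
}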
|
||||
|
||||
// Run an existing Rkt image.
|
||||
func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
|
||||
var driverConfig RktDriverConfig
|
||||
if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// ACI image
|
||||
img := driverConfig.ImageName
|
||||
|
||||
// Get the tasks local directory.
|
||||
taskName := d.DriverContext.taskName
|
||||
taskDir, ok := ctx.AllocDir.TaskDirs[taskName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName)
|
||||
}
|
||||
|
||||
// Build the command.
|
||||
var cmdArgs []string
|
||||
|
||||
// Add debug option to rkt command.
|
||||
debug := driverConfig.Debug
|
||||
|
||||
// Add the given trust prefix
|
||||
trustPrefix := driverConfig.TrustPrefix
|
||||
insecure := false
|
||||
if trustPrefix != "" {
|
||||
var outBuf, errBuf bytes.Buffer
|
||||
cmd := exec.Command("rkt", "trust", "--skip-fingerprint-review=true", fmt.Sprintf("--prefix=%s", trustPrefix), fmt.Sprintf("--debug=%t", debug))
|
||||
cmd.Stdout = &outBuf
|
||||
cmd.Stderr = &errBuf
|
||||
if err := cmd.Run(); err != nil {
|
||||
return nil, fmt.Errorf("Error running rkt trust: %s\n\nOutput: %s\n\nError: %s",
|
||||
err, outBuf.String(), errBuf.String())
|
||||
}
|
||||
d.logger.Printf("[DEBUG] driver.rkt: added trust prefix: %q", trustPrefix)
|
||||
} else {
|
||||
// Disable signature verification if the trust command was not run.
|
||||
insecure = true
|
||||
}
|
||||
cmdArgs = append(cmdArgs, "run")
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("--volume=%s,kind=host,source=%s", task.Name, ctx.AllocDir.SharedDir))
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("--mount=volume=%s,target=%s", task.Name, ctx.AllocDir.SharedDir))
|
||||
cmdArgs = append(cmdArgs, img)
|
||||
if insecure {
|
||||
cmdArgs = append(cmdArgs, "--insecure-options=all")
|
||||
}
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("--debug=%t", debug))
|
||||
|
||||
// Inject environment variables
|
||||
for k, v := range d.taskEnv.EnvMap() {
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("--set-env=%v=%v", k, v))
|
||||
}
|
||||
|
||||
// Check if the user has overridden the exec command.
|
||||
if driverConfig.Command != "" {
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("--exec=%v", driverConfig.Command))
|
||||
}
|
||||
|
||||
// Add memory isolator
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("--memory=%vM", int64(task.Resources.MemoryMB)))
|
||||
|
||||
// Add CPU isolator
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("--cpu=%vm", int64(task.Resources.CPU)))
|
||||
|
||||
// Add DNS servers
|
||||
for _, ip := range driverConfig.DNSServers {
|
||||
if parsed := net.ParseIP(ip); parsed == nil {
|
||||
msg := fmt.Errorf("invalid ip address for container dns server %q", ip)
|
||||
d.logger.Printf("[DEBUG] driver.rkt: %v", msg)
|
||||
return nil, msg
|
||||
} else {
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("--dns=%s", ip))
|
||||
}
|
||||
}
|
||||
|
||||
// set DNS search domains
|
||||
for _, domain := range driverConfig.DNSSearchDomains {
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("--dns-search=%s", domain))
|
||||
}
|
||||
|
||||
// Add user passed arguments.
|
||||
if len(driverConfig.Args) != 0 {
|
||||
parsed := d.taskEnv.ParseAndReplace(driverConfig.Args)
|
||||
|
||||
// Need to start arguments with "--"
|
||||
if len(parsed) > 0 {
|
||||
cmdArgs = append(cmdArgs, "--")
|
||||
}
|
||||
|
||||
for _, arg := range parsed {
|
||||
cmdArgs = append(cmdArgs, fmt.Sprintf("%v", arg))
|
||||
}
|
||||
}
|
||||
|
||||
// Set the host environment variables.
|
||||
filter := strings.Split(d.config.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",")
|
||||
d.taskEnv.AppendHostEnvvars(filter)
|
||||
|
||||
bin, err := discover.NomadExecutable()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
|
||||
}
|
||||
|
||||
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Cmd: exec.Command(bin, "executor", pluginLogFile),
|
||||
}
|
||||
|
||||
execIntf, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
executorCtx := &executor.ExecutorContext{
|
||||
TaskEnv: d.taskEnv,
|
||||
Driver: "rkt",
|
||||
AllocDir: ctx.AllocDir,
|
||||
AllocID: ctx.AllocID,
|
||||
Task: task,
|
||||
}
|
||||
|
||||
absPath, err := GetAbsolutePath("rkt")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ps, err := execIntf.LaunchCmd(&executor.ExecCommand{
|
||||
Cmd: absPath,
|
||||
Args: cmdArgs,
|
||||
User: task.User,
|
||||
}, executorCtx)
|
||||
if err != nil {
|
||||
pluginClient.Kill()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
d.logger.Printf("[DEBUG] driver.rkt: started ACI %q with: %v", img, cmdArgs)
|
||||
maxKill := d.DriverContext.config.MaxKillTimeout
|
||||
h := &rktHandle{
|
||||
pluginClient: pluginClient,
|
||||
executor: execIntf,
|
||||
executorPid: ps.Pid,
|
||||
allocDir: ctx.AllocDir,
|
||||
logger: d.logger,
|
||||
killTimeout: GetKillTimeout(task.KillTimeout, maxKill),
|
||||
maxKillTimeout: maxKill,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
h.logger.Printf("[ERR] driver.rkt: error registering services for task: %q: %v", task.Name, err)
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
|
||||
// Parse the handle
|
||||
pidBytes := []byte(strings.TrimPrefix(handleID, "Rkt:"))
|
||||
id := &rktPID{}
|
||||
if err := json.Unmarshal(pidBytes, id); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse Rkt handle '%s': %v", handleID, err)
|
||||
}
|
||||
|
||||
pluginConfig := &plugin.ClientConfig{
|
||||
Reattach: id.PluginConfig.PluginConfig(),
|
||||
}
|
||||
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
|
||||
if err != nil {
|
||||
d.logger.Println("[ERROR] driver.rkt: error connecting to plugin so destroying plugin pid and user pid")
|
||||
if e := destroyPlugin(id.PluginConfig.Pid, id.ExecutorPid); e != nil {
|
||||
d.logger.Printf("[ERROR] driver.rkt: error destroying plugin and executor pid: %v", e)
|
||||
}
|
||||
return nil, fmt.Errorf("error connecting to plugin: %v", err)
|
||||
}
|
||||
|
||||
ver, _ := exec.Version()
|
||||
d.logger.Printf("[DEBUG] driver.rkt: version of executor: %v", ver.Version)
|
||||
// Return a driver handle
|
||||
h := &rktHandle{
|
||||
pluginClient: pluginClient,
|
||||
executorPid: id.ExecutorPid,
|
||||
allocDir: id.AllocDir,
|
||||
executor: exec,
|
||||
logger: d.logger,
|
||||
killTimeout: id.KillTimeout,
|
||||
maxKillTimeout: id.MaxKillTimeout,
|
||||
doneCh: make(chan struct{}),
|
||||
waitCh: make(chan *dstructs.WaitResult, 1),
|
||||
}
|
||||
if err := h.executor.SyncServices(consulContext(d.config, "")); err != nil {
|
||||
h.logger.Printf("[ERR] driver.rkt: error registering services: %v", err)
|
||||
}
|
||||
go h.run()
|
||||
return h, nil
|
||||
}
|
||||
|
||||
func (h *rktHandle) ID() string {
|
||||
// Return a handle to the PID
|
||||
pid := &rktPID{
|
||||
PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
|
||||
KillTimeout: h.killTimeout,
|
||||
MaxKillTimeout: h.maxKillTimeout,
|
||||
ExecutorPid: h.executorPid,
|
||||
AllocDir: h.allocDir,
|
||||
}
|
||||
data, err := json.Marshal(pid)
|
||||
if err != nil {
|
||||
h.logger.Printf("[ERR] driver.rkt: failed to marshal rkt PID to JSON: %s", err)
|
||||
}
|
||||
return fmt.Sprintf("Rkt:%s", string(data))
|
||||
}
|
||||
|
||||
func (h *rktHandle) WaitCh() chan *dstructs.WaitResult {
|
||||
return h.waitCh
|
||||
}
|
||||
|
||||
func (h *rktHandle) Update(task *structs.Task) error {
|
||||
// Store the updated kill timeout.
|
||||
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
|
||||
h.executor.UpdateTask(task)
|
||||
|
||||
// Update is not possible
|
||||
return nil
|
||||
}
|
||||
|
||||
// Kill is used to terminate the task. We send an Interrupt
|
||||
// and then wait up to the task's kill timeout before forcing the executor to exit.
|
||||
func (h *rktHandle) Kill() error {
|
||||
h.executor.ShutDown()
|
||||
select {
|
||||
case <-h.doneCh:
|
||||
return nil
|
||||
case <-time.After(h.killTimeout):
|
||||
return h.executor.Exit()
|
||||
}
|
||||
}
|
||||
|
||||
func (h *rktHandle) Stats() (*cstructs.TaskResourceUsage, error) {
|
||||
return nil, fmt.Errorf("stats not implemented for rkt")
|
||||
}
|
||||
|
||||
func (h *rktHandle) run() {
|
||||
ps, err := h.executor.Wait()
|
||||
close(h.doneCh)
|
||||
if ps.ExitCode == 0 && err != nil {
|
||||
if e := killProcess(h.executorPid); e != nil {
|
||||
h.logger.Printf("[ERROR] driver.rkt: error killing user process: %v", e)
|
||||
}
|
||||
if e := h.allocDir.UnmountAll(); e != nil {
|
||||
h.logger.Printf("[ERROR] driver.rkt: unmounting dev,proc and alloc dirs failed: %v", e)
|
||||
}
|
||||
}
|
||||
h.waitCh <- dstructs.NewWaitResult(ps.ExitCode, 0, err)
|
||||
close(h.waitCh)
|
||||
// Remove services
|
||||
if err := h.executor.DeregisterServices(); err != nil {
|
||||
h.logger.Printf("[ERR] driver.rkt: failed to deregister services: %v", err)
|
||||
}
|
||||
|
||||
if err := h.executor.Exit(); err != nil {
|
||||
h.logger.Printf("[ERR] driver.rkt: error killing executor: %v", err)
|
||||
}
|
||||
h.pluginClient.Kill()
|
||||
}
|
|
@@ -1,77 +0,0 @@
package structs

import (
    "fmt"
    "time"
)

const (
    // The default user that the executor uses to run tasks
    DefaultUnpriviledgedUser = "nobody"

    // CheckBufSize is the size of the check output result
    CheckBufSize = 4 * 1024
)

// WaitResult stores the result of a Wait operation.
type WaitResult struct {
    ExitCode int
    Signal   int
    Err      error
}

func NewWaitResult(code, signal int, err error) *WaitResult {
    return &WaitResult{
        ExitCode: code,
        Signal:   signal,
        Err:      err,
    }
}

func (r *WaitResult) Successful() bool {
    return r.ExitCode == 0 && r.Signal == 0 && r.Err == nil
}

func (r *WaitResult) String() string {
    return fmt.Sprintf("Wait returned exit code %v, signal %v, and error %v",
        r.ExitCode, r.Signal, r.Err)
}

// RecoverableError wraps an error and marks whether it is recoverable and could
// be retried or it is fatal.
type RecoverableError struct {
    Err         error
    Recoverable bool
}

// NewRecoverableError is used to wrap an error and mark it as recoverable or
// not.
func NewRecoverableError(e error, recoverable bool) *RecoverableError {
    return &RecoverableError{
        Err:         e,
        Recoverable: recoverable,
    }
}

func (r *RecoverableError) Error() string {
    return r.Err.Error()
}

// CheckResult encapsulates the result of a check
type CheckResult struct {
    // ExitCode is the exit code of the check
    ExitCode int

    // Output is the output of the check script
    Output string

    // Timestamp is the time at which the check was executed
    Timestamp time.Time

    // Duration is the time it took the check to run
    Duration time.Duration

    // Err is the error that a check returned
    Err error
}
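For context, a minimal sketch (not part of the vendored diff) of how a caller might consume these types: a driver waits on a WaitResult and wraps failures in a RecoverableError so the client can decide whether to retry. Both helper functions below are hypothetical and assume they live alongside this package.

package structs

import "fmt"

// retryIfRecoverable reports whether a driver error should be retried. Only
// errors of type *RecoverableError with Recoverable set are retried.
func retryIfRecoverable(err error) bool {
    if rerr, ok := err.(*RecoverableError); ok {
        return rerr.Recoverable
    }
    return false
}

// handleWait converts a failed WaitResult into a recoverable error so the
// caller may restart the task.
func handleWait(res *WaitResult) error {
    if res.Successful() {
        return nil
    }
    return NewRecoverableError(fmt.Errorf("task failed: %v", res), true)
}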
@@ -1,12 +0,0 @@
// +build darwin dragonfly freebsd netbsd openbsd solaris windows

package structs

// IsolationConfig has information about the isolation mechanism the executor
// uses to put resource constraints and isolation on the user process. The
// default implementation is empty. Platforms that support resource isolation
// (e.g. Linux's Cgroups) should build their own platform-specific copy. This
// information is transmitted via RPC so it is not permissible to change the
// API.
type IsolationConfig struct {
}
@@ -1,10 +0,0 @@
package structs

import cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"

// IsolationConfig has information about the isolation mechanism the executor
// uses to put resource constraints and isolation on the user process
type IsolationConfig struct {
    Cgroup      *cgroupConfig.Cgroup
    CgroupPaths map[string]string
}
@@ -1,69 +0,0 @@
package driver

import (
    "log"
    "net/rpc"

    "github.com/hashicorp/go-plugin"
    "github.com/hashicorp/nomad/client/driver/logging"
    "github.com/hashicorp/nomad/nomad/structs"
)

type SyslogCollectorRPC struct {
    client *rpc.Client
}

type LaunchCollectorArgs struct {
    Ctx *logging.LogCollectorContext
}

func (e *SyslogCollectorRPC) LaunchCollector(ctx *logging.LogCollectorContext) (*logging.SyslogCollectorState, error) {
    var ss *logging.SyslogCollectorState
    err := e.client.Call("Plugin.LaunchCollector", LaunchCollectorArgs{Ctx: ctx}, &ss)
    return ss, err
}

func (e *SyslogCollectorRPC) Exit() error {
    return e.client.Call("Plugin.Exit", new(interface{}), new(interface{}))
}

func (e *SyslogCollectorRPC) UpdateLogConfig(logConfig *structs.LogConfig) error {
    return e.client.Call("Plugin.UpdateLogConfig", logConfig, new(interface{}))
}

type SyslogCollectorRPCServer struct {
    Impl logging.LogCollector
}

func (s *SyslogCollectorRPCServer) LaunchCollector(args LaunchCollectorArgs,
    resp *logging.SyslogCollectorState) error {
    ss, err := s.Impl.LaunchCollector(args.Ctx)
    if ss != nil {
        *resp = *ss
    }
    return err
}

func (s *SyslogCollectorRPCServer) Exit(args interface{}, resp *interface{}) error {
    return s.Impl.Exit()
}

func (s *SyslogCollectorRPCServer) UpdateLogConfig(logConfig *structs.LogConfig, resp *interface{}) error {
    return s.Impl.UpdateLogConfig(logConfig)
}

type SyslogCollectorPlugin struct {
    logger *log.Logger
    Impl   *SyslogCollectorRPCServer
}

func (p *SyslogCollectorPlugin) Server(*plugin.MuxBroker) (interface{}, error) {
    if p.Impl == nil {
        p.Impl = &SyslogCollectorRPCServer{Impl: logging.NewSyslogCollector(p.logger)}
    }
    return p.Impl, nil
}

func (p *SyslogCollectorPlugin) Client(b *plugin.MuxBroker, c *rpc.Client) (interface{}, error) {
    return &SyslogCollectorRPC{client: c}, nil
}
@@ -1,170 +0,0 @@
package driver

import (
    "fmt"
    "io"
    "os"
    "os/exec"
    "path/filepath"
    "strings"
    "time"

    "github.com/hashicorp/go-multierror"
    "github.com/hashicorp/go-plugin"
    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/client/driver/executor"
    "github.com/hashicorp/nomad/client/driver/logging"
    cstructs "github.com/hashicorp/nomad/client/driver/structs"
    "github.com/hashicorp/nomad/nomad/structs"
)

// createExecutor launches an executor plugin and returns an instance of the
// Executor interface
func createExecutor(config *plugin.ClientConfig, w io.Writer,
    clientConfig *config.Config) (executor.Executor, *plugin.Client, error) {
    config.HandshakeConfig = HandshakeConfig
    config.Plugins = GetPluginMap(w)
    config.MaxPort = clientConfig.ClientMaxPort
    config.MinPort = clientConfig.ClientMinPort

    // setting the setsid of the plugin process so that it doesn't get signals sent to
    // the nomad client.
    if config.Cmd != nil {
        isolateCommand(config.Cmd)
    }

    executorClient := plugin.NewClient(config)
    rpcClient, err := executorClient.Client()
    if err != nil {
        return nil, nil, fmt.Errorf("error creating rpc client for executor plugin: %v", err)
    }

    raw, err := rpcClient.Dispense("executor")
    if err != nil {
        return nil, nil, fmt.Errorf("unable to dispense the executor plugin: %v", err)
    }
    executorPlugin := raw.(executor.Executor)
    return executorPlugin, executorClient, nil
}

func createLogCollector(config *plugin.ClientConfig, w io.Writer,
    clientConfig *config.Config) (logging.LogCollector, *plugin.Client, error) {
    config.HandshakeConfig = HandshakeConfig
    config.Plugins = GetPluginMap(w)
    config.MaxPort = clientConfig.ClientMaxPort
    config.MinPort = clientConfig.ClientMinPort
    if config.Cmd != nil {
        isolateCommand(config.Cmd)
    }

    syslogClient := plugin.NewClient(config)
    rpcClient, err := syslogClient.Client()
    if err != nil {
        return nil, nil, fmt.Errorf("error creating rpc client for syslog plugin: %v", err)
    }

    raw, err := rpcClient.Dispense("syslogcollector")
    if err != nil {
        return nil, nil, fmt.Errorf("unable to dispense the syslog plugin: %v", err)
    }
    logCollector := raw.(logging.LogCollector)
    return logCollector, syslogClient, nil
}

func consulContext(clientConfig *config.Config, containerID string) *executor.ConsulContext {
    return &executor.ConsulContext{
        ConsulConfig:   clientConfig.ConsulConfig,
        ContainerID:    containerID,
        DockerEndpoint: clientConfig.Read("docker.endpoint"),
        TLSCa:          clientConfig.Read("docker.tls.ca"),
        TLSCert:        clientConfig.Read("docker.tls.cert"),
        TLSKey:         clientConfig.Read("docker.tls.key"),
    }
}

// killProcess kills a process with the given pid
func killProcess(pid int) error {
    proc, err := os.FindProcess(pid)
    if err != nil {
        return err
    }
    return proc.Kill()
}

// destroyPlugin kills the plugin with the given pid and also kills the user
// process
func destroyPlugin(pluginPid int, userPid int) error {
    var merr error
    if err := killProcess(pluginPid); err != nil {
        merr = multierror.Append(merr, err)
    }

    if err := killProcess(userPid); err != nil {
        merr = multierror.Append(merr, err)
    }
    return merr
}

// validateCommand validates that the command only has a single value and
// returns a user friendly error message telling them to use the passed
// argField.
func validateCommand(command, argField string) error {
    trimmed := strings.TrimSpace(command)
    if len(trimmed) == 0 {
        return fmt.Errorf("command empty: %q", command)
    }

    if len(trimmed) != len(command) {
        return fmt.Errorf("command contains extra white space: %q", command)
    }

    split := strings.Split(trimmed, " ")
    if len(split) != 1 {
        return fmt.Errorf("command contained more than one input. Use %q field to pass arguments", argField)
    }

    return nil
}

// GetKillTimeout returns the kill timeout to use given the tasks desired kill
// timeout and the operator configured max kill timeout.
func GetKillTimeout(desired, max time.Duration) time.Duration {
    maxNanos := max.Nanoseconds()
    desiredNanos := desired.Nanoseconds()

    // Make the minimum time between signal and kill, 1 second.
    if desiredNanos <= 0 {
        desiredNanos = (1 * time.Second).Nanoseconds()
    }

    // Protect against max not being set properly.
    if maxNanos <= 0 {
        maxNanos = (10 * time.Second).Nanoseconds()
    }

    if desiredNanos < maxNanos {
        return time.Duration(desiredNanos)
    }

    return max
}

// GetAbsolutePath returns the absolute path of the passed binary by resolving
// it in the path and following symlinks.
func GetAbsolutePath(bin string) (string, error) {
    lp, err := exec.LookPath(bin)
    if err != nil {
        return "", fmt.Errorf("failed to resolve path to %q executable: %v", bin, err)
    }

    return filepath.EvalSymlinks(lp)
}

// getExecutorUser returns the user of the task, defaulting to
// cstructs.DefaultUnpriviledgedUser if none was given.
func getExecutorUser(task *structs.Task) string {
    if task.User == "" {
        return cstructs.DefaultUnpriviledgedUser
    }
    return task.User
}
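A hedged sketch (not part of the diff) illustrating the clamping behaviour of GetKillTimeout above: a non-positive desired timeout is raised to 1s, an unset max falls back to 10s, and anything at or above the operator max is capped at the max. The demo function name is illustrative only.

package driver

import (
    "fmt"
    "time"
)

// demoKillTimeouts exercises GetKillTimeout with a few representative inputs.
func demoKillTimeouts() {
    fmt.Println(GetKillTimeout(0, 30*time.Second))              // 1s  (floor applied)
    fmt.Println(GetKillTimeout(5*time.Second, 30*time.Second))  // 5s  (within max)
    fmt.Println(GetKillTimeout(45*time.Second, 30*time.Second)) // 30s (capped at max)
    fmt.Println(GetKillTimeout(5*time.Second, 0))               // 5s  (max defaults to 10s)
}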
@@ -1,18 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris

package driver

import (
    "os/exec"
    "syscall"
)

// isolateCommand sets the setsid flag in exec.Cmd to true so that the process
// becomes the process leader in a new session and doesn't receive signals that
// are sent to the parent process.
func isolateCommand(cmd *exec.Cmd) {
    if cmd.SysProcAttr == nil {
        cmd.SysProcAttr = &syscall.SysProcAttr{}
    }
    cmd.SysProcAttr.Setsid = true
}
@@ -1,9 +0,0 @@
package driver

import (
    "os/exec"
)

// TODO Figure out if this is needed in Windows
func isolateCommand(cmd *exec.Cmd) {
}
@@ -1,26 +0,0 @@
package fingerprint

import (
    "log"
    "runtime"

    client "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
)

// ArchFingerprint is used to fingerprint the architecture
type ArchFingerprint struct {
    StaticFingerprinter
    logger *log.Logger
}

// NewArchFingerprint is used to create an architecture fingerprint
func NewArchFingerprint(logger *log.Logger) Fingerprint {
    f := &ArchFingerprint{logger: logger}
    return f
}

func (f *ArchFingerprint) Fingerprint(config *client.Config, node *structs.Node) (bool, error) {
    node.Attributes["arch"] = runtime.GOARCH
    return true, nil
}
@@ -1,59 +0,0 @@
// +build linux

package fingerprint

import (
    "log"
    "time"

    "github.com/hashicorp/nomad/nomad/structs"
)

const (
    cgroupAvailable   = "available"
    cgroupUnavailable = "unavailable"
    interval          = 15
)

type CGroupFingerprint struct {
    logger             *log.Logger
    lastState          string
    mountPointDetector MountPointDetector
}

// An interface to isolate calls to the cgroup library.
// This facilitates testing where we can implement
// fake mount points to test various code paths
type MountPointDetector interface {
    MountPoint() (string, error)
}

// Implements the interface detector which calls the cgroups library directly
type DefaultMountPointDetector struct {
}

// Call out to the default cgroup library
func (b *DefaultMountPointDetector) MountPoint() (string, error) {
    return FindCgroupMountpointDir()
}

// NewCGroupFingerprint returns a new cgroup fingerprinter
func NewCGroupFingerprint(logger *log.Logger) Fingerprint {
    f := &CGroupFingerprint{
        logger:             logger,
        lastState:          cgroupUnavailable,
        mountPointDetector: &DefaultMountPointDetector{},
    }
    return f
}

// clearCGroupAttributes clears any node attributes related to cgroups that might
// have been set in a previous fingerprint run.
func (f *CGroupFingerprint) clearCGroupAttributes(n *structs.Node) {
    delete(n.Attributes, "unique.cgroup.mountpoint")
}

// Periodic determines the interval at which the periodic fingerprinter will run.
func (f *CGroupFingerprint) Periodic() (bool, time.Duration) {
    return true, interval * time.Second
}
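The MountPointDetector interface above exists so tests can substitute fake mount points. A minimal sketch of such a fake (an assumption on my part, not code from the diff; it would sit in the same linux-only fingerprint package):

package fingerprint

import "log"

// fakeMountPointDetector returns a canned mount point (or error) instead of
// consulting the cgroup library, so CGroupFingerprint can be exercised on
// hosts without cgroups.
type fakeMountPointDetector struct {
    mount string
    err   error
}

func (m *fakeMountPointDetector) MountPoint() (string, error) {
    return m.mount, m.err
}

// newTestCGroupFingerprint wires the fake detector into the fingerprinter.
func newTestCGroupFingerprint(logger *log.Logger, mount string) *CGroupFingerprint {
    return &CGroupFingerprint{
        logger:             logger,
        lastState:          cgroupUnavailable,
        mountPointDetector: &fakeMountPointDetector{mount: mount},
    }
}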
@@ -1,57 +0,0 @@
// +build linux

package fingerprint

import (
    "fmt"

    client "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
    "github.com/opencontainers/runc/libcontainer/cgroups"
)

// FindCgroupMountpointDir is used to find the cgroup mount point on a Linux
// system.
func FindCgroupMountpointDir() (string, error) {
    mount, err := cgroups.FindCgroupMountpointDir()
    if err != nil {
        switch e := err.(type) {
        case *cgroups.NotFoundError:
            // It's okay if the mount point is not discovered
            return "", nil
        default:
            // All other errors are passed back as is
            return "", e
        }
    }
    return mount, nil
}

// Fingerprint tries to find a valid cgroup mount point
func (f *CGroupFingerprint) Fingerprint(cfg *client.Config, node *structs.Node) (bool, error) {
    mount, err := f.mountPointDetector.MountPoint()
    if err != nil {
        f.clearCGroupAttributes(node)
        return false, fmt.Errorf("Failed to discover cgroup mount point: %s", err)
    }

    // Check if a cgroup mount point was found
    if mount == "" {
        // Clear any attributes from the previous fingerprint.
        f.clearCGroupAttributes(node)

        if f.lastState == cgroupAvailable {
            f.logger.Printf("[INFO] fingerprint.cgroups: cgroups are unavailable")
        }
        f.lastState = cgroupUnavailable
        return true, nil
    }

    node.Attributes["unique.cgroup.mountpoint"] = mount

    if f.lastState == cgroupUnavailable {
        f.logger.Printf("[INFO] fingerprint.cgroups: cgroups are available")
    }
    f.lastState = cgroupAvailable
    return true, nil
}
@@ -1,100 +0,0 @@
package fingerprint

import (
    "fmt"
    "log"
    "strconv"
    "time"

    consul "github.com/hashicorp/consul/api"

    client "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
)

const (
    consulAvailable   = "available"
    consulUnavailable = "unavailable"
)

// ConsulFingerprint is used to fingerprint the local Consul agent
type ConsulFingerprint struct {
    logger    *log.Logger
    client    *consul.Client
    lastState string
}

// NewConsulFingerprint is used to create a Consul fingerprint
func NewConsulFingerprint(logger *log.Logger) Fingerprint {
    return &ConsulFingerprint{logger: logger, lastState: consulUnavailable}
}

func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Node) (bool, error) {
    // Guard against uninitialized Links
    if node.Links == nil {
        node.Links = map[string]string{}
    }

    // Only create the client once to avoid creating too many connections to
    // Consul.
    if f.client == nil {
        consulConfig, err := config.ConsulConfig.ApiConfig()
        if err != nil {
            return false, fmt.Errorf("Failed to initialize the Consul client config: %v", err)
        }

        f.client, err = consul.NewClient(consulConfig)
        if err != nil {
            return false, fmt.Errorf("Failed to initialize consul client: %s", err)
        }
    }

    // We'll try to detect consul by making a query to the agent's self API.
    // If we can't hit this URL consul is probably not running on this machine.
    info, err := f.client.Agent().Self()
    if err != nil {
        // Clear any attributes set by a previous fingerprint.
        f.clearConsulAttributes(node)

        // Print a message indicating that the Consul Agent is not available
        // anymore
        if f.lastState == consulAvailable {
            f.logger.Printf("[INFO] fingerprint.consul: consul agent is unavailable")
        }
        f.lastState = consulUnavailable
        return false, nil
    }

    node.Attributes["consul.server"] = strconv.FormatBool(info["Config"]["Server"].(bool))
    node.Attributes["consul.version"] = info["Config"]["Version"].(string)
    node.Attributes["consul.revision"] = info["Config"]["Revision"].(string)
    node.Attributes["unique.consul.name"] = info["Config"]["NodeName"].(string)
    node.Attributes["consul.datacenter"] = info["Config"]["Datacenter"].(string)

    node.Links["consul"] = fmt.Sprintf("%s.%s",
        node.Attributes["consul.datacenter"],
        node.Attributes["unique.consul.name"])

    // If the Consul Agent was previously unavailable print a message to
    // indicate the Agent is available now
    if f.lastState == consulUnavailable {
        f.logger.Printf("[INFO] fingerprint.consul: consul agent is available")
    }
    f.lastState = consulAvailable
    return true, nil
}

// clearConsulAttributes removes consul attributes and links from the passed
// Node.
func (f *ConsulFingerprint) clearConsulAttributes(n *structs.Node) {
    delete(n.Attributes, "consul.server")
    delete(n.Attributes, "consul.version")
    delete(n.Attributes, "consul.revision")
    delete(n.Attributes, "unique.consul.name")
    delete(n.Attributes, "consul.datacenter")
    delete(n.Links, "consul")
}

func (f *ConsulFingerprint) Periodic() (bool, time.Duration) {
    return true, 15 * time.Second
}
@@ -1,52 +0,0 @@
package fingerprint

import (
    "fmt"
    "log"

    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/helper/stats"
    "github.com/hashicorp/nomad/nomad/structs"
)

// CPUFingerprint is used to fingerprint the CPU
type CPUFingerprint struct {
    StaticFingerprinter
    logger *log.Logger
}

// NewCPUFingerprint is used to create a CPU fingerprint
func NewCPUFingerprint(logger *log.Logger) Fingerprint {
    f := &CPUFingerprint{logger: logger}
    return f
}

func (f *CPUFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    if err := stats.Init(); err != nil {
        return false, fmt.Errorf("Unable to obtain CPU information: %v", err)
    }

    modelName := stats.CPUModelName()
    if modelName != "" {
        node.Attributes["cpu.modelname"] = modelName
    }

    mhz := stats.CPUMHzPerCore()
    node.Attributes["cpu.frequency"] = fmt.Sprintf("%.0f", mhz)
    f.logger.Printf("[DEBUG] fingerprint.cpu: frequency: %.0f MHz", mhz)

    numCores := stats.CPUNumCores()
    node.Attributes["cpu.numcores"] = fmt.Sprintf("%d", numCores)
    f.logger.Printf("[DEBUG] fingerprint.cpu: core count: %d", numCores)

    tt := stats.TotalTicksAvailable()
    node.Attributes["cpu.totalcompute"] = fmt.Sprintf("%.0f", tt)

    if node.Resources == nil {
        node.Resources = &structs.Resources{}
    }

    node.Resources.CPU = int(tt)

    return true, nil
}
@@ -1,250 +0,0 @@
package fingerprint

import (
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "net/url"
    "os"
    "regexp"
    "strings"
    "time"

    "github.com/hashicorp/go-cleanhttp"
    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
)

// This is where the AWS metadata server normally resides. We hardcode the
// "instance" path as well since it's the only one we access here.
const DEFAULT_AWS_URL = "http://169.254.169.254/latest/meta-data/"

// map of instance type to approximate speed, in Mbits/s
// http://serverfault.com/questions/324883/aws-bandwidth-and-content-delivery/326797#326797
// which itself cites these sources:
// - http://blog.rightscale.com/2007/10/28/network-performance-within-amazon-ec2-and-to-amazon-s3/
// - http://www.soc.napier.ac.uk/~bill/chris_p.pdf
//
// This data is meant for a loose approximation
var ec2InstanceSpeedMap = map[string]int{
    "m4.large":    80,
    "m3.medium":   80,
    "m3.large":    80,
    "c4.large":    80,
    "c3.large":    80,
    "c3.xlarge":   80,
    "r3.large":    80,
    "r3.xlarge":   80,
    "i2.xlarge":   80,
    "d2.xlarge":   80,
    "t2.micro":    16,
    "t2.small":    16,
    "t2.medium":   16,
    "t2.large":    16,
    "m4.xlarge":   760,
    "m4.2xlarge":  760,
    "m4.4xlarge":  760,
    "m3.xlarge":   760,
    "m3.2xlarge":  760,
    "c4.xlarge":   760,
    "c4.2xlarge":  760,
    "c4.4xlarge":  760,
    "c3.2xlarge":  760,
    "c3.4xlarge":  760,
    "g2.2xlarge":  760,
    "r3.2xlarge":  760,
    "r3.4xlarge":  760,
    "i2.2xlarge":  760,
    "i2.4xlarge":  760,
    "d2.2xlarge":  760,
    "d2.4xlarge":  760,
    "m4.10xlarge": 10000,
    "c4.8xlarge":  10000,
    "c3.8xlarge":  10000,
    "g2.8xlarge":  10000,
    "r3.8xlarge":  10000,
    "i2.8xlarge":  10000,
    "d2.8xlarge":  10000,
}

// EnvAWSFingerprint is used to fingerprint AWS metadata
type EnvAWSFingerprint struct {
    StaticFingerprinter
    logger *log.Logger
}

// NewEnvAWSFingerprint is used to create a fingerprint from AWS metadata
func NewEnvAWSFingerprint(logger *log.Logger) Fingerprint {
    f := &EnvAWSFingerprint{logger: logger}
    return f
}

func (f *EnvAWSFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    if !f.isAWS() {
        return false, nil
    }

    // newNetwork is populated and added to the Node's resources
    newNetwork := &structs.NetworkResource{
        Device: "eth0",
    }

    if node.Links == nil {
        node.Links = make(map[string]string)
    }
    metadataURL := os.Getenv("AWS_ENV_URL")
    if metadataURL == "" {
        metadataURL = DEFAULT_AWS_URL
    }

    // assume 2 seconds is enough time for inside AWS network
    client := &http.Client{
        Timeout:   2 * time.Second,
        Transport: cleanhttp.DefaultTransport(),
    }

    // Keys and whether they should be namespaced as unique. Any key whose value
    // uniquely identifies a node, such as ip, should be marked as unique. When
    // marked as unique, the key isn't included in the computed node class.
    keys := map[string]bool{
        "ami-id":                      true,
        "hostname":                    true,
        "instance-id":                 true,
        "instance-type":               false,
        "local-hostname":              true,
        "local-ipv4":                  true,
        "public-hostname":             true,
        "public-ipv4":                 true,
        "placement/availability-zone": false,
    }
    for k, unique := range keys {
        res, err := client.Get(metadataURL + k)
        if err != nil {
            // if it's a URL error, assume we're not in an AWS environment
            // TODO: better way to detect AWS? Check xen virtualization?
            if _, ok := err.(*url.Error); ok {
                return false, nil
            }
            // not sure what other errors it would return
            return false, err
        }
        if res.StatusCode != http.StatusOK {
            f.logger.Printf("[WARN]: fingerprint.env_aws: Could not read value for attribute %q", k)
            continue
        }
        resp, err := ioutil.ReadAll(res.Body)
        res.Body.Close()
        if err != nil {
            f.logger.Printf("[ERR]: fingerprint.env_aws: Error reading response body for AWS %s", k)
        }

        // assume we want blank entries
        key := "platform.aws." + strings.Replace(k, "/", ".", -1)
        if unique {
            key = structs.UniqueNamespace(key)
        }

        node.Attributes[key] = strings.Trim(string(resp), "\n")
    }

    // copy over network specific information
    if val := node.Attributes["unique.platform.aws.local-ipv4"]; val != "" {
        node.Attributes["unique.network.ip-address"] = val
        newNetwork.IP = val
        newNetwork.CIDR = newNetwork.IP + "/32"
    }

    // find LinkSpeed from lookup
    if throughput := f.linkSpeed(); throughput > 0 {
        newNetwork.MBits = throughput
    }

    if node.Resources == nil {
        node.Resources = &structs.Resources{}
    }
    node.Resources.Networks = append(node.Resources.Networks, newNetwork)

    // populate Node Network Resources

    // populate Links
    node.Links["aws.ec2"] = fmt.Sprintf("%s.%s",
        node.Attributes["platform.aws.placement.availability-zone"],
        node.Attributes["unique.platform.aws.instance-id"])

    return true, nil
}

func (f *EnvAWSFingerprint) isAWS() bool {
    // Read the internal metadata URL from the environment, allowing test files to
    // provide their own
    metadataURL := os.Getenv("AWS_ENV_URL")
    if metadataURL == "" {
        metadataURL = DEFAULT_AWS_URL
    }

    // assume 2 seconds is enough time for inside AWS network
    client := &http.Client{
        Timeout:   2 * time.Second,
        Transport: cleanhttp.DefaultTransport(),
    }

    // Query the metadata url for the ami-id, to verify we're on AWS
    resp, err := client.Get(metadataURL + "ami-id")
    if err != nil {
        f.logger.Printf("[DEBUG] fingerprint.env_aws: Error querying AWS Metadata URL, skipping")
        return false
    }
    defer resp.Body.Close()

    if resp.StatusCode >= 400 {
        // URL not found, which indicates that this isn't AWS
        return false
    }

    instanceID, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        f.logger.Printf("[DEBUG] fingerprint.env_aws: Error reading AWS Instance ID, skipping")
        return false
    }

    match, err := regexp.MatchString("ami-*", string(instanceID))
    if err != nil || !match {
        return false
    }

    return true
}

// linkSpeed uses the lookup table above to approximate network speeds
func (f *EnvAWSFingerprint) linkSpeed() int {

    // Query the API for the instance type, and use the table above to approximate
    // the network speed
    metadataURL := os.Getenv("AWS_ENV_URL")
    if metadataURL == "" {
        metadataURL = DEFAULT_AWS_URL
    }

    // assume 2 seconds is enough time for inside AWS network
    client := &http.Client{
        Timeout:   2 * time.Second,
        Transport: cleanhttp.DefaultTransport(),
    }

    res, err := client.Get(metadataURL + "instance-type")
    if err != nil {
        f.logger.Printf("[ERR]: fingerprint.env_aws: Error querying instance-type")
        return 0
    }
    body, err := ioutil.ReadAll(res.Body)
    res.Body.Close()
    if err != nil {
        f.logger.Printf("[ERR]: fingerprint.env_aws: Error reading response body for instance-type")
        return 0
    }

    key := strings.Trim(string(body), "\n")
    v, ok := ec2InstanceSpeedMap[key]
    if !ok {
        return 0
    }

    return v
}
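Because the fingerprinter reads AWS_ENV_URL before falling back to the hardcoded metadata address, tests can point it at a local server. A hedged sketch of that pattern (the handler contents and helper name are illustrative only, not from the diff):

package fingerprint

import (
    "fmt"
    "net/http"
    "net/http/httptest"
    "os"
)

// withFakeAWSMetadata starts a local metadata server and points the
// fingerprinter at it via AWS_ENV_URL for the duration of fn.
func withFakeAWSMetadata(fn func()) {
    ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        // Serve a minimal subset of the metadata paths used above.
        switch r.URL.Path {
        case "/ami-id":
            fmt.Fprint(w, "ami-1234567890abcdef0")
        case "/instance-type":
            fmt.Fprint(w, "t2.micro")
        default:
            http.NotFound(w, r)
        }
    }))
    defer ts.Close()

    old := os.Getenv("AWS_ENV_URL")
    os.Setenv("AWS_ENV_URL", ts.URL+"/")
    defer os.Setenv("AWS_ENV_URL", old)

    fn()
}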
@@ -1,270 +0,0 @@
package fingerprint

import (
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "net/url"
    "os"
    "regexp"
    "strconv"
    "strings"
    "time"

    "github.com/hashicorp/go-cleanhttp"
    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
)

// This is where the GCE metadata server normally resides. We hardcode the
// "instance" path as well since it's the only one we access here.
const DEFAULT_GCE_URL = "http://169.254.169.254/computeMetadata/v1/instance/"

type GCEMetadataNetworkInterface struct {
    AccessConfigs []struct {
        ExternalIp string
        Type       string
    }
    ForwardedIps []string
    Ip           string
    Network      string
}

type ReqError struct {
    StatusCode int
}

func (e ReqError) Error() string {
    return http.StatusText(e.StatusCode)
}

func lastToken(s string) string {
    index := strings.LastIndex(s, "/")
    return s[index+1:]
}

// EnvGCEFingerprint is used to fingerprint GCE metadata
type EnvGCEFingerprint struct {
    StaticFingerprinter
    client      *http.Client
    logger      *log.Logger
    metadataURL string
}

// NewEnvGCEFingerprint is used to create a fingerprint from GCE metadata
func NewEnvGCEFingerprint(logger *log.Logger) Fingerprint {
    // Read the internal metadata URL from the environment, allowing test files to
    // provide their own
    metadataURL := os.Getenv("GCE_ENV_URL")
    if metadataURL == "" {
        metadataURL = DEFAULT_GCE_URL
    }

    // assume 2 seconds is enough time for inside GCE network
    client := &http.Client{
        Timeout:   2 * time.Second,
        Transport: cleanhttp.DefaultTransport(),
    }

    return &EnvGCEFingerprint{
        client:      client,
        logger:      logger,
        metadataURL: metadataURL,
    }
}

func (f *EnvGCEFingerprint) Get(attribute string, recursive bool) (string, error) {
    reqUrl := f.metadataURL + attribute
    if recursive {
        reqUrl = reqUrl + "?recursive=true"
    }

    parsedUrl, err := url.Parse(reqUrl)
    if err != nil {
        return "", err
    }

    req := &http.Request{
        Method: "GET",
        URL:    parsedUrl,
        Header: http.Header{
            "Metadata-Flavor": []string{"Google"},
        },
    }

    res, err := f.client.Do(req)
    if err != nil || res.StatusCode != http.StatusOK {
        f.logger.Printf("[DEBUG] fingerprint.env_gce: Could not read value for attribute %q", attribute)
        return "", err
    }

    resp, err := ioutil.ReadAll(res.Body)
    res.Body.Close()
    if err != nil {
        f.logger.Printf("[ERR] fingerprint.env_gce: Error reading response body for GCE %s", attribute)
        return "", err
    }

    if res.StatusCode >= 400 {
        return "", ReqError{res.StatusCode}
    }

    return string(resp), nil
}

func checkError(err error, logger *log.Logger, desc string) error {
    // If it's a URL error, assume we're not actually in a GCE environment.
    // To the outer layers, this isn't an error so return nil.
    if _, ok := err.(*url.Error); ok {
        logger.Printf("[DEBUG] fingerprint.env_gce: Error querying GCE " + desc + ", skipping")
        return nil
    }
    // Otherwise pass the error through.
    return err
}

func (f *EnvGCEFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    if !f.isGCE() {
        return false, nil
    }

    if node.Links == nil {
        node.Links = make(map[string]string)
    }

    // Keys and whether they should be namespaced as unique. Any key whose value
    // uniquely identifies a node, such as ip, should be marked as unique. When
    // marked as unique, the key isn't included in the computed node class.
    keys := map[string]bool{
        "hostname":                       true,
        "id":                             true,
        "cpu-platform":                   false,
        "scheduling/automatic-restart":   false,
        "scheduling/on-host-maintenance": false,
    }

    for k, unique := range keys {
        value, err := f.Get(k, false)
        if err != nil {
            return false, checkError(err, f.logger, k)
        }

        // assume we want blank entries
        key := "platform.gce." + strings.Replace(k, "/", ".", -1)
        if unique {
            key = structs.UniqueNamespace(key)
        }
        node.Attributes[key] = strings.Trim(string(value), "\n")
    }

    // These keys need everything before the final slash removed to be usable.
    keys = map[string]bool{
        "machine-type": false,
        "zone":         false,
    }
    for k, unique := range keys {
        value, err := f.Get(k, false)
        if err != nil {
            return false, checkError(err, f.logger, k)
        }

        key := "platform.gce." + k
        if unique {
            key = structs.UniqueNamespace(key)
        }
        node.Attributes[key] = strings.Trim(lastToken(value), "\n")
    }

    // Get internal and external IPs (if they exist)
    value, err := f.Get("network-interfaces/", true)
    var interfaces []GCEMetadataNetworkInterface
    if err := json.Unmarshal([]byte(value), &interfaces); err != nil {
        f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding network interface information: %s", err.Error())
    }

    for _, intf := range interfaces {
        prefix := "platform.gce.network." + lastToken(intf.Network)
        uniquePrefix := "unique." + prefix
        node.Attributes[prefix] = "true"
        node.Attributes[uniquePrefix+".ip"] = strings.Trim(intf.Ip, "\n")
        for index, accessConfig := range intf.AccessConfigs {
            node.Attributes[uniquePrefix+".external-ip."+strconv.Itoa(index)] = accessConfig.ExternalIp
        }
    }

    var tagList []string
    value, err = f.Get("tags", false)
    if err != nil {
        return false, checkError(err, f.logger, "tags")
    }
    if err := json.Unmarshal([]byte(value), &tagList); err != nil {
        f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding instance tags: %s", err.Error())
    }
    for _, tag := range tagList {
        attr := "platform.gce.tag."
        var key string

        // If the tag is namespaced as unique, we strip it from the tag and
        // prepend to the whole attribute.
        if structs.IsUniqueNamespace(tag) {
            tag = strings.TrimPrefix(tag, structs.NodeUniqueNamespace)
            key = fmt.Sprintf("%s%s%s", structs.NodeUniqueNamespace, attr, tag)
        } else {
            key = fmt.Sprintf("%s%s", attr, tag)
        }

        node.Attributes[key] = "true"
    }

    var attrDict map[string]string
    value, err = f.Get("attributes/", true)
    if err != nil {
        return false, checkError(err, f.logger, "attributes/")
    }
    if err := json.Unmarshal([]byte(value), &attrDict); err != nil {
        f.logger.Printf("[WARN] fingerprint.env_gce: Error decoding instance attributes: %s", err.Error())
    }
    for k, v := range attrDict {
        attr := "platform.gce.attr."
        var key string

        // If the key is namespaced as unique, we strip it from the
        // key and prepend to the whole attribute.
        if structs.IsUniqueNamespace(k) {
            k = strings.TrimPrefix(k, structs.NodeUniqueNamespace)
            key = fmt.Sprintf("%s%s%s", structs.NodeUniqueNamespace, attr, k)
        } else {
            key = fmt.Sprintf("%s%s", attr, k)
        }

        node.Attributes[key] = strings.Trim(v, "\n")
    }

    // populate Links
    node.Links["gce"] = node.Attributes["unique.platform.gce.id"]

    return true, nil
}

func (f *EnvGCEFingerprint) isGCE() bool {
    // TODO: better way to detect GCE?

    // Query the metadata url for the machine type, to verify we're on GCE
    machineType, err := f.Get("machine-type", false)
    if err != nil {
        if re, ok := err.(ReqError); !ok || re.StatusCode != 404 {
            // If it wasn't a 404 error, print an error message.
            f.logger.Printf("[DEBUG] fingerprint.env_gce: Error querying GCE Metadata URL, skipping")
        }
        return false
    }

    match, err := regexp.MatchString("projects/.+/machineTypes/.+", machineType)
    if err != nil || !match {
        return false
    }

    return true
}
@@ -1,87 +0,0 @@
package fingerprint

import (
    "fmt"
    "log"
    "sort"
    "time"

    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
)

// EmptyDuration is to be used by fingerprinters that are not periodic.
const (
    EmptyDuration = time.Duration(0)
)

func init() {
    builtinFingerprintMap["arch"] = NewArchFingerprint
    builtinFingerprintMap["cpu"] = NewCPUFingerprint
    builtinFingerprintMap["env_aws"] = NewEnvAWSFingerprint
    builtinFingerprintMap["env_gce"] = NewEnvGCEFingerprint
    builtinFingerprintMap["host"] = NewHostFingerprint
    builtinFingerprintMap["memory"] = NewMemoryFingerprint
    builtinFingerprintMap["network"] = NewNetworkFingerprint
    builtinFingerprintMap["nomad"] = NewNomadFingerprint
    builtinFingerprintMap["storage"] = NewStorageFingerprint

    // Initialize the list of available fingerprinters per platform. Each
    // platform defines its own list of available fingerprinters.
    initPlatformFingerprints(builtinFingerprintMap)
}

// builtinFingerprintMap contains the built in registered fingerprints which are
// available for a given platform.
var builtinFingerprintMap = make(map[string]Factory, 16)

// BuiltinFingerprints returns a slice containing the key names of all registered
// fingerprints available, to provide an ordered iteration
func BuiltinFingerprints() []string {
    fingerprints := make([]string, 0, len(builtinFingerprintMap))
    for k := range builtinFingerprintMap {
        fingerprints = append(fingerprints, k)
    }
    sort.Strings(fingerprints)
    return fingerprints
}

// NewFingerprint is used to instantiate and return a new fingerprint
// given the name and a logger
func NewFingerprint(name string, logger *log.Logger) (Fingerprint, error) {
    // Lookup the factory function
    factory, ok := builtinFingerprintMap[name]
    if !ok {
        return nil, fmt.Errorf("unknown fingerprint '%s'", name)
    }

    // Instantiate the fingerprint
    f := factory(logger)
    return f, nil
}

// Factory is used to instantiate a new Fingerprint
type Factory func(*log.Logger) Fingerprint

// Fingerprint is used for doing "fingerprinting" of the
// host to automatically determine attributes, resources,
// and metadata about it. Each of these is a heuristic, and
// many of them can be applied on a particular host.
type Fingerprint interface {
    // Fingerprint is used to update properties of the Node,
    // and returns if the fingerprint was applicable and a potential error.
    Fingerprint(*config.Config, *structs.Node) (bool, error)

    // Periodic is a mechanism for the fingerprinter to indicate that it should
    // be run periodically. The return value is a boolean indicating if it
    // should be periodic, and if true, a duration.
    Periodic() (bool, time.Duration)
}

// StaticFingerprinter can be embedded in a struct that has a Fingerprint method
// to make it non-periodic.
type StaticFingerprinter struct{}

func (s *StaticFingerprinter) Periodic() (bool, time.Duration) {
    return false, EmptyDuration
}
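A minimal sketch (an assumption, not part of the vendored code) of what a custom fingerprinter built against the Fingerprint/Factory contract above looks like; StaticFingerprinter supplies the non-periodic Periodic implementation. Registering it would mean adding an entry such as builtinFingerprintMap["go"] = NewGoVersionFingerprint to the init above.

package fingerprint

import (
    "log"
    "runtime"

    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
)

// GoVersionFingerprint is a toy fingerprinter that records the Go runtime
// version of the agent as a node attribute.
type GoVersionFingerprint struct {
    StaticFingerprinter
    logger *log.Logger
}

func NewGoVersionFingerprint(logger *log.Logger) Fingerprint {
    return &GoVersionFingerprint{logger: logger}
}

func (f *GoVersionFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    node.Attributes["runtime.golang.version"] = runtime.Version()
    return true, nil
}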
@@ -1,6 +0,0 @@
// +build darwin dragonfly freebsd netbsd openbsd solaris windows

package fingerprint

func initPlatformFingerprints(fps map[string]Factory) {
}
@@ -1,5 +0,0 @@
package fingerprint

func initPlatformFingerprints(fps map[string]Factory) {
    fps["cgroup"] = NewCGroupFingerprint
}
@@ -1,51 +0,0 @@
package fingerprint

import (
    "fmt"
    "log"
    "os/exec"
    "runtime"
    "strings"

    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
    "github.com/shirou/gopsutil/host"
)

// HostFingerprint is used to fingerprint the host
type HostFingerprint struct {
    StaticFingerprinter
    logger *log.Logger
}

// NewHostFingerprint is used to create a Host fingerprint
func NewHostFingerprint(logger *log.Logger) Fingerprint {
    f := &HostFingerprint{logger: logger}
    return f
}

func (f *HostFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    hostInfo, err := host.Info()
    if err != nil {
        f.logger.Println("[WARN] Error retrieving host information: ", err)
        return false, err
    }

    node.Attributes["os.name"] = hostInfo.Platform
    node.Attributes["os.version"] = hostInfo.PlatformVersion

    node.Attributes["kernel.name"] = runtime.GOOS
    node.Attributes["kernel.version"] = ""

    if runtime.GOOS != "windows" {
        out, err := exec.Command("uname", "-r").Output()
        if err != nil {
            return false, fmt.Errorf("Failed to run uname: %s", err)
        }
        node.Attributes["kernel.version"] = strings.Trim(string(out), "\n")
    }

    node.Attributes["unique.hostname"] = hostInfo.Hostname

    return true, nil
}
@@ -1,43 +0,0 @@
package fingerprint

import (
    "fmt"
    "log"

    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
    "github.com/shirou/gopsutil/mem"
)

// MemoryFingerprint is used to fingerprint the available memory on the node
type MemoryFingerprint struct {
    StaticFingerprinter
    logger *log.Logger
}

// NewMemoryFingerprint is used to create a Memory fingerprint
func NewMemoryFingerprint(logger *log.Logger) Fingerprint {
    f := &MemoryFingerprint{
        logger: logger,
    }
    return f
}

func (f *MemoryFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    memInfo, err := mem.VirtualMemory()
    if err != nil {
        f.logger.Printf("[WARN] Error reading memory information: %s", err)
        return false, err
    }

    if memInfo.Total > 0 {
        node.Attributes["memory.totalbytes"] = fmt.Sprintf("%d", memInfo.Total)

        if node.Resources == nil {
            node.Resources = &structs.Resources{}
        }
        node.Resources.MemoryMB = int(memInfo.Total / 1024 / 1024)
    }

    return true, nil
}
@@ -1,167 +0,0 @@
package fingerprint

import (
    "errors"
    "fmt"
    "log"
    "net"

    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
)

// NetworkFingerprint is used to fingerprint the Network capabilities of a node
type NetworkFingerprint struct {
    StaticFingerprinter
    logger            *log.Logger
    interfaceDetector NetworkInterfaceDetector
}

// An interface to isolate calls to various APIs in the net package.
// This facilitates testing where we can implement
// fake interfaces and addresses to test various code paths
type NetworkInterfaceDetector interface {
    Interfaces() ([]net.Interface, error)
    InterfaceByName(name string) (*net.Interface, error)
    Addrs(intf *net.Interface) ([]net.Addr, error)
}

// Implements the interface detector which calls net directly
type DefaultNetworkInterfaceDetector struct {
}

func (b *DefaultNetworkInterfaceDetector) Interfaces() ([]net.Interface, error) {
    return net.Interfaces()
}

func (b *DefaultNetworkInterfaceDetector) InterfaceByName(name string) (*net.Interface, error) {
    return net.InterfaceByName(name)
}

func (b *DefaultNetworkInterfaceDetector) Addrs(intf *net.Interface) ([]net.Addr, error) {
    return intf.Addrs()
}

// NewNetworkFingerprint returns a new NetworkFingerprinter with the given
// logger
func NewNetworkFingerprint(logger *log.Logger) Fingerprint {
    f := &NetworkFingerprint{logger: logger, interfaceDetector: &DefaultNetworkInterfaceDetector{}}
    return f
}

func (f *NetworkFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    // newNetwork is populated and added to the Node's resources
    newNetwork := &structs.NetworkResource{}
    var ip string

    intf, err := f.findInterface(cfg.NetworkInterface)
    switch {
    case err != nil:
        return false, fmt.Errorf("Error while detecting network interface during fingerprinting: %v", err)
    case intf == nil:
        // No interface could be found
        return false, nil
    }

    if ip, err = f.ipAddress(intf); err != nil {
        return false, fmt.Errorf("Unable to find IP address of interface: %s, err: %v", intf.Name, err)
    }

    newNetwork.Device = intf.Name
    node.Attributes["unique.network.ip-address"] = ip
    newNetwork.IP = ip
    newNetwork.CIDR = newNetwork.IP + "/32"

    f.logger.Printf("[DEBUG] fingerprint.network: Detected interface %v with IP %v during fingerprinting", intf.Name, ip)

    if throughput := f.linkSpeed(intf.Name); throughput > 0 {
        newNetwork.MBits = throughput
        f.logger.Printf("[DEBUG] fingerprint.network: link speed for %v set to %v", intf.Name, newNetwork.MBits)
    } else {
        f.logger.Printf("[DEBUG] fingerprint.network: Unable to read link speed; setting to default %v", cfg.NetworkSpeed)
        newNetwork.MBits = cfg.NetworkSpeed
    }

    if node.Resources == nil {
        node.Resources = &structs.Resources{}
    }

    node.Resources.Networks = append(node.Resources.Networks, newNetwork)

    // return true, because we have a network connection
    return true, nil
}

// Gets the ipv4 addr for a network interface
func (f *NetworkFingerprint) ipAddress(intf *net.Interface) (string, error) {
    var addrs []net.Addr
    var err error

    if addrs, err = f.interfaceDetector.Addrs(intf); err != nil {
        return "", err
    }

    if len(addrs) == 0 {
        return "", errors.New(fmt.Sprintf("Interface %s has no IP address", intf.Name))
    }
    for _, addr := range addrs {
        var ip net.IP
        switch v := (addr).(type) {
        case *net.IPNet:
            ip = v.IP
        case *net.IPAddr:
            ip = v.IP
        }
        if ip.To4() != nil {
            return ip.String(), nil
        }
    }

    return "", fmt.Errorf("Couldn't parse IP address for interface %s", intf.Name)
}

// Checks if the device is marked UP by the operator
func (f *NetworkFingerprint) isDeviceEnabled(intf *net.Interface) bool {
    return intf.Flags&net.FlagUp != 0
}

// Checks if the device has any IP address configured
func (f *NetworkFingerprint) deviceHasIpAddress(intf *net.Interface) bool {
    _, err := f.ipAddress(intf)
    return err == nil
}

func (n *NetworkFingerprint) isDeviceLoopBackOrPointToPoint(intf *net.Interface) bool {
    return intf.Flags&(net.FlagLoopback|net.FlagPointToPoint) != 0
}

// Returns the interface with the name passed by the user.
// If the name is blank then it iterates through all the devices
// and finds one which is routable and marked as UP.
// It excludes PPP and lo devices unless they are specifically asked for.
func (f *NetworkFingerprint) findInterface(deviceName string) (*net.Interface, error) {
    var interfaces []net.Interface
    var err error

    if deviceName != "" {
        return f.interfaceDetector.InterfaceByName(deviceName)
    }

    var intfs []net.Interface

    if intfs, err = f.interfaceDetector.Interfaces(); err != nil {
        return nil, err
    }

    for _, intf := range intfs {
        if f.isDeviceEnabled(&intf) && !f.isDeviceLoopBackOrPointToPoint(&intf) && f.deviceHasIpAddress(&intf) {
            interfaces = append(interfaces, intf)
        }
    }

    if len(interfaces) == 0 {
        return nil, nil
    }
    return &interfaces[0], nil
}
@@ -1,8 +0,0 @@
// +build !linux,!windows

package fingerprint

// linkSpeed returns the default link speed
func (f *NetworkFingerprint) linkSpeed(device string) int {
    return 0
}
@@ -1,78 +0,0 @@
package fingerprint

import (
    "fmt"
    "io/ioutil"
    "os/exec"
    "regexp"
    "strconv"
    "strings"
)

// linkSpeedSys parses link speed in Mb/s from /sys.
func (f *NetworkFingerprint) linkSpeedSys(device string) int {
    path := fmt.Sprintf("/sys/class/net/%s/speed", device)

    // Read contents of the device/speed file
    content, err := ioutil.ReadFile(path)
    if err != nil {
        f.logger.Printf("[DEBUG] fingerprint.network: Unable to read link speed from %s", path)
        return 0
    }

    lines := strings.Split(string(content), "\n")
    mbs, err := strconv.Atoi(lines[0])
    if err != nil || mbs <= 0 {
        f.logger.Printf("[DEBUG] fingerprint.network: Unable to parse link speed from %s", path)
        return 0
    }

    return mbs
}

// linkSpeed returns link speed in Mb/s, or 0 when unable to determine it.
func (f *NetworkFingerprint) linkSpeed(device string) int {
    // Use LookPath to find ethtool in the system's $PATH.
    // If it's not found or otherwise errors, LookPath returns an empty string
    // and an error we can ignore for our purposes.
    ethtoolPath, _ := exec.LookPath("ethtool")
    if ethtoolPath != "" {
        if speed := f.linkSpeedEthtool(ethtoolPath, device); speed > 0 {
            return speed
        }
    }

    // Fall back on checking a system file for link speed.
    return f.linkSpeedSys(device)
}

// linkSpeedEthtool determines link speed in Mb/s with 'ethtool'.
func (f *NetworkFingerprint) linkSpeedEthtool(path, device string) int {
    outBytes, err := exec.Command(path, device).Output()
    if err != nil {
        f.logger.Printf("[WARN] fingerprint.network: Error calling ethtool (%s %s): %v", path, device, err)
        return 0
    }

    output := strings.TrimSpace(string(outBytes))
    re := regexp.MustCompile("Speed: [0-9]+[a-zA-Z]+/s")
    m := re.FindString(output)
    if m == "" {
        // no matches found, output may be in a different format
        f.logger.Printf("[WARN] fingerprint.network: Unable to parse Speed in output of '%s %s'", path, device)
        return 0
    }

    // Split and trim the Mb/s unit from the string output
    args := strings.Split(m, ": ")
    raw := strings.TrimSuffix(args[1], "Mb/s")

    // convert to Mb/s
    mbs, err := strconv.Atoi(raw)
    if err != nil || mbs <= 0 {
        f.logger.Printf("[WARN] fingerprint.network: Unable to parse Mb/s in output of '%s %s'", path, device)
        return 0
    }

    return mbs
}
@@ -1,52 +0,0 @@
package fingerprint

import (
    "fmt"
    "os/exec"
    "strconv"
    "strings"
)

// linkSpeed returns link speed in Mb/s, or 0 when unable to determine it.
func (f *NetworkFingerprint) linkSpeed(device string) int {
    command := fmt.Sprintf("Get-NetAdapter -IncludeHidden | Where name -eq '%s' | Select -ExpandProperty LinkSpeed", device)
    path := "powershell.exe"
    outBytes, err := exec.Command(path, command).Output()

    if err != nil {
        f.logger.Printf("[WARN] fingerprint.network: Error calling %s (%s): %v", path, command, err)
        return 0
    }

    output := strings.TrimSpace(string(outBytes))

    return f.parseLinkSpeed(output)
}

func (f *NetworkFingerprint) parseLinkSpeed(commandOutput string) int {
    args := strings.Split(commandOutput, " ")
    if len(args) != 2 {
        f.logger.Printf("[WARN] fingerprint.network: Couldn't split LinkSpeed (%s)", commandOutput)
        return 0
    }

    unit := strings.Replace(args[1], "\r\n", "", -1)
    value, err := strconv.Atoi(args[0])
    if err != nil {
        f.logger.Printf("[WARN] fingerprint.network: Unable to parse LinkSpeed value (%s)", commandOutput)
        return 0
    }

    switch unit {
    case "Mbps":
        return value
    case "Kbps":
        return value / 1000
    case "Gbps":
        return value * 1000
    case "bps":
        return value / 1000000
    }

    return 0
}
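A hedged sketch of the unit handling in parseLinkSpeed above: PowerShell reports LinkSpeed as "<value> <unit>", and everything is normalized to Mb/s with integer arithmetic. The demo function is illustrative only and assumes the Windows build of this file.

package fingerprint

import (
    "fmt"
    "log"
    "os"
)

// demoParseLinkSpeed shows how the PowerShell "value unit" strings map to Mb/s.
func demoParseLinkSpeed() {
    f := &NetworkFingerprint{logger: log.New(os.Stderr, "", log.LstdFlags)}
    fmt.Println(f.parseLinkSpeed("10 Gbps"))  // 10000
    fmt.Println(f.parseLinkSpeed("100 Mbps")) // 100
    fmt.Println(f.parseLinkSpeed("500 Kbps")) // 0 (integer division)
}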
@@ -1,26 +0,0 @@
package fingerprint

import (
    "log"

    client "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
)

// NomadFingerprint is used to fingerprint the Nomad version
type NomadFingerprint struct {
    StaticFingerprinter
    logger *log.Logger
}

// NewNomadFingerprint is used to create a Nomad fingerprint
func NewNomadFingerprint(logger *log.Logger) Fingerprint {
    f := &NomadFingerprint{logger: logger}
    return f
}

func (f *NomadFingerprint) Fingerprint(config *client.Config, node *structs.Node) (bool, error) {
    node.Attributes["nomad.version"] = config.Version
    node.Attributes["nomad.revision"] = config.Revision
    return true, nil
}
@@ -1,59 +0,0 @@
package fingerprint

import (
    "fmt"
    "log"
    "os"
    "strconv"

    "github.com/hashicorp/nomad/client/config"
    "github.com/hashicorp/nomad/nomad/structs"
)

const bytesPerMegabyte = 1024 * 1024

// StorageFingerprint is used to measure the amount of storage free for
// applications that the Nomad agent will run on this machine.
type StorageFingerprint struct {
    StaticFingerprinter
    logger *log.Logger
}

func NewStorageFingerprint(logger *log.Logger) Fingerprint {
    fp := &StorageFingerprint{logger: logger}
    return fp
}

func (f *StorageFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    // Initialize these to empty defaults
    node.Attributes["unique.storage.volume"] = ""
    node.Attributes["unique.storage.bytestotal"] = ""
    node.Attributes["unique.storage.bytesfree"] = ""
    if node.Resources == nil {
        node.Resources = &structs.Resources{}
    }

    // Guard against unset AllocDir
    storageDir := cfg.AllocDir
    if storageDir == "" {
        var err error
        storageDir, err = os.Getwd()
        if err != nil {
            return false, fmt.Errorf("unable to get CWD from filesystem: %s", err)
        }
    }

    volume, total, free, err := f.diskFree(storageDir)
    if err != nil {
        return false, fmt.Errorf("failed to determine disk space for %s: %v", storageDir, err)
    }

    node.Attributes["unique.storage.volume"] = volume
    node.Attributes["unique.storage.bytestotal"] = strconv.FormatUint(total, 10)
    node.Attributes["unique.storage.bytesfree"] = strconv.FormatUint(free, 10)

    node.Resources.DiskMB = int(free / bytesPerMegabyte)

    return true, nil
}
@ -1,64 +0,0 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris

package fingerprint

import (
	"fmt"
	"os/exec"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
)

// diskFree inspects the filesystem for path and returns the volume name and
// the total and free bytes available on the file system.
func (f *StorageFingerprint) diskFree(path string) (volume string, total, free uint64, err error) {
	absPath, err := filepath.Abs(path)
	if err != nil {
		return "", 0, 0, fmt.Errorf("failed to determine absolute path for %s", path)
	}

	// Use -k to standardize the output values between darwin and linux
	var dfArgs string
	if runtime.GOOS == "linux" {
		// df on linux needs the -P option to prevent linebreaks on long filesystem paths
		dfArgs = "-kP"
	} else {
		dfArgs = "-k"
	}

	mountOutput, err := exec.Command("df", dfArgs, absPath).Output()
	if err != nil {
		return "", 0, 0, fmt.Errorf("failed to determine mount point for %s", absPath)
	}
	// Output looks something like:
	// Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on
	// /dev/disk1 487385240 423722532 63406708 87% 105994631 15851677 87% /
	// [0] volume [1] capacity [2] SKIP [3] free
	lines := strings.Split(string(mountOutput), "\n")
	if len(lines) < 2 {
		return "", 0, 0, fmt.Errorf("failed to parse `df` output; expected at least 2 lines")
	}
	fields := strings.Fields(lines[1])
	if len(fields) < 4 {
		return "", 0, 0, fmt.Errorf("failed to parse `df` output; expected at least 4 columns")
	}
	volume = fields[0]

	total, err = strconv.ParseUint(fields[1], 10, 64)
	if err != nil {
		return "", 0, 0, fmt.Errorf("failed to parse storage.bytestotal size in kilobytes")
	}
	// convert to bytes
	total *= 1024

	free, err = strconv.ParseUint(fields[3], 10, 64)
	if err != nil {
		return "", 0, 0, fmt.Errorf("failed to parse storage.bytesfree size in kilobytes")
	}
	// convert to bytes
	free *= 1024

	return volume, total, free, nil
}
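As a quick illustration of the column mapping described in the comments above, here is a small, self-contained sketch (an assumption-level example, not part of the vendored file) that parses the sample df -k line and applies the same kilobyte-to-byte conversion:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	// Second line of the sample `df -k` output quoted above.
	line := "/dev/disk1 487385240 423722532 63406708 87% 105994631 15851677 87% /"
	fields := strings.Fields(line)

	volume := fields[0]
	totalKB, _ := strconv.ParseUint(fields[1], 10, 64)
	freeKB, _ := strconv.ParseUint(fields[3], 10, 64)

	// Same conversion as diskFree: df -k reports 1024-byte blocks.
	fmt.Println(volume, totalKB*1024, freeKB*1024)
}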
@ -1,33 +0,0 @@
package fingerprint

import (
	"fmt"
	"path/filepath"
	"syscall"
)

//go:generate go run $GOROOT/src/syscall/mksyscall_windows.go -output zstorage_windows.go storage_windows.go

//sys getDiskFreeSpaceEx(dirName *uint16, availableFreeBytes *uint64, totalBytes *uint64, totalFreeBytes *uint64) (err error) = kernel32.GetDiskFreeSpaceExW

// diskFree inspects the filesystem for path and returns the volume name and
// the total and free bytes available on the file system.
func (f *StorageFingerprint) diskFree(path string) (volume string, total, free uint64, err error) {
	absPath, err := filepath.Abs(path)
	if err != nil {
		return "", 0, 0, fmt.Errorf("failed to determine absolute path for %s", path)
	}

	volume = filepath.VolumeName(absPath)

	absPathp, err := syscall.UTF16PtrFromString(absPath)
	if err != nil {
		return "", 0, 0, fmt.Errorf("failed to convert \"%s\" to UTF16: %v", absPath, err)
	}

	if err := getDiskFreeSpaceEx(absPathp, nil, &total, &free); err != nil {
		return "", 0, 0, fmt.Errorf("failed to get free disk space for %s: %v", absPath, err)
	}

	return volume, total, free, nil
}
@ -1,26 +0,0 @@
// MACHINE GENERATED BY 'go generate' COMMAND; DO NOT EDIT

package fingerprint

import "unsafe"
import "syscall"

var _ unsafe.Pointer

var (
	modkernel32 = syscall.NewLazyDLL("kernel32.dll")

	procGetDiskFreeSpaceExW = modkernel32.NewProc("GetDiskFreeSpaceExW")
)

func getDiskFreeSpaceEx(dirName *uint16, availableFreeBytes *uint64, totalBytes *uint64, totalFreeBytes *uint64) (err error) {
	r1, _, e1 := syscall.Syscall6(procGetDiskFreeSpaceExW.Addr(), 4, uintptr(unsafe.Pointer(dirName)), uintptr(unsafe.Pointer(availableFreeBytes)), uintptr(unsafe.Pointer(totalBytes)), uintptr(unsafe.Pointer(totalFreeBytes)), 0, 0)
	if r1 == 0 {
		if e1 != 0 {
			err = error(e1)
		} else {
			err = syscall.EINVAL
		}
	}
	return
}
@ -1,78 +0,0 @@
package getter

import (
	"fmt"
	"net/url"
	"path/filepath"
	"sync"

	gg "github.com/hashicorp/go-getter"
	"github.com/hashicorp/nomad/client/driver/env"
	"github.com/hashicorp/nomad/nomad/structs"
)

var (
	// getters is the map of getters suitable for Nomad. It is initialized once
	// and the lock is used to guard access to it.
	getters map[string]gg.Getter
	lock    sync.Mutex

	// supported is the set of download schemes supported by Nomad
	supported = []string{"http", "https", "s3"}
)

// getClient returns a client that is suitable for Nomad downloading artifacts.
func getClient(src, dst string) *gg.Client {
	lock.Lock()
	defer lock.Unlock()

	// Return the pre-initialized client
	if getters == nil {
		getters = make(map[string]gg.Getter, len(supported))
		for _, getter := range supported {
			if impl, ok := gg.Getters[getter]; ok {
				getters[getter] = impl
			}
		}
	}

	return &gg.Client{
		Src:     src,
		Dst:     dst,
		Mode:    gg.ClientModeAny,
		Getters: getters,
	}
}

// getGetterUrl returns the go-getter URL to download the artifact.
func getGetterUrl(taskEnv *env.TaskEnvironment, artifact *structs.TaskArtifact) (string, error) {
	taskEnv.Build()
	u, err := url.Parse(taskEnv.ReplaceEnv(artifact.GetterSource))
	if err != nil {
		return "", fmt.Errorf("failed to parse source URL %q: %v", artifact.GetterSource, err)
	}

	// Build the url
	q := u.Query()
	for k, v := range artifact.GetterOptions {
		q.Add(k, taskEnv.ReplaceEnv(v))
	}
	u.RawQuery = q.Encode()
	return u.String(), nil
}

// GetArtifact downloads an artifact into the specified task directory.
func GetArtifact(taskEnv *env.TaskEnvironment, artifact *structs.TaskArtifact, taskDir string) error {
	url, err := getGetterUrl(taskEnv, artifact)
	if err != nil {
		return err
	}

	// Download the artifact
	dest := filepath.Join(taskDir, artifact.RelativeDest)
	if err := getClient(url, dest).Get(); err != nil {
		return fmt.Errorf("GET error: %v", err)
	}

	return nil
}
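For orientation, a minimal standalone sketch (an assumed example, not part of this diff) of the URL-building step performed by getGetterUrl above: getter options are appended to the source URL as query parameters before the download is handed to go-getter:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// A source URL and option similar to what a TaskArtifact might carry.
	u, err := url.Parse("https://example.com/archive.tar.gz")
	if err != nil {
		panic(err)
	}

	q := u.Query()
	q.Add("checksum", "md5:d41d8cd98f00b204e9800998ecf8427e")
	u.RawQuery = q.Encode()

	fmt.Println(u.String())
	// https://example.com/archive.tar.gz?checksum=md5%3Ad41d8cd98f00b204e9800998ecf8427e
}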
Binary file not shown.
@ -1 +0,0 @@
hello world
vendor/github.com/hashicorp/nomad/client/getter/test-fixtures/archive/new/my.config
@ -1 +0,0 @@
hello world
@ -1 +0,0 @@
sleep 1
@ -1 +0,0 @@
sleep 1
@ -1,199 +0,0 @@
package client

import (
	"fmt"
	"math/rand"
	"sync"
	"time"

	cstructs "github.com/hashicorp/nomad/client/driver/structs"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// jitter is the percent of jitter added to restart delays.
	jitter = 0.25

	ReasonNoRestartsAllowed   = "Policy allows no restarts"
	ReasonUnrecoverableErrror = "Error was unrecoverable"
	ReasonWithinPolicy        = "Restart within policy"
	ReasonDelay               = "Exceeded allowed attempts, applying a delay"
)

func newRestartTracker(policy *structs.RestartPolicy, jobType string) *RestartTracker {
	onSuccess := true
	if jobType == structs.JobTypeBatch {
		onSuccess = false
	}
	return &RestartTracker{
		startTime: time.Now(),
		onSuccess: onSuccess,
		policy:    policy,
		rand:      rand.New(rand.NewSource(time.Now().Unix())),
	}
}

type RestartTracker struct {
	waitRes   *cstructs.WaitResult
	startErr  error
	count     int       // Current number of attempts.
	onSuccess bool      // Whether to restart on successful exit code.
	startTime time.Time // When the interval began
	reason    string    // The reason for the last state
	policy    *structs.RestartPolicy
	rand      *rand.Rand
	lock      sync.Mutex
}

// SetPolicy updates the policy used to determine restarts.
func (r *RestartTracker) SetPolicy(policy *structs.RestartPolicy) {
	r.lock.Lock()
	defer r.lock.Unlock()
	r.policy = policy
}

// SetStartError is used to mark the most recent start error. If starting was
// successful the error should be nil.
func (r *RestartTracker) SetStartError(err error) *RestartTracker {
	r.lock.Lock()
	defer r.lock.Unlock()
	r.startErr = err
	return r
}

// SetWaitResult is used to mark the most recent wait result.
func (r *RestartTracker) SetWaitResult(res *cstructs.WaitResult) *RestartTracker {
	r.lock.Lock()
	defer r.lock.Unlock()
	r.waitRes = res
	return r
}

// GetReason returns a human-readable description for the last state returned by
// GetState.
func (r *RestartTracker) GetReason() string {
	r.lock.Lock()
	defer r.lock.Unlock()
	return r.reason
}

// GetState returns the tasks next state given the set exit code and start
// error. One of the following states are returned:
// * TaskRestarting - Task should be restarted
// * TaskNotRestarting - Task should not be restarted and has exceeded its
//   restart policy.
// * TaskTerminated - Task has terminated successfully and does not need a
//   restart.
//
// If TaskRestarting is returned, the duration is how long to wait until
// starting the task again.
func (r *RestartTracker) GetState() (string, time.Duration) {
	r.lock.Lock()
	defer r.lock.Unlock()

	// Hot path if no attempts are expected
	if r.policy.Attempts == 0 {
		r.reason = ReasonNoRestartsAllowed
		if r.waitRes != nil && r.waitRes.Successful() {
			return structs.TaskTerminated, 0
		}

		return structs.TaskNotRestarting, 0
	}

	r.count++

	// Check if we have entered a new interval.
	end := r.startTime.Add(r.policy.Interval)
	now := time.Now()
	if now.After(end) {
		r.count = 0
		r.startTime = now
	}

	if r.startErr != nil {
		return r.handleStartError()
	} else if r.waitRes != nil {
		return r.handleWaitResult()
	} else {
		return "", 0
	}
}

// handleStartError returns the new state and potential wait duration for
// restarting the task after it was not successfully started. On start errors,
// the restart policy is always treated as fail mode to ensure we don't
// infinitely try to start a task.
func (r *RestartTracker) handleStartError() (string, time.Duration) {
	// If the error is not recoverable, do not restart.
	if rerr, ok := r.startErr.(*cstructs.RecoverableError); !(ok && rerr.Recoverable) {
		r.reason = ReasonUnrecoverableErrror
		return structs.TaskNotRestarting, 0
	}

	if r.count > r.policy.Attempts {
		if r.policy.Mode == structs.RestartPolicyModeFail {
			r.reason = fmt.Sprintf(
				`Exceeded allowed attempts %d in interval %v and mode is "fail"`,
				r.policy.Attempts, r.policy.Interval)
			return structs.TaskNotRestarting, 0
		} else {
			r.reason = ReasonDelay
			return structs.TaskRestarting, r.getDelay()
		}
	}

	r.reason = ReasonWithinPolicy
	return structs.TaskRestarting, r.jitter()
}

// handleWaitResult returns the new state and potential wait duration for
// restarting the task after it has exited.
func (r *RestartTracker) handleWaitResult() (string, time.Duration) {
	// If the task started successfully and restart on success isn't specified,
	// don't restart but don't mark as failed.
	if r.waitRes.Successful() && !r.onSuccess {
		r.reason = "Restart unnecessary as task terminated successfully"
		return structs.TaskTerminated, 0
	}

	if r.count > r.policy.Attempts {
		if r.policy.Mode == structs.RestartPolicyModeFail {
			r.reason = fmt.Sprintf(
				`Exceeded allowed attempts %d in interval %v and mode is "fail"`,
				r.policy.Attempts, r.policy.Interval)
			return structs.TaskNotRestarting, 0
		} else {
			r.reason = ReasonDelay
			return structs.TaskRestarting, r.getDelay()
		}
	}

	r.reason = ReasonWithinPolicy
	return structs.TaskRestarting, r.jitter()
}

// getDelay returns the delay time to enter the next interval.
func (r *RestartTracker) getDelay() time.Duration {
	end := r.startTime.Add(r.policy.Interval)
	now := time.Now()
	return end.Sub(now)
}

// jitter returns the delay time plus a jitter.
func (r *RestartTracker) jitter() time.Duration {
	// Get the delay and ensure it is valid.
	d := r.policy.Delay.Nanoseconds()
	if d == 0 {
		d = 1
	}

	j := float64(r.rand.Int63n(d)) * jitter
	return time.Duration(d + int64(j))
}

// Returns a tracker that never restarts.
func noRestartsTracker() *RestartTracker {
	policy := &structs.RestartPolicy{Attempts: 0, Mode: structs.RestartPolicyModeFail}
	return newRestartTracker(policy, structs.JobTypeBatch)
}
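To make the backoff behaviour above concrete, here is a tiny standalone sketch (illustrative only; the 15s delay is a made-up policy value) of the jitter calculation used when a restart is within policy: the configured delay plus up to 25% random jitter.

package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	// Mirrors RestartTracker.jitter: restart delay plus up to 25% random jitter.
	const jitter = 0.25
	delay := (15 * time.Second).Nanoseconds()

	j := float64(rand.Int63n(delay)) * jitter
	fmt.Println(time.Duration(delay + int64(j))) // somewhere between 15s and ~18.75s
}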
@ -1,779 +0,0 @@
// Package rpcproxy provides a proxy interface to Nomad Servers. The
// RPCProxy periodically shuffles which server a Nomad Client communicates
// with in order to redistribute load across Nomad Servers. Nomad Servers
// that fail an RPC request are automatically cycled to the end of the list
// until the server list is reshuffled.
//
// The rpcproxy package does not provide any external API guarantees and
// should be called only by `hashicorp/nomad`.
package rpcproxy

import (
	"fmt"
	"log"
	"math/rand"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/hashicorp/consul/lib"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// clientRPCJitterFraction determines the amount of jitter added to
	// clientRPCMinReuseDuration before a connection is expired and a new
	// connection is established in order to rebalance load across Nomad
	// servers. The cluster-wide number of connections per second from
	// rebalancing is applied after this jitter to ensure the CPU impact
	// is always finite. See newRebalanceConnsPerSecPerServer's comment
	// for additional commentary.
	//
	// For example, in a 10K Nomad cluster with 5x servers, this default
	// averages out to ~13 new connections from rebalancing per server
	// per second.
	clientRPCJitterFraction = 2

	// clientRPCMinReuseDuration controls the minimum amount of time RPC
	// queries are sent over an established connection to a single server
	clientRPCMinReuseDuration = 600 * time.Second

	// Limit the number of new connections a server receives per second
	// for connection rebalancing. This limit caps the load caused by
	// continual rebalancing efforts when a cluster is in equilibrium. A
	// lower value comes at the cost of increased recovery time after a
	// partition. This parameter begins to take effect when there are
	// more than ~48K clients querying 5x servers or at lower server
	// counts when there is a partition.
	//
	// For example, in a 100K Nomad cluster with 5x servers, it will take
	// ~5min for all servers to rebalance their connections. If 99,995
	// agents are in the minority talking to only one server, it will
	// take ~26min for all servers to rebalance. A 10K cluster in the
	// same scenario will take ~2.6min to rebalance.
	newRebalanceConnsPerSecPerServer = 64

	// rpcAPIMismatchLogRate determines the rate at which log entries are
	// emitted when the client and server's API versions are mismatched.
	rpcAPIMismatchLogRate = 3 * time.Hour
)

// NomadConfigInfo is an interface wrapper around this Nomad Agent's
// configuration to prevent a cyclic import dependency.
type NomadConfigInfo interface {
	Datacenter() string
	RPCMajorVersion() int
	RPCMinorVersion() int
	Region() string
}

// Pinger is an interface wrapping client.ConnPool to prevent a
// cyclic import dependency
type Pinger interface {
	PingNomadServer(region string, apiMajorVersion int, s *ServerEndpoint) (bool, error)
}

// serverList is an array of Nomad Servers. The first server in the list is
// the active server.
//
// NOTE(sean@): We are explicitly relying on the fact that serverList will be
// copied onto the stack by atomic.Value. Please keep this structure light.
type serverList struct {
	L []*ServerEndpoint
}

// RPCProxy is the manager type responsible for returning and managing Nomad
// addresses.
type RPCProxy struct {
	// activatedList manages the list of Nomad Servers that are eligible
	// to be queried by the Client agent.
	activatedList     atomic.Value
	activatedListLock sync.Mutex

	// primaryServers is a list of servers found in the last heartbeat.
	// primaryServers are periodically reshuffled. Covered by
	// serverListLock.
	primaryServers serverList

	// backupServers is a list of fallback servers. These servers are
	// appended to the RPCProxy's serverList, but are never shuffled with
	// the list of servers discovered via the Nomad heartbeat. Covered
	// by serverListLock.
	backupServers serverList

	// serverListLock covers both backupServers and primaryServers. If
	// it is necessary to hold serverListLock and listLock, obtain an
	// exclusive lock on serverListLock before listLock.
	serverListLock sync.RWMutex

	leaderAddr string
	numNodes   int

	// rebalanceTimer controls the duration of the rebalance interval
	rebalanceTimer *time.Timer

	// shutdownCh is a copy of the channel in nomad.Client
	shutdownCh chan struct{}

	logger *log.Logger

	configInfo NomadConfigInfo

	// rpcAPIMismatchThrottle regulates the rate at which warning
	// messages are emitted in the event of an API mismatch between the
	// clients and servers.
	rpcAPIMismatchThrottle map[string]time.Time

	// connPoolPinger is used to test the health of a server in the
	// connection pool. Pinger is an interface that wraps
	// client.ConnPool.
	connPoolPinger Pinger
}

// NewRPCProxy is the only way to safely create a new RPCProxy.
func NewRPCProxy(logger *log.Logger, shutdownCh chan struct{}, configInfo NomadConfigInfo, connPoolPinger Pinger) *RPCProxy {
	p := &RPCProxy{
		logger:         logger,
		configInfo:     configInfo,     // can't pass *nomad.Client: import cycle
		connPoolPinger: connPoolPinger, // can't pass *nomad.ConnPool: import cycle
		rebalanceTimer: time.NewTimer(clientRPCMinReuseDuration),
		shutdownCh:     shutdownCh,
	}

	l := serverList{}
	l.L = make([]*ServerEndpoint, 0)
	p.saveServerList(l)
	return p
}

// activateEndpoint adds an endpoint to the RPCProxy's active serverList.
// Returns true if the server was added, returns false if the server already
// existed in the RPCProxy's serverList.
func (p *RPCProxy) activateEndpoint(s *ServerEndpoint) bool {
	l := p.getServerList()

	// Check if this server is known
	found := false
	for idx, existing := range l.L {
		if existing.Name == s.Name {
			newServers := make([]*ServerEndpoint, len(l.L))
			copy(newServers, l.L)

			// Overwrite the existing server details in order to
			// possibly update metadata (e.g. server version)
			newServers[idx] = s

			l.L = newServers
			found = true
			break
		}
	}

	// Add to the list if not known
	if !found {
		newServers := make([]*ServerEndpoint, len(l.L), len(l.L)+1)
		copy(newServers, l.L)
		newServers = append(newServers, s)
		l.L = newServers
	}

	p.saveServerList(l)

	return !found
}

// SetBackupServers sets a list of Nomad Servers to be used in the event that
// the Nomad Agent lost contact with the list of Nomad Servers provided via
// the Nomad Agent's heartbeat. If available, the backup servers are
// populated via Consul.
func (p *RPCProxy) SetBackupServers(addrs []string) error {
	l := make([]*ServerEndpoint, 0, len(addrs))
	for _, s := range addrs {
		s, err := NewServerEndpoint(s)
		if err != nil {
			p.logger.Printf("[WARN] client.rpcproxy: unable to create backup server %+q: %v", s, err)
			return fmt.Errorf("unable to create new backup server from %+q: %v", s, err)
		}
		l = append(l, s)
	}

	p.serverListLock.Lock()
	p.backupServers.L = l
	p.serverListLock.Unlock()

	p.activatedListLock.Lock()
	defer p.activatedListLock.Unlock()
	for _, s := range l {
		p.activateEndpoint(s)
	}

	return nil
}

// AddPrimaryServer takes the RPC address of a Nomad server, creates a new
// endpoint, and adds it to both the primaryServers list and the active
// serverList used in the RPC Proxy. If the endpoint is not known by the
// RPCProxy, appends the endpoint to the list. The new endpoint will begin
// seeing use after the rebalance timer fires (or enough servers fail
// organically). Any values in the primary server list are overridden by the
// next successful heartbeat.
func (p *RPCProxy) AddPrimaryServer(rpcAddr string) *ServerEndpoint {
	s, err := NewServerEndpoint(rpcAddr)
	if err != nil {
		p.logger.Printf("[WARN] client.rpcproxy: unable to create new primary server from endpoint %+q: %v", rpcAddr, err)
		return nil
	}

	k := s.Key()
	p.serverListLock.Lock()
	if serverExists := p.primaryServers.serverExistByKey(k); serverExists {
		p.serverListLock.Unlock()
		return s
	}
	p.primaryServers.L = append(p.primaryServers.L, s)
	p.serverListLock.Unlock()

	p.activatedListLock.Lock()
	p.activateEndpoint(s)
	p.activatedListLock.Unlock()

	return s
}

// cycleServers returns a new list of servers that has dequeued the first
// server and enqueued it at the end of the list. cycleServers assumes the
// caller is holding the listLock. cycleServer does not test or ping
// the next server inline. cycleServer may be called when the environment
// has just entered an unhealthy situation and blocking on a server test is
// less desirable than just returning the next server in the firing line. If
// the next server fails, it will fail fast enough and cycleServer will be
// called again.
func (l *serverList) cycleServer() (servers []*ServerEndpoint) {
	numServers := len(l.L)
	if numServers < 2 {
		return servers // No action required
	}

	newServers := make([]*ServerEndpoint, 0, numServers)
	newServers = append(newServers, l.L[1:]...)
	newServers = append(newServers, l.L[0])

	return newServers
}

// serverExistByKey performs a search to see if a server exists in the
// serverList. Assumes the caller is holding at least a read lock.
func (l *serverList) serverExistByKey(targetKey *EndpointKey) bool {
	var found bool
	for _, server := range l.L {
		if targetKey.Equal(server.Key()) {
			found = true
		}
	}
	return found
}

// removeServerByKey performs an inline removal of the first matching server
func (l *serverList) removeServerByKey(targetKey *EndpointKey) {
	for i, s := range l.L {
		if targetKey.Equal(s.Key()) {
			copy(l.L[i:], l.L[i+1:])
			l.L[len(l.L)-1] = nil
			l.L = l.L[:len(l.L)-1]
			return
		}
	}
}

// shuffleServers shuffles the server list in place
func (l *serverList) shuffleServers() {
	for i := len(l.L) - 1; i > 0; i-- {
		j := rand.Int31n(int32(i + 1))
		l.L[i], l.L[j] = l.L[j], l.L[i]
	}
}

// String returns a string representation of serverList
func (l *serverList) String() string {
	if len(l.L) == 0 {
		return fmt.Sprintf("empty server list")
	}

	serverStrs := make([]string, 0, len(l.L))
	for _, server := range l.L {
		serverStrs = append(serverStrs, server.String())
	}

	return fmt.Sprintf("[%s]", strings.Join(serverStrs, ", "))
}

// FindServer takes out an internal "read lock" and searches through the list
// of servers to find a "healthy" server. If the server is actually
// unhealthy, we rely on heartbeats to detect this and remove the node from
// the server list. If the server at the front of the list has failed or
// fails during an RPC call, it is rotated to the end of the list. If there
// are no servers available, return nil.
func (p *RPCProxy) FindServer() *ServerEndpoint {
	l := p.getServerList()
	numServers := len(l.L)
	if numServers == 0 {
		p.logger.Printf("[WARN] client.rpcproxy: No servers available")
		return nil
	}

	// Return whatever is at the front of the list because it is
	// assumed to be the oldest in the server list (unless -
	// hypothetically - the server list was rotated right after a
	// server was added).
	return l.L[0]
}

// getServerList is a convenience method which hides the locking semantics
// of atomic.Value from the caller.
func (p *RPCProxy) getServerList() serverList {
	return p.activatedList.Load().(serverList)
}

// saveServerList is a convenience method which hides the locking semantics
// of atomic.Value from the caller.
func (p *RPCProxy) saveServerList(l serverList) {
	p.activatedList.Store(l)
}

// LeaderAddr returns the current leader address. If an empty string, then
// the Nomad Server for this Nomad Agent is in the minority or the Nomad
// Servers are in the middle of an election.
func (p *RPCProxy) LeaderAddr() string {
	p.activatedListLock.Lock()
	defer p.activatedListLock.Unlock()
	return p.leaderAddr
}

// NotifyFailedServer marks the passed in server as "failed" by rotating it
// to the end of the server list.
func (p *RPCProxy) NotifyFailedServer(s *ServerEndpoint) {
	l := p.getServerList()

	// If the server being failed is not the first server on the list,
	// this is a noop. If, however, the server is failed and first on
	// the list, acquire the lock, retest, and take the penalty of moving
	// the server to the end of the list.

	// Only rotate the server list when there is more than one server
	if len(l.L) > 1 && l.L[0] == s {
		// Grab a lock, retest, and take the hit of cycling the first
		// server to the end.
		p.activatedListLock.Lock()
		defer p.activatedListLock.Unlock()
		l = p.getServerList()

		if len(l.L) > 1 && l.L[0] == s {
			l.L = l.cycleServer()
			p.saveServerList(l)
		}
	}
}

// NumNodes returns the estimated number of nodes according to the last Nomad
// Heartbeat.
func (p *RPCProxy) NumNodes() int {
	return p.numNodes
}

// NumServers takes out an internal "read lock" and returns the number of
// servers. numServers includes both healthy and unhealthy servers.
func (p *RPCProxy) NumServers() int {
	l := p.getServerList()
	return len(l.L)
}

// RebalanceServers shuffles the list of servers on this agent. The server
// at the front of the list is selected for the next RPC. RPC calls that
// fail for a particular server are rotated to the end of the list. This
// method reshuffles the list periodically in order to redistribute work
// across all known Nomad servers (i.e. guarantee that the order of servers
// in the server list is not positively correlated with the age of a server
// in the Nomad cluster). Periodically shuffling the server list prevents
// long-lived clients from fixating on long-lived servers.
//
// Unhealthy servers are removed from the server list during the next client
// heartbeat. Before the newly shuffled server list is saved, the new remote
// endpoint is tested to ensure it's responsive.
func (p *RPCProxy) RebalanceServers() {
	var serverListLocked bool
	p.serverListLock.Lock()
	serverListLocked = true
	defer func() {
		if serverListLocked {
			p.serverListLock.Unlock()
		}
	}()

	// Early abort if there is nothing to shuffle
	if (len(p.primaryServers.L) + len(p.backupServers.L)) < 2 {
		return
	}

	// Shuffle server lists independently
	p.primaryServers.shuffleServers()
	p.backupServers.shuffleServers()

	// Create a new merged serverList
	type targetServer struct {
		server *ServerEndpoint
		// 'p' == Primary Server
		// 's' == Secondary/Backup Server
		// 'b' == Both
		state byte
	}
	mergedList := make(map[EndpointKey]*targetServer, len(p.primaryServers.L)+len(p.backupServers.L))
	for _, s := range p.primaryServers.L {
		mergedList[*s.Key()] = &targetServer{server: s, state: 'p'}
	}
	for _, s := range p.backupServers.L {
		k := s.Key()
		_, found := mergedList[*k]
		if found {
			mergedList[*k].state = 'b'
		} else {
			mergedList[*k] = &targetServer{server: s, state: 's'}
		}
	}

	l := &serverList{L: make([]*ServerEndpoint, 0, len(mergedList))}
	for _, s := range p.primaryServers.L {
		l.L = append(l.L, s)
	}
	for _, v := range mergedList {
		if v.state != 's' {
			continue
		}
		l.L = append(l.L, v.server)
	}

	// Release the lock before we begin transition to operations on the
	// network timescale and attempt to ping servers. A copy of the
	// servers has been made at this point.
	p.serverListLock.Unlock()
	serverListLocked = false

	// Iterate through the shuffled server list to find an assumed
	// healthy server. NOTE: Do not iterate on the list directly because
	// this loop mutates the server list in-place.
	var foundHealthyServer bool
	for i := 0; i < len(l.L); i++ {
		// Always test the first server. Failed servers are cycled
		// and eventually removed from the list when Nomad heartbeats
		// detect the failed node.
		selectedServer := l.L[0]

		ok, err := p.connPoolPinger.PingNomadServer(p.configInfo.Region(), p.configInfo.RPCMajorVersion(), selectedServer)
		if ok {
			foundHealthyServer = true
			break
		}
		p.logger.Printf(`[DEBUG] client.rpcproxy: pinging server "%s" failed: %s`, selectedServer.String(), err)

		l.cycleServer()
	}

	// If no healthy servers were found, sleep and wait for the admin to
	// join this node to a server and begin receiving heartbeats with an
	// updated list of Nomad servers. Or Consul will begin advertising a
	// new server in the nomad service (Nomad server service).
	if !foundHealthyServer {
		p.logger.Printf("[DEBUG] client.rpcproxy: No healthy servers during rebalance, aborting")
		return
	}

	// Verify that all servers are present. Reconcile will save the
	// final serverList.
	if p.reconcileServerList(l) {
		p.logger.Printf("[TRACE] client.rpcproxy: Rebalanced %d servers, next active server is %s", len(l.L), l.L[0].String())
	} else {
		// reconcileServerList failed because Nomad removed the
		// server that was at the front of the list that had
		// successfully been Ping'ed. Between the Ping and
		// reconcile, a Nomad heartbeat removed the node.
		//
		// Instead of doing any heroics, "freeze in place" and
		// continue to use the existing connection until the next
		// rebalance occurs.
	}

	return
}

// reconcileServerList returns true when the first server in serverList
// (l) exists in the receiver's serverList (p). If true, the merged
// serverList (l) is stored as the receiver's serverList (p). Returns
// false if the first server in p does not exist in the passed in list (l)
// (i.e. was removed by Nomad during a PingNomadServer() call). Newly added
// servers are appended to the list and other missing servers are removed
// from the list.
func (p *RPCProxy) reconcileServerList(l *serverList) bool {
	p.activatedListLock.Lock()
	defer p.activatedListLock.Unlock()

	// newServerList is a serverList that has been kept up-to-date with
	// join and leave events.
	newServerList := p.getServerList()

	// If a Nomad heartbeat removed all nodes, or there is no selected
	// server (zero nodes in serverList), abort early.
	if len(newServerList.L) == 0 || len(l.L) == 0 {
		return false
	}

	type targetServer struct {
		server *ServerEndpoint

		// 'b' == both
		// 'o' == original
		// 'n' == new
		state byte
	}
	mergedList := make(map[EndpointKey]*targetServer, len(l.L))
	for _, s := range l.L {
		mergedList[*s.Key()] = &targetServer{server: s, state: 'o'}
	}
	for _, s := range newServerList.L {
		k := s.Key()
		_, found := mergedList[*k]
		if found {
			mergedList[*k].state = 'b'
		} else {
			mergedList[*k] = &targetServer{server: s, state: 'n'}
		}
	}

	// Ensure the selected server has not been removed by a heartbeat
	selectedServerKey := l.L[0].Key()
	if v, found := mergedList[*selectedServerKey]; found && v.state == 'o' {
		return false
	}

	// Append any new servers and remove any old servers
	for k, v := range mergedList {
		switch v.state {
		case 'b':
			// Do nothing, server exists in both
		case 'o':
			// Server has been removed
			l.removeServerByKey(&k)
		case 'n':
			// Server added
			l.L = append(l.L, v.server)
		default:
			panic("unknown merge list state")
		}
	}

	p.saveServerList(*l)
	return true
}

// RemoveServer takes out an internal write lock and removes a server from
// the activated server list.
func (p *RPCProxy) RemoveServer(s *ServerEndpoint) {
	// Lock hierarchy protocol dictates serverListLock is acquired first.
	p.serverListLock.Lock()
	defer p.serverListLock.Unlock()

	p.activatedListLock.Lock()
	defer p.activatedListLock.Unlock()
	l := p.getServerList()

	k := s.Key()
	l.removeServerByKey(k)
	p.saveServerList(l)

	p.primaryServers.removeServerByKey(k)
	p.backupServers.removeServerByKey(k)
}

// refreshServerRebalanceTimer is only called once p.rebalanceTimer expires.
func (p *RPCProxy) refreshServerRebalanceTimer() time.Duration {
	l := p.getServerList()
	numServers := len(l.L)
	// Limit this connection's life based on the size (and health) of the
	// cluster. Never rebalance a connection more frequently than
	// connReuseLowWatermarkDuration, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across numLANMembers.
	clusterWideRebalanceConnsPerSec := float64(numServers * newRebalanceConnsPerSecPerServer)
	connReuseLowWatermarkDuration := clientRPCMinReuseDuration + lib.RandomStagger(clientRPCMinReuseDuration/clientRPCJitterFraction)
	numLANMembers := p.numNodes
	connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, connReuseLowWatermarkDuration, numLANMembers)

	p.rebalanceTimer.Reset(connRebalanceTimeout)
	return connRebalanceTimeout
}

// ResetRebalanceTimer resets the rebalance timer. This method exists for
// testing and should not be used directly.
func (p *RPCProxy) ResetRebalanceTimer() {
	p.activatedListLock.Lock()
	defer p.activatedListLock.Unlock()
	p.rebalanceTimer.Reset(clientRPCMinReuseDuration)
}

// ServerRPCAddrs returns one RPC Address per server
func (p *RPCProxy) ServerRPCAddrs() []string {
	l := p.getServerList()
	serverAddrs := make([]string, 0, len(l.L))
	for _, s := range l.L {
		serverAddrs = append(serverAddrs, s.Addr.String())
	}
	return serverAddrs
}

// Run is used to start and manage the task of automatically shuffling and
// rebalancing the list of Nomad servers. This maintenance only happens
// periodically based on the expiration of the timer. Failed servers are
// automatically cycled to the end of the list. New servers are appended to
// the list. The order of the server list must be shuffled periodically to
// distribute load across all known and available Nomad servers.
func (p *RPCProxy) Run() {
	for {
		select {
		case <-p.rebalanceTimer.C:
			p.RebalanceServers()

			p.refreshServerRebalanceTimer()
		case <-p.shutdownCh:
			p.logger.Printf("[INFO] client.rpcproxy: shutting down")
			return
		}
	}
}

// RefreshServerLists is called when the Client receives an update from a
// Nomad Server. The response from Nomad Client Heartbeats contains a list of
// Nomad Servers that the Nomad Client should use for RPC requests.
// RefreshServerLists does not rebalance its serverLists (that is handled
// elsewhere via a periodic timer). New Nomad Servers learned via the
// heartbeat are appended to the RPCProxy's activated serverList. Servers
// that are no longer present in the Heartbeat are removed immediately from
// all server lists. Nomad Servers speaking a newer major or minor API
// version are filtered from the serverList.
func (p *RPCProxy) RefreshServerLists(servers []*structs.NodeServerInfo, numNodes int32, leaderRPCAddr string) error {
	// Merge all servers found in the response. Servers in the response
	// with newer API versions are filtered from the list. If the list
	// is missing an address found in the RPCProxy's server list, remove
	// it from the RPCProxy.

	p.serverListLock.Lock()
	defer p.serverListLock.Unlock()

	// Clear the backup server list when a heartbeat contains at least
	// one server.
	if len(servers) > 0 && len(p.backupServers.L) > 0 {
		p.backupServers.L = make([]*ServerEndpoint, 0, len(servers))
	}

	// 1) Create a map to reconcile the difference between
	// p.primaryServers and servers.
	type targetServer struct {
		server *ServerEndpoint

		// 'b' == both
		// 'o' == original
		// 'n' == new
		state byte
	}
	mergedPrimaryMap := make(map[EndpointKey]*targetServer, len(p.primaryServers.L)+len(servers))
	numOldServers := 0
	for _, s := range p.primaryServers.L {
		mergedPrimaryMap[*s.Key()] = &targetServer{server: s, state: 'o'}
		numOldServers++
	}
	numBothServers := 0
	var newServers bool
	for _, s := range servers {
		// Filter out servers using a newer API version. Prevent
		// spamming the logs every heartbeat.
		//
		// TODO(sean@): Move the logging throttle logic into a
		// dedicated logging package so RPCProxy does not have to
		// perform this accounting.
		if int32(p.configInfo.RPCMajorVersion()) < s.RPCMajorVersion ||
			(int32(p.configInfo.RPCMajorVersion()) == s.RPCMajorVersion &&
				int32(p.configInfo.RPCMinorVersion()) < s.RPCMinorVersion) {
			now := time.Now()
			t, ok := p.rpcAPIMismatchThrottle[s.RPCAdvertiseAddr]
			if ok && t.After(now) {
				continue
			}

			p.logger.Printf("[WARN] client.rpcproxy: API mismatch between client version (v%d.%d) and server version (v%d.%d), ignoring server %+q", p.configInfo.RPCMajorVersion(), p.configInfo.RPCMinorVersion(), s.RPCMajorVersion, s.RPCMinorVersion, s.RPCAdvertiseAddr)
			p.rpcAPIMismatchThrottle[s.RPCAdvertiseAddr] = now.Add(rpcAPIMismatchLogRate)
			continue
		}

		server, err := NewServerEndpoint(s.RPCAdvertiseAddr)
		if err != nil {
			p.logger.Printf("[WARN] client.rpcproxy: Unable to create a server from %+q: %v", s.RPCAdvertiseAddr, err)
			continue
		}

		// Nomad servers in different datacenters are automatically
		// added to the backup server list.
		if s.Datacenter != p.configInfo.Datacenter() {
			p.backupServers.L = append(p.backupServers.L, server)
			continue
		}

		k := server.Key()
		_, found := mergedPrimaryMap[*k]
		if found {
			mergedPrimaryMap[*k].state = 'b'
			numBothServers++
		} else {
			mergedPrimaryMap[*k] = &targetServer{server: server, state: 'n'}
			newServers = true
		}
	}

	// Short-circuit acquiring listLock if nothing changed
	if !newServers && numOldServers == numBothServers {
		return nil
	}

	p.activatedListLock.Lock()
	defer p.activatedListLock.Unlock()
	newServerCfg := p.getServerList()
	for k, v := range mergedPrimaryMap {
		switch v.state {
		case 'b':
			// Do nothing, server exists in both
		case 'o':
			// Server has been removed

			// TODO(sean@): Teach Nomad servers how to remove
			// themselves from their heartbeat in order to
			// gracefully drain their clients over the next
			// cluster's max rebalanceTimer duration. Without
			// this enhancement, if a server is being shut down
			// and it is the first in serverList, the client will
			// fail its next RPC connection.
			p.primaryServers.removeServerByKey(&k)
			newServerCfg.removeServerByKey(&k)
		case 'n':
			// Server added. Append it to both lists
			// immediately. The server should only go into
			// active use in the event of a failure or after a
			// rebalance occurs.
			p.primaryServers.L = append(p.primaryServers.L, v.server)
			newServerCfg.L = append(newServerCfg.L, v.server)
		default:
			panic("unknown merge list state")
		}
	}

	p.numNodes = int(numNodes)
	p.leaderAddr = leaderRPCAddr
	p.saveServerList(newServerCfg)

	return nil
}
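A tiny standalone sketch (illustrative only; the addresses are made up) of the rotate-on-failure behaviour implemented by cycleServer above: the failed head of the list is moved to the tail so the next RPC tries a different server.

package main

import "fmt"

func cycle(servers []string) []string {
	if len(servers) < 2 {
		return servers
	}
	// Dequeue the first server and enqueue it at the end, as cycleServer does.
	rotated := append([]string{}, servers[1:]...)
	return append(rotated, servers[0])
}

func main() {
	fmt.Println(cycle([]string{"10.0.0.1:4647", "10.0.0.2:4647", "10.0.0.3:4647"}))
	// [10.0.0.2:4647 10.0.0.3:4647 10.0.0.1:4647]
}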
@ -1,84 +0,0 @@
package rpcproxy

import (
	"fmt"
	"net"
	"strings"
)

const (
	defaultNomadRPCPort = "4647"
)

// EndpointKey is used in maps and for equality tests. A key is based on endpoints.
type EndpointKey struct {
	name string
}

// Equal compares two EndpointKey objects
func (k *EndpointKey) Equal(x *EndpointKey) bool {
	return k.name == x.name
}

// ServerEndpoint contains the address information for connecting to a Nomad
// server.
//
// TODO(sean@): Server is stubbed out so that in the future it can hold a
// reference to Node (and ultimately Node.ID).
type ServerEndpoint struct {
	// Name is the unique lookup key for a Server instance
	Name string
	Host string
	Port string
	Addr net.Addr
}

// Key returns the corresponding Key
func (s *ServerEndpoint) Key() *EndpointKey {
	return &EndpointKey{
		name: s.Name,
	}
}

// NewServerEndpoint creates a new Server instance with a resolvable
// endpoint. `name` can be either an IP address or a DNS name. If `name` is
// a DNS name, it must be resolvable to an IP address (most inputs are IP
// addresses, not DNS names, but both work equally well when the name is
// resolvable).
func NewServerEndpoint(name string) (*ServerEndpoint, error) {
	s := &ServerEndpoint{
		Name: name,
	}

	var host, port string
	var err error
	host, port, err = net.SplitHostPort(name)
	if err == nil {
		s.Host = host
		s.Port = port
	} else {
		if strings.Contains(err.Error(), "missing port") {
			s.Host = name
			s.Port = defaultNomadRPCPort
		} else {
			return nil, err
		}
	}

	if s.Addr, err = net.ResolveTCPAddr("tcp", net.JoinHostPort(s.Host, s.Port)); err != nil {
		return nil, err
	}

	return s, err
}

// String returns a string representation of Server
func (s *ServerEndpoint) String() string {
	var addrStr, networkStr string
	if s.Addr != nil {
		addrStr = s.Addr.String()
		networkStr = s.Addr.Network()
	}

	return fmt.Sprintf("%s (%s:%s)", s.Name, networkStr, addrStr)
}
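As a usage note, a small sketch (illustrative; it assumes the vendored package above is importable at github.com/hashicorp/nomad/client/rpcproxy) of how NewServerEndpoint handles addresses with and without an explicit port:

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/client/rpcproxy"
)

func main() {
	// Port supplied explicitly.
	a, err := rpcproxy.NewServerEndpoint("127.0.0.1:4647")
	if err != nil {
		panic(err)
	}
	fmt.Println(a.Host, a.Port) // 127.0.0.1 4647

	// No port: the default Nomad RPC port ("4647") is filled in.
	b, err := rpcproxy.NewServerEndpoint("127.0.0.1")
	if err != nil {
		panic(err)
	}
	fmt.Println(b.Host, b.Port) // 127.0.0.1 4647
}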
@ -1,62 +0,0 @@
package stats

import (
	"runtime"
	"time"

	shelpers "github.com/hashicorp/nomad/helper/stats"
)

// CpuStats calculates cpu usage percentage
type CpuStats struct {
	prevCpuTime float64
	prevTime    time.Time
	clkSpeed    float64

	totalCpus int
}

// NewCpuStats returns a cpu stats calculator
func NewCpuStats() *CpuStats {
	numCpus := runtime.NumCPU()
	cpuStats := &CpuStats{
		totalCpus: numCpus,
	}
	return cpuStats
}

// Percent calculates the cpu usage percentage based on the current cpu usage
// and the previous cpu usage where usage is given as time in nanoseconds spent
// in the cpu
func (c *CpuStats) Percent(cpuTime float64) float64 {
	now := time.Now()

	if c.prevCpuTime == 0.0 {
		// invoked first time
		c.prevCpuTime = cpuTime
		c.prevTime = now
		return 0.0
	}

	timeDelta := now.Sub(c.prevTime).Nanoseconds()
	ret := c.calculatePercent(c.prevCpuTime, cpuTime, timeDelta)
	c.prevCpuTime = cpuTime
	c.prevTime = now
	return ret
}

// TicksConsumed calculates the total ticks consumed by the process across all
// cpu cores
func (c *CpuStats) TicksConsumed(percent float64) float64 {
	return (percent / 100) * shelpers.TotalTicksAvailable() / float64(c.totalCpus)
}

func (c *CpuStats) calculatePercent(t1, t2 float64, timeDelta int64) float64 {
	vDelta := t2 - t1
	if timeDelta <= 0 || vDelta <= 0.0 {
		return 0.0
	}

	overall_percent := (vDelta / float64(timeDelta)) * 100.0
	return overall_percent
}
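A quick worked example of the percentage formula above (the sample values are made up): if a process accumulates an extra 0.5s of CPU time over a 1s wall-clock window, calculatePercent reports 50%.

package main

import "fmt"

func main() {
	// Mirrors CpuStats.calculatePercent: (cpu time delta / wall-clock delta) * 100.
	var prevCpuTime float64 = 1e9 // 1s of accumulated CPU time, in nanoseconds
	var cpuTime float64 = 1.5e9   // 1.5s at the next sample
	var timeDelta int64 = 1e9     // 1s of wall-clock time between samples

	percent := ((cpuTime - prevCpuTime) / float64(timeDelta)) * 100.0
	fmt.Println(percent) // 50
}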
@ -1,187 +0,0 @@
package stats

import (
	"math"
	"runtime"
	"time"

	"github.com/shirou/gopsutil/cpu"
	"github.com/shirou/gopsutil/disk"
	"github.com/shirou/gopsutil/host"
	"github.com/shirou/gopsutil/mem"

	shelpers "github.com/hashicorp/nomad/helper/stats"
)

// HostStats represents resource usage stats of the host running a Nomad client
type HostStats struct {
	Memory           *MemoryStats
	CPU              []*CPUStats
	DiskStats        []*DiskStats
	Uptime           uint64
	Timestamp        int64
	CPUTicksConsumed float64
}

// MemoryStats represents stats related to virtual memory usage
type MemoryStats struct {
	Total     uint64
	Available uint64
	Used      uint64
	Free      uint64
}

// CPUStats represents stats related to cpu usage
type CPUStats struct {
	CPU    string
	User   float64
	System float64
	Idle   float64
	Total  float64
}

// DiskStats represents stats related to disk usage
type DiskStats struct {
	Device            string
	Mountpoint        string
	Size              uint64
	Used              uint64
	Available         uint64
	UsedPercent       float64
	InodesUsedPercent float64
}

// HostStatsCollector collects host resource usage stats
type HostStatsCollector struct {
	clkSpeed        float64
	numCores        int
	statsCalculator map[string]*HostCpuStatsCalculator
}

// NewHostStatsCollector returns a HostStatsCollector
func NewHostStatsCollector() *HostStatsCollector {
	numCores := runtime.NumCPU()
	statsCalculator := make(map[string]*HostCpuStatsCalculator)
	collector := &HostStatsCollector{
		statsCalculator: statsCalculator,
		numCores:        numCores,
	}
	return collector
}

// Collect collects stats related to resource usage of a host
func (h *HostStatsCollector) Collect() (*HostStats, error) {
	hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
	memStats, err := mem.VirtualMemory()
	if err != nil {
		return nil, err
	}
	hs.Memory = &MemoryStats{
		Total:     memStats.Total,
		Available: memStats.Available,
		Used:      memStats.Used,
		Free:      memStats.Free,
	}

	ticksConsumed := 0.0
	cpuStats, err := cpu.Times(true)
	if err != nil {
		return nil, err
	}
	cs := make([]*CPUStats, len(cpuStats))
	for idx, cpuStat := range cpuStats {
		percentCalculator, ok := h.statsCalculator[cpuStat.CPU]
		if !ok {
			percentCalculator = NewHostCpuStatsCalculator()
			h.statsCalculator[cpuStat.CPU] = percentCalculator
		}
		idle, user, system, total := percentCalculator.Calculate(cpuStat)
		cs[idx] = &CPUStats{
			CPU:    cpuStat.CPU,
			User:   user,
			System: system,
			Idle:   idle,
			Total:  total,
		}
		ticksConsumed += (total / 100) * (shelpers.TotalTicksAvailable() / float64(len(cpuStats)))
	}
	hs.CPU = cs
	hs.CPUTicksConsumed = ticksConsumed

	partitions, err := disk.Partitions(false)
	if err != nil {
		return nil, err
	}
	var diskStats []*DiskStats
	for _, partition := range partitions {
		usage, err := disk.Usage(partition.Mountpoint)
		if err != nil {
			return nil, err
		}
		ds := DiskStats{
			Device:            partition.Device,
			Mountpoint:        partition.Mountpoint,
			Size:              usage.Total,
			Used:              usage.Used,
			Available:         usage.Free,
			UsedPercent:       usage.UsedPercent,
			InodesUsedPercent: usage.InodesUsedPercent,
		}
		if math.IsNaN(ds.UsedPercent) {
			ds.UsedPercent = 0.0
		}
		if math.IsNaN(ds.InodesUsedPercent) {
			ds.InodesUsedPercent = 0.0
		}
		diskStats = append(diskStats, &ds)
	}
	hs.DiskStats = diskStats

	uptime, err := host.Uptime()
	if err != nil {
		return nil, err
	}
	hs.Uptime = uptime

	return hs, nil
}

// HostCpuStatsCalculator calculates cpu usage percentages
type HostCpuStatsCalculator struct {
	prevIdle   float64
	prevUser   float64
	prevSystem float64
	prevBusy   float64
	prevTotal  float64
}

// NewHostCpuStatsCalculator returns a HostCpuStatsCalculator
func NewHostCpuStatsCalculator() *HostCpuStatsCalculator {
	return &HostCpuStatsCalculator{}
}

// Calculate calculates the current cpu usage percentages
func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, user float64, system float64, total float64) {
	currentIdle := times.Idle
	currentUser := times.User
	currentSystem := times.System
	currentTotal := times.Total()

	deltaTotal := currentTotal - h.prevTotal
	idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
	user = ((currentUser - h.prevUser) / deltaTotal) * 100
	system = ((currentSystem - h.prevSystem) / deltaTotal) * 100

	currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
		times.Softirq + times.Steal + times.Guest + times.GuestNice + times.Stolen

	total = ((currentBusy - h.prevBusy) / deltaTotal) * 100

	h.prevIdle = currentIdle
	h.prevUser = currentUser
	h.prevSystem = currentSystem
	h.prevTotal = currentTotal
	h.prevBusy = currentBusy

	return
}
@ -1,97 +0,0 @@
package structs

// MemoryStats holds memory usage related stats
type MemoryStats struct {
	RSS            uint64
	Cache          uint64
	Swap           uint64
	MaxUsage       uint64
	KernelUsage    uint64
	KernelMaxUsage uint64

	// A list of fields whose values were actually sampled
	Measured []string
}

func (ms *MemoryStats) Add(other *MemoryStats) {
	ms.RSS += other.RSS
	ms.Cache += other.Cache
	ms.Swap += other.Swap
	ms.MaxUsage += other.MaxUsage
	ms.KernelUsage += other.KernelUsage
	ms.KernelMaxUsage += other.KernelMaxUsage
	ms.Measured = joinStringSet(ms.Measured, other.Measured)
}

// CpuStats holds cpu usage related stats
type CpuStats struct {
	SystemMode       float64
	UserMode         float64
	TotalTicks       float64
	ThrottledPeriods uint64
	ThrottledTime    uint64
	Percent          float64

	// A list of fields whose values were actually sampled
	Measured []string
}

func (cs *CpuStats) Add(other *CpuStats) {
	cs.SystemMode += other.SystemMode
	cs.UserMode += other.UserMode
	cs.TotalTicks += other.TotalTicks
	cs.ThrottledPeriods += other.ThrottledPeriods
	cs.ThrottledTime += other.ThrottledTime
	cs.Percent += other.Percent
	cs.Measured = joinStringSet(cs.Measured, other.Measured)
}

// ResourceUsage holds information related to cpu and memory stats
type ResourceUsage struct {
	MemoryStats *MemoryStats
	CpuStats    *CpuStats
}

func (ru *ResourceUsage) Add(other *ResourceUsage) {
	ru.MemoryStats.Add(other.MemoryStats)
	ru.CpuStats.Add(other.CpuStats)
}

// TaskResourceUsage holds aggregated resource usage of all processes in a Task
// and the resource usage of the individual pids
type TaskResourceUsage struct {
	ResourceUsage *ResourceUsage
	Timestamp     int64
	Pids          map[string]*ResourceUsage
}

// AllocResourceUsage holds the aggregated task resource usage of the
// allocation.
type AllocResourceUsage struct {
	// ResourceUsage is the summation of the task resources
	ResourceUsage *ResourceUsage

	// Tasks contains the resource usage of each task
	Tasks map[string]*TaskResourceUsage

	// The max timestamp of all the Tasks
	Timestamp int64
}

// joinStringSet takes two slices of strings and joins them
func joinStringSet(s1, s2 []string) []string {
	lookup := make(map[string]struct{}, len(s1))
	j := make([]string, 0, len(s1))
	for _, s := range s1 {
		j = append(j, s)
		lookup[s] = struct{}{}
	}

	for _, s := range s2 {
		if _, ok := lookup[s]; !ok {
			j = append(j, s)
		}
	}

	return j
}
@ -1,651 +0,0 @@
|
|||
package client
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/nomad/client/config"
|
||||
"github.com/hashicorp/nomad/client/driver"
|
||||
"github.com/hashicorp/nomad/client/getter"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
|
||||
"github.com/hashicorp/nomad/client/driver/env"
|
||||
dstructs "github.com/hashicorp/nomad/client/driver/structs"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
)
|
||||
|
||||
const (
|
||||
// killBackoffBaseline is the baseline time for exponential backoff while
|
||||
// killing a task.
|
||||
killBackoffBaseline = 5 * time.Second
|
||||
|
||||
// killBackoffLimit is the limit of the exponential backoff for killing
|
||||
// the task.
|
||||
killBackoffLimit = 2 * time.Minute
|
||||
|
||||
// killFailureLimit is how many times we will attempt to kill a task before
|
||||
// giving up and potentially leaking resources.
|
||||
killFailureLimit = 5
|
||||
)
|
||||
|
||||
// TaskRunner is used to wrap a task within an allocation and provide the execution context.
|
||||
type TaskRunner struct {
|
||||
config *config.Config
|
||||
updater TaskStateUpdater
|
||||
logger *log.Logger
|
||||
ctx *driver.ExecContext
|
||||
alloc *structs.Allocation
|
||||
restartTracker *RestartTracker
|
||||
|
||||
// running marks whether the task is running
|
||||
running bool
|
||||
runningLock sync.Mutex
|
||||
|
||||
resourceUsage *cstructs.TaskResourceUsage
|
||||
resourceUsageLock sync.RWMutex
|
||||
|
||||
task *structs.Task
|
||||
taskEnv *env.TaskEnvironment
|
||||
updateCh chan *structs.Allocation
|
||||
|
||||
handle driver.DriverHandle
|
||||
handleLock sync.Mutex
|
||||
|
||||
// artifactsDownloaded tracks whether the tasks artifacts have been
|
||||
// downloaded
|
||||
artifactsDownloaded bool
|
||||
|
||||
destroy bool
|
||||
destroyCh chan struct{}
|
||||
destroyLock sync.Mutex
|
||||
waitCh chan struct{}
|
||||
}
|
||||
|
||||
// taskRunnerState is used to snapshot the state of the task runner
|
||||
type taskRunnerState struct {
|
||||
Version string
|
||||
Task *structs.Task
|
||||
HandleID string
|
||||
ArtifactDownloaded bool
|
||||
}
|
||||
|
||||
// TaskStateUpdater is used to signal that a task's state has changed.
|
||||
type TaskStateUpdater func(taskName, state string, event *structs.TaskEvent)
|
||||
|
||||
// NewTaskRunner is used to create a new task context
|
||||
func NewTaskRunner(logger *log.Logger, config *config.Config,
|
||||
updater TaskStateUpdater, ctx *driver.ExecContext,
|
||||
alloc *structs.Allocation, task *structs.Task) *TaskRunner {
|
||||
|
||||
// Merge in the task resources
|
||||
task.Resources = alloc.TaskResources[task.Name]
|
||||
|
||||
// Build the restart tracker.
|
||||
tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
|
||||
if tg == nil {
|
||||
logger.Printf("[ERR] client: alloc '%s' for missing task group '%s'", alloc.ID, alloc.TaskGroup)
|
||||
return nil
|
||||
}
|
||||
restartTracker := newRestartTracker(tg.RestartPolicy, alloc.Job.Type)
|
||||
|
||||
tc := &TaskRunner{
|
||||
config: config,
|
||||
updater: updater,
|
||||
logger: logger,
|
||||
restartTracker: restartTracker,
|
||||
ctx: ctx,
|
||||
alloc: alloc,
|
||||
task: task,
|
||||
updateCh: make(chan *structs.Allocation, 64),
|
||||
destroyCh: make(chan struct{}),
|
||||
waitCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
return tc
|
||||
}
|
||||
|
||||
// MarkReceived marks the task as received.
|
||||
func (r *TaskRunner) MarkReceived() {
|
||||
r.updater(r.task.Name, structs.TaskStatePending, structs.NewTaskEvent(structs.TaskReceived))
|
||||
}
|
||||
|
||||
// WaitCh returns a channel to wait for termination
|
||||
func (r *TaskRunner) WaitCh() <-chan struct{} {
|
||||
return r.waitCh
|
||||
}
|
||||
|
||||
// stateFilePath returns the path to our state file
|
||||
func (r *TaskRunner) stateFilePath() string {
|
||||
// Get the MD5 of the task name
|
||||
hashVal := md5.Sum([]byte(r.task.Name))
|
||||
hashHex := hex.EncodeToString(hashVal[:])
|
||||
dirName := fmt.Sprintf("task-%s", hashHex)
|
||||
|
||||
// Generate the path
|
||||
path := filepath.Join(r.config.StateDir, "alloc", r.alloc.ID,
|
||||
dirName, "state.json")
|
||||
return path
|
||||
}
|
||||
|
||||
// RestoreState is used to restore our state
|
||||
func (r *TaskRunner) RestoreState() error {
|
||||
// Load the snapshot
|
||||
var snap taskRunnerState
|
||||
if err := restoreState(r.stateFilePath(), &snap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Restore fields
|
||||
if snap.Task == nil {
|
||||
return fmt.Errorf("task runner snapshot include nil Task")
|
||||
} else {
|
||||
r.task = snap.Task
|
||||
}
|
||||
r.artifactsDownloaded = snap.ArtifactDownloaded
|
||||
|
||||
if err := r.setTaskEnv(); err != nil {
|
||||
return fmt.Errorf("client: failed to create task environment for task %q in allocation %q: %v",
|
||||
r.task.Name, r.alloc.ID, err)
|
||||
}
|
||||
|
||||
// Restore the driver
|
||||
if snap.HandleID != "" {
|
||||
driver, err := r.createDriver()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
handle, err := driver.Open(r.ctx, snap.HandleID)
|
||||
|
||||
// In the case it fails, we relaunch the task in the Run() method.
|
||||
if err != nil {
|
||||
r.logger.Printf("[ERR] client: failed to open handle to task '%s' for alloc '%s': %v",
|
||||
r.task.Name, r.alloc.ID, err)
|
||||
return nil
|
||||
}
|
||||
r.handleLock.Lock()
|
||||
r.handle = handle
|
||||
r.handleLock.Unlock()
|
||||
|
||||
r.runningLock.Lock()
|
||||
r.running = true
|
||||
r.runningLock.Unlock()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SaveState is used to snapshot our state
|
||||
func (r *TaskRunner) SaveState() error {
|
||||
snap := taskRunnerState{
|
||||
Task: r.task,
|
||||
Version: r.config.Version,
|
||||
ArtifactDownloaded: r.artifactsDownloaded,
|
||||
}
|
||||
r.handleLock.Lock()
|
||||
if r.handle != nil {
|
||||
snap.HandleID = r.handle.ID()
|
||||
}
|
||||
r.handleLock.Unlock()
|
||||
return persistState(r.stateFilePath(), &snap)
|
||||
}
|
||||
|
||||
// DestroyState is used to cleanup after ourselves
|
||||
func (r *TaskRunner) DestroyState() error {
|
||||
return os.RemoveAll(r.stateFilePath())
|
||||
}
|
||||
|
||||
// setState is used to update the state of the task runner
|
||||
func (r *TaskRunner) setState(state string, event *structs.TaskEvent) {
|
||||
// Persist our state to disk.
|
||||
if err := r.SaveState(); err != nil {
|
||||
r.logger.Printf("[ERR] client: failed to save state of Task Runner for task %q: %v", r.task.Name, err)
|
||||
}
|
||||
|
||||
// Indicate the task has been updated.
|
||||
r.updater(r.task.Name, state, event)
|
||||
}
|
||||
|
||||
// setTaskEnv sets the task environment. It returns an error if it could not be
|
||||
// created.
|
||||
func (r *TaskRunner) setTaskEnv() error {
|
||||
taskEnv, err := driver.GetTaskEnv(r.ctx.AllocDir, r.config.Node, r.task.Copy(), r.alloc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.taskEnv = taskEnv
|
||||
return nil
|
||||
}
|
||||
|
||||
// createDriver makes a driver for the task
|
||||
func (r *TaskRunner) createDriver() (driver.Driver, error) {
|
||||
if r.taskEnv == nil {
|
||||
return nil, fmt.Errorf("task environment not made for task %q in allocation %q", r.task.Name, r.alloc.ID)
|
||||
}
|
||||
|
||||
driverCtx := driver.NewDriverContext(r.task.Name, r.config, r.config.Node, r.logger, r.taskEnv)
|
||||
driver, err := driver.NewDriver(r.task.Driver, driverCtx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create driver '%s' for alloc %s: %v",
|
||||
r.task.Driver, r.alloc.ID, err)
|
||||
}
|
||||
return driver, err
|
||||
}
|
||||
|
||||
// Run is a long running routine used to manage the task
|
||||
func (r *TaskRunner) Run() {
|
||||
defer close(r.waitCh)
|
||||
r.logger.Printf("[DEBUG] client: starting task context for '%s' (alloc '%s')",
|
||||
r.task.Name, r.alloc.ID)
|
||||
|
||||
if err := r.validateTask(); err != nil {
|
||||
r.setState(
|
||||
structs.TaskStateDead,
|
||||
structs.NewTaskEvent(structs.TaskFailedValidation).SetValidationError(err))
|
||||
return
|
||||
}
|
||||
|
||||
if err := r.setTaskEnv(); err != nil {
|
||||
r.setState(
|
||||
structs.TaskStateDead,
|
||||
structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(err))
|
||||
return
|
||||
}
|
||||
|
||||
r.run()
|
||||
return
|
||||
}
|
||||
|
||||
// validateTask validates the fields of the task and returns an error if the
|
||||
// task is invalid.
|
||||
func (r *TaskRunner) validateTask() error {
|
||||
var mErr multierror.Error
|
||||
|
||||
// Validate the user.
|
||||
unallowedUsers := r.config.ReadStringListToMapDefault("user.blacklist", config.DefaultUserBlacklist)
|
||||
checkDrivers := r.config.ReadStringListToMapDefault("user.checked_drivers", config.DefaultUserCheckedDrivers)
|
||||
if _, driverMatch := checkDrivers[r.task.Driver]; driverMatch {
|
||||
if _, unallowed := unallowedUsers[r.task.User]; unallowed {
|
||||
mErr.Errors = append(mErr.Errors, fmt.Errorf("running as user %q is disallowed", r.task.User))
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the artifacts
|
||||
for i, artifact := range r.task.Artifacts {
|
||||
// Verify the artifact doesn't escape the task directory.
|
||||
if err := artifact.Validate(); err != nil {
|
||||
// If this error occurs there is potentially a server bug or
|
||||
// malicious server spoofing.
|
||||
r.logger.Printf("[ERR] client: allocation %q, task %v, artifact %#v (%v) fails validation: %v",
|
||||
r.alloc.ID, r.task.Name, artifact, i, err)
|
||||
mErr.Errors = append(mErr.Errors, fmt.Errorf("artifact (%d) failed validation: %v", i, err))
|
||||
}
|
||||
}
|
||||
|
||||
if len(mErr.Errors) == 1 {
|
||||
return mErr.Errors[0]
|
||||
}
|
||||
return mErr.ErrorOrNil()
|
||||
}
|
||||
|
||||
func (r *TaskRunner) run() {
|
||||
// Predeclare things so we can jump to the RESTART
|
||||
var handleEmpty bool
|
||||
var stopCollection chan struct{}
|
||||
|
||||
for {
|
||||
// Download the task's artifacts
|
||||
if !r.artifactsDownloaded && len(r.task.Artifacts) > 0 {
|
||||
r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskDownloadingArtifacts))
|
||||
taskDir, ok := r.ctx.AllocDir.TaskDirs[r.task.Name]
|
||||
if !ok {
|
||||
err := fmt.Errorf("task directory couldn't be found")
|
||||
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(err))
|
||||
r.logger.Printf("[ERR] client: task directory for alloc %q task %q couldn't be found", r.alloc.ID, r.task.Name)
|
||||
r.restartTracker.SetStartError(err)
|
||||
goto RESTART
|
||||
}
|
||||
|
||||
for _, artifact := range r.task.Artifacts {
|
||||
if err := getter.GetArtifact(r.taskEnv, artifact, taskDir); err != nil {
|
||||
r.setState(structs.TaskStateDead,
|
||||
structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(err))
|
||||
r.restartTracker.SetStartError(dstructs.NewRecoverableError(err, true))
|
||||
goto RESTART
|
||||
}
|
||||
}
|
||||
|
||||
r.artifactsDownloaded = true
|
||||
}
|
||||
|
||||
// Start the task if not yet started or it is being forced. This logic
|
||||
// is necessary because in the case of a restore the handle already
|
||||
// exists.
|
||||
r.handleLock.Lock()
|
||||
handleEmpty = r.handle == nil
|
||||
r.handleLock.Unlock()
|
||||
|
||||
if handleEmpty {
|
||||
startErr := r.startTask()
|
||||
r.restartTracker.SetStartError(startErr)
|
||||
if startErr != nil {
|
||||
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(startErr))
|
||||
goto RESTART
|
||||
}
|
||||
|
||||
// Mark the task as started
|
||||
r.setState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
|
||||
r.runningLock.Lock()
|
||||
r.running = true
|
||||
r.runningLock.Unlock()
|
||||
}
|
||||
|
||||
if stopCollection == nil {
|
||||
stopCollection = make(chan struct{})
|
||||
go r.collectResourceUsageStats(stopCollection)
|
||||
}
|
||||
|
||||
// Wait for updates
|
||||
WAIT:
|
||||
for {
|
||||
select {
|
||||
case waitRes := <-r.handle.WaitCh():
|
||||
if waitRes == nil {
|
||||
panic("nil wait")
|
||||
}
|
||||
|
||||
r.runningLock.Lock()
|
||||
r.running = false
|
||||
r.runningLock.Unlock()
|
||||
|
||||
// Stop collection of the task's resource usage
|
||||
close(stopCollection)
|
||||
|
||||
// Log whether the task was successful or not.
|
||||
r.restartTracker.SetWaitResult(waitRes)
|
||||
r.setState(structs.TaskStateDead, r.waitErrorToEvent(waitRes))
|
||||
if !waitRes.Successful() {
|
||||
r.logger.Printf("[INFO] client: task %q for alloc %q failed: %v", r.task.Name, r.alloc.ID, waitRes)
|
||||
} else {
|
||||
r.logger.Printf("[INFO] client: task %q for alloc %q completed successfully", r.task.Name, r.alloc.ID)
|
||||
}
|
||||
|
||||
break WAIT
|
||||
case update := <-r.updateCh:
|
||||
if err := r.handleUpdate(update); err != nil {
|
||||
r.logger.Printf("[ERR] client: update to task %q failed: %v", r.task.Name, err)
|
||||
}
|
||||
case <-r.destroyCh:
|
||||
// Mark that we received the kill event
|
||||
timeout := driver.GetKillTimeout(r.task.KillTimeout, r.config.MaxKillTimeout)
|
||||
r.setState(structs.TaskStateRunning,
|
||||
structs.NewTaskEvent(structs.TaskKilling).SetKillTimeout(timeout))
|
||||
|
||||
// Kill the task using an exponential backoff in case of failures.
|
||||
destroySuccess, err := r.handleDestroy()
|
||||
if !destroySuccess {
|
||||
// We couldn't successfully destroy the resource created.
|
||||
r.logger.Printf("[ERR] client: failed to kill task %q. Resources may have been leaked: %v", r.task.Name, err)
|
||||
}
|
||||
|
||||
// Stop collection of the task's resource usage
|
||||
close(stopCollection)
|
||||
|
||||
// Store that the task has been destroyed and any associated error.
|
||||
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled).SetKillError(err))
|
||||
|
||||
r.runningLock.Lock()
|
||||
r.running = false
|
||||
r.runningLock.Unlock()
|
||||
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
RESTART:
|
||||
state, when := r.restartTracker.GetState()
|
||||
r.restartTracker.SetStartError(nil).SetWaitResult(nil)
|
||||
reason := r.restartTracker.GetReason()
|
||||
switch state {
|
||||
case structs.TaskNotRestarting, structs.TaskTerminated:
|
||||
r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.alloc.ID)
|
||||
if state == structs.TaskNotRestarting {
|
||||
r.setState(structs.TaskStateDead,
|
||||
structs.NewTaskEvent(structs.TaskNotRestarting).
|
||||
SetRestartReason(reason))
|
||||
}
|
||||
return
|
||||
case structs.TaskRestarting:
|
||||
r.logger.Printf("[INFO] client: Restarting task %q for alloc %q in %v", r.task.Name, r.alloc.ID, when)
|
||||
r.setState(structs.TaskStatePending,
|
||||
structs.NewTaskEvent(structs.TaskRestarting).
|
||||
SetRestartDelay(when).
|
||||
SetRestartReason(reason))
|
||||
default:
|
||||
r.logger.Printf("[ERR] client: restart tracker returned unknown state: %q", state)
|
||||
return
|
||||
}
|
||||
|
||||
// Sleep but watch for destroy events.
|
||||
select {
|
||||
case <-time.After(when):
|
||||
case <-r.destroyCh:
|
||||
}
|
||||
|
||||
// Destroyed while we were waiting to restart, so abort.
|
||||
r.destroyLock.Lock()
|
||||
destroyed := r.destroy
|
||||
r.destroyLock.Unlock()
|
||||
if destroyed {
|
||||
r.logger.Printf("[DEBUG] client: Not restarting task: %v because it's destroyed by user", r.task.Name)
|
||||
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled))
|
||||
return
|
||||
}
|
||||
|
||||
// Clear the handle so a new driver will be created.
|
||||
r.handleLock.Lock()
|
||||
r.handle = nil
|
||||
stopCollection = nil
|
||||
r.handleLock.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// startTask creates the driver and starts the task.
|
||||
func (r *TaskRunner) startTask() error {
|
||||
// Create a driver
|
||||
driver, err := r.createDriver()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create driver of task '%s' for alloc '%s': %v",
|
||||
r.task.Name, r.alloc.ID, err)
|
||||
}
|
||||
|
||||
// Start the job
|
||||
handle, err := driver.Start(r.ctx, r.task)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start task '%s' for alloc '%s': %v",
|
||||
r.task.Name, r.alloc.ID, err)
|
||||
}
|
||||
|
||||
r.handleLock.Lock()
|
||||
r.handle = handle
|
||||
r.handleLock.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// collectResourceUsageStats starts collecting resource usage stats of a Task.
|
||||
// Collection ends when the passed channel is closed
|
||||
func (r *TaskRunner) collectResourceUsageStats(stopCollection <-chan struct{}) {
|
||||
// start collecting the stats right away and then start collecting every
|
||||
// collection interval
|
||||
next := time.NewTimer(0)
|
||||
defer next.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-next.C:
|
||||
ru, err := r.handle.Stats()
|
||||
next.Reset(r.config.StatsCollectionInterval)
|
||||
|
||||
if err != nil {
|
||||
// We do not log when the plugin is shutdown as this is simply a
|
||||
// race between the stopCollection channel being closed and calling
|
||||
// Stats on the handle.
|
||||
if !strings.Contains(err.Error(), "connection is shut down") {
|
||||
r.logger.Printf("[WARN] client: error fetching stats of task %v: %v", r.task.Name, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
r.resourceUsageLock.Lock()
|
||||
r.resourceUsage = ru
|
||||
r.resourceUsageLock.Unlock()
|
||||
r.emitStats(ru)
|
||||
case <-stopCollection:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LatestResourceUsage returns the last resource utilization datapoint collected
|
||||
func (r *TaskRunner) LatestResourceUsage() *cstructs.TaskResourceUsage {
|
||||
r.resourceUsageLock.RLock()
|
||||
defer r.resourceUsageLock.RUnlock()
|
||||
r.runningLock.Lock()
|
||||
defer r.runningLock.Unlock()
|
||||
|
||||
// If the task is not running there can be no latest resource
|
||||
if !r.running {
|
||||
return nil
|
||||
}
|
||||
|
||||
return r.resourceUsage
|
||||
}
|
||||
|
||||
// handleUpdate takes an updated allocation and updates internal state to
|
||||
// reflect the new config for the task.
|
||||
func (r *TaskRunner) handleUpdate(update *structs.Allocation) error {
|
||||
// Extract the task group from the alloc.
|
||||
tg := update.Job.LookupTaskGroup(update.TaskGroup)
|
||||
if tg == nil {
|
||||
return fmt.Errorf("alloc '%s' missing task group '%s'", update.ID, update.TaskGroup)
|
||||
}
|
||||
|
||||
// Extract the task.
|
||||
var updatedTask *structs.Task
|
||||
for _, t := range tg.Tasks {
|
||||
if t.Name == r.task.Name {
|
||||
updatedTask = t
|
||||
}
|
||||
}
|
||||
if updatedTask == nil {
|
||||
return fmt.Errorf("task group %q doesn't contain task %q", tg.Name, r.task.Name)
|
||||
}
|
||||
|
||||
// Merge in the task resources
|
||||
updatedTask.Resources = update.TaskResources[updatedTask.Name]
|
||||
|
||||
// Update will update resources and store the new kill timeout.
|
||||
var mErr multierror.Error
|
||||
r.handleLock.Lock()
|
||||
if r.handle != nil {
|
||||
if err := r.handle.Update(updatedTask); err != nil {
|
||||
mErr.Errors = append(mErr.Errors, fmt.Errorf("updating task resources failed: %v", err))
|
||||
}
|
||||
}
|
||||
r.handleLock.Unlock()
|
||||
|
||||
// Update the restart policy.
|
||||
if r.restartTracker != nil {
|
||||
r.restartTracker.SetPolicy(tg.RestartPolicy)
|
||||
}
|
||||
|
||||
// Store the updated alloc.
|
||||
r.alloc = update
|
||||
r.task = updatedTask
|
||||
return mErr.ErrorOrNil()
|
||||
}
|
||||
|
||||
// handleDestroy kills the task handle. In the case that killing fails,
|
||||
// handleDestroy will retry with an exponential backoff and will give up at a
|
||||
// given limit. It returns whether the task was destroyed and the error
|
||||
// associated with the last kill attempt.
|
||||
func (r *TaskRunner) handleDestroy() (destroyed bool, err error) {
|
||||
// Cap the number of times we attempt to kill the task.
|
||||
for i := 0; i < killFailureLimit; i++ {
|
||||
if err = r.handle.Kill(); err != nil {
|
||||
// Calculate the new backoff
|
||||
backoff := (1 << (2 * uint64(i))) * killBackoffBaseline
|
||||
if backoff > killBackoffLimit {
|
||||
backoff = killBackoffLimit
|
||||
}
|
||||
|
||||
r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc %q. Retrying in %v: %v",
|
||||
r.task.Name, r.alloc.ID, backoff, err)
|
||||
time.Sleep(time.Duration(backoff))
|
||||
} else {
|
||||
// Kill was successful
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return
|
||||
}
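For reference, the backoff in handleDestroy grows by a factor of four per failed kill attempt and is capped at killBackoffLimit, with at most killFailureLimit attempts. A small standalone sketch (hypothetical main package, same constants as above) prints the resulting wait sequence:

```go
// Sketch only: reproduces the kill retry schedule implied by
// killBackoffBaseline = 5s, killBackoffLimit = 2m, killFailureLimit = 5.
package main

import (
	"fmt"
	"time"
)

func main() {
	baseline := 5 * time.Second
	limit := 2 * time.Minute
	for i := 0; i < 5; i++ {
		backoff := (1 << (2 * uint64(i))) * baseline
		if backoff > limit {
			backoff = limit
		}
		fmt.Printf("attempt %d: wait %v\n", i+1, backoff)
	}
	// Prints: 5s, 20s, 1m20s, 2m0s, 2m0s
}
```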
|
||||
|
||||
// Helper function for converting a WaitResult into a TaskTerminated event.
|
||||
func (r *TaskRunner) waitErrorToEvent(res *dstructs.WaitResult) *structs.TaskEvent {
|
||||
return structs.NewTaskEvent(structs.TaskTerminated).
|
||||
SetExitCode(res.ExitCode).
|
||||
SetSignal(res.Signal).
|
||||
SetExitMessage(res.Err)
|
||||
}
|
||||
|
||||
// Update is used to update the task of the context
|
||||
func (r *TaskRunner) Update(update *structs.Allocation) {
|
||||
select {
|
||||
case r.updateCh <- update:
|
||||
default:
|
||||
r.logger.Printf("[ERR] client: dropping task update '%s' (alloc '%s')",
|
||||
r.task.Name, r.alloc.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// Destroy is used to indicate that the task context should be destroyed
|
||||
func (r *TaskRunner) Destroy() {
|
||||
r.destroyLock.Lock()
|
||||
defer r.destroyLock.Unlock()
|
||||
|
||||
if r.destroy {
|
||||
return
|
||||
}
|
||||
r.destroy = true
|
||||
close(r.destroyCh)
|
||||
}
|
||||
|
||||
// emitStats emits resource usage stats of tasks to remote metrics collector
|
||||
// sinks
|
||||
func (r *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
|
||||
if ru.ResourceUsage.MemoryStats != nil && r.config.PublishAllocationMetrics {
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage))
|
||||
}
|
||||
|
||||
if ru.ResourceUsage.CpuStats != nil && r.config.PublishAllocationMetrics {
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "total_percent"}, float32(ru.ResourceUsage.CpuStats.Percent))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods))
|
||||
metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "total_ticks"}, float32(ru.ResourceUsage.CpuStats.TotalTicks))
|
||||
}
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
package testutil
|
||||
|
||||
import (
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// DockerIsConnected checks to see if a docker daemon is available (local or remote)
|
||||
func DockerIsConnected(t *testing.T) bool {
|
||||
client, err := docker.NewClientFromEnv()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Creating a client doesn't actually connect, so make sure we do something
|
||||
// like call Version() on it.
|
||||
env, err := client.Version()
|
||||
if err != nil {
|
||||
t.Logf("Failed to connect to docker daemon: %s", err)
|
||||
return false
|
||||
}
|
||||
|
||||
t.Logf("Successfully connected to docker daemon running version %s", env.Get("Version"))
|
||||
return true
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
package testutil
|
||||
|
||||
import (
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func ExecCompatible(t *testing.T) {
|
||||
if runtime.GOOS != "linux" || syscall.Geteuid() != 0 {
|
||||
t.Skip("Test only available running as root on linux")
|
||||
}
|
||||
}
|
||||
|
||||
func JavaCompatible(t *testing.T) {
|
||||
if runtime.GOOS == "linux" && syscall.Geteuid() != 0 {
|
||||
t.Skip("Test only available when running as root on linux")
|
||||
}
|
||||
}
|
||||
|
||||
func QemuCompatible(t *testing.T) {
|
||||
// Check if qemu exists
|
||||
bin := "qemu-system-x86_64"
|
||||
if runtime.GOOS == "windows" {
|
||||
bin = "qemu-img"
|
||||
}
|
||||
_, err := exec.Command(bin, "--version").CombinedOutput()
|
||||
if err != nil {
|
||||
t.Skip("Must have Qemu installed for Qemu specific tests to run")
|
||||
}
|
||||
}
|
||||
|
||||
func RktCompatible(t *testing.T) {
|
||||
if runtime.GOOS == "windows" || syscall.Geteuid() != 0 {
|
||||
t.Skip("Must be root on non-windows environments to run test")
|
||||
}
|
||||
// else see if rkt exists
|
||||
_, err := exec.Command("rkt", "version").CombinedOutput()
|
||||
if err != nil {
|
||||
t.Skip("Must have rkt installed for rkt specific tests to run")
|
||||
}
|
||||
}
|
||||
|
||||
func MountCompatible(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("Windows does not support mount")
|
||||
}
|
||||
|
||||
if syscall.Geteuid() != 0 {
|
||||
t.Skip("Must be root to run test")
|
||||
}
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
package client
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
type allocTuple struct {
|
||||
exist, updated *structs.Allocation
|
||||
}
|
||||
|
||||
// diffResult is used to return the sets that result from a diff
|
||||
type diffResult struct {
|
||||
added []*structs.Allocation
|
||||
removed []*structs.Allocation
|
||||
updated []allocTuple
|
||||
ignore []*structs.Allocation
|
||||
}
|
||||
|
||||
func (d *diffResult) GoString() string {
|
||||
return fmt.Sprintf("allocs: (added %d) (removed %d) (updated %d) (ignore %d)",
|
||||
len(d.added), len(d.removed), len(d.updated), len(d.ignore))
|
||||
}
|
||||
|
||||
// diffAllocs is used to diff the existing and updated allocations
|
||||
// to see what has happened.
|
||||
func diffAllocs(existing []*structs.Allocation, allocs *allocUpdates) *diffResult {
|
||||
// Scan the existing allocations
|
||||
result := &diffResult{}
|
||||
existIdx := make(map[string]struct{})
|
||||
for _, exist := range existing {
|
||||
// Mark this as existing
|
||||
existIdx[exist.ID] = struct{}{}
|
||||
|
||||
// Check if the alloc was updated or filtered because an update wasn't
|
||||
// needed.
|
||||
alloc, pulled := allocs.pulled[exist.ID]
|
||||
_, filtered := allocs.filtered[exist.ID]
|
||||
|
||||
// If not updated or filtered, removed
|
||||
if !pulled && !filtered {
|
||||
result.removed = append(result.removed, exist)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for an update
|
||||
if pulled && alloc.AllocModifyIndex > exist.AllocModifyIndex {
|
||||
result.updated = append(result.updated, allocTuple{exist, alloc})
|
||||
continue
|
||||
}
|
||||
|
||||
// Ignore this
|
||||
result.ignore = append(result.ignore, exist)
|
||||
}
|
||||
|
||||
// Scan the updated allocations for any that are new
|
||||
for id, pulled := range allocs.pulled {
|
||||
if _, ok := existIdx[id]; !ok {
|
||||
result.added = append(result.added, pulled)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// shuffleStrings randomly shuffles the list of strings
|
||||
func shuffleStrings(list []string) {
|
||||
for i := range list {
|
||||
j := rand.Intn(i + 1)
|
||||
list[i], list[j] = list[j], list[i]
|
||||
}
|
||||
}
|
||||
|
||||
// persistState is used to help with saving state
|
||||
func persistState(path string, data interface{}) error {
|
||||
buf, err := json.Marshal(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to encode state: %v", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
|
||||
return fmt.Errorf("failed to make dirs for %s: %v", path, err)
|
||||
}
|
||||
tmpPath := path + ".tmp"
|
||||
if err := ioutil.WriteFile(tmpPath, buf, 0600); err != nil {
|
||||
return fmt.Errorf("failed to save state to tmp: %v", err)
|
||||
}
|
||||
if err := os.Rename(tmpPath, path); err != nil {
|
||||
return fmt.Errorf("failed to rename tmp to path: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// restoreState is used to read back in the persisted state
|
||||
func restoreState(path string, data interface{}) error {
|
||||
buf, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("failed to read state: %v", err)
|
||||
}
|
||||
if err := json.Unmarshal(buf, data); err != nil {
|
||||
return fmt.Errorf("failed to decode state: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
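persistState writes to a ".tmp" sibling and renames it into place, so a crash mid-write never leaves a truncated state.json behind; restoreState tolerates a missing file by returning nil. A round-trip sketch follows, assuming it lives in the same package as those helpers; the temp directory, path layout, and field values are illustrative only.

```go
// Sketch only: round-trips a taskRunnerState snapshot through
// persistState/restoreState.
package client

import (
	"fmt"
	"os"
	"path/filepath"
)

func exampleStateRoundTrip() error {
	dir, err := os.MkdirTemp("", "state-example")
	if err != nil {
		return err
	}
	defer os.RemoveAll(dir)

	path := filepath.Join(dir, "alloc", "example", "state.json")
	in := taskRunnerState{Version: "0.4.0", HandleID: "example-handle"} // illustrative values

	// Atomic write: tmp file plus rename, as implemented above.
	if err := persistState(path, &in); err != nil {
		return err
	}

	var out taskRunnerState
	if err := restoreState(path, &out); err != nil {
		return err
	}
	fmt.Printf("restored version=%s handle=%s\n", out.Version, out.HandleID)
	return nil
}
```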
|
|
@ -1,67 +0,0 @@
|
|||
package stats
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"sync"
|
||||
|
||||
"github.com/shirou/gopsutil/cpu"
|
||||
)
|
||||
|
||||
var (
|
||||
cpuMhzPerCore float64
|
||||
cpuModelName string
|
||||
cpuNumCores int
|
||||
cpuTotalTicks float64
|
||||
|
||||
onceLer sync.Once
|
||||
)
|
||||
|
||||
func Init() error {
|
||||
var err error
|
||||
onceLer.Do(func() {
|
||||
if cpuNumCores, err = cpu.Counts(true); err != nil {
|
||||
err = fmt.Errorf("Unable to determine the number of CPU cores available: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
var cpuInfo []cpu.InfoStat
|
||||
if cpuInfo, err = cpu.Info(); err != nil {
|
||||
err = fmt.Errorf("Unable to obtain CPU information: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, cpu := range cpuInfo {
|
||||
cpuModelName = cpu.ModelName
|
||||
cpuMhzPerCore = cpu.Mhz
|
||||
break
|
||||
}
|
||||
|
||||
// Floor all of the values such that small differences don't cause the
|
||||
// node to fall into a unique computed node class
|
||||
cpuMhzPerCore = math.Floor(cpuMhzPerCore)
|
||||
cpuTotalTicks = math.Floor(float64(cpuNumCores) * cpuMhzPerCore)
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
// CPUNumCores returns the number of CPU cores available
|
||||
func CPUNumCores() int {
|
||||
return cpuNumCores
|
||||
}
|
||||
|
||||
// CPUMHzPerCore returns the MHz per CPU core
|
||||
func CPUMHzPerCore() float64 {
|
||||
return cpuMhzPerCore
|
||||
}
|
||||
|
||||
// CPUModelName returns the model name of the CPU
|
||||
func CPUModelName() string {
|
||||
return cpuModelName
|
||||
}
|
||||
|
||||
// TotalTicksAvailable calculates the total frequency available across all
|
||||
// cores
|
||||
func TotalTicksAvailable() float64 {
|
||||
return cpuTotalTicks
|
||||
}
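TotalTicksAvailable is simply cores multiplied by the floored per-core MHz, and it is the figure the host stats collector earlier divides evenly across cores when summing ticks consumed. A short sketch of using these accessors follows; it assumes it lives in this same stats package, and the 50% figure is a made-up example value.

```go
// Sketch only: illustrative use of Init and the accessors above.
package stats

import "fmt"

func exampleTotalTicks() {
	if err := Init(); err != nil {
		fmt.Println("init failed:", err)
		return
	}
	fmt.Printf("cores=%d mhz/core=%.0f total ticks=%.0f\n",
		CPUNumCores(), CPUMHzPerCore(), TotalTicksAvailable())

	// A core that is 50% busy over the interval contributes roughly
	// (50 / 100) * (TotalTicksAvailable() / cores) ticks, mirroring the
	// per-core summation in the host stats collector.
	busyPercent := 50.0
	ticks := (busyPercent / 100) * (TotalTicksAvailable() / float64(CPUNumCores()))
	fmt.Printf("example ticks consumed: %.0f\n", ticks)
}
```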
|
|
@ -13,7 +13,6 @@ import (
|
|||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/hashicorp/hcl"
|
||||
"github.com/hashicorp/hcl/hcl/ast"
|
||||
"github.com/hashicorp/nomad/client/driver"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
)
|
||||
|
@ -538,22 +537,6 @@ func parseTasks(jobName string, taskGroupName string, result *[]*structs.Task, l
|
|||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiate a driver to validate the configuration
|
||||
d, err := driver.NewDriver(
|
||||
t.Driver,
|
||||
driver.NewEmptyDriverContext(),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return multierror.Prefix(err,
|
||||
fmt.Sprintf("'%s', config ->", n))
|
||||
}
|
||||
|
||||
if err := d.Validate(t.Config); err != nil {
|
||||
return multierror.Prefix(err,
|
||||
fmt.Sprintf("'%s', config ->", n))
|
||||
}
|
||||
}
|
||||
|
||||
// Parse constraints
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014 Mitchell Hashimoto
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -1,34 +0,0 @@
|
|||
# Process List Library for Go
|
||||
|
||||
go-ps is a library for Go that implements OS-specific APIs to list and
|
||||
manipulate processes in a platform-safe way. The library can find and
|
||||
list processes on Linux, Mac OS X, and Windows.
|
||||
|
||||
If you're new to Go, this library has a good amount of advanced Go educational
|
||||
value as well. It uses some advanced features of Go: build tags, accessing
|
||||
DLL methods for Windows, cgo for Darwin, etc.
|
||||
|
||||
How it works:
|
||||
|
||||
* **Darwin** uses the `sysctl` syscall to retrieve the process table.
|
||||
* **Unix** uses the procfs at `/proc` to inspect the process tree.
|
||||
* **Windows** uses the Windows API, and methods such as
|
||||
`CreateToolhelp32Snapshot` to get a point-in-time snapshot of
|
||||
the process table.
|
||||
|
||||
## Installation
|
||||
|
||||
Install using standard `go get`:
|
||||
|
||||
```
|
||||
$ go get github.com/mitchellh/go-ps
|
||||
...
|
||||
```
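
The exported surface is small: `Processes` returns a point-in-time listing and `FindProcess` looks up a single pid (returning nil, nil when nothing matches). A minimal usage sketch, with error handling kept short for brevity:

```go
package main

import (
	"fmt"
	"os"

	ps "github.com/mitchellh/go-ps"
)

func main() {
	procs, err := ps.Processes()
	if err != nil {
		fmt.Fprintln(os.Stderr, "listing processes:", err)
		os.Exit(1)
	}
	for _, p := range procs {
		fmt.Printf("%d\t%d\t%s\n", p.Pid(), p.PPid(), p.Executable())
	}

	// FindProcess returns (nil, nil) when no process matches the pid.
	if self, _ := ps.FindProcess(os.Getpid()); self != nil {
		fmt.Println("current process:", self.Executable())
	}
}
```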
|
||||
|
||||
## TODO
|
||||
|
||||
Want to contribute? Here is a short TODO list of things that aren't
|
||||
implemented for this library that would be nice:
|
||||
|
||||
* FreeBSD support
|
||||
* Plan9 support
|
|
@ -1,43 +0,0 @@
|
|||
# -*- mode: ruby -*-
|
||||
# vi: set ft=ruby :
|
||||
|
||||
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
|
||||
VAGRANTFILE_API_VERSION = "2"
|
||||
|
||||
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
|
||||
config.vm.box = "chef/ubuntu-12.04"
|
||||
|
||||
config.vm.provision "shell", inline: $script
|
||||
|
||||
["vmware_fusion", "vmware_workstation"].each do |p|
|
||||
config.vm.provider "p" do |v|
|
||||
v.vmx["memsize"] = "1024"
|
||||
v.vmx["numvcpus"] = "2"
|
||||
v.vmx["cpuid.coresPerSocket"] = "1"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
$script = <<SCRIPT
|
||||
SRCROOT="/opt/go"
|
||||
|
||||
# Install Go
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential mercurial
|
||||
sudo hg clone -u release https://code.google.com/p/go ${SRCROOT}
|
||||
cd ${SRCROOT}/src
|
||||
sudo ./all.bash
|
||||
|
||||
# Setup the GOPATH
|
||||
sudo mkdir -p /opt/gopath
|
||||
cat <<EOF >/tmp/gopath.sh
|
||||
export GOPATH="/opt/gopath"
|
||||
export PATH="/opt/go/bin:\$GOPATH/bin:\$PATH"
|
||||
EOF
|
||||
sudo mv /tmp/gopath.sh /etc/profile.d/gopath.sh
|
||||
sudo chmod 0755 /etc/profile.d/gopath.sh
|
||||
|
||||
# Make sure the gopath is usable by bamboo
|
||||
sudo chown -R vagrant:vagrant $SRCROOT
|
||||
sudo chown -R vagrant:vagrant /opt/gopath
|
||||
SCRIPT
|
|
@ -1,40 +0,0 @@
|
|||
// ps provides an API for finding and listing processes in a platform-agnostic
|
||||
// way.
|
||||
//
|
||||
// NOTE: If you're reading these docs online via GoDocs or some other system,
|
||||
// you might only see the Unix docs. This project makes heavy use of
|
||||
// platform-specific implementations. We recommend reading the source if you
|
||||
// are interested.
|
||||
package ps
|
||||
|
||||
// Process is the generic interface that is implemented on every platform
|
||||
// and provides common operations for processes.
|
||||
type Process interface {
|
||||
// Pid is the process ID for this process.
|
||||
Pid() int
|
||||
|
||||
// PPid is the parent process ID for this process.
|
||||
PPid() int
|
||||
|
||||
// Executable name running this process. This is not a path to the
|
||||
// executable.
|
||||
Executable() string
|
||||
}
|
||||
|
||||
// Processes returns all processes.
|
||||
//
|
||||
// This of course will be a point-in-time snapshot of when this method was
|
||||
// called. Some operating systems don't provide snapshot capability of the
|
||||
// process table, in which case the process table returned might contain
|
||||
// ephemeral entities that happened to be running when this was called.
|
||||
func Processes() ([]Process, error) {
|
||||
return processes()
|
||||
}
|
||||
|
||||
// FindProcess looks up a single process by pid.
|
||||
//
|
||||
// Process will be nil and error will be nil if a matching process is
|
||||
// not found.
|
||||
func FindProcess(pid int) (Process, error) {
|
||||
return findProcess(pid)
|
||||
}
|
|
@ -1,138 +0,0 @@
|
|||
// +build darwin
|
||||
|
||||
package ps
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type DarwinProcess struct {
|
||||
pid int
|
||||
ppid int
|
||||
binary string
|
||||
}
|
||||
|
||||
func (p *DarwinProcess) Pid() int {
|
||||
return p.pid
|
||||
}
|
||||
|
||||
func (p *DarwinProcess) PPid() int {
|
||||
return p.ppid
|
||||
}
|
||||
|
||||
func (p *DarwinProcess) Executable() string {
|
||||
return p.binary
|
||||
}
|
||||
|
||||
func findProcess(pid int) (Process, error) {
|
||||
ps, err := processes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, p := range ps {
|
||||
if p.Pid() == pid {
|
||||
return p, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func processes() ([]Process, error) {
|
||||
buf, err := darwinSyscall()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
procs := make([]*kinfoProc, 0, 50)
|
||||
k := 0
|
||||
for i := _KINFO_STRUCT_SIZE; i < buf.Len(); i += _KINFO_STRUCT_SIZE {
|
||||
proc := &kinfoProc{}
|
||||
err = binary.Read(bytes.NewBuffer(buf.Bytes()[k:i]), binary.LittleEndian, proc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
k = i
|
||||
procs = append(procs, proc)
|
||||
}
|
||||
|
||||
darwinProcs := make([]Process, len(procs))
|
||||
for i, p := range procs {
|
||||
darwinProcs[i] = &DarwinProcess{
|
||||
pid: int(p.Pid),
|
||||
ppid: int(p.PPid),
|
||||
binary: darwinCstring(p.Comm),
|
||||
}
|
||||
}
|
||||
|
||||
return darwinProcs, nil
|
||||
}
|
||||
|
||||
func darwinCstring(s [16]byte) string {
|
||||
i := 0
|
||||
for _, b := range s {
|
||||
if b != 0 {
|
||||
i++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return string(s[:i])
|
||||
}
|
||||
|
||||
func darwinSyscall() (*bytes.Buffer, error) {
|
||||
mib := [4]int32{_CTRL_KERN, _KERN_PROC, _KERN_PROC_ALL, 0}
|
||||
size := uintptr(0)
|
||||
|
||||
_, _, errno := syscall.Syscall6(
|
||||
syscall.SYS___SYSCTL,
|
||||
uintptr(unsafe.Pointer(&mib[0])),
|
||||
4,
|
||||
0,
|
||||
uintptr(unsafe.Pointer(&size)),
|
||||
0,
|
||||
0)
|
||||
|
||||
if errno != 0 {
|
||||
return nil, errno
|
||||
}
|
||||
|
||||
bs := make([]byte, size)
|
||||
_, _, errno = syscall.Syscall6(
|
||||
syscall.SYS___SYSCTL,
|
||||
uintptr(unsafe.Pointer(&mib[0])),
|
||||
4,
|
||||
uintptr(unsafe.Pointer(&bs[0])),
|
||||
uintptr(unsafe.Pointer(&size)),
|
||||
0,
|
||||
0)
|
||||
|
||||
if errno != 0 {
|
||||
return nil, errno
|
||||
}
|
||||
|
||||
return bytes.NewBuffer(bs[0:size]), nil
|
||||
}
|
||||
|
||||
const (
|
||||
_CTRL_KERN = 1
|
||||
_KERN_PROC = 14
|
||||
_KERN_PROC_ALL = 0
|
||||
_KINFO_STRUCT_SIZE = 648
|
||||
)
|
||||
|
||||
type kinfoProc struct {
|
||||
_ [40]byte
|
||||
Pid int32
|
||||
_ [199]byte
|
||||
Comm [16]byte
|
||||
_ [301]byte
|
||||
PPid int32
|
||||
_ [84]byte
|
||||
}
|
|
@ -1,260 +0,0 @@
|
|||
// +build freebsd,amd64
|
||||
|
||||
package ps
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// copied from sys/sysctl.h
|
||||
const (
|
||||
CTL_KERN = 1 // "high kernel": proc, limits
|
||||
KERN_PROC = 14 // struct: process entries
|
||||
KERN_PROC_PID = 1 // by process id
|
||||
KERN_PROC_PROC = 8 // only return procs
|
||||
KERN_PROC_PATHNAME = 12 // path to executable
|
||||
)
|
||||
|
||||
// copied from sys/user.h
|
||||
type Kinfo_proc struct {
|
||||
Ki_structsize int32
|
||||
Ki_layout int32
|
||||
Ki_args int64
|
||||
Ki_paddr int64
|
||||
Ki_addr int64
|
||||
Ki_tracep int64
|
||||
Ki_textvp int64
|
||||
Ki_fd int64
|
||||
Ki_vmspace int64
|
||||
Ki_wchan int64
|
||||
Ki_pid int32
|
||||
Ki_ppid int32
|
||||
Ki_pgid int32
|
||||
Ki_tpgid int32
|
||||
Ki_sid int32
|
||||
Ki_tsid int32
|
||||
Ki_jobc [2]byte
|
||||
Ki_spare_short1 [2]byte
|
||||
Ki_tdev int32
|
||||
Ki_siglist [16]byte
|
||||
Ki_sigmask [16]byte
|
||||
Ki_sigignore [16]byte
|
||||
Ki_sigcatch [16]byte
|
||||
Ki_uid int32
|
||||
Ki_ruid int32
|
||||
Ki_svuid int32
|
||||
Ki_rgid int32
|
||||
Ki_svgid int32
|
||||
Ki_ngroups [2]byte
|
||||
Ki_spare_short2 [2]byte
|
||||
Ki_groups [64]byte
|
||||
Ki_size int64
|
||||
Ki_rssize int64
|
||||
Ki_swrss int64
|
||||
Ki_tsize int64
|
||||
Ki_dsize int64
|
||||
Ki_ssize int64
|
||||
Ki_xstat [2]byte
|
||||
Ki_acflag [2]byte
|
||||
Ki_pctcpu int32
|
||||
Ki_estcpu int32
|
||||
Ki_slptime int32
|
||||
Ki_swtime int32
|
||||
Ki_cow int32
|
||||
Ki_runtime int64
|
||||
Ki_start [16]byte
|
||||
Ki_childtime [16]byte
|
||||
Ki_flag int64
|
||||
Ki_kiflag int64
|
||||
Ki_traceflag int32
|
||||
Ki_stat [1]byte
|
||||
Ki_nice [1]byte
|
||||
Ki_lock [1]byte
|
||||
Ki_rqindex [1]byte
|
||||
Ki_oncpu [1]byte
|
||||
Ki_lastcpu [1]byte
|
||||
Ki_ocomm [17]byte
|
||||
Ki_wmesg [9]byte
|
||||
Ki_login [18]byte
|
||||
Ki_lockname [9]byte
|
||||
Ki_comm [20]byte
|
||||
Ki_emul [17]byte
|
||||
Ki_sparestrings [68]byte
|
||||
Ki_spareints [36]byte
|
||||
Ki_cr_flags int32
|
||||
Ki_jid int32
|
||||
Ki_numthreads int32
|
||||
Ki_tid int32
|
||||
Ki_pri int32
|
||||
Ki_rusage [144]byte
|
||||
Ki_rusage_ch [144]byte
|
||||
Ki_pcb int64
|
||||
Ki_kstack int64
|
||||
Ki_udata int64
|
||||
Ki_tdaddr int64
|
||||
Ki_spareptrs [48]byte
|
||||
Ki_spareint64s [96]byte
|
||||
Ki_sflag int64
|
||||
Ki_tdflags int64
|
||||
}
|
||||
|
||||
// UnixProcess is an implementation of Process that contains Unix-specific
|
||||
// fields and information.
|
||||
type UnixProcess struct {
|
||||
pid int
|
||||
ppid int
|
||||
state rune
|
||||
pgrp int
|
||||
sid int
|
||||
|
||||
binary string
|
||||
}
|
||||
|
||||
func (p *UnixProcess) Pid() int {
|
||||
return p.pid
|
||||
}
|
||||
|
||||
func (p *UnixProcess) PPid() int {
|
||||
return p.ppid
|
||||
}
|
||||
|
||||
func (p *UnixProcess) Executable() string {
|
||||
return p.binary
|
||||
}
|
||||
|
||||
// Refresh reloads all the data associated with this process.
|
||||
func (p *UnixProcess) Refresh() error {
|
||||
|
||||
mib := []int32{CTL_KERN, KERN_PROC, KERN_PROC_PID, int32(p.pid)}
|
||||
|
||||
buf, length, err := call_syscall(mib)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proc_k := Kinfo_proc{}
|
||||
if length != uint64(unsafe.Sizeof(proc_k)) {
|
||||
return err
|
||||
}
|
||||
|
||||
k, err := parse_kinfo_proc(buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
p.ppid, p.pgrp, p.sid, p.binary = copy_params(&k)
|
||||
return nil
|
||||
}
|
||||
|
||||
func copy_params(k *Kinfo_proc) (int, int, int, string) {
|
||||
n := -1
|
||||
for i, b := range k.Ki_comm {
|
||||
if b == 0 {
|
||||
break
|
||||
}
|
||||
n = i + 1
|
||||
}
|
||||
comm := string(k.Ki_comm[:n])
|
||||
|
||||
return int(k.Ki_ppid), int(k.Ki_pgid), int(k.Ki_sid), comm
|
||||
}
|
||||
|
||||
func findProcess(pid int) (Process, error) {
|
||||
mib := []int32{CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, int32(pid)}
|
||||
|
||||
_, _, err := call_syscall(mib)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newUnixProcess(pid)
|
||||
}
|
||||
|
||||
func processes() ([]Process, error) {
|
||||
results := make([]Process, 0, 50)
|
||||
|
||||
mib := []int32{CTL_KERN, KERN_PROC, KERN_PROC_PROC, 0}
|
||||
buf, length, err := call_syscall(mib)
|
||||
if err != nil {
|
||||
return results, err
|
||||
}
|
||||
|
||||
// get kinfo_proc size
|
||||
k := Kinfo_proc{}
|
||||
procinfo_len := int(unsafe.Sizeof(k))
|
||||
count := int(length / uint64(procinfo_len))
|
||||
|
||||
// parse buf to procs
|
||||
for i := 0; i < count; i++ {
|
||||
b := buf[i*procinfo_len : i*procinfo_len+procinfo_len]
|
||||
k, err := parse_kinfo_proc(b)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
p, err := newUnixProcess(int(k.Ki_pid))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
p.ppid, p.pgrp, p.sid, p.binary = copy_params(&k)
|
||||
|
||||
results = append(results, p)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func parse_kinfo_proc(buf []byte) (Kinfo_proc, error) {
|
||||
var k Kinfo_proc
|
||||
br := bytes.NewReader(buf)
|
||||
err := binary.Read(br, binary.LittleEndian, &k)
|
||||
if err != nil {
|
||||
return k, err
|
||||
}
|
||||
|
||||
return k, nil
|
||||
}
|
||||
|
||||
func call_syscall(mib []int32) ([]byte, uint64, error) {
|
||||
miblen := uint64(len(mib))
|
||||
|
||||
// get required buffer size
|
||||
length := uint64(0)
|
||||
_, _, err := syscall.RawSyscall6(
|
||||
syscall.SYS___SYSCTL,
|
||||
uintptr(unsafe.Pointer(&mib[0])),
|
||||
uintptr(miblen),
|
||||
0,
|
||||
uintptr(unsafe.Pointer(&length)),
|
||||
0,
|
||||
0)
|
||||
if err != 0 {
|
||||
b := make([]byte, 0)
|
||||
return b, length, err
|
||||
}
|
||||
if length == 0 {
|
||||
b := make([]byte, 0)
|
||||
return b, length, err
|
||||
}
|
||||
// get proc info itself
|
||||
buf := make([]byte, length)
|
||||
_, _, err = syscall.RawSyscall6(
|
||||
syscall.SYS___SYSCTL,
|
||||
uintptr(unsafe.Pointer(&mib[0])),
|
||||
uintptr(miblen),
|
||||
uintptr(unsafe.Pointer(&buf[0])),
|
||||
uintptr(unsafe.Pointer(&length)),
|
||||
0,
|
||||
0)
|
||||
if err != 0 {
|
||||
return buf, length, err
|
||||
}
|
||||
|
||||
return buf, length, nil
|
||||
}
|
||||
|
||||
func newUnixProcess(pid int) (*UnixProcess, error) {
|
||||
p := &UnixProcess{pid: pid}
|
||||
return p, p.Refresh()
|
||||
}
|
|
@ -1,129 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package ps
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// UnixProcess is an implementation of Process that contains Unix-specific
|
||||
// fields and information.
|
||||
type UnixProcess struct {
|
||||
pid int
|
||||
ppid int
|
||||
state rune
|
||||
pgrp int
|
||||
sid int
|
||||
|
||||
binary string
|
||||
}
|
||||
|
||||
func (p *UnixProcess) Pid() int {
|
||||
return p.pid
|
||||
}
|
||||
|
||||
func (p *UnixProcess) PPid() int {
|
||||
return p.ppid
|
||||
}
|
||||
|
||||
func (p *UnixProcess) Executable() string {
|
||||
return p.binary
|
||||
}
|
||||
|
||||
// Refresh reloads all the data associated with this process.
|
||||
func (p *UnixProcess) Refresh() error {
|
||||
statPath := fmt.Sprintf("/proc/%d/stat", p.pid)
|
||||
dataBytes, err := ioutil.ReadFile(statPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// First, parse out the image name
|
||||
data := string(dataBytes)
|
||||
binStart := strings.IndexRune(data, '(') + 1
|
||||
binEnd := strings.IndexRune(data[binStart:], ')')
|
||||
p.binary = data[binStart : binStart+binEnd]
|
||||
|
||||
// Move past the image name and start parsing the rest
|
||||
data = data[binStart+binEnd+2:]
|
||||
_, err = fmt.Sscanf(data,
|
||||
"%c %d %d %d",
|
||||
&p.state,
|
||||
&p.ppid,
|
||||
&p.pgrp,
|
||||
&p.sid)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func findProcess(pid int) (Process, error) {
|
||||
dir := fmt.Sprintf("/proc/%d", pid)
|
||||
_, err := os.Stat(dir)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newUnixProcess(pid)
|
||||
}
|
||||
|
||||
func processes() ([]Process, error) {
|
||||
d, err := os.Open("/proc")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
results := make([]Process, 0, 50)
|
||||
for {
|
||||
fis, err := d.Readdir(10)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, fi := range fis {
|
||||
// We only care about directories, since all pids are dirs
|
||||
if !fi.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
// We only care if the name starts with a numeric
|
||||
name := fi.Name()
|
||||
if name[0] < '0' || name[0] > '9' {
|
||||
continue
|
||||
}
|
||||
|
||||
// From this point forward, any errors we just ignore, because
|
||||
// it might simply be that the process doesn't exist anymore.
|
||||
pid, err := strconv.ParseInt(name, 10, 0)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
p, err := newUnixProcess(int(pid))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
results = append(results, p)
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func newUnixProcess(pid int) (*UnixProcess, error) {
|
||||
p := &UnixProcess{pid: pid}
|
||||
return p, p.Refresh()
|
||||
}
|
|
@ -1,119 +0,0 @@
|
|||
// +build windows
|
||||
|
||||
package ps
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Windows API functions
|
||||
var (
|
||||
modKernel32 = syscall.NewLazyDLL("kernel32.dll")
|
||||
procCloseHandle = modKernel32.NewProc("CloseHandle")
|
||||
procCreateToolhelp32Snapshot = modKernel32.NewProc("CreateToolhelp32Snapshot")
|
||||
procProcess32First = modKernel32.NewProc("Process32FirstW")
|
||||
procProcess32Next = modKernel32.NewProc("Process32NextW")
|
||||
)
|
||||
|
||||
// Some constants from the Windows API
|
||||
const (
|
||||
ERROR_NO_MORE_FILES = 0x12
|
||||
MAX_PATH = 260
|
||||
)
|
||||
|
||||
// PROCESSENTRY32 is the Windows API structure that contains a process's
|
||||
// information.
|
||||
type PROCESSENTRY32 struct {
|
||||
Size uint32
|
||||
CntUsage uint32
|
||||
ProcessID uint32
|
||||
DefaultHeapID uintptr
|
||||
ModuleID uint32
|
||||
CntThreads uint32
|
||||
ParentProcessID uint32
|
||||
PriorityClassBase int32
|
||||
Flags uint32
|
||||
ExeFile [MAX_PATH]uint16
|
||||
}
|
||||
|
||||
// WindowsProcess is an implementation of Process for Windows.
|
||||
type WindowsProcess struct {
|
||||
pid int
|
||||
ppid int
|
||||
exe string
|
||||
}
|
||||
|
||||
func (p *WindowsProcess) Pid() int {
|
||||
return p.pid
|
||||
}
|
||||
|
||||
func (p *WindowsProcess) PPid() int {
|
||||
return p.ppid
|
||||
}
|
||||
|
||||
func (p *WindowsProcess) Executable() string {
|
||||
return p.exe
|
||||
}
|
||||
|
||||
func newWindowsProcess(e *PROCESSENTRY32) *WindowsProcess {
|
||||
// Find when the string ends for decoding
|
||||
end := 0
|
||||
for {
|
||||
if e.ExeFile[end] == 0 {
|
||||
break
|
||||
}
|
||||
end++
|
||||
}
|
||||
|
||||
return &WindowsProcess{
|
||||
pid: int(e.ProcessID),
|
||||
ppid: int(e.ParentProcessID),
|
||||
exe: syscall.UTF16ToString(e.ExeFile[:end]),
|
||||
}
|
||||
}
|
||||
|
||||
func findProcess(pid int) (Process, error) {
|
||||
ps, err := processes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, p := range ps {
|
||||
if p.Pid() == pid {
|
||||
return p, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func processes() ([]Process, error) {
|
||||
handle, _, _ := procCreateToolhelp32Snapshot.Call(
|
||||
0x00000002,
|
||||
0)
|
||||
if handle < 0 {
|
||||
return nil, syscall.GetLastError()
|
||||
}
|
||||
defer procCloseHandle.Call(handle)
|
||||
|
||||
var entry PROCESSENTRY32
|
||||
entry.Size = uint32(unsafe.Sizeof(entry))
|
||||
ret, _, _ := procProcess32First.Call(handle, uintptr(unsafe.Pointer(&entry)))
|
||||
if ret == 0 {
|
||||
return nil, fmt.Errorf("Error retrieving process info.")
|
||||
}
|
||||
|
||||
results := make([]Process, 0, 50)
|
||||
for {
|
||||
results = append(results, newWindowsProcess(&entry))
|
||||
|
||||
ret, _, _ := procProcess32Next.Call(handle, uintptr(unsafe.Pointer(&entry)))
|
||||
if ret == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
|
@ -1,61 +0,0 @@
gopsutil is distributed under BSD license reproduced below.

Copyright (c) 2014, WAKAYAMA Shirou
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
    * Neither the name of the gopsutil authors nor the names of its contributors
      may be used to endorse or promote products derived from this software without
      specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


-------
internal/common/binary.go in the gopsutil is copied and modifid from golang/encoding/binary.go.


Copyright (c) 2009 The Go Authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

   * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
   * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -1,93 +0,0 @@
package cpu

import (
	"encoding/json"
	"runtime"
	"strconv"
	"strings"
	"sync"

	"github.com/shirou/gopsutil/internal/common"
)

type TimesStat struct {
	CPU       string  `json:"cpu"`
	User      float64 `json:"user"`
	System    float64 `json:"system"`
	Idle      float64 `json:"idle"`
	Nice      float64 `json:"nice"`
	Iowait    float64 `json:"iowait"`
	Irq       float64 `json:"irq"`
	Softirq   float64 `json:"softirq"`
	Steal     float64 `json:"steal"`
	Guest     float64 `json:"guest"`
	GuestNice float64 `json:"guestNice"`
	Stolen    float64 `json:"stolen"`
}

type InfoStat struct {
	CPU        int32    `json:"cpu"`
	VendorID   string   `json:"vendorId"`
	Family     string   `json:"family"`
	Model      string   `json:"model"`
	Stepping   int32    `json:"stepping"`
	PhysicalID string   `json:"physicalId"`
	CoreID     string   `json:"coreId"`
	Cores      int32    `json:"cores"`
	ModelName  string   `json:"modelName"`
	Mhz        float64  `json:"mhz"`
	CacheSize  int32    `json:"cacheSize"`
	Flags      []string `json:"flags"`
}

type lastPercent struct {
	sync.Mutex
	lastCPUTimes    []TimesStat
	lastPerCPUTimes []TimesStat
}

var lastCPUPercent lastPercent
var invoke common.Invoker

func init() {
	invoke = common.Invoke{}
	lastCPUPercent.Lock()
	lastCPUPercent.lastCPUTimes, _ = Times(false)
	lastCPUPercent.lastPerCPUTimes, _ = Times(true)
	lastCPUPercent.Unlock()
}

func Counts(logical bool) (int, error) {
	return runtime.NumCPU(), nil
}

func (c TimesStat) String() string {
	v := []string{
		`"cpu":"` + c.CPU + `"`,
		`"user":` + strconv.FormatFloat(c.User, 'f', 1, 64),
		`"system":` + strconv.FormatFloat(c.System, 'f', 1, 64),
		`"idle":` + strconv.FormatFloat(c.Idle, 'f', 1, 64),
		`"nice":` + strconv.FormatFloat(c.Nice, 'f', 1, 64),
		`"iowait":` + strconv.FormatFloat(c.Iowait, 'f', 1, 64),
		`"irq":` + strconv.FormatFloat(c.Irq, 'f', 1, 64),
		`"softirq":` + strconv.FormatFloat(c.Softirq, 'f', 1, 64),
		`"steal":` + strconv.FormatFloat(c.Steal, 'f', 1, 64),
		`"guest":` + strconv.FormatFloat(c.Guest, 'f', 1, 64),
		`"guestNice":` + strconv.FormatFloat(c.GuestNice, 'f', 1, 64),
		`"stolen":` + strconv.FormatFloat(c.Stolen, 'f', 1, 64),
	}

	return `{` + strings.Join(v, ",") + `}`
}

// Total returns the total number of seconds in a CPUTimesStat
func (c TimesStat) Total() float64 {
	total := c.User + c.System + c.Nice + c.Iowait + c.Irq + c.Softirq + c.Steal +
		c.Guest + c.GuestNice + c.Idle + c.Stolen
	return total
}

func (c InfoStat) String() string {
	s, _ := json.Marshal(c)
	return string(s)
}
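As a worked example (not part of the diff), two aggregate samples from Times(false) can be combined into a utilization percentage using Total() and the Idle field; the busyPercent helper below is a sketch, not a gopsutil API.

package cpu

import (
	"fmt"
	"time"
)

// busyPercent samples "cpu-total" twice, interval apart, and reports the
// share of non-idle time in between.
func busyPercent(interval time.Duration) (float64, error) {
	before, err := Times(false)
	if err != nil {
		return 0, err
	}
	time.Sleep(interval)
	after, err := Times(false)
	if err != nil {
		return 0, err
	}
	if len(before) == 0 || len(after) == 0 {
		return 0, fmt.Errorf("no cpu-total sample returned")
	}

	total := after[0].Total() - before[0].Total()
	if total <= 0 {
		return 0, nil
	}
	idle := after[0].Idle - before[0].Idle
	return (total - idle) / total * 100, nil
}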
@@ -1,106 +0,0 @@
// +build darwin

package cpu

import (
	"os/exec"
	"strconv"
	"strings"
)

// sys/resource.h
const (
	CPUser    = 0
	CPNice    = 1
	CPSys     = 2
	CPIntr    = 3
	CPIdle    = 4
	CPUStates = 5
)

// default value. from time.h
var ClocksPerSec = float64(128)

func Times(percpu bool) ([]TimesStat, error) {
	if percpu {
		return perCPUTimes()
	}

	return allCPUTimes()
}

// Returns only one CPUInfoStat on FreeBSD
func Info() ([]InfoStat, error) {
	var ret []InfoStat
	sysctl, err := exec.LookPath("/usr/sbin/sysctl")
	if err != nil {
		return ret, err
	}
	out, err := invoke.Command(sysctl, "machdep.cpu")
	if err != nil {
		return ret, err
	}

	c := InfoStat{}
	for _, line := range strings.Split(string(out), "\n") {
		values := strings.Fields(line)
		if len(values) < 1 {
			continue
		}

		t, err := strconv.ParseInt(values[1], 10, 64)
		// err is not checked here because some value is string.
		if strings.HasPrefix(line, "machdep.cpu.brand_string") {
			c.ModelName = strings.Join(values[1:], " ")
		} else if strings.HasPrefix(line, "machdep.cpu.family") {
			c.Family = values[1]
		} else if strings.HasPrefix(line, "machdep.cpu.model") {
			c.Model = values[1]
		} else if strings.HasPrefix(line, "machdep.cpu.stepping") {
			if err != nil {
				return ret, err
			}
			c.Stepping = int32(t)
		} else if strings.HasPrefix(line, "machdep.cpu.features") {
			for _, v := range values[1:] {
				c.Flags = append(c.Flags, strings.ToLower(v))
			}
		} else if strings.HasPrefix(line, "machdep.cpu.leaf7_features") {
			for _, v := range values[1:] {
				c.Flags = append(c.Flags, strings.ToLower(v))
			}
		} else if strings.HasPrefix(line, "machdep.cpu.extfeatures") {
			for _, v := range values[1:] {
				c.Flags = append(c.Flags, strings.ToLower(v))
			}
		} else if strings.HasPrefix(line, "machdep.cpu.core_count") {
			if err != nil {
				return ret, err
			}
			c.Cores = int32(t)
		} else if strings.HasPrefix(line, "machdep.cpu.cache.size") {
			if err != nil {
				return ret, err
			}
			c.CacheSize = int32(t)
		} else if strings.HasPrefix(line, "machdep.cpu.vendor") {
			c.VendorID = values[1]
		}
	}

	// Use the rated frequency of the CPU. This is a static value and does not
	// account for low power or Turbo Boost modes.
	out, err = invoke.Command(sysctl, "hw.cpufrequency")
	if err != nil {
		return ret, err
	}

	values := strings.Fields(string(out))
	mhz, err := strconv.ParseFloat(values[1], 64)
	if err != nil {
		return ret, err
	}
	c.Mhz = mhz / 1000000.0

	return append(ret, c), nil
}
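A small worked example (not part of the diff) of the final conversion in Info() above: `sysctl hw.cpufrequency` reports the rated frequency in Hertz, so dividing by 1e6 yields MHz; the sample value is hypothetical.

package cpu

import "fmt"

func exampleCPUFrequency() {
	// sysctl output such as "hw.cpufrequency: 2400000000" gives values[1] == "2400000000".
	hz := 2400000000.0
	fmt.Printf("%.0f MHz\n", hz/1000000.0) // prints "2400 MHz"
}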
@@ -1,109 +0,0 @@
// +build darwin
// +build cgo

package cpu

/*
#include <stdlib.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <mach/mach_init.h>
#include <mach/mach_host.h>
#include <mach/host_info.h>
#if TARGET_OS_MAC
#include <libproc.h>
#endif
#include <mach/processor_info.h>
#include <mach/vm_map.h>
*/
import "C"

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"unsafe"
)

// these CPU times for darwin is borrowed from influxdb/telegraf.

func perCPUTimes() ([]TimesStat, error) {
	var (
		count   C.mach_msg_type_number_t
		cpuload *C.processor_cpu_load_info_data_t
		ncpu    C.natural_t
	)

	status := C.host_processor_info(C.host_t(C.mach_host_self()),
		C.PROCESSOR_CPU_LOAD_INFO,
		&ncpu,
		(*C.processor_info_array_t)(unsafe.Pointer(&cpuload)),
		&count)

	if status != C.KERN_SUCCESS {
		return nil, fmt.Errorf("host_processor_info error=%d", status)
	}

	// jump through some cgo casting hoops and ensure we properly free
	// the memory that cpuload points to
	target := C.vm_map_t(C.mach_task_self_)
	address := C.vm_address_t(uintptr(unsafe.Pointer(cpuload)))
	defer C.vm_deallocate(target, address, C.vm_size_t(ncpu))

	// the body of struct processor_cpu_load_info
	// aka processor_cpu_load_info_data_t
	var cpu_ticks [C.CPU_STATE_MAX]uint32

	// copy the cpuload array to a []byte buffer
	// where we can binary.Read the data
	size := int(ncpu) * binary.Size(cpu_ticks)
	buf := C.GoBytes(unsafe.Pointer(cpuload), C.int(size))

	bbuf := bytes.NewBuffer(buf)

	var ret []TimesStat

	for i := 0; i < int(ncpu); i++ {
		err := binary.Read(bbuf, binary.LittleEndian, &cpu_ticks)
		if err != nil {
			return nil, err
		}

		c := TimesStat{
			CPU:    fmt.Sprintf("cpu%d", i),
			User:   float64(cpu_ticks[C.CPU_STATE_USER]) / ClocksPerSec,
			System: float64(cpu_ticks[C.CPU_STATE_SYSTEM]) / ClocksPerSec,
			Nice:   float64(cpu_ticks[C.CPU_STATE_NICE]) / ClocksPerSec,
			Idle:   float64(cpu_ticks[C.CPU_STATE_IDLE]) / ClocksPerSec,
		}

		ret = append(ret, c)
	}

	return ret, nil
}

func allCPUTimes() ([]TimesStat, error) {
	var count C.mach_msg_type_number_t = C.HOST_CPU_LOAD_INFO_COUNT
	var cpuload C.host_cpu_load_info_data_t

	status := C.host_statistics(C.host_t(C.mach_host_self()),
		C.HOST_CPU_LOAD_INFO,
		C.host_info_t(unsafe.Pointer(&cpuload)),
		&count)

	if status != C.KERN_SUCCESS {
		return nil, fmt.Errorf("host_statistics error=%d", status)
	}

	c := TimesStat{
		CPU:    "cpu-total",
		User:   float64(cpuload.cpu_ticks[C.CPU_STATE_USER]) / ClocksPerSec,
		System: float64(cpuload.cpu_ticks[C.CPU_STATE_SYSTEM]) / ClocksPerSec,
		Nice:   float64(cpuload.cpu_ticks[C.CPU_STATE_NICE]) / ClocksPerSec,
		Idle:   float64(cpuload.cpu_ticks[C.CPU_STATE_IDLE]) / ClocksPerSec,
	}

	return []TimesStat{c}, nil
}
@@ -1,14 +0,0 @@
// +build darwin
// +build !cgo

package cpu

import "github.com/shirou/gopsutil/internal/common"

func perCPUTimes() ([]TimesStat, error) {
	return []TimesStat{}, common.ErrNotImplementedError
}

func allCPUTimes() ([]TimesStat, error) {
	return []TimesStat{}, common.ErrNotImplementedError
}
@@ -1,154 +0,0 @@
package cpu

import (
	"fmt"
	"os/exec"
	"regexp"
	"strconv"
	"strings"

	"github.com/shirou/gopsutil/internal/common"
)

// sys/resource.h
const (
	CPUser    = 0
	CPNice    = 1
	CPSys     = 2
	CPIntr    = 3
	CPIdle    = 4
	CPUStates = 5
)

var ClocksPerSec = float64(128)

func init() {
	getconf, err := exec.LookPath("/usr/bin/getconf")
	if err != nil {
		return
	}
	out, err := invoke.Command(getconf, "CLK_TCK")
	// ignore errors
	if err == nil {
		i, err := strconv.ParseFloat(strings.TrimSpace(string(out)), 64)
		if err == nil {
			ClocksPerSec = float64(i)
		}
	}
}

func Times(percpu bool) ([]TimesStat, error) {
	var ret []TimesStat

	var sysctlCall string
	var ncpu int
	if percpu {
		sysctlCall = "kern.cp_times"
		ncpu, _ = Counts(true)
	} else {
		sysctlCall = "kern.cp_time"
		ncpu = 1
	}

	cpuTimes, err := common.DoSysctrl(sysctlCall)
	if err != nil {
		return ret, err
	}

	for i := 0; i < ncpu; i++ {
		offset := CPUStates * i
		user, err := strconv.ParseFloat(cpuTimes[CPUser+offset], 64)
		if err != nil {
			return ret, err
		}
		nice, err := strconv.ParseFloat(cpuTimes[CPNice+offset], 64)
		if err != nil {
			return ret, err
		}
		sys, err := strconv.ParseFloat(cpuTimes[CPSys+offset], 64)
		if err != nil {
			return ret, err
		}
		idle, err := strconv.ParseFloat(cpuTimes[CPIdle+offset], 64)
		if err != nil {
			return ret, err
		}
		intr, err := strconv.ParseFloat(cpuTimes[CPIntr+offset], 64)
		if err != nil {
			return ret, err
		}

		c := TimesStat{
			User:   float64(user / ClocksPerSec),
			Nice:   float64(nice / ClocksPerSec),
			System: float64(sys / ClocksPerSec),
			Idle:   float64(idle / ClocksPerSec),
			Irq:    float64(intr / ClocksPerSec),
		}
		if !percpu {
			c.CPU = "cpu-total"
		} else {
			c.CPU = fmt.Sprintf("cpu%d", i)
		}

		ret = append(ret, c)
	}

	return ret, nil
}

// Returns only one InfoStat on FreeBSD. The information regarding core
// count, however is accurate and it is assumed that all InfoStat attributes
// are the same across CPUs.
func Info() ([]InfoStat, error) {
	const dmesgBoot = "/var/run/dmesg.boot"
	lines, _ := common.ReadLines(dmesgBoot)

	c := InfoStat{}
	var vals []string
	var err error
	if vals, err = common.DoSysctrl("hw.clockrate"); err != nil {
		return nil, err
	}
	if c.Mhz, err = strconv.ParseFloat(vals[0], 64); err != nil {
		return nil, fmt.Errorf("Unable to parse FreeBSD CPU clock rate: %v", err)
	}
	c.CPU = int32(c.Mhz)

	if vals, err = common.DoSysctrl("hw.ncpu"); err != nil {
		return nil, err
	}
	var i64 int64
	if i64, err = strconv.ParseInt(vals[0], 10, 32); err != nil {
		return nil, fmt.Errorf("Unable to parse FreeBSD cores: %v", err)
	}
	c.Cores = int32(i64)

	if vals, err = common.DoSysctrl("hw.model"); err != nil {
		return nil, err
	}
	c.ModelName = strings.Join(vals, " ")

	for _, line := range lines {
		if matches := regexp.MustCompile(`Origin\s*=\s*"(.+)"\s+Id\s*=\s*(.+)\s+Family\s*=\s*(.+)\s+Model\s*=\s*(.+)\s+Stepping\s*=\s*(.+)`).FindStringSubmatch(line); matches != nil {
			c.VendorID = matches[1]
			c.Family = matches[3]
			c.Model = matches[4]
			t, err := strconv.ParseInt(matches[5], 10, 32)
			if err != nil {
				return nil, fmt.Errorf("Unable to parse FreeBSD CPU stepping information from %q: %v", line, err)
			}
			c.Stepping = int32(t)
		} else if matches := regexp.MustCompile(`Features=.+<(.+)>`).FindStringSubmatch(line); matches != nil {
			for _, v := range strings.Split(matches[1], ",") {
				c.Flags = append(c.Flags, strings.ToLower(v))
			}
		} else if matches := regexp.MustCompile(`Features2=[a-f\dx]+<(.+)>`).FindStringSubmatch(line); matches != nil {
			for _, v := range strings.Split(matches[1], ",") {
				c.Flags = append(c.Flags, strings.ToLower(v))
			}
		}
	}

	return []InfoStat{c}, nil
}
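For reference, a minimal sketch (not part of the diff) of the offset arithmetic used by Times on FreeBSD: kern.cp_times returns CPUStates counters per CPU in the order user, nice, sys, intr, idle, and `CPUStates * i` indexes into that flat slice; the sample numbers below are made up.

package cpu

import "fmt"

func exampleCpTimesLayout() {
	// Flattened kern.cp_times output for two CPUs (hypothetical values).
	cpuTimes := []string{
		"1000", "5", "300", "20", "8000", // cpu0: user nice sys intr idle
		"900", "2", "250", "15", "8200", // cpu1
	}
	for i := 0; i < 2; i++ {
		offset := CPUStates * i
		fmt.Printf("cpu%d user=%s idle=%s\n",
			i, cpuTimes[CPUser+offset], cpuTimes[CPIdle+offset])
	}
}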
@@ -1,244 +0,0 @@
// +build linux

package cpu

import (
	"errors"
	"fmt"
	"os/exec"
	"strconv"
	"strings"

	"github.com/shirou/gopsutil/internal/common"
)

var cpu_tick = float64(100)

func init() {
	getconf, err := exec.LookPath("/usr/bin/getconf")
	if err != nil {
		return
	}
	out, err := invoke.Command(getconf, "CLK_TCK")
	// ignore errors
	if err == nil {
		i, err := strconv.ParseFloat(strings.TrimSpace(string(out)), 64)
		if err == nil {
			cpu_tick = float64(i)
		}
	}
}

func Times(percpu bool) ([]TimesStat, error) {
	filename := common.HostProc("stat")
	var lines = []string{}
	if percpu {
		var startIdx uint = 1
		for {
			linen, _ := common.ReadLinesOffsetN(filename, startIdx, 1)
			line := linen[0]
			if !strings.HasPrefix(line, "cpu") {
				break
			}
			lines = append(lines, line)
			startIdx++
		}
	} else {
		lines, _ = common.ReadLinesOffsetN(filename, 0, 1)
	}

	ret := make([]TimesStat, 0, len(lines))

	for _, line := range lines {
		ct, err := parseStatLine(line)
		if err != nil {
			continue
		}
		ret = append(ret, *ct)
	}
	return ret, nil
}

func sysCPUPath(cpu int32, relPath string) string {
	return common.HostSys(fmt.Sprintf("devices/system/cpu/cpu%d", cpu), relPath)
}

func finishCPUInfo(c *InfoStat) error {
	if c.Mhz == 0 {
		lines, err := common.ReadLines(sysCPUPath(c.CPU, "cpufreq/cpuinfo_max_freq"))
		if err == nil {
			value, err := strconv.ParseFloat(lines[0], 64)
			if err != nil {
				return err
			}
			c.Mhz = value
		}
	}
	if len(c.CoreID) == 0 {
		lines, err := common.ReadLines(sysCPUPath(c.CPU, "topology/coreId"))
		if err == nil {
			c.CoreID = lines[0]
		}
	}
	return nil
}

// CPUInfo on linux will return 1 item per physical thread.
//
// CPUs have three levels of counting: sockets, cores, threads.
// Cores with HyperThreading count as having 2 threads per core.
// Sockets often come with many physical CPU cores.
// For example a single socket board with two cores each with HT will
// return 4 CPUInfoStat structs on Linux and the "Cores" field set to 1.
func Info() ([]InfoStat, error) {
	filename := common.HostProc("cpuinfo")
	lines, _ := common.ReadLines(filename)

	var ret []InfoStat

	c := InfoStat{CPU: -1, Cores: 1}
	for _, line := range lines {
		fields := strings.Split(line, ":")
		if len(fields) < 2 {
			continue
		}
		key := strings.TrimSpace(fields[0])
		value := strings.TrimSpace(fields[1])

		switch key {
		case "processor":
			if c.CPU >= 0 {
				err := finishCPUInfo(&c)
				if err != nil {
					return ret, err
				}
				ret = append(ret, c)
			}
			c = InfoStat{Cores: 1}
			t, err := strconv.ParseInt(value, 10, 64)
			if err != nil {
				return ret, err
			}
			c.CPU = int32(t)
		case "vendorId", "vendor_id":
			c.VendorID = value
		case "cpu family":
			c.Family = value
		case "model":
			c.Model = value
		case "model name":
			c.ModelName = value
		case "stepping":
			t, err := strconv.ParseInt(value, 10, 64)
			if err != nil {
				return ret, err
			}
			c.Stepping = int32(t)
		case "cpu MHz":
			t, err := strconv.ParseFloat(value, 64)
			if err != nil {
				return ret, err
			}
			c.Mhz = t
		case "cache size":
			t, err := strconv.ParseInt(strings.Replace(value, " KB", "", 1), 10, 64)
			if err != nil {
				return ret, err
			}
			c.CacheSize = int32(t)
		case "physical id":
			c.PhysicalID = value
		case "core id":
			c.CoreID = value
		case "flags", "Features":
			c.Flags = strings.FieldsFunc(value, func(r rune) bool {
				return r == ',' || r == ' '
			})
		}
	}
	if c.CPU >= 0 {
		err := finishCPUInfo(&c)
		if err != nil {
			return ret, err
		}
		ret = append(ret, c)
	}
	return ret, nil
}

func parseStatLine(line string) (*TimesStat, error) {
	fields := strings.Fields(line)

	if strings.HasPrefix(fields[0], "cpu") == false {
		// return CPUTimesStat{}, e
		return nil, errors.New("not contain cpu")
	}

	cpu := fields[0]
	if cpu == "cpu" {
		cpu = "cpu-total"
	}
	user, err := strconv.ParseFloat(fields[1], 64)
	if err != nil {
		return nil, err
	}
	nice, err := strconv.ParseFloat(fields[2], 64)
	if err != nil {
		return nil, err
	}
	system, err := strconv.ParseFloat(fields[3], 64)
	if err != nil {
		return nil, err
	}
	idle, err := strconv.ParseFloat(fields[4], 64)
	if err != nil {
		return nil, err
	}
	iowait, err := strconv.ParseFloat(fields[5], 64)
	if err != nil {
		return nil, err
	}
	irq, err := strconv.ParseFloat(fields[6], 64)
	if err != nil {
		return nil, err
	}
	softirq, err := strconv.ParseFloat(fields[7], 64)
	if err != nil {
		return nil, err
	}

	ct := &TimesStat{
		CPU:     cpu,
		User:    float64(user) / cpu_tick,
		Nice:    float64(nice) / cpu_tick,
		System:  float64(system) / cpu_tick,
		Idle:    float64(idle) / cpu_tick,
		Iowait:  float64(iowait) / cpu_tick,
		Irq:     float64(irq) / cpu_tick,
		Softirq: float64(softirq) / cpu_tick,
	}
	if len(fields) > 8 { // Linux >= 2.6.11
		steal, err := strconv.ParseFloat(fields[8], 64)
		if err != nil {
			return nil, err
		}
		ct.Steal = float64(steal) / cpu_tick
	}
	if len(fields) > 9 { // Linux >= 2.6.24
		guest, err := strconv.ParseFloat(fields[9], 64)
		if err != nil {
			return nil, err
		}
		ct.Guest = float64(guest) / cpu_tick
	}
	if len(fields) > 10 { // Linux >= 3.2.0
		guestNice, err := strconv.ParseFloat(fields[10], 64)
		if err != nil {
			return nil, err
		}
		ct.GuestNice = float64(guestNice) / cpu_tick
	}

	return ct, nil
}
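Finally, a minimal sketch (not part of the diff) of what parseStatLine produces for a sample /proc/stat line; the field values are hypothetical, and cpu_tick defaults to 100 unless getconf CLK_TCK overrides it.

// +build linux

package cpu

import "fmt"

func exampleParseStatLine() {
	// cpuN user nice system idle iowait irq softirq steal (Linux >= 2.6.11)
	line := "cpu4 1220 32 3268 1184500 1973 0 10 5"
	ct, err := parseStatLine(line)
	if err != nil {
		fmt.Println("parse error:", err)
		return
	}
	// With cpu_tick == 100 this prints: cpu4 user=12.20s idle=11845.00s steal=0.05s
	fmt.Printf("%s user=%.2fs idle=%.2fs steal=%.2fs\n", ct.CPU, ct.User, ct.Idle, ct.Steal)
}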