From 3225d9ac113db673e830aeee68cc44327864d65e Mon Sep 17 00:00:00 2001 From: James Bardin Date: Fri, 23 Oct 2020 13:09:29 -0400 Subject: [PATCH] record all plugin panics, and print on main exit Create a logger that will record any apparent crash output for later processing. If the cli command returns with a non-zero exit status, check for any recorded crashes and add those to the output. --- internal/logging/logging.go | 28 ++++++--- internal/logging/panic.go | 111 +++++++++++++++++++++++++++++++++ internal/logging/panic_test.go | 51 +++++++++++++++ main.go | 10 +++ 4 files changed, 192 insertions(+), 8 deletions(-) create mode 100644 internal/logging/panic_test.go diff --git a/internal/logging/logging.go b/internal/logging/logging.go index e78758543..970686344 100644 --- a/internal/logging/logging.go +++ b/internal/logging/logging.go @@ -24,14 +24,22 @@ const ( envLogProvider = "TF_LOG_PROVIDER" ) -// ValidLevels are the log level names that Terraform recognizes. -var ValidLevels = []string{"TRACE", "DEBUG", "INFO", "WARN", "ERROR", "OFF"} +var ( + // ValidLevels are the log level names that Terraform recognizes. + ValidLevels = []string{"TRACE", "DEBUG", "INFO", "WARN", "ERROR", "OFF"} -// logger is the global hclog logger -var logger hclog.Logger + // logger is the global hclog logger + logger hclog.Logger -// logWriter is a global writer for logs, to be used with the std log package -var logWriter io.Writer + // logWriter is a global writer for logs, to be used with the std log package + logWriter io.Writer + + // initialize our cache of panic output from providers + panics = &panicRecorder{ + panics: make(map[string][]string), + maxLines: 100, + } +) func init() { logger = newHCLogger("") @@ -98,13 +106,17 @@ func NewLogger(name string) hclog.Logger { if name == "" { panic("logger name required") } - return logger.Named(name) + return &logPanicWrapper{ + Logger: logger.Named(name), + } } // NewProviderLogger returns a logger for the provider plugin, possibly with a // different log level from the global logger. func NewProviderLogger(prefix string) hclog.Logger { - l := logger.Named(prefix + "provider") + l := &logPanicWrapper{ + Logger: logger.Named(prefix + "provider"), + } level := providerLogLevel() logger.Debug("created provider logger", "level", level) diff --git a/internal/logging/panic.go b/internal/logging/panic.go index cc639b38f..960d4f685 100644 --- a/internal/logging/panic.go +++ b/internal/logging/panic.go @@ -5,7 +5,10 @@ import ( "io" "io/ioutil" "os" + "strings" + "sync" + "github.com/hashicorp/go-hclog" "github.com/mitchellh/panicwrap" ) @@ -69,3 +72,111 @@ func PanicHandler(tmpLogPath string) panicwrap.HandlerFunc { fmt.Printf(panicOutput, f.Name()) } } + +const pluginPanicOutput = ` +Stack trace from the %[1]s plugin: + +%s + +Error: The %[1]s plugin crashed! + +This is always indicative of a bug within the plugin. It would be immensely +helpful if you could report the crash with the plugin's maintainers so that it +can be fixed. The output above should help diagnose the issue. +` + +// PluginPanics returns a series of provider panics that were collected during +// execution, and formatted for output. +func PluginPanics() []string { + return panics.allPanics() +} + +// panicRecorder provides a registry to check for plugin panics that may have +// happened when a plugin suddenly terminates. +type panicRecorder struct { + sync.Mutex + + // panics maps the plugin name to the panic output lines received from + // the logger. + panics map[string][]string + + // maxLines is the max number of lines we'll record after seeing a + // panic header. Since this is going to be printed in the UI output, we + // don't want to destroy the scrollback. In most cases, the first few lines + // of the stack trace is all that are required. + maxLines int +} + +// registerPlugin returns an accumulator function which will accept lines of +// a panic stack trace to collect into an error when requested. +func (p *panicRecorder) registerPlugin(name string) func(string) { + p.Lock() + defer p.Unlock() + + // In most cases we shouldn't be starting a plugin if it already + // panicked, but clear out previous entries just in case. + delete(p.panics, name) + + count := 0 + + // this callback is used by the logger to store panic output + return func(line string) { + p.Lock() + defer p.Unlock() + + // stop recording if there are too many lines. + if count > p.maxLines { + return + } + count++ + + p.panics[name] = append(p.panics[name], line) + } +} + +func (p *panicRecorder) allPanics() []string { + p.Lock() + defer p.Unlock() + + var res []string + for name, lines := range p.panics { + if len(lines) == 0 { + continue + } + + res = append(res, fmt.Sprintf(pluginPanicOutput, name, strings.Join(lines, "\n"))) + } + return res +} + +// logPanicWrapper wraps an hclog.Logger and intercepts and records any output +// that appears to be a panic. +type logPanicWrapper struct { + hclog.Logger + panicRecorder func(string) + inPanic bool +} + +// go-plugin will create a new named logger for each plugin binary. +func (l *logPanicWrapper) Named(name string) hclog.Logger { + return &logPanicWrapper{ + Logger: l.Logger.Named(name), + panicRecorder: panics.registerPlugin(name), + } +} + +// we only need to implement Debug, since that is the default output level used +// by go-plugin when encountering unstructured output on stderr. +func (l *logPanicWrapper) Debug(msg string, args ...interface{}) { + // We don't have access to the binary itself, so guess based on the stderr + // output if this is the start of the traceback. An occasional false + // positive shouldn't be a big deal, since this is only retrieved after an + // error of some sort. + l.inPanic = l.inPanic || strings.HasPrefix(msg, "panic: ") || strings.HasPrefix(msg, "fatal error: ") + + if l.inPanic && l.panicRecorder != nil { + l.panicRecorder(msg) + } + + l.Logger.Debug(msg, args...) +} diff --git a/internal/logging/panic_test.go b/internal/logging/panic_test.go new file mode 100644 index 000000000..e83a0ba5a --- /dev/null +++ b/internal/logging/panic_test.go @@ -0,0 +1,51 @@ +package logging + +import ( + "fmt" + "strings" + "testing" +) + +func TestPanicRecorder(t *testing.T) { + rec := panics.registerPlugin("test") + + output := []string{ + "panic: test", + " stack info", + } + + for _, line := range output { + rec(line) + } + + expected := fmt.Sprintf(pluginPanicOutput, "test", strings.Join(output, "\n")) + + res := PluginPanics() + if len(res) == 0 { + t.Fatal("no output") + } + + if res[0] != expected { + t.Fatalf("expected: %q\ngot: %q", expected, res[0]) + } +} + +func TestPanicLimit(t *testing.T) { + rec := panics.registerPlugin("test") + + rec("panic: test") + + for i := 0; i < 200; i++ { + rec(fmt.Sprintf("LINE: %d", i)) + } + + res := PluginPanics() + // take the extra content into account + max := strings.Count(pluginPanicOutput, "\n") + panics.maxLines + for _, out := range res { + found := strings.Count(out, "\n") + if found > max { + t.Fatalf("expected no more than %d lines, got: %d", max, found) + } + } +} diff --git a/main.go b/main.go index 0a40ae0f1..183698408 100644 --- a/main.go +++ b/main.go @@ -288,6 +288,16 @@ func wrappedMain() int { return 1 } + // if we are exiting with a non-zero code, check if it was caused by any + // plugins crashing + if exitCode != 0 { + for _, panicLog := range logging.PluginPanics() { + // we don't write this to Error, or else panicwrap will think this + // process panicked + Ui.Info(panicLog) + } + } + return exitCode }