Merge pull request #12684 from hashicorp/jbardin/consul-tests
Include a consul server for tests
This commit is contained in:
commit
f9190f37ae
13
.travis.yml
13
.travis.yml
|
@ -1,7 +1,20 @@
|
|||
dist: trusty
|
||||
sudo: false
|
||||
language: go
|
||||
go:
|
||||
- 1.8
|
||||
|
||||
env:
|
||||
- CONSUL_VERSION=0.7.5 TF_CONSUL_TEST=1
|
||||
|
||||
# Fetch consul for the backend and provider tests
|
||||
before_install:
|
||||
- curl -sLo consul.zip https://releases.hashicorp.com/consul/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_linux_amd64.zip
|
||||
- unzip consul.zip
|
||||
- mkdir ~/bin
|
||||
- mv consul ~/bin
|
||||
# Use $HOME rather than "~": a tilde inside double quotes is not expanded by
# the shell, so PATH would otherwise contain a literal "~/bin" entry.
- export PATH="$HOME/bin:$PATH"
|
||||
|
||||
install:
|
||||
# This script is used by the Travis build to install a cookie for
|
||||
# go.googlesource.com so rate limits are higher when using `go get` to fetch
|
||||
|
|
|
@ -2,10 +2,12 @@ package consul
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/testutil"
|
||||
"github.com/hashicorp/terraform/backend"
|
||||
)
|
||||
|
||||
|
@ -13,16 +15,32 @@ func TestBackend_impl(t *testing.T) {
|
|||
var _ backend.Backend = new(Backend)
|
||||
}
|
||||
|
||||
func TestBackend(t *testing.T) {
|
||||
addr := os.Getenv("CONSUL_HTTP_ADDR")
|
||||
if addr == "" {
|
||||
t.Log("consul tests require CONSUL_HTTP_ADDR")
|
||||
func newConsulTestServer(t *testing.T) *testutil.TestServer {
|
||||
skip := os.Getenv("TF_ACC") == "" && os.Getenv("TF_CONSUL_TEST") == ""
|
||||
if skip {
|
||||
t.Log("consul server tests require setting TF_ACC or TF_CONSUL_TEST")
|
||||
t.Skip()
|
||||
}
|
||||
|
||||
srv := testutil.NewTestServerConfig(t, func(c *testutil.TestServerConfig) {
|
||||
c.LogLevel = "warn"
|
||||
|
||||
if !testing.Verbose() {
|
||||
c.Stdout = ioutil.Discard
|
||||
c.Stderr = ioutil.Discard
|
||||
}
|
||||
})
|
||||
|
||||
return srv
|
||||
}
|
||||
|
||||
func TestBackend(t *testing.T) {
|
||||
srv := newConsulTestServer(t)
|
||||
defer srv.Stop()
|
||||
|
||||
// Get the backend
|
||||
b := backend.TestBackendConfig(t, New(), map[string]interface{}{
|
||||
"address": addr,
|
||||
"address": srv.HTTPAddr,
|
||||
"path": fmt.Sprintf("tf-unit/%s", time.Now().String()),
|
||||
})
|
||||
|
||||
|
|
|
@ -2,7 +2,6 @@ package consul
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -16,15 +15,12 @@ func TestRemoteClient_impl(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestRemoteClient(t *testing.T) {
|
||||
addr := os.Getenv("CONSUL_HTTP_ADDR")
|
||||
if addr == "" {
|
||||
t.Log("consul tests require CONSUL_HTTP_ADDR")
|
||||
t.Skip()
|
||||
}
|
||||
srv := newConsulTestServer(t)
|
||||
defer srv.Stop()
|
||||
|
||||
// Get the backend
|
||||
b := backend.TestBackendConfig(t, New(), map[string]interface{}{
|
||||
"address": addr,
|
||||
"address": srv.HTTPAddr,
|
||||
"path": fmt.Sprintf("tf-unit/%s", time.Now().String()),
|
||||
})
|
||||
|
||||
|
@ -39,17 +35,14 @@ func TestRemoteClient(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestConsul_stateLock(t *testing.T) {
|
||||
addr := os.Getenv("CONSUL_HTTP_ADDR")
|
||||
if addr == "" {
|
||||
t.Log("consul lock tests require CONSUL_HTTP_ADDR")
|
||||
t.Skip()
|
||||
}
|
||||
srv := newConsulTestServer(t)
|
||||
defer srv.Stop()
|
||||
|
||||
path := fmt.Sprintf("tf-unit/%s", time.Now().String())
|
||||
|
||||
// create 2 instances to get 2 remote.Clients
|
||||
sA, err := backend.TestBackendConfig(t, New(), map[string]interface{}{
|
||||
"address": addr,
|
||||
"address": srv.HTTPAddr,
|
||||
"path": path,
|
||||
}).State(backend.DefaultStateName)
|
||||
if err != nil {
|
||||
|
@ -57,7 +50,7 @@ func TestConsul_stateLock(t *testing.T) {
|
|||
}
|
||||
|
||||
sB, err := backend.TestBackendConfig(t, New(), map[string]interface{}{
|
||||
"address": addr,
|
||||
"address": srv.HTTPAddr,
|
||||
"path": path,
|
||||
}).State(backend.DefaultStateName)
|
||||
if err != nil {
|
||||
|
|
|
@ -10,7 +10,6 @@ import (
|
|||
|
||||
func TestAccDataConsulAgentSelf_basic(t *testing.T) {
|
||||
resource.Test(t, resource.TestCase{
|
||||
PreCheck: func() { testAccPreCheck(t) },
|
||||
Providers: testAccProviders,
|
||||
Steps: []resource.TestStep{
|
||||
resource.TestStep{
|
||||
|
@ -25,7 +24,8 @@ func TestAccDataConsulAgentSelf_basic(t *testing.T) {
|
|||
testAccCheckDataSourceValue("data.consul_agent_self.read", "advertise_addr", "<any>"),
|
||||
testAccCheckDataSourceValue("data.consul_agent_self.read", "bind_addr", "<any>"),
|
||||
testAccCheckDataSourceValue("data.consul_agent_self.read", "bootstrap_expect", "<all>"),
|
||||
testAccCheckDataSourceValue("data.consul_agent_self.read", "bootstrap_mode", "false"),
|
||||
// the local test server is bootstrapped
|
||||
testAccCheckDataSourceValue("data.consul_agent_self.read", "bootstrap_mode", "true"),
|
||||
testAccCheckDataSourceValue("data.consul_agent_self.read", "client_addr", "<any>"),
|
||||
testAccCheckDataSourceValue("data.consul_agent_self.read", "datacenter", "<any>"),
|
||||
testAccCheckDataSourceValue("data.consul_agent_self.read", "dev_mode", "<any>"),
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
func TestAccDataConsulCatalogNodes_basic(t *testing.T) {
|
||||
resource.Test(t, resource.TestCase{
|
||||
PreCheck: func() { testAccPreCheck(t) },
|
||||
Providers: testAccProviders,
|
||||
Steps: []resource.TestStep{
|
||||
resource.TestStep{
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
func TestAccDataConsulCatalogService_basic(t *testing.T) {
|
||||
resource.Test(t, resource.TestCase{
|
||||
PreCheck: func() { testAccPreCheck(t) },
|
||||
Providers: testAccProviders,
|
||||
Steps: []resource.TestStep{
|
||||
resource.TestStep{
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
func TestAccDataConsulCatalogServices_basic(t *testing.T) {
|
||||
resource.Test(t, resource.TestCase{
|
||||
PreCheck: func() { testAccPreCheck(t) },
|
||||
Providers: testAccProviders,
|
||||
Steps: []resource.TestStep{
|
||||
resource.TestStep{
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
func TestAccDataConsulKeys_basic(t *testing.T) {
|
||||
resource.Test(t, resource.TestCase{
|
||||
PreCheck: func() { testAccPreCheck(t) },
|
||||
Providers: testAccProviders,
|
||||
Steps: []resource.TestStep{
|
||||
resource.TestStep{
|
||||
|
|
|
@ -11,7 +11,6 @@ import (
|
|||
|
||||
func TestAccConsulKeyPrefix_basic(t *testing.T) {
|
||||
resource.Test(t, resource.TestCase{
|
||||
PreCheck: func() { testAccPreCheck(t) },
|
||||
Providers: testAccProviders,
|
||||
CheckDestroy: resource.ComposeTestCheckFunc(
|
||||
testAccCheckConsulKeyPrefixKeyAbsent("species"),
|
||||
|
|
|
@ -11,7 +11,6 @@ import (
|
|||
|
||||
func TestAccConsulKeys_basic(t *testing.T) {
|
||||
resource.Test(t, resource.TestCase{
|
||||
PreCheck: func() { testAccPreCheck(t) },
|
||||
Providers: testAccProviders,
|
||||
CheckDestroy: testAccCheckConsulKeysDestroy,
|
||||
Steps: []resource.TestStep{
|
||||
|
|
|
@ -11,7 +11,6 @@ import (
|
|||
|
||||
func TestAccConsulPreparedQuery_basic(t *testing.T) {
|
||||
resource.Test(t, resource.TestCase{
|
||||
PreCheck: func() { testAccPreCheck(t) },
|
||||
Providers: testAccProviders,
|
||||
CheckDestroy: testAccCheckConsulPreparedQueryDestroy,
|
||||
Steps: []resource.TestStep{
|
||||
|
|
|
@ -1,24 +1,64 @@
|
|||
package consul
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/consul/testutil"
|
||||
"github.com/hashicorp/terraform/config"
|
||||
"github.com/hashicorp/terraform/helper/schema"
|
||||
"github.com/hashicorp/terraform/terraform"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
)
|
||||
|
||||
var testAccProviders map[string]terraform.ResourceProvider
|
||||
var testAccProvider *schema.Provider
|
||||
var testConsulHTTPAddr string
|
||||
|
||||
func init() {
|
||||
testAccProvider = Provider().(*schema.Provider)
|
||||
testAccProvider.ConfigureFunc = testProviderConfigure
|
||||
|
||||
testAccProviders = map[string]terraform.ResourceProvider{
|
||||
"consul": testAccProvider,
|
||||
}
|
||||
}
|
||||
|
||||
// we need to overrride the configured address for the tests
|
||||
func testProviderConfigure(d *schema.ResourceData) (interface{}, error) {
|
||||
var config Config
|
||||
configRaw := d.Get("").(map[string]interface{})
|
||||
if err := mapstructure.Decode(configRaw, &config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config.Address = testConsulHTTPAddr
|
||||
|
||||
log.Printf("[INFO] Initializing Consul test client")
|
||||
return config.Client()
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
t := struct {
|
||||
testutil.TestingT
|
||||
}{}
|
||||
|
||||
// start and stop the test consul server once for all tests
|
||||
srv := testutil.NewTestServerConfig(t, func(c *testutil.TestServerConfig) {
|
||||
c.LogLevel = "warn"
|
||||
c.Stdout = ioutil.Discard
|
||||
c.Stderr = ioutil.Discard
|
||||
})
|
||||
|
||||
testConsulHTTPAddr = srv.HTTPAddr
|
||||
|
||||
ret := m.Run()
|
||||
|
||||
srv.Stop()
|
||||
os.Exit(ret)
|
||||
}
|
||||
|
||||
func TestResourceProvider(t *testing.T) {
|
||||
if err := Provider().(*schema.Provider).InternalValidate(); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
|
@ -32,8 +72,9 @@ func TestResourceProvider_impl(t *testing.T) {
|
|||
func TestResourceProvider_Configure(t *testing.T) {
|
||||
rp := Provider()
|
||||
|
||||
// these configuration tests don't require a running server
|
||||
raw := map[string]interface{}{
|
||||
"address": "demo.consul.io:80",
|
||||
"address": "example.com:8500",
|
||||
"datacenter": "nyc3",
|
||||
"scheme": "https",
|
||||
}
|
||||
|
@ -53,7 +94,7 @@ func TestResourceProvider_ConfigureTLS(t *testing.T) {
|
|||
rp := Provider()
|
||||
|
||||
raw := map[string]interface{}{
|
||||
"address": "demo.consul.io:80",
|
||||
"address": "example.com:8943",
|
||||
"ca_file": "test-fixtures/cacert.pem",
|
||||
"cert_file": "test-fixtures/usercert.pem",
|
||||
"datacenter": "nyc3",
|
||||
|
@ -71,13 +112,3 @@ func TestResourceProvider_ConfigureTLS(t *testing.T) {
|
|||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func testAccPreCheck(t *testing.T) {
|
||||
if v := os.Getenv("CONSUL_HTTP_ADDR"); v != "" {
|
||||
return
|
||||
}
|
||||
if v := os.Getenv("CONSUL_ADDRESS"); v != "" {
|
||||
return
|
||||
}
|
||||
t.Fatal("Either CONSUL_ADDRESS or CONSUL_HTTP_ADDR must be set for acceptance tests")
|
||||
}
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
go-metrics
|
||||
==========
|
||||
|
||||
This library provides a `metrics` package which can be used to instrument code,
|
||||
expose application metrics, and profile runtime performance in a flexible manner.
|
||||
|
||||
Current API: [![GoDoc](https://godoc.org/github.com/armon/go-metrics?status.svg)](https://godoc.org/github.com/armon/go-metrics)
|
||||
|
||||
Sinks
|
||||
=====
|
||||
|
||||
The `metrics` package makes use of a `MetricSink` interface to support delivery
|
||||
to any type of backend. Currently the following sinks are provided:
|
||||
|
||||
* StatsiteSink : Sinks to a [statsite](https://github.com/armon/statsite/) instance (TCP)
|
||||
* StatsdSink: Sinks to a [StatsD](https://github.com/etsy/statsd/) / statsite instance (UDP)
|
||||
* PrometheusSink: Sinks to a [Prometheus](http://prometheus.io/) metrics endpoint (exposed via HTTP for scrapes)
|
||||
* InmemSink : Provides in-memory aggregation, can be used to export stats
|
||||
* FanoutSink : Sinks to multiple sinks. Enables writing to multiple statsite instances for example.
|
||||
* BlackholeSink : Sinks to nowhere
|
||||
|
||||
In addition to the sinks, the `InmemSignal` can be used to catch a signal,
|
||||
and dump a formatted output of recent metrics. For example, when a process gets
|
||||
a SIGUSR1, it can dump recent performance metrics to stderr for debugging.
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
Here is an example of using the package:
|
||||
|
||||
```go
|
||||
func SlowMethod() {
|
||||
// Profiling the runtime of a method
|
||||
defer metrics.MeasureSince([]string{"SlowMethod"}, time.Now())
|
||||
}
|
||||
|
||||
// Configure a statsite sink as the global metrics sink
|
||||
sink, _ := metrics.NewStatsiteSink("statsite:8125")
|
||||
metrics.NewGlobal(metrics.DefaultConfig("service-name"), sink)
|
||||
|
||||
// Emit a Key/Value pair
|
||||
metrics.EmitKey([]string{"questions", "meaning of life"}, 42)
|
||||
```
|
||||
|
||||
Here is an example of setting up a signal handler:
|
||||
|
||||
```go
|
||||
// Setup the inmem sink and signal handler
|
||||
inm := metrics.NewInmemSink(10*time.Second, time.Minute)
|
||||
sig := metrics.DefaultInmemSignal(inm)
|
||||
metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm)
|
||||
|
||||
// Run some code
|
||||
inm.SetGauge([]string{"foo"}, 42)
|
||||
inm.EmitKey([]string{"bar"}, 30)
|
||||
|
||||
inm.IncrCounter([]string{"baz"}, 42)
|
||||
inm.IncrCounter([]string{"baz"}, 1)
|
||||
inm.IncrCounter([]string{"baz"}, 80)
|
||||
|
||||
inm.AddSample([]string{"method", "wow"}, 42)
|
||||
inm.AddSample([]string{"method", "wow"}, 100)
|
||||
inm.AddSample([]string{"method", "wow"}, 22)
|
||||
|
||||
....
|
||||
```
|
||||
|
||||
When a signal comes in, output like the following will be dumped to stderr:
|
||||
|
||||
[2014-01-28 14:57:33.04 -0800 PST][G] 'foo': 42.000
|
||||
[2014-01-28 14:57:33.04 -0800 PST][P] 'bar': 30.000
|
||||
[2014-01-28 14:57:33.04 -0800 PST][C] 'baz': Count: 3 Min: 1.000 Mean: 41.000 Max: 80.000 Stddev: 39.509
|
||||
[2014-01-28 14:57:33.04 -0800 PST][S] 'method.wow': Count: 3 Min: 22.000 Mean: 54.667 Max: 100.000 Stddev: 40.513
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
// +build !windows
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// DefaultSignal is the signal used with DefaultInmemSignal.
const DefaultSignal = syscall.SIGUSR1
|
|
@ -0,0 +1,13 @@
|
|||
// +build windows
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// DefaultSignal is the signal used with DefaultInmemSignal.
// Windows has no SIGUSR1, so SIGBREAK (signal number 21) is used instead.
const DefaultSignal = syscall.Signal(21)
|
|
@ -0,0 +1,247 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// InmemSink provides a MetricSink that does in-memory aggregation
|
||||
// without sending metrics over a network. It can be embedded within
|
||||
// an application to provide profiling information.
|
||||
type InmemSink struct {
|
||||
// How long is each aggregation interval
|
||||
interval time.Duration
|
||||
|
||||
// Retain controls how many metrics interval we keep
|
||||
retain time.Duration
|
||||
|
||||
// maxIntervals is the maximum length of intervals.
|
||||
// It is retain / interval.
|
||||
maxIntervals int
|
||||
|
||||
// intervals is a slice of the retained intervals
|
||||
intervals []*IntervalMetrics
|
||||
intervalLock sync.RWMutex
|
||||
|
||||
rateDenom float64
|
||||
}
|
||||
|
||||
// IntervalMetrics stores the aggregated metrics
|
||||
// for a specific interval
|
||||
type IntervalMetrics struct {
|
||||
sync.RWMutex
|
||||
|
||||
// The start time of the interval
|
||||
Interval time.Time
|
||||
|
||||
// Gauges maps the key to the last set value
|
||||
Gauges map[string]float32
|
||||
|
||||
// Points maps the string to the list of emitted values
|
||||
// from EmitKey
|
||||
Points map[string][]float32
|
||||
|
||||
// Counters maps the string key to a sum of the counter
|
||||
// values
|
||||
Counters map[string]*AggregateSample
|
||||
|
||||
// Samples maps the key to an AggregateSample,
|
||||
// which has the rolled up view of a sample
|
||||
Samples map[string]*AggregateSample
|
||||
}
|
||||
|
||||
// NewIntervalMetrics creates a new IntervalMetrics for a given interval
|
||||
func NewIntervalMetrics(intv time.Time) *IntervalMetrics {
|
||||
return &IntervalMetrics{
|
||||
Interval: intv,
|
||||
Gauges: make(map[string]float32),
|
||||
Points: make(map[string][]float32),
|
||||
Counters: make(map[string]*AggregateSample),
|
||||
Samples: make(map[string]*AggregateSample),
|
||||
}
|
||||
}
|
||||
|
||||
// AggregateSample holds running aggregate statistics for a stream of values
// fed to it through Ingest.
type AggregateSample struct {
	Count       int       // The count of emitted pairs
	Rate        float64   // The count of emitted pairs per time unit (usually 1 second)
	Sum         float64   // The sum of values
	SumSq       float64   // The sum of squared values
	Min         float64   // Minimum value
	Max         float64   // Maximum value
	LastUpdated time.Time // When value was last updated
}

// Stddev returns the sample standard deviation of the ingested values,
// computed from Count, Sum and SumSq. It returns 0 when fewer than two
// values have been ingested.
func (a *AggregateSample) Stddev() float64 {
	num := (float64(a.Count) * a.SumSq) - math.Pow(a.Sum, 2)
	div := float64(a.Count * (a.Count - 1))
	if div == 0 {
		return 0
	}
	// Floating-point rounding can leave num slightly below zero when all the
	// values are (nearly) equal; math.Sqrt would then return NaN. The true
	// variance is never negative, so report zero spread instead.
	if num < 0 {
		return 0
	}
	return math.Sqrt(num / div)
}

// Mean returns the arithmetic mean of the ingested values, or 0 when nothing
// has been ingested.
func (a *AggregateSample) Mean() float64 {
	if a.Count == 0 {
		return 0
	}
	return a.Sum / float64(a.Count)
}

// Ingest folds the value v into the aggregate: it updates Count, Sum, SumSq,
// Min and Max, sets Rate to Count divided by rateDenom, and stamps
// LastUpdated with the current time.
func (a *AggregateSample) Ingest(v float64, rateDenom float64) {
	a.Count++
	a.Sum += v
	a.SumSq += (v * v)
	// The first value always initialises both extrema.
	if v < a.Min || a.Count == 1 {
		a.Min = v
	}
	if v > a.Max || a.Count == 1 {
		a.Max = v
	}
	a.Rate = float64(a.Count) / rateDenom
	a.LastUpdated = time.Now()
}

// String renders the aggregate for display. An empty aggregate prints only
// its count; an aggregate with zero standard deviation prints count and sum;
// otherwise min, mean, max and standard deviation are included as well.
func (a *AggregateSample) String() string {
	if a.Count == 0 {
		return "Count: 0"
	}

	stddev := a.Stddev()
	if stddev == 0 {
		return fmt.Sprintf("Count: %d Sum: %0.3f LastUpdated: %s", a.Count, a.Sum, a.LastUpdated)
	}
	return fmt.Sprintf("Count: %d Min: %0.3f Mean: %0.3f Max: %0.3f Stddev: %0.3f Sum: %0.3f LastUpdated: %s",
		a.Count, a.Min, a.Mean(), a.Max, stddev, a.Sum, a.LastUpdated)
}
|
||||
|
||||
// NewInmemSink is used to construct a new in-memory sink.
|
||||
// Uses an aggregation interval and maximum retention period.
|
||||
func NewInmemSink(interval, retain time.Duration) *InmemSink {
|
||||
rateTimeUnit := time.Second
|
||||
i := &InmemSink{
|
||||
interval: interval,
|
||||
retain: retain,
|
||||
maxIntervals: int(retain / interval),
|
||||
rateDenom: float64(interval.Nanoseconds()) / float64(rateTimeUnit.Nanoseconds()),
|
||||
}
|
||||
i.intervals = make([]*IntervalMetrics, 0, i.maxIntervals)
|
||||
return i
|
||||
}
|
||||
|
||||
func (i *InmemSink) SetGauge(key []string, val float32) {
|
||||
k := i.flattenKey(key)
|
||||
intv := i.getInterval()
|
||||
|
||||
intv.Lock()
|
||||
defer intv.Unlock()
|
||||
intv.Gauges[k] = val
|
||||
}
|
||||
|
||||
func (i *InmemSink) EmitKey(key []string, val float32) {
|
||||
k := i.flattenKey(key)
|
||||
intv := i.getInterval()
|
||||
|
||||
intv.Lock()
|
||||
defer intv.Unlock()
|
||||
vals := intv.Points[k]
|
||||
intv.Points[k] = append(vals, val)
|
||||
}
|
||||
|
||||
func (i *InmemSink) IncrCounter(key []string, val float32) {
|
||||
k := i.flattenKey(key)
|
||||
intv := i.getInterval()
|
||||
|
||||
intv.Lock()
|
||||
defer intv.Unlock()
|
||||
|
||||
agg := intv.Counters[k]
|
||||
if agg == nil {
|
||||
agg = &AggregateSample{}
|
||||
intv.Counters[k] = agg
|
||||
}
|
||||
agg.Ingest(float64(val), i.rateDenom)
|
||||
}
|
||||
|
||||
func (i *InmemSink) AddSample(key []string, val float32) {
|
||||
k := i.flattenKey(key)
|
||||
intv := i.getInterval()
|
||||
|
||||
intv.Lock()
|
||||
defer intv.Unlock()
|
||||
|
||||
agg := intv.Samples[k]
|
||||
if agg == nil {
|
||||
agg = &AggregateSample{}
|
||||
intv.Samples[k] = agg
|
||||
}
|
||||
agg.Ingest(float64(val), i.rateDenom)
|
||||
}
|
||||
|
||||
// Data is used to retrieve all the aggregated metrics
|
||||
// Intervals may be in use, and a read lock should be acquired
|
||||
func (i *InmemSink) Data() []*IntervalMetrics {
|
||||
// Get the current interval, forces creation
|
||||
i.getInterval()
|
||||
|
||||
i.intervalLock.RLock()
|
||||
defer i.intervalLock.RUnlock()
|
||||
|
||||
intervals := make([]*IntervalMetrics, len(i.intervals))
|
||||
copy(intervals, i.intervals)
|
||||
return intervals
|
||||
}
|
||||
|
||||
func (i *InmemSink) getExistingInterval(intv time.Time) *IntervalMetrics {
|
||||
i.intervalLock.RLock()
|
||||
defer i.intervalLock.RUnlock()
|
||||
|
||||
n := len(i.intervals)
|
||||
if n > 0 && i.intervals[n-1].Interval == intv {
|
||||
return i.intervals[n-1]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *InmemSink) createInterval(intv time.Time) *IntervalMetrics {
|
||||
i.intervalLock.Lock()
|
||||
defer i.intervalLock.Unlock()
|
||||
|
||||
// Check for an existing interval
|
||||
n := len(i.intervals)
|
||||
if n > 0 && i.intervals[n-1].Interval == intv {
|
||||
return i.intervals[n-1]
|
||||
}
|
||||
|
||||
// Add the current interval
|
||||
current := NewIntervalMetrics(intv)
|
||||
i.intervals = append(i.intervals, current)
|
||||
n++
|
||||
|
||||
// Truncate the intervals if they are too long
|
||||
if n >= i.maxIntervals {
|
||||
copy(i.intervals[0:], i.intervals[n-i.maxIntervals:])
|
||||
i.intervals = i.intervals[:i.maxIntervals]
|
||||
}
|
||||
return current
|
||||
}
|
||||
|
||||
// getInterval returns the current interval to write to
|
||||
func (i *InmemSink) getInterval() *IntervalMetrics {
|
||||
intv := time.Now().Truncate(i.interval)
|
||||
if m := i.getExistingInterval(intv); m != nil {
|
||||
return m
|
||||
}
|
||||
return i.createInterval(intv)
|
||||
}
|
||||
|
||||
// Flattens the key for formatting, removes spaces
|
||||
func (i *InmemSink) flattenKey(parts []string) string {
|
||||
joined := strings.Join(parts, ".")
|
||||
return strings.Replace(joined, " ", "_", -1)
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/signal"
|
||||
"sync"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// InmemSignal is used to listen for a given signal, and when received,
|
||||
// to dump the current metrics from the InmemSink to an io.Writer
|
||||
type InmemSignal struct {
|
||||
signal syscall.Signal
|
||||
inm *InmemSink
|
||||
w io.Writer
|
||||
sigCh chan os.Signal
|
||||
|
||||
stop bool
|
||||
stopCh chan struct{}
|
||||
stopLock sync.Mutex
|
||||
}
|
||||
|
||||
// NewInmemSignal creates a new InmemSignal which listens for a given signal,
|
||||
// and dumps the current metrics out to a writer
|
||||
func NewInmemSignal(inmem *InmemSink, sig syscall.Signal, w io.Writer) *InmemSignal {
|
||||
i := &InmemSignal{
|
||||
signal: sig,
|
||||
inm: inmem,
|
||||
w: w,
|
||||
sigCh: make(chan os.Signal, 1),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
signal.Notify(i.sigCh, sig)
|
||||
go i.run()
|
||||
return i
|
||||
}
|
||||
|
||||
// DefaultInmemSignal returns a new InmemSignal that responds to SIGUSR1
|
||||
// and writes output to stderr. Windows uses SIGBREAK
|
||||
func DefaultInmemSignal(inmem *InmemSink) *InmemSignal {
|
||||
return NewInmemSignal(inmem, DefaultSignal, os.Stderr)
|
||||
}
|
||||
|
||||
// Stop is used to stop the InmemSignal from listening
|
||||
func (i *InmemSignal) Stop() {
|
||||
i.stopLock.Lock()
|
||||
defer i.stopLock.Unlock()
|
||||
|
||||
if i.stop {
|
||||
return
|
||||
}
|
||||
i.stop = true
|
||||
close(i.stopCh)
|
||||
signal.Stop(i.sigCh)
|
||||
}
|
||||
|
||||
// run is a long running routine that handles signals
|
||||
func (i *InmemSignal) run() {
|
||||
for {
|
||||
select {
|
||||
case <-i.sigCh:
|
||||
i.dumpStats()
|
||||
case <-i.stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// dumpStats is used to dump the data to output writer
|
||||
func (i *InmemSignal) dumpStats() {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
|
||||
data := i.inm.Data()
|
||||
// Skip the last period which is still being aggregated
|
||||
for i := 0; i < len(data)-1; i++ {
|
||||
intv := data[i]
|
||||
intv.RLock()
|
||||
for name, val := range intv.Gauges {
|
||||
fmt.Fprintf(buf, "[%v][G] '%s': %0.3f\n", intv.Interval, name, val)
|
||||
}
|
||||
for name, vals := range intv.Points {
|
||||
for _, val := range vals {
|
||||
fmt.Fprintf(buf, "[%v][P] '%s': %0.3f\n", intv.Interval, name, val)
|
||||
}
|
||||
}
|
||||
for name, agg := range intv.Counters {
|
||||
fmt.Fprintf(buf, "[%v][C] '%s': %s\n", intv.Interval, name, agg)
|
||||
}
|
||||
for name, agg := range intv.Samples {
|
||||
fmt.Fprintf(buf, "[%v][S] '%s': %s\n", intv.Interval, name, agg)
|
||||
}
|
||||
intv.RUnlock()
|
||||
}
|
||||
|
||||
// Write out the bytes
|
||||
i.w.Write(buf.Bytes())
|
||||
}
|
|
@ -0,0 +1,115 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"time"
|
||||
)
|
||||
|
||||
func (m *Metrics) SetGauge(key []string, val float32) {
|
||||
if m.HostName != "" && m.EnableHostname {
|
||||
key = insert(0, m.HostName, key)
|
||||
}
|
||||
if m.EnableTypePrefix {
|
||||
key = insert(0, "gauge", key)
|
||||
}
|
||||
if m.ServiceName != "" {
|
||||
key = insert(0, m.ServiceName, key)
|
||||
}
|
||||
m.sink.SetGauge(key, val)
|
||||
}
|
||||
|
||||
func (m *Metrics) EmitKey(key []string, val float32) {
|
||||
if m.EnableTypePrefix {
|
||||
key = insert(0, "kv", key)
|
||||
}
|
||||
if m.ServiceName != "" {
|
||||
key = insert(0, m.ServiceName, key)
|
||||
}
|
||||
m.sink.EmitKey(key, val)
|
||||
}
|
||||
|
||||
func (m *Metrics) IncrCounter(key []string, val float32) {
|
||||
if m.EnableTypePrefix {
|
||||
key = insert(0, "counter", key)
|
||||
}
|
||||
if m.ServiceName != "" {
|
||||
key = insert(0, m.ServiceName, key)
|
||||
}
|
||||
m.sink.IncrCounter(key, val)
|
||||
}
|
||||
|
||||
func (m *Metrics) AddSample(key []string, val float32) {
|
||||
if m.EnableTypePrefix {
|
||||
key = insert(0, "sample", key)
|
||||
}
|
||||
if m.ServiceName != "" {
|
||||
key = insert(0, m.ServiceName, key)
|
||||
}
|
||||
m.sink.AddSample(key, val)
|
||||
}
|
||||
|
||||
func (m *Metrics) MeasureSince(key []string, start time.Time) {
|
||||
if m.EnableTypePrefix {
|
||||
key = insert(0, "timer", key)
|
||||
}
|
||||
if m.ServiceName != "" {
|
||||
key = insert(0, m.ServiceName, key)
|
||||
}
|
||||
now := time.Now()
|
||||
elapsed := now.Sub(start)
|
||||
msec := float32(elapsed.Nanoseconds()) / float32(m.TimerGranularity)
|
||||
m.sink.AddSample(key, msec)
|
||||
}
|
||||
|
||||
// Periodically collects runtime stats to publish
|
||||
func (m *Metrics) collectStats() {
|
||||
for {
|
||||
time.Sleep(m.ProfileInterval)
|
||||
m.emitRuntimeStats()
|
||||
}
|
||||
}
|
||||
|
||||
// Emits various runtime statsitics
|
||||
func (m *Metrics) emitRuntimeStats() {
|
||||
// Export number of Goroutines
|
||||
numRoutines := runtime.NumGoroutine()
|
||||
m.SetGauge([]string{"runtime", "num_goroutines"}, float32(numRoutines))
|
||||
|
||||
// Export memory stats
|
||||
var stats runtime.MemStats
|
||||
runtime.ReadMemStats(&stats)
|
||||
m.SetGauge([]string{"runtime", "alloc_bytes"}, float32(stats.Alloc))
|
||||
m.SetGauge([]string{"runtime", "sys_bytes"}, float32(stats.Sys))
|
||||
m.SetGauge([]string{"runtime", "malloc_count"}, float32(stats.Mallocs))
|
||||
m.SetGauge([]string{"runtime", "free_count"}, float32(stats.Frees))
|
||||
m.SetGauge([]string{"runtime", "heap_objects"}, float32(stats.HeapObjects))
|
||||
m.SetGauge([]string{"runtime", "total_gc_pause_ns"}, float32(stats.PauseTotalNs))
|
||||
m.SetGauge([]string{"runtime", "total_gc_runs"}, float32(stats.NumGC))
|
||||
|
||||
// Export info about the last few GC runs
|
||||
num := stats.NumGC
|
||||
|
||||
// Handle wrap around
|
||||
if num < m.lastNumGC {
|
||||
m.lastNumGC = 0
|
||||
}
|
||||
|
||||
// Ensure we don't scan more than 256
|
||||
if num-m.lastNumGC >= 256 {
|
||||
m.lastNumGC = num - 255
|
||||
}
|
||||
|
||||
for i := m.lastNumGC; i < num; i++ {
|
||||
pause := stats.PauseNs[i%256]
|
||||
m.AddSample([]string{"runtime", "gc_pause_ns"}, float32(pause))
|
||||
}
|
||||
m.lastNumGC = num
|
||||
}
|
||||
|
||||
// insert returns a slice holding the elements of s with v inserted at index
// i. The result always has its own backing array: appending to s in place
// and shifting would overwrite the caller's elements whenever s has spare
// capacity. It panics if i is negative or greater than len(s).
func insert(i int, v string, s []string) []string {
	// s[i:] enforces i <= len(s); the capped head cannot reach past i.
	head, tail := s[:i:i], s[i:]

	out := make([]string, 0, len(s)+1)
	out = append(out, head...)
	out = append(out, v)
	return append(out, tail...)
}
|
|
@ -0,0 +1,52 @@
|
|||
package metrics
|
||||
|
||||
// MetricSink is the interface used to transmit metrics information to an
// external system.
type MetricSink interface {
	// SetGauge sets a gauge; a gauge should retain the last value it is
	// set to.
	SetGauge(key []string, val float32)

	// EmitKey should emit a key/value pair for each call.
	EmitKey(key []string, val float32)

	// IncrCounter increments a counter; counters should accumulate values.
	IncrCounter(key []string, val float32)

	// AddSample adds a sample. Samples are for timing information, where
	// quantiles are used.
	AddSample(key []string, val float32)
}

// BlackholeSink is a MetricSink whose methods do nothing.
type BlackholeSink struct{}

// SetGauge does nothing.
func (*BlackholeSink) SetGauge(key []string, val float32) {}

// EmitKey does nothing.
func (*BlackholeSink) EmitKey(key []string, val float32) {}

// IncrCounter does nothing.
func (*BlackholeSink) IncrCounter(key []string, val float32) {}

// AddSample does nothing.
func (*BlackholeSink) AddSample(key []string, val float32) {}

// FanoutSink is a MetricSink that forwards every call to each of the sinks
// it contains, in slice order.
type FanoutSink []MetricSink

// SetGauge calls SetGauge on every contained sink.
func (fh FanoutSink) SetGauge(key []string, val float32) {
	for idx := range fh {
		fh[idx].SetGauge(key, val)
	}
}

// EmitKey calls EmitKey on every contained sink.
func (fh FanoutSink) EmitKey(key []string, val float32) {
	for idx := range fh {
		fh[idx].EmitKey(key, val)
	}
}

// IncrCounter calls IncrCounter on every contained sink.
func (fh FanoutSink) IncrCounter(key []string, val float32) {
	for idx := range fh {
		fh[idx].IncrCounter(key, val)
	}
}

// AddSample calls AddSample on every contained sink.
func (fh FanoutSink) AddSample(key []string, val float32) {
	for idx := range fh {
		fh[idx].AddSample(key, val)
	}
}
|
|
@ -0,0 +1,95 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Config holds the settings of a Metrics instance.
type Config struct {
	// ServiceName is prefixed to keys to separate services.
	ServiceName string
	// HostName is the hostname to use. If not provided and EnableHostname,
	// it will be os.Hostname.
	HostName string
	// EnableHostname enables prefixing gauge values with the hostname.
	EnableHostname bool
	// EnableRuntimeMetrics enables profiling of runtime metrics
	// (GC, Goroutines, Memory).
	EnableRuntimeMetrics bool
	// EnableTypePrefix prefixes keys with a type ("counter", "gauge", "timer").
	EnableTypePrefix bool
	// TimerGranularity is the granularity of timers.
	TimerGranularity time.Duration
	// ProfileInterval is the interval at which runtime metrics are profiled.
	ProfileInterval time.Duration
}
|
||||
|
||||
// Metrics represents an instance of a metrics sink that can
|
||||
// be used to emit
|
||||
type Metrics struct {
|
||||
Config
|
||||
lastNumGC uint32
|
||||
sink MetricSink
|
||||
}
|
||||
|
||||
// Shared global metrics instance
|
||||
var globalMetrics *Metrics
|
||||
|
||||
func init() {
|
||||
// Initialize to a blackhole sink to avoid errors
|
||||
globalMetrics = &Metrics{sink: &BlackholeSink{}}
|
||||
}
|
||||
|
||||
// DefaultConfig provides a sane default configuration
|
||||
func DefaultConfig(serviceName string) *Config {
|
||||
c := &Config{
|
||||
ServiceName: serviceName, // Use client provided service
|
||||
HostName: "",
|
||||
EnableHostname: true, // Enable hostname prefix
|
||||
EnableRuntimeMetrics: true, // Enable runtime profiling
|
||||
EnableTypePrefix: false, // Disable type prefix
|
||||
TimerGranularity: time.Millisecond, // Timers are in milliseconds
|
||||
ProfileInterval: time.Second, // Poll runtime every second
|
||||
}
|
||||
|
||||
// Try to get the hostname
|
||||
name, _ := os.Hostname()
|
||||
c.HostName = name
|
||||
return c
|
||||
}
|
||||
|
||||
// New is used to create a new instance of Metrics
|
||||
func New(conf *Config, sink MetricSink) (*Metrics, error) {
|
||||
met := &Metrics{}
|
||||
met.Config = *conf
|
||||
met.sink = sink
|
||||
|
||||
// Start the runtime collector
|
||||
if conf.EnableRuntimeMetrics {
|
||||
go met.collectStats()
|
||||
}
|
||||
return met, nil
|
||||
}
|
||||
|
||||
// NewGlobal is the same as New, but it assigns the metrics object to be
|
||||
// used globally as well as returning it.
|
||||
func NewGlobal(conf *Config, sink MetricSink) (*Metrics, error) {
|
||||
metrics, err := New(conf, sink)
|
||||
if err == nil {
|
||||
globalMetrics = metrics
|
||||
}
|
||||
return metrics, err
|
||||
}
|
||||
|
||||
// Proxy all the methods to the globalMetrics instance
|
||||
func SetGauge(key []string, val float32) {
|
||||
globalMetrics.SetGauge(key, val)
|
||||
}
|
||||
|
||||
func EmitKey(key []string, val float32) {
|
||||
globalMetrics.EmitKey(key, val)
|
||||
}
|
||||
|
||||
func IncrCounter(key []string, val float32) {
|
||||
globalMetrics.IncrCounter(key, val)
|
||||
}
|
||||
|
||||
func AddSample(key []string, val float32) {
|
||||
globalMetrics.AddSample(key, val)
|
||||
}
|
||||
|
||||
func MeasureSince(key []string, start time.Time) {
|
||||
globalMetrics.MeasureSince(key, start)
|
||||
}
|
|
@ -0,0 +1,154 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// statsdMaxLen is the maximum size of a packet to send to statsd.
const statsdMaxLen = 1400
|
||||
|
||||
// StatsdSink is a MetricSink for a statsite or statsd metrics server.
// It sends only UDP packets, whereas StatsiteSink uses TCP.
type StatsdSink struct {
	// addr is the server address that flushMetrics dials.
	addr string
	// metricQueue carries formatted metric lines to flushMetrics.
	metricQueue chan string
}
|
||||
|
||||
// NewStatsdSink is used to create a new StatsdSink
|
||||
func NewStatsdSink(addr string) (*StatsdSink, error) {
|
||||
s := &StatsdSink{
|
||||
addr: addr,
|
||||
metricQueue: make(chan string, 4096),
|
||||
}
|
||||
go s.flushMetrics()
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Close is used to stop flushing to statsd
|
||||
func (s *StatsdSink) Shutdown() {
|
||||
close(s.metricQueue)
|
||||
}
|
||||
|
||||
func (s *StatsdSink) SetGauge(key []string, val float32) {
|
||||
flatKey := s.flattenKey(key)
|
||||
s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
|
||||
}
|
||||
|
||||
func (s *StatsdSink) EmitKey(key []string, val float32) {
|
||||
flatKey := s.flattenKey(key)
|
||||
s.pushMetric(fmt.Sprintf("%s:%f|kv\n", flatKey, val))
|
||||
}
|
||||
|
||||
func (s *StatsdSink) IncrCounter(key []string, val float32) {
|
||||
flatKey := s.flattenKey(key)
|
||||
s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
|
||||
}
|
||||
|
||||
func (s *StatsdSink) AddSample(key []string, val float32) {
|
||||
flatKey := s.flattenKey(key)
|
||||
s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
|
||||
}
|
||||
|
||||
// Flattens the key for formatting, removes spaces
|
||||
func (s *StatsdSink) flattenKey(parts []string) string {
|
||||
joined := strings.Join(parts, ".")
|
||||
return strings.Map(func(r rune) rune {
|
||||
switch r {
|
||||
case ':':
|
||||
fallthrough
|
||||
case ' ':
|
||||
return '_'
|
||||
default:
|
||||
return r
|
||||
}
|
||||
}, joined)
|
||||
}
|
||||
|
||||
// Does a non-blocking push to the metrics queue
|
||||
func (s *StatsdSink) pushMetric(m string) {
|
||||
select {
|
||||
case s.metricQueue <- m:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// Flushes metrics
|
||||
func (s *StatsdSink) flushMetrics() {
|
||||
var sock net.Conn
|
||||
var err error
|
||||
var wait <-chan time.Time
|
||||
ticker := time.NewTicker(flushInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
CONNECT:
|
||||
// Create a buffer
|
||||
buf := bytes.NewBuffer(nil)
|
||||
|
||||
// Attempt to connect
|
||||
sock, err = net.Dial("udp", s.addr)
|
||||
if err != nil {
|
||||
log.Printf("[ERR] Error connecting to statsd! Err: %s", err)
|
||||
goto WAIT
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case metric, ok := <-s.metricQueue:
|
||||
// Get a metric from the queue
|
||||
if !ok {
|
||||
goto QUIT
|
||||
}
|
||||
|
||||
// Check if this would overflow the packet size
|
||||
if len(metric)+buf.Len() > statsdMaxLen {
|
||||
_, err := sock.Write(buf.Bytes())
|
||||
buf.Reset()
|
||||
if err != nil {
|
||||
log.Printf("[ERR] Error writing to statsd! Err: %s", err)
|
||||
goto WAIT
|
||||
}
|
||||
}
|
||||
|
||||
// Append to the buffer
|
||||
buf.WriteString(metric)
|
||||
|
||||
case <-ticker.C:
|
||||
if buf.Len() == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
_, err := sock.Write(buf.Bytes())
|
||||
buf.Reset()
|
||||
if err != nil {
|
||||
log.Printf("[ERR] Error flushing to statsd! Err: %s", err)
|
||||
goto WAIT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
WAIT:
|
||||
// Wait for a while
|
||||
wait = time.After(time.Duration(5) * time.Second)
|
||||
for {
|
||||
select {
|
||||
// Dequeue the messages to avoid backlog
|
||||
case _, ok := <-s.metricQueue:
|
||||
if !ok {
|
||||
goto QUIT
|
||||
}
|
||||
case <-wait:
|
||||
goto CONNECT
|
||||
}
|
||||
}
|
||||
QUIT:
|
||||
s.metricQueue = nil
|
||||
}
|
|
@ -0,0 +1,142 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// flushInterval is the period after which buffered metrics are force
// flushed, so that stats do not get stuck in a buffer forever.
const flushInterval = 100 * time.Millisecond
|
||||
|
||||
// StatsiteSink is a MetricSink for a statsite metrics server.
type StatsiteSink struct {
	// addr is the server address that flushMetrics dials.
	addr string
	// metricQueue carries formatted metric lines to flushMetrics.
	metricQueue chan string
}
|
||||
|
||||
// NewStatsiteSink is used to create a new StatsiteSink
|
||||
func NewStatsiteSink(addr string) (*StatsiteSink, error) {
|
||||
s := &StatsiteSink{
|
||||
addr: addr,
|
||||
metricQueue: make(chan string, 4096),
|
||||
}
|
||||
go s.flushMetrics()
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Close is used to stop flushing to statsite
|
||||
func (s *StatsiteSink) Shutdown() {
|
||||
close(s.metricQueue)
|
||||
}
|
||||
|
||||
func (s *StatsiteSink) SetGauge(key []string, val float32) {
|
||||
flatKey := s.flattenKey(key)
|
||||
s.pushMetric(fmt.Sprintf("%s:%f|g\n", flatKey, val))
|
||||
}
|
||||
|
||||
func (s *StatsiteSink) EmitKey(key []string, val float32) {
|
||||
flatKey := s.flattenKey(key)
|
||||
s.pushMetric(fmt.Sprintf("%s:%f|kv\n", flatKey, val))
|
||||
}
|
||||
|
||||
func (s *StatsiteSink) IncrCounter(key []string, val float32) {
|
||||
flatKey := s.flattenKey(key)
|
||||
s.pushMetric(fmt.Sprintf("%s:%f|c\n", flatKey, val))
|
||||
}
|
||||
|
||||
func (s *StatsiteSink) AddSample(key []string, val float32) {
|
||||
flatKey := s.flattenKey(key)
|
||||
s.pushMetric(fmt.Sprintf("%s:%f|ms\n", flatKey, val))
|
||||
}
|
||||
|
||||
// Flattens the key for formatting, removes spaces
|
||||
func (s *StatsiteSink) flattenKey(parts []string) string {
|
||||
joined := strings.Join(parts, ".")
|
||||
return strings.Map(func(r rune) rune {
|
||||
switch r {
|
||||
case ':':
|
||||
fallthrough
|
||||
case ' ':
|
||||
return '_'
|
||||
default:
|
||||
return r
|
||||
}
|
||||
}, joined)
|
||||
}
|
||||
|
||||
// Does a non-blocking push to the metrics queue
|
||||
func (s *StatsiteSink) pushMetric(m string) {
|
||||
select {
|
||||
case s.metricQueue <- m:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// Flushes metrics
|
||||
func (s *StatsiteSink) flushMetrics() {
|
||||
var sock net.Conn
|
||||
var err error
|
||||
var wait <-chan time.Time
|
||||
var buffered *bufio.Writer
|
||||
ticker := time.NewTicker(flushInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
CONNECT:
|
||||
// Attempt to connect
|
||||
sock, err = net.Dial("tcp", s.addr)
|
||||
if err != nil {
|
||||
log.Printf("[ERR] Error connecting to statsite! Err: %s", err)
|
||||
goto WAIT
|
||||
}
|
||||
|
||||
// Create a buffered writer
|
||||
buffered = bufio.NewWriter(sock)
|
||||
|
||||
for {
|
||||
select {
|
||||
case metric, ok := <-s.metricQueue:
|
||||
// Get a metric from the queue
|
||||
if !ok {
|
||||
goto QUIT
|
||||
}
|
||||
|
||||
// Try to send to statsite
|
||||
_, err := buffered.Write([]byte(metric))
|
||||
if err != nil {
|
||||
log.Printf("[ERR] Error writing to statsite! Err: %s", err)
|
||||
goto WAIT
|
||||
}
|
||||
case <-ticker.C:
|
||||
if err := buffered.Flush(); err != nil {
|
||||
log.Printf("[ERR] Error flushing to statsite! Err: %s", err)
|
||||
goto WAIT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
WAIT:
|
||||
// Wait for a while
|
||||
wait = time.After(time.Duration(5) * time.Second)
|
||||
for {
|
||||
select {
|
||||
// Dequeue the messages to avoid backlog
|
||||
case _, ok := <-s.metricQueue:
|
||||
if !ok {
|
||||
goto QUIT
|
||||
}
|
||||
case <-wait:
|
||||
goto CONNECT
|
||||
}
|
||||
}
|
||||
QUIT:
|
||||
s.metricQueue = nil
|
||||
}
|
|
@ -0,0 +1,672 @@
|
|||
package acl
|
||||
|
||||
import (
|
||||
"github.com/armon/go-radix"
|
||||
)
|
||||
|
||||
var (
|
||||
// allowAll is a singleton policy which allows all
|
||||
// non-management actions
|
||||
allowAll ACL
|
||||
|
||||
// denyAll is a singleton policy which denies all actions
|
||||
denyAll ACL
|
||||
|
||||
// manageAll is a singleton policy which allows all
|
||||
// actions, including management
|
||||
manageAll ACL
|
||||
)
|
||||
|
||||
func init() {
|
||||
// Setup the singletons
|
||||
allowAll = &StaticACL{
|
||||
allowManage: false,
|
||||
defaultAllow: true,
|
||||
}
|
||||
denyAll = &StaticACL{
|
||||
allowManage: false,
|
||||
defaultAllow: false,
|
||||
}
|
||||
manageAll = &StaticACL{
|
||||
allowManage: true,
|
||||
defaultAllow: true,
|
||||
}
|
||||
}
|
||||
|
||||
// ACL is the interface used for policy enforcement.
type ACL interface {
	// ACLList reports whether listing all the ACLs is permitted.
	ACLList() bool

	// ACLModify reports whether manipulating ACLs is permitted.
	ACLModify() bool

	// AgentRead reports whether reading from agent endpoints is permitted
	// for a given node.
	AgentRead(string) bool

	// AgentWrite reports whether making changes via agent endpoints is
	// permitted for a given node.
	AgentWrite(string) bool

	// EventRead reports whether a specific event can be queried.
	EventRead(string) bool

	// EventWrite reports whether a specific event may be fired.
	EventWrite(string) bool

	// KeyRead reports whether a given key may be read.
	KeyRead(string) bool

	// KeyWrite reports whether a given key may be written.
	KeyWrite(string) bool

	// KeyWritePrefix reports whether an entire key prefix may be written.
	// This means there must be no sub-policies that deny a write.
	KeyWritePrefix(string) bool

	// KeyringRead reports whether the encryption keyring used in the
	// gossip layer can be read.
	KeyringRead() bool

	// KeyringWrite reports whether the keyring can be manipulated.
	KeyringWrite() bool

	// NodeRead reports whether a given node may be read (discovered).
	NodeRead(string) bool

	// NodeWrite reports whether a given node may be created or updated
	// (registered).
	NodeWrite(string) bool

	// OperatorRead reports whether the read-only Consul operator functions
	// can be used.
	OperatorRead() bool

	// OperatorWrite reports whether the state-changing Consul operator
	// functions can be used.
	OperatorWrite() bool

	// PreparedQueryRead reports whether a specific prepared query can be
	// read to show its contents (this is not used for execution).
	PreparedQueryRead(string) bool

	// PreparedQueryWrite reports whether a specific prepared query can be
	// created, modified, or deleted.
	PreparedQueryWrite(string) bool

	// ServiceRead reports whether a given service may be read.
	ServiceRead(string) bool

	// ServiceWrite reports whether a given service may be created or
	// updated.
	ServiceWrite(string) bool

	// SessionRead reports whether sessions may be read for a given node.
	SessionRead(string) bool

	// SessionWrite reports whether sessions may be created for a given
	// node.
	SessionWrite(string) bool

	// Snapshot reports whether taking and restoring snapshots is permitted.
	Snapshot() bool
}
|
||||
|
||||
// StaticACL implements a base ACL policy that answers every request from
// two fixed flags. It can be used as a parent ACL to act in a blacklist or
// whitelist mode.
type StaticACL struct {
	// allowManage is the answer for ACLList, ACLModify and Snapshot.
	allowManage bool
	// defaultAllow is the answer for every other check.
	defaultAllow bool
}

// Management checks: answered by allowManage.

// ACLList reports allowManage.
func (a *StaticACL) ACLList() bool { return a.allowManage }

// ACLModify reports allowManage.
func (a *StaticACL) ACLModify() bool { return a.allowManage }

// Snapshot reports allowManage.
func (a *StaticACL) Snapshot() bool { return a.allowManage }

// All remaining checks ignore their argument and are answered by
// defaultAllow.

// AgentRead reports defaultAllow.
func (a *StaticACL) AgentRead(string) bool { return a.defaultAllow }

// AgentWrite reports defaultAllow.
func (a *StaticACL) AgentWrite(string) bool { return a.defaultAllow }

// EventRead reports defaultAllow.
func (a *StaticACL) EventRead(string) bool { return a.defaultAllow }

// EventWrite reports defaultAllow.
func (a *StaticACL) EventWrite(string) bool { return a.defaultAllow }

// KeyRead reports defaultAllow.
func (a *StaticACL) KeyRead(string) bool { return a.defaultAllow }

// KeyWrite reports defaultAllow.
func (a *StaticACL) KeyWrite(string) bool { return a.defaultAllow }

// KeyWritePrefix reports defaultAllow.
func (a *StaticACL) KeyWritePrefix(string) bool { return a.defaultAllow }

// KeyringRead reports defaultAllow.
func (a *StaticACL) KeyringRead() bool { return a.defaultAllow }

// KeyringWrite reports defaultAllow.
func (a *StaticACL) KeyringWrite() bool { return a.defaultAllow }

// NodeRead reports defaultAllow.
func (a *StaticACL) NodeRead(string) bool { return a.defaultAllow }

// NodeWrite reports defaultAllow.
func (a *StaticACL) NodeWrite(string) bool { return a.defaultAllow }

// OperatorRead reports defaultAllow.
func (a *StaticACL) OperatorRead() bool { return a.defaultAllow }

// OperatorWrite reports defaultAllow.
func (a *StaticACL) OperatorWrite() bool { return a.defaultAllow }

// PreparedQueryRead reports defaultAllow.
func (a *StaticACL) PreparedQueryRead(string) bool { return a.defaultAllow }

// PreparedQueryWrite reports defaultAllow.
func (a *StaticACL) PreparedQueryWrite(string) bool { return a.defaultAllow }

// ServiceRead reports defaultAllow.
func (a *StaticACL) ServiceRead(string) bool { return a.defaultAllow }

// ServiceWrite reports defaultAllow.
func (a *StaticACL) ServiceWrite(string) bool { return a.defaultAllow }

// SessionRead reports defaultAllow.
func (a *StaticACL) SessionRead(string) bool { return a.defaultAllow }

// SessionWrite reports defaultAllow.
func (a *StaticACL) SessionWrite(string) bool { return a.defaultAllow }
|
||||
|
||||
// AllowAll returns an ACL rule that allows all operations
|
||||
func AllowAll() ACL {
|
||||
return allowAll
|
||||
}
|
||||
|
||||
// DenyAll returns an ACL rule that denies all operations
|
||||
func DenyAll() ACL {
|
||||
return denyAll
|
||||
}
|
||||
|
||||
// ManageAll returns an ACL rule that can manage all resources
|
||||
func ManageAll() ACL {
|
||||
return manageAll
|
||||
}
|
||||
|
||||
// RootACL returns a possible ACL if the ID matches a root policy
|
||||
func RootACL(id string) ACL {
|
||||
switch id {
|
||||
case "allow":
|
||||
return allowAll
|
||||
case "deny":
|
||||
return denyAll
|
||||
case "manage":
|
||||
return manageAll
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// PolicyACL is used to wrap a set of ACL policies to provide
|
||||
// the ACL interface.
|
||||
type PolicyACL struct {
|
||||
// parent is used to resolve policy if we have
|
||||
// no matching rule.
|
||||
parent ACL
|
||||
|
||||
// agentRules contains the agent policies
|
||||
agentRules *radix.Tree
|
||||
|
||||
// keyRules contains the key policies
|
||||
keyRules *radix.Tree
|
||||
|
||||
// nodeRules contains the node policies
|
||||
nodeRules *radix.Tree
|
||||
|
||||
// serviceRules contains the service policies
|
||||
serviceRules *radix.Tree
|
||||
|
||||
// sessionRules contains the session policies
|
||||
sessionRules *radix.Tree
|
||||
|
||||
// eventRules contains the user event policies
|
||||
eventRules *radix.Tree
|
||||
|
||||
// preparedQueryRules contains the prepared query policies
|
||||
preparedQueryRules *radix.Tree
|
||||
|
||||
// keyringRule contains the keyring policies. The keyring has
|
||||
// a very simple yes/no without prefix matching, so here we
|
||||
// don't need to use a radix tree.
|
||||
keyringRule string
|
||||
|
||||
// operatorRule contains the operator policies.
|
||||
operatorRule string
|
||||
}
|
||||
|
||||
// New is used to construct a policy based ACL from a set of policies
|
||||
// and a parent policy to resolve missing cases.
|
||||
func New(parent ACL, policy *Policy) (*PolicyACL, error) {
|
||||
p := &PolicyACL{
|
||||
parent: parent,
|
||||
agentRules: radix.New(),
|
||||
keyRules: radix.New(),
|
||||
nodeRules: radix.New(),
|
||||
serviceRules: radix.New(),
|
||||
sessionRules: radix.New(),
|
||||
eventRules: radix.New(),
|
||||
preparedQueryRules: radix.New(),
|
||||
}
|
||||
|
||||
// Load the agent policy
|
||||
for _, ap := range policy.Agents {
|
||||
p.agentRules.Insert(ap.Node, ap.Policy)
|
||||
}
|
||||
|
||||
// Load the key policy
|
||||
for _, kp := range policy.Keys {
|
||||
p.keyRules.Insert(kp.Prefix, kp.Policy)
|
||||
}
|
||||
|
||||
// Load the node policy
|
||||
for _, np := range policy.Nodes {
|
||||
p.nodeRules.Insert(np.Name, np.Policy)
|
||||
}
|
||||
|
||||
// Load the service policy
|
||||
for _, sp := range policy.Services {
|
||||
p.serviceRules.Insert(sp.Name, sp.Policy)
|
||||
}
|
||||
|
||||
// Load the session policy
|
||||
for _, sp := range policy.Sessions {
|
||||
p.sessionRules.Insert(sp.Node, sp.Policy)
|
||||
}
|
||||
|
||||
// Load the event policy
|
||||
for _, ep := range policy.Events {
|
||||
p.eventRules.Insert(ep.Event, ep.Policy)
|
||||
}
|
||||
|
||||
// Load the prepared query policy
|
||||
for _, pq := range policy.PreparedQueries {
|
||||
p.preparedQueryRules.Insert(pq.Prefix, pq.Policy)
|
||||
}
|
||||
|
||||
// Load the keyring policy
|
||||
p.keyringRule = policy.Keyring
|
||||
|
||||
// Load the operator policy
|
||||
p.operatorRule = policy.Operator
|
||||
|
||||
return p, nil
|
||||
}
|
||||
|
||||
// ACLList checks if listing of ACLs is allowed
|
||||
func (p *PolicyACL) ACLList() bool {
|
||||
return p.parent.ACLList()
|
||||
}
|
||||
|
||||
// ACLModify checks if modification of ACLs is allowed
|
||||
func (p *PolicyACL) ACLModify() bool {
|
||||
return p.parent.ACLModify()
|
||||
}
|
||||
|
||||
// AgentRead checks for permission to read from agent endpoints for a given
|
||||
// node.
|
||||
func (p *PolicyACL) AgentRead(node string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.agentRules.LongestPrefix(node)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyRead, PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.AgentRead(node)
|
||||
}
|
||||
|
||||
// AgentWrite checks for permission to make changes via agent endpoints for a
|
||||
// given node.
|
||||
func (p *PolicyACL) AgentWrite(node string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.agentRules.LongestPrefix(node)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.AgentWrite(node)
|
||||
}
|
||||
|
||||
// Snapshot checks if taking and restoring snapshots is allowed.
|
||||
func (p *PolicyACL) Snapshot() bool {
|
||||
return p.parent.Snapshot()
|
||||
}
|
||||
|
||||
// EventRead is used to determine if the policy allows for a
|
||||
// specific user event to be read.
|
||||
func (p *PolicyACL) EventRead(name string) bool {
|
||||
// Longest-prefix match on event names
|
||||
if _, rule, ok := p.eventRules.LongestPrefix(name); ok {
|
||||
switch rule {
|
||||
case PolicyRead, PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing matched, use parent
|
||||
return p.parent.EventRead(name)
|
||||
}
|
||||
|
||||
// EventWrite is used to determine if new events can be created
|
||||
// (fired) by the policy.
|
||||
func (p *PolicyACL) EventWrite(name string) bool {
|
||||
// Longest-prefix match event names
|
||||
if _, rule, ok := p.eventRules.LongestPrefix(name); ok {
|
||||
return rule == PolicyWrite
|
||||
}
|
||||
|
||||
// No match, use parent
|
||||
return p.parent.EventWrite(name)
|
||||
}
|
||||
|
||||
// KeyRead returns if a key is allowed to be read
|
||||
func (p *PolicyACL) KeyRead(key string) bool {
|
||||
// Look for a matching rule
|
||||
_, rule, ok := p.keyRules.LongestPrefix(key)
|
||||
if ok {
|
||||
switch rule.(string) {
|
||||
case PolicyRead, PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.KeyRead(key)
|
||||
}
|
||||
|
||||
// KeyWrite returns if a key is allowed to be written
|
||||
func (p *PolicyACL) KeyWrite(key string) bool {
|
||||
// Look for a matching rule
|
||||
_, rule, ok := p.keyRules.LongestPrefix(key)
|
||||
if ok {
|
||||
switch rule.(string) {
|
||||
case PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.KeyWrite(key)
|
||||
}
|
||||
|
||||
// KeyWritePrefix returns if a prefix is allowed to be written
|
||||
func (p *PolicyACL) KeyWritePrefix(prefix string) bool {
|
||||
// Look for a matching rule that denies
|
||||
_, rule, ok := p.keyRules.LongestPrefix(prefix)
|
||||
if ok && rule.(string) != PolicyWrite {
|
||||
return false
|
||||
}
|
||||
|
||||
// Look if any of our children have a deny policy
|
||||
deny := false
|
||||
p.keyRules.WalkPrefix(prefix, func(path string, rule interface{}) bool {
|
||||
// We have a rule to prevent a write in a sub-directory!
|
||||
if rule.(string) != PolicyWrite {
|
||||
deny = true
|
||||
return true
|
||||
}
|
||||
return false
|
||||
})
|
||||
|
||||
// Deny the write if any sub-rules may be violated
|
||||
if deny {
|
||||
return false
|
||||
}
|
||||
|
||||
// If we had a matching rule, done
|
||||
if ok {
|
||||
return true
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.KeyWritePrefix(prefix)
|
||||
}
|
||||
|
||||
// KeyringRead is used to determine if the keyring can be
|
||||
// read by the current ACL token.
|
||||
func (p *PolicyACL) KeyringRead() bool {
|
||||
switch p.keyringRule {
|
||||
case PolicyRead, PolicyWrite:
|
||||
return true
|
||||
case PolicyDeny:
|
||||
return false
|
||||
default:
|
||||
return p.parent.KeyringRead()
|
||||
}
|
||||
}
|
||||
|
||||
// KeyringWrite determines if the keyring can be manipulated.
|
||||
func (p *PolicyACL) KeyringWrite() bool {
|
||||
if p.keyringRule == PolicyWrite {
|
||||
return true
|
||||
}
|
||||
return p.parent.KeyringWrite()
|
||||
}
|
||||
|
||||
// OperatorRead determines if the read-only operator functions are allowed.
|
||||
func (p *PolicyACL) OperatorRead() bool {
|
||||
switch p.operatorRule {
|
||||
case PolicyRead, PolicyWrite:
|
||||
return true
|
||||
case PolicyDeny:
|
||||
return false
|
||||
default:
|
||||
return p.parent.OperatorRead()
|
||||
}
|
||||
}
|
||||
|
||||
// NodeRead checks if reading (discovery) of a node is allowed
|
||||
func (p *PolicyACL) NodeRead(name string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.nodeRules.LongestPrefix(name)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyRead, PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.NodeRead(name)
|
||||
}
|
||||
|
||||
// NodeWrite checks if writing (registering) a node is allowed
|
||||
func (p *PolicyACL) NodeWrite(name string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.nodeRules.LongestPrefix(name)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.NodeWrite(name)
|
||||
}
|
||||
|
||||
// OperatorWrite determines if the state-changing operator functions are
|
||||
// allowed.
|
||||
func (p *PolicyACL) OperatorWrite() bool {
|
||||
if p.operatorRule == PolicyWrite {
|
||||
return true
|
||||
}
|
||||
return p.parent.OperatorWrite()
|
||||
}
|
||||
|
||||
// PreparedQueryRead checks if reading (listing) of a prepared query is
|
||||
// allowed - this isn't execution, just listing its contents.
|
||||
func (p *PolicyACL) PreparedQueryRead(prefix string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.preparedQueryRules.LongestPrefix(prefix)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyRead, PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.PreparedQueryRead(prefix)
|
||||
}
|
||||
|
||||
// PreparedQueryWrite checks if writing (creating, updating, or deleting) of a
|
||||
// prepared query is allowed.
|
||||
func (p *PolicyACL) PreparedQueryWrite(prefix string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.preparedQueryRules.LongestPrefix(prefix)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.PreparedQueryWrite(prefix)
|
||||
}
|
||||
|
||||
// ServiceRead checks if reading (discovery) of a service is allowed
|
||||
func (p *PolicyACL) ServiceRead(name string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.serviceRules.LongestPrefix(name)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyRead, PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.ServiceRead(name)
|
||||
}
|
||||
|
||||
// ServiceWrite checks if writing (registering) a service is allowed
|
||||
func (p *PolicyACL) ServiceWrite(name string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.serviceRules.LongestPrefix(name)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.ServiceWrite(name)
|
||||
}
|
||||
|
||||
// SessionRead checks for permission to read sessions for a given node.
|
||||
func (p *PolicyACL) SessionRead(node string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.sessionRules.LongestPrefix(node)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyRead, PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.SessionRead(node)
|
||||
}
|
||||
|
||||
// SessionWrite checks for permission to create sessions for a given node.
|
||||
func (p *PolicyACL) SessionWrite(node string) bool {
|
||||
// Check for an exact rule or catch-all
|
||||
_, rule, ok := p.sessionRules.LongestPrefix(node)
|
||||
|
||||
if ok {
|
||||
switch rule {
|
||||
case PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// No matching rule, use the parent.
|
||||
return p.parent.SessionWrite(node)
|
||||
}
|
|
@ -0,0 +1,177 @@
|
|||
package acl
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
|
||||
"github.com/hashicorp/golang-lru"
|
||||
)
|
||||
|
||||
// FaultFunc faults in an ACL by its ID. It returns the ACL's parent and
// its rules, in that order, or an error.
type FaultFunc func(id string) (string, string, error)
|
||||
|
||||
// aclEntry allows us to store the ACL with it's policy ID
|
||||
type aclEntry struct {
|
||||
ACL ACL
|
||||
Parent string
|
||||
RuleID string
|
||||
}
|
||||
|
||||
// Cache is used to implement policy and ACL caching
|
||||
type Cache struct {
|
||||
faultfn FaultFunc
|
||||
aclCache *lru.TwoQueueCache // Cache id -> acl
|
||||
policyCache *lru.TwoQueueCache // Cache policy -> acl
|
||||
ruleCache *lru.TwoQueueCache // Cache rules -> policy
|
||||
}
|
||||
|
||||
// NewCache constructs a new policy and ACL cache of a given size
|
||||
func NewCache(size int, faultfn FaultFunc) (*Cache, error) {
|
||||
if size <= 0 {
|
||||
return nil, fmt.Errorf("Must provide positive cache size")
|
||||
}
|
||||
|
||||
rc, err := lru.New2Q(size)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pc, err := lru.New2Q(size)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ac, err := lru.New2Q(size)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
c := &Cache{
|
||||
faultfn: faultfn,
|
||||
aclCache: ac,
|
||||
policyCache: pc,
|
||||
ruleCache: rc,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// GetPolicy is used to get a potentially cached policy set.
|
||||
// If not cached, it will be parsed, and then cached.
|
||||
func (c *Cache) GetPolicy(rules string) (*Policy, error) {
|
||||
return c.getPolicy(RuleID(rules), rules)
|
||||
}
|
||||
|
||||
// getPolicy is an internal method to get a cached policy,
|
||||
// but it assumes a pre-computed ID
|
||||
func (c *Cache) getPolicy(id, rules string) (*Policy, error) {
|
||||
raw, ok := c.ruleCache.Get(id)
|
||||
if ok {
|
||||
return raw.(*Policy), nil
|
||||
}
|
||||
policy, err := Parse(rules)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
policy.ID = id
|
||||
c.ruleCache.Add(id, policy)
|
||||
return policy, nil
|
||||
|
||||
}
|
||||
|
||||
// RuleID generates the ID for a rule set: the MD5 digest of rules,
// formatted as lowercase hexadecimal.
func RuleID(rules string) string {
	digest := md5.Sum([]byte(rules))
	return fmt.Sprintf("%x", digest)
}
|
||||
|
||||
// policyID returns the cache ID for a policy
|
||||
func (c *Cache) policyID(parent, ruleID string) string {
|
||||
return parent + ":" + ruleID
|
||||
}
|
||||
|
||||
// GetACLPolicy is used to get the potentially cached ACL
|
||||
// policy. If not cached, it will be generated and then cached.
|
||||
func (c *Cache) GetACLPolicy(id string) (string, *Policy, error) {
|
||||
// Check for a cached acl
|
||||
if raw, ok := c.aclCache.Get(id); ok {
|
||||
cached := raw.(aclEntry)
|
||||
if raw, ok := c.ruleCache.Get(cached.RuleID); ok {
|
||||
return cached.Parent, raw.(*Policy), nil
|
||||
}
|
||||
}
|
||||
|
||||
// Fault in the rules
|
||||
parent, rules, err := c.faultfn(id)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
// Get cached
|
||||
policy, err := c.GetPolicy(rules)
|
||||
return parent, policy, err
|
||||
}
|
||||
|
||||
// GetACL is used to get a potentially cached ACL policy.
|
||||
// If not cached, it will be generated and then cached.
|
||||
func (c *Cache) GetACL(id string) (ACL, error) {
|
||||
// Look for the ACL directly
|
||||
raw, ok := c.aclCache.Get(id)
|
||||
if ok {
|
||||
return raw.(aclEntry).ACL, nil
|
||||
}
|
||||
|
||||
// Get the rules
|
||||
parentID, rules, err := c.faultfn(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ruleID := RuleID(rules)
|
||||
|
||||
// Check for a compiled ACL
|
||||
policyID := c.policyID(parentID, ruleID)
|
||||
var compiled ACL
|
||||
if raw, ok := c.policyCache.Get(policyID); ok {
|
||||
compiled = raw.(ACL)
|
||||
} else {
|
||||
// Get the policy
|
||||
policy, err := c.getPolicy(ruleID, rules)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Get the parent ACL
|
||||
parent := RootACL(parentID)
|
||||
if parent == nil {
|
||||
parent, err = c.GetACL(parentID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Compile the ACL
|
||||
acl, err := New(parent, policy)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Cache the compiled ACL
|
||||
c.policyCache.Add(policyID, acl)
|
||||
compiled = acl
|
||||
}
|
||||
|
||||
// Cache and return the ACL
|
||||
c.aclCache.Add(id, aclEntry{compiled, parentID, ruleID})
|
||||
return compiled, nil
|
||||
}
|
||||
|
||||
// ClearACL is used to clear the ACL cache if any
|
||||
func (c *Cache) ClearACL(id string) {
|
||||
c.aclCache.Remove(id)
|
||||
}
|
||||
|
||||
// Purge is used to clear all the ACL caches. The
|
||||
// rule and policy caches are not purged, since they
|
||||
// are content-hashed anyways.
|
||||
func (c *Cache) Purge() {
|
||||
c.aclCache.Purge()
|
||||
}
|
|
@ -0,0 +1,191 @@
|
|||
package acl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/hashicorp/hcl"
|
||||
)
|
||||
|
||||
const (
|
||||
PolicyDeny = "deny"
|
||||
PolicyRead = "read"
|
||||
PolicyWrite = "write"
|
||||
)
|
||||
|
||||
// Policy is used to represent the policy specified by
|
||||
// an ACL configuration.
|
||||
type Policy struct {
|
||||
ID string `hcl:"-"`
|
||||
Agents []*AgentPolicy `hcl:"agent,expand"`
|
||||
Keys []*KeyPolicy `hcl:"key,expand"`
|
||||
Nodes []*NodePolicy `hcl:"node,expand"`
|
||||
Services []*ServicePolicy `hcl:"service,expand"`
|
||||
Sessions []*SessionPolicy `hcl:"session,expand"`
|
||||
Events []*EventPolicy `hcl:"event,expand"`
|
||||
PreparedQueries []*PreparedQueryPolicy `hcl:"query,expand"`
|
||||
Keyring string `hcl:"keyring"`
|
||||
Operator string `hcl:"operator"`
|
||||
}
|
||||
|
||||
// AgentPolicy is the policy applied to agent endpoints on nodes whose
// names match a given prefix.
type AgentPolicy struct {
	Node   string `hcl:",key"`
	Policy string
}

// GoString formats the pointed-to struct value, so %#v output shows the
// fields instead of a pointer address.
func (a *AgentPolicy) GoString() string {
	value := *a
	return fmt.Sprintf("%#v", value)
}
|
||||
|
||||
// KeyPolicy is the policy applied to a key prefix.
type KeyPolicy struct {
	Prefix string `hcl:",key"`
	Policy string
}

// GoString formats the pointed-to struct value, so %#v output shows the
// fields instead of a pointer address.
func (k *KeyPolicy) GoString() string {
	value := *k
	return fmt.Sprintf("%#v", value)
}
|
||||
|
||||
// NodePolicy is the policy applied to a node.
type NodePolicy struct {
	Name   string `hcl:",key"`
	Policy string
}

// GoString formats the pointed-to struct value, so %#v output shows the
// fields instead of a pointer address.
func (n *NodePolicy) GoString() string {
	value := *n
	return fmt.Sprintf("%#v", value)
}
|
||||
|
||||
// ServicePolicy is the policy applied to a service.
type ServicePolicy struct {
	Name   string `hcl:",key"`
	Policy string
}

// GoString formats the pointed-to struct value, so %#v output shows the
// fields instead of a pointer address.
func (s *ServicePolicy) GoString() string {
	value := *s
	return fmt.Sprintf("%#v", value)
}
|
||||
|
||||
// SessionPolicy is the policy for creating sessions tied to nodes whose
// names match a given prefix.
type SessionPolicy struct {
	Node   string `hcl:",key"`
	Policy string
}

// GoString formats the pointed-to struct value, so %#v output shows the
// fields instead of a pointer address.
func (s *SessionPolicy) GoString() string {
	value := *s
	return fmt.Sprintf("%#v", value)
}
|
||||
|
||||
// EventPolicy is the policy applied to a user event.
type EventPolicy struct {
	Event  string `hcl:",key"`
	Policy string
}

// GoString formats the pointed-to struct value, so %#v output shows the
// fields instead of a pointer address.
func (e *EventPolicy) GoString() string {
	value := *e
	return fmt.Sprintf("%#v", value)
}
|
||||
|
||||
// PreparedQueryPolicy is the policy applied to a prepared query prefix.
type PreparedQueryPolicy struct {
	Prefix string `hcl:",key"`
	Policy string
}

// GoString formats the pointed-to struct value, so %#v output shows the
// fields instead of a pointer address.
func (p *PreparedQueryPolicy) GoString() string {
	value := *p
	return fmt.Sprintf("%#v", value)
}
|
||||
|
||||
// isPolicyValid makes sure the given string matches one of the valid policies.
|
||||
func isPolicyValid(policy string) bool {
|
||||
switch policy {
|
||||
case PolicyDeny:
|
||||
return true
|
||||
case PolicyRead:
|
||||
return true
|
||||
case PolicyWrite:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Parse is used to parse the specified ACL rules into an
|
||||
// intermediary set of policies, before being compiled into
|
||||
// the ACL
|
||||
func Parse(rules string) (*Policy, error) {
|
||||
// Decode the rules
|
||||
p := &Policy{}
|
||||
if rules == "" {
|
||||
// Hot path for empty rules
|
||||
return p, nil
|
||||
}
|
||||
|
||||
if err := hcl.Decode(p, rules); err != nil {
|
||||
return nil, fmt.Errorf("Failed to parse ACL rules: %v", err)
|
||||
}
|
||||
|
||||
// Validate the agent policy
|
||||
for _, ap := range p.Agents {
|
||||
if !isPolicyValid(ap.Policy) {
|
||||
return nil, fmt.Errorf("Invalid agent policy: %#v", ap)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the key policy
|
||||
for _, kp := range p.Keys {
|
||||
if !isPolicyValid(kp.Policy) {
|
||||
return nil, fmt.Errorf("Invalid key policy: %#v", kp)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the node policies
|
||||
for _, np := range p.Nodes {
|
||||
if !isPolicyValid(np.Policy) {
|
||||
return nil, fmt.Errorf("Invalid node policy: %#v", np)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the service policies
|
||||
for _, sp := range p.Services {
|
||||
if !isPolicyValid(sp.Policy) {
|
||||
return nil, fmt.Errorf("Invalid service policy: %#v", sp)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the session policies
|
||||
for _, sp := range p.Sessions {
|
||||
if !isPolicyValid(sp.Policy) {
|
||||
return nil, fmt.Errorf("Invalid session policy: %#v", sp)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the user event policies
|
||||
for _, ep := range p.Events {
|
||||
if !isPolicyValid(ep.Policy) {
|
||||
return nil, fmt.Errorf("Invalid event policy: %#v", ep)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the prepared query policies
|
||||
for _, pq := range p.PreparedQueries {
|
||||
if !isPolicyValid(pq.Policy) {
|
||||
return nil, fmt.Errorf("Invalid query policy: %#v", pq)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the keyring policy - this one is allowed to be empty
|
||||
if p.Keyring != "" && !isPolicyValid(p.Keyring) {
|
||||
return nil, fmt.Errorf("Invalid keyring policy: %#v", p.Keyring)
|
||||
}
|
||||
|
||||
// Validate the operator policy - this one is allowed to be empty
|
||||
if p.Operator != "" && !isPolicyValid(p.Operator) {
|
||||
return nil, fmt.Errorf("Invalid operator policy: %#v", p.Operator)
|
||||
}
|
||||
|
||||
return p, nil
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
package structs
|
||||
|
||||
import (
|
||||
"github.com/hashicorp/raft"
|
||||
)
|
||||
|
||||
// RaftServer has information about a server in the Raft configuration.
|
||||
type RaftServer struct {
|
||||
// ID is the unique ID for the server. These are currently the same
|
||||
// as the address, but they will be changed to a real GUID in a future
|
||||
// release of Consul.
|
||||
ID raft.ServerID
|
||||
|
||||
// Node is the node name of the server, as known by Consul, or this
|
||||
// will be set to "(unknown)" otherwise.
|
||||
Node string
|
||||
|
||||
// Address is the IP:port of the server, used for Raft communications.
|
||||
Address raft.ServerAddress
|
||||
|
||||
// Leader is true if this server is the current cluster leader.
|
||||
Leader bool
|
||||
|
||||
// Voter is true if this server has a vote in the cluster. This might
|
||||
// be false if the server is staging and still coming online, or if
|
||||
// it's a non-voting server, which will be added in a future release of
|
||||
// Consul.
|
||||
Voter bool
|
||||
}
|
||||
|
||||
// RaftConfigurationResponse is returned when querying for the current Raft
|
||||
// configuration.
|
||||
type RaftConfigurationResponse struct {
|
||||
// Servers has the list of servers in the Raft configuration.
|
||||
Servers []*RaftServer
|
||||
|
||||
// Index has the Raft index of this configuration.
|
||||
Index uint64
|
||||
}
|
||||
|
||||
// RaftPeerByAddressRequest is used by the Operator endpoint to apply a Raft
|
||||
// operation on a specific Raft peer by address in the form of "IP:port".
|
||||
type RaftPeerByAddressRequest struct {
|
||||
// Datacenter is the target this request is intended for.
|
||||
Datacenter string
|
||||
|
||||
// Address is the peer to remove, in the form "IP:port".
|
||||
Address raft.ServerAddress
|
||||
|
||||
// WriteRequest holds the ACL token to go along with this request.
|
||||
WriteRequest
|
||||
}
|
||||
|
||||
// RequestDatacenter returns the datacenter for a given request.
|
||||
func (op *RaftPeerByAddressRequest) RequestDatacenter() string {
|
||||
return op.Datacenter
|
||||
}
|
257
vendor/github.com/hashicorp/consul/consul/structs/prepared_query.go
generated
vendored
Normal file
257
vendor/github.com/hashicorp/consul/consul/structs/prepared_query.go
generated
vendored
Normal file
|
@ -0,0 +1,257 @@
|
|||
package structs
|
||||
|
||||
// QueryDatacenterOptions sets options about how we fail over if there are no
|
||||
// healthy nodes in the local datacenter.
|
||||
type QueryDatacenterOptions struct {
|
||||
// NearestN is set to the number of remote datacenters to try, based on
|
||||
// network coordinates.
|
||||
NearestN int
|
||||
|
||||
// Datacenters is a fixed list of datacenters to try after NearestN. We
|
||||
// never try a datacenter multiple times, so those are subtracted from
|
||||
// this list before proceeding.
|
||||
Datacenters []string
|
||||
}
|
||||
|
||||
// QueryDNSOptions controls settings when query results are served over DNS.
|
||||
type QueryDNSOptions struct {
|
||||
// TTL is the time to live for the served DNS results.
|
||||
TTL string
|
||||
}
|
||||
|
||||
// ServiceQuery is used to query for a set of healthy nodes offering a specific
|
||||
// service.
|
||||
type ServiceQuery struct {
|
||||
// Service is the service to query.
|
||||
Service string
|
||||
|
||||
// Failover controls what we do if there are no healthy nodes in the
|
||||
// local datacenter.
|
||||
Failover QueryDatacenterOptions
|
||||
|
||||
// If OnlyPassing is true then we will only include nodes with passing
|
||||
// health checks (critical AND warning checks will cause a node to be
|
||||
// discarded)
|
||||
OnlyPassing bool
|
||||
|
||||
// Near allows the query to always prefer the node nearest the given
|
||||
// node. If the node does not exist, results are returned in their
|
||||
// normal randomly-shuffled order. Supplying the magic "_agent" value
|
||||
// is supported to sort near the agent which initiated the request.
|
||||
Near string
|
||||
|
||||
// Tags are a set of required and/or disallowed tags. If a tag is in
|
||||
// this list it must be present. If the tag is preceded with "!" then
|
||||
// it is disallowed.
|
||||
Tags []string
|
||||
|
||||
// NodeMeta is a map of required node metadata fields. If a key/value
|
||||
// pair is in this map it must be present on the node in order for the
|
||||
// service entry to be returned.
|
||||
NodeMeta map[string]string
|
||||
}
|
||||
|
||||
const (
|
||||
// QueryTemplateTypeNamePrefixMatch uses the Name field of the query as
|
||||
// a prefix to select the template.
|
||||
QueryTemplateTypeNamePrefixMatch = "name_prefix_match"
|
||||
)
|
||||
|
||||
// QueryTemplateOptions controls settings if this query is a template.
|
||||
type QueryTemplateOptions struct {
|
||||
// Type, if non-empty, means that this query is a template. This is
|
||||
// set to one of the QueryTemplateType* constants above.
|
||||
Type string
|
||||
|
||||
// Regexp is an optional regular expression to use to parse the full
|
||||
// name, once the prefix match has selected a template. This can be
|
||||
// used to extract parts of the name and choose a service name, set
|
||||
// tags, etc.
|
||||
Regexp string
|
||||
}
|
||||
|
||||
// PreparedQuery defines a complete prepared query, and is the structure we
|
||||
// maintain in the state store.
|
||||
type PreparedQuery struct {
|
||||
// ID is the UUID-based ID for the query, always generated by Consul.
|
||||
ID string
|
||||
|
||||
// Name is an optional friendly name for the query supplied by the
|
||||
// user. NOTE - if this feature is used then it will reduce the security
|
||||
// of any read ACL associated with this query/service since this name
|
||||
// can be used to locate nodes without supplying any ACL.
|
||||
Name string
|
||||
|
||||
// Session is an optional session to tie this query's lifetime to. If
|
||||
// this is omitted then the query will not expire.
|
||||
Session string
|
||||
|
||||
// Token is the ACL token used when the query was created, and it is
|
||||
// used when a query is subsequently executed. This token, or a token
|
||||
// with management privileges, must be used to change the query later.
|
||||
Token string
|
||||
|
||||
// Template is used to configure this query as a template, which will
|
||||
// respond to queries based on the Name, and then will be rendered
|
||||
// before it is executed.
|
||||
Template QueryTemplateOptions
|
||||
|
||||
// Service defines a service query (leaving things open for other types
|
||||
// later).
|
||||
Service ServiceQuery
|
||||
|
||||
// DNS has options that control how the results of this query are
|
||||
// served over DNS.
|
||||
DNS QueryDNSOptions
|
||||
|
||||
RaftIndex
|
||||
}
|
||||
|
||||
// GetACLPrefix returns the prefix to look up the prepared_query ACL policy for
|
||||
// this query, and whether the prefix applies to this query. You always need to
|
||||
// check the ok value before using the prefix.
|
||||
func (pq *PreparedQuery) GetACLPrefix() (string, bool) {
|
||||
if pq.Name != "" || pq.Template.Type != "" {
|
||||
return pq.Name, true
|
||||
}
|
||||
|
||||
return "", false
|
||||
}
|
||||
|
||||
type PreparedQueries []*PreparedQuery
|
||||
|
||||
type IndexedPreparedQueries struct {
|
||||
Queries PreparedQueries
|
||||
QueryMeta
|
||||
}
|
||||
|
||||
type PreparedQueryOp string
|
||||
|
||||
const (
|
||||
PreparedQueryCreate PreparedQueryOp = "create"
|
||||
PreparedQueryUpdate PreparedQueryOp = "update"
|
||||
PreparedQueryDelete PreparedQueryOp = "delete"
|
||||
)
|
||||
|
||||
// PreparedQueryRequest is used to create or change prepared queries.
|
||||
type PreparedQueryRequest struct {
|
||||
// Datacenter is the target this request is intended for.
|
||||
Datacenter string
|
||||
|
||||
// Op is the operation to apply.
|
||||
Op PreparedQueryOp
|
||||
|
||||
// Query is the query itself.
|
||||
Query *PreparedQuery
|
||||
|
||||
// WriteRequest holds the ACL token to go along with this request.
|
||||
WriteRequest
|
||||
}
|
||||
|
||||
// RequestDatacenter returns the datacenter for a given request.
|
||||
func (q *PreparedQueryRequest) RequestDatacenter() string {
|
||||
return q.Datacenter
|
||||
}
|
||||
|
||||
// PreparedQuerySpecificRequest is used to get information about a prepared
|
||||
// query.
|
||||
type PreparedQuerySpecificRequest struct {
|
||||
// Datacenter is the target this request is intended for.
|
||||
Datacenter string
|
||||
|
||||
// QueryID is the ID of a query.
|
||||
QueryID string
|
||||
|
||||
// QueryOptions (unfortunately named here) controls the consistency
|
||||
// settings for the query lookup itself, as well as the service lookups.
|
||||
QueryOptions
|
||||
}
|
||||
|
||||
// RequestDatacenter returns the datacenter for a given request.
|
||||
func (q *PreparedQuerySpecificRequest) RequestDatacenter() string {
|
||||
return q.Datacenter
|
||||
}
|
||||
|
||||
// PreparedQueryExecuteRequest is used to execute a prepared query.
|
||||
type PreparedQueryExecuteRequest struct {
|
||||
// Datacenter is the target this request is intended for.
|
||||
Datacenter string
|
||||
|
||||
// QueryIDOrName is the ID of a query _or_ the name of one, either can
|
||||
// be provided.
|
||||
QueryIDOrName string
|
||||
|
||||
// Limit will trim the resulting list down to the given limit.
|
||||
Limit int
|
||||
|
||||
// Source is used to sort the results relative to a given node using
|
||||
// network coordinates.
|
||||
Source QuerySource
|
||||
|
||||
// Agent is used to carry around a reference to the agent which initiated
|
||||
// the execute request. Used to distance-sort relative to the local node.
|
||||
Agent QuerySource
|
||||
|
||||
// QueryOptions (unfortunately named here) controls the consistency
|
||||
// settings for the query lookup itself, as well as the service lookups.
|
||||
QueryOptions
|
||||
}
|
||||
|
||||
// RequestDatacenter returns the datacenter for a given request.
|
||||
func (q *PreparedQueryExecuteRequest) RequestDatacenter() string {
|
||||
return q.Datacenter
|
||||
}
|
||||
|
||||
// PreparedQueryExecuteRemoteRequest is used when running a local query in a
|
||||
// remote datacenter.
|
||||
type PreparedQueryExecuteRemoteRequest struct {
|
||||
// Datacenter is the target this request is intended for.
|
||||
Datacenter string
|
||||
|
||||
// Query is a copy of the query to execute. We have to ship the entire
|
||||
// query over since it won't be present in the remote state store.
|
||||
Query PreparedQuery
|
||||
|
||||
// Limit will trim the resulting list down to the given limit.
|
||||
Limit int
|
||||
|
||||
// QueryOptions (unfortunately named here) controls the consistency
|
||||
// settings for the service lookups.
|
||||
QueryOptions
|
||||
}
|
||||
|
||||
// RequestDatacenter returns the datacenter for a given request.
|
||||
func (q *PreparedQueryExecuteRemoteRequest) RequestDatacenter() string {
|
||||
return q.Datacenter
|
||||
}
|
||||
|
||||
// PreparedQueryExecuteResponse has the results of executing a query.
|
||||
type PreparedQueryExecuteResponse struct {
|
||||
// Service is the service that was queried.
|
||||
Service string
|
||||
|
||||
// Nodes has the nodes that were output by the query.
|
||||
Nodes CheckServiceNodes
|
||||
|
||||
// DNS has the options for serving these results over DNS.
|
||||
DNS QueryDNSOptions
|
||||
|
||||
// Datacenter is the datacenter that these results came from.
|
||||
Datacenter string
|
||||
|
||||
// Failovers is a count of how many times we had to query a remote
|
||||
// datacenter.
|
||||
Failovers int
|
||||
|
||||
// QueryMeta has freshness information about the query.
|
||||
QueryMeta
|
||||
}
|
||||
|
||||
// PreparedQueryExplainResponse has the results when explaining a query.
|
||||
type PreparedQueryExplainResponse struct {
|
||||
// Query has the fully-rendered query.
|
||||
Query PreparedQuery
|
||||
|
||||
// QueryMeta has freshness information about the query.
|
||||
QueryMeta
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
package structs
|
||||
|
||||
type SnapshotOp int
|
||||
|
||||
const (
|
||||
SnapshotSave SnapshotOp = iota
|
||||
SnapshotRestore
|
||||
)
|
||||
|
||||
// SnapshotRequest is used as a header for a snapshot RPC request. This will
|
||||
// precede any streaming data that's part of the request and is JSON-encoded on
|
||||
// the wire.
|
||||
type SnapshotRequest struct {
|
||||
// Datacenter is the target datacenter for this request. The request
|
||||
// will be forwarded if necessary.
|
||||
Datacenter string
|
||||
|
||||
// Token is the ACL token to use for the operation. If ACLs are enabled
|
||||
// then all operations require a management token.
|
||||
Token string
|
||||
|
||||
// If set, any follower can service the request. Results may be
|
||||
// arbitrarily stale. Only applies to SnapshotSave.
|
||||
AllowStale bool
|
||||
|
||||
// Op is the operation code for the RPC.
|
||||
Op SnapshotOp
|
||||
}
|
||||
|
||||
// SnapshotResponse is used as a header for a snapshot RPC response. This will
|
||||
// precede any streaming data that's part of the request and is JSON-encoded on
|
||||
// the wire.
|
||||
type SnapshotResponse struct {
|
||||
// Error is the overall error status of the RPC request.
|
||||
Error string
|
||||
|
||||
// QueryMeta has freshness information about the server that handled the
|
||||
// request. It is only filled in for a SnapshotSave.
|
||||
QueryMeta
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,85 @@
|
|||
package structs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// TxnKVOp is used to define a single operation on the KVS inside a
|
||||
// transaction
|
||||
type TxnKVOp struct {
|
||||
Verb KVSOp
|
||||
DirEnt DirEntry
|
||||
}
|
||||
|
||||
// TxnKVResult is used to define the result of a single operation on the KVS
|
||||
// inside a transaction.
|
||||
type TxnKVResult *DirEntry
|
||||
|
||||
// TxnOp is used to define a single operation inside a transaction. Only one
|
||||
// of the types should be filled out per entry.
|
||||
type TxnOp struct {
|
||||
KV *TxnKVOp
|
||||
}
|
||||
|
||||
// TxnOps is a list of operations within a transaction.
|
||||
type TxnOps []*TxnOp
|
||||
|
||||
// TxnRequest is used to apply multiple operations to the state store in a
|
||||
// single transaction
|
||||
type TxnRequest struct {
|
||||
Datacenter string
|
||||
Ops TxnOps
|
||||
WriteRequest
|
||||
}
|
||||
|
||||
func (r *TxnRequest) RequestDatacenter() string {
|
||||
return r.Datacenter
|
||||
}
|
||||
|
||||
// TxnReadRequest is used as a fast path for read-only transactions that don't
|
||||
// modify the state store.
|
||||
type TxnReadRequest struct {
|
||||
Datacenter string
|
||||
Ops TxnOps
|
||||
QueryOptions
|
||||
}
|
||||
|
||||
func (r *TxnReadRequest) RequestDatacenter() string {
|
||||
return r.Datacenter
|
||||
}
|
||||
|
||||
// TxnError carries information about an error for one specific operation:
// OpIndex identifies the operation and What holds the message.
type TxnError struct {
	OpIndex int
	What    string
}

// Error renders the error as "op <OpIndex>: <What>".
func (e TxnError) Error() string {
	return fmt.Sprint("op ", e.OpIndex, ": ", e.What)
}
|
||||
|
||||
// TxnErrors is a list of TxnError entries.
|
||||
type TxnErrors []*TxnError
|
||||
|
||||
// TxnResult is used to define the result of a given operation inside a
|
||||
// transaction. Only one of the types should be filled out per entry.
|
||||
type TxnResult struct {
|
||||
KV TxnKVResult
|
||||
}
|
||||
|
||||
// TxnResults is a list of TxnResult entries.
|
||||
type TxnResults []*TxnResult
|
||||
|
||||
// TxnResponse is the structure returned by a TxnRequest.
|
||||
type TxnResponse struct {
|
||||
Results TxnResults
|
||||
Errors TxnErrors
|
||||
}
|
||||
|
||||
// TxnReadResponse is the structure returned by a TxnReadRequest.
|
||||
type TxnReadResponse struct {
|
||||
TxnResponse
|
||||
QueryMeta
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
Consul Testing Utilities
|
||||
========================
|
||||
|
||||
This package provides some generic helpers to facilitate testing in Consul.
|
||||
|
||||
TestServer
|
||||
==========
|
||||
|
||||
TestServer is a harness for managing Consul agents and initializing them with
|
||||
test data. Using it, you can form test clusters, create services, add health
|
||||
checks, manipulate the K/V store, etc. This test harness is completely decoupled
|
||||
from Consul's core and API client, meaning it can be easily imported and used in
|
||||
external unit tests for various applications. It works by invoking the Consul
|
||||
CLI, which means it is a requirement to have Consul installed in the `$PATH`.
|
||||
|
||||
Following is an example usage:
|
||||
|
||||
```go
|
||||
package my_program
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/consul/consul/structs"
|
||||
"github.com/hashicorp/consul/testutil"
|
||||
)
|
||||
|
||||
func TestFoo_bar(t *testing.T) {
|
||||
// Create a test Consul server
|
||||
srv1 := testutil.NewTestServer(t)
|
||||
defer srv1.Stop()
|
||||
|
||||
// Create a secondary server, passing in configuration
|
||||
// to avoid bootstrapping as we are forming a cluster.
|
||||
srv2 := testutil.NewTestServerConfig(t, func(c *testutil.TestServerConfig) {
|
||||
c.Bootstrap = false
|
||||
})
|
||||
defer srv2.Stop()
|
||||
|
||||
// Join the servers together
|
||||
srv1.JoinLAN(srv2.LANAddr)
|
||||
|
||||
// Create a test key/value pair
|
||||
srv1.SetKV("foo", []byte("bar"))
|
||||
|
||||
// Create lots of test key/value pairs
|
||||
srv1.PopulateKV(map[string][]byte{
|
||||
"bar": []byte("123"),
|
||||
"baz": []byte("456"),
|
||||
})
|
||||
|
||||
// Create a service
|
||||
srv1.AddService("redis", structs.HealthPassing, []string{"master"})
|
||||
|
||||
// Create a service check
|
||||
srv1.AddCheck("service:redis", "redis", structs.HealthPassing)
|
||||
|
||||
// Create a node check
|
||||
srv1.AddCheck("mem", "", structs.HealthCritical)
|
||||
|
||||
// The HTTPAddr field contains the address of the Consul
|
||||
// API on the new test server instance.
|
||||
println(srv1.HTTPAddr)
|
||||
}
|
||||
```
|
|
@ -0,0 +1,528 @@
|
|||
package testutil
|
||||
|
||||
// TestServer is a test helper. It uses a fork/exec model to create
|
||||
// a test Consul server instance in the background and initialize it
|
||||
// with some data and/or services. The test server can then be used
|
||||
// to run a unit test, and offers an easy API to tear itself down
|
||||
// when the test has completed. The only prerequisite is to have a consul
|
||||
// binary available on the $PATH.
|
||||
//
|
||||
// This package does not use Consul's official API client. This is
|
||||
// because we use TestServer to test the API client, which would
|
||||
// otherwise cause an import cycle.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/consul/consul/structs"
|
||||
"github.com/hashicorp/go-cleanhttp"
|
||||
)
|
||||
|
||||
// TestPerformanceConfig configures the performance parameters.
|
||||
type TestPerformanceConfig struct {
|
||||
RaftMultiplier uint `json:"raft_multiplier,omitempty"`
|
||||
}
|
||||
|
||||
// TestPortConfig configures the various ports used for services
|
||||
// provided by the Consul server.
|
||||
type TestPortConfig struct {
|
||||
DNS int `json:"dns,omitempty"`
|
||||
HTTP int `json:"http,omitempty"`
|
||||
RPC int `json:"rpc,omitempty"`
|
||||
SerfLan int `json:"serf_lan,omitempty"`
|
||||
SerfWan int `json:"serf_wan,omitempty"`
|
||||
Server int `json:"server,omitempty"`
|
||||
}
|
||||
|
||||
// TestAddressConfig contains the bind addresses for various
|
||||
// components of the Consul server.
|
||||
type TestAddressConfig struct {
|
||||
HTTP string `json:"http,omitempty"`
|
||||
}
|
||||
|
||||
// TestServerConfig is the main server configuration struct.
|
||||
type TestServerConfig struct {
|
||||
NodeName string `json:"node_name"`
|
||||
NodeMeta map[string]string `json:"node_meta,omitempty"`
|
||||
Performance *TestPerformanceConfig `json:"performance,omitempty"`
|
||||
Bootstrap bool `json:"bootstrap,omitempty"`
|
||||
Server bool `json:"server,omitempty"`
|
||||
DataDir string `json:"data_dir,omitempty"`
|
||||
Datacenter string `json:"datacenter,omitempty"`
|
||||
DisableCheckpoint bool `json:"disable_update_check"`
|
||||
LogLevel string `json:"log_level,omitempty"`
|
||||
Bind string `json:"bind_addr,omitempty"`
|
||||
Addresses *TestAddressConfig `json:"addresses,omitempty"`
|
||||
Ports *TestPortConfig `json:"ports,omitempty"`
|
||||
ACLMasterToken string `json:"acl_master_token,omitempty"`
|
||||
ACLDatacenter string `json:"acl_datacenter,omitempty"`
|
||||
ACLDefaultPolicy string `json:"acl_default_policy,omitempty"`
|
||||
Encrypt string `json:"encrypt,omitempty"`
|
||||
Stdout, Stderr io.Writer `json:"-"`
|
||||
Args []string `json:"-"`
|
||||
}
|
||||
|
||||
// ServerConfigCallback is a function interface which can be
|
||||
// passed to NewTestServerConfig to modify the server config.
|
||||
type ServerConfigCallback func(c *TestServerConfig)
|
||||
|
||||
// defaultServerConfig returns a new TestServerConfig struct
|
||||
// with all of the listen ports incremented by one.
|
||||
func defaultServerConfig() *TestServerConfig {
|
||||
return &TestServerConfig{
|
||||
NodeName: fmt.Sprintf("node%d", randomPort()),
|
||||
DisableCheckpoint: true,
|
||||
Performance: &TestPerformanceConfig{
|
||||
RaftMultiplier: 1,
|
||||
},
|
||||
Bootstrap: true,
|
||||
Server: true,
|
||||
LogLevel: "debug",
|
||||
Bind: "127.0.0.1",
|
||||
Addresses: &TestAddressConfig{},
|
||||
Ports: &TestPortConfig{
|
||||
DNS: randomPort(),
|
||||
HTTP: randomPort(),
|
||||
RPC: randomPort(),
|
||||
SerfLan: randomPort(),
|
||||
SerfWan: randomPort(),
|
||||
Server: randomPort(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// randomPort asks the kernel for a free TCP port on 127.0.0.1 by listening
// on port 0, and returns the port that was assigned. The listener is closed
// before returning. It panics if the listener cannot be created.
func randomPort() int {
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	defer listener.Close()

	addr := listener.Addr().(*net.TCPAddr)
	return addr.Port
}
|
||||
|
||||
// TestService is used to serialize a service definition.
|
||||
type TestService struct {
|
||||
ID string `json:",omitempty"`
|
||||
Name string `json:",omitempty"`
|
||||
Tags []string `json:",omitempty"`
|
||||
Address string `json:",omitempty"`
|
||||
Port int `json:",omitempty"`
|
||||
}
|
||||
|
||||
// TestCheck is used to serialize a check definition.
|
||||
type TestCheck struct {
|
||||
ID string `json:",omitempty"`
|
||||
Name string `json:",omitempty"`
|
||||
ServiceID string `json:",omitempty"`
|
||||
TTL string `json:",omitempty"`
|
||||
}
|
||||
|
||||
// TestingT is an interface wrapper around *testing.T
|
||||
type TestingT interface {
|
||||
Logf(format string, args ...interface{})
|
||||
Errorf(format string, args ...interface{})
|
||||
Fatalf(format string, args ...interface{})
|
||||
Fatal(args ...interface{})
|
||||
Skip(args ...interface{})
|
||||
}
|
||||
|
||||
// TestKVResponse is what we use to decode KV data.
|
||||
type TestKVResponse struct {
|
||||
Value string
|
||||
}
|
||||
|
||||
// TestServer is the main server wrapper struct.
|
||||
type TestServer struct {
|
||||
cmd *exec.Cmd
|
||||
Config *TestServerConfig
|
||||
t TestingT
|
||||
|
||||
HTTPAddr string
|
||||
LANAddr string
|
||||
WANAddr string
|
||||
|
||||
HttpClient *http.Client
|
||||
}
|
||||
|
||||
// NewTestServer is an easy helper method to create a new Consul
|
||||
// test server with the most basic configuration.
|
||||
func NewTestServer(t TestingT) *TestServer {
|
||||
return NewTestServerConfig(t, nil)
|
||||
}
|
||||
|
||||
// NewTestServerConfig creates a new TestServer, and makes a call to
|
||||
// an optional callback function to modify the configuration.
|
||||
func NewTestServerConfig(t TestingT, cb ServerConfigCallback) *TestServer {
|
||||
if path, err := exec.LookPath("consul"); err != nil || path == "" {
|
||||
t.Fatal("consul not found on $PATH - download and install " +
|
||||
"consul or skip this test")
|
||||
}
|
||||
|
||||
dataDir, err := ioutil.TempDir("", "consul")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
configFile, err := ioutil.TempFile(dataDir, "config")
|
||||
if err != nil {
|
||||
defer os.RemoveAll(dataDir)
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
consulConfig := defaultServerConfig()
|
||||
consulConfig.DataDir = dataDir
|
||||
|
||||
if cb != nil {
|
||||
cb(consulConfig)
|
||||
}
|
||||
|
||||
configContent, err := json.Marshal(consulConfig)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
if _, err := configFile.Write(configContent); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
configFile.Close()
|
||||
|
||||
stdout := io.Writer(os.Stdout)
|
||||
if consulConfig.Stdout != nil {
|
||||
stdout = consulConfig.Stdout
|
||||
}
|
||||
|
||||
stderr := io.Writer(os.Stderr)
|
||||
if consulConfig.Stderr != nil {
|
||||
stderr = consulConfig.Stderr
|
||||
}
|
||||
|
||||
// Start the server
|
||||
args := []string{"agent", "-config-file", configFile.Name()}
|
||||
args = append(args, consulConfig.Args...)
|
||||
cmd := exec.Command("consul", args...)
|
||||
cmd.Stdout = stdout
|
||||
cmd.Stderr = stderr
|
||||
if err := cmd.Start(); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
var httpAddr string
|
||||
var client *http.Client
|
||||
if strings.HasPrefix(consulConfig.Addresses.HTTP, "unix://") {
|
||||
httpAddr = consulConfig.Addresses.HTTP
|
||||
trans := cleanhttp.DefaultTransport()
|
||||
trans.Dial = func(_, _ string) (net.Conn, error) {
|
||||
return net.Dial("unix", httpAddr[7:])
|
||||
}
|
||||
client = &http.Client{
|
||||
Transport: trans,
|
||||
}
|
||||
} else {
|
||||
httpAddr = fmt.Sprintf("127.0.0.1:%d", consulConfig.Ports.HTTP)
|
||||
client = cleanhttp.DefaultClient()
|
||||
}
|
||||
|
||||
server := &TestServer{
|
||||
Config: consulConfig,
|
||||
cmd: cmd,
|
||||
t: t,
|
||||
|
||||
HTTPAddr: httpAddr,
|
||||
LANAddr: fmt.Sprintf("127.0.0.1:%d", consulConfig.Ports.SerfLan),
|
||||
WANAddr: fmt.Sprintf("127.0.0.1:%d", consulConfig.Ports.SerfWan),
|
||||
|
||||
HttpClient: client,
|
||||
}
|
||||
|
||||
// Wait for the server to be ready
|
||||
if consulConfig.Bootstrap {
|
||||
server.waitForLeader()
|
||||
} else {
|
||||
server.waitForAPI()
|
||||
}
|
||||
|
||||
return server
|
||||
}
|
||||
|
||||
// Stop stops the test Consul server, and removes the Consul data
|
||||
// directory once we are done.
|
||||
func (s *TestServer) Stop() {
|
||||
defer os.RemoveAll(s.Config.DataDir)
|
||||
|
||||
if err := s.cmd.Process.Kill(); err != nil {
|
||||
s.t.Errorf("err: %s", err)
|
||||
}
|
||||
|
||||
// wait for the process to exit to be sure that the data dir can be
|
||||
// deleted on all platforms.
|
||||
s.cmd.Wait()
|
||||
}
|
||||
|
||||
// waitForAPI waits for only the agent HTTP endpoint to start
|
||||
// responding. This is an indication that the agent has started,
|
||||
// but will likely return before a leader is elected.
|
||||
func (s *TestServer) waitForAPI() {
|
||||
WaitForResult(func() (bool, error) {
|
||||
resp, err := s.HttpClient.Get(s.url("/v1/agent/self"))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if err := s.requireOK(resp); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}, func(err error) {
|
||||
defer s.Stop()
|
||||
s.t.Fatalf("err: %s", err)
|
||||
})
|
||||
}
|
||||
|
||||
// waitForLeader waits for the Consul server's HTTP API to become
|
||||
// available, and then waits for a known leader and an index of
|
||||
// 1 or more to be observed to confirm leader election is done.
|
||||
// It then waits to ensure the anti-entropy sync has completed.
|
||||
func (s *TestServer) waitForLeader() {
|
||||
var index int64
|
||||
WaitForResult(func() (bool, error) {
|
||||
// Query the API and check the status code.
|
||||
url := s.url(fmt.Sprintf("/v1/catalog/nodes?index=%d&wait=2s", index))
|
||||
resp, err := s.HttpClient.Get(url)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if err := s.requireOK(resp); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Ensure we have a leader and a node registration.
|
||||
if leader := resp.Header.Get("X-Consul-KnownLeader"); leader != "true" {
|
||||
return false, fmt.Errorf("Consul leader status: %#v", leader)
|
||||
}
|
||||
index, err = strconv.ParseInt(resp.Header.Get("X-Consul-Index"), 10, 64)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("Consul index was bad: %v", err)
|
||||
}
|
||||
if index == 0 {
|
||||
return false, fmt.Errorf("Consul index is 0")
|
||||
}
|
||||
|
||||
// Watch for the anti-entropy sync to finish.
|
||||
var parsed []map[string]interface{}
|
||||
dec := json.NewDecoder(resp.Body)
|
||||
if err := dec.Decode(&parsed); err != nil {
|
||||
return false, err
|
||||
}
|
||||
if len(parsed) < 1 {
|
||||
return false, fmt.Errorf("No nodes")
|
||||
}
|
||||
taggedAddresses, ok := parsed[0]["TaggedAddresses"].(map[string]interface{})
|
||||
if !ok {
|
||||
return false, fmt.Errorf("Missing tagged addresses")
|
||||
}
|
||||
if _, ok := taggedAddresses["lan"]; !ok {
|
||||
return false, fmt.Errorf("No lan tagged addresses")
|
||||
}
|
||||
return true, nil
|
||||
}, func(err error) {
|
||||
defer s.Stop()
|
||||
s.t.Fatalf("err: %s", err)
|
||||
})
|
||||
}
|
||||
|
||||
// url is a helper function which takes a relative URL and
|
||||
// makes it into a proper URL against the local Consul server.
|
||||
func (s *TestServer) url(path string) string {
|
||||
return fmt.Sprintf("http://127.0.0.1:%d%s", s.Config.Ports.HTTP, path)
|
||||
}
|
||||
|
||||
// requireOK checks the HTTP response code and ensures it is acceptable.
|
||||
func (s *TestServer) requireOK(resp *http.Response) error {
|
||||
if resp.StatusCode != 200 {
|
||||
return fmt.Errorf("Bad status code: %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// put performs a new HTTP PUT request.
|
||||
func (s *TestServer) put(path string, body io.Reader) *http.Response {
|
||||
req, err := http.NewRequest("PUT", s.url(path), body)
|
||||
if err != nil {
|
||||
s.t.Fatalf("err: %s", err)
|
||||
}
|
||||
resp, err := s.HttpClient.Do(req)
|
||||
if err != nil {
|
||||
s.t.Fatalf("err: %s", err)
|
||||
}
|
||||
if err := s.requireOK(resp); err != nil {
|
||||
defer resp.Body.Close()
|
||||
s.t.Fatal(err)
|
||||
}
|
||||
return resp
|
||||
}
|
||||
|
||||
// get performs a new HTTP GET request.
|
||||
func (s *TestServer) get(path string) *http.Response {
|
||||
resp, err := s.HttpClient.Get(s.url(path))
|
||||
if err != nil {
|
||||
s.t.Fatalf("err: %s", err)
|
||||
}
|
||||
if err := s.requireOK(resp); err != nil {
|
||||
defer resp.Body.Close()
|
||||
s.t.Fatal(err)
|
||||
}
|
||||
return resp
|
||||
}
|
||||
|
||||
// encodePayload returns a new io.Reader wrapping the encoded contents
|
||||
// of the payload, suitable for passing directly to a new request.
|
||||
func (s *TestServer) encodePayload(payload interface{}) io.Reader {
|
||||
var encoded bytes.Buffer
|
||||
enc := json.NewEncoder(&encoded)
|
||||
if err := enc.Encode(payload); err != nil {
|
||||
s.t.Fatalf("err: %s", err)
|
||||
}
|
||||
return &encoded
|
||||
}
|
||||
|
||||
// JoinLAN is used to join nodes within the same datacenter.
|
||||
func (s *TestServer) JoinLAN(addr string) {
|
||||
resp := s.get("/v1/agent/join/" + addr)
|
||||
resp.Body.Close()
|
||||
}
|
||||
|
||||
// JoinWAN is used to join remote datacenters together.
|
||||
func (s *TestServer) JoinWAN(addr string) {
|
||||
resp := s.get("/v1/agent/join/" + addr + "?wan=1")
|
||||
resp.Body.Close()
|
||||
}
|
||||
|
||||
// SetKV sets an individual key in the K/V store.
|
||||
func (s *TestServer) SetKV(key string, val []byte) {
|
||||
resp := s.put("/v1/kv/"+key, bytes.NewBuffer(val))
|
||||
resp.Body.Close()
|
||||
}
|
||||
|
||||
// GetKV retrieves a single key and returns its value
|
||||
func (s *TestServer) GetKV(key string) []byte {
|
||||
resp := s.get("/v1/kv/" + key)
|
||||
defer resp.Body.Close()
|
||||
|
||||
raw, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
s.t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
var result []*TestKVResponse
|
||||
if err := json.Unmarshal(raw, &result); err != nil {
|
||||
s.t.Fatalf("err: %s", err)
|
||||
}
|
||||
if len(result) < 1 {
|
||||
s.t.Fatalf("key does not exist: %s", key)
|
||||
}
|
||||
|
||||
v, err := base64.StdEncoding.DecodeString(result[0].Value)
|
||||
if err != nil {
|
||||
s.t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
return v
|
||||
}
|
||||
|
||||
// PopulateKV fills the Consul KV with data from a generic map.
|
||||
func (s *TestServer) PopulateKV(data map[string][]byte) {
|
||||
for k, v := range data {
|
||||
s.SetKV(k, v)
|
||||
}
|
||||
}
|
||||
|
||||
// ListKV returns a list of keys present in the KV store. This will list all
|
||||
// keys under the given prefix recursively and return them as a slice.
|
||||
func (s *TestServer) ListKV(prefix string) []string {
|
||||
resp := s.get("/v1/kv/" + prefix + "?keys")
|
||||
defer resp.Body.Close()
|
||||
|
||||
raw, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
s.t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
var result []string
|
||||
if err := json.Unmarshal(raw, &result); err != nil {
|
||||
s.t.Fatalf("err: %s", err)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// AddService adds a new service to the Consul instance. It also
|
||||
// automatically adds a health check with the given status, which
|
||||
// can be one of "passing", "warning", or "critical".
|
||||
func (s *TestServer) AddService(name, status string, tags []string) {
|
||||
svc := &TestService{
|
||||
Name: name,
|
||||
Tags: tags,
|
||||
}
|
||||
payload := s.encodePayload(svc)
|
||||
s.put("/v1/agent/service/register", payload)
|
||||
|
||||
chkName := "service:" + name
|
||||
chk := &TestCheck{
|
||||
Name: chkName,
|
||||
ServiceID: name,
|
||||
TTL: "10m",
|
||||
}
|
||||
payload = s.encodePayload(chk)
|
||||
s.put("/v1/agent/check/register", payload)
|
||||
|
||||
switch status {
|
||||
case structs.HealthPassing:
|
||||
s.put("/v1/agent/check/pass/"+chkName, nil)
|
||||
case structs.HealthWarning:
|
||||
s.put("/v1/agent/check/warn/"+chkName, nil)
|
||||
case structs.HealthCritical:
|
||||
s.put("/v1/agent/check/fail/"+chkName, nil)
|
||||
default:
|
||||
s.t.Fatalf("Unrecognized status: %s", status)
|
||||
}
|
||||
}
|
||||
|
||||
// AddCheck adds a check to the Consul instance. If the serviceID is
|
||||
// left empty (""), then the check will be associated with the node.
|
||||
// The check status may be "passing", "warning", or "critical".
|
||||
func (s *TestServer) AddCheck(name, serviceID, status string) {
|
||||
chk := &TestCheck{
|
||||
ID: name,
|
||||
Name: name,
|
||||
TTL: "10m",
|
||||
}
|
||||
if serviceID != "" {
|
||||
chk.ServiceID = serviceID
|
||||
}
|
||||
|
||||
payload := s.encodePayload(chk)
|
||||
s.put("/v1/agent/check/register", payload)
|
||||
|
||||
switch status {
|
||||
case structs.HealthPassing:
|
||||
s.put("/v1/agent/check/pass/"+name, nil)
|
||||
case structs.HealthWarning:
|
||||
s.put("/v1/agent/check/warn/"+name, nil)
|
||||
case structs.HealthCritical:
|
||||
s.put("/v1/agent/check/fail/"+name, nil)
|
||||
default:
|
||||
s.t.Fatalf("Unrecognized status: %s", status)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
package testutil
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/consul/structs"
|
||||
)
|
||||
|
||||
// testFn is a single attempt made by WaitForResult. It reports whether
// the awaited condition holds and, if not, why.
type testFn func() (bool, error)

// errorFn is called by WaitForResult with the error of the last attempt
// once all attempts have been used up.
type errorFn func(error)

// Bounds of the delay between two attempts: it starts at baseWait and
// doubles after every failed attempt until it is capped at maxWait.
const (
	baseWait = time.Millisecond
	maxWait  = 100 * time.Millisecond
)

// WaitForResult calls try up to 100 times, sleeping between attempts
// with a doubling delay capped at maxWait. As soon as try reports
// success it pauses for another 25ms and returns. If no attempt
// succeeds, fail is called with the error from the final attempt.
func WaitForResult(try testFn, fail errorFn) {
	var lastErr error
	delay := baseWait

	for attempt := 0; attempt < 100; attempt++ {
		ok, err := try()
		lastErr = err
		if ok {
			time.Sleep(25 * time.Millisecond)
			return
		}

		time.Sleep(delay)
		if delay = 2 * delay; delay > maxWait {
			delay = maxWait
		}
	}

	fail(lastErr)
}
|
||||
|
||||
// rpcFn is the RPC entry point WaitForLeader polls. WaitForLeader calls
// it with the RPC method name ("Catalog.ListNodes"), the request and a
// pointer to the reply value to fill in.
type rpcFn func(string, interface{}, interface{}) error
|
||||
|
||||
func WaitForLeader(t *testing.T, rpc rpcFn, dc string) structs.IndexedNodes {
|
||||
var out structs.IndexedNodes
|
||||
WaitForResult(func() (bool, error) {
|
||||
// Ensure we have a leader and a node registration.
|
||||
args := &structs.DCSpecificRequest{
|
||||
Datacenter: dc,
|
||||
}
|
||||
if err := rpc("Catalog.ListNodes", args, &out); err != nil {
|
||||
return false, fmt.Errorf("Catalog.ListNodes failed: %v", err)
|
||||
}
|
||||
if !out.QueryMeta.KnownLeader {
|
||||
return false, fmt.Errorf("No leader")
|
||||
}
|
||||
if out.Index == 0 {
|
||||
return false, fmt.Errorf("Consul index is 0")
|
||||
}
|
||||
return true, nil
|
||||
}, func(err error) {
|
||||
t.Fatalf("failed to find leader: %v", err)
|
||||
})
|
||||
return out
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
# Consul `types` Package
|
||||
|
||||
Go has a strong type system built into the language. The
|
||||
`types` package corrals named types into a single package that is terminal in
|
||||
`go`'s import graph. The `types` package should not have any downstream
|
||||
dependencies. Each subsystem that defines its own set of types exists in its
|
||||
own file, but all types are defined in the same package.
|
||||
|
||||
# Why
|
||||
|
||||
> Everything should be made as simple as possible, but not simpler.
|
||||
|
||||
`string` is a useful container and underlying type for identifiers, however
|
||||
the `string` type is effectively opaque to the compiler in terms of how a
|
||||
given string is intended to be used. For instance, there is nothing
|
||||
preventing the following from happening:
|
||||
|
||||
```go
|
||||
// `map` of Widgets, looked up by ID
|
||||
var widgetLookup map[string]*Widget
|
||||
// ...
|
||||
var widgetID string = "widgetID"
|
||||
w, found := widgetLookup[widgetID]
|
||||
|
||||
// Bad!
|
||||
var widgetName string = "name of widget"
|
||||
w, found := widgetLookup[widgetName]
|
||||
```
|
||||
|
||||
but this class of problem is entirely preventable:
|
||||
|
||||
```go
|
||||
type WidgetID string
|
||||
var widgetLookup map[WidgetID]*Widget
|
||||
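var widgetName string = "name of widget"
w, found := widgetLookup[widgetName] // compile error: widgetName is a string, not a WidgetID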
|
||||
```
|
||||
|
||||
TL;DR: intentions and idioms aren't statically checked by compilers. The
|
||||
`types` package uses Go's strong type system to prevent this class of bug.
|
|
@ -0,0 +1,5 @@
|
|||
package types
|
||||
|
||||
// CheckID is a strongly typed string used to uniquely represent a Consul
|
||||
// Check on an Agent (a CheckID is not globally unique).
|
||||
// Because it is a distinct named type, a string variable or another named
// identifier type cannot be used as a CheckID without an explicit conversion.
type CheckID string
|
|
@ -0,0 +1,4 @@
|
|||
package types
|
||||
|
||||
// NodeID is a unique identifier for a node across space and time.
|
||||
// Because it is a distinct named type, a string variable or another named
// identifier type cannot be used as a NodeID without an explicit conversion.
type NodeID string
|
|
@ -0,0 +1,373 @@
|
|||
Mozilla Public License Version 2.0
|
||||
==================================
|
||||
|
||||
1. Definitions
|
||||
--------------
|
||||
|
||||
1.1. "Contributor"
|
||||
means each individual or legal entity that creates, contributes to
|
||||
the creation of, or owns Covered Software.
|
||||
|
||||
1.2. "Contributor Version"
|
||||
means the combination of the Contributions of others (if any) used
|
||||
by a Contributor and that particular Contributor's Contribution.
|
||||
|
||||
1.3. "Contribution"
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. "Covered Software"
|
||||
means Source Code Form to which the initial Contributor has attached
|
||||
the notice in Exhibit A, the Executable Form of such Source Code
|
||||
Form, and Modifications of such Source Code Form, in each case
|
||||
including portions thereof.
|
||||
|
||||
1.5. "Incompatible With Secondary Licenses"
|
||||
means
|
||||
|
||||
(a) that the initial Contributor has attached the notice described
|
||||
in Exhibit B to the Covered Software; or
|
||||
|
||||
(b) that the Covered Software was made available under the terms of
|
||||
version 1.1 or earlier of the License, but not also under the
|
||||
terms of a Secondary License.
|
||||
|
||||
1.6. "Executable Form"
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. "Larger Work"
|
||||
means a work that combines Covered Software with other material, in
|
||||
a separate file or files, that is not Covered Software.
|
||||
|
||||
1.8. "License"
|
||||
means this document.
|
||||
|
||||
1.9. "Licensable"
|
||||
means having the right to grant, to the maximum extent possible,
|
||||
whether at the time of the initial grant or subsequently, any and
|
||||
all of the rights conveyed by this License.
|
||||
|
||||
1.10. "Modifications"
|
||||
means any of the following:
|
||||
|
||||
(a) any file in Source Code Form that results from an addition to,
|
||||
deletion from, or modification of the contents of Covered
|
||||
Software; or
|
||||
|
||||
(b) any new file in Source Code Form that contains any Covered
|
||||
Software.
|
||||
|
||||
1.11. "Patent Claims" of a Contributor
|
||||
means any patent claim(s), including without limitation, method,
|
||||
process, and apparatus claims, in any patent Licensable by such
|
||||
Contributor that would be infringed, but for the grant of the
|
||||
License, by the making, using, selling, offering for sale, having
|
||||
made, import, or transfer of either its Contributions or its
|
||||
Contributor Version.
|
||||
|
||||
1.12. "Secondary License"
|
||||
means either the GNU General Public License, Version 2.0, the GNU
|
||||
Lesser General Public License, Version 2.1, the GNU Affero General
|
||||
Public License, Version 3.0, or any later versions of those
|
||||
licenses.
|
||||
|
||||
1.13. "Source Code Form"
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. "You" (or "Your")
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, "You" includes any entity that
|
||||
controls, is controlled by, or is under common control with You. For
|
||||
purposes of this definition, "control" means (a) the power, direct
|
||||
or indirect, to cause the direction or management of such entity,
|
||||
whether by contract or otherwise, or (b) ownership of more than
|
||||
fifty percent (50%) of the outstanding shares or beneficial
|
||||
ownership of such entity.
|
||||
|
||||
2. License Grants and Conditions
|
||||
--------------------------------
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
(a) under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or
|
||||
as part of a Larger Work; and
|
||||
|
||||
(b) under Patent Claims of such Contributor to make, use, sell, offer
|
||||
for sale, have made, import, and otherwise transfer either its
|
||||
Contributions or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution
|
||||
become effective for each Contribution on the date the Contributor first
|
||||
distributes such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under
|
||||
this License. No additional rights or licenses will be implied from the
|
||||
distribution or licensing of Covered Software under this License.
|
||||
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
||||
Contributor:
|
||||
|
||||
(a) for any code that a Contributor has removed from Covered Software;
|
||||
or
|
||||
|
||||
(b) for infringements caused by: (i) Your and any other third party's
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
(c) under Patent Claims infringed by Covered Software in the absence of
|
||||
its Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks,
|
||||
or logos of any Contributor (except as may be necessary to comply with
|
||||
the notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this
|
||||
License (see Section 10.2) or under the terms of a Secondary License (if
|
||||
permitted under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its
|
||||
Contributions are its original creation(s) or it has sufficient rights
|
||||
to grant the rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under
|
||||
applicable copyright doctrines of fair use, fair dealing, or other
|
||||
equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
|
||||
in Section 2.1.
|
||||
|
||||
3. Responsibilities
|
||||
-------------------
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under
|
||||
the terms of this License. You must inform recipients that the Source
|
||||
Code Form of the Covered Software is governed by the terms of this
|
||||
License, and how they can obtain a copy of this License. You may not
|
||||
attempt to alter or restrict the recipients' rights in the Source Code
|
||||
Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
(a) such Covered Software must also be made available in Source Code
|
||||
Form, as described in Section 3.1, and You must inform recipients of
|
||||
the Executable Form how they can obtain a copy of such Source Code
|
||||
Form by reasonable means in a timely manner, at a charge no more
|
||||
than the cost of distribution to the recipient; and
|
||||
|
||||
(b) You may distribute such Executable Form under the terms of this
|
||||
License, or sublicense it under different terms, provided that the
|
||||
license for the Executable Form does not attempt to limit or alter
|
||||
the recipients' rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for
|
||||
the Covered Software. If the Larger Work is a combination of Covered
|
||||
Software with a work governed by one or more Secondary Licenses, and the
|
||||
Covered Software is not Incompatible With Secondary Licenses, this
|
||||
License permits You to additionally distribute such Covered Software
|
||||
under the terms of such Secondary License(s), so that the recipient of
|
||||
the Larger Work may, at their option, further distribute the Covered
|
||||
Software under the terms of either this License or such Secondary
|
||||
License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices
|
||||
(including copyright notices, patent notices, disclaimers of warranty,
|
||||
or limitations of liability) contained within the Source Code Form of
|
||||
the Covered Software, except that You may alter any license notices to
|
||||
the extent required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on
|
||||
behalf of any Contributor. You must make it absolutely clear that any
|
||||
such warranty, support, indemnity, or liability obligation is offered by
|
||||
You alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
---------------------------------------------------
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this
|
||||
License with respect to some or all of the Covered Software due to
|
||||
statute, judicial order, or regulation then You must: (a) comply with
|
||||
the terms of this License to the maximum extent possible; and (b)
|
||||
describe the limitations and the code they affect. Such description must
|
||||
be placed in a text file included with all distributions of the Covered
|
||||
Software under this License. Except to the extent prohibited by statute
|
||||
or regulation, such description must be sufficiently detailed for a
|
||||
recipient of ordinary skill to be able to understand it.
|
||||
|
||||
5. Termination
|
||||
--------------
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically
|
||||
if You fail to comply with any of its terms. However, if You become
|
||||
compliant, then the rights granted under this License from a particular
|
||||
Contributor are reinstated (a) provisionally, unless and until such
|
||||
Contributor explicitly and finally terminates Your grants, and (b) on an
|
||||
ongoing basis, if such Contributor fails to notify You of the
|
||||
non-compliance by some reasonable means prior to 60 days after You have
|
||||
come back into compliance. Moreover, Your grants from a particular
|
||||
Contributor are reinstated on an ongoing basis if such Contributor
|
||||
notifies You of the non-compliance by some reasonable means, this is the
|
||||
first time You have received notice of non-compliance with this License
|
||||
from such Contributor, and You become compliant prior to 30 days after
|
||||
Your receipt of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions,
|
||||
counter-claims, and cross-claims) alleging that a Contributor Version
|
||||
directly or indirectly infringes any patent, then the rights granted to
|
||||
You by any and all Contributors for the Covered Software under Section
|
||||
2.1 of this License shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
|
||||
end user license agreements (excluding distributors and resellers) which
|
||||
have been validly granted by You or Your distributors under this License
|
||||
prior to termination shall survive termination.
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 6. Disclaimer of Warranty *
|
||||
* ------------------------- *
|
||||
* *
|
||||
* Covered Software is provided under this License on an "as is" *
|
||||
* basis, without warranty of any kind, either expressed, implied, or *
|
||||
* statutory, including, without limitation, warranties that the *
|
||||
* Covered Software is free of defects, merchantable, fit for a *
|
||||
* particular purpose or non-infringing. The entire risk as to the *
|
||||
* quality and performance of the Covered Software is with You. *
|
||||
* Should any Covered Software prove defective in any respect, You *
|
||||
* (not any Contributor) assume the cost of any necessary servicing, *
|
||||
* repair, or correction. This disclaimer of warranty constitutes an *
|
||||
* essential part of this License. No use of any Covered Software is *
|
||||
* authorized under this License except under this disclaimer. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 7. Limitation of Liability *
|
||||
* -------------------------- *
|
||||
* *
|
||||
* Under no circumstances and under no legal theory, whether tort *
|
||||
* (including negligence), contract, or otherwise, shall any *
|
||||
* Contributor, or anyone who distributes Covered Software as *
|
||||
* permitted above, be liable to You for any direct, indirect, *
|
||||
* special, incidental, or consequential damages of any character *
|
||||
* including, without limitation, damages for lost profits, loss of *
|
||||
* goodwill, work stoppage, computer failure or malfunction, or any *
|
||||
* and all other commercial damages or losses, even if such party *
|
||||
* shall have been informed of the possibility of such damages. This *
|
||||
* limitation of liability shall not apply to liability for death or *
|
||||
* personal injury resulting from such party's negligence to the *
|
||||
* extent applicable law prohibits such limitation. Some *
|
||||
* jurisdictions do not allow the exclusion or limitation of *
|
||||
* incidental or consequential damages, so this exclusion and *
|
||||
* limitation may not apply to You. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
8. Litigation
|
||||
-------------
|
||||
|
||||
Any litigation relating to this License may be brought only in the
|
||||
courts of a jurisdiction where the defendant maintains its principal
|
||||
place of business and such litigation shall be governed by laws of that
|
||||
jurisdiction, without reference to its conflict-of-law provisions.
|
||||
Nothing in this Section shall prevent a party's ability to bring
|
||||
cross-claims or counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
----------------
|
||||
|
||||
This License represents the complete agreement concerning the subject
|
||||
matter hereof. If any provision of this License is held to be
|
||||
unenforceable, such provision shall be reformed only to the extent
|
||||
necessary to make it enforceable. Any law or regulation which provides
|
||||
that the language of a contract shall be construed against the drafter
|
||||
shall not be used to construe this License against a Contributor.
|
||||
|
||||
10. Versions of the License
|
||||
---------------------------
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version
|
||||
of the License under which You originally received the Covered Software,
|
||||
or under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a
|
||||
modified version of this License if you rename the license and remove
|
||||
any references to the name of the license steward (except to note that
|
||||
such modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
||||
Licenses
|
||||
|
||||
If You choose to distribute Source Code Form that is Incompatible With
|
||||
Secondary Licenses under the terms of this version of the License, the
|
||||
notice described in Exhibit B of this License must be attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
-------------------------------------------
|
||||
|
||||
This Source Code Form is subject to the terms of the Mozilla Public
|
||||
License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular
|
||||
file, then You may include the notice in a location (such as a LICENSE
|
||||
file in a relevant directory) where a recipient would be likely to look
|
||||
for such a notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
||||
|
||||
Exhibit B - "Incompatible With Secondary Licenses" Notice
|
||||
---------------------------------------------------------
|
||||
|
||||
This Source Code Form is "Incompatible With Secondary Licenses", as
|
||||
defined by the Mozilla Public License, v. 2.0.
|
|
@ -0,0 +1,63 @@
|
|||
# Build, test, coverage and audit helpers for the go-sockaddr library.  The
# cmd/sockaddr CLI has its own makefile which several targets recurse into.

TOOLS= golang.org/x/tools/cover
# ?= defines a recursively expanded variable, so referencing GOCOVER_FILE
# before its definition below is fine.
GOCOVER_TMPFILE?= $(GOCOVER_FILE).tmp
GOCOVER_FILE?= .cover.out
GOCOVERHTML?= coverage.html

# Run the library tests (via the coverage profile) and then the CLI tests.
test:: $(GOCOVER_FILE)
	@$(MAKE) -C cmd/sockaddr test

cover:: coverage_report

# Produce one coverage profile per directory, then merge them into a single
# profile with exactly one "mode:" header.
$(GOCOVER_FILE)::
	@find . -type d ! -path '*cmd*' ! -path '*.git*' -print0 | xargs -0 -I % sh -ec "cd % && rm -f $(GOCOVER_TMPFILE) && go test -coverprofile=$(GOCOVER_TMPFILE)"

	@echo 'mode: set' > $(GOCOVER_FILE)
	@find . -type f ! -path '*cmd*' ! -path '*.git*' -name "$(GOCOVER_TMPFILE)" -print0 | xargs -0 -n1 cat | grep -v '^mode: ' >> ${PWD}/$(GOCOVER_FILE)

$(GOCOVERHTML): $(GOCOVER_FILE)
	go tool cover -html=$(GOCOVER_FILE) -o $(GOCOVERHTML)

coverage_report:: $(GOCOVER_FILE)
	go tool cover -html=$(GOCOVER_FILE)

# Install the linters used by the audit target.
audit_tools::
	@go get -u github.com/golang/lint/golint && echo "Installed golint:"
	@go get -u github.com/fzipp/gocyclo && echo "Installed gocyclo:"
	@go get -u github.com/remyoudompheng/go-misc/deadcode && echo "Installed deadcode:"
	@go get -u github.com/client9/misspell/cmd/misspell && echo "Installed misspell:"
	@go get -u github.com/gordonklaus/ineffassign && echo "Installed ineffassign:"

audit::
	deadcode
	go tool vet -all *.go
	go tool vet -shadow=true *.go
	golint *.go
	ineffassign .
	gocyclo -over 65 *.go
	misspell *.go

clean::
	rm -f $(GOCOVER_FILE) $(GOCOVERHTML)

dev::
	@go build
	@$(MAKE) -B -C cmd/sockaddr sockaddr

install::
	@go install
	@$(MAKE) -C cmd/sockaddr install

# $$GOROOT defers expansion to the shell; a bare $GOROOT would be read by make
# as $(G) followed by the literal text "OROOT".
doc::
	echo Visit: http://127.0.0.1:6060/pkg/github.com/hashicorp/go-sockaddr/
	godoc -http=:6060 -goroot $$GOROOT

# Cross-compile for every listed OS/arch pair to catch build breakage.
world::
	@set -e; \
	for os in solaris darwin freebsd linux windows; do \
		for arch in amd64; do \
			printf "Building on %s-%s\n" "$${os}" "$${arch}" ; \
			env GOOS="$${os}" GOARCH="$${arch}" go build -o /dev/null; \
		done; \
	done

	$(MAKE) -C cmd/sockaddr world
|
|
@ -0,0 +1,118 @@
|
|||
# go-sockaddr
|
||||
|
||||
## `sockaddr` Library
|
||||
|
||||
Socket address convenience functions for Go. `go-sockaddr` is a convenience
|
||||
library that makes doing the right thing with IP addresses easy. `go-sockaddr`
|
||||
is loosely modeled after the UNIX `sockaddr_t` and creates a union of the family
|
||||
of `sockaddr_t` types (see below for an ascii diagram). Library documentation
|
||||
is available
|
||||
at
|
||||
[https://godoc.org/github.com/hashicorp/go-sockaddr](https://godoc.org/github.com/hashicorp/go-sockaddr).
|
||||
The primary intent of the library was to make it possible to define heuristics
|
||||
for selecting the correct IP addresses when a configuration is evaluated at
|
||||
runtime. See
|
||||
the
|
||||
[docs](https://godoc.org/github.com/hashicorp/go-sockaddr),
|
||||
[`template` package](https://godoc.org/github.com/hashicorp/go-sockaddr/template),
|
||||
tests,
|
||||
and
|
||||
[CLI utility](https://github.com/hashicorp/go-sockaddr/tree/master/cmd/sockaddr)
|
||||
for details and hints as to how to use this library.
|
||||
|
||||
For example, with this library it is possible to find an IP address that:
|
||||
|
||||
* is attached to a default route
|
||||
([`GetDefaultInterfaces()`](https://godoc.org/github.com/hashicorp/go-sockaddr#GetDefaultInterfaces))
|
||||
* is contained within a CIDR block ([`IfByNetwork()`](https://godoc.org/github.com/hashicorp/go-sockaddr#IfByNetwork))
|
||||
* is an RFC1918 address
|
||||
([`IfByRFC("1918")`](https://godoc.org/github.com/hashicorp/go-sockaddr#IfByRFC))
|
||||
* is ordered
|
||||
([`OrderedIfAddrBy(args)`](https://godoc.org/github.com/hashicorp/go-sockaddr#OrderedIfAddrBy) where
|
||||
`args` includes, but is not limited
|
||||
to,
|
||||
[`AscIfType`](https://godoc.org/github.com/hashicorp/go-sockaddr#AscIfType),
|
||||
[`AscNetworkSize`](https://godoc.org/github.com/hashicorp/go-sockaddr#AscNetworkSize))
|
||||
* excludes all IPv6 addresses
|
||||
([`IfByType("^(IPv4)$")`](https://godoc.org/github.com/hashicorp/go-sockaddr#IfByType))
|
||||
* is larger than a `/32`
|
||||
([`IfByMaskSize(32)`](https://godoc.org/github.com/hashicorp/go-sockaddr#IfByMaskSize))
|
||||
* is not on a `down` interface
|
||||
([`ExcludeIfs("flags", "down")`](https://godoc.org/github.com/hashicorp/go-sockaddr#ExcludeIfs))
|
||||
* prefers an IPv6 address over an IPv4 address
|
||||
([`SortIfByType()`](https://godoc.org/github.com/hashicorp/go-sockaddr#SortIfByType) +
|
||||
[`ReverseIfAddrs()`](https://godoc.org/github.com/hashicorp/go-sockaddr#ReverseIfAddrs)); and
|
||||
* excludes any IP address covered by RFC6890
|
||||
([`IfByRFC("6890")`](https://godoc.org/github.com/hashicorp/go-sockaddr#IfByRFC))
|
||||
|
||||
Or any combination or variation therein.
|
||||
|
||||
There are also a few simple helper functions such as `GetPublicIP` and
|
||||
`GetPrivateIP` which both return strings and select the first public or private
|
||||
IP address on the default interface, respectively. Similarly, there is also a
|
||||
helper function called `GetInterfaceIP` which returns the first usable IP
|
||||
address on the named interface.
|
||||
|
||||
## `sockaddr` CLI
|
||||
|
||||
Given the possible complexity of the `sockaddr` library, there is a CLI utility
|
||||
that accompanies the library, also
|
||||
called
|
||||
[`sockaddr`](https://github.com/hashicorp/go-sockaddr/tree/master/cmd/sockaddr).
|
||||
The
|
||||
[`sockaddr`](https://github.com/hashicorp/go-sockaddr/tree/master/cmd/sockaddr)
|
||||
utility exposes nearly all of the functionality of the library and can be used
|
||||
either as an administrative tool or testing tool. To install
|
||||
the
|
||||
[`sockaddr`](https://github.com/hashicorp/go-sockaddr/tree/master/cmd/sockaddr),
|
||||
run:
|
||||
|
||||
```text
|
||||
$ go get -u github.com/hashicorp/go-sockaddr/cmd/sockaddr
|
||||
```
|
||||
|
||||
If you're familiar with UNIX's `sockaddr` structs, the following diagram
|
||||
mapping the C `sockaddr` (top) to `go-sockaddr` structs (bottom) and
|
||||
interfaces will be helpful:
|
||||
|
||||
```
|
||||
+-------------------------------------------------------+
|
||||
| |
|
||||
| sockaddr |
|
||||
| SockAddr |
|
||||
| |
|
||||
| +--------------+ +----------------------------------+ |
|
||||
| | sockaddr_un | | | |
|
||||
| | SockAddrUnix | | sockaddr_in{,6} | |
|
||||
| +--------------+ | IPAddr | |
|
||||
| | | |
|
||||
| | +-------------+ +--------------+ | |
|
||||
| | | sockaddr_in | | sockaddr_in6 | | |
|
||||
| | | IPv4Addr | | IPv6Addr | | |
|
||||
| | +-------------+ +--------------+ | |
|
||||
| | | |
|
||||
| +----------------------------------+ |
|
||||
| |
|
||||
+-------------------------------------------------------+
|
||||
```
|
||||
|
||||
## Inspiration and Design
|
||||
|
||||
There were many subtle inspirations that led to this design, but the most direct
|
||||
inspiration for the filtering syntax was
|
||||
OpenBSD's
|
||||
[`pf.conf(5)`](https://www.freebsd.org/cgi/man.cgi?query=pf.conf&apropos=0&sektion=0&arch=default&format=html#PARAMETERS) firewall
|
||||
syntax that lets you select the first IP address on a given named interface.
|
||||
The original problem stemmed from:
|
||||
|
||||
* needing to create immutable images using [Packer](https://www.packer.io) that
|
||||
ran the [Consul](https://www.consul.io) process (Consul can only use one IP
|
||||
address at a time);
|
||||
* images that may or may not have multiple interfaces or IP addresses at
|
||||
runtime; and
|
||||
* we didn't want to rely on configuration management to render out the correct
|
||||
IP address if the VM image was being used in an auto-scaling group.
|
||||
|
||||
Instead we needed some way to codify a heuristic that would correctly select the
|
||||
right IP address but the input parameters were not known when the image was
|
||||
created.
|
|
@ -0,0 +1,5 @@
|
|||
/*
|
||||
Package sockaddr is a Go implementation of the UNIX socket family data types and
|
||||
related helper functions.
|
||||
*/
|
||||
package sockaddr
|
|
@ -0,0 +1,126 @@
|
|||
package sockaddr
|
||||
|
||||
// ifAddrAttrMap is a map of the IfAddr type-specific attributes.
|
||||
var ifAddrAttrMap map[AttrName]func(IfAddr) string
|
||||
var ifAddrAttrs []AttrName
|
||||
|
||||
func init() {
|
||||
ifAddrAttrInit()
|
||||
}
|
||||
|
||||
// GetPrivateIP returns a string with a single IP address that is part of RFC
|
||||
// 6890 and has a default route. If the system can't determine its IP address
|
||||
// or find an RFC 6890 IP address, an empty string will be returned instead.
|
||||
// This function is the `eval` equivalent of:
|
||||
//
|
||||
// ```
|
||||
// $ sockaddr eval -r '{{GetPrivateInterfaces | attr "address"}}'
|
||||
/// ```
|
||||
func GetPrivateIP() (string, error) {
|
||||
privateIfs, err := GetPrivateInterfaces()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(privateIfs) < 1 {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
ifAddr := privateIfs[0]
|
||||
ip := *ToIPAddr(ifAddr.SockAddr)
|
||||
return ip.NetIP().String(), nil
|
||||
}
|
||||
|
||||
// GetPublicIP returns a string with a single IP address that is NOT part of RFC
|
||||
// 6890 and has a default route. If the system can't determine its IP address
|
||||
// or find a non RFC 6890 IP address, an empty string will be returned instead.
|
||||
// This function is the `eval` equivalent of:
|
||||
//
|
||||
// ```
|
||||
// $ sockaddr eval -r '{{GetPublicInterfaces | attr "address"}}'
|
||||
/// ```
|
||||
func GetPublicIP() (string, error) {
|
||||
publicIfs, err := GetPublicInterfaces()
|
||||
if err != nil {
|
||||
return "", err
|
||||
} else if len(publicIfs) < 1 {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
ifAddr := publicIfs[0]
|
||||
ip := *ToIPAddr(ifAddr.SockAddr)
|
||||
return ip.NetIP().String(), nil
|
||||
}
|
||||
|
||||
// GetInterfaceIP returns a string with a single IP address sorted by the size
|
||||
// of the network (i.e. IP addresses with a smaller netmask, larger network
|
||||
// size, are sorted first). This function is the `eval` equivalent of:
|
||||
//
|
||||
// ```
|
||||
// $ sockaddr eval -r '{{GetAllInterfaces | include "name" <<ARG>> | sort "type,size" | include "flag" "forwardable" | attr "address" }}'
|
||||
/// ```
|
||||
func GetInterfaceIP(namedIfRE string) (string, error) {
|
||||
ifAddrs, err := GetAllInterfaces()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
ifAddrs, _, err = IfByName(namedIfRE, ifAddrs)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
ifAddrs, _, err = IfByFlag("forwardable", ifAddrs)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
ifAddrs, err = SortIfBy("+type,+size", ifAddrs)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if len(ifAddrs) == 0 {
|
||||
return "", err
|
||||
}
|
||||
|
||||
ip := ToIPAddr(ifAddrs[0].SockAddr)
|
||||
if ip == nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return IPAddrAttr(*ip, "address"), nil
|
||||
}
|
||||
|
||||
// IfAddrAttrs returns a list of attributes supported by the IfAddr type
|
||||
func IfAddrAttrs() []AttrName {
|
||||
return ifAddrAttrs
|
||||
}
|
||||
|
||||
// IfAddrAttr returns a string representation of an attribute for the given
|
||||
// IfAddr.
|
||||
func IfAddrAttr(ifAddr IfAddr, attrName AttrName) string {
|
||||
fn, found := ifAddrAttrMap[attrName]
|
||||
if !found {
|
||||
return ""
|
||||
}
|
||||
|
||||
return fn(ifAddr)
|
||||
}
|
||||
|
||||
// ifAddrAttrInit is called once at init()
|
||||
func ifAddrAttrInit() {
|
||||
// Sorted for human readability
|
||||
ifAddrAttrs = []AttrName{
|
||||
"flags",
|
||||
"name",
|
||||
}
|
||||
|
||||
ifAddrAttrMap = map[AttrName]func(ifAddr IfAddr) string{
|
||||
"flags": func(ifAddr IfAddr) string {
|
||||
return ifAddr.Interface.Flags.String()
|
||||
},
|
||||
"name": func(ifAddr IfAddr) string {
|
||||
return ifAddr.Interface.Name
|
||||
},
|
||||
}
|
||||
}
|
|
@ -0,0 +1,969 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// IfAddrs is a slice of IfAddr
|
||||
type IfAddrs []IfAddr
|
||||
|
||||
func (ifs IfAddrs) Len() int { return len(ifs) }
|
||||
|
||||
// CmpIfAddrFunc is the function signature that must be met to be used in the
|
||||
// OrderedIfAddrBy multiIfAddrSorter
|
||||
type CmpIfAddrFunc func(p1, p2 *IfAddr) int
|
||||
|
||||
// multiIfAddrSorter implements the Sort interface, sorting the IfAddrs within.
|
||||
type multiIfAddrSorter struct {
|
||||
ifAddrs IfAddrs
|
||||
cmp []CmpIfAddrFunc
|
||||
}
|
||||
|
||||
// Sort sorts the argument slice according to the Cmp functions passed to
|
||||
// OrderedIfAddrBy.
|
||||
func (ms *multiIfAddrSorter) Sort(ifAddrs IfAddrs) {
|
||||
ms.ifAddrs = ifAddrs
|
||||
sort.Sort(ms)
|
||||
}
|
||||
|
||||
// OrderedIfAddrBy sorts SockAddr by the list of sort function pointers.
|
||||
func OrderedIfAddrBy(cmpFuncs ...CmpIfAddrFunc) *multiIfAddrSorter {
|
||||
return &multiIfAddrSorter{
|
||||
cmp: cmpFuncs,
|
||||
}
|
||||
}
|
||||
|
||||
// Len is part of sort.Interface.
|
||||
func (ms *multiIfAddrSorter) Len() int {
|
||||
return len(ms.ifAddrs)
|
||||
}
|
||||
|
||||
// Less is part of sort.Interface. It is implemented by looping along the Cmp()
|
||||
// functions until it finds a comparison that is either less than or greater
|
||||
// than. A return value of 0 defers sorting to the next function in the
|
||||
// multisorter (which means the results of sorting may leave the resutls in a
|
||||
// non-deterministic order).
|
||||
func (ms *multiIfAddrSorter) Less(i, j int) bool {
|
||||
p, q := &ms.ifAddrs[i], &ms.ifAddrs[j]
|
||||
// Try all but the last comparison.
|
||||
var k int
|
||||
for k = 0; k < len(ms.cmp)-1; k++ {
|
||||
cmp := ms.cmp[k]
|
||||
x := cmp(p, q)
|
||||
switch x {
|
||||
case -1:
|
||||
// p < q, so we have a decision.
|
||||
return true
|
||||
case 1:
|
||||
// p > q, so we have a decision.
|
||||
return false
|
||||
}
|
||||
// p == q; try the next comparison.
|
||||
}
|
||||
// All comparisons to here said "equal", so just return whatever the
|
||||
// final comparison reports.
|
||||
switch ms.cmp[k](p, q) {
|
||||
case -1:
|
||||
return true
|
||||
case 1:
|
||||
return false
|
||||
default:
|
||||
// Still a tie! Now what?
|
||||
return false
|
||||
panic("undefined sort order for remaining items in the list")
|
||||
}
|
||||
}
|
||||
|
||||
// Swap is part of sort.Interface.
|
||||
func (ms *multiIfAddrSorter) Swap(i, j int) {
|
||||
ms.ifAddrs[i], ms.ifAddrs[j] = ms.ifAddrs[j], ms.ifAddrs[i]
|
||||
}
|
||||
|
||||
// AscIfAddress is a sorting function to sort IfAddrs by their respective
|
||||
// address type. Non-equal types are deferred in the sort.
|
||||
func AscIfAddress(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return AscAddress(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// AscIfName is a sorting function to sort IfAddrs by their interface names.
|
||||
func AscIfName(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return strings.Compare(p1Ptr.Name, p2Ptr.Name)
|
||||
}
|
||||
|
||||
// AscIfNetworkSize is a sorting function to sort IfAddrs by their respective
|
||||
// network mask size.
|
||||
func AscIfNetworkSize(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return AscNetworkSize(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// AscIfPort is a sorting function to sort IfAddrs by their respective
|
||||
// port type. Non-equal types are deferred in the sort.
|
||||
func AscIfPort(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return AscPort(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// AscIfPrivate is a sorting function to sort IfAddrs by "private" values before
|
||||
// "public" values. Both IPv4 and IPv6 are compared against RFC6890 (RFC6890
|
||||
// includes, and is not limited to, RFC1918 and RFC6598 for IPv4, and IPv6
|
||||
// includes RFC4193).
|
||||
func AscIfPrivate(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return AscPrivate(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// AscIfType is a sorting function to sort IfAddrs by their respective address
|
||||
// type. Non-equal types are deferred in the sort.
|
||||
func AscIfType(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return AscType(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// DescIfAddress is identical to AscIfAddress but reverse ordered.
|
||||
func DescIfAddress(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return -1 * AscAddress(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// DescIfName is identical to AscIfName but reverse ordered.
|
||||
func DescIfName(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return -1 * strings.Compare(p1Ptr.Name, p2Ptr.Name)
|
||||
}
|
||||
|
||||
// DescIfNetworkSize is identical to AscIfNetworkSize but reverse ordered.
|
||||
func DescIfNetworkSize(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return -1 * AscNetworkSize(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// DescIfPort is identical to AscIfPort but reverse ordered.
|
||||
func DescIfPort(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return -1 * AscPort(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// DescIfPrivate is identical to AscIfPrivate but reverse ordered.
|
||||
func DescIfPrivate(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return -1 * AscPrivate(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// DescIfType is identical to AscIfType but reverse ordered.
|
||||
func DescIfType(p1Ptr, p2Ptr *IfAddr) int {
|
||||
return -1 * AscType(&p1Ptr.SockAddr, &p2Ptr.SockAddr)
|
||||
}
|
||||
|
||||
// FilterIfByType filters IfAddrs and returns a list of the matching type
|
||||
func FilterIfByType(ifAddrs IfAddrs, type_ SockAddrType) (matchedIfs, excludedIfs IfAddrs) {
|
||||
excludedIfs = make(IfAddrs, 0, len(ifAddrs))
|
||||
matchedIfs = make(IfAddrs, 0, len(ifAddrs))
|
||||
|
||||
for _, ifAddr := range ifAddrs {
|
||||
if ifAddr.SockAddr.Type()&type_ != 0 {
|
||||
matchedIfs = append(matchedIfs, ifAddr)
|
||||
} else {
|
||||
excludedIfs = append(excludedIfs, ifAddr)
|
||||
}
|
||||
}
|
||||
return matchedIfs, excludedIfs
|
||||
}
|
||||
|
||||
// IfAttr forwards the selector to IfAttr.Attr() for resolution. If there is
|
||||
// more than one IfAddr, only the first IfAddr is used.
|
||||
func IfAttr(selectorName string, ifAddrs IfAddrs) (string, error) {
|
||||
if len(ifAddrs) == 0 {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
attrName := AttrName(strings.ToLower(selectorName))
|
||||
attrVal, err := ifAddrs[0].Attr(attrName)
|
||||
return attrVal, err
|
||||
}
|
||||
|
||||
// GetAllInterfaces iterates over all available network interfaces and finds all
|
||||
// available IP addresses on each interface and converts them to
|
||||
// sockaddr.IPAddrs, and returning the result as an array of IfAddr.
|
||||
func GetAllInterfaces() (IfAddrs, error) {
|
||||
ifs, err := net.Interfaces()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ifAddrs := make(IfAddrs, 0, len(ifs))
|
||||
for _, intf := range ifs {
|
||||
addrs, err := intf.Addrs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, addr := range addrs {
|
||||
var ipAddr IPAddr
|
||||
ipAddr, err = NewIPAddr(addr.String())
|
||||
if err != nil {
|
||||
return IfAddrs{}, fmt.Errorf("unable to create an IP address from %q", addr.String())
|
||||
}
|
||||
|
||||
ifAddr := IfAddr{
|
||||
SockAddr: ipAddr,
|
||||
Interface: intf,
|
||||
}
|
||||
ifAddrs = append(ifAddrs, ifAddr)
|
||||
}
|
||||
}
|
||||
|
||||
return ifAddrs, nil
|
||||
}
|
||||
|
||||
// GetDefaultInterfaces returns IfAddrs of the addresses attached to the default
|
||||
// route.
|
||||
func GetDefaultInterfaces() (IfAddrs, error) {
|
||||
ri, err := NewRouteInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defaultIfName, err := ri.GetDefaultInterfaceName()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var defaultIfs, ifAddrs IfAddrs
|
||||
ifAddrs, err = GetAllInterfaces()
|
||||
for _, ifAddr := range ifAddrs {
|
||||
if ifAddr.Name == defaultIfName {
|
||||
defaultIfs = append(defaultIfs, ifAddr)
|
||||
}
|
||||
}
|
||||
|
||||
return defaultIfs, nil
|
||||
}
|
||||
|
||||
// GetPrivateInterfaces returns an IfAddrs that are part of RFC 6890 and have a
|
||||
// default route. If the system can't determine its IP address or find an RFC
|
||||
// 6890 IP address, an empty IfAddrs will be returned instead. This function is
|
||||
// the `eval` equivalent of:
|
||||
//
|
||||
// ```
|
||||
// $ sockaddr eval -r '{{GetDefaultInterfaces | include "type" "ip" | include "flags" "forwardable|up" | sort "type,size" | include "RFC" "6890" }}'
|
||||
/// ```
|
||||
func GetPrivateInterfaces() (IfAddrs, error) {
|
||||
privateIfs, err := GetDefaultInterfaces()
|
||||
if err != nil {
|
||||
return IfAddrs{}, err
|
||||
}
|
||||
if len(privateIfs) == 0 {
|
||||
return IfAddrs{}, nil
|
||||
}
|
||||
|
||||
privateIfs, _ = FilterIfByType(privateIfs, TypeIP)
|
||||
if len(privateIfs) == 0 {
|
||||
return IfAddrs{}, nil
|
||||
}
|
||||
|
||||
privateIfs, _, err = IfByFlag("forwardable|up", privateIfs)
|
||||
if err != nil {
|
||||
return IfAddrs{}, err
|
||||
}
|
||||
if len(privateIfs) == 0 {
|
||||
return IfAddrs{}, nil
|
||||
}
|
||||
|
||||
OrderedIfAddrBy(AscIfType, AscIfNetworkSize).Sort(privateIfs)
|
||||
|
||||
privateIfs, _, err = IfByRFC("6890", privateIfs)
|
||||
if err != nil {
|
||||
return IfAddrs{}, err
|
||||
} else if len(privateIfs) == 0 {
|
||||
return IfAddrs{}, nil
|
||||
}
|
||||
|
||||
return privateIfs, nil
|
||||
}
|
||||
|
||||
// GetPublicInterfaces returns an IfAddrs that are NOT part of RFC 6890 and has a
|
||||
// default route. If the system can't determine its IP address or find a non
|
||||
// RFC 6890 IP address, an empty IfAddrs will be returned instead. This
|
||||
// function is the `eval` equivalent of:
|
||||
//
|
||||
// ```
|
||||
// $ sockaddr eval -r '{{GetDefaultInterfaces | include "type" "ip" | include "flags" "forwardable|up" | sort "type,size" | exclude "RFC" "6890" }}'
|
||||
/// ```
|
||||
func GetPublicInterfaces() (IfAddrs, error) {
|
||||
publicIfs, err := GetDefaultInterfaces()
|
||||
if err != nil {
|
||||
return IfAddrs{}, err
|
||||
}
|
||||
if len(publicIfs) == 0 {
|
||||
return IfAddrs{}, nil
|
||||
}
|
||||
|
||||
publicIfs, _ = FilterIfByType(publicIfs, TypeIP)
|
||||
if len(publicIfs) == 0 {
|
||||
return IfAddrs{}, nil
|
||||
}
|
||||
|
||||
publicIfs, _, err = IfByFlag("forwardable|up", publicIfs)
|
||||
if err != nil {
|
||||
return IfAddrs{}, err
|
||||
}
|
||||
if len(publicIfs) == 0 {
|
||||
return IfAddrs{}, nil
|
||||
}
|
||||
|
||||
OrderedIfAddrBy(AscIfType, AscIfNetworkSize).Sort(publicIfs)
|
||||
|
||||
_, publicIfs, err = IfByRFC("6890", publicIfs)
|
||||
if err != nil {
|
||||
return IfAddrs{}, err
|
||||
} else if len(publicIfs) == 0 {
|
||||
return IfAddrs{}, nil
|
||||
}
|
||||
|
||||
return publicIfs, nil
|
||||
}
|
||||
|
||||
// IfByAddress returns a list of matched and non-matched IfAddrs, or an error if
|
||||
// the regexp fails to compile.
|
||||
func IfByAddress(inputRe string, ifAddrs IfAddrs) (matched, remainder IfAddrs, err error) {
|
||||
re, err := regexp.Compile(inputRe)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Unable to compile address regexp %+q: %v", inputRe, err)
|
||||
}
|
||||
|
||||
matchedAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
excludedAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
for _, addr := range ifAddrs {
|
||||
if re.MatchString(addr.SockAddr.String()) {
|
||||
matchedAddrs = append(matchedAddrs, addr)
|
||||
} else {
|
||||
excludedAddrs = append(excludedAddrs, addr)
|
||||
}
|
||||
}
|
||||
|
||||
return matchedAddrs, excludedAddrs, nil
|
||||
}
|
||||
|
||||
// IfByName returns a list of matched and non-matched IfAddrs, or an error if
|
||||
// the regexp fails to compile.
|
||||
func IfByName(inputRe string, ifAddrs IfAddrs) (matched, remainder IfAddrs, err error) {
|
||||
re, err := regexp.Compile(inputRe)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Unable to compile name regexp %+q: %v", inputRe, err)
|
||||
}
|
||||
|
||||
matchedAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
excludedAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
for _, addr := range ifAddrs {
|
||||
if re.MatchString(addr.Name) {
|
||||
matchedAddrs = append(matchedAddrs, addr)
|
||||
} else {
|
||||
excludedAddrs = append(excludedAddrs, addr)
|
||||
}
|
||||
}
|
||||
|
||||
return matchedAddrs, excludedAddrs, nil
|
||||
}
|
||||
|
||||
// IfByPort returns a list of matched and non-matched IfAddrs, or an error if
|
||||
// the regexp fails to compile.
|
||||
func IfByPort(inputRe string, ifAddrs IfAddrs) (matchedIfs, excludedIfs IfAddrs, err error) {
|
||||
re, err := regexp.Compile(inputRe)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Unable to compile port regexp %+q: %v", inputRe, err)
|
||||
}
|
||||
|
||||
ipIfs, nonIfs := FilterIfByType(ifAddrs, TypeIP)
|
||||
matchedIfs = make(IfAddrs, 0, len(ipIfs))
|
||||
excludedIfs = append(IfAddrs(nil), nonIfs...)
|
||||
for _, addr := range ipIfs {
|
||||
ipAddr := ToIPAddr(addr.SockAddr)
|
||||
if ipAddr == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
port := strconv.FormatInt(int64((*ipAddr).IPPort()), 10)
|
||||
if re.MatchString(port) {
|
||||
matchedIfs = append(matchedIfs, addr)
|
||||
} else {
|
||||
excludedIfs = append(excludedIfs, addr)
|
||||
}
|
||||
}
|
||||
|
||||
return matchedIfs, excludedIfs, nil
|
||||
}
|
||||
|
||||
// IfByRFC returns a list of matched and non-matched IfAddrs that contain the
|
||||
// relevant RFC-specified traits.
|
||||
func IfByRFC(selectorParam string, ifAddrs IfAddrs) (matched, remainder IfAddrs, err error) {
|
||||
inputRFC, err := strconv.ParseUint(selectorParam, 10, 64)
|
||||
if err != nil {
|
||||
return IfAddrs{}, IfAddrs{}, fmt.Errorf("unable to parse RFC number %q: %v", selectorParam, err)
|
||||
}
|
||||
|
||||
matchedIfAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
remainingIfAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
|
||||
rfcNetMap := KnownRFCs()
|
||||
rfcNets, ok := rfcNetMap[uint(inputRFC)]
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("unsupported RFC %d", inputRFC)
|
||||
}
|
||||
|
||||
for _, ifAddr := range ifAddrs {
|
||||
var contained bool
|
||||
for _, rfcNet := range rfcNets {
|
||||
if rfcNet.Contains(ifAddr.SockAddr) {
|
||||
matchedIfAddrs = append(matchedIfAddrs, ifAddr)
|
||||
contained = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !contained {
|
||||
remainingIfAddrs = append(remainingIfAddrs, ifAddr)
|
||||
}
|
||||
}
|
||||
|
||||
return matchedIfAddrs, remainingIfAddrs, nil
|
||||
}
|
||||
|
||||
// IfByRFCs returns a list of matched and non-matched IfAddrs that contain the
|
||||
// relevant RFC-specified traits. Multiple RFCs can be specified and separated
|
||||
// by the `|` symbol. No protection is taken to ensure an IfAddr does not end
|
||||
// up in both the included and excluded list.
|
||||
func IfByRFCs(selectorParam string, ifAddrs IfAddrs) (matched, remainder IfAddrs, err error) {
|
||||
var includedIfs, excludedIfs IfAddrs
|
||||
for _, rfcStr := range strings.Split(selectorParam, "|") {
|
||||
includedRFCIfs, excludedRFCIfs, err := IfByRFC(rfcStr, ifAddrs)
|
||||
if err != nil {
|
||||
return IfAddrs{}, IfAddrs{}, fmt.Errorf("unable to lookup RFC number %q: %v", rfcStr, err)
|
||||
}
|
||||
includedIfs = append(includedIfs, includedRFCIfs...)
|
||||
excludedIfs = append(excludedIfs, excludedRFCIfs...)
|
||||
}
|
||||
|
||||
return includedIfs, excludedIfs, nil
|
||||
}
|
||||
|
||||
// IfByMaskSize returns a list of matched and non-matched IfAddrs that have the
|
||||
// matching mask size.
|
||||
func IfByMaskSize(selectorParam string, ifAddrs IfAddrs) (matchedIfs, excludedIfs IfAddrs, err error) {
|
||||
maskSize, err := strconv.ParseUint(selectorParam, 10, 64)
|
||||
if err != nil {
|
||||
return IfAddrs{}, IfAddrs{}, fmt.Errorf("invalid exclude size argument (%q): %v", selectorParam, err)
|
||||
}
|
||||
|
||||
ipIfs, nonIfs := FilterIfByType(ifAddrs, TypeIP)
|
||||
matchedIfs = make(IfAddrs, 0, len(ipIfs))
|
||||
excludedIfs = append(IfAddrs(nil), nonIfs...)
|
||||
for _, addr := range ipIfs {
|
||||
ipAddr := ToIPAddr(addr.SockAddr)
|
||||
if ipAddr == nil {
|
||||
return IfAddrs{}, IfAddrs{}, fmt.Errorf("unable to filter mask sizes on non-IP type %s: %v", addr.SockAddr.Type().String(), addr.SockAddr.String())
|
||||
}
|
||||
|
||||
switch {
|
||||
case (*ipAddr).Type()&TypeIPv4 != 0 && maskSize > 32:
|
||||
return IfAddrs{}, IfAddrs{}, fmt.Errorf("mask size out of bounds for IPv4 address: %d", maskSize)
|
||||
case (*ipAddr).Type()&TypeIPv6 != 0 && maskSize > 128:
|
||||
return IfAddrs{}, IfAddrs{}, fmt.Errorf("mask size out of bounds for IPv6 address: %d", maskSize)
|
||||
}
|
||||
|
||||
if (*ipAddr).Maskbits() == int(maskSize) {
|
||||
matchedIfs = append(matchedIfs, addr)
|
||||
} else {
|
||||
excludedIfs = append(excludedIfs, addr)
|
||||
}
|
||||
}
|
||||
|
||||
return matchedIfs, excludedIfs, nil
|
||||
}
|
||||
|
||||
// IfByType returns a list of matching and non-matching IfAddr that match the
|
||||
// specified type. For instance:
|
||||
//
|
||||
// include "type" "IPv4,IPv6"
|
||||
//
|
||||
// will include any IfAddrs that is either an IPv4 or IPv6 address. Any
|
||||
// addresses on those interfaces that don't match will be included in the
|
||||
// remainder results.
|
||||
func IfByType(inputTypes string, ifAddrs IfAddrs) (matched, remainder IfAddrs, err error) {
|
||||
matchingIfAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
remainingIfAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
|
||||
ifTypes := strings.Split(strings.ToLower(inputTypes), "|")
|
||||
for _, ifType := range ifTypes {
|
||||
switch ifType {
|
||||
case "ip", "ipv4", "ipv6", "unix":
|
||||
// Valid types
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("unsupported type %q %q", ifType, inputTypes)
|
||||
}
|
||||
}
|
||||
|
||||
for _, ifAddr := range ifAddrs {
|
||||
for _, ifType := range ifTypes {
|
||||
var matched bool
|
||||
switch {
|
||||
case ifType == "ip" && ifAddr.SockAddr.Type()&TypeIP != 0:
|
||||
matched = true
|
||||
case ifType == "ipv4" && ifAddr.SockAddr.Type()&TypeIPv4 != 0:
|
||||
matched = true
|
||||
case ifType == "ipv6" && ifAddr.SockAddr.Type()&TypeIPv6 != 0:
|
||||
matched = true
|
||||
case ifType == "unix" && ifAddr.SockAddr.Type()&TypeUnix != 0:
|
||||
matched = true
|
||||
}
|
||||
|
||||
if matched {
|
||||
matchingIfAddrs = append(matchingIfAddrs, ifAddr)
|
||||
} else {
|
||||
remainingIfAddrs = append(remainingIfAddrs, ifAddr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return matchingIfAddrs, remainingIfAddrs, nil
|
||||
}
|
||||
|
||||
// IfByFlag returns a list of matching and non-matching IfAddrs that match the
|
||||
// specified type. For instance:
|
||||
//
|
||||
// include "flag" "up,broadcast"
|
||||
//
|
||||
// will include any IfAddrs that have both the "up" and "broadcast" flags set.
|
||||
// Any addresses on those interfaces that don't match will be omitted from the
|
||||
// results.
|
||||
func IfByFlag(inputFlags string, ifAddrs IfAddrs) (matched, remainder IfAddrs, err error) {
|
||||
matchedAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
excludedAddrs := make(IfAddrs, 0, len(ifAddrs))
|
||||
|
||||
var wantForwardable,
|
||||
wantGlobalUnicast,
|
||||
wantInterfaceLocalMulticast,
|
||||
wantLinkLocalMulticast,
|
||||
wantLinkLocalUnicast,
|
||||
wantLoopback,
|
||||
wantMulticast,
|
||||
wantUnspecified bool
|
||||
var ifFlags net.Flags
|
||||
var checkFlags, checkAttrs bool
|
||||
for _, flagName := range strings.Split(strings.ToLower(inputFlags), "|") {
|
||||
switch flagName {
|
||||
case "broadcast":
|
||||
checkFlags = true
|
||||
ifFlags = ifFlags | net.FlagBroadcast
|
||||
case "down":
|
||||
checkFlags = true
|
||||
ifFlags = (ifFlags &^ net.FlagUp)
|
||||
case "forwardable":
|
||||
checkAttrs = true
|
||||
wantForwardable = true
|
||||
case "global unicast":
|
||||
checkAttrs = true
|
||||
wantGlobalUnicast = true
|
||||
case "interface-local multicast":
|
||||
checkAttrs = true
|
||||
wantInterfaceLocalMulticast = true
|
||||
case "link-local multicast":
|
||||
checkAttrs = true
|
||||
wantLinkLocalMulticast = true
|
||||
case "link-local unicast":
|
||||
checkAttrs = true
|
||||
wantLinkLocalUnicast = true
|
||||
case "loopback":
|
||||
checkAttrs = true
|
||||
checkFlags = true
|
||||
ifFlags = ifFlags | net.FlagLoopback
|
||||
wantLoopback = true
|
||||
case "multicast":
|
||||
checkAttrs = true
|
||||
checkFlags = true
|
||||
ifFlags = ifFlags | net.FlagMulticast
|
||||
wantMulticast = true
|
||||
case "point-to-point":
|
||||
checkFlags = true
|
||||
ifFlags = ifFlags | net.FlagPointToPoint
|
||||
case "unspecified":
|
||||
checkAttrs = true
|
||||
wantUnspecified = true
|
||||
case "up":
|
||||
checkFlags = true
|
||||
ifFlags = ifFlags | net.FlagUp
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("Unknown interface flag: %+q", flagName)
|
||||
}
|
||||
}
|
||||
|
||||
for _, ifAddr := range ifAddrs {
|
||||
var matched bool
|
||||
if checkFlags && ifAddr.Interface.Flags&ifFlags == ifFlags {
|
||||
matched = true
|
||||
}
|
||||
if checkAttrs {
|
||||
if ip := ToIPAddr(ifAddr.SockAddr); ip != nil {
|
||||
netIP := (*ip).NetIP()
|
||||
switch {
|
||||
case wantGlobalUnicast && netIP.IsGlobalUnicast():
|
||||
matched = true
|
||||
case wantInterfaceLocalMulticast && netIP.IsInterfaceLocalMulticast():
|
||||
matched = true
|
||||
case wantLinkLocalMulticast && netIP.IsLinkLocalMulticast():
|
||||
matched = true
|
||||
case wantLinkLocalUnicast && netIP.IsLinkLocalUnicast():
|
||||
matched = true
|
||||
case wantLoopback && netIP.IsLoopback():
|
||||
matched = true
|
||||
case wantMulticast && netIP.IsMulticast():
|
||||
matched = true
|
||||
case wantUnspecified && netIP.IsUnspecified():
|
||||
matched = true
|
||||
case wantForwardable && !IsRFC(ForwardingBlacklist, ifAddr.SockAddr):
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if matched {
|
||||
matchedAddrs = append(matchedAddrs, ifAddr)
|
||||
} else {
|
||||
excludedAddrs = append(excludedAddrs, ifAddr)
|
||||
}
|
||||
}
|
||||
return matchedAddrs, excludedAddrs, nil
|
||||
}
|
||||
|
||||
// IfByNetwork returns an IfAddrs that are equal to or included within the
|
||||
// network passed in by selector.
|
||||
func IfByNetwork(selectorParam string, inputIfAddrs IfAddrs) (IfAddrs, IfAddrs, error) {
|
||||
var includedIfs, excludedIfs IfAddrs
|
||||
for _, netStr := range strings.Split(selectorParam, "|") {
|
||||
netAddr, err := NewIPAddr(netStr)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("unable to create an IP address from %+q: %v", netStr, err)
|
||||
}
|
||||
|
||||
for _, ifAddr := range inputIfAddrs {
|
||||
if netAddr.Contains(ifAddr.SockAddr) {
|
||||
includedIfs = append(includedIfs, ifAddr)
|
||||
} else {
|
||||
excludedIfs = append(excludedIfs, ifAddr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return includedIfs, excludedIfs, nil
|
||||
}
|
||||
|
||||
// IncludeIfs returns an IfAddrs based on the passed in selector.
|
||||
func IncludeIfs(selectorName, selectorParam string, inputIfAddrs IfAddrs) (IfAddrs, error) {
|
||||
var includedIfs IfAddrs
|
||||
var err error
|
||||
|
||||
switch strings.ToLower(selectorName) {
|
||||
case "address":
|
||||
includedIfs, _, err = IfByAddress(selectorParam, inputIfAddrs)
|
||||
case "flag", "flags":
|
||||
includedIfs, _, err = IfByFlag(selectorParam, inputIfAddrs)
|
||||
case "name":
|
||||
includedIfs, _, err = IfByName(selectorParam, inputIfAddrs)
|
||||
case "network":
|
||||
includedIfs, _, err = IfByNetwork(selectorParam, inputIfAddrs)
|
||||
case "port":
|
||||
includedIfs, _, err = IfByPort(selectorParam, inputIfAddrs)
|
||||
case "rfc", "rfcs":
|
||||
includedIfs, _, err = IfByRFCs(selectorParam, inputIfAddrs)
|
||||
case "size":
|
||||
includedIfs, _, err = IfByMaskSize(selectorParam, inputIfAddrs)
|
||||
case "type":
|
||||
includedIfs, _, err = IfByType(selectorParam, inputIfAddrs)
|
||||
default:
|
||||
return IfAddrs{}, fmt.Errorf("invalid include selector %q", selectorName)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return IfAddrs{}, err
|
||||
}
|
||||
|
||||
return includedIfs, nil
|
||||
}
|
||||
|
||||
// ExcludeIfs returns an IfAddrs based on the passed in selector.
|
||||
func ExcludeIfs(selectorName, selectorParam string, inputIfAddrs IfAddrs) (IfAddrs, error) {
|
||||
var excludedIfs IfAddrs
|
||||
var err error
|
||||
|
||||
switch strings.ToLower(selectorName) {
|
||||
case "address":
|
||||
_, excludedIfs, err = IfByAddress(selectorParam, inputIfAddrs)
|
||||
case "flag", "flags":
|
||||
_, excludedIfs, err = IfByFlag(selectorParam, inputIfAddrs)
|
||||
case "name":
|
||||
_, excludedIfs, err = IfByName(selectorParam, inputIfAddrs)
|
||||
case "network":
|
||||
_, excludedIfs, err = IfByNetwork(selectorParam, inputIfAddrs)
|
||||
case "port":
|
||||
_, excludedIfs, err = IfByPort(selectorParam, inputIfAddrs)
|
||||
case "rfc", "rfcs":
|
||||
_, excludedIfs, err = IfByRFCs(selectorParam, inputIfAddrs)
|
||||
case "size":
|
||||
_, excludedIfs, err = IfByMaskSize(selectorParam, inputIfAddrs)
|
||||
case "type":
|
||||
_, excludedIfs, err = IfByType(selectorParam, inputIfAddrs)
|
||||
default:
|
||||
return IfAddrs{}, fmt.Errorf("invalid exclude selector %q", selectorName)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return IfAddrs{}, err
|
||||
}
|
||||
|
||||
return excludedIfs, nil
|
||||
}
|
||||
|
||||
// SortIfBy returns an IfAddrs sorted based on the passed in selector. Multiple
|
||||
// sort clauses can be passed in as a comma delimited list without whitespace.
|
||||
func SortIfBy(selectorParam string, inputIfAddrs IfAddrs) (IfAddrs, error) {
|
||||
sortedIfs := append(IfAddrs(nil), inputIfAddrs...)
|
||||
|
||||
clauses := strings.Split(selectorParam, ",")
|
||||
sortFuncs := make([]CmpIfAddrFunc, len(clauses))
|
||||
|
||||
for i, clause := range clauses {
|
||||
switch strings.TrimSpace(strings.ToLower(clause)) {
|
||||
case "+address", "address":
|
||||
// The "address" selector returns an array of IfAddrs
|
||||
// ordered by the network address. IfAddrs that are not
|
||||
// comparable will be at the end of the list and in a
|
||||
// non-deterministic order.
|
||||
sortFuncs[i] = AscIfAddress
|
||||
case "-address":
|
||||
sortFuncs[i] = DescIfAddress
|
||||
case "+name", "name":
|
||||
// The "name" selector returns an array of IfAddrs
|
||||
// ordered by the interface name.
|
||||
sortFuncs[i] = AscIfName
|
||||
case "-name":
|
||||
sortFuncs[i] = DescIfName
|
||||
case "+port", "port":
|
||||
// The "port" selector returns an array of IfAddrs
|
||||
// ordered by the port, if included in the IfAddr.
|
||||
// IfAddrs that are not comparable will be at the end of
|
||||
// the list and in a non-deterministic order.
|
||||
sortFuncs[i] = AscIfPort
|
||||
case "-port":
|
||||
sortFuncs[i] = DescIfPort
|
||||
case "+private", "private":
|
||||
// The "private" selector returns an array of IfAddrs
|
||||
// ordered by private addresses first. IfAddrs that are
|
||||
// not comparable will be at the end of the list and in
|
||||
// a non-deterministic order.
|
||||
sortFuncs[i] = AscIfPrivate
|
||||
case "-private":
|
||||
sortFuncs[i] = DescIfPrivate
|
||||
case "+size", "size":
|
||||
// The "size" selector returns an array of IfAddrs
|
||||
// ordered by the size of the network mask, smaller mask
|
||||
// (larger number of hosts per network) to largest
|
||||
// (e.g. a /24 sorts before a /32).
|
||||
sortFuncs[i] = AscIfNetworkSize
|
||||
case "-size":
|
||||
sortFuncs[i] = DescIfNetworkSize
|
||||
case "+type", "type":
|
||||
// The "type" selector returns an array of IfAddrs
|
||||
// ordered by the type of the IfAddr. The sort order is
|
||||
// Unix, IPv4, then IPv6.
|
||||
sortFuncs[i] = AscIfType
|
||||
case "-type":
|
||||
sortFuncs[i] = DescIfType
|
||||
default:
|
||||
// Return an empty list for invalid sort types.
|
||||
return IfAddrs{}, fmt.Errorf("unknown sort type: %q", clause)
|
||||
}
|
||||
}
|
||||
|
||||
OrderedIfAddrBy(sortFuncs...).Sort(sortedIfs)
|
||||
|
||||
return sortedIfs, nil
|
||||
}
|
||||
|
||||
// UniqueIfAddrsBy creates a unique set of IfAddrs based on the matching
|
||||
// selector. UniqueIfAddrsBy assumes the input has already been sorted.
|
||||
func UniqueIfAddrsBy(selectorName string, inputIfAddrs IfAddrs) (IfAddrs, error) {
|
||||
attrName := strings.ToLower(selectorName)
|
||||
|
||||
ifs := make(IfAddrs, 0, len(inputIfAddrs))
|
||||
var lastMatch string
|
||||
for _, ifAddr := range inputIfAddrs {
|
||||
var out string
|
||||
switch attrName {
|
||||
case "address":
|
||||
out = ifAddr.SockAddr.String()
|
||||
case "name":
|
||||
out = ifAddr.Name
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported unique constraint %+q", selectorName)
|
||||
}
|
||||
|
||||
switch {
|
||||
case lastMatch == "", lastMatch != out:
|
||||
lastMatch = out
|
||||
ifs = append(ifs, ifAddr)
|
||||
case lastMatch == out:
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return ifs, nil
|
||||
}
|
||||
|
||||
// JoinIfAddrs joins an IfAddrs and returns a string
|
||||
func JoinIfAddrs(selectorName string, joinStr string, inputIfAddrs IfAddrs) (string, error) {
|
||||
outputs := make([]string, 0, len(inputIfAddrs))
|
||||
attrName := AttrName(strings.ToLower(selectorName))
|
||||
|
||||
for _, ifAddr := range inputIfAddrs {
|
||||
var attrVal string
|
||||
var err error
|
||||
attrVal, err = ifAddr.Attr(attrName)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
outputs = append(outputs, attrVal)
|
||||
}
|
||||
return strings.Join(outputs, joinStr), nil
|
||||
}
|
||||
|
||||
// LimitIfAddrs returns a slice of IfAddrs based on the specified limit.
|
||||
func LimitIfAddrs(lim uint, in IfAddrs) (IfAddrs, error) {
|
||||
// Clamp the limit to the length of the array
|
||||
if int(lim) > len(in) {
|
||||
lim = uint(len(in))
|
||||
}
|
||||
|
||||
return in[0:lim], nil
|
||||
}
|
||||
|
||||
// OffsetIfAddrs returns a slice of IfAddrs based on the specified offset.
|
||||
func OffsetIfAddrs(off int, in IfAddrs) (IfAddrs, error) {
|
||||
var end bool
|
||||
if off < 0 {
|
||||
end = true
|
||||
off = off * -1
|
||||
}
|
||||
|
||||
if off > len(in) {
|
||||
return IfAddrs{}, fmt.Errorf("unable to seek past the end of the interface array: offset (%d) exceeds the number of interfaces (%d)", off, len(in))
|
||||
}
|
||||
|
||||
if end {
|
||||
return in[len(in)-off:], nil
|
||||
}
|
||||
return in[off:], nil
|
||||
}
|
||||
|
||||
func (ifAddr IfAddr) String() string {
|
||||
return fmt.Sprintf("%s %v", ifAddr.SockAddr, ifAddr.Interface)
|
||||
}
|
||||
|
||||
// parseDefaultIfNameFromRoute extracts the default interface name from the
// output of route(8) on the *BSDs and Solaris.  The output is scanned line by
// line for a "key: value" pair whose key, ignoring surrounding whitespace, is
// "interface"; the trimmed value of the first such line is returned.  An error
// is returned when no line qualifies.
func parseDefaultIfNameFromRoute(routeOut string) (string, error) {
	for _, line := range strings.Split(routeOut, "\n") {
		// Split on the first colon only; the value may itself contain colons.
		sep := strings.IndexByte(line, ':')
		if sep < 0 {
			continue
		}

		if strings.TrimSpace(line[:sep]) != "interface" {
			continue
		}
		return strings.TrimSpace(line[sep+1:]), nil
	}

	return "", errors.New("No default interface found")
}
|
||||
|
||||
// parseDefaultIfNameFromIPCmd extracts the default interface name from the
// output of ip(8) on Linux.  Each line is split on runs of whitespace; the
// first line of the form "default via <gateway> dev <name> ..." wins and
// <name> is returned.  An error is returned when no line has that shape.
func parseDefaultIfNameFromIPCmd(routeOut string) (string, error) {
	whitespace := regexp.MustCompile(`[\s]+`)

	for _, line := range strings.Split(routeOut, "\n") {
		fields := whitespace.Split(line, -1)
		if len(fields) < 5 {
			continue
		}

		isDefaultRoute := fields[0] == "default" && fields[1] == "via" && fields[3] == "dev"
		if !isDefaultRoute {
			continue
		}
		return strings.TrimSpace(fields[4]), nil
	}

	return "", errors.New("No default interface found")
}
|
||||
|
||||
// parseDefaultIfNameWindows parses the default interface from `netstat -rn` and
|
||||
// `ipconfig` on Windows.
|
||||
func parseDefaultIfNameWindows(routeOut, ipconfigOut string) (string, error) {
|
||||
defaultIPAddr, err := parseDefaultIPAddrWindowsRoute(routeOut)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
ifName, err := parseDefaultIfNameWindowsIPConfig(defaultIPAddr, ipconfigOut)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return ifName, nil
|
||||
}
|
||||
|
||||
// parseDefaultIPAddrWindowsRoute extracts, from the output of `netstat -rn`,
// the IP address of the interface that carries the default route.  Each line
// is trimmed and split on runs of whitespace; the first line whose first two
// fields are both "0.0.0.0" is taken as the default route and its fourth field
// is returned.  An error is returned when no such line exists.
//
// NOTES(sean): Only IPv4 addresses are parsed at this time.  If you have an
// IPv6 connected host, submit an issue on github.com/hashicorp/go-sockaddr with
// the output from `netstat -rn`, `ipconfig`, and version of Windows to see IPv6
// support added.
func parseDefaultIPAddrWindowsRoute(routeOut string) (string, error) {
	lines := strings.Split(routeOut, "\n")
	re := regexp.MustCompile(`[\s]+`)
	for _, line := range lines {
		kvs := re.Split(strings.TrimSpace(line), -1)
		// The fourth field (index 3) is read below, so at least four fields
		// are required; shorter lines are skipped rather than indexed out of
		// range.
		if len(kvs) < 4 {
			continue
		}

		if kvs[0] == "0.0.0.0" && kvs[1] == "0.0.0.0" {
			defaultIPAddr := strings.TrimSpace(kvs[3])
			return defaultIPAddr, nil
		}
	}

	return "", errors.New("No IP on default interface found")
}
|
||||
|
||||
// parseDefaultIfNameWindowsIPConfig scans the output of `ipconfig` (passed in
// as routeOut) for the adapter that owns defaultIPAddr and returns that
// adapter's name.
//
// The output is processed line by line.  A header of the form
// "Ethernet adapter <name>:" or "Wireless LAN adapter <name>:" starts a new
// adapter section; <name> may contain spaces.  An indented
// "IPv4 Address. . . : <ip>" or "IPv6 Address. . . : <ip>" line whose address
// equals defaultIPAddr ends the search and the most recent adapter name is
// returned.  An error is returned when no address line matches.
func parseDefaultIfNameWindowsIPConfig(defaultIPAddr, routeOut string) (string, error) {
	// Adapter names run up to the terminating colon so that names such as
	// "Local Area Connection" are captured whole.
	ifNameRE := regexp.MustCompile(`^(?:Ethernet|Wireless LAN) adapter ([^:]+):`)
	// Any amount of leading indentation is accepted before the label.
	ipAddrRE := regexp.MustCompile(`^\s+IPv[46] Address\. \. \. \. \. \. \. \. \. \. \. : ([^\s]+)`)

	var ifName string
	for _, line := range strings.Split(routeOut, "\n") {
		if m := ifNameRE.FindStringSubmatch(line); len(m) > 1 {
			ifName = m[1]
			continue
		}

		if m := ipAddrRE.FindStringSubmatch(line); len(m) > 1 && m[1] == defaultIPAddr {
			return ifName, nil
		}
	}

	return "", errors.New("No default interface found with matching IP")
}
|
|
@ -0,0 +1,65 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
)
|
||||
|
||||
// IfAddr is a union of a SockAddr and a net.Interface.
|
||||
type IfAddr struct {
|
||||
SockAddr
|
||||
net.Interface
|
||||
}
|
||||
|
||||
// Attr returns the named attribute as a string
|
||||
func (ifAddr IfAddr) Attr(attrName AttrName) (string, error) {
|
||||
val := IfAddrAttr(ifAddr, attrName)
|
||||
if val != "" {
|
||||
return val, nil
|
||||
}
|
||||
|
||||
return Attr(ifAddr.SockAddr, attrName)
|
||||
}
|
||||
|
||||
// Attr returns the named attribute as a string
|
||||
func Attr(sa SockAddr, attrName AttrName) (string, error) {
|
||||
switch sockType := sa.Type(); {
|
||||
case sockType&TypeIP != 0:
|
||||
ip := *ToIPAddr(sa)
|
||||
attrVal := IPAddrAttr(ip, attrName)
|
||||
if attrVal != "" {
|
||||
return attrVal, nil
|
||||
}
|
||||
|
||||
if sockType == TypeIPv4 {
|
||||
ipv4 := *ToIPv4Addr(sa)
|
||||
attrVal := IPv4AddrAttr(ipv4, attrName)
|
||||
if attrVal != "" {
|
||||
return attrVal, nil
|
||||
}
|
||||
} else if sockType == TypeIPv6 {
|
||||
ipv6 := *ToIPv6Addr(sa)
|
||||
attrVal := IPv6AddrAttr(ipv6, attrName)
|
||||
if attrVal != "" {
|
||||
return attrVal, nil
|
||||
}
|
||||
}
|
||||
|
||||
case sockType == TypeUnix:
|
||||
us := *ToUnixSock(sa)
|
||||
attrVal := UnixSockAttr(us, attrName)
|
||||
if attrVal != "" {
|
||||
return attrVal, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Non type-specific attributes
|
||||
switch attrName {
|
||||
case "string":
|
||||
return sa.String(), nil
|
||||
case "type":
|
||||
return sa.Type().String(), nil
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("unsupported attribute name %q", attrName)
|
||||
}
|
|
@ -0,0 +1,169 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/big"
|
||||
"net"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Constants for the sizes of IPv3, IPv4, and IPv6 address types.
|
||||
const (
|
||||
IPv3len = 6
|
||||
IPv4len = 4
|
||||
IPv6len = 16
|
||||
)
|
||||
|
||||
// IPAddr is a generic IP address interface for IPv4 and IPv6 addresses,
|
||||
// networks, and socket endpoints.
|
||||
type IPAddr interface {
|
||||
SockAddr
|
||||
AddressBinString() string
|
||||
AddressHexString() string
|
||||
Cmp(SockAddr) int
|
||||
CmpAddress(SockAddr) int
|
||||
CmpPort(SockAddr) int
|
||||
FirstUsable() IPAddr
|
||||
Host() IPAddr
|
||||
IPPort() IPPort
|
||||
LastUsable() IPAddr
|
||||
Maskbits() int
|
||||
NetIP() *net.IP
|
||||
NetIPMask() *net.IPMask
|
||||
NetIPNet() *net.IPNet
|
||||
Network() IPAddr
|
||||
Octets() []int
|
||||
}
|
||||
|
||||
// IPPort is the type for an IP port number for the TCP and UDP IP transports.
|
||||
type IPPort uint16
|
||||
|
||||
// IPPrefixLen is a typed integer representing the prefix length for a given
|
||||
// IPAddr.
|
||||
type IPPrefixLen byte
|
||||
|
||||
// ipAddrAttrMap is a map of the IPAddr type-specific attributes.
|
||||
var ipAddrAttrMap map[AttrName]func(IPAddr) string
|
||||
var ipAddrAttrs []AttrName
|
||||
|
||||
func init() {
|
||||
ipAddrInit()
|
||||
}
|
||||
|
||||
// NewIPAddr creates a new IPAddr from a string. Returns nil if the string is
|
||||
// not an IPv4 or an IPv6 address.
|
||||
func NewIPAddr(addr string) (IPAddr, error) {
|
||||
ipv4Addr, err := NewIPv4Addr(addr)
|
||||
if err == nil {
|
||||
return ipv4Addr, nil
|
||||
}
|
||||
|
||||
ipv6Addr, err := NewIPv6Addr(addr)
|
||||
if err == nil {
|
||||
return ipv6Addr, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("invalid IPAddr %v", addr)
|
||||
}
|
||||
|
||||
// IPAddrAttr returns a string representation of an attribute for the given
|
||||
// IPAddr.
|
||||
func IPAddrAttr(ip IPAddr, selector AttrName) string {
|
||||
fn, found := ipAddrAttrMap[selector]
|
||||
if !found {
|
||||
return ""
|
||||
}
|
||||
|
||||
return fn(ip)
|
||||
}
|
||||
|
||||
// IPAttrs returns a list of attributes supported by the IPAddr type
|
||||
func IPAttrs() []AttrName {
|
||||
return ipAddrAttrs
|
||||
}
|
||||
|
||||
// MustIPAddr is a helper method that must return an IPAddr or panic on invalid
|
||||
// input.
|
||||
func MustIPAddr(addr string) IPAddr {
|
||||
ip, err := NewIPAddr(addr)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Unable to create an IPAddr from %+q: %v", addr, err))
|
||||
}
|
||||
return ip
|
||||
}
|
||||
|
||||
// ipAddrInit is called once at init()
|
||||
func ipAddrInit() {
|
||||
// Sorted for human readability
|
||||
ipAddrAttrs = []AttrName{
|
||||
"host",
|
||||
"address",
|
||||
"port",
|
||||
"netmask",
|
||||
"network",
|
||||
"mask_bits",
|
||||
"binary",
|
||||
"hex",
|
||||
"first_usable",
|
||||
"last_usable",
|
||||
"octets",
|
||||
}
|
||||
|
||||
ipAddrAttrMap = map[AttrName]func(ip IPAddr) string{
|
||||
"address": func(ip IPAddr) string {
|
||||
return ip.NetIP().String()
|
||||
},
|
||||
"binary": func(ip IPAddr) string {
|
||||
return ip.AddressBinString()
|
||||
},
|
||||
"first_usable": func(ip IPAddr) string {
|
||||
return ip.FirstUsable().String()
|
||||
},
|
||||
"hex": func(ip IPAddr) string {
|
||||
return ip.AddressHexString()
|
||||
},
|
||||
"host": func(ip IPAddr) string {
|
||||
return ip.Host().String()
|
||||
},
|
||||
"last_usable": func(ip IPAddr) string {
|
||||
return ip.LastUsable().String()
|
||||
},
|
||||
"mask_bits": func(ip IPAddr) string {
|
||||
return fmt.Sprintf("%d", ip.Maskbits())
|
||||
},
|
||||
"netmask": func(ip IPAddr) string {
|
||||
switch v := ip.(type) {
|
||||
case IPv4Addr:
|
||||
ipv4Mask := IPv4Addr{
|
||||
Address: IPv4Address(v.Mask),
|
||||
Mask: IPv4HostMask,
|
||||
}
|
||||
return ipv4Mask.String()
|
||||
case IPv6Addr:
|
||||
ipv6Mask := new(big.Int)
|
||||
ipv6Mask.Set(v.Mask)
|
||||
ipv6MaskAddr := IPv6Addr{
|
||||
Address: IPv6Address(ipv6Mask),
|
||||
Mask: ipv6HostMask,
|
||||
}
|
||||
return ipv6MaskAddr.String()
|
||||
default:
|
||||
return fmt.Sprintf("<unsupported type: %T>", ip)
|
||||
}
|
||||
},
|
||||
"network": func(ip IPAddr) string {
|
||||
return ip.Network().NetIP().String()
|
||||
},
|
||||
"octets": func(ip IPAddr) string {
|
||||
octets := ip.Octets()
|
||||
octetStrs := make([]string, 0, len(octets))
|
||||
for _, octet := range octets {
|
||||
octetStrs = append(octetStrs, fmt.Sprintf("%d", octet))
|
||||
}
|
||||
return strings.Join(octetStrs, " ")
|
||||
},
|
||||
"port": func(ip IPAddr) string {
|
||||
return fmt.Sprintf("%d", ip.IPPort())
|
||||
},
|
||||
}
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
package sockaddr
|
||||
|
||||
import "bytes"
|
||||
|
||||
type IPAddrs []IPAddr
|
||||
|
||||
func (s IPAddrs) Len() int { return len(s) }
|
||||
func (s IPAddrs) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
|
||||
// // SortIPAddrsByCmp is a type that satisfies sort.Interface and can be used
|
||||
// // by the routines in this package. The SortIPAddrsByCmp type is used to
|
||||
// // sort IPAddrs by Cmp()
|
||||
// type SortIPAddrsByCmp struct{ IPAddrs }
|
||||
|
||||
// // Less reports whether the element with index i should sort before the
|
||||
// // element with index j.
|
||||
// func (s SortIPAddrsByCmp) Less(i, j int) bool {
|
||||
// // Sort by Type, then address, then port number.
|
||||
// return Less(s.IPAddrs[i], s.IPAddrs[j])
|
||||
// }
|
||||
|
||||
// SortIPAddrsBySpecificMaskLen is a type that satisfies sort.Interface and
|
||||
// can be used by the routines in this package. The
|
||||
// SortIPAddrsBySpecificMaskLen type is used to sort IPAddrs by smallest
|
||||
// network (most specific to largest network).
|
||||
type SortIPAddrsByNetworkSize struct{ IPAddrs }
|
||||
|
||||
// Less reports whether the element with index i should sort before the
|
||||
// element with index j.
|
||||
func (s SortIPAddrsByNetworkSize) Less(i, j int) bool {
|
||||
// Sort masks with a larger binary value (i.e. fewer hosts per network
|
||||
// prefix) after masks with a smaller value (larger number of hosts per
|
||||
// prefix).
|
||||
switch bytes.Compare([]byte(*s.IPAddrs[i].NetIPMask()), []byte(*s.IPAddrs[j].NetIPMask())) {
|
||||
case 0:
|
||||
// Fall through to the second test if the net.IPMasks are the
|
||||
// same.
|
||||
break
|
||||
case 1:
|
||||
return true
|
||||
case -1:
|
||||
return false
|
||||
default:
|
||||
panic("bad, m'kay?")
|
||||
}
|
||||
|
||||
// Sort IPs based on the length (i.e. prefer IPv4 over IPv6).
|
||||
iLen := len(*s.IPAddrs[i].NetIP())
|
||||
jLen := len(*s.IPAddrs[j].NetIP())
|
||||
if iLen != jLen {
|
||||
return iLen > jLen
|
||||
}
|
||||
|
||||
// Sort IPs based on their network address from lowest to highest.
|
||||
switch bytes.Compare(s.IPAddrs[i].NetIPNet().IP, s.IPAddrs[j].NetIPNet().IP) {
|
||||
case 0:
|
||||
break
|
||||
case 1:
|
||||
return false
|
||||
case -1:
|
||||
return true
|
||||
default:
|
||||
panic("lol wut?")
|
||||
}
|
||||
|
||||
// If a host does not have a port set, it always sorts after hosts
|
||||
// that have a port (e.g. a host with a /32 and port number is more
|
||||
// specific and should sort first over a host with a /32 but no port
|
||||
// set).
|
||||
if s.IPAddrs[i].IPPort() == 0 || s.IPAddrs[j].IPPort() == 0 {
|
||||
return false
|
||||
}
|
||||
return s.IPAddrs[i].IPPort() < s.IPAddrs[j].IPPort()
|
||||
}
|
||||
|
||||
// SortIPAddrsBySpecificMaskLen is a type that satisfies sort.Interface and
|
||||
// can be used by the routines in this package. The
|
||||
// SortIPAddrsBySpecificMaskLen type is used to sort IPAddrs by smallest
|
||||
// network (most specific to largest network).
|
||||
type SortIPAddrsBySpecificMaskLen struct{ IPAddrs }
|
||||
|
||||
// Less reports whether the element with index i should sort before the
|
||||
// element with index j.
|
||||
func (s SortIPAddrsBySpecificMaskLen) Less(i, j int) bool {
|
||||
return s.IPAddrs[i].Maskbits() > s.IPAddrs[j].Maskbits()
|
||||
}
|
||||
|
||||
// SortIPAddrsByBroadMaskLen is a type that satisfies sort.Interface and can
|
||||
// be used by the routines in this package. The SortIPAddrsByBroadMaskLen
|
||||
// type is used to sort IPAddrs by largest network (i.e. largest subnets
|
||||
// first).
|
||||
type SortIPAddrsByBroadMaskLen struct{ IPAddrs }
|
||||
|
||||
// Less reports whether the element with index i should sort before the
|
||||
// element with index j.
|
||||
func (s SortIPAddrsByBroadMaskLen) Less(i, j int) bool {
|
||||
return s.IPAddrs[i].Maskbits() < s.IPAddrs[j].Maskbits()
|
||||
}
|
|
@ -0,0 +1,515 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"net"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type (
|
||||
// IPv4Address is a named type representing an IPv4 address.
|
||||
IPv4Address uint32
|
||||
|
||||
// IPv4Network is a named type representing an IPv4 network.
|
||||
IPv4Network uint32
|
||||
|
||||
// IPv4Mask is a named type representing an IPv4 network mask.
|
||||
IPv4Mask uint32
|
||||
)
|
||||
|
||||
// IPv4HostMask is a constant that represents a /32 IPv4 Address
|
||||
// (i.e. 255.255.255.255).
|
||||
const IPv4HostMask = IPv4Mask(0xffffffff)
|
||||
|
||||
// ipv4AddrAttrMap is a map of the IPv4Addr type-specific attributes.
|
||||
var ipv4AddrAttrMap map[AttrName]func(IPv4Addr) string
|
||||
var ipv4AddrAttrs []AttrName
|
||||
var trailingHexNetmaskRE *regexp.Regexp
|
||||
|
||||
// IPv4Addr implements a convenience wrapper around the union of Go's
|
||||
// built-in net.IP and net.IPNet types. In UNIX-speak, IPv4Addr implements
|
||||
// `sockaddr` when the the address family is set to AF_INET
|
||||
// (i.e. `sockaddr_in`).
|
||||
type IPv4Addr struct {
|
||||
IPAddr
|
||||
Address IPv4Address
|
||||
Mask IPv4Mask
|
||||
Port IPPort
|
||||
}
|
||||
|
||||
func init() {
|
||||
ipv4AddrInit()
|
||||
trailingHexNetmaskRE = regexp.MustCompile(`/([0f]{8})$`)
|
||||
}
|
||||
|
||||
// NewIPv4Addr creates an IPv4Addr from a string. String can be in the form
|
||||
// of either an IPv4:port (e.g. `1.2.3.4:80`, in which case the mask is
|
||||
// assumed to be a `/32`), an IPv4 address (e.g. `1.2.3.4`, also with a `/32`
|
||||
// mask), or an IPv4 CIDR (e.g. `1.2.3.4/24`, which has its IP port
|
||||
// initialized to zero). ipv4Str can not be a hostname.
|
||||
//
|
||||
// NOTE: Many net.*() routines will initialize and return an IPv6 address.
|
||||
// To create uint32 values from net.IP, always test to make sure the address
|
||||
// returned can be converted to a 4 byte array using To4().
|
||||
func NewIPv4Addr(ipv4Str string) (IPv4Addr, error) {
|
||||
// Strip off any bogus hex-encoded netmasks that will be mis-parsed by Go. In
|
||||
// particular, clients with the Barracuda VPN client will see something like:
|
||||
// `192.168.3.51/00ffffff` as their IP address.
|
||||
if match := trailingHexNetmaskRE.FindStringIndex(ipv4Str); match != nil {
|
||||
ipv4Str = ipv4Str[:match[0]]
|
||||
}
|
||||
|
||||
// Parse as an IPv4 CIDR
|
||||
ipAddr, network, err := net.ParseCIDR(ipv4Str)
|
||||
if err == nil {
|
||||
ipv4 := ipAddr.To4()
|
||||
if ipv4 == nil {
|
||||
return IPv4Addr{}, fmt.Errorf("Unable to convert %s to an IPv4 address", ipv4Str)
|
||||
}
|
||||
|
||||
// If we see an IPv6 netmask, convert it to an IPv4 mask.
|
||||
netmaskSepPos := strings.LastIndexByte(ipv4Str, '/')
|
||||
if netmaskSepPos != -1 && netmaskSepPos+1 < len(ipv4Str) {
|
||||
netMask, err := strconv.ParseUint(ipv4Str[netmaskSepPos+1:], 10, 8)
|
||||
if err != nil {
|
||||
return IPv4Addr{}, fmt.Errorf("Unable to convert %s to an IPv4 address: unable to parse CIDR netmask: %v", ipv4Str, err)
|
||||
} else if netMask > 128 {
|
||||
return IPv4Addr{}, fmt.Errorf("Unable to convert %s to an IPv4 address: invalid CIDR netmask", ipv4Str)
|
||||
}
|
||||
|
||||
if netMask >= 96 {
|
||||
// Convert the IPv6 netmask to an IPv4 netmask
|
||||
network.Mask = net.CIDRMask(int(netMask-96), IPv4len*8)
|
||||
}
|
||||
}
|
||||
ipv4Addr := IPv4Addr{
|
||||
Address: IPv4Address(binary.BigEndian.Uint32(ipv4)),
|
||||
Mask: IPv4Mask(binary.BigEndian.Uint32(network.Mask)),
|
||||
}
|
||||
return ipv4Addr, nil
|
||||
}
|
||||
|
||||
// Attempt to parse ipv4Str as a /32 host with a port number.
|
||||
tcpAddr, err := net.ResolveTCPAddr("tcp4", ipv4Str)
|
||||
if err == nil {
|
||||
ipv4 := tcpAddr.IP.To4()
|
||||
if ipv4 == nil {
|
||||
return IPv4Addr{}, fmt.Errorf("Unable to resolve %+q as an IPv4 address", ipv4Str)
|
||||
}
|
||||
|
||||
ipv4Uint32 := binary.BigEndian.Uint32(ipv4)
|
||||
ipv4Addr := IPv4Addr{
|
||||
Address: IPv4Address(ipv4Uint32),
|
||||
Mask: IPv4HostMask,
|
||||
Port: IPPort(tcpAddr.Port),
|
||||
}
|
||||
|
||||
return ipv4Addr, nil
|
||||
}
|
||||
|
||||
// Parse as a naked IPv4 address
|
||||
ip := net.ParseIP(ipv4Str)
|
||||
if ip != nil {
|
||||
ipv4 := ip.To4()
|
||||
if ipv4 == nil {
|
||||
return IPv4Addr{}, fmt.Errorf("Unable to string convert %+q to an IPv4 address", ipv4Str)
|
||||
}
|
||||
|
||||
ipv4Uint32 := binary.BigEndian.Uint32(ipv4)
|
||||
ipv4Addr := IPv4Addr{
|
||||
Address: IPv4Address(ipv4Uint32),
|
||||
Mask: IPv4HostMask,
|
||||
}
|
||||
return ipv4Addr, nil
|
||||
}
|
||||
|
||||
return IPv4Addr{}, fmt.Errorf("Unable to parse %+q to an IPv4 address: %v", ipv4Str, err)
|
||||
}
|
||||
|
||||
// AddressBinString returns a string with the IPv4Addr's Address represented
|
||||
// as a sequence of '0' and '1' characters. This method is useful for
|
||||
// debugging or by operators who want to inspect an address.
|
||||
func (ipv4 IPv4Addr) AddressBinString() string {
|
||||
return fmt.Sprintf("%032s", strconv.FormatUint(uint64(ipv4.Address), 2))
|
||||
}
|
||||
|
||||
// AddressHexString returns a string with the IPv4Addr address represented as
|
||||
// a sequence of hex characters. This method is useful for debugging or by
|
||||
// operators who want to inspect an address.
|
||||
func (ipv4 IPv4Addr) AddressHexString() string {
|
||||
return fmt.Sprintf("%08s", strconv.FormatUint(uint64(ipv4.Address), 16))
|
||||
}
|
||||
|
||||
// Broadcast is an IPv4Addr-only method that returns the broadcast address of
|
||||
// the network.
|
||||
//
|
||||
// NOTE: IPv6 only supports multicast, so this method only exists for
|
||||
// IPv4Addr.
|
||||
func (ipv4 IPv4Addr) Broadcast() IPAddr {
|
||||
// Nothing should listen on a broadcast address.
|
||||
return IPv4Addr{
|
||||
Address: IPv4Address(ipv4.BroadcastAddress()),
|
||||
Mask: IPv4HostMask,
|
||||
}
|
||||
}
|
||||
|
||||
// BroadcastAddress returns a IPv4Network of the IPv4Addr's broadcast
|
||||
// address.
|
||||
func (ipv4 IPv4Addr) BroadcastAddress() IPv4Network {
|
||||
return IPv4Network(uint32(ipv4.Address)&uint32(ipv4.Mask) | ^uint32(ipv4.Mask))
|
||||
}
|
||||
|
||||
// CmpAddress follows the Cmp() standard protocol and returns:
|
||||
//
|
||||
// - -1 If the receiver should sort first because its address is lower than arg
|
||||
// - 0 if the SockAddr arg is equal to the receiving IPv4Addr or the argument is
|
||||
// of a different type.
|
||||
// - 1 If the argument should sort first.
|
||||
func (ipv4 IPv4Addr) CmpAddress(sa SockAddr) int {
|
||||
ipv4b, ok := sa.(IPv4Addr)
|
||||
if !ok {
|
||||
return sortDeferDecision
|
||||
}
|
||||
|
||||
switch {
|
||||
case ipv4.Address == ipv4b.Address:
|
||||
return sortDeferDecision
|
||||
case ipv4.Address < ipv4b.Address:
|
||||
return sortReceiverBeforeArg
|
||||
default:
|
||||
return sortArgBeforeReceiver
|
||||
}
|
||||
}
|
||||
|
||||
// CmpPort follows the Cmp() standard protocol and returns:
|
||||
//
|
||||
// - -1 If the receiver should sort first because its port is lower than arg
|
||||
// - 0 if the SockAddr arg's port number is equal to the receiving IPv4Addr,
|
||||
// regardless of type.
|
||||
// - 1 If the argument should sort first.
|
||||
func (ipv4 IPv4Addr) CmpPort(sa SockAddr) int {
|
||||
var saPort IPPort
|
||||
switch v := sa.(type) {
|
||||
case IPv4Addr:
|
||||
saPort = v.Port
|
||||
case IPv6Addr:
|
||||
saPort = v.Port
|
||||
default:
|
||||
return sortDeferDecision
|
||||
}
|
||||
|
||||
switch {
|
||||
case ipv4.Port == saPort:
|
||||
return sortDeferDecision
|
||||
case ipv4.Port < saPort:
|
||||
return sortReceiverBeforeArg
|
||||
default:
|
||||
return sortArgBeforeReceiver
|
||||
}
|
||||
}
|
||||
|
||||
// CmpRFC follows the Cmp() standard protocol and returns:
|
||||
//
|
||||
// - -1 If the receiver should sort first because it belongs to the RFC and its
|
||||
// arg does not
|
||||
// - 0 if the receiver and arg both belong to the same RFC or neither do.
|
||||
// - 1 If the arg belongs to the RFC but receiver does not.
|
||||
func (ipv4 IPv4Addr) CmpRFC(rfcNum uint, sa SockAddr) int {
|
||||
recvInRFC := IsRFC(rfcNum, ipv4)
|
||||
ipv4b, ok := sa.(IPv4Addr)
|
||||
if !ok {
|
||||
// If the receiver is part of the desired RFC and the SockAddr
|
||||
// argument is not, return -1 so that the receiver sorts before
|
||||
// the non-IPv4 SockAddr. Conversely, if the receiver is not
|
||||
// part of the RFC, punt on sorting and leave it for the next
|
||||
// sorter.
|
||||
if recvInRFC {
|
||||
return sortReceiverBeforeArg
|
||||
} else {
|
||||
return sortDeferDecision
|
||||
}
|
||||
}
|
||||
|
||||
argInRFC := IsRFC(rfcNum, ipv4b)
|
||||
switch {
|
||||
case (recvInRFC && argInRFC), (!recvInRFC && !argInRFC):
|
||||
// If a and b both belong to the RFC, or neither belong to
|
||||
// rfcNum, defer sorting to the next sorter.
|
||||
return sortDeferDecision
|
||||
case recvInRFC && !argInRFC:
|
||||
return sortReceiverBeforeArg
|
||||
default:
|
||||
return sortArgBeforeReceiver
|
||||
}
|
||||
}
|
||||
|
||||
// Contains returns true if the SockAddr is contained within the receiver.
|
||||
func (ipv4 IPv4Addr) Contains(sa SockAddr) bool {
|
||||
ipv4b, ok := sa.(IPv4Addr)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
return ipv4.ContainsNetwork(ipv4b)
|
||||
}
|
||||
|
||||
// ContainsAddress returns true if the IPv4Address is contained within the
|
||||
// receiver.
|
||||
func (ipv4 IPv4Addr) ContainsAddress(x IPv4Address) bool {
|
||||
return IPv4Address(ipv4.NetworkAddress()) <= x &&
|
||||
IPv4Address(ipv4.BroadcastAddress()) >= x
|
||||
}
|
||||
|
||||
// ContainsNetwork returns true if the network from IPv4Addr is contained
|
||||
// within the receiver.
|
||||
func (ipv4 IPv4Addr) ContainsNetwork(x IPv4Addr) bool {
|
||||
return ipv4.NetworkAddress() <= x.NetworkAddress() &&
|
||||
ipv4.BroadcastAddress() >= x.BroadcastAddress()
|
||||
}
|
||||
|
||||
// DialPacketArgs returns the arguments required to be passed to
|
||||
// net.DialUDP(). If the Mask of ipv4 is not a /32 or the Port is 0,
|
||||
// DialPacketArgs() will fail. See Host() to create an IPv4Addr with its
|
||||
// mask set to /32.
|
||||
func (ipv4 IPv4Addr) DialPacketArgs() (network, dialArgs string) {
|
||||
if ipv4.Mask != IPv4HostMask || ipv4.Port == 0 {
|
||||
return "udp4", ""
|
||||
}
|
||||
return "udp4", fmt.Sprintf("%s:%d", ipv4.NetIP().String(), ipv4.Port)
|
||||
}
|
||||
|
||||
// DialStreamArgs returns the arguments required to be passed to
|
||||
// net.DialTCP(). If the Mask of ipv4 is not a /32 or the Port is 0,
|
||||
// DialStreamArgs() will fail. See Host() to create an IPv4Addr with its
|
||||
// mask set to /32.
|
||||
func (ipv4 IPv4Addr) DialStreamArgs() (network, dialArgs string) {
|
||||
if ipv4.Mask != IPv4HostMask || ipv4.Port == 0 {
|
||||
return "tcp4", ""
|
||||
}
|
||||
return "tcp4", fmt.Sprintf("%s:%d", ipv4.NetIP().String(), ipv4.Port)
|
||||
}
|
||||
|
||||
// Equal returns true if a SockAddr is equal to the receiving IPv4Addr.
|
||||
func (ipv4 IPv4Addr) Equal(sa SockAddr) bool {
|
||||
ipv4b, ok := sa.(IPv4Addr)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
if ipv4.Port != ipv4b.Port {
|
||||
return false
|
||||
}
|
||||
|
||||
if ipv4.Address != ipv4b.Address {
|
||||
return false
|
||||
}
|
||||
|
||||
if ipv4.NetIPNet().String() != ipv4b.NetIPNet().String() {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// FirstUsable returns an IPv4Addr set to the first address following the
|
||||
// network prefix. The first usable address in a network is normally the
|
||||
// gateway and should not be used except by devices forwarding packets
|
||||
// between two administratively distinct networks (i.e. a router). This
|
||||
// function does not discriminate against first usable vs "first address that
|
||||
// should be used." For example, FirstUsable() on "192.168.1.10/24" would
|
||||
// return the address "192.168.1.1/24".
|
||||
func (ipv4 IPv4Addr) FirstUsable() IPAddr {
|
||||
addr := ipv4.NetworkAddress()
|
||||
|
||||
// If /32, return the address itself. If /31 assume a point-to-point
|
||||
// link and return the lower address.
|
||||
if ipv4.Maskbits() < 31 {
|
||||
addr++
|
||||
}
|
||||
|
||||
return IPv4Addr{
|
||||
Address: IPv4Address(addr),
|
||||
Mask: IPv4HostMask,
|
||||
}
|
||||
}
|
||||
|
||||
// Host returns a copy of ipv4 with its mask set to /32 so that it can be
|
||||
// used by DialPacketArgs(), DialStreamArgs(), ListenPacketArgs(), or
|
||||
// ListenStreamArgs().
|
||||
func (ipv4 IPv4Addr) Host() IPAddr {
|
||||
// Nothing should listen on a broadcast address.
|
||||
return IPv4Addr{
|
||||
Address: ipv4.Address,
|
||||
Mask: IPv4HostMask,
|
||||
Port: ipv4.Port,
|
||||
}
|
||||
}
|
||||
|
||||
// IPPort returns the Port number attached to the IPv4Addr
|
||||
func (ipv4 IPv4Addr) IPPort() IPPort {
|
||||
return ipv4.Port
|
||||
}
|
||||
|
||||
// LastUsable returns the last address before the broadcast address in a
|
||||
// given network.
|
||||
func (ipv4 IPv4Addr) LastUsable() IPAddr {
|
||||
addr := ipv4.BroadcastAddress()
|
||||
|
||||
// If /32, return the address itself. If /31 assume a point-to-point
|
||||
// link and return the upper address.
|
||||
if ipv4.Maskbits() < 31 {
|
||||
addr--
|
||||
}
|
||||
|
||||
return IPv4Addr{
|
||||
Address: IPv4Address(addr),
|
||||
Mask: IPv4HostMask,
|
||||
}
|
||||
}
|
||||
|
||||
// ListenPacketArgs returns the arguments required to be passed to
|
||||
// net.ListenUDP(). If the Mask of ipv4 is not a /32, ListenPacketArgs()
|
||||
// will fail. See Host() to create an IPv4Addr with its mask set to /32.
|
||||
func (ipv4 IPv4Addr) ListenPacketArgs() (network, listenArgs string) {
|
||||
if ipv4.Mask != IPv4HostMask {
|
||||
return "udp4", ""
|
||||
}
|
||||
return "udp4", fmt.Sprintf("%s:%d", ipv4.NetIP().String(), ipv4.Port)
|
||||
}
|
||||
|
||||
// ListenStreamArgs returns the arguments required to be passed to
|
||||
// net.ListenTCP(). If the Mask of ipv4 is not a /32, ListenStreamArgs()
|
||||
// will fail. See Host() to create an IPv4Addr with its mask set to /32.
|
||||
func (ipv4 IPv4Addr) ListenStreamArgs() (network, listenArgs string) {
|
||||
if ipv4.Mask != IPv4HostMask {
|
||||
return "tcp4", ""
|
||||
}
|
||||
return "tcp4", fmt.Sprintf("%s:%d", ipv4.NetIP().String(), ipv4.Port)
|
||||
}
|
||||
|
||||
// Maskbits returns the number of network mask bits in a given IPv4Addr. For
|
||||
// example, the Maskbits() of "192.168.1.1/24" would return 24.
|
||||
func (ipv4 IPv4Addr) Maskbits() int {
|
||||
mask := make(net.IPMask, IPv4len)
|
||||
binary.BigEndian.PutUint32(mask, uint32(ipv4.Mask))
|
||||
maskOnes, _ := mask.Size()
|
||||
return maskOnes
|
||||
}
|
||||
|
||||
// MustIPv4Addr is a helper method that must return an IPv4Addr or panic on
|
||||
// invalid input.
|
||||
func MustIPv4Addr(addr string) IPv4Addr {
|
||||
ipv4, err := NewIPv4Addr(addr)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Unable to create an IPv4Addr from %+q: %v", addr, err))
|
||||
}
|
||||
return ipv4
|
||||
}
|
||||
|
||||
// NetIP returns the address as a net.IP (address is always presized to
|
||||
// IPv4).
|
||||
func (ipv4 IPv4Addr) NetIP() *net.IP {
|
||||
x := make(net.IP, IPv4len)
|
||||
binary.BigEndian.PutUint32(x, uint32(ipv4.Address))
|
||||
return &x
|
||||
}
|
||||
|
||||
// NetIPMask create a new net.IPMask from the IPv4Addr.
|
||||
func (ipv4 IPv4Addr) NetIPMask() *net.IPMask {
|
||||
ipv4Mask := net.IPMask{}
|
||||
ipv4Mask = make(net.IPMask, IPv4len)
|
||||
binary.BigEndian.PutUint32(ipv4Mask, uint32(ipv4.Mask))
|
||||
return &ipv4Mask
|
||||
}
|
||||
|
||||
// NetIPNet create a new net.IPNet from the IPv4Addr.
|
||||
func (ipv4 IPv4Addr) NetIPNet() *net.IPNet {
|
||||
ipv4net := &net.IPNet{}
|
||||
ipv4net.IP = make(net.IP, IPv4len)
|
||||
binary.BigEndian.PutUint32(ipv4net.IP, uint32(ipv4.NetworkAddress()))
|
||||
ipv4net.Mask = *ipv4.NetIPMask()
|
||||
return ipv4net
|
||||
}
|
||||
|
||||
// Network returns the network prefix or network address for a given network.
|
||||
func (ipv4 IPv4Addr) Network() IPAddr {
|
||||
return IPv4Addr{
|
||||
Address: IPv4Address(ipv4.NetworkAddress()),
|
||||
Mask: ipv4.Mask,
|
||||
}
|
||||
}
|
||||
|
||||
// NetworkAddress returns an IPv4Network of the IPv4Addr's network address.
|
||||
func (ipv4 IPv4Addr) NetworkAddress() IPv4Network {
|
||||
return IPv4Network(uint32(ipv4.Address) & uint32(ipv4.Mask))
|
||||
}
|
||||
|
||||
// Octets returns a slice of the four octets in an IPv4Addr's Address. The
|
||||
// order of the bytes is big endian.
|
||||
func (ipv4 IPv4Addr) Octets() []int {
|
||||
return []int{
|
||||
int(ipv4.Address >> 24),
|
||||
int((ipv4.Address >> 16) & 0xff),
|
||||
int((ipv4.Address >> 8) & 0xff),
|
||||
int(ipv4.Address & 0xff),
|
||||
}
|
||||
}
|
||||
|
||||
// String returns a string representation of the IPv4Addr
|
||||
func (ipv4 IPv4Addr) String() string {
|
||||
if ipv4.Port != 0 {
|
||||
return fmt.Sprintf("%s:%d", ipv4.NetIP().String(), ipv4.Port)
|
||||
}
|
||||
|
||||
if ipv4.Maskbits() == 32 {
|
||||
return ipv4.NetIP().String()
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s/%d", ipv4.NetIP().String(), ipv4.Maskbits())
|
||||
}
|
||||
|
||||
// Type is used as a type switch and returns TypeIPv4
|
||||
func (IPv4Addr) Type() SockAddrType {
|
||||
return TypeIPv4
|
||||
}
|
||||
|
||||
// IPv4AddrAttr returns a string representation of an attribute for the given
|
||||
// IPv4Addr.
|
||||
func IPv4AddrAttr(ipv4 IPv4Addr, selector AttrName) string {
|
||||
fn, found := ipv4AddrAttrMap[selector]
|
||||
if !found {
|
||||
return ""
|
||||
}
|
||||
|
||||
return fn(ipv4)
|
||||
}
|
||||
|
||||
// IPv4Attrs returns a list of attributes supported by the IPv4Addr type
|
||||
func IPv4Attrs() []AttrName {
|
||||
return ipv4AddrAttrs
|
||||
}
|
||||
|
||||
// ipv4AddrInit is called once at init()
|
||||
func ipv4AddrInit() {
|
||||
// Sorted for human readability
|
||||
ipv4AddrAttrs = []AttrName{
|
||||
"size", // Same position as in IPv6 for output consistency
|
||||
"broadcast",
|
||||
"uint32",
|
||||
}
|
||||
|
||||
ipv4AddrAttrMap = map[AttrName]func(ipv4 IPv4Addr) string{
|
||||
"broadcast": func(ipv4 IPv4Addr) string {
|
||||
return ipv4.Broadcast().String()
|
||||
},
|
||||
"size": func(ipv4 IPv4Addr) string {
|
||||
return fmt.Sprintf("%d", 1<<uint(IPv4len*8-ipv4.Maskbits()))
|
||||
},
|
||||
"uint32": func(ipv4 IPv4Addr) string {
|
||||
return fmt.Sprintf("%d", uint32(ipv4.Address))
|
||||
},
|
||||
}
|
||||
}
|
|
@ -0,0 +1,591 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math/big"
|
||||
"net"
|
||||
)
|
||||
|
||||
// IPv6Address is a named type representing an IPv6 address, stored as an
// arbitrary-precision integer.
type IPv6Address *big.Int

// IPv6Network is a named type representing an IPv6 network, stored as an
// arbitrary-precision integer.
type IPv6Network *big.Int

// IPv6Mask is a named type representing an IPv6 network mask, stored as an
// arbitrary-precision integer.
type IPv6Mask *big.Int
|
||||
|
||||
// IPv6HostPrefix is a constant represents a /128 IPv6 Prefix.
|
||||
const IPv6HostPrefix = IPPrefixLen(128)
|
||||
|
||||
// ipv6HostMask is an unexported big.Int representing a /128 IPv6 address.
|
||||
// This value must be a constant and always set to all ones.
|
||||
var ipv6HostMask IPv6Mask
|
||||
|
||||
// ipv6AddrAttrMap is a map of the IPv6Addr type-specific attributes.
|
||||
var ipv6AddrAttrMap map[AttrName]func(IPv6Addr) string
|
||||
var ipv6AddrAttrs []AttrName
|
||||
|
||||
func init() {
|
||||
biMask := new(big.Int)
|
||||
biMask.SetBytes([]byte{
|
||||
0xff, 0xff,
|
||||
0xff, 0xff,
|
||||
0xff, 0xff,
|
||||
0xff, 0xff,
|
||||
0xff, 0xff,
|
||||
0xff, 0xff,
|
||||
0xff, 0xff,
|
||||
0xff, 0xff,
|
||||
},
|
||||
)
|
||||
ipv6HostMask = IPv6Mask(biMask)
|
||||
|
||||
ipv6AddrInit()
|
||||
}
|
||||
|
||||
// IPv6Addr implements a convenience wrapper around the union of Go's
|
||||
// built-in net.IP and net.IPNet types. In UNIX-speak, IPv6Addr implements
|
||||
// `sockaddr` when the the address family is set to AF_INET6
|
||||
// (i.e. `sockaddr_in6`).
|
||||
type IPv6Addr struct {
|
||||
IPAddr
|
||||
Address IPv6Address
|
||||
Mask IPv6Mask
|
||||
Port IPPort
|
||||
}
|
||||
|
||||
// NewIPv6Addr creates an IPv6Addr from a string. String can be in the form of
|
||||
// an an IPv6:port (e.g. `[2001:4860:0:2001::68]:80`, in which case the mask is
|
||||
// assumed to be a /128), an IPv6 address (e.g. `2001:4860:0:2001::68`, also
|
||||
// with a `/128` mask), an IPv6 CIDR (e.g. `2001:4860:0:2001::68/64`, which has
|
||||
// its IP port initialized to zero). ipv6Str can not be a hostname.
|
||||
//
|
||||
// NOTE: Many net.*() routines will initialize and return an IPv4 address.
|
||||
// Always test to make sure the address returned cannot be converted to a 4 byte
|
||||
// array using To4().
|
||||
func NewIPv6Addr(ipv6Str string) (IPv6Addr, error) {
|
||||
v6Addr := false
|
||||
LOOP:
|
||||
for i := 0; i < len(ipv6Str); i++ {
|
||||
switch ipv6Str[i] {
|
||||
case '.':
|
||||
break LOOP
|
||||
case ':':
|
||||
v6Addr = true
|
||||
break LOOP
|
||||
}
|
||||
}
|
||||
|
||||
if !v6Addr {
|
||||
return IPv6Addr{}, fmt.Errorf("Unable to resolve %+q as an IPv6 address, appears to be an IPv4 address", ipv6Str)
|
||||
}
|
||||
|
||||
// Attempt to parse ipv6Str as a /128 host with a port number.
|
||||
tcpAddr, err := net.ResolveTCPAddr("tcp6", ipv6Str)
|
||||
if err == nil {
|
||||
ipv6 := tcpAddr.IP.To16()
|
||||
if ipv6 == nil {
|
||||
return IPv6Addr{}, fmt.Errorf("Unable to resolve %+q as a 16byte IPv6 address", ipv6Str)
|
||||
}
|
||||
|
||||
ipv6BigIntAddr := new(big.Int)
|
||||
ipv6BigIntAddr.SetBytes(ipv6)
|
||||
|
||||
ipv6BigIntMask := new(big.Int)
|
||||
ipv6BigIntMask.Set(ipv6HostMask)
|
||||
|
||||
ipv6Addr := IPv6Addr{
|
||||
Address: IPv6Address(ipv6BigIntAddr),
|
||||
Mask: IPv6Mask(ipv6BigIntMask),
|
||||
Port: IPPort(tcpAddr.Port),
|
||||
}
|
||||
|
||||
return ipv6Addr, nil
|
||||
}
|
||||
|
||||
// Parse as a naked IPv6 address. Trim square brackets if present.
|
||||
if len(ipv6Str) > 2 && ipv6Str[0] == '[' && ipv6Str[len(ipv6Str)-1] == ']' {
|
||||
ipv6Str = ipv6Str[1 : len(ipv6Str)-1]
|
||||
}
|
||||
ip := net.ParseIP(ipv6Str)
|
||||
if ip != nil {
|
||||
ipv6 := ip.To16()
|
||||
if ipv6 == nil {
|
||||
return IPv6Addr{}, fmt.Errorf("Unable to string convert %+q to a 16byte IPv6 address", ipv6Str)
|
||||
}
|
||||
|
||||
ipv6BigIntAddr := new(big.Int)
|
||||
ipv6BigIntAddr.SetBytes(ipv6)
|
||||
|
||||
ipv6BigIntMask := new(big.Int)
|
||||
ipv6BigIntMask.Set(ipv6HostMask)
|
||||
|
||||
return IPv6Addr{
|
||||
Address: IPv6Address(ipv6BigIntAddr),
|
||||
Mask: IPv6Mask(ipv6BigIntMask),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Parse as an IPv6 CIDR
|
||||
ipAddr, network, err := net.ParseCIDR(ipv6Str)
|
||||
if err == nil {
|
||||
ipv6 := ipAddr.To16()
|
||||
if ipv6 == nil {
|
||||
return IPv6Addr{}, fmt.Errorf("Unable to convert %+q to a 16byte IPv6 address", ipv6Str)
|
||||
}
|
||||
|
||||
ipv6BigIntAddr := new(big.Int)
|
||||
ipv6BigIntAddr.SetBytes(ipv6)
|
||||
|
||||
ipv6BigIntMask := new(big.Int)
|
||||
ipv6BigIntMask.SetBytes(network.Mask)
|
||||
|
||||
ipv6Addr := IPv6Addr{
|
||||
Address: IPv6Address(ipv6BigIntAddr),
|
||||
Mask: IPv6Mask(ipv6BigIntMask),
|
||||
}
|
||||
return ipv6Addr, nil
|
||||
}
|
||||
|
||||
return IPv6Addr{}, fmt.Errorf("Unable to parse %+q to an IPv6 address: %v", ipv6Str, err)
|
||||
}
|
||||
|
||||
// AddressBinString returns a string with the IPv6Addr's Address represented
|
||||
// as a sequence of '0' and '1' characters. This method is useful for
|
||||
// debugging or by operators who want to inspect an address.
|
||||
func (ipv6 IPv6Addr) AddressBinString() string {
|
||||
bi := big.Int(*ipv6.Address)
|
||||
return fmt.Sprintf("%0128s", bi.Text(2))
|
||||
}
|
||||
|
||||
// AddressHexString returns a string with the IPv6Addr address represented as
|
||||
// a sequence of hex characters. This method is useful for debugging or by
|
||||
// operators who want to inspect an address.
|
||||
func (ipv6 IPv6Addr) AddressHexString() string {
|
||||
bi := big.Int(*ipv6.Address)
|
||||
return fmt.Sprintf("%032s", bi.Text(16))
|
||||
}
|
||||
|
||||
// CmpAddress follows the Cmp() standard protocol and returns:
|
||||
//
|
||||
// - -1 If the receiver should sort first because its address is lower than arg
|
||||
// - 0 if the SockAddr arg equal to the receiving IPv6Addr or the argument is of a
|
||||
// different type.
|
||||
// - 1 If the argument should sort first.
|
||||
func (ipv6 IPv6Addr) CmpAddress(sa SockAddr) int {
|
||||
ipv6b, ok := sa.(IPv6Addr)
|
||||
if !ok {
|
||||
return sortDeferDecision
|
||||
}
|
||||
|
||||
ipv6aBigInt := new(big.Int)
|
||||
ipv6aBigInt.Set(ipv6.Address)
|
||||
ipv6bBigInt := new(big.Int)
|
||||
ipv6bBigInt.Set(ipv6b.Address)
|
||||
|
||||
return ipv6aBigInt.Cmp(ipv6bBigInt)
|
||||
}
|
||||
|
||||
// CmpPort follows the Cmp() standard protocol and returns:
|
||||
//
|
||||
// - -1 If the receiver should sort first because its port is lower than arg
|
||||
// - 0 if the SockAddr arg's port number is equal to the receiving IPv6Addr,
|
||||
// regardless of type.
|
||||
// - 1 If the argument should sort first.
|
||||
func (ipv6 IPv6Addr) CmpPort(sa SockAddr) int {
|
||||
var saPort IPPort
|
||||
switch v := sa.(type) {
|
||||
case IPv4Addr:
|
||||
saPort = v.Port
|
||||
case IPv6Addr:
|
||||
saPort = v.Port
|
||||
default:
|
||||
return sortDeferDecision
|
||||
}
|
||||
|
||||
switch {
|
||||
case ipv6.Port == saPort:
|
||||
return sortDeferDecision
|
||||
case ipv6.Port < saPort:
|
||||
return sortReceiverBeforeArg
|
||||
default:
|
||||
return sortArgBeforeReceiver
|
||||
}
|
||||
}
|
||||
|
||||
// CmpRFC follows the Cmp() standard protocol and returns:
|
||||
//
|
||||
// - -1 If the receiver should sort first because it belongs to the RFC and its
|
||||
// arg does not
|
||||
// - 0 if the receiver and arg both belong to the same RFC or neither do.
|
||||
// - 1 If the arg belongs to the RFC but receiver does not.
|
||||
func (ipv6 IPv6Addr) CmpRFC(rfcNum uint, sa SockAddr) int {
|
||||
recvInRFC := IsRFC(rfcNum, ipv6)
|
||||
ipv6b, ok := sa.(IPv6Addr)
|
||||
if !ok {
|
||||
// If the receiver is part of the desired RFC and the SockAddr
|
||||
// argument is not, sort receiver before the non-IPv6 SockAddr.
|
||||
// Conversely, if the receiver is not part of the RFC, punt on
|
||||
// sorting and leave it for the next sorter.
|
||||
if recvInRFC {
|
||||
return sortReceiverBeforeArg
|
||||
} else {
|
||||
return sortDeferDecision
|
||||
}
|
||||
}
|
||||
|
||||
argInRFC := IsRFC(rfcNum, ipv6b)
|
||||
switch {
|
||||
case (recvInRFC && argInRFC), (!recvInRFC && !argInRFC):
|
||||
// If a and b both belong to the RFC, or neither belong to
|
||||
// rfcNum, defer sorting to the next sorter.
|
||||
return sortDeferDecision
|
||||
case recvInRFC && !argInRFC:
|
||||
return sortReceiverBeforeArg
|
||||
default:
|
||||
return sortArgBeforeReceiver
|
||||
}
|
||||
}
|
||||
|
||||
// Contains returns true if the SockAddr is contained within the receiver.
|
||||
func (ipv6 IPv6Addr) Contains(sa SockAddr) bool {
|
||||
ipv6b, ok := sa.(IPv6Addr)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
return ipv6.ContainsNetwork(ipv6b)
|
||||
}
|
||||
|
||||
// ContainsAddress returns true if the IPv6Address is contained within the
|
||||
// receiver.
|
||||
func (ipv6 IPv6Addr) ContainsAddress(x IPv6Address) bool {
|
||||
xAddr := IPv6Addr{
|
||||
Address: x,
|
||||
Mask: ipv6HostMask,
|
||||
}
|
||||
|
||||
{
|
||||
xIPv6 := xAddr.FirstUsable().(IPv6Addr)
|
||||
yIPv6 := ipv6.FirstUsable().(IPv6Addr)
|
||||
if xIPv6.CmpAddress(yIPv6) >= 1 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
xIPv6 := xAddr.LastUsable().(IPv6Addr)
|
||||
yIPv6 := ipv6.LastUsable().(IPv6Addr)
|
||||
if xIPv6.CmpAddress(yIPv6) <= -1 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ContainsNetwork returns true if the network from IPv6Addr is contained within
|
||||
// the receiver.
|
||||
func (x IPv6Addr) ContainsNetwork(y IPv6Addr) bool {
|
||||
{
|
||||
xIPv6 := x.FirstUsable().(IPv6Addr)
|
||||
yIPv6 := y.FirstUsable().(IPv6Addr)
|
||||
if ret := xIPv6.CmpAddress(yIPv6); ret >= 1 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
xIPv6 := x.LastUsable().(IPv6Addr)
|
||||
yIPv6 := y.LastUsable().(IPv6Addr)
|
||||
if ret := xIPv6.CmpAddress(yIPv6); ret <= -1 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// DialPacketArgs returns the arguments required to be passed to
|
||||
// net.DialUDP(). If the Mask of ipv6 is not a /128 or the Port is 0,
|
||||
// DialPacketArgs() will fail. See Host() to create an IPv6Addr with its
|
||||
// mask set to /128.
|
||||
func (ipv6 IPv6Addr) DialPacketArgs() (network, dialArgs string) {
|
||||
ipv6Mask := big.Int(*ipv6.Mask)
|
||||
if ipv6Mask.Cmp(ipv6HostMask) != 0 || ipv6.Port == 0 {
|
||||
return "udp6", ""
|
||||
}
|
||||
return "udp6", fmt.Sprintf("[%s]:%d", ipv6.NetIP().String(), ipv6.Port)
|
||||
}
|
||||
|
||||
// DialStreamArgs returns the arguments required to be passed to
|
||||
// net.DialTCP(). If the Mask of ipv6 is not a /128 or the Port is 0,
|
||||
// DialStreamArgs() will fail. See Host() to create an IPv6Addr with its
|
||||
// mask set to /128.
|
||||
func (ipv6 IPv6Addr) DialStreamArgs() (network, dialArgs string) {
|
||||
ipv6Mask := big.Int(*ipv6.Mask)
|
||||
if ipv6Mask.Cmp(ipv6HostMask) != 0 || ipv6.Port == 0 {
|
||||
return "tcp6", ""
|
||||
}
|
||||
return "tcp6", fmt.Sprintf("[%s]:%d", ipv6.NetIP().String(), ipv6.Port)
|
||||
}
|
||||
|
||||
// Equal returns true if a SockAddr is equal to the receiving IPv4Addr.
|
||||
func (ipv6a IPv6Addr) Equal(sa SockAddr) bool {
|
||||
ipv6b, ok := sa.(IPv6Addr)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
if ipv6a.NetIP().String() != ipv6b.NetIP().String() {
|
||||
return false
|
||||
}
|
||||
|
||||
if ipv6a.NetIPNet().String() != ipv6b.NetIPNet().String() {
|
||||
return false
|
||||
}
|
||||
|
||||
if ipv6a.Port != ipv6b.Port {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// FirstUsable returns an IPv6Addr set to the first address following the
|
||||
// network prefix. The first usable address in a network is normally the
|
||||
// gateway and should not be used except by devices forwarding packets
|
||||
// between two administratively distinct networks (i.e. a router). This
|
||||
// function does not discriminate against first usable vs "first address that
|
||||
// should be used." For example, FirstUsable() on "2001:0db8::0003/64" would
|
||||
// return "2001:0db8::00011".
|
||||
func (ipv6 IPv6Addr) FirstUsable() IPAddr {
|
||||
return IPv6Addr{
|
||||
Address: IPv6Address(ipv6.NetworkAddress()),
|
||||
Mask: ipv6HostMask,
|
||||
}
|
||||
}
|
||||
|
||||
// Host returns a copy of ipv6 with its mask set to /128 so that it can be
|
||||
// used by DialPacketArgs(), DialStreamArgs(), ListenPacketArgs(), or
|
||||
// ListenStreamArgs().
|
||||
func (ipv6 IPv6Addr) Host() IPAddr {
|
||||
// Nothing should listen on a broadcast address.
|
||||
return IPv6Addr{
|
||||
Address: ipv6.Address,
|
||||
Mask: ipv6HostMask,
|
||||
Port: ipv6.Port,
|
||||
}
|
||||
}
|
||||
|
||||
// IPPort returns the Port number attached to the IPv6Addr
|
||||
func (ipv6 IPv6Addr) IPPort() IPPort {
|
||||
return ipv6.Port
|
||||
}
|
||||
|
||||
// LastUsable returns the last address in a given network.
|
||||
func (ipv6 IPv6Addr) LastUsable() IPAddr {
|
||||
addr := new(big.Int)
|
||||
addr.Set(ipv6.Address)
|
||||
|
||||
mask := new(big.Int)
|
||||
mask.Set(ipv6.Mask)
|
||||
|
||||
negMask := new(big.Int)
|
||||
negMask.Xor(ipv6HostMask, mask)
|
||||
|
||||
lastAddr := new(big.Int)
|
||||
lastAddr.And(addr, mask)
|
||||
lastAddr.Or(lastAddr, negMask)
|
||||
|
||||
return IPv6Addr{
|
||||
Address: IPv6Address(lastAddr),
|
||||
Mask: ipv6HostMask,
|
||||
}
|
||||
}
|
||||
|
||||
// ListenPacketArgs returns the arguments required to be passed to
|
||||
// net.ListenUDP(). If the Mask of ipv6 is not a /128, ListenPacketArgs()
|
||||
// will fail. See Host() to create an IPv6Addr with its mask set to /128.
|
||||
func (ipv6 IPv6Addr) ListenPacketArgs() (network, listenArgs string) {
|
||||
ipv6Mask := big.Int(*ipv6.Mask)
|
||||
if ipv6Mask.Cmp(ipv6HostMask) != 0 {
|
||||
return "udp6", ""
|
||||
}
|
||||
return "udp6", fmt.Sprintf("[%s]:%d", ipv6.NetIP().String(), ipv6.Port)
|
||||
}
|
||||
|
||||
// ListenStreamArgs returns the arguments required to be passed to
|
||||
// net.ListenTCP(). If the Mask of ipv6 is not a /128, ListenStreamArgs()
|
||||
// will fail. See Host() to create an IPv6Addr with its mask set to /128.
|
||||
func (ipv6 IPv6Addr) ListenStreamArgs() (network, listenArgs string) {
|
||||
ipv6Mask := big.Int(*ipv6.Mask)
|
||||
if ipv6Mask.Cmp(ipv6HostMask) != 0 {
|
||||
return "tcp6", ""
|
||||
}
|
||||
return "tcp6", fmt.Sprintf("[%s]:%d", ipv6.NetIP().String(), ipv6.Port)
|
||||
}
|
||||
|
||||
// Maskbits returns the number of network mask bits in a given IPv6Addr. For
|
||||
// example, the Maskbits() of "2001:0db8::0003/64" would return 64.
|
||||
func (ipv6 IPv6Addr) Maskbits() int {
|
||||
maskOnes, _ := ipv6.NetIPNet().Mask.Size()
|
||||
|
||||
return maskOnes
|
||||
}
|
||||
|
||||
// MustIPv6Addr is a helper method that must return an IPv6Addr or panic on
|
||||
// invalid input.
|
||||
func MustIPv6Addr(addr string) IPv6Addr {
|
||||
ipv6, err := NewIPv6Addr(addr)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Unable to create an IPv6Addr from %+q: %v", addr, err))
|
||||
}
|
||||
return ipv6
|
||||
}
|
||||
|
||||
// NetIP returns the address as a net.IP.
|
||||
func (ipv6 IPv6Addr) NetIP() *net.IP {
|
||||
return bigIntToNetIPv6(ipv6.Address)
|
||||
}
|
||||
|
||||
// NetIPMask create a new net.IPMask from the IPv6Addr.
|
||||
func (ipv6 IPv6Addr) NetIPMask() *net.IPMask {
|
||||
ipv6Mask := make(net.IPMask, IPv6len)
|
||||
m := big.Int(*ipv6.Mask)
|
||||
copy(ipv6Mask, m.Bytes())
|
||||
return &ipv6Mask
|
||||
}
|
||||
|
||||
// Network returns a pointer to the net.IPNet within IPv4Addr receiver.
|
||||
func (ipv6 IPv6Addr) NetIPNet() *net.IPNet {
|
||||
ipv6net := &net.IPNet{}
|
||||
ipv6net.IP = make(net.IP, IPv6len)
|
||||
copy(ipv6net.IP, *ipv6.NetIP())
|
||||
ipv6net.Mask = *ipv6.NetIPMask()
|
||||
return ipv6net
|
||||
}
|
||||
|
||||
// Network returns the network prefix or network address for a given network.
|
||||
func (ipv6 IPv6Addr) Network() IPAddr {
|
||||
return IPv6Addr{
|
||||
Address: IPv6Address(ipv6.NetworkAddress()),
|
||||
Mask: ipv6.Mask,
|
||||
}
|
||||
}
|
||||
|
||||
// NetworkAddress returns an IPv6Network of the IPv6Addr's network address.
|
||||
func (ipv6 IPv6Addr) NetworkAddress() IPv6Network {
|
||||
addr := new(big.Int)
|
||||
addr.SetBytes((*ipv6.Address).Bytes())
|
||||
|
||||
mask := new(big.Int)
|
||||
mask.SetBytes(*ipv6.NetIPMask())
|
||||
|
||||
netAddr := new(big.Int)
|
||||
netAddr.And(addr, mask)
|
||||
|
||||
return IPv6Network(netAddr)
|
||||
}
|
||||
|
||||
// Octets returns a slice of the 16 octets in an IPv6Addr's Address. The
|
||||
// order of the bytes is big endian.
|
||||
func (ipv6 IPv6Addr) Octets() []int {
|
||||
x := make([]int, IPv6len)
|
||||
for i, b := range *bigIntToNetIPv6(ipv6.Address) {
|
||||
x[i] = int(b)
|
||||
}
|
||||
|
||||
return x
|
||||
}
|
||||
|
||||
// String returns a string representation of the IPv6Addr
|
||||
func (ipv6 IPv6Addr) String() string {
|
||||
if ipv6.Port != 0 {
|
||||
return fmt.Sprintf("[%s]:%d", ipv6.NetIP().String(), ipv6.Port)
|
||||
}
|
||||
|
||||
if ipv6.Maskbits() == 128 {
|
||||
return ipv6.NetIP().String()
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s/%d", ipv6.NetIP().String(), ipv6.Maskbits())
|
||||
}
|
||||
|
||||
// Type is used as a type switch and returns TypeIPv6
|
||||
func (IPv6Addr) Type() SockAddrType {
|
||||
return TypeIPv6
|
||||
}
|
||||
|
||||
// IPv6Attrs returns a list of attributes supported by the IPv6Addr type
|
||||
func IPv6Attrs() []AttrName {
|
||||
return ipv6AddrAttrs
|
||||
}
|
||||
|
||||
// IPv6AddrAttr returns a string representation of an attribute for the given
|
||||
// IPv6Addr.
|
||||
func IPv6AddrAttr(ipv6 IPv6Addr, selector AttrName) string {
|
||||
fn, found := ipv6AddrAttrMap[selector]
|
||||
if !found {
|
||||
return ""
|
||||
}
|
||||
|
||||
return fn(ipv6)
|
||||
}
|
||||
|
||||
// ipv6AddrInit is called once at init()
|
||||
func ipv6AddrInit() {
|
||||
// Sorted for human readability
|
||||
ipv6AddrAttrs = []AttrName{
|
||||
"size", // Same position as in IPv6 for output consistency
|
||||
"uint128",
|
||||
}
|
||||
|
||||
ipv6AddrAttrMap = map[AttrName]func(ipv6 IPv6Addr) string{
|
||||
"size": func(ipv6 IPv6Addr) string {
|
||||
netSize := big.NewInt(1)
|
||||
netSize = netSize.Lsh(netSize, uint(IPv6len*8-ipv6.Maskbits()))
|
||||
return netSize.Text(10)
|
||||
},
|
||||
"uint128": func(ipv6 IPv6Addr) string {
|
||||
b := big.Int(*ipv6.Address)
|
||||
return b.Text(10)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// bigIntToNetIPv6 is a helper function that correctly returns a net.IP with the
|
||||
// correctly padded values.
|
||||
func bigIntToNetIPv6(bi *big.Int) *net.IP {
|
||||
x := make(net.IP, IPv6len)
|
||||
ipv6Bytes := bi.Bytes()
|
||||
|
||||
// It's possibe for ipv6Bytes to be less than IPv6len bytes in size. If
|
||||
// they are different sizes we to pad the size of response.
|
||||
if len(ipv6Bytes) < IPv6len {
|
||||
buf := new(bytes.Buffer)
|
||||
buf.Grow(IPv6len)
|
||||
|
||||
for i := len(ipv6Bytes); i < IPv6len; i++ {
|
||||
if err := binary.Write(buf, binary.BigEndian, byte(0)); err != nil {
|
||||
panic(fmt.Sprintf("Unable to pad byte %d of input %v: %v", i, bi, err))
|
||||
}
|
||||
}
|
||||
|
||||
for _, b := range ipv6Bytes {
|
||||
if err := binary.Write(buf, binary.BigEndian, b); err != nil {
|
||||
panic(fmt.Sprintf("Unable to preserve endianness of input %v: %v", bi, err))
|
||||
}
|
||||
}
|
||||
|
||||
ipv6Bytes = buf.Bytes()
|
||||
}
|
||||
i := copy(x, ipv6Bytes)
|
||||
if i != IPv6len {
|
||||
panic("IPv6 wrong size")
|
||||
}
|
||||
return &x
|
||||
}
|
|
@ -0,0 +1,947 @@
|
|||
package sockaddr
|
||||
|
||||
// ForwardingBlacklist is a faux RFC number standing for a list of
// non-forwardable IP blocks.  Its value is the largest 32-bit unsigned
// integer (4294967295).
const ForwardingBlacklist = 1<<32 - 1
|
||||
|
||||
// IsRFC tests to see if an SockAddr matches the specified RFC
|
||||
func IsRFC(rfcNum uint, sa SockAddr) bool {
|
||||
rfcNetMap := KnownRFCs()
|
||||
rfcNets, ok := rfcNetMap[rfcNum]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
var contained bool
|
||||
for _, rfcNet := range rfcNets {
|
||||
if rfcNet.Contains(sa) {
|
||||
contained = true
|
||||
break
|
||||
}
|
||||
}
|
||||
return contained
|
||||
}
|
||||
|
||||
// KnownRFCs returns an initial set of known RFCs.
|
||||
//
|
||||
// NOTE (sean@): As this list evolves over time, please submit patches to keep
|
||||
// this list current. If something isn't right, inquire, as it may just be a
|
||||
// bug on my part. Some of the inclusions were based on my judgement as to what
|
||||
// would be a useful value (e.g. RFC3330).
|
||||
//
|
||||
// Useful resources:
|
||||
//
|
||||
// * https://www.iana.org/assignments/ipv6-address-space/ipv6-address-space.xhtml
|
||||
// * https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml
|
||||
// * https://www.iana.org/assignments/ipv6-address-space/ipv6-address-space.xhtml
|
||||
func KnownRFCs() map[uint]SockAddrs {
|
||||
// NOTE(sean@): Multiple SockAddrs per RFC lend themselves well to a
|
||||
// RADIX tree, but `ENOTIME`. Patches welcome.
|
||||
return map[uint]SockAddrs{
|
||||
919: {
|
||||
// [RFC919] Broadcasting Internet Datagrams
|
||||
MustIPv4Addr("255.255.255.255/32"), // [RFC1122], §7 Broadcast IP Addressing - Proposed Standards
|
||||
},
|
||||
1122: {
|
||||
// [RFC1122] Requirements for Internet Hosts -- Communication Layers
|
||||
MustIPv4Addr("0.0.0.0/8"), // [RFC1122], §3.2.1.3
|
||||
MustIPv4Addr("127.0.0.0/8"), // [RFC1122], §3.2.1.3
|
||||
},
|
||||
1112: {
|
||||
// [RFC1112] Host Extensions for IP Multicasting
|
||||
MustIPv4Addr("224.0.0.0/4"), // [RFC1112], §4 Host Group Addresses
|
||||
},
|
||||
1918: {
|
||||
// [RFC1918] Address Allocation for Private Internets
|
||||
MustIPv4Addr("10.0.0.0/8"),
|
||||
MustIPv4Addr("172.16.0.0/12"),
|
||||
MustIPv4Addr("192.168.0.0/16"),
|
||||
},
|
||||
2544: {
|
||||
// [RFC2544] Benchmarking Methodology for Network
|
||||
// Interconnect Devices
|
||||
MustIPv4Addr("198.18.0.0/15"),
|
||||
},
|
||||
2765: {
|
||||
// [RFC2765] Stateless IP/ICMP Translation Algorithm
|
||||
// (SIIT) (obsoleted by RFCs 6145, which itself was
|
||||
// later obsoleted by 7915).
|
||||
|
||||
// [RFC2765], §2.1 Addresses
|
||||
MustIPv6Addr("0:0:0:0:0:ffff:0:0/96"),
|
||||
},
|
||||
2928: {
|
||||
// [RFC2928] Initial IPv6 Sub-TLA ID Assignments
|
||||
MustIPv6Addr("2001::/16"), // Superblock
|
||||
//MustIPv6Addr("2001:0000::/23"), // IANA
|
||||
//MustIPv6Addr("2001:0200::/23"), // APNIC
|
||||
//MustIPv6Addr("2001:0400::/23"), // ARIN
|
||||
//MustIPv6Addr("2001:0600::/23"), // RIPE NCC
|
||||
//MustIPv6Addr("2001:0800::/23"), // (future assignment)
|
||||
// ...
|
||||
//MustIPv6Addr("2001:FE00::/23"), // (future assignment)
|
||||
},
|
||||
3056: { // 6to4 address
|
||||
// [RFC3056] Connection of IPv6 Domains via IPv4 Clouds
|
||||
|
||||
// [RFC3056], §2 IPv6 Prefix Allocation
|
||||
MustIPv6Addr("2002::/16"),
|
||||
},
|
||||
3068: {
|
||||
// [RFC3068] An Anycast Prefix for 6to4 Relay Routers
|
||||
// (obsoleted by RFC7526)
|
||||
|
||||
// [RFC3068], § 6to4 Relay anycast address
|
||||
MustIPv4Addr("192.88.99.0/24"),
|
||||
|
||||
// [RFC3068], §2.5 6to4 IPv6 relay anycast address
|
||||
//
|
||||
// NOTE: /120 == 128-(32-24)
|
||||
MustIPv6Addr("2002:c058:6301::/120"),
|
||||
},
|
||||
3171: {
|
||||
// [RFC3171] IANA Guidelines for IPv4 Multicast Address Assignments
|
||||
MustIPv4Addr("224.0.0.0/4"),
|
||||
},
|
||||
3330: {
|
||||
// [RFC3330] Special-Use IPv4 Addresses
|
||||
|
||||
// Addresses in this block refer to source hosts on
|
||||
// "this" network. Address 0.0.0.0/32 may be used as a
|
||||
// source address for this host on this network; other
|
||||
// addresses within 0.0.0.0/8 may be used to refer to
|
||||
// specified hosts on this network [RFC1700, page 4].
|
||||
MustIPv4Addr("0.0.0.0/8"),
|
||||
|
||||
// 10.0.0.0/8 - This block is set aside for use in
|
||||
// private networks. Its intended use is documented in
|
||||
// [RFC1918]. Addresses within this block should not
|
||||
// appear on the public Internet.
|
||||
MustIPv4Addr("10.0.0.0/8"),
|
||||
|
||||
// 14.0.0.0/8 - This block is set aside for assignments
|
||||
// to the international system of Public Data Networks
|
||||
// [RFC1700, page 181]. The registry of assignments
|
||||
// within this block can be accessed from the "Public
|
||||
// Data Network Numbers" link on the web page at
|
||||
// http://www.iana.org/numbers.html. Addresses within
|
||||
// this block are assigned to users and should be
|
||||
// treated as such.
|
||||
|
||||
// 24.0.0.0/8 - This block was allocated in early 1996
|
||||
// for use in provisioning IP service over cable
|
||||
// television systems. Although the IANA initially was
|
||||
// involved in making assignments to cable operators,
|
||||
// this responsibility was transferred to American
|
||||
// Registry for Internet Numbers (ARIN) in May 2001.
|
||||
// Addresses within this block are assigned in the
|
||||
// normal manner and should be treated as such.
|
||||
|
||||
// 39.0.0.0/8 - This block was used in the "Class A
|
||||
// Subnet Experiment" that commenced in May 1995, as
|
||||
// documented in [RFC1797]. The experiment has been
|
||||
// completed and this block has been returned to the
|
||||
// pool of addresses reserved for future allocation or
|
||||
// assignment. This block therefore no longer has a
|
||||
// special use and is subject to allocation to a
|
||||
// Regional Internet Registry for assignment in the
|
||||
// normal manner.
|
||||
|
||||
// 127.0.0.0/8 - This block is assigned for use as the Internet host
|
||||
// loopback address. A datagram sent by a higher level protocol to an
|
||||
// address anywhere within this block should loop back inside the host.
|
||||
// This is ordinarily implemented using only 127.0.0.1/32 for loopback,
|
||||
// but no addresses within this block should ever appear on any network
|
||||
// anywhere [RFC1700, page 5].
|
||||
MustIPv4Addr("127.0.0.0/8"),
|
||||
|
||||
// 128.0.0.0/16 - This block, corresponding to the
|
||||
// numerically lowest of the former Class B addresses,
|
||||
// was initially and is still reserved by the IANA.
|
||||
// Given the present classless nature of the IP address
|
||||
// space, the basis for the reservation no longer
|
||||
// applies and addresses in this block are subject to
|
||||
// future allocation to a Regional Internet Registry for
|
||||
// assignment in the normal manner.
|
||||
|
||||
// 169.254.0.0/16 - This is the "link local" block. It
|
||||
// is allocated for communication between hosts on a
|
||||
// single link. Hosts obtain these addresses by
|
||||
// auto-configuration, such as when a DHCP server may
|
||||
// not be found.
|
||||
MustIPv4Addr("169.254.0.0/16"),
|
||||
|
||||
// 172.16.0.0/12 - This block is set aside for use in
|
||||
// private networks. Its intended use is documented in
|
||||
// [RFC1918]. Addresses within this block should not
|
||||
// appear on the public Internet.
|
||||
MustIPv4Addr("172.16.0.0/12"),
|
||||
|
||||
// 191.255.0.0/16 - This block, corresponding to the numerically highest
|
||||
// to the former Class B addresses, was initially and is still reserved
|
||||
// by the IANA. Given the present classless nature of the IP address
|
||||
// space, the basis for the reservation no longer applies and addresses
|
||||
// in this block are subject to future allocation to a Regional Internet
|
||||
// Registry for assignment in the normal manner.
|
||||
|
||||
// 192.0.0.0/24 - This block, corresponding to the
|
||||
// numerically lowest of the former Class C addresses,
|
||||
// was initially and is still reserved by the IANA.
|
||||
// Given the present classless nature of the IP address
|
||||
// space, the basis for the reservation no longer
|
||||
// applies and addresses in this block are subject to
|
||||
// future allocation to a Regional Internet Registry for
|
||||
// assignment in the normal manner.
|
||||
|
||||
// 192.0.2.0/24 - This block is assigned as "TEST-NET" for use in
|
||||
// documentation and example code. It is often used in conjunction with
|
||||
// domain names example.com or example.net in vendor and protocol
|
||||
// documentation. Addresses within this block should not appear on the
|
||||
// public Internet.
|
||||
MustIPv4Addr("192.0.2.0/24"),
|
||||
|
||||
// 192.88.99.0/24 - This block is allocated for use as 6to4 relay
|
||||
// anycast addresses, according to [RFC3068].
|
||||
MustIPv4Addr("192.88.99.0/24"),
|
||||
|
||||
// 192.168.0.0/16 - This block is set aside for use in private networks.
|
||||
// Its intended use is documented in [RFC1918]. Addresses within this
|
||||
// block should not appear on the public Internet.
|
||||
MustIPv4Addr("192.168.0.0/16"),
|
||||
|
||||
// 198.18.0.0/15 - This block has been allocated for use
|
||||
// in benchmark tests of network interconnect devices.
|
||||
// Its use is documented in [RFC2544].
|
||||
MustIPv4Addr("198.18.0.0/15"),
|
||||
|
||||
// 223.255.255.0/24 - This block, corresponding to the
|
||||
// numerically highest of the former Class C addresses,
|
||||
// was initially and is still reserved by the IANA.
|
||||
// Given the present classless nature of the IP address
|
||||
// space, the basis for the reservation no longer
|
||||
// applies and addresses in this block are subject to
|
||||
// future allocation to a Regional Internet Registry for
|
||||
// assignment in the normal manner.
|
||||
|
||||
// 224.0.0.0/4 - This block, formerly known as the Class
|
||||
// D address space, is allocated for use in IPv4
|
||||
// multicast address assignments. The IANA guidelines
|
||||
// for assignments from this space are described in
|
||||
// [RFC3171].
|
||||
MustIPv4Addr("224.0.0.0/4"),
|
||||
|
||||
// 240.0.0.0/4 - This block, formerly known as the Class E address
|
||||
// space, is reserved. The "limited broadcast" destination address
|
||||
// 255.255.255.255 should never be forwarded outside the (sub-)net of
|
||||
// the source. The remainder of this space is reserved
|
||||
// for future use. [RFC1700, page 4]
|
||||
MustIPv4Addr("240.0.0.0/4"),
|
||||
},
|
||||
3849: {
|
||||
// [RFC3849] IPv6 Address Prefix Reserved for Documentation
|
||||
MustIPv6Addr("2001:db8::/32"), // [RFC3849], §4 IANA Considerations
|
||||
},
|
||||
3927: {
|
||||
// [RFC3927] Dynamic Configuration of IPv4 Link-Local Addresses
|
||||
MustIPv4Addr("169.254.0.0/16"), // [RFC3927], §2.1 Link-Local Address Selection
|
||||
},
|
||||
4038: {
|
||||
// [RFC4038] Application Aspects of IPv6 Transition
|
||||
|
||||
// [RFC4038], §4.2. IPv6 Applications in a Dual-Stack Node
|
||||
MustIPv6Addr("0:0:0:0:0:ffff::/96"),
|
||||
},
|
||||
4193: {
|
||||
// [RFC4193] Unique Local IPv6 Unicast Addresses
|
||||
MustIPv6Addr("fc00::/7"),
|
||||
},
|
||||
4291: {
|
||||
// [RFC4291] IP Version 6 Addressing Architecture
|
||||
|
||||
// [RFC4291], §2.5.2 The Unspecified Address
|
||||
MustIPv6Addr("::/128"),
|
||||
|
||||
// [RFC4291], §2.5.3 The Loopback Address
|
||||
MustIPv6Addr("::1/128"),
|
||||
|
||||
// [RFC4291], §2.5.5.1. IPv4-Compatible IPv6 Address
|
||||
MustIPv6Addr("::/96"),
|
||||
|
||||
// [RFC4291], §2.5.5.2. IPv4-Mapped IPv6 Address
|
||||
MustIPv6Addr("::ffff:0:0/96"),
|
||||
|
||||
// [RFC4291], §2.5.6 Link-Local IPv6 Unicast Addresses
|
||||
MustIPv6Addr("fe80::/10"),
|
||||
|
||||
// [RFC4291], §2.5.7 Site-Local IPv6 Unicast Addresses
|
||||
// (deprecated)
|
||||
MustIPv6Addr("fec0::/10"),
|
||||
|
||||
// [RFC4291], §2.7 Multicast Addresses
|
||||
MustIPv6Addr("ff00::/8"),
|
||||
|
||||
// IPv6 Multicast Information.
|
||||
//
|
||||
// In the following "table" below, `ff0x` is replaced
|
||||
// with the following values depending on the scope of
|
||||
// the query:
|
||||
//
|
||||
// IPv6 Multicast Scopes:
|
||||
// * ff00/9 // reserved
|
||||
// * ff01/9 // interface-local
|
||||
// * ff02/9 // link-local
|
||||
// * ff03/9 // realm-local
|
||||
// * ff04/9 // admin-local
|
||||
// * ff05/9 // site-local
|
||||
// * ff08/9 // organization-local
|
||||
// * ff0e/9 // global
|
||||
// * ff0f/9 // reserved
|
||||
//
|
||||
// IPv6 Multicast Addresses:
|
||||
// * ff0x::2 // All routers
|
||||
// * ff02::5 // OSPFIGP
|
||||
// * ff02::6 // OSPFIGP Designated Routers
|
||||
// * ff02::9 // RIP Routers
|
||||
// * ff02::a // EIGRP Routers
|
||||
// * ff02::d // All PIM Routers
|
||||
// * ff02::1a // All RPL Routers
|
||||
// * ff0x::fb // mDNSv6
|
||||
// * ff0x::101 // All Network Time Protocol (NTP) servers
|
||||
// * ff02::1:1 // Link Name
|
||||
// * ff02::1:2 // All-dhcp-agents
|
||||
// * ff02::1:3 // Link-local Multicast Name Resolution
|
||||
// * ff05::1:3 // All-dhcp-servers
|
||||
// * ff02::1:ff00:0/104 // Solicited-node multicast address.
|
||||
// * ff02::2:ff00:0/104 // Node Information Queries
|
||||
},
|
||||
4380: {
|
||||
// [RFC4380] Teredo: Tunneling IPv6 over UDP through
|
||||
// Network Address Translations (NATs)
|
||||
|
||||
// [RFC4380], §2.6 Global Teredo IPv6 Service Prefix
|
||||
MustIPv6Addr("2001:0000::/32"),
|
||||
},
|
||||
4773: {
|
||||
// [RFC4773] Administration of the IANA Special Purpose IPv6 Address Block
|
||||
MustIPv6Addr("2001:0000::/23"), // IANA
|
||||
},
|
||||
4843: {
|
||||
// [RFC4843] An IPv6 Prefix for Overlay Routable Cryptographic Hash Identifiers (ORCHID)
|
||||
MustIPv6Addr("2001:10::/28"), // [RFC4843], §7 IANA Considerations
|
||||
},
|
||||
5180: {
|
||||
// [RFC5180] IPv6 Benchmarking Methodology for Network Interconnect Devices
|
||||
MustIPv6Addr("2001:0200::/48"), // [RFC5180], §8 IANA Considerations
|
||||
},
|
||||
5735: {
|
||||
// [RFC5735] Special Use IPv4 Addresses
|
||||
MustIPv4Addr("192.0.2.0/24"), // TEST-NET-1
|
||||
MustIPv4Addr("198.51.100.0/24"), // TEST-NET-2
|
||||
MustIPv4Addr("203.0.113.0/24"), // TEST-NET-3
|
||||
MustIPv4Addr("198.18.0.0/15"), // Benchmarks
|
||||
},
|
||||
5737: {
|
||||
// [RFC5737] IPv4 Address Blocks Reserved for Documentation
|
||||
MustIPv4Addr("192.0.2.0/24"), // TEST-NET-1
|
||||
MustIPv4Addr("198.51.100.0/24"), // TEST-NET-2
|
||||
MustIPv4Addr("203.0.113.0/24"), // TEST-NET-3
|
||||
},
|
||||
6052: {
|
||||
// [RFC6052] IPv6 Addressing of IPv4/IPv6 Translators
|
||||
MustIPv6Addr("64:ff9b::/96"), // [RFC6052], §2.1. Well-Known Prefix
|
||||
},
|
||||
6333: {
|
||||
// [RFC6333] Dual-Stack Lite Broadband Deployments Following IPv4 Exhaustion
|
||||
MustIPv4Addr("192.0.0.0/29"), // [RFC6333], §5.7 Well-Known IPv4 Address
|
||||
},
|
||||
6598: {
|
||||
// [RFC6598] IANA-Reserved IPv4 Prefix for Shared Address Space
|
||||
MustIPv4Addr("100.64.0.0/10"),
|
||||
},
|
||||
6666: {
|
||||
// [RFC6666] A Discard Prefix for IPv6
|
||||
MustIPv6Addr("0100::/64"),
|
||||
},
|
||||
6890: {
|
||||
// [RFC6890] Special-Purpose IP Address Registries
|
||||
|
||||
// From "RFC6890 §2.2.1 Information Requirements":
|
||||
/*
|
||||
The IPv4 and IPv6 Special-Purpose Address Registries maintain the
|
||||
following information regarding each entry:
|
||||
|
||||
o Address Block - A block of IPv4 or IPv6 addresses that has been
|
||||
registered for a special purpose.
|
||||
|
||||
o Name - A descriptive name for the special-purpose address block.
|
||||
|
||||
o RFC - The RFC through which the special-purpose address block was
|
||||
requested.
|
||||
|
||||
o Allocation Date - The date upon which the special-purpose address
|
||||
block was allocated.
|
||||
|
||||
o Termination Date - The date upon which the allocation is to be
|
||||
terminated. This field is applicable for limited-use allocations
|
||||
only.
|
||||
|
||||
o Source - A boolean value indicating whether an address from the
|
||||
allocated special-purpose address block is valid when used as the
|
||||
source address of an IP datagram that transits two devices.
|
||||
|
||||
o Destination - A boolean value indicating whether an address from
|
||||
the allocated special-purpose address block is valid when used as
|
||||
the destination address of an IP datagram that transits two
|
||||
devices.
|
||||
|
||||
o Forwardable - A boolean value indicating whether a router may
|
||||
forward an IP datagram whose destination address is drawn from the
|
||||
allocated special-purpose address block between external
|
||||
interfaces.
|
||||
|
||||
o Global - A boolean value indicating whether an IP datagram whose
|
||||
destination address is drawn from the allocated special-purpose
|
||||
address block is forwardable beyond a specified administrative
|
||||
domain.
|
||||
|
||||
o Reserved-by-Protocol - A boolean value indicating whether the
|
||||
special-purpose address block is reserved by IP, itself. This
|
||||
value is "TRUE" if the RFC that created the special-purpose
|
||||
address block requires all compliant IP implementations to behave
|
||||
in a special way when processing packets either to or from
|
||||
addresses contained by the address block.
|
||||
|
||||
If the value of "Destination" is FALSE, the values of "Forwardable"
|
||||
and "Global" must also be false.
|
||||
*/
|
||||
|
||||
/*+----------------------+----------------------------+
|
||||
* | Attribute | Value |
|
||||
* +----------------------+----------------------------+
|
||||
* | Address Block | 0.0.0.0/8 |
|
||||
* | Name | "This host on this network"|
|
||||
* | RFC | [RFC1122], Section 3.2.1.3 |
|
||||
* | Allocation Date | September 1981 |
|
||||
* | Termination Date | N/A |
|
||||
* | Source | True |
|
||||
* | Destination | False |
|
||||
* | Forwardable | False |
|
||||
* | Global | False |
|
||||
* | Reserved-by-Protocol | True |
|
||||
* +----------------------+----------------------------+*/
|
||||
MustIPv4Addr("0.0.0.0/8"),
|
||||
|
||||
/*+----------------------+---------------+
|
||||
* | Attribute | Value |
|
||||
* +----------------------+---------------+
|
||||
* | Address Block | 10.0.0.0/8 |
|
||||
* | Name | Private-Use |
|
||||
* | RFC | [RFC1918] |
|
||||
* | Allocation Date | February 1996 |
|
||||
* | Termination Date | N/A |
|
||||
* | Source | True |
|
||||
* | Destination | True |
|
||||
* | Forwardable | True |
|
||||
* | Global | False |
|
||||
* | Reserved-by-Protocol | False |
|
||||
* +----------------------+---------------+ */
|
||||
MustIPv4Addr("10.0.0.0/8"),
|
||||
|
||||
/*+----------------------+----------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------------+
|
||||
| Address Block | 100.64.0.0/10 |
|
||||
| Name | Shared Address Space |
|
||||
| RFC | [RFC6598] |
|
||||
| Allocation Date | April 2012 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+----------------------+*/
|
||||
MustIPv4Addr("100.64.0.0/10"),
|
||||
|
||||
/*+----------------------+----------------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------------------+
|
||||
| Address Block | 127.0.0.0/8 |
|
||||
| Name | Loopback |
|
||||
| RFC | [RFC1122], Section 3.2.1.3 |
|
||||
| Allocation Date | September 1981 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False [1] |
|
||||
| Destination | False [1] |
|
||||
| Forwardable | False [1] |
|
||||
| Global | False [1] |
|
||||
| Reserved-by-Protocol | True |
|
||||
+----------------------+----------------------------+*/
|
||||
// [1] Several protocols have been granted exceptions to
|
||||
// this rule. For examples, see [RFC4379] and
|
||||
// [RFC5884].
|
||||
MustIPv4Addr("127.0.0.0/8"),
|
||||
|
||||
/*+----------------------+----------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------+
|
||||
| Address Block | 169.254.0.0/16 |
|
||||
| Name | Link Local |
|
||||
| RFC | [RFC3927] |
|
||||
| Allocation Date | May 2005 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | True |
|
||||
+----------------------+----------------+*/
|
||||
MustIPv4Addr("169.254.0.0/16"),
|
||||
|
||||
/*+----------------------+---------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+---------------+
|
||||
| Address Block | 172.16.0.0/12 |
|
||||
| Name | Private-Use |
|
||||
| RFC | [RFC1918] |
|
||||
| Allocation Date | February 1996 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+---------------+*/
|
||||
MustIPv4Addr("172.16.0.0/12"),
|
||||
|
||||
/*+----------------------+---------------------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+---------------------------------+
|
||||
| Address Block | 192.0.0.0/24 [2] |
|
||||
| Name | IETF Protocol Assignments |
|
||||
| RFC | Section 2.1 of this document |
|
||||
| Allocation Date | January 2010 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+---------------------------------+*/
|
||||
// [2] Not usable unless by virtue of a more specific
|
||||
// reservation.
|
||||
MustIPv4Addr("192.0.0.0/24"),
|
||||
|
||||
/*+----------------------+--------------------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+--------------------------------+
|
||||
| Address Block | 192.0.0.0/29 |
|
||||
| Name | IPv4 Service Continuity Prefix |
|
||||
| RFC | [RFC6333], [RFC7335] |
|
||||
| Allocation Date | June 2011 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+--------------------------------+*/
|
||||
MustIPv4Addr("192.0.0.0/29"),
|
||||
|
||||
/*+----------------------+----------------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------------------+
|
||||
| Address Block | 192.0.2.0/24 |
|
||||
| Name | Documentation (TEST-NET-1) |
|
||||
| RFC | [RFC5737] |
|
||||
| Allocation Date | January 2010 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+----------------------------+*/
|
||||
MustIPv4Addr("192.0.2.0/24"),
|
||||
|
||||
/*+----------------------+--------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+--------------------+
|
||||
| Address Block | 192.88.99.0/24 |
|
||||
| Name | 6to4 Relay Anycast |
|
||||
| RFC | [RFC3068] |
|
||||
| Allocation Date | June 2001 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | True |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+--------------------+*/
|
||||
MustIPv4Addr("192.88.99.0/24"),
|
||||
|
||||
/*+----------------------+----------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------+
|
||||
| Address Block | 192.168.0.0/16 |
|
||||
| Name | Private-Use |
|
||||
| RFC | [RFC1918] |
|
||||
| Allocation Date | February 1996 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+----------------+*/
|
||||
MustIPv4Addr("192.168.0.0/16"),
|
||||
|
||||
/*+----------------------+---------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+---------------+
|
||||
| Address Block | 198.18.0.0/15 |
|
||||
| Name | Benchmarking |
|
||||
| RFC | [RFC2544] |
|
||||
| Allocation Date | March 1999 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+---------------+*/
|
||||
MustIPv4Addr("198.18.0.0/15"),
|
||||
|
||||
/*+----------------------+----------------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------------------+
|
||||
| Address Block | 198.51.100.0/24 |
|
||||
| Name | Documentation (TEST-NET-2) |
|
||||
| RFC | [RFC5737] |
|
||||
| Allocation Date | January 2010 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+----------------------------+*/
|
||||
MustIPv4Addr("198.51.100.0/24"),
|
||||
|
||||
/*+----------------------+----------------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------------------+
|
||||
| Address Block | 203.0.113.0/24 |
|
||||
| Name | Documentation (TEST-NET-3) |
|
||||
| RFC | [RFC5737] |
|
||||
| Allocation Date | January 2010 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+----------------------------+*/
|
||||
MustIPv4Addr("203.0.113.0/24"),
|
||||
|
||||
/*+----------------------+----------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------------+
|
||||
| Address Block | 240.0.0.0/4 |
|
||||
| Name | Reserved |
|
||||
| RFC | [RFC1112], Section 4 |
|
||||
| Allocation Date | August 1989 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | True |
|
||||
+----------------------+----------------------+*/
|
||||
MustIPv4Addr("240.0.0.0/4"),
|
||||
|
||||
/*+----------------------+----------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------------+
|
||||
| Address Block | 255.255.255.255/32 |
|
||||
| Name | Limited Broadcast |
|
||||
| RFC | [RFC0919], Section 7 |
|
||||
| Allocation Date | October 1984 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False |
|
||||
| Destination | True |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+----------------------+*/
|
||||
MustIPv4Addr("255.255.255.255/32"),
|
||||
|
||||
/*+----------------------+------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+------------------+
|
||||
| Address Block | ::1/128 |
|
||||
| Name | Loopback Address |
|
||||
| RFC | [RFC4291] |
|
||||
| Allocation Date | February 2006 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | True |
|
||||
+----------------------+------------------+*/
|
||||
MustIPv6Addr("::1/128"),
|
||||
|
||||
/*+----------------------+---------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+---------------------+
|
||||
| Address Block | ::/128 |
|
||||
| Name | Unspecified Address |
|
||||
| RFC | [RFC4291] |
|
||||
| Allocation Date | February 2006 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | True |
|
||||
+----------------------+---------------------+*/
|
||||
MustIPv6Addr("::/128"),
|
||||
|
||||
/*+----------------------+---------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+---------------------+
|
||||
| Address Block | 64:ff9b::/96 |
|
||||
| Name | IPv4-IPv6 Translat. |
|
||||
| RFC | [RFC6052] |
|
||||
| Allocation Date | October 2010 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | True |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+---------------------+*/
|
||||
MustIPv6Addr("64:ff9b::/96"),
|
||||
|
||||
/*+----------------------+---------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+---------------------+
|
||||
| Address Block | ::ffff:0:0/96 |
|
||||
| Name | IPv4-mapped Address |
|
||||
| RFC | [RFC4291] |
|
||||
| Allocation Date | February 2006 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | True |
|
||||
+----------------------+---------------------+*/
|
||||
MustIPv6Addr("::ffff:0:0/96"),
|
||||
|
||||
/*+----------------------+----------------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------------------+
|
||||
| Address Block | 100::/64 |
|
||||
| Name | Discard-Only Address Block |
|
||||
| RFC | [RFC6666] |
|
||||
| Allocation Date | June 2012 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+----------------------------+*/
|
||||
MustIPv6Addr("100::/64"),
|
||||
|
||||
/*+----------------------+---------------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+---------------------------+
|
||||
| Address Block | 2001::/23 |
|
||||
| Name | IETF Protocol Assignments |
|
||||
| RFC | [RFC2928] |
|
||||
| Allocation Date | September 2000 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False[1] |
|
||||
| Destination | False[1] |
|
||||
| Forwardable | False[1] |
|
||||
| Global | False[1] |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+---------------------------+*/
|
||||
// [1] Unless allowed by a more specific allocation.
|
||||
MustIPv6Addr("2001::/16"),
|
||||
|
||||
/*+----------------------+----------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------+
|
||||
| Address Block | 2001::/32 |
|
||||
| Name | TEREDO |
|
||||
| RFC | [RFC4380] |
|
||||
| Allocation Date | January 2006 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+----------------+*/
|
||||
// Covered by previous entry, included for completeness.
|
||||
//
|
||||
// MustIPv6Addr("2001::/16"),
|
||||
|
||||
/*+----------------------+----------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+----------------+
|
||||
| Address Block | 2001:2::/48 |
|
||||
| Name | Benchmarking |
|
||||
| RFC | [RFC5180] |
|
||||
| Allocation Date | April 2008 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+----------------+*/
|
||||
// Covered by previous entry, included for completeness.
|
||||
//
|
||||
// MustIPv6Addr("2001:2::/48"),
|
||||
|
||||
/*+----------------------+---------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+---------------+
|
||||
| Address Block | 2001:db8::/32 |
|
||||
| Name | Documentation |
|
||||
| RFC | [RFC3849] |
|
||||
| Allocation Date | July 2004 |
|
||||
| Termination Date | N/A |
|
||||
| Source | False |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+---------------+*/
|
||||
// Covered by previous entry, included for completeness.
|
||||
//
|
||||
// MustIPv6Addr("2001:db8::/32"),
|
||||
|
||||
/*+----------------------+--------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+--------------+
|
||||
| Address Block | 2001:10::/28 |
|
||||
| Name | ORCHID |
|
||||
| RFC | [RFC4843] |
|
||||
| Allocation Date | March 2007 |
|
||||
| Termination Date | March 2014 |
|
||||
| Source | False |
|
||||
| Destination | False |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+--------------+*/
|
||||
// Covered by previous entry, included for completeness.
|
||||
//
|
||||
// MustIPv6Addr("2001:10::/28"),
|
||||
|
||||
/*+----------------------+---------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+---------------+
|
||||
| Address Block | 2002::/16 [2] |
|
||||
| Name | 6to4 |
|
||||
| RFC | [RFC3056] |
|
||||
| Allocation Date | February 2001 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | N/A [2] |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+---------------+*/
|
||||
// [2] See [RFC3056] for details.
|
||||
MustIPv6Addr("2002::/16"),
|
||||
|
||||
/*+----------------------+--------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+--------------+
|
||||
| Address Block | fc00::/7 |
|
||||
| Name | Unique-Local |
|
||||
| RFC | [RFC4193] |
|
||||
| Allocation Date | October 2005 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | True |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | False |
|
||||
+----------------------+--------------+*/
|
||||
MustIPv6Addr("fc00::/7"),
|
||||
|
||||
/*+----------------------+-----------------------+
|
||||
| Attribute | Value |
|
||||
+----------------------+-----------------------+
|
||||
| Address Block | fe80::/10 |
|
||||
| Name | Linked-Scoped Unicast |
|
||||
| RFC | [RFC4291] |
|
||||
| Allocation Date | February 2006 |
|
||||
| Termination Date | N/A |
|
||||
| Source | True |
|
||||
| Destination | True |
|
||||
| Forwardable | False |
|
||||
| Global | False |
|
||||
| Reserved-by-Protocol | True |
|
||||
+----------------------+-----------------------+*/
|
||||
MustIPv6Addr("fe80::/10"),
|
||||
},
|
||||
7335: {
|
||||
// [RFC7335] IPv4 Service Continuity Prefix
|
||||
MustIPv4Addr("192.0.0.0/29"), // [RFC7335], §6 IANA Considerations
|
||||
},
|
||||
ForwardingBlacklist: { // Pseudo-RFC
|
||||
// Blacklist of non-forwardable IP blocks taken from RFC6890
|
||||
//
|
||||
// TODO: the attributes for forwardable should be
|
||||
// searchable and embedded in the main list of RFCs
|
||||
// above.
|
||||
MustIPv4Addr("0.0.0.0/8"),
|
||||
MustIPv4Addr("127.0.0.0/8"),
|
||||
MustIPv4Addr("169.254.0.0/16"),
|
||||
MustIPv4Addr("192.0.0.0/24"),
|
||||
MustIPv4Addr("192.0.2.0/24"),
|
||||
MustIPv4Addr("198.51.100.0/24"),
|
||||
MustIPv4Addr("203.0.113.0/24"),
|
||||
MustIPv4Addr("240.0.0.0/4"),
|
||||
MustIPv4Addr("255.255.255.255/32"),
|
||||
MustIPv6Addr("::1/128"),
|
||||
MustIPv6Addr("::/128"),
|
||||
MustIPv6Addr("::ffff:0:0/96"),
|
||||
|
||||
// There is no way of expressing a whitelist per RFC2928
|
||||
// atm without creating a negative mask, which I don't
|
||||
// want to do atm.
|
||||
//MustIPv6Addr("2001::/23"),
|
||||
|
||||
MustIPv6Addr("2001:db8::/32"),
|
||||
MustIPv6Addr("2001:10::/28"),
|
||||
MustIPv6Addr("fe80::/10"),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// VisitAllRFCs iterates over all known RFCs and calls the visitor
|
||||
func VisitAllRFCs(fn func(rfcNum uint, sockaddrs SockAddrs)) {
|
||||
rfcNetMap := KnownRFCs()
|
||||
|
||||
// Blacklist of faux-RFCs. Don't show the world that we're abusing the
|
||||
// RFC system in this library.
|
||||
rfcBlacklist := map[uint]struct{}{
|
||||
ForwardingBlacklist: {},
|
||||
}
|
||||
|
||||
for rfcNum, sas := range rfcNetMap {
|
||||
if _, found := rfcBlacklist[rfcNum]; !found {
|
||||
fn(rfcNum, sas)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
package sockaddr
|
||||
|
||||
// RouteInterface is the contract implemented by the platform-specific route
// table readers in this package.
type RouteInterface interface {
	// GetDefaultInterfaceName reports the name of the network interface
	// that carries the default route. When the name cannot be determined
	// it returns an empty string together with a non-nil error.
	GetDefaultInterfaceName() (string, error)
}
|
||||
|
||||
// VisitCommands visits each command used by the platform-specific RouteInfo
|
||||
// implementation.
|
||||
func (ri routeInfo) VisitCommands(fn func(name string, cmd []string)) {
|
||||
for k, v := range ri.cmds {
|
||||
cmds := append([]string(nil), v...)
|
||||
fn(k, cmds)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
// +build darwin dragonfly freebsd netbsd openbsd
|
||||
|
||||
package sockaddr
|
||||
|
||||
import "os/exec"
|
||||
|
||||
// cmds maps a short command name to the argument vector (program path first)
// that is executed to read the default route on this platform.
var cmds = map[string][]string{
	"route": {"/sbin/route", "-n", "get", "default"},
}

// routeInfo carries the command table used to query routing information.
type routeInfo struct {
	cmds map[string][]string
}
|
||||
|
||||
// NewRouteInfo returns a BSD-specific implementation of the RouteInfo
|
||||
// interface.
|
||||
func NewRouteInfo() (routeInfo, error) {
|
||||
return routeInfo{
|
||||
cmds: cmds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetDefaultInterfaceName returns the interface name attached to the default
|
||||
// route on the default interface.
|
||||
func (ri routeInfo) GetDefaultInterfaceName() (string, error) {
|
||||
out, err := exec.Command(cmds["route"][0], cmds["route"][1:]...).Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var ifName string
|
||||
if ifName, err = parseDefaultIfNameFromRoute(string(out)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return ifName, nil
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
// +build android nacl plan9
|
||||
|
||||
package sockaddr
|
||||
|
||||
import "errors"
|
||||
|
||||
// getDefaultIfName is the stand-in used on platforms without a route table
// reader. It never yields an interface name: the result is always an empty
// string and an "unsupported platform" error.
func getDefaultIfName() (string, error) {
	err := errors.New("No default interface found (unsupported platform)")
	return "", err
}
|
|
@ -0,0 +1,37 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os/exec"
|
||||
)
|
||||
|
||||
// cmds maps a short command name to the argument vector (program path first)
// that is executed to read the routing table on this platform.
var cmds = map[string][]string{
	"ip": {"/sbin/ip", "route"},
}

// routeInfo carries the command table used to query routing information.
type routeInfo struct {
	cmds map[string][]string
}
|
||||
|
||||
// NewRouteInfo returns a Linux-specific implementation of the RouteInfo
|
||||
// interface.
|
||||
func NewRouteInfo() (routeInfo, error) {
|
||||
return routeInfo{
|
||||
cmds: cmds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetDefaultInterfaceName returns the interface name attached to the default
|
||||
// route on the default interface.
|
||||
func (ri routeInfo) GetDefaultInterfaceName() (string, error) {
|
||||
out, err := exec.Command(cmds["ip"][0], cmds["ip"][1:]...).Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var ifName string
|
||||
if ifName, err = parseDefaultIfNameFromIPCmd(string(out)); err != nil {
|
||||
return "", errors.New("No default interface found")
|
||||
}
|
||||
return ifName, nil
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os/exec"
|
||||
)
|
||||
|
||||
// cmds maps a short command name to the argument vector (program path first)
// that is executed to read the default route on this platform.
var cmds = map[string][]string{
	"route": {"/usr/sbin/route", "-n", "get", "default"},
}

// routeInfo carries the command table used to query routing information.
type routeInfo struct {
	cmds map[string][]string
}
|
||||
|
||||
// NewRouteInfo returns a BSD-specific implementation of the RouteInfo
|
||||
// interface.
|
||||
func NewRouteInfo() (routeInfo, error) {
|
||||
return routeInfo{
|
||||
cmds: cmds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetDefaultInterfaceName returns the interface name attached to the default
|
||||
// route on the default interface.
|
||||
func (ri routeInfo) GetDefaultInterfaceName() (string, error) {
|
||||
out, err := exec.Command(cmds["route"][0], cmds["route"][1:]...).Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var ifName string
|
||||
if ifName, err = parseDefaultIfNameFromRoute(string(out)); err != nil {
|
||||
return "", errors.New("No default interface found")
|
||||
}
|
||||
return ifName, nil
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
package sockaddr
|
||||
|
||||
import "os/exec"
|
||||
|
||||
// cmds maps a short command name to the argument vector (program first) that
// is executed to gather routing and interface information on this platform.
var cmds = map[string][]string{
	"netstat":  {"netstat", "-rn"},
	"ipconfig": {"ipconfig"},
}

// routeInfo carries the command table used to query routing information.
type routeInfo struct {
	cmds map[string][]string
}
|
||||
|
||||
// NewRouteInfo returns a BSD-specific implementation of the RouteInfo
|
||||
// interface.
|
||||
func NewRouteInfo() (routeInfo, error) {
|
||||
return routeInfo{
|
||||
cmds: cmds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetDefaultInterfaceName returns the interface name attached to the default
|
||||
// route on the default interface.
|
||||
func (ri routeInfo) GetDefaultInterfaceName() (string, error) {
|
||||
ifNameOut, err := exec.Command(cmds["netstat"][0], cmds["netstat"][1:]...).Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
ipconfigOut, err := exec.Command(cmds["ipconfig"][0], cmds["ipconfig"][1:]...).Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
ifName, err := parseDefaultIfNameWindows(string(ifNameOut), string(ipconfigOut))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return ifName, nil
|
||||
}
|
|
@ -0,0 +1,178 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type (
	// SockAddrType identifies the kind of a SockAddr. The values are bit
	// flags so that several kinds can be tested with a single mask.
	SockAddrType int

	// AttrName is the name of an attribute that can be looked up on a
	// SockAddr.
	AttrName string
)

// Only TypeUnknown is declared with the SockAddrType type; the remaining
// values are untyped constants that convert implicitly where a SockAddrType
// is expected.
const (
	TypeUnknown SockAddrType = 0
	TypeUnix                 = 1 << 0
	TypeIPv4                 = 1 << 1
	TypeIPv6                 = 1 << 2

	// TypeIP is the union of TypeIPv4 and TypeIPv6
	TypeIP = TypeIPv4 | TypeIPv6
)
|
||||
|
||||
type SockAddr interface {
|
||||
// CmpRFC returns 0 if SockAddr exactly matches one of the matched RFC
|
||||
// networks, -1 if the receiver is contained within the RFC network, or
|
||||
// 1 if the address is not contained within the RFC.
|
||||
CmpRFC(rfcNum uint, sa SockAddr) int
|
||||
|
||||
// Contains returns true if the SockAddr arg is contained within the
|
||||
// receiver
|
||||
Contains(SockAddr) bool
|
||||
|
||||
// Equal allows for the comparison of two SockAddrs
|
||||
Equal(SockAddr) bool
|
||||
|
||||
DialPacketArgs() (string, string)
|
||||
DialStreamArgs() (string, string)
|
||||
ListenPacketArgs() (string, string)
|
||||
ListenStreamArgs() (string, string)
|
||||
|
||||
// String returns the string representation of SockAddr
|
||||
String() string
|
||||
|
||||
// Type returns the SockAddrType
|
||||
Type() SockAddrType
|
||||
}
|
||||
|
||||
// sockAddrAttrMap is a map of the SockAddr type-specific attributes.
|
||||
var sockAddrAttrMap map[AttrName]func(SockAddr) string
|
||||
var sockAddrAttrs []AttrName
|
||||
|
||||
func init() {
|
||||
sockAddrInit()
|
||||
}
|
||||
|
||||
// New creates a new SockAddr from the string. The order in which New()
|
||||
// attempts to construct a SockAddr is: IPv4Addr, IPv6Addr, SockAddrUnix.
|
||||
//
|
||||
// NOTE: New() relies on the heuristic wherein if the path begins with either a
|
||||
// '.' or '/' character before creating a new UnixSock. For UNIX sockets that
|
||||
// are absolute paths or are nested within a sub-directory, this works as
|
||||
// expected, however if the UNIX socket is contained in the current working
|
||||
// directory, this will fail unless the path begins with "./"
|
||||
// (e.g. "./my-local-socket"). Calls directly to NewUnixSock() do not suffer
|
||||
// this limitation. Invalid IP addresses such as "256.0.0.0/-1" will run afoul
|
||||
// of this heuristic and be assumed to be a valid UNIX socket path (which they
|
||||
// are, but it is probably not what you want and you won't realize it until you
|
||||
// stat(2) the file system to discover it doesn't exist).
|
||||
func NewSockAddr(s string) (SockAddr, error) {
|
||||
ipv4Addr, err := NewIPv4Addr(s)
|
||||
if err == nil {
|
||||
return ipv4Addr, nil
|
||||
}
|
||||
|
||||
ipv6Addr, err := NewIPv6Addr(s)
|
||||
if err == nil {
|
||||
return ipv6Addr, nil
|
||||
}
|
||||
|
||||
// Check to make sure the string begins with either a '.' or '/', or
|
||||
// contains a '/'.
|
||||
if len(s) > 1 && (strings.IndexAny(s[0:1], "./") != -1 || strings.IndexByte(s, '/') != -1) {
|
||||
unixSock, err := NewUnixSock(s)
|
||||
if err == nil {
|
||||
return unixSock, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("Unable to convert %q to an IPv4 or IPv6 address, or a UNIX Socket", s)
|
||||
}
|
||||
|
||||
// ToIPAddr returns an IPAddr type or nil if the type conversion fails.
|
||||
func ToIPAddr(sa SockAddr) *IPAddr {
|
||||
ipa, ok := sa.(IPAddr)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return &ipa
|
||||
}
|
||||
|
||||
// ToIPv4Addr returns an IPv4Addr type or nil if the type conversion fails.
|
||||
func ToIPv4Addr(sa SockAddr) *IPv4Addr {
|
||||
switch v := sa.(type) {
|
||||
case IPv4Addr:
|
||||
return &v
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// ToIPv6Addr returns an IPv6Addr type or nil if the type conversion fails.
|
||||
func ToIPv6Addr(sa SockAddr) *IPv6Addr {
|
||||
switch v := sa.(type) {
|
||||
case IPv6Addr:
|
||||
return &v
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// ToUnixSock returns a UnixSock type or nil if the type conversion fails.
|
||||
func ToUnixSock(sa SockAddr) *UnixSock {
|
||||
switch v := sa.(type) {
|
||||
case UnixSock:
|
||||
return &v
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// SockAddrAttr returns a string representation of an attribute for the given
|
||||
// SockAddr.
|
||||
func SockAddrAttr(sa SockAddr, selector AttrName) string {
|
||||
fn, found := sockAddrAttrMap[selector]
|
||||
if !found {
|
||||
return ""
|
||||
}
|
||||
|
||||
return fn(sa)
|
||||
}
|
||||
|
||||
// String() for SockAddrType returns a string representation of the
|
||||
// SockAddrType (e.g. "IPv4", "IPv6", "UNIX", "IP", or "unknown").
|
||||
func (sat SockAddrType) String() string {
|
||||
switch sat {
|
||||
case TypeIPv4:
|
||||
return "IPv4"
|
||||
case TypeIPv6:
|
||||
return "IPv6"
|
||||
// There is no concrete "IP" type. Leaving here as a reminder.
|
||||
// case TypeIP:
|
||||
// return "IP"
|
||||
case TypeUnix:
|
||||
return "UNIX"
|
||||
default:
|
||||
panic("unsupported type")
|
||||
}
|
||||
}
|
||||
|
||||
// sockAddrInit is called once at init()
|
||||
func sockAddrInit() {
|
||||
sockAddrAttrs = []AttrName{
|
||||
"type", // type should be first
|
||||
"string",
|
||||
}
|
||||
|
||||
sockAddrAttrMap = map[AttrName]func(sa SockAddr) string{
|
||||
"string": func(sa SockAddr) string {
|
||||
return sa.String()
|
||||
},
|
||||
"type": func(sa SockAddr) string {
|
||||
return sa.Type().String()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// UnixSockAttrs returns a list of attributes supported by the UnixSock type
|
||||
func SockAddrAttrs() []AttrName {
|
||||
return sockAddrAttrs
|
||||
}
|
|
@ -0,0 +1,193 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// SockAddrs is a slice of SockAddrs
|
||||
type SockAddrs []SockAddr
|
||||
|
||||
func (s SockAddrs) Len() int { return len(s) }
|
||||
func (s SockAddrs) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
|
||||
// CmpAddrFunc is the function signature that must be met to be used in the
|
||||
// OrderedAddrBy multiAddrSorter
|
||||
type CmpAddrFunc func(p1, p2 *SockAddr) int
|
||||
|
||||
// multiAddrSorter implements the Sort interface, sorting the SockAddrs within.
|
||||
type multiAddrSorter struct {
|
||||
addrs SockAddrs
|
||||
cmp []CmpAddrFunc
|
||||
}
|
||||
|
||||
// Sort sorts the argument slice according to the Cmp functions passed to
|
||||
// OrderedAddrBy.
|
||||
func (ms *multiAddrSorter) Sort(sockAddrs SockAddrs) {
|
||||
ms.addrs = sockAddrs
|
||||
sort.Sort(ms)
|
||||
}
|
||||
|
||||
// OrderedAddrBy sorts SockAddr by the list of sort function pointers.
|
||||
func OrderedAddrBy(cmpFuncs ...CmpAddrFunc) *multiAddrSorter {
|
||||
return &multiAddrSorter{
|
||||
cmp: cmpFuncs,
|
||||
}
|
||||
}
|
||||
|
||||
// Len is part of sort.Interface.
|
||||
func (ms *multiAddrSorter) Len() int {
|
||||
return len(ms.addrs)
|
||||
}
|
||||
|
||||
// Less is part of sort.Interface. It is implemented by looping along the
|
||||
// Cmp() functions until it finds a comparison that is either less than,
|
||||
// equal to, or greater than.
|
||||
func (ms *multiAddrSorter) Less(i, j int) bool {
|
||||
p, q := &ms.addrs[i], &ms.addrs[j]
|
||||
// Try all but the last comparison.
|
||||
var k int
|
||||
for k = 0; k < len(ms.cmp)-1; k++ {
|
||||
cmp := ms.cmp[k]
|
||||
x := cmp(p, q)
|
||||
switch x {
|
||||
case -1:
|
||||
// p < q, so we have a decision.
|
||||
return true
|
||||
case 1:
|
||||
// p > q, so we have a decision.
|
||||
return false
|
||||
}
|
||||
// p == q; try the next comparison.
|
||||
}
|
||||
// All comparisons to here said "equal", so just return whatever the
|
||||
// final comparison reports.
|
||||
switch ms.cmp[k](p, q) {
|
||||
case -1:
|
||||
return true
|
||||
case 1:
|
||||
return false
|
||||
default:
|
||||
// Still a tie! Now what?
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Swap is part of sort.Interface.
|
||||
func (ms *multiAddrSorter) Swap(i, j int) {
|
||||
ms.addrs[i], ms.addrs[j] = ms.addrs[j], ms.addrs[i]
|
||||
}
|
||||
|
||||
// Named results for the Cmp*/Asc* comparison functions. They exist purely for
// readability: some comparisons (mixed-type ones in particular) are hard to
// follow when written with bare -1/0/1.
const (
	sortReceiverBeforeArg = iota - 1 // -1: the receiver (first operand) sorts first
	sortDeferDecision                //  0: undecided, let a later comparison decide
	sortArgBeforeReceiver            //  1: the argument (second operand) sorts first
)
|
||||
|
||||
// AscAddress is a sorting function to sort SockAddrs by their respective
|
||||
// address type. Non-equal types are deferred in the sort.
|
||||
func AscAddress(p1Ptr, p2Ptr *SockAddr) int {
|
||||
p1 := *p1Ptr
|
||||
p2 := *p2Ptr
|
||||
|
||||
switch v := p1.(type) {
|
||||
case IPv4Addr:
|
||||
return v.CmpAddress(p2)
|
||||
case IPv6Addr:
|
||||
return v.CmpAddress(p2)
|
||||
case UnixSock:
|
||||
return v.CmpAddress(p2)
|
||||
default:
|
||||
return sortDeferDecision
|
||||
}
|
||||
}
|
||||
|
||||
// AscPort is a sorting function to sort SockAddrs by their respective address
|
||||
// type. Non-equal types are deferred in the sort.
|
||||
func AscPort(p1Ptr, p2Ptr *SockAddr) int {
|
||||
p1 := *p1Ptr
|
||||
p2 := *p2Ptr
|
||||
|
||||
switch v := p1.(type) {
|
||||
case IPv4Addr:
|
||||
return v.CmpPort(p2)
|
||||
case IPv6Addr:
|
||||
return v.CmpPort(p2)
|
||||
default:
|
||||
return sortDeferDecision
|
||||
}
|
||||
}
|
||||
|
||||
// AscPrivate is a sorting function to sort "more secure" private values before
|
||||
// "more public" values. Both IPv4 and IPv6 are compared against RFC6890
|
||||
// (RFC6890 includes, and is not limited to, RFC1918 and RFC6598 for IPv4, and
|
||||
// IPv6 includes RFC4193).
|
||||
func AscPrivate(p1Ptr, p2Ptr *SockAddr) int {
|
||||
p1 := *p1Ptr
|
||||
p2 := *p2Ptr
|
||||
|
||||
switch v := p1.(type) {
|
||||
case IPv4Addr, IPv6Addr:
|
||||
return v.CmpRFC(6890, p2)
|
||||
default:
|
||||
return sortDeferDecision
|
||||
}
|
||||
}
|
||||
|
||||
// AscNetworkSize is a sorting function to sort SockAddrs based on their network
|
||||
// size. Non-equal types are deferred in the sort.
|
||||
func AscNetworkSize(p1Ptr, p2Ptr *SockAddr) int {
|
||||
p1 := *p1Ptr
|
||||
p2 := *p2Ptr
|
||||
p1Type := p1.Type()
|
||||
p2Type := p2.Type()
|
||||
|
||||
// Network size operations on non-IP types make no sense
|
||||
if p1Type != p2Type && p1Type != TypeIP {
|
||||
return sortDeferDecision
|
||||
}
|
||||
|
||||
ipA := p1.(IPAddr)
|
||||
ipB := p2.(IPAddr)
|
||||
|
||||
return bytes.Compare([]byte(*ipA.NetIPMask()), []byte(*ipB.NetIPMask()))
|
||||
}
|
||||
|
||||
// AscType is a sorting function to sort "more secure" types before
|
||||
// "less-secure" types.
|
||||
func AscType(p1Ptr, p2Ptr *SockAddr) int {
|
||||
p1 := *p1Ptr
|
||||
p2 := *p2Ptr
|
||||
p1Type := p1.Type()
|
||||
p2Type := p2.Type()
|
||||
switch {
|
||||
case p1Type < p2Type:
|
||||
return sortReceiverBeforeArg
|
||||
case p1Type == p2Type:
|
||||
return sortDeferDecision
|
||||
case p1Type > p2Type:
|
||||
return sortArgBeforeReceiver
|
||||
default:
|
||||
return sortDeferDecision
|
||||
}
|
||||
}
|
||||
|
||||
// FilterByType returns two lists: a list of matched and unmatched SockAddrs
|
||||
func (sas SockAddrs) FilterByType(type_ SockAddrType) (matched, excluded SockAddrs) {
|
||||
matched = make(SockAddrs, 0, len(sas))
|
||||
excluded = make(SockAddrs, 0, len(sas))
|
||||
|
||||
for _, sa := range sas {
|
||||
if sa.Type()&type_ != 0 {
|
||||
matched = append(matched, sa)
|
||||
} else {
|
||||
excluded = append(excluded, sa)
|
||||
}
|
||||
}
|
||||
return matched, excluded
|
||||
}
|
|
@ -0,0 +1,135 @@
|
|||
package sockaddr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type UnixSock struct {
|
||||
SockAddr
|
||||
path string
|
||||
}
|
||||
type UnixSocks []*UnixSock
|
||||
|
||||
// unixAttrMap is a map of the UnixSockAddr type-specific attributes.
|
||||
var unixAttrMap map[AttrName]func(UnixSock) string
|
||||
var unixAttrs []AttrName
|
||||
|
||||
func init() {
|
||||
unixAttrInit()
|
||||
}
|
||||
|
||||
// NewUnixSock creates an UnixSock from a string path. String can be in the
|
||||
// form of either URI-based string (e.g. `file:///etc/passwd`), an absolute
|
||||
// path (e.g. `/etc/passwd`), or a relative path (e.g. `./foo`).
|
||||
func NewUnixSock(s string) (ret UnixSock, err error) {
|
||||
ret.path = s
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// CmpAddress follows the Cmp() standard protocol and returns:
|
||||
//
|
||||
// - -1 If the receiver should sort first because its name lexically sorts before arg
|
||||
// - 0 if the SockAddr arg is not a UnixSock, or is a UnixSock with the same path.
|
||||
// - 1 If the argument should sort first.
|
||||
func (us UnixSock) CmpAddress(sa SockAddr) int {
|
||||
usb, ok := sa.(UnixSock)
|
||||
if !ok {
|
||||
return sortDeferDecision
|
||||
}
|
||||
|
||||
return strings.Compare(us.Path(), usb.Path())
|
||||
}
|
||||
|
||||
// DialPacketArgs returns the arguments required to be passed to net.DialUnix()
|
||||
// with the `unixgram` network type.
|
||||
func (us UnixSock) DialPacketArgs() (network, dialArgs string) {
|
||||
return "unixgram", us.path
|
||||
}
|
||||
|
||||
// DialStreamArgs returns the arguments required to be passed to net.DialUnix()
|
||||
// with the `unix` network type.
|
||||
func (us UnixSock) DialStreamArgs() (network, dialArgs string) {
|
||||
return "unix", us.path
|
||||
}
|
||||
|
||||
// Equal returns true if a SockAddr is equal to the receiving UnixSock.
|
||||
func (us UnixSock) Equal(sa SockAddr) bool {
|
||||
usb, ok := sa.(UnixSock)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
if us.Path() != usb.Path() {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// ListenPacketArgs returns the arguments required to be passed to
|
||||
// net.ListenUnixgram() with the `unixgram` network type.
|
||||
func (us UnixSock) ListenPacketArgs() (network, dialArgs string) {
|
||||
return "unixgram", us.path
|
||||
}
|
||||
|
||||
// ListenStreamArgs returns the arguments required to be passed to
|
||||
// net.ListenUnix() with the `unix` network type.
|
||||
func (us UnixSock) ListenStreamArgs() (network, dialArgs string) {
|
||||
return "unix", us.path
|
||||
}
|
||||
|
||||
// MustUnixSock is a helper method that must return an UnixSock or panic on
|
||||
// invalid input.
|
||||
func MustUnixSock(addr string) UnixSock {
|
||||
us, err := NewUnixSock(addr)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Unable to create a UnixSock from %+q: %v", addr, err))
|
||||
}
|
||||
return us
|
||||
}
|
||||
|
||||
// Path returns the given path of the UnixSock
|
||||
func (us UnixSock) Path() string {
|
||||
return us.path
|
||||
}
|
||||
|
||||
// String returns the path of the UnixSock
|
||||
func (us UnixSock) String() string {
|
||||
return fmt.Sprintf("%+q", us.path)
|
||||
}
|
||||
|
||||
// Type is used as a type switch and returns TypeUnix
|
||||
func (UnixSock) Type() SockAddrType {
|
||||
return TypeUnix
|
||||
}
|
||||
|
||||
// UnixSockAttrs returns a list of attributes supported by the UnixSockAddr type
|
||||
func UnixSockAttrs() []AttrName {
|
||||
return unixAttrs
|
||||
}
|
||||
|
||||
// UnixSockAttr returns a string representation of an attribute for the given
|
||||
// UnixSock.
|
||||
func UnixSockAttr(us UnixSock, attrName AttrName) string {
|
||||
fn, found := unixAttrMap[attrName]
|
||||
if !found {
|
||||
return ""
|
||||
}
|
||||
|
||||
return fn(us)
|
||||
}
|
||||
|
||||
// unixAttrInit is called once at init()
|
||||
func unixAttrInit() {
|
||||
// Sorted for human readability
|
||||
unixAttrs = []AttrName{
|
||||
"path",
|
||||
}
|
||||
|
||||
unixAttrMap = map[AttrName]func(us UnixSock) string{
|
||||
"path": func(us UnixSock) string {
|
||||
return us.Path()
|
||||
},
|
||||
}
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
package lru
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/golang-lru/simplelru"
|
||||
)
|
||||
|
||||
// Default2QRecentRatio is the ratio of the 2Q cache dedicated to recently
// added entries that have only been accessed once.
const Default2QRecentRatio = 0.25

// Default2QGhostEntries is the default ratio of ghost entries kept to track
// entries recently evicted.
const Default2QGhostEntries = 0.50
|
||||
|
||||
// TwoQueueCache is a thread-safe fixed size 2Q cache.
|
||||
// 2Q is an enhancement over the standard LRU cache
|
||||
// in that it tracks both frequently and recently used
|
||||
// entries separately. This avoids a burst in access to new
|
||||
// entries from evicting frequently used entries. It adds some
|
||||
// additional tracking overhead to the standard LRU cache, and is
|
||||
// computationally about 2x the cost, and adds some metadata over
|
||||
// head. The ARCCache is similar, but does not require setting any
|
||||
// parameters.
|
||||
type TwoQueueCache struct {
|
||||
size int
|
||||
recentSize int
|
||||
|
||||
recent *simplelru.LRU
|
||||
frequent *simplelru.LRU
|
||||
recentEvict *simplelru.LRU
|
||||
lock sync.RWMutex
|
||||
}
|
||||
|
||||
// New2Q creates a new TwoQueueCache using the default
|
||||
// values for the parameters.
|
||||
func New2Q(size int) (*TwoQueueCache, error) {
|
||||
return New2QParams(size, Default2QRecentRatio, Default2QGhostEntries)
|
||||
}
|
||||
|
||||
// New2QParams creates a new TwoQueueCache using the provided
|
||||
// parameter values.
|
||||
func New2QParams(size int, recentRatio float64, ghostRatio float64) (*TwoQueueCache, error) {
|
||||
if size <= 0 {
|
||||
return nil, fmt.Errorf("invalid size")
|
||||
}
|
||||
if recentRatio < 0.0 || recentRatio > 1.0 {
|
||||
return nil, fmt.Errorf("invalid recent ratio")
|
||||
}
|
||||
if ghostRatio < 0.0 || ghostRatio > 1.0 {
|
||||
return nil, fmt.Errorf("invalid ghost ratio")
|
||||
}
|
||||
|
||||
// Determine the sub-sizes
|
||||
recentSize := int(float64(size) * recentRatio)
|
||||
evictSize := int(float64(size) * ghostRatio)
|
||||
|
||||
// Allocate the LRUs
|
||||
recent, err := simplelru.NewLRU(size, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
frequent, err := simplelru.NewLRU(size, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
recentEvict, err := simplelru.NewLRU(evictSize, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Initialize the cache
|
||||
c := &TwoQueueCache{
|
||||
size: size,
|
||||
recentSize: recentSize,
|
||||
recent: recent,
|
||||
frequent: frequent,
|
||||
recentEvict: recentEvict,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *TwoQueueCache) Get(key interface{}) (interface{}, bool) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
// Check if this is a frequent value
|
||||
if val, ok := c.frequent.Get(key); ok {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// If the value is contained in recent, then we
|
||||
// promote it to frequent
|
||||
if val, ok := c.recent.Peek(key); ok {
|
||||
c.recent.Remove(key)
|
||||
c.frequent.Add(key, val)
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// No hit
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func (c *TwoQueueCache) Add(key, value interface{}) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
// Check if the value is frequently used already,
|
||||
// and just update the value
|
||||
if c.frequent.Contains(key) {
|
||||
c.frequent.Add(key, value)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if the value is recently used, and promote
|
||||
// the value into the frequent list
|
||||
if c.recent.Contains(key) {
|
||||
c.recent.Remove(key)
|
||||
c.frequent.Add(key, value)
|
||||
return
|
||||
}
|
||||
|
||||
// If the value was recently evicted, add it to the
|
||||
// frequently used list
|
||||
if c.recentEvict.Contains(key) {
|
||||
c.ensureSpace(true)
|
||||
c.recentEvict.Remove(key)
|
||||
c.frequent.Add(key, value)
|
||||
return
|
||||
}
|
||||
|
||||
// Add to the recently seen list
|
||||
c.ensureSpace(false)
|
||||
c.recent.Add(key, value)
|
||||
return
|
||||
}
|
||||
|
||||
// ensureSpace is used to ensure we have space in the cache
|
||||
func (c *TwoQueueCache) ensureSpace(recentEvict bool) {
|
||||
// If we have space, nothing to do
|
||||
recentLen := c.recent.Len()
|
||||
freqLen := c.frequent.Len()
|
||||
if recentLen+freqLen < c.size {
|
||||
return
|
||||
}
|
||||
|
||||
// If the recent buffer is larger than
|
||||
// the target, evict from there
|
||||
if recentLen > 0 && (recentLen > c.recentSize || (recentLen == c.recentSize && !recentEvict)) {
|
||||
k, _, _ := c.recent.RemoveOldest()
|
||||
c.recentEvict.Add(k, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// Remove from the frequent list otherwise
|
||||
c.frequent.RemoveOldest()
|
||||
}
|
||||
|
||||
func (c *TwoQueueCache) Len() int {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
return c.recent.Len() + c.frequent.Len()
|
||||
}
|
||||
|
||||
func (c *TwoQueueCache) Keys() []interface{} {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
k1 := c.frequent.Keys()
|
||||
k2 := c.recent.Keys()
|
||||
return append(k1, k2...)
|
||||
}
|
||||
|
||||
func (c *TwoQueueCache) Remove(key interface{}) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
if c.frequent.Remove(key) {
|
||||
return
|
||||
}
|
||||
if c.recent.Remove(key) {
|
||||
return
|
||||
}
|
||||
if c.recentEvict.Remove(key) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (c *TwoQueueCache) Purge() {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
c.recent.Purge()
|
||||
c.frequent.Purge()
|
||||
c.recentEvict.Purge()
|
||||
}
|
||||
|
||||
func (c *TwoQueueCache) Contains(key interface{}) bool {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
return c.frequent.Contains(key) || c.recent.Contains(key)
|
||||
}
|
||||
|
||||
func (c *TwoQueueCache) Peek(key interface{}) (interface{}, bool) {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
if val, ok := c.frequent.Peek(key); ok {
|
||||
return val, ok
|
||||
}
|
||||
return c.recent.Peek(key)
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
golang-lru
|
||||
==========
|
||||
|
||||
This provides the `lru` package which implements a fixed-size
|
||||
thread safe LRU cache. It is based on the cache in Groupcache.
|
||||
|
||||
Documentation
|
||||
=============
|
||||
|
||||
Full docs are available on [Godoc](http://godoc.org/github.com/hashicorp/golang-lru)
|
||||
|
||||
Example
|
||||
=======
|
||||
|
||||
Using the LRU is very simple:
|
||||
|
||||
```go
|
||||
l, _ := New(128)
|
||||
for i := 0; i < 256; i++ {
|
||||
l.Add(i, nil)
|
||||
}
|
||||
if l.Len() != 128 {
|
||||
panic(fmt.Sprintf("bad len: %v", l.Len()))
|
||||
}
|
||||
```
|
|
@ -0,0 +1,257 @@
|
|||
package lru
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/golang-lru/simplelru"
|
||||
)
|
||||
|
||||
// ARCCache is a thread-safe fixed size Adaptive Replacement Cache (ARC).
|
||||
// ARC is an enhancement over the standard LRU cache in that tracks both
|
||||
// frequency and recency of use. This avoids a burst in access to new
|
||||
// entries from evicting the frequently used older entries. It adds some
|
||||
// additional tracking overhead to a standard LRU cache, computationally
|
||||
// it is roughly 2x the cost, and the extra memory overhead is linear
|
||||
// with the size of the cache. ARC has been patented by IBM, but is
|
||||
// similar to the TwoQueueCache (2Q) which requires setting parameters.
|
||||
type ARCCache struct {
|
||||
size int // Size is the total capacity of the cache
|
||||
p int // P is the dynamic preference towards T1 or T2
|
||||
|
||||
t1 *simplelru.LRU // T1 is the LRU for recently accessed items
|
||||
b1 *simplelru.LRU // B1 is the LRU for evictions from t1
|
||||
|
||||
t2 *simplelru.LRU // T2 is the LRU for frequently accessed items
|
||||
b2 *simplelru.LRU // B2 is the LRU for evictions from t2
|
||||
|
||||
lock sync.RWMutex
|
||||
}
|
||||
|
||||
// NewARC creates an ARC of the given size
|
||||
func NewARC(size int) (*ARCCache, error) {
|
||||
// Create the sub LRUs
|
||||
b1, err := simplelru.NewLRU(size, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b2, err := simplelru.NewLRU(size, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t1, err := simplelru.NewLRU(size, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t2, err := simplelru.NewLRU(size, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Initialize the ARC
|
||||
c := &ARCCache{
|
||||
size: size,
|
||||
p: 0,
|
||||
t1: t1,
|
||||
b1: b1,
|
||||
t2: t2,
|
||||
b2: b2,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Get looks up a key's value from the cache.
|
||||
func (c *ARCCache) Get(key interface{}) (interface{}, bool) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
// Ff the value is contained in T1 (recent), then
|
||||
// promote it to T2 (frequent)
|
||||
if val, ok := c.t1.Peek(key); ok {
|
||||
c.t1.Remove(key)
|
||||
c.t2.Add(key, val)
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// Check if the value is contained in T2 (frequent)
|
||||
if val, ok := c.t2.Get(key); ok {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// No hit
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// Add adds a value to the cache.
|
||||
func (c *ARCCache) Add(key, value interface{}) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
// Check if the value is contained in T1 (recent), and potentially
|
||||
// promote it to frequent T2
|
||||
if c.t1.Contains(key) {
|
||||
c.t1.Remove(key)
|
||||
c.t2.Add(key, value)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if the value is already in T2 (frequent) and update it
|
||||
if c.t2.Contains(key) {
|
||||
c.t2.Add(key, value)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if this value was recently evicted as part of the
|
||||
// recently used list
|
||||
if c.b1.Contains(key) {
|
||||
// T1 set is too small, increase P appropriately
|
||||
delta := 1
|
||||
b1Len := c.b1.Len()
|
||||
b2Len := c.b2.Len()
|
||||
if b2Len > b1Len {
|
||||
delta = b2Len / b1Len
|
||||
}
|
||||
if c.p+delta >= c.size {
|
||||
c.p = c.size
|
||||
} else {
|
||||
c.p += delta
|
||||
}
|
||||
|
||||
// Potentially need to make room in the cache
|
||||
if c.t1.Len()+c.t2.Len() >= c.size {
|
||||
c.replace(false)
|
||||
}
|
||||
|
||||
// Remove from B1
|
||||
c.b1.Remove(key)
|
||||
|
||||
// Add the key to the frequently used list
|
||||
c.t2.Add(key, value)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if this value was recently evicted as part of the
|
||||
// frequently used list
|
||||
if c.b2.Contains(key) {
|
||||
// T2 set is too small, decrease P appropriately
|
||||
delta := 1
|
||||
b1Len := c.b1.Len()
|
||||
b2Len := c.b2.Len()
|
||||
if b1Len > b2Len {
|
||||
delta = b1Len / b2Len
|
||||
}
|
||||
if delta >= c.p {
|
||||
c.p = 0
|
||||
} else {
|
||||
c.p -= delta
|
||||
}
|
||||
|
||||
// Potentially need to make room in the cache
|
||||
if c.t1.Len()+c.t2.Len() >= c.size {
|
||||
c.replace(true)
|
||||
}
|
||||
|
||||
// Remove from B2
|
||||
c.b2.Remove(key)
|
||||
|
||||
// Add the key to the frequntly used list
|
||||
c.t2.Add(key, value)
|
||||
return
|
||||
}
|
||||
|
||||
// Potentially need to make room in the cache
|
||||
if c.t1.Len()+c.t2.Len() >= c.size {
|
||||
c.replace(false)
|
||||
}
|
||||
|
||||
// Keep the size of the ghost buffers trim
|
||||
if c.b1.Len() > c.size-c.p {
|
||||
c.b1.RemoveOldest()
|
||||
}
|
||||
if c.b2.Len() > c.p {
|
||||
c.b2.RemoveOldest()
|
||||
}
|
||||
|
||||
// Add to the recently seen list
|
||||
c.t1.Add(key, value)
|
||||
return
|
||||
}
|
||||
|
||||
// replace is used to adaptively evict from either T1 or T2
|
||||
// based on the current learned value of P
|
||||
func (c *ARCCache) replace(b2ContainsKey bool) {
|
||||
t1Len := c.t1.Len()
|
||||
if t1Len > 0 && (t1Len > c.p || (t1Len == c.p && b2ContainsKey)) {
|
||||
k, _, ok := c.t1.RemoveOldest()
|
||||
if ok {
|
||||
c.b1.Add(k, nil)
|
||||
}
|
||||
} else {
|
||||
k, _, ok := c.t2.RemoveOldest()
|
||||
if ok {
|
||||
c.b2.Add(k, nil)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Len returns the number of cached entries
|
||||
func (c *ARCCache) Len() int {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
return c.t1.Len() + c.t2.Len()
|
||||
}
|
||||
|
||||
// Keys returns all the cached keys
|
||||
func (c *ARCCache) Keys() []interface{} {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
k1 := c.t1.Keys()
|
||||
k2 := c.t2.Keys()
|
||||
return append(k1, k2...)
|
||||
}
|
||||
|
||||
// Remove is used to purge a key from the cache
|
||||
func (c *ARCCache) Remove(key interface{}) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
if c.t1.Remove(key) {
|
||||
return
|
||||
}
|
||||
if c.t2.Remove(key) {
|
||||
return
|
||||
}
|
||||
if c.b1.Remove(key) {
|
||||
return
|
||||
}
|
||||
if c.b2.Remove(key) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Purge is used to clear the cache
|
||||
func (c *ARCCache) Purge() {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
c.t1.Purge()
|
||||
c.t2.Purge()
|
||||
c.b1.Purge()
|
||||
c.b2.Purge()
|
||||
}
|
||||
|
||||
// Contains is used to check if the cache contains a key
|
||||
// without updating recency or frequency.
|
||||
func (c *ARCCache) Contains(key interface{}) bool {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
return c.t1.Contains(key) || c.t2.Contains(key)
|
||||
}
|
||||
|
||||
// Peek is used to inspect the cache value of a key
|
||||
// without updating recency or frequency.
|
||||
func (c *ARCCache) Peek(key interface{}) (interface{}, bool) {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
if val, ok := c.t1.Peek(key); ok {
|
||||
return val, ok
|
||||
}
|
||||
return c.t2.Peek(key)
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
// Package lru provides a simple LRU cache. It is based on the
|
||||
// LRU implementation in groupcache:
|
||||
// https://github.com/golang/groupcache/tree/master/lru
|
||||
package lru
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/golang-lru/simplelru"
|
||||
)
|
||||
|
||||
// Cache is a thread-safe fixed size LRU cache.
|
||||
type Cache struct {
|
||||
lru *simplelru.LRU
|
||||
lock sync.RWMutex
|
||||
}
|
||||
|
||||
// New creates an LRU of the given size
|
||||
func New(size int) (*Cache, error) {
|
||||
return NewWithEvict(size, nil)
|
||||
}
|
||||
|
||||
// NewWithEvict constructs a fixed size cache with the given eviction
|
||||
// callback.
|
||||
func NewWithEvict(size int, onEvicted func(key interface{}, value interface{})) (*Cache, error) {
|
||||
lru, err := simplelru.NewLRU(size, simplelru.EvictCallback(onEvicted))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c := &Cache{
|
||||
lru: lru,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Purge is used to completely clear the cache
|
||||
func (c *Cache) Purge() {
|
||||
c.lock.Lock()
|
||||
c.lru.Purge()
|
||||
c.lock.Unlock()
|
||||
}
|
||||
|
||||
// Add adds a value to the cache. Returns true if an eviction occurred.
|
||||
func (c *Cache) Add(key, value interface{}) bool {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
return c.lru.Add(key, value)
|
||||
}
|
||||
|
||||
// Get looks up a key's value from the cache.
|
||||
func (c *Cache) Get(key interface{}) (interface{}, bool) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
return c.lru.Get(key)
|
||||
}
|
||||
|
||||
// Check if a key is in the cache, without updating the recent-ness
|
||||
// or deleting it for being stale.
|
||||
func (c *Cache) Contains(key interface{}) bool {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
return c.lru.Contains(key)
|
||||
}
|
||||
|
||||
// Returns the key value (or undefined if not found) without updating
|
||||
// the "recently used"-ness of the key.
|
||||
func (c *Cache) Peek(key interface{}) (interface{}, bool) {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
return c.lru.Peek(key)
|
||||
}
|
||||
|
||||
// ContainsOrAdd checks if a key is in the cache without updating the
|
||||
// recent-ness or deleting it for being stale, and if not, adds the value.
|
||||
// Returns whether found and whether an eviction occurred.
|
||||
func (c *Cache) ContainsOrAdd(key, value interface{}) (ok, evict bool) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
if c.lru.Contains(key) {
|
||||
return true, false
|
||||
} else {
|
||||
evict := c.lru.Add(key, value)
|
||||
return false, evict
|
||||
}
|
||||
}
|
||||
|
||||
// Remove removes the provided key from the cache.
|
||||
func (c *Cache) Remove(key interface{}) {
|
||||
c.lock.Lock()
|
||||
c.lru.Remove(key)
|
||||
c.lock.Unlock()
|
||||
}
|
||||
|
||||
// RemoveOldest removes the oldest item from the cache.
|
||||
func (c *Cache) RemoveOldest() {
|
||||
c.lock.Lock()
|
||||
c.lru.RemoveOldest()
|
||||
c.lock.Unlock()
|
||||
}
|
||||
|
||||
// Keys returns a slice of the keys in the cache, from oldest to newest.
|
||||
func (c *Cache) Keys() []interface{} {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
return c.lru.Keys()
|
||||
}
|
||||
|
||||
// Len returns the number of items in the cache.
|
||||
func (c *Cache) Len() int {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
return c.lru.Len()
|
||||
}
|
|
@ -0,0 +1,160 @@
|
|||
package simplelru
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"errors"
|
||||
)
|
||||
|
||||
// EvictCallback is the hook signature used to observe entries leaving the
// cache; it receives the key and value of the removed entry.
type EvictCallback func(key interface{}, value interface{})

// LRU implements a non-thread safe fixed size LRU cache. Entries live in a
// doubly linked list ordered from most recently used (front) to least
// recently used (back), with a map from key to list element for lookup.
type LRU struct {
	size      int
	evictList *list.List
	items     map[interface{}]*list.Element
	onEvict   EvictCallback
}

// entry is the payload stored in each element of evictList.
type entry struct {
	key   interface{}
	value interface{}
}

// NewLRU constructs an LRU holding at most size entries. onEvict may be
// nil. A size that is zero or negative is rejected with an error.
func NewLRU(size int, onEvict EvictCallback) (*LRU, error) {
	if size <= 0 {
		return nil, errors.New("Must provide a positive size")
	}
	return &LRU{
		size:      size,
		evictList: list.New(),
		items:     make(map[interface{}]*list.Element),
		onEvict:   onEvict,
	}, nil
}

// Purge completely clears the cache, reporting every entry to the eviction
// callback (when one is set) before it is dropped.
func (c *LRU) Purge() {
	for key, elem := range c.items {
		if c.onEvict != nil {
			c.onEvict(key, elem.Value.(*entry).value)
		}
		delete(c.items, key)
	}
	c.evictList.Init()
}

// Add stores value under key and marks it most recently used. It returns
// true if an eviction occurred. Updating an existing key never evicts.
func (c *LRU) Add(key, value interface{}) bool {
	// Existing key: refresh its position and overwrite the value.
	if elem, found := c.items[key]; found {
		c.evictList.MoveToFront(elem)
		elem.Value.(*entry).value = value
		return false
	}

	// New key: link it in at the front.
	c.items[key] = c.evictList.PushFront(&entry{key: key, value: value})

	// Evict the least recently used entry if the capacity is now exceeded.
	if c.evictList.Len() <= c.size {
		return false
	}
	c.removeOldest()
	return true
}

// Get looks up a key's value from the cache and, on a hit, marks the entry
// most recently used. A miss yields nil and false.
func (c *LRU) Get(key interface{}) (value interface{}, ok bool) {
	elem, found := c.items[key]
	if !found {
		return nil, false
	}
	c.evictList.MoveToFront(elem)
	return elem.Value.(*entry).value, true
}

// Contains checks if a key is in the cache, without updating the
// recent-ness or deleting it for being stale.
func (c *LRU) Contains(key interface{}) (ok bool) {
	_, present := c.items[key]
	return present
}

// Peek returns the key's value and true, or nil and false if the key is
// not found, without updating the "recently used"-ness of the key.
func (c *LRU) Peek(key interface{}) (value interface{}, ok bool) {
	elem, found := c.items[key]
	if !found {
		return nil, false
	}
	return elem.Value.(*entry).value, true
}

// Remove removes the provided key from the cache, returning whether the
// key was contained.
func (c *LRU) Remove(key interface{}) bool {
	elem, found := c.items[key]
	if found {
		c.removeElement(elem)
	}
	return found
}

// RemoveOldest removes the oldest item from the cache and returns its key
// and value. The final result is false when the cache is empty.
func (c *LRU) RemoveOldest() (interface{}, interface{}, bool) {
	oldest := c.evictList.Back()
	if oldest == nil {
		return nil, nil, false
	}
	c.removeElement(oldest)
	kv := oldest.Value.(*entry)
	return kv.key, kv.value, true
}

// GetOldest returns the key and value of the oldest entry without removing
// it. The final result is false when the cache is empty.
func (c *LRU) GetOldest() (interface{}, interface{}, bool) {
	oldest := c.evictList.Back()
	if oldest == nil {
		return nil, nil, false
	}
	kv := oldest.Value.(*entry)
	return kv.key, kv.value, true
}

// Keys returns a slice of the keys in the cache, from oldest to newest.
func (c *LRU) Keys() []interface{} {
	keys := make([]interface{}, len(c.items))
	// Walk the list from the back (oldest) towards the front (newest).
	for i, elem := 0, c.evictList.Back(); elem != nil; i, elem = i+1, elem.Prev() {
		keys[i] = elem.Value.(*entry).key
	}
	return keys
}

// Len returns the number of items in the cache.
func (c *LRU) Len() int {
	return c.evictList.Len()
}

// removeOldest removes the oldest item from the cache, if there is one.
func (c *LRU) removeOldest() {
	if oldest := c.evictList.Back(); oldest != nil {
		c.removeElement(oldest)
	}
}

// removeElement unlinks e from the list, deletes its key from the index and
// then reports the removal to the eviction callback, when one is set.
func (c *LRU) removeElement(e *list.Element) {
	// List.Remove hands back e.Value, which is always an *entry here.
	kv := c.evictList.Remove(e).(*entry)
	delete(c.items, kv.key)
	if c.onEvict != nil {
		c.onEvict(kv.key, kv.value)
	}
}
|
|
@ -0,0 +1,354 @@
|
|||
Mozilla Public License, version 2.0
|
||||
|
||||
1. Definitions
|
||||
|
||||
1.1. “Contributor”
|
||||
|
||||
means each individual or legal entity that creates, contributes to the
|
||||
creation of, or owns Covered Software.
|
||||
|
||||
1.2. “Contributor Version”
|
||||
|
||||
means the combination of the Contributions of others (if any) used by a
|
||||
Contributor and that particular Contributor’s Contribution.
|
||||
|
||||
1.3. “Contribution”
|
||||
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. “Covered Software”
|
||||
|
||||
means Source Code Form to which the initial Contributor has attached the
|
||||
notice in Exhibit A, the Executable Form of such Source Code Form, and
|
||||
Modifications of such Source Code Form, in each case including portions
|
||||
thereof.
|
||||
|
||||
1.5. “Incompatible With Secondary Licenses”
|
||||
means
|
||||
|
||||
a. that the initial Contributor has attached the notice described in
|
||||
Exhibit B to the Covered Software; or
|
||||
|
||||
b. that the Covered Software was made available under the terms of version
|
||||
1.1 or earlier of the License, but not also under the terms of a
|
||||
Secondary License.
|
||||
|
||||
1.6. “Executable Form”
|
||||
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. “Larger Work”
|
||||
|
||||
means a work that combines Covered Software with other material, in a separate
|
||||
file or files, that is not Covered Software.
|
||||
|
||||
1.8. “License”
|
||||
|
||||
means this document.
|
||||
|
||||
1.9. “Licensable”
|
||||
|
||||
means having the right to grant, to the maximum extent possible, whether at the
|
||||
time of the initial grant or subsequently, any and all of the rights conveyed by
|
||||
this License.
|
||||
|
||||
1.10. “Modifications”
|
||||
|
||||
means any of the following:
|
||||
|
||||
a. any file in Source Code Form that results from an addition to, deletion
|
||||
from, or modification of the contents of Covered Software; or
|
||||
|
||||
b. any new file in Source Code Form that contains any Covered Software.
|
||||
|
||||
1.11. “Patent Claims” of a Contributor
|
||||
|
||||
means any patent claim(s), including without limitation, method, process,
|
||||
and apparatus claims, in any patent Licensable by such Contributor that
|
||||
would be infringed, but for the grant of the License, by the making,
|
||||
using, selling, offering for sale, having made, import, or transfer of
|
||||
either its Contributions or its Contributor Version.
|
||||
|
||||
1.12. “Secondary License”
|
||||
|
||||
means either the GNU General Public License, Version 2.0, the GNU Lesser
|
||||
General Public License, Version 2.1, the GNU Affero General Public
|
||||
License, Version 3.0, or any later versions of those licenses.
|
||||
|
||||
1.13. “Source Code Form”
|
||||
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. “You” (or “Your”)
|
||||
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, “You” includes any entity that controls, is
|
||||
controlled by, or is under common control with You. For purposes of this
|
||||
definition, “control” means (a) the power, direct or indirect, to cause
|
||||
the direction or management of such entity, whether by contract or
|
||||
otherwise, or (b) ownership of more than fifty percent (50%) of the
|
||||
outstanding shares or beneficial ownership of such entity.
|
||||
|
||||
|
||||
2. License Grants and Conditions
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
a. under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or as
|
||||
part of a Larger Work; and
|
||||
|
||||
b. under Patent Claims of such Contributor to make, use, sell, offer for
|
||||
sale, have made, import, and otherwise transfer either its Contributions
|
||||
or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution become
|
||||
effective for each Contribution on the date the Contributor first distributes
|
||||
such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under this
|
||||
License. No additional rights or licenses will be implied from the distribution
|
||||
or licensing of Covered Software under this License. Notwithstanding Section
|
||||
2.1(b) above, no patent license is granted by a Contributor:
|
||||
|
||||
a. for any code that a Contributor has removed from Covered Software; or
|
||||
|
||||
b. for infringements caused by: (i) Your and any other third party’s
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
c. under Patent Claims infringed by Covered Software in the absence of its
|
||||
Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks, or
|
||||
logos of any Contributor (except as may be necessary to comply with the
|
||||
notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this License
|
||||
(see Section 10.2) or under the terms of a Secondary License (if permitted
|
||||
under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its Contributions
|
||||
are its original creation(s) or it has sufficient rights to grant the
|
||||
rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under applicable
|
||||
copyright doctrines of fair use, fair dealing, or other equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
|
||||
Section 2.1.
|
||||
|
||||
|
||||
3. Responsibilities
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under the
|
||||
terms of this License. You must inform recipients that the Source Code Form
|
||||
of the Covered Software is governed by the terms of this License, and how
|
||||
they can obtain a copy of this License. You may not attempt to alter or
|
||||
restrict the recipients’ rights in the Source Code Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
a. such Covered Software must also be made available in Source Code Form,
|
||||
as described in Section 3.1, and You must inform recipients of the
|
||||
Executable Form how they can obtain a copy of such Source Code Form by
|
||||
reasonable means in a timely manner, at a charge no more than the cost
|
||||
of distribution to the recipient; and
|
||||
|
||||
b. You may distribute such Executable Form under the terms of this License,
|
||||
or sublicense it under different terms, provided that the license for
|
||||
the Executable Form does not attempt to limit or alter the recipients’
|
||||
rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for the
|
||||
Covered Software. If the Larger Work is a combination of Covered Software
|
||||
with a work governed by one or more Secondary Licenses, and the Covered
|
||||
Software is not Incompatible With Secondary Licenses, this License permits
|
||||
You to additionally distribute such Covered Software under the terms of
|
||||
such Secondary License(s), so that the recipient of the Larger Work may, at
|
||||
their option, further distribute the Covered Software under the terms of
|
||||
either this License or such Secondary License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices (including
|
||||
copyright notices, patent notices, disclaimers of warranty, or limitations
|
||||
of liability) contained within the Source Code Form of the Covered
|
||||
Software, except that You may alter any license notices to the extent
|
||||
required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on behalf
|
||||
of any Contributor. You must make it absolutely clear that any such
|
||||
warranty, support, indemnity, or liability obligation is offered by You
|
||||
alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this License
|
||||
with respect to some or all of the Covered Software due to statute, judicial
|
||||
order, or regulation then You must: (a) comply with the terms of this License
|
||||
to the maximum extent possible; and (b) describe the limitations and the code
|
||||
they affect. Such description must be placed in a text file included with all
|
||||
distributions of the Covered Software under this License. Except to the
|
||||
extent prohibited by statute or regulation, such description must be
|
||||
sufficiently detailed for a recipient of ordinary skill to be able to
|
||||
understand it.
|
||||
|
||||
5. Termination
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically if You
|
||||
fail to comply with any of its terms. However, if You become compliant,
|
||||
then the rights granted under this License from a particular Contributor
|
||||
are reinstated (a) provisionally, unless and until such Contributor
|
||||
explicitly and finally terminates Your grants, and (b) on an ongoing basis,
|
||||
if such Contributor fails to notify You of the non-compliance by some
|
||||
reasonable means prior to 60 days after You have come back into compliance.
|
||||
Moreover, Your grants from a particular Contributor are reinstated on an
|
||||
ongoing basis if such Contributor notifies You of the non-compliance by
|
||||
some reasonable means, this is the first time You have received notice of
|
||||
non-compliance with this License from such Contributor, and You become
|
||||
compliant prior to 30 days after Your receipt of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions, counter-claims,
|
||||
and cross-claims) alleging that a Contributor Version directly or
|
||||
indirectly infringes any patent, then the rights granted to You by any and
|
||||
all Contributors for the Covered Software under Section 2.1 of this License
|
||||
shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
|
||||
license agreements (excluding distributors and resellers) which have been
|
||||
validly granted by You or Your distributors under this License prior to
|
||||
termination shall survive termination.
|
||||
|
||||
6. Disclaimer of Warranty
|
||||
|
||||
Covered Software is provided under this License on an “as is” basis, without
|
||||
warranty of any kind, either expressed, implied, or statutory, including,
|
||||
without limitation, warranties that the Covered Software is free of defects,
|
||||
merchantable, fit for a particular purpose or non-infringing. The entire
|
||||
risk as to the quality and performance of the Covered Software is with You.
|
||||
Should any Covered Software prove defective in any respect, You (not any
|
||||
Contributor) assume the cost of any necessary servicing, repair, or
|
||||
correction. This disclaimer of warranty constitutes an essential part of this
|
||||
License. No use of any Covered Software is authorized under this License
|
||||
except under this disclaimer.
|
||||
|
||||
7. Limitation of Liability
|
||||
|
||||
Under no circumstances and under no legal theory, whether tort (including
|
||||
negligence), contract, or otherwise, shall any Contributor, or anyone who
|
||||
distributes Covered Software as permitted above, be liable to You for any
|
||||
direct, indirect, special, incidental, or consequential damages of any
|
||||
character including, without limitation, damages for lost profits, loss of
|
||||
goodwill, work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses, even if such party shall have been
|
||||
informed of the possibility of such damages. This limitation of liability
|
||||
shall not apply to liability for death or personal injury resulting from such
|
||||
party’s negligence to the extent applicable law prohibits such limitation.
|
||||
Some jurisdictions do not allow the exclusion or limitation of incidental or
|
||||
consequential damages, so this exclusion and limitation may not apply to You.
|
||||
|
||||
8. Litigation
|
||||
|
||||
Any litigation relating to this License may be brought only in the courts of
|
||||
a jurisdiction where the defendant maintains its principal place of business
|
||||
and such litigation shall be governed by laws of that jurisdiction, without
|
||||
reference to its conflict-of-law provisions. Nothing in this Section shall
|
||||
prevent a party’s ability to bring cross-claims or counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
|
||||
This License represents the complete agreement concerning the subject matter
|
||||
hereof. If any provision of this License is held to be unenforceable, such
|
||||
provision shall be reformed only to the extent necessary to make it
|
||||
enforceable. Any law or regulation which provides that the language of a
|
||||
contract shall be construed against the drafter shall not be used to construe
|
||||
this License against a Contributor.
|
||||
|
||||
|
||||
10. Versions of the License
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version of
|
||||
the License under which You originally received the Covered Software, or
|
||||
under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a modified
|
||||
version of this License if you rename the license and remove any
|
||||
references to the name of the license steward (except to note that such
|
||||
modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
|
||||
If You choose to distribute Source Code Form that is Incompatible With
|
||||
Secondary Licenses under the terms of this version of the License, the
|
||||
notice described in Exhibit B of this License must be attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
|
||||
This Source Code Form is subject to the
|
||||
terms of the Mozilla Public License, v.
|
||||
2.0. If a copy of the MPL was not
|
||||
distributed with this file, You can
|
||||
obtain one at
|
||||
http://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular file, then
|
||||
You may include the notice in a location (such as a LICENSE file in a relevant
|
||||
directory) where a recipient would be likely to look for such a notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
||||
|
||||
Exhibit B - “Incompatible With Secondary Licenses” Notice
|
||||
|
||||
This Source Code Form is “Incompatible
|
||||
With Secondary Licenses”, as defined by
|
||||
the Mozilla Public License, v. 2.0.
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
test: subnet
|
||||
go test ./...
|
||||
|
||||
integ: subnet
|
||||
INTEG_TESTS=yes go test ./...
|
||||
|
||||
subnet:
|
||||
./test/setup_subnet.sh
|
||||
|
||||
cov:
|
||||
gocov test github.com/hashicorp/memberlist | gocov-html > /tmp/coverage.html
|
||||
open /tmp/coverage.html
|
||||
|
||||
# None of these targets produce a file of the same name; declare them phony
# so make always runs their recipes.
.PHONY: test cov integ subnet
|
|
@ -0,0 +1,144 @@
|
|||
# memberlist [![GoDoc](https://godoc.org/github.com/hashicorp/memberlist?status.png)](https://godoc.org/github.com/hashicorp/memberlist)
|
||||
|
||||
memberlist is a [Go](http://www.golang.org) library that manages cluster
|
||||
membership and member failure detection using a gossip based protocol.
|
||||
|
||||
The use cases for such a library are far-reaching: all distributed systems
|
||||
require membership, and memberlist is a re-usable solution to managing
|
||||
cluster membership and node failure detection.
|
||||
|
||||
memberlist is eventually consistent but converges quickly on average.
|
||||
The speed at which it converges can be heavily tuned via various knobs
|
||||
on the protocol. Node failures are detected and network partitions are partially
|
||||
tolerated by attempting to communicate to potentially dead nodes through
|
||||
multiple routes.
|
||||
|
||||
## Building
|
||||
|
||||
If you wish to build memberlist you'll need Go version 1.2+ installed.
|
||||
|
||||
Please check your installation with:
|
||||
|
||||
```
|
||||
go version
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Memberlist is surprisingly simple to use. An example is shown below:
|
||||
|
||||
```go
|
||||
/* Create the initial memberlist from a safe configuration.
|
||||
Please reference the godoc for other default config types.
|
||||
http://godoc.org/github.com/hashicorp/memberlist#Config
|
||||
*/
|
||||
list, err := memberlist.Create(memberlist.DefaultLocalConfig())
|
||||
if err != nil {
|
||||
panic("Failed to create memberlist: " + err.Error())
|
||||
}
|
||||
|
||||
// Join an existing cluster by specifying at least one known member.
|
||||
n, err := list.Join([]string{"1.2.3.4"})
|
||||
if err != nil {
|
||||
panic("Failed to join cluster: " + err.Error())
|
||||
}
|
||||
|
||||
// Ask for members of the cluster
|
||||
for _, member := range list.Members() {
|
||||
fmt.Printf("Member: %s %s\n", member.Name, member.Addr)
|
||||
}
|
||||
|
||||
// Continue doing whatever you need, memberlist will maintain membership
|
||||
// information in the background. Delegates can be used for receiving
|
||||
// events when members join or leave.
|
||||
```
|
||||
|
||||
The most difficult part of memberlist is configuring it since it has many
|
||||
available knobs in order to tune state propagation delay and convergence times.
|
||||
Memberlist provides a default configuration that offers a good starting point,
|
||||
but errs on the side of caution, choosing values that are optimized for
|
||||
higher convergence at the cost of higher bandwidth usage.
|
||||
|
||||
For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/memberlist).
|
||||
|
||||
## Protocol
|
||||
|
||||
memberlist is based on ["SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol"](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf),
|
||||
with a few minor adaptations, mostly to increase propagation speed and
|
||||
convergence rate.
|
||||
|
||||
A high level overview of the memberlist protocol (based on SWIM) is
|
||||
described below, but for details please read the full
|
||||
[SWIM paper](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf)
|
||||
followed by the memberlist source. We welcome any questions related
|
||||
to the protocol on our issue tracker.
|
||||
|
||||
### Protocol Description
|
||||
|
||||
memberlist begins by joining an existing cluster or starting a new
|
||||
cluster. If starting a new cluster, additional nodes are expected to join
|
||||
it. New nodes in an existing cluster must be given the address of at
|
||||
least one existing member in order to join the cluster. The new member
|
||||
does a full state sync with the existing member over TCP and begins gossiping its
|
||||
existence to the cluster.
|
||||
|
||||
Gossip is done over UDP with a configurable but fixed fanout and interval.
|
||||
This ensures that network usage is constant with regards to number of nodes, as opposed to
|
||||
exponential growth that can occur with traditional heartbeat mechanisms.
|
||||
Complete state exchanges with a random node are done periodically over
|
||||
TCP, but much less often than gossip messages. This increases the likelihood
|
||||
that the membership list converges properly since the full state is exchanged
|
||||
and merged. The interval between full state exchanges is configurable or can
|
||||
be disabled entirely.
|
||||
|
||||
Failure detection is done by periodic random probing using a configurable interval.
|
||||
If the node fails to ack within a reasonable time (typically some multiple
|
||||
of RTT), then an indirect probe as well as a direct TCP probe are attempted. An
|
||||
indirect probe asks a configurable number of random nodes to probe the same node,
|
||||
in case there are network issues causing our own node to fail the probe. The direct
|
||||
TCP probe is used to help identify the common situation where networking is
|
||||
misconfigured to allow TCP but not UDP. Without the TCP probe, a UDP-isolated node
|
||||
would think all other nodes were suspect and could cause churn in the cluster when
|
||||
it attempts a TCP-based state exchange with another node. It is not desirable to
|
||||
operate with only TCP connectivity because convergence will be much slower, but it
|
||||
is enabled so that memberlist can detect this situation and alert operators.
|
||||
|
||||
If our probe, the indirect probes, and the direct TCP probe all fail within a
|
||||
configurable time, then the node is marked "suspicious" and this knowledge is
|
||||
gossiped to the cluster. A suspicious node is still considered a member of the
|
||||
cluster. If the suspect member of the cluster does not dispute the suspicion
|
||||
within a configurable period of time, the node is finally considered dead,
|
||||
and this state is then gossiped to the cluster.
|
||||
|
||||
This is a brief and incomplete description of the protocol. For a better idea,
|
||||
please read the
|
||||
[SWIM paper](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf)
|
||||
in its entirety, along with the memberlist source code.
|
||||
|
||||
### Changes from SWIM
|
||||
|
||||
As mentioned earlier, the memberlist protocol is based on SWIM but includes
|
||||
minor changes, mostly to increase propagation speed and convergence rates.
|
||||
|
||||
The changes from SWIM are noted here:
|
||||
|
||||
* memberlist does a full state sync over TCP periodically. SWIM only propagates
|
||||
changes over gossip. While both eventually reach convergence, the full state
|
||||
sync increases the likelihood that nodes are fully converged more quickly,
|
||||
at the expense of more bandwidth usage. This feature can be totally disabled
|
||||
if you wish.
|
||||
|
||||
* memberlist has a dedicated gossip layer separate from the failure detection
|
||||
protocol. SWIM only piggybacks gossip messages on top of probe/ack messages.
|
||||
memberlist also piggybacks gossip messages on top of probe/ack messages, but
|
||||
also will periodically send out dedicated gossip messages on their own. This
|
||||
feature lets you have a higher gossip rate (for example once per 200ms)
|
||||
and a slower failure detection rate (such as once per second), resulting
|
||||
in overall faster convergence rates and data propagation speeds. This feature
|
||||
can be totally disabled as well, if you wish.
|
||||
|
||||
* memberlist keeps the state of dead nodes around for a set amount of time,
|
||||
so that when full syncs are requested, the requester also receives information
|
||||
about dead nodes. Because SWIM doesn't do full syncs, SWIM deletes dead node
|
||||
state immediately upon learning that the node is dead. This change again helps
|
||||
the cluster converge more quickly.
|
|
@ -0,0 +1,14 @@
|
|||
package memberlist
|
||||
|
||||
// AliveDelegate is used to involve a client in processing
|
||||
// a node "alive" message. When a node joins, either through
|
||||
// a UDP gossip or TCP push/pull, we update the state of
|
||||
// that node via an alive message. This can be used to filter
|
||||
// a node out and prevent it from being considered a peer
|
||||
// using application specific logic.
|
||||
type AliveDelegate interface {
|
||||
// NotifyAlive is invoked with the node that an "alive" message
|
||||
// refers to. NOTE(review): presumably a non-nil error filters the
|
||||
// node out so it is not considered a peer; confirm against the caller.
|
||||
NotifyAlive(peer *Node) error
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package memberlist
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
)
|
||||
|
||||
// awareness manages a simple metric for tracking the estimated health of the
|
||||
// local node. Health is primarily the node's ability to respond in the soft
|
||||
// real-time manner required for correct health checking of other nodes in the
|
||||
// cluster.
|
||||
type awareness struct {
|
||||
sync.RWMutex
|
||||
|
||||
// max is the upper threshold for the timeout scale (the score will be
|
||||
// constrained to be from 0 <= score < max).
|
||||
max int
|
||||
|
||||
// score is the current awareness score. Lower values are healthier and
|
||||
// zero is the minimum value.
|
||||
score int
|
||||
}
|
||||
|
||||
// newAwareness returns a new awareness object.
|
||||
func newAwareness(max int) *awareness {
|
||||
return &awareness{
|
||||
max: max,
|
||||
score: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// ApplyDelta takes the given delta and applies it to the score in a thread-safe
|
||||
// manner. It also enforces a floor of zero and a max of max, so deltas may not
|
||||
// change the overall score if it's railed at one of the extremes.
|
||||
func (a *awareness) ApplyDelta(delta int) {
|
||||
a.Lock()
|
||||
initial := a.score
|
||||
a.score += delta
|
||||
if a.score < 0 {
|
||||
a.score = 0
|
||||
} else if a.score > (a.max - 1) {
|
||||
a.score = (a.max - 1)
|
||||
}
|
||||
final := a.score
|
||||
a.Unlock()
|
||||
|
||||
if initial != final {
|
||||
metrics.SetGauge([]string{"memberlist", "health", "score"}, float32(final))
|
||||
}
|
||||
}
|
||||
|
||||
// GetHealthScore returns the raw health score.
|
||||
func (a *awareness) GetHealthScore() int {
|
||||
a.RLock()
|
||||
score := a.score
|
||||
a.RUnlock()
|
||||
return score
|
||||
}
|
||||
|
||||
// ScaleTimeout takes the given duration and scales it based on the current
|
||||
// score. Less healthyness will lead to longer timeouts.
|
||||
func (a *awareness) ScaleTimeout(timeout time.Duration) time.Duration {
|
||||
a.RLock()
|
||||
score := a.score
|
||||
a.RUnlock()
|
||||
return timeout * (time.Duration(score) + 1)
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
package memberlist
|
||||
|
||||
/*
|
||||
The broadcast mechanism works by maintaining a sorted list of messages to be
|
||||
sent out. When a message is to be broadcast, the retransmit count
|
||||
is set to zero and appended to the queue. The retransmit count serves
|
||||
as the "priority", ensuring that newer messages get sent first. Once
|
||||
a message hits the retransmit limit, it is removed from the queue.
|
||||
|
||||
Additionally, older entries can be invalidated by new messages that
|
||||
are contradictory. For example, if we send {suspect M1 inc: 1},
|
||||
then a following {alive M1 inc: 2} will invalidate that message
|
||||
*/
|
||||
|
||||
type memberlistBroadcast struct {
|
||||
node string
|
||||
msg []byte
|
||||
notify chan struct{}
|
||||
}
|
||||
|
||||
func (b *memberlistBroadcast) Invalidates(other Broadcast) bool {
|
||||
// Check if that broadcast is a memberlist type
|
||||
mb, ok := other.(*memberlistBroadcast)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
// Invalidates any message about the same node
|
||||
return b.node == mb.node
|
||||
}
|
||||
|
||||
func (b *memberlistBroadcast) Message() []byte {
|
||||
return b.msg
|
||||
}
|
||||
|
||||
func (b *memberlistBroadcast) Finished() {
|
||||
select {
|
||||
case b.notify <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// encodeAndBroadcast encodes a message and enqueues it for broadcast. Fails
|
||||
// silently if there is an encoding error.
|
||||
func (m *Memberlist) encodeAndBroadcast(node string, msgType messageType, msg interface{}) {
|
||||
m.encodeBroadcastNotify(node, msgType, msg, nil)
|
||||
}
|
||||
|
||||
// encodeBroadcastNotify encodes a message and enqueues it for broadcast
|
||||
// and notifies the given channel when transmission is finished. Fails
|
||||
// silently if there is an encoding error.
|
||||
func (m *Memberlist) encodeBroadcastNotify(node string, msgType messageType, msg interface{}, notify chan struct{}) {
|
||||
buf, err := encode(msgType, msg)
|
||||
if err != nil {
|
||||
m.logger.Printf("[ERR] memberlist: Failed to encode message for broadcast: %s", err)
|
||||
} else {
|
||||
m.queueBroadcast(node, buf.Bytes(), notify)
|
||||
}
|
||||
}
|
||||
|
||||
// queueBroadcast is used to start dissemination of a message. It will be
|
||||
// sent up to a configured number of times. The message could potentially
|
||||
// be invalidated by a future message about the same node
|
||||
func (m *Memberlist) queueBroadcast(node string, msg []byte, notify chan struct{}) {
|
||||
b := &memberlistBroadcast{node, msg, notify}
|
||||
m.broadcasts.QueueBroadcast(b)
|
||||
}
|
||||
|
||||
// getBroadcasts is used to return a slice of broadcasts to send up to
|
||||
// a maximum byte size, while imposing a per-broadcast overhead. This is used
|
||||
// to fill a UDP packet with piggybacked data
|
||||
func (m *Memberlist) getBroadcasts(overhead, limit int) [][]byte {
|
||||
// Get memberlist messages first
|
||||
toSend := m.broadcasts.GetBroadcasts(overhead, limit)
|
||||
|
||||
// Check if the user has anything to broadcast
|
||||
d := m.config.Delegate
|
||||
if d != nil {
|
||||
// Determine the bytes used already
|
||||
bytesUsed := 0
|
||||
for _, msg := range toSend {
|
||||
bytesUsed += len(msg) + overhead
|
||||
}
|
||||
|
||||
// Check space remaining for user messages
|
||||
avail := limit - bytesUsed
|
||||
if avail > overhead+userMsgOverhead {
|
||||
userMsgs := d.GetBroadcasts(overhead+userMsgOverhead, avail)
|
||||
|
||||
// Frame each user message
|
||||
for _, msg := range userMsgs {
|
||||
buf := make([]byte, 1, len(msg)+1)
|
||||
buf[0] = byte(userMsg)
|
||||
buf = append(buf, msg...)
|
||||
toSend = append(toSend, buf)
|
||||
}
|
||||
}
|
||||
}
|
||||
return toSend
|
||||
}
|
|
@ -0,0 +1,277 @@
|
|||
package memberlist
|
||||
|
||||
import (
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
// The name of this node. This must be unique in the cluster.
|
||||
Name string
|
||||
|
||||
// Configuration related to what address to bind to and ports to
|
||||
// listen on. The port is used for both UDP and TCP gossip.
|
||||
// It is assumed other nodes are running on this port, but they
|
||||
// do not need to.
|
||||
BindAddr string
|
||||
BindPort int
|
||||
|
||||
// Configuration related to what address to advertise to other
|
||||
// cluster members. Used for nat traversal.
|
||||
AdvertiseAddr string
|
||||
AdvertisePort int
|
||||
|
||||
// ProtocolVersion is the configured protocol version that we
|
||||
// will _speak_. This must be between ProtocolVersionMin and
|
||||
// ProtocolVersionMax.
|
||||
ProtocolVersion uint8
|
||||
|
||||
// TCPTimeout is the timeout for establishing a TCP connection with
|
||||
// a remote node for a full state sync.
|
||||
TCPTimeout time.Duration
|
||||
|
||||
// IndirectChecks is the number of nodes that will be asked to perform
|
||||
// an indirect probe of a node in the case a direct probe fails. Memberlist
|
||||
// waits for an ack from any single indirect node, so increasing this
|
||||
// number will increase the likelihood that an indirect probe will succeed
|
||||
// at the expense of bandwidth.
|
||||
IndirectChecks int
|
||||
|
||||
// RetransmitMult is the multiplier for the number of retransmissions
|
||||
// that are attempted for messages broadcasted over gossip. The actual
|
||||
// count of retransmissions is calculated using the formula:
|
||||
//
|
||||
// Retransmits = RetransmitMult * log(N+1)
|
||||
//
|
||||
// This allows the retransmits to scale properly with cluster size. The
|
||||
// higher the multiplier, the more likely a failed broadcast is to converge
|
||||
// at the expense of increased bandwidth.
|
||||
RetransmitMult int
|
||||
|
||||
// SuspicionMult is the multiplier for determining the time an
|
||||
// inaccessible node is considered suspect before declaring it dead.
|
||||
// The actual timeout is calculated using the formula:
|
||||
//
|
||||
// SuspicionTimeout = SuspicionMult * log(N+1) * ProbeInterval
|
||||
//
|
||||
// This allows the timeout to scale properly with expected propagation
|
||||
// delay with a larger cluster size. The higher the multiplier, the longer
|
||||
// an inaccessible node is considered part of the cluster before declaring
|
||||
// it dead, giving that suspect node more time to refute if it is indeed
|
||||
// still alive.
|
||||
SuspicionMult int
|
||||
|
||||
// SuspicionMaxTimeoutMult is the multiplier applied to the
|
||||
// SuspicionTimeout used as an upper bound on detection time. This max
|
||||
// timeout is calculated using the formula:
|
||||
//
|
||||
// SuspicionMaxTimeout = SuspicionMaxTimeoutMult * SuspicionTimeout
|
||||
//
|
||||
// If everything is working properly, confirmations from other nodes will
|
||||
// accelerate suspicion timers in a manner which will cause the timeout
|
||||
// to reach the base SuspicionTimeout before that elapses, so this value
|
||||
// will typically only come into play if a node is experiencing issues
|
||||
// communicating with other nodes. It should be set to something fairly
|
||||
// large so that a node having problems will have a lot of chances to
|
||||
// recover before falsely declaring other nodes as failed, but short
|
||||
// enough for a legitimately isolated node to still make progress marking
|
||||
// nodes failed in a reasonable amount of time.
|
||||
SuspicionMaxTimeoutMult int
|
||||
|
||||
// PushPullInterval is the interval between complete state syncs.
|
||||
// Complete state syncs are done with a single node over TCP and are
|
||||
// quite expensive relative to standard gossiped messages. Setting this
|
||||
// to zero will disable state push/pull syncs completely.
|
||||
//
|
||||
// Setting this interval lower (more frequent) will increase convergence
|
||||
// speeds across larger clusters at the expense of increased bandwidth
|
||||
// usage.
|
||||
PushPullInterval time.Duration
|
||||
|
||||
// ProbeInterval and ProbeTimeout are used to configure probing
|
||||
// behavior for memberlist.
|
||||
//
|
||||
// ProbeInterval is the interval between random node probes. Setting
|
||||
// this lower (more frequent) will cause the memberlist cluster to detect
|
||||
// failed nodes more quickly at the expense of increased bandwidth usage.
|
||||
//
|
||||
// ProbeTimeout is the timeout to wait for an ack from a probed node
|
||||
// before assuming it is unhealthy. This should be set to 99-percentile
|
||||
// of RTT (round-trip time) on your network.
|
||||
ProbeInterval time.Duration
|
||||
ProbeTimeout time.Duration
|
||||
|
||||
// DisableTcpPings will turn off the fallback TCP pings that are attempted
|
||||
// if the direct UDP ping fails. These get pipelined along with the
|
||||
// indirect UDP pings.
|
||||
DisableTcpPings bool
|
||||
|
||||
// AwarenessMaxMultiplier will increase the probe interval if the node
|
||||
// becomes aware that it might be degraded and not meeting the soft real
|
||||
// time requirements to reliably probe other nodes.
|
||||
AwarenessMaxMultiplier int
|
||||
|
||||
// GossipInterval and GossipNodes are used to configure the gossip
|
||||
// behavior of memberlist.
|
||||
//
|
||||
// GossipInterval is the interval between sending messages that need
|
||||
// to be gossiped that haven't been able to piggyback on probing messages.
|
||||
// If this is set to zero, non-piggyback gossip is disabled. By lowering
|
||||
// this value (more frequent) gossip messages are propagated across
|
||||
// the cluster more quickly at the expense of increased bandwidth.
|
||||
//
|
||||
// GossipNodes is the number of random nodes to send gossip messages to
|
||||
// per GossipInterval. Increasing this number causes the gossip messages
|
||||
// to propagate across the cluster more quickly at the expense of
|
||||
// increased bandwidth.
|
||||
//
|
||||
// GossipToTheDeadTime is the interval after which a node has died that
|
||||
// we will still try to gossip to it. This gives it a chance to refute.
|
||||
GossipInterval time.Duration
|
||||
GossipNodes int
|
||||
GossipToTheDeadTime time.Duration
|
||||
|
||||
// EnableCompression is used to control message compression. This can
|
||||
// be used to reduce bandwidth usage at the cost of slightly more CPU
|
||||
// utilization. This is only available starting at protocol version 1.
|
||||
EnableCompression bool
|
||||
|
||||
// SecretKey is used to initialize the primary encryption key in a keyring.
|
||||
// The primary encryption key is the only key used to encrypt messages and
|
||||
// the first key used while attempting to decrypt messages. Providing a
|
||||
// value for this primary key will enable message-level encryption and
|
||||
// verification, and automatically install the key onto the keyring.
|
||||
// The value should be either 16, 24, or 32 bytes to select AES-128,
|
||||
// AES-192, or AES-256.
|
||||
SecretKey []byte
|
||||
|
||||
// The keyring holds all of the encryption keys used internally. It is
|
||||
// automatically initialized using the SecretKey and SecretKeys values.
|
||||
Keyring *Keyring
|
||||
|
||||
// Delegate and Events are delegates for receiving and providing
|
||||
// data to memberlist via callback mechanisms. For Delegate, see
|
||||
// the Delegate interface. For Events, see the EventDelegate interface.
|
||||
//
|
||||
// The DelegateProtocolMin/Max are used to guarantee protocol-compatibility
|
||||
// for any custom messages that the delegate might do (broadcasts,
|
||||
// local/remote state, etc.). If you don't set these, then the protocol
|
||||
// versions will just be zero, and version compliance won't be done.
|
||||
Delegate Delegate
|
||||
DelegateProtocolVersion uint8
|
||||
DelegateProtocolMin uint8
|
||||
DelegateProtocolMax uint8
|
||||
Events EventDelegate
|
||||
Conflict ConflictDelegate
|
||||
Merge MergeDelegate
|
||||
Ping PingDelegate
|
||||
Alive AliveDelegate
|
||||
|
||||
// DNSConfigPath points to the system's DNS config file, usually located
|
||||
// at /etc/resolv.conf. It can be overridden via config for easier testing.
|
||||
DNSConfigPath string
|
||||
|
||||
// LogOutput is the writer where logs should be sent. If this is not
|
||||
// set, logging will go to stderr by default. You cannot specify both LogOutput
|
||||
// and Logger at the same time.
|
||||
LogOutput io.Writer
|
||||
|
||||
// Logger is a custom logger which you provide. If Logger is set, it will use
|
||||
// this for the internal logger. If Logger is not set, it will fall back to the
|
||||
// behavior for using LogOutput. You cannot specify both LogOutput and Logger
|
||||
// at the same time.
|
||||
Logger *log.Logger
|
||||
|
||||
// Size of Memberlist's internal channel which handles UDP messages. The
|
||||
// size of this determines the size of the queue which Memberlist will keep
|
||||
// while UDP messages are handled.
|
||||
HandoffQueueDepth int
|
||||
|
||||
// Maximum number of bytes that memberlist expects UDP messages to be. A safe
|
||||
// value for this is typically 1400 bytes (which is the default.) However,
|
||||
// depending on your network's MTU (Maximum Transmission Unit) you may be able
|
||||
// to increase this.
|
||||
UDPBufferSize int
|
||||
}
|
||||
|
||||
// DefaultLANConfig returns a sane set of configurations for Memberlist.
|
||||
// It uses the hostname as the node name, and otherwise sets very conservative
|
||||
// values that are sane for most LAN environments. The default configuration
|
||||
// errs on the side of caution, choosing values that are optimized
|
||||
// for higher convergence at the cost of higher bandwidth usage. Regardless,
|
||||
// these values are a good starting point when getting started with memberlist.
|
||||
func DefaultLANConfig() *Config {
|
||||
hostname, _ := os.Hostname()
|
||||
return &Config{
|
||||
Name: hostname,
|
||||
BindAddr: "0.0.0.0",
|
||||
BindPort: 7946,
|
||||
AdvertiseAddr: "",
|
||||
AdvertisePort: 7946,
|
||||
ProtocolVersion: ProtocolVersion2Compatible,
|
||||
TCPTimeout: 10 * time.Second, // Timeout after 10 seconds
|
||||
IndirectChecks: 3, // Use 3 nodes for the indirect ping
|
||||
RetransmitMult: 4, // Retransmit a message 4 * log(N+1) nodes
|
||||
SuspicionMult: 5, // Suspect a node for 5 * log(N+1) * Interval
|
||||
SuspicionMaxTimeoutMult: 6, // For 10k nodes this will give a max timeout of 120 seconds
|
||||
PushPullInterval: 30 * time.Second, // Low frequency
|
||||
ProbeTimeout: 500 * time.Millisecond, // Reasonable RTT time for LAN
|
||||
ProbeInterval: 1 * time.Second, // Failure check every second
|
||||
DisableTcpPings: false, // TCP pings are safe, even with mixed versions
|
||||
AwarenessMaxMultiplier: 8, // Probe interval backs off to 8 seconds
|
||||
|
||||
GossipNodes: 3, // Gossip to 3 nodes
|
||||
GossipInterval: 200 * time.Millisecond, // Gossip more rapidly
|
||||
GossipToTheDeadTime: 30 * time.Second, // Same as push/pull
|
||||
|
||||
EnableCompression: true, // Enable compression by default
|
||||
|
||||
SecretKey: nil,
|
||||
Keyring: nil,
|
||||
|
||||
DNSConfigPath: "/etc/resolv.conf",
|
||||
|
||||
HandoffQueueDepth: 1024,
|
||||
UDPBufferSize: 1400,
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultWANConfig works like DefaultConfig, however it returns a configuration
|
||||
// that is optimized for most WAN environments. The default configuration is
|
||||
// still very conservative and errs on the side of caution.
|
||||
func DefaultWANConfig() *Config {
|
||||
conf := DefaultLANConfig()
|
||||
conf.TCPTimeout = 30 * time.Second
|
||||
conf.SuspicionMult = 6
|
||||
conf.PushPullInterval = 60 * time.Second
|
||||
conf.ProbeTimeout = 3 * time.Second
|
||||
conf.ProbeInterval = 5 * time.Second
|
||||
conf.GossipNodes = 4 // Gossip less frequently, but to an additional node
|
||||
conf.GossipInterval = 500 * time.Millisecond
|
||||
conf.GossipToTheDeadTime = 60 * time.Second
|
||||
return conf
|
||||
}
|
||||
|
||||
// DefaultLocalConfig works like DefaultConfig, however it returns a configuration
|
||||
// that is optimized for a local loopback environments. The default configuration is
|
||||
// still very conservative and errs on the side of caution.
|
||||
func DefaultLocalConfig() *Config {
|
||||
conf := DefaultLANConfig()
|
||||
conf.TCPTimeout = time.Second
|
||||
conf.IndirectChecks = 1
|
||||
conf.RetransmitMult = 2
|
||||
conf.SuspicionMult = 3
|
||||
conf.PushPullInterval = 15 * time.Second
|
||||
conf.ProbeTimeout = 200 * time.Millisecond
|
||||
conf.ProbeInterval = time.Second
|
||||
conf.GossipInterval = 100 * time.Millisecond
|
||||
conf.GossipToTheDeadTime = 15 * time.Second
|
||||
return conf
|
||||
}
|
||||
|
||||
// Returns whether or not encryption is enabled
|
||||
func (c *Config) EncryptionEnabled() bool {
|
||||
return c.Keyring != nil && len(c.Keyring.GetKeys()) > 0
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
package memberlist
|
||||
|
||||
// ConflictDelegate is used to inform a client that
|
||||
// a node has attempted to join which would result in a
|
||||
// name conflict. This happens if two clients are configured
|
||||
// with the same name but different addresses.
|
||||
type ConflictDelegate interface {
|
||||
// NotifyConflict is invoked when a name conflict is detected
|
||||
NotifyConflict(existing, other *Node)
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
package memberlist
|
||||
|
||||
// Delegate is the interface that clients must implement if they want to hook
|
||||
// into the gossip layer of Memberlist. All the methods must be thread-safe,
|
||||
// as they can and generally will be called concurrently.
|
||||
type Delegate interface {
|
||||
// NodeMeta is used to retrieve meta-data about the current node
|
||||
// when broadcasting an alive message. Its length is limited to
|
||||
// the given byte size. This metadata is available in the Node structure.
|
||||
NodeMeta(limit int) []byte
|
||||
|
||||
// NotifyMsg is called when a user-data message is received.
|
||||
// Care should be taken that this method does not block, since doing
|
||||
// so would block the entire UDP packet receive loop. Additionally, the byte
|
||||
// slice may be modified after the call returns, so it should be copied if needed.
|
||||
NotifyMsg([]byte)
|
||||
|
||||
// GetBroadcasts is called when user data messages can be broadcast.
|
||||
// It can return a list of buffers to send. Each buffer should assume an
|
||||
// overhead as provided with a limit on the total byte size allowed.
|
||||
// The total byte size of the resulting data to send must not exceed
|
||||
// the limit. Care should be taken that this method does not block,
|
||||
// since doing so would block the entire UDP packet receive loop.
|
||||
GetBroadcasts(overhead, limit int) [][]byte
|
||||
|
||||
// LocalState is used for a TCP Push/Pull. This is sent to
|
||||
// the remote side in addition to the membership information. Any
|
||||
// data can be sent here. See MergeRemoteState as well. The `join`
|
||||
// boolean indicates this is for a join instead of a push/pull.
|
||||
LocalState(join bool) []byte
|
||||
|
||||
// MergeRemoteState is invoked after a TCP Push/Pull. This is the
|
||||
// state received from the remote side and is the result of the
|
||||
// remote side's LocalState call. The 'join'
|
||||
// boolean indicates this is for a join instead of a push/pull.
|
||||
MergeRemoteState(buf []byte, join bool)
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
package memberlist
|
||||
|
||||
// EventDelegate is a simpler delegate that is used only to receive
|
||||
// notifications about members joining and leaving. The methods in this
|
||||
// delegate may be called by multiple goroutines, but never concurrently.
|
||||
// This allows you to reason about ordering.
|
||||
type EventDelegate interface {
|
||||
// NotifyJoin is invoked when a node is detected to have joined.
|
||||
// The Node argument must not be modified.
|
||||
NotifyJoin(*Node)
|
||||
|
||||
// NotifyLeave is invoked when a node is detected to have left.
|
||||
// The Node argument must not be modified.
|
||||
NotifyLeave(*Node)
|
||||
|
||||
// NotifyUpdate is invoked when a node is detected to have
|
||||
// updated, usually involving the meta data. The Node argument
|
||||
// must not be modified.
|
||||
NotifyUpdate(*Node)
|
||||
}
|
||||
|
||||
// ChannelEventDelegate is used to enable an application to receive
|
||||
// events about joins and leaves over a channel instead of a direct
|
||||
// function call.
|
||||
//
|
||||
// Care must be taken that events are processed in a timely manner from
|
||||
// the channel, since this delegate will block until an event can be sent.
|
||||
type ChannelEventDelegate struct {
|
||||
Ch chan<- NodeEvent
|
||||
}
|
||||
|
||||
// NodeEventType enumerates the kinds of events that can be sent from the
|
||||
// ChannelEventDelegate.
|
||||
type NodeEventType int
|
||||
|
||||
// One event type per ChannelEventDelegate notification method.
const (
|
||||
// NodeJoin is the event type sent by ChannelEventDelegate.NotifyJoin.
NodeJoin NodeEventType = iota
|
||||
// NodeLeave is the event type sent by ChannelEventDelegate.NotifyLeave.
NodeLeave
|
||||
// NodeUpdate is the event type sent by ChannelEventDelegate.NotifyUpdate.
NodeUpdate
|
||||
)
|
||||
|
||||
// NodeEvent is a single event related to node activity in the memberlist.
|
||||
// The Node member of this struct must not be directly modified. It is passed
|
||||
// as a pointer to avoid unnecessary copies. If you wish to modify the node,
|
||||
// make a copy first.
|
||||
type NodeEvent struct {
|
||||
// Event identifies what happened (NodeJoin, NodeLeave or NodeUpdate).
Event NodeEventType
|
||||
// Node is the node the event refers to; treat it as read-only.
Node *Node
|
||||
}
|
||||
|
||||
func (c *ChannelEventDelegate) NotifyJoin(n *Node) {
|
||||
c.Ch <- NodeEvent{NodeJoin, n}
|
||||
}
|
||||
|
||||
func (c *ChannelEventDelegate) NotifyLeave(n *Node) {
|
||||
c.Ch <- NodeEvent{NodeLeave, n}
|
||||
}
|
||||
|
||||
func (c *ChannelEventDelegate) NotifyUpdate(n *Node) {
|
||||
c.Ch <- NodeEvent{NodeUpdate, n}
|
||||
}
|
|
@ -0,0 +1,160 @@
|
|||
package memberlist
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Keyring holds the ordered set of encryption keys together with the mutex
// that installKeys, GetKeys and GetPrimaryKey take when touching that set.
type Keyring struct {
|
||||
// keys stores the key data used during encryption and decryption. It is
|
||||
// ordered in such a way where the first key (index 0) is the primary key,
|
||||
// which is used for encrypting messages, and is the first key tried during
|
||||
// message decryption.
|
||||
keys [][]byte
|
||||
|
||||
// l is held while the key list is replaced or read through the accessors.
|
||||
l sync.Mutex
|
||||
}
|
||||
|
||||
// Init allocates substructures
|
||||
func (k *Keyring) init() {
|
||||
k.keys = make([][]byte, 0)
|
||||
}
|
||||
|
||||
// NewKeyring constructs a new container for a set of encryption keys. The
|
||||
// keyring contains all key data used internally by memberlist.
|
||||
//
|
||||
// While creating a new keyring, you must do one of:
|
||||
// - Omit keys and primary key, effectively disabling encryption
|
||||
// - Pass a set of keys plus the primary key
|
||||
// - Pass only a primary key
|
||||
//
|
||||
// If only a primary key is passed, then it will be automatically added to the
|
||||
// keyring. If creating a keyring with multiple keys, one key must be designated
|
||||
// primary by passing it as the primaryKey. If the primaryKey does not exist in
|
||||
// the list of secondary keys, it will be automatically added at position 0.
|
||||
//
|
||||
// A key should be either 16, 24, or 32 bytes to select AES-128,
|
||||
// AES-192, or AES-256.
|
||||
func NewKeyring(keys [][]byte, primaryKey []byte) (*Keyring, error) {
|
||||
keyring := &Keyring{}
|
||||
keyring.init()
|
||||
|
||||
if len(keys) > 0 || len(primaryKey) > 0 {
|
||||
if len(primaryKey) == 0 {
|
||||
return nil, fmt.Errorf("Empty primary key not allowed")
|
||||
}
|
||||
if err := keyring.AddKey(primaryKey); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, key := range keys {
|
||||
if err := keyring.AddKey(key); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return keyring, nil
|
||||
}
|
||||
|
||||
// ValidateKey reports whether key has a usable length. It returns nil for
// keys of 16, 24 or 32 bytes (selecting AES-128, AES-192 or AES-256) and an
// error for every other length, including an empty or nil key.
func ValidateKey(key []byte) error {
	switch len(key) {
	case 16, 24, 32:
		return nil
	default:
		return fmt.Errorf("key size must be 16, 24 or 32 bytes")
	}
}
|
||||
|
||||
// AddKey will install a new key on the ring. Adding a key to the ring will make
|
||||
// it available for use in decryption. If the key already exists on the ring,
|
||||
// this function will just return noop.
|
||||
//
|
||||
// key should be either 16, 24, or 32 bytes to select AES-128,
|
||||
// AES-192, or AES-256.
|
||||
func (k *Keyring) AddKey(key []byte) error {
|
||||
if err := ValidateKey(key); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// No-op if key is already installed
|
||||
for _, installedKey := range k.keys {
|
||||
if bytes.Equal(installedKey, key) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
keys := append(k.keys, key)
|
||||
primaryKey := k.GetPrimaryKey()
|
||||
if primaryKey == nil {
|
||||
primaryKey = key
|
||||
}
|
||||
k.installKeys(keys, primaryKey)
|
||||
return nil
|
||||
}
|
||||
|
||||
// UseKey changes the key used to encrypt messages. This is the only key used to
|
||||
// encrypt messages, so peers should know this key before this method is called.
|
||||
func (k *Keyring) UseKey(key []byte) error {
|
||||
for _, installedKey := range k.keys {
|
||||
if bytes.Equal(key, installedKey) {
|
||||
k.installKeys(k.keys, key)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("Requested key is not in the keyring")
|
||||
}
|
||||
|
||||
// RemoveKey drops a key from the keyring. This will return an error if the key
|
||||
// requested for removal is currently at position 0 (primary key).
|
||||
func (k *Keyring) RemoveKey(key []byte) error {
|
||||
if bytes.Equal(key, k.keys[0]) {
|
||||
return fmt.Errorf("Removing the primary key is not allowed")
|
||||
}
|
||||
for i, installedKey := range k.keys {
|
||||
if bytes.Equal(key, installedKey) {
|
||||
keys := append(k.keys[:i], k.keys[i+1:]...)
|
||||
k.installKeys(keys, k.keys[0])
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// installKeys will take out a lock on the keyring, and replace the keys with a
|
||||
// new set of keys. The key indicated by primaryKey will be installed as the new
|
||||
// primary key.
|
||||
func (k *Keyring) installKeys(keys [][]byte, primaryKey []byte) {
|
||||
k.l.Lock()
|
||||
defer k.l.Unlock()
|
||||
|
||||
newKeys := [][]byte{primaryKey}
|
||||
for _, key := range keys {
|
||||
if !bytes.Equal(key, primaryKey) {
|
||||
newKeys = append(newKeys, key)
|
||||
}
|
||||
}
|
||||
k.keys = newKeys
|
||||
}
|
||||
|
||||
// GetKeys returns the current set of keys on the ring.
|
||||
func (k *Keyring) GetKeys() [][]byte {
|
||||
k.l.Lock()
|
||||
defer k.l.Unlock()
|
||||
|
||||
return k.keys
|
||||
}
|
||||
|
||||
// GetPrimaryKey returns the key on the ring at position 0. This is the key used
|
||||
// for encrypting messages, and is the first key tried for decrypting messages.
|
||||
func (k *Keyring) GetPrimaryKey() (key []byte) {
|
||||
k.l.Lock()
|
||||
defer k.l.Unlock()
|
||||
|
||||
if len(k.keys) > 0 {
|
||||
key = k.keys[0]
|
||||
}
|
||||
return
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
package memberlist
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
)
|
||||
|
||||
// LogAddress formats addr as a "from=<address>" fragment for log messages.
// A nil addr yields "from=<unknown address>".
func LogAddress(addr net.Addr) string {
	source := "<unknown address>"
	if addr != nil {
		source = addr.String()
	}
	return fmt.Sprintf("from=%s", source)
}
|
||||
|
||||
func LogConn(conn net.Conn) string {
|
||||
if conn == nil {
|
||||
return LogAddress(nil)
|
||||
}
|
||||
|
||||
return LogAddress(conn.RemoteAddr())
|
||||
}
|
|
@ -0,0 +1,660 @@
|
|||
/*
|
||||
memberlist is a library that manages cluster
|
||||
membership and member failure detection using a gossip based protocol.
|
||||
|
||||
The use cases for such a library are far-reaching: all distributed systems
|
||||
require membership, and memberlist is a re-usable solution to managing
|
||||
cluster membership and node failure detection.
|
||||
|
||||
memberlist is eventually consistent but converges quickly on average.
|
||||
The speed at which it converges can be heavily tuned via various knobs
|
||||
on the protocol. Node failures are detected and network partitions are partially
|
||||
tolerated by attempting to communicate to potentially dead nodes through
|
||||
multiple routes.
|
||||
*/
|
||||
package memberlist
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
sockaddr "github.com/hashicorp/go-sockaddr"
|
||||
"github.com/miekg/dns"
|
||||
)
|
||||
|
||||
// Memberlist is the state of one local cluster member: its configuration,
// network listeners, the set of known nodes, timers and the broadcast queue.
type Memberlist struct {
|
||||
sequenceNum uint32 // Local sequence number
|
||||
incarnation uint32 // Local incarnation number
|
||||
numNodes uint32 // Number of known nodes (estimate)
|
||||
|
||||
config *Config
|
||||
shutdown bool // Set by Shutdown; read and written under nodeLock
|
||||
shutdownCh chan struct{} // Closed by Shutdown
|
||||
leave bool // Set by Leave; read and written under nodeLock
|
||||
leaveBroadcast chan struct{} // Leave waits on this until the leave broadcast goes out
|
||||
|
||||
udpListener *net.UDPConn
|
||||
tcpListener *net.TCPListener
|
||||
handoff chan msgHandoff
|
||||
|
||||
nodeLock sync.RWMutex // Held while nodes and nodeMap are read
|
||||
nodes []*nodeState // Known nodes
|
||||
nodeMap map[string]*nodeState // Maps node name -> NodeState (looked up via config.Name)
|
||||
nodeTimers map[string]*suspicion // Maps to suspicion timer; NOTE(review): key is presumably the node name as well - confirm
|
||||
awareness *awareness
|
||||
|
||||
tickerLock sync.Mutex
|
||||
tickers []*time.Ticker
|
||||
stopTick chan struct{}
|
||||
probeIndex int
|
||||
|
||||
ackLock sync.Mutex
|
||||
ackHandlers map[uint32]*ackHandler
|
||||
|
||||
broadcasts *TransmitLimitedQueue
|
||||
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// newMemberlist creates the network listeners.
|
||||
// Does not schedule execution of background maintenance.
|
||||
func newMemberlist(conf *Config) (*Memberlist, error) {
|
||||
if conf.ProtocolVersion < ProtocolVersionMin {
|
||||
return nil, fmt.Errorf("Protocol version '%d' too low. Must be in range: [%d, %d]",
|
||||
conf.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
|
||||
} else if conf.ProtocolVersion > ProtocolVersionMax {
|
||||
return nil, fmt.Errorf("Protocol version '%d' too high. Must be in range: [%d, %d]",
|
||||
conf.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
|
||||
}
|
||||
|
||||
if len(conf.SecretKey) > 0 {
|
||||
if conf.Keyring == nil {
|
||||
keyring, err := NewKeyring(nil, conf.SecretKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
conf.Keyring = keyring
|
||||
} else {
|
||||
if err := conf.Keyring.AddKey(conf.SecretKey); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := conf.Keyring.UseKey(conf.SecretKey); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tcpAddr := &net.TCPAddr{IP: net.ParseIP(conf.BindAddr), Port: conf.BindPort}
|
||||
tcpLn, err := net.ListenTCP("tcp", tcpAddr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to start TCP listener. Err: %s", err)
|
||||
}
|
||||
if conf.BindPort == 0 {
|
||||
conf.BindPort = tcpLn.Addr().(*net.TCPAddr).Port
|
||||
}
|
||||
|
||||
udpAddr := &net.UDPAddr{IP: net.ParseIP(conf.BindAddr), Port: conf.BindPort}
|
||||
udpLn, err := net.ListenUDP("udp", udpAddr)
|
||||
if err != nil {
|
||||
tcpLn.Close()
|
||||
return nil, fmt.Errorf("Failed to start UDP listener. Err: %s", err)
|
||||
}
|
||||
|
||||
// Set the UDP receive window size
|
||||
setUDPRecvBuf(udpLn)
|
||||
|
||||
if conf.LogOutput != nil && conf.Logger != nil {
|
||||
return nil, fmt.Errorf("Cannot specify both LogOutput and Logger. Please choose a single log configuration setting.")
|
||||
}
|
||||
|
||||
logDest := conf.LogOutput
|
||||
if logDest == nil {
|
||||
logDest = os.Stderr
|
||||
}
|
||||
|
||||
logger := conf.Logger
|
||||
if logger == nil {
|
||||
logger = log.New(logDest, "", log.LstdFlags)
|
||||
}
|
||||
|
||||
m := &Memberlist{
|
||||
config: conf,
|
||||
shutdownCh: make(chan struct{}),
|
||||
leaveBroadcast: make(chan struct{}, 1),
|
||||
udpListener: udpLn,
|
||||
tcpListener: tcpLn,
|
||||
handoff: make(chan msgHandoff, conf.HandoffQueueDepth),
|
||||
nodeMap: make(map[string]*nodeState),
|
||||
nodeTimers: make(map[string]*suspicion),
|
||||
awareness: newAwareness(conf.AwarenessMaxMultiplier),
|
||||
ackHandlers: make(map[uint32]*ackHandler),
|
||||
broadcasts: &TransmitLimitedQueue{RetransmitMult: conf.RetransmitMult},
|
||||
logger: logger,
|
||||
}
|
||||
m.broadcasts.NumNodes = func() int {
|
||||
return m.estNumNodes()
|
||||
}
|
||||
go m.tcpListen()
|
||||
go m.udpListen()
|
||||
go m.udpHandler()
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Create will create a new Memberlist using the given configuration.
|
||||
// This will not connect to any other node (see Join) yet, but will start
|
||||
// all the listeners to allow other nodes to join this memberlist.
|
||||
// After creating a Memberlist, the configuration given should not be
|
||||
// modified by the user anymore.
|
||||
func Create(conf *Config) (*Memberlist, error) {
|
||||
m, err := newMemberlist(conf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := m.setAlive(); err != nil {
|
||||
m.Shutdown()
|
||||
return nil, err
|
||||
}
|
||||
m.schedule()
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Join is used to take an existing Memberlist and attempt to join a cluster
|
||||
// by contacting all the given hosts and performing a state sync. Initially,
|
||||
// the Memberlist only contains our own state, so doing this will cause
|
||||
// remote nodes to become aware of the existence of this node, effectively
|
||||
// joining the cluster.
|
||||
//
|
||||
// This returns the number of hosts successfully contacted and an error if
|
||||
// none could be reached. If an error is returned, the node did not successfully
|
||||
// join the cluster.
|
||||
func (m *Memberlist) Join(existing []string) (int, error) {
|
||||
numSuccess := 0
|
||||
var errs error
|
||||
for _, exist := range existing {
|
||||
addrs, err := m.resolveAddr(exist)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Failed to resolve %s: %v", exist, err)
|
||||
errs = multierror.Append(errs, err)
|
||||
m.logger.Printf("[WARN] memberlist: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, addr := range addrs {
|
||||
if err := m.pushPullNode(addr.ip, addr.port, true); err != nil {
|
||||
err = fmt.Errorf("Failed to join %s: %v", addr.ip, err)
|
||||
errs = multierror.Append(errs, err)
|
||||
m.logger.Printf("[DEBUG] memberlist: %v", err)
|
||||
continue
|
||||
}
|
||||
numSuccess++
|
||||
}
|
||||
|
||||
}
|
||||
if numSuccess > 0 {
|
||||
errs = nil
|
||||
}
|
||||
return numSuccess, errs
|
||||
}
|
||||
|
||||
// ipPort is an IP address and port pair describing a node we want to try to join.
|
||||
type ipPort struct {
|
||||
ip net.IP // address to contact
|
||||
port uint16 // port to contact on that address
|
||||
}
|
||||
|
||||
// tcpLookupIP is a helper to initiate a TCP-based DNS lookup for the given host.
|
||||
// The built-in Go resolver will do a UDP lookup first, and will only use TCP if
|
||||
// the response has the truncate bit set, which isn't common on DNS servers like
|
||||
// Consul's. By doing the TCP lookup directly, we get the best chance for the
|
||||
// largest list of hosts to join. Since joins are relatively rare events, it's ok
|
||||
// to do this rather expensive operation.
|
||||
func (m *Memberlist) tcpLookupIP(host string, defaultPort uint16) ([]ipPort, error) {
|
||||
// Don't attempt any TCP lookups against non-fully qualified domain
|
||||
// names, since those will likely come from the resolv.conf file.
|
||||
if !strings.Contains(host, ".") {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Make sure the domain name is terminated with a dot (we know there's
|
||||
// at least one character at this point).
|
||||
dn := host
|
||||
if dn[len(dn)-1] != '.' {
|
||||
dn = dn + "."
|
||||
}
|
||||
|
||||
// See if we can find a server to try.
|
||||
cc, err := dns.ClientConfigFromFile(m.config.DNSConfigPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(cc.Servers) > 0 {
|
||||
// We support host:port in the DNS config, but need to add the
|
||||
// default port if one is not supplied.
|
||||
server := cc.Servers[0]
|
||||
if !hasPort(server) {
|
||||
server = net.JoinHostPort(server, cc.Port)
|
||||
}
|
||||
|
||||
// Do the lookup.
|
||||
c := new(dns.Client)
|
||||
c.Net = "tcp"
|
||||
msg := new(dns.Msg)
|
||||
msg.SetQuestion(dn, dns.TypeANY)
|
||||
in, _, err := c.Exchange(msg, server)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Handle any IPs we get back that we can attempt to join.
|
||||
var ips []ipPort
|
||||
for _, r := range in.Answer {
|
||||
switch rr := r.(type) {
|
||||
case (*dns.A):
|
||||
ips = append(ips, ipPort{rr.A, defaultPort})
|
||||
case (*dns.AAAA):
|
||||
ips = append(ips, ipPort{rr.AAAA, defaultPort})
|
||||
case (*dns.CNAME):
|
||||
m.logger.Printf("[DEBUG] memberlist: Ignoring CNAME RR in TCP-first answer for '%s'", host)
|
||||
}
|
||||
}
|
||||
return ips, nil
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// resolveAddr is used to resolve the address into an address,
|
||||
// port, and error. If no port is given, use the default
|
||||
func (m *Memberlist) resolveAddr(hostStr string) ([]ipPort, error) {
|
||||
// Normalize the incoming string to host:port so we can apply Go's
|
||||
// parser to it.
|
||||
port := uint16(0)
|
||||
if !hasPort(hostStr) {
|
||||
hostStr += ":" + strconv.Itoa(m.config.BindPort)
|
||||
}
|
||||
host, sport, err := net.SplitHostPort(hostStr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// This will capture the supplied port, or the default one added above.
|
||||
lport, err := strconv.ParseUint(sport, 10, 16)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
port = uint16(lport)
|
||||
|
||||
// If it looks like an IP address we are done. The SplitHostPort() above
|
||||
// will make sure the host part is in good shape for parsing, even for
|
||||
// IPv6 addresses.
|
||||
if ip := net.ParseIP(host); ip != nil {
|
||||
return []ipPort{ipPort{ip, port}}, nil
|
||||
}
|
||||
|
||||
// First try TCP so we have the best chance for the largest list of
|
||||
// hosts to join. If this fails it's not fatal since this isn't a standard
|
||||
// way to query DNS, and we have a fallback below.
|
||||
ips, err := m.tcpLookupIP(host, port)
|
||||
if err != nil {
|
||||
m.logger.Printf("[DEBUG] memberlist: TCP-first lookup failed for '%s', falling back to UDP: %s", hostStr, err)
|
||||
}
|
||||
if len(ips) > 0 {
|
||||
return ips, nil
|
||||
}
|
||||
|
||||
// If TCP didn't yield anything then use the normal Go resolver which
|
||||
// will try UDP, then might possibly try TCP again if the UDP response
|
||||
// indicates it was truncated.
|
||||
ans, err := net.LookupIP(host)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ips = make([]ipPort, 0, len(ans))
|
||||
for _, ip := range ans {
|
||||
ips = append(ips, ipPort{ip, port})
|
||||
}
|
||||
return ips, nil
|
||||
}
|
||||
|
||||
// setAlive is used to mark this node as being alive. This is the same
|
||||
// as if we received an alive notification our own network channel for
|
||||
// ourself.
|
||||
func (m *Memberlist) setAlive() error {
|
||||
var advertiseAddr net.IP
|
||||
var advertisePort int
|
||||
if m.config.AdvertiseAddr != "" {
|
||||
// If AdvertiseAddr is not empty, then advertise
|
||||
// the given address and port.
|
||||
ip := net.ParseIP(m.config.AdvertiseAddr)
|
||||
if ip == nil {
|
||||
return fmt.Errorf("Failed to parse advertise address!")
|
||||
}
|
||||
|
||||
// Ensure IPv4 conversion if necessary
|
||||
if ip4 := ip.To4(); ip4 != nil {
|
||||
ip = ip4
|
||||
}
|
||||
|
||||
advertiseAddr = ip
|
||||
advertisePort = m.config.AdvertisePort
|
||||
} else {
|
||||
if m.config.BindAddr == "0.0.0.0" {
|
||||
// Otherwise, if we're not bound to a specific IP, let's use a suitable
|
||||
// private IP address.
|
||||
var err error
|
||||
m.config.AdvertiseAddr, err = sockaddr.GetPrivateIP()
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to get interface addresses: %v", err)
|
||||
}
|
||||
if m.config.AdvertiseAddr == "" {
|
||||
return fmt.Errorf("No private IP address found, and explicit IP not provided")
|
||||
}
|
||||
|
||||
advertiseAddr = net.ParseIP(m.config.AdvertiseAddr)
|
||||
if advertiseAddr == nil {
|
||||
return fmt.Errorf("Failed to parse advertise address: %q", m.config.AdvertiseAddr)
|
||||
}
|
||||
} else {
|
||||
// Use the IP that we're bound to.
|
||||
addr := m.tcpListener.Addr().(*net.TCPAddr)
|
||||
advertiseAddr = addr.IP
|
||||
}
|
||||
|
||||
// Use the port we are bound to.
|
||||
advertisePort = m.tcpListener.Addr().(*net.TCPAddr).Port
|
||||
}
|
||||
|
||||
// Check if this is a public address without encryption
|
||||
ipAddr, err := sockaddr.NewIPAddr(advertiseAddr.String())
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to parse interface addresses: %v", err)
|
||||
}
|
||||
|
||||
ifAddrs := []sockaddr.IfAddr{
|
||||
sockaddr.IfAddr{
|
||||
SockAddr: ipAddr,
|
||||
},
|
||||
}
|
||||
|
||||
_, publicIfs, err := sockaddr.IfByRFC("6890", ifAddrs)
|
||||
if len(publicIfs) > 0 && !m.config.EncryptionEnabled() {
|
||||
m.logger.Printf("[WARN] memberlist: Binding to public address without encryption!")
|
||||
}
|
||||
|
||||
// Get the node meta data
|
||||
var meta []byte
|
||||
if m.config.Delegate != nil {
|
||||
meta = m.config.Delegate.NodeMeta(MetaMaxSize)
|
||||
if len(meta) > MetaMaxSize {
|
||||
panic("Node meta data provided is longer than the limit")
|
||||
}
|
||||
}
|
||||
|
||||
a := alive{
|
||||
Incarnation: m.nextIncarnation(),
|
||||
Node: m.config.Name,
|
||||
Addr: advertiseAddr,
|
||||
Port: uint16(advertisePort),
|
||||
Meta: meta,
|
||||
Vsn: []uint8{
|
||||
ProtocolVersionMin, ProtocolVersionMax, m.config.ProtocolVersion,
|
||||
m.config.DelegateProtocolMin, m.config.DelegateProtocolMax,
|
||||
m.config.DelegateProtocolVersion,
|
||||
},
|
||||
}
|
||||
m.aliveNode(&a, nil, true)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// LocalNode is used to return the local Node
|
||||
func (m *Memberlist) LocalNode() *Node {
|
||||
m.nodeLock.RLock()
|
||||
defer m.nodeLock.RUnlock()
|
||||
state := m.nodeMap[m.config.Name]
|
||||
return &state.Node
|
||||
}
|
||||
|
||||
// UpdateNode is used to trigger re-advertising the local node. This is
|
||||
// primarily used with a Delegate to support dynamic updates to the local
|
||||
// meta data. This will block until the update message is successfully
|
||||
// broadcasted to a member of the cluster, if any exist or until a specified
|
||||
// timeout is reached.
|
||||
func (m *Memberlist) UpdateNode(timeout time.Duration) error {
|
||||
// Get the node meta data
|
||||
var meta []byte
|
||||
if m.config.Delegate != nil {
|
||||
meta = m.config.Delegate.NodeMeta(MetaMaxSize)
|
||||
if len(meta) > MetaMaxSize {
|
||||
panic("Node meta data provided is longer than the limit")
|
||||
}
|
||||
}
|
||||
|
||||
// Get the existing node
|
||||
m.nodeLock.RLock()
|
||||
state := m.nodeMap[m.config.Name]
|
||||
m.nodeLock.RUnlock()
|
||||
|
||||
// Format a new alive message
|
||||
a := alive{
|
||||
Incarnation: m.nextIncarnation(),
|
||||
Node: m.config.Name,
|
||||
Addr: state.Addr,
|
||||
Port: state.Port,
|
||||
Meta: meta,
|
||||
Vsn: []uint8{
|
||||
ProtocolVersionMin, ProtocolVersionMax, m.config.ProtocolVersion,
|
||||
m.config.DelegateProtocolMin, m.config.DelegateProtocolMax,
|
||||
m.config.DelegateProtocolVersion,
|
||||
},
|
||||
}
|
||||
notifyCh := make(chan struct{})
|
||||
m.aliveNode(&a, notifyCh, true)
|
||||
|
||||
// Wait for the broadcast or a timeout
|
||||
if m.anyAlive() {
|
||||
var timeoutCh <-chan time.Time
|
||||
if timeout > 0 {
|
||||
timeoutCh = time.After(timeout)
|
||||
}
|
||||
select {
|
||||
case <-notifyCh:
|
||||
case <-timeoutCh:
|
||||
return fmt.Errorf("timeout waiting for update broadcast")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SendTo is used to directly send a message to another node, without
|
||||
// the use of the gossip mechanism. This will encode the message as a
|
||||
// user-data message, which a delegate will receive through NotifyMsg
|
||||
// The actual data is transmitted over UDP, which means this is a
|
||||
// best-effort transmission mechanism, and the maximum size of the
|
||||
// message is the size of a single UDP datagram, after compression.
|
||||
// This method is DEPRECATED in favor or SendToUDP
|
||||
func (m *Memberlist) SendTo(to net.Addr, msg []byte) error {
|
||||
// Encode as a user message
|
||||
buf := make([]byte, 1, len(msg)+1)
|
||||
buf[0] = byte(userMsg)
|
||||
buf = append(buf, msg...)
|
||||
|
||||
// Send the message
|
||||
return m.rawSendMsgUDP(to, nil, buf)
|
||||
}
|
||||
|
||||
// SendToUDP is used to directly send a message to another node, without
|
||||
// the use of the gossip mechanism. This will encode the message as a
|
||||
// user-data message, which a delegate will receive through NotifyMsg
|
||||
// The actual data is transmitted over UDP, which means this is a
|
||||
// best-effort transmission mechanism, and the maximum size of the
|
||||
// message is the size of a single UDP datagram, after compression
|
||||
func (m *Memberlist) SendToUDP(to *Node, msg []byte) error {
|
||||
// Encode as a user message
|
||||
buf := make([]byte, 1, len(msg)+1)
|
||||
buf[0] = byte(userMsg)
|
||||
buf = append(buf, msg...)
|
||||
|
||||
// Send the message
|
||||
destAddr := &net.UDPAddr{IP: to.Addr, Port: int(to.Port)}
|
||||
return m.rawSendMsgUDP(destAddr, to, buf)
|
||||
}
|
||||
|
||||
// SendToTCP is used to directly send a message to another node, without
|
||||
// the use of the gossip mechanism. This will encode the message as a
|
||||
// user-data message, which a delegate will receive through NotifyMsg
|
||||
// The actual data is transmitted over TCP, which means delivery
|
||||
// is guaranteed if no error is returned. There is no limit
|
||||
// to the size of the message
|
||||
func (m *Memberlist) SendToTCP(to *Node, msg []byte) error {
|
||||
// Send the message
|
||||
destAddr := &net.TCPAddr{IP: to.Addr, Port: int(to.Port)}
|
||||
return m.sendTCPUserMsg(destAddr, msg)
|
||||
}
|
||||
|
||||
// Members returns a list of all known live nodes. The node structures
|
||||
// returned must not be modified. If you wish to modify a Node, make a
|
||||
// copy first.
|
||||
func (m *Memberlist) Members() []*Node {
|
||||
m.nodeLock.RLock()
|
||||
defer m.nodeLock.RUnlock()
|
||||
|
||||
nodes := make([]*Node, 0, len(m.nodes))
|
||||
for _, n := range m.nodes {
|
||||
if n.State != stateDead {
|
||||
nodes = append(nodes, &n.Node)
|
||||
}
|
||||
}
|
||||
|
||||
return nodes
|
||||
}
|
||||
|
||||
// NumMembers returns the number of alive nodes currently known. Between
|
||||
// the time of calling this and calling Members, the number of alive nodes
|
||||
// may have changed, so this shouldn't be used to determine how many
|
||||
// members will be returned by Members.
|
||||
func (m *Memberlist) NumMembers() (alive int) {
|
||||
m.nodeLock.RLock()
|
||||
defer m.nodeLock.RUnlock()
|
||||
|
||||
for _, n := range m.nodes {
|
||||
if n.State != stateDead {
|
||||
alive++
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Leave will broadcast a leave message but will not shutdown the background
|
||||
// listeners, meaning the node will continue participating in gossip and state
|
||||
// updates.
|
||||
//
|
||||
// This will block until the leave message is successfully broadcasted to
|
||||
// a member of the cluster, if any exist or until a specified timeout
|
||||
// is reached.
|
||||
//
|
||||
// This method is safe to call multiple times, but must not be called
|
||||
// after the cluster is already shut down.
|
||||
func (m *Memberlist) Leave(timeout time.Duration) error {
|
||||
m.nodeLock.Lock()
|
||||
// We can't defer m.nodeLock.Unlock() because m.deadNode will also try to
|
||||
// acquire a lock so we need to Unlock before that.
|
||||
|
||||
if m.shutdown {
|
||||
m.nodeLock.Unlock()
|
||||
panic("leave after shutdown")
|
||||
}
|
||||
|
||||
if !m.leave {
|
||||
m.leave = true
|
||||
|
||||
state, ok := m.nodeMap[m.config.Name]
|
||||
m.nodeLock.Unlock()
|
||||
if !ok {
|
||||
m.logger.Printf("[WARN] memberlist: Leave but we're not in the node map.")
|
||||
return nil
|
||||
}
|
||||
|
||||
d := dead{
|
||||
Incarnation: state.Incarnation,
|
||||
Node: state.Name,
|
||||
}
|
||||
m.deadNode(&d)
|
||||
|
||||
// Block until the broadcast goes out
|
||||
if m.anyAlive() {
|
||||
var timeoutCh <-chan time.Time
|
||||
if timeout > 0 {
|
||||
timeoutCh = time.After(timeout)
|
||||
}
|
||||
select {
|
||||
case <-m.leaveBroadcast:
|
||||
case <-timeoutCh:
|
||||
return fmt.Errorf("timeout waiting for leave broadcast")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
m.nodeLock.Unlock()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check for any other alive node.
|
||||
func (m *Memberlist) anyAlive() bool {
|
||||
m.nodeLock.RLock()
|
||||
defer m.nodeLock.RUnlock()
|
||||
for _, n := range m.nodes {
|
||||
if n.State != stateDead && n.Name != m.config.Name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// GetHealthScore gives this instance's idea of how well it is meeting the soft
|
||||
// real-time requirements of the protocol. Lower numbers are better, and zero
|
||||
// means "totally healthy".
|
||||
func (m *Memberlist) GetHealthScore() int {
|
||||
return m.awareness.GetHealthScore()
|
||||
}
|
||||
|
||||
// ProtocolVersion returns the protocol version currently in use by
|
||||
// this memberlist.
|
||||
func (m *Memberlist) ProtocolVersion() uint8 {
|
||||
// NOTE: This method exists so that in the future we can control
|
||||
// any locking if necessary, if we change the protocol version at
|
||||
// runtime, etc.
|
||||
return m.config.ProtocolVersion
|
||||
}
|
||||
|
||||
// Shutdown will stop any background maintanence of network activity
|
||||
// for this memberlist, causing it to appear "dead". A leave message
|
||||
// will not be broadcasted prior, so the cluster being left will have
|
||||
// to detect this node's shutdown using probing. If you wish to more
|
||||
// gracefully exit the cluster, call Leave prior to shutting down.
|
||||
//
|
||||
// This method is safe to call multiple times.
|
||||
func (m *Memberlist) Shutdown() error {
|
||||
m.nodeLock.Lock()
|
||||
defer m.nodeLock.Unlock()
|
||||
|
||||
if m.shutdown {
|
||||
return nil
|
||||
}
|
||||
|
||||
m.shutdown = true
|
||||
close(m.shutdownCh)
|
||||
m.deschedule()
|
||||
m.udpListener.Close()
|
||||
m.tcpListener.Close()
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
package memberlist
|
||||
|
||||
// MergeDelegate is used to involve a client in
|
||||
// a potential cluster merge operation. Namely, when
|
||||
// a node does a TCP push/pull (as part of a join),
|
||||
// the delegate is consulted and may cancel the join
|
||||
// based on custom logic. The merge delegate is NOT invoked
|
||||
// as part of the push-pull anti-entropy.
|
||||
type MergeDelegate interface {
|
||||
// NotifyMerge is invoked when a merge could take place.
|
||||
// peers is the list of nodes known by the remote side. If
|
||||
// the return value is non-nil, the merge is canceled.
|
||||
NotifyMerge(peers []*Node) error
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,14 @@
|
|||
package memberlist
|
||||
|
||||
import "time"
|
||||
|
||||
// PingDelegate is used to notify an observer how long it took for a ping message to
|
||||
// complete a round trip. It can also be used for writing arbitrary byte slices
|
||||
// into ack messages. Note that in order to be meaningful for RTT estimates, this
|
||||
// delegate does not apply to indirect pings, nor fallback pings sent over TCP.
|
||||
type PingDelegate interface {
|
||||
// AckPayload is invoked when an ack is being sent; the returned bytes will be appended to the ack
|
||||
AckPayload() []byte
|
||||
// NotifyPingComplete is invoked when an ack for a ping is received; rtt is the round-trip time and payload the bytes carried by the ack
|
||||
NotifyPingComplete(other *Node, rtt time.Duration, payload []byte)
|
||||
}
|
|
@ -0,0 +1,167 @@
|
|||
package memberlist
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// TransmitLimitedQueue is used to queue messages to broadcast to
|
||||
// the cluster (via gossip) but limits the number of transmits per
|
||||
// message. It also prioritizes messages with lower transmit counts
|
||||
// (hence newer messages).
|
||||
type TransmitLimitedQueue struct {
|
||||
// NumNodes returns the number of nodes in the cluster. This is
|
||||
// used to determine the retransmit count, which is calculated
|
||||
// based on the log of this.
|
||||
NumNodes func() int
|
||||
|
||||
// RetransmitMult is the multiplier used to determine the maximum
|
||||
// number of retransmissions attempted.
|
||||
RetransmitMult int
|
||||
|
||||
// The embedded mutex is held by QueueBroadcast while bcQueue is modified.
sync.Mutex
|
||||
// bcQueue holds the pending broadcasts with their transmit counts.
bcQueue limitedBroadcasts
|
||||
}
|
||||
|
||||
// limitedBroadcast pairs a queued Broadcast with the number of times it has
// been handed out for transmission.
type limitedBroadcast struct {
|
||||
transmits int // Number of transmissions attempted.
|
||||
b Broadcast // The broadcast being tracked.
|
||||
}
|
||||
// limitedBroadcasts is the queue representation; it implements
// sort.Interface ordered by transmit count.
type limitedBroadcasts []*limitedBroadcast
|
||||
|
||||
// Broadcast is something that can be broadcasted via gossip to
|
||||
// the memberlist cluster.
|
||||
type Broadcast interface {
|
||||
// Invalidates checks if enqueuing the current broadcast
|
||||
// invalidates a previous broadcast
|
||||
Invalidates(b Broadcast) bool
|
||||
|
||||
// Message returns a byte form of the message
|
||||
Message() []byte
|
||||
|
||||
// Finished is invoked when the message will no longer
|
||||
// be broadcast, either due to invalidation, to the
|
||||
// transmit limit being reached, or to the queue being reset or pruned
|
||||
Finished()
|
||||
}
|
||||
|
||||
// QueueBroadcast is used to enqueue a broadcast
|
||||
func (q *TransmitLimitedQueue) QueueBroadcast(b Broadcast) {
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
|
||||
// Check if this message invalidates another
|
||||
n := len(q.bcQueue)
|
||||
for i := 0; i < n; i++ {
|
||||
if b.Invalidates(q.bcQueue[i].b) {
|
||||
q.bcQueue[i].b.Finished()
|
||||
copy(q.bcQueue[i:], q.bcQueue[i+1:])
|
||||
q.bcQueue[n-1] = nil
|
||||
q.bcQueue = q.bcQueue[:n-1]
|
||||
n--
|
||||
}
|
||||
}
|
||||
|
||||
// Append to the queue
|
||||
q.bcQueue = append(q.bcQueue, &limitedBroadcast{0, b})
|
||||
}
|
||||
|
||||
// GetBroadcasts is used to get a number of broadcasts, up to a byte limit
|
||||
// and applying a per-message overhead as provided.
|
||||
func (q *TransmitLimitedQueue) GetBroadcasts(overhead, limit int) [][]byte {
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
|
||||
// Fast path the default case
|
||||
if len(q.bcQueue) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
transmitLimit := retransmitLimit(q.RetransmitMult, q.NumNodes())
|
||||
bytesUsed := 0
|
||||
var toSend [][]byte
|
||||
|
||||
for i := len(q.bcQueue) - 1; i >= 0; i-- {
|
||||
// Check if this is within our limits
|
||||
b := q.bcQueue[i]
|
||||
msg := b.b.Message()
|
||||
if bytesUsed+overhead+len(msg) > limit {
|
||||
continue
|
||||
}
|
||||
|
||||
// Add to slice to send
|
||||
bytesUsed += overhead + len(msg)
|
||||
toSend = append(toSend, msg)
|
||||
|
||||
// Check if we should stop transmission
|
||||
b.transmits++
|
||||
if b.transmits >= transmitLimit {
|
||||
b.b.Finished()
|
||||
n := len(q.bcQueue)
|
||||
q.bcQueue[i], q.bcQueue[n-1] = q.bcQueue[n-1], nil
|
||||
q.bcQueue = q.bcQueue[:n-1]
|
||||
}
|
||||
}
|
||||
|
||||
// If we are sending anything, we need to re-sort to deal
|
||||
// with adjusted transmit counts
|
||||
if len(toSend) > 0 {
|
||||
q.bcQueue.Sort()
|
||||
}
|
||||
return toSend
|
||||
}
|
||||
|
||||
// NumQueued returns the number of queued messages
|
||||
func (q *TransmitLimitedQueue) NumQueued() int {
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
return len(q.bcQueue)
|
||||
}
|
||||
|
||||
// Reset clears all the queued messages
|
||||
func (q *TransmitLimitedQueue) Reset() {
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
for _, b := range q.bcQueue {
|
||||
b.b.Finished()
|
||||
}
|
||||
q.bcQueue = nil
|
||||
}
|
||||
|
||||
// Prune will retain the maxRetain latest messages, and the rest
|
||||
// will be discarded. This can be used to prevent unbounded queue sizes
|
||||
func (q *TransmitLimitedQueue) Prune(maxRetain int) {
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
|
||||
// Do nothing if queue size is less than the limit
|
||||
n := len(q.bcQueue)
|
||||
if n < maxRetain {
|
||||
return
|
||||
}
|
||||
|
||||
// Invalidate the messages we will be removing
|
||||
for i := 0; i < n-maxRetain; i++ {
|
||||
q.bcQueue[i].b.Finished()
|
||||
}
|
||||
|
||||
// Move the messages, and retain only the last maxRetain
|
||||
copy(q.bcQueue[0:], q.bcQueue[n-maxRetain:])
|
||||
q.bcQueue = q.bcQueue[:maxRetain]
|
||||
}
|
||||
|
||||
func (b limitedBroadcasts) Len() int {
|
||||
return len(b)
|
||||
}
|
||||
|
||||
func (b limitedBroadcasts) Less(i, j int) bool {
|
||||
return b[i].transmits < b[j].transmits
|
||||
}
|
||||
|
||||
func (b limitedBroadcasts) Swap(i, j int) {
|
||||
b[i], b[j] = b[j], b[i]
|
||||
}
|
||||
|
||||
func (b limitedBroadcasts) Sort() {
|
||||
sort.Sort(sort.Reverse(b))
|
||||
}
|
|
@ -0,0 +1,198 @@
|
|||
package memberlist
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/aes"
|
||||
"crypto/cipher"
|
||||
"crypto/rand"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
/*
|
||||
|
||||
Encrypted messages are prefixed with an encryptionVersion byte
|
||||
that is used for us to be able to properly encode/decode. We
|
||||
currently support the following versions:
|
||||
|
||||
0 - AES-GCM 128, using PKCS7 padding
|
||||
1 - AES-GCM 128, no padding. Padding not needed, caused bloat.
|
||||
|
||||
*/
|
||||
type encryptionVersion uint8
|
||||
|
||||
const (
|
||||
// minEncryptionVersion is the lowest defined encryption version.
minEncryptionVersion encryptionVersion = 0
|
||||
// maxEncryptionVersion is the highest encryption version decryptPayload
// accepts; a larger version byte is rejected as unsupported.
maxEncryptionVersion encryptionVersion = 1
|
||||
)
|
||||
|
||||
const (
|
||||
// versionSize is the number of bytes taken by the leading encryption version.
versionSize = 1
|
||||
// nonceSize is the number of random nonce bytes written after the version byte.
nonceSize = 12
|
||||
// tagSize is the number of tag bytes counted by encryptedLength.
tagSize = 16
|
||||
// NOTE(review): maxPadOverhead is not referenced in the visible code; presumably
// the largest PKCS7 padding a version 0 message can carry — confirm.
maxPadOverhead = 16
|
||||
// blockSize is the block size encryptedLength uses to compute version 0 padding.
blockSize = aes.BlockSize
|
||||
)
|
||||
|
||||
// pkcs7encode appends PKCS7 padding to buf so that the bytes following the
// first ignore bytes fill a whole number of blocks of blockSize. The ignored
// prefix lets callers exclude leading header bytes (such as the IV) from the
// length calculation. Already-aligned content receives a full extra block.
func pkcs7encode(buf *bytes.Buffer, ignore, blockSize int) {
	payloadLen := buf.Len() - ignore
	pad := blockSize - payloadLen%blockSize
	if pad <= 0 {
		return
	}
	// Each padding byte carries the padding length itself.
	buf.Write(bytes.Repeat([]byte{byte(pad)}, pad))
}
|
||||
|
||||
// pkcs7decode strips PKCS7 padding from buf: the value of the final byte is
// taken as the number of trailing bytes to drop. It panics when buf is empty.
// The blockSize argument is not consulted.
func pkcs7decode(buf []byte, blockSize int) []byte {
	size := len(buf)
	if size == 0 {
		panic("Cannot decode a PKCS7 buffer of zero length")
	}
	pad := int(buf[size-1])
	return buf[:size-pad]
}
|
||||
|
||||
// encryptOverhead returns the maximum possible overhead of encryption by version
|
||||
func encryptOverhead(vsn encryptionVersion) int {
|
||||
switch vsn {
|
||||
case 0:
|
||||
return 45 // Version: 1, IV: 12, Padding: 16, Tag: 16
|
||||
case 1:
|
||||
return 29 // Version: 1, IV: 12, Tag: 16
|
||||
default:
|
||||
panic("unsupported version")
|
||||
}
|
||||
}
|
||||
|
||||
// encryptedLength is used to compute the buffer size needed
|
||||
// for a message of given length
|
||||
func encryptedLength(vsn encryptionVersion, inp int) int {
|
||||
// If we are on version 1, there is no padding
|
||||
if vsn >= 1 {
|
||||
return versionSize + nonceSize + inp + tagSize
|
||||
}
|
||||
|
||||
// Determine the padding size
|
||||
padding := blockSize - (inp % blockSize)
|
||||
|
||||
// Sum the extra parts to get total size
|
||||
return versionSize + nonceSize + inp + padding + tagSize
|
||||
}
|
||||
|
||||
// encryptPayload is used to encrypt a message with a given key.
|
||||
// We make use of AES-128 in GCM mode. New byte buffer is the version,
|
||||
// nonce, ciphertext and tag
|
||||
func encryptPayload(vsn encryptionVersion, key []byte, msg []byte, data []byte, dst *bytes.Buffer) error {
|
||||
// Get the AES block cipher
|
||||
aesBlock, err := aes.NewCipher(key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get the GCM cipher mode
|
||||
gcm, err := cipher.NewGCM(aesBlock)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Grow the buffer to make room for everything
|
||||
offset := dst.Len()
|
||||
dst.Grow(encryptedLength(vsn, len(msg)))
|
||||
|
||||
// Write the encryption version
|
||||
dst.WriteByte(byte(vsn))
|
||||
|
||||
// Add a random nonce
|
||||
io.CopyN(dst, rand.Reader, nonceSize)
|
||||
afterNonce := dst.Len()
|
||||
|
||||
// Ensure we are correctly padded (only version 0)
|
||||
if vsn == 0 {
|
||||
io.Copy(dst, bytes.NewReader(msg))
|
||||
pkcs7encode(dst, offset+versionSize+nonceSize, aes.BlockSize)
|
||||
}
|
||||
|
||||
// Encrypt message using GCM
|
||||
slice := dst.Bytes()[offset:]
|
||||
nonce := slice[versionSize : versionSize+nonceSize]
|
||||
|
||||
// Message source depends on the encryption version.
|
||||
// Version 0 uses padding, version 1 does not
|
||||
var src []byte
|
||||
if vsn == 0 {
|
||||
src = slice[versionSize+nonceSize:]
|
||||
} else {
|
||||
src = msg
|
||||
}
|
||||
out := gcm.Seal(nil, nonce, src, data)
|
||||
|
||||
// Truncate the plaintext, and write the cipher text
|
||||
dst.Truncate(afterNonce)
|
||||
dst.Write(out)
|
||||
return nil
|
||||
}
|
||||
|
||||
// decryptMessage performs the actual decryption of ciphertext. This is in its
|
||||
// own function to allow it to be called on all keys easily.
|
||||
func decryptMessage(key, msg []byte, data []byte) ([]byte, error) {
|
||||
// Get the AES block cipher
|
||||
aesBlock, err := aes.NewCipher(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Get the GCM cipher mode
|
||||
gcm, err := cipher.NewGCM(aesBlock)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Decrypt the message
|
||||
nonce := msg[versionSize : versionSize+nonceSize]
|
||||
ciphertext := msg[versionSize+nonceSize:]
|
||||
plain, err := gcm.Open(nil, nonce, ciphertext, data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Success!
|
||||
return plain, nil
|
||||
}
|
||||
|
||||
// decryptPayload is used to decrypt a message with a given key,
|
||||
// and verify it's contents. Any padding will be removed, and a
|
||||
// slice to the plaintext is returned. Decryption is done IN PLACE!
|
||||
func decryptPayload(keys [][]byte, msg []byte, data []byte) ([]byte, error) {
|
||||
// Ensure we have at least one byte
|
||||
if len(msg) == 0 {
|
||||
return nil, fmt.Errorf("Cannot decrypt empty payload")
|
||||
}
|
||||
|
||||
// Verify the version
|
||||
vsn := encryptionVersion(msg[0])
|
||||
if vsn > maxEncryptionVersion {
|
||||
return nil, fmt.Errorf("Unsupported encryption version %d", msg[0])
|
||||
}
|
||||
|
||||
// Ensure the length is sane
|
||||
if len(msg) < encryptedLength(vsn, 0) {
|
||||
return nil, fmt.Errorf("Payload is too small to decrypt: %d", len(msg))
|
||||
}
|
||||
|
||||
for _, key := range keys {
|
||||
plain, err := decryptMessage(key, msg, data)
|
||||
if err == nil {
|
||||
// Remove the PKCS7 padding for vsn 0
|
||||
if vsn == 0 {
|
||||
return pkcs7decode(plain, aes.BlockSize), nil
|
||||
} else {
|
||||
return plain, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("No installed keys could decrypt the message")
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,130 @@
|
|||
package memberlist
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// suspicion manages the suspect timer for a node and provides an interface
|
||||
// to accelerate the timeout as we get more independent confirmations that
|
||||
// a node is suspect.
|
||||
type suspicion struct {
|
||||
// n is the number of independent confirmations we've seen. This must
|
||||
// be updated using atomic instructions to prevent contention with the
|
||||
// timer callback.
|
||||
n int32
|
||||
|
||||
// k is the number of independent confirmations we'd like to see in
|
||||
// order to drive the timer to its minimum value.
|
||||
k int32
|
||||
|
||||
// min is the minimum timer value.
|
||||
min time.Duration
|
||||
|
||||
// max is the maximum timer value.
|
||||
max time.Duration
|
||||
|
||||
// start captures the timestamp when we began the timer. This is used
|
||||
// so we can calculate durations to feed the timer during updates in
|
||||
// a way that achieves the overall time we'd like.
|
||||
start time.Time
|
||||
|
||||
// timer is the underlying timer that implements the timeout.
|
||||
timer *time.Timer
|
||||
|
||||
// timeoutFn is the function to call when the timer expires. We hold on to this
|
||||
// because there are cases where we call it directly.
|
||||
timeoutFn func()
|
||||
|
||||
// confirmations is a map of "from" nodes that have confirmed a given
|
||||
// node is suspect. This prevents double counting.
|
||||
confirmations map[string]struct{}
|
||||
}
|
||||
|
||||
// newSuspicion returns a timer started with the max time, and that will drive
|
||||
// to the min time after seeing k or more confirmations. The from node will be
|
||||
// excluded from confirmations since we might get our own suspicion message
|
||||
// gossiped back to us. The minimum time will be used if no confirmations are
|
||||
// called for (k <= 0).
|
||||
func newSuspicion(from string, k int, min time.Duration, max time.Duration, fn func(int)) *suspicion {
|
||||
s := &suspicion{
|
||||
k: int32(k),
|
||||
min: min,
|
||||
max: max,
|
||||
confirmations: make(map[string]struct{}),
|
||||
}
|
||||
|
||||
// Exclude the from node from any confirmations.
|
||||
s.confirmations[from] = struct{}{}
|
||||
|
||||
// Pass the number of confirmations into the timeout function for
|
||||
// easy telemetry.
|
||||
s.timeoutFn = func() {
|
||||
fn(int(atomic.LoadInt32(&s.n)))
|
||||
}
|
||||
|
||||
// If there aren't any confirmations to be made then take the min
|
||||
// time from the start.
|
||||
timeout := max
|
||||
if k < 1 {
|
||||
timeout = min
|
||||
}
|
||||
s.timer = time.AfterFunc(timeout, s.timeoutFn)
|
||||
|
||||
// Capture the start time right after starting the timer above so
|
||||
// we should always err on the side of a little longer timeout if
|
||||
// there's any preemption that separates this and the step above.
|
||||
s.start = time.Now()
|
||||
return s
|
||||
}
|
||||
|
||||
// remainingSuspicionTime computes how much longer to wait before declaring a
// node dead, given n confirmations out of the k wanted, the time already
// elapsed, and the min/max bounds of the timer. The total timeout shrinks
// logarithmically from max towards min as n grows and never drops below min.
// The result may be negative, in which case the timer should fire at once.
func remainingSuspicionTime(n, k int32, elapsed time.Duration, min, max time.Duration) time.Duration {
	progress := math.Log(float64(n)+1.0) / math.Log(float64(k)+1.0)
	maxSec, minSec := max.Seconds(), min.Seconds()
	rawSec := maxSec - progress*(maxSec-minSec)

	// Truncate to whole milliseconds, then clamp to the lower bound.
	total := time.Duration(math.Floor(1000.0*rawSec)) * time.Millisecond
	if total < min {
		return min - elapsed
	}

	// Subtract what has already passed so the overall timeout is honoured.
	return total - elapsed
}
|
||||
|
||||
// Confirm registers that a possibly new peer has also determined the given
|
||||
// node is suspect. This returns true if this was new information, and false
|
||||
// if it was a duplicate confirmation, or if we've got enough confirmations to
|
||||
// hit the minimum.
|
||||
func (s *suspicion) Confirm(from string) bool {
|
||||
// If we've got enough confirmations then stop accepting them.
|
||||
if atomic.LoadInt32(&s.n) >= s.k {
|
||||
return false
|
||||
}
|
||||
|
||||
// Only allow one confirmation from each possible peer.
|
||||
if _, ok := s.confirmations[from]; ok {
|
||||
return false
|
||||
}
|
||||
s.confirmations[from] = struct{}{}
|
||||
|
||||
// Compute the new timeout given the current number of confirmations and
|
||||
// adjust the timer. If the timeout becomes negative *and* we can cleanly
|
||||
// stop the timer then we will call the timeout function directly from
|
||||
// here.
|
||||
n := atomic.AddInt32(&s.n, 1)
|
||||
elapsed := time.Now().Sub(s.start)
|
||||
remaining := remainingSuspicionTime(n, s.k, elapsed, s.min, s.max)
|
||||
if s.timer.Stop() {
|
||||
if remaining > 0 {
|
||||
s.timer.Reset(remaining)
|
||||
} else {
|
||||
go s.timeoutFn()
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
# TODO
|
||||
* Dynamic RTT discovery
|
||||
* Compute 99th percentile for ping/ack
|
||||
* Better lower bound for ping/ack, faster failure detection
|
||||
* Dynamic MTU discovery
|
||||
* Prevent lost updates, increases efficiency
|
|
@ -0,0 +1,288 @@
|
|||
package memberlist
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/lzw"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-msgpack/codec"
|
||||
"github.com/sean-/seed"
|
||||
)
|
||||
|
||||
// pushPullScaleThreshold is the minimum number of nodes
|
||||
// before we start scaling the push/pull timing. The scale
|
||||
// effect is the log2(Nodes) - log2(pushPullScaleThreshold). This means
|
||||
// that the 33rd node will cause us to double the interval,
|
||||
// while the 65th will triple it.
|
||||
const pushPullScaleThreshold = 32
|
||||
|
||||
const (
|
||||
// lzwLitWidth is the literal code width handed to lzw.NewWriter and
// lzw.NewReader in this file; litWidth may be 2-8.
|
||||
lzwLitWidth = 8
|
||||
)
|
||||
|
||||
// init calls seed.Init when the package is loaded.
// NOTE(review): presumably this seeds math/rand, which the random helpers in
// this file draw from — confirm against github.com/sean-/seed.
func init() {
|
||||
seed.Init()
|
||||
}
|
||||
|
||||
// Decode reverses the encode operation on a byte slice input
|
||||
func decode(buf []byte, out interface{}) error {
|
||||
r := bytes.NewReader(buf)
|
||||
hd := codec.MsgpackHandle{}
|
||||
dec := codec.NewDecoder(r, &hd)
|
||||
return dec.Decode(out)
|
||||
}
|
||||
|
||||
// Encode writes an encoded object to a new bytes buffer
|
||||
func encode(msgType messageType, in interface{}) (*bytes.Buffer, error) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
buf.WriteByte(uint8(msgType))
|
||||
hd := codec.MsgpackHandle{}
|
||||
enc := codec.NewEncoder(buf, &hd)
|
||||
err := enc.Encode(in)
|
||||
return buf, err
|
||||
}
|
||||
|
||||
// randomOffset returns a random offset in the range [0, n). A zero n yields 0.
func randomOffset(n int) int {
	if n != 0 {
		return int(rand.Uint32() % uint32(n))
	}
	return 0
}
|
||||
|
||||
// suspicionTimeout computes how long a node stays suspected: interval times
// suspicionMult, scaled by log10 of the cluster size n. The scale factor is
// never less than 1.
func suspicionTimeout(suspicionMult, n int, interval time.Duration) time.Duration {
	nodes := math.Max(1.0, float64(n))
	nodeScale := math.Max(1.0, math.Log10(nodes))

	// time.Duration is an int64, so carry the scale as thousandths to keep
	// some of its fractional precision.
	scaled := time.Duration(nodeScale * 1000)
	return time.Duration(suspicionMult) * scaled * interval / 1000
}
|
||||
|
||||
// retransmitLimit computes the maximum number of times a message is
// retransmitted: retransmitMult times the ceiling of log10(n+1), where n is
// the number of nodes.
func retransmitLimit(retransmitMult, n int) int {
	return retransmitMult * int(math.Ceil(math.Log10(float64(n+1))))
}
|
||||
|
||||
// shuffleNodes randomly shuffles the input nodes using the Fisher-Yates shuffle
|
||||
func shuffleNodes(nodes []*nodeState) {
|
||||
n := len(nodes)
|
||||
for i := n - 1; i > 0; i-- {
|
||||
j := rand.Intn(i + 1)
|
||||
nodes[i], nodes[j] = nodes[j], nodes[i]
|
||||
}
|
||||
}
|
||||
|
||||
// pushPushScale is used to scale the time interval at which push/pull
|
||||
// syncs take place. It is used to prevent network saturation as the
|
||||
// cluster size grows
|
||||
func pushPullScale(interval time.Duration, n int) time.Duration {
|
||||
// Don't scale until we cross the threshold
|
||||
if n <= pushPullScaleThreshold {
|
||||
return interval
|
||||
}
|
||||
|
||||
multiplier := math.Ceil(math.Log2(float64(n))-math.Log2(pushPullScaleThreshold)) + 1.0
|
||||
return time.Duration(multiplier) * interval
|
||||
}
|
||||
|
||||
// moveDeadNodes moves nodes that are dead and beyond the gossip to the dead interval
|
||||
// to the end of the slice and returns the index of the first moved node.
|
||||
func moveDeadNodes(nodes []*nodeState, gossipToTheDeadTime time.Duration) int {
|
||||
numDead := 0
|
||||
n := len(nodes)
|
||||
for i := 0; i < n-numDead; i++ {
|
||||
if nodes[i].State != stateDead {
|
||||
continue
|
||||
}
|
||||
|
||||
// Respect the gossip to the dead interval
|
||||
if time.Since(nodes[i].StateChange) <= gossipToTheDeadTime {
|
||||
continue
|
||||
}
|
||||
|
||||
// Move this node to the end
|
||||
nodes[i], nodes[n-numDead-1] = nodes[n-numDead-1], nodes[i]
|
||||
numDead++
|
||||
i--
|
||||
}
|
||||
return n - numDead
|
||||
}
|
||||
|
||||
// kRandomNodes is used to select up to k random nodes, excluding any nodes where
|
||||
// the filter function returns true. It is possible that less than k nodes are
|
||||
// returned.
|
||||
func kRandomNodes(k int, nodes []*nodeState, filterFn func(*nodeState) bool) []*nodeState {
|
||||
n := len(nodes)
|
||||
kNodes := make([]*nodeState, 0, k)
|
||||
OUTER:
|
||||
// Probe up to 3*n times, with large n this is not necessary
|
||||
// since k << n, but with small n we want search to be
|
||||
// exhaustive
|
||||
for i := 0; i < 3*n && len(kNodes) < k; i++ {
|
||||
// Get random node
|
||||
idx := randomOffset(n)
|
||||
node := nodes[idx]
|
||||
|
||||
// Give the filter a shot at it.
|
||||
if filterFn != nil && filterFn(node) {
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// Check if we have this node already
|
||||
for j := 0; j < len(kNodes); j++ {
|
||||
if node == kNodes[j] {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
|
||||
// Append the node
|
||||
kNodes = append(kNodes, node)
|
||||
}
|
||||
return kNodes
|
||||
}
|
||||
|
||||
// makeCompoundMessage takes a list of messages and generates
|
||||
// a single compound message containing all of them
|
||||
func makeCompoundMessage(msgs [][]byte) *bytes.Buffer {
|
||||
// Create a local buffer
|
||||
buf := bytes.NewBuffer(nil)
|
||||
|
||||
// Write out the type
|
||||
buf.WriteByte(uint8(compoundMsg))
|
||||
|
||||
// Write out the number of message
|
||||
buf.WriteByte(uint8(len(msgs)))
|
||||
|
||||
// Add the message lengths
|
||||
for _, m := range msgs {
|
||||
binary.Write(buf, binary.BigEndian, uint16(len(m)))
|
||||
}
|
||||
|
||||
// Append the messages
|
||||
for _, m := range msgs {
|
||||
buf.Write(m)
|
||||
}
|
||||
|
||||
return buf
|
||||
}
|
||||
|
||||
// decodeCompoundMessage splits a compound message into its individual
// messages. buf must start at the message count byte (the type byte already
// removed), followed by one big-endian uint16 length per message and then
// the message bodies.
//
// It returns the messages that could be extracted in full (as sub-slices of
// buf), the number of messages that were truncated, and an error when the
// count byte or the length table itself is missing.
func decodeCompoundMessage(buf []byte) (trunc int, parts [][]byte, err error) {
	if len(buf) < 1 {
		return 0, nil, fmt.Errorf("missing compound length byte")
	}

	// Widen the count to int straight away: doing the size arithmetic in
	// uint8 wraps around for 128 or more parts.
	numParts := int(buf[0])
	buf = buf[1:]

	// Check we have enough bytes for the whole length table.
	if len(buf) < numParts*2 {
		return 0, nil, fmt.Errorf("truncated len slice")
	}

	// Decode the lengths.
	lengths := make([]uint16, numParts)
	for i := range lengths {
		lengths[i] = binary.BigEndian.Uint16(buf[i*2 : i*2+2])
	}
	buf = buf[numParts*2:]

	// Split each message.
	for idx, msgLen := range lengths {
		if len(buf) < int(msgLen) {
			// This message and every one after it is incomplete.
			return numParts - idx, parts, nil
		}

		// Extract the slice, seek past on the buffer.
		parts = append(parts, buf[:msgLen])
		buf = buf[msgLen:]
	}
	return 0, parts, nil
}
|
||||
|
||||
// hasPort reports whether s includes a port. s is expected to have one of the
// forms "host", "host:port", "ipv6::addr" or "[ipv6::address]:port".
func hasPort(s string) bool {
	lastColon := strings.LastIndex(s, ":")
	switch {
	case lastColon < 0:
		// No colon at all, so no port.
		return false
	case s[0] == '[':
		// Bracketed address: a port follows only if the final colon comes
		// right after the closing bracket.
		return s[lastColon-1] == ']'
	default:
		// Unbracketed: exactly one colon means host:port, more than one
		// means a bare IPv6 address.
		return strings.Index(s, ":") == lastColon
	}
}
|
||||
|
||||
// compressPayload takes an opaque input buffer, compresses it
|
||||
// and wraps it in a compress{} message that is encoded.
|
||||
func compressPayload(inp []byte) (*bytes.Buffer, error) {
|
||||
var buf bytes.Buffer
|
||||
compressor := lzw.NewWriter(&buf, lzw.LSB, lzwLitWidth)
|
||||
|
||||
_, err := compressor.Write(inp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Ensure we flush everything out
|
||||
if err := compressor.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create a compressed message
|
||||
c := compress{
|
||||
Algo: lzwAlgo,
|
||||
Buf: buf.Bytes(),
|
||||
}
|
||||
return encode(compressMsg, &c)
|
||||
}
|
||||
|
||||
// decompressPayload is used to unpack an encoded compress{}
|
||||
// message and return its payload uncompressed
|
||||
func decompressPayload(msg []byte) ([]byte, error) {
|
||||
// Decode the message
|
||||
var c compress
|
||||
if err := decode(msg, &c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return decompressBuffer(&c)
|
||||
}
|
||||
|
||||
// decompressBuffer is used to decompress the buffer of
|
||||
// a single compress message, handling multiple algorithms
|
||||
func decompressBuffer(c *compress) ([]byte, error) {
|
||||
// Verify the algorithm
|
||||
if c.Algo != lzwAlgo {
|
||||
return nil, fmt.Errorf("Cannot decompress unknown algorithm %d", c.Algo)
|
||||
}
|
||||
|
||||
// Create a uncompressor
|
||||
uncomp := lzw.NewReader(bytes.NewReader(c.Buf), lzw.LSB, lzwLitWidth)
|
||||
defer uncomp.Close()
|
||||
|
||||
// Read all the data
|
||||
var b bytes.Buffer
|
||||
_, err := io.Copy(&b, uncomp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Return the uncompressed bytes
|
||||
return b.Bytes(), nil
|
||||
}
|
|
@ -0,0 +1,354 @@
|
|||
Mozilla Public License, version 2.0
|
||||
|
||||
1. Definitions
|
||||
|
||||
1.1. “Contributor”
|
||||
|
||||
means each individual or legal entity that creates, contributes to the
|
||||
creation of, or owns Covered Software.
|
||||
|
||||
1.2. “Contributor Version”
|
||||
|
||||
means the combination of the Contributions of others (if any) used by a
|
||||
Contributor and that particular Contributor’s Contribution.
|
||||
|
||||
1.3. “Contribution”
|
||||
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. “Covered Software”
|
||||
|
||||
means Source Code Form to which the initial Contributor has attached the
|
||||
notice in Exhibit A, the Executable Form of such Source Code Form, and
|
||||
Modifications of such Source Code Form, in each case including portions
|
||||
thereof.
|
||||
|
||||
1.5. “Incompatible With Secondary Licenses”
|
||||
means
|
||||
|
||||
a. that the initial Contributor has attached the notice described in
|
||||
Exhibit B to the Covered Software; or
|
||||
|
||||
b. that the Covered Software was made available under the terms of version
|
||||
1.1 or earlier of the License, but not also under the terms of a
|
||||
Secondary License.
|
||||
|
||||
1.6. “Executable Form”
|
||||
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. “Larger Work”
|
||||
|
||||
means a work that combines Covered Software with other material, in a separate
|
||||
file or files, that is not Covered Software.
|
||||
|
||||
1.8. “License”
|
||||
|
||||
means this document.
|
||||
|
||||
1.9. “Licensable”
|
||||
|
||||
means having the right to grant, to the maximum extent possible, whether at the
|
||||
time of the initial grant or subsequently, any and all of the rights conveyed by
|
||||
this License.
|
||||
|
||||
1.10. “Modifications”
|
||||
|
||||
means any of the following:
|
||||
|
||||
a. any file in Source Code Form that results from an addition to, deletion
|
||||
from, or modification of the contents of Covered Software; or
|
||||
|
||||
b. any new file in Source Code Form that contains any Covered Software.
|
||||
|
||||
1.11. “Patent Claims” of a Contributor
|
||||
|
||||
means any patent claim(s), including without limitation, method, process,
|
||||
and apparatus claims, in any patent Licensable by such Contributor that
|
||||
would be infringed, but for the grant of the License, by the making,
|
||||
using, selling, offering for sale, having made, import, or transfer of
|
||||
either its Contributions or its Contributor Version.
|
||||
|
||||
1.12. “Secondary License”
|
||||
|
||||
means either the GNU General Public License, Version 2.0, the GNU Lesser
|
||||
General Public License, Version 2.1, the GNU Affero General Public
|
||||
License, Version 3.0, or any later versions of those licenses.
|
||||
|
||||
1.13. “Source Code Form”
|
||||
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. “You” (or “Your”)
|
||||
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, “You” includes any entity that controls, is
|
||||
controlled by, or is under common control with You. For purposes of this
|
||||
definition, “control” means (a) the power, direct or indirect, to cause
|
||||
the direction or management of such entity, whether by contract or
|
||||
otherwise, or (b) ownership of more than fifty percent (50%) of the
|
||||
outstanding shares or beneficial ownership of such entity.
|
||||
|
||||
|
||||
2. License Grants and Conditions
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
a. under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or as
|
||||
part of a Larger Work; and
|
||||
|
||||
b. under Patent Claims of such Contributor to make, use, sell, offer for
|
||||
sale, have made, import, and otherwise transfer either its Contributions
|
||||
or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution become
|
||||
effective for each Contribution on the date the Contributor first distributes
|
||||
such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under this
|
||||
License. No additional rights or licenses will be implied from the distribution
|
||||
or licensing of Covered Software under this License. Notwithstanding Section
|
||||
2.1(b) above, no patent license is granted by a Contributor:
|
||||
|
||||
a. for any code that a Contributor has removed from Covered Software; or
|
||||
|
||||
b. for infringements caused by: (i) Your and any other third party’s
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
c. under Patent Claims infringed by Covered Software in the absence of its
|
||||
Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks, or
|
||||
logos of any Contributor (except as may be necessary to comply with the
|
||||
notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this License
|
||||
(see Section 10.2) or under the terms of a Secondary License (if permitted
|
||||
under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its Contributions
|
||||
are its original creation(s) or it has sufficient rights to grant the
|
||||
rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under applicable
|
||||
copyright doctrines of fair use, fair dealing, or other equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
|
||||
Section 2.1.
|
||||
|
||||
|
||||
3. Responsibilities
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under the
|
||||
terms of this License. You must inform recipients that the Source Code Form
|
||||
of the Covered Software is governed by the terms of this License, and how
|
||||
they can obtain a copy of this License. You may not attempt to alter or
|
||||
restrict the recipients’ rights in the Source Code Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
a. such Covered Software must also be made available in Source Code Form,
|
||||
as described in Section 3.1, and You must inform recipients of the
|
||||
Executable Form how they can obtain a copy of such Source Code Form by
|
||||
reasonable means in a timely manner, at a charge no more than the cost
|
||||
of distribution to the recipient; and
|
||||
|
||||
b. You may distribute such Executable Form under the terms of this License,
|
||||
or sublicense it under different terms, provided that the license for
|
||||
the Executable Form does not attempt to limit or alter the recipients’
|
||||
rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for the
|
||||
Covered Software. If the Larger Work is a combination of Covered Software
|
||||
with a work governed by one or more Secondary Licenses, and the Covered
|
||||
Software is not Incompatible With Secondary Licenses, this License permits
|
||||
You to additionally distribute such Covered Software under the terms of
|
||||
such Secondary License(s), so that the recipient of the Larger Work may, at
|
||||
their option, further distribute the Covered Software under the terms of
|
||||
either this License or such Secondary License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices (including
|
||||
copyright notices, patent notices, disclaimers of warranty, or limitations
|
||||
of liability) contained within the Source Code Form of the Covered
|
||||
Software, except that You may alter any license notices to the extent
|
||||
required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on behalf
|
||||
of any Contributor. You must make it absolutely clear that any such
|
||||
warranty, support, indemnity, or liability obligation is offered by You
|
||||
alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this License
|
||||
with respect to some or all of the Covered Software due to statute, judicial
|
||||
order, or regulation then You must: (a) comply with the terms of this License
|
||||
to the maximum extent possible; and (b) describe the limitations and the code
|
||||
they affect. Such description must be placed in a text file included with all
|
||||
distributions of the Covered Software under this License. Except to the
|
||||
extent prohibited by statute or regulation, such description must be
|
||||
sufficiently detailed for a recipient of ordinary skill to be able to
|
||||
understand it.
|
||||
|
||||
5. Termination
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically if You
|
||||
fail to comply with any of its terms. However, if You become compliant,
|
||||
then the rights granted under this License from a particular Contributor
|
||||
are reinstated (a) provisionally, unless and until such Contributor
|
||||
explicitly and finally terminates Your grants, and (b) on an ongoing basis,
|
||||
if such Contributor fails to notify You of the non-compliance by some
|
||||
reasonable means prior to 60 days after You have come back into compliance.
|
||||
Moreover, Your grants from a particular Contributor are reinstated on an
|
||||
ongoing basis if such Contributor notifies You of the non-compliance by
|
||||
some reasonable means, this is the first time You have received notice of
|
||||
non-compliance with this License from such Contributor, and You become
|
||||
compliant prior to 30 days after Your receipt of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions, counter-claims,
|
||||
and cross-claims) alleging that a Contributor Version directly or
|
||||
indirectly infringes any patent, then the rights granted to You by any and
|
||||
all Contributors for the Covered Software under Section 2.1 of this License
|
||||
shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
|
||||
license agreements (excluding distributors and resellers) which have been
|
||||
validly granted by You or Your distributors under this License prior to
|
||||
termination shall survive termination.
|
||||
|
||||
6. Disclaimer of Warranty
|
||||
|
||||
Covered Software is provided under this License on an “as is” basis, without
|
||||
warranty of any kind, either expressed, implied, or statutory, including,
|
||||
without limitation, warranties that the Covered Software is free of defects,
|
||||
merchantable, fit for a particular purpose or non-infringing. The entire
|
||||
risk as to the quality and performance of the Covered Software is with You.
|
||||
Should any Covered Software prove defective in any respect, You (not any
|
||||
Contributor) assume the cost of any necessary servicing, repair, or
|
||||
correction. This disclaimer of warranty constitutes an essential part of this
|
||||
License. No use of any Covered Software is authorized under this License
|
||||
except under this disclaimer.
|
||||
|
||||
7. Limitation of Liability
|
||||
|
||||
Under no circumstances and under no legal theory, whether tort (including
|
||||
negligence), contract, or otherwise, shall any Contributor, or anyone who
|
||||
distributes Covered Software as permitted above, be liable to You for any
|
||||
direct, indirect, special, incidental, or consequential damages of any
|
||||
character including, without limitation, damages for lost profits, loss of
|
||||
goodwill, work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses, even if such party shall have been
|
||||
informed of the possibility of such damages. This limitation of liability
|
||||
shall not apply to liability for death or personal injury resulting from such
|
||||
party’s negligence to the extent applicable law prohibits such limitation.
|
||||
Some jurisdictions do not allow the exclusion or limitation of incidental or
|
||||
consequential damages, so this exclusion and limitation may not apply to You.
|
||||
|
||||
8. Litigation
|
||||
|
||||
Any litigation relating to this License may be brought only in the courts of
|
||||
a jurisdiction where the defendant maintains its principal place of business
|
||||
and such litigation shall be governed by laws of that jurisdiction, without
|
||||
reference to its conflict-of-law provisions. Nothing in this Section shall
|
||||
prevent a party’s ability to bring cross-claims or counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
|
||||
This License represents the complete agreement concerning the subject matter
|
||||
hereof. If any provision of this License is held to be unenforceable, such
|
||||
provision shall be reformed only to the extent necessary to make it
|
||||
enforceable. Any law or regulation which provides that the language of a
|
||||
contract shall be construed against the drafter shall not be used to construe
|
||||
this License against a Contributor.
|
||||
|
||||
|
||||
10. Versions of the License
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version of
|
||||
the License under which You originally received the Covered Software, or
|
||||
under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a modified
|
||||
version of this License if you rename the license and remove any
|
||||
references to the name of the license steward (except to note that such
|
||||
modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
|
||||
If You choose to distribute Source Code Form that is Incompatible With
|
||||
Secondary Licenses under the terms of this version of the License, the
|
||||
notice described in Exhibit B of this License must be attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
|
||||
This Source Code Form is subject to the
|
||||
terms of the Mozilla Public License, v.
|
||||
2.0. If a copy of the MPL was not
|
||||
distributed with this file, You can
|
||||
obtain one at
|
||||
http://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular file, then
|
||||
You may include the notice in a location (such as a LICENSE file in a relevant
|
||||
directory) where a recipient would be likely to look for such a notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
||||
|
||||
Exhibit B - “Incompatible With Secondary Licenses” Notice
|
||||
|
||||
This Source Code Form is “Incompatible
|
||||
With Secondary Licenses”, as defined by
|
||||
the Mozilla Public License, v. 2.0.
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
# Test-only imports of every package below this directory. $(shell ...) is
# required here: a bare $(go list ...) is treated by make as a reference to an
# undefined variable and expands to nothing, leaving DEPS empty.
DEPS = $(shell go list -f '{{range .TestImports}}{{.}} {{end}}' ./...)

# Run the unit tests.
test:
	go test -timeout=60s ./...

# Run the unit tests, then the tests matching "Integ" with INTEG_TESTS set.
integ: test
	INTEG_TESTS=yes go test -timeout=5s -run=Integ ./...

# Fetch the package dependencies, then each test-only import listed in DEPS.
deps:
	go get -d -v ./...
	echo $(DEPS) | xargs -n1 go get -d

# Write an HTML coverage report to /tmp/coverage.html and open it.
cov:
	INTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html
	open /tmp/coverage.html

.PHONY: test cov integ deps
|
|
@ -0,0 +1,89 @@
|
|||
raft [![Build Status](https://travis-ci.org/hashicorp/raft.png)](https://travis-ci.org/hashicorp/raft)
|
||||
====
|
||||
|
||||
raft is a [Go](http://www.golang.org) library that manages a replicated
|
||||
log and can be used with an FSM to manage replicated state machines. It
|
||||
is a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)).
|
||||
|
||||
The use cases for such a library are far-reaching as replicated state
|
||||
machines are a key component of many distributed systems. They enable
|
||||
building Consistent, Partition Tolerant (CP) systems, with limited
|
||||
fault tolerance as well.
|
||||
|
||||
## Building
|
||||
|
||||
If you wish to build raft you'll need Go version 1.2+ installed.
|
||||
|
||||
Please check your installation with:
|
||||
|
||||
```
|
||||
go version
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft).
|
||||
|
||||
To prevent complications with cgo, the primary backend `MDBStore` is in a separate repository,
|
||||
called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation
|
||||
for the `LogStore` and `StableStore`.
|
||||
|
||||
A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available called
|
||||
[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`
|
||||
and `StableStore`.
|
||||
|
||||
## Protocol
|
||||
|
||||
raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
|
||||
|
||||
A high level overview of the Raft protocol is described below, but for details please read the full
|
||||
[Raft paper](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
|
||||
followed by the raft source. Any questions about the raft protocol should be sent to the
|
||||
[raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev).
|
||||
|
||||
### Protocol Description
|
||||
|
||||
Raft nodes are always in one of three states: follower, candidate or leader. All
|
||||
nodes initially start out as a follower. In this state, nodes can accept log entries
|
||||
from a leader and cast votes. If no entries are received for some time, nodes
|
||||
self-promote to the candidate state. In the candidate state nodes request votes from
|
||||
their peers. If a candidate receives a quorum of votes, then it is promoted to a leader.
|
||||
The leader must accept new log entries and replicate to all the other followers.
|
||||
In addition, if stale reads are not acceptable, all queries must also be performed on
|
||||
the leader.
|
||||
|
||||
Once a cluster has a leader, it is able to accept new log entries. A client can
|
||||
request that a leader append a new log entry, which is an opaque binary blob to
|
||||
Raft. The leader then writes the entry to durable storage and attempts to replicate
|
||||
to a quorum of followers. Once the log entry is considered *committed*, it can be
|
||||
*applied* to a finite state machine. The finite state machine is application specific,
|
||||
and is implemented using an interface.
|
||||
|
||||
An obvious question relates to the unbounded nature of a replicated log. Raft provides
|
||||
a mechanism by which the current state is snapshotted, and the log is compacted. Because
|
||||
of the FSM abstraction, restoring the state of the FSM must result in the same state
|
||||
as a replay of old logs. This allows Raft to capture the FSM state at a point in time,
|
||||
and then remove all the logs that were used to reach that state. This is performed automatically
|
||||
without user intervention, and prevents unbounded disk usage as well as minimizing
|
||||
time spent replaying logs.
|
||||
|
||||
Lastly, there is the issue of updating the peer set when new servers are joining
|
||||
or existing servers are leaving. As long as a quorum of nodes is available, this
|
||||
is not an issue as Raft provides mechanisms to dynamically update the peer set.
|
||||
If a quorum of nodes is unavailable, then this becomes a very challenging issue.
|
||||
For example, suppose there are only 2 peers, A and B. The quorum size is also
|
||||
2, meaning both nodes must agree to commit a log entry. If either A or B fails,
|
||||
it is now impossible to reach quorum. This means the cluster is unable to add,
|
||||
or remove a node, or commit any additional log entries. This results in *unavailability*.
|
||||
At this point, manual intervention would be required to remove either A or B,
|
||||
and to restart the remaining node in bootstrap mode.
|
||||
|
||||
A Raft cluster of 3 nodes can tolerate a single node failure, while a cluster
|
||||
of 5 can tolerate 2 node failures. The recommended configuration is to either
|
||||
run 3 or 5 raft servers. This maximizes availability without
|
||||
greatly sacrificing performance.
|
||||
|
||||
In terms of performance, Raft is comparable to Paxos. Assuming stable leadership,
|
||||
committing a log entry requires a single round trip to half of the cluster.
|
||||
Thus performance is bound by disk I/O and network latency.
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,151 @@
|
|||
package raft
|
||||
|
||||
// RPCHeader is a common sub-structure used to pass along protocol version and
|
||||
// other information about the cluster. For older Raft implementations before
|
||||
// versioning was added this will default to a zero-valued structure when read
|
||||
// by newer Raft versions.
|
||||
type RPCHeader struct {
|
||||
// ProtocolVersion is the version of the protocol the sender is
|
||||
// speaking.
|
||||
ProtocolVersion ProtocolVersion
|
||||
}
|
||||
|
||||
// WithRPCHeader is an interface that exposes the RPC header.
|
||||
type WithRPCHeader interface {
|
||||
GetRPCHeader() RPCHeader
|
||||
}
|
||||
|
||||
// AppendEntriesRequest is the command used to append entries to the
|
||||
// replicated log.
|
||||
type AppendEntriesRequest struct {
|
||||
RPCHeader
|
||||
|
||||
// Provide the current term and leader
|
||||
Term uint64
|
||||
Leader []byte
|
||||
|
||||
// Provide the previous entries for integrity checking
|
||||
PrevLogEntry uint64
|
||||
PrevLogTerm uint64
|
||||
|
||||
// New entries to commit
|
||||
Entries []*Log
|
||||
|
||||
// Commit index on the leader
|
||||
LeaderCommitIndex uint64
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *AppendEntriesRequest) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// AppendEntriesResponse is the response returned from an
|
||||
// AppendEntriesRequest.
|
||||
type AppendEntriesResponse struct {
|
||||
RPCHeader
|
||||
|
||||
// Newer term if leader is out of date
|
||||
Term uint64
|
||||
|
||||
// Last Log is a hint to help accelerate rebuilding slow nodes
|
||||
LastLog uint64
|
||||
|
||||
// We may not succeed if we have a conflicting entry
|
||||
Success bool
|
||||
|
||||
// There are scenarios where this request didn't succeed
|
||||
// but there's no need to wait/back-off the next attempt.
|
||||
NoRetryBackoff bool
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *AppendEntriesResponse) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// RequestVoteRequest is the command used by a candidate to ask a Raft peer
|
||||
// for a vote in an election.
|
||||
type RequestVoteRequest struct {
|
||||
RPCHeader
|
||||
|
||||
// Provide the term and our id
|
||||
Term uint64
|
||||
Candidate []byte
|
||||
|
||||
// Used to ensure safety
|
||||
LastLogIndex uint64
|
||||
LastLogTerm uint64
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *RequestVoteRequest) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// RequestVoteResponse is the response returned from a RequestVoteRequest.
|
||||
type RequestVoteResponse struct {
|
||||
RPCHeader
|
||||
|
||||
// Newer term if leader is out of date.
|
||||
Term uint64
|
||||
|
||||
// Peers is deprecated, but required by servers that only understand
|
||||
// protocol version 0. This is not populated in protocol version 2
|
||||
// and later.
|
||||
Peers []byte
|
||||
|
||||
// Is the vote granted.
|
||||
Granted bool
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *RequestVoteResponse) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its
|
||||
// log (and state machine) from a snapshot on another peer.
|
||||
type InstallSnapshotRequest struct {
|
||||
RPCHeader
|
||||
SnapshotVersion SnapshotVersion
|
||||
|
||||
Term uint64
|
||||
Leader []byte
|
||||
|
||||
// These are the last index/term included in the snapshot
|
||||
LastLogIndex uint64
|
||||
LastLogTerm uint64
|
||||
|
||||
// Peer Set in the snapshot. This is deprecated in favor of Configuration
|
||||
// but remains here in case we receive an InstallSnapshot from a leader
|
||||
// that's running old code.
|
||||
Peers []byte
|
||||
|
||||
// Cluster membership.
|
||||
Configuration []byte
|
||||
// Log index where 'Configuration' entry was originally written.
|
||||
ConfigurationIndex uint64
|
||||
|
||||
// Size of the snapshot
|
||||
Size int64
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *InstallSnapshotRequest) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// InstallSnapshotResponse is the response returned from an
|
||||
// InstallSnapshotRequest.
|
||||
type InstallSnapshotResponse struct {
|
||||
RPCHeader
|
||||
|
||||
Term uint64
|
||||
Success bool
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *InstallSnapshotResponse) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
package raft
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Commitment is used to advance the leader's commit index. The leader and
|
||||
// replication goroutines report in newly written entries with Match(), and
|
||||
// this notifies on commitCh when the commit index has advanced.
|
||||
type commitment struct {
|
||||
// protectes matchIndexes and commitIndex
|
||||
sync.Mutex
|
||||
// notified when commitIndex increases
|
||||
commitCh chan struct{}
|
||||
// voter ID to log index: the server stores up through this log entry
|
||||
matchIndexes map[ServerID]uint64
|
||||
// a quorum stores up through this log entry. monotonically increases.
|
||||
commitIndex uint64
|
||||
// the first index of this leader's term: this needs to be replicated to a
|
||||
// majority of the cluster before this leader may mark anything committed
|
||||
// (per Raft's commitment rule)
|
||||
startIndex uint64
|
||||
}
|
||||
|
||||
// newCommitment returns an commitment struct that notifies the provided
|
||||
// channel when log entries have been committed. A new commitment struct is
|
||||
// created each time this server becomes leader for a particular term.
|
||||
// 'configuration' is the servers in the cluster.
|
||||
// 'startIndex' is the first index created in this term (see
|
||||
// its description above).
|
||||
func newCommitment(commitCh chan struct{}, configuration Configuration, startIndex uint64) *commitment {
|
||||
matchIndexes := make(map[ServerID]uint64)
|
||||
for _, server := range configuration.Servers {
|
||||
if server.Suffrage == Voter {
|
||||
matchIndexes[server.ID] = 0
|
||||
}
|
||||
}
|
||||
return &commitment{
|
||||
commitCh: commitCh,
|
||||
matchIndexes: matchIndexes,
|
||||
commitIndex: 0,
|
||||
startIndex: startIndex,
|
||||
}
|
||||
}
|
||||
|
||||
// Called when a new cluster membership configuration is created: it will be
|
||||
// used to determine commitment from now on. 'configuration' is the servers in
|
||||
// the cluster.
|
||||
func (c *commitment) setConfiguration(configuration Configuration) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
oldMatchIndexes := c.matchIndexes
|
||||
c.matchIndexes = make(map[ServerID]uint64)
|
||||
for _, server := range configuration.Servers {
|
||||
if server.Suffrage == Voter {
|
||||
c.matchIndexes[server.ID] = oldMatchIndexes[server.ID] // defaults to 0
|
||||
}
|
||||
}
|
||||
c.recalculate()
|
||||
}
|
||||
|
||||
// Called by leader after commitCh is notified
|
||||
func (c *commitment) getCommitIndex() uint64 {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
return c.commitIndex
|
||||
}
|
||||
|
||||
// Match is called once a server completes writing entries to disk: either the
|
||||
// leader has written the new entry or a follower has replied to an
|
||||
// AppendEntries RPC. The given server's disk agrees with this server's log up
|
||||
// through the given index.
|
||||
func (c *commitment) match(server ServerID, matchIndex uint64) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
if prev, hasVote := c.matchIndexes[server]; hasVote && matchIndex > prev {
|
||||
c.matchIndexes[server] = matchIndex
|
||||
c.recalculate()
|
||||
}
|
||||
}
|
||||
|
||||
// Internal helper to calculate new commitIndex from matchIndexes.
|
||||
// Must be called with lock held.
|
||||
func (c *commitment) recalculate() {
|
||||
if len(c.matchIndexes) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
matched := make([]uint64, 0, len(c.matchIndexes))
|
||||
for _, idx := range c.matchIndexes {
|
||||
matched = append(matched, idx)
|
||||
}
|
||||
sort.Sort(uint64Slice(matched))
|
||||
quorumMatchIndex := matched[(len(matched)-1)/2]
|
||||
|
||||
if quorumMatchIndex > c.commitIndex && quorumMatchIndex >= c.startIndex {
|
||||
c.commitIndex = quorumMatchIndex
|
||||
asyncNotifyCh(c.commitCh)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,258 @@
|
|||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
// These are the versions of the protocol (which includes RPC messages as
|
||||
// well as Raft-specific log entries) that this server can _understand_. Use
|
||||
// the ProtocolVersion member of the Config object to control the version of
|
||||
// the protocol to use when _speaking_ to other servers. Note that depending on
|
||||
// the protocol version being spoken, some otherwise understood RPC messages
|
||||
// may be refused. See dispositionRPC for details of this logic.
|
||||
//
|
||||
// There are notes about the upgrade path in the description of the versions
|
||||
// below. If you are starting a fresh cluster then there's no reason not to
|
||||
// jump right to the latest protocol version. If you need to interoperate with
|
||||
// older, version 0 Raft servers you'll need to drive the cluster through the
|
||||
// different versions in order.
|
||||
//
|
||||
// The version details are complicated, but here's a summary of what's required
|
||||
// to get from a version 0 cluster to version 3:
|
||||
//
|
||||
// 1. In version N of your app that starts using the new Raft library with
|
||||
// versioning, set ProtocolVersion to 1.
|
||||
// 2. Make version N+1 of your app require version N as a prerequisite (all
|
||||
// servers must be upgraded). For version N+1 of your app set ProtocolVersion
|
||||
// to 2.
|
||||
// 3. Similarly, make version N+2 of your app require version N+1 as a
|
||||
// prerequisite. For version N+2 of your app, set ProtocolVersion to 3.
|
||||
//
|
||||
// During this upgrade, older cluster members will still have Server IDs equal
|
||||
// to their network addresses. To upgrade an older member and give it an ID, it
|
||||
// needs to leave the cluster and re-enter:
|
||||
//
|
||||
// 1. Remove the server from the cluster with RemoveServer, using its network
|
||||
// address as its ServerID.
|
||||
// 2. Update the server's config to a better ID (restarting the server).
|
||||
// 3. Add the server back to the cluster with AddVoter, using its new ID.
|
||||
//
|
||||
// You can do this during the rolling upgrade from N+1 to N+2 of your app, or
|
||||
// as a rolling change at any time after the upgrade.
|
||||
//
|
||||
// Version History
|
||||
//
|
||||
// 0: Original Raft library before versioning was added. Servers running this
|
||||
// version of the Raft library use AddPeerDeprecated/RemovePeerDeprecated
|
||||
// for all configuration changes, and have no support for LogConfiguration.
|
||||
// 1: First versioned protocol, used to interoperate with old servers, and begin
|
||||
// the migration path to newer versions of the protocol. Under this version
|
||||
// all configuration changes are propagated using the now-deprecated
|
||||
// RemovePeerDeprecated Raft log entry. This means that server IDs are always
|
||||
// set to be the same as the server addresses (since the old log entry type
|
||||
// cannot transmit an ID), and only AddPeer/RemovePeer APIs are supported.
|
||||
// Servers running this version of the protocol can understand the new
|
||||
// LogConfiguration Raft log entry but will never generate one so they can
|
||||
// remain compatible with version 0 Raft servers in the cluster.
|
||||
// 2: Transitional protocol used when migrating an existing cluster to the new
|
||||
// server ID system. Server IDs are still set to be the same as server
|
||||
// addresses, but all configuration changes are propagated using the new
|
||||
// LogConfiguration Raft log entry type, which can carry full ID information.
|
||||
// This version supports the old AddPeer/RemovePeer APIs as well as the new
|
||||
// ID-based AddVoter/RemoveServer APIs which should be used when adding
|
||||
// version 3 servers to the cluster later. This version sheds all
|
||||
// interoperability with version 0 servers, but can interoperate with newer
|
||||
// Raft servers running with protocol version 1 since they can understand the
|
||||
// new LogConfiguration Raft log entry, and this version can still understand
|
||||
// their RemovePeerDeprecated Raft log entries. We need this protocol version
|
||||
// as an intermediate step between 1 and 3 so that servers will propagate the
|
||||
// ID information that will come from newly-added (or -rolled) servers using
|
||||
// protocol version 3, but since they are still using their address-based IDs
|
||||
// from the previous step they will still be able to track commitments and
|
||||
// their own voting status properly. If we skipped this step, servers would
|
||||
// be started with their new IDs, but they wouldn't see themselves in the old
|
||||
// address-based configuration, so none of the servers would think they had a
|
||||
// vote.
|
||||
// 3: Protocol adding full support for server IDs and new ID-based server APIs
|
||||
// (AddVoter, AddNonvoter, etc.), old AddPeer/RemovePeer APIs are no longer
|
||||
// supported. Version 2 servers should be swapped out by removing them from
|
||||
// the cluster one-by-one and re-adding them with updated configuration for
|
||||
// this protocol version, along with their server ID. The remove/add cycle
|
||||
// is required to populate their server ID. Note that removing must be done
|
||||
// by ID, which will be the old server's address.
|
||||
type ProtocolVersion int
|
||||
|
||||
const (
|
||||
// ProtocolVersionMin is the oldest protocol version this server can understand.
ProtocolVersionMin ProtocolVersion = 0
|
||||
// ProtocolVersionMax is the newest protocol version this server can understand.
ProtocolVersionMax = 3
|
||||
)
|
||||
|
||||
// These are versions of snapshots that this server can _understand_. Currently,
|
||||
// it is always assumed that this server generates the latest version, though
|
||||
// this may be changed in the future to include a configurable version.
|
||||
//
|
||||
// Version History
|
||||
//
|
||||
// 0: Original Raft library before versioning was added. The peers portion of
|
||||
// these snapshots is encoded in the legacy format which requires decodePeers
|
||||
// to parse. This version of snapshots should only be produced by the
|
||||
// unversioned Raft library.
|
||||
// 1: New format which adds support for a full configuration structure and its
|
||||
// associated log index, with support for server IDs and non-voting server
|
||||
// modes. To ease upgrades, this also includes the legacy peers structure but
|
||||
// that will never be used by servers that understand version 1 snapshots.
|
||||
// Since the original Raft library didn't enforce any versioning, we must
|
||||
// include the legacy peers structure for this version, but we can deprecate
|
||||
// it in the next snapshot version.
|
||||
type SnapshotVersion int
|
||||
|
||||
const (
|
||||
// SnapshotVersionMin is the oldest snapshot version this server can understand.
SnapshotVersionMin SnapshotVersion = 0
|
||||
// SnapshotVersionMax is the newest snapshot version this server can understand.
SnapshotVersionMax = 1
|
||||
)
|
||||
|
||||
// Config provides any necessary configuration for the Raft server.
|
||||
type Config struct {
|
||||
// ProtocolVersion allows a Raft server to inter-operate with older
|
||||
// Raft servers running an older version of the code. This is used to
|
||||
// version the wire protocol as well as Raft-specific log entries that
|
||||
// the server uses when _speaking_ to other servers. There is currently
|
||||
// no auto-negotiation of versions so all servers must be manually
|
||||
// configured with compatible versions. See ProtocolVersionMin and
|
||||
// ProtocolVersionMax for the versions of the protocol that this server
|
||||
// can _understand_.
|
||||
ProtocolVersion ProtocolVersion
|
||||
|
||||
// HeartbeatTimeout specifies the time in follower state without
|
||||
// a leader before we attempt an election.
|
||||
HeartbeatTimeout time.Duration
|
||||
|
||||
// ElectionTimeout specifies the time in candidate state without
|
||||
// a leader before we attempt an election.
|
||||
ElectionTimeout time.Duration
|
||||
|
||||
// CommitTimeout controls the time without an Apply() operation
|
||||
// before we heartbeat to ensure a timely commit. Due to random
|
||||
// staggering, may be delayed as much as 2x this value.
|
||||
CommitTimeout time.Duration
|
||||
|
||||
// MaxAppendEntries controls the maximum number of append entries
|
||||
// to send at once. We want to strike a balance between efficiency
|
||||
// and avoiding waste if the follower is going to reject because of
|
||||
// an inconsistent log.
|
||||
MaxAppendEntries int
|
||||
|
||||
// If we are a member of a cluster, and RemovePeer is invoked for the
|
||||
// local node, then we forget all peers and transition into the follower state.
|
||||
// If ShutdownOnRemove is set, we additionally shut down Raft. Otherwise,
|
||||
// we can become a leader of a cluster containing only this node.
|
||||
ShutdownOnRemove bool
|
||||
|
||||
// TrailingLogs controls how many logs we leave after a snapshot. This is
|
||||
// used so that we can quickly replay logs on a follower instead of being
|
||||
// forced to send an entire snapshot.
|
||||
TrailingLogs uint64
|
||||
|
||||
// SnapshotInterval controls how often we check if we should perform a snapshot.
|
||||
// We randomly stagger between this value and 2x this value to prevent the entire
|
||||
// cluster from performing a snapshot at once.
|
||||
SnapshotInterval time.Duration
|
||||
|
||||
// SnapshotThreshold controls how many outstanding logs there must be before
|
||||
// we perform a snapshot. This is to prevent excessive snapshots when we can
|
||||
// just replay a small set of logs.
|
||||
SnapshotThreshold uint64
|
||||
|
||||
// LeaderLeaseTimeout is used to control how long the "lease" lasts
|
||||
// for being the leader without being able to contact a quorum
|
||||
// of nodes. If we reach this interval without contact, we will
|
||||
// step down as leader.
|
||||
LeaderLeaseTimeout time.Duration
|
||||
|
||||
// StartAsLeader forces Raft to start in the leader state. This should
|
||||
// never be used except for testing purposes, as it can cause a split-brain.
|
||||
StartAsLeader bool
|
||||
|
||||
// The unique ID for this server across all time. When running with
|
||||
// ProtocolVersion < 3, you must set this to be the same as the network
|
||||
// address of your transport.
|
||||
LocalID ServerID
|
||||
|
||||
// NotifyCh is used to provide a channel that will be notified of leadership
|
||||
// changes. Raft will block writing to this channel, so it should either be
|
||||
// buffered or aggressively consumed.
|
||||
NotifyCh chan<- bool
|
||||
|
||||
// LogOutput is used as a sink for logs, unless Logger is specified.
|
||||
// Defaults to os.Stderr.
|
||||
LogOutput io.Writer
|
||||
|
||||
// Logger is a user-provided logger. If nil, a logger writing to LogOutput
|
||||
// is used.
|
||||
Logger *log.Logger
|
||||
}
|
||||
|
||||
// DefaultConfig returns a Config with usable defaults.
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
ProtocolVersion: ProtocolVersionMax,
|
||||
HeartbeatTimeout: 1000 * time.Millisecond,
|
||||
ElectionTimeout: 1000 * time.Millisecond,
|
||||
CommitTimeout: 50 * time.Millisecond,
|
||||
MaxAppendEntries: 64,
|
||||
ShutdownOnRemove: true,
|
||||
TrailingLogs: 10240,
|
||||
SnapshotInterval: 120 * time.Second,
|
||||
SnapshotThreshold: 8192,
|
||||
LeaderLeaseTimeout: 500 * time.Millisecond,
|
||||
}
|
||||
}
|
||||
|
||||
// ValidateConfig is used to validate a sane configuration
|
||||
func ValidateConfig(config *Config) error {
|
||||
// We don't actually support running as 0 in the library any more, but
|
||||
// we do understand it.
|
||||
protocolMin := ProtocolVersionMin
|
||||
if protocolMin == 0 {
|
||||
protocolMin = 1
|
||||
}
|
||||
if config.ProtocolVersion < protocolMin ||
|
||||
config.ProtocolVersion > ProtocolVersionMax {
|
||||
return fmt.Errorf("Protocol version %d must be >= %d and <= %d",
|
||||
config.ProtocolVersion, protocolMin, ProtocolVersionMax)
|
||||
}
|
||||
if len(config.LocalID) == 0 {
|
||||
return fmt.Errorf("LocalID cannot be empty")
|
||||
}
|
||||
if config.HeartbeatTimeout < 5*time.Millisecond {
|
||||
return fmt.Errorf("Heartbeat timeout is too low")
|
||||
}
|
||||
if config.ElectionTimeout < 5*time.Millisecond {
|
||||
return fmt.Errorf("Election timeout is too low")
|
||||
}
|
||||
if config.CommitTimeout < time.Millisecond {
|
||||
return fmt.Errorf("Commit timeout is too low")
|
||||
}
|
||||
if config.MaxAppendEntries <= 0 {
|
||||
return fmt.Errorf("MaxAppendEntries must be positive")
|
||||
}
|
||||
if config.MaxAppendEntries > 1024 {
|
||||
return fmt.Errorf("MaxAppendEntries is too large")
|
||||
}
|
||||
if config.SnapshotInterval < 5*time.Millisecond {
|
||||
return fmt.Errorf("Snapshot interval is too low")
|
||||
}
|
||||
if config.LeaderLeaseTimeout < 5*time.Millisecond {
|
||||
return fmt.Errorf("Leader lease timeout is too low")
|
||||
}
|
||||
if config.LeaderLeaseTimeout > config.HeartbeatTimeout {
|
||||
return fmt.Errorf("Leader lease timeout cannot be larger than heartbeat timeout")
|
||||
}
|
||||
if config.ElectionTimeout < config.HeartbeatTimeout {
|
||||
return fmt.Errorf("Election timeout must be equal or greater than Heartbeat Timeout")
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,343 @@
|
|||
package raft
|
||||
|
||||
import "fmt"
|
||||
|
||||
// ServerSuffrage determines whether a Server in a Configuration gets a vote.
type ServerSuffrage int

// Note: Don't renumber these, since the numbers are written into the log.
const (
	// Voter is a server whose vote is counted in elections and whose match index
	// is used in advancing the leader's commit index.
	Voter ServerSuffrage = iota
	// Nonvoter is a server that receives log entries but is not considered for
	// elections or commitment purposes.
	Nonvoter
	// Staging is a server that acts like a nonvoter with one exception: once a
	// staging server receives enough log entries to be sufficiently caught up to
	// the leader's log, the leader will invoke a membership change to change
	// the Staging server to a Voter.
	Staging
)

// String returns the name of the suffrage value, or the generic string
// "ServerSuffrage" for any value that is not one of the declared constants.
func (s ServerSuffrage) String() string {
	names := [...]string{
		Voter:    "Voter",
		Nonvoter: "Nonvoter",
		Staging:  "Staging",
	}
	if s >= 0 && int(s) < len(names) {
		return names[s]
	}
	return "ServerSuffrage"
}
|
||||
|
||||
// ServerID is a unique string identifying a server for all time.
|
||||
type ServerID string
|
||||
|
||||
// ServerAddress is a network address for a server that a transport can contact.
|
||||
type ServerAddress string
|
||||
|
||||
// Server tracks the information about a single server in a configuration.
|
||||
type Server struct {
|
||||
// Suffrage determines whether the server gets a vote.
|
||||
Suffrage ServerSuffrage
|
||||
// ID is a unique string identifying this server for all time.
|
||||
ID ServerID
|
||||
// Address is its network address that a transport can contact.
|
||||
Address ServerAddress
|
||||
}
|
||||
|
||||
// Configuration tracks which servers are in the cluster, and whether they have
|
||||
// votes. This should include the local server, if it's a member of the cluster.
|
||||
// The servers are listed in no particular order, but each should only appear once.
|
||||
// These entries are appended to the log during membership changes.
|
||||
type Configuration struct {
|
||||
Servers []Server
|
||||
}
|
||||
|
||||
// Clone makes a deep copy of a Configuration.
|
||||
func (c *Configuration) Clone() (copy Configuration) {
|
||||
copy.Servers = append(copy.Servers, c.Servers...)
|
||||
return
|
||||
}
|
||||
|
||||
// ConfigurationChangeCommand enumerates the different ways to change the
// cluster configuration.
type ConfigurationChangeCommand uint8

const (
	// AddStaging makes a server Staging unless it's a Voter.
	AddStaging ConfigurationChangeCommand = iota
	// AddNonvoter makes a server Nonvoter unless it's Staging or Voter.
	AddNonvoter
	// DemoteVoter makes a server Nonvoter unless it's absent.
	DemoteVoter
	// RemoveServer removes a server entirely from the cluster membership.
	RemoveServer
	// Promote is created automatically by a leader; it turns a Staging server
	// into a Voter.
	Promote
)

// String returns the name of the command, or the generic string
// "ConfigurationChangeCommand" for any value that is not a declared constant.
func (c ConfigurationChangeCommand) String() string {
	names := [...]string{
		AddStaging:   "AddStaging",
		AddNonvoter:  "AddNonvoter",
		DemoteVoter:  "DemoteVoter",
		RemoveServer: "RemoveServer",
		Promote:      "Promote",
	}
	if int(c) < len(names) {
		return names[c]
	}
	return "ConfigurationChangeCommand"
}
|
||||
|
||||
// configurationChangeRequest describes a change that a leader would like to
|
||||
// make to its current configuration. It's used only within a single server
|
||||
// (never serialized into the log), as part of `configurationChangeFuture`.
|
||||
type configurationChangeRequest struct {
|
||||
command ConfigurationChangeCommand
|
||||
serverID ServerID
|
||||
serverAddress ServerAddress // only present for AddStaging, AddNonvoter
|
||||
// prevIndex, if nonzero, is the index of the only configuration upon which
|
||||
// this change may be applied; if another configuration entry has been
|
||||
// added in the meantime, this request will fail.
|
||||
prevIndex uint64
|
||||
}
|
||||
|
||||
// configurations is state tracked on every server about its Configurations.
|
||||
// Note that, per Diego's dissertation, there can be at most one uncommitted
|
||||
// configuration at a time (the next configuration may not be created until the
|
||||
// prior one has been committed).
|
||||
//
|
||||
// One downside to storing just two configurations is that if you try to take a
|
||||
// snapshot when your state machine hasn't yet applied the committedIndex, we
|
||||
// have no record of the configuration that would logically fit into that
|
||||
// snapshot. We disallow snapshots in that case now. An alternative approach,
|
||||
// which LogCabin uses, is to track every configuration change in the
|
||||
// log.
|
||||
type configurations struct {
|
||||
// committed is the latest configuration in the log/snapshot that has been
|
||||
// committed (the one with the largest index).
|
||||
committed Configuration
|
||||
// committedIndex is the log index where 'committed' was written.
|
||||
committedIndex uint64
|
||||
// latest is the latest configuration in the log/snapshot (may be committed
|
||||
// or uncommitted)
|
||||
latest Configuration
|
||||
// latestIndex is the log index where 'latest' was written.
|
||||
latestIndex uint64
|
||||
}
|
||||
|
||||
// Clone makes a deep copy of a configurations object.
|
||||
func (c *configurations) Clone() (copy configurations) {
|
||||
copy.committed = c.committed.Clone()
|
||||
copy.committedIndex = c.committedIndex
|
||||
copy.latest = c.latest.Clone()
|
||||
copy.latestIndex = c.latestIndex
|
||||
return
|
||||
}
|
||||
|
||||
// hasVote returns true if the server identified by 'id' is a Voter in the
|
||||
// provided Configuration.
|
||||
func hasVote(configuration Configuration, id ServerID) bool {
|
||||
for _, server := range configuration.Servers {
|
||||
if server.ID == id {
|
||||
return server.Suffrage == Voter
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// checkConfiguration tests a cluster membership configuration for common
|
||||
// errors.
|
||||
func checkConfiguration(configuration Configuration) error {
|
||||
idSet := make(map[ServerID]bool)
|
||||
addressSet := make(map[ServerAddress]bool)
|
||||
var voters int
|
||||
for _, server := range configuration.Servers {
|
||||
if server.ID == "" {
|
||||
return fmt.Errorf("Empty ID in configuration: %v", configuration)
|
||||
}
|
||||
if server.Address == "" {
|
||||
return fmt.Errorf("Empty address in configuration: %v", server)
|
||||
}
|
||||
if idSet[server.ID] {
|
||||
return fmt.Errorf("Found duplicate ID in configuration: %v", server.ID)
|
||||
}
|
||||
idSet[server.ID] = true
|
||||
if addressSet[server.Address] {
|
||||
return fmt.Errorf("Found duplicate address in configuration: %v", server.Address)
|
||||
}
|
||||
addressSet[server.Address] = true
|
||||
if server.Suffrage == Voter {
|
||||
voters++
|
||||
}
|
||||
}
|
||||
if voters == 0 {
|
||||
return fmt.Errorf("Need at least one voter in configuration: %v", configuration)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// nextConfiguration generates a new Configuration from the current one and a
|
||||
// configuration change request. It's split from appendConfigurationEntry so
|
||||
// that it can be unit tested easily.
|
||||
func nextConfiguration(current Configuration, currentIndex uint64, change configurationChangeRequest) (Configuration, error) {
|
||||
if change.prevIndex > 0 && change.prevIndex != currentIndex {
|
||||
return Configuration{}, fmt.Errorf("Configuration changed since %v (latest is %v)", change.prevIndex, currentIndex)
|
||||
}
|
||||
|
||||
configuration := current.Clone()
|
||||
switch change.command {
|
||||
case AddStaging:
|
||||
// TODO: barf on new address?
|
||||
newServer := Server{
|
||||
// TODO: This should add the server as Staging, to be automatically
|
||||
// promoted to Voter later. However, the promoton to Voter is not yet
|
||||
// implemented, and doing so is not trivial with the way the leader loop
|
||||
// coordinates with the replication goroutines today. So, for now, the
|
||||
// server will have a vote right away, and the Promote case below is
|
||||
// unused.
|
||||
Suffrage: Voter,
|
||||
ID: change.serverID,
|
||||
Address: change.serverAddress,
|
||||
}
|
||||
found := false
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID {
|
||||
if server.Suffrage == Voter {
|
||||
configuration.Servers[i].Address = change.serverAddress
|
||||
} else {
|
||||
configuration.Servers[i] = newServer
|
||||
}
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
configuration.Servers = append(configuration.Servers, newServer)
|
||||
}
|
||||
case AddNonvoter:
|
||||
newServer := Server{
|
||||
Suffrage: Nonvoter,
|
||||
ID: change.serverID,
|
||||
Address: change.serverAddress,
|
||||
}
|
||||
found := false
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID {
|
||||
if server.Suffrage != Nonvoter {
|
||||
configuration.Servers[i].Address = change.serverAddress
|
||||
} else {
|
||||
configuration.Servers[i] = newServer
|
||||
}
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
configuration.Servers = append(configuration.Servers, newServer)
|
||||
}
|
||||
case DemoteVoter:
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID {
|
||||
configuration.Servers[i].Suffrage = Nonvoter
|
||||
break
|
||||
}
|
||||
}
|
||||
case RemoveServer:
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID {
|
||||
configuration.Servers = append(configuration.Servers[:i], configuration.Servers[i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
case Promote:
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID && server.Suffrage == Staging {
|
||||
configuration.Servers[i].Suffrage = Voter
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we didn't do something bad like remove the last voter
|
||||
if err := checkConfiguration(configuration); err != nil {
|
||||
return Configuration{}, err
|
||||
}
|
||||
|
||||
return configuration, nil
|
||||
}
|
||||
|
||||
// encodePeers is used to serialize a Configuration into the old peers format.
|
||||
// This is here for backwards compatibility when operating with a mix of old
|
||||
// servers and should be removed once we deprecate support for protocol version 1.
|
||||
func encodePeers(configuration Configuration, trans Transport) []byte {
|
||||
// Gather up all the voters, other suffrage types are not supported by
|
||||
// this data format.
|
||||
var encPeers [][]byte
|
||||
for _, server := range configuration.Servers {
|
||||
if server.Suffrage == Voter {
|
||||
encPeers = append(encPeers, trans.EncodePeer(server.Address))
|
||||
}
|
||||
}
|
||||
|
||||
// Encode the entire array.
|
||||
buf, err := encodeMsgPack(encPeers)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("failed to encode peers: %v", err))
|
||||
}
|
||||
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// decodePeers is used to deserialize an old list of peers into a Configuration.
|
||||
// This is here for backwards compatibility with old log entries and snapshots;
|
||||
// it should be removed eventually.
|
||||
func decodePeers(buf []byte, trans Transport) Configuration {
|
||||
// Decode the buffer first.
|
||||
var encPeers [][]byte
|
||||
if err := decodeMsgPack(buf, &encPeers); err != nil {
|
||||
panic(fmt.Errorf("failed to decode peers: %v", err))
|
||||
}
|
||||
|
||||
// Deserialize each peer.
|
||||
var servers []Server
|
||||
for _, enc := range encPeers {
|
||||
p := trans.DecodePeer(enc)
|
||||
servers = append(servers, Server{
|
||||
Suffrage: Voter,
|
||||
ID: ServerID(p),
|
||||
Address: ServerAddress(p),
|
||||
})
|
||||
}
|
||||
|
||||
return Configuration{
|
||||
Servers: servers,
|
||||
}
|
||||
}
|
||||
|
||||
// encodeConfiguration serializes a Configuration using MsgPack, or panics on
|
||||
// errors.
|
||||
func encodeConfiguration(configuration Configuration) []byte {
|
||||
buf, err := encodeMsgPack(configuration)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("failed to encode configuration: %v", err))
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// decodeConfiguration deserializes a Configuration using MsgPack, or panics on
|
||||
// errors.
|
||||
func decodeConfiguration(buf []byte) Configuration {
|
||||
var configuration Configuration
|
||||
if err := decodeMsgPack(buf, &configuration); err != nil {
|
||||
panic(fmt.Errorf("failed to decode configuration: %v", err))
|
||||
}
|
||||
return configuration
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// DiscardSnapshotStore is used to successfully snapshot while
|
||||
// always discarding the snapshot. This is useful for when the
|
||||
// log should be truncated but no snapshot should be retained.
|
||||
// This should never be used for production use, and is only
|
||||
// suitable for testing.
|
||||
type DiscardSnapshotStore struct{}
|
||||
|
||||
// DiscardSnapshotSink is the sink handed out by DiscardSnapshotStore.Create.
// It accepts every write and throws the data away.
type DiscardSnapshotSink struct{}
|
||||
|
||||
// NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore.
|
||||
func NewDiscardSnapshotStore() *DiscardSnapshotStore {
|
||||
return &DiscardSnapshotStore{}
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotStore) Create(version SnapshotVersion, index, term uint64,
|
||||
configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
|
||||
return &DiscardSnapshotSink{}, nil
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
|
||||
return nil, nil, fmt.Errorf("open is not supported")
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotSink) Write(b []byte) (int, error) {
|
||||
return len(b), nil
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotSink) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotSink) ID() string {
|
||||
return "discard"
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotSink) Cancel() error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,494 @@
|
|||
package raft
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"hash"
|
||||
"hash/crc64"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
// testPath is the name of the scratch file created and removed by testPermissions.
testPath = "permTest"
|
||||
// snapPath is the subdirectory of the base directory in which snapshots are kept.
snapPath = "snapshots"
|
||||
// metaFilePath is presumably the per-snapshot metadata file name; its use is
// outside this view, so confirm against writeMeta.
metaFilePath = "meta.json"
|
||||
// stateFilePath is the name of the state file created inside a snapshot directory.
stateFilePath = "state.bin"
|
||||
// tmpSuffix is appended to the snapshot name to form the directory that Create makes.
tmpSuffix = ".tmp"
|
||||
)
|
||||
|
||||
// FileSnapshotStore implements the SnapshotStore interface and allows
|
||||
// snapshots to be made on the local disk.
|
||||
type FileSnapshotStore struct {
|
||||
path string
|
||||
retain int
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// snapMetaSlice is a named slice of fileSnapshotMeta pointers.
// NOTE(review): presumably exists to carry sorting methods defined elsewhere
// in this file - confirm.
type snapMetaSlice []*fileSnapshotMeta
|
||||
|
||||
// FileSnapshotSink implements SnapshotSink with a file.
|
||||
type FileSnapshotSink struct {
|
||||
store *FileSnapshotStore
|
||||
logger *log.Logger
|
||||
dir string
|
||||
meta fileSnapshotMeta
|
||||
|
||||
stateFile *os.File
|
||||
stateHash hash.Hash64
|
||||
buffered *bufio.Writer
|
||||
|
||||
closed bool
|
||||
}
|
||||
|
||||
// fileSnapshotMeta is stored on disk. We also put a CRC
|
||||
// on disk so that we can verify the snapshot.
|
||||
type fileSnapshotMeta struct {
|
||||
SnapshotMeta
|
||||
CRC []byte
|
||||
}
|
||||
|
||||
// bufferedFile is returned when we open a snapshot. It pairs a buffered
// reader with the underlying file so that reads are buffered while Close
// still closes the file.
type bufferedFile struct {
	bh *bufio.Reader
	fh *os.File
}

// Read reads into p through the buffered reader.
func (b *bufferedFile) Read(p []byte) (int, error) {
	return b.bh.Read(p)
}

// Close closes the underlying file and returns its error, if any.
func (b *bufferedFile) Close() error {
	return b.fh.Close()
}
|
||||
|
||||
// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based
|
||||
// on a base directory. The `retain` parameter controls how many
|
||||
// snapshots are retained. Must be at least 1.
|
||||
func NewFileSnapshotStoreWithLogger(base string, retain int, logger *log.Logger) (*FileSnapshotStore, error) {
|
||||
if retain < 1 {
|
||||
return nil, fmt.Errorf("must retain at least one snapshot")
|
||||
}
|
||||
if logger == nil {
|
||||
logger = log.New(os.Stderr, "", log.LstdFlags)
|
||||
}
|
||||
|
||||
// Ensure our path exists
|
||||
path := filepath.Join(base, snapPath)
|
||||
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
|
||||
return nil, fmt.Errorf("snapshot path not accessible: %v", err)
|
||||
}
|
||||
|
||||
// Setup the store
|
||||
store := &FileSnapshotStore{
|
||||
path: path,
|
||||
retain: retain,
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
// Do a permissions test
|
||||
if err := store.testPermissions(); err != nil {
|
||||
return nil, fmt.Errorf("permissions test failed: %v", err)
|
||||
}
|
||||
return store, nil
|
||||
}
|
||||
|
||||
// NewFileSnapshotStore creates a new FileSnapshotStore based
|
||||
// on a base directory. The `retain` parameter controls how many
|
||||
// snapshots are retained. Must be at least 1.
|
||||
func NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) {
|
||||
if logOutput == nil {
|
||||
logOutput = os.Stderr
|
||||
}
|
||||
return NewFileSnapshotStoreWithLogger(base, retain, log.New(logOutput, "", log.LstdFlags))
|
||||
}
|
||||
|
||||
// testPermissions tries to touch a file in our path to see if it works.
|
||||
func (f *FileSnapshotStore) testPermissions() error {
|
||||
path := filepath.Join(f.path, testPath)
|
||||
fh, err := os.Create(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = fh.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = os.Remove(path); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// snapshotName generates a name for the snapshot of the form
// "<term>-<index>-<msec>", where msec is the current wall-clock time in
// milliseconds since the Unix epoch.
func snapshotName(term, index uint64) string {
	millis := time.Now().UnixNano() / int64(time.Millisecond)
	return fmt.Sprintf("%d-%d-%d", term, index, millis)
}
|
||||
|
||||
// Create is used to start a new snapshot
|
||||
func (f *FileSnapshotStore) Create(version SnapshotVersion, index, term uint64,
|
||||
configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
|
||||
// We only support version 1 snapshots at this time.
|
||||
if version != 1 {
|
||||
return nil, fmt.Errorf("unsupported snapshot version %d", version)
|
||||
}
|
||||
|
||||
// Create a new path
|
||||
name := snapshotName(term, index)
|
||||
path := filepath.Join(f.path, name+tmpSuffix)
|
||||
f.logger.Printf("[INFO] snapshot: Creating new snapshot at %s", path)
|
||||
|
||||
// Make the directory
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to make snapshot directory: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create the sink
|
||||
sink := &FileSnapshotSink{
|
||||
store: f,
|
||||
logger: f.logger,
|
||||
dir: path,
|
||||
meta: fileSnapshotMeta{
|
||||
SnapshotMeta: SnapshotMeta{
|
||||
Version: version,
|
||||
ID: name,
|
||||
Index: index,
|
||||
Term: term,
|
||||
Peers: encodePeers(configuration, trans),
|
||||
Configuration: configuration,
|
||||
ConfigurationIndex: configurationIndex,
|
||||
},
|
||||
CRC: nil,
|
||||
},
|
||||
}
|
||||
|
||||
// Write out the meta data
|
||||
if err := sink.writeMeta(); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Open the state file
|
||||
statePath := filepath.Join(path, stateFilePath)
|
||||
fh, err := os.Create(statePath)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to create state file: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
sink.stateFile = fh
|
||||
|
||||
// Create a CRC64 hash
|
||||
sink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA))
|
||||
|
||||
// Wrap both the hash and file in a MultiWriter with buffering
|
||||
multi := io.MultiWriter(sink.stateFile, sink.stateHash)
|
||||
sink.buffered = bufio.NewWriter(multi)
|
||||
|
||||
// Done
|
||||
return sink, nil
|
||||
}
|
||||
|
||||
// List returns available snapshots in the store.
|
||||
func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {
|
||||
// Get the eligible snapshots
|
||||
snapshots, err := f.getSnapshots()
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var snapMeta []*SnapshotMeta
|
||||
for _, meta := range snapshots {
|
||||
snapMeta = append(snapMeta, &meta.SnapshotMeta)
|
||||
if len(snapMeta) == f.retain {
|
||||
break
|
||||
}
|
||||
}
|
||||
return snapMeta, nil
|
||||
}
|
||||
|
||||
// getSnapshots returns all the known snapshots.
|
||||
func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {
|
||||
// Get the eligible snapshots
|
||||
snapshots, err := ioutil.ReadDir(f.path)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to scan snapshot dir: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Populate the metadata
|
||||
var snapMeta []*fileSnapshotMeta
|
||||
for _, snap := range snapshots {
|
||||
// Ignore any files
|
||||
if !snap.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
// Ignore any temporary snapshots
|
||||
dirName := snap.Name()
|
||||
if strings.HasSuffix(dirName, tmpSuffix) {
|
||||
f.logger.Printf("[WARN] snapshot: Found temporary snapshot: %v", dirName)
|
||||
continue
|
||||
}
|
||||
|
||||
// Try to read the meta data
|
||||
meta, err := f.readMeta(dirName)
|
||||
if err != nil {
|
||||
f.logger.Printf("[WARN] snapshot: Failed to read metadata for %v: %v", dirName, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Make sure we can understand this version.
|
||||
if meta.Version < SnapshotVersionMin || meta.Version > SnapshotVersionMax {
|
||||
f.logger.Printf("[WARN] snapshot: Snapshot version for %v not supported: %d", dirName, meta.Version)
|
||||
continue
|
||||
}
|
||||
|
||||
// Append, but only return up to the retain count
|
||||
snapMeta = append(snapMeta, meta)
|
||||
}
|
||||
|
||||
// Sort the snapshot, reverse so we get new -> old
|
||||
sort.Sort(sort.Reverse(snapMetaSlice(snapMeta)))
|
||||
|
||||
return snapMeta, nil
|
||||
}
|
||||
|
||||
// readMeta is used to read the meta data for a given named backup
|
||||
func (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) {
|
||||
// Open the meta file
|
||||
metaPath := filepath.Join(f.path, name, metaFilePath)
|
||||
fh, err := os.Open(metaPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// Buffer the file IO
|
||||
buffered := bufio.NewReader(fh)
|
||||
|
||||
// Read in the JSON
|
||||
meta := &fileSnapshotMeta{}
|
||||
dec := json.NewDecoder(buffered)
|
||||
if err := dec.Decode(meta); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return meta, nil
|
||||
}
|
||||
|
||||
// Open takes a snapshot ID and returns a ReadCloser for that snapshot.
|
||||
func (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
|
||||
// Get the metadata
|
||||
meta, err := f.readMeta(id)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to get meta data to open snapshot: %v", err)
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Open the state file
|
||||
statePath := filepath.Join(f.path, id, stateFilePath)
|
||||
fh, err := os.Open(statePath)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to open state file: %v", err)
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Create a CRC64 hash
|
||||
stateHash := crc64.New(crc64.MakeTable(crc64.ECMA))
|
||||
|
||||
// Compute the hash
|
||||
_, err = io.Copy(stateHash, fh)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to read state file: %v", err)
|
||||
fh.Close()
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Verify the hash
|
||||
computed := stateHash.Sum(nil)
|
||||
if bytes.Compare(meta.CRC, computed) != 0 {
|
||||
f.logger.Printf("[ERR] snapshot: CRC checksum failed (stored: %v computed: %v)",
|
||||
meta.CRC, computed)
|
||||
fh.Close()
|
||||
return nil, nil, fmt.Errorf("CRC mismatch")
|
||||
}
|
||||
|
||||
// Seek to the start
|
||||
if _, err := fh.Seek(0, 0); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: State file seek failed: %v", err)
|
||||
fh.Close()
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Return a buffered file
|
||||
buffered := &bufferedFile{
|
||||
bh: bufio.NewReader(fh),
|
||||
fh: fh,
|
||||
}
|
||||
|
||||
return &meta.SnapshotMeta, buffered, nil
|
||||
}
|
||||
|
||||
// ReapSnapshots reaps any snapshots beyond the retain count.
|
||||
func (f *FileSnapshotStore) ReapSnapshots() error {
|
||||
snapshots, err := f.getSnapshots()
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
for i := f.retain; i < len(snapshots); i++ {
|
||||
path := filepath.Join(f.path, snapshots[i].ID)
|
||||
f.logger.Printf("[INFO] snapshot: reaping snapshot %v", path)
|
||||
if err := os.RemoveAll(path); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to reap snapshot %v: %v", path, err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ID returns the ID of the snapshot, can be used with Open()
|
||||
// after the snapshot is finalized.
|
||||
func (s *FileSnapshotSink) ID() string {
|
||||
return s.meta.ID
|
||||
}
|
||||
|
||||
// Write is used to append to the state file. We write to the
|
||||
// buffered IO object to reduce the amount of context switches.
|
||||
func (s *FileSnapshotSink) Write(b []byte) (int, error) {
|
||||
return s.buffered.Write(b)
|
||||
}
|
||||
|
||||
// Close is used to indicate a successful end.
|
||||
func (s *FileSnapshotSink) Close() error {
|
||||
// Make sure close is idempotent
|
||||
if s.closed {
|
||||
return nil
|
||||
}
|
||||
s.closed = true
|
||||
|
||||
// Close the open handles
|
||||
if err := s.finalize(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Write out the meta data
|
||||
if err := s.writeMeta(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Move the directory into place
|
||||
newPath := strings.TrimSuffix(s.dir, tmpSuffix)
|
||||
if err := os.Rename(s.dir, newPath); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to move snapshot into place: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Reap any old snapshots
|
||||
if err := s.store.ReapSnapshots(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cancel is used to indicate an unsuccessful end.
|
||||
func (s *FileSnapshotSink) Cancel() error {
|
||||
// Make sure close is idempotent
|
||||
if s.closed {
|
||||
return nil
|
||||
}
|
||||
s.closed = true
|
||||
|
||||
// Close the open handles
|
||||
if err := s.finalize(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Attempt to remove all artifacts
|
||||
return os.RemoveAll(s.dir)
|
||||
}
|
||||
|
||||
// finalize is used to close all of our resources.
|
||||
func (s *FileSnapshotSink) finalize() error {
|
||||
// Flush any remaining data
|
||||
if err := s.buffered.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get the file size
|
||||
stat, statErr := s.stateFile.Stat()
|
||||
|
||||
// Close the file
|
||||
if err := s.stateFile.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set the file size, check after we close
|
||||
if statErr != nil {
|
||||
return statErr
|
||||
}
|
||||
s.meta.Size = stat.Size()
|
||||
|
||||
// Set the CRC
|
||||
s.meta.CRC = s.stateHash.Sum(nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeMeta is used to write out the metadata we have.
|
||||
func (s *FileSnapshotSink) writeMeta() error {
|
||||
// Open the meta file
|
||||
metaPath := filepath.Join(s.dir, metaFilePath)
|
||||
fh, err := os.Create(metaPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// Buffer the file IO
|
||||
buffered := bufio.NewWriter(fh)
|
||||
defer buffered.Flush()
|
||||
|
||||
// Write out as JSON
|
||||
enc := json.NewEncoder(buffered)
|
||||
if err := enc.Encode(&s.meta); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Implement the sort interface for []*fileSnapshotMeta.
|
||||
func (s snapMetaSlice) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s snapMetaSlice) Less(i, j int) bool {
|
||||
if s[i].Term != s[j].Term {
|
||||
return s[i].Term < s[j].Term
|
||||
}
|
||||
if s[i].Index != s[j].Index {
|
||||
return s[i].Index < s[j].Index
|
||||
}
|
||||
return s[i].ID < s[j].ID
|
||||
}
|
||||
|
||||
func (s snapMetaSlice) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
|
@ -0,0 +1,136 @@
|
|||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
)
|
||||
|
||||
// FSM provides an interface that can be implemented by
|
||||
// clients to make use of the replicated log.
|
||||
type FSM interface {
|
||||
// Apply log is invoked once a log entry is committed.
|
||||
// It returns a value which will be made available in the
|
||||
// ApplyFuture returned by Raft.Apply method if that
|
||||
// method was called on the same Raft node as the FSM.
|
||||
Apply(*Log) interface{}
|
||||
|
||||
// Snapshot is used to support log compaction. This call should
|
||||
// return an FSMSnapshot which can be used to save a point-in-time
|
||||
// snapshot of the FSM. Apply and Snapshot are not called in multiple
|
||||
// threads, but Apply will be called concurrently with Persist. This means
|
||||
// the FSM should be implemented in a fashion that allows for concurrent
|
||||
// updates while a snapshot is happening.
|
||||
Snapshot() (FSMSnapshot, error)
|
||||
|
||||
// Restore is used to restore an FSM from a snapshot. It is not called
|
||||
// concurrently with any other command. The FSM must discard all previous
|
||||
// state.
|
||||
Restore(io.ReadCloser) error
|
||||
}
|
||||
|
||||
// FSMSnapshot is returned by an FSM in response to a Snapshot
|
||||
// It must be safe to invoke FSMSnapshot methods with concurrent
|
||||
// calls to Apply.
|
||||
type FSMSnapshot interface {
|
||||
// Persist should dump all necessary state to the WriteCloser 'sink',
|
||||
// and call sink.Close() when finished or call sink.Cancel() on error.
|
||||
Persist(sink SnapshotSink) error
|
||||
|
||||
// Release is invoked when we are finished with the snapshot.
|
||||
Release()
|
||||
}
|
||||
|
||||
// runFSM is a long running goroutine responsible for applying logs
|
||||
// to the FSM. This is done async of other logs since we don't want
|
||||
// the FSM to block our internal operations.
|
||||
func (r *Raft) runFSM() {
|
||||
var lastIndex, lastTerm uint64
|
||||
|
||||
commit := func(req *commitTuple) {
|
||||
// Apply the log if a command
|
||||
var resp interface{}
|
||||
if req.log.Type == LogCommand {
|
||||
start := time.Now()
|
||||
resp = r.fsm.Apply(req.log)
|
||||
metrics.MeasureSince([]string{"raft", "fsm", "apply"}, start)
|
||||
}
|
||||
|
||||
// Update the indexes
|
||||
lastIndex = req.log.Index
|
||||
lastTerm = req.log.Term
|
||||
|
||||
// Invoke the future if given
|
||||
if req.future != nil {
|
||||
req.future.response = resp
|
||||
req.future.respond(nil)
|
||||
}
|
||||
}
|
||||
|
||||
restore := func(req *restoreFuture) {
|
||||
// Open the snapshot
|
||||
meta, source, err := r.snapshots.Open(req.ID)
|
||||
if err != nil {
|
||||
req.respond(fmt.Errorf("failed to open snapshot %v: %v", req.ID, err))
|
||||
return
|
||||
}
|
||||
|
||||
// Attempt to restore
|
||||
start := time.Now()
|
||||
if err := r.fsm.Restore(source); err != nil {
|
||||
req.respond(fmt.Errorf("failed to restore snapshot %v: %v", req.ID, err))
|
||||
source.Close()
|
||||
return
|
||||
}
|
||||
source.Close()
|
||||
metrics.MeasureSince([]string{"raft", "fsm", "restore"}, start)
|
||||
|
||||
// Update the last index and term
|
||||
lastIndex = meta.Index
|
||||
lastTerm = meta.Term
|
||||
req.respond(nil)
|
||||
}
|
||||
|
||||
snapshot := func(req *reqSnapshotFuture) {
|
||||
// Is there something to snapshot?
|
||||
if lastIndex == 0 {
|
||||
req.respond(ErrNothingNewToSnapshot)
|
||||
return
|
||||
}
|
||||
|
||||
// Start a snapshot
|
||||
start := time.Now()
|
||||
snap, err := r.fsm.Snapshot()
|
||||
metrics.MeasureSince([]string{"raft", "fsm", "snapshot"}, start)
|
||||
|
||||
// Respond to the request
|
||||
req.index = lastIndex
|
||||
req.term = lastTerm
|
||||
req.snapshot = snap
|
||||
req.respond(err)
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case ptr := <-r.fsmMutateCh:
|
||||
switch req := ptr.(type) {
|
||||
case *commitTuple:
|
||||
commit(req)
|
||||
|
||||
case *restoreFuture:
|
||||
restore(req)
|
||||
|
||||
default:
|
||||
panic(fmt.Errorf("bad type passed to fsmMutateCh: %#v", ptr))
|
||||
}
|
||||
|
||||
case req := <-r.fsmSnapshotCh:
|
||||
snapshot(req)
|
||||
|
||||
case <-r.shutdownCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,289 @@
|
|||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Future represents an action that may complete at some later point.
type Future interface {
	// Error blocks until the future arrives and then returns its error
	// status. It may be called any number of times; every call returns the
	// same value. It is not OK to call this method twice concurrently on
	// the same Future instance.
	Error() error
}
|
||||
|
||||
// IndexFuture is used for future actions that can result in a raft log entry
|
||||
// being created.
|
||||
type IndexFuture interface {
|
||||
Future
|
||||
|
||||
// Index holds the index of the newly applied log entry.
|
||||
// This must not be called until after the Error method has returned.
|
||||
Index() uint64
|
||||
}
|
||||
|
||||
// ApplyFuture is used for Apply and can return the FSM response.
|
||||
type ApplyFuture interface {
|
||||
IndexFuture
|
||||
|
||||
// Response returns the FSM response as returned
|
||||
// by the FSM.Apply method. This must not be called
|
||||
// until after the Error method has returned.
|
||||
Response() interface{}
|
||||
}
|
||||
|
||||
// ConfigurationFuture is used for GetConfiguration and can return the
|
||||
// latest configuration in use by Raft.
|
||||
type ConfigurationFuture interface {
|
||||
IndexFuture
|
||||
|
||||
// Configuration contains the latest configuration. This must
|
||||
// not be called until after the Error method has returned.
|
||||
Configuration() Configuration
|
||||
}
|
||||
|
||||
// SnapshotFuture is used for waiting on a user-triggered snapshot to complete.
|
||||
type SnapshotFuture interface {
|
||||
Future
|
||||
|
||||
// Open is a function you can call to access the underlying snapshot and
|
||||
// its metadata. This must not be called until after the Error method
|
||||
// has returned.
|
||||
Open() (*SnapshotMeta, io.ReadCloser, error)
|
||||
}
|
||||
|
||||
// errorFuture is a future that is already resolved with a fixed error.
type errorFuture struct {
	err error
}

// Error returns the stored error without blocking.
func (e errorFuture) Error() error {
	return e.err
}

// Response always returns nil; an errorFuture carries no FSM response.
func (e errorFuture) Response() interface{} {
	return nil
}

// Index always returns 0; an errorFuture has no associated log entry.
func (e errorFuture) Index() uint64 {
	return 0
}
|
||||
|
||||
// deferError can be embedded in a future type so that its error can be
// delivered at a later time.
type deferError struct {
	err       error
	errCh     chan error
	responded bool
}

// init allocates the one-slot channel the result is delivered on.
func (d *deferError) init() {
	d.errCh = make(chan error, 1)
}

// Error blocks until respond has been called and returns the delivered
// error. It panics when init was never called.
func (d *deferError) Error() error {
	// A cached non-nil error is returned directly. A delivered nil error
	// is not cached this way, but respond closes the channel after
	// sending, so the receive below yields nil again on later calls.
	if cached := d.err; cached != nil {
		return cached
	}
	if d.errCh == nil {
		panic("waiting for response on nil channel")
	}
	d.err = <-d.errCh
	return d.err
}

// respond delivers err to whoever calls Error. Only the first call has an
// effect, and it is a no-op when init was never called.
func (d *deferError) respond(err error) {
	if d.errCh == nil || d.responded {
		return
	}
	d.errCh <- err
	close(d.errCh)
	d.responded = true
}
|
||||
|
||||
// There are several types of requests that cause a configuration entry to
|
||||
// be appended to the log. These are encoded here for leaderLoop() to process.
|
||||
// This is internal to a single server.
|
||||
type configurationChangeFuture struct {
|
||||
logFuture
|
||||
req configurationChangeRequest
|
||||
}
|
||||
|
||||
// bootstrapFuture is used to attempt a live bootstrap of the cluster. See the
|
||||
// Raft object's BootstrapCluster member function for more details.
|
||||
type bootstrapFuture struct {
|
||||
deferError
|
||||
|
||||
// configuration is the proposed bootstrap configuration to apply.
|
||||
configuration Configuration
|
||||
}
|
||||
|
||||
// logFuture is used to apply a log entry and waits until
|
||||
// the log is considered committed.
|
||||
type logFuture struct {
|
||||
deferError
|
||||
log Log
|
||||
response interface{}
|
||||
dispatch time.Time
|
||||
}
|
||||
|
||||
func (l *logFuture) Response() interface{} {
|
||||
return l.response
|
||||
}
|
||||
|
||||
func (l *logFuture) Index() uint64 {
|
||||
return l.log.Index
|
||||
}
|
||||
|
||||
type shutdownFuture struct {
|
||||
raft *Raft
|
||||
}
|
||||
|
||||
func (s *shutdownFuture) Error() error {
|
||||
if s.raft == nil {
|
||||
return nil
|
||||
}
|
||||
s.raft.waitShutdown()
|
||||
if closeable, ok := s.raft.trans.(WithClose); ok {
|
||||
closeable.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// userSnapshotFuture is used for waiting on a user-triggered snapshot to
|
||||
// complete.
|
||||
type userSnapshotFuture struct {
|
||||
deferError
|
||||
|
||||
// opener is a function used to open the snapshot. This is filled in
|
||||
// once the future returns with no error.
|
||||
opener func() (*SnapshotMeta, io.ReadCloser, error)
|
||||
}
|
||||
|
||||
// Open is a function you can call to access the underlying snapshot and its
|
||||
// metadata.
|
||||
func (u *userSnapshotFuture) Open() (*SnapshotMeta, io.ReadCloser, error) {
|
||||
if u.opener == nil {
|
||||
return nil, nil, fmt.Errorf("no snapshot available")
|
||||
} else {
|
||||
// Invalidate the opener so it can't get called multiple times,
|
||||
// which isn't generally safe.
|
||||
defer func() {
|
||||
u.opener = nil
|
||||
}()
|
||||
return u.opener()
|
||||
}
|
||||
}
|
||||
|
||||
// userRestoreFuture is used for waiting on a user-triggered restore of an
|
||||
// external snapshot to complete.
|
||||
type userRestoreFuture struct {
|
||||
deferError
|
||||
|
||||
// meta is the metadata that belongs with the snapshot.
|
||||
meta *SnapshotMeta
|
||||
|
||||
// reader is the interface to read the snapshot contents from.
|
||||
reader io.Reader
|
||||
}
|
||||
|
||||
// reqSnapshotFuture is used for requesting a snapshot start.
|
||||
// It is only used internally.
|
||||
type reqSnapshotFuture struct {
|
||||
deferError
|
||||
|
||||
// snapshot details provided by the FSM runner before responding
|
||||
index uint64
|
||||
term uint64
|
||||
snapshot FSMSnapshot
|
||||
}
|
||||
|
||||
// restoreFuture is used for requesting an FSM to perform a
|
||||
// snapshot restore. Used internally only.
|
||||
type restoreFuture struct {
|
||||
deferError
|
||||
ID string
|
||||
}
|
||||
|
||||
// verifyFuture is used to verify the current node is still
|
||||
// the leader. This is to prevent a stale read.
|
||||
type verifyFuture struct {
|
||||
deferError
|
||||
notifyCh chan *verifyFuture
|
||||
quorumSize int
|
||||
votes int
|
||||
voteLock sync.Mutex
|
||||
}
|
||||
|
||||
// configurationsFuture is used to retrieve the current configurations. This is
|
||||
// used to allow safe access to this information outside of the main thread.
|
||||
type configurationsFuture struct {
|
||||
deferError
|
||||
configurations configurations
|
||||
}
|
||||
|
||||
// Configuration returns the latest configuration in use by Raft.
|
||||
func (c *configurationsFuture) Configuration() Configuration {
|
||||
return c.configurations.latest
|
||||
}
|
||||
|
||||
// Index returns the index of the latest configuration in use by Raft.
|
||||
func (c *configurationsFuture) Index() uint64 {
|
||||
return c.configurations.latestIndex
|
||||
}
|
||||
|
||||
// vote is used to respond to a verifyFuture.
|
||||
// This may block when responding on the notifyCh.
|
||||
func (v *verifyFuture) vote(leader bool) {
|
||||
v.voteLock.Lock()
|
||||
defer v.voteLock.Unlock()
|
||||
|
||||
// Guard against having notified already
|
||||
if v.notifyCh == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if leader {
|
||||
v.votes++
|
||||
if v.votes >= v.quorumSize {
|
||||
v.notifyCh <- v
|
||||
v.notifyCh = nil
|
||||
}
|
||||
} else {
|
||||
v.notifyCh <- v
|
||||
v.notifyCh = nil
|
||||
}
|
||||
}
|
||||
|
||||
// appendFuture is used for waiting on a pipelined append
|
||||
// entries RPC.
|
||||
type appendFuture struct {
|
||||
deferError
|
||||
start time.Time
|
||||
args *AppendEntriesRequest
|
||||
resp *AppendEntriesResponse
|
||||
}
|
||||
|
||||
func (a *appendFuture) Start() time.Time {
|
||||
return a.start
|
||||
}
|
||||
|
||||
func (a *appendFuture) Request() *AppendEntriesRequest {
|
||||
return a.args
|
||||
}
|
||||
|
||||
func (a *appendFuture) Response() *AppendEntriesResponse {
|
||||
return a.resp
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
package raft
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// InmemSnapshotStore implements the SnapshotStore interface and
|
||||
// retains only the most recent snapshot
|
||||
type InmemSnapshotStore struct {
|
||||
latest *InmemSnapshotSink
|
||||
hasSnapshot bool
|
||||
sync.RWMutex
|
||||
}
|
||||
|
||||
// InmemSnapshotSink implements SnapshotSink in memory
|
||||
type InmemSnapshotSink struct {
|
||||
meta SnapshotMeta
|
||||
contents *bytes.Buffer
|
||||
}
|
||||
|
||||
// NewInmemSnapshotStore creates a blank new InmemSnapshotStore
|
||||
func NewInmemSnapshotStore() *InmemSnapshotStore {
|
||||
return &InmemSnapshotStore{
|
||||
latest: &InmemSnapshotSink{
|
||||
contents: &bytes.Buffer{},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Create replaces the stored snapshot with a new one using the given args
|
||||
func (m *InmemSnapshotStore) Create(version SnapshotVersion, index, term uint64,
|
||||
configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
|
||||
// We only support version 1 snapshots at this time.
|
||||
if version != 1 {
|
||||
return nil, fmt.Errorf("unsupported snapshot version %d", version)
|
||||
}
|
||||
|
||||
name := snapshotName(term, index)
|
||||
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
|
||||
sink := &InmemSnapshotSink{
|
||||
meta: SnapshotMeta{
|
||||
Version: version,
|
||||
ID: name,
|
||||
Index: index,
|
||||
Term: term,
|
||||
Peers: encodePeers(configuration, trans),
|
||||
Configuration: configuration,
|
||||
ConfigurationIndex: configurationIndex,
|
||||
},
|
||||
contents: &bytes.Buffer{},
|
||||
}
|
||||
m.hasSnapshot = true
|
||||
m.latest = sink
|
||||
|
||||
return sink, nil
|
||||
}
|
||||
|
||||
// List returns the latest snapshot taken
|
||||
func (m *InmemSnapshotStore) List() ([]*SnapshotMeta, error) {
|
||||
m.RLock()
|
||||
defer m.RUnlock()
|
||||
|
||||
if !m.hasSnapshot {
|
||||
return []*SnapshotMeta{}, nil
|
||||
}
|
||||
return []*SnapshotMeta{&m.latest.meta}, nil
|
||||
}
|
||||
|
||||
// Open wraps an io.ReadCloser around the snapshot contents
|
||||
func (m *InmemSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
|
||||
m.RLock()
|
||||
defer m.RUnlock()
|
||||
|
||||
if m.latest.meta.ID != id {
|
||||
return nil, nil, fmt.Errorf("[ERR] snapshot: failed to open snapshot id: %s", id)
|
||||
}
|
||||
|
||||
return &m.latest.meta, ioutil.NopCloser(m.latest.contents), nil
|
||||
}
|
||||
|
||||
// Write appends the given bytes to the snapshot contents
|
||||
func (s *InmemSnapshotSink) Write(p []byte) (n int, err error) {
|
||||
written, err := io.Copy(s.contents, bytes.NewReader(p))
|
||||
s.meta.Size += written
|
||||
return int(written), err
|
||||
}
|
||||
|
||||
// Close updates the Size and is otherwise a no-op
|
||||
func (s *InmemSnapshotSink) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *InmemSnapshotSink) ID() string {
|
||||
return s.meta.ID
|
||||
}
|
||||
|
||||
func (s *InmemSnapshotSink) Cancel() error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
package raft
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// InmemStore implements the LogStore and StableStore interface.
|
||||
// It should NOT EVER be used for production. It is used only for
|
||||
// unit tests. Use the MDBStore implementation instead.
|
||||
type InmemStore struct {
|
||||
l sync.RWMutex
|
||||
lowIndex uint64
|
||||
highIndex uint64
|
||||
logs map[uint64]*Log
|
||||
kv map[string][]byte
|
||||
kvInt map[string]uint64
|
||||
}
|
||||
|
||||
// NewInmemStore returns a new in-memory backend. Do not ever
|
||||
// use for production. Only for testing.
|
||||
func NewInmemStore() *InmemStore {
|
||||
i := &InmemStore{
|
||||
logs: make(map[uint64]*Log),
|
||||
kv: make(map[string][]byte),
|
||||
kvInt: make(map[string]uint64),
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// FirstIndex implements the LogStore interface.
|
||||
func (i *InmemStore) FirstIndex() (uint64, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.lowIndex, nil
|
||||
}
|
||||
|
||||
// LastIndex implements the LogStore interface.
|
||||
func (i *InmemStore) LastIndex() (uint64, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.highIndex, nil
|
||||
}
|
||||
|
||||
// GetLog implements the LogStore interface.
|
||||
func (i *InmemStore) GetLog(index uint64, log *Log) error {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
l, ok := i.logs[index]
|
||||
if !ok {
|
||||
return ErrLogNotFound
|
||||
}
|
||||
*log = *l
|
||||
return nil
|
||||
}
|
||||
|
||||
// StoreLog implements the LogStore interface.
|
||||
func (i *InmemStore) StoreLog(log *Log) error {
|
||||
return i.StoreLogs([]*Log{log})
|
||||
}
|
||||
|
||||
// StoreLogs implements the LogStore interface.
|
||||
func (i *InmemStore) StoreLogs(logs []*Log) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
for _, l := range logs {
|
||||
i.logs[l.Index] = l
|
||||
if i.lowIndex == 0 {
|
||||
i.lowIndex = l.Index
|
||||
}
|
||||
if l.Index > i.highIndex {
|
||||
i.highIndex = l.Index
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteRange implements the LogStore interface.
|
||||
func (i *InmemStore) DeleteRange(min, max uint64) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
for j := min; j <= max; j++ {
|
||||
delete(i.logs, j)
|
||||
}
|
||||
if min <= i.lowIndex {
|
||||
i.lowIndex = max + 1
|
||||
}
|
||||
if max >= i.highIndex {
|
||||
i.highIndex = min - 1
|
||||
}
|
||||
if i.lowIndex > i.highIndex {
|
||||
i.lowIndex = 0
|
||||
i.highIndex = 0
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set implements the StableStore interface.
|
||||
func (i *InmemStore) Set(key []byte, val []byte) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
i.kv[string(key)] = val
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get implements the StableStore interface.
|
||||
func (i *InmemStore) Get(key []byte) ([]byte, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.kv[string(key)], nil
|
||||
}
|
||||
|
||||
// SetUint64 implements the StableStore interface.
|
||||
func (i *InmemStore) SetUint64(key []byte, val uint64) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
i.kvInt[string(key)] = val
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetUint64 implements the StableStore interface.
|
||||
func (i *InmemStore) GetUint64(key []byte) (uint64, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.kvInt[string(key)], nil
|
||||
}
|
|
@ -0,0 +1,322 @@
|
|||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// NewInmemAddr returns a new in-memory addr with
|
||||
// a randomly generate UUID as the ID.
|
||||
func NewInmemAddr() ServerAddress {
|
||||
return ServerAddress(generateUUID())
|
||||
}
|
||||
|
||||
// inmemPipeline is used to pipeline requests for the in-mem transport.
|
||||
type inmemPipeline struct {
|
||||
trans *InmemTransport
|
||||
peer *InmemTransport
|
||||
peerAddr ServerAddress
|
||||
|
||||
doneCh chan AppendFuture
|
||||
inprogressCh chan *inmemPipelineInflight
|
||||
|
||||
shutdown bool
|
||||
shutdownCh chan struct{}
|
||||
shutdownLock sync.Mutex
|
||||
}
|
||||
|
||||
type inmemPipelineInflight struct {
|
||||
future *appendFuture
|
||||
respCh <-chan RPCResponse
|
||||
}
|
||||
|
||||
// InmemTransport Implements the Transport interface, to allow Raft to be
|
||||
// tested in-memory without going over a network.
|
||||
type InmemTransport struct {
|
||||
sync.RWMutex
|
||||
consumerCh chan RPC
|
||||
localAddr ServerAddress
|
||||
peers map[ServerAddress]*InmemTransport
|
||||
pipelines []*inmemPipeline
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
// NewInmemTransport is used to initialize a new transport
|
||||
// and generates a random local address if none is specified
|
||||
func NewInmemTransport(addr ServerAddress) (ServerAddress, *InmemTransport) {
|
||||
if string(addr) == "" {
|
||||
addr = NewInmemAddr()
|
||||
}
|
||||
trans := &InmemTransport{
|
||||
consumerCh: make(chan RPC, 16),
|
||||
localAddr: addr,
|
||||
peers: make(map[ServerAddress]*InmemTransport),
|
||||
timeout: 50 * time.Millisecond,
|
||||
}
|
||||
return addr, trans
|
||||
}
|
||||
|
||||
// SetHeartbeatHandler is used to set optional fast-path for
|
||||
// heartbeats, not supported for this transport.
|
||||
func (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {
|
||||
}
|
||||
|
||||
// Consumer implements the Transport interface.
|
||||
func (i *InmemTransport) Consumer() <-chan RPC {
|
||||
return i.consumerCh
|
||||
}
|
||||
|
||||
// LocalAddr implements the Transport interface.
|
||||
func (i *InmemTransport) LocalAddr() ServerAddress {
|
||||
return i.localAddr
|
||||
}
|
||||
|
||||
// AppendEntriesPipeline returns an interface that can be used to pipeline
|
||||
// AppendEntries requests.
|
||||
func (i *InmemTransport) AppendEntriesPipeline(target ServerAddress) (AppendPipeline, error) {
|
||||
i.RLock()
|
||||
peer, ok := i.peers[target]
|
||||
i.RUnlock()
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("failed to connect to peer: %v", target)
|
||||
}
|
||||
pipeline := newInmemPipeline(i, peer, target)
|
||||
i.Lock()
|
||||
i.pipelines = append(i.pipelines, pipeline)
|
||||
i.Unlock()
|
||||
return pipeline, nil
|
||||
}
|
||||
|
||||
// AppendEntries implements the Transport interface.
|
||||
func (i *InmemTransport) AppendEntries(target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
|
||||
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy the result back
|
||||
out := rpcResp.Response.(*AppendEntriesResponse)
|
||||
*resp = *out
|
||||
return nil
|
||||
}
|
||||
|
||||
// RequestVote implements the Transport interface.
|
||||
func (i *InmemTransport) RequestVote(target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error {
|
||||
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy the result back
|
||||
out := rpcResp.Response.(*RequestVoteResponse)
|
||||
*resp = *out
|
||||
return nil
|
||||
}
|
||||
|
||||
// InstallSnapshot implements the Transport interface.
|
||||
func (i *InmemTransport) InstallSnapshot(target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
|
||||
rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy the result back
|
||||
out := rpcResp.Response.(*InstallSnapshotResponse)
|
||||
*resp = *out
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *InmemTransport) makeRPC(target ServerAddress, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) {
|
||||
i.RLock()
|
||||
peer, ok := i.peers[target]
|
||||
i.RUnlock()
|
||||
|
||||
if !ok {
|
||||
err = fmt.Errorf("failed to connect to peer: %v", target)
|
||||
return
|
||||
}
|
||||
|
||||
// Send the RPC over
|
||||
respCh := make(chan RPCResponse)
|
||||
peer.consumerCh <- RPC{
|
||||
Command: args,
|
||||
Reader: r,
|
||||
RespChan: respCh,
|
||||
}
|
||||
|
||||
// Wait for a response
|
||||
select {
|
||||
case rpcResp = <-respCh:
|
||||
if rpcResp.Error != nil {
|
||||
err = rpcResp.Error
|
||||
}
|
||||
case <-time.After(timeout):
|
||||
err = fmt.Errorf("command timed out")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodePeer implements the Transport interface.
|
||||
func (i *InmemTransport) EncodePeer(p ServerAddress) []byte {
|
||||
return []byte(p)
|
||||
}
|
||||
|
||||
// DecodePeer implements the Transport interface.
|
||||
func (i *InmemTransport) DecodePeer(buf []byte) ServerAddress {
|
||||
return ServerAddress(buf)
|
||||
}
|
||||
|
||||
// Connect is used to connect this transport to another transport for
|
||||
// a given peer name. This allows for local routing.
|
||||
func (i *InmemTransport) Connect(peer ServerAddress, t Transport) {
|
||||
trans := t.(*InmemTransport)
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
i.peers[peer] = trans
|
||||
}
|
||||
|
||||
// Disconnect is used to remove the ability to route to a given peer.
|
||||
func (i *InmemTransport) Disconnect(peer ServerAddress) {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
delete(i.peers, peer)
|
||||
|
||||
// Disconnect any pipelines
|
||||
n := len(i.pipelines)
|
||||
for idx := 0; idx < n; idx++ {
|
||||
if i.pipelines[idx].peerAddr == peer {
|
||||
i.pipelines[idx].Close()
|
||||
i.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil
|
||||
idx--
|
||||
n--
|
||||
}
|
||||
}
|
||||
i.pipelines = i.pipelines[:n]
|
||||
}
|
||||
|
||||
// DisconnectAll is used to remove all routes to peers.
|
||||
func (i *InmemTransport) DisconnectAll() {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
i.peers = make(map[ServerAddress]*InmemTransport)
|
||||
|
||||
// Handle pipelines
|
||||
for _, pipeline := range i.pipelines {
|
||||
pipeline.Close()
|
||||
}
|
||||
i.pipelines = nil
|
||||
}
|
||||
|
||||
// Close is used to permanently disable the transport
|
||||
func (i *InmemTransport) Close() error {
|
||||
i.DisconnectAll()
|
||||
return nil
|
||||
}
|
||||
|
||||
func newInmemPipeline(trans *InmemTransport, peer *InmemTransport, addr ServerAddress) *inmemPipeline {
|
||||
i := &inmemPipeline{
|
||||
trans: trans,
|
||||
peer: peer,
|
||||
peerAddr: addr,
|
||||
doneCh: make(chan AppendFuture, 16),
|
||||
inprogressCh: make(chan *inmemPipelineInflight, 16),
|
||||
shutdownCh: make(chan struct{}),
|
||||
}
|
||||
go i.decodeResponses()
|
||||
return i
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) decodeResponses() {
|
||||
timeout := i.trans.timeout
|
||||
for {
|
||||
select {
|
||||
case inp := <-i.inprogressCh:
|
||||
var timeoutCh <-chan time.Time
|
||||
if timeout > 0 {
|
||||
timeoutCh = time.After(timeout)
|
||||
}
|
||||
|
||||
select {
|
||||
case rpcResp := <-inp.respCh:
|
||||
// Copy the result back
|
||||
*inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse)
|
||||
inp.future.respond(rpcResp.Error)
|
||||
|
||||
select {
|
||||
case i.doneCh <- inp.future:
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
|
||||
case <-timeoutCh:
|
||||
inp.future.respond(fmt.Errorf("command timed out"))
|
||||
select {
|
||||
case i.doneCh <- inp.future:
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
|
||||
// Create a new future
|
||||
future := &appendFuture{
|
||||
start: time.Now(),
|
||||
args: args,
|
||||
resp: resp,
|
||||
}
|
||||
future.init()
|
||||
|
||||
// Handle a timeout
|
||||
var timeout <-chan time.Time
|
||||
if i.trans.timeout > 0 {
|
||||
timeout = time.After(i.trans.timeout)
|
||||
}
|
||||
|
||||
// Send the RPC over
|
||||
respCh := make(chan RPCResponse, 1)
|
||||
rpc := RPC{
|
||||
Command: args,
|
||||
RespChan: respCh,
|
||||
}
|
||||
select {
|
||||
case i.peer.consumerCh <- rpc:
|
||||
case <-timeout:
|
||||
return nil, fmt.Errorf("command enqueue timeout")
|
||||
case <-i.shutdownCh:
|
||||
return nil, ErrPipelineShutdown
|
||||
}
|
||||
|
||||
// Send to be decoded
|
||||
select {
|
||||
case i.inprogressCh <- &inmemPipelineInflight{future, respCh}:
|
||||
return future, nil
|
||||
case <-i.shutdownCh:
|
||||
return nil, ErrPipelineShutdown
|
||||
}
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) Consumer() <-chan AppendFuture {
|
||||
return i.doneCh
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) Close() error {
|
||||
i.shutdownLock.Lock()
|
||||
defer i.shutdownLock.Unlock()
|
||||
if i.shutdown {
|
||||
return nil
|
||||
}
|
||||
|
||||
i.shutdown = true
|
||||
close(i.shutdownCh)
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
package raft
|
||||
|
||||
// LogType describes various types of log entries.
type LogType uint8

// The values below are assigned with iota, so the order of the declarations
// determines each constant's numeric value.
const (
	// LogCommand is applied to a user FSM.
	LogCommand LogType = iota

	// LogNoop is used to assert leadership.
	LogNoop

	// LogAddPeerDeprecated is used to add a new peer. This should only be
	// used with older protocol versions designed to be compatible with
	// unversioned Raft servers. See comments in config.go for details.
	LogAddPeerDeprecated

	// LogRemovePeerDeprecated is used to remove an existing peer. This
	// should only be used with older protocol versions designed to be
	// compatible with unversioned Raft servers. See comments in config.go
	// for details.
	LogRemovePeerDeprecated

	// LogBarrier is used to ensure all preceding operations have been
	// applied to the FSM. It is similar to LogNoop, but instead of returning
	// once committed, it only returns once the FSM manager acks it.
	// Otherwise it is possible there are operations committed but not yet
	// applied to the FSM.
	LogBarrier

	// LogConfiguration establishes a membership change configuration. It is
	// created when a server is added, removed, promoted, etc. Only used
	// when protocol version 1 or greater is in use.
	LogConfiguration
)
|
||||
|
||||
// Log entries are replicated to all members of the Raft cluster
|
||||
// and form the heart of the replicated state machine.
|
||||
type Log struct {
|
||||
// Index holds the index of the log entry.
|
||||
Index uint64
|
||||
|
||||
// Term holds the election term of the log entry.
|
||||
Term uint64
|
||||
|
||||
// Type holds the type of the log entry.
|
||||
Type LogType
|
||||
|
||||
// Data holds the log entry's type-specific data.
|
||||
Data []byte
|
||||
}
|
||||
|
||||
// LogStore is used to provide an interface for storing
|
||||
// and retrieving logs in a durable fashion.
|
||||
type LogStore interface {
|
||||
// FirstIndex returns the first index written. 0 for no entries.
|
||||
FirstIndex() (uint64, error)
|
||||
|
||||
// LastIndex returns the last index written. 0 for no entries.
|
||||
LastIndex() (uint64, error)
|
||||
|
||||
// GetLog gets a log entry at a given index.
|
||||
GetLog(index uint64, log *Log) error
|
||||
|
||||
// StoreLog stores a log entry.
|
||||
StoreLog(log *Log) error
|
||||
|
||||
// StoreLogs stores multiple log entries.
|
||||
StoreLogs(logs []*Log) error
|
||||
|
||||
// DeleteRange deletes a range of log entries. The range is inclusive.
|
||||
DeleteRange(min, max uint64) error
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue