add broken test for lock lost on connection error
Add a way to inject network errors by setting an immediate deadline on open Consul connections. The Consul client currently doesn't retry on some errors, which forces us to lose our lock. Once the Consul API client is fixed, this test will fail.
This commit is contained in:
parent
fd9adcdb36
commit
25a8227291
|
@ -120,11 +120,7 @@ func (b *Backend) configure(ctx context.Context) error {
|
|||
config := consulapi.DefaultConfig()
|
||||
|
||||
// replace the default Transport Dialer to reduce the KeepAlive
|
||||
|
||||
config.Transport.DialContext = (&net.Dialer{
|
||||
Timeout: 30 * time.Second,
|
||||
KeepAlive: 17 * time.Second,
|
||||
}).DialContext
|
||||
config.Transport.DialContext = dialContext
|
||||
|
||||
if v, ok := data.GetOk("access_token"); ok && v.(string) != "" {
|
||||
config.Token = v.(string)
|
||||
|
@ -175,3 +171,10 @@ func (b *Backend) configure(ctx context.Context) error {
|
|||
b.client = client
|
||||
return nil
|
||||
}
|
||||
|
||||
// dialContext is the DialContext function for the consul client transport.
|
||||
// This is stored in a package var to inject a different dialer for tests.
|
||||
var dialContext = (&net.Dialer{
|
||||
Timeout: 30 * time.Second,
|
||||
KeepAlive: 17 * time.Second,
|
||||
}).DialContext
|
||||
|
|
|
@ -85,6 +85,11 @@ func (b *Backend) State(name string) (state.State, error) {
|
|||
stateMgr = &state.LockDisabled{Inner: stateMgr}
|
||||
}
|
||||
|
||||
// the default state always exists
|
||||
if name == backend.DefaultStateName {
|
||||
return stateMgr, nil
|
||||
}
|
||||
|
||||
// Grab a lock, we use this to write an empty state if one doesn't
|
||||
// exist already. We have to write an empty state as a sentinel value
|
||||
// so States() knows it exists.
|
||||
|
|
|
@ -32,6 +32,8 @@ const (
|
|||
lockReacquireInterval = 2 * time.Second
|
||||
)
|
||||
|
||||
// lostLockErr is returned by unlock when the lock-monitor channel has been
// closed, i.e. the Consul lock was lost out from under us.
//
// NOTE(review): Go convention names sentinel errors ErrXxx (staticcheck
// ST1012); renaming would touch external comparisons, so it is left as-is.
var lostLockErr = errors.New("consul lock was lost")
|
||||
|
||||
// RemoteClient is a remote client that stores data in Consul.
|
||||
type RemoteClient struct {
|
||||
Client *consulapi.Client
|
||||
|
@ -409,7 +411,7 @@ func (c *RemoteClient) unlock(id string) error {
|
|||
|
||||
select {
|
||||
case <-c.lockCh:
|
||||
return errors.New("consul lock was lost")
|
||||
return lostLockErr
|
||||
default:
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
package consul
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -188,3 +191,100 @@ func TestConsul_lostLock(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestConsul_lostLockConnection verifies that killing the consul client's
// network connections causes the held lock to be reported as lost. This is
// an intentionally "broken" test: once the consul api client learns to
// reconnect and retry, Unlock will stop returning lostLockErr and this test
// will fail, signalling the assertion should be flipped back.
func TestConsul_lostLockConnection(t *testing.T) {
	srv := newConsulTestServer(t)
	defer srv.Stop()

	// create an "unreliable" network by closing all the consul client's
	// network connections; the injected dialer records every conn so the
	// test can kill them on demand.
	conns := &unreliableConns{}
	origDialFn := dialContext
	defer func() {
		// restore the package-level dialer so later tests are unaffected
		dialContext = origDialFn
	}()
	dialContext = conns.DialContext

	// unique state path per run so reruns don't collide in the test server
	path := fmt.Sprintf("tf-unit/%s", time.Now().String())

	b := backend.TestBackendConfig(t, New(), map[string]interface{}{
		"address": srv.HTTPAddr,
		"path":    path,
	})

	s, err := b.State(backend.DefaultStateName)
	if err != nil {
		t.Fatal(err)
	}

	info := state.NewLockInfo()
	info.Operation = "test-lost-lock-connection"
	id, err := s.Lock(info)
	if err != nil {
		t.Fatal(err)
	}

	// set a callback to know when the monitor loop re-connects
	// NOTE(review): this write (and the self-clearing write inside the
	// callback) happens without conns.Lock(), racing with DialContext's
	// locked read — confirm with -race and consider locking here.
	dialed := make(chan struct{})
	conns.dialCallback = func() {
		close(dialed)
		conns.dialCallback = nil
	}

	// kill any open connections
	// once the consul client is fixed, we should loop over this a few times
	// to be sure, since we can't hook into the client's internal lock
	// monitor loop.
	conns.Kill()
	// wait for a new connection to be dialed, and kill it again
	<-dialed
	conns.Kill()

	// since the lock monitor loop is hidden in the consul api client, we can
	// only wait a bit to make sure we were notified of the failure
	time.Sleep(time.Second)

	// once the consul client can reconnect properly, there will no longer be
	// an error here
	//if err := s.Unlock(id); err != nil {
	if err := s.Unlock(id); err != lostLockErr {
		t.Fatalf("expected lost lock error, got %v", err)
	}
}
|
||||
|
||||
// unreliableConns is a test dialer that records every network connection it
// hands out, so tests can simulate an unreliable network by forcibly
// terminating all open consul client connections via Kill.
type unreliableConns struct {
	sync.Mutex
	// conns holds every connection returned by DialContext; cleared by Kill.
	conns []net.Conn
	// dialCallback, when non-nil, is invoked (with the mutex held) after
	// each successful dial; tests use it to observe reconnect attempts.
	dialCallback func()
}
|
||||
|
||||
func (u *unreliableConns) DialContext(ctx context.Context, netw, addr string) (net.Conn, error) {
|
||||
u.Lock()
|
||||
defer u.Unlock()
|
||||
|
||||
dialer := &net.Dialer{}
|
||||
conn, err := dialer.DialContext(ctx, netw, addr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
u.conns = append(u.conns, conn)
|
||||
|
||||
if u.dialCallback != nil {
|
||||
u.dialCallback()
|
||||
}
|
||||
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
// Kill these with a deadline, just to make sure we don't end up with any EOFs
|
||||
// that get ignored.
|
||||
func (u *unreliableConns) Kill() {
|
||||
u.Lock()
|
||||
defer u.Unlock()
|
||||
|
||||
for _, conn := range u.conns {
|
||||
conn.(*net.TCPConn).SetDeadline(time.Now())
|
||||
}
|
||||
u.conns = nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue