package nebula

import (
	"errors"
	"fmt"
	"net"
	"sync"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/rcrowley/go-metrics"
	"github.com/sirupsen/logrus"
	"github.com/slackhq/nebula/cert"
)

var ErrHostNotKnown = errors.New("host not known")

type LightHouse struct {
	sync.RWMutex //Because we concurrently read and write to our maps
	amLighthouse bool
	myIp         uint32
	punchConn    *udpConn

	// Local cache of answers from lighthouses
	addrMap map[uint32][]*udpAddr

	// filters remote addresses allowed for each host
	// - When we are a lighthouse, this filters what addresses we store and
	//   respond with.
	// - When we are not a lighthouse, this filters which addresses we accept
	//   from lighthouses.
	remoteAllowList *AllowList

	// filters local addresses that we advertise to lighthouses
	localAllowList *AllowList

	// used to trigger the HandshakeManager when we receive HostQueryReply
	handshakeTrigger chan<- uint32

	// staticList exists to avoid having a bool in each addrMap entry
	// since static should be rare
	staticList  map[uint32]struct{}
	lighthouses map[uint32]struct{}
	interval    int
	nebulaPort  uint32 // 32 bits because protobuf does not have a uint16
	punchBack   bool
	punchDelay  time.Duration

	metrics           *MessageMetrics
	metricHolepunchTx metrics.Counter
	l                 *logrus.Logger
}

type EncWriter interface {
	SendMessageToVpnIp(t NebulaMessageType, st NebulaMessageSubType, vpnIp uint32, p, nb, out []byte)
	SendMessageToAll(t NebulaMessageType, st NebulaMessageSubType, vpnIp uint32, p, nb, out []byte)
}

func NewLightHouse(l *logrus.Logger, amLighthouse bool, myIp uint32, ips []uint32, interval int, nebulaPort uint32, pc *udpConn, punchBack bool, punchDelay time.Duration, metricsEnabled bool) *LightHouse {
	h := LightHouse{
		amLighthouse: amLighthouse,
		myIp:         myIp,
		addrMap:      make(map[uint32][]*udpAddr),
		nebulaPort:   nebulaPort,
		lighthouses:  make(map[uint32]struct{}),
		staticList:   make(map[uint32]struct{}),
		interval:     interval,
		punchConn:    pc,
		punchBack:    punchBack,
		punchDelay:   punchDelay,
		l:            l,
	}

	if metricsEnabled {
		h.metrics = newLighthouseMetrics()
		h.metricHolepunchTx = metrics.GetOrRegisterCounter("messages.tx.holepunch", nil)
	} else {
		h.metricHolepunchTx = metrics.NilCounter{}
	}

	for _, ip := range ips {
		h.lighthouses[ip] = struct{}{}
	}

	return &h
}

func (lh *LightHouse) SetRemoteAllowList(allowList *AllowList) {
	lh.Lock()
	defer lh.Unlock()

	lh.remoteAllowList = allowList
}

func (lh *LightHouse) SetLocalAllowList(allowList *AllowList) {
	lh.Lock()
	defer lh.Unlock()

	lh.localAllowList = allowList
}

func (lh *LightHouse) ValidateLHStaticEntries() error {
	for lhIP := range lh.lighthouses {
		if _, ok := lh.staticList[lhIP]; !ok {
			return fmt.Errorf("Lighthouse %s does not have a static_host_map entry", IntIp(lhIP))
		}
	}
	return nil
}

func (lh *LightHouse) Query(ip uint32, f EncWriter) ([]*udpAddr, error) {
	if !lh.IsLighthouseIP(ip) {
		lh.QueryServer(ip, f)
	}
	lh.RLock()
	if v, ok := lh.addrMap[ip]; ok {
		lh.RUnlock()
		return v, nil
	}
	lh.RUnlock()
	return nil, ErrHostNotKnown
}

// This is asynchronous so no reply should be expected
func (lh *LightHouse) QueryServer(ip uint32, f EncWriter) {
	if !lh.amLighthouse {
		// Send a query to the lighthouses and hope for the best next time
		query, err := proto.Marshal(NewLhQueryByInt(ip))
		if err != nil {
			lh.l.WithError(err).WithField("vpnIp", IntIp(ip)).Error("Failed to marshal lighthouse query payload")
			return
		}

		lh.metricTx(NebulaMeta_HostQuery, int64(len(lh.lighthouses)))
		nb := make([]byte, 12, 12)
		out := make([]byte, mtu)
		for n := range lh.lighthouses {
			f.SendMessageToVpnIp(lightHouse, 0, n, query, nb, out)
		}
	}
}

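// The query flow, roughly: a non-lighthouse caller asks Query for a vpn ip; on a
// cache miss the query is fanned out to every configured lighthouse via
// QueryServer and ErrHostNotKnown is returned immediately, with answers arriving
// later through HandleRequest. A minimal caller-side sketch follows; the retry
// and the names peerVpnIp (uint32) and f (EncWriter) are illustrative
// assumptions, not part of this package:
//
//	addrs, err := lh.Query(peerVpnIp, f)
//	if err == ErrHostNotKnown {
//		// The query has already been sent to the lighthouses; check the
//		// local cache again once replies have had time to arrive.
//		time.Sleep(time.Second)
//		addrs = lh.QueryCache(peerVpnIp)
//	}
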
// Query our local lighthouse cached results
func (lh *LightHouse) QueryCache(ip uint32) []*udpAddr {
	lh.RLock()
	if v, ok := lh.addrMap[ip]; ok {
		lh.RUnlock()
		return v
	}
	lh.RUnlock()
	return nil
}

func (lh *LightHouse) DeleteVpnIP(vpnIP uint32) {
	// First we check the static mapping
	// and do nothing if it is there
	if _, ok := lh.staticList[vpnIP]; ok {
		return
	}
	lh.Lock()
	//l.Debugln(lh.addrMap)
	delete(lh.addrMap, vpnIP)
	lh.l.Debugf("deleting %s from lighthouse.", IntIp(vpnIP))
	lh.Unlock()
}

func (lh *LightHouse) AddRemote(vpnIP uint32, toIp *udpAddr, static bool) {
	// First we check if the sender thinks this is a static entry
	// and do nothing if it is not, but should be considered static
	if !static {
		if _, ok := lh.staticList[vpnIP]; ok {
			return
		}
	}

	lh.Lock()
	defer lh.Unlock()
	for _, v := range lh.addrMap[vpnIP] {
		if v.Equals(toIp) {
			return
		}
	}

	allow := lh.remoteAllowList.Allow(toIp.IP)
	lh.l.WithField("remoteIp", toIp).WithField("allow", allow).Debug("remoteAllowList.Allow")
	if !allow {
		return
	}

	//l.Debugf("Adding reply of %s as %s\n", IntIp(vpnIP), toIp)
	if static {
		lh.staticList[vpnIP] = struct{}{}
	}
	lh.addrMap[vpnIP] = append(lh.addrMap[vpnIP], toIp.Copy())
}

func (lh *LightHouse) AddRemoteAndReset(vpnIP uint32, toIp *udpAddr) {
	if lh.amLighthouse {
		lh.DeleteVpnIP(vpnIP)
		lh.AddRemote(vpnIP, toIp, false)
	}
}

func (lh *LightHouse) IsLighthouseIP(vpnIP uint32) bool {
	if _, ok := lh.lighthouses[vpnIP]; ok {
		return true
	}
	return false
}

func NewLhQueryByInt(VpnIp uint32) *NebulaMeta {
	return &NebulaMeta{
		Type: NebulaMeta_HostQuery,
		Details: &NebulaMetaDetails{
			VpnIp: VpnIp,
		},
	}
}

type ip4Or6 struct {
	v4 IpAndPort
	v6 Ip6AndPort
}

func NewIpAndPort(ip net.IP, port uint32) ip4Or6 {
	ipp := ip4Or6{}

	if ipv4 := ip.To4(); ipv4 != nil {
		ipp.v4 = IpAndPort{Port: port}
		ipp.v4.Ip = ip2int(ip)
	} else {
		ipp.v6 = Ip6AndPort{Port: port}
		ipp.v6.Ip = make([]byte, len(ip))
		copy(ipp.v6.Ip, ip)
	}

	return ipp
}

func NewIpAndPortFromUDPAddr(addr *udpAddr) ip4Or6 {
	return NewIpAndPort(addr.IP, uint32(addr.Port))
}

func NewUDPAddrFromLH4(ipp *IpAndPort) *udpAddr {
	ip := ipp.Ip
	return NewUDPAddr(
		net.IPv4(byte(ip&0xff000000>>24), byte(ip&0x00ff0000>>16), byte(ip&0x0000ff00>>8), byte(ip&0x000000ff)),
		uint16(ipp.Port),
	)
}

func NewUDPAddrFromLH6(ipp *Ip6AndPort) *udpAddr {
	return NewUDPAddr(ipp.Ip, uint16(ipp.Port))
}

func (lh *LightHouse) LhUpdateWorker(f EncWriter) {
	if lh.amLighthouse || lh.interval == 0 {
		return
	}

	for {
		lh.SendUpdate(f)
		time.Sleep(time.Second * time.Duration(lh.interval))
	}
}

func (lh *LightHouse) SendUpdate(f EncWriter) {
	var v4 []*IpAndPort
	var v6 []*Ip6AndPort

	for _, e := range *localIps(lh.l, lh.localAllowList) {
		// Only add IPs that aren't my VPN/tun IP
		if ip2int(e) != lh.myIp {
			ipp := NewIpAndPort(e, lh.nebulaPort)
			if len(ipp.v6.Ip) > 0 {
				v6 = append(v6, &ipp.v6)
			} else {
				v4 = append(v4, &ipp.v4)
			}
		}
	}
	m := &NebulaMeta{
		Type: NebulaMeta_HostUpdateNotification,
		Details: &NebulaMetaDetails{
			VpnIp:       lh.myIp,
			IpAndPorts:  v4,
			Ip6AndPorts: v6,
		},
	}

	lh.metricTx(NebulaMeta_HostUpdateNotification, int64(len(lh.lighthouses)))
	nb := make([]byte, 12, 12)
	out := make([]byte, mtu)
	for vpnIp := range lh.lighthouses {
		mm, err := proto.Marshal(m)
		if err != nil {
			lh.l.Debugf("Invalid marshal to update")
		}
		//l.Error("LIGHTHOUSE PACKET SEND", mm)
		f.SendMessageToVpnIp(lightHouse, 0, vpnIp, mm, nb, out)
	}
}

type LightHouseHandler struct {
	lh   *LightHouse
	nb   []byte
	out  []byte
	meta *NebulaMeta
	iap  []ip4Or6
	iapp []*ip4Or6
}

func (lh *LightHouse) NewRequestHandler() *LightHouseHandler {
	lhh := &LightHouseHandler{
		lh:   lh,
		nb:   make([]byte, 12, 12),
		out:  make([]byte, mtu),
		meta: &NebulaMeta{
			Details: &NebulaMetaDetails{},
		},
	}

	lhh.resizeIpAndPorts(10)
	return lhh
}

// This method is similar to Reset(), but it re-uses the pointer structs
// so that we don't have to re-allocate them
func (lhh *LightHouseHandler) resetMeta() *NebulaMeta {
	details := lhh.meta.Details

	details.Reset()
	lhh.meta.Reset()
	lhh.meta.Details = details

	return lhh.meta
}

func (lhh *LightHouseHandler) resizeIpAndPorts(n int) {
	if cap(lhh.iap) < n {
		lhh.iap = make([]ip4Or6, n)
		lhh.iapp = make([]*ip4Or6, n)

		for i := range lhh.iap {
			lhh.iapp[i] = &lhh.iap[i]
		}
	}
	lhh.iap = lhh.iap[:n]
	lhh.iapp = lhh.iapp[:n]
}

func (lhh *LightHouseHandler) setIpAndPortsFromNetIps(ips []*udpAddr) []*ip4Or6 {
	lhh.resizeIpAndPorts(len(ips))
	for i, e := range ips {
		lhh.iap[i] = NewIpAndPortFromUDPAddr(e)
	}
	return lhh.iapp
}

func (lhh *LightHouseHandler) HandleRequest(rAddr *udpAddr, vpnIp uint32, p []byte, c *cert.NebulaCertificate, f EncWriter) {
	lh := lhh.lh
	n := lhh.resetMeta()
	err := proto.UnmarshalMerge(p, n)
	if err != nil {
		lh.l.WithError(err).WithField("vpnIp", IntIp(vpnIp)).WithField("udpAddr", rAddr).
			Error("Failed to unmarshal lighthouse packet")
		//TODO: send recv_error?
		return
	}

	if n.Details == nil {
		lh.l.WithField("vpnIp", IntIp(vpnIp)).WithField("udpAddr", rAddr).
			Error("Invalid lighthouse update")
		//TODO: send recv_error?
		return
	}

	lh.metricRx(n.Type, 1)

	switch n.Type {
	case NebulaMeta_HostQuery:
		// Exit if we don't answer queries
		if !lh.amLighthouse {
			lh.l.Debugln("I don't answer queries, but received from: ", rAddr)
			return
		}

		//l.Debugln("Got Query")
		ips, err := lh.Query(n.Details.VpnIp, f)
		if err != nil {
			//l.Debugf("Can't answer query %s from %s because error: %s", IntIp(n.Details.VpnIp), rAddr, err)
			return
		} else {
			reqVpnIP := n.Details.VpnIp
			n = lhh.resetMeta()
			n.Type = NebulaMeta_HostQueryReply
			n.Details.VpnIp = reqVpnIP

			v4s := make([]*IpAndPort, 0)
			v6s := make([]*Ip6AndPort, 0)
			for _, v := range lhh.setIpAndPortsFromNetIps(ips) {
				if len(v.v6.Ip) > 0 {
					v6s = append(v6s, &v.v6)
				} else {
					v4s = append(v4s, &v.v4)
				}
			}
			if len(v4s) > 0 {
				n.Details.IpAndPorts = v4s
			}
			if len(v6s) > 0 {
				n.Details.Ip6AndPorts = v6s
			}

			reply, err := proto.Marshal(n)
			if err != nil {
				lh.l.WithError(err).WithField("vpnIp", IntIp(vpnIp)).Error("Failed to marshal lighthouse host query reply")
				return
			}
			lh.metricTx(NebulaMeta_HostQueryReply, 1)
			f.SendMessageToVpnIp(lightHouse, 0, vpnIp, reply, lhh.nb, lhh.out[:0])

			// This signals the other side to punch some zero byte udp packets
			ips, err = lh.Query(vpnIp, f)
			if err != nil {
				lh.l.WithField("vpnIp", IntIp(vpnIp)).Debugln("Can't notify host to punch")
				return
			} else {
				//l.Debugln("Notify host to punch", iap)
				n = lhh.resetMeta()
				n.Type = NebulaMeta_HostPunchNotification
				n.Details.VpnIp = vpnIp

				v4s := make([]*IpAndPort, 0)
				v6s := make([]*Ip6AndPort, 0)
				for _, v := range lhh.setIpAndPortsFromNetIps(ips) {
					if len(v.v6.Ip) > 0 {
						v6s = append(v6s, &v.v6)
					} else {
						v4s = append(v4s, &v.v4)
					}
				}
				if len(v4s) > 0 {
					n.Details.IpAndPorts = v4s
				}
				if len(v6s) > 0 {
					n.Details.Ip6AndPorts = v6s
				}

				reply, _ := proto.Marshal(n)
				lh.metricTx(NebulaMeta_HostPunchNotification, 1)
				f.SendMessageToVpnIp(lightHouse, 0, reqVpnIP, reply, lhh.nb, lhh.out[:0])
			}
			//fmt.Println(reply, remoteaddr)
		}

	case NebulaMeta_HostQueryReply:
		if !lh.IsLighthouseIP(vpnIp) {
			return
		}
		for _, a := range n.Details.IpAndPorts {
			ans := NewUDPAddrFromLH4(a)
			if ans != nil {
				lh.AddRemote(n.Details.VpnIp, ans, false)
			}
		}
		for _, a := range n.Details.Ip6AndPorts {
			ans := NewUDPAddrFromLH6(a)
			if ans != nil {
				lh.AddRemote(n.Details.VpnIp, ans, false)
			}
		}

		// Non-blocking attempt to trigger, skip if it would block
		select {
		case lh.handshakeTrigger <- n.Details.VpnIp:
		default:
		}

	case NebulaMeta_HostUpdateNotification:
		//Simple check that the host sent this not someone else
		if n.Details.VpnIp != vpnIp {
			lh.l.WithField("vpnIp", IntIp(vpnIp)).WithField("answer", IntIp(n.Details.VpnIp)).Debugln("Host sent invalid update")
			return
		}
		for _, a := range n.Details.IpAndPorts {
			ans := NewUDPAddrFromLH4(a)
			if ans != nil {
				lh.AddRemote(n.Details.VpnIp, ans, false)
			}
		}
		for _, a := range n.Details.Ip6AndPorts {
			ans := NewUDPAddrFromLH6(a)
			if ans != nil {
				lh.AddRemote(n.Details.VpnIp, ans, false)
			}
		}

	case NebulaMeta_HostMovedNotification:
	case NebulaMeta_HostPunchNotification:
		if !lh.IsLighthouseIP(vpnIp) {
			return
		}

		empty := []byte{0}
		for _, a := range n.Details.IpAndPorts {
			vpnPeer := NewUDPAddrFromLH4(a)
			if vpnPeer == nil {
				continue
			}

			go func() {
				time.Sleep(lh.punchDelay)
				lh.metricHolepunchTx.Inc(1)
				lh.punchConn.WriteTo(empty, vpnPeer)
			}()

			if lh.l.Level >= logrus.DebugLevel {
				//TODO: lacking the ip we are actually punching on, old: l.Debugf("Punching %s on %d for %s", IntIp(a.Ip), a.Port, IntIp(n.Details.VpnIp))
				lh.l.Debugf("Punching on %d for %s", a.Port, IntIp(n.Details.VpnIp))
			}
		}

		for _, a := range n.Details.Ip6AndPorts {
			vpnPeer := NewUDPAddrFromLH6(a)
			if vpnPeer == nil {
				continue
			}

			go func() {
				time.Sleep(lh.punchDelay)
				lh.metricHolepunchTx.Inc(1)
				lh.punchConn.WriteTo(empty, vpnPeer)
			}()

			if lh.l.Level >= logrus.DebugLevel {
				//TODO: lacking the ip we are actually punching on, old: l.Debugf("Punching %s on %d for %s", IntIp(a.Ip), a.Port, IntIp(n.Details.VpnIp))
				lh.l.Debugf("Punching on %d for %s", a.Port, IntIp(n.Details.VpnIp))
			}
		}

		// This sends a nebula test packet to the host trying to contact us. In the case
		// of a double nat or other difficult scenario, this may help establish
		// a tunnel.
		if lh.punchBack {
			go func() {
				time.Sleep(time.Second * 5)
				lh.l.Debugf("Sending a nebula test packet to vpn ip %s", IntIp(n.Details.VpnIp))
				// TODO we have to allocate a new output buffer here since we are spawning a new goroutine
				// for each punchBack packet. We should move this into a timerwheel or a single goroutine
				// managed by a channel.
				f.SendMessageToVpnIp(test, testRequest, n.Details.VpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
			}()
		}
	}
}

func (lh *LightHouse) metricRx(t NebulaMeta_MessageType, i int64) {
	lh.metrics.Rx(NebulaMessageType(t), 0, i)
}

func (lh *LightHouse) metricTx(t NebulaMeta_MessageType, i int64) {
	lh.metrics.Tx(NebulaMessageType(t), 0, i)
}

/*
func (f *Interface) sendPathCheck(ci *ConnectionState, endpoint *net.UDPAddr, counter int) {
	c := ci.messageCounter
	b := HeaderEncode(nil, Version, uint8(path_check), 0, ci.remoteIndex, c)
	ci.messageCounter++

	if ci.eKey != nil {
		msg := ci.eKey.EncryptDanger(b, nil, []byte(strconv.Itoa(counter)), c)
		//msg := ci.eKey.EncryptDanger(b, nil, []byte(fmt.Sprintf("%d", counter)), c)
		f.outside.WriteTo(msg, endpoint)
		l.Debugf("path_check sent, remote index: %d, pathCounter %d", ci.remoteIndex, counter)
	}
}

func (f *Interface) sendPathCheckReply(ci *ConnectionState, endpoint *net.UDPAddr, counter []byte) {
	c := ci.messageCounter
	b := HeaderEncode(nil, Version, uint8(path_check_reply), 0, ci.remoteIndex, c)
	ci.messageCounter++

	if ci.eKey != nil {
		msg := ci.eKey.EncryptDanger(b, nil, counter, c)
		f.outside.WriteTo(msg, endpoint)
		l.Debugln("path_check sent, remote index: ", ci.remoteIndex)
	}
}
*/
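
// A hedged construction sketch, not taken from this file: a non-lighthouse node
// that reports its addresses to a single lighthouse every 10 seconds and will
// send test packets back when punched. The names l, myVpnIp, lighthouseVpnIp,
// udpServer, remoteAllowList, localAllowList and encWriter are assumed to exist
// in the caller; 4242 and the other values are illustrative, not defaults.
//
//	lh := NewLightHouse(l, false, myVpnIp, []uint32{lighthouseVpnIp},
//		10, 4242, udpServer, true, time.Second, true)
//	lh.SetRemoteAllowList(remoteAllowList)
//	lh.SetLocalAllowList(localAllowList)
//	go lh.LhUpdateWorker(encWriter)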