From db1b3ac368f4252efb5575fd9c2f790cf27f547f Mon Sep 17 00:00:00 2001 From: ziggie Date: Wed, 8 May 2024 20:25:49 +0100 Subject: [PATCH] multi: Allow interrupt of server startup. This commit does two things. It starts up the server in a way that it can be interrupted and shutdown gracefully. Moreover it makes sure that subsystems clean themselves up when they fail to start. This makes sure that depending subsytems can shutdown gracefully as well and the shutdown process is not stuck. --- lnd.go | 24 +++++++++++++-- server.go | 90 ++++++++++++++++++++++++++++--------------------------- 2 files changed, 68 insertions(+), 46 deletions(-) diff --git a/lnd.go b/lnd.go index 0a85e1bf38..037f93724b 100644 --- a/lnd.go +++ b/lnd.go @@ -675,10 +675,30 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg, // With all the relevant chains initialized, we can finally start the // server itself. - if err := server.Start(); err != nil { + errChan := make(chan error) + go func() { + errChan <- server.Start() + }() + + defer func() { + err := server.Stop() + if err != nil { + ltndLog.Warnf("Stopping the server including all "+ + "its subsystems failed with %v", err) + } + }() + + select { + case err := <-errChan: + if err == nil { + break + } + return mkErr("unable to start server: %v", err) + + case <-interceptor.ShutdownChannel(): + return nil } - defer server.Stop() // We transition the server state to Active, as the server is up. interceptorChain.SetServerActive() diff --git a/server.go b/server.go index 6ab197f86a..8e7e2f5e25 100644 --- a/server.go +++ b/server.go @@ -1860,6 +1860,8 @@ func (c cleaner) run() { // Start starts the main daemon server, all requested listeners, and any helper // goroutines. // NOTE: This function is safe for concurrent access. +// +//nolint:funlen func (s *server) Start() error { var startErr error @@ -1869,26 +1871,26 @@ func (s *server) Start() error { cleanup := cleaner{} s.start.Do(func() { + cleanup = cleanup.add(s.customMessageServer.Stop) if err := s.customMessageServer.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.customMessageServer.Stop) if s.hostAnn != nil { + cleanup = cleanup.add(s.hostAnn.Stop) if err := s.hostAnn.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.hostAnn.Stop) } if s.livenessMonitor != nil { + cleanup = cleanup.add(s.livenessMonitor.Stop) if err := s.livenessMonitor.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.livenessMonitor.Stop) } // Start the notification server. This is used so channel @@ -1896,161 +1898,158 @@ func (s *server) Start() error { // transaction reaches a sufficient number of confirmations, or // when the input for the funding transaction is spent in an // attempt at an uncooperative close by the counterparty. + cleanup = cleanup.add(s.sigPool.Stop) if err := s.sigPool.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.sigPool.Stop) + cleanup = cleanup.add(s.writePool.Stop) if err := s.writePool.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.writePool.Stop) + cleanup = cleanup.add(s.readPool.Stop) if err := s.readPool.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.readPool.Stop) + cleanup = cleanup.add(s.cc.ChainNotifier.Stop) if err := s.cc.ChainNotifier.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.cc.ChainNotifier.Stop) + cleanup = cleanup.add(s.cc.BestBlockTracker.Stop) if err := s.cc.BestBlockTracker.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.cc.BestBlockTracker.Stop) + cleanup = cleanup.add(s.channelNotifier.Stop) if err := s.channelNotifier.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.channelNotifier.Stop) + cleanup = cleanup.add(func() error { + return s.peerNotifier.Stop() + }) if err := s.peerNotifier.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(func() error { - return s.peerNotifier.Stop() - }) + + cleanup = cleanup.add(s.htlcNotifier.Stop) if err := s.htlcNotifier.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.htlcNotifier.Stop) if s.towerClientMgr != nil { + cleanup = cleanup.add(s.towerClientMgr.Stop) if err := s.towerClientMgr.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.towerClientMgr.Stop) } + cleanup = cleanup.add(s.txPublisher.Stop) if err := s.txPublisher.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(func() error { - s.txPublisher.Stop() - return nil - }) + cleanup = cleanup.add(s.sweeper.Stop) if err := s.sweeper.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.sweeper.Stop) + cleanup = cleanup.add(s.utxoNursery.Stop) if err := s.utxoNursery.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.utxoNursery.Stop) + cleanup = cleanup.add(s.breachArbitrator.Stop) if err := s.breachArbitrator.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.breachArbitrator.Stop) + cleanup = cleanup.add(s.fundingMgr.Stop) if err := s.fundingMgr.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.fundingMgr.Stop) // htlcSwitch must be started before chainArb since the latter // relies on htlcSwitch to deliver resolution message upon // start. + cleanup = cleanup.add(s.htlcSwitch.Stop) if err := s.htlcSwitch.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.htlcSwitch.Stop) + cleanup = cleanup.add(s.interceptableSwitch.Stop) if err := s.interceptableSwitch.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.interceptableSwitch.Stop) + cleanup = cleanup.add(s.chainArb.Stop) if err := s.chainArb.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.chainArb.Stop) - if err := s.authGossiper.Start(); err != nil { + cleanup = cleanup.add(s.chanRouter.Stop) + if err := s.chanRouter.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.authGossiper.Stop) - if err := s.chanRouter.Start(); err != nil { + // The authGossiper depends on the chanRouter and therefore + // should be started after it. + cleanup = cleanup.add(s.authGossiper.Stop) + if err := s.authGossiper.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.chanRouter.Stop) + cleanup = cleanup.add(s.invoices.Stop) if err := s.invoices.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.invoices.Stop) + cleanup = cleanup.add(s.sphinx.Stop) if err := s.sphinx.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.sphinx.Stop) + cleanup = cleanup.add(s.chanStatusMgr.Stop) if err := s.chanStatusMgr.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.chanStatusMgr.Stop) + cleanup = cleanup.add(s.chanEventStore.Stop) if err := s.chanEventStore.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(func() error { - s.chanEventStore.Stop() - return nil - }) - s.missionControl.RunStoreTicker() cleanup.add(func() error { s.missionControl.StopStoreTicker() return nil }) + s.missionControl.RunStoreTicker() // Before we start the connMgr, we'll check to see if we have // any backups to recover. We do this now as we want to ensure @@ -2084,18 +2083,18 @@ func (s *server) Start() error { } } + cleanup = cleanup.add(s.chanSubSwapper.Stop) if err := s.chanSubSwapper.Start(); err != nil { startErr = err return } - cleanup = cleanup.add(s.chanSubSwapper.Stop) if s.torController != nil { + cleanup = cleanup.add(s.torController.Stop) if err := s.createNewHiddenService(); err != nil { startErr = err return } - cleanup = cleanup.add(s.torController.Stop) } if s.natTraversal != nil { @@ -2104,11 +2103,11 @@ func (s *server) Start() error { } // Start connmgr last to prevent connections before init. - s.connMgr.Start() cleanup = cleanup.add(func() error { s.connMgr.Stop() return nil }) + s.connMgr.Start() // If peers are specified as a config option, we'll add those // peers first. @@ -2295,9 +2294,9 @@ func (s *server) Stop() error { if err := s.sweeper.Stop(); err != nil { srvrLog.Warnf("failed to stop sweeper: %v", err) } - - s.txPublisher.Stop() - + if err := s.txPublisher.Stop(); err != nil { + srvrLog.Warnf("failed to stop txPublisher: %v", err) + } if err := s.channelNotifier.Stop(); err != nil { srvrLog.Warnf("failed to stop channelNotifier: %v", err) } @@ -2317,7 +2316,10 @@ func (s *server) Stop() error { srvrLog.Warnf("Unable to stop BestBlockTracker: %v", err) } - s.chanEventStore.Stop() + if err := s.chanEventStore.Stop(); err != nil { + srvrLog.Warnf("Unable to stop ChannelEventStore: %v", + err) + } s.missionControl.StopStoreTicker() // Disconnect from each active peers to ensure that