diff --git a/Documentation/configuration.md b/Documentation/configuration.md index e78d91a15..7142c19ef 100644 --- a/Documentation/configuration.md +++ b/Documentation/configuration.md @@ -37,7 +37,7 @@ configuration files. * `-peer-ca-file` - The path of the CAFile. Enables client/peer cert authentication when present. * `-peer-cert-file` - The cert file of the server. * `-peer-key-file` - The key file of the server. -* `-snapshot` - Open or close snapshot. Defaults to `false`. +* `-snapshot=false` - Disable log snapshots. Defaults to `true`. * `-v` - Enable verbose logging. Defaults to `false`. * `-vv` - Enable very verbose logging. Defaults to `false`. * `-version` - Print the version and exit. diff --git a/Documentation/tuning.md b/Documentation/tuning.md index 3d59c0394..607e7bc8e 100644 --- a/Documentation/tuning.md +++ b/Documentation/tuning.md @@ -47,30 +47,16 @@ election_timeout = 100 The values are specified in milliseconds. -### Enabling Snapshots +### Snapshots -By default, the Raft protocol appends all etcd changes to a log file. -This works well for smaller installations but etcd clusters that are heavily used can see the log grow significantly in size. +etcd appends all key changes to a log file. +This log grows forever and is a complete linear history of every change made to the keys. +A complete history works well for lightly used clusters but clusters that are heavily used would carry around a large log. -Snapshots provide a way for etcd to compact the log by saving the current state of the system and removing old logs. -You can enable snapshotting by adding the following to your command line: +To avoid having a huge log etcd makes periodic snapshots. +These snapshots provide a way for etcd to compact the log by saving the current state of the system and removing old logs. 
-```sh -# Command line arguments: -$ etcd -snapshot - -# Environment variables: -$ ETCD_SNAPSHOT=true etcd -``` - -You can also enable snapshotting within the configuration file: - -```toml -snapshot = true -``` - - -### Additional Snapshot Tuning +### Snapshot Tuning Creating snapshots can be expensive so they're only created after a given number of changes to etcd. By default, snapshots will be made after every 10,000 changes. @@ -78,15 +64,30 @@ If etcd's memory usage and disk usage are too high, you can lower the snapshot t ```sh # Command line arguments: -$ etcd -snapshot -snapshot-count=5000 +$ etcd -snapshot-count=5000 # Environment variables: -$ ETCD_SNAPSHOT=true ETCD_SNAPSHOT_COUNT=5000 etcd +$ ETCD_SNAPSHOT_COUNT=5000 etcd ``` Or you can change the setting in the configuration file: ```toml -snapshot = true snapshot_count = 5000 ``` + +You can also disable snapshotting by adding the following to your command line: + +```sh +# Command line arguments: +$ etcd -snapshot=false + +# Environment variables: +$ ETCD_SNAPSHOT=false etcd +``` + +You can also disable snapshotting within the configuration file: + +```toml +snapshot = false +``` diff --git a/server/config.go b/server/config.go index 98db2e6ea..bec140d18 100644 --- a/server/config.go +++ b/server/config.go @@ -89,6 +89,7 @@ func NewConfig() *Config { c.MaxClusterSize = 9 c.MaxResultBuffer = 1024 c.MaxRetryAttempts = 3 + c.Snapshot = true c.SnapshotCount = 10000 c.Peer.Addr = "127.0.0.1:7001" c.Peer.HeartbeatTimeout = defaultHeartbeatTimeout diff --git a/server/peer_server.go b/server/peer_server.go index e8d692794..6cb16c7de 100644 --- a/server/peer_server.go +++ b/server/peer_server.go @@ -412,14 +412,25 @@ func (s *PeerServer) recordMetricEvent(event raft.Event) { (*s.metrics).Timer(name).Update(value) } +// logSnapshot logs the outcome of a snapshot attempt. 
+func (s *PeerServer) logSnapshot(err error, currentIndex, count uint64) { + info := fmt.Sprintf("%s: snapshot of %d events at index %d", s.Config.Name, count, currentIndex) + + if err != nil { + log.Infof("%s attempted and failed: %v", info, err) + } else { + log.Infof("%s completed", info) + } +} + func (s *PeerServer) monitorSnapshot() { for { time.Sleep(s.snapConf.checkingInterval) currentIndex := s.RaftServer().CommitIndex() - count := currentIndex - s.snapConf.lastIndex if uint64(count) > s.snapConf.snapshotThr { - s.raftServer.TakeSnapshot() + err := s.raftServer.TakeSnapshot() + s.logSnapshot(err, currentIndex, count) s.snapConf.lastIndex = currentIndex } } diff --git a/server/usage.go b/server/usage.go index 4635711f1..55762f28e 100644 --- a/server/usage.go +++ b/server/usage.go @@ -52,7 +52,7 @@ Other Options: -max-result-buffer Max size of the result buffer. -max-retry-attempts Number of times a node will try to join a cluster. -max-cluster-size Maximum number of nodes in the cluster. - -snapshot Open or close the snapshot. + -snapshot=false Disable log snapshots -snapshot-count Number of transactions before issuing a snapshot. `