Merge pull request #507 from philips/turn-snapshots-on-by-default

feat(*): enable snapshots by default
2014-02-05 09:08:43 -08:00 · 2014-02-05 09:08:43 -08:00 · 9e43e726a9
parent 03cadc543f 9a0ddb3760
commit 9e43e726a9
5 changed files with 41 additions and 28 deletions
--- a/Documentation/configuration.md
+++ b/Documentation/configuration.md
@ -37,7 +37,7 @@ configuration files.
 * `-peer-ca-file` - The path of the CAFile. Enables client/peer cert authentication when present.
 * `-peer-cert-file` - The cert file of the server.
 * `-peer-key-file` - The key file of the server.
-* `-snapshot` - Open or close snapshot. Defaults to `false`.
+* `-snapshot=false` - Disable log snapshots. Defaults to `true`.
 * `-v` - Enable verbose logging. Defaults to `false`.
 * `-vv` - Enable very verbose logging. Defaults to `false`.
 * `-version` - Print the version and exit.
--- a/Documentation/tuning.md
+++ b/Documentation/tuning.md
@ -47,30 +47,16 @@ election_timeout = 100
 The values are specified in milliseconds.


-### Enabling Snapshots
+### Snapshots

-By default, the Raft protocol appends all etcd changes to a log file.
-This works well for smaller installations but etcd clusters that are heavily used can see the log grow significantly in size.
+etcd appends all key changes to a log file.
+This log grows forever and is a complete linear history of every change made to the keys.
+A complete history works well for lightly used clusters, but heavily used clusters would carry around a large log.

-Snapshots provide a way for etcd to compact the log by saving the current state of the system and removing old logs.
-You can enable snapshotting by adding the following to your command line:
+To avoid having a huge log, etcd makes periodic snapshots.
+These snapshots provide a way for etcd to compact the log by saving the current state of the system and removing old logs.

-```sh
-# Command line arguments:
-$ etcd -snapshot
-
-# Environment variables:
-$ ETCD_SNAPSHOT=true etcd
-```
-
-You can also enable snapshotting within the configuration file:
-
-```toml
-snapshot = true
-```
-
-
-### Additional Snapshot Tuning
+### Snapshot Tuning

 Creating snapshots can be expensive so they're only created after a given number of changes to etcd.
 By default, snapshots will be made after every 10,000 changes.
@ -78,15 +64,30 @@ If etcd's memory usage and disk usage are too high, you can lower the snapshot t

 ```sh
 # Command line arguments:
-$ etcd -snapshot -snapshot-count=5000
+$ etcd -snapshot-count=5000

 # Environment variables:
-$ ETCD_SNAPSHOT=true ETCD_SNAPSHOT_COUNT=5000 etcd
+$ ETCD_SNAPSHOT_COUNT=5000 etcd
 ```

 Or you can change the setting in the configuration file:

 ```toml
-snapshot = true
 snapshot_count = 5000
 ```
+
+You can also disable snapshotting by adding the following to your command line:
+
+```sh
+# Command line arguments:
+$ etcd -snapshot=false
+
+# Environment variables:
+$ ETCD_SNAPSHOT=false etcd
+```
+
+You can also disable snapshotting within the configuration file:
+
+```toml
+snapshot = false
+```
--- a/server/config.go
+++ b/server/config.go
@ -89,6 +89,7 @@ func NewConfig() *Config {
 	c.MaxClusterSize = 9
 	c.MaxResultBuffer = 1024
 	c.MaxRetryAttempts = 3
+	c.Snapshot = true
 	c.SnapshotCount = 10000
 	c.Peer.Addr = "127.0.0.1:7001"
 	c.Peer.HeartbeatTimeout = defaultHeartbeatTimeout
--- a/server/peer_server.go
+++ b/server/peer_server.go
@ -412,14 +412,25 @@ func (s *PeerServer) recordMetricEvent(event raft.Event) {
 	(*s.metrics).Timer(name).Update(value)
 }

+// logSnapshot logs the outcome of a snapshot attempt: completion, or failure along with its error.
+func (s *PeerServer) logSnapshot(err error, currentIndex, count uint64) {
+	info := fmt.Sprintf("%s: snapshot of %d events at index %d", s.Config.Name, count, currentIndex)
+
+	if err != nil {
+		log.Infof("%s attempted and failed: %v", info, err)
+	} else {
+		log.Infof("%s completed", info)
+	}
+}
+
 func (s *PeerServer) monitorSnapshot() {
 	for {
 		time.Sleep(s.snapConf.checkingInterval)
 		currentIndex := s.RaftServer().CommitIndex()
-
 		count := currentIndex - s.snapConf.lastIndex
 		if uint64(count) > s.snapConf.snapshotThr {
-			s.raftServer.TakeSnapshot()
+			err := s.raftServer.TakeSnapshot()
+			s.logSnapshot(err, currentIndex, count)
 			s.snapConf.lastIndex = currentIndex
 		}
 	}
--- a/server/usage.go
+++ b/server/usage.go
@ -52,7 +52,7 @@ Other Options:
  -max-result-buffer   Max size of the result buffer.
  -max-retry-attempts  Number of times a node will try to join a cluster.
  -max-cluster-size    Maximum number of nodes in the cluster.
-  -snapshot            Open or close the snapshot.
+  -snapshot=false      Disable log snapshots.
  -snapshot-count      Number of transactions before issuing a snapshot.
 `