Merge pull request #507 from philips/turn-snapshots-on-by-default
feat(*): enable snapshots by default (release-0.4)
commit
9e43e726a9
|
@ -37,7 +37,7 @@ configuration files.
|
|||
* `-peer-ca-file` - The path of the CAFile. Enables client/peer cert authentication when present.
|
||||
* `-peer-cert-file` - The cert file of the server.
|
||||
* `-peer-key-file` - The key file of the server.
|
||||
* `-snapshot` - Open or close snapshot. Defaults to `false`.
|
||||
* `-snapshot=false` - Disable log snapshots. Defaults to `true`.
|
||||
* `-v` - Enable verbose logging. Defaults to `false`.
|
||||
* `-vv` - Enable very verbose logging. Defaults to `false`.
|
||||
* `-version` - Print the version and exit.
|
||||
|
|
|
@ -47,30 +47,16 @@ election_timeout = 100
|
|||
The values are specified in milliseconds.
|
||||
|
||||
|
||||
### Enabling Snapshots
|
||||
### Snapshots
|
||||
|
||||
By default, the Raft protocol appends all etcd changes to a log file.
|
||||
This works well for smaller installations but etcd clusters that are heavily used can see the log grow significantly in size.
|
||||
etcd appends all key changes to a log file.
|
||||
This log grows forever and is a complete linear history of every change made to the keys.
|
||||
A complete history works well for lightly used clusters but clusters that are heavily used would carry around a large log.
|
||||
|
||||
Snapshots provide a way for etcd to compact the log by saving the current state of the system and removing old logs.
|
||||
You can enable snapshotting by adding the following to your command line:
|
||||
To avoid having a huge log, etcd makes periodic snapshots.
|
||||
These snapshots provide a way for etcd to compact the log by saving the current state of the system and removing old logs.
|
||||
|
||||
```sh
|
||||
# Command line arguments:
|
||||
$ etcd -snapshot
|
||||
|
||||
# Environment variables:
|
||||
$ ETCD_SNAPSHOT=true etcd
|
||||
```
|
||||
|
||||
You can also enable snapshotting within the configuration file:
|
||||
|
||||
```toml
|
||||
snapshot = true
|
||||
```
|
||||
|
||||
|
||||
### Additional Snapshot Tuning
|
||||
### Snapshot Tuning
|
||||
|
||||
Creating snapshots can be expensive so they're only created after a given number of changes to etcd.
|
||||
By default, snapshots will be made after every 10,000 changes.
|
||||
|
@ -78,15 +64,30 @@ If etcd's memory usage and disk usage are too high, you can lower the snapshot t
|
|||
|
||||
```sh
|
||||
# Command line arguments:
|
||||
$ etcd -snapshot -snapshot-count=5000
|
||||
$ etcd -snapshot-count=5000
|
||||
|
||||
# Environment variables:
|
||||
$ ETCD_SNAPSHOT=true ETCD_SNAPSHOT_COUNT=5000 etcd
|
||||
$ ETCD_SNAPSHOT_COUNT=5000 etcd
|
||||
```
|
||||
|
||||
Or you can change the setting in the configuration file:
|
||||
|
||||
```toml
|
||||
snapshot = true
|
||||
snapshot_count = 5000
|
||||
```
|
||||
|
||||
You can also disable snapshotting by adding the following to your command line:
|
||||
|
||||
```sh
|
||||
# Command line arguments:
|
||||
$ etcd -snapshot=false
|
||||
|
||||
# Environment variables:
|
||||
$ ETCD_SNAPSHOT=false etcd
|
||||
```
|
||||
|
||||
You can also disable snapshotting within the configuration file:
|
||||
|
||||
```toml
|
||||
snapshot = false
|
||||
```
|
||||
|
|
|
@ -89,6 +89,7 @@ func NewConfig() *Config {
|
|||
c.MaxClusterSize = 9
|
||||
c.MaxResultBuffer = 1024
|
||||
c.MaxRetryAttempts = 3
|
||||
c.Snapshot = true
|
||||
c.SnapshotCount = 10000
|
||||
c.Peer.Addr = "127.0.0.1:7001"
|
||||
c.Peer.HeartbeatTimeout = defaultHeartbeatTimeout
|
||||
|
|
|
@ -412,14 +412,25 @@ func (s *PeerServer) recordMetricEvent(event raft.Event) {
|
|||
(*s.metrics).Timer(name).Update(value)
|
||||
}
|
||||
|
||||
// logSnapshot logs the outcome of a snapshot attempt: completion on success, or the error on failure.
|
||||
func (s *PeerServer) logSnapshot(err error, currentIndex, count uint64) {
|
||||
info := fmt.Sprintf("%s: snapshot of %d events at index %d", s.Config.Name, count, currentIndex)
|
||||
|
||||
if err != nil {
|
||||
log.Infof("%s attempted and failed: %v", info, err)
|
||||
} else {
|
||||
log.Infof("%s completed", info)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *PeerServer) monitorSnapshot() {
|
||||
for {
|
||||
time.Sleep(s.snapConf.checkingInterval)
|
||||
currentIndex := s.RaftServer().CommitIndex()
|
||||
|
||||
count := currentIndex - s.snapConf.lastIndex
|
||||
if uint64(count) > s.snapConf.snapshotThr {
|
||||
s.raftServer.TakeSnapshot()
|
||||
err := s.raftServer.TakeSnapshot()
|
||||
s.logSnapshot(err, currentIndex, count)
|
||||
s.snapConf.lastIndex = currentIndex
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,7 +52,7 @@ Other Options:
|
|||
-max-result-buffer Max size of the result buffer.
|
||||
-max-retry-attempts Number of times a node will try to join a cluster.
|
||||
-max-cluster-size Maximum number of nodes in the cluster.
|
||||
-snapshot Open or close the snapshot.
|
||||
-snapshot=false Disable log snapshots
|
||||
-snapshot-count Number of transactions before issuing a snapshot.
|
||||
`
|
||||
|
||||
|
|
Loading…
Reference in New Issue