Compare commits

...

127 Commits

Author SHA1 Message Date
Gyuho Lee
9fd7e2b802 version: 3.3.20
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-04-01 10:49:03 -07:00
Gyuho Lee
1aa5da9121 wal: add "etcd_wal_writes_bytes_total"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-04-01 10:49:00 -07:00
Gyuho Lee
89ecd19414 pkg/ioutil: add "FlushN"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-04-01 10:36:03 -07:00
Gyuho Lee
67da93f739 version: 3.3.19
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:23:32 -07:00
Gyuho Lee
a463bd54ae words: whitelist "racey"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:23:16 -07:00
Gyuho Lee
cd200b49a2 Revert "version: 3.3.19"
This reverts commit acb9746d66.
2020-03-18 17:22:12 -07:00
Gyuho Lee
508808010c travis.yaml: use Go 1.12.12
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:21:49 -07:00
Gyuho Lee
acb9746d66 version: 3.3.19
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:18:46 -07:00
Gyuho Lee
07562e235c Revert "version: 3.3.19"
This reverts commit 3f6b978b0496080e8067e0d2d1270134a9a51ef8.
2020-03-18 17:18:34 -07:00
Gyuho Lee
10d50e0662 words: whitelist "hasleader"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:18:31 -07:00
Gyuho Lee
f9c89209f3 version: 3.3.19
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:18:31 -07:00
Gyuho Lee
d9027cecf2 etcdserver/api/v3rpc: handle api version metadata, add metrics
ref.
https://github.com/etcd-io/etcd/pull/11687

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:18:31 -07:00
Gyuho Lee
6f7ee076ea clientv3: embed api version in metadata
ref.
https://github.com/etcd-io/etcd/pull/11687

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>

clientv3: fix racy writes to context key

=== RUN   TestWatchOverlapContextCancel

==================

WARNING: DATA RACE

Write at 0x00c42110dd40 by goroutine 99:

  runtime.mapassign()

      /usr/local/go/src/runtime/hashmap.go:485 +0x0

  github.com/coreos/etcd/clientv3.metadataSet()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/ctx.go:61 +0x8c

  github.com/coreos/etcd/clientv3.withVersion()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/ctx.go:47 +0x137

  github.com/coreos/etcd/clientv3.newStreamClientInterceptor.func1()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/client.go:309 +0x81

  google.golang.org/grpc.NewClientStream()

      /go/src/github.com/coreos/etcd/gopath/src/google.golang.org/grpc/stream.go:101 +0x10e

  github.com/coreos/etcd/etcdserver/etcdserverpb.(*watchClient).Watch()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/etcdserver/etcdserverpb/rpc.pb.go:3193 +0xe9

  github.com/coreos/etcd/clientv3.(*watchGrpcStream).openWatchClient()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:788 +0x143

  github.com/coreos/etcd/clientv3.(*watchGrpcStream).newWatchClient()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:700 +0x5c3

  github.com/coreos/etcd/clientv3.(*watchGrpcStream).run()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:431 +0x12b

Previous read at 0x00c42110dd40 by goroutine 130:

  reflect.maplen()

      /usr/local/go/src/runtime/hashmap.go:1165 +0x0

  reflect.Value.MapKeys()

      /usr/local/go/src/reflect/value.go:1090 +0x43b

  fmt.(*pp).printValue()

      /usr/local/go/src/fmt/print.go:741 +0x1885

  fmt.(*pp).printArg()

      /usr/local/go/src/fmt/print.go:682 +0x1b1

  fmt.(*pp).doPrintf()

      /usr/local/go/src/fmt/print.go:998 +0x1cad

  fmt.Sprintf()

      /usr/local/go/src/fmt/print.go:196 +0x77

  github.com/coreos/etcd/clientv3.streamKeyFromCtx()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:825 +0xc8

  github.com/coreos/etcd/clientv3.(*watcher).Watch()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:265 +0x426

  github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel.func1()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:959 +0x23e

Goroutine 99 (running) created at:

  github.com/coreos/etcd/clientv3.(*watcher).newWatcherGrpcStream()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:236 +0x59d

  github.com/coreos/etcd/clientv3.(*watcher).Watch()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/watch.go:278 +0xbb6

  github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel.func1()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:959 +0x23e

Goroutine 130 (running) created at:

  github.com/coreos/etcd/clientv3/integration.testWatchOverlapContextCancel()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:979 +0x76d

  github.com/coreos/etcd/clientv3/integration.TestWatchOverlapContextCancel()

      /go/src/github.com/coreos/etcd/gopath/src/github.com/coreos/etcd/clientv3/integration/watch_test.go:922 +0x44

  testing.tRunner()

      /usr/local/go/src/testing/testing.go:657 +0x107

==================

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 17:18:29 -07:00
Gyuho Lee
30aaceb1c3 etcdserver/api/etcdhttp: log server-side /health checks
ref.
https://github.com/etcd-io/etcd/pull/11704

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2020-03-18 16:28:18 -07:00
Sam Batschelet
1228d6c1e7 proxy/grpcproxy: add return on error for metrics handler
Signed-off-by: Sam Batschelet <sbatsche@redhat.com>
2020-03-16 12:06:33 -04:00
Sahdev Zala
eb1df6d9d2 Merge pull request #11665 from jingyih/automated-cherry-pick-of-#11638-upstream-release-3.3
Automated cherry pick of #11638 on release-3.3
2020-03-11 19:22:55 -04:00
jingyih
c58133b2d4 etcdctl: fix member add command
Use members information from member add response, which is
guaranteed to be up to date.
2020-02-29 07:21:17 -08:00
Jingyi Hu
e21e355d91 Merge pull request #11632 from jingyih/automated-cherry-pick-of-#11630-upstream-release-3.3
Automated cherry pick of #11630 to release-3.3
2020-02-16 08:35:50 +08:00
jingyih
7b1a92cb7c mvcc/backend: check for nil boltOpenOptions
Check if boltOpenOptions is nil before use it.
2020-02-15 00:27:08 -08:00
Wenjia
b0a4038b79 Merge pull request #11623 from jpbetz/automated-cherry-pick-of-#11613-origin-release-3.3
Automated cherry pick of #11613 to release-3.3
2020-02-13 13:15:03 -08:00
Joe Betz
b3d9e29096 mvcc/backend: Delete orphaned db.tmp files before defrag 2020-02-13 12:32:04 -08:00
Hitoshi Mitake
70853d60e7 Merge pull request #11378 from jingyih/automated-cherry-pick-of-#10218-#10468-upstream-release-3.3
Automated cherry pick of #10218 #10468 on release 3.3
2020-01-26 01:40:43 +09:00
Gyuho Lee
3c8740a793 version: 3.3.18
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-11-26 20:19:50 -08:00
Wenjia
9cd3eefd01 Merge pull request #11394 from jingyih/automated-cherry-pick-of-#11374-upstream-release-3.3
Automated cherry pick of #11374 on release 3.3
2019-11-26 15:02:17 -08:00
yoyinzyc
47d3dea2a9 mvcc: update to "etcd_debugging_mvcc_total_put_size_in_bytes" 2019-11-26 14:09:04 -08:00
yoyinzyc
aaa85715c3 mvcc: add "etcd_mvcc_put_size_in_bytes" to monitor the throughput of put request. 2019-11-26 14:07:50 -08:00
Jingyi Hu
e1508f94b6 integration: disable TestV3AuthOldRevConcurrent
Disable TestV3AuthOldRevConcurrent for now. See
https://github.com/etcd-io/etcd/pull/10468#issuecomment-463253361
2019-11-20 16:45:48 -08:00
Jingyi Hu
5a4821721e etcdserver: remove auth validation loop
Remove auth validation loop in v3_server.raftRequest(). Re-validation
when error ErrAuthOldRevision occurs should be handled on client side.
2019-11-20 16:45:48 -08:00
Maxim Vladimirskiy
95095f8406 etcdserver: Remove infinite loop in doSerialize
Once chk(ai) fails with auth.ErrAuthOldRevision it will always do,
regardless how many times you retry. So the error is better be returned
to fail the pending request and make the client re-authenticate.
2019-11-20 16:45:47 -08:00
Gyuho Lee
5cf80a6229 clientv3: fix retry/streamer error message
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-10-31 10:08:53 -07:00
Gyuho Lee
069bce1384 Merge pull request #11314 from jingyih/automated-cherry-pick-of-#11308-upstream-release-3.3
Automated cherry pick of #11308 on release-3.3
2019-10-31 10:08:08 -07:00
Jingyi Hu
7c164a8948 etcdserver: wait purge file loop during shutdown
To prevent the purge file loop from accidentally acquiring the file lock
and remove the files during server shutdowm.
2019-10-30 16:47:06 -07:00
Gyuho Lee
ff5fb05bec scripts/release: list GPG key only when tagging is needed
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-10-23 11:13:05 -07:00
Gyuho Lee
a977795f2d Merge pull request #11253 from YoyinZyc/automated-cherry-pick-of-#11247-origin-release-3.3
Automated cherry pick of #11247
2019-10-18 10:30:16 -07:00
Gyuho Lee
aedfe5458a Merge pull request #11261 from wenjiaswe/automated-cherry-pick-of-#10257-upstream-release-3.3
cherry pick "etcd_cluster_version" metric" (#10257, #11233, #11254, #11265) to release-3.3
2019-10-17 12:40:08 -07:00
Wenjia Zhang
e7888805e1 Add cluster version fix #11233, #11254, #11265 2019-10-16 13:27:07 -07:00
Gyuho Lee
7fbfdc2b6a tests/e2e: test cluster version
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-10-15 18:07:53 -07:00
Gyuho Lee
5c19bd24f0 etcdserver/*: add "etcd_cluster_version" metric
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-10-15 18:05:33 -07:00
Joe Betz
683a643fba Add version, tag and branch checks to release script 2019-10-14 12:57:35 -07:00
Gyuho Lee
660dc83e19 Merge pull request #11245 from YoyinZyc/prevent-darwin-build-3.3
scripts: avoid release builds on darwin machine.
2019-10-11 12:37:34 -07:00
yoyinzyc
ffcddac2ff scripts: avoid release builds on darwin machine. 2019-10-11 11:27:57 -07:00
Joe Betz
6d8052314b version: v3.3.17 2019-10-11 10:23:13 -07:00
Gyuho Lee
5e4d852e95 Merge pull request #11236 from YoyinZyc/change-git-clone
scripts: use https for git clone.
2019-10-10 22:52:46 -07:00
yoyinzyc
3827d6bd2d scripts: use https for git clone. 2019-10-10 16:46:37 -07:00
Joe Betz
3fae828623 vendor: v3.3.16 2019-10-10 10:59:40 -07:00
Gyuho Lee
011bd86bd6 Merge pull request #11196 from andyliuliming/release-3.3-cherry
etcdserver: cherry-pick skip client san verification option for 3.3 version.
2019-10-09 09:40:58 -07:00
Andy Liu
a311a80699 helper document update. 2019-10-09 13:15:56 +08:00
Joe Betz
ef61a56c0c Merge pull request #11215 from jpbetz/automated-cherry-pick-of-#11184-origin-release-3.3
Automated cherry pick of #11184
2019-10-08 18:47:12 -07:00
Joe Betz
a2f585d80c clientv3: Set authority used in cert checks to host of endpoint 2019-10-08 18:25:08 -07:00
Joe Betz
7558b41ccd Merge pull request #11216 from jpbetz/automated-cherry-pick-of-#11211-origin-release-3.3
Automated cherry pick of #11211
2019-10-08 17:16:01 -07:00
Joe Betz
4b227b6e71 clientv3: Replace endpoint.ParseHostPort with net.SplitHostPort to fix IPv6 client endpoints 2019-10-08 16:12:13 -07:00
Gyuho Lee
1be7ab4ee2 Merge pull request #11201 from jingyih/automated-cherry-pick-of-#11194-origin-release-3.3
Automated cherry pick of #11194 on release-3.3
2019-10-03 16:03:30 -07:00
Jingyi Hu
02a27c0851 etcdctl: fix member add command 2019-10-03 13:52:57 -07:00
Andy Liu
d851911f86 etcdserver: add unit test. 2019-10-03 16:06:30 +08:00
Andy Liu
86b1686c7e etcdserver: cherry-pick skip client san verification option for 3.3 version.
Co-authored-by: Martin Weindel <martin.weindel@sap.com>
Co-authored-by: Jingyi Hu <jingyih@google.com>
Co-authored-by: Liming Liu <andyliuliming@outlook.com>
2019-10-03 10:12:22 +08:00
Jingyi Hu
943832af44 Merge pull request #11134 from jingyih/automated-cherry-pick-of-#11126-origin-release-3.3
Automated cherry pick of #11126 on release-3.3
2019-09-07 00:03:44 -07:00
Jingyi Hu
8a8efa73e6 mvcc: add store revision metrics
Add experimental metrics etcd_debugging_mvcc_current_revision and
etcd_debugging_mvcc_compact_revision.
2019-09-06 17:19:46 -07:00
Gyuho Lee
a4f18a40b0 Merge pull request #11056 from jingyih/update_bbolt
vendor: update bbolt to v1.3.3
2019-08-20 09:08:39 -07:00
Gyuho Lee
7f067ceafd Merge pull request #11055 from jingyih/fix_gofmt_bom
*: Fix gofmt bom
2019-08-19 22:39:08 -07:00
Jingyi Hu
9244d2ba86 vendor: update bbolt to v1.3.3 2019-08-19 20:55:55 -07:00
Jingyi Hu
ffb43dff5b bom: regenerate 2019-08-19 20:35:44 -07:00
Jingyi Hu
74cf4ae9a2 scripts: fix updatebom.sh
Remove "./cmd/vendor".
2019-08-19 20:29:49 -07:00
Jingyi Hu
81fc7c23c2 *: fix gofmt 2019-08-19 20:22:15 -07:00
Gyuho Lee
94745a4eed version: 3.3.15
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-19 11:26:22 -07:00
Gyuho Lee
e94188bc55 vendor: regenerate
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-19 11:26:16 -07:00
Gyuho Lee
aa1e17aac3 go.mod: remove, change back to "glide"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-19 11:26:12 -07:00
Gyuho Lee
5cf5d88a18 version: 3.3.14
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-16 16:21:44 -07:00
Gyuho Lee
af8cb6c5b9 Documentation/upgrades: special upgrade guides for >= 3.3.14
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-16 16:21:11 -07:00
Gyuho Lee
9dd98b7c90 version: 3.3.14-rc.0
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-15 15:03:43 -07:00
Gyuho Lee
2f3aa893ec vendor: regenerate
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-15 15:02:26 -07:00
Gyuho Lee
d65219c1ef go.mod: regenerate
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-15 15:02:03 -07:00
Gyuho Lee
b9c976eed8 gitignore: track vendor directory
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-15 15:00:46 -07:00
Gyuho Lee
b196734290 *: test with Go 1.12.9
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-15 14:42:32 -07:00
Gyuho Lee
1aa4af83c0 version: 3.3.14-beta.0
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 11:52:26 -07:00
Gyuho Lee
95a5c57754 tests/e2e: add missing curl
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 11:31:53 -07:00
Gyuho Lee
082c5e0705 e2e: move
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 11:22:39 -07:00
Gyuho Lee
33668f4eff test: do not run "v2store" tests
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 11:12:46 -07:00
Gyuho Lee
c7c09c61d0 test: bump up timeout for e2e tests
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:52:16 -07:00
Gyuho Lee
4f1e65418f travis: fix functional tests
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:40:16 -07:00
Gyuho Lee
e16b21be7b functional: add back, travis
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
0e96b34d9f auth: fix tests
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
3c2b1cd76a travis: do not run functional for now
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
37d10dd8b8 travis: skip windows build
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
84508f7c98 test: fix repo path
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
be3babffb7 tests/e2e: fix
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
61065db065 build: remove tools
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
0ddda8c72e integration: fix tests
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
b889245252 integration: fix "HashKVRequest"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
6e37ece3b9 functional: update
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
f68fac655e travis.yml: fix, run e2e
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
dbfc7bd612 integration: update
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
e5c2dff346 etcdserver: detect leader change on reads
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:10 -07:00
Gyuho Lee
9561f6b3b6 clientv3: rewrite based on 3.4
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 09:32:06 -07:00
Gyuho Lee
a317433854 raft: fix compile error in "Panic"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 04:05:07 -07:00
Gyuho Lee
7eb9a29e26 pkg/*: add
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 04:05:04 -07:00
Gyuho Lee
5a678bb4e3 etcdserver/api/v3rpc: support watch fragmentation
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 01:22:29 -07:00
Gyuho Lee
92a750432f tests: update
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 01:22:29 -07:00
Gyuho Lee
d167714b36 *: regenerate proto
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 01:22:23 -07:00
Gyuho Lee
9f7294f1e0 etcdserver/etcdserverpb/rpc.proto: add watch progress/fragment
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 01:17:29 -07:00
Gyuho Lee
830bba337f vendor: regenerate, upgrade gRPC to 1.23.0
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 01:16:44 -07:00
Gyuho Lee
27cf72b231 go.mod: migrate to Go module
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 01:16:09 -07:00
Gyuho Lee
d7fc66bcbb scripts: update release, genproto, dep
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 01:14:34 -07:00
Gyuho Lee
cc1591aa4e Makefile/build: sync with 3.4 branch
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-14 01:13:22 -07:00
Gyuho Lee
08124105ad *: use new adt.IntervalTree interface
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:15:49 -07:00
Gyuho Lee
ffe90b9ff3 pkg/adt: remove TODO
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:02:28 -07:00
xkey
036bd1ab09 pkg/adt: fix interval tree black-height property based on rbtree
Author: xkey <xk33430@ly.com>
ref. https://github.com/etcd-io/etcd/pull/10978

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:02:21 -07:00
Gyuho Lee
33e4877b56 pkg/adt: document textbook implementation with pseudo-code
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:02:15 -07:00
Gyuho Lee
c25f746f77 pkg/adt: mask test failure, add TODO
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:02:07 -07:00
Gyuho Lee
f4341fd35c pkg/adt: add "IntervalTree.Delete" failure case
Described in https://github.com/etcd-io/etcd/issues/10877.

"black-height" property: Every path from a node to any descendant leaf node must have the same number of black nodes.

Expected

    After deleting 11 (requires rebalancing):
                            [510,511]
                             /      \
                   ----------        --------------------------
                  /                                            \
              [383,384]                                       [830,831]
              /       \                                      /          \
             /         \                                    /            \
      [261,262](red)  [410,411]                     [647,648]           [899,900](red)
          /               \                              \                      /    \
         /                 \                              \                    /      \
      [82,83]           [292,293]                      [815,816](red)   [888,889]    [972,973]
            \                                                           /
             \                                                         /
          [238,239](red)                                       [953,954](red)

Got

    After deleting 11 (requires rebalancing):
                            [510,511]
                             /      \
                   ----------        --------------------------
                  /                                            \
              [82,83]                                       [830,831]
                    \                                      /          \
                     \                                    /            \
                  [383,384]                        [647,648]            [899,900]
                  /       \                              \                  /    \
                 /         \                              \                /      \
           [261,262]      [410,411]                      [815,816]   [888,889]    [972,973]
             /   \                                                                  /
            /     \                                                                /
     [238,239]   [292,293]                                                  [953,954]

This violates "black-height" property.

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:01:58 -07:00
Gyuho Lee
b3152365bb pkg/adt: test node "11" deletion
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:01:51 -07:00
Gyuho Lee
d938435e44 pkg/adt: README "IntervalTree.Delete" test case images
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:01:43 -07:00
Gyuho Lee
594e7d6627 pkg/adt: README initial commit
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:01:35 -07:00
Gyuho Lee
266214d19e pkg/adt: add "visitLevel", make "IntervalTree" interface, more tests
Make "IntervalTree" an interface to abstract range tree interface

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 11:01:16 -07:00
Gyuho Lee
0b37ae05b1 pkg: clean up code format
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2019-08-09 11:00:44 -07:00
Gyuho Lee
3aef9a1a8f travis: update
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-09 10:57:38 -07:00
Gyuho Lee
4527f4c4b0 etcdserver: add "etcd_server_snapshot_apply_inflights_total"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-08 15:13:14 -07:00
Gyuho Lee
1c8fab7365 etcdserver/api: add "etcd_network_snapshot_send_inflights_total", "etcd_network_snapshot_receive_inflights_total"
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
2019-08-08 15:12:08 -07:00
Gyuho Lee
789ff21b18 Merge pull request #10570 from sbenderli/cherry-pick-of-#8334
raft: cherry pick of #8334 to release-3.3
2019-07-23 11:42:42 -07:00
Gyuho Lee
d12f13279f Merge pull request #10827 from yznima/pr-race-3.3
Raft HTTP: fix pause/resume race condition
2019-07-23 10:59:02 -07:00
Nima Yahyazadeh
9f1d6ca1c9 Raft HTTP: fix pause/resume race condition
(cherry picked from commit b1812a410f)
2019-06-17 13:33:27 -04:00
Gyuho Lee
5832014353 Merge pull request #10793 from jingyih/automated-cherry-pick-of-#10788-origin-release-3.3
Automated cherry pick of #10788 on release-3.3
2019-06-05 14:39:55 -07:00
Jingyi Hu
d005486359 ctlv3: add missing newline in EndpointHealth
To make the output consistent with the output before #9540.
2019-06-05 14:36:57 -07:00
Gyuho Lee
89429703db Merge pull request #10782 from jingyih/cherrypick_9540_to_release3p3
ctlv3: cherry pick of #9540 to release 3.3
2019-06-04 09:55:19 -07:00
Gyuho Lee
f835a85965 ctlv3: support "write-out" for "endpoint health" command
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2019-06-03 17:01:54 -07:00
Gyuho Lee
b0babe5d1e Merge pull request #10718 from rohitsardesai83/release-3.3
etcd: Replace ghodss/yaml with sigs.k8s.io/yaml in 3.3
2019-05-29 13:47:56 -07:00
Rohit Sardesai
8ed3e70d7c etcd: Replace ghodss/yaml with sigs.k8s.io/yaml 2019-05-29 23:03:16 +05:30
shawnli
ec22eb908a raft: cherry pick of #8334 to release-3.3 2019-03-21 16:02:30 -04:00
1726 changed files with 410952 additions and 250541 deletions

1
.gitignore vendored
View File

@@ -2,7 +2,6 @@
/coverage
/covdir
/docs
/vendor
/gopath
/gopath.proto
/go-bindata

View File

@@ -6,7 +6,10 @@ sudo: required
services: docker
go:
- 1.10.8
- 1.12.12
env:
- GO111MODULE=on
notifications:
on_success: never
@@ -14,60 +17,50 @@ notifications:
env:
matrix:
- TARGET=linux-amd64-integration
- TARGET=linux-amd64-integration-1-cpu
- TARGET=linux-amd64-integration-4-cpu
- TARGET=linux-amd64-functional
- TARGET=linux-amd64-unit
- TARGET=linux-amd64-e2e
- TARGET=all-build
- TARGET=linux-386-unit
matrix:
fast_finish: true
allow_failures:
- go: 1.10.8
- go: 1.12.12
env: TARGET=linux-386-unit
exclude:
- go: tip
env: TARGET=linux-386-unit
before_install:
- if [[ $TRAVIS_GO_VERSION == 1.* ]]; then docker pull gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION}; fi
install:
- pushd cmd/etcd && go get -t -v ./... && popd
- go get -t -v -d ./...
script:
- echo "TRAVIS_GO_VERSION=${TRAVIS_GO_VERSION}"
- >
case "${TARGET}" in
linux-amd64-integration)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "cd /go/src/github.com/coreos/etcd; GOARCH=amd64 PASSES='integration' ./test"
linux-amd64-integration-1-cpu)
GOARCH=amd64 CPU=1 PASSES='integration' ./test
;;
linux-amd64-integration-4-cpu)
GOARCH=amd64 CPU=4 PASSES='integration' ./test
;;
linux-amd64-functional)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "cd /go/src/github.com/coreos/etcd; ./build && GOARCH=amd64 PASSES='functional' ./test"
./build && GOARCH=amd64 PASSES='functional' ./test
;;
linux-amd64-unit)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "cd /go/src/github.com/coreos/etcd; GOARCH=amd64 PASSES='unit' ./test"
./build && GOARCH=amd64 PASSES='unit' ./test
;;
linux-amd64-e2e)
GOARCH=amd64 PASSES='build release e2e' MANUAL_VER=v3.3.13 ./test
;;
all-build)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "cd /go/src/github.com/coreos/etcd; GOARCH=amd64 PASSES='build' ./test \
&& GOARCH=386 PASSES='build' ./test \
&& GO_BUILD_FLAGS='-v' GOOS=darwin GOARCH=amd64 ./build \
&& GO_BUILD_FLAGS='-v' GOOS=windows GOARCH=amd64 ./build \
&& GO_BUILD_FLAGS='-v' GOARCH=arm ./build \
&& GO_BUILD_FLAGS='-v' GOARCH=arm64 ./build \
&& GO_BUILD_FLAGS='-v' GOARCH=ppc64le ./build"
GOARCH=386 PASSES='build' ./test \
&& GO_BUILD_FLAGS='-v' GOOS=darwin GOARCH=amd64 ./build \
&& GO_BUILD_FLAGS='-v' GOARCH=arm ./build \
&& GO_BUILD_FLAGS='-v' GOARCH=arm64 ./build \
&& GO_BUILD_FLAGS='-v' GOARCH=ppc64le ./build
;;
linux-386-unit)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "cd /go/src/github.com/coreos/etcd; GOARCH=386 PASSES='unit' ./test"
GOARCH=386 ./build && GOARCH=386 PASSES='unit' ./test
;;
esac

4
.words
View File

@@ -25,6 +25,8 @@ healthcheck
iff
inflight
keepalive
hasleader
racey
keepalives
keyspace
linearization
@@ -41,4 +43,4 @@ too_many_pings
uncontended
unprefixed
unlisting
WithDialer

View File

@@ -1,746 +0,0 @@
## [v3.3.0](https://github.com/coreos/etcd/releases/tag/v3.3.0)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.0...v3.3.0) and [v3.3 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_3.md) for any breaking changes.
### Improved
- Use [`coreos/bbolt`](https://github.com/coreos/bbolt/releases) to replace [`boltdb/bolt`](https://github.com/boltdb/bolt#project-status).
- Fix [etcd database size grows until `mvcc: database space exceeded`](https://github.com/coreos/etcd/issues/8009).
- [Reduce memory allocation](https://github.com/coreos/etcd/pull/8428) on [Range operations](https://github.com/coreos/etcd/pull/8475).
- [Rate limit](https://github.com/coreos/etcd/pull/8099) and [randomize](https://github.com/coreos/etcd/pull/8101) lease revoke on restart or leader elections.
- Prevent [spikes in Raft proposal rate](https://github.com/coreos/etcd/issues/8096).
- Support `clientv3` balancer failover under [network faults/partitions](https://github.com/coreos/etcd/issues/8711).
- Better warning on [mismatched `--initial-cluster`](https://github.com/coreos/etcd/pull/8083) flag.
### Changed(Breaking Changes)
- Require [Go 1.9+](https://github.com/coreos/etcd/issues/6174).
- Compile with *Go 1.9.2*.
- Deprecate [`golang.org/x/net/context`](https://github.com/coreos/etcd/pull/8511).
- Require [`google.golang.org/grpc`](https://github.com/grpc/grpc-go/releases) [**`v1.7.4`**](https://github.com/grpc/grpc-go/releases/tag/v1.7.4) or [**`v1.7.5+`**](https://github.com/grpc/grpc-go/releases/tag/v1.7.5):
- Deprecate [`metadata.Incoming/OutgoingContext`](https://github.com/coreos/etcd/pull/7896).
- Deprecate `grpclog.Logger`, upgrade to [`grpclog.LoggerV2`](https://github.com/coreos/etcd/pull/8533).
- Deprecate [`grpc.ErrClientConnTimeout`](https://github.com/coreos/etcd/pull/8505) errors in `clientv3`.
- Use [`MaxRecvMsgSize` and `MaxSendMsgSize`](https://github.com/coreos/etcd/pull/8437) to limit message size, in etcd server.
- Upgrade [`github.com/grpc-ecosystem/grpc-gateway`](https://github.com/grpc-ecosystem/grpc-gateway/releases) `v1.2.2` to `v1.3.0`.
- Translate [gRPC status error in v3 client `Snapshot` API](https://github.com/coreos/etcd/pull/9038).
- Upgrade [`github.com/ugorji/go/codec`](https://github.com/ugorji/go) for v2 `client`.
- [Regenerated](https://github.com/coreos/etcd/pull/8721) v2 `client` source code with latest `ugorji/go/codec`.
- Fix [`/health` endpoint JSON output](https://github.com/coreos/etcd/pull/8312).
- v3 `etcdctl` [`lease timetolive LEASE_ID`](https://github.com/coreos/etcd/issues/9028) on expired lease now prints [`lease LEASE_ID already expired`](https://github.com/coreos/etcd/pull/9047).
- <=3.2 prints `lease LEASE_ID granted with TTL(0s), remaining(-1s)`.
### Added(`etcd`)
- Add [`--experimental-enable-v2v3`](https://github.com/coreos/etcd/pull/8407) flag to [emulate v2 API with v3](https://github.com/coreos/etcd/issues/6925).
- Add [`--experimental-corrupt-check-time`](https://github.com/coreos/etcd/pull/8420) flag to [raise corrupt alarm monitoring](https://github.com/coreos/etcd/issues/7125).
- Add [`--experimental-initial-corrupt-check`](https://github.com/coreos/etcd/pull/8554) flag to [check database hash before serving client/peer traffic](https://github.com/coreos/etcd/issues/8313).
- Add [`--max-txn-ops`](https://github.com/coreos/etcd/pull/7976) flag to [configure maximum number operations in transaction](https://github.com/coreos/etcd/issues/7826).
- Add [`--max-request-bytes`](https://github.com/coreos/etcd/pull/7968) flag to [configure maximum client request size](https://github.com/coreos/etcd/issues/7923).
- If not configured, it defaults to 1.5 MiB.
- Add [`--client-crl-file`, `--peer-crl-file`](https://github.com/coreos/etcd/pull/8124) flags for [Certificate revocation list](https://github.com/coreos/etcd/issues/4034).
- Add [`--peer-require-cn`](https://github.com/coreos/etcd/pull/8616) flag to support [CN-based auth for inter-peer connection](https://github.com/coreos/etcd/issues/8262).
- Add [`--listen-metrics-urls`](https://github.com/coreos/etcd/pull/8242) flag for additional `/metrics` endpoints.
- Support [additional (non) TLS `/metrics` endpoints for a TLS-enabled cluster](https://github.com/coreos/etcd/pull/8282).
- e.g. `--listen-metrics-urls=https://localhost:2378,http://localhost:9379` to serve `/metrics` in secure port 2378 and insecure port 9379.
- Useful for [bypassing critical APIs when monitoring etcd](https://github.com/coreos/etcd/issues/8060).
- Add [`--auto-compaction-mode`](https://github.com/coreos/etcd/pull/8123) flag to [support revision-based compaction](https://github.com/coreos/etcd/issues/8098).
- Change `--auto-compaction-retention` flag to [accept string values](https://github.com/coreos/etcd/pull/8563) with [finer granularity](https://github.com/coreos/etcd/issues/8503).
- Add [`--grpc-keepalive-min-time`, `--grpc-keepalive-interval`, `--grpc-keepalive-timeout`](https://github.com/coreos/etcd/pull/8535) flags to configure server-side keepalive policies.
- Serve [`/health` endpoint as unhealthy](https://github.com/coreos/etcd/pull/8272) when [alarm is raised](https://github.com/coreos/etcd/issues/8207).
- Provide [error information in `/health`](https://github.com/coreos/etcd/pull/8312).
- e.g. `{"health":false,"errors":["NOSPACE"]}`.
- Move [logging setup to embed package](https://github.com/coreos/etcd/pull/8810)
- Disable gRPC server log by default.
- Use [monotonic time in Go 1.9](https://github.com/coreos/etcd/pull/8507) for `lease` package.
- Warn on [empty hosts in advertise URLs](https://github.com/coreos/etcd/pull/8384).
- Address [advertise client URLs accepts empty hosts](https://github.com/coreos/etcd/issues/8379).
- etcd `v3.4` will exit on this error.
- e.g. `--advertise-client-urls=http://:2379`.
- Warn on [shadowed environment variables](https://github.com/coreos/etcd/pull/8385).
- Address [error on shadowed environment variables](https://github.com/coreos/etcd/issues/8380).
- etcd `v3.4` will exit on this error.
### Added(API)
- Support [ranges in transaction comparisons](https://github.com/coreos/etcd/pull/8025) for [disconnected linearized reads](https://github.com/coreos/etcd/issues/7924).
- Add [nested transactions](https://github.com/coreos/etcd/pull/8102) to extend [proxy use cases](https://github.com/coreos/etcd/issues/7857).
- Add [lease comparison target in transaction](https://github.com/coreos/etcd/pull/8324).
- Add [lease list](https://github.com/coreos/etcd/pull/8358).
- Add [hash by revision](https://github.com/coreos/etcd/pull/8263) for [better corruption checking against boltdb](https://github.com/coreos/etcd/issues/8016).
### Added(`etcd/clientv3`)
- Add [health balancer](https://github.com/coreos/etcd/pull/8545) to fix [watch API hangs](https://github.com/coreos/etcd/issues/7247), improve [endpoint switch under network faults](https://github.com/coreos/etcd/issues/7941).
- [Refactor balancer](https://github.com/coreos/etcd/pull/8840) and add [client-side keepalive pings](https://github.com/coreos/etcd/pull/8199) to handle [network partitions](https://github.com/coreos/etcd/issues/8711).
- Add [`MaxCallSendMsgSize` and `MaxCallRecvMsgSize`](https://github.com/coreos/etcd/pull/9047) fields to [`clientv3.Config`](https://godoc.org/github.com/coreos/etcd/clientv3#Config).
- Fix [exceeded response size limit error in client-side](https://github.com/coreos/etcd/issues/9043).
- Address [kubernetes#51099](https://github.com/kubernetes/kubernetes/issues/51099).
- `MaxCallSendMsgSize` default value is 2 MiB, if not configured.
- `MaxCallRecvMsgSize` default value is `math.MaxInt32`, if not configured.
- Accept [`Compare_LEASE`](https://github.com/coreos/etcd/pull/8324) in [`clientv3.Compare`](https://godoc.org/github.com/coreos/etcd/clientv3#Compare).
- Add [`LeaseValue` helper](https://github.com/coreos/etcd/pull/8488) to `Cmp` `LeaseID` values in `Txn`.
- Add [`MoveLeader`](https://github.com/coreos/etcd/pull/8153) to `Maintenance`.
- Add [`HashKV`](https://github.com/coreos/etcd/pull/8351) to `Maintenance`.
- Add [`Leases`](https://github.com/coreos/etcd/pull/8358) to `Lease`.
- Add [`clientv3/ordering`](https://github.com/coreos/etcd/pull/8092) for enforce [ordering in serialized requests](https://github.com/coreos/etcd/issues/7623).
### Added(v2 `etcdctl`)
- Add [`backup --with-v3`](https://github.com/coreos/etcd/pull/8479) flag.
### Added(v3 `etcdctl`)
- Add [`--discovery-srv`](https://github.com/coreos/etcd/pull/8462) flag.
- Add [`--keepalive-time`, `--keepalive-timeout`](https://github.com/coreos/etcd/pull/8663) flags.
- Add [`lease list`](https://github.com/coreos/etcd/pull/8358) command.
- Add [`lease keep-alive --once`](https://github.com/coreos/etcd/pull/8775) flag.
- Make [`lease timetolive LEASE_ID`](https://github.com/coreos/etcd/issues/9028) on expired lease print [`lease LEASE_ID already expired`](https://github.com/coreos/etcd/pull/9047).
- <=3.2 prints `lease LEASE_ID granted with TTL(0s), remaining(-1s)`.
- Add [`defrag --data-dir`](https://github.com/coreos/etcd/pull/8367) flag.
- Add [`move-leader`](https://github.com/coreos/etcd/pull/8153) command.
- Add [`endpoint hashkv`](https://github.com/coreos/etcd/pull/8351) command.
- Add [`endpoint --cluster`](https://github.com/coreos/etcd/pull/8143) flag, equivalent to [v2 `etcdctl cluster-health`](https://github.com/coreos/etcd/issues/8117).
- Make `endpoint health` command terminate with [non-zero exit code on unhealthy status](https://github.com/coreos/etcd/pull/8342).
- Add [`lock --ttl`](https://github.com/coreos/etcd/pull/8370) flag.
- Support [`watch [key] [range_end] -- [exec-command…]`](https://github.com/coreos/etcd/pull/8919), equivalent to [v2 `etcdctl exec-watch`](https://github.com/coreos/etcd/issues/8814).
- Enable [`clientv3.WithRequireLeader(context.Context)` for `watch`](https://github.com/coreos/etcd/pull/8672) command.
- Print [`"del"` instead of `"delete"`](https://github.com/coreos/etcd/pull/8297) in `txn` interactive mode.
- Print [`ETCD_INITIAL_ADVERTISE_PEER_URLS` in `member add`](https://github.com/coreos/etcd/pull/8332).
### Added(metrics)
- Add [`etcd --listen-metrics-urls`](https://github.com/coreos/etcd/pull/8242) flag for additional `/metrics` endpoints.
- Useful for [bypassing critical APIs when monitoring etcd](https://github.com/coreos/etcd/issues/8060).
- Add [`etcd_server_version`](https://github.com/coreos/etcd/pull/8960) Prometheus metric.
- To replace [Kubernetes `etcd-version-monitor`](https://github.com/coreos/etcd/issues/8948).
- Add [`etcd_debugging_mvcc_db_compaction_keys_total`](https://github.com/coreos/etcd/pull/8280) Prometheus metric.
- Add [`etcd_debugging_server_lease_expired_total`](https://github.com/coreos/etcd/pull/8064) Prometheus metric.
- To improve [lease revoke monitoring](https://github.com/coreos/etcd/issues/8050).
- Document [Prometheus 2.0 rules](https://github.com/coreos/etcd/pull/8879).
- Initialize gRPC server [metrics with zero values](https://github.com/coreos/etcd/pull/8878).
### Added(`grpc-proxy`)
- Add [`grpc-proxy start --experimental-leasing-prefix`](https://github.com/coreos/etcd/pull/8341) flag:
- For disconnected linearized reads.
- Based on [V system leasing](https://github.com/coreos/etcd/issues/6065).
- See ["Disconnected consistent reads with etcd" blog post](https://coreos.com/blog/coreos-labs-disconnected-consistent-reads-with-etcd).
- Add [`grpc-proxy start --experimental-serializable-ordering`](https://github.com/coreos/etcd/pull/8315) flag.
- To ensure serializable reads have monotonically increasing store revisions across endpoints.
- Add [`grpc-proxy start --metrics-addr`](https://github.com/coreos/etcd/pull/8242) flag for an additional `/metrics` endpoint.
- Set `--metrics-addr=http://[HOST]:9379` to serve `/metrics` in insecure port 9379.
- Serve [`/health` endpoint in grpc-proxy](https://github.com/coreos/etcd/pull/8322).
- Add [`grpc-proxy start --debug`](https://github.com/coreos/etcd/pull/8994) flag.
### Added(gRPC gateway)
- Replace [gRPC gateway](https://github.com/grpc-ecosystem/grpc-gateway) endpoint with [`/v3beta`](https://github.com/coreos/etcd/pull/8880).
- To deprecate [`/v3alpha`](https://github.com/coreos/etcd/issues/8125) in `v3.4`.
- Support ["authorization" token](https://github.com/coreos/etcd/pull/7999).
- Support [websocket for bi-directional streams](https://github.com/coreos/etcd/pull/8257).
- Fix [`Watch` API with gRPC gateway](https://github.com/coreos/etcd/issues/8237).
- Upgrade gRPC gateway to [v1.3.0](https://github.com/coreos/etcd/issues/8838).
### Added(`etcd/raft`)
- Add [non-voting member](https://github.com/coreos/etcd/pull/8751).
- To implement [Raft thesis 4.2.1 Catching up new servers](https://github.com/coreos/etcd/issues/8568).
- `Learner` node does not vote or promote itself.
### Added/Fixed(Security/Auth)
- Add [CRL based connection rejection](https://github.com/coreos/etcd/pull/8124) to manage [revoked certs](https://github.com/coreos/etcd/issues/4034).
- Document [TLS authentication changes](https://github.com/coreos/etcd/pull/8895):
- [Server accepts connections if IP matches, without checking DNS entries](https://github.com/coreos/etcd/pull/8223). For instance, if peer cert contains IP addresses and DNS names in Subject Alternative Name (SAN) field, and the remote IP address matches one of those IP addresses, server just accepts connection without further checking the DNS names.
- [Server supports reverse-lookup on wildcard DNS `SAN`](https://github.com/coreos/etcd/pull/8281). For instance, if peer cert contains only DNS names (no IP addresses) in Subject Alternative Name (SAN) field, server first reverse-lookups the remote IP address to get a list of names mapping to that address (e.g. `nslookup IPADDR`). Then accepts the connection if those names have a matching name with peer cert's DNS names (either by exact or wildcard match). If none is matched, server forward-lookups each DNS entry in peer cert (e.g. look up `example.default.svc` when the entry is `*.example.default.svc`), and accepts connection only when the host's resolved addresses have the matching IP address with the peer's remote IP address.
- Add [`etcd --peer-require-cn`](https://github.com/coreos/etcd/pull/8616) flag.
- To support [CommonName(CN) based auth](https://github.com/coreos/etcd/issues/8262) for inter peer connection.
- [Swap priority](https://github.com/coreos/etcd/pull/8594) of cert CommonName(CN) and username + password.
- To address ["username and password specified in the request should take priority over CN in the cert"](https://github.com/coreos/etcd/issues/8584).
- Protect [lease revoke with auth](https://github.com/coreos/etcd/pull/8031).
- Provide user's role on [auth permission error](https://github.com/coreos/etcd/pull/8164).
- Fix [auth store panic with disabled token](https://github.com/coreos/etcd/pull/8695).
- Update `golang.org/x/crypto/bcrypt` (see [golang/crypto@6c586e1](https://github.com/golang/crypto/commit/6c586e17d90a7d08bbbc4069984180dce3b04117)).
### Fixed(v2)
- [Fail-over v2 client](https://github.com/coreos/etcd/pull/8519) to next endpoint on [oneshot failure](https://github.com/coreos/etcd/issues/8515).
- [Put back `/v2/machines`](https://github.com/coreos/etcd/pull/8062) endpoint for python-etcd wrapper.
### Fixed(v3)
- Fix [range/put/delete operation metrics](https://github.com/coreos/etcd/pull/8054) with transaction:
- `etcd_debugging_mvcc_range_total`
- `etcd_debugging_mvcc_put_total`
- `etcd_debugging_mvcc_delete_total`
- `etcd_debugging_mvcc_txn_total`
- Fix [`etcd_debugging_mvcc_keys_total`](https://github.com/coreos/etcd/pull/8390) on restore.
- Fix [`etcd_debugging_mvcc_db_total_size_in_bytes`](https://github.com/coreos/etcd/pull/8120) on restore.
- Also change to [`prometheus.NewGaugeFunc`](https://github.com/coreos/etcd/pull/8150).
- Fix [backend database in-memory index corruption](https://github.com/coreos/etcd/pull/8127) issue on restore (only 3.2.0 is affected).
- Fix [watch restore from snapshot](https://github.com/coreos/etcd/pull/8427).
- Fix ["put at-most-once" in `clientv3`](https://github.com/coreos/etcd/pull/8335).
- Handle [empty key permission](https://github.com/coreos/etcd/pull/8514) in `etcdctl`.
- [Fix server crash](https://github.com/coreos/etcd/pull/8010) on [invalid transaction request from gRPC gateway](https://github.com/coreos/etcd/issues/7889).
- Fix [`clientv3.WatchResponse.Canceled`](https://github.com/coreos/etcd/pull/8283) on [compacted watch request](https://github.com/coreos/etcd/issues/8231).
- Handle [WAL renaming failure on Windows](https://github.com/coreos/etcd/pull/8286).
- Make [peer dial timeout longer](https://github.com/coreos/etcd/pull/8599).
- See [coreos/etcd-operator#1300](https://github.com/coreos/etcd-operator/issues/1300) for more detail.
- Make server [wait up to request time-out](https://github.com/coreos/etcd/pull/8267) with [pending RPCs](https://github.com/coreos/etcd/issues/8224).
- Fix [`grpc.Server` panic on `GracefulStop`](https://github.com/coreos/etcd/pull/8987) with [TLS-enabled server](https://github.com/coreos/etcd/issues/8916).
- Fix ["multiple peer URLs cannot start" issue](https://github.com/coreos/etcd/issues/8383).
- Fix server-side auth so [concurrent auth operations do not return old revision error](https://github.com/coreos/etcd/pull/8442).
- Fix [`concurrency/stm` `Put` with serializable snapshot](https://github.com/coreos/etcd/pull/8439).
- Use store revision from first fetch to resolve write conflicts instead of modified revision.
- Fix [`grpc-proxy` Snapshot API error handling](https://github.com/coreos/etcd/commit/dbd16d52fbf81e5fd806d21ff5e9148d5bf203ab).
- Fix [`grpc-proxy` KV API `PrevKv` flag handling](https://github.com/coreos/etcd/pull/8366).
- Fix [`grpc-proxy` KV API `KeysOnly` flag handling](https://github.com/coreos/etcd/pull/8552).
- Upgrade [`coreos/go-systemd`](https://github.com/coreos/go-systemd/releases) to `v15` (see https://github.com/coreos/go-systemd/releases/tag/v15).
### Other
- Support previous two minor versions (see our [new release policy](https://github.com/coreos/etcd/pull/8805)).
- `v3.3.x` is the last release cycle that supports `ACI`:
- AppC was [officially suspended](https://github.com/appc/spec#-disclaimer-), as of late 2016.
- [`acbuild`](https://github.com/containers/build#this-project-is-currently-unmaintained) is not maintained anymore.
- `*.aci` files won't be available from etcd `v3.4` release.
- Add container registry [`gcr.io/etcd-development/etcd`](https://gcr.io/etcd-development/etcd).
- [quay.io/coreos/etcd](https://quay.io/coreos/etcd) is still supported as secondary.
## [v3.2.12](https://github.com/coreos/etcd/releases/tag/v3.2.12) (2017-12-20)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.11...v3.2.12) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed
- Fix [error message of `Revision` compactor](https://github.com/coreos/etcd/pull/8999) in server-side.
### Added(`etcd/clientv3`,`etcdctl/v3`)
- Add [`MaxCallSendMsgSize` and `MaxCallRecvMsgSize`](https://github.com/coreos/etcd/pull/9047) fields to [`clientv3.Config`](https://godoc.org/github.com/coreos/etcd/clientv3#Config).
- Fix [exceeded response size limit error in client-side](https://github.com/coreos/etcd/issues/9043).
- Address [kubernetes#51099](https://github.com/kubernetes/kubernetes/issues/51099).
- `MaxCallSendMsgSize` default value is 2 MiB, if not configured.
- `MaxCallRecvMsgSize` default value is `math.MaxInt32`, if not configured.
### Other
- Pin [grpc v1.7.5](https://github.com/grpc/grpc-go/releases/tag/v1.7.5), [grpc-gateway v1.3.0](https://github.com/grpc-ecosystem/grpc-gateway/releases/tag/v1.3.0).
- No code change, just to be explicit about recommended versions.
## [v3.2.11](https://github.com/coreos/etcd/releases/tag/v3.2.11) (2017-12-05)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.10...v3.2.11) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed
- Fix racey grpc-go's server handler transport `WriteStatus` call to prevent [TLS-enabled etcd server crash](https://github.com/coreos/etcd/issues/8904):
- Upgrade [`google.golang.org/grpc`](https://github.com/grpc/grpc-go/releases) `v1.7.3` to `v1.7.4`.
- Add [gRPC RPC failure warnings](https://github.com/coreos/etcd/pull/8939) to help debug such issues in the future.
- Remove `--listen-metrics-urls` flag in monitoring document (non-released in `v3.2.x`, planned for `v3.3.x`).
### Added
- Provide [more cert details](https://github.com/coreos/etcd/pull/8952/files) on TLS handshake failures.
## [v3.1.11](https://github.com/coreos/etcd/releases/tag/v3.1.11) (2017-11-28)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.10...v3.1.11) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed
- [#8411](https://github.com/coreos/etcd/issues/8411),[#8806](https://github.com/coreos/etcd/pull/8806) mvcc: fix watch restore from snapshot
- [#8009](https://github.com/coreos/etcd/issues/8009),[#8902](https://github.com/coreos/etcd/pull/8902) backport coreos/bbolt v1.3.1-coreos.5
## [v3.2.10](https://github.com/coreos/etcd/releases/tag/v3.2.10) (2017-11-16)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.9...v3.2.10) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed
- Replace backend key-value database `boltdb/bolt` with [`coreos/bbolt`](https://github.com/coreos/bbolt/releases) to address [backend database size issue](https://github.com/coreos/etcd/issues/8009).
- Fix `clientv3` balancer to handle [network partitions](https://github.com/coreos/etcd/issues/8711):
- Upgrade [`google.golang.org/grpc`](https://github.com/grpc/grpc-go/releases) `v1.2.1` to `v1.7.3`.
- Upgrade [`github.com/grpc-ecosystem/grpc-gateway`](https://github.com/grpc-ecosystem/grpc-gateway/releases) `v1.2` to `v1.3`.
- Revert [discovery SRV auth `ServerName` with `*.{ROOT_DOMAIN}`](https://github.com/coreos/etcd/pull/8651) to support non-wildcard subject alternative names in the certs (see [issue #8445](https://github.com/coreos/etcd/issues/8445) for more contexts).
- For instance, `etcd --discovery-srv=etcd.local` will only authenticate peers/clients when the provided certs have root domain `etcd.local` (**not `*.etcd.local`**) as an entry in Subject Alternative Name (SAN) field.
## [v3.2.9](https://github.com/coreos/etcd/releases/tag/v3.2.9) (2017-10-06)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.8...v3.2.9) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed(Security)
- Compile with [Go 1.8.4](https://groups.google.com/d/msg/golang-nuts/sHfMg4gZNps/a-HDgDDDAAAJ).
- Update `golang.org/x/crypto/bcrypt` (see [golang/crypto@6c586e1](https://github.com/golang/crypto/commit/6c586e17d90a7d08bbbc4069984180dce3b04117)).
- Fix discovery SRV bootstrapping to [authenticate `ServerName` with `*.{ROOT_DOMAIN}`](https://github.com/coreos/etcd/pull/8651), in order to support sub-domain wildcard matching (see [issue #8445](https://github.com/coreos/etcd/issues/8445) for more contexts).
- For instance, `etcd --discovery-srv=etcd.local` will only authenticate peers/clients when the provided certs have root domain `*.etcd.local` as an entry in Subject Alternative Name (SAN) field.
## [v3.2.8](https://github.com/coreos/etcd/releases/tag/v3.2.8) (2017-09-29)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.7...v3.2.8) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed
- Fix v2 client failover to next endpoint on mutable operation.
- Fix grpc-proxy to respect `KeysOnly` flag.
## [v3.2.7](https://github.com/coreos/etcd/releases/tag/v3.2.7) (2017-09-01)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.6...v3.2.7) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed
- Fix server-side auth so concurrent auth operations do not return old revision error.
- Fix concurrency/stm Put with serializable snapshot
- Use store revision from first fetch to resolve write conflicts instead of modified revision.
## [v3.2.6](https://github.com/coreos/etcd/releases/tag/v3.2.6) (2017-08-21)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.5...v3.2.6).
### Fixed
- Fix watch restore from snapshot.
- Fix `etcd_debugging_mvcc_keys_total` inconsistency.
- Fix multiple URLs for `--listen-peer-urls` flag.
- Add `--enable-pprof` flag to etcd configuration file format.
## [v3.2.5](https://github.com/coreos/etcd/releases/tag/v3.2.5) (2017-08-04)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.4...v3.2.5) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Changed
- Use reverse lookup to match wildcard DNS SAN.
- Return non-zero exit code on unhealthy `endpoint health`.
### Fixed
- Fix unreachable /metrics endpoint when `--enable-v2=false`.
- Fix grpc-proxy to respect `PrevKv` flag.
### Added
- Add container registry `gcr.io/etcd-development/etcd`.
## [v3.2.4](https://github.com/coreos/etcd/releases/tag/v3.2.4) (2017-07-19)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.3...v3.2.4) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed
- Do not block on active client stream when stopping server
- Fix gRPC proxy Snapshot RPC error handling
## [v3.2.3](https://github.com/coreos/etcd/releases/tag/v3.2.3) (2017-07-14)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.2...v3.2.3) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed
- Let clients establish unlimited streams
### Added
- Tag docker images with minor versions
- e.g. `docker pull quay.io/coreos/etcd:v3.2` to fetch latest v3.2 versions
## [v3.1.10](https://github.com/coreos/etcd/releases/tag/v3.1.10) (2017-07-14)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.9...v3.1.10) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
### Changed
- Compile with Go 1.8.3 to fix panic on `net/http.CloseNotify`
### Added
- Tag docker images with minor versions.
- e.g. `docker pull quay.io/coreos/etcd:v3.1` to fetch latest v3.1 versions.
## [v3.2.2](https://github.com/coreos/etcd/releases/tag/v3.2.2) (2017-07-07)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.1...v3.2.2) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Improved
- Rate-limit lease revoke on expiration.
- Extend leases on promote to avoid queueing effect on lease expiration.
### Fixed
- Use user-provided listen address to connect to gRPC gateway:
- `net.Listener` rewrites IPv4 0.0.0.0 to IPv6 [::], breaking IPv6 disabled hosts.
- Only v3.2.0, v3.2.1 are affected.
- Accept connection with matched IP SAN but no DNS match.
- Don't check DNS entries in certs if there's a matching IP.
- Fix 'tools/benchmark' watch command.
## [v3.2.1](https://github.com/coreos/etcd/releases/tag/v3.2.1) (2017-06-23)
See [code changes](https://github.com/coreos/etcd/compare/v3.2.0...v3.2.1) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Fixed
- Fix backend database in-memory index corruption issue on restore (only 3.2.0 is affected).
- Fix gRPC gateway Txn marshaling issue.
- Fix backend database size debugging metrics.
## [v3.2.0](https://github.com/coreos/etcd/releases/tag/v3.2.0) (2017-06-09)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.0...v3.2.0) and [v3.2 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_2.md) for any breaking changes.
### Improved
- Improve backend read concurrency.
### Added
- Embedded etcd
- `Etcd.Peers` field is now `[]*peerListener`.
- RPCs
- Add Election, Lock service.
- Native client etcdserver/api/v3client
- client "embedded" in the server.
- gRPC proxy
- Proxy endpoint discovery.
- Namespaces.
- Coalesce lease requests.
- v3 client
- STM prefetching.
- Add namespace feature.
- Add `ErrOldCluster` with server version checking.
- Translate `WithPrefix()` into `WithFromKey()` for empty key.
- v3 etcdctl
- Add `check perf` command.
- Add `--from-key` flag to role grant-permission command.
- `lock` command takes an optional command to execute.
- etcd flags
- Add `--enable-v2` flag to configure v2 backend (enabled by default).
- Add `--auth-token` flag.
- `etcd gateway`
- Support DNS SRV priority.
- Auth
- Support Watch API.
- JWT tokens.
- Logging, monitoring
- Server warns large snapshot operations.
- Add `etcd_debugging_server_lease_expired_total` metrics.
- Security
- Deny incoming peer certs with wrong IP SAN.
- Resolve TLS `DNSNames` when SAN checking.
- Reload TLS certificates on every client connection.
- Release
- Annotate acbuild with supports-systemd-notify.
- Add `nsswitch.conf` to Docker container image.
- Add ppc64le, arm64(experimental) builds.
- Compile with `Go 1.8.3`.
### Changed
- v3 client
- `LeaseTimeToLive` returns TTL=-1 resp on lease not found.
- `clientv3.NewFromConfigFile` is moved to `clientv3/yaml.NewConfig`.
- concurrency package's elections updated to match RPC interfaces.
- let client dial endpoints not in the balancer.
- Dependencies
- Update [`google.golang.org/grpc`](https://github.com/grpc/grpc-go/releases) to `v1.2.1`.
- Update [`github.com/grpc-ecosystem/grpc-gateway`](https://github.com/grpc-ecosystem/grpc-gateway/releases) to `v1.2.0`.
### Fixed
- Allow v2 snapshot over 512MB.
## [v3.1.9](https://github.com/coreos/etcd/releases/tag/v3.1.9) (2017-06-09)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.8...v3.1.9) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
### Fixed
- Allow v2 snapshot over 512MB.
## [v3.1.8](https://github.com/coreos/etcd/releases/tag/v3.1.8) (2017-05-19)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.7...v3.1.8) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
## [v3.1.7](https://github.com/coreos/etcd/releases/tag/v3.1.7) (2017-04-28)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.6...v3.1.7) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
## [v3.1.6](https://github.com/coreos/etcd/releases/tag/v3.1.6) (2017-04-19)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.5...v3.1.6) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
### Changed
- Remove auth check in Status API.
### Fixed
- Fill in Auth API response header.
## [v3.1.5](https://github.com/coreos/etcd/releases/tag/v3.1.5) (2017-03-27)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.4...v3.1.5) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
### Added
- Add `/etc/nsswitch.conf` file to alpine-based Docker image.
### Fixed
- Fix raft memory leak issue.
- Fix Windows file path issues.
## [v3.1.4](https://github.com/coreos/etcd/releases/tag/v3.1.4) (2017-03-22)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.3...v3.1.4) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
## [v3.1.3](https://github.com/coreos/etcd/releases/tag/v3.1.3) (2017-03-10)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.2...v3.1.3) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
### Changed
- Use machine default host when advertise URLs are default values(`localhost:2379,2380`) AND if listen URL is `0.0.0.0`.
### Fixed
- Fix `etcd gateway` schema handling in DNS discovery.
- Fix sd_notify behaviors in `gateway`, `grpc-proxy`.
## [v3.1.2](https://github.com/coreos/etcd/releases/tag/v3.1.2) (2017-02-24)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.1...v3.1.2) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
### Changed
- Use IPv4 default host, by default (when IPv4 and IPv6 are available).
### Fixed
- Fix `etcd gateway` with multiple endpoints.
## [v3.1.1](https://github.com/coreos/etcd/releases/tag/v3.1.1) (2017-02-17)
See [code changes](https://github.com/coreos/etcd/compare/v3.1.0...v3.1.1) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
### Changed
- Compile with `Go 1.7.5`.
## [v2.3.8](https://github.com/coreos/etcd/releases/tag/v2.3.8) (2017-02-17)
See [code changes](https://github.com/coreos/etcd/compare/v2.3.7...v2.3.8).
### Changed
- Compile with `Go 1.7.5`.
## [v3.1.0](https://github.com/coreos/etcd/releases/tag/v3.1.0) (2017-01-20)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.0...v3.1.0) and [v3.1 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_1.md) for any breaking changes.
### Improved
- Faster linearizable reads (implements Raft read-index).
- v3 authentication API is now stable.
### Added
- Automatic leadership transfer when leader steps down.
- etcd flags
- `--strict-reconfig-check` flag is set by default.
- Add `--log-output` flag.
- Add `--metrics` flag.
- v3 client
- Add `SetEndpoints` method; update endpoints at runtime.
- Add `Sync` method; auto-update endpoints at runtime.
- Add `Lease TimeToLive` API; fetch lease information.
- replace Config.Logger field with global logger.
- Get API responses are sorted in ascending order by default.
- v3 etcdctl
- Add `lease timetolive` command.
- Add `--print-value-only` flag to get command.
- Add `--dest-prefix` flag to make-mirror command.
- `get` command responses are sorted in ascending order by default.
- `recipes` now conform to sessions defined in `clientv3/concurrency`.
- ACI has symlinks to `/usr/local/bin/etcd*`.
- Experimental gRPC proxy feature.
### Changed
- Deprecated following gRPC metrics in favor of [go-grpc-prometheus](https://github.com/grpc-ecosystem/go-grpc-prometheus):
- `etcd_grpc_requests_total`
- `etcd_grpc_requests_failed_total`
- `etcd_grpc_active_streams`
- `etcd_grpc_unary_requests_duration_seconds`
- etcd uses default route IP if advertise URL is not given.
- Cluster rejects removing members if quorum will be lost.
- SRV records (e.g., infra1.example.com) must match the discovery domain (i.e., example.com) if no custom certificate authority is given.
- `TLSConfig.ServerName` is ignored with user-provided certificates for backwards compatibility; to be deprecated.
- For example, `etcd --discovery-srv=example.com` will only authenticate peers/clients when the provided certs have root domain `example.com` as an entry in Subject Alternative Name (SAN) field.
- Discovery now has upper limit for waiting on retries.
- Warn on binding listeners through domain names; to be deprecated.
## [v3.0.16](https://github.com/coreos/etcd/releases/tag/v3.0.16) (2016-11-13)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.15...v3.0.16) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
## [v3.0.15](https://github.com/coreos/etcd/releases/tag/v3.0.15) (2016-11-11)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.14...v3.0.15) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Fixed
- Fix cancel watch request with wrong range end.
## [v3.0.14](https://github.com/coreos/etcd/releases/tag/v3.0.14) (2016-11-04)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.13...v3.0.14) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Added
- v3 `etcdctl migrate` command now supports `--no-ttl` flag to discard keys on transform.
## [v3.0.13](https://github.com/coreos/etcd/releases/tag/v3.0.13) (2016-10-24)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.12...v3.0.13) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
## [v3.0.12](https://github.com/coreos/etcd/releases/tag/v3.0.12) (2016-10-07)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.11...v3.0.12) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
## [v3.0.11](https://github.com/coreos/etcd/releases/tag/v3.0.11) (2016-10-07)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.10...v3.0.11) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Added
- Server returns previous key-value (optional)
- `clientv3.WithPrevKV` option
- v3 etcdctl `put,watch,del --prev-kv` flag
## [v3.0.10](https://github.com/coreos/etcd/releases/tag/v3.0.10) (2016-09-23)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.9...v3.0.10) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
## [v3.0.9](https://github.com/coreos/etcd/releases/tag/v3.0.9) (2016-09-15)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.8...v3.0.9) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Added
- Warn on domain names on listen URLs (v3.2 will reject domain names).
## [v3.0.8](https://github.com/coreos/etcd/releases/tag/v3.0.8) (2016-09-09)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.7...v3.0.8) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Changed
- Allow only IP addresses in listen URLs (domain names are rejected).
## [v3.0.7](https://github.com/coreos/etcd/releases/tag/v3.0.7) (2016-08-31)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.6...v3.0.7) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Changed
- SRV records only allow A records (RFC 2052).
## [v3.0.6](https://github.com/coreos/etcd/releases/tag/v3.0.6) (2016-08-19)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.5...v3.0.6) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
## [v3.0.5](https://github.com/coreos/etcd/releases/tag/v3.0.5) (2016-08-19)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.4...v3.0.5) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Changed
- SRV records (e.g., infra1.example.com) must match the discovery domain (i.e., example.com) if no custom certificate authority is given.
## [v3.0.4](https://github.com/coreos/etcd/releases/tag/v3.0.4) (2016-07-27)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.3...v3.0.4) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Changed
- v2 auth can now use common name from TLS certificate when `--client-cert-auth` is enabled.
### Added
- v2 `etcdctl ls` command now supports `--output=json`.
- Add /var/lib/etcd directory to etcd official Docker image.
## [v3.0.3](https://github.com/coreos/etcd/releases/tag/v3.0.3) (2016-07-15)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.2...v3.0.3) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Changed
- Revert Dockerfile to use `CMD`, instead of `ENTRYPOINT`, to support `etcdctl` run.
- Docker commands for v3.0.2 won't work without specifying executable binary paths.
- v3 etcdctl default endpoints are now `127.0.0.1:2379`.
## [v3.0.2](https://github.com/coreos/etcd/releases/tag/v3.0.2) (2016-07-08)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.1...v3.0.2) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
### Changed
- Dockerfile uses `ENTRYPOINT`, instead of `CMD`, to run etcd without binary path specified.
## [v3.0.1](https://github.com/coreos/etcd/releases/tag/v3.0.1) (2016-07-01)
See [code changes](https://github.com/coreos/etcd/compare/v3.0.0...v3.0.1) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.
## [v3.0.0](https://github.com/coreos/etcd/releases/tag/v3.0.0) (2016-06-30)
See [code changes](https://github.com/coreos/etcd/compare/v2.3.0...v3.0.0) and [v3.0 upgrade guide](https://github.com/coreos/etcd/blob/master/Documentation/upgrades/upgrade_3_0.md) for any breaking changes.

View File

@@ -328,6 +328,11 @@ The security flags help to [build a secure etcd cluster][security].
+ default: ""
+ env variable: ETCD_CIPHER_SUITES
### --experimental-peer-skip-client-san-verification
+ Skip verification of SAN field in client certificate for peer connections.
+ default: false
+ env variable: ETCD_EXPERIMENTAL_PEER_SKIP_CLIENT_SAN_VERIFICATION
## Logging flags
### --logger

View File

@@ -369,6 +369,52 @@ After
docker pull gcr.io/etcd-development/etcd:v3.3.0
```
### Upgrades to >= v3.3.14
[v3.3.14](https://github.com/etcd-io/etcd/releases/tag/v3.3.14) had to include some features from 3.4, while trying to minimize the difference between client balancer implementation. This release fixes ["kube-apiserver 1.13.x refuses to work when first etcd-server is not available" (kubernetes#72102)](https://github.com/kubernetes/kubernetes/issues/72102).
`grpc.ErrClientConnClosing` has been [deprecated in gRPC >= 1.10](https://github.com/grpc/grpc-go/pull/1854).
```diff
import (
+ "go.etcd.io/etcd/clientv3"
"google.golang.org/grpc"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
)
_, err := kvc.Get(ctx, "a")
-if err == grpc.ErrClientConnClosing {
+if clientv3.IsConnCanceled(err) {
// or
+s, ok := status.FromError(err)
+if ok {
+ if s.Code() == codes.Canceled
```
[The new client balancer](https://github.com/etcd-io/etcd/blob/master/Documentation/learning/design-client.md) uses an asynchronous resolver to pass endpoints to the gRPC dial function. As a result, [v3.3.14](https://github.com/etcd-io/etcd/releases/tag/v3.3.14) or later requires `grpc.WithBlock` dial option to wait until the underlying connection is up.
```diff
import (
"time"
"go.etcd.io/etcd/clientv3"
+ "google.golang.org/grpc"
)
+// "grpc.WithBlock()" to block until the underlying connection is up
ccfg := clientv3.Config{
Endpoints: []string{"localhost:2379"},
DialTimeout: time.Second,
+ DialOptions: []grpc.DialOption{grpc.WithBlock()},
DialKeepAliveTime: time.Second,
DialKeepAliveTimeout: 500 * time.Millisecond,
}
```
Please see [CHANGELOG](https://github.com/etcd-io/etcd/blob/master/CHANGELOG-3.3.md) for a full list of changes.
### Server upgrade checklists
#### Upgrade requirements

View File

@@ -1,462 +0,0 @@
---
title: Upgrade etcd from 3.3 to 3.4
---
In the general case, upgrading from etcd 3.3 to 3.4 can be a zero-downtime, rolling upgrade:
- one by one, stop the etcd v3.3 processes and replace them with etcd v3.4 processes
- after running all v3.4 processes, new features in v3.4 are available to the cluster
Before [starting an upgrade](#upgrade-procedure), read through the rest of this guide to prepare.
### Upgrade checklists
**NOTE:** When [migrating from v2 with no v3 data](https://github.com/etcd-io/etcd/issues/9480), etcd server v3.2+ panics when etcd restores from existing snapshots but no v3 `ETCD_DATA_DIR/member/snap/db` file. This happens when the server had migrated from v2 with no previous v3 data. This also prevents accidental v3 data loss (e.g. `db` file might have been moved). etcd requires that post v3 migration can only happen with v3 data. Do not upgrade to newer v3 versions until v3.0 server contains v3 data.
Highlighted breaking changes in 3.4.
#### Make `ETCDCTL_API=3 etcdctl` default
`ETCDCTL_API=3` is now the default.
```diff
etcdctl set foo bar
Error: unknown command "set" for "etcdctl"
-etcdctl set foo bar
+ETCDCTL_API=2 etcdctl set foo bar
bar
ETCDCTL_API=3 etcdctl put foo bar
OK
-ETCDCTL_API=3 etcdctl put foo bar
+etcdctl put foo bar
```
#### Deprecated `etcd --ca-file` and `etcd --peer-ca-file` flags
`--ca-file` and `--peer-ca-file` flags are deprecated; they have been deprecated since v2.1.
```diff
-etcd --ca-file ca-client.crt
+etcd --trusted-ca-file ca-client.crt
```
```diff
-etcd --peer-ca-file ca-peer.crt
+etcd --peer-trusted-ca-file ca-peer.crt
```
#### Promote `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics
v3.4 promotes `etcd_debugging_mvcc_db_total_size_in_bytes` Prometheus metrics to `etcd_mvcc_db_total_size_in_bytes`, in order to encourage etcd storage monitoring.
`etcd_debugging_mvcc_db_total_size_in_bytes` is still served in v3.4 for backward compatibilities. It will be completely deprecated in v3.5.
```diff
-etcd_debugging_mvcc_db_total_size_in_bytes
+etcd_mvcc_db_total_size_in_bytes
```
Note that `etcd_debugging_*` namespace metrics have been marked as experimental. As we improve monitoring guide, we will promote more metrics.
#### Deprecating `etcd --log-output` flag (now `--log-outputs`)
Rename [`etcd --log-output` to `--log-outputs`](https://github.com/etcd-io/etcd/pull/9624) to support multiple log outputs. **`etcd --logger=capnslog` does not support multiple log outputs.**
**`etcd --log-output`** will be deprecated in v3.5. **`etcd --logger=capnslog` will be deprecated in v3.5**.
```diff
-etcd --log-output=stderr
+etcd --log-outputs=stderr
+# to write logs to stderr and a.log file at the same time
+# only "--logger=zap" supports multiple writers
+etcd --logger=zap --log-outputs=stderr,a.log
```
v3.4 adds `etcd --logger=zap --log-outputs=stderr` support for structured logging and multiple log outputs. Main motivation is to promote automated etcd monitoring, rather than looking back server logs when it starts breaking. Future development will make etcd log as few as possible, and make etcd easier to monitor with metrics and alerts. **`etcd --logger=capnslog` will be deprecated in v3.5**.
#### Changed `log-outputs` field type in `etcd --config-file` to `[]string`
Now that `log-outputs` (old field name `log-output`) accepts multiple writers, etcd configuration YAML file `log-outputs` field must be changed to `[]string` type as below:
```diff
# Specify 'stdout' or 'stderr' to skip journald logging even when running under systemd.
-log-output: default
+log-outputs: [default]
```
#### Renamed `embed.Config.LogOutput` to `embed.Config.LogOutputs`
Renamed [**`embed.Config.LogOutput`** to **`embed.Config.LogOutputs`**](https://github.com/etcd-io/etcd/pull/9624) to support multiple log outputs. And changed [`embed.Config.LogOutput` type from `string` to `[]string`](https://github.com/etcd-io/etcd/pull/9579) to support multiple log outputs.
```diff
import "github.com/coreos/etcd/embed"
cfg := &embed.Config{Debug: false}
-cfg.LogOutput = "stderr"
+cfg.LogOutputs = []string{"stderr"}
```
#### v3.5 deprecates `capnslog`
**v3.5 will deprecate `etcd --log-package-levels` flag for `capnslog`**; `etcd --logger=zap --log-outputs=stderr` will the default. **v3.5 will deprecate `[CLIENT-URL]/config/local/log` endpoint.**
#### Deprecated `pkg/transport.TLSInfo.CAFile` field
Deprecated `pkg/transport.TLSInfo.CAFile` field.
```diff
import "github.com/coreos/etcd/pkg/transport"
tlsInfo := transport.TLSInfo{
CertFile: "/tmp/test-certs/test.pem",
KeyFile: "/tmp/test-certs/test-key.pem",
- CAFile: "/tmp/test-certs/trusted-ca.pem",
+ TrustedCAFile: "/tmp/test-certs/trusted-ca.pem",
}
tlsConfig, err := tlsInfo.ClientConfig()
if err != nil {
panic(err)
}
```
#### Changed `embed.Config.SnapCount` to `embed.Config.SnapshotCount`
To be consistent with the flag name `etcd --snapshot-count`, `embed.Config.SnapCount` field has been renamed to `embed.Config.SnapshotCount`:
```diff
import "github.com/coreos/etcd/embed"
cfg := embed.NewConfig()
-cfg.SnapCount = 100000
+cfg.SnapshotCount = 100000
```
#### Changed `etcdserver.ServerConfig.SnapCount` to `etcdserver.ServerConfig.SnapshotCount`
To be consistent with the flag name `etcd --snapshot-count`, `etcdserver.ServerConfig.SnapCount` field has been renamed to `etcdserver.ServerConfig.SnapshotCount`:
```diff
import "github.com/coreos/etcd/etcdserver"
srvcfg := etcdserver.ServerConfig{
- SnapCount: 100000,
+ SnapshotCount: 100000,
```
#### Changed function signature in package `wal`
Changed `wal` function signatures to support structured logger.
```diff
import "github.com/coreos/etcd/wal"
+import "go.uber.org/zap"
+lg, _ = zap.NewProduction()
-wal.Open(dirpath, snap)
+wal.Open(lg, dirpath, snap)
-wal.OpenForRead(dirpath, snap)
+wal.OpenForRead(lg, dirpath, snap)
-wal.Repair(dirpath)
+wal.Repair(lg, dirpath)
-wal.Create(dirpath, metadata)
+wal.Create(lg, dirpath, metadata)
```
#### Deprecated `embed.Config.SetupLogging`
`embed.Config.SetupLogging` has been removed in order to prevent wrong logging configuration, and now set up automatically.
```diff
import "github.com/coreos/etcd/embed"
cfg := &embed.Config{Debug: false}
-cfg.SetupLogging()
```
#### Changed gRPC gateway HTTP endpoints (replaced `/v3beta` with `/v3`)
Before
```bash
curl -L http://localhost:2379/v3beta/kv/put \
-X POST -d '{"key": "Zm9v", "value": "YmFy"}'
```
After
```bash
curl -L http://localhost:2379/v3/kv/put \
-X POST -d '{"key": "Zm9v", "value": "YmFy"}'
```
Requests to `/v3beta` endpoints will redirect to `/v3`, and `/v3beta` will be removed in 3.5 release.
#### Deprecated container image tags
`latest` and minor version images tags are deprecated:
```diff
-docker pull gcr.io/etcd-development/etcd:latest
+docker pull gcr.io/etcd-development/etcd:v3.4.0
-docker pull gcr.io/etcd-development/etcd:v3.4
+docker pull gcr.io/etcd-development/etcd:v3.4.0
-docker pull gcr.io/etcd-development/etcd:v3.4
+docker pull gcr.io/etcd-development/etcd:v3.4.1
-docker pull gcr.io/etcd-development/etcd:v3.4
+docker pull gcr.io/etcd-development/etcd:v3.4.2
```
### Server upgrade checklists
#### Upgrade requirements
To upgrade an existing etcd deployment to 3.4, the running cluster must be 3.3 or greater. If it's before 3.3, please [upgrade to 3.3](upgrade_3_3.md) before upgrading to 3.4.
Also, to ensure a smooth rolling upgrade, the running cluster must be healthy. Check the health of the cluster by using the `etcdctl endpoint health` command before proceeding.
#### Preparation
Before upgrading etcd, always test the services relying on etcd in a staging environment before deploying the upgrade to the production environment.
Before beginning, [download the snapshot backup](../op-guide/maintenance.md#snapshot-backup). Should something go wrong with the upgrade, it is possible to use this backup to [downgrade](#downgrade) back to existing etcd version. Please note that the `snapshot` command only backs up the v3 data. For v2 data, see [backing up v2 datastore](../v2/admin_guide.md#backing-up-the-datastore).
#### Mixed versions
While upgrading, an etcd cluster supports mixed versions of etcd members, and operates with the protocol of the lowest common version. The cluster is only considered upgraded once all of its members are upgraded to version 3.4. Internally, etcd members negotiate with each other to determine the overall cluster version, which controls the reported version and the supported features.
#### Limitations
Note: If the cluster only has v3 data and no v2 data, it is not subject to this limitation.
If the cluster is serving a v2 data set larger than 50MB, each newly upgraded member may take up to two minutes to catch up with the existing cluster. Check the size of a recent snapshot to estimate the total data size. In other words, it is safest to wait for 2 minutes between upgrading each member.
For a much larger total data size, 100MB or more , this one-time process might take even more time. Administrators of very large etcd clusters of this magnitude can feel free to contact the [etcd team][etcd-contact] before upgrading, and we'll be happy to provide advice on the procedure.
#### Downgrade
If all members have been upgraded to v3.4, the cluster will be upgraded to v3.4, and downgrade from this completed state is **not possible**. If any single member is still v3.3, however, the cluster and its operations remains "v3.3", and it is possible from this mixed cluster state to return to using a v3.3 etcd binary on all members.
Please [download the snapshot backup](../op-guide/maintenance.md#snapshot-backup) to make downgrading the cluster possible even after it has been completely upgraded.
### Upgrade procedure
This example shows how to upgrade a 3-member v3.3 ectd cluster running on a local machine.
#### Step 1: check upgrade requirements
Is the cluster healthy and running v3.3.x?
```bash
etcdctl --endpoints=localhost:2379,localhost:22379,localhost:32379 endpoint health
<<COMMENT
localhost:2379 is healthy: successfully committed proposal: took = 2.118638ms
localhost:22379 is healthy: successfully committed proposal: took = 3.631388ms
localhost:32379 is healthy: successfully committed proposal: took = 2.157051ms
COMMENT
curl http://localhost:2379/version
<<COMMENT
{"etcdserver":"3.3.5","etcdcluster":"3.3.0"}
COMMENT
curl http://localhost:22379/version
<<COMMENT
{"etcdserver":"3.3.5","etcdcluster":"3.3.0"}
COMMENT
curl http://localhost:32379/version
<<COMMENT
{"etcdserver":"3.3.5","etcdcluster":"3.3.0"}
COMMENT
```
#### Step 2: download snapshot backup from leader
[Download the snapshot backup](../op-guide/maintenance.md#snapshot-backup) to provide a downgrade path should any problems occur.
etcd leader is guaranteed to have the latest application data, thus fetch snapshot from leader:
```bash
curl -sL http://localhost:2379/metrics | grep etcd_server_is_leader
<<COMMENT
# HELP etcd_server_is_leader Whether or not this member is a leader. 1 if is, 0 otherwise.
# TYPE etcd_server_is_leader gauge
etcd_server_is_leader 1
COMMENT
curl -sL http://localhost:22379/metrics | grep etcd_server_is_leader
<<COMMENT
etcd_server_is_leader 0
COMMENT
curl -sL http://localhost:32379/metrics | grep etcd_server_is_leader
<<COMMENT
etcd_server_is_leader 0
COMMENT
etcdctl --endpoints=localhost:2379 snapshot save backup.db
<<COMMENT
{"level":"info","ts":1526585787.148433,"caller":"snapshot/v3_snapshot.go:109","msg":"created temporary db file","path":"backup.db.part"}
{"level":"info","ts":1526585787.1485257,"caller":"snapshot/v3_snapshot.go:120","msg":"fetching snapshot","endpoint":"localhost:2379"}
{"level":"info","ts":1526585787.1519694,"caller":"snapshot/v3_snapshot.go:133","msg":"fetched snapshot","endpoint":"localhost:2379","took":0.003502721}
{"level":"info","ts":1526585787.1520295,"caller":"snapshot/v3_snapshot.go:142","msg":"saved","path":"backup.db"}
Snapshot saved at backup.db
COMMENT
```
#### Step 3: stop one existing etcd server
When each etcd process is stopped, expected errors will be logged by other cluster members. This is normal since a cluster member connection has been (temporarily) broken:
```bash
10.237579 I | etcdserver: updating the cluster version from 3.0 to 3.3
10.238315 N | etcdserver/membership: updated the cluster version from 3.0 to 3.3
10.238451 I | etcdserver/api: enabled capabilities for version 3.3
^C21.192174 N | pkg/osutil: received interrupt signal, shutting down...
21.192459 I | etcdserver: 7339c4e5e833c029 starts leadership transfer from 7339c4e5e833c029 to 729934363faa4a24
21.192569 I | raft: 7339c4e5e833c029 [term 8] starts to transfer leadership to 729934363faa4a24
21.192619 I | raft: 7339c4e5e833c029 sends MsgTimeoutNow to 729934363faa4a24 immediately as 729934363faa4a24 already has up-to-date log
WARNING: 2018/05/17 12:45:21 grpc: addrConn.resetTransport failed to create client transport: connection error: desc = "transport: Error while dialing dial tcp: operation was canceled"; Reconnecting to {localhost:2379 0 <nil>}
WARNING: 2018/05/17 12:45:21 grpc: addrConn.transportMonitor exits due to: grpc: the connection is closing
21.193589 I | raft: 7339c4e5e833c029 [term: 8] received a MsgVote message with higher term from 729934363faa4a24 [term: 9]
21.193626 I | raft: 7339c4e5e833c029 became follower at term 9
21.193651 I | raft: 7339c4e5e833c029 [logterm: 8, index: 9, vote: 0] cast MsgVote for 729934363faa4a24 [logterm: 8, index: 9] at term 9
21.193675 I | raft: raft.node: 7339c4e5e833c029 lost leader 7339c4e5e833c029 at term 9
21.194424 I | raft: raft.node: 7339c4e5e833c029 elected leader 729934363faa4a24 at term 9
21.292898 I | etcdserver: 7339c4e5e833c029 finished leadership transfer from 7339c4e5e833c029 to 729934363faa4a24 (took 100.436391ms)
21.292975 I | rafthttp: stopping peer 729934363faa4a24...
21.293206 I | rafthttp: closed the TCP streaming connection with peer 729934363faa4a24 (stream MsgApp v2 writer)
21.293225 I | rafthttp: stopped streaming with peer 729934363faa4a24 (writer)
21.293437 I | rafthttp: closed the TCP streaming connection with peer 729934363faa4a24 (stream Message writer)
21.293459 I | rafthttp: stopped streaming with peer 729934363faa4a24 (writer)
21.293514 I | rafthttp: stopped HTTP pipelining with peer 729934363faa4a24
21.293590 W | rafthttp: lost the TCP streaming connection with peer 729934363faa4a24 (stream MsgApp v2 reader)
21.293610 I | rafthttp: stopped streaming with peer 729934363faa4a24 (stream MsgApp v2 reader)
21.293680 W | rafthttp: lost the TCP streaming connection with peer 729934363faa4a24 (stream Message reader)
21.293700 I | rafthttp: stopped streaming with peer 729934363faa4a24 (stream Message reader)
21.293711 I | rafthttp: stopped peer 729934363faa4a24
21.293720 I | rafthttp: stopping peer b548c2511513015...
21.293987 I | rafthttp: closed the TCP streaming connection with peer b548c2511513015 (stream MsgApp v2 writer)
21.294063 I | rafthttp: stopped streaming with peer b548c2511513015 (writer)
21.294467 I | rafthttp: closed the TCP streaming connection with peer b548c2511513015 (stream Message writer)
21.294561 I | rafthttp: stopped streaming with peer b548c2511513015 (writer)
21.294742 I | rafthttp: stopped HTTP pipelining with peer b548c2511513015
21.294867 W | rafthttp: lost the TCP streaming connection with peer b548c2511513015 (stream MsgApp v2 reader)
21.294892 I | rafthttp: stopped streaming with peer b548c2511513015 (stream MsgApp v2 reader)
21.294990 W | rafthttp: lost the TCP streaming connection with peer b548c2511513015 (stream Message reader)
21.295004 E | rafthttp: failed to read b548c2511513015 on stream Message (context canceled)
21.295013 I | rafthttp: peer b548c2511513015 became inactive
21.295024 I | rafthttp: stopped streaming with peer b548c2511513015 (stream Message reader)
21.295035 I | rafthttp: stopped peer b548c2511513015
```
#### Step 4: restart the etcd server with same configuration
Restart the etcd server with same configuration but with the new etcd binary.
```diff
-etcd-old --name s1 \
+etcd-new --name s1 \
--data-dir /tmp/etcd/s1 \
--listen-client-urls http://localhost:2379 \
--advertise-client-urls http://localhost:2379 \
--listen-peer-urls http://localhost:2380 \
--initial-advertise-peer-urls http://localhost:2380 \
--initial-cluster s1=http://localhost:2380,s2=http://localhost:22380,s3=http://localhost:32380 \
--initial-cluster-token tkn \
+ --initial-cluster-state new \
+ --logger zap \
+ --log-outputs stderr
```
The new v3.4 etcd will publish its information to the cluster. At this point, cluster still operates as v3.3 protocol, which is the lowest common version.
> `{"level":"info","ts":1526586617.1647713,"caller":"membership/cluster.go:485","msg":"set initial cluster version","cluster-id":"7dee9ba76d59ed53","local-member-id":"7339c4e5e833c029","cluster-version":"3.0"}`
> `{"level":"info","ts":1526586617.1648536,"caller":"api/capability.go:76","msg":"enabled capabilities for version","cluster-version":"3.0"}`
> `{"level":"info","ts":1526586617.1649303,"caller":"membership/cluster.go:473","msg":"updated cluster version","cluster-id":"7dee9ba76d59ed53","local-member-id":"7339c4e5e833c029","from":"3.0","from":"3.3"}`
> `{"level":"info","ts":1526586617.1649797,"caller":"api/capability.go:76","msg":"enabled capabilities for version","cluster-version":"3.3"}`
> `{"level":"info","ts":1526586617.2107732,"caller":"etcdserver/server.go:1770","msg":"published local member to cluster through raft","local-member-id":"7339c4e5e833c029","local-member-attributes":"{Name:s1 ClientURLs:[http://localhost:2379]}","request-path":"/0/members/7339c4e5e833c029/attributes","cluster-id":"7dee9ba76d59ed53","publish-timeout":7}`
Verify that each member, and then the entire cluster, becomes healthy with the new v3.4 etcd binary:
```bash
etcdctl endpoint health --endpoints=localhost:2379,localhost:22379,localhost:32379
<<COMMENT
localhost:32379 is healthy: successfully committed proposal: took = 2.337471ms
localhost:22379 is healthy: successfully committed proposal: took = 1.130717ms
localhost:2379 is healthy: successfully committed proposal: took = 2.124843ms
COMMENT
```
Un-upgraded members will log warnings like the following until the entire cluster is upgraded.
This is expected and will cease after all etcd cluster members are upgraded to v3.4:
```
:41.942121 W | etcdserver: member 7339c4e5e833c029 has a higher version 3.4.0
:45.945154 W | etcdserver: the local etcd version 3.3.5 is not up-to-date
```
#### Step 5: repeat *step 3* and *step 4* for rest of the members
When all members are upgraded, the cluster will report upgrading to 3.4 successfully:
Member 1:
> `{"level":"info","ts":1526586949.0920913,"caller":"api/capability.go:76","msg":"enabled capabilities for version","cluster-version":"3.4"}`
> `{"level":"info","ts":1526586949.0921566,"caller":"etcdserver/server.go:2272","msg":"cluster version is updated","cluster-version":"3.4"}`
Member 2:
> `{"level":"info","ts":1526586949.092117,"caller":"membership/cluster.go:473","msg":"updated cluster version","cluster-id":"7dee9ba76d59ed53","local-member-id":"729934363faa4a24","from":"3.3","from":"3.4"}`
> `{"level":"info","ts":1526586949.0923078,"caller":"api/capability.go:76","msg":"enabled capabilities for version","cluster-version":"3.4"}`
Member 3:
> `{"level":"info","ts":1526586949.0921423,"caller":"membership/cluster.go:473","msg":"updated cluster version","cluster-id":"7dee9ba76d59ed53","local-member-id":"b548c2511513015","from":"3.3","from":"3.4"}`
> `{"level":"info","ts":1526586949.0922918,"caller":"api/capability.go:76","msg":"enabled capabilities for version","cluster-version":"3.4"}`
```bash
endpoint health --endpoints=localhost:2379,localhost:22379,localhost:32379
<<COMMENT
localhost:2379 is healthy: successfully committed proposal: took = 492.834µs
localhost:22379 is healthy: successfully committed proposal: took = 1.015025ms
localhost:32379 is healthy: successfully committed proposal: took = 1.853077ms
COMMENT
curl http://localhost:2379/version
<<COMMENT
{"etcdserver":"3.4.0","etcdcluster":"3.4.0"}
COMMENT
curl http://localhost:22379/version
<<COMMENT
{"etcdserver":"3.4.0","etcdcluster":"3.4.0"}
COMMENT
curl http://localhost:32379/version
<<COMMENT
{"etcdserver":"3.4.0","etcdcluster":"3.4.0"}
COMMENT
```
[etcd-contact]: https://groups.google.com/forum/#!forum/etcd-dev

View File

@@ -50,7 +50,7 @@ docker-remove:
GO_VERSION ?= 1.10.3
GO_VERSION ?= 1.12.9
ETCD_VERSION ?= $(shell git rev-parse --short HEAD || echo "GitNotFound")
TEST_SUFFIX = $(shell date +%s | base64 | head -c 15)
@@ -64,11 +64,11 @@ endif
# Example:
# GO_VERSION=1.8.7 make build-docker-test
# GO_VERSION=1.12.9 make build-docker-test
# make build-docker-test
#
# gcloud docker -- login -u _json_key -p "$(cat /etc/gcp-key-etcd-development.json)" https://gcr.io
# GO_VERSION=1.8.7 make push-docker-test
# GO_VERSION=1.12.9 make push-docker-test
# make push-docker-test
#
# gsutil -m acl ch -u allUsers:R -r gs://artifacts.etcd-development.appspot.com
@@ -78,17 +78,17 @@ build-docker-test:
$(info GO_VERSION: $(GO_VERSION))
@sed -i.bak 's|REPLACE_ME_GO_VERSION|$(GO_VERSION)|g' ./tests/Dockerfile
docker build \
--tag gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
--tag gcr.io/etcd-development/etcd-test:go$(GO_VERSION)-release-3.3 \
--file ./tests/Dockerfile .
@mv ./tests/Dockerfile.bak ./tests/Dockerfile
push-docker-test:
$(info GO_VERSION: $(GO_VERSION))
gcloud docker -- push gcr.io/etcd-development/etcd-test:go$(GO_VERSION)
gcloud docker -- push gcr.io/etcd-development/etcd-test:go$(GO_VERSION)-release-3.3
pull-docker-test:
$(info GO_VERSION: $(GO_VERSION))
docker pull gcr.io/etcd-development/etcd-test:go$(GO_VERSION)
docker pull gcr.io/etcd-development/etcd-test:go$(GO_VERSION)-release-3.3
@@ -102,16 +102,16 @@ compile-with-docker-test:
docker run \
--rm \
--mount type=bind,source=`pwd`,destination=/go/src/github.com/coreos/etcd \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
/bin/bash -c "GO_BUILD_FLAGS=-v ./build && ./bin/etcd --version"
gcr.io/etcd-development/etcd-test:go$(GO_VERSION)-release-3.3 \
/bin/bash -c "GO_BUILD_FLAGS=-v GOOS=linux GOARCH=amd64 ./build && ./bin/etcd --version"
compile-setup-gopath-with-docker-test:
$(info GO_VERSION: $(GO_VERSION))
docker run \
--rm \
--mount type=bind,source=`pwd`,destination=/etcd \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && ETCD_SETUP_GOPATH=1 GO_BUILD_FLAGS=-v ./build && ./bin/etcd --version && rm -rf ./gopath"
gcr.io/etcd-development/etcd-test:go$(GO_VERSION)-release-3.3 \
/bin/bash -c "cd /etcd && ETCD_SETUP_GOPATH=1 GO_BUILD_FLAGS=-v GOOS=linux GOARCH=amd64 ./build && ./bin/etcd --version && rm -rf ./gopath"
@@ -145,7 +145,7 @@ test:
$(info TEST_OPTS: $(TEST_OPTS))
$(info log-file: test-$(TEST_SUFFIX).log)
$(TEST_OPTS) ./test 2>&1 | tee test-$(TEST_SUFFIX).log
! egrep "(--- FAIL:|panic: test timed out|appears to have leaked)" -B50 -A10 test-$(TEST_SUFFIX).log
! egrep "(--- FAIL:|DATA RACE|panic: test timed out|appears to have leaked)" -B50 -A10 test-$(TEST_SUFFIX).log
docker-test:
$(info GO_VERSION: $(GO_VERSION))
@@ -158,9 +158,9 @@ docker-test:
--rm \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`,destination=/go/src/github.com/coreos/etcd \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION)-release-3.3 \
/bin/bash -c "$(TEST_OPTS) ./test 2>&1 | tee test-$(TEST_SUFFIX).log"
! egrep "(--- FAIL:|panic: test timed out|appears to have leaked)" -B50 -A10 test-$(TEST_SUFFIX).log
! egrep "(--- FAIL:|DATA RACE|panic: test timed out|appears to have leaked)" -B50 -A10 test-$(TEST_SUFFIX).log
docker-test-coverage:
$(info GO_VERSION: $(GO_VERSION))
@@ -172,9 +172,9 @@ docker-test-coverage:
--rm \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`,destination=/go/src/github.com/coreos/etcd \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION)-release-3.3 \
/bin/bash -c "COVERDIR=covdir PASSES='build build_cov cov' ./test 2>&1 | tee docker-test-coverage-$(TEST_SUFFIX).log && /codecov -t 6040de41-c073-4d6f-bbf8-d89256ef31e1"
! egrep "(--- FAIL:|panic: test timed out|appears to have leaked)" -B50 -A10 docker-test-coverage-$(TEST_SUFFIX).log
! egrep "(--- FAIL:|DATA RACE|panic: test timed out|appears to have leaked)" -B50 -A10 docker-test-coverage-$(TEST_SUFFIX).log

View File

@@ -1,29 +1,16 @@
// Code generated by protoc-gen-gogo. DO NOT EDIT.
// source: auth.proto
/*
Package authpb is a generated protocol buffer package.
It is generated from these files:
auth.proto
It has these top-level messages:
User
Permission
Role
*/
package authpb
import (
"fmt"
proto "github.com/golang/protobuf/proto"
fmt "fmt"
io "io"
math "math"
math_bits "math/bits"
_ "github.com/gogo/protobuf/gogoproto"
io "io"
proto "github.com/golang/protobuf/proto"
)
// Reference imports to suppress errors if they are not otherwise used.
@@ -50,6 +37,7 @@ var Permission_Type_name = map[int32]string{
1: "WRITE",
2: "READWRITE",
}
var Permission_Type_value = map[string]int32{
"READ": 0,
"WRITE": 1,
@@ -59,53 +47,174 @@ var Permission_Type_value = map[string]int32{
func (x Permission_Type) String() string {
return proto.EnumName(Permission_Type_name, int32(x))
}
func (Permission_Type) EnumDescriptor() ([]byte, []int) { return fileDescriptorAuth, []int{1, 0} }
func (Permission_Type) EnumDescriptor() ([]byte, []int) {
return fileDescriptor_8bbd6f3875b0e874, []int{1, 0}
}
// User is a single entry in the bucket authUsers
type User struct {
Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
Password []byte `protobuf:"bytes,2,opt,name=password,proto3" json:"password,omitempty"`
Roles []string `protobuf:"bytes,3,rep,name=roles" json:"roles,omitempty"`
Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
Password []byte `protobuf:"bytes,2,opt,name=password,proto3" json:"password,omitempty"`
Roles []string `protobuf:"bytes,3,rep,name=roles,proto3" json:"roles,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *User) Reset() { *m = User{} }
func (m *User) String() string { return proto.CompactTextString(m) }
func (*User) ProtoMessage() {}
func (*User) Descriptor() ([]byte, []int) { return fileDescriptorAuth, []int{0} }
func (m *User) Reset() { *m = User{} }
func (m *User) String() string { return proto.CompactTextString(m) }
func (*User) ProtoMessage() {}
func (*User) Descriptor() ([]byte, []int) {
return fileDescriptor_8bbd6f3875b0e874, []int{0}
}
func (m *User) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
}
func (m *User) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
if deterministic {
return xxx_messageInfo_User.Marshal(b, m, deterministic)
} else {
b = b[:cap(b)]
n, err := m.MarshalToSizedBuffer(b)
if err != nil {
return nil, err
}
return b[:n], nil
}
}
func (m *User) XXX_Merge(src proto.Message) {
xxx_messageInfo_User.Merge(m, src)
}
func (m *User) XXX_Size() int {
return m.Size()
}
func (m *User) XXX_DiscardUnknown() {
xxx_messageInfo_User.DiscardUnknown(m)
}
var xxx_messageInfo_User proto.InternalMessageInfo
// Permission is a single entity
type Permission struct {
PermType Permission_Type `protobuf:"varint,1,opt,name=permType,proto3,enum=authpb.Permission_Type" json:"permType,omitempty"`
Key []byte `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"`
RangeEnd []byte `protobuf:"bytes,3,opt,name=range_end,json=rangeEnd,proto3" json:"range_end,omitempty"`
PermType Permission_Type `protobuf:"varint,1,opt,name=permType,proto3,enum=authpb.Permission_Type" json:"permType,omitempty"`
Key []byte `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"`
RangeEnd []byte `protobuf:"bytes,3,opt,name=range_end,json=rangeEnd,proto3" json:"range_end,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *Permission) Reset() { *m = Permission{} }
func (m *Permission) String() string { return proto.CompactTextString(m) }
func (*Permission) ProtoMessage() {}
func (*Permission) Descriptor() ([]byte, []int) { return fileDescriptorAuth, []int{1} }
func (m *Permission) Reset() { *m = Permission{} }
func (m *Permission) String() string { return proto.CompactTextString(m) }
func (*Permission) ProtoMessage() {}
func (*Permission) Descriptor() ([]byte, []int) {
return fileDescriptor_8bbd6f3875b0e874, []int{1}
}
func (m *Permission) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
}
func (m *Permission) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
if deterministic {
return xxx_messageInfo_Permission.Marshal(b, m, deterministic)
} else {
b = b[:cap(b)]
n, err := m.MarshalToSizedBuffer(b)
if err != nil {
return nil, err
}
return b[:n], nil
}
}
func (m *Permission) XXX_Merge(src proto.Message) {
xxx_messageInfo_Permission.Merge(m, src)
}
func (m *Permission) XXX_Size() int {
return m.Size()
}
func (m *Permission) XXX_DiscardUnknown() {
xxx_messageInfo_Permission.DiscardUnknown(m)
}
var xxx_messageInfo_Permission proto.InternalMessageInfo
// Role is a single entry in the bucket authRoles
type Role struct {
Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
KeyPermission []*Permission `protobuf:"bytes,2,rep,name=keyPermission" json:"keyPermission,omitempty"`
Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
KeyPermission []*Permission `protobuf:"bytes,2,rep,name=keyPermission,proto3" json:"keyPermission,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *Role) Reset() { *m = Role{} }
func (m *Role) String() string { return proto.CompactTextString(m) }
func (*Role) ProtoMessage() {}
func (*Role) Descriptor() ([]byte, []int) { return fileDescriptorAuth, []int{2} }
func (m *Role) Reset() { *m = Role{} }
func (m *Role) String() string { return proto.CompactTextString(m) }
func (*Role) ProtoMessage() {}
func (*Role) Descriptor() ([]byte, []int) {
return fileDescriptor_8bbd6f3875b0e874, []int{2}
}
func (m *Role) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
}
func (m *Role) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
if deterministic {
return xxx_messageInfo_Role.Marshal(b, m, deterministic)
} else {
b = b[:cap(b)]
n, err := m.MarshalToSizedBuffer(b)
if err != nil {
return nil, err
}
return b[:n], nil
}
}
func (m *Role) XXX_Merge(src proto.Message) {
xxx_messageInfo_Role.Merge(m, src)
}
func (m *Role) XXX_Size() int {
return m.Size()
}
func (m *Role) XXX_DiscardUnknown() {
xxx_messageInfo_Role.DiscardUnknown(m)
}
var xxx_messageInfo_Role proto.InternalMessageInfo
func init() {
proto.RegisterEnum("authpb.Permission_Type", Permission_Type_name, Permission_Type_value)
proto.RegisterType((*User)(nil), "authpb.User")
proto.RegisterType((*Permission)(nil), "authpb.Permission")
proto.RegisterType((*Role)(nil), "authpb.Role")
proto.RegisterEnum("authpb.Permission_Type", Permission_Type_name, Permission_Type_value)
}
func init() { proto.RegisterFile("auth.proto", fileDescriptor_8bbd6f3875b0e874) }
var fileDescriptor_8bbd6f3875b0e874 = []byte{
// 288 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x6c, 0x90, 0xc1, 0x4a, 0xc3, 0x30,
0x1c, 0xc6, 0x9b, 0xb6, 0x1b, 0xed, 0x5f, 0x27, 0x25, 0x0c, 0x0c, 0x13, 0x42, 0xe9, 0xa9, 0x78,
0xa8, 0xb0, 0x5d, 0xbc, 0x2a, 0xf6, 0x20, 0x78, 0x90, 0x50, 0xf1, 0x28, 0x1d, 0x0d, 0x75, 0x6c,
0x6d, 0x4a, 0x32, 0x91, 0xbe, 0x89, 0x07, 0x1f, 0x68, 0xc7, 0x3d, 0x82, 0xab, 0x2f, 0x22, 0x4d,
0x64, 0x43, 0xdc, 0xed, 0xfb, 0xbe, 0xff, 0x97, 0xe4, 0x97, 0x3f, 0x40, 0xfe, 0xb6, 0x7e, 0x4d,
0x1a, 0x29, 0xd6, 0x02, 0x0f, 0x7b, 0xdd, 0xcc, 0x27, 0xe3, 0x52, 0x94, 0x42, 0x47, 0x57, 0xbd,
0x32, 0xd3, 0xe8, 0x01, 0xdc, 0x27, 0xc5, 0x25, 0xc6, 0xe0, 0xd6, 0x79, 0xc5, 0x09, 0x0a, 0x51,
0x7c, 0xca, 0xb4, 0xc6, 0x13, 0xf0, 0x9a, 0x5c, 0xa9, 0x77, 0x21, 0x0b, 0x62, 0xeb, 0x7c, 0xef,
0xf1, 0x18, 0x06, 0x52, 0xac, 0xb8, 0x22, 0x4e, 0xe8, 0xc4, 0x3e, 0x33, 0x26, 0xfa, 0x44, 0x00,
0x8f, 0x5c, 0x56, 0x0b, 0xa5, 0x16, 0xa2, 0xc6, 0x33, 0xf0, 0x1a, 0x2e, 0xab, 0xac, 0x6d, 0xcc,
0xc5, 0x67, 0xd3, 0xf3, 0xc4, 0xd0, 0x24, 0x87, 0x56, 0xd2, 0x8f, 0xd9, 0xbe, 0x88, 0x03, 0x70,
0x96, 0xbc, 0xfd, 0x7d, 0xb0, 0x97, 0xf8, 0x02, 0x7c, 0x99, 0xd7, 0x25, 0x7f, 0xe1, 0x75, 0x41,
0x1c, 0x03, 0xa2, 0x83, 0xb4, 0x2e, 0xa2, 0x4b, 0x70, 0xf5, 0x31, 0x0f, 0x5c, 0x96, 0xde, 0xdc,
0x05, 0x16, 0xf6, 0x61, 0xf0, 0xcc, 0xee, 0xb3, 0x34, 0x40, 0x78, 0x04, 0x7e, 0x1f, 0x1a, 0x6b,
0x47, 0x19, 0xb8, 0x4c, 0xac, 0xf8, 0xd1, 0xcf, 0x5e, 0xc3, 0x68, 0xc9, 0xdb, 0x03, 0x16, 0xb1,
0x43, 0x27, 0x3e, 0x99, 0xe2, 0xff, 0xc0, 0xec, 0x6f, 0xf1, 0x96, 0x6c, 0x76, 0xd4, 0xda, 0xee,
0xa8, 0xb5, 0xe9, 0x28, 0xda, 0x76, 0x14, 0x7d, 0x75, 0x14, 0x7d, 0x7c, 0x53, 0x6b, 0x3e, 0xd4,
0x3b, 0x9e, 0xfd, 0x04, 0x00, 0x00, 0xff, 0xff, 0xcc, 0x76, 0x8d, 0x4f, 0x8f, 0x01, 0x00, 0x00,
}
func (m *User) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
n, err := m.MarshalTo(dAtA)
n, err := m.MarshalToSizedBuffer(dAtA[:size])
if err != nil {
return nil, err
}
@@ -113,44 +222,49 @@ func (m *User) Marshal() (dAtA []byte, err error) {
}
func (m *User) MarshalTo(dAtA []byte) (int, error) {
var i int
size := m.Size()
return m.MarshalToSizedBuffer(dAtA[:size])
}
func (m *User) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i := len(dAtA)
_ = i
var l int
_ = l
if len(m.Name) > 0 {
dAtA[i] = 0xa
i++
i = encodeVarintAuth(dAtA, i, uint64(len(m.Name)))
i += copy(dAtA[i:], m.Name)
}
if len(m.Password) > 0 {
dAtA[i] = 0x12
i++
i = encodeVarintAuth(dAtA, i, uint64(len(m.Password)))
i += copy(dAtA[i:], m.Password)
if m.XXX_unrecognized != nil {
i -= len(m.XXX_unrecognized)
copy(dAtA[i:], m.XXX_unrecognized)
}
if len(m.Roles) > 0 {
for _, s := range m.Roles {
for iNdEx := len(m.Roles) - 1; iNdEx >= 0; iNdEx-- {
i -= len(m.Roles[iNdEx])
copy(dAtA[i:], m.Roles[iNdEx])
i = encodeVarintAuth(dAtA, i, uint64(len(m.Roles[iNdEx])))
i--
dAtA[i] = 0x1a
i++
l = len(s)
for l >= 1<<7 {
dAtA[i] = uint8(uint64(l)&0x7f | 0x80)
l >>= 7
i++
}
dAtA[i] = uint8(l)
i++
i += copy(dAtA[i:], s)
}
}
return i, nil
if len(m.Password) > 0 {
i -= len(m.Password)
copy(dAtA[i:], m.Password)
i = encodeVarintAuth(dAtA, i, uint64(len(m.Password)))
i--
dAtA[i] = 0x12
}
if len(m.Name) > 0 {
i -= len(m.Name)
copy(dAtA[i:], m.Name)
i = encodeVarintAuth(dAtA, i, uint64(len(m.Name)))
i--
dAtA[i] = 0xa
}
return len(dAtA) - i, nil
}
func (m *Permission) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
n, err := m.MarshalTo(dAtA)
n, err := m.MarshalToSizedBuffer(dAtA[:size])
if err != nil {
return nil, err
}
@@ -158,34 +272,45 @@ func (m *Permission) Marshal() (dAtA []byte, err error) {
}
func (m *Permission) MarshalTo(dAtA []byte) (int, error) {
var i int
size := m.Size()
return m.MarshalToSizedBuffer(dAtA[:size])
}
func (m *Permission) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i := len(dAtA)
_ = i
var l int
_ = l
if m.PermType != 0 {
dAtA[i] = 0x8
i++
i = encodeVarintAuth(dAtA, i, uint64(m.PermType))
}
if len(m.Key) > 0 {
dAtA[i] = 0x12
i++
i = encodeVarintAuth(dAtA, i, uint64(len(m.Key)))
i += copy(dAtA[i:], m.Key)
if m.XXX_unrecognized != nil {
i -= len(m.XXX_unrecognized)
copy(dAtA[i:], m.XXX_unrecognized)
}
if len(m.RangeEnd) > 0 {
dAtA[i] = 0x1a
i++
i -= len(m.RangeEnd)
copy(dAtA[i:], m.RangeEnd)
i = encodeVarintAuth(dAtA, i, uint64(len(m.RangeEnd)))
i += copy(dAtA[i:], m.RangeEnd)
i--
dAtA[i] = 0x1a
}
return i, nil
if len(m.Key) > 0 {
i -= len(m.Key)
copy(dAtA[i:], m.Key)
i = encodeVarintAuth(dAtA, i, uint64(len(m.Key)))
i--
dAtA[i] = 0x12
}
if m.PermType != 0 {
i = encodeVarintAuth(dAtA, i, uint64(m.PermType))
i--
dAtA[i] = 0x8
}
return len(dAtA) - i, nil
}
func (m *Role) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
n, err := m.MarshalTo(dAtA)
n, err := m.MarshalToSizedBuffer(dAtA[:size])
if err != nil {
return nil, err
}
@@ -193,41 +318,58 @@ func (m *Role) Marshal() (dAtA []byte, err error) {
}
func (m *Role) MarshalTo(dAtA []byte) (int, error) {
var i int
size := m.Size()
return m.MarshalToSizedBuffer(dAtA[:size])
}
func (m *Role) MarshalToSizedBuffer(dAtA []byte) (int, error) {
i := len(dAtA)
_ = i
var l int
_ = l
if len(m.Name) > 0 {
dAtA[i] = 0xa
i++
i = encodeVarintAuth(dAtA, i, uint64(len(m.Name)))
i += copy(dAtA[i:], m.Name)
if m.XXX_unrecognized != nil {
i -= len(m.XXX_unrecognized)
copy(dAtA[i:], m.XXX_unrecognized)
}
if len(m.KeyPermission) > 0 {
for _, msg := range m.KeyPermission {
dAtA[i] = 0x12
i++
i = encodeVarintAuth(dAtA, i, uint64(msg.Size()))
n, err := msg.MarshalTo(dAtA[i:])
if err != nil {
return 0, err
for iNdEx := len(m.KeyPermission) - 1; iNdEx >= 0; iNdEx-- {
{
size, err := m.KeyPermission[iNdEx].MarshalToSizedBuffer(dAtA[:i])
if err != nil {
return 0, err
}
i -= size
i = encodeVarintAuth(dAtA, i, uint64(size))
}
i += n
i--
dAtA[i] = 0x12
}
}
return i, nil
if len(m.Name) > 0 {
i -= len(m.Name)
copy(dAtA[i:], m.Name)
i = encodeVarintAuth(dAtA, i, uint64(len(m.Name)))
i--
dAtA[i] = 0xa
}
return len(dAtA) - i, nil
}
func encodeVarintAuth(dAtA []byte, offset int, v uint64) int {
offset -= sovAuth(v)
base := offset
for v >= 1<<7 {
dAtA[offset] = uint8(v&0x7f | 0x80)
v >>= 7
offset++
}
dAtA[offset] = uint8(v)
return offset + 1
return base
}
func (m *User) Size() (n int) {
if m == nil {
return 0
}
var l int
_ = l
l = len(m.Name)
@@ -244,10 +386,16 @@ func (m *User) Size() (n int) {
n += 1 + l + sovAuth(uint64(l))
}
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func (m *Permission) Size() (n int) {
if m == nil {
return 0
}
var l int
_ = l
if m.PermType != 0 {
@@ -261,10 +409,16 @@ func (m *Permission) Size() (n int) {
if l > 0 {
n += 1 + l + sovAuth(uint64(l))
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func (m *Role) Size() (n int) {
if m == nil {
return 0
}
var l int
_ = l
l = len(m.Name)
@@ -277,18 +431,14 @@ func (m *Role) Size() (n int) {
n += 1 + l + sovAuth(uint64(l))
}
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func sovAuth(x uint64) (n int) {
for {
n++
x >>= 7
if x == 0 {
break
}
}
return n
return (math_bits.Len64(x|1) + 6) / 7
}
func sozAuth(x uint64) (n int) {
return sovAuth(uint64((x << 1) ^ uint64((int64(x) >> 63))))
@@ -308,7 +458,7 @@ func (m *User) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -336,7 +486,7 @@ func (m *User) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
byteLen |= (int(b) & 0x7F) << shift
byteLen |= int(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -345,6 +495,9 @@ func (m *User) Unmarshal(dAtA []byte) error {
return ErrInvalidLengthAuth
}
postIndex := iNdEx + byteLen
if postIndex < 0 {
return ErrInvalidLengthAuth
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
@@ -367,7 +520,7 @@ func (m *User) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
byteLen |= (int(b) & 0x7F) << shift
byteLen |= int(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -376,6 +529,9 @@ func (m *User) Unmarshal(dAtA []byte) error {
return ErrInvalidLengthAuth
}
postIndex := iNdEx + byteLen
if postIndex < 0 {
return ErrInvalidLengthAuth
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
@@ -398,7 +554,7 @@ func (m *User) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
stringLen |= (uint64(b) & 0x7F) << shift
stringLen |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -408,6 +564,9 @@ func (m *User) Unmarshal(dAtA []byte) error {
return ErrInvalidLengthAuth
}
postIndex := iNdEx + intStringLen
if postIndex < 0 {
return ErrInvalidLengthAuth
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
@@ -422,9 +581,13 @@ func (m *User) Unmarshal(dAtA []byte) error {
if skippy < 0 {
return ErrInvalidLengthAuth
}
if (iNdEx + skippy) < 0 {
return ErrInvalidLengthAuth
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
@@ -449,7 +612,7 @@ func (m *Permission) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -477,7 +640,7 @@ func (m *Permission) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
m.PermType |= (Permission_Type(b) & 0x7F) << shift
m.PermType |= Permission_Type(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -496,7 +659,7 @@ func (m *Permission) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
byteLen |= (int(b) & 0x7F) << shift
byteLen |= int(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -505,6 +668,9 @@ func (m *Permission) Unmarshal(dAtA []byte) error {
return ErrInvalidLengthAuth
}
postIndex := iNdEx + byteLen
if postIndex < 0 {
return ErrInvalidLengthAuth
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
@@ -527,7 +693,7 @@ func (m *Permission) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
byteLen |= (int(b) & 0x7F) << shift
byteLen |= int(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -536,6 +702,9 @@ func (m *Permission) Unmarshal(dAtA []byte) error {
return ErrInvalidLengthAuth
}
postIndex := iNdEx + byteLen
if postIndex < 0 {
return ErrInvalidLengthAuth
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
@@ -553,9 +722,13 @@ func (m *Permission) Unmarshal(dAtA []byte) error {
if skippy < 0 {
return ErrInvalidLengthAuth
}
if (iNdEx + skippy) < 0 {
return ErrInvalidLengthAuth
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
@@ -580,7 +753,7 @@ func (m *Role) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -608,7 +781,7 @@ func (m *Role) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
byteLen |= (int(b) & 0x7F) << shift
byteLen |= int(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -617,6 +790,9 @@ func (m *Role) Unmarshal(dAtA []byte) error {
return ErrInvalidLengthAuth
}
postIndex := iNdEx + byteLen
if postIndex < 0 {
return ErrInvalidLengthAuth
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
@@ -639,7 +815,7 @@ func (m *Role) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
msglen |= int(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -648,6 +824,9 @@ func (m *Role) Unmarshal(dAtA []byte) error {
return ErrInvalidLengthAuth
}
postIndex := iNdEx + msglen
if postIndex < 0 {
return ErrInvalidLengthAuth
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
@@ -665,9 +844,13 @@ func (m *Role) Unmarshal(dAtA []byte) error {
if skippy < 0 {
return ErrInvalidLengthAuth
}
if (iNdEx + skippy) < 0 {
return ErrInvalidLengthAuth
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
@@ -731,10 +914,13 @@ func skipAuth(dAtA []byte) (n int, err error) {
break
}
}
iNdEx += length
if length < 0 {
return 0, ErrInvalidLengthAuth
}
iNdEx += length
if iNdEx < 0 {
return 0, ErrInvalidLengthAuth
}
return iNdEx, nil
case 3:
for {
@@ -763,6 +949,9 @@ func skipAuth(dAtA []byte) (n int, err error) {
return 0, err
}
iNdEx = start + next
if iNdEx < 0 {
return 0, ErrInvalidLengthAuth
}
}
return iNdEx, nil
case 4:
@@ -781,27 +970,3 @@ var (
ErrInvalidLengthAuth = fmt.Errorf("proto: negative length found during unmarshaling")
ErrIntOverflowAuth = fmt.Errorf("proto: integer overflow")
)
func init() { proto.RegisterFile("auth.proto", fileDescriptorAuth) }
var fileDescriptorAuth = []byte{
// 288 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x6c, 0x90, 0xc1, 0x4a, 0xc3, 0x30,
0x1c, 0xc6, 0x9b, 0xb6, 0x1b, 0xed, 0x5f, 0x27, 0x25, 0x0c, 0x0c, 0x13, 0x42, 0xe9, 0xa9, 0x78,
0xa8, 0xb0, 0x5d, 0xbc, 0x2a, 0xf6, 0x20, 0x78, 0x90, 0x50, 0xf1, 0x28, 0x1d, 0x0d, 0x75, 0x6c,
0x6d, 0x4a, 0x32, 0x91, 0xbe, 0x89, 0x07, 0x1f, 0x68, 0xc7, 0x3d, 0x82, 0xab, 0x2f, 0x22, 0x4d,
0x64, 0x43, 0xdc, 0xed, 0xfb, 0xbe, 0xff, 0x97, 0xe4, 0x97, 0x3f, 0x40, 0xfe, 0xb6, 0x7e, 0x4d,
0x1a, 0x29, 0xd6, 0x02, 0x0f, 0x7b, 0xdd, 0xcc, 0x27, 0xe3, 0x52, 0x94, 0x42, 0x47, 0x57, 0xbd,
0x32, 0xd3, 0xe8, 0x01, 0xdc, 0x27, 0xc5, 0x25, 0xc6, 0xe0, 0xd6, 0x79, 0xc5, 0x09, 0x0a, 0x51,
0x7c, 0xca, 0xb4, 0xc6, 0x13, 0xf0, 0x9a, 0x5c, 0xa9, 0x77, 0x21, 0x0b, 0x62, 0xeb, 0x7c, 0xef,
0xf1, 0x18, 0x06, 0x52, 0xac, 0xb8, 0x22, 0x4e, 0xe8, 0xc4, 0x3e, 0x33, 0x26, 0xfa, 0x44, 0x00,
0x8f, 0x5c, 0x56, 0x0b, 0xa5, 0x16, 0xa2, 0xc6, 0x33, 0xf0, 0x1a, 0x2e, 0xab, 0xac, 0x6d, 0xcc,
0xc5, 0x67, 0xd3, 0xf3, 0xc4, 0xd0, 0x24, 0x87, 0x56, 0xd2, 0x8f, 0xd9, 0xbe, 0x88, 0x03, 0x70,
0x96, 0xbc, 0xfd, 0x7d, 0xb0, 0x97, 0xf8, 0x02, 0x7c, 0x99, 0xd7, 0x25, 0x7f, 0xe1, 0x75, 0x41,
0x1c, 0x03, 0xa2, 0x83, 0xb4, 0x2e, 0xa2, 0x4b, 0x70, 0xf5, 0x31, 0x0f, 0x5c, 0x96, 0xde, 0xdc,
0x05, 0x16, 0xf6, 0x61, 0xf0, 0xcc, 0xee, 0xb3, 0x34, 0x40, 0x78, 0x04, 0x7e, 0x1f, 0x1a, 0x6b,
0x47, 0x19, 0xb8, 0x4c, 0xac, 0xf8, 0xd1, 0xcf, 0x5e, 0xc3, 0x68, 0xc9, 0xdb, 0x03, 0x16, 0xb1,
0x43, 0x27, 0x3e, 0x99, 0xe2, 0xff, 0xc0, 0xec, 0x6f, 0xf1, 0x96, 0x6c, 0x76, 0xd4, 0xda, 0xee,
0xa8, 0xb5, 0xe9, 0x28, 0xda, 0x76, 0x14, 0x7d, 0x75, 0x14, 0x7d, 0x7c, 0x53, 0x6b, 0x3e, 0xd4,
0x3b, 0x9e, 0xfd, 0x04, 0x00, 0x00, 0xff, 0xff, 0xcc, 0x76, 0x8d, 0x4f, 0x8f, 0x01, 0x00, 0x00,
}

View File

@@ -27,8 +27,8 @@ func getMergedPerms(tx backend.BatchTx, userName string) *unifiedRangePermission
return nil
}
readPerms := &adt.IntervalTree{}
writePerms := &adt.IntervalTree{}
readPerms := adt.NewIntervalTree()
writePerms := adt.NewIntervalTree()
for _, roleName := range user.Roles {
role := getRole(tx, roleName)
@@ -128,6 +128,6 @@ func (as *authStore) invalidateCachedPerm(userName string) {
}
type unifiedRangePermissions struct {
readPerms *adt.IntervalTree
writePerms *adt.IntervalTree
readPerms adt.IntervalTree
writePerms adt.IntervalTree
}

View File

@@ -46,7 +46,7 @@ func TestRangePermission(t *testing.T) {
}
for i, tt := range tests {
readPerms := &adt.IntervalTree{}
readPerms := adt.NewIntervalTree()
for _, p := range tt.perms {
readPerms.Insert(p, struct{}{})
}

View File

@@ -672,25 +672,25 @@ func TestRolesOrder(t *testing.T) {
}
username := "user"
_, err = as.UserAdd(&pb.AuthUserAddRequest{username, "pass"})
_, err = as.UserAdd(&pb.AuthUserAddRequest{Name: username, Password: "pass"})
if err != nil {
t.Fatal(err)
}
roles := []string{"role1", "role2", "abc", "xyz", "role3"}
for _, role := range roles {
_, err = as.RoleAdd(&pb.AuthRoleAddRequest{role})
_, err = as.RoleAdd(&pb.AuthRoleAddRequest{Name: role})
if err != nil {
t.Fatal(err)
}
_, err = as.UserGrantRole(&pb.AuthUserGrantRoleRequest{username, role})
_, err = as.UserGrantRole(&pb.AuthUserGrantRoleRequest{User: username, Role: role})
if err != nil {
t.Fatal(err)
}
}
user, err := as.UserGet(&pb.AuthUserGetRequest{username})
user, err := as.UserGet(&pb.AuthUserGetRequest{Name: username})
if err != nil {
t.Fatal(err)
}

View File

@@ -98,21 +98,12 @@
}
]
},
{
"project": "github.com/ghodss/yaml",
"licenses": [
{
"type": "MIT License and BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 1
}
]
},
{
"project": "github.com/gogo/protobuf",
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.9090909090909091
"confidence": 0.9163346613545816
}
]
},
@@ -130,7 +121,7 @@
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.92
"confidence": 0.9663865546218487
}
]
},
@@ -143,6 +134,15 @@
}
]
},
{
"project": "github.com/google/uuid",
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.9663865546218487
}
]
},
{
"project": "github.com/gorilla/websocket",
"licenses": [
@@ -152,6 +152,15 @@
}
]
},
{
"project": "github.com/grpc-ecosystem/go-grpc-middleware",
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/grpc-ecosystem/go-grpc-prometheus",
"licenses": [
@@ -173,10 +182,6 @@
{
"project": "github.com/inconshreveable/mousetrap",
"licenses": [
{
"type": "MIT License and BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 1
},
{
"type": "Apache License 2.0",
"confidence": 1
@@ -427,7 +432,7 @@
]
},
{
"project": "google.golang.org/genproto/googleapis/rpc/status",
"project": "google.golang.org/genproto/googleapis",
"licenses": [
{
"type": "Apache License 2.0",
@@ -457,13 +462,26 @@
"project": "gopkg.in/yaml.v2",
"licenses": [
{
"type": "The Unlicense",
"confidence": 0.35294117647058826
"type": "Apache License 2.0",
"confidence": 1
},
{
"type": "MIT License",
"confidence": 0.8975609756097561
}
]
},
{
"project": "sigs.k8s.io/yaml",
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
},
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 1
}
]
}
]

View File

@@ -8,10 +8,10 @@
]
},
{
"project": "github.com/ghodss/yaml",
"project": "sigs.k8s.io/yaml",
"licenses": [
{
"type": "MIT License and BSD 3-clause \"New\" or \"Revised\" License"
"type": "BSD 3-clause \"New\" or \"Revised\" License"
}
]
},

78
build
View File

@@ -1,66 +1,94 @@
#!/bin/sh -e
#!/usr/bin/env bash
# set some environment variables
ORG_PATH="github.com/coreos"
REPO_PATH="${ORG_PATH}/etcd"
GIT_SHA=$(git rev-parse --short HEAD || echo "GitNotFound")
if [ ! -z "$FAILPOINTS" ]; then
if [[ -n "$FAILPOINTS" ]]; then
GIT_SHA="$GIT_SHA"-FAILPOINTS
fi
# Set GO_LDFLAGS="-s" for building without symbols for debugging.
GO_LDFLAGS="$GO_LDFLAGS -X ${REPO_PATH}/cmd/vendor/${REPO_PATH}/version.GitSHA=${GIT_SHA}"
GO_LDFLAGS="$GO_LDFLAGS -X ${REPO_PATH}/version.GitSHA=${GIT_SHA}"
# enable/disable failpoints
toggle_failpoints() {
mode="$1"
if which gofail >/dev/null 2>&1; then
if command -v gofail >/dev/null 2>&1; then
gofail "$mode" etcdserver/ mvcc/backend/
elif [ "$mode" != "disable" ]; then
elif [[ "$mode" != "disable" ]]; then
echo "FAILPOINTS set but gofail not found"
exit 1
fi
}
etcd_setup_gopath() {
echo "Setting GOPATH from vendor directory at 'gopath'"
d=$(dirname "$0")
CDIR=$(cd "$d" || return && pwd)
cd "$CDIR" || return
etcdGOPATH="${CDIR}/gopath"
# preserve old gopath to support building with unvendored tooling deps (e.g., gofail)
if [[ -n "$GOPATH" ]]; then
GOPATH=":$GOPATH"
fi
rm -rf "${etcdGOPATH:?}/"
mkdir -p "${etcdGOPATH}/vendor" "${etcdGOPATH}/etcd_src/src/github.com/coreos"
export GOPATH=${etcdGOPATH}/vendor:${etcdGOPATH}/etcd_src${GOPATH}
ln -s "${CDIR}/vendor" "${etcdGOPATH}/vendor/src"
ln -s "${CDIR}" "${etcdGOPATH}/etcd_src/src/github.com/coreos/etcd"
}
toggle_failpoints_default() {
mode="disable"
if [ ! -z "$FAILPOINTS" ]; then mode="enable"; fi
if [[ -n "$FAILPOINTS" ]]; then mode="enable"; fi
toggle_failpoints "$mode"
}
etcd_build() {
out="bin"
if [ -n "${BINDIR}" ]; then out="${BINDIR}"; fi
if [[ -n "${BINDIR}" ]]; then out="${BINDIR}"; fi
toggle_failpoints_default
# Static compilation is useful when etcd is run in a container. $GO_BUILD_FLAGS is OK
# Static compilation is useful when etcd is run in a container. $GO_BUILD_FLAGS is OK
# shellcheck disable=SC2086
CGO_ENABLED=0 go build $GO_BUILD_FLAGS -installsuffix cgo -ldflags "$GO_LDFLAGS" -o "${out}/etcd" ${REPO_PATH}/cmd/etcd || return
CGO_ENABLED=0 go build $GO_BUILD_FLAGS \
-installsuffix cgo \
-ldflags "$GO_LDFLAGS" \
-o "${out}/etcd" ${REPO_PATH} || return
# shellcheck disable=SC2086
CGO_ENABLED=0 go build $GO_BUILD_FLAGS -installsuffix cgo -ldflags "$GO_LDFLAGS" -o "${out}/etcdctl" ${REPO_PATH}/cmd/etcdctl || return
CGO_ENABLED=0 go build $GO_BUILD_FLAGS \
-installsuffix cgo \
-ldflags "$GO_LDFLAGS" \
-o "${out}/etcdctl" ${REPO_PATH}/etcdctl || return
}
etcd_setup_gopath() {
d=$(dirname "$0")
CDIR=$(cd "$d" && pwd)
cd "$CDIR"
etcdGOPATH="${CDIR}/gopath"
# preserve old gopath to support building with unvendored tooling deps (e.g., gofail)
if [ -n "$GOPATH" ]; then
GOPATH=":$GOPATH"
fi
export GOPATH=${etcdGOPATH}$GOPATH
rm -rf "${etcdGOPATH}/src"
mkdir -p "${etcdGOPATH}"
ln -s "${CDIR}/cmd/vendor" "${etcdGOPATH}/src"
tools_build() {
out="bin"
if [[ -n "${BINDIR}" ]]; then out="${BINDIR}"; fi
tools_path="functional/cmd/etcd-agent
functional/cmd/etcd-proxy
functional/cmd/etcd-runner
functional/cmd/etcd-tester"
for tool in ${tools_path}
do
echo "Building" "'${tool}'"...
# shellcheck disable=SC2086
CGO_ENABLED=0 go build ${GO_BUILD_FLAGS} \
-installsuffix cgo \
-ldflags "${GO_LDFLAGS}" \
-o "${out}/${tool}" "${REPO_PATH}/${tool}" || return
done
}
toggle_failpoints_default
if [[ "${ETCD_SETUP_GOPATH}" == "1" ]]; then
etcd_setup_gopath
fi
# only build when called directly, not sourced
if echo "$0" | grep "build$" >/dev/null; then
# force new gopath so builds outside of gopath work
etcd_setup_gopath
etcd_build
fi

View File

@@ -1,85 +0,0 @@
# etcd/clientv3
[![Godoc](https://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](https://godoc.org/github.com/coreos/etcd/clientv3)
`etcd/clientv3` is the official Go etcd client for v3.
## Install
```bash
go get github.com/coreos/etcd/clientv3
```
## Get started
Create client using `clientv3.New`:
```go
cli, err := clientv3.New(clientv3.Config{
Endpoints: []string{"localhost:2379", "localhost:22379", "localhost:32379"},
DialTimeout: 5 * time.Second,
})
if err != nil {
// handle error!
}
defer cli.Close()
```
etcd v3 uses [`gRPC`](http://www.grpc.io) for remote procedure calls. And `clientv3` uses
[`grpc-go`](https://github.com/grpc/grpc-go) to connect to etcd. Make sure to close the client after using it.
If the client is not closed, the connection will have leaky goroutines. To specify client request timeout,
pass `context.WithTimeout` to APIs:
```go
ctx, cancel := context.WithTimeout(context.Background(), timeout)
resp, err := cli.Put(ctx, "sample_key", "sample_value")
cancel()
if err != nil {
// handle error!
}
// use the response
```
etcd uses `cmd/vendor` directory to store external dependencies, which are
to be compiled into etcd release binaries. `client` can be imported without
vendoring. For full compatibility, it is recommended to vendor builds using
etcd's vendored packages, using tools like godep, as in
[vendor directories](https://golang.org/cmd/go/#hdr-Vendor_Directories).
For more detail, please read [Go vendor design](https://golang.org/s/go15vendor).
## Error Handling
etcd client returns 2 types of errors:
1. context error: canceled or deadline exceeded.
2. gRPC error: see [api/v3rpc/rpctypes](https://godoc.org/github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes).
Here is the example code to handle client errors:
```go
resp, err := cli.Put(ctx, "", "")
if err != nil {
switch err {
case context.Canceled:
log.Fatalf("ctx is canceled by another routine: %v", err)
case context.DeadlineExceeded:
log.Fatalf("ctx is attached with a deadline is exceeded: %v", err)
case rpctypes.ErrEmptyKey:
log.Fatalf("client-side error: %v", err)
default:
log.Fatalf("bad cluster endpoints, which are not etcd servers: %v", err)
}
}
```
## Metrics
The etcd client optionally exposes RPC metrics through [go-grpc-prometheus](https://github.com/grpc-ecosystem/go-grpc-prometheus). See the [examples](https://github.com/coreos/etcd/blob/master/clientv3/example_metrics_test.go).
## Namespacing
The [namespace](https://godoc.org/github.com/coreos/etcd/clientv3/namespace) package provides `clientv3` interface wrappers to transparently isolate client requests to a user-defined prefix.
## Examples
More code examples can be found at [GoDoc](https://godoc.org/github.com/coreos/etcd/clientv3).

View File

@@ -216,8 +216,8 @@ func (auth *authenticator) close() {
auth.conn.Close()
}
func newAuthenticator(endpoint string, opts []grpc.DialOption, c *Client) (*authenticator, error) {
conn, err := grpc.Dial(endpoint, opts...)
func newAuthenticator(ctx context.Context, target string, opts []grpc.DialOption, c *Client) (*authenticator, error) {
conn, err := grpc.DialContext(ctx, target, opts...)
if err != nil {
return nil, err
}

View File

@@ -0,0 +1,293 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package balancer implements client balancer.
package balancer
import (
"strconv"
"sync"
"time"
"github.com/coreos/etcd/clientv3/balancer/connectivity"
"github.com/coreos/etcd/clientv3/balancer/picker"
"go.uber.org/zap"
"google.golang.org/grpc/balancer"
grpcconnectivity "google.golang.org/grpc/connectivity"
"google.golang.org/grpc/resolver"
_ "google.golang.org/grpc/resolver/dns" // register DNS resolver
_ "google.golang.org/grpc/resolver/passthrough" // register passthrough resolver
)
// Config defines balancer configurations.
type Config struct {
// Policy configures balancer policy.
Policy picker.Policy
// Picker implements gRPC picker.
// Leave empty if "Policy" field is not custom.
// TODO: currently custom policy is not supported.
// Picker picker.Picker
// Name defines an additional name for balancer.
// Useful for balancer testing to avoid register conflicts.
// If empty, defaults to policy name.
Name string
// Logger configures balancer logging.
// If nil, logs are discarded.
Logger *zap.Logger
}
// RegisterBuilder creates and registers a builder. Since this function calls balancer.Register, it
// must be invoked at initialization time.
func RegisterBuilder(cfg Config) {
bb := &builder{cfg}
balancer.Register(bb)
bb.cfg.Logger.Debug(
"registered balancer",
zap.String("policy", bb.cfg.Policy.String()),
zap.String("name", bb.cfg.Name),
)
}
type builder struct {
cfg Config
}
// Build is called initially when creating "ccBalancerWrapper".
// "grpc.Dial" is called to this client connection.
// Then, resolved addresses will be handled via "HandleResolvedAddrs".
func (b *builder) Build(cc balancer.ClientConn, opt balancer.BuildOptions) balancer.Balancer {
bb := &baseBalancer{
id: strconv.FormatInt(time.Now().UnixNano(), 36),
policy: b.cfg.Policy,
name: b.cfg.Name,
lg: b.cfg.Logger,
addrToSc: make(map[resolver.Address]balancer.SubConn),
scToAddr: make(map[balancer.SubConn]resolver.Address),
scToSt: make(map[balancer.SubConn]grpcconnectivity.State),
currentConn: nil,
connectivityRecorder: connectivity.New(b.cfg.Logger),
// initialize picker always returns "ErrNoSubConnAvailable"
picker: picker.NewErr(balancer.ErrNoSubConnAvailable),
}
// TODO: support multiple connections
bb.mu.Lock()
bb.currentConn = cc
bb.mu.Unlock()
bb.lg.Info(
"built balancer",
zap.String("balancer-id", bb.id),
zap.String("policy", bb.policy.String()),
zap.String("resolver-target", cc.Target()),
)
return bb
}
// Name implements "grpc/balancer.Builder" interface.
func (b *builder) Name() string { return b.cfg.Name }
// Balancer defines client balancer interface.
type Balancer interface {
// Balancer is called on specified client connection. Client initiates gRPC
// connection with "grpc.Dial(addr, grpc.WithBalancerName)", and then those resolved
// addresses are passed to "grpc/balancer.Balancer.HandleResolvedAddrs".
// For each resolved address, balancer calls "balancer.ClientConn.NewSubConn".
// "grpc/balancer.Balancer.HandleSubConnStateChange" is called when connectivity state
// changes, thus requires failover logic in this method.
balancer.Balancer
// Picker calls "Pick" for every client request.
picker.Picker
}
type baseBalancer struct {
id string
policy picker.Policy
name string
lg *zap.Logger
mu sync.RWMutex
addrToSc map[resolver.Address]balancer.SubConn
scToAddr map[balancer.SubConn]resolver.Address
scToSt map[balancer.SubConn]grpcconnectivity.State
currentConn balancer.ClientConn
connectivityRecorder connectivity.Recorder
picker picker.Picker
}
// HandleResolvedAddrs implements "grpc/balancer.Balancer" interface.
// gRPC sends initial or updated resolved addresses from "Build".
func (bb *baseBalancer) HandleResolvedAddrs(addrs []resolver.Address, err error) {
if err != nil {
bb.lg.Warn("HandleResolvedAddrs called with error", zap.String("balancer-id", bb.id), zap.Error(err))
return
}
bb.lg.Info("resolved",
zap.String("picker", bb.picker.String()),
zap.String("balancer-id", bb.id),
zap.Strings("addresses", addrsToStrings(addrs)),
)
bb.mu.Lock()
defer bb.mu.Unlock()
resolved := make(map[resolver.Address]struct{})
for _, addr := range addrs {
resolved[addr] = struct{}{}
if _, ok := bb.addrToSc[addr]; !ok {
sc, err := bb.currentConn.NewSubConn([]resolver.Address{addr}, balancer.NewSubConnOptions{})
if err != nil {
bb.lg.Warn("NewSubConn failed", zap.String("picker", bb.picker.String()), zap.String("balancer-id", bb.id), zap.Error(err), zap.String("address", addr.Addr))
continue
}
bb.lg.Info("created subconn", zap.String("address", addr.Addr))
bb.addrToSc[addr] = sc
bb.scToAddr[sc] = addr
bb.scToSt[sc] = grpcconnectivity.Idle
sc.Connect()
}
}
for addr, sc := range bb.addrToSc {
if _, ok := resolved[addr]; !ok {
// was removed by resolver or failed to create subconn
bb.currentConn.RemoveSubConn(sc)
delete(bb.addrToSc, addr)
bb.lg.Info(
"removed subconn",
zap.String("picker", bb.picker.String()),
zap.String("balancer-id", bb.id),
zap.String("address", addr.Addr),
zap.String("subconn", scToString(sc)),
)
// Keep the state of this sc in bb.scToSt until sc's state becomes Shutdown.
// The entry will be deleted in HandleSubConnStateChange.
// (DO NOT) delete(bb.scToAddr, sc)
// (DO NOT) delete(bb.scToSt, sc)
}
}
}
// HandleSubConnStateChange implements "grpc/balancer.Balancer" interface.
func (bb *baseBalancer) HandleSubConnStateChange(sc balancer.SubConn, s grpcconnectivity.State) {
bb.mu.Lock()
defer bb.mu.Unlock()
old, ok := bb.scToSt[sc]
if !ok {
bb.lg.Warn(
"state change for an unknown subconn",
zap.String("picker", bb.picker.String()),
zap.String("balancer-id", bb.id),
zap.String("subconn", scToString(sc)),
zap.Int("subconn-size", len(bb.scToAddr)),
zap.String("state", s.String()),
)
return
}
bb.lg.Info(
"state changed",
zap.String("picker", bb.picker.String()),
zap.String("balancer-id", bb.id),
zap.Bool("connected", s == grpcconnectivity.Ready),
zap.String("subconn", scToString(sc)),
zap.Int("subconn-size", len(bb.scToAddr)),
zap.String("address", bb.scToAddr[sc].Addr),
zap.String("old-state", old.String()),
zap.String("new-state", s.String()),
)
bb.scToSt[sc] = s
switch s {
case grpcconnectivity.Idle:
sc.Connect()
case grpcconnectivity.Shutdown:
// When an address was removed by resolver, b called RemoveSubConn but
// kept the sc's state in scToSt. Remove state for this sc here.
delete(bb.scToAddr, sc)
delete(bb.scToSt, sc)
}
oldAggrState := bb.connectivityRecorder.GetCurrentState()
bb.connectivityRecorder.RecordTransition(old, s)
// Update balancer picker when one of the following happens:
// - this sc became ready from not-ready
// - this sc became not-ready from ready
// - the aggregated state of balancer became TransientFailure from non-TransientFailure
// - the aggregated state of balancer became non-TransientFailure from TransientFailure
if (s == grpcconnectivity.Ready) != (old == grpcconnectivity.Ready) ||
(bb.connectivityRecorder.GetCurrentState() == grpcconnectivity.TransientFailure) != (oldAggrState == grpcconnectivity.TransientFailure) {
bb.updatePicker()
}
bb.currentConn.UpdateBalancerState(bb.connectivityRecorder.GetCurrentState(), bb.picker)
}
func (bb *baseBalancer) updatePicker() {
if bb.connectivityRecorder.GetCurrentState() == grpcconnectivity.TransientFailure {
bb.picker = picker.NewErr(balancer.ErrTransientFailure)
bb.lg.Info(
"updated picker to transient error picker",
zap.String("picker", bb.picker.String()),
zap.String("balancer-id", bb.id),
zap.String("policy", bb.policy.String()),
)
return
}
// only pass ready subconns to picker
scToAddr := make(map[balancer.SubConn]resolver.Address)
for addr, sc := range bb.addrToSc {
if st, ok := bb.scToSt[sc]; ok && st == grpcconnectivity.Ready {
scToAddr[sc] = addr
}
}
bb.picker = picker.New(picker.Config{
Policy: bb.policy,
Logger: bb.lg,
SubConnToResolverAddress: scToAddr,
})
bb.lg.Info(
"updated picker",
zap.String("picker", bb.picker.String()),
zap.String("balancer-id", bb.id),
zap.String("policy", bb.policy.String()),
zap.Strings("subconn-ready", scsToStrings(scToAddr)),
zap.Int("subconn-size", len(scToAddr)),
)
}
// Close implements "grpc/balancer.Balancer" interface.
// Close is a nop because base balancer doesn't have internal state to clean up,
// and it doesn't need to call RemoveSubConn for the SubConns.
func (bb *baseBalancer) Close() {
// TODO
}

View File

@@ -0,0 +1,310 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package balancer
import (
"context"
"fmt"
"strings"
"testing"
"time"
"github.com/coreos/etcd/clientv3/balancer/picker"
"github.com/coreos/etcd/clientv3/balancer/resolver/endpoint"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/mock/mockserver"
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/peer"
"google.golang.org/grpc/status"
)
// TestRoundRobinBalancedResolvableNoFailover ensures that
// requests to a resolvable endpoint can be balanced between
// multiple, if any, nodes. And there needs be no failover.
func TestRoundRobinBalancedResolvableNoFailover(t *testing.T) {
testCases := []struct {
name string
serverCount int
reqN int
network string
}{
{name: "rrBalanced_1", serverCount: 1, reqN: 5, network: "tcp"},
{name: "rrBalanced_1_unix_sockets", serverCount: 1, reqN: 5, network: "unix"},
{name: "rrBalanced_3", serverCount: 3, reqN: 7, network: "tcp"},
{name: "rrBalanced_5", serverCount: 5, reqN: 10, network: "tcp"},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
ms, err := mockserver.StartMockServersOnNetwork(tc.serverCount, tc.network)
if err != nil {
t.Fatalf("failed to start mock servers: %v", err)
}
defer ms.Stop()
var eps []string
for _, svr := range ms.Servers {
eps = append(eps, svr.ResolverAddress().Addr)
}
rsv, err := endpoint.NewResolverGroup("nofailover")
if err != nil {
t.Fatal(err)
}
defer rsv.Close()
rsv.SetEndpoints(eps)
name := genName()
cfg := Config{
Policy: picker.RoundrobinBalanced,
Name: name,
Logger: zap.NewExample(),
}
RegisterBuilder(cfg)
conn, err := grpc.Dial(fmt.Sprintf("endpoint://nofailover/*"), grpc.WithInsecure(), grpc.WithBalancerName(name))
if err != nil {
t.Fatalf("failed to dial mock server: %v", err)
}
defer conn.Close()
cli := pb.NewKVClient(conn)
reqFunc := func(ctx context.Context) (picked string, err error) {
var p peer.Peer
_, err = cli.Range(ctx, &pb.RangeRequest{Key: []byte("/x")}, grpc.Peer(&p))
if p.Addr != nil {
picked = p.Addr.String()
}
return picked, err
}
prev, switches := "", 0
for i := 0; i < tc.reqN; i++ {
picked, err := reqFunc(context.Background())
if err != nil {
t.Fatalf("#%d: unexpected failure %v", i, err)
}
if prev == "" {
prev = picked
continue
}
if prev != picked {
switches++
}
prev = picked
}
if tc.serverCount > 1 && switches < tc.reqN-3 { // -3 for initial resolutions
// TODO: FIX ME
t.Skipf("expected balanced loads for %d requests, got switches %d", tc.reqN, switches)
}
})
}
}
// TestRoundRobinBalancedResolvableFailoverFromServerFail ensures that
// loads be rebalanced while one server goes down and comes back.
func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) {
serverCount := 5
ms, err := mockserver.StartMockServers(serverCount)
if err != nil {
t.Fatalf("failed to start mock servers: %s", err)
}
defer ms.Stop()
var eps []string
for _, svr := range ms.Servers {
eps = append(eps, svr.ResolverAddress().Addr)
}
rsv, err := endpoint.NewResolverGroup("serverfail")
if err != nil {
t.Fatal(err)
}
defer rsv.Close()
rsv.SetEndpoints(eps)
name := genName()
cfg := Config{
Policy: picker.RoundrobinBalanced,
Name: name,
Logger: zap.NewExample(),
}
RegisterBuilder(cfg)
conn, err := grpc.Dial(fmt.Sprintf("endpoint://serverfail/mock.server"), grpc.WithInsecure(), grpc.WithBalancerName(name))
if err != nil {
t.Fatalf("failed to dial mock server: %s", err)
}
defer conn.Close()
cli := pb.NewKVClient(conn)
reqFunc := func(ctx context.Context) (picked string, err error) {
var p peer.Peer
_, err = cli.Range(ctx, &pb.RangeRequest{Key: []byte("/x")}, grpc.Peer(&p))
if p.Addr != nil {
picked = p.Addr.String()
}
return picked, err
}
// stop first server, loads should be redistributed
// stopped server should never be picked
ms.StopAt(0)
available := make(map[string]struct{})
for i := 1; i < serverCount; i++ {
available[eps[i]] = struct{}{}
}
reqN := 10
prev, switches := "", 0
for i := 0; i < reqN; i++ {
picked, err := reqFunc(context.Background())
if err != nil && strings.Contains(err.Error(), "transport is closing") {
continue
}
if prev == "" { // first failover
if eps[0] == picked {
t.Fatalf("expected failover from %q, picked %q", eps[0], picked)
}
prev = picked
continue
}
if _, ok := available[picked]; !ok {
t.Fatalf("picked unavailable address %q (available %v)", picked, available)
}
if prev != picked {
switches++
}
prev = picked
}
if switches < reqN-3 { // -3 for initial resolutions + failover
// TODO: FIX ME!
t.Skipf("expected balanced loads for %d requests, got switches %d", reqN, switches)
}
// now failed server comes back
ms.StartAt(0)
// enough time for reconnecting to recovered server
time.Sleep(time.Second)
prev, switches = "", 0
recoveredAddr, recovered := eps[0], 0
available[recoveredAddr] = struct{}{}
for i := 0; i < 2*reqN; i++ {
picked, err := reqFunc(context.Background())
if err != nil {
t.Fatalf("#%d: unexpected failure %v", i, err)
}
if prev == "" {
prev = picked
continue
}
if _, ok := available[picked]; !ok {
t.Fatalf("#%d: picked unavailable address %q (available %v)", i, picked, available)
}
if prev != picked {
switches++
}
if picked == recoveredAddr {
recovered++
}
prev = picked
}
if switches < reqN-3 { // -3 for initial resolutions
t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches)
}
if recovered < reqN/serverCount {
t.Fatalf("recovered server %q got only %d requests", recoveredAddr, recovered)
}
}
// TestRoundRobinBalancedResolvableFailoverFromRequestFail ensures that
// loads be rebalanced while some requests are failed.
func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) {
serverCount := 5
ms, err := mockserver.StartMockServers(serverCount)
if err != nil {
t.Fatalf("failed to start mock servers: %s", err)
}
defer ms.Stop()
var eps []string
available := make(map[string]struct{})
for _, svr := range ms.Servers {
eps = append(eps, svr.ResolverAddress().Addr)
available[svr.Address] = struct{}{}
}
rsv, err := endpoint.NewResolverGroup("requestfail")
if err != nil {
t.Fatal(err)
}
defer rsv.Close()
rsv.SetEndpoints(eps)
name := genName()
cfg := Config{
Policy: picker.RoundrobinBalanced,
Name: name,
Logger: zap.NewExample(),
}
RegisterBuilder(cfg)
conn, err := grpc.Dial(fmt.Sprintf("endpoint://requestfail/mock.server"), grpc.WithInsecure(), grpc.WithBalancerName(name))
if err != nil {
t.Fatalf("failed to dial mock server: %s", err)
}
defer conn.Close()
cli := pb.NewKVClient(conn)
reqFunc := func(ctx context.Context) (picked string, err error) {
var p peer.Peer
_, err = cli.Range(ctx, &pb.RangeRequest{Key: []byte("/x")}, grpc.Peer(&p))
if p.Addr != nil {
picked = p.Addr.String()
}
return picked, err
}
reqN := 20
prev, switches := "", 0
for i := 0; i < reqN; i++ {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
if i%2 == 0 {
cancel()
}
picked, err := reqFunc(ctx)
if i%2 == 0 {
if s, ok := status.FromError(err); ok && s.Code() != codes.Canceled || picked != "" {
t.Fatalf("#%d: expected %v, got %v", i, context.Canceled, err)
}
continue
}
if prev == "" && picked != "" {
prev = picked
continue
}
if _, ok := available[picked]; !ok {
t.Fatalf("#%d: picked unavailable address %q (available %v)", i, picked, available)
}
if prev != picked {
switches++
}
prev = picked
}
if switches < reqN/2-3 { // -3 for initial resolutions + failover
t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches)
}
}

View File

@@ -0,0 +1,93 @@
// Copyright 2019 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package connectivity implements client connectivity operations.
package connectivity
import (
"sync"
"go.uber.org/zap"
"google.golang.org/grpc/connectivity"
)
// Recorder records gRPC connectivity.
type Recorder interface {
GetCurrentState() connectivity.State
RecordTransition(oldState, newState connectivity.State)
}
// New returns a new Recorder.
func New(lg *zap.Logger) Recorder {
return &recorder{lg: lg}
}
// recorder takes the connectivity states of multiple SubConns
// and returns one aggregated connectivity state.
// ref. https://github.com/grpc/grpc-go/blob/master/balancer/balancer.go
type recorder struct {
lg *zap.Logger
mu sync.RWMutex
cur connectivity.State
numReady uint64 // Number of addrConns in ready state.
numConnecting uint64 // Number of addrConns in connecting state.
numTransientFailure uint64 // Number of addrConns in transientFailure.
}
func (rc *recorder) GetCurrentState() (state connectivity.State) {
rc.mu.RLock()
defer rc.mu.RUnlock()
return rc.cur
}
// RecordTransition records state change happening in subConn and based on that
// it evaluates what aggregated state should be.
//
// - If at least one SubConn in Ready, the aggregated state is Ready;
// - Else if at least one SubConn in Connecting, the aggregated state is Connecting;
// - Else the aggregated state is TransientFailure.
//
// Idle and Shutdown are not considered.
//
// ref. https://github.com/grpc/grpc-go/blob/master/balancer/balancer.go
func (rc *recorder) RecordTransition(oldState, newState connectivity.State) {
rc.mu.Lock()
defer rc.mu.Unlock()
for idx, state := range []connectivity.State{oldState, newState} {
updateVal := 2*uint64(idx) - 1 // -1 for oldState and +1 for new.
switch state {
case connectivity.Ready:
rc.numReady += updateVal
case connectivity.Connecting:
rc.numConnecting += updateVal
case connectivity.TransientFailure:
rc.numTransientFailure += updateVal
default:
rc.lg.Warn("connectivity recorder received unknown state", zap.String("connectivity-state", state.String()))
}
}
switch { // must be exclusive, no overlap
case rc.numReady > 0:
rc.cur = connectivity.Ready
case rc.numConnecting > 0:
rc.cur = connectivity.Connecting
default:
rc.cur = connectivity.TransientFailure
}
}

View File

@@ -0,0 +1,16 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package picker defines/implements client balancer picker policy.
package picker

View File

@@ -0,0 +1,39 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package picker
import (
"context"
"google.golang.org/grpc/balancer"
)
// NewErr returns a picker that always returns err on "Pick".
func NewErr(err error) Picker {
return &errPicker{p: Error, err: err}
}
type errPicker struct {
p Policy
err error
}
func (ep *errPicker) String() string {
return ep.p.String()
}
func (ep *errPicker) Pick(context.Context, balancer.PickOptions) (balancer.SubConn, func(balancer.DoneInfo), error) {
return nil, nil, ep.err
}

View File

@@ -0,0 +1,91 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package picker
import (
"fmt"
"go.uber.org/zap"
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/resolver"
)
// Picker defines balancer Picker methods.
type Picker interface {
balancer.Picker
String() string
}
// Config defines picker configuration.
type Config struct {
// Policy specifies etcd clientv3's built in balancer policy.
Policy Policy
// Logger defines picker logging object.
Logger *zap.Logger
// SubConnToResolverAddress maps each gRPC sub-connection to an address.
// Basically, it is a list of addresses that the Picker can pick from.
SubConnToResolverAddress map[balancer.SubConn]resolver.Address
}
// Policy defines balancer picker policy.
type Policy uint8
const (
// Error is error picker policy.
Error Policy = iota
// RoundrobinBalanced balances loads over multiple endpoints
// and implements failover in roundrobin fashion.
RoundrobinBalanced
// Custom defines custom balancer picker.
// TODO: custom picker is not supported yet.
Custom
)
func (p Policy) String() string {
switch p {
case Error:
return "picker-error"
case RoundrobinBalanced:
return "picker-roundrobin-balanced"
case Custom:
panic("'custom' picker policy is not supported yet")
default:
panic(fmt.Errorf("invalid balancer picker policy (%d)", p))
}
}
// New creates a new Picker.
func New(cfg Config) Picker {
switch cfg.Policy {
case Error:
panic("'error' picker policy is not supported here; use 'picker.NewErr'")
case RoundrobinBalanced:
return newRoundrobinBalanced(cfg)
case Custom:
panic("'custom' picker policy is not supported yet")
default:
panic(fmt.Errorf("invalid balancer picker policy (%d)", cfg.Policy))
}
}

View File

@@ -0,0 +1,95 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package picker
import (
"context"
"sync"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/resolver"
)
// newRoundrobinBalanced returns a new roundrobin balanced picker.
func newRoundrobinBalanced(cfg Config) Picker {
scs := make([]balancer.SubConn, 0, len(cfg.SubConnToResolverAddress))
for sc := range cfg.SubConnToResolverAddress {
scs = append(scs, sc)
}
return &rrBalanced{
p: RoundrobinBalanced,
lg: cfg.Logger,
scs: scs,
scToAddr: cfg.SubConnToResolverAddress,
}
}
type rrBalanced struct {
p Policy
lg *zap.Logger
mu sync.RWMutex
next int
scs []balancer.SubConn
scToAddr map[balancer.SubConn]resolver.Address
}
func (rb *rrBalanced) String() string { return rb.p.String() }
// Pick is called for every client request.
func (rb *rrBalanced) Pick(ctx context.Context, opts balancer.PickOptions) (balancer.SubConn, func(balancer.DoneInfo), error) {
rb.mu.RLock()
n := len(rb.scs)
rb.mu.RUnlock()
if n == 0 {
return nil, nil, balancer.ErrNoSubConnAvailable
}
rb.mu.Lock()
cur := rb.next
sc := rb.scs[cur]
picked := rb.scToAddr[sc].Addr
rb.next = (rb.next + 1) % len(rb.scs)
rb.mu.Unlock()
rb.lg.Debug(
"picked",
zap.String("picker", rb.p.String()),
zap.String("address", picked),
zap.Int("subconn-index", cur),
zap.Int("subconn-size", n),
)
doneFunc := func(info balancer.DoneInfo) {
// TODO: error handling?
fss := []zapcore.Field{
zap.Error(info.Err),
zap.String("picker", rb.p.String()),
zap.String("address", picked),
zap.Bool("success", info.Err == nil),
zap.Bool("bytes-sent", info.BytesSent),
zap.Bool("bytes-received", info.BytesReceived),
}
if info.Err == nil {
rb.lg.Debug("balancer done", fss...)
} else {
rb.lg.Warn("balancer failed", fss...)
}
}
return sc, doneFunc, nil
}

View File

@@ -0,0 +1,247 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package endpoint resolves etcd entpoints using grpc targets of the form 'endpoint://<id>/<endpoint>'.
package endpoint
import (
"context"
"fmt"
"net"
"net/url"
"strings"
"sync"
"google.golang.org/grpc/resolver"
)
const scheme = "endpoint"
var (
targetPrefix = fmt.Sprintf("%s://", scheme)
bldr *builder
)
func init() {
bldr = &builder{
resolverGroups: make(map[string]*ResolverGroup),
}
resolver.Register(bldr)
}
type builder struct {
mu sync.RWMutex
resolverGroups map[string]*ResolverGroup
}
// NewResolverGroup creates a new ResolverGroup with the given id.
func NewResolverGroup(id string) (*ResolverGroup, error) {
return bldr.newResolverGroup(id)
}
// ResolverGroup keeps all endpoints of resolvers using a common endpoint://<id>/ target
// up-to-date.
type ResolverGroup struct {
mu sync.RWMutex
id string
endpoints []string
resolvers []*Resolver
}
func (e *ResolverGroup) addResolver(r *Resolver) {
e.mu.Lock()
addrs := epsToAddrs(e.endpoints...)
e.resolvers = append(e.resolvers, r)
e.mu.Unlock()
r.cc.NewAddress(addrs)
}
func (e *ResolverGroup) removeResolver(r *Resolver) {
e.mu.Lock()
for i, er := range e.resolvers {
if er == r {
e.resolvers = append(e.resolvers[:i], e.resolvers[i+1:]...)
break
}
}
e.mu.Unlock()
}
// SetEndpoints updates the endpoints for ResolverGroup. All registered resolver are updated
// immediately with the new endpoints.
func (e *ResolverGroup) SetEndpoints(endpoints []string) {
addrs := epsToAddrs(endpoints...)
e.mu.Lock()
e.endpoints = endpoints
for _, r := range e.resolvers {
r.cc.NewAddress(addrs)
}
e.mu.Unlock()
}
// Target constructs a endpoint target using the endpoint id of the ResolverGroup.
func (e *ResolverGroup) Target(endpoint string) string {
return Target(e.id, endpoint)
}
// Target constructs a endpoint resolver target.
func Target(id, endpoint string) string {
return fmt.Sprintf("%s://%s/%s", scheme, id, endpoint)
}
// IsTarget checks if a given target string in an endpoint resolver target.
func IsTarget(target string) bool {
return strings.HasPrefix(target, "endpoint://")
}
func (e *ResolverGroup) Close() {
bldr.close(e.id)
}
// Build creates or reuses an etcd resolver for the etcd cluster name identified by the authority part of the target.
func (b *builder) Build(target resolver.Target, cc resolver.ClientConn, opts resolver.BuildOption) (resolver.Resolver, error) {
if len(target.Authority) < 1 {
return nil, fmt.Errorf("'etcd' target scheme requires non-empty authority identifying etcd cluster being routed to")
}
id := target.Authority
es, err := b.getResolverGroup(id)
if err != nil {
return nil, fmt.Errorf("failed to build resolver: %v", err)
}
r := &Resolver{
endpointID: id,
cc: cc,
}
es.addResolver(r)
return r, nil
}
func (b *builder) newResolverGroup(id string) (*ResolverGroup, error) {
b.mu.RLock()
_, ok := b.resolverGroups[id]
b.mu.RUnlock()
if ok {
return nil, fmt.Errorf("Endpoint already exists for id: %s", id)
}
es := &ResolverGroup{id: id}
b.mu.Lock()
b.resolverGroups[id] = es
b.mu.Unlock()
return es, nil
}
func (b *builder) getResolverGroup(id string) (*ResolverGroup, error) {
b.mu.RLock()
es, ok := b.resolverGroups[id]
b.mu.RUnlock()
if !ok {
return nil, fmt.Errorf("ResolverGroup not found for id: %s", id)
}
return es, nil
}
func (b *builder) close(id string) {
b.mu.Lock()
delete(b.resolverGroups, id)
b.mu.Unlock()
}
func (b *builder) Scheme() string {
return scheme
}
// Resolver provides a resolver for a single etcd cluster, identified by name.
type Resolver struct {
endpointID string
cc resolver.ClientConn
sync.RWMutex
}
// TODO: use balancer.epsToAddrs
func epsToAddrs(eps ...string) (addrs []resolver.Address) {
addrs = make([]resolver.Address, 0, len(eps))
for _, ep := range eps {
addrs = append(addrs, resolver.Address{Addr: ep})
}
return addrs
}
func (*Resolver) ResolveNow(o resolver.ResolveNowOption) {}
func (r *Resolver) Close() {
es, err := bldr.getResolverGroup(r.endpointID)
if err != nil {
return
}
es.removeResolver(r)
}
// ParseEndpoint endpoint parses an endpoint of the form
// (http|https)://<host>*|(unix|unixs)://<path>)
// and returns a protocol ('tcp' or 'unix'),
// host (or filepath if a unix socket),
// scheme (http, https, unix, unixs).
func ParseEndpoint(endpoint string) (proto string, host string, scheme string) {
proto = "tcp"
host = endpoint
url, uerr := url.Parse(endpoint)
if uerr != nil || !strings.Contains(endpoint, "://") {
return proto, host, scheme
}
scheme = url.Scheme
// strip scheme:// prefix since grpc dials by host
host = url.Host
switch url.Scheme {
case "http", "https":
case "unix", "unixs":
proto = "unix"
host = url.Host + url.Path
default:
proto, host = "", ""
}
return proto, host, scheme
}
// ParseTarget parses a endpoint://<id>/<endpoint> string and returns the parsed id and endpoint.
// If the target is malformed, an error is returned.
func ParseTarget(target string) (string, string, error) {
noPrefix := strings.TrimPrefix(target, targetPrefix)
if noPrefix == target {
return "", "", fmt.Errorf("malformed target, %s prefix is required: %s", targetPrefix, target)
}
parts := strings.SplitN(noPrefix, "/", 2)
if len(parts) != 2 {
return "", "", fmt.Errorf("malformed target, expected %s://<id>/<endpoint>, but got %s", scheme, target)
}
return parts[0], parts[1], nil
}
// Dialer dials a endpoint using net.Dialer.
// Context cancelation and timeout are supported.
func Dialer(ctx context.Context, dialEp string) (net.Conn, error) {
proto, host, _ := ParseEndpoint(dialEp)
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
}
dialer := &net.Dialer{}
if deadline, ok := ctx.Deadline(); ok {
dialer.Deadline = deadline
}
return dialer.DialContext(ctx, proto, host)
}

View File

@@ -0,0 +1,68 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package balancer
import (
"fmt"
"net/url"
"sort"
"sync/atomic"
"time"
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/resolver"
)
func scToString(sc balancer.SubConn) string {
return fmt.Sprintf("%p", sc)
}
func scsToStrings(scs map[balancer.SubConn]resolver.Address) (ss []string) {
ss = make([]string, 0, len(scs))
for sc, a := range scs {
ss = append(ss, fmt.Sprintf("%s (%s)", a.Addr, scToString(sc)))
}
sort.Strings(ss)
return ss
}
func addrsToStrings(addrs []resolver.Address) (ss []string) {
ss = make([]string, len(addrs))
for i := range addrs {
ss[i] = addrs[i].Addr
}
sort.Strings(ss)
return ss
}
func epsToAddrs(eps ...string) (addrs []resolver.Address) {
addrs = make([]resolver.Address, 0, len(eps))
for _, ep := range eps {
u, err := url.Parse(ep)
if err != nil {
addrs = append(addrs, resolver.Address{Addr: ep, Type: resolver.Backend})
continue
}
addrs = append(addrs, resolver.Address{Addr: u.Host, Type: resolver.Backend})
}
return addrs
}
var genN = new(uint32)
func genName() string {
now := time.Now().UnixNano()
return fmt.Sprintf("%X%X", now, atomic.AddUint32(genN, 1))
}

View File

@@ -0,0 +1,34 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package balancer
import (
"reflect"
"testing"
"google.golang.org/grpc/resolver"
)
func Test_epsToAddrs(t *testing.T) {
eps := []string{"https://example.com:2379", "127.0.0.1:2379"}
exp := []resolver.Address{
{Addr: "example.com:2379", Type: resolver.Backend},
{Addr: "127.0.0.1:2379", Type: resolver.Backend},
}
rs := epsToAddrs(eps...)
if !reflect.DeepEqual(rs, exp) {
t.Fatalf("expected %v, got %v", exp, rs)
}
}

View File

@@ -16,31 +16,58 @@ package clientv3
import (
"context"
"crypto/tls"
"errors"
"fmt"
"net"
"net/url"
"os"
"strconv"
"strings"
"sync"
"time"
"github.com/coreos/etcd/clientv3/balancer"
"github.com/coreos/etcd/clientv3/balancer/picker"
"github.com/coreos/etcd/clientv3/balancer/resolver/endpoint"
"github.com/coreos/etcd/clientv3/credentials"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/pkg/logutil"
"github.com/google/uuid"
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials"
grpccredentials "google.golang.org/grpc/credentials"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
)
var (
ErrNoAvailableEndpoints = errors.New("etcdclient: no available endpoints")
ErrOldCluster = errors.New("etcdclient: old cluster version")
roundRobinBalancerName = fmt.Sprintf("etcd-%s", picker.RoundrobinBalanced.String())
)
func init() {
lg := zap.NewNop()
if os.Getenv("ETCD_CLIENT_DEBUG") != "" {
lcfg := logutil.DefaultZapLoggerConfig
lcfg.Level = zap.NewAtomicLevelAt(zap.DebugLevel)
var err error
lg, err = lcfg.Build() // info level logging
if err != nil {
panic(err)
}
}
// TODO: support custom balancer
balancer.RegisterBuilder(balancer.Config{
Policy: picker.RoundrobinBalanced,
Name: roundRobinBalancerName,
Logger: lg,
})
}
// Client provides and manages an etcd v3 client session.
type Client struct {
Cluster
@@ -50,13 +77,12 @@ type Client struct {
Auth
Maintenance
conn *grpc.ClientConn
dialerrc chan error
conn *grpc.ClientConn
cfg Config
creds *credentials.TransportCredentials
balancer *healthBalancer
mu *sync.RWMutex
cfg Config
creds grpccredentials.TransportCredentials
resolverGroup *endpoint.ResolverGroup
mu *sync.RWMutex
ctx context.Context
cancel context.CancelFunc
@@ -64,11 +90,12 @@ type Client struct {
// Username is a user name for authentication.
Username string
// Password is a password for authentication.
Password string
// tokenCred is an instance of WithPerRPCCredentials()'s argument
tokenCred *authTokenCredential
Password string
authTokenBundle credentials.Bundle
callOpts []grpc.CallOption
lg *zap.Logger
}
// New creates a new etcdv3 client from a given configuration.
@@ -93,11 +120,19 @@ func NewFromURL(url string) (*Client, error) {
return New(Config{Endpoints: []string{url}})
}
// NewFromURLs creates a new etcdv3 client from URLs.
func NewFromURLs(urls []string) (*Client, error) {
return New(Config{Endpoints: urls})
}
// Close shuts down the client's etcd connections.
func (c *Client) Close() error {
c.cancel()
c.Watcher.Close()
c.Lease.Close()
if c.resolverGroup != nil {
c.resolverGroup.Close()
}
if c.conn != nil {
return toErr(c.ctx, c.conn.Close())
}
@@ -111,9 +146,9 @@ func (c *Client) Ctx() context.Context { return c.ctx }
// Endpoints lists the registered endpoints for the client.
func (c *Client) Endpoints() []string {
// copy the slice; protect original endpoints from being changed
c.mu.RLock()
defer c.mu.RUnlock()
// copy the slice; protect original endpoints from being changed
eps := make([]string, len(c.cfg.Endpoints))
copy(eps, c.cfg.Endpoints)
return eps
@@ -122,22 +157,9 @@ func (c *Client) Endpoints() []string {
// SetEndpoints updates client's endpoints.
func (c *Client) SetEndpoints(eps ...string) {
c.mu.Lock()
defer c.mu.Unlock()
c.cfg.Endpoints = eps
c.mu.Unlock()
c.balancer.updateAddrs(eps...)
// updating notifyCh can trigger new connections,
// need update addrs if all connections are down
// or addrs does not include pinAddr.
c.balancer.mu.RLock()
update := !hasAddr(c.balancer.addrs, c.balancer.pinAddr)
c.balancer.mu.RUnlock()
if update {
select {
case c.balancer.updateAddrsC <- notifyNext:
case <-c.balancer.stopc:
}
}
c.resolverGroup.SetEndpoints(eps)
}
// Sync synchronizes client's endpoints with the known endpoints from the etcd membership.
@@ -168,52 +190,13 @@ func (c *Client) autoSync() {
err := c.Sync(ctx)
cancel()
if err != nil && err != c.ctx.Err() {
logger.Println("Auto sync endpoints failed:", err)
lg.Lvl(4).Infof("Auto sync endpoints failed: %v", err)
}
}
}
}
type authTokenCredential struct {
token string
tokenMu *sync.RWMutex
}
func (cred authTokenCredential) RequireTransportSecurity() bool {
return false
}
func (cred authTokenCredential) GetRequestMetadata(ctx context.Context, s ...string) (map[string]string, error) {
cred.tokenMu.RLock()
defer cred.tokenMu.RUnlock()
return map[string]string{
"token": cred.token,
}, nil
}
func parseEndpoint(endpoint string) (proto string, host string, scheme string) {
proto = "tcp"
host = endpoint
url, uerr := url.Parse(endpoint)
if uerr != nil || !strings.Contains(endpoint, "://") {
return proto, host, scheme
}
scheme = url.Scheme
// strip scheme:// prefix since grpc dials by host
host = url.Host
switch url.Scheme {
case "http", "https":
case "unix", "unixs":
proto = "unix"
host = url.Host + url.Path
default:
proto, host = "", ""
}
return proto, host, scheme
}
func (c *Client) processCreds(scheme string) (creds *credentials.TransportCredentials) {
func (c *Client) processCreds(scheme string) (creds grpccredentials.TransportCredentials) {
creds = c.creds
switch scheme {
case "unix":
@@ -223,83 +206,83 @@ func (c *Client) processCreds(scheme string) (creds *credentials.TransportCreden
if creds != nil {
break
}
tlsconfig := &tls.Config{}
emptyCreds := credentials.NewTLS(tlsconfig)
creds = &emptyCreds
creds = credentials.NewBundle(credentials.Config{}).TransportCredentials()
default:
creds = nil
}
return creds
}
// dialSetupOpts gives the dial opts prior to any authentication
func (c *Client) dialSetupOpts(endpoint string, dopts ...grpc.DialOption) (opts []grpc.DialOption) {
if c.cfg.DialTimeout > 0 {
opts = []grpc.DialOption{grpc.WithTimeout(c.cfg.DialTimeout)}
}
// dialSetupOpts gives the dial opts prior to any authentication.
func (c *Client) dialSetupOpts(creds grpccredentials.TransportCredentials, dopts ...grpc.DialOption) (opts []grpc.DialOption, err error) {
if c.cfg.DialKeepAliveTime > 0 {
params := keepalive.ClientParameters{
Time: c.cfg.DialKeepAliveTime,
Timeout: c.cfg.DialKeepAliveTimeout,
Time: c.cfg.DialKeepAliveTime,
Timeout: c.cfg.DialKeepAliveTimeout,
PermitWithoutStream: c.cfg.PermitWithoutStream,
}
opts = append(opts, grpc.WithKeepaliveParams(params))
}
opts = append(opts, dopts...)
f := func(host string, t time.Duration) (net.Conn, error) {
proto, host, _ := parseEndpoint(c.balancer.endpoint(host))
if host == "" && endpoint != "" {
// dialing an endpoint not in the balancer; use
// endpoint passed into dial
proto, host, _ = parseEndpoint(endpoint)
}
if proto == "" {
return nil, fmt.Errorf("unknown scheme for %q", host)
}
select {
case <-c.ctx.Done():
return nil, c.ctx.Err()
default:
}
dialer := &net.Dialer{Timeout: t}
conn, err := dialer.DialContext(c.ctx, proto, host)
if err != nil {
select {
case c.dialerrc <- err:
default:
}
}
return conn, err
}
opts = append(opts, grpc.WithDialer(f))
creds := c.creds
if _, _, scheme := parseEndpoint(endpoint); len(scheme) != 0 {
creds = c.processCreds(scheme)
}
dialer := endpoint.Dialer
if creds != nil {
opts = append(opts, grpc.WithTransportCredentials(*creds))
opts = append(opts, grpc.WithTransportCredentials(creds))
// gRPC load balancer workaround. See credentials.transportCredential for details.
if credsDialer, ok := creds.(TransportCredentialsWithDialer); ok {
dialer = credsDialer.Dialer
}
} else {
opts = append(opts, grpc.WithInsecure())
}
opts = append(opts, grpc.WithContextDialer(dialer))
return opts
// Interceptor retry and backoff.
// TODO: Replace all of clientv3/retry.go with interceptor based retry, or with
// https://github.com/grpc/proposal/blob/master/A6-client-retries.md#retry-policy
// once it is available.
rrBackoff := withBackoff(c.roundRobinQuorumBackoff(defaultBackoffWaitBetween, defaultBackoffJitterFraction))
opts = append(opts,
// Disable stream retry by default since go-grpc-middleware/retry does not support client streams.
// Streams that are safe to retry are enabled individually.
grpc.WithStreamInterceptor(c.streamClientInterceptor(c.lg, withMax(0), rrBackoff)),
grpc.WithUnaryInterceptor(c.unaryClientInterceptor(c.lg, withMax(defaultUnaryMaxRetries), rrBackoff)),
)
return opts, nil
}
// Dial connects to a single endpoint using the client's config.
func (c *Client) Dial(endpoint string) (*grpc.ClientConn, error) {
return c.dial(endpoint)
func (c *Client) Dial(ep string) (*grpc.ClientConn, error) {
creds, err := c.directDialCreds(ep)
if err != nil {
return nil, err
}
// Use the grpc passthrough resolver to directly dial a single endpoint.
// This resolver passes through the 'unix' and 'unixs' endpoints schemes used
// by etcd without modification, allowing us to directly dial endpoints and
// using the same dial functions that we use for load balancer dialing.
return c.dial(fmt.Sprintf("passthrough:///%s", ep), creds)
}
func (c *Client) getToken(ctx context.Context) error {
var err error // return last error in a case of fail
var auth *authenticator
for i := 0; i < len(c.cfg.Endpoints); i++ {
endpoint := c.cfg.Endpoints[i]
host := getHost(endpoint)
eps := c.Endpoints()
for _, ep := range eps {
// use dial options without dopts to avoid reusing the client balancer
auth, err = newAuthenticator(host, c.dialSetupOpts(endpoint), c)
var dOpts []grpc.DialOption
_, host, _ := endpoint.ParseEndpoint(ep)
target := c.resolverGroup.Target(host)
creds := c.dialWithBalancerCreds(ep)
dOpts, err = c.dialSetupOpts(creds, c.cfg.DialOptions...)
if err != nil {
err = fmt.Errorf("failed to configure auth dialer: %v", err)
continue
}
dOpts = append(dOpts, grpc.WithBalancerName(roundRobinBalancerName))
auth, err = newAuthenticator(ctx, target, dOpts, c)
if err != nil {
continue
}
@@ -308,71 +291,114 @@ func (c *Client) getToken(ctx context.Context) error {
var resp *AuthenticateResponse
resp, err = auth.authenticate(ctx, c.Username, c.Password)
if err != nil {
// return err without retrying other endpoints
if err == rpctypes.ErrAuthNotEnabled {
return err
}
continue
}
c.tokenCred.tokenMu.Lock()
c.tokenCred.token = resp.Token
c.tokenCred.tokenMu.Unlock()
c.authTokenBundle.UpdateAuthToken(resp.Token)
return nil
}
return err
}
func (c *Client) dial(endpoint string, dopts ...grpc.DialOption) (*grpc.ClientConn, error) {
opts := c.dialSetupOpts(endpoint, dopts...)
host := getHost(endpoint)
// dialWithBalancer dials the client's current load balanced resolver group. The scheme of the host
// of the provided endpoint determines the scheme used for all endpoints of the client connection.
func (c *Client) dialWithBalancer(ep string, dopts ...grpc.DialOption) (*grpc.ClientConn, error) {
_, host, _ := endpoint.ParseEndpoint(ep)
target := c.resolverGroup.Target(host)
creds := c.dialWithBalancerCreds(ep)
return c.dial(target, creds, dopts...)
}
// dial configures and dials any grpc balancer target.
func (c *Client) dial(target string, creds grpccredentials.TransportCredentials, dopts ...grpc.DialOption) (*grpc.ClientConn, error) {
opts, err := c.dialSetupOpts(creds, dopts...)
if err != nil {
return nil, fmt.Errorf("failed to configure dialer: %v", err)
}
if c.Username != "" && c.Password != "" {
c.tokenCred = &authTokenCredential{
tokenMu: &sync.RWMutex{},
}
c.authTokenBundle = credentials.NewBundle(credentials.Config{})
ctx := c.ctx
ctx, cancel := c.ctx, func() {}
if c.cfg.DialTimeout > 0 {
cctx, cancel := context.WithTimeout(ctx, c.cfg.DialTimeout)
defer cancel()
ctx = cctx
ctx, cancel = context.WithTimeout(ctx, c.cfg.DialTimeout)
}
err := c.getToken(ctx)
err = c.getToken(ctx)
if err != nil {
if toErr(ctx, err) != rpctypes.ErrAuthNotEnabled {
if err == ctx.Err() && ctx.Err() != c.ctx.Err() {
err = context.DeadlineExceeded
}
cancel()
return nil, err
}
} else {
opts = append(opts, grpc.WithPerRPCCredentials(c.tokenCred))
opts = append(opts, grpc.WithPerRPCCredentials(c.authTokenBundle.PerRPCCredentials()))
}
cancel()
}
opts = append(opts, c.cfg.DialOptions...)
conn, err := grpc.DialContext(c.ctx, host, opts...)
dctx := c.ctx
if c.cfg.DialTimeout > 0 {
var cancel context.CancelFunc
dctx, cancel = context.WithTimeout(c.ctx, c.cfg.DialTimeout)
defer cancel() // TODO: Is this right for cases where grpc.WithBlock() is not set on the dial options?
}
conn, err := grpc.DialContext(dctx, target, opts...)
if err != nil {
return nil, err
}
return conn, nil
}
// WithRequireLeader requires client requests to only succeed
// when the cluster has a leader.
func WithRequireLeader(ctx context.Context) context.Context {
md := metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
return metadata.NewOutgoingContext(ctx, md)
func (c *Client) directDialCreds(ep string) (grpccredentials.TransportCredentials, error) {
_, host, scheme := endpoint.ParseEndpoint(ep)
creds := c.creds
if len(scheme) != 0 {
creds = c.processCreds(scheme)
if creds != nil {
clone := creds.Clone()
// Set the server name must to the endpoint hostname without port since grpc
// otherwise attempts to check if x509 cert is valid for the full endpoint
// including the scheme and port, which fails.
overrideServerName, _, err := net.SplitHostPort(host)
if err != nil {
// Either the host didn't have a port or the host could not be parsed. Either way, continue with the
// original host string.
overrideServerName = host
}
clone.OverrideServerName(overrideServerName)
creds = clone
}
}
return creds, nil
}
func (c *Client) dialWithBalancerCreds(ep string) grpccredentials.TransportCredentials {
_, _, scheme := endpoint.ParseEndpoint(ep)
creds := c.creds
if len(scheme) != 0 {
creds = c.processCreds(scheme)
}
return creds
}
func newClient(cfg *Config) (*Client, error) {
if cfg == nil {
cfg = &Config{}
}
var creds *credentials.TransportCredentials
var creds grpccredentials.TransportCredentials
if cfg.TLS != nil {
c := credentials.NewTLS(cfg.TLS)
creds = &c
creds = credentials.NewBundle(credentials.Config{TLSConfig: cfg.TLS}).TransportCredentials()
}
// use a temporary skeleton client to bootstrap first connection
@@ -384,7 +410,6 @@ func newClient(cfg *Config) (*Client, error) {
ctx, cancel := context.WithCancel(baseCtx)
client := &Client{
conn: nil,
dialerrc: make(chan error, 1),
cfg: *cfg,
creds: creds,
ctx: ctx,
@@ -392,6 +417,17 @@ func newClient(cfg *Config) (*Client, error) {
mu: new(sync.RWMutex),
callOpts: defaultCallOpts,
}
lcfg := logutil.DefaultZapLoggerConfig
if cfg.LogConfig != nil {
lcfg = *cfg.LogConfig
}
var err error
client.lg, err = lcfg.Build()
if err != nil {
return nil, err
}
if cfg.Username != "" && cfg.Password != "" {
client.Username = cfg.Username
client.Password = cfg.Password
@@ -414,42 +450,31 @@ func newClient(cfg *Config) (*Client, error) {
client.callOpts = callOpts
}
client.balancer = newHealthBalancer(cfg.Endpoints, cfg.DialTimeout, func(ep string) (bool, error) {
return grpcHealthCheck(client, ep)
})
// use Endpoints[0] so that for https:// without any tls config given, then
// grpc will assume the certificate server name is the endpoint host.
conn, err := client.dial(cfg.Endpoints[0], grpc.WithBalancer(client.balancer))
// Prepare a 'endpoint://<unique-client-id>/' resolver for the client and create a endpoint target to pass
// to dial so the client knows to use this resolver.
client.resolverGroup, err = endpoint.NewResolverGroup(fmt.Sprintf("client-%s", uuid.New().String()))
if err != nil {
client.cancel()
client.balancer.Close()
return nil, err
}
client.conn = conn
client.resolverGroup.SetEndpoints(cfg.Endpoints)
// wait for a connection
if cfg.DialTimeout > 0 {
hasConn := false
waitc := time.After(cfg.DialTimeout)
select {
case <-client.balancer.ready():
hasConn = true
case <-ctx.Done():
case <-waitc:
}
if !hasConn {
err := context.DeadlineExceeded
select {
case err = <-client.dialerrc:
default:
}
client.cancel()
client.balancer.Close()
conn.Close()
return nil, err
}
if len(cfg.Endpoints) < 1 {
return nil, fmt.Errorf("at least one Endpoint must is required in client config")
}
dialEndpoint := cfg.Endpoints[0]
// Use a provided endpoint target so that for https:// without any tls config given, then
// grpc will assume the certificate server name is the endpoint host.
conn, err := client.dialWithBalancer(dialEndpoint, grpc.WithBalancerName(roundRobinBalancerName))
if err != nil {
client.cancel()
client.resolverGroup.Close()
return nil, err
}
// TODO: With the old grpc balancer interface, we waited until the dial timeout
// for the balancer to be ready. Is there an equivalent wait we should do with the new grpc balancer interface?
client.conn = conn
client.Cluster = NewCluster(client)
client.KV = NewKV(client)
@@ -469,15 +494,35 @@ func newClient(cfg *Config) (*Client, error) {
return client, nil
}
// roundRobinQuorumBackoff retries against quorum between each backoff.
// This is intended for use with a round robin load balancer.
func (c *Client) roundRobinQuorumBackoff(waitBetween time.Duration, jitterFraction float64) backoffFunc {
return func(attempt uint) time.Duration {
// after each round robin across quorum, backoff for our wait between duration
n := uint(len(c.Endpoints()))
quorum := (n/2 + 1)
if attempt%quorum == 0 {
c.lg.Debug("backoff", zap.Uint("attempt", attempt), zap.Uint("quorum", quorum), zap.Duration("waitBetween", waitBetween), zap.Float64("jitterFraction", jitterFraction))
return jitterUp(waitBetween, jitterFraction)
}
c.lg.Debug("backoff skipped", zap.Uint("attempt", attempt), zap.Uint("quorum", quorum))
return 0
}
}
func (c *Client) checkVersion() (err error) {
var wg sync.WaitGroup
errc := make(chan error, len(c.cfg.Endpoints))
eps := c.Endpoints()
errc := make(chan error, len(eps))
ctx, cancel := context.WithCancel(c.ctx)
if c.cfg.DialTimeout > 0 {
ctx, cancel = context.WithTimeout(ctx, c.cfg.DialTimeout)
cancel()
ctx, cancel = context.WithTimeout(c.ctx, c.cfg.DialTimeout)
}
wg.Add(len(c.cfg.Endpoints))
for _, ep := range c.cfg.Endpoints {
wg.Add(len(eps))
for _, ep := range eps {
// if cluster is current, any endpoint gives a recent version
go func(e string) {
defer wg.Done()
@@ -489,8 +534,15 @@ func (c *Client) checkVersion() (err error) {
vs := strings.Split(resp.Version, ".")
maj, min := 0, 0
if len(vs) >= 2 {
maj, _ = strconv.Atoi(vs[0])
min, rerr = strconv.Atoi(vs[1])
var serr error
if maj, serr = strconv.Atoi(vs[0]); serr != nil {
errc <- serr
return
}
if min, serr = strconv.Atoi(vs[1]); serr != nil {
errc <- serr
return
}
}
if maj < 3 || (maj == 3 && min < 2) {
rerr = ErrOldCluster
@@ -499,7 +551,7 @@ func (c *Client) checkVersion() (err error) {
}(ep)
}
// wait for success
for i := 0; i < len(c.cfg.Endpoints); i++ {
for range eps {
if err = <-errc; err == nil {
break
}
@@ -539,10 +591,13 @@ func isUnavailableErr(ctx context.Context, err error) bool {
if err == nil {
return false
}
ev, _ := status.FromError(err)
// Unavailable codes mean the system will be right back.
// (e.g., can't connect, lost leader)
return ev.Code() == codes.Unavailable
ev, ok := status.FromError(err)
if ok {
// Unavailable codes mean the system will be right back.
// (e.g., can't connect, lost leader)
return ev.Code() == codes.Unavailable
}
return false
}
func toErr(ctx context.Context, err error) error {
@@ -553,18 +608,16 @@ func toErr(ctx context.Context, err error) error {
if _, ok := err.(rpctypes.EtcdError); ok {
return err
}
ev, _ := status.FromError(err)
code := ev.Code()
switch code {
case codes.DeadlineExceeded:
fallthrough
case codes.Canceled:
if ctx.Err() != nil {
err = ctx.Err()
if ev, ok := status.FromError(err); ok {
code := ev.Code()
switch code {
case codes.DeadlineExceeded:
fallthrough
case codes.Canceled:
if ctx.Err() != nil {
err = ctx.Err()
}
}
case codes.Unavailable:
case codes.FailedPrecondition:
err = grpc.ErrClientConnClosing
}
return err
}
@@ -576,3 +629,32 @@ func canceledByCaller(stopCtx context.Context, err error) bool {
return err == context.Canceled || err == context.DeadlineExceeded
}
// IsConnCanceled returns true, if error is from a closed gRPC connection.
// ref. https://github.com/grpc/grpc-go/pull/1854
func IsConnCanceled(err error) bool {
if err == nil {
return false
}
// >= gRPC v1.23.x
s, ok := status.FromError(err)
if ok {
// connection is canceled or server has already closed the connection
return s.Code() == codes.Canceled || s.Message() == "transport is closing"
}
// >= gRPC v1.10.x
if err == context.Canceled {
return true
}
// <= gRPC v1.7.x returns 'errors.New("grpc: the client connection is closing")'
return strings.Contains(err.Error(), "grpc: the client connection is closing")
}
// TransportCredentialsWithDialer is for a gRPC load balancer workaround. See credentials.transportCredential for details.
type TransportCredentialsWithDialer interface {
grpccredentials.TransportCredentials
Dialer(ctx context.Context, dialEp string) (net.Conn, error)
}

View File

@@ -23,6 +23,7 @@ import (
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/pkg/testutil"
"google.golang.org/grpc"
)
func TestDialCancel(t *testing.T) {
@@ -83,11 +84,13 @@ func TestDialTimeout(t *testing.T) {
testCfgs := []Config{
{
Endpoints: []string{"http://254.0.0.1:12345"},
DialOptions: []grpc.DialOption{grpc.WithBlock()},
DialTimeout: 2 * time.Second,
},
{
Endpoints: []string{"http://254.0.0.1:12345"},
DialTimeout: time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
Username: "abc",
Password: "def",
},

View File

@@ -49,6 +49,7 @@ func NewElection(s *Session, pfx string) *Election {
func ResumeElection(s *Session, pfx string, leaderKey string, leaderRev int64) *Election {
return &Election{
session: s,
keyPrefix: pfx,
leaderKey: leaderKey,
leaderRev: leaderRev,
leaderSession: s,

View File

@@ -0,0 +1,114 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package concurrency_test
import (
"context"
"log"
"strings"
"testing"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/clientv3/concurrency"
)
func TestResumeElection(t *testing.T) {
const prefix = "/resume-election/"
cli, err := clientv3.New(clientv3.Config{Endpoints: endpoints})
if err != nil {
log.Fatal(err)
}
defer cli.Close()
s, err := concurrency.NewSession(cli)
if err != nil {
log.Fatal(err)
}
defer s.Close()
e := concurrency.NewElection(s, prefix)
// Entire test should never take more than 10 seconds
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
defer cancel()
// Become leader
if err := e.Campaign(ctx, "candidate1"); err != nil {
t.Fatalf("Campaign() returned non nil err: %s", err)
}
// Get the leadership details of the current election
leader, err := e.Leader(ctx)
if err != nil {
t.Fatalf("Leader() returned non nil err: %s", err)
}
// Recreate the election
e = concurrency.ResumeElection(s, prefix,
string(leader.Kvs[0].Key), leader.Kvs[0].CreateRevision)
respChan := make(chan *clientv3.GetResponse)
go func() {
o := e.Observe(ctx)
respChan <- nil
for {
select {
case resp, ok := <-o:
if !ok {
t.Fatal("Observe() channel closed prematurely")
}
// Ignore any observations that candidate1 was elected
if string(resp.Kvs[0].Value) == "candidate1" {
continue
}
respChan <- &resp
return
}
}
}()
// Wait until observe goroutine is running
<-respChan
// Put some random data to generate a change event, this put should be
// ignored by Observe() because it is not under the election prefix.
_, err = cli.Put(ctx, "foo", "bar")
if err != nil {
t.Fatalf("Put('foo') returned non nil err: %s", err)
}
// Resign as leader
if err := e.Resign(ctx); err != nil {
t.Fatalf("Resign() returned non nil err: %s", err)
}
// Elect a different candidate
if err := e.Campaign(ctx, "candidate2"); err != nil {
t.Fatalf("Campaign() returned non nil err: %s", err)
}
// Wait for observed leader change
resp := <-respChan
kv := resp.Kvs[0]
if !strings.HasPrefix(string(kv.Key), prefix) {
t.Errorf("expected observed election to have prefix '%s' got '%s'", prefix, string(kv.Key))
}
if string(kv.Value) != "candidate2" {
t.Errorf("expected new leader to be 'candidate1' got '%s'", string(kv.Value))
}
}

View File

@@ -19,6 +19,7 @@ import (
"crypto/tls"
"time"
"go.uber.org/zap"
"google.golang.org/grpc"
)
@@ -67,9 +68,19 @@ type Config struct {
RejectOldCluster bool `json:"reject-old-cluster"`
// DialOptions is a list of dial options for the grpc client (e.g., for interceptors).
// For example, pass "grpc.WithBlock()" to block until the underlying connection is up.
// Without this, Dial returns immediately and connecting the server happens in background.
DialOptions []grpc.DialOption
// LogConfig configures client-side logger.
// If nil, use the default logger.
// TODO: configure gRPC logger
LogConfig *zap.Config
// Context is the default client context; it can be used to cancel grpc dial out and
// other operations that do not have an explicit context.
Context context.Context
// PermitWithoutStream when set will allow client to send keepalive pings to server without any active streams(RPCs).
PermitWithoutStream bool `json:"permit-without-stream"`
}

View File

@@ -0,0 +1,173 @@
// Copyright 2019 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package credentials implements gRPC credential interface with etcd specific logic.
// e.g., client handshake with custom authority parameter
package credentials
import (
"context"
"crypto/tls"
"net"
"sync"
"github.com/coreos/etcd/clientv3/balancer/resolver/endpoint"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
grpccredentials "google.golang.org/grpc/credentials"
)
// Config defines gRPC credential configuration.
type Config struct {
TLSConfig *tls.Config
}
// Bundle defines gRPC credential interface.
type Bundle interface {
grpccredentials.Bundle
UpdateAuthToken(token string)
}
// NewBundle constructs a new gRPC credential bundle.
func NewBundle(cfg Config) Bundle {
return &bundle{
tc: newTransportCredential(cfg.TLSConfig),
rc: newPerRPCCredential(),
}
}
// bundle implements "grpccredentials.Bundle" interface.
type bundle struct {
tc *transportCredential
rc *perRPCCredential
}
func (b *bundle) TransportCredentials() grpccredentials.TransportCredentials {
return b.tc
}
func (b *bundle) PerRPCCredentials() grpccredentials.PerRPCCredentials {
return b.rc
}
func (b *bundle) NewWithMode(mode string) (grpccredentials.Bundle, error) {
// no-op
return nil, nil
}
// transportCredential implements "grpccredentials.TransportCredentials" interface.
// transportCredential wraps TransportCredentials to track which
// addresses are dialed for which endpoints, and then sets the authority when checking the endpoint's cert to the
// hostname or IP of the dialed endpoint.
// This is a workaround of a gRPC load balancer issue. gRPC uses the dialed target's service name as the authority when
// checking all endpoint certs, which does not work for etcd servers using their hostname or IP as the Subject Alternative Name
// in their TLS certs.
// To enable, include both WithTransportCredentials(creds) and WithContextDialer(creds.Dialer)
// when dialing.
type transportCredential struct {
gtc grpccredentials.TransportCredentials
mu sync.Mutex
// addrToEndpoint maps from the connection addresses that are dialed to the hostname or IP of the
// endpoint provided to the dialer when dialing
addrToEndpoint map[string]string
}
func newTransportCredential(cfg *tls.Config) *transportCredential {
return &transportCredential{
gtc: grpccredentials.NewTLS(cfg),
addrToEndpoint: map[string]string{},
}
}
func (tc *transportCredential) ClientHandshake(ctx context.Context, authority string, rawConn net.Conn) (net.Conn, grpccredentials.AuthInfo, error) {
// Set the authority when checking the endpoint's cert to the hostname or IP of the dialed endpoint
tc.mu.Lock()
dialEp, ok := tc.addrToEndpoint[rawConn.RemoteAddr().String()]
tc.mu.Unlock()
if ok {
_, host, _ := endpoint.ParseEndpoint(dialEp)
authority = host
}
return tc.gtc.ClientHandshake(ctx, authority, rawConn)
}
// return true if given string is an IP.
func isIP(ep string) bool {
return net.ParseIP(ep) != nil
}
func (tc *transportCredential) ServerHandshake(rawConn net.Conn) (net.Conn, grpccredentials.AuthInfo, error) {
return tc.gtc.ServerHandshake(rawConn)
}
func (tc *transportCredential) Info() grpccredentials.ProtocolInfo {
return tc.gtc.Info()
}
func (tc *transportCredential) Clone() grpccredentials.TransportCredentials {
copy := map[string]string{}
tc.mu.Lock()
for k, v := range tc.addrToEndpoint {
copy[k] = v
}
tc.mu.Unlock()
return &transportCredential{
gtc: tc.gtc.Clone(),
addrToEndpoint: copy,
}
}
func (tc *transportCredential) OverrideServerName(serverNameOverride string) error {
return tc.gtc.OverrideServerName(serverNameOverride)
}
func (tc *transportCredential) Dialer(ctx context.Context, dialEp string) (net.Conn, error) {
// Keep track of which addresses are dialed for which endpoints
conn, err := endpoint.Dialer(ctx, dialEp)
if conn != nil {
tc.mu.Lock()
tc.addrToEndpoint[conn.RemoteAddr().String()] = dialEp
tc.mu.Unlock()
}
return conn, err
}
// perRPCCredential implements "grpccredentials.PerRPCCredentials" interface.
type perRPCCredential struct {
authToken string
authTokenMu sync.RWMutex
}
func newPerRPCCredential() *perRPCCredential { return &perRPCCredential{} }
func (rc *perRPCCredential) RequireTransportSecurity() bool { return false }
func (rc *perRPCCredential) GetRequestMetadata(ctx context.Context, s ...string) (map[string]string, error) {
rc.authTokenMu.RLock()
authToken := rc.authToken
rc.authTokenMu.RUnlock()
return map[string]string{rpctypes.TokenFieldNameGRPC: authToken}, nil
}
func (b *bundle) UpdateAuthToken(token string) {
if b.rc == nil {
return
}
b.rc.UpdateAuthToken(token)
}
func (rc *perRPCCredential) UpdateAuthToken(token string) {
rc.authTokenMu.Lock()
rc.authToken = token
rc.authTokenMu.Unlock()
}

64
clientv3/ctx.go Normal file
View File

@@ -0,0 +1,64 @@
// Copyright 2020 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"context"
"strings"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/version"
"google.golang.org/grpc/metadata"
)
// WithRequireLeader requires client requests to only succeed
// when the cluster has a leader.
func WithRequireLeader(ctx context.Context) context.Context {
md, ok := metadata.FromOutgoingContext(ctx)
if !ok { // no outgoing metadata ctx key, create one
md = metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
return metadata.NewOutgoingContext(ctx, md)
}
copied := md.Copy() // avoid racey updates
// overwrite/add 'hasleader' key/value
metadataSet(copied, rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
return metadata.NewOutgoingContext(ctx, copied)
}
// embeds client version
func withVersion(ctx context.Context) context.Context {
md, ok := metadata.FromOutgoingContext(ctx)
if !ok { // no outgoing metadata ctx key, create one
md = metadata.Pairs(rpctypes.MetadataClientAPIVersionKey, version.APIVersion)
return metadata.NewOutgoingContext(ctx, md)
}
copied := md.Copy() // avoid racey updates
// overwrite/add version key/value
metadataSet(copied, rpctypes.MetadataClientAPIVersionKey, version.APIVersion)
return metadata.NewOutgoingContext(ctx, copied)
}
func metadataGet(md metadata.MD, k string) []string {
k = strings.ToLower(k)
return md[k]
}
func metadataSet(md metadata.MD, k string, vals ...string) {
if len(vals) == 0 {
return
}
k = strings.ToLower(k)
md[k] = vals
}

67
clientv3/ctx_test.go Normal file
View File

@@ -0,0 +1,67 @@
// Copyright 2020 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"context"
"reflect"
"testing"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/version"
"google.golang.org/grpc/metadata"
)
func TestMetadataWithRequireLeader(t *testing.T) {
ctx := context.TODO()
md, ok := metadata.FromOutgoingContext(ctx)
if ok {
t.Fatal("expected no outgoing metadata ctx key")
}
// add a conflicting key with some other value
md = metadata.Pairs(rpctypes.MetadataRequireLeaderKey, "invalid")
// add a key, and expect not be overwritten
metadataSet(md, "hello", "1", "2")
ctx = metadata.NewOutgoingContext(ctx, md)
// expect overwrites but still keep other keys
ctx = WithRequireLeader(ctx)
md, ok = metadata.FromOutgoingContext(ctx)
if !ok {
t.Fatal("expected outgoing metadata ctx key")
}
if ss := metadataGet(md, rpctypes.MetadataRequireLeaderKey); !reflect.DeepEqual(ss, []string{rpctypes.MetadataHasLeader}) {
t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataRequireLeaderKey, ss)
}
if ss := metadataGet(md, "hello"); !reflect.DeepEqual(ss, []string{"1", "2"}) {
t.Fatalf("unexpected metadata for 'hello' %v", ss)
}
}
func TestMetadataWithClientAPIVersion(t *testing.T) {
ctx := withVersion(WithRequireLeader(context.TODO()))
md, ok := metadata.FromOutgoingContext(ctx)
if !ok {
t.Fatal("expected outgoing metadata ctx key")
}
if ss := metadataGet(md, rpctypes.MetadataRequireLeaderKey); !reflect.DeepEqual(ss, []string{rpctypes.MetadataHasLeader}) {
t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataRequireLeaderKey, ss)
}
if ss := metadataGet(md, rpctypes.MetadataClientAPIVersionKey); !reflect.DeepEqual(ss, []string{version.APIVersion}) {
t.Fatalf("unexpected metadata for %q %v", rpctypes.MetadataClientAPIVersionKey, ss)
}
}

View File

@@ -1,609 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"context"
"errors"
"net/url"
"strings"
"sync"
"time"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/status"
)
const (
minHealthRetryDuration = 3 * time.Second
unknownService = "unknown service grpc.health.v1.Health"
)
// ErrNoAddrAvilable is returned by Get() when the balancer does not have
// any active connection to endpoints at the time.
// This error is returned only when opts.BlockingWait is true.
var ErrNoAddrAvilable = status.Error(codes.Unavailable, "there is no address available")
type healthCheckFunc func(ep string) (bool, error)
type notifyMsg int
const (
notifyReset notifyMsg = iota
notifyNext
)
// healthBalancer does the bare minimum to expose multiple eps
// to the grpc reconnection code path
type healthBalancer struct {
// addrs are the client's endpoint addresses for grpc
addrs []grpc.Address
// eps holds the raw endpoints from the client
eps []string
// notifyCh notifies grpc of the set of addresses for connecting
notifyCh chan []grpc.Address
// readyc closes once the first connection is up
readyc chan struct{}
readyOnce sync.Once
// healthCheck checks an endpoint's health.
healthCheck healthCheckFunc
healthCheckTimeout time.Duration
unhealthyMu sync.RWMutex
unhealthyHostPorts map[string]time.Time
// mu protects all fields below.
mu sync.RWMutex
// upc closes when pinAddr transitions from empty to non-empty or the balancer closes.
upc chan struct{}
// downc closes when grpc calls down() on pinAddr
downc chan struct{}
// stopc is closed to signal updateNotifyLoop should stop.
stopc chan struct{}
stopOnce sync.Once
wg sync.WaitGroup
// donec closes when all goroutines are exited
donec chan struct{}
// updateAddrsC notifies updateNotifyLoop to update addrs.
updateAddrsC chan notifyMsg
// grpc issues TLS cert checks using the string passed into dial so
// that string must be the host. To recover the full scheme://host URL,
// have a map from hosts to the original endpoint.
hostPort2ep map[string]string
// pinAddr is the currently pinned address; set to the empty string on
// initialization and shutdown.
pinAddr string
closed bool
}
func newHealthBalancer(eps []string, timeout time.Duration, hc healthCheckFunc) *healthBalancer {
notifyCh := make(chan []grpc.Address)
addrs := eps2addrs(eps)
hb := &healthBalancer{
addrs: addrs,
eps: eps,
notifyCh: notifyCh,
readyc: make(chan struct{}),
healthCheck: hc,
unhealthyHostPorts: make(map[string]time.Time),
upc: make(chan struct{}),
stopc: make(chan struct{}),
downc: make(chan struct{}),
donec: make(chan struct{}),
updateAddrsC: make(chan notifyMsg),
hostPort2ep: getHostPort2ep(eps),
}
if timeout < minHealthRetryDuration {
timeout = minHealthRetryDuration
}
hb.healthCheckTimeout = timeout
close(hb.downc)
go hb.updateNotifyLoop()
hb.wg.Add(1)
go func() {
defer hb.wg.Done()
hb.updateUnhealthy()
}()
return hb
}
func (b *healthBalancer) Start(target string, config grpc.BalancerConfig) error { return nil }
func (b *healthBalancer) ConnectNotify() <-chan struct{} {
b.mu.Lock()
defer b.mu.Unlock()
return b.upc
}
func (b *healthBalancer) ready() <-chan struct{} { return b.readyc }
func (b *healthBalancer) endpoint(hostPort string) string {
b.mu.RLock()
defer b.mu.RUnlock()
return b.hostPort2ep[hostPort]
}
func (b *healthBalancer) pinned() string {
b.mu.RLock()
defer b.mu.RUnlock()
return b.pinAddr
}
func (b *healthBalancer) hostPortError(hostPort string, err error) {
if b.endpoint(hostPort) == "" {
logger.Lvl(4).Infof("clientv3/balancer: %q is stale (skip marking as unhealthy on %q)", hostPort, err.Error())
return
}
b.unhealthyMu.Lock()
b.unhealthyHostPorts[hostPort] = time.Now()
b.unhealthyMu.Unlock()
logger.Lvl(4).Infof("clientv3/balancer: %q is marked unhealthy (%q)", hostPort, err.Error())
}
func (b *healthBalancer) removeUnhealthy(hostPort, msg string) {
if b.endpoint(hostPort) == "" {
logger.Lvl(4).Infof("clientv3/balancer: %q was not in unhealthy (%q)", hostPort, msg)
return
}
b.unhealthyMu.Lock()
delete(b.unhealthyHostPorts, hostPort)
b.unhealthyMu.Unlock()
logger.Lvl(4).Infof("clientv3/balancer: %q is removed from unhealthy (%q)", hostPort, msg)
}
func (b *healthBalancer) countUnhealthy() (count int) {
b.unhealthyMu.RLock()
count = len(b.unhealthyHostPorts)
b.unhealthyMu.RUnlock()
return count
}
func (b *healthBalancer) isUnhealthy(hostPort string) (unhealthy bool) {
b.unhealthyMu.RLock()
_, unhealthy = b.unhealthyHostPorts[hostPort]
b.unhealthyMu.RUnlock()
return unhealthy
}
func (b *healthBalancer) cleanupUnhealthy() {
b.unhealthyMu.Lock()
for k, v := range b.unhealthyHostPorts {
if time.Since(v) > b.healthCheckTimeout {
delete(b.unhealthyHostPorts, k)
logger.Lvl(4).Infof("clientv3/balancer: removed %q from unhealthy after %v", k, b.healthCheckTimeout)
}
}
b.unhealthyMu.Unlock()
}
func (b *healthBalancer) liveAddrs() ([]grpc.Address, map[string]struct{}) {
unhealthyCnt := b.countUnhealthy()
b.mu.RLock()
defer b.mu.RUnlock()
hbAddrs := b.addrs
if len(b.addrs) == 1 || unhealthyCnt == 0 || unhealthyCnt == len(b.addrs) {
liveHostPorts := make(map[string]struct{}, len(b.hostPort2ep))
for k := range b.hostPort2ep {
liveHostPorts[k] = struct{}{}
}
return hbAddrs, liveHostPorts
}
addrs := make([]grpc.Address, 0, len(b.addrs)-unhealthyCnt)
liveHostPorts := make(map[string]struct{}, len(addrs))
for _, addr := range b.addrs {
if !b.isUnhealthy(addr.Addr) {
addrs = append(addrs, addr)
liveHostPorts[addr.Addr] = struct{}{}
}
}
return addrs, liveHostPorts
}
func (b *healthBalancer) updateUnhealthy() {
for {
select {
case <-time.After(b.healthCheckTimeout):
b.cleanupUnhealthy()
pinned := b.pinned()
if pinned == "" || b.isUnhealthy(pinned) {
select {
case b.updateAddrsC <- notifyNext:
case <-b.stopc:
return
}
}
case <-b.stopc:
return
}
}
}
func (b *healthBalancer) updateAddrs(eps ...string) {
np := getHostPort2ep(eps)
b.mu.Lock()
defer b.mu.Unlock()
match := len(np) == len(b.hostPort2ep)
if match {
for k, v := range np {
if b.hostPort2ep[k] != v {
match = false
break
}
}
}
if match {
// same endpoints, so no need to update address
return
}
b.hostPort2ep = np
b.addrs, b.eps = eps2addrs(eps), eps
b.unhealthyMu.Lock()
b.unhealthyHostPorts = make(map[string]time.Time)
b.unhealthyMu.Unlock()
}
func (b *healthBalancer) next() {
b.mu.RLock()
downc := b.downc
b.mu.RUnlock()
select {
case b.updateAddrsC <- notifyNext:
case <-b.stopc:
}
// wait until disconnect so new RPCs are not issued on old connection
select {
case <-downc:
case <-b.stopc:
}
}
func (b *healthBalancer) updateNotifyLoop() {
defer close(b.donec)
for {
b.mu.RLock()
upc, downc, addr := b.upc, b.downc, b.pinAddr
b.mu.RUnlock()
// downc or upc should be closed
select {
case <-downc:
downc = nil
default:
}
select {
case <-upc:
upc = nil
default:
}
switch {
case downc == nil && upc == nil:
// stale
select {
case <-b.stopc:
return
default:
}
case downc == nil:
b.notifyAddrs(notifyReset)
select {
case <-upc:
case msg := <-b.updateAddrsC:
b.notifyAddrs(msg)
case <-b.stopc:
return
}
case upc == nil:
select {
// close connections that are not the pinned address
case b.notifyCh <- []grpc.Address{{Addr: addr}}:
case <-downc:
case <-b.stopc:
return
}
select {
case <-downc:
b.notifyAddrs(notifyReset)
case msg := <-b.updateAddrsC:
b.notifyAddrs(msg)
case <-b.stopc:
return
}
}
}
}
func (b *healthBalancer) notifyAddrs(msg notifyMsg) {
if msg == notifyNext {
select {
case b.notifyCh <- []grpc.Address{}:
case <-b.stopc:
return
}
}
b.mu.RLock()
pinAddr := b.pinAddr
downc := b.downc
b.mu.RUnlock()
addrs, hostPorts := b.liveAddrs()
var waitDown bool
if pinAddr != "" {
_, ok := hostPorts[pinAddr]
waitDown = !ok
}
select {
case b.notifyCh <- addrs:
if waitDown {
select {
case <-downc:
case <-b.stopc:
}
}
case <-b.stopc:
}
}
func (b *healthBalancer) Up(addr grpc.Address) func(error) {
if !b.mayPin(addr) {
return func(err error) {}
}
b.mu.Lock()
defer b.mu.Unlock()
// gRPC might call Up after it called Close. We add this check
// to "fix" it up at application layer. Otherwise, will panic
// if b.upc is already closed.
if b.closed {
return func(err error) {}
}
// gRPC might call Up on a stale address.
// Prevent updating pinAddr with a stale address.
if !hasAddr(b.addrs, addr.Addr) {
return func(err error) {}
}
if b.pinAddr != "" {
logger.Lvl(4).Infof("clientv3/balancer: %q is up but not pinned (already pinned %q)", addr.Addr, b.pinAddr)
return func(err error) {}
}
// notify waiting Get()s and pin first connected address
close(b.upc)
b.downc = make(chan struct{})
b.pinAddr = addr.Addr
logger.Lvl(4).Infof("clientv3/balancer: pin %q", addr.Addr)
// notify client that a connection is up
b.readyOnce.Do(func() { close(b.readyc) })
return func(err error) {
// If connected to a black hole endpoint or a killed server, the gRPC ping
// timeout will induce a network I/O error, and retrying until success;
// finding healthy endpoint on retry could take several timeouts and redials.
// To avoid wasting retries, gray-list unhealthy endpoints.
b.hostPortError(addr.Addr, err)
b.mu.Lock()
b.upc = make(chan struct{})
close(b.downc)
b.pinAddr = ""
b.mu.Unlock()
logger.Lvl(4).Infof("clientv3/balancer: unpin %q (%q)", addr.Addr, err.Error())
}
}
func (b *healthBalancer) mayPin(addr grpc.Address) bool {
if b.endpoint(addr.Addr) == "" { // stale host:port
return false
}
b.unhealthyMu.RLock()
unhealthyCnt := len(b.unhealthyHostPorts)
failedTime, bad := b.unhealthyHostPorts[addr.Addr]
b.unhealthyMu.RUnlock()
b.mu.RLock()
skip := len(b.addrs) == 1 || unhealthyCnt == 0 || len(b.addrs) == unhealthyCnt
b.mu.RUnlock()
if skip || !bad {
return true
}
// prevent isolated member's endpoint from being infinitely retried, as follows:
// 1. keepalive pings detects GoAway with http2.ErrCodeEnhanceYourCalm
// 2. balancer 'Up' unpins with grpc: failed with network I/O error
// 3. grpc-healthcheck still SERVING, thus retry to pin
// instead, return before grpc-healthcheck if failed within healthcheck timeout
if elapsed := time.Since(failedTime); elapsed < b.healthCheckTimeout {
logger.Lvl(4).Infof("clientv3/balancer: %q is up but not pinned (failed %v ago, require minimum %v after failure)", addr.Addr, elapsed, b.healthCheckTimeout)
return false
}
if ok, _ := b.healthCheck(addr.Addr); ok {
b.removeUnhealthy(addr.Addr, "health check success")
return true
}
b.hostPortError(addr.Addr, errors.New("health check failed"))
return false
}
func (b *healthBalancer) Get(ctx context.Context, opts grpc.BalancerGetOptions) (grpc.Address, func(), error) {
var (
addr string
closed bool
)
// If opts.BlockingWait is false (for fail-fast RPCs), it should return
// an address it has notified via Notify immediately instead of blocking.
if !opts.BlockingWait {
b.mu.RLock()
closed = b.closed
addr = b.pinAddr
b.mu.RUnlock()
if closed {
return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
}
if addr == "" {
return grpc.Address{Addr: ""}, nil, ErrNoAddrAvilable
}
return grpc.Address{Addr: addr}, func() {}, nil
}
for {
b.mu.RLock()
ch := b.upc
b.mu.RUnlock()
select {
case <-ch:
case <-b.donec:
return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
case <-ctx.Done():
return grpc.Address{Addr: ""}, nil, ctx.Err()
}
b.mu.RLock()
closed = b.closed
addr = b.pinAddr
b.mu.RUnlock()
// Close() which sets b.closed = true can be called before Get(), Get() must exit if balancer is closed.
if closed {
return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
}
if addr != "" {
break
}
}
return grpc.Address{Addr: addr}, func() {}, nil
}
func (b *healthBalancer) Notify() <-chan []grpc.Address { return b.notifyCh }
func (b *healthBalancer) Close() error {
b.mu.Lock()
// In case gRPC calls close twice. TODO: remove the checking
// when we are sure that gRPC wont call close twice.
if b.closed {
b.mu.Unlock()
<-b.donec
return nil
}
b.closed = true
b.stopOnce.Do(func() { close(b.stopc) })
b.pinAddr = ""
// In the case of following scenario:
// 1. upc is not closed; no pinned address
// 2. client issues an RPC, calling invoke(), which calls Get(), enters for loop, blocks
// 3. client.conn.Close() calls balancer.Close(); closed = true
// 4. for loop in Get() never exits since ctx is the context passed in by the client and may not be canceled
// we must close upc so Get() exits from blocking on upc
select {
case <-b.upc:
default:
// terminate all waiting Get()s
close(b.upc)
}
b.mu.Unlock()
b.wg.Wait()
// wait for updateNotifyLoop to finish
<-b.donec
close(b.notifyCh)
return nil
}
func grpcHealthCheck(client *Client, ep string) (bool, error) {
conn, err := client.dial(ep)
if err != nil {
return false, err
}
defer conn.Close()
cli := healthpb.NewHealthClient(conn)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
resp, err := cli.Check(ctx, &healthpb.HealthCheckRequest{})
cancel()
if err != nil {
if s, ok := status.FromError(err); ok && s.Code() == codes.Unavailable {
if s.Message() == unknownService { // etcd < v3.3.0
return true, nil
}
}
return false, err
}
return resp.Status == healthpb.HealthCheckResponse_SERVING, nil
}
func hasAddr(addrs []grpc.Address, targetAddr string) bool {
for _, addr := range addrs {
if targetAddr == addr.Addr {
return true
}
}
return false
}
func getHost(ep string) string {
url, uerr := url.Parse(ep)
if uerr != nil || !strings.Contains(ep, "://") {
return ep
}
return url.Host
}
func eps2addrs(eps []string) []grpc.Address {
addrs := make([]grpc.Address, len(eps))
for i := range eps {
addrs[i].Addr = getHost(eps[i])
}
return addrs
}
func getHostPort2ep(eps []string) map[string]string {
hm := make(map[string]string, len(eps))
for i := range eps {
_, host, _ := parseEndpoint(eps[i])
hm[host] = eps[i]
}
return hm
}

View File

@@ -1,298 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"context"
"errors"
"net"
"sync"
"testing"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/testutil"
"google.golang.org/grpc"
)
var endpoints = []string{"localhost:2379", "localhost:22379", "localhost:32379"}
func TestBalancerGetUnblocking(t *testing.T) {
hb := newHealthBalancer(endpoints, minHealthRetryDuration, func(string) (bool, error) { return true, nil })
defer hb.Close()
if addrs := <-hb.Notify(); len(addrs) != len(endpoints) {
t.Errorf("Initialize newHealthBalancer should have triggered Notify() chan, but it didn't")
}
unblockingOpts := grpc.BalancerGetOptions{BlockingWait: false}
_, _, err := hb.Get(context.Background(), unblockingOpts)
if err != ErrNoAddrAvilable {
t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
}
down1 := hb.Up(grpc.Address{Addr: endpoints[1]})
if addrs := <-hb.Notify(); len(addrs) != 1 {
t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
}
down2 := hb.Up(grpc.Address{Addr: endpoints[2]})
addrFirst, putFun, err := hb.Get(context.Background(), unblockingOpts)
if err != nil {
t.Errorf("Get() with up endpoints should success, got %v", err)
}
if addrFirst.Addr != endpoints[1] {
t.Errorf("Get() didn't return expected address, got %v", addrFirst)
}
if putFun == nil {
t.Errorf("Get() returned unexpected nil put function")
}
addrSecond, _, _ := hb.Get(context.Background(), unblockingOpts)
if addrFirst.Addr != addrSecond.Addr {
t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
}
down1(errors.New("error"))
if addrs := <-hb.Notify(); len(addrs) != len(endpoints)-1 { // we call down on one endpoint
t.Errorf("closing the only connection should triggered balancer to send the %d endpoints via Notify chan so that we can establish a connection", len(endpoints)-1)
}
down2(errors.New("error"))
_, _, err = hb.Get(context.Background(), unblockingOpts)
if err != ErrNoAddrAvilable {
t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
}
}
func TestBalancerGetBlocking(t *testing.T) {
hb := newHealthBalancer(endpoints, minHealthRetryDuration, func(string) (bool, error) { return true, nil })
defer hb.Close()
if addrs := <-hb.Notify(); len(addrs) != len(endpoints) {
t.Errorf("Initialize newHealthBalancer should have triggered Notify() chan, but it didn't")
}
blockingOpts := grpc.BalancerGetOptions{BlockingWait: true}
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
_, _, err := hb.Get(ctx, blockingOpts)
cancel()
if err != context.DeadlineExceeded {
t.Errorf("Get() with no up endpoints should timeout, got %v", err)
}
downC := make(chan func(error), 1)
go func() {
// ensure hb.Up() will be called after hb.Get() to see if Up() releases blocking Get()
time.Sleep(time.Millisecond * 100)
f := hb.Up(grpc.Address{Addr: endpoints[1]})
if addrs := <-hb.Notify(); len(addrs) != 1 {
t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
}
downC <- f
}()
addrFirst, putFun, err := hb.Get(context.Background(), blockingOpts)
if err != nil {
t.Errorf("Get() with up endpoints should success, got %v", err)
}
if addrFirst.Addr != endpoints[1] {
t.Errorf("Get() didn't return expected address, got %v", addrFirst)
}
if putFun == nil {
t.Errorf("Get() returned unexpected nil put function")
}
down1 := <-downC
down2 := hb.Up(grpc.Address{Addr: endpoints[2]})
addrSecond, _, _ := hb.Get(context.Background(), blockingOpts)
if addrFirst.Addr != addrSecond.Addr {
t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
}
down1(errors.New("error"))
if addrs := <-hb.Notify(); len(addrs) != len(endpoints)-1 { // we call down on one endpoint
t.Errorf("closing the only connection should triggered balancer to send the %d endpoints via Notify chan so that we can establish a connection", len(endpoints)-1)
}
down2(errors.New("error"))
ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*100)
_, _, err = hb.Get(ctx, blockingOpts)
cancel()
if err != context.DeadlineExceeded {
t.Errorf("Get() with no up endpoints should timeout, got %v", err)
}
}
// TestHealthBalancerGraylist checks one endpoint is tried after the other
// due to gray listing.
func TestHealthBalancerGraylist(t *testing.T) {
var wg sync.WaitGroup
// Use 3 endpoints so gray list doesn't fallback to all connections
// after failing on 2 endpoints.
lns, eps := make([]net.Listener, 3), make([]string, 3)
wg.Add(3)
connc := make(chan string, 2)
for i := range eps {
ln, err := net.Listen("tcp", ":0")
testutil.AssertNil(t, err)
lns[i], eps[i] = ln, ln.Addr().String()
go func() {
defer wg.Done()
for {
conn, err := ln.Accept()
if err != nil {
return
}
_, err = conn.Read(make([]byte, 512))
conn.Close()
if err == nil {
select {
case connc <- ln.Addr().String():
// sleep some so balancer catches up
// before attempted next reconnect.
time.Sleep(50 * time.Millisecond)
default:
}
}
}
}()
}
tf := func(s string) (bool, error) { return false, nil }
hb := newHealthBalancer(eps, 5*time.Second, tf)
conn, err := grpc.Dial("", grpc.WithInsecure(), grpc.WithBalancer(hb))
testutil.AssertNil(t, err)
defer conn.Close()
kvc := pb.NewKVClient(conn)
<-hb.ready()
kvc.Range(context.TODO(), &pb.RangeRequest{})
ep1 := <-connc
kvc.Range(context.TODO(), &pb.RangeRequest{})
ep2 := <-connc
for _, ln := range lns {
ln.Close()
}
wg.Wait()
if ep1 == ep2 {
t.Fatalf("expected %q != %q", ep1, ep2)
}
}
// TestBalancerDoNotBlockOnClose ensures that balancer and grpc don't deadlock each other
// due to rapid open/close conn. The deadlock causes balancer.Close() to block forever.
// See issue: https://github.com/coreos/etcd/issues/7283 for more detail.
func TestBalancerDoNotBlockOnClose(t *testing.T) {
defer testutil.AfterTest(t)
kcl := newKillConnListener(t, 3)
defer kcl.close()
for i := 0; i < 5; i++ {
hb := newHealthBalancer(kcl.endpoints(), minHealthRetryDuration, func(string) (bool, error) { return true, nil })
conn, err := grpc.Dial("", grpc.WithInsecure(), grpc.WithBalancer(hb))
if err != nil {
t.Fatal(err)
}
kvc := pb.NewKVClient(conn)
<-hb.readyc
var wg sync.WaitGroup
wg.Add(100)
cctx, cancel := context.WithCancel(context.TODO())
for j := 0; j < 100; j++ {
go func() {
defer wg.Done()
kvc.Range(cctx, &pb.RangeRequest{}, grpc.FailFast(false))
}()
}
// balancer.Close() might block
// if balancer and grpc deadlock each other.
bclosec, cclosec := make(chan struct{}), make(chan struct{})
go func() {
defer close(bclosec)
hb.Close()
}()
go func() {
defer close(cclosec)
conn.Close()
}()
select {
case <-bclosec:
case <-time.After(3 * time.Second):
testutil.FatalStack(t, "balancer close timeout")
}
select {
case <-cclosec:
case <-time.After(3 * time.Second):
t.Fatal("grpc conn close timeout")
}
cancel()
wg.Wait()
}
}
// killConnListener listens incoming conn and kills it immediately.
type killConnListener struct {
wg sync.WaitGroup
eps []string
stopc chan struct{}
t *testing.T
}
func newKillConnListener(t *testing.T, size int) *killConnListener {
kcl := &killConnListener{stopc: make(chan struct{}), t: t}
for i := 0; i < size; i++ {
ln, err := net.Listen("tcp", ":0")
if err != nil {
t.Fatal(err)
}
kcl.eps = append(kcl.eps, ln.Addr().String())
kcl.wg.Add(1)
go kcl.listen(ln)
}
return kcl
}
func (kcl *killConnListener) endpoints() []string {
return kcl.eps
}
func (kcl *killConnListener) listen(l net.Listener) {
go func() {
defer kcl.wg.Done()
for {
conn, err := l.Accept()
select {
case <-kcl.stopc:
return
default:
}
if err != nil {
kcl.t.Fatal(err)
}
time.Sleep(1 * time.Millisecond)
conn.Close()
}
}()
<-kcl.stopc
l.Close()
}
func (kcl *killConnListener) close() {
close(kcl.stopc)
kcl.wg.Wait()
}

View File

@@ -25,6 +25,7 @@ import (
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/integration"
"github.com/coreos/etcd/pkg/testutil"
"google.golang.org/grpc"
)
// TestBalancerUnderBlackholeKeepAliveWatch tests when watch discovers it cannot talk to
@@ -35,7 +36,7 @@ func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) {
clus := integration.NewClusterV3(t, &integration.ClusterConfig{
Size: 2,
GRPCKeepAliveMinTime: 1 * time.Millisecond, // avoid too_many_pings
GRPCKeepAliveMinTime: time.Millisecond, // avoid too_many_pings
})
defer clus.Terminate(t)
@@ -43,8 +44,9 @@ func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) {
ccfg := clientv3.Config{
Endpoints: []string{eps[0]},
DialTimeout: 1 * time.Second,
DialKeepAliveTime: 1 * time.Second,
DialTimeout: time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
DialKeepAliveTime: time.Second,
DialKeepAliveTimeout: 500 * time.Millisecond,
}
@@ -70,6 +72,9 @@ func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) {
// endpoint can switch to eps[1] when it detects the failure of eps[0]
cli.SetEndpoints(eps...)
// give enough time for balancer resolution
time.Sleep(5 * time.Second)
clus.Members[0].Blackhole()
if _, err = clus.Client(1).Put(context.TODO(), "foo", "bar"); err != nil {
@@ -106,7 +111,7 @@ func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) {
func TestBalancerUnderBlackholeNoKeepAlivePut(t *testing.T) {
testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
_, err := cli.Put(ctx, "foo", "bar")
if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
return errExpected
}
return err
@@ -116,7 +121,7 @@ func TestBalancerUnderBlackholeNoKeepAlivePut(t *testing.T) {
func TestBalancerUnderBlackholeNoKeepAliveDelete(t *testing.T) {
testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
_, err := cli.Delete(ctx, "foo")
if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
return errExpected
}
return err
@@ -129,7 +134,7 @@ func TestBalancerUnderBlackholeNoKeepAliveTxn(t *testing.T) {
If(clientv3.Compare(clientv3.Version("foo"), "=", 0)).
Then(clientv3.OpPut("foo", "bar")).
Else(clientv3.OpPut("foo", "baz")).Commit()
if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
return errExpected
}
return err
@@ -139,7 +144,7 @@ func TestBalancerUnderBlackholeNoKeepAliveTxn(t *testing.T) {
func TestBalancerUnderBlackholeNoKeepAliveLinearizableGet(t *testing.T) {
testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
_, err := cli.Get(ctx, "a")
if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
return errExpected
}
return err
@@ -149,7 +154,7 @@ func TestBalancerUnderBlackholeNoKeepAliveLinearizableGet(t *testing.T) {
func TestBalancerUnderBlackholeNoKeepAliveSerializableGet(t *testing.T) {
testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
_, err := cli.Get(ctx, "a", clientv3.WithSerializable())
if err == context.DeadlineExceeded || isServerCtxTimeout(err) {
if isClientTimeout(err) || isServerCtxTimeout(err) {
return errExpected
}
return err
@@ -172,6 +177,7 @@ func testBalancerUnderBlackholeNoKeepAlive(t *testing.T, op func(*clientv3.Clien
ccfg := clientv3.Config{
Endpoints: []string{eps[0]},
DialTimeout: 1 * time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
}
cli, err := clientv3.New(ccfg)
if err != nil {
@@ -189,22 +195,23 @@ func testBalancerUnderBlackholeNoKeepAlive(t *testing.T, op func(*clientv3.Clien
// blackhole eps[0]
clus.Members[0].Blackhole()
// fail first due to blackhole, retry should succeed
// With round robin balancer, client will make a request to a healthy endpoint
// within a few requests.
// TODO: first operation can succeed
// when gRPC supports better retry on non-delivered request
for i := 0; i < 2; i++ {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
for i := 0; i < 5; i++ {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
err = op(cli, ctx)
cancel()
if err == nil {
break
}
if i == 0 {
if err != errExpected {
t.Errorf("#%d: expected %v, got %v", i, errExpected, err)
}
} else if err != nil {
} else if err == errExpected {
t.Logf("#%d: current error %v", i, err)
} else {
t.Errorf("#%d: failed with error %v", i, err)
}
}
if err != nil {
t.Fatal(err)
}
}

View File

@@ -26,6 +26,7 @@ import (
"github.com/coreos/etcd/integration"
"github.com/coreos/etcd/pkg/testutil"
"github.com/coreos/etcd/pkg/transport"
"google.golang.org/grpc"
)
var (
@@ -58,10 +59,11 @@ func TestDialTLSExpired(t *testing.T) {
_, err = clientv3.New(clientv3.Config{
Endpoints: []string{clus.Members[0].GRPCAddr()},
DialTimeout: 3 * time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
TLS: tls,
})
if err != context.DeadlineExceeded {
t.Fatalf("expected %v, got %v", context.DeadlineExceeded, err)
if !isClientTimeout(err) {
t.Fatalf("expected dial timeout error, got %v", err)
}
}
@@ -72,12 +74,18 @@ func TestDialTLSNoConfig(t *testing.T) {
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1, ClientTLS: &testTLSInfo, SkipCreatingClient: true})
defer clus.Terminate(t)
// expect "signed by unknown authority"
_, err := clientv3.New(clientv3.Config{
c, err := clientv3.New(clientv3.Config{
Endpoints: []string{clus.Members[0].GRPCAddr()},
DialTimeout: time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
})
if err != context.DeadlineExceeded {
t.Fatalf("expected %v, got %v", context.DeadlineExceeded, err)
defer func() {
if c != nil {
c.Close()
}
}()
if !isClientTimeout(err) {
t.Fatalf("expected dial timeout error, got %v", err)
}
}
@@ -104,7 +112,11 @@ func testDialSetEndpoints(t *testing.T, setBefore bool) {
}
toKill := rand.Intn(len(eps))
cfg := clientv3.Config{Endpoints: []string{eps[toKill]}, DialTimeout: 1 * time.Second}
cfg := clientv3.Config{
Endpoints: []string{eps[toKill]},
DialTimeout: 1 * time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
}
cli, err := clientv3.New(cfg)
if err != nil {
t.Fatal(err)
@@ -121,6 +133,7 @@ func testDialSetEndpoints(t *testing.T, setBefore bool) {
if !setBefore {
cli.SetEndpoints(eps[toKill%3], eps[(toKill+1)%3])
}
time.Sleep(time.Second * 2)
ctx, cancel := context.WithTimeout(context.Background(), integration.RequestWaitTimeout)
if _, err = cli.Get(ctx, "foo", clientv3.WithSerializable()); err != nil {
t.Fatal(err)
@@ -158,6 +171,7 @@ func TestRejectOldCluster(t *testing.T) {
cfg := clientv3.Config{
Endpoints: []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()},
DialTimeout: 5 * time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
RejectOldCluster: true,
}
cli, err := clientv3.New(cfg)

View File

@@ -884,7 +884,7 @@ func TestKVLargeRequests(t *testing.T) {
},
// without proper client-side receive size limit
// "code = ResourceExhausted desc = grpc: received message larger than max (5242929 vs. 4194304)"
// "code = ResourceExhausted desc = received message larger than max (5242929 vs. 4194304)"
{
maxRequestBytesServer: 7*1024*1024 + 512*1024,
@@ -906,7 +906,7 @@ func TestKVLargeRequests(t *testing.T) {
maxCallSendBytesClient: 10 * 1024 * 1024,
maxCallRecvBytesClient: 0,
valueSize: 10 * 1024 * 1024,
expectError: grpc.Errorf(codes.ResourceExhausted, "grpc: trying to send message larger than max "),
expectError: grpc.Errorf(codes.ResourceExhausted, "trying to send message larger than max "),
},
{
maxRequestBytesServer: 10 * 1024 * 1024,
@@ -920,7 +920,7 @@ func TestKVLargeRequests(t *testing.T) {
maxCallSendBytesClient: 10 * 1024 * 1024,
maxCallRecvBytesClient: 0,
valueSize: 10*1024*1024 + 5,
expectError: grpc.Errorf(codes.ResourceExhausted, "grpc: trying to send message larger than max "),
expectError: grpc.Errorf(codes.ResourceExhausted, "trying to send message larger than max "),
},
}
for i, test := range tests {
@@ -940,7 +940,7 @@ func TestKVLargeRequests(t *testing.T) {
t.Errorf("#%d: expected %v, got %v", i, test.expectError, err)
}
} else if err != nil && !strings.HasPrefix(err.Error(), test.expectError.Error()) {
t.Errorf("#%d: expected %v, got %v", i, test.expectError, err)
t.Errorf("#%d: expected error starting with '%s', got '%s'", i, test.expectError.Error(), err.Error())
}
// put request went through, now expects large response back

View File

@@ -19,11 +19,9 @@ import (
"github.com/coreos/etcd/clientv3"
"github.com/coreos/pkg/capnslog"
"google.golang.org/grpc/grpclog"
)
func init() {
capnslog.SetGlobalLogLevel(capnslog.CRITICAL)
clientv3.SetLogger(grpclog.NewLoggerV2(ioutil.Discard, ioutil.Discard, ioutil.Discard))
}

View File

@@ -24,8 +24,10 @@ import (
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/integration"
"github.com/coreos/etcd/pkg/testutil"
"google.golang.org/grpc"
)
var errExpected = errors.New("expected error")
@@ -36,7 +38,7 @@ var errExpected = errors.New("expected error")
func TestBalancerUnderNetworkPartitionPut(t *testing.T) {
testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
_, err := cli.Put(ctx, "a", "b")
if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
return errExpected
}
return err
@@ -46,7 +48,7 @@ func TestBalancerUnderNetworkPartitionPut(t *testing.T) {
func TestBalancerUnderNetworkPartitionDelete(t *testing.T) {
testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
_, err := cli.Delete(ctx, "a")
if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
return errExpected
}
return err
@@ -59,7 +61,7 @@ func TestBalancerUnderNetworkPartitionTxn(t *testing.T) {
If(clientv3.Compare(clientv3.Version("foo"), "=", 0)).
Then(clientv3.OpPut("foo", "bar")).
Else(clientv3.OpPut("foo", "baz")).Commit()
if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
return errExpected
}
return err
@@ -72,6 +74,9 @@ func TestBalancerUnderNetworkPartitionTxn(t *testing.T) {
func TestBalancerUnderNetworkPartitionLinearizableGetWithLongTimeout(t *testing.T) {
testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
_, err := cli.Get(ctx, "a")
if err == rpctypes.ErrTimeout {
return errExpected
}
return err
}, 7*time.Second)
}
@@ -82,7 +87,7 @@ func TestBalancerUnderNetworkPartitionLinearizableGetWithLongTimeout(t *testing.
func TestBalancerUnderNetworkPartitionLinearizableGetWithShortTimeout(t *testing.T) {
testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
_, err := cli.Get(ctx, "a")
if err == context.DeadlineExceeded || isServerCtxTimeout(err) {
if isClientTimeout(err) || isServerCtxTimeout(err) {
return errExpected
}
return err
@@ -111,6 +116,7 @@ func testBalancerUnderNetworkPartition(t *testing.T, op func(*clientv3.Client, c
ccfg := clientv3.Config{
Endpoints: []string{eps[0]},
DialTimeout: 3 * time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
}
cli, err := clientv3.New(ccfg)
if err != nil {
@@ -123,9 +129,10 @@ func testBalancerUnderNetworkPartition(t *testing.T, op func(*clientv3.Client, c
// add other endpoints for later endpoint switch
cli.SetEndpoints(eps...)
time.Sleep(time.Second * 2)
clus.Members[0].InjectPartition(t, clus.Members[1:]...)
for i := 0; i < 2; i++ {
for i := 0; i < 5; i++ {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
err = op(cli, ctx)
cancel()
@@ -133,7 +140,7 @@ func testBalancerUnderNetworkPartition(t *testing.T, op func(*clientv3.Client, c
break
}
if err != errExpected {
t.Errorf("#%d: expected %v, got %v", i, errExpected, err)
t.Errorf("#%d: expected '%v', got '%v'", i, errExpected, err)
}
// give enough time for endpoint switch
// TODO: remove random sleep by syncing directly with balancer
@@ -165,16 +172,14 @@ func TestBalancerUnderNetworkPartitionLinearizableGetLeaderElection(t *testing.T
cli, err := clientv3.New(clientv3.Config{
Endpoints: []string{eps[(lead+1)%2]},
DialTimeout: 1 * time.Second,
DialTimeout: 2 * time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
})
if err != nil {
t.Fatal(err)
}
defer cli.Close()
// wait for non-leader to be pinned
mustWaitPinReady(t, cli)
// add all eps to list, so that when the original pined one fails
// the client can switch to other available eps
cli.SetEndpoints(eps[lead], eps[(lead+1)%2])
@@ -182,10 +187,15 @@ func TestBalancerUnderNetworkPartitionLinearizableGetLeaderElection(t *testing.T
// isolate leader
clus.Members[lead].InjectPartition(t, clus.Members[(lead+1)%3], clus.Members[(lead+2)%3])
// expects balancer endpoint switch while ongoing leader election
ctx, cancel := context.WithTimeout(context.TODO(), timeout)
_, err = cli.Get(ctx, "a")
cancel()
// expects balancer to round robin to leader within two attempts
for i := 0; i < 2; i++ {
ctx, cancel := context.WithTimeout(context.TODO(), timeout)
_, err = cli.Get(ctx, "a")
cancel()
if err == nil {
break
}
}
if err != nil {
t.Fatal(err)
}
@@ -218,7 +228,10 @@ func testBalancerUnderNetworkPartitionWatch(t *testing.T, isolateLeader bool) {
}
// pin eps[target]
watchCli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[target]}})
watchCli, err := clientv3.New(clientv3.Config{
Endpoints: []string{eps[target]},
DialOptions: []grpc.DialOption{grpc.WithBlock()},
})
if err != nil {
t.Fatal(err)
}
@@ -256,3 +269,63 @@ func testBalancerUnderNetworkPartitionWatch(t *testing.T, isolateLeader bool) {
t.Fatal("took too long to detect leader lost")
}
}
func TestDropReadUnderNetworkPartition(t *testing.T) {
defer testutil.AfterTest(t)
clus := integration.NewClusterV3(t, &integration.ClusterConfig{
Size: 3,
SkipCreatingClient: true,
})
defer clus.Terminate(t)
leaderIndex := clus.WaitLeader(t)
// get a follower endpoint
eps := []string{clus.Members[(leaderIndex+1)%3].GRPCAddr()}
ccfg := clientv3.Config{
Endpoints: eps,
DialTimeout: 10 * time.Second,
DialOptions: []grpc.DialOption{grpc.WithBlock()},
}
cli, err := clientv3.New(ccfg)
if err != nil {
t.Fatal(err)
}
defer cli.Close()
// wait for eps[0] to be pinned
mustWaitPinReady(t, cli)
// add other endpoints for later endpoint switch
cli.SetEndpoints(eps...)
time.Sleep(time.Second * 2)
conn, err := cli.Dial(clus.Members[(leaderIndex+1)%3].GRPCAddr())
if err != nil {
t.Fatal(err)
}
defer conn.Close()
clus.Members[leaderIndex].InjectPartition(t, clus.Members[(leaderIndex+1)%3], clus.Members[(leaderIndex+2)%3])
kvc := clientv3.NewKVFromKVClient(pb.NewKVClient(conn), nil)
ctx, cancel := context.WithTimeout(context.TODO(), 10*time.Second)
_, err = kvc.Get(ctx, "a")
cancel()
if err.Error() != rpctypes.ErrLeaderChanged.Error() {
t.Fatalf("expected %v, got %v", rpctypes.ErrLeaderChanged, err)
}
for i := 0; i < 5; i++ {
ctx, cancel = context.WithTimeout(context.TODO(), 10*time.Second)
_, err = kvc.Get(ctx, "a")
cancel()
if err != nil {
if err == rpctypes.ErrTimeout {
<-time.After(time.Second)
i++
continue
}
t.Fatalf("expected nil or timeout, got %v", err)
}
// No error returned and no retry required
break
}
}

View File

@@ -26,6 +26,7 @@ import (
"github.com/coreos/etcd/integration"
"github.com/coreos/etcd/pkg/testutil"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
@@ -75,16 +76,16 @@ func TestBalancerUnderServerShutdownWatch(t *testing.T) {
select {
case ev := <-wch:
if werr := ev.Err(); werr != nil {
t.Fatal(werr)
t.Error(werr)
}
if len(ev.Events) != 1 {
t.Fatalf("expected one event, got %+v", ev)
t.Errorf("expected one event, got %+v", ev)
}
if !bytes.Equal(ev.Events[0].Kv.Value, []byte(val)) {
t.Fatalf("expected %q, got %+v", val, ev.Events[0].Kv)
t.Errorf("expected %q, got %+v", val, ev.Events[0].Kv)
}
case <-time.After(7 * time.Second):
t.Fatal("took too long to receive events")
t.Error("took too long to receive events")
}
}()
@@ -92,7 +93,10 @@ func TestBalancerUnderServerShutdownWatch(t *testing.T) {
clus.Members[lead].Terminate(t)
// writes to eps[lead+1]
putCli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[(lead+1)%3]}})
putCli, err := clientv3.New(clientv3.Config{
Endpoints: []string{eps[(lead+1)%3]},
DialOptions: []grpc.DialOption{grpc.WithBlock()},
})
if err != nil {
t.Fatal(err)
}
@@ -104,7 +108,7 @@ func TestBalancerUnderServerShutdownWatch(t *testing.T) {
if err == nil {
break
}
if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout || err == rpctypes.ErrTimeoutDueToLeaderFail {
if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout || err == rpctypes.ErrTimeoutDueToLeaderFail {
continue
}
t.Fatal(err)
@@ -156,7 +160,10 @@ func testBalancerUnderServerShutdownMutable(t *testing.T, op func(*clientv3.Clie
eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}
// pin eps[0]
cli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[0]}})
cli, err := clientv3.New(clientv3.Config{
Endpoints: []string{eps[0]},
DialOptions: []grpc.DialOption{grpc.WithBlock()},
})
if err != nil {
t.Fatal(err)
}
@@ -337,10 +344,20 @@ func testBalancerUnderServerStopInflightRangeOnRestart(t *testing.T, linearizabl
defer close(donec)
ctx, cancel := context.WithTimeout(context.TODO(), clientTimeout)
readyc <- struct{}{}
_, err := cli.Get(ctx, "abc", gops...)
// TODO: The new grpc load balancer will not pin to an endpoint
// as intended by this test. But it will round robin member within
// two attempts.
// Remove retry loop once the new grpc load balancer provides retry.
for i := 0; i < 2; i++ {
_, err = cli.Get(ctx, "abc", gops...)
if err == nil {
break
}
}
cancel()
if err != nil {
t.Fatal(err)
t.Errorf("unexpected error: %v", err)
}
}()
@@ -361,7 +378,57 @@ func isServerCtxTimeout(err error) bool {
if err == nil {
return false
}
ev, _ := status.FromError(err)
ev, ok := status.FromError(err)
if !ok {
return false
}
code := ev.Code()
return code == codes.DeadlineExceeded && strings.Contains(err.Error(), "context deadline exceeded")
}
// In grpc v1.11.3+ dial timeouts can error out with transport.ErrConnClosing. Previously dial timeouts
// would always error out with context.DeadlineExceeded.
func isClientTimeout(err error) bool {
if err == nil {
return false
}
if err == context.DeadlineExceeded {
return true
}
ev, ok := status.FromError(err)
if !ok {
return false
}
code := ev.Code()
return code == codes.DeadlineExceeded
}
func isCanceled(err error) bool {
if err == nil {
return false
}
if err == context.Canceled {
return true
}
ev, ok := status.FromError(err)
if !ok {
return false
}
code := ev.Code()
return code == codes.Canceled
}
func isUnavailable(err error) bool {
if err == nil {
return false
}
if err == context.Canceled {
return true
}
ev, ok := status.FromError(err)
if !ok {
return false
}
code := ev.Code()
return code == codes.Unavailable
}

View File

@@ -25,7 +25,7 @@ import (
// mustWaitPinReady waits up to 3-second until connection is up (pin endpoint).
// Fatal on time-out.
func mustWaitPinReady(t *testing.T, cli *clientv3.Client) {
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
_, err := cli.Get(ctx, "foo")
cancel()
if err != nil {

View File

@@ -0,0 +1,125 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build !cluster_proxy
package integration
import (
"context"
"fmt"
"strings"
"testing"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/integration"
"github.com/coreos/etcd/pkg/testutil"
)
// TestWatchFragmentDisable ensures that large watch
// response exceeding server-side request limit can
// arrive even without watch response fragmentation.
func TestWatchFragmentDisable(t *testing.T) {
testWatchFragment(t, false, false)
}
// TestWatchFragmentDisableWithGRPCLimit verifies
// large watch response exceeding server-side request
// limit and client-side gRPC response receive limit
// cannot arrive without watch events fragmentation,
// because multiple events exceed client-side gRPC
// response receive limit.
func TestWatchFragmentDisableWithGRPCLimit(t *testing.T) {
testWatchFragment(t, false, true)
}
// TestWatchFragmentEnable ensures that large watch
// response exceeding server-side request limit arrive
// with watch response fragmentation.
func TestWatchFragmentEnable(t *testing.T) {
testWatchFragment(t, true, false)
}
// TestWatchFragmentEnableWithGRPCLimit verifies
// large watch response exceeding server-side request
// limit and client-side gRPC response receive limit
// can arrive only when watch events are fragmented.
func TestWatchFragmentEnableWithGRPCLimit(t *testing.T) {
testWatchFragment(t, true, true)
}
// testWatchFragment triggers watch response that spans over multiple
// revisions exceeding server request limits when combined.
func testWatchFragment(t *testing.T, fragment, exceedRecvLimit bool) {
cfg := &integration.ClusterConfig{
Size: 1,
MaxRequestBytes: 1.5 * 1024 * 1024,
}
if exceedRecvLimit {
cfg.ClientMaxCallRecvMsgSize = 1.5 * 1024 * 1024
}
clus := integration.NewClusterV3(t, cfg)
defer clus.Terminate(t)
cli := clus.Client(0)
errc := make(chan error)
for i := 0; i < 10; i++ {
go func(i int) {
_, err := cli.Put(context.TODO(),
fmt.Sprint("foo", i),
strings.Repeat("a", 1024*1024),
)
errc <- err
}(i)
}
for i := 0; i < 10; i++ {
if err := <-errc; err != nil {
t.Fatalf("failed to put: %v", err)
}
}
opts := []clientv3.OpOption{clientv3.WithPrefix(), clientv3.WithRev(1)}
if fragment {
opts = append(opts, clientv3.WithFragment())
}
wch := cli.Watch(context.TODO(), "foo", opts...)
// expect 10 MiB watch response
select {
case ws := <-wch:
// without fragment, should exceed gRPC client receive limit
if !fragment && exceedRecvLimit {
if len(ws.Events) != 0 {
t.Fatalf("expected 0 events with watch fragmentation, got %d", len(ws.Events))
}
exp := "code = ResourceExhausted desc = grpc: received message larger than max ("
if !strings.Contains(ws.Err().Error(), exp) {
t.Fatalf("expected 'ResourceExhausted' error, got %v", ws.Err())
}
return
}
// still expect merged watch events
if len(ws.Events) != 10 {
t.Fatalf("expected 10 events with watch fragmentation, got %d", len(ws.Events))
}
if ws.Err() != nil {
t.Fatalf("unexpected error %v", ws.Err())
}
case <-time.After(testutil.RequestTimeout):
t.Fatalf("took too long to receive events")
}
}

View File

@@ -30,7 +30,6 @@ import (
mvccpb "github.com/coreos/etcd/mvcc/mvccpb"
"github.com/coreos/etcd/pkg/testutil"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
)
@@ -583,6 +582,78 @@ func testWatchWithProgressNotify(t *testing.T, watchOnPut bool) {
}
}
func TestWatchRequestProgress(t *testing.T) {
testCases := []struct {
name string
watchers []string
}{
{"0-watcher", []string{}},
{"1-watcher", []string{"/"}},
{"2-watcher", []string{"/", "/"}},
}
for _, c := range testCases {
t.Run(c.name, func(t *testing.T) {
defer testutil.AfterTest(t)
watchTimeout := 3 * time.Second
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
defer clus.Terminate(t)
wc := clus.RandClient()
var watchChans []clientv3.WatchChan
for _, prefix := range c.watchers {
watchChans = append(watchChans, wc.Watch(context.Background(), prefix, clientv3.WithPrefix()))
}
_, err := wc.Put(context.Background(), "/a", "1")
if err != nil {
t.Fatal(err)
}
for _, rch := range watchChans {
select {
case resp := <-rch: // wait for notification
if len(resp.Events) != 1 {
t.Fatalf("resp.Events expected 1, got %d", len(resp.Events))
}
case <-time.After(watchTimeout):
t.Fatalf("watch response expected in %v, but timed out", watchTimeout)
}
}
// put a value not being watched to increment revision
_, err = wc.Put(context.Background(), "x", "1")
if err != nil {
t.Fatal(err)
}
err = wc.RequestProgress(context.Background())
if err != nil {
t.Fatal(err)
}
// verify all watch channels receive a progress notify
for _, rch := range watchChans {
select {
case resp := <-rch:
if !resp.IsProgressNotify() {
t.Fatalf("expected resp.IsProgressNotify() == true")
}
if resp.Header.Revision != 3 {
t.Fatalf("resp.Header.Revision expected 3, got %d", resp.Header.Revision)
}
case <-time.After(watchTimeout):
t.Fatalf("progress response expected in %v, but timed out", watchTimeout)
}
}
})
}
}
func TestWatchEventType(t *testing.T) {
cluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
defer cluster.Terminate(t)
@@ -667,8 +738,9 @@ func TestWatchErrConnClosed(t *testing.T) {
go func() {
defer close(donec)
ch := cli.Watch(context.TODO(), "foo")
if wr := <-ch; grpc.ErrorDesc(wr.Err()) != grpc.ErrClientConnClosing.Error() {
t.Fatalf("expected %v, got %v", grpc.ErrClientConnClosing, grpc.ErrorDesc(wr.Err()))
if wr := <-ch; !isCanceled(wr.Err()) {
t.Errorf("expected context canceled, got %v", wr.Err())
}
}()
@@ -699,8 +771,8 @@ func TestWatchAfterClose(t *testing.T) {
donec := make(chan struct{})
go func() {
cli.Watch(context.TODO(), "foo")
if err := cli.Close(); err != nil && err != grpc.ErrClientConnClosing {
t.Fatalf("expected %v, got %v", grpc.ErrClientConnClosing, err)
if err := cli.Close(); err != nil && err != context.Canceled {
t.Errorf("expected %v, got %v", context.Canceled, err)
}
close(donec)
}()
@@ -1061,3 +1133,24 @@ func TestWatchCancelDisconnected(t *testing.T) {
t.Fatal("took too long to cancel disconnected watcher")
}
}
// TestWatchClose ensures that close does not return error
func TestWatchClose(t *testing.T) {
runWatchTest(t, testWatchClose)
}
func testWatchClose(t *testing.T, wctx *watchctx) {
ctx, cancel := context.WithCancel(context.Background())
wch := wctx.w.Watch(ctx, "a")
cancel()
if wch == nil {
t.Fatalf("expected watcher channel, got nil")
}
if wctx.w.Close() != nil {
t.Fatalf("watch did not close successfully")
}
wresp, ok := <-wch
if ok {
t.Fatalf("read wch got %v; expected closed channel", wresp)
}
}

View File

@@ -85,7 +85,7 @@ func (txn *txnLeasing) eval() (*v3.TxnResponse, error) {
if !ok {
return nil, nil
}
return &v3.TxnResponse{copyHeader(txn.lkv.leases.header), succeeded, resps}, nil
return &v3.TxnResponse{Header: copyHeader(txn.lkv.leases.header), Succeeded: succeeded, Responses: resps}, nil
}
// fallback computes the ops to fetch all possible conflicting

View File

@@ -18,28 +18,14 @@ import (
"io/ioutil"
"sync"
"github.com/coreos/etcd/pkg/logutil"
"google.golang.org/grpc/grpclog"
)
// Logger is the logger used by client library.
// It implements grpclog.LoggerV2 interface.
type Logger interface {
grpclog.LoggerV2
// Lvl returns logger if logger's verbosity level >= "lvl".
// Otherwise, logger that discards all logs.
Lvl(lvl int) Logger
// to satisfy capnslog
Print(args ...interface{})
Printf(format string, args ...interface{})
Println(args ...interface{})
}
var (
loggerMu sync.RWMutex
logger Logger
lgMu sync.RWMutex
lg logutil.Logger
)
type settableLogger struct {
@@ -49,29 +35,29 @@ type settableLogger struct {
func init() {
// disable client side logs by default
logger = &settableLogger{}
lg = &settableLogger{}
SetLogger(grpclog.NewLoggerV2(ioutil.Discard, ioutil.Discard, ioutil.Discard))
}
// SetLogger sets client-side Logger.
func SetLogger(l grpclog.LoggerV2) {
loggerMu.Lock()
logger = NewLogger(l)
lgMu.Lock()
lg = logutil.NewLogger(l)
// override grpclog so that any changes happen with locking
grpclog.SetLoggerV2(logger)
loggerMu.Unlock()
grpclog.SetLoggerV2(lg)
lgMu.Unlock()
}
// GetLogger returns the current logger.
func GetLogger() Logger {
loggerMu.RLock()
l := logger
loggerMu.RUnlock()
// GetLogger returns the current logutil.Logger.
func GetLogger() logutil.Logger {
lgMu.RLock()
l := lg
lgMu.RUnlock()
return l
}
// NewLogger returns a new Logger with grpclog.LoggerV2.
func NewLogger(gl grpclog.LoggerV2) Logger {
// NewLogger returns a new Logger with logutil.Logger.
func NewLogger(gl grpclog.LoggerV2) logutil.Logger {
return &settableLogger{l: gl}
}
@@ -104,32 +90,12 @@ func (s *settableLogger) Print(args ...interface{}) { s.get().In
func (s *settableLogger) Printf(format string, args ...interface{}) { s.get().Infof(format, args...) }
func (s *settableLogger) Println(args ...interface{}) { s.get().Infoln(args...) }
func (s *settableLogger) V(l int) bool { return s.get().V(l) }
func (s *settableLogger) Lvl(lvl int) Logger {
func (s *settableLogger) Lvl(lvl int) grpclog.LoggerV2 {
s.mu.RLock()
l := s.l
s.mu.RUnlock()
if l.V(lvl) {
return s
}
return &noLogger{}
return logutil.NewDiscardLogger()
}
type noLogger struct{}
func (*noLogger) Info(args ...interface{}) {}
func (*noLogger) Infof(format string, args ...interface{}) {}
func (*noLogger) Infoln(args ...interface{}) {}
func (*noLogger) Warning(args ...interface{}) {}
func (*noLogger) Warningf(format string, args ...interface{}) {}
func (*noLogger) Warningln(args ...interface{}) {}
func (*noLogger) Error(args ...interface{}) {}
func (*noLogger) Errorf(format string, args ...interface{}) {}
func (*noLogger) Errorln(args ...interface{}) {}
func (*noLogger) Fatal(args ...interface{}) {}
func (*noLogger) Fatalf(format string, args ...interface{}) {}
func (*noLogger) Fatalln(args ...interface{}) {}
func (*noLogger) Print(args ...interface{}) {}
func (*noLogger) Printf(format string, args ...interface{}) {}
func (*noLogger) Println(args ...interface{}) {}
func (*noLogger) V(l int) bool { return false }
func (ng *noLogger) Lvl(lvl int) Logger { return ng }

View File

@@ -1,51 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"bytes"
"io/ioutil"
"strings"
"testing"
"google.golang.org/grpc/grpclog"
)
func TestLogger(t *testing.T) {
buf := new(bytes.Buffer)
l := NewLogger(grpclog.NewLoggerV2WithVerbosity(buf, buf, buf, 10))
l.Infof("hello world!")
if !strings.Contains(buf.String(), "hello world!") {
t.Fatalf("expected 'hello world!', got %q", buf.String())
}
buf.Reset()
l.Lvl(10).Infof("Level 10")
l.Lvl(30).Infof("Level 30")
if !strings.Contains(buf.String(), "Level 10") {
t.Fatalf("expected 'Level 10', got %q", buf.String())
}
if strings.Contains(buf.String(), "Level 30") {
t.Fatalf("unexpected 'Level 30', got %q", buf.String())
}
buf.Reset()
l = NewLogger(grpclog.NewLoggerV2(ioutil.Discard, ioutil.Discard, ioutil.Discard))
l.Infof("ignore this")
if len(buf.Bytes()) > 0 {
t.Fatalf("unexpected logs %q", buf.String())
}
}

View File

@@ -16,6 +16,7 @@ package clientv3
import (
"context"
"fmt"
"io"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
@@ -57,6 +58,8 @@ type Maintenance interface {
HashKV(ctx context.Context, endpoint string, rev int64) (*HashKVResponse, error)
// Snapshot provides a reader for a point-in-time snapshot of etcd.
// If the context "ctx" is canceled or timed out, reading from returned
// "io.ReadCloser" would error out (e.g. context.Canceled, context.DeadlineExceeded).
Snapshot(ctx context.Context) (io.ReadCloser, error)
// MoveLeader requests current leader to transfer its leadership to the transferee.
@@ -73,9 +76,9 @@ type maintenance struct {
func NewMaintenance(c *Client) Maintenance {
api := &maintenance{
dial: func(endpoint string) (pb.MaintenanceClient, func(), error) {
conn, err := c.dial(endpoint)
conn, err := c.Dial(endpoint)
if err != nil {
return nil, nil, err
return nil, nil, fmt.Errorf("failed to dial endpoint %s with maintenance client: %v", endpoint, err)
}
cancel := func() { conn.Close() }
return RetryMaintenanceClient(c, conn), cancel, nil
@@ -173,6 +176,7 @@ func (m *maintenance) Status(ctx context.Context, endpoint string) (*StatusRespo
func (m *maintenance) HashKV(ctx context.Context, endpoint string, rev int64) (*HashKVResponse, error) {
remote, cancel, err := m.dial(endpoint)
if err != nil {
return nil, toErr(ctx, err)
}
defer cancel()
@@ -184,7 +188,7 @@ func (m *maintenance) HashKV(ctx context.Context, endpoint string, rev int64) (*
}
func (m *maintenance) Snapshot(ctx context.Context) (io.ReadCloser, error) {
ss, err := m.remote.Snapshot(ctx, &pb.SnapshotRequest{}, m.callOpts...)
ss, err := m.remote.Snapshot(ctx, &pb.SnapshotRequest{}, append(m.callOpts, withMax(defaultStreamMaxRetries))...)
if err != nil {
return nil, toErr(ctx, err)
}

View File

@@ -53,6 +53,12 @@ type Op struct {
// for watch, put, delete
prevKV bool
// for watch
// fragmentation should be disabled by default
// if true, split watch events when total exceeds
// "--max-request-bytes" flag value + 512-byte
fragment bool
// for put
ignoreValue bool
ignoreLease bool
@@ -511,3 +517,14 @@ func toLeaseTimeToLiveRequest(id LeaseID, opts ...LeaseOption) *pb.LeaseTimeToLi
ret.applyOpts(opts)
return &pb.LeaseTimeToLiveRequest{ID: int64(id), Keys: ret.attachedKeys}
}
// WithFragment to receive raw watch response with fragmentation.
// Fragmentation is disabled by default. If fragmentation is enabled,
// etcd watch server will split watch response before sending to clients
// when the total size of watch events exceed server-side request limit.
// The default server-side request limit is 1.5 MiB, which can be configured
// as "--max-request-bytes" flag value + gRPC-overhead 512 bytes.
// See "etcdserver/api/v3rpc/watch.go" for more details.
func WithFragment() OpOption {
return func(op *Op) { op.fragment = true }
}

View File

@@ -16,17 +16,17 @@ package clientv3
import (
"math"
"time"
"google.golang.org/grpc"
)
var (
// Disable gRPC internal retrial logic
// TODO: enable when gRPC retry is stable (FailFast=false)
// Reference:
// - https://github.com/grpc/grpc-go/issues/1532
// - https://github.com/grpc/proposal/blob/master/A6-client-retries.md
defaultFailFast = grpc.FailFast(true)
// client-side handling retrying of request failures where data was not written to the wire or
// where server indicates it did not process the data. gRPC default is default is "FailFast(true)"
// but for etcd we default to "FailFast(false)" to minimize client request error responses due to
// transient failures.
defaultFailFast = grpc.FailFast(false)
// client-side request send limit, gRPC default is math.MaxInt32
// Make sure that "client-side send limit < server-side default send/recv limit"
@@ -38,6 +38,22 @@ var (
// because range response can easily exceed request send limits
// Default to math.MaxInt32; writes exceeding server-side send limit fails anyway
defaultMaxCallRecvMsgSize = grpc.MaxCallRecvMsgSize(math.MaxInt32)
// client-side non-streaming retry limit, only applied to requests where server responds with
// a error code clearly indicating it was unable to process the request such as codes.Unavailable.
// If set to 0, retry is disabled.
defaultUnaryMaxRetries uint = 100
// client-side streaming retry limit, only applied to requests where server responds with
// a error code clearly indicating it was unable to process the request such as codes.Unavailable.
// If set to 0, retry is disabled.
defaultStreamMaxRetries = ^uint(0) // max uint
// client-side retry backoff wait between requests.
defaultBackoffWaitBetween = 25 * time.Millisecond
// client-side retry backoff default jitter fraction.
defaultBackoffJitterFraction = 0.10
)
// defaultCallOpts defines a list of default "gRPC.CallOption".

View File

@@ -1,30 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import "context"
// TODO: remove this when "FailFast=false" is fixed.
// See https://github.com/grpc/grpc-go/issues/1532.
func readyWait(rpcCtx, clientCtx context.Context, ready <-chan struct{}) error {
select {
case <-ready:
return nil
case <-rpcCtx.Done():
return rpcCtx.Err()
case <-clientCtx.Done():
return clientCtx.Err()
}
}

View File

@@ -32,465 +32,263 @@ const (
nonRepeatable
)
type rpcFunc func(ctx context.Context) error
type retryRPCFunc func(context.Context, rpcFunc, retryPolicy) error
type retryStopErrFunc func(error) bool
func (rp retryPolicy) String() string {
switch rp {
case repeatable:
return "repeatable"
case nonRepeatable:
return "nonRepeatable"
default:
return "UNKNOWN"
}
}
// isSafeRetryImmutableRPC returns "true" when an immutable request is safe for retry.
//
// immutable requests (e.g. Get) should be retried unless it's
// an obvious server-side error (e.g. rpctypes.ErrRequestTooLarge).
//
// "isRepeatableStopError" returns "true" when an immutable request
// is interrupted by server-side or gRPC-side error and its status
// code is not transient (!= codes.Unavailable).
//
// Returning "true" means retry should stop, since client cannot
// Returning "false" means retry should stop, since client cannot
// handle itself even with retries.
func isRepeatableStopError(err error) bool {
func isSafeRetryImmutableRPC(err error) bool {
eErr := rpctypes.Error(err)
// always stop retry on etcd errors
if serverErr, ok := eErr.(rpctypes.EtcdError); ok && serverErr.Code() != codes.Unavailable {
return true
// interrupted by non-transient server-side or gRPC-side error
// client cannot handle itself (e.g. rpctypes.ErrCompacted)
return false
}
// only retry if unavailable
ev, _ := status.FromError(err)
return ev.Code() != codes.Unavailable
ev, ok := status.FromError(err)
if !ok {
// all errors from RPC is typed "grpc/status.(*statusError)"
// (ref. https://github.com/grpc/grpc-go/pull/1782)
//
// if the error type is not "grpc/status.(*statusError)",
// it could be from "Dial"
// TODO: do not retry for now
// ref. https://github.com/grpc/grpc-go/issues/1581
return false
}
return ev.Code() == codes.Unavailable
}
// isSafeRetryMutableRPC returns "true" when a mutable request is safe for retry.
//
// mutable requests (e.g. Put, Delete, Txn) should only be retried
// when the status code is codes.Unavailable when initial connection
// has not been established (no pinned endpoint).
// has not been established (no endpoint is up).
//
// "isNonRepeatableStopError" returns "true" when a mutable request
// is interrupted by non-transient error that client cannot handle itself,
// or transient error while the connection has already been established
// (pinned endpoint exists).
//
// Returning "true" means retry should stop, otherwise it violates
// Returning "false" means retry should stop, otherwise it violates
// write-at-most-once semantics.
func isNonRepeatableStopError(err error) bool {
ev, _ := status.FromError(err)
if ev.Code() != codes.Unavailable {
return true
func isSafeRetryMutableRPC(err error) bool {
if ev, ok := status.FromError(err); ok && ev.Code() != codes.Unavailable {
// not safe for mutable RPCs
// e.g. interrupted by non-transient error that client cannot handle itself,
// or transient error while the connection has already been established
return false
}
desc := rpctypes.ErrorDesc(err)
return desc != "there is no address available" && desc != "there is no connection available"
}
func (c *Client) newRetryWrapper() retryRPCFunc {
return func(rpcCtx context.Context, f rpcFunc, rp retryPolicy) error {
var isStop retryStopErrFunc
switch rp {
case repeatable:
isStop = isRepeatableStopError
case nonRepeatable:
isStop = isNonRepeatableStopError
}
for {
if err := readyWait(rpcCtx, c.ctx, c.balancer.ConnectNotify()); err != nil {
return err
}
pinned := c.balancer.pinned()
err := f(rpcCtx)
if err == nil {
return nil
}
logger.Lvl(4).Infof("clientv3/retry: error %q on pinned endpoint %q", err.Error(), pinned)
if s, ok := status.FromError(err); ok && (s.Code() == codes.Unavailable || s.Code() == codes.DeadlineExceeded || s.Code() == codes.Internal) {
// mark this before endpoint switch is triggered
c.balancer.hostPortError(pinned, err)
c.balancer.next()
logger.Lvl(4).Infof("clientv3/retry: switching from %q due to error %q", pinned, err.Error())
}
if isStop(err) {
return err
}
}
}
}
func (c *Client) newAuthRetryWrapper(retryf retryRPCFunc) retryRPCFunc {
return func(rpcCtx context.Context, f rpcFunc, rp retryPolicy) error {
for {
pinned := c.balancer.pinned()
err := retryf(rpcCtx, f, rp)
if err == nil {
return nil
}
logger.Lvl(4).Infof("clientv3/auth-retry: error %q on pinned endpoint %q", err.Error(), pinned)
// always stop retry on etcd errors other than invalid auth token
if rpctypes.Error(err) == rpctypes.ErrInvalidAuthToken {
gterr := c.getToken(rpcCtx)
if gterr != nil {
logger.Lvl(4).Infof("clientv3/auth-retry: cannot retry due to error %q(%q) on pinned endpoint %q", err.Error(), gterr.Error(), pinned)
return err // return the original error for simplicity
}
continue
}
return err
}
}
return desc == "there is no address available" || desc == "there is no connection available"
}
type retryKVClient struct {
kc pb.KVClient
retryf retryRPCFunc
kc pb.KVClient
}
// RetryKVClient implements a KVClient.
func RetryKVClient(c *Client) pb.KVClient {
return &retryKVClient{
kc: pb.NewKVClient(c.conn),
retryf: c.newAuthRetryWrapper(c.newRetryWrapper()),
kc: pb.NewKVClient(c.conn),
}
}
func (rkv *retryKVClient) Range(ctx context.Context, in *pb.RangeRequest, opts ...grpc.CallOption) (resp *pb.RangeResponse, err error) {
err = rkv.retryf(ctx, func(rctx context.Context) error {
resp, err = rkv.kc.Range(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rkv.kc.Range(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rkv *retryKVClient) Put(ctx context.Context, in *pb.PutRequest, opts ...grpc.CallOption) (resp *pb.PutResponse, err error) {
err = rkv.retryf(ctx, func(rctx context.Context) error {
resp, err = rkv.kc.Put(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rkv.kc.Put(ctx, in, opts...)
}
func (rkv *retryKVClient) DeleteRange(ctx context.Context, in *pb.DeleteRangeRequest, opts ...grpc.CallOption) (resp *pb.DeleteRangeResponse, err error) {
err = rkv.retryf(ctx, func(rctx context.Context) error {
resp, err = rkv.kc.DeleteRange(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rkv.kc.DeleteRange(ctx, in, opts...)
}
func (rkv *retryKVClient) Txn(ctx context.Context, in *pb.TxnRequest, opts ...grpc.CallOption) (resp *pb.TxnResponse, err error) {
// TODO: "repeatable" for read-only txn
err = rkv.retryf(ctx, func(rctx context.Context) error {
resp, err = rkv.kc.Txn(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rkv.kc.Txn(ctx, in, opts...)
}
func (rkv *retryKVClient) Compact(ctx context.Context, in *pb.CompactionRequest, opts ...grpc.CallOption) (resp *pb.CompactionResponse, err error) {
err = rkv.retryf(ctx, func(rctx context.Context) error {
resp, err = rkv.kc.Compact(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rkv.kc.Compact(ctx, in, opts...)
}
type retryLeaseClient struct {
lc pb.LeaseClient
retryf retryRPCFunc
lc pb.LeaseClient
}
// RetryLeaseClient implements a LeaseClient.
func RetryLeaseClient(c *Client) pb.LeaseClient {
return &retryLeaseClient{
lc: pb.NewLeaseClient(c.conn),
retryf: c.newAuthRetryWrapper(c.newRetryWrapper()),
lc: pb.NewLeaseClient(c.conn),
}
}
func (rlc *retryLeaseClient) LeaseTimeToLive(ctx context.Context, in *pb.LeaseTimeToLiveRequest, opts ...grpc.CallOption) (resp *pb.LeaseTimeToLiveResponse, err error) {
err = rlc.retryf(ctx, func(rctx context.Context) error {
resp, err = rlc.lc.LeaseTimeToLive(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rlc.lc.LeaseTimeToLive(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rlc *retryLeaseClient) LeaseLeases(ctx context.Context, in *pb.LeaseLeasesRequest, opts ...grpc.CallOption) (resp *pb.LeaseLeasesResponse, err error) {
err = rlc.retryf(ctx, func(rctx context.Context) error {
resp, err = rlc.lc.LeaseLeases(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rlc.lc.LeaseLeases(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rlc *retryLeaseClient) LeaseGrant(ctx context.Context, in *pb.LeaseGrantRequest, opts ...grpc.CallOption) (resp *pb.LeaseGrantResponse, err error) {
err = rlc.retryf(ctx, func(rctx context.Context) error {
resp, err = rlc.lc.LeaseGrant(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rlc.lc.LeaseGrant(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rlc *retryLeaseClient) LeaseRevoke(ctx context.Context, in *pb.LeaseRevokeRequest, opts ...grpc.CallOption) (resp *pb.LeaseRevokeResponse, err error) {
err = rlc.retryf(ctx, func(rctx context.Context) error {
resp, err = rlc.lc.LeaseRevoke(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rlc.lc.LeaseRevoke(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rlc *retryLeaseClient) LeaseKeepAlive(ctx context.Context, opts ...grpc.CallOption) (stream pb.Lease_LeaseKeepAliveClient, err error) {
err = rlc.retryf(ctx, func(rctx context.Context) error {
stream, err = rlc.lc.LeaseKeepAlive(rctx, opts...)
return err
}, repeatable)
return stream, err
return rlc.lc.LeaseKeepAlive(ctx, append(opts, withRetryPolicy(repeatable))...)
}
type retryClusterClient struct {
cc pb.ClusterClient
retryf retryRPCFunc
cc pb.ClusterClient
}
// RetryClusterClient implements a ClusterClient.
func RetryClusterClient(c *Client) pb.ClusterClient {
return &retryClusterClient{
cc: pb.NewClusterClient(c.conn),
retryf: c.newRetryWrapper(),
cc: pb.NewClusterClient(c.conn),
}
}
func (rcc *retryClusterClient) MemberList(ctx context.Context, in *pb.MemberListRequest, opts ...grpc.CallOption) (resp *pb.MemberListResponse, err error) {
err = rcc.retryf(ctx, func(rctx context.Context) error {
resp, err = rcc.cc.MemberList(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rcc.cc.MemberList(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rcc *retryClusterClient) MemberAdd(ctx context.Context, in *pb.MemberAddRequest, opts ...grpc.CallOption) (resp *pb.MemberAddResponse, err error) {
err = rcc.retryf(ctx, func(rctx context.Context) error {
resp, err = rcc.cc.MemberAdd(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rcc.cc.MemberAdd(ctx, in, opts...)
}
func (rcc *retryClusterClient) MemberRemove(ctx context.Context, in *pb.MemberRemoveRequest, opts ...grpc.CallOption) (resp *pb.MemberRemoveResponse, err error) {
err = rcc.retryf(ctx, func(rctx context.Context) error {
resp, err = rcc.cc.MemberRemove(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rcc.cc.MemberRemove(ctx, in, opts...)
}
func (rcc *retryClusterClient) MemberUpdate(ctx context.Context, in *pb.MemberUpdateRequest, opts ...grpc.CallOption) (resp *pb.MemberUpdateResponse, err error) {
err = rcc.retryf(ctx, func(rctx context.Context) error {
resp, err = rcc.cc.MemberUpdate(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rcc.cc.MemberUpdate(ctx, in, opts...)
}
type retryMaintenanceClient struct {
mc pb.MaintenanceClient
retryf retryRPCFunc
mc pb.MaintenanceClient
}
// RetryMaintenanceClient implements a Maintenance.
func RetryMaintenanceClient(c *Client, conn *grpc.ClientConn) pb.MaintenanceClient {
return &retryMaintenanceClient{
mc: pb.NewMaintenanceClient(conn),
retryf: c.newRetryWrapper(),
mc: pb.NewMaintenanceClient(conn),
}
}
func (rmc *retryMaintenanceClient) Alarm(ctx context.Context, in *pb.AlarmRequest, opts ...grpc.CallOption) (resp *pb.AlarmResponse, err error) {
err = rmc.retryf(ctx, func(rctx context.Context) error {
resp, err = rmc.mc.Alarm(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rmc.mc.Alarm(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rmc *retryMaintenanceClient) Status(ctx context.Context, in *pb.StatusRequest, opts ...grpc.CallOption) (resp *pb.StatusResponse, err error) {
err = rmc.retryf(ctx, func(rctx context.Context) error {
resp, err = rmc.mc.Status(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rmc.mc.Status(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rmc *retryMaintenanceClient) Hash(ctx context.Context, in *pb.HashRequest, opts ...grpc.CallOption) (resp *pb.HashResponse, err error) {
err = rmc.retryf(ctx, func(rctx context.Context) error {
resp, err = rmc.mc.Hash(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rmc.mc.Hash(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rmc *retryMaintenanceClient) HashKV(ctx context.Context, in *pb.HashKVRequest, opts ...grpc.CallOption) (resp *pb.HashKVResponse, err error) {
err = rmc.retryf(ctx, func(rctx context.Context) error {
resp, err = rmc.mc.HashKV(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rmc.mc.HashKV(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rmc *retryMaintenanceClient) Snapshot(ctx context.Context, in *pb.SnapshotRequest, opts ...grpc.CallOption) (stream pb.Maintenance_SnapshotClient, err error) {
err = rmc.retryf(ctx, func(rctx context.Context) error {
stream, err = rmc.mc.Snapshot(rctx, in, opts...)
return err
}, repeatable)
return stream, err
return rmc.mc.Snapshot(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rmc *retryMaintenanceClient) MoveLeader(ctx context.Context, in *pb.MoveLeaderRequest, opts ...grpc.CallOption) (resp *pb.MoveLeaderResponse, err error) {
err = rmc.retryf(ctx, func(rctx context.Context) error {
resp, err = rmc.mc.MoveLeader(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rmc.mc.MoveLeader(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rmc *retryMaintenanceClient) Defragment(ctx context.Context, in *pb.DefragmentRequest, opts ...grpc.CallOption) (resp *pb.DefragmentResponse, err error) {
err = rmc.retryf(ctx, func(rctx context.Context) error {
resp, err = rmc.mc.Defragment(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rmc.mc.Defragment(ctx, in, opts...)
}
type retryAuthClient struct {
ac pb.AuthClient
retryf retryRPCFunc
ac pb.AuthClient
}
// RetryAuthClient implements a AuthClient.
func RetryAuthClient(c *Client) pb.AuthClient {
return &retryAuthClient{
ac: pb.NewAuthClient(c.conn),
retryf: c.newRetryWrapper(),
ac: pb.NewAuthClient(c.conn),
}
}
func (rac *retryAuthClient) UserList(ctx context.Context, in *pb.AuthUserListRequest, opts ...grpc.CallOption) (resp *pb.AuthUserListResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.UserList(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rac.ac.UserList(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rac *retryAuthClient) UserGet(ctx context.Context, in *pb.AuthUserGetRequest, opts ...grpc.CallOption) (resp *pb.AuthUserGetResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.UserGet(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rac.ac.UserGet(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rac *retryAuthClient) RoleGet(ctx context.Context, in *pb.AuthRoleGetRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleGetResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.RoleGet(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rac.ac.RoleGet(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rac *retryAuthClient) RoleList(ctx context.Context, in *pb.AuthRoleListRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleListResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.RoleList(rctx, in, opts...)
return err
}, repeatable)
return resp, err
return rac.ac.RoleList(ctx, in, append(opts, withRetryPolicy(repeatable))...)
}
func (rac *retryAuthClient) AuthEnable(ctx context.Context, in *pb.AuthEnableRequest, opts ...grpc.CallOption) (resp *pb.AuthEnableResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.AuthEnable(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.AuthEnable(ctx, in, opts...)
}
func (rac *retryAuthClient) AuthDisable(ctx context.Context, in *pb.AuthDisableRequest, opts ...grpc.CallOption) (resp *pb.AuthDisableResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.AuthDisable(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.AuthDisable(ctx, in, opts...)
}
func (rac *retryAuthClient) UserAdd(ctx context.Context, in *pb.AuthUserAddRequest, opts ...grpc.CallOption) (resp *pb.AuthUserAddResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.UserAdd(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.UserAdd(ctx, in, opts...)
}
func (rac *retryAuthClient) UserDelete(ctx context.Context, in *pb.AuthUserDeleteRequest, opts ...grpc.CallOption) (resp *pb.AuthUserDeleteResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.UserDelete(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.UserDelete(ctx, in, opts...)
}
func (rac *retryAuthClient) UserChangePassword(ctx context.Context, in *pb.AuthUserChangePasswordRequest, opts ...grpc.CallOption) (resp *pb.AuthUserChangePasswordResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.UserChangePassword(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.UserChangePassword(ctx, in, opts...)
}
func (rac *retryAuthClient) UserGrantRole(ctx context.Context, in *pb.AuthUserGrantRoleRequest, opts ...grpc.CallOption) (resp *pb.AuthUserGrantRoleResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.UserGrantRole(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.UserGrantRole(ctx, in, opts...)
}
func (rac *retryAuthClient) UserRevokeRole(ctx context.Context, in *pb.AuthUserRevokeRoleRequest, opts ...grpc.CallOption) (resp *pb.AuthUserRevokeRoleResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.UserRevokeRole(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.UserRevokeRole(ctx, in, opts...)
}
func (rac *retryAuthClient) RoleAdd(ctx context.Context, in *pb.AuthRoleAddRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleAddResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.RoleAdd(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.RoleAdd(ctx, in, opts...)
}
func (rac *retryAuthClient) RoleDelete(ctx context.Context, in *pb.AuthRoleDeleteRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleDeleteResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.RoleDelete(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.RoleDelete(ctx, in, opts...)
}
func (rac *retryAuthClient) RoleGrantPermission(ctx context.Context, in *pb.AuthRoleGrantPermissionRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleGrantPermissionResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.RoleGrantPermission(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.RoleGrantPermission(ctx, in, opts...)
}
func (rac *retryAuthClient) RoleRevokePermission(ctx context.Context, in *pb.AuthRoleRevokePermissionRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleRevokePermissionResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.RoleRevokePermission(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.RoleRevokePermission(ctx, in, opts...)
}
func (rac *retryAuthClient) Authenticate(ctx context.Context, in *pb.AuthenticateRequest, opts ...grpc.CallOption) (resp *pb.AuthenticateResponse, err error) {
err = rac.retryf(ctx, func(rctx context.Context) error {
resp, err = rac.ac.Authenticate(rctx, in, opts...)
return err
}, nonRepeatable)
return resp, err
return rac.ac.Authenticate(ctx, in, opts...)
}

View File

@@ -0,0 +1,392 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Based on github.com/grpc-ecosystem/go-grpc-middleware/retry, but modified to support the more
// fine grained error checking required by write-at-most-once retry semantics of etcd.
package clientv3
import (
"context"
"io"
"sync"
"time"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
)
// unaryClientInterceptor returns a new retrying unary client interceptor.
//
// The default configuration of the interceptor is to not retry *at all*. This behaviour can be
// changed through options (e.g. WithMax) on creation of the interceptor or on call (through grpc.CallOptions).
func (c *Client) unaryClientInterceptor(logger *zap.Logger, optFuncs ...retryOption) grpc.UnaryClientInterceptor {
intOpts := reuseOrNewWithCallOptions(defaultOptions, optFuncs)
return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
ctx = withVersion(ctx)
grpcOpts, retryOpts := filterCallOptions(opts)
callOpts := reuseOrNewWithCallOptions(intOpts, retryOpts)
// short circuit for simplicity, and avoiding allocations.
if callOpts.max == 0 {
return invoker(ctx, method, req, reply, cc, grpcOpts...)
}
var lastErr error
for attempt := uint(0); attempt < callOpts.max; attempt++ {
if err := waitRetryBackoff(ctx, attempt, callOpts); err != nil {
return err
}
logger.Debug(
"retrying of unary invoker",
zap.String("target", cc.Target()),
zap.Uint("attempt", attempt),
)
lastErr = invoker(ctx, method, req, reply, cc, grpcOpts...)
if lastErr == nil {
return nil
}
logger.Warn(
"retrying of unary invoker failed",
zap.String("target", cc.Target()),
zap.Uint("attempt", attempt),
zap.Error(lastErr),
)
if isContextError(lastErr) {
if ctx.Err() != nil {
// its the context deadline or cancellation.
return lastErr
}
// its the callCtx deadline or cancellation, in which case try again.
continue
}
if callOpts.retryAuth && rpctypes.Error(lastErr) == rpctypes.ErrInvalidAuthToken {
gterr := c.getToken(ctx)
if gterr != nil {
logger.Warn(
"retrying of unary invoker failed to fetch new auth token",
zap.String("target", cc.Target()),
zap.Error(gterr),
)
return gterr // lastErr must be invalid auth token
}
continue
}
if !isSafeRetry(c.lg, lastErr, callOpts) {
return lastErr
}
}
return lastErr
}
}
// streamClientInterceptor returns a new retrying stream client interceptor for server side streaming calls.
//
// The default configuration of the interceptor is to not retry *at all*. This behaviour can be
// changed through options (e.g. WithMax) on creation of the interceptor or on call (through grpc.CallOptions).
//
// Retry logic is available *only for ServerStreams*, i.e. 1:n streams, as the internal logic needs
// to buffer the messages sent by the client. If retry is enabled on any other streams (ClientStreams,
// BidiStreams), the retry interceptor will fail the call.
func (c *Client) streamClientInterceptor(logger *zap.Logger, optFuncs ...retryOption) grpc.StreamClientInterceptor {
intOpts := reuseOrNewWithCallOptions(defaultOptions, optFuncs)
return func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) {
ctx = withVersion(ctx)
grpcOpts, retryOpts := filterCallOptions(opts)
callOpts := reuseOrNewWithCallOptions(intOpts, retryOpts)
// short circuit for simplicity, and avoiding allocations.
if callOpts.max == 0 {
return streamer(ctx, desc, cc, method, grpcOpts...)
}
if desc.ClientStreams {
return nil, status.Errorf(codes.Unimplemented, "clientv3/retry_interceptor: cannot retry on ClientStreams, set Disable()")
}
newStreamer, err := streamer(ctx, desc, cc, method, grpcOpts...)
if err != nil {
logger.Error("streamer failed to create ClientStream", zap.Error(err))
return nil, err // TODO(mwitkow): Maybe dial and transport errors should be retriable?
}
retryingStreamer := &serverStreamingRetryingStream{
client: c,
ClientStream: newStreamer,
callOpts: callOpts,
ctx: ctx,
streamerCall: func(ctx context.Context) (grpc.ClientStream, error) {
return streamer(ctx, desc, cc, method, grpcOpts...)
},
}
return retryingStreamer, nil
}
}
// type serverStreamingRetryingStream is the implementation of grpc.ClientStream that acts as a
// proxy to the underlying call. If any of the RecvMsg() calls fail, it will try to reestablish
// a new ClientStream according to the retry policy.
type serverStreamingRetryingStream struct {
grpc.ClientStream
client *Client
bufferedSends []interface{} // single message that the client can sen
receivedGood bool // indicates whether any prior receives were successful
wasClosedSend bool // indicates that CloseSend was closed
ctx context.Context
callOpts *options
streamerCall func(ctx context.Context) (grpc.ClientStream, error)
mu sync.RWMutex
}
func (s *serverStreamingRetryingStream) setStream(clientStream grpc.ClientStream) {
s.mu.Lock()
s.ClientStream = clientStream
s.mu.Unlock()
}
func (s *serverStreamingRetryingStream) getStream() grpc.ClientStream {
s.mu.RLock()
defer s.mu.RUnlock()
return s.ClientStream
}
func (s *serverStreamingRetryingStream) SendMsg(m interface{}) error {
s.mu.Lock()
s.bufferedSends = append(s.bufferedSends, m)
s.mu.Unlock()
return s.getStream().SendMsg(m)
}
func (s *serverStreamingRetryingStream) CloseSend() error {
s.mu.Lock()
s.wasClosedSend = true
s.mu.Unlock()
return s.getStream().CloseSend()
}
func (s *serverStreamingRetryingStream) Header() (metadata.MD, error) {
return s.getStream().Header()
}
func (s *serverStreamingRetryingStream) Trailer() metadata.MD {
return s.getStream().Trailer()
}
func (s *serverStreamingRetryingStream) RecvMsg(m interface{}) error {
attemptRetry, lastErr := s.receiveMsgAndIndicateRetry(m)
if !attemptRetry {
return lastErr // success or hard failure
}
// We start off from attempt 1, because zeroth was already made on normal SendMsg().
for attempt := uint(1); attempt < s.callOpts.max; attempt++ {
if err := waitRetryBackoff(s.ctx, attempt, s.callOpts); err != nil {
return err
}
newStream, err := s.reestablishStreamAndResendBuffer(s.ctx)
if err != nil {
s.client.lg.Error("failed reestablishStreamAndResendBuffer", zap.Error(err))
return err // TODO(mwitkow): Maybe dial and transport errors should be retriable?
}
s.setStream(newStream)
s.client.lg.Warn("retrying RecvMsg", zap.Error(lastErr))
attemptRetry, lastErr = s.receiveMsgAndIndicateRetry(m)
if !attemptRetry {
return lastErr
}
}
return lastErr
}
func (s *serverStreamingRetryingStream) receiveMsgAndIndicateRetry(m interface{}) (bool, error) {
s.mu.RLock()
wasGood := s.receivedGood
s.mu.RUnlock()
err := s.getStream().RecvMsg(m)
if err == nil || err == io.EOF {
s.mu.Lock()
s.receivedGood = true
s.mu.Unlock()
return false, err
} else if wasGood {
// previous RecvMsg in the stream succeeded, no retry logic should interfere
return false, err
}
if isContextError(err) {
if s.ctx.Err() != nil {
return false, err
}
// its the callCtx deadline or cancellation, in which case try again.
return true, err
}
if s.callOpts.retryAuth && rpctypes.Error(err) == rpctypes.ErrInvalidAuthToken {
gterr := s.client.getToken(s.ctx)
if gterr != nil {
s.client.lg.Warn("retry failed to fetch new auth token", zap.Error(gterr))
return false, err // return the original error for simplicity
}
return true, err
}
return isSafeRetry(s.client.lg, err, s.callOpts), err
}
func (s *serverStreamingRetryingStream) reestablishStreamAndResendBuffer(callCtx context.Context) (grpc.ClientStream, error) {
s.mu.RLock()
bufferedSends := s.bufferedSends
s.mu.RUnlock()
newStream, err := s.streamerCall(callCtx)
if err != nil {
return nil, err
}
for _, msg := range bufferedSends {
if err := newStream.SendMsg(msg); err != nil {
return nil, err
}
}
if err := newStream.CloseSend(); err != nil {
return nil, err
}
return newStream, nil
}
func waitRetryBackoff(ctx context.Context, attempt uint, callOpts *options) error {
waitTime := time.Duration(0)
if attempt > 0 {
waitTime = callOpts.backoffFunc(attempt)
}
if waitTime > 0 {
timer := time.NewTimer(waitTime)
select {
case <-ctx.Done():
timer.Stop()
return contextErrToGrpcErr(ctx.Err())
case <-timer.C:
}
}
return nil
}
// isSafeRetry returns "true", if request is safe for retry with the given error.
func isSafeRetry(lg *zap.Logger, err error, callOpts *options) bool {
if isContextError(err) {
return false
}
switch callOpts.retryPolicy {
case repeatable:
return isSafeRetryImmutableRPC(err)
case nonRepeatable:
return isSafeRetryMutableRPC(err)
default:
lg.Warn("unrecognized retry policy", zap.String("retryPolicy", callOpts.retryPolicy.String()))
return false
}
}
func isContextError(err error) bool {
return grpc.Code(err) == codes.DeadlineExceeded || grpc.Code(err) == codes.Canceled
}
func contextErrToGrpcErr(err error) error {
switch err {
case context.DeadlineExceeded:
return status.Errorf(codes.DeadlineExceeded, err.Error())
case context.Canceled:
return status.Errorf(codes.Canceled, err.Error())
default:
return status.Errorf(codes.Unknown, err.Error())
}
}
var (
defaultOptions = &options{
retryPolicy: nonRepeatable,
max: 0, // disable
backoffFunc: backoffLinearWithJitter(50*time.Millisecond /*jitter*/, 0.10),
retryAuth: true,
}
)
// backoffFunc denotes a family of functions that control the backoff duration between call retries.
//
// They are called with an identifier of the attempt, and should return a time the system client should
// hold off for. If the time returned is longer than the `context.Context.Deadline` of the request
// the deadline of the request takes precedence and the wait will be interrupted before proceeding
// with the next iteration.
type backoffFunc func(attempt uint) time.Duration
// withRetryPolicy sets the retry policy of this call.
func withRetryPolicy(rp retryPolicy) retryOption {
return retryOption{applyFunc: func(o *options) {
o.retryPolicy = rp
}}
}
// withMax sets the maximum number of retries on this call, or this interceptor.
func withMax(maxRetries uint) retryOption {
return retryOption{applyFunc: func(o *options) {
o.max = maxRetries
}}
}
// WithBackoff sets the `BackoffFunc `used to control time between retries.
func withBackoff(bf backoffFunc) retryOption {
return retryOption{applyFunc: func(o *options) {
o.backoffFunc = bf
}}
}
type options struct {
retryPolicy retryPolicy
max uint
backoffFunc backoffFunc
retryAuth bool
}
// retryOption is a grpc.CallOption that is local to clientv3's retry interceptor.
type retryOption struct {
grpc.EmptyCallOption // make sure we implement private after() and before() fields so we don't panic.
applyFunc func(opt *options)
}
func reuseOrNewWithCallOptions(opt *options, retryOptions []retryOption) *options {
if len(retryOptions) == 0 {
return opt
}
optCopy := &options{}
*optCopy = *opt
for _, f := range retryOptions {
f.applyFunc(optCopy)
}
return optCopy
}
func filterCallOptions(callOptions []grpc.CallOption) (grpcOptions []grpc.CallOption, retryOptions []retryOption) {
for _, opt := range callOptions {
if co, ok := opt.(retryOption); ok {
retryOptions = append(retryOptions, co)
} else {
grpcOptions = append(grpcOptions, opt)
}
}
return grpcOptions, retryOptions
}
// BackoffLinearWithJitter waits a set period of time, allowing for jitter (fractional adjustment).
//
// For example waitBetween=1s and jitter=0.10 can generate waits between 900ms and 1100ms.
func backoffLinearWithJitter(waitBetween time.Duration, jitterFraction float64) backoffFunc {
return func(attempt uint) time.Duration {
return jitterUp(waitBetween, jitterFraction)
}
}

16
clientv3/snapshot/doc.go Normal file
View File

@@ -0,0 +1,16 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package snapshot implements utilities around etcd snapshot.
package snapshot

Binary file not shown.

35
clientv3/snapshot/util.go Normal file
View File

@@ -0,0 +1,35 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package snapshot
import "encoding/binary"
type revision struct {
main int64
sub int64
}
func bytesToRev(bytes []byte) revision {
return revision{
main: int64(binary.BigEndian.Uint64(bytes[0:8])),
sub: int64(binary.BigEndian.Uint64(bytes[9:])),
}
}
// initIndex implements ConsistentIndexGetter so the snapshot won't block
// the new raft instance by waiting for a future raft index.
type initIndex int
func (i *initIndex) ConsistentIndex() uint64 { return uint64(*i) }

View File

@@ -0,0 +1,492 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package snapshot
import (
"context"
"crypto/sha256"
"encoding/json"
"fmt"
"hash/crc32"
"io"
"math"
"os"
"path/filepath"
"reflect"
"strings"
"time"
bolt "github.com/coreos/bbolt"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/fileutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
"github.com/coreos/etcd/store"
"github.com/coreos/etcd/wal"
"github.com/coreos/etcd/wal/walpb"
"go.uber.org/zap"
)
// Manager defines snapshot methods.
type Manager interface {
// Save fetches snapshot from remote etcd server and saves data
// to target path. If the context "ctx" is canceled or timed out,
// snapshot save stream will error out (e.g. context.Canceled,
// context.DeadlineExceeded). Make sure to specify only one endpoint
// in client configuration. Snapshot API must be requested to a
// selected node, and saved snapshot is the point-in-time state of
// the selected node.
Save(ctx context.Context, cfg clientv3.Config, dbPath string) error
// Status returns the snapshot file information.
Status(dbPath string) (Status, error)
// Restore restores a new etcd data directory from given snapshot
// file. It returns an error if specified data directory already
// exists, to prevent unintended data directory overwrites.
Restore(cfg RestoreConfig) error
}
// NewV3 returns a new snapshot Manager for v3.x snapshot.
func NewV3(lg *zap.Logger) Manager {
if lg == nil {
lg = zap.NewExample()
}
return &v3Manager{lg: lg}
}
type v3Manager struct {
lg *zap.Logger
name string
dbPath string
walDir string
snapDir string
cl *membership.RaftCluster
skipHashCheck bool
}
// Save fetches snapshot from remote etcd server and saves data to target path.
func (s *v3Manager) Save(ctx context.Context, cfg clientv3.Config, dbPath string) error {
if len(cfg.Endpoints) != 1 {
return fmt.Errorf("snapshot must be requested to one selected node, not multiple %v", cfg.Endpoints)
}
cli, err := clientv3.New(cfg)
if err != nil {
return err
}
defer cli.Close()
partpath := dbPath + ".part"
defer os.RemoveAll(partpath)
var f *os.File
f, err = os.OpenFile(partpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, fileutil.PrivateFileMode)
if err != nil {
return fmt.Errorf("could not open %s (%v)", partpath, err)
}
s.lg.Info(
"created temporary db file",
zap.String("path", partpath),
)
now := time.Now()
var rd io.ReadCloser
rd, err = cli.Snapshot(ctx)
if err != nil {
return err
}
s.lg.Info(
"fetching snapshot",
zap.String("endpoint", cfg.Endpoints[0]),
)
if _, err = io.Copy(f, rd); err != nil {
return err
}
if err = fileutil.Fsync(f); err != nil {
return err
}
if err = f.Close(); err != nil {
return err
}
s.lg.Info(
"fetched snapshot",
zap.String("endpoint", cfg.Endpoints[0]),
zap.Duration("took", time.Since(now)),
)
if err = os.Rename(partpath, dbPath); err != nil {
return fmt.Errorf("could not rename %s to %s (%v)", partpath, dbPath, err)
}
s.lg.Info("saved", zap.String("path", dbPath))
return nil
}
// Status is the snapshot file status.
type Status struct {
Hash uint32 `json:"hash"`
Revision int64 `json:"revision"`
TotalKey int `json:"totalKey"`
TotalSize int64 `json:"totalSize"`
}
// Status returns the snapshot file information.
func (s *v3Manager) Status(dbPath string) (ds Status, err error) {
if _, err = os.Stat(dbPath); err != nil {
return ds, err
}
db, err := bolt.Open(dbPath, 0400, &bolt.Options{ReadOnly: true})
if err != nil {
return ds, err
}
defer db.Close()
h := crc32.New(crc32.MakeTable(crc32.Castagnoli))
if err = db.View(func(tx *bolt.Tx) error {
// check snapshot file integrity first
var dbErrStrings []string
for dbErr := range tx.Check() {
dbErrStrings = append(dbErrStrings, dbErr.Error())
}
if len(dbErrStrings) > 0 {
return fmt.Errorf("snapshot file integrity check failed. %d errors found.\n"+strings.Join(dbErrStrings, "\n"), len(dbErrStrings))
}
ds.TotalSize = tx.Size()
c := tx.Cursor()
for next, _ := c.First(); next != nil; next, _ = c.Next() {
b := tx.Bucket(next)
if b == nil {
return fmt.Errorf("cannot get hash of bucket %s", string(next))
}
h.Write(next)
iskeyb := (string(next) == "key")
b.ForEach(func(k, v []byte) error {
h.Write(k)
h.Write(v)
if iskeyb {
rev := bytesToRev(k)
ds.Revision = rev.main
}
ds.TotalKey++
return nil
})
}
return nil
}); err != nil {
return ds, err
}
ds.Hash = h.Sum32()
return ds, nil
}
// RestoreConfig configures snapshot restore operation.
type RestoreConfig struct {
// SnapshotPath is the path of snapshot file to restore from.
SnapshotPath string
// Name is the human-readable name of this member.
Name string
// OutputDataDir is the target data directory to save restored data.
// OutputDataDir should not conflict with existing etcd data directory.
// If OutputDataDir already exists, it will return an error to prevent
// unintended data directory overwrites.
// If empty, defaults to "[Name].etcd" if not given.
OutputDataDir string
// OutputWALDir is the target WAL data directory.
// If empty, defaults to "[OutputDataDir]/member/wal" if not given.
OutputWALDir string
// PeerURLs is a list of member's peer URLs to advertise to the rest of the cluster.
PeerURLs []string
// InitialCluster is the initial cluster configuration for restore bootstrap.
InitialCluster string
// InitialClusterToken is the initial cluster token for etcd cluster during restore bootstrap.
InitialClusterToken string
// SkipHashCheck is "true" to ignore snapshot integrity hash value
// (required if copied from data directory).
SkipHashCheck bool
}
// Restore restores a new etcd data directory from given snapshot file.
func (s *v3Manager) Restore(cfg RestoreConfig) error {
pURLs, err := types.NewURLs(cfg.PeerURLs)
if err != nil {
return err
}
var ics types.URLsMap
ics, err = types.NewURLsMap(cfg.InitialCluster)
if err != nil {
return err
}
srv := etcdserver.ServerConfig{
Name: cfg.Name,
PeerURLs: pURLs,
InitialPeerURLsMap: ics,
InitialClusterToken: cfg.InitialClusterToken,
}
if err = srv.VerifyBootstrap(); err != nil {
return err
}
s.cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, ics)
if err != nil {
return err
}
dataDir := cfg.OutputDataDir
if dataDir == "" {
dataDir = cfg.Name + ".etcd"
}
if fileutil.Exist(dataDir) {
return fmt.Errorf("data-dir %q exists", dataDir)
}
walDir := cfg.OutputWALDir
if walDir == "" {
walDir = filepath.Join(dataDir, "member", "wal")
} else if fileutil.Exist(walDir) {
return fmt.Errorf("wal-dir %q exists", walDir)
}
s.name = cfg.Name
s.dbPath = cfg.SnapshotPath
s.walDir = walDir
s.snapDir = filepath.Join(dataDir, "member", "snap")
s.skipHashCheck = cfg.SkipHashCheck
s.lg.Info(
"restoring snapshot",
zap.String("path", s.dbPath),
zap.String("wal-dir", s.walDir),
zap.String("data-dir", dataDir),
zap.String("snap-dir", s.snapDir),
)
if err = s.saveDB(); err != nil {
return err
}
if err = s.saveWALAndSnap(); err != nil {
return err
}
s.lg.Info(
"restored snapshot",
zap.String("path", s.dbPath),
zap.String("wal-dir", s.walDir),
zap.String("data-dir", dataDir),
zap.String("snap-dir", s.snapDir),
)
return nil
}
// saveDB copies the database snapshot to the snapshot directory
func (s *v3Manager) saveDB() error {
f, ferr := os.OpenFile(s.dbPath, os.O_RDONLY, 0600)
if ferr != nil {
return ferr
}
defer f.Close()
// get snapshot integrity hash
if _, err := f.Seek(-sha256.Size, io.SeekEnd); err != nil {
return err
}
sha := make([]byte, sha256.Size)
if _, err := f.Read(sha); err != nil {
return err
}
if _, err := f.Seek(0, io.SeekStart); err != nil {
return err
}
if err := fileutil.CreateDirAll(s.snapDir); err != nil {
return err
}
dbpath := filepath.Join(s.snapDir, "db")
db, dberr := os.OpenFile(dbpath, os.O_RDWR|os.O_CREATE, 0600)
if dberr != nil {
return dberr
}
if _, err := io.Copy(db, f); err != nil {
return err
}
// truncate away integrity hash, if any.
off, serr := db.Seek(0, io.SeekEnd)
if serr != nil {
return serr
}
hasHash := (off % 512) == sha256.Size
if hasHash {
if err := db.Truncate(off - sha256.Size); err != nil {
return err
}
}
if !hasHash && !s.skipHashCheck {
return fmt.Errorf("snapshot missing hash but --skip-hash-check=false")
}
if hasHash && !s.skipHashCheck {
// check for match
if _, err := db.Seek(0, io.SeekStart); err != nil {
return err
}
h := sha256.New()
if _, err := io.Copy(h, db); err != nil {
return err
}
dbsha := h.Sum(nil)
if !reflect.DeepEqual(sha, dbsha) {
return fmt.Errorf("expected sha256 %v, got %v", sha, dbsha)
}
}
// db hash is OK, can now modify DB so it can be part of a new cluster
db.Close()
commit := len(s.cl.Members())
// update consistentIndex so applies go through on etcdserver despite
// having a new raft instance
be := backend.NewDefaultBackend(dbpath)
// a lessor never timeouts leases
lessor := lease.NewLessor(be, math.MaxInt64)
mvs := mvcc.NewStore(be, lessor, (*initIndex)(&commit))
txn := mvs.Write()
btx := be.BatchTx()
del := func(k, v []byte) error {
txn.DeleteRange(k, nil)
return nil
}
// delete stored members from old cluster since using new members
btx.UnsafeForEach([]byte("members"), del)
// todo: add back new members when we start to deprecate old snap file.
btx.UnsafeForEach([]byte("members_removed"), del)
// trigger write-out of new consistent index
txn.End()
mvs.Commit()
mvs.Close()
be.Close()
return nil
}
// saveWALAndSnap creates a WAL for the initial cluster
func (s *v3Manager) saveWALAndSnap() error {
if err := fileutil.CreateDirAll(s.walDir); err != nil {
return err
}
// add members again to persist them to the store we create.
st := store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix)
s.cl.SetStore(st)
for _, m := range s.cl.Members() {
s.cl.AddMember(m)
}
m := s.cl.MemberByName(s.name)
md := &etcdserverpb.Metadata{NodeID: uint64(m.ID), ClusterID: uint64(s.cl.ID())}
metadata, merr := md.Marshal()
if merr != nil {
return merr
}
w, walerr := wal.Create(s.walDir, metadata)
if walerr != nil {
return walerr
}
defer w.Close()
peers := make([]raft.Peer, len(s.cl.MemberIDs()))
for i, id := range s.cl.MemberIDs() {
ctx, err := json.Marshal((*s.cl).Member(id))
if err != nil {
return err
}
peers[i] = raft.Peer{ID: uint64(id), Context: ctx}
}
ents := make([]raftpb.Entry, len(peers))
nodeIDs := make([]uint64, len(peers))
for i, p := range peers {
nodeIDs[i] = p.ID
cc := raftpb.ConfChange{
Type: raftpb.ConfChangeAddNode,
NodeID: p.ID,
Context: p.Context,
}
d, err := cc.Marshal()
if err != nil {
return err
}
ents[i] = raftpb.Entry{
Type: raftpb.EntryConfChange,
Term: 1,
Index: uint64(i + 1),
Data: d,
}
}
commit, term := uint64(len(ents)), uint64(1)
if err := w.Save(raftpb.HardState{
Term: term,
Vote: peers[0].ID,
Commit: commit,
}, ents); err != nil {
return err
}
b, berr := st.Save()
if berr != nil {
return berr
}
raftSnap := raftpb.Snapshot{
Data: b,
Metadata: raftpb.SnapshotMetadata{
Index: commit,
Term: term,
ConfState: raftpb.ConfState{
Nodes: nodeIDs,
},
},
}
sn := snap.New(s.snapDir)
if err := sn.SaveSnap(raftSnap); err != nil {
return err
}
return w.SaveSnapshot(walpb.Snapshot{Index: commit, Term: term})
}

49
clientv3/utils.go Normal file
View File

@@ -0,0 +1,49 @@
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"math/rand"
"reflect"
"runtime"
"strings"
"time"
)
// jitterUp adds random jitter to the duration.
//
// This adds or subtracts time from the duration within a given jitter fraction.
// For example for 10s and jitter 0.1, it will return a time within [9s, 11s])
//
// Reference: https://godoc.org/github.com/grpc-ecosystem/go-grpc-middleware/util/backoffutils
func jitterUp(duration time.Duration, jitter float64) time.Duration {
multiplier := jitter * (rand.Float64()*2 - 1)
return time.Duration(float64(duration) * (1 + multiplier))
}
// Check if the provided function is being called in the op options.
func isOpFuncCalled(op string, opts []OpOption) bool {
for _, opt := range opts {
v := reflect.ValueOf(opt)
if v.Kind() == reflect.Func {
if opFunc := runtime.FuncForPC(v.Pointer()); opFunc != nil {
if strings.Contains(opFunc.Name(), op) {
return true
}
}
}
}
return false
}

View File

@@ -16,6 +16,7 @@ package clientv3
import (
"context"
"errors"
"fmt"
"sync"
"time"
@@ -46,8 +47,33 @@ type Watcher interface {
// through the returned channel. If revisions waiting to be sent over the
// watch are compacted, then the watch will be canceled by the server, the
// client will post a compacted error watch response, and the channel will close.
// If the context "ctx" is canceled or timed out, returned "WatchChan" is closed,
// and "WatchResponse" from this closed channel has zero events and nil "Err()".
// The context "ctx" MUST be canceled, as soon as watcher is no longer being used,
// to release the associated resources.
//
// If the context is "context.Background/TODO", returned "WatchChan" will
// not be closed and block until event is triggered, except when server
// returns a non-recoverable error (e.g. ErrCompacted).
// For example, when context passed with "WithRequireLeader" and the
// connected server has no leader (e.g. due to network partition),
// error "etcdserver: no leader" (ErrNoLeader) will be returned,
// and then "WatchChan" is closed with non-nil "Err()".
// In order to prevent a watch stream being stuck in a partitioned node,
// make sure to wrap context with "WithRequireLeader".
//
// Otherwise, as long as the context has not been canceled or timed out,
// watch will retry on other recoverable errors forever until reconnected.
//
// TODO: explicitly set context error in the last "WatchResponse" message and close channel?
// Currently, client contexts are overwritten with "valCtx" that never closes.
// TODO(v3.4): configure watch retry policy, limit maximum retry number
// (see https://github.com/etcd-io/etcd/issues/8980)
Watch(ctx context.Context, key string, opts ...OpOption) WatchChan
// RequestProgress requests a progress notify response be sent in all watch channels.
RequestProgress(ctx context.Context) error
// Close closes the watcher and cancels all watch requests.
Close() error
}
@@ -134,7 +160,7 @@ type watchGrpcStream struct {
resuming []*watcherStream
// reqc sends a watch request from Watch() to the main goroutine
reqc chan *watchRequest
reqc chan watchStreamRequest
// respc receives data from the watch client
respc chan *pb.WatchResponse
// donec closes to broadcast shutdown
@@ -152,16 +178,27 @@ type watchGrpcStream struct {
closeErr error
}
// watchStreamRequest is a union of the supported watch request operation types
type watchStreamRequest interface {
toPB() *pb.WatchRequest
}
// watchRequest is issued by the subscriber to start a new watcher
type watchRequest struct {
ctx context.Context
key string
end string
rev int64
// send created notification event if this field is true
createdNotify bool
// progressNotify is for progress updates
progressNotify bool
// fragmentation should be disabled by default
// if true, split watch events when total exceeds
// "--max-request-bytes" flag value + 512-byte
fragment bool
// filters is the list of events to filter out
filters []pb.WatchCreateRequest_FilterType
// get the previous key-value pair before the event happens
@@ -170,6 +207,10 @@ type watchRequest struct {
retc chan chan WatchResponse
}
// progressRequest is issued by the subscriber to request watch progress
type progressRequest struct {
}
// watcherStream represents a registered watcher
type watcherStream struct {
// initReq is the request that initiated this request
@@ -227,7 +268,7 @@ func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
cancel: cancel,
substreams: make(map[int64]*watcherStream),
respc: make(chan *pb.WatchResponse),
reqc: make(chan *watchRequest),
reqc: make(chan watchStreamRequest),
donec: make(chan struct{}),
errc: make(chan error, 1),
closingc: make(chan *watcherStream),
@@ -256,6 +297,7 @@ func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) Watch
end: string(ow.end),
rev: ow.rev,
progressNotify: ow.progressNotify,
fragment: ow.fragment,
filters: filters,
prevKV: ow.prevKV,
retc: make(chan chan WatchResponse, 1),
@@ -292,7 +334,7 @@ func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) Watch
case <-wr.ctx.Done():
case <-donec:
if wgs.closeErr != nil {
closeCh <- WatchResponse{closeErr: wgs.closeErr}
closeCh <- WatchResponse{Canceled: true, closeErr: wgs.closeErr}
break
}
// retry; may have dropped stream from no ctxs
@@ -307,7 +349,7 @@ func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) Watch
case <-ctx.Done():
case <-donec:
if wgs.closeErr != nil {
closeCh <- WatchResponse{closeErr: wgs.closeErr}
closeCh <- WatchResponse{Canceled: true, closeErr: wgs.closeErr}
break
}
// retry; may have dropped stream from no ctxs
@@ -329,9 +371,50 @@ func (w *watcher) Close() (err error) {
err = werr
}
}
// Consider context.Canceled as a successful close
if err == context.Canceled {
err = nil
}
return err
}
// RequestProgress requests a progress notify response be sent in all watch channels.
func (w *watcher) RequestProgress(ctx context.Context) (err error) {
ctxKey := streamKeyFromCtx(ctx)
w.mu.Lock()
if w.streams == nil {
w.mu.Unlock()
return fmt.Errorf("no stream found for context")
}
wgs := w.streams[ctxKey]
if wgs == nil {
wgs = w.newWatcherGrpcStream(ctx)
w.streams[ctxKey] = wgs
}
donec := wgs.donec
reqc := wgs.reqc
w.mu.Unlock()
pr := &progressRequest{}
select {
case reqc <- pr:
return nil
case <-ctx.Done():
if err == nil {
return ctx.Err()
}
return err
case <-donec:
if wgs.closeErr != nil {
return wgs.closeErr
}
// retry; may have dropped stream from no ctxs
return w.RequestProgress(ctx)
}
}
func (w *watchGrpcStream) close() (err error) {
w.cancel()
<-w.donec
@@ -353,7 +436,9 @@ func (w *watcher) closeStream(wgs *watchGrpcStream) {
}
func (w *watchGrpcStream) addSubstream(resp *pb.WatchResponse, ws *watcherStream) {
if resp.WatchId == -1 {
// check watch ID for backward compatibility (<= v3.3)
if resp.WatchId == -1 || (resp.Canceled && resp.CancelReason != "") {
w.closeErr = v3rpc.Error(errors.New(resp.CancelReason))
// failed; no channel
close(ws.recvc)
return
@@ -379,7 +464,7 @@ func (w *watchGrpcStream) closeSubstream(ws *watcherStream) {
}
// close subscriber's channel
if closeErr := w.closeErr; closeErr != nil && ws.initReq.ctx.Err() == nil {
go w.sendCloseSubstream(ws, &WatchResponse{closeErr: w.closeErr})
go w.sendCloseSubstream(ws, &WatchResponse{Canceled: true, closeErr: w.closeErr})
} else if ws.outc != nil {
close(ws.outc)
}
@@ -434,31 +519,48 @@ func (w *watchGrpcStream) run() {
cancelSet := make(map[int64]struct{})
var cur *pb.WatchResponse
for {
select {
// Watch() requested
case wreq := <-w.reqc:
outc := make(chan WatchResponse, 1)
ws := &watcherStream{
initReq: *wreq,
id: -1,
outc: outc,
// unbuffered so resumes won't cause repeat events
recvc: make(chan *WatchResponse),
case req := <-w.reqc:
switch wreq := req.(type) {
case *watchRequest:
outc := make(chan WatchResponse, 1)
// TODO: pass custom watch ID?
ws := &watcherStream{
initReq: *wreq,
id: -1,
outc: outc,
// unbuffered so resumes won't cause repeat events
recvc: make(chan *WatchResponse),
}
ws.donec = make(chan struct{})
w.wg.Add(1)
go w.serveSubstream(ws, w.resumec)
// queue up for watcher creation/resume
w.resuming = append(w.resuming, ws)
if len(w.resuming) == 1 {
// head of resume queue, can register a new watcher
wc.Send(ws.initReq.toPB())
}
case *progressRequest:
wc.Send(wreq.toPB())
}
ws.donec = make(chan struct{})
w.wg.Add(1)
go w.serveSubstream(ws, w.resumec)
// queue up for watcher creation/resume
w.resuming = append(w.resuming, ws)
if len(w.resuming) == 1 {
// head of resume queue, can register a new watcher
wc.Send(ws.initReq.toPB())
}
// New events from the watch client
// new events from the watch client
case pbresp := <-w.respc:
if cur == nil || pbresp.Created || pbresp.Canceled {
cur = pbresp
} else if cur != nil && cur.WatchId == pbresp.WatchId {
// merge new events
cur.Events = append(cur.Events, pbresp.Events...)
// update "Fragment" field; last response with "Fragment" == false
cur.Fragment = pbresp.Fragment
}
switch {
case pbresp.Created:
// response to head of queue creation
@@ -467,9 +569,14 @@ func (w *watchGrpcStream) run() {
w.dispatchEvent(pbresp)
w.resuming[0] = nil
}
if ws := w.nextResume(); ws != nil {
wc.Send(ws.initReq.toPB())
}
// reset for next iteration
cur = nil
case pbresp.Canceled && pbresp.CompactRevision == 0:
delete(cancelSet, pbresp.WatchId)
if ws, ok := w.substreams[pbresp.WatchId]; ok {
@@ -477,15 +584,31 @@ func (w *watchGrpcStream) run() {
close(ws.recvc)
closing[ws] = struct{}{}
}
// reset for next iteration
cur = nil
case cur.Fragment:
// watch response events are still fragmented
// continue to fetch next fragmented event arrival
continue
default:
// dispatch to appropriate watch stream
if ok := w.dispatchEvent(pbresp); ok {
ok := w.dispatchEvent(cur)
// reset for next iteration
cur = nil
if ok {
break
}
// watch response on unexpected watch id; cancel id
if _, ok := cancelSet[pbresp.WatchId]; ok {
break
}
cancelSet[pbresp.WatchId] = struct{}{}
cr := &pb.WatchRequest_CancelRequest{
CancelRequest: &pb.WatchCancelRequest{
@@ -495,6 +618,7 @@ func (w *watchGrpcStream) run() {
req := &pb.WatchRequest{RequestUnion: cr}
wc.Send(req)
}
// watch client failed on Recv; spawn another if possible
case err := <-w.errc:
if isHaltErr(w.ctx, err) || toErr(w.ctx, err) == v3rpc.ErrNoLeader {
@@ -508,13 +632,15 @@ func (w *watchGrpcStream) run() {
wc.Send(ws.initReq.toPB())
}
cancelSet = make(map[int64]struct{})
case <-w.ctx.Done():
return
case ws := <-w.closingc:
w.closeSubstream(ws)
delete(closing, ws)
// no more watchers on this stream, shutdown
if len(w.substreams)+len(w.resuming) == 0 {
// no more watchers on this stream, shutdown
return
}
}
@@ -539,6 +665,7 @@ func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
for i, ev := range pbresp.Events {
events[i] = (*Event)(ev)
}
// TODO: return watch ID?
wr := &WatchResponse{
Header: *pbresp.Header,
Events: events,
@@ -547,7 +674,31 @@ func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
Canceled: pbresp.Canceled,
cancelReason: pbresp.CancelReason,
}
ws, ok := w.substreams[pbresp.WatchId]
// watch IDs are zero indexed, so request notify watch responses are assigned a watch ID of -1 to
// indicate they should be broadcast.
if wr.IsProgressNotify() && pbresp.WatchId == -1 {
return w.broadcastResponse(wr)
}
return w.unicastResponse(wr, pbresp.WatchId)
}
// broadcastResponse send a watch response to all watch substreams.
func (w *watchGrpcStream) broadcastResponse(wr *WatchResponse) bool {
for _, ws := range w.substreams {
select {
case ws.recvc <- wr:
case <-ws.donec:
}
}
return true
}
// unicastResponse sends a watch response to a specific watch substream.
func (w *watchGrpcStream) unicastResponse(wr *WatchResponse, watchId int64) bool {
ws, ok := w.substreams[watchId]
if !ok {
return false
}
@@ -815,11 +966,19 @@ func (wr *watchRequest) toPB() *pb.WatchRequest {
ProgressNotify: wr.progressNotify,
Filters: wr.filters,
PrevKv: wr.prevKV,
Fragment: wr.fragment,
}
cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
return &pb.WatchRequest{RequestUnion: cr}
}
// toPB converts an internal progress request structure to its protobuf WatchRequest structure.
func (pr *progressRequest) toPB() *pb.WatchRequest {
req := &pb.WatchProgressRequest{}
cr := &pb.WatchRequest_ProgressRequest{ProgressRequest: req}
return &pb.WatchRequest{RequestUnion: cr}
}
func streamKeyFromCtx(ctx context.Context) string {
if md, ok := metadata.FromOutgoingContext(ctx); ok {
return fmt.Sprintf("%+v", md)

View File

@@ -20,7 +20,7 @@ import (
"crypto/x509"
"io/ioutil"
"github.com/ghodss/yaml"
"sigs.k8s.io/yaml"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/tlsutil"

View File

@@ -21,7 +21,7 @@ import (
"reflect"
"testing"
"github.com/ghodss/yaml"
"sigs.k8s.io/yaml"
)
var (

View File

@@ -1,4 +0,0 @@
## cmd
This directory is meant to enforce vendoring for etcd binaries without polluting
the etcd client libraries with vendored dependencies.

View File

@@ -1 +0,0 @@
../

View File

@@ -1 +0,0 @@
../etcdctl

View File

@@ -1 +0,0 @@
../functional

View File

@@ -1 +0,0 @@
../tools

View File

@@ -1,10 +0,0 @@
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View File

@@ -1,10 +0,0 @@
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View File

@@ -1,28 +0,0 @@
package bolt
import "unsafe"
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned bool
func init() {
// Simple check to see whether this arch handles unaligned load/stores
// correctly.
// ARM9 and older devices require load/stores to be from/to aligned
// addresses. If not, the lower 2 bits are cleared and that address is
// read in a jumbled up order.
// See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html
raw := [6]byte{0xfe, 0xef, 0x11, 0x22, 0x22, 0x11}
val := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&raw)) + 2))
brokenUnaligned = val != 0x11222211
}

View File

@@ -1,12 +0,0 @@
// +build arm64
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View File

@@ -1,10 +0,0 @@
package bolt
import (
"syscall"
)
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return syscall.Fdatasync(int(db.file.Fd()))
}

View File

@@ -1,12 +0,0 @@
// +build mips64 mips64le
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x8000000000 // 512GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View File

@@ -1,12 +0,0 @@
// +build mips mipsle
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x40000000 // 1GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View File

@@ -1,27 +0,0 @@
package bolt
import (
"syscall"
"unsafe"
)
const (
msAsync = 1 << iota // perform asynchronous writes
msSync // perform synchronous writes
msInvalidate // invalidate cached data
)
func msync(db *DB) error {
_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate)
if errno != 0 {
return errno
}
return nil
}
func fdatasync(db *DB) error {
if db.data != nil {
return msync(db)
}
return db.file.Sync()
}

View File

@@ -1,9 +0,0 @@
// +build ppc
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

View File

@@ -1,12 +0,0 @@
// +build ppc64
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View File

@@ -1,12 +0,0 @@
// +build ppc64le
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View File

@@ -1,12 +0,0 @@
// +build s390x
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false

View File

@@ -1,92 +0,0 @@
// +build !windows,!plan9,!solaris
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
)
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
var t time.Time
if timeout != 0 {
t = time.Now()
}
fd := db.file.Fd()
flag := syscall.LOCK_NB
if exclusive {
flag |= syscall.LOCK_EX
} else {
flag |= syscall.LOCK_SH
}
for {
// Attempt to obtain an exclusive lock.
err := syscall.Flock(int(fd), flag)
if err == nil {
return nil
} else if err != syscall.EWOULDBLOCK {
return err
}
// If we timed out then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return ErrTimeout
}
// Wait for a bit and try again.
time.Sleep(flockRetryTimeout)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN)
}
// mmap memory maps a DB's data file.
func mmap(db *DB, sz int) error {
// Map the data file to memory.
b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
if err != nil {
return err
}
// Advise the kernel that the mmap is accessed randomly.
if err := madvise(b, syscall.MADV_RANDOM); err != nil {
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
db.datasz = sz
return nil
}
// munmap unmaps a DB's data file from memory.
func munmap(db *DB) error {
// Ignore the unmap if we have no mapped data.
if db.dataref == nil {
return nil
}
// Unmap using the original byte slice.
err := syscall.Munmap(db.dataref)
db.dataref = nil
db.data = nil
db.datasz = 0
return err
}
// NOTE: This function is copied from stdlib because it is not available on darwin.
func madvise(b []byte, advice int) (err error) {
_, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
if e1 != 0 {
err = e1
}
return
}

View File

@@ -1,89 +0,0 @@
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/unix"
)
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
var t time.Time
if timeout != 0 {
t = time.Now()
}
fd := db.file.Fd()
var lockType int16
if exclusive {
lockType = syscall.F_WRLCK
} else {
lockType = syscall.F_RDLCK
}
for {
// Attempt to obtain an exclusive lock.
lock := syscall.Flock_t{Type: lockType}
err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
if err == nil {
return nil
} else if err != syscall.EAGAIN {
return err
}
// If we timed out then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return ErrTimeout
}
// Wait for a bit and try again.
time.Sleep(flockRetryTimeout)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
var lock syscall.Flock_t
lock.Start = 0
lock.Len = 0
lock.Type = syscall.F_UNLCK
lock.Whence = 0
return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
}
// mmap memory maps a DB's data file.
func mmap(db *DB, sz int) error {
// Map the data file to memory.
b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
if err != nil {
return err
}
// Advise the kernel that the mmap is accessed randomly.
if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil {
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
db.datasz = sz
return nil
}
// munmap unmaps a DB's data file from memory.
func munmap(db *DB) error {
// Ignore the unmap if we have no mapped data.
if db.dataref == nil {
return nil
}
// Unmap using the original byte slice.
err := unix.Munmap(db.dataref)
db.dataref = nil
db.data = nil
db.datasz = 0
return err
}

View File

@@ -1,145 +0,0 @@
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
)
// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1
var (
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
procLockFileEx = modkernel32.NewProc("LockFileEx")
procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
)
const (
lockExt = ".lock"
// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
flagLockExclusive = 2
flagLockFailImmediately = 1
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
errLockViolation syscall.Errno = 0x21
)
func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)))
if r == 0 {
return err
}
return nil
}
func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0)
if r == 0 {
return err
}
return nil
}
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return db.file.Sync()
}
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
// Create a separate lock file on windows because a process
// cannot share an exclusive lock on the same file. This is
// needed during Tx.WriteTo().
f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode)
if err != nil {
return err
}
db.lockfile = f
var t time.Time
if timeout != 0 {
t = time.Now()
}
fd := f.Fd()
var flag uint32 = flagLockFailImmediately
if exclusive {
flag |= flagLockExclusive
}
for {
// Attempt to obtain an exclusive lock.
err := lockFileEx(syscall.Handle(fd), flag, 0, 1, 0, &syscall.Overlapped{})
if err == nil {
return nil
} else if err != errLockViolation {
return err
}
// If we timed oumercit then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return ErrTimeout
}
// Wait for a bit and try again.
time.Sleep(flockRetryTimeout)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{})
db.lockfile.Close()
os.Remove(db.path + lockExt)
return err
}
// mmap memory maps a DB's data file.
// Based on: https://github.com/edsrzf/mmap-go
func mmap(db *DB, sz int) error {
if !db.readOnly {
// Truncate the database to the size of the mmap.
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("truncate: %s", err)
}
}
// Open a file mapping handle.
sizelo := uint32(sz >> 32)
sizehi := uint32(sz) & 0xffffffff
h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizelo, sizehi, nil)
if h == 0 {
return os.NewSyscallError("CreateFileMapping", errno)
}
// Create the memory map.
addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz))
if addr == 0 {
return os.NewSyscallError("MapViewOfFile", errno)
}
// Close mapping handle.
if err := syscall.CloseHandle(syscall.Handle(h)); err != nil {
return os.NewSyscallError("CloseHandle", err)
}
// Convert to a byte array.
db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr)))
db.datasz = sz
return nil
}
// munmap unmaps a pointer from a file.
// Based on: https://github.com/edsrzf/mmap-go
func munmap(db *DB) error {
if db.data == nil {
return nil
}
addr := (uintptr)(unsafe.Pointer(&db.data[0]))
if err := syscall.UnmapViewOfFile(addr); err != nil {
return os.NewSyscallError("UnmapViewOfFile", err)
}
return nil
}

View File

@@ -1,8 +0,0 @@
// +build !windows,!plan9,!linux,!openbsd
package bolt
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return db.file.Sync()
}

View File

@@ -1,775 +0,0 @@
package bolt
import (
"bytes"
"fmt"
"unsafe"
)
const (
// MaxKeySize is the maximum length of a key, in bytes.
MaxKeySize = 32768
// MaxValueSize is the maximum length of a value, in bytes.
MaxValueSize = (1 << 31) - 2
)
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
const (
minFillPercent = 0.1
maxFillPercent = 1.0
)
// DefaultFillPercent is the percentage that split pages are filled.
// This value can be changed by setting Bucket.FillPercent.
const DefaultFillPercent = 0.5
// Bucket represents a collection of key/value pairs inside the database.
type Bucket struct {
*bucket
tx *Tx // the associated transaction
buckets map[string]*Bucket // subbucket cache
page *page // inline page reference
rootNode *node // materialized node for the root page.
nodes map[pgid]*node // node cache
// Sets the threshold for filling nodes when they split. By default,
// the bucket will fill to 50% but it can be useful to increase this
// amount if you know that your write workloads are mostly append-only.
//
// This is non-persisted across transactions so it must be set in every Tx.
FillPercent float64
}
// bucket represents the on-file representation of a bucket.
// This is stored as the "value" of a bucket key. If the bucket is small enough,
// then its root page can be stored inline in the "value", after the bucket
// header. In the case of inline buckets, the "root" will be 0.
type bucket struct {
root pgid // page id of the bucket's root-level page
sequence uint64 // monotonically incrementing, used by NextSequence()
}
// newBucket returns a new bucket associated with a transaction.
func newBucket(tx *Tx) Bucket {
var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
if tx.writable {
b.buckets = make(map[string]*Bucket)
b.nodes = make(map[pgid]*node)
}
return b
}
// Tx returns the tx of the bucket.
func (b *Bucket) Tx() *Tx {
return b.tx
}
// Root returns the root of the bucket.
func (b *Bucket) Root() pgid {
return b.root
}
// Writable returns whether the bucket is writable.
func (b *Bucket) Writable() bool {
return b.tx.writable
}
// Cursor creates a cursor associated with the bucket.
// The cursor is only valid as long as the transaction is open.
// Do not use a cursor after the transaction is closed.
func (b *Bucket) Cursor() *Cursor {
// Update transaction statistics.
b.tx.stats.CursorCount++
// Allocate and return a cursor.
return &Cursor{
bucket: b,
stack: make([]elemRef, 0),
}
}
// Bucket retrieves a nested bucket by name.
// Returns nil if the bucket does not exist.
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) Bucket(name []byte) *Bucket {
if b.buckets != nil {
if child := b.buckets[string(name)]; child != nil {
return child
}
}
// Move cursor to key.
c := b.Cursor()
k, v, flags := c.seek(name)
// Return nil if the key doesn't exist or it is not a bucket.
if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
return nil
}
// Otherwise create a bucket and cache it.
var child = b.openBucket(v)
if b.buckets != nil {
b.buckets[string(name)] = child
}
return child
}
// Helper method that re-interprets a sub-bucket value
// from a parent into a Bucket
func (b *Bucket) openBucket(value []byte) *Bucket {
var child = newBucket(b.tx)
// If unaligned load/stores are broken on this arch and value is
// unaligned simply clone to an aligned byte array.
unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0
if unaligned {
value = cloneBytes(value)
}
// If this is a writable transaction then we need to copy the bucket entry.
// Read-only transactions can point directly at the mmap entry.
if b.tx.writable && !unaligned {
child.bucket = &bucket{}
*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
} else {
child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
}
// Save a reference to the inline page if the bucket is inline.
if child.root == 0 {
child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
}
return &child
}
// CreateBucket creates a new bucket at the given key and returns the new bucket.
// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
if b.tx.db == nil {
return nil, ErrTxClosed
} else if !b.tx.writable {
return nil, ErrTxNotWritable
} else if len(key) == 0 {
return nil, ErrBucketNameRequired
}
// Move cursor to correct position.
c := b.Cursor()
k, _, flags := c.seek(key)
// Return an error if there is an existing key.
if bytes.Equal(key, k) {
if (flags & bucketLeafFlag) != 0 {
return nil, ErrBucketExists
}
return nil, ErrIncompatibleValue
}
// Create empty, inline bucket.
var bucket = Bucket{
bucket: &bucket{},
rootNode: &node{isLeaf: true},
FillPercent: DefaultFillPercent,
}
var value = bucket.write()
// Insert into node.
key = cloneBytes(key)
c.node().put(key, key, value, 0, bucketLeafFlag)
// Since subbuckets are not allowed on inline buckets, we need to
// dereference the inline page, if it exists. This will cause the bucket
// to be treated as a regular, non-inline bucket for the rest of the tx.
b.page = nil
return b.Bucket(key), nil
}
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
// Returns an error if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
child, err := b.CreateBucket(key)
if err == ErrBucketExists {
return b.Bucket(key), nil
} else if err != nil {
return nil, err
}
return child, nil
}
// DeleteBucket deletes a bucket at the given key.
// Returns an error if the bucket does not exists, or if the key represents a non-bucket value.
func (b *Bucket) DeleteBucket(key []byte) error {
if b.tx.db == nil {
return ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
}
// Move cursor to correct position.
c := b.Cursor()
k, _, flags := c.seek(key)
// Return an error if bucket doesn't exist or is not a bucket.
if !bytes.Equal(key, k) {
return ErrBucketNotFound
} else if (flags & bucketLeafFlag) == 0 {
return ErrIncompatibleValue
}
// Recursively delete all child buckets.
child := b.Bucket(key)
err := child.ForEach(func(k, v []byte) error {
if v == nil {
if err := child.DeleteBucket(k); err != nil {
return fmt.Errorf("delete bucket: %s", err)
}
}
return nil
})
if err != nil {
return err
}
// Remove cached copy.
delete(b.buckets, string(key))
// Release all bucket pages to freelist.
child.nodes = nil
child.rootNode = nil
child.free()
// Delete the node if we have a matching key.
c.node().del(key)
return nil
}
// Get retrieves the value for a key in the bucket.
// Returns a nil value if the key does not exist or if the key is a nested bucket.
// The returned value is only valid for the life of the transaction.
func (b *Bucket) Get(key []byte) []byte {
k, v, flags := b.Cursor().seek(key)
// Return nil if this is a bucket.
if (flags & bucketLeafFlag) != 0 {
return nil
}
// If our target node isn't the same key as what's passed in then return nil.
if !bytes.Equal(key, k) {
return nil
}
return v
}
// Put sets the value for a key in the bucket.
// If the key exist then its previous value will be overwritten.
// Supplied value must remain valid for the life of the transaction.
// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
func (b *Bucket) Put(key []byte, value []byte) error {
if b.tx.db == nil {
return ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
} else if len(key) == 0 {
return ErrKeyRequired
} else if len(key) > MaxKeySize {
return ErrKeyTooLarge
} else if int64(len(value)) > MaxValueSize {
return ErrValueTooLarge
}
// Move cursor to correct position.
c := b.Cursor()
k, _, flags := c.seek(key)
// Return an error if there is an existing key with a bucket value.
if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
return ErrIncompatibleValue
}
// Insert into node.
key = cloneBytes(key)
c.node().put(key, key, value, 0, 0)
return nil
}
// Delete removes a key from the bucket.
// If the key does not exist then nothing is done and a nil error is returned.
// Returns an error if the bucket was created from a read-only transaction.
func (b *Bucket) Delete(key []byte) error {
if b.tx.db == nil {
return ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
}
// Move cursor to correct position.
c := b.Cursor()
k, _, flags := c.seek(key)
// Return nil if the key doesn't exist.
if !bytes.Equal(key, k) {
return nil
}
// Return an error if there is already existing bucket value.
if (flags & bucketLeafFlag) != 0 {
return ErrIncompatibleValue
}
// Delete the node if we have a matching key.
c.node().del(key)
return nil
}
// Sequence returns the current integer for the bucket without incrementing it.
func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
// SetSequence updates the sequence number for the bucket.
func (b *Bucket) SetSequence(v uint64) error {
if b.tx.db == nil {
return ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
}
// Materialize the root node if it hasn't been already so that the
// bucket will be saved during commit.
if b.rootNode == nil {
_ = b.node(b.root, nil)
}
// Increment and return the sequence.
b.bucket.sequence = v
return nil
}
// NextSequence returns an autoincrementing integer for the bucket.
func (b *Bucket) NextSequence() (uint64, error) {
if b.tx.db == nil {
return 0, ErrTxClosed
} else if !b.Writable() {
return 0, ErrTxNotWritable
}
// Materialize the root node if it hasn't been already so that the
// bucket will be saved during commit.
if b.rootNode == nil {
_ = b.node(b.root, nil)
}
// Increment and return the sequence.
b.bucket.sequence++
return b.bucket.sequence, nil
}
// ForEach executes a function for each key/value pair in a bucket.
// If the provided function returns an error then the iteration is stopped and
// the error is returned to the caller. The provided function must not modify
// the bucket; this will result in undefined behavior.
func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
if b.tx.db == nil {
return ErrTxClosed
}
c := b.Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
if err := fn(k, v); err != nil {
return err
}
}
return nil
}
// Stat returns stats on a bucket.
func (b *Bucket) Stats() BucketStats {
var s, subStats BucketStats
pageSize := b.tx.db.pageSize
s.BucketN += 1
if b.root == 0 {
s.InlineBucketN += 1
}
b.forEachPage(func(p *page, depth int) {
if (p.flags & leafPageFlag) != 0 {
s.KeyN += int(p.count)
// used totals the used bytes for the page
used := pageHeaderSize
if p.count != 0 {
// If page has any elements, add all element headers.
used += leafPageElementSize * int(p.count-1)
// Add all element key, value sizes.
// The computation takes advantage of the fact that the position
// of the last element's key/value equals to the total of the sizes
// of all previous elements' keys and values.
// It also includes the last element's header.
lastElement := p.leafPageElement(p.count - 1)
used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
}
if b.root == 0 {
// For inlined bucket just update the inline stats
s.InlineBucketInuse += used
} else {
// For non-inlined bucket update all the leaf stats
s.LeafPageN++
s.LeafInuse += used
s.LeafOverflowN += int(p.overflow)
// Collect stats from sub-buckets.
// Do that by iterating over all element headers
// looking for the ones with the bucketLeafFlag.
for i := uint16(0); i < p.count; i++ {
e := p.leafPageElement(i)
if (e.flags & bucketLeafFlag) != 0 {
// For any bucket element, open the element value
// and recursively call Stats on the contained bucket.
subStats.Add(b.openBucket(e.value()).Stats())
}
}
}
} else if (p.flags & branchPageFlag) != 0 {
s.BranchPageN++
lastElement := p.branchPageElement(p.count - 1)
// used totals the used bytes for the page
// Add header and all element headers.
used := pageHeaderSize + (branchPageElementSize * int(p.count-1))
// Add size of all keys and values.
// Again, use the fact that last element's position equals to
// the total of key, value sizes of all previous elements.
used += int(lastElement.pos + lastElement.ksize)
s.BranchInuse += used
s.BranchOverflowN += int(p.overflow)
}
// Keep track of maximum page depth.
if depth+1 > s.Depth {
s.Depth = (depth + 1)
}
})
// Alloc stats can be computed from page counts and pageSize.
s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
// Add the max depth of sub-buckets to get total nested depth.
s.Depth += subStats.Depth
// Add the stats for all sub-buckets
s.Add(subStats)
return s
}
// forEachPage iterates over every page in a bucket, including inline pages.
func (b *Bucket) forEachPage(fn func(*page, int)) {
// If we have an inline page then just use that.
if b.page != nil {
fn(b.page, 0)
return
}
// Otherwise traverse the page hierarchy.
b.tx.forEachPage(b.root, 0, fn)
}
// forEachPageNode iterates over every page (or node) in a bucket.
// This also includes inline pages.
func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
// If we have an inline page or root node then just use that.
if b.page != nil {
fn(b.page, nil, 0)
return
}
b._forEachPageNode(b.root, 0, fn)
}
func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) {
var p, n = b.pageNode(pgid)
// Execute function.
fn(p, n, depth)
// Recursively loop over children.
if p != nil {
if (p.flags & branchPageFlag) != 0 {
for i := 0; i < int(p.count); i++ {
elem := p.branchPageElement(uint16(i))
b._forEachPageNode(elem.pgid, depth+1, fn)
}
}
} else {
if !n.isLeaf {
for _, inode := range n.inodes {
b._forEachPageNode(inode.pgid, depth+1, fn)
}
}
}
}
// spill writes all the nodes for this bucket to dirty pages.
func (b *Bucket) spill() error {
// Spill all child buckets first.
for name, child := range b.buckets {
// If the child bucket is small enough and it has no child buckets then
// write it inline into the parent bucket's page. Otherwise spill it
// like a normal bucket and make the parent value a pointer to the page.
var value []byte
if child.inlineable() {
child.free()
value = child.write()
} else {
if err := child.spill(); err != nil {
return err
}
// Update the child bucket header in this bucket.
value = make([]byte, unsafe.Sizeof(bucket{}))
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
*bucket = *child.bucket
}
// Skip writing the bucket if there are no materialized nodes.
if child.rootNode == nil {
continue
}
// Update parent node.
var c = b.Cursor()
k, _, flags := c.seek([]byte(name))
if !bytes.Equal([]byte(name), k) {
panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
}
if flags&bucketLeafFlag == 0 {
panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
}
c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
}
// Ignore if there's not a materialized root node.
if b.rootNode == nil {
return nil
}
// Spill nodes.
if err := b.rootNode.spill(); err != nil {
return err
}
b.rootNode = b.rootNode.root()
// Update the root node for this bucket.
if b.rootNode.pgid >= b.tx.meta.pgid {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
}
b.root = b.rootNode.pgid
return nil
}
// inlineable returns true if a bucket is small enough to be written inline
// and if it contains no subbuckets. Otherwise returns false.
func (b *Bucket) inlineable() bool {
var n = b.rootNode
// Bucket must only contain a single leaf node.
if n == nil || !n.isLeaf {
return false
}
// Bucket is not inlineable if it contains subbuckets or if it goes beyond
// our threshold for inline bucket size.
var size = pageHeaderSize
for _, inode := range n.inodes {
size += leafPageElementSize + len(inode.key) + len(inode.value)
if inode.flags&bucketLeafFlag != 0 {
return false
} else if size > b.maxInlineBucketSize() {
return false
}
}
return true
}
// Returns the maximum total size of a bucket to make it a candidate for inlining.
func (b *Bucket) maxInlineBucketSize() int {
return b.tx.db.pageSize / 4
}
// write allocates and writes a bucket to a byte slice.
func (b *Bucket) write() []byte {
// Allocate the appropriate size.
var n = b.rootNode
var value = make([]byte, bucketHeaderSize+n.size())
// Write a bucket header.
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
*bucket = *b.bucket
// Convert byte slice to a fake page and write the root node.
var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
n.write(p)
return value
}
// rebalance attempts to balance all nodes.
func (b *Bucket) rebalance() {
for _, n := range b.nodes {
n.rebalance()
}
for _, child := range b.buckets {
child.rebalance()
}
}
// node creates a node from a page and associates it with a given parent.
func (b *Bucket) node(pgid pgid, parent *node) *node {
_assert(b.nodes != nil, "nodes map expected")
// Retrieve node if it's already been created.
if n := b.nodes[pgid]; n != nil {
return n
}
// Otherwise create a node and cache it.
n := &node{bucket: b, parent: parent}
if parent == nil {
b.rootNode = n
} else {
parent.children = append(parent.children, n)
}
// Use the inline page if this is an inline bucket.
var p = b.page
if p == nil {
p = b.tx.page(pgid)
}
// Read the page into the node and cache it.
n.read(p)
b.nodes[pgid] = n
// Update statistics.
b.tx.stats.NodeCount++
return n
}
// free recursively frees all pages in the bucket.
func (b *Bucket) free() {
if b.root == 0 {
return
}
var tx = b.tx
b.forEachPageNode(func(p *page, n *node, _ int) {
if p != nil {
tx.db.freelist.free(tx.meta.txid, p)
} else {
n.free()
}
})
b.root = 0
}
// dereference removes all references to the old mmap.
func (b *Bucket) dereference() {
if b.rootNode != nil {
b.rootNode.root().dereference()
}
for _, child := range b.buckets {
child.dereference()
}
}
// pageNode returns the in-memory node, if it exists.
// Otherwise returns the underlying page.
func (b *Bucket) pageNode(id pgid) (*page, *node) {
// Inline buckets have a fake page embedded in their value so treat them
// differently. We'll return the rootNode (if available) or the fake page.
if b.root == 0 {
if id != 0 {
panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
}
if b.rootNode != nil {
return nil, b.rootNode
}
return b.page, nil
}
// Check the node cache for non-inline buckets.
if b.nodes != nil {
if n := b.nodes[id]; n != nil {
return nil, n
}
}
// Finally lookup the page from the transaction if no node is materialized.
return b.tx.page(id), nil
}
// BucketStats records statistics about resources used by a bucket.
type BucketStats struct {
// Page count statistics.
BranchPageN int // number of logical branch pages
BranchOverflowN int // number of physical branch overflow pages
LeafPageN int // number of logical leaf pages
LeafOverflowN int // number of physical leaf overflow pages
// Tree statistics.
KeyN int // number of keys/value pairs
Depth int // number of levels in B+tree
// Page size utilization.
BranchAlloc int // bytes allocated for physical branch pages
BranchInuse int // bytes actually used for branch data
LeafAlloc int // bytes allocated for physical leaf pages
LeafInuse int // bytes actually used for leaf data
// Bucket statistics
BucketN int // total number of buckets including the top bucket
InlineBucketN int // total number on inlined buckets
InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
}
func (s *BucketStats) Add(other BucketStats) {
s.BranchPageN += other.BranchPageN
s.BranchOverflowN += other.BranchOverflowN
s.LeafPageN += other.LeafPageN
s.LeafOverflowN += other.LeafOverflowN
s.KeyN += other.KeyN
if s.Depth < other.Depth {
s.Depth = other.Depth
}
s.BranchAlloc += other.BranchAlloc
s.BranchInuse += other.BranchInuse
s.LeafAlloc += other.LeafAlloc
s.LeafInuse += other.LeafInuse
s.BucketN += other.BucketN
s.InlineBucketN += other.InlineBucketN
s.InlineBucketInuse += other.InlineBucketInuse
}
// cloneBytes returns a copy of a given slice.
func cloneBytes(v []byte) []byte {
var clone = make([]byte, len(v))
copy(clone, v)
return clone
}

View File

@@ -1,400 +0,0 @@
package bolt
import (
"bytes"
"fmt"
"sort"
)
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order.
// Cursors see nested buckets with value == nil.
// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
//
// Keys and values returned from the cursor are only valid for the life of the transaction.
//
// Changing data while traversing with a cursor may cause it to be invalidated
// and return unexpected keys and/or values. You must reposition your cursor
// after mutating data.
type Cursor struct {
bucket *Bucket
stack []elemRef
}
// Bucket returns the bucket that this cursor was created from.
func (c *Cursor) Bucket() *Bucket {
return c.bucket
}
// First moves the cursor to the first item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) First() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
c.stack = c.stack[:0]
p, n := c.bucket.pageNode(c.bucket.root)
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
c.first()
// If we land on an empty page then move to the next value.
// https://github.com/boltdb/bolt/issues/450
if c.stack[len(c.stack)-1].count() == 0 {
c.next()
}
k, v, flags := c.keyValue()
if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Last moves the cursor to the last item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Last() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
c.stack = c.stack[:0]
p, n := c.bucket.pageNode(c.bucket.root)
ref := elemRef{page: p, node: n}
ref.index = ref.count() - 1
c.stack = append(c.stack, ref)
c.last()
k, v, flags := c.keyValue()
if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Next moves the cursor to the next item in the bucket and returns its key and value.
// If the cursor is at the end of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Next() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.next()
if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Prev moves the cursor to the previous item in the bucket and returns its key and value.
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Prev() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
// Attempt to move back one element until we're successful.
// Move up the stack as we hit the beginning of each page in our stack.
for i := len(c.stack) - 1; i >= 0; i-- {
elem := &c.stack[i]
if elem.index > 0 {
elem.index--
break
}
c.stack = c.stack[:i]
}
// If we've hit the end then return nil.
if len(c.stack) == 0 {
return nil, nil
}
// Move down the stack to find the last element of the last leaf under this branch.
c.last()
k, v, flags := c.keyValue()
if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Seek moves the cursor to a given key and returns it.
// If the key does not exist then the next key is used. If no keys
// follow, a nil key is returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
k, v, flags := c.seek(seek)
// If we ended up after the last element of a page then move to the next one.
if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() {
k, v, flags = c.next()
}
if k == nil {
return nil, nil
} else if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Delete removes the current key/value under the cursor from the bucket.
// Delete fails if current key/value is a bucket or if the transaction is not writable.
func (c *Cursor) Delete() error {
if c.bucket.tx.db == nil {
return ErrTxClosed
} else if !c.bucket.Writable() {
return ErrTxNotWritable
}
key, _, flags := c.keyValue()
// Return an error if current value is a bucket.
if (flags & bucketLeafFlag) != 0 {
return ErrIncompatibleValue
}
c.node().del(key)
return nil
}
// seek moves the cursor to a given key and returns it.
// If the key does not exist then the next key is used.
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
_assert(c.bucket.tx.db != nil, "tx closed")
// Start from root page/node and traverse to correct page.
c.stack = c.stack[:0]
c.search(seek, c.bucket.root)
ref := &c.stack[len(c.stack)-1]
// If the cursor is pointing to the end of page/node then return nil.
if ref.index >= ref.count() {
return nil, nil, 0
}
// If this is a bucket then return a nil value.
return c.keyValue()
}
// first moves the cursor to the first leaf element under the last page in the stack.
func (c *Cursor) first() {
for {
// Exit when we hit a leaf page.
var ref = &c.stack[len(c.stack)-1]
if ref.isLeaf() {
break
}
// Keep adding pages pointing to the first element to the stack.
var pgid pgid
if ref.node != nil {
pgid = ref.node.inodes[ref.index].pgid
} else {
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
}
p, n := c.bucket.pageNode(pgid)
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
}
}
// last moves the cursor to the last leaf element under the last page in the stack.
func (c *Cursor) last() {
for {
// Exit when we hit a leaf page.
ref := &c.stack[len(c.stack)-1]
if ref.isLeaf() {
break
}
// Keep adding pages pointing to the last element in the stack.
var pgid pgid
if ref.node != nil {
pgid = ref.node.inodes[ref.index].pgid
} else {
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
}
p, n := c.bucket.pageNode(pgid)
var nextRef = elemRef{page: p, node: n}
nextRef.index = nextRef.count() - 1
c.stack = append(c.stack, nextRef)
}
}
// next moves to the next leaf element and returns the key and value.
// If the cursor is at the last leaf element then it stays there and returns nil.
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
for {
// Attempt to move over one element until we're successful.
// Move up the stack as we hit the end of each page in our stack.
var i int
for i = len(c.stack) - 1; i >= 0; i-- {
elem := &c.stack[i]
if elem.index < elem.count()-1 {
elem.index++
break
}
}
// If we've hit the root page then stop and return. This will leave the
// cursor on the last element of the last page.
if i == -1 {
return nil, nil, 0
}
// Otherwise start from where we left off in the stack and find the
// first element of the first leaf page.
c.stack = c.stack[:i+1]
c.first()
// If this is an empty page then restart and move back up the stack.
// https://github.com/boltdb/bolt/issues/450
if c.stack[len(c.stack)-1].count() == 0 {
continue
}
return c.keyValue()
}
}
// search recursively performs a binary search against a given page/node until it finds a given key.
func (c *Cursor) search(key []byte, pgid pgid) {
p, n := c.bucket.pageNode(pgid)
if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
}
e := elemRef{page: p, node: n}
c.stack = append(c.stack, e)
// If we're on a leaf page/node then find the specific node.
if e.isLeaf() {
c.nsearch(key)
return
}
if n != nil {
c.searchNode(key, n)
return
}
c.searchPage(key, p)
}
func (c *Cursor) searchNode(key []byte, n *node) {
var exact bool
index := sort.Search(len(n.inodes), func(i int) bool {
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
ret := bytes.Compare(n.inodes[i].key, key)
if ret == 0 {
exact = true
}
return ret != -1
})
if !exact && index > 0 {
index--
}
c.stack[len(c.stack)-1].index = index
// Recursively search to the next page.
c.search(key, n.inodes[index].pgid)
}
func (c *Cursor) searchPage(key []byte, p *page) {
// Binary search for the correct range.
inodes := p.branchPageElements()
var exact bool
index := sort.Search(int(p.count), func(i int) bool {
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
ret := bytes.Compare(inodes[i].key(), key)
if ret == 0 {
exact = true
}
return ret != -1
})
if !exact && index > 0 {
index--
}
c.stack[len(c.stack)-1].index = index
// Recursively search to the next page.
c.search(key, inodes[index].pgid)
}
// nsearch searches the leaf node on the top of the stack for a key.
func (c *Cursor) nsearch(key []byte) {
e := &c.stack[len(c.stack)-1]
p, n := e.page, e.node
// If we have a node then search its inodes.
if n != nil {
index := sort.Search(len(n.inodes), func(i int) bool {
return bytes.Compare(n.inodes[i].key, key) != -1
})
e.index = index
return
}
// If we have a page then search its leaf elements.
inodes := p.leafPageElements()
index := sort.Search(int(p.count), func(i int) bool {
return bytes.Compare(inodes[i].key(), key) != -1
})
e.index = index
}
// keyValue returns the key and value of the current leaf element.
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
ref := &c.stack[len(c.stack)-1]
if ref.count() == 0 || ref.index >= ref.count() {
return nil, nil, 0
}
// Retrieve value from node.
if ref.node != nil {
inode := &ref.node.inodes[ref.index]
return inode.key, inode.value, inode.flags
}
// Or retrieve value from page.
elem := ref.page.leafPageElement(uint16(ref.index))
return elem.key(), elem.value(), elem.flags
}
// node returns the node that the cursor is currently positioned on.
func (c *Cursor) node() *node {
_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
// If the top of the stack is a leaf node then just return it.
if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
return ref.node
}
// Start from root and traverse down the hierarchy.
var n = c.stack[0].node
if n == nil {
n = c.bucket.node(c.stack[0].page.id, nil)
}
for _, ref := range c.stack[:len(c.stack)-1] {
_assert(!n.isLeaf, "expected branch node")
n = n.childAt(int(ref.index))
}
_assert(n.isLeaf, "expected leaf node")
return n
}
// elemRef represents a reference to an element on a given page/node.
type elemRef struct {
page *page
node *node
index int
}
// isLeaf returns whether the ref is pointing at a leaf page/node.
func (r *elemRef) isLeaf() bool {
if r.node != nil {
return r.node.isLeaf
}
return (r.page.flags & leafPageFlag) != 0
}
// count returns the number of inodes or page elements.
func (r *elemRef) count() int {
if r.node != nil {
return len(r.node.inodes)
}
return int(r.page.count)
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,44 +0,0 @@
/*
Package bolt implements a low-level key/value store in pure Go. It supports
fully serializable transactions, ACID semantics, and lock-free MVCC with
multiple readers and a single writer. Bolt can be used for projects that
want a simple data store without the need to add large dependencies such as
Postgres or MySQL.
Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is
optimized for fast read access and does not require recovery in the event of a
system crash. Transactions which have not finished committing will simply be
rolled back in the event of a crash.
The design of Bolt is based on Howard Chu's LMDB database project.
Bolt currently works on Windows, Mac OS X, and Linux.
Basics
There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is
a collection of buckets and is represented by a single file on disk. A bucket is
a collection of unique keys that are associated with values.
Transactions provide either read-only or read-write access to the database.
Read-only transactions can retrieve key/value pairs and can use Cursors to
iterate over the dataset sequentially. Read-write transactions can create and
delete buckets and can insert and remove keys. Only one read-write transaction
is allowed at a time.
Caveats
The database uses a read-only, memory-mapped data file to ensure that
applications cannot corrupt the database, however, this means that keys and
values returned from Bolt cannot be changed. Writing to a read-only byte slice
will cause Go to panic.
Keys and values retrieved from the database are only valid for the life of
the transaction. When used outside the transaction, these byte slices can
point to different data or can point to invalid memory which will cause a panic.
*/
package bolt

View File

@@ -1,71 +0,0 @@
package bolt
import "errors"
// These errors can be returned when opening or calling methods on a DB.
var (
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
// is opened or after it is closed.
ErrDatabaseNotOpen = errors.New("database not open")
// ErrDatabaseOpen is returned when opening a database that is
// already open.
ErrDatabaseOpen = errors.New("database already open")
// ErrInvalid is returned when both meta pages on a database are invalid.
// This typically occurs when a file is not a bolt database.
ErrInvalid = errors.New("invalid database")
// ErrVersionMismatch is returned when the data file was created with a
// different version of Bolt.
ErrVersionMismatch = errors.New("version mismatch")
// ErrChecksum is returned when either meta page checksum does not match.
ErrChecksum = errors.New("checksum error")
// ErrTimeout is returned when a database cannot obtain an exclusive lock
// on the data file after the timeout passed to Open().
ErrTimeout = errors.New("timeout")
)
// These errors can occur when beginning or committing a Tx.
var (
// ErrTxNotWritable is returned when performing a write operation on a
// read-only transaction.
ErrTxNotWritable = errors.New("tx not writable")
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
ErrTxClosed = errors.New("tx closed")
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
)
// These errors can occur when putting or deleting a value or a bucket.
var (
// ErrBucketNotFound is returned when trying to access a bucket that has
// not been created yet.
ErrBucketNotFound = errors.New("bucket not found")
// ErrBucketExists is returned when creating a bucket that already exists.
ErrBucketExists = errors.New("bucket already exists")
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
ErrBucketNameRequired = errors.New("bucket name required")
// ErrKeyRequired is returned when inserting a zero-length key.
ErrKeyRequired = errors.New("key required")
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
ErrKeyTooLarge = errors.New("key too large")
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
ErrValueTooLarge = errors.New("value too large")
// ErrIncompatibleValue is returned when trying create or delete a bucket
// on an existing non-bucket key or when trying to create or delete a
// non-bucket key on an existing bucket key.
ErrIncompatibleValue = errors.New("incompatible value")
)

View File

@@ -1,333 +0,0 @@
package bolt
import (
"fmt"
"sort"
"unsafe"
)
// txPending holds a list of pgids and corresponding allocation txns
// that are pending to be freed.
type txPending struct {
ids []pgid
alloctx []txid // txids allocating the ids
lastReleaseBegin txid // beginning txid of last matching releaseRange
}
// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
ids []pgid // all free and available free page ids.
allocs map[pgid]txid // mapping of txid that allocated a pgid.
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[pgid]bool // fast lookup of all free and pending page ids.
}
// newFreelist returns an empty, initialized freelist.
func newFreelist() *freelist {
return &freelist{
allocs: make(map[pgid]txid),
pending: make(map[txid]*txPending),
cache: make(map[pgid]bool),
}
}
// size returns the size of the page after serialization.
func (f *freelist) size() int {
n := f.count()
if n >= 0xFFFF {
// The first element will be used to store the count. See freelist.write.
n++
}
return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
}
// count returns count of pages on the freelist
func (f *freelist) count() int {
return f.free_count() + f.pending_count()
}
// free_count returns count of free pages
func (f *freelist) free_count() int {
return len(f.ids)
}
// pending_count returns count of pending pages
func (f *freelist) pending_count() int {
var count int
for _, txp := range f.pending {
count += len(txp.ids)
}
return count
}
// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
// f.count returns the minimum length required for dst.
func (f *freelist) copyall(dst []pgid) {
m := make(pgids, 0, f.pending_count())
for _, txp := range f.pending {
m = append(m, txp.ids...)
}
sort.Sort(m)
mergepgids(dst, f.ids, m)
}
// allocate returns the starting page id of a contiguous list of pages of a given size.
// If a contiguous block cannot be found then 0 is returned.
func (f *freelist) allocate(txid txid, n int) pgid {
if len(f.ids) == 0 {
return 0
}
var initial, previd pgid
for i, id := range f.ids {
if id <= 1 {
panic(fmt.Sprintf("invalid page allocation: %d", id))
}
// Reset initial page if this is not contiguous.
if previd == 0 || id-previd != 1 {
initial = id
}
// If we found a contiguous block then remove it and return it.
if (id-initial)+1 == pgid(n) {
// If we're allocating off the beginning then take the fast path
// and just adjust the existing slice. This will use extra memory
// temporarily but the append() in free() will realloc the slice
// as is necessary.
if (i + 1) == n {
f.ids = f.ids[i+1:]
} else {
copy(f.ids[i-n+1:], f.ids[i+1:])
f.ids = f.ids[:len(f.ids)-n]
}
// Remove from the free cache.
for i := pgid(0); i < pgid(n); i++ {
delete(f.cache, initial+i)
}
f.allocs[initial] = txid
return initial
}
previd = id
}
return 0
}
// free releases a page and its overflow for a given transaction id.
// If the page is already free then a panic will occur.
func (f *freelist) free(txid txid, p *page) {
if p.id <= 1 {
panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
}
// Free page and all its overflow pages.
txp := f.pending[txid]
if txp == nil {
txp = &txPending{}
f.pending[txid] = txp
}
allocTxid, ok := f.allocs[p.id]
if ok {
delete(f.allocs, p.id)
} else if (p.flags & freelistPageFlag) != 0 {
// Freelist is always allocated by prior tx.
allocTxid = txid - 1
}
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
// Verify that page is not already free.
if f.cache[id] {
panic(fmt.Sprintf("page %d already freed", id))
}
// Add to the freelist and cache.
txp.ids = append(txp.ids, id)
txp.alloctx = append(txp.alloctx, allocTxid)
f.cache[id] = true
}
}
// release moves all page ids for a transaction id (or older) to the freelist.
func (f *freelist) release(txid txid) {
m := make(pgids, 0)
for tid, txp := range f.pending {
if tid <= txid {
// Move transaction's pending pages to the available freelist.
// Don't remove from the cache since the page is still free.
m = append(m, txp.ids...)
delete(f.pending, tid)
}
}
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
}
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
func (f *freelist) releaseRange(begin, end txid) {
if begin > end {
return
}
var m pgids
for tid, txp := range f.pending {
if tid < begin || tid > end {
continue
}
// Don't recompute freed pages if ranges haven't updated.
if txp.lastReleaseBegin == begin {
continue
}
for i := 0; i < len(txp.ids); i++ {
if atx := txp.alloctx[i]; atx < begin || atx > end {
continue
}
m = append(m, txp.ids[i])
txp.ids[i] = txp.ids[len(txp.ids)-1]
txp.ids = txp.ids[:len(txp.ids)-1]
txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
i--
}
txp.lastReleaseBegin = begin
if len(txp.ids) == 0 {
delete(f.pending, tid)
}
}
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
}
// rollback removes the pages from a given pending tx.
func (f *freelist) rollback(txid txid) {
// Remove page ids from cache.
txp := f.pending[txid]
if txp == nil {
return
}
var m pgids
for i, pgid := range txp.ids {
delete(f.cache, pgid)
tx := txp.alloctx[i]
if tx == 0 {
continue
}
if tx != txid {
// Pending free aborted; restore page back to alloc list.
f.allocs[pgid] = tx
} else {
// Freed page was allocated by this txn; OK to throw away.
m = append(m, pgid)
}
}
// Remove pages from pending list and mark as free if allocated by txid.
delete(f.pending, txid)
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
}
// freed returns whether a given page is in the free list.
func (f *freelist) freed(pgid pgid) bool {
return f.cache[pgid]
}
// read initializes the freelist from a freelist page.
func (f *freelist) read(p *page) {
if (p.flags & freelistPageFlag) == 0 {
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
}
// If the page.count is at the max uint16 value (64k) then it's considered
// an overflow and the size of the freelist is stored as the first element.
idx, count := 0, int(p.count)
if count == 0xFFFF {
idx = 1
count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
}
// Copy the list of page ids from the freelist.
if count == 0 {
f.ids = nil
} else {
ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
f.ids = make([]pgid, len(ids))
copy(f.ids, ids)
// Make sure they're sorted.
sort.Sort(pgids(f.ids))
}
// Rebuild the page cache.
f.reindex()
}
// read initializes the freelist from a given list of ids.
func (f *freelist) readIDs(ids []pgid) {
f.ids = ids
f.reindex()
}
// write writes the page ids onto a freelist page. All free and pending ids are
// saved to disk since in the event of a program crash, all pending ids will
// become free.
func (f *freelist) write(p *page) error {
// Combine the old free pgids and pgids waiting on an open transaction.
// Update the header flag.
p.flags |= freelistPageFlag
// The page.count can only hold up to 64k elements so if we overflow that
// number then we handle it by putting the size in the first element.
lenids := f.count()
if lenids == 0 {
p.count = uint16(lenids)
} else if lenids < 0xFFFF {
p.count = uint16(lenids)
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
} else {
p.count = 0xFFFF
((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
}
return nil
}
// reload reads the freelist from a page and filters out pending items.
func (f *freelist) reload(p *page) {
f.read(p)
// Build a cache of only pending pages.
pcache := make(map[pgid]bool)
for _, txp := range f.pending {
for _, pendingID := range txp.ids {
pcache[pendingID] = true
}
}
// Check each page in the freelist and build a new available freelist
// with any pages not in the pending lists.
var a []pgid
for _, id := range f.ids {
if !pcache[id] {
a = append(a, id)
}
}
f.ids = a
// Once the available list is rebuilt then rebuild the free cache so that
// it includes the available and pending free pages.
f.reindex()
}
// reindex rebuilds the free cache based on available and pending free lists.
func (f *freelist) reindex() {
f.cache = make(map[pgid]bool, len(f.ids))
for _, id := range f.ids {
f.cache[id] = true
}
for _, txp := range f.pending {
for _, pendingID := range txp.ids {
f.cache[pendingID] = true
}
}
}

View File

@@ -1,604 +0,0 @@
package bolt
import (
"bytes"
"fmt"
"sort"
"unsafe"
)
// node represents an in-memory, deserialized page.
type node struct {
bucket *Bucket
isLeaf bool
unbalanced bool
spilled bool
key []byte
pgid pgid
parent *node
children nodes
inodes inodes
}
// root returns the top-level node this node is attached to.
func (n *node) root() *node {
if n.parent == nil {
return n
}
return n.parent.root()
}
// minKeys returns the minimum number of inodes this node should have.
func (n *node) minKeys() int {
if n.isLeaf {
return 1
}
return 2
}
// size returns the size of the node after serialization.
func (n *node) size() int {
sz, elsz := pageHeaderSize, n.pageElementSize()
for i := 0; i < len(n.inodes); i++ {
item := &n.inodes[i]
sz += elsz + len(item.key) + len(item.value)
}
return sz
}
// sizeLessThan returns true if the node is less than a given size.
// This is an optimization to avoid calculating a large node when we only need
// to know if it fits inside a certain page size.
func (n *node) sizeLessThan(v int) bool {
sz, elsz := pageHeaderSize, n.pageElementSize()
for i := 0; i < len(n.inodes); i++ {
item := &n.inodes[i]
sz += elsz + len(item.key) + len(item.value)
if sz >= v {
return false
}
}
return true
}
// pageElementSize returns the size of each page element based on the type of node.
func (n *node) pageElementSize() int {
if n.isLeaf {
return leafPageElementSize
}
return branchPageElementSize
}
// childAt returns the child node at a given index.
func (n *node) childAt(index int) *node {
if n.isLeaf {
panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))
}
return n.bucket.node(n.inodes[index].pgid, n)
}
// childIndex returns the index of a given child node.
func (n *node) childIndex(child *node) int {
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })
return index
}
// numChildren returns the number of children.
func (n *node) numChildren() int {
return len(n.inodes)
}
// nextSibling returns the next node with the same parent.
func (n *node) nextSibling() *node {
if n.parent == nil {
return nil
}
index := n.parent.childIndex(n)
if index >= n.parent.numChildren()-1 {
return nil
}
return n.parent.childAt(index + 1)
}
// prevSibling returns the previous node with the same parent.
func (n *node) prevSibling() *node {
if n.parent == nil {
return nil
}
index := n.parent.childIndex(n)
if index == 0 {
return nil
}
return n.parent.childAt(index - 1)
}
// put inserts a key/value.
func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {
if pgid >= n.bucket.tx.meta.pgid {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))
} else if len(oldKey) <= 0 {
panic("put: zero-length old key")
} else if len(newKey) <= 0 {
panic("put: zero-length new key")
}
// Find insertion index.
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
// Add capacity and shift nodes if we don't have an exact match and need to insert.
exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
if !exact {
n.inodes = append(n.inodes, inode{})
copy(n.inodes[index+1:], n.inodes[index:])
}
inode := &n.inodes[index]
inode.flags = flags
inode.key = newKey
inode.value = value
inode.pgid = pgid
_assert(len(inode.key) > 0, "put: zero-length inode key")
}
// del removes a key from the node.
func (n *node) del(key []byte) {
// Find index of key.
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 })
// Exit if the key isn't found.
if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) {
return
}
// Delete inode from the node.
n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)
// Mark the node as needing rebalancing.
n.unbalanced = true
}
// read initializes the node from a page.
func (n *node) read(p *page) {
n.pgid = p.id
n.isLeaf = ((p.flags & leafPageFlag) != 0)
n.inodes = make(inodes, int(p.count))
for i := 0; i < int(p.count); i++ {
inode := &n.inodes[i]
if n.isLeaf {
elem := p.leafPageElement(uint16(i))
inode.flags = elem.flags
inode.key = elem.key()
inode.value = elem.value()
} else {
elem := p.branchPageElement(uint16(i))
inode.pgid = elem.pgid
inode.key = elem.key()
}
_assert(len(inode.key) > 0, "read: zero-length inode key")
}
// Save first key so we can find the node in the parent when we spill.
if len(n.inodes) > 0 {
n.key = n.inodes[0].key
_assert(len(n.key) > 0, "read: zero-length node key")
} else {
n.key = nil
}
}
// write writes the items onto one or more pages.
func (n *node) write(p *page) {
// Initialize page.
if n.isLeaf {
p.flags |= leafPageFlag
} else {
p.flags |= branchPageFlag
}
if len(n.inodes) >= 0xFFFF {
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
}
p.count = uint16(len(n.inodes))
// Stop here if there are no items to write.
if p.count == 0 {
return
}
// Loop over each item and write it to the page.
b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
for i, item := range n.inodes {
_assert(len(item.key) > 0, "write: zero-length inode key")
// Write the page element.
if n.isLeaf {
elem := p.leafPageElement(uint16(i))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.flags = item.flags
elem.ksize = uint32(len(item.key))
elem.vsize = uint32(len(item.value))
} else {
elem := p.branchPageElement(uint16(i))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.ksize = uint32(len(item.key))
elem.pgid = item.pgid
_assert(elem.pgid != p.id, "write: circular dependency occurred")
}
// If the length of key+value is larger than the max allocation size
// then we need to reallocate the byte array pointer.
//
// See: https://github.com/boltdb/bolt/pull/335
klen, vlen := len(item.key), len(item.value)
if len(b) < klen+vlen {
b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
}
// Write data for the element to the end of the page.
copy(b[0:], item.key)
b = b[klen:]
copy(b[0:], item.value)
b = b[vlen:]
}
// DEBUG ONLY: n.dump()
}
// split breaks up a node into multiple smaller nodes, if appropriate.
// This should only be called from the spill() function.
func (n *node) split(pageSize int) []*node {
var nodes []*node
node := n
for {
// Split node into two.
a, b := node.splitTwo(pageSize)
nodes = append(nodes, a)
// If we can't split then exit the loop.
if b == nil {
break
}
// Set node to b so it gets split on the next iteration.
node = b
}
return nodes
}
// splitTwo breaks up a node into two smaller nodes, if appropriate.
// This should only be called from the split() function.
func (n *node) splitTwo(pageSize int) (*node, *node) {
// Ignore the split if the page doesn't have at least enough nodes for
// two pages or if the nodes can fit in a single page.
if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
return n, nil
}
// Determine the threshold before starting a new node.
var fillPercent = n.bucket.FillPercent
if fillPercent < minFillPercent {
fillPercent = minFillPercent
} else if fillPercent > maxFillPercent {
fillPercent = maxFillPercent
}
threshold := int(float64(pageSize) * fillPercent)
// Determine split position and sizes of the two pages.
splitIndex, _ := n.splitIndex(threshold)
// Split node into two separate nodes.
// If there's no parent then we'll need to create one.
if n.parent == nil {
n.parent = &node{bucket: n.bucket, children: []*node{n}}
}
// Create a new node and add it to the parent.
next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
n.parent.children = append(n.parent.children, next)
// Split inodes across two nodes.
next.inodes = n.inodes[splitIndex:]
n.inodes = n.inodes[:splitIndex]
// Update the statistics.
n.bucket.tx.stats.Split++
return n, next
}
// splitIndex finds the position where a page will fill a given threshold.
// It returns the index as well as the size of the first page.
// This is only be called from split().
func (n *node) splitIndex(threshold int) (index, sz int) {
sz = pageHeaderSize
// Loop until we only have the minimum number of keys required for the second page.
for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
index = i
inode := n.inodes[i]
elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
// If we have at least the minimum number of keys and adding another
// node would put us over the threshold then exit and return.
if i >= minKeysPerPage && sz+elsize > threshold {
break
}
// Add the element size to the total size.
sz += elsize
}
return
}
// spill writes the nodes to dirty pages and splits nodes as it goes.
// Returns an error if dirty pages cannot be allocated.
func (n *node) spill() error {
var tx = n.bucket.tx
if n.spilled {
return nil
}
// Spill child nodes first. Child nodes can materialize sibling nodes in
// the case of split-merge so we cannot use a range loop. We have to check
// the children size on every loop iteration.
sort.Sort(n.children)
for i := 0; i < len(n.children); i++ {
if err := n.children[i].spill(); err != nil {
return err
}
}
// We no longer need the child list because it's only used for spill tracking.
n.children = nil
// Split nodes into appropriate sizes. The first node will always be n.
var nodes = n.split(tx.db.pageSize)
for _, node := range nodes {
// Add node's page to the freelist if it's not new.
if node.pgid > 0 {
tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
node.pgid = 0
}
// Allocate contiguous space for the node.
p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)
if err != nil {
return err
}
// Write the node.
if p.id >= tx.meta.pgid {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
}
node.pgid = p.id
node.write(p)
node.spilled = true
// Insert into parent inodes.
if node.parent != nil {
var key = node.key
if key == nil {
key = node.inodes[0].key
}
node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
node.key = node.inodes[0].key
_assert(len(node.key) > 0, "spill: zero-length node key")
}
// Update the statistics.
tx.stats.Spill++
}
// If the root node split and created a new root then we need to spill that
// as well. We'll clear out the children to make sure it doesn't try to respill.
if n.parent != nil && n.parent.pgid == 0 {
n.children = nil
return n.parent.spill()
}
return nil
}
// rebalance attempts to combine the node with sibling nodes if the node fill
// size is below a threshold or if there are not enough keys.
func (n *node) rebalance() {
if !n.unbalanced {
return
}
n.unbalanced = false
// Update statistics.
n.bucket.tx.stats.Rebalance++
// Ignore if node is above threshold (25%) and has enough keys.
var threshold = n.bucket.tx.db.pageSize / 4
if n.size() > threshold && len(n.inodes) > n.minKeys() {
return
}
// Root node has special handling.
if n.parent == nil {
// If root node is a branch and only has one node then collapse it.
if !n.isLeaf && len(n.inodes) == 1 {
// Move root's child up.
child := n.bucket.node(n.inodes[0].pgid, n)
n.isLeaf = child.isLeaf
n.inodes = child.inodes[:]
n.children = child.children
// Reparent all child nodes being moved.
for _, inode := range n.inodes {
if child, ok := n.bucket.nodes[inode.pgid]; ok {
child.parent = n
}
}
// Remove old child.
child.parent = nil
delete(n.bucket.nodes, child.pgid)
child.free()
}
return
}
// If node has no keys then just remove it.
if n.numChildren() == 0 {
n.parent.del(n.key)
n.parent.removeChild(n)
delete(n.bucket.nodes, n.pgid)
n.free()
n.parent.rebalance()
return
}
_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
// Destination node is right sibling if idx == 0, otherwise left sibling.
var target *node
var useNextSibling = (n.parent.childIndex(n) == 0)
if useNextSibling {
target = n.nextSibling()
} else {
target = n.prevSibling()
}
// If both this node and the target node are too small then merge them.
if useNextSibling {
// Reparent all child nodes being moved.
for _, inode := range target.inodes {
if child, ok := n.bucket.nodes[inode.pgid]; ok {
child.parent.removeChild(child)
child.parent = n
child.parent.children = append(child.parent.children, child)
}
}
// Copy over inodes from target and remove target.
n.inodes = append(n.inodes, target.inodes...)
n.parent.del(target.key)
n.parent.removeChild(target)
delete(n.bucket.nodes, target.pgid)
target.free()
} else {
// Reparent all child nodes being moved.
for _, inode := range n.inodes {
if child, ok := n.bucket.nodes[inode.pgid]; ok {
child.parent.removeChild(child)
child.parent = target
child.parent.children = append(child.parent.children, child)
}
}
// Copy over inodes to target and remove node.
target.inodes = append(target.inodes, n.inodes...)
n.parent.del(n.key)
n.parent.removeChild(n)
delete(n.bucket.nodes, n.pgid)
n.free()
}
// Either this node or the target node was deleted from the parent so rebalance it.
n.parent.rebalance()
}
// removes a node from the list of in-memory children.
// This does not affect the inodes.
func (n *node) removeChild(target *node) {
for i, child := range n.children {
if child == target {
n.children = append(n.children[:i], n.children[i+1:]...)
return
}
}
}
// dereference causes the node to copy all its inode key/value references to heap memory.
// This is required when the mmap is reallocated so inodes are not pointing to stale data.
func (n *node) dereference() {
if n.key != nil {
key := make([]byte, len(n.key))
copy(key, n.key)
n.key = key
_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
}
for i := range n.inodes {
inode := &n.inodes[i]
key := make([]byte, len(inode.key))
copy(key, inode.key)
inode.key = key
_assert(len(inode.key) > 0, "dereference: zero-length inode key")
value := make([]byte, len(inode.value))
copy(value, inode.value)
inode.value = value
}
// Recursively dereference children.
for _, child := range n.children {
child.dereference()
}
// Update statistics.
n.bucket.tx.stats.NodeDeref++
}
// free adds the node's underlying page to the freelist.
func (n *node) free() {
if n.pgid != 0 {
n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
n.pgid = 0
}
}
// dump writes the contents of the node to STDERR for debugging purposes.
/*
func (n *node) dump() {
// Write node header.
var typ = "branch"
if n.isLeaf {
typ = "leaf"
}
warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes))
// Write out abbreviated version of each item.
for _, item := range n.inodes {
if n.isLeaf {
if item.flags&bucketLeafFlag != 0 {
bucket := (*bucket)(unsafe.Pointer(&item.value[0]))
warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root)
} else {
warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4))
}
} else {
warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid)
}
}
warn("")
}
*/
type nodes []*node
func (s nodes) Len() int { return len(s) }
func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
// inode represents an internal node inside of a node.
// It can be used to point to elements in a page or point
// to an element which hasn't been added to a page yet.
type inode struct {
flags uint32
pgid pgid
key []byte
value []byte
}
type inodes []inode

View File

@@ -1,197 +0,0 @@
package bolt
import (
"fmt"
"os"
"sort"
"unsafe"
)
const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr))
const minKeysPerPage = 2
const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
const (
branchPageFlag = 0x01
leafPageFlag = 0x02
metaPageFlag = 0x04
freelistPageFlag = 0x10
)
const (
bucketLeafFlag = 0x01
)
type pgid uint64
type page struct {
id pgid
flags uint16
count uint16
overflow uint32
ptr uintptr
}
// typ returns a human readable page type string used for debugging.
func (p *page) typ() string {
if (p.flags & branchPageFlag) != 0 {
return "branch"
} else if (p.flags & leafPageFlag) != 0 {
return "leaf"
} else if (p.flags & metaPageFlag) != 0 {
return "meta"
} else if (p.flags & freelistPageFlag) != 0 {
return "freelist"
}
return fmt.Sprintf("unknown<%02x>", p.flags)
}
// meta returns a pointer to the metadata section of the page.
func (p *page) meta() *meta {
return (*meta)(unsafe.Pointer(&p.ptr))
}
// leafPageElement retrieves the leaf node by index
func (p *page) leafPageElement(index uint16) *leafPageElement {
n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index]
return n
}
// leafPageElements retrieves a list of leaf nodes.
func (p *page) leafPageElements() []leafPageElement {
if p.count == 0 {
return nil
}
return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
// branchPageElement retrieves the branch node by index
func (p *page) branchPageElement(index uint16) *branchPageElement {
return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
}
// branchPageElements retrieves a list of branch nodes.
func (p *page) branchPageElements() []branchPageElement {
if p.count == 0 {
return nil
}
return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
// dump writes n bytes of the page to STDERR as hex output.
func (p *page) hexdump(n int) {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n]
fmt.Fprintf(os.Stderr, "%x\n", buf)
}
type pages []*page
func (s pages) Len() int { return len(s) }
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
// branchPageElement represents a node on a branch page.
type branchPageElement struct {
pos uint32
ksize uint32
pgid pgid
}
// key returns a byte slice of the node key.
func (n *branchPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// leafPageElement represents a node on a leaf page.
type leafPageElement struct {
flags uint32
pos uint32
ksize uint32
vsize uint32
}
// key returns a byte slice of the node key.
func (n *leafPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize]
}
// value returns a byte slice of the node value.
func (n *leafPageElement) value() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize]
}
// PageInfo represents human readable information about a page.
type PageInfo struct {
ID int
Type string
Count int
OverflowCount int
}
type pgids []pgid
func (s pgids) Len() int { return len(s) }
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
// merge returns the sorted union of a and b.
func (a pgids) merge(b pgids) pgids {
// Return the opposite slice if one is nil.
if len(a) == 0 {
return b
}
if len(b) == 0 {
return a
}
merged := make(pgids, len(a)+len(b))
mergepgids(merged, a, b)
return merged
}
// mergepgids copies the sorted union of a and b into dst.
// If dst is too small, it panics.
func mergepgids(dst, a, b pgids) {
if len(dst) < len(a)+len(b) {
panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
}
// Copy in the opposite slice if one is nil.
if len(a) == 0 {
copy(dst, b)
return
}
if len(b) == 0 {
copy(dst, a)
return
}
// Merged will hold all elements from both lists.
merged := dst[:0]
// Assign lead to the slice with a lower starting value, follow to the higher value.
lead, follow := a, b
if b[0] < a[0] {
lead, follow = b, a
}
// Continue while there are elements in the lead.
for len(lead) > 0 {
// Merge largest prefix of lead that is ahead of follow[0].
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
merged = append(merged, lead[:n]...)
if n >= len(lead) {
break
}
// Swap lead and follow.
lead, follow = follow, lead[n:]
}
// Append what's left in follow.
_ = append(merged, follow...)
}

View File

@@ -1,705 +0,0 @@
package bolt
import (
"fmt"
"io"
"os"
"sort"
"strings"
"time"
"unsafe"
)
// txid represents the internal transaction identifier.
type txid uint64
// Tx represents a read-only or read/write transaction on the database.
// Read-only transactions can be used for retrieving values for keys and creating cursors.
// Read/write transactions can create and remove buckets and create and remove keys.
//
// IMPORTANT: You must commit or rollback transactions when you are done with
// them. Pages can not be reclaimed by the writer until no more transactions
// are using them. A long running read transaction can cause the database to
// quickly grow.
type Tx struct {
writable bool
managed bool
db *DB
meta *meta
root Bucket
pages map[pgid]*page
stats TxStats
commitHandlers []func()
// WriteFlag specifies the flag for write-related methods like WriteTo().
// Tx opens the database file with the specified flag to copy the data.
//
// By default, the flag is unset, which works well for mostly in-memory
// workloads. For databases that are much larger than available RAM,
// set the flag to syscall.O_DIRECT to avoid trashing the page cache.
WriteFlag int
}
// init initializes the transaction.
func (tx *Tx) init(db *DB) {
tx.db = db
tx.pages = nil
// Copy the meta page since it can be changed by the writer.
tx.meta = &meta{}
db.meta().copy(tx.meta)
// Copy over the root bucket.
tx.root = newBucket(tx)
tx.root.bucket = &bucket{}
*tx.root.bucket = tx.meta.root
// Increment the transaction id and add a page cache for writable transactions.
if tx.writable {
tx.pages = make(map[pgid]*page)
tx.meta.txid += txid(1)
}
}
// ID returns the transaction id.
func (tx *Tx) ID() int {
return int(tx.meta.txid)
}
// DB returns a reference to the database that created the transaction.
func (tx *Tx) DB() *DB {
return tx.db
}
// Size returns current database size in bytes as seen by this transaction.
func (tx *Tx) Size() int64 {
return int64(tx.meta.pgid) * int64(tx.db.pageSize)
}
// Writable returns whether the transaction can perform write operations.
func (tx *Tx) Writable() bool {
return tx.writable
}
// Cursor creates a cursor associated with the root bucket.
// All items in the cursor will return a nil value because all root bucket keys point to buckets.
// The cursor is only valid as long as the transaction is open.
// Do not use a cursor after the transaction is closed.
func (tx *Tx) Cursor() *Cursor {
return tx.root.Cursor()
}
// Stats retrieves a copy of the current transaction statistics.
func (tx *Tx) Stats() TxStats {
return tx.stats
}
// Bucket retrieves a bucket by name.
// Returns nil if the bucket does not exist.
// The bucket instance is only valid for the lifetime of the transaction.
func (tx *Tx) Bucket(name []byte) *Bucket {
return tx.root.Bucket(name)
}
// CreateBucket creates a new bucket.
// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
return tx.root.CreateBucket(name)
}
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
// Returns an error if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
return tx.root.CreateBucketIfNotExists(name)
}
// DeleteBucket deletes a bucket.
// Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
func (tx *Tx) DeleteBucket(name []byte) error {
return tx.root.DeleteBucket(name)
}
// ForEach executes a function for each bucket in the root.
// If the provided function returns an error then the iteration is stopped and
// the error is returned to the caller.
func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
return tx.root.ForEach(func(k, v []byte) error {
return fn(k, tx.root.Bucket(k))
})
}
// OnCommit adds a handler function to be executed after the transaction successfully commits.
func (tx *Tx) OnCommit(fn func()) {
tx.commitHandlers = append(tx.commitHandlers, fn)
}
// Commit writes all changes to disk and updates the meta page.
// Returns an error if a disk write error occurs, or if Commit is
// called on a read-only transaction.
func (tx *Tx) Commit() error {
_assert(!tx.managed, "managed tx commit not allowed")
if tx.db == nil {
return ErrTxClosed
} else if !tx.writable {
return ErrTxNotWritable
}
// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
// Rebalance nodes which have had deletions.
var startTime = time.Now()
tx.root.rebalance()
if tx.stats.Rebalance > 0 {
tx.stats.RebalanceTime += time.Since(startTime)
}
// spill data onto dirty pages.
startTime = time.Now()
if err := tx.root.spill(); err != nil {
tx.rollback()
return err
}
tx.stats.SpillTime += time.Since(startTime)
// Free the old root bucket.
tx.meta.root.root = tx.root.root
// Free the old freelist because commit writes out a fresh freelist.
if tx.meta.freelist != pgidNoFreelist {
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
}
if !tx.db.NoFreelistSync {
err := tx.commitFreelist()
if err != nil {
return err
}
} else {
tx.meta.freelist = pgidNoFreelist
}
// Write dirty pages to disk.
startTime = time.Now()
if err := tx.write(); err != nil {
tx.rollback()
return err
}
// If strict mode is enabled then perform a consistency check.
// Only the first consistency error is reported in the panic.
if tx.db.StrictMode {
ch := tx.Check()
var errs []string
for {
err, ok := <-ch
if !ok {
break
}
errs = append(errs, err.Error())
}
if len(errs) > 0 {
panic("check fail: " + strings.Join(errs, "\n"))
}
}
// Write meta to disk.
if err := tx.writeMeta(); err != nil {
tx.rollback()
return err
}
tx.stats.WriteTime += time.Since(startTime)
// Finalize the transaction.
tx.close()
// Execute commit handlers now that the locks have been removed.
for _, fn := range tx.commitHandlers {
fn()
}
return nil
}
func (tx *Tx) commitFreelist() error {
// Allocate new pages for the new free list. This will overestimate
// the size of the freelist but not underestimate the size (which would be bad).
opgid := tx.meta.pgid
p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
if err != nil {
tx.rollback()
return err
}
if err := tx.db.freelist.write(p); err != nil {
tx.rollback()
return err
}
tx.meta.freelist = p.id
// If the high water mark has moved up then attempt to grow the database.
if tx.meta.pgid > opgid {
if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
tx.rollback()
return err
}
}
return nil
}
// Rollback closes the transaction and ignores all previous updates. Read-only
// transactions must be rolled back and not committed.
func (tx *Tx) Rollback() error {
_assert(!tx.managed, "managed tx rollback not allowed")
if tx.db == nil {
return ErrTxClosed
}
tx.rollback()
return nil
}
func (tx *Tx) rollback() {
if tx.db == nil {
return
}
if tx.writable {
tx.db.freelist.rollback(tx.meta.txid)
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
}
tx.close()
}
func (tx *Tx) close() {
if tx.db == nil {
return
}
if tx.writable {
// Grab freelist stats.
var freelistFreeN = tx.db.freelist.free_count()
var freelistPendingN = tx.db.freelist.pending_count()
var freelistAlloc = tx.db.freelist.size()
// Remove transaction ref & writer lock.
tx.db.rwtx = nil
tx.db.rwlock.Unlock()
// Merge statistics.
tx.db.statlock.Lock()
tx.db.stats.FreePageN = freelistFreeN
tx.db.stats.PendingPageN = freelistPendingN
tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize
tx.db.stats.FreelistInuse = freelistAlloc
tx.db.stats.TxStats.add(&tx.stats)
tx.db.statlock.Unlock()
} else {
tx.db.removeTx(tx)
}
// Clear all references.
tx.db = nil
tx.meta = nil
tx.root = Bucket{tx: tx}
tx.pages = nil
}
// Copy writes the entire database to a writer.
// This function exists for backwards compatibility. Use WriteTo() instead.
func (tx *Tx) Copy(w io.Writer) error {
_, err := tx.WriteTo(w)
return err
}
// WriteTo writes the entire database to a writer.
// If err == nil then exactly tx.Size() bytes will be written into the writer.
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
// Attempt to open reader with WriteFlag
f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
if err != nil {
return 0, err
}
defer func() {
if cerr := f.Close(); err == nil {
err = cerr
}
}()
// Generate a meta page. We use the same page data for both meta pages.
buf := make([]byte, tx.db.pageSize)
page := (*page)(unsafe.Pointer(&buf[0]))
page.flags = metaPageFlag
*page.meta() = *tx.meta
// Write meta 0.
page.id = 0
page.meta().checksum = page.meta().sum64()
nn, err := w.Write(buf)
n += int64(nn)
if err != nil {
return n, fmt.Errorf("meta 0 copy: %s", err)
}
// Write meta 1 with a lower transaction id.
page.id = 1
page.meta().txid -= 1
page.meta().checksum = page.meta().sum64()
nn, err = w.Write(buf)
n += int64(nn)
if err != nil {
return n, fmt.Errorf("meta 1 copy: %s", err)
}
// Move past the meta pages in the file.
if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
return n, fmt.Errorf("seek: %s", err)
}
// Copy data pages.
wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
n += wn
if err != nil {
return n, err
}
return n, nil
}
// CopyFile copies the entire database to file at the given path.
// A reader transaction is maintained during the copy so it is safe to continue
// using the database while a copy is in progress.
func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
if err != nil {
return err
}
err = tx.Copy(f)
if err != nil {
_ = f.Close()
return err
}
return f.Close()
}
// Check performs several consistency checks on the database for this transaction.
// An error is returned if any inconsistency is found.
//
// It can be safely run concurrently on a writable transaction. However, this
// incurs a high cost for large databases and databases with a lot of subbuckets
// because of caching. This overhead can be removed if running on a read-only
// transaction, however, it is not safe to execute other writer transactions at
// the same time.
func (tx *Tx) Check() <-chan error {
ch := make(chan error)
go tx.check(ch)
return ch
}
func (tx *Tx) check(ch chan error) {
// Force loading free list if opened in ReadOnly mode.
tx.db.loadFreelist()
// Check if any pages are double freed.
freed := make(map[pgid]bool)
all := make([]pgid, tx.db.freelist.count())
tx.db.freelist.copyall(all)
for _, id := range all {
if freed[id] {
ch <- fmt.Errorf("page %d: already freed", id)
}
freed[id] = true
}
// Track every reachable page.
reachable := make(map[pgid]*page)
reachable[0] = tx.page(0) // meta0
reachable[1] = tx.page(1) // meta1
if tx.meta.freelist != pgidNoFreelist {
for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
}
}
// Recursively check buckets.
tx.checkBucket(&tx.root, reachable, freed, ch)
// Ensure all pages below high water mark are either reachable or freed.
for i := pgid(0); i < tx.meta.pgid; i++ {
_, isReachable := reachable[i]
if !isReachable && !freed[i] {
ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
}
}
// Close the channel to signal completion.
close(ch)
}
func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) {
// Ignore inline buckets.
if b.root == 0 {
return
}
// Check every page used by this bucket.
b.tx.forEachPage(b.root, 0, func(p *page, _ int) {
if p.id > tx.meta.pgid {
ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid))
}
// Ensure each page is only referenced once.
for i := pgid(0); i <= pgid(p.overflow); i++ {
var id = p.id + i
if _, ok := reachable[id]; ok {
ch <- fmt.Errorf("page %d: multiple references", int(id))
}
reachable[id] = p
}
// We should only encounter un-freed leaf and branch pages.
if freed[p.id] {
ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ())
}
})
// Check each bucket within this bucket.
_ = b.ForEach(func(k, v []byte) error {
if child := b.Bucket(k); child != nil {
tx.checkBucket(child, reachable, freed, ch)
}
return nil
})
}
// allocate returns a contiguous block of memory starting at a given page.
func (tx *Tx) allocate(count int) (*page, error) {
p, err := tx.db.allocate(tx.meta.txid, count)
if err != nil {
return nil, err
}
// Save to our page cache.
tx.pages[p.id] = p
// Update statistics.
tx.stats.PageCount += count
tx.stats.PageAlloc += count * tx.db.pageSize
return p, nil
}
// write writes any dirty pages to disk.
func (tx *Tx) write() error {
// Sort pages by id.
pages := make(pages, 0, len(tx.pages))
for _, p := range tx.pages {
pages = append(pages, p)
}
// Clear out page cache early.
tx.pages = make(map[pgid]*page)
sort.Sort(pages)
// Write pages to disk in order.
for _, p := range pages {
size := (int(p.overflow) + 1) * tx.db.pageSize
offset := int64(p.id) * int64(tx.db.pageSize)
// Write out page in "max allocation" sized chunks.
ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
for {
// Limit our write to our max allocation size.
sz := size
if sz > maxAllocSize-1 {
sz = maxAllocSize - 1
}
// Write chunk to disk.
buf := ptr[:sz]
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
}
// Update statistics.
tx.stats.Write++
// Exit inner for loop if we've written all the chunks.
size -= sz
if size == 0 {
break
}
// Otherwise move offset forward and move pointer to next chunk.
offset += int64(sz)
ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
}
}
// Ignore file sync if flag is set on DB.
if !tx.db.NoSync || IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err
}
}
// Put small pages back to page pool.
for _, p := range pages {
// Ignore page sizes over 1 page.
// These are allocated using make() instead of the page pool.
if int(p.overflow) != 0 {
continue
}
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize]
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
for i := range buf {
buf[i] = 0
}
tx.db.pagePool.Put(buf)
}
return nil
}
// writeMeta writes the meta to the disk.
func (tx *Tx) writeMeta() error {
// Create a temporary buffer for the meta page.
buf := make([]byte, tx.db.pageSize)
p := tx.db.pageInBuffer(buf, 0)
tx.meta.write(p)
// Write the meta page to file.
if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
return err
}
if !tx.db.NoSync || IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err
}
}
// Update statistics.
tx.stats.Write++
return nil
}
// page returns a reference to the page with a given id.
// If page has been written to then a temporary buffered page is returned.
func (tx *Tx) page(id pgid) *page {
// Check the dirty pages first.
if tx.pages != nil {
if p, ok := tx.pages[id]; ok {
return p
}
}
// Otherwise return directly from the mmap.
return tx.db.page(id)
}
// forEachPage iterates over every page within a given page and executes a function.
func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
p := tx.page(pgid)
// Execute function.
fn(p, depth)
// Recursively loop over children.
if (p.flags & branchPageFlag) != 0 {
for i := 0; i < int(p.count); i++ {
elem := p.branchPageElement(uint16(i))
tx.forEachPage(elem.pgid, depth+1, fn)
}
}
}
// Page returns page information for a given page number.
// This is only safe for concurrent use when used by a writable transaction.
func (tx *Tx) Page(id int) (*PageInfo, error) {
if tx.db == nil {
return nil, ErrTxClosed
} else if pgid(id) >= tx.meta.pgid {
return nil, nil
}
// Build the page info.
p := tx.db.page(pgid(id))
info := &PageInfo{
ID: id,
Count: int(p.count),
OverflowCount: int(p.overflow),
}
// Determine the type (or if it's free).
if tx.db.freelist.freed(pgid(id)) {
info.Type = "free"
} else {
info.Type = p.typ()
}
return info, nil
}
// TxStats represents statistics about the actions performed by the transaction.
type TxStats struct {
// Page statistics.
PageCount int // number of page allocations
PageAlloc int // total bytes allocated
// Cursor statistics.
CursorCount int // number of cursors created
// Node statistics
NodeCount int // number of node allocations
NodeDeref int // number of node dereferences
// Rebalance statistics.
Rebalance int // number of node rebalances
RebalanceTime time.Duration // total time spent rebalancing
// Split/Spill statistics.
Split int // number of nodes split
Spill int // number of nodes spilled
SpillTime time.Duration // total time spent spilling
// Write statistics.
Write int // number of writes performed
WriteTime time.Duration // total time spent writing to disk
}
func (s *TxStats) add(other *TxStats) {
s.PageCount += other.PageCount
s.PageAlloc += other.PageAlloc
s.CursorCount += other.CursorCount
s.NodeCount += other.NodeCount
s.NodeDeref += other.NodeDeref
s.Rebalance += other.Rebalance
s.RebalanceTime += other.RebalanceTime
s.Split += other.Split
s.Spill += other.Spill
s.SpillTime += other.SpillTime
s.Write += other.Write
s.WriteTime += other.WriteTime
}
// Sub calculates and returns the difference between two sets of transaction stats.
// This is useful when obtaining stats at two different points and time and
// you need the performance counters that occurred within that time span.
func (s *TxStats) Sub(other *TxStats) TxStats {
var diff TxStats
diff.PageCount = s.PageCount - other.PageCount
diff.PageAlloc = s.PageAlloc - other.PageAlloc
diff.CursorCount = s.CursorCount - other.CursorCount
diff.NodeCount = s.NodeCount - other.NodeCount
diff.NodeDeref = s.NodeDeref - other.NodeDeref
diff.Rebalance = s.Rebalance - other.Rebalance
diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime
diff.Split = s.Split - other.Split
diff.Spill = s.Spill - other.Spill
diff.SpillTime = s.SpillTime - other.SpillTime
diff.Write = s.Write - other.Write
diff.WriteTime = s.WriteTime - other.WriteTime
return diff
}

1
cmd/vendor/github.com/coreos/etcd generated vendored
View File

@@ -1 +0,0 @@
../../../../

View File

@@ -1,63 +0,0 @@
// Copyright 2014 Docker, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Code forked from Docker project
package daemon
import (
"net"
"os"
)
// SdNotify sends a message to the init daemon. It is common to ignore the error.
// If `unsetEnvironment` is true, the environment variable `NOTIFY_SOCKET`
// will be unconditionally unset.
//
// It returns one of the following:
// (false, nil) - notification not supported (i.e. NOTIFY_SOCKET is unset)
// (false, err) - notification supported, but failure happened (e.g. error connecting to NOTIFY_SOCKET or while sending data)
// (true, nil) - notification supported, data has been sent
func SdNotify(unsetEnvironment bool, state string) (sent bool, err error) {
socketAddr := &net.UnixAddr{
Name: os.Getenv("NOTIFY_SOCKET"),
Net: "unixgram",
}
// NOTIFY_SOCKET not set
if socketAddr.Name == "" {
return false, nil
}
if unsetEnvironment {
err = os.Unsetenv("NOTIFY_SOCKET")
}
if err != nil {
return false, err
}
conn, err := net.DialUnix(socketAddr.Net, nil, socketAddr)
// Error connecting to NOTIFY_SOCKET
if err != nil {
return false, err
}
defer conn.Close()
_, err = conn.Write([]byte(state))
// Error sending the message
if err != nil {
return false, err
}
return true, nil
}

View File

@@ -1,72 +0,0 @@
// Copyright 2016 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package daemon
import (
"fmt"
"os"
"strconv"
"time"
)
// SdWatchdogEnabled return watchdog information for a service.
// Process should send daemon.SdNotify("WATCHDOG=1") every time / 2.
// If `unsetEnvironment` is true, the environment variables `WATCHDOG_USEC`
// and `WATCHDOG_PID` will be unconditionally unset.
//
// It returns one of the following:
// (0, nil) - watchdog isn't enabled or we aren't the watched PID.
// (0, err) - an error happened (e.g. error converting time).
// (time, nil) - watchdog is enabled and we can send ping.
// time is delay before inactive service will be killed.
func SdWatchdogEnabled(unsetEnvironment bool) (time.Duration, error) {
wusec := os.Getenv("WATCHDOG_USEC")
wpid := os.Getenv("WATCHDOG_PID")
if unsetEnvironment {
wusecErr := os.Unsetenv("WATCHDOG_USEC")
wpidErr := os.Unsetenv("WATCHDOG_PID")
if wusecErr != nil {
return 0, wusecErr
}
if wpidErr != nil {
return 0, wpidErr
}
}
if wusec == "" {
return 0, nil
}
s, err := strconv.Atoi(wusec)
if err != nil {
return 0, fmt.Errorf("error converting WATCHDOG_USEC: %s", err)
}
if s <= 0 {
return 0, fmt.Errorf("error WATCHDOG_USEC must be a positive number")
}
interval := time.Duration(s) * time.Microsecond
if wpid == "" {
return interval, nil
}
p, err := strconv.Atoi(wpid)
if err != nil {
return 0, fmt.Errorf("error converting WATCHDOG_PID: %s", err)
}
if os.Getpid() != p {
return 0, nil
}
return interval, nil
}

View File

@@ -1,179 +0,0 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package journal provides write bindings to the local systemd journal.
// It is implemented in pure Go and connects to the journal directly over its
// unix socket.
//
// To read from the journal, see the "sdjournal" package, which wraps the
// sd-journal a C API.
//
// http://www.freedesktop.org/software/systemd/man/systemd-journald.service.html
package journal
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"strconv"
"strings"
"syscall"
)
// Priority of a journal message
type Priority int
const (
PriEmerg Priority = iota
PriAlert
PriCrit
PriErr
PriWarning
PriNotice
PriInfo
PriDebug
)
var conn net.Conn
func init() {
var err error
conn, err = net.Dial("unixgram", "/run/systemd/journal/socket")
if err != nil {
conn = nil
}
}
// Enabled returns true if the local systemd journal is available for logging
func Enabled() bool {
return conn != nil
}
// Send a message to the local systemd journal. vars is a map of journald
// fields to values. Fields must be composed of uppercase letters, numbers,
// and underscores, but must not start with an underscore. Within these
// restrictions, any arbitrary field name may be used. Some names have special
// significance: see the journalctl documentation
// (http://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html)
// for more details. vars may be nil.
func Send(message string, priority Priority, vars map[string]string) error {
if conn == nil {
return journalError("could not connect to journald socket")
}
data := new(bytes.Buffer)
appendVariable(data, "PRIORITY", strconv.Itoa(int(priority)))
appendVariable(data, "MESSAGE", message)
for k, v := range vars {
appendVariable(data, k, v)
}
_, err := io.Copy(conn, data)
if err != nil && isSocketSpaceError(err) {
file, err := tempFd()
if err != nil {
return journalError(err.Error())
}
defer file.Close()
_, err = io.Copy(file, data)
if err != nil {
return journalError(err.Error())
}
rights := syscall.UnixRights(int(file.Fd()))
/* this connection should always be a UnixConn, but better safe than sorry */
unixConn, ok := conn.(*net.UnixConn)
if !ok {
return journalError("can't send file through non-Unix connection")
}
unixConn.WriteMsgUnix([]byte{}, rights, nil)
} else if err != nil {
return journalError(err.Error())
}
return nil
}
// Print prints a message to the local systemd journal using Send().
func Print(priority Priority, format string, a ...interface{}) error {
return Send(fmt.Sprintf(format, a...), priority, nil)
}
func appendVariable(w io.Writer, name, value string) {
if !validVarName(name) {
journalError("variable name contains invalid character, ignoring")
}
if strings.ContainsRune(value, '\n') {
/* When the value contains a newline, we write:
* - the variable name, followed by a newline
* - the size (in 64bit little endian format)
* - the data, followed by a newline
*/
fmt.Fprintln(w, name)
binary.Write(w, binary.LittleEndian, uint64(len(value)))
fmt.Fprintln(w, value)
} else {
/* just write the variable and value all on one line */
fmt.Fprintf(w, "%s=%s\n", name, value)
}
}
func validVarName(name string) bool {
/* The variable name must be in uppercase and consist only of characters,
* numbers and underscores, and may not begin with an underscore. (from the docs)
*/
valid := name[0] != '_'
for _, c := range name {
valid = valid && ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_'
}
return valid
}
func isSocketSpaceError(err error) bool {
opErr, ok := err.(*net.OpError)
if !ok {
return false
}
sysErr, ok := opErr.Err.(syscall.Errno)
if !ok {
return false
}
return sysErr == syscall.EMSGSIZE || sysErr == syscall.ENOBUFS
}
func tempFd() (*os.File, error) {
file, err := ioutil.TempFile("/dev/shm/", "journal.XXXXX")
if err != nil {
return nil, err
}
syscall.Unlink(file.Name())
if err != nil {
return nil, err
}
return file, nil
}
func journalError(s string) error {
s = "journal error: " + s
fmt.Fprintln(os.Stderr, s)
return errors.New(s)
}

View File

@@ -1,240 +0,0 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package capnslog
import (
"errors"
"strings"
"sync"
)
// LogLevel is the set of all log levels.
type LogLevel int8
const (
// CRITICAL is the lowest log level; only errors which will end the program will be propagated.
CRITICAL LogLevel = iota - 1
// ERROR is for errors that are not fatal but lead to troubling behavior.
ERROR
// WARNING is for errors which are not fatal and not errors, but are unusual. Often sourced from misconfigurations.
WARNING
// NOTICE is for normal but significant conditions.
NOTICE
// INFO is a log level for common, everyday log updates.
INFO
// DEBUG is the default hidden level for more verbose updates about internal processes.
DEBUG
// TRACE is for (potentially) call by call tracing of programs.
TRACE
)
// Char returns a single-character representation of the log level.
func (l LogLevel) Char() string {
switch l {
case CRITICAL:
return "C"
case ERROR:
return "E"
case WARNING:
return "W"
case NOTICE:
return "N"
case INFO:
return "I"
case DEBUG:
return "D"
case TRACE:
return "T"
default:
panic("Unhandled loglevel")
}
}
// String returns a multi-character representation of the log level.
func (l LogLevel) String() string {
switch l {
case CRITICAL:
return "CRITICAL"
case ERROR:
return "ERROR"
case WARNING:
return "WARNING"
case NOTICE:
return "NOTICE"
case INFO:
return "INFO"
case DEBUG:
return "DEBUG"
case TRACE:
return "TRACE"
default:
panic("Unhandled loglevel")
}
}
// Update using the given string value. Fulfills the flag.Value interface.
func (l *LogLevel) Set(s string) error {
value, err := ParseLevel(s)
if err != nil {
return err
}
*l = value
return nil
}
// ParseLevel translates some potential loglevel strings into their corresponding levels.
func ParseLevel(s string) (LogLevel, error) {
switch s {
case "CRITICAL", "C":
return CRITICAL, nil
case "ERROR", "0", "E":
return ERROR, nil
case "WARNING", "1", "W":
return WARNING, nil
case "NOTICE", "2", "N":
return NOTICE, nil
case "INFO", "3", "I":
return INFO, nil
case "DEBUG", "4", "D":
return DEBUG, nil
case "TRACE", "5", "T":
return TRACE, nil
}
return CRITICAL, errors.New("couldn't parse log level " + s)
}
type RepoLogger map[string]*PackageLogger
type loggerStruct struct {
sync.Mutex
repoMap map[string]RepoLogger
formatter Formatter
}
// logger is the global logger
var logger = new(loggerStruct)
// SetGlobalLogLevel sets the log level for all packages in all repositories
// registered with capnslog.
func SetGlobalLogLevel(l LogLevel) {
logger.Lock()
defer logger.Unlock()
for _, r := range logger.repoMap {
r.setRepoLogLevelInternal(l)
}
}
// GetRepoLogger may return the handle to the repository's set of packages' loggers.
func GetRepoLogger(repo string) (RepoLogger, error) {
logger.Lock()
defer logger.Unlock()
r, ok := logger.repoMap[repo]
if !ok {
return nil, errors.New("no packages registered for repo " + repo)
}
return r, nil
}
// MustRepoLogger returns the handle to the repository's packages' loggers.
func MustRepoLogger(repo string) RepoLogger {
r, err := GetRepoLogger(repo)
if err != nil {
panic(err)
}
return r
}
// SetRepoLogLevel sets the log level for all packages in the repository.
func (r RepoLogger) SetRepoLogLevel(l LogLevel) {
logger.Lock()
defer logger.Unlock()
r.setRepoLogLevelInternal(l)
}
func (r RepoLogger) setRepoLogLevelInternal(l LogLevel) {
for _, v := range r {
v.level = l
}
}
// ParseLogLevelConfig parses a comma-separated string of "package=loglevel", in
// order, and returns a map of the results, for use in SetLogLevel.
func (r RepoLogger) ParseLogLevelConfig(conf string) (map[string]LogLevel, error) {
setlist := strings.Split(conf, ",")
out := make(map[string]LogLevel)
for _, setstring := range setlist {
setting := strings.Split(setstring, "=")
if len(setting) != 2 {
return nil, errors.New("oddly structured `pkg=level` option: " + setstring)
}
l, err := ParseLevel(setting[1])
if err != nil {
return nil, err
}
out[setting[0]] = l
}
return out, nil
}
// SetLogLevel takes a map of package names within a repository to their desired
// loglevel, and sets the levels appropriately. Unknown packages are ignored.
// "*" is a special package name that corresponds to all packages, and will be
// processed first.
func (r RepoLogger) SetLogLevel(m map[string]LogLevel) {
logger.Lock()
defer logger.Unlock()
if l, ok := m["*"]; ok {
r.setRepoLogLevelInternal(l)
}
for k, v := range m {
l, ok := r[k]
if !ok {
continue
}
l.level = v
}
}
// SetFormatter sets the formatting function for all logs.
func SetFormatter(f Formatter) {
logger.Lock()
defer logger.Unlock()
logger.formatter = f
}
// NewPackageLogger creates a package logger object.
// This should be defined as a global var in your package, referencing your repo.
func NewPackageLogger(repo string, pkg string) (p *PackageLogger) {
logger.Lock()
defer logger.Unlock()
if logger.repoMap == nil {
logger.repoMap = make(map[string]RepoLogger)
}
r, rok := logger.repoMap[repo]
if !rok {
logger.repoMap[repo] = make(RepoLogger)
r = logger.repoMap[repo]
}
p, pok := r[pkg]
if !pok {
r[pkg] = &PackageLogger{
pkg: pkg,
level: INFO,
}
p = r[pkg]
}
return
}

View File

@@ -1,177 +0,0 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package capnslog
import (
"fmt"
"os"
)
type PackageLogger struct {
pkg string
level LogLevel
}
const calldepth = 2
func (p *PackageLogger) internalLog(depth int, inLevel LogLevel, entries ...interface{}) {
logger.Lock()
defer logger.Unlock()
if inLevel != CRITICAL && p.level < inLevel {
return
}
if logger.formatter != nil {
logger.formatter.Format(p.pkg, inLevel, depth+1, entries...)
}
}
func (p *PackageLogger) LevelAt(l LogLevel) bool {
logger.Lock()
defer logger.Unlock()
return p.level >= l
}
// Log a formatted string at any level between ERROR and TRACE
func (p *PackageLogger) Logf(l LogLevel, format string, args ...interface{}) {
p.internalLog(calldepth, l, fmt.Sprintf(format, args...))
}
// Log a message at any level between ERROR and TRACE
func (p *PackageLogger) Log(l LogLevel, args ...interface{}) {
p.internalLog(calldepth, l, fmt.Sprint(args...))
}
// log stdlib compatibility
func (p *PackageLogger) Println(args ...interface{}) {
p.internalLog(calldepth, INFO, fmt.Sprintln(args...))
}
func (p *PackageLogger) Printf(format string, args ...interface{}) {
p.Logf(INFO, format, args...)
}
func (p *PackageLogger) Print(args ...interface{}) {
p.internalLog(calldepth, INFO, fmt.Sprint(args...))
}
// Panic and fatal
func (p *PackageLogger) Panicf(format string, args ...interface{}) {
s := fmt.Sprintf(format, args...)
p.internalLog(calldepth, CRITICAL, s)
panic(s)
}
func (p *PackageLogger) Panic(args ...interface{}) {
s := fmt.Sprint(args...)
p.internalLog(calldepth, CRITICAL, s)
panic(s)
}
func (p *PackageLogger) Fatalf(format string, args ...interface{}) {
p.Logf(CRITICAL, format, args...)
os.Exit(1)
}
func (p *PackageLogger) Fatal(args ...interface{}) {
s := fmt.Sprint(args...)
p.internalLog(calldepth, CRITICAL, s)
os.Exit(1)
}
func (p *PackageLogger) Fatalln(args ...interface{}) {
s := fmt.Sprintln(args...)
p.internalLog(calldepth, CRITICAL, s)
os.Exit(1)
}
// Error Functions
func (p *PackageLogger) Errorf(format string, args ...interface{}) {
p.Logf(ERROR, format, args...)
}
func (p *PackageLogger) Error(entries ...interface{}) {
p.internalLog(calldepth, ERROR, entries...)
}
// Warning Functions
func (p *PackageLogger) Warningf(format string, args ...interface{}) {
p.Logf(WARNING, format, args...)
}
func (p *PackageLogger) Warning(entries ...interface{}) {
p.internalLog(calldepth, WARNING, entries...)
}
// Notice Functions
func (p *PackageLogger) Noticef(format string, args ...interface{}) {
p.Logf(NOTICE, format, args...)
}
func (p *PackageLogger) Notice(entries ...interface{}) {
p.internalLog(calldepth, NOTICE, entries...)
}
// Info Functions
func (p *PackageLogger) Infof(format string, args ...interface{}) {
p.Logf(INFO, format, args...)
}
func (p *PackageLogger) Info(entries ...interface{}) {
p.internalLog(calldepth, INFO, entries...)
}
// Debug Functions
func (p *PackageLogger) Debugf(format string, args ...interface{}) {
if p.level < DEBUG {
return
}
p.Logf(DEBUG, format, args...)
}
func (p *PackageLogger) Debug(entries ...interface{}) {
if p.level < DEBUG {
return
}
p.internalLog(calldepth, DEBUG, entries...)
}
// Trace Functions
func (p *PackageLogger) Tracef(format string, args ...interface{}) {
if p.level < TRACE {
return
}
p.Logf(TRACE, format, args...)
}
func (p *PackageLogger) Trace(entries ...interface{}) {
if p.level < TRACE {
return
}
p.internalLog(calldepth, TRACE, entries...)
}
func (p *PackageLogger) Flush() {
logger.Lock()
defer logger.Unlock()
logger.formatter.Flush()
}

View File

@@ -1,108 +0,0 @@
package humanize
import (
"bytes"
"math"
"math/big"
"strconv"
"strings"
)
// Comma produces a string form of the given number in base 10 with
// commas after every three orders of magnitude.
//
// e.g. Comma(834142) -> 834,142
func Comma(v int64) string {
sign := ""
// Min int64 can't be negated to a usable value, so it has to be special cased.
if v == math.MinInt64 {
return "-9,223,372,036,854,775,808"
}
if v < 0 {
sign = "-"
v = 0 - v
}
parts := []string{"", "", "", "", "", "", ""}
j := len(parts) - 1
for v > 999 {
parts[j] = strconv.FormatInt(v%1000, 10)
switch len(parts[j]) {
case 2:
parts[j] = "0" + parts[j]
case 1:
parts[j] = "00" + parts[j]
}
v = v / 1000
j--
}
parts[j] = strconv.Itoa(int(v))
return sign + strings.Join(parts[j:], ",")
}
// Commaf produces a string form of the given number in base 10 with
// commas after every three orders of magnitude.
//
// e.g. Commaf(834142.32) -> 834,142.32
func Commaf(v float64) string {
buf := &bytes.Buffer{}
if v < 0 {
buf.Write([]byte{'-'})
v = 0 - v
}
comma := []byte{','}
parts := strings.Split(strconv.FormatFloat(v, 'f', -1, 64), ".")
pos := 0
if len(parts[0])%3 != 0 {
pos += len(parts[0]) % 3
buf.WriteString(parts[0][:pos])
buf.Write(comma)
}
for ; pos < len(parts[0]); pos += 3 {
buf.WriteString(parts[0][pos : pos+3])
buf.Write(comma)
}
buf.Truncate(buf.Len() - 1)
if len(parts) > 1 {
buf.Write([]byte{'.'})
buf.WriteString(parts[1])
}
return buf.String()
}
// BigComma produces a string form of the given big.Int in base 10
// with commas after every three orders of magnitude.
func BigComma(b *big.Int) string {
sign := ""
if b.Sign() < 0 {
sign = "-"
b.Abs(b)
}
athousand := big.NewInt(1000)
c := (&big.Int{}).Set(b)
_, m := oom(c, athousand)
parts := make([]string, m+1)
j := len(parts) - 1
mod := &big.Int{}
for b.Cmp(athousand) >= 0 {
b.DivMod(b, athousand, mod)
parts[j] = strconv.FormatInt(mod.Int64(), 10)
switch len(parts[j]) {
case 2:
parts[j] = "0" + parts[j]
case 1:
parts[j] = "00" + parts[j]
}
j--
}
parts[j] = strconv.Itoa(int(b.Int64()))
return sign + strings.Join(parts[j:], ",")
}

Some files were not shown because too many files have changed in this diff Show More