Compare commits

..

486 Commits

Author SHA1 Message Date
Gyu-Ho Lee
bc9ddf2601 *: bump to v2.2.5 2016-02-01 11:59:45 -08:00
Gyu-Ho Lee
e7285f5626 *: updates for gRPC Godeps update 2016-02-01 11:28:39 -08:00
Gyu-Ho Lee
4613a7e61b Godeps: update gRPC 2016-02-01 11:28:29 -08:00
Xiang Li
31d1fa20bf Godeps: update boltdb 2016-01-28 10:48:22 -08:00
Hitoshi Mitake
259f89d59a etcdserver, auth: not cache a flag of auth status
This commit removes a flag that indicates auth is enabled or disabled
because it doesn't have an invalidation mechanism.

Fixes https://github.com/coreos/etcd/issues/3601 and https://github.com/coreos/etcd/issues/3964

Conflicts (Resolved):
	etcdserver/auth/auth.go
2016-01-27 15:36:16 -08:00
Gyu-Ho Lee
756d701f13 client: do not timeout when wait is true
Current V2 watch waits by encoding URL with wait=true.
When a client sets 'no-sync', it requests directly to
proxy and the proxy redirects it by cloning the request
object, which leads to cancel the original request when
it times out and the cloned request gets closed prematurely.

This fixes coreos#3894 by querying
the original client request in order to not use context timeout
when 'wait=true'.
2016-01-27 15:29:56 -08:00
Xiang Li
d14a673ace etcdmain: proxy should only lookup srv if there is no existing cluster file 2016-01-27 15:29:54 -08:00
Gyu-Ho Lee
4abb231505 *: bump to v2.2.4+git 2016-01-13 14:22:16 -08:00
Gyu-Ho Lee
bdee27b19e *: bump to v2.2.4 2016-01-13 14:07:39 -08:00
Anthony Romano
7f684641a3 Godeps: remove golang.org/x/net/netutil
Now using our own LimitListener to support KeepAlives.
2016-01-13 13:32:38 -08:00
Anthony Romano
d54cf26bed etcdmain: support keep alive listeners on limit listener connections
Fixes #4171
2016-01-13 13:32:26 -08:00
Xiang Li
d3e73aadab etcdmain: tls listener MUST be at the outer layer of all listeners
go HTTP library uses type assertion to determine if a connection
is a TLS connection. If we wrap the TLS Listener with any customized
Listener that can create customized Conn, HTTPs will be broken.

This commit fixes the issue.
2016-01-13 13:32:08 -08:00
Gyu-Ho Lee
e340928988 etcdctl: ignore value in updatedir command
Fixes coreos#4145.
client.KeysAPI ignores value if SetOptions.Dir is true.
2016-01-13 13:31:49 -08:00
Gyu-Ho Lee (etcd)
e6ffe22e16 *: bump to v2.2.3+git 2015-12-30 13:54:57 -08:00
Gyu-Ho Lee
05b564a394 *: bump to v2.2.3 2015-12-30 13:41:16 -08:00
Xiang Li
cb779b2305 etcdctl: fix syncWithPeerAPI by breaking the loop when there is no error 2015-12-30 11:24:27 -08:00
Xiang Li
22c3208fb3 etcdserver: always check if the data dir is writable before starting etcd 2015-12-30 10:22:52 -08:00
Xiang Li
e44372e430 etcdsever: avoid creating member dir before finishing validate bootstrap
This commit fixes the issue of creating member dir before validating
the configuration. When member dir exists, it indicates the local etcd
process is a valid etcd member. So we should only create member dir
after we finish configuration validation, joining validation or
discovery validation.
2015-12-30 10:19:51 -08:00
Xiang Li
05a90bc1e5 etcdmain: fix incomplete proxy config file
etcd might generate incomplete proxy config file after a power failure.
It is because we use ioutil.WriteFile. And ioutil.WriteFile does
not call Sync before closing the file.
2015-12-22 12:30:39 -08:00
Xiang Li
6751727809 etcdctl: support basic operations with etcd 0.4.
For CoreOS users, they will get an updated version of etcdctl without updating
the etcd server version. And the users cannot really control this behavior.
We do not want to suddenly break them without enough communication.

So we still want the most basic operations like get, set, watch of etcdctl2 to work
with etcd 0.4. This patch solves the incompatibility issue.
2015-12-22 12:29:16 -08:00
Xiang Li
916106c3a2 client: support reset Endpoints.
ResetEndpoints is useful when there is a scheduled cluster
change or when manually managing the cluster without auto-sync
enabled.
2015-12-22 12:25:59 -08:00
Xiang Li
e0c7768f94 store: fix data race when modify event in watchHub.
The event got from watchHub should be considered as readonly.
To modify it, we first need to get a clone of it or there might
be a data race.
2015-12-14 14:08:39 -08:00
Andrei Korzhevskii
0fb2d5d4d3 client: fix goroutine leak in unreleased context
If headerTimeout is not zero then two contexts are created but only one is released.
cancelCtx in this case is never released which leads to goroutine leak inside it.
2015-12-14 13:58:43 -08:00
Xiang Li
fc61fc7c7a etcdctl: cluster health exit with non-zero when cluster is unhealthy 2015-12-14 13:58:10 -08:00
Yicheng Qin
09b81bad15 *: bump to v2.2.2+git 2015-11-19 14:24:59 -08:00
Yicheng Qin
b4bddf685b *: bump to v2.2.2 2015-11-19 14:24:35 -08:00
Xiang Li
af1c711270 auth: use canonical path for pre-defined guest role 2015-11-19 13:41:27 -08:00
Xiang Li
c269426be8 *: fix various data races detected by race detector
Conflicts:
	rafthttp/transport.go
2015-11-19 13:38:28 -08:00
Xiang Li
20b7df3c12 rafthttp: fix data races detected by go race detector
Conflicts:
	rafthttp/pipeline.go
2015-11-19 13:34:49 -08:00
Yicheng Qin
e342de3cc5 etcdmain: fix parsing discovery error
The discovery error is wrapped into a struct now, and cannot be compared
to predefined errors. Correct the comparison behavior to fix the
problem.
2015-11-19 13:26:50 -08:00
Yicheng Qin
26cc2111cd etcdmain: improve log when join discovery fails
Before this PR, the log is

```
2015/09/1 13:18:31 etcdmain: client: etcd cluster is unavailable or
misconfigured
```

It is quite hard for people to understand what happens.

Now we print out the exact reason for the failure, and explain the way
to handle it.
2015-11-19 13:26:42 -08:00
Jonathan Boulle
5d6457e658 godeps: bump coreos/pkg/capnslog
Update to catch coreos/pkg#43 which should fix SYSLOG_IDENTIFIER getting
set when etcd is logging to the journal.
2015-11-19 13:26:29 -08:00
Wojciech Tyczynski
53bc644168 client: regenerate code to unmarshal key response
Regenerate code for unmarshaling key response with a new version of
ugorji/go/codec.
2015-11-19 13:26:19 -08:00
Wojciech Tyczynski
ad3bb484ca Godeps: update ugorji/go/codec dependency
Update ugorji/go/codec dependency to the newer version.
2015-11-19 13:26:08 -08:00
mqliang
15f7b736e4 etcdctl:fix health check condition 2015-11-19 13:25:57 -08:00
Yicheng Qin
4dc835c718 *: bump to v2.2.1+git 2015-10-15 21:59:15 -07:00
Yicheng Qin
75f8282eef *: bump to v2.2.1 2015-10-15 21:31:51 -07:00
Gyu-Ho Lee
45c86af0eb etcdctl/command: mk command with PrevNoExist
This attempts to fix #3676. `PrevNoExist` checks if the key previously exists
and if so, it returns an error, which is how `mk` command is supposed to work.
The previous code ignores the previous key and overwrites with the later value.

/cc @yichengq
2015-10-15 15:26:52 -07:00
Yicheng Qin
71e5467807 Godeps: update prometheus dependency
prometheus updates its directory layout
(https://github.com/prometheus/client_golang#where-is-model-extraction-and-text)
and makes Godeps restore/save unable to work.

Remove all prometheus dependency manually and godep save again to fix
this problem.
2015-10-15 15:26:42 -07:00
Wojciech Tyczynski
0169fec873 client: regenerate code to unmarshal key response
Regenerate code for unmarshaling key response with a new version of
ugorji/go/codec
2015-10-15 15:24:21 -07:00
Wojciech Tyczynski
766023b1b0 Godeps: update ugorji/go/codec dependency
Update ugorji/go/codec dependency to the newer version (a bunch of fixes were made).
2015-10-15 15:24:12 -07:00
Yicheng Qin
ca9e63dde2 pkg/types: fix unwanted unescape in NewURLsMap
We use url.ParseQuery to parse the names-to-urls string, but it has the side
effect of unescaping the string. If the initial-cluster string has an IPv6 address
that contains `%25`, it will unescape it to `%` and make further URL
parsing fail.

Fix it by modifying the parse process.

Go1.4 doesn't support literal IPv6 address w/ zone in
URI(https://github.com/golang/go/issues/6530), so we only enable tests
in Go1.5+.
2015-10-15 15:24:00 -07:00
Xiang Li
7659bbb1b2 etcdmain: print out error and suggestion for fixing notify issue 2015-10-15 15:23:49 -07:00
Xiang Li
f8b98d3925 etcdhttp: add Content-Type: application/json header to version handler 2015-10-15 15:23:34 -07:00
Yicheng Qin
9ee3ed777b etcdmain: exit after print out ErrDuplicateID
etcd should exit after printing log for unhandlable error.
2015-10-15 15:23:24 -07:00
Xiang Li
c9bd125490 etcdsever: mismatch error uses the same format as the corresponding flags 2015-10-15 15:22:52 -07:00
Guohua ouyang
ec49496111 proxy: improve log for retrying an unavailable endpoint
Fixes #3541

Signed-off-by: Guohua ouyang <guohuaouyang@gmail.com>
2015-10-15 15:22:40 -07:00
Xiang Li
baaefd18e2 etcdmain: better logging when user forget to set initial flags 2015-10-15 15:22:29 -07:00
Hitoshi Mitake
72c18eb7ba etcdctl: use a context with -total-timeout in simple commands
Like the commit 8ebc933111, this commit lets simple etcdctl commands
use a context with timeout value passed via -total-timeout.

This commit doesn't change complex commands like watch,
cluster-health, and import because it is not obvious that using the
context in the commands is good or not.
2015-10-15 15:22:13 -07:00
Hitoshi Mitake
2e87d71bc6 etcdctl: use user specified timeout value for entire command execution
etcdctl should be capable to use a user specified timeout value for
total command execution, not only per request timeout. This commit
adds a new option --total-timeout to the command. The value passed via
this option is used as a timeout value of entire command execution.

Fixes coreos#3517
2015-10-15 15:21:38 -07:00
Brandon Philips
217dccd617 raft: improve panic error message
Give a human being some insight into how we might have gotten to this
state based on feedback from #3504.
2015-10-15 15:20:12 -07:00
Yicheng Qin
3ceb5dd270 client: add Nodes to codecgen and regenerate 2015-10-15 15:19:58 -07:00
Yicheng Qin
49b77a59cf client: add Nodes type to faciliate sorting
This helps users to sort easily.
2015-10-15 15:19:52 -07:00
Yicheng Qin
db0511e28c *: bump to v2.2.0+git 2015-09-10 10:03:07 -07:00
Yicheng Qin
e4561dd8cf *: bump to v2.2.0 2015-09-10 10:02:45 -07:00
Yicheng Qin
6e7725cd51 Merge pull request #3478 from endocode/kayrus/typo_fix
doc: member id typo fixed
2015-09-10 00:11:26 -07:00
kayrus
37392ad223 doc: member id typo fixed 2015-09-10 08:47:45 +02:00
Xiang Li
9b032c6a00 Merge pull request #3473 from MrLawes/master
doc: fix bad url in using a directory TTL section
2015-09-09 18:57:09 -07:00
MrLawes
1c058e9706 doc: fix bad url in using a directory TTL section 2015-09-10 09:23:10 +08:00
Yicheng Qin
f3085d2ea4 Merge pull request #3459 from yichengq/release-doc
docs/dev: add release doc
2015-09-09 17:46:10 -07:00
Yicheng Qin
b70e6fc677 docs/dev: add release doc
It documents the standard way to release etcd today. Maintainer should
follow this doc to cut release, and update it in time to fit current
situation.
2015-09-09 16:42:31 -07:00
Yicheng Qin
c34cf04c27 Merge pull request #3448 from yichengq/release-script
scripts: add release.sh
2015-09-09 13:54:15 -07:00
Yicheng Qin
bdd8774169 Merge pull request #3204 from endocode/kayrus/recovery
Improved "disaster restore" doc, added "member update" command descri…
2015-09-09 12:23:51 -07:00
kayrus
19ad634673 doc: improved "disaster restore" doc, added "member update" command description 2015-09-09 20:07:31 +02:00
Yicheng Qin
7d4cd7c76a scripts: add release.sh
It could build all binaries and images for the given version.
2015-09-09 09:50:41 -07:00
Xiang Li
af0474f2e3 Merge pull request #3465 from raoofm/patch-1
etcdmain: Proxy doesnt specify - listening on http or https
2015-09-08 14:38:55 -07:00
Raoof Mohammed
2de1c36061 etcdmain: Proxy doesnt specify - listening on http or https
etcdmain: Proxy doesnt specify - listening on http or https

Fixes #3464
2015-09-08 17:19:23 -04:00
Yicheng Qin
ccdd10c757 Merge pull request #3463 from yichengq/update-roadmap
roadmap: remove 2.2 section
2015-09-08 13:55:50 -07:00
Yicheng Qin
c837f0526f roadmap: remove 2.2 section
We have finished all of them.
2015-09-08 13:43:39 -07:00
Xiang Li
d8e6e217fd Merge pull request #3461 from xiang90/doc
doc: remove one limitation in upgrade doc
2015-09-08 13:29:43 -07:00
Xiang Li
3689ea3071 doc: remove one limitation in upgrade doc 2015-09-08 13:28:23 -07:00
Xiang Li
a44da0b62a Merge pull request #3451 from raoofm/patch-1
discovery: log error only if both ssl and non-ssl srv lookups fail
2015-09-06 20:54:43 -07:00
Raoof Mohammed
9a2809f0b5 discovery: log error only if both ssl and non-ssl srv lookups fail
discovery: log error only if both ssl and non-ssl srv lookups fail
Earlier we were logging as soon as one of the lookups failed.

Fixes #3414
2015-09-06 23:44:19 -04:00
Yicheng Qin
184337568d scripts/build-docker: build docker in image-docker dir
The docker build command will use whatever directory contains the
Dockerfile as the build context (including all of its subdirectories).
And the <src> path of ADD must be inside the context of the build.
So change it to build in a specific directory for clean and fast.
2015-09-06 00:17:41 -07:00
Yicheng Qin
15d1db9bf8 scripts/build-aci: support BINARYDIR and BUILDDIR
This makes it more configurable, and is ready for overall release script.
2015-09-06 00:17:41 -07:00
Yicheng Qin
6b70fa72fe scripts: build-release -> build-binary
This makes the functionality of the script more clear, and always use
bash to run the script because it has bash-specific grammar.
2015-09-06 00:16:51 -07:00
Yicheng Qin
cf6cb82caa scripts/build-docker: stop creating scratch image
Scratch image has become docker's reserved image.
2015-09-06 00:16:08 -07:00
Yicheng Qin
a1b01c266a scripts/build-aci: fix the way to check executability
Or it may treat runnable command as unexecutable.
2015-09-06 00:15:31 -07:00
Xiang Li
b9646b5734 Merge pull request #3447 from xiang90/txn
etcdctlv3: fix txn command
2015-09-05 18:21:11 -07:00
Xiang Li
1532f7585b etcdctlv3: fix txn command 2015-09-05 16:08:15 -07:00
Xiang Li
dab0871acb Merge pull request #3446 from xiang90/v3
etcdserver: refactor v3demo do
2015-09-05 15:41:00 -07:00
Xiang Li
95d5556445 etcdserver: refactor v3demo do 2015-09-05 15:31:28 -07:00
Xiang Li
d5ab71a4e8 Merge pull request #3445 from xiang90/api_doc
doc: add monitoring section to admin doc
2015-09-05 08:27:11 -07:00
Xiang Li
13b3c64c10 doc: add monitoring section to admin doc 2015-09-05 08:25:35 -07:00
Xiang Li
51d0630a8e Merge pull request #3440 from yichengq/memory-bench
docs/benchmark: add 2.2.0-rc memory usage benchmark
2015-09-04 20:23:56 -07:00
Yicheng Qin
91b5b247e9 docs/benchmark: add 2.2.0-rc memory usage benchmark
It records the memory usage for different average value size, and
records the data size limitation.
2015-09-04 18:27:49 -07:00
Xiang Li
106d918dd5 Merge pull request #3444 from xiang90/doc
etcdctl: suggest endpoint over peer
2015-09-04 13:22:03 -07:00
Xiang Li
322aab133d etcdctl: suggest endpoint over peer 2015-09-04 13:16:33 -07:00
Xiang Li
9fa05ad8a0 Merge pull request #3443 from xiang90/test
test: now raft has no shadow issue
2015-09-04 11:31:44 -07:00
Xiang Li
39580479b5 Merge pull request #3442 from xiang90/b
etcdctl: prepare for health endpoint change
2015-09-04 11:30:44 -07:00
Xiang Li
a6e67a6dec test: now raft has no shadow issue
We can test raft pkg now!
2015-09-04 10:52:14 -07:00
Xiang Li
778f8d8fea Merge pull request #3434 from xiang90/index_revision
*: v3api index->revision
2015-09-04 10:48:59 -07:00
Xiang Li
3f18ded10a *: v3api index->revision 2015-09-04 10:41:20 -07:00
Yicheng Qin
5a5f15de39 Merge pull request #3438 from yichengq/storage-test
storage: add mock tests for store struct
2015-09-04 10:26:08 -07:00
Xiang Li
04539c6240 etcdctl: prepare for health endpoint change
We made a mistake on the health endpoint by returning a string "true".
We have to make the etcdctl works for the next version of etcd which
will correct the mistake on the server side.

It is too late to change the server side right now since we already
released a version of etcdctl that only understands "true".
2015-09-04 10:20:24 -07:00
Yicheng Qin
215f27c2f5 storage: add mock tests for store struct 2015-09-04 08:53:49 -07:00
Yicheng Qin
8ca76a789b Merge pull request #3439 from akolb1/godep_all_fixes
Godep: fixed missing dependencies
2015-09-03 22:20:48 -07:00
Alex Kolbasov
2782418923 Godep: fixed missing dependencies 2015-09-04 04:51:44 +00:00
Yicheng Qin
5ae2eb4731 storage: avoid one extra round of wait
It could exit early if it knows that there is no more keys.
2015-09-03 19:12:27 -07:00
Yicheng Qin
9175df7c71 storage: correct revision for range when deleteRange
to make it logically reasonable.
2015-09-03 19:12:27 -07:00
Yicheng Qin
797a4796d9 storage: remove check for DELETE type KeyValue
kvindex always returns kvs that exist at given revision, so there is no
need to check for whether the KeyValue range from backend is DELETE type.
2015-09-03 19:12:27 -07:00
Yicheng Qin
00e31f13a6 storage: remove unnecessary rev parameter 2015-09-03 19:12:27 -07:00
Xiang Li
2f2b084ab5 Merge pull request #3436 from xiang90/remove_consistent_token
*: replace consistent token with revision in v3 api
2015-09-03 17:16:07 -07:00
Xiang Li
254d641ff9 Merge pull request #3429 from xiang90/upgrade_doc
doc: add upgrade to 2.2 doc
2015-09-03 15:47:10 -07:00
Xiang Li
2ac9af4924 *: replace consistent token with revision in v3 api 2015-09-03 15:41:33 -07:00
Xiang Li
243fe519a9 Merge pull request #3435 from xiang90/gogoproto
*: update gogoproto
2015-09-03 15:35:48 -07:00
Xiang Li
ef7cf058a2 *: update gogoproto 2015-09-03 15:32:25 -07:00
Xiang Li
356aba7595 doc: add upgrade to 2.2 doc 2015-09-03 11:48:30 -07:00
Xiang Li
ae2b43b588 Merge pull request #3433 from tamird/proto-import-path
*: regenerate proto to use local import path
2015-09-03 10:52:37 -07:00
Tamir Duberstein
45390b9fb8 *: regenerate proto to use local import path
Using Go-style import paths in protos is not idiomatic. Normally, this
detail would be internal to etcd, but the path from which gogoproto
is imported affects downstream consumers (e.g. cockroachdb).

In cockroach, we want to avoid including `$GOPATH/src` in our protoc
include path for various reasons. This patch puts etcd on the same
convention, which allows this for cockroach.

More information: https://github.com/cockroachdb/cockroach/pull/2339#discussion_r38663417

This commit also regenerates all the protos, which seem to have
drifted a tiny bit.
2015-09-03 13:38:28 -04:00
Xiang Li
84d1527df6 Merge pull request #3432 from coreos/robszumski-patch-1
docs: insert whitespace
2015-09-03 09:56:57 -07:00
Rob Szumski
49e7e6eb9f docs: insert whitespace
Fixes the rendering of this page on https://coreos.com/etcd/docs/2.1.0/proxy.html
2015-09-03 09:50:07 -07:00
Yicheng Qin
1eaf169057 Merge pull request #3395 from yichengq/backend-test
storage/backend: add unit tests for backend and batchTx
2015-09-03 07:23:38 -07:00
Yicheng Qin
44fd734038 storage/backend: add unit tests for backend and batchTx 2015-09-02 16:57:13 -07:00
Yicheng Qin
16e9e4b3d5 Merge pull request #3412 from yichengq/etcdctl-sync
etcdctl: better logging for sync process
2015-09-02 16:49:00 -07:00
Yicheng Qin
8e040efed9 etcdctl: log more about sync process
Users don't even know that etcdctl is doing sync and fails on sync
process. So we add more logs for sync process.
2015-09-02 16:10:25 -07:00
Yicheng Qin
3a8db488ca Merge pull request #3415 from yichengq/better-err
etcdctl/command: print more details about ErrNoEndpoint
2015-09-02 10:11:45 -07:00
Yicheng Qin
41cc16481f Merge pull request #3418 from AdoHe/fix_build_script_error
build: fix build error on ubuntu
2015-09-01 22:44:23 -07:00
Tony
9665cda7c1 build: fix build error on ubuntu 2015-09-02 13:28:55 +08:00
Xiang Li
484a115813 Merge pull request #3424 from akolb1/bolt_solaris1
Godeps: boltdb dependency missing solaris support
2015-09-01 16:19:23 -07:00
Alex Kolbasov
ecbc44fb63 Godeps: boltdb dependency missing solaris support 2015-09-01 23:17:36 +00:00
Yicheng Qin
423e3bbbd8 etcdctl/cluster_health: provide better message for empty client urls
It skips sync when init client, and prints out unreachable message and
points to notice when checking health of etcd members one by one.
2015-09-01 14:42:19 -07:00
Yicheng Qin
aa0c8fea55 Merge pull request #3321 from yichengq/doc-tls-setup
docs/security: link cfssl example
2015-09-01 14:28:40 -07:00
Yicheng Qin
6caae58814 docs/security: recommend cfssl instead of etcd-ca
This provides a more general and stable way for users to set TLS cluster.
2015-09-01 14:07:26 -07:00
Yicheng Qin
d412eaa3a2 Merge pull request #3308 from yichengq/go-codec
Use ugorji codec for unmarshalling key responses in client
2015-09-01 14:04:38 -07:00
Xiang Li
53b8175d3f Merge pull request #3421 from xiang90/3411
etcdmain: proxy does not need to belong to the discovered cluster
2015-09-01 13:49:31 -07:00
Xiang Li
7957677cf2 etcdmain: proxy does not need to belong to the discovered cluster 2015-09-01 11:24:02 -07:00
Xiang Li
a94118893c Merge pull request #3413 from xiang90/snapshot_dir
*: support wal dir
2015-09-01 10:03:50 -07:00
Xiang Li
d94e712d91 *: support wal dir 2015-09-01 09:54:27 -07:00
Yicheng Qin
85b6c51a23 Merge pull request #3420 from yichengq/wait-more
storage: extend timeout to wait for put complete
2015-09-01 09:25:46 -07:00
Yicheng Qin
a21166c3aa storage: extend timeout to wait for put complete
travis is sometimes slow, and it could fail to complete the put in 10ms.
2015-09-01 09:03:03 -07:00
Yicheng Qin
8ac981e1ee Merge pull request #3416 from yichengq/get-cluster-timeout
etcdserver: add timeout param on getClusterFromRemotePeers
2015-09-01 09:00:19 -07:00
Yicheng Qin
f3bfcb9dee etcdserver: add timeout param on getClusterFromRemotePeers
It sets 10s timeout for public GetClusterFromRemotePeers.

This helps the following cases to work well in high latency scenario:

1. proxy sync members from the cluster
2. newly-joined member sync members from the cluster

Besides 10s request timeout, the request is also controlled by dial
timeout and read connection timeout.
2015-09-01 08:49:01 -07:00
Yicheng Qin
1fabc48968 Merge pull request #3404 from bdarnell/multinode-propose-panic
raft: A removed node can no longer be leader.
2015-08-31 20:06:34 -07:00
Ben Darnell
4f20e01f60 raft: Ignore proposals if not a current member.
Fixes another panic in MultiNode.Propose.
2015-08-31 20:31:14 -04:00
Yicheng Qin
c2caa4ae3b etcdctl/command: print more details about ErrNoEndpoint
This commit prints more details if getting ErrNoEndpoint when sync with
cluster. This helps users to know what happens.
2015-08-31 16:28:43 -07:00
Yicheng Qin
4b9b0cbcc1 storage: add newBackend and newBatchTx
This is for ease of testing.
2015-08-31 13:25:10 -07:00
Xiang Li
57b39aca4e Merge pull request #3403 from xiang90/doc
doc: add 0.4.9 to 2.2 migration guide
2015-08-31 11:28:25 -07:00
Xiang Li
3c1f80bdff Merge pull request #3401 from xiang90/more_metrics
more on storage metrics
2015-08-31 09:55:29 -07:00
Xiang Li
406bb6749e doc: add 0.4.9 to 2.2 migration guide 2015-08-31 09:55:12 -07:00
Xiang Li
bc71aab07a Merge pull request #3409 from xiang90/fix_force_new
etcdserver: ignore confChangeUpdateNode in getIDs
2015-08-31 09:44:10 -07:00
Xiang Li
1bcaa9f4a1 etcdserver: ignore confChangeUpdateNode in getIDs 2015-08-31 09:36:39 -07:00
Xiang Li
aaa7dfc14d Merge pull request #3407 from MSamman/fix-build-warning
build: fixed build warning
2015-08-31 07:47:23 -07:00
Mohammad Samman
dd4317db43 build: fixed build warning
to clear warning and ensure git sha linkage works in the future

Fixes #3406
2015-08-30 15:05:56 -07:00
Xiang Li
b9632e0f8d storage: register txnCounter 2015-08-28 15:17:16 -07:00
Xiang Li
dd443be41b storage: report total number of keys 2015-08-28 15:16:53 -07:00
Yicheng Qin
d2cb732c7b test: activate test on storage/backend 2015-08-28 13:52:31 -07:00
Yicheng Qin
054fab84ee storage/backend: remove startc var
This makes start logic cleaner.
2015-08-28 13:52:31 -07:00
Xiang Li
fca98c9071 Merge pull request #3398 from xiang90/storage_metrics
storage: add initial metrics for kv
2015-08-28 13:50:44 -07:00
Xiang Li
b5838edb93 storage: add initial metrics for kv 2015-08-28 13:41:42 -07:00
Xiang Li
6cbaaa715c Merge pull request #3396 from bdarnell/multinode-propose-panic
raft: Fix a nil-pointer panic in MultiNode.Propose.
2015-08-28 12:34:49 -07:00
Yicheng Qin
cba7c6a180 *: bump to v2.2.0-rc.0+git 2015-08-28 10:26:56 -07:00
Yicheng Qin
dc3e027288 *: bump to v2.2.0-rc.0 2015-08-28 10:26:32 -07:00
Yicheng Qin
b40e077047 Merge pull request #3388 from sckott/docfix-tuning
fix docs, change tuning link in api.md from section to file
2015-08-28 09:23:58 -07:00
Ben Darnell
05924b330a raft: Fix a nil-pointer panic in MultiNode.Propose. 2015-08-28 11:17:59 +02:00
Yicheng Qin
f04884f74d storage/backend: fix off-by-one error for pending var
Or it may commit until batchLimit + 1.
2015-08-27 22:51:32 -07:00
Yicheng Qin
7ed929fb3d storage/backend: fix limit doesn't effect in range 2015-08-27 22:51:32 -07:00
Yicheng Qin
37d9354aa2 Merge pull request #3394 from yichengq/bench-2.2
adjust file and README in docs/benchmark
2015-08-27 21:09:39 -07:00
Xiang Li
9d78d84270 Merge pull request #3390 from xiang90/ctl_peer
etcdctl: suggest endpoint over peers flag
2015-08-27 21:03:39 -07:00
Xiang Li
8d8033df55 etcdctl: suggest endpoint over peers flag 2015-08-27 18:52:17 -07:00
Yicheng Qin
753a079700 docs/benchmark: add benchmark result links in README 2015-08-27 17:08:49 -07:00
Yicheng Qin
425afa66ea docs/benchmarks: update bench version for more accuracy 2015-08-27 17:08:30 -07:00
Xiang Li
f68e4a1a5d Merge pull request #3392 from yichengq/bench-2.2
docs/benchmark: update etcd 2.2 bench
2015-08-27 16:58:04 -07:00
Yicheng Qin
605f0ce730 docs/benchmark: update etcd 2.2 bench
This benchmark is for etcd 2.2 rc after fixing several performance
downgrade bugs.
2015-08-27 16:52:55 -07:00
Scott Chamberlain
b0192118dd doc: change tuning link in api.md from section to file 2015-08-27 15:04:07 -07:00
Xiang Li
1124a06860 Merge pull request #3387 from yichengq/fix-quorum
doc: correct calculation of fault tolerance of an etcd cluster in adm…
2015-08-27 14:48:39 -07:00
Raoof Mohammed
bc2b8856d7 doc: correct calculation of fault tolerance of an etcd cluster in admin_guide.md
doc: correct calculation of fault tolerance of an etcd cluster in admin_guide.md
2015-08-27 14:30:12 -07:00
Yicheng Qin
df83af944b Merge pull request #3384 from yichengq/fix-shadow
test: use go vet shadow feature instead of go-nyet
2015-08-27 14:27:57 -07:00
Yicheng Qin
92cd24d5bd *: fix govet shadow check failure 2015-08-27 14:15:30 -07:00
Xiang Li
b2d33e6dcb Merge pull request #3382 from xiang90/env
pkg/flags: print out evn usage information
2015-08-27 13:36:55 -07:00
Yicheng Qin
ccdb850e1e test: use go vet shadow feature instead of go-nyet
Use official support instead of home-made one.
2015-08-27 13:29:12 -07:00
Yicheng Qin
4ac4648b5b Merge pull request #3383 from cognusion/fixes2
Test Fixes: Take 2
2015-08-27 13:22:19 -07:00
Matt Keller
327632014e cors: Removed new(?) header from test, resolving failure
"X-Content-Type-Options" was being autoadded, but none of the
test maps took it into account. I saw that "Content-Type" was
also being deleted, so I figured that was the best solution
for this as well.
2015-08-27 15:23:14 -04:00
Matt Keller
19a28c8efd storage: Fixed backend test
./backend_test.go:23: multiple-value batchTx.UnsafeRange() in single-value context
2015-08-27 15:20:29 -04:00
Matt Keller
32372e1d70 raft: Fixed a test misassumption
network_test.go:56: total = 59.22354ms, want > 50ms
59 is > 50, but the equation added 10 to the right side
2015-08-27 15:15:34 -04:00
Xiang Li
c8f5e03b75 pkg/flags: print out evn usage information 2015-08-27 12:08:31 -07:00
Xiang Li
25c87f13fd Merge pull request #3354 from mx2323/faq
add faq documentation
2015-08-26 16:36:04 -07:00
Michael Xu
8f3ea5ebed doc: add faq documentation 2015-08-26 16:34:52 -07:00
Yicheng Qin
59a5a7e309 Merge pull request #3368 from yichengq/storage-test
add unit tests for storage
2015-08-26 15:32:02 -07:00
Yicheng Qin
0d38c13990 storage: use temp path to handle test file 2015-08-26 15:01:41 -07:00
Yicheng Qin
2d01eb4e11 storage: add tests for kvstore_compaction 2015-08-26 15:01:13 -07:00
Yicheng Qin
f38778160d Merge pull request #3376 from yichengq/connection-down
etcdserver: specify request timeout error due to connection down
2015-08-26 13:09:30 -07:00
Yicheng Qin
0813139140 storage: add more tests for index 2015-08-26 12:53:30 -07:00
Yicheng Qin
3723f01b48 storage: add more unit tests for keyIndex 2015-08-26 12:53:30 -07:00
Yicheng Qin
ad8a291dc1 storage: return error when tombstone on new generation
It is not allowed to put tombstone on an empty generation.
2015-08-26 12:53:30 -07:00
Yicheng Qin
ffa87f9678 storage: fix the comment in generation.walk 2015-08-26 12:53:30 -07:00
Yicheng Qin
8f6bf029f8 etcdserver: specify request timeout error due to connection lost
It specifies request timeout error possibly caused by connection lost,
and print out better log for user to understand.

It handles two cases:
1. the leader cannot connect to majority of cluster.
2. the connection between follower and leader is down for a while,
and it loses proposals.

log format:
```
20:04:19 etcd3 | 2015-08-25 20:04:19.368126 E | etcdhttp: etcdserver:
request timed out, possibly due to connection lost
20:04:19 etcd3 | 2015-08-25 20:04:19.368227 E | etcdhttp: etcdserver:
request timed out, possibly due to connection lost
```
2015-08-26 12:38:37 -07:00
Yicheng Qin
76db9747f8 Merge pull request #3377 from yichengq/tls-info-string
pkg/transport: print ClientCertAuth in TLSInfo.String()
2015-08-25 22:45:10 -07:00
Yicheng Qin
45bb88069b Merge pull request #3378 from yichengq/set-late
etcdmain: check error before assigning peer transport
2015-08-25 22:38:36 -07:00
Yicheng Qin
58455a2ae4 etcdmain: check error before assigning peer transport
Or it may panic when new transport fails, e.g., TLS info is invalid.
2015-08-25 22:04:26 -07:00
Yicheng Qin
57e88465bf pkg/transport: print ClientCertAuth in TLSInfo.String()
It is good to print it in debug output:

```
21:56:12 etcd1 | 2015-08-25 21:56:12.162406 I | etcdmain: peerTLS: cert
= certs/etcd1.pem, key = certs/etcd1-key.pem, ca = , trusted-ca =
certs/ca.pem, client-cert-auth = true
```
2015-08-25 21:53:52 -07:00
Yicheng Qin
6250fed8a8 Merge pull request #3096 from philips/tls-info-debug
pkg/transport: include debug output for trusted-ca
2015-08-25 20:08:19 -07:00
Xiang Li
008f988f6b Merge pull request #3375 from xiang90/doc
doc: add evn variable name to configuration.md
2015-08-25 14:48:35 -07:00
Yicheng Qin
2b58da1699 Merge pull request #3374 from yichengq/gomaxprocs
etcdmain: change default GOMAXPROCS when compiling in go1.5
2015-08-25 14:48:00 -07:00
Xiang Li
35a0459cc8 doc: add evn variable name to configuration.md 2015-08-25 14:35:15 -07:00
Xiang Li
32ab3f6931 Merge pull request #3372 from xiang90/doc
improve clustering.md doc
2015-08-25 14:04:30 -07:00
Xiang Li
c30c85898e doc: add explanation for client urls 2015-08-25 13:46:27 -07:00
Yicheng Qin
2ac9a329ab etcdmain: stop setting GOMAXPROCS explicitly
We always want to use GOMAXPROCS() as the way go parses it. When in go1.4, we
want to expose GOMAXPROCS value, so we set GOMAXPROCS explicitly as the
way go 1.4 does and print it out.

But it becomes a problem when go 1.5 changes the way to set GOMAXPROCS.

Fix the problem by stop setting GOMAXPROCS and get its value directly.

Due to this change, it sets default GOMAXPROCS to the
number of CPUs available when compiling in go 1.5, which matches how go 1.5 works:
https://docs.google.com/document/d/1At2Ls5_fhJQ59kDK2DFVhFu3g5mATSXqqV5QrxinasI/edit

This is a behavior change in etcd 2.2.
2015-08-25 13:38:16 -07:00
Yicheng Qin
a4285ef5c9 Merge pull request #3367 from MSamman/master
etcdserver: handle malformed basic auth
2015-08-25 13:12:48 -07:00
Mohammad Samman
e2e002f94e etcdserver: handle malformed basic auth
return insufficient credentials if basic auth header is malformed

Fixes #3280
2015-08-25 12:37:24 -07:00
Xiang Li
7bd558b2e0 Merge pull request #3373 from ecnahc515/add_report_bugs_contributing
Contributing: Link to reporting bugs doc
2015-08-25 12:17:06 -07:00
Chance Zibolski
ad843341a9 Contributing: Link to reporting bugs doc 2015-08-25 12:15:03 -07:00
Xiang Li
f56c5455f3 doc: mention reconfiguration design in clustering.md 2015-08-25 11:22:08 -07:00
Xiang Li
986f354694 Merge pull request #3371 from xiang90/bolt
Godeps: update bolt dependency
2015-08-25 11:17:14 -07:00
Xiang Li
e8f40b0412 storage/backend: add commitAndStop
After the upgrade of boltdb, db.Close waits for all txn to finish.
CommitAndStop commits the current txn and stop creating new ones.
2015-08-25 10:57:25 -07:00
Xiang Li
8738a88fae Godeps: update bolt dependency 2015-08-25 10:39:29 -07:00
Yicheng Qin
2d06f6b371 Merge pull request #3362 from yichengq/rafthttp-cancel
rafthttp: always cancel in-flight request when stop streamReader
2015-08-25 09:26:46 -07:00
Yicheng Qin
61a75b3d48 rafthttp: always cancel in-flight request when pipeline.send
This fits the way for go1.5 to cancel request.
2015-08-25 09:07:49 -07:00
Yicheng Qin
27b9963959 client: always cancel in-flight request when do request
This fits the way for go1.5 to cancel request.
2015-08-25 09:04:58 -07:00
Yicheng Qin
ece39c9462 proxy: always cancel in-flight request
This fits the way for go1.5 to cancel request.
2015-08-25 08:59:59 -07:00
Yicheng Qin
6fc638673c rafthttp: return err if stopped before setting cancel in dial()
The original workflow may fail to cancel if stop() cancels the finished
request just before dial() assigning a new cancel. This commit checks
streamReader status before setting cancel to avoid this problem.

It is tested at travis for 300 times. go 1.5 always works well, while
go 1.4 fails to stop once.
2015-08-25 08:59:12 -07:00
Yicheng Qin
fc95ec0cc6 rafthttp: always cancel in-flight request when stop streamReader
This problem is totally fixed at 1.5.

go1.5 adds a Request.Cancel channel, which allows for "race free"
cancellation
(8b4278ffb7).
Our implementation relies on it to always cancel in-flight request.
2015-08-25 08:54:13 -07:00
Yicheng Qin
0132b091d2 Merge pull request #3360 from yichengq/bench-3
*: add initial read benchmark for etcd v3
2015-08-25 07:58:30 -07:00
Yicheng Qin
3632a1b9b1 *: add initial read benchmark for etcd v3
It includes the initial read benchmark for etcd v3.

This is the first step to give some rough thoughts. I haven't dug
deeper to answer some questions, including why its performance is not
better than HTTP + json, and why one put will cause performance downgrade.
2015-08-25 07:50:18 -07:00
Xiang Li
e3ef1d363a Merge pull request #3366 from xiang90/v3_proto
update v3 proto and doc
2015-08-24 11:22:29 -07:00
Xiang Li
0cb45aee64 rfc: update v3 proto 2015-08-24 11:00:51 -07:00
Xiang Li
1cccbb5ebd etcdserverpb: add comments for compaction 2015-08-24 10:52:54 -07:00
Xiang Li
3a60d490d1 storagepb: fix comment location 2015-08-24 10:42:16 -07:00
Xiang Li
4a5b94478e etcdserverpb: update comment for txn request 2015-08-24 10:40:05 -07:00
Xiang Li
98ceb3cdbd etcdserverpb: add more field into rangeResponse 2015-08-24 10:33:20 -07:00
Yicheng Qin
c7f10ed975 Merge pull request #3361 from yichengq/no-log
integration: only print critical log
2015-08-24 09:44:13 -07:00
Yicheng Qin
3702be476b integration: only print critical log
This limits the logs printed out in integration test, so it will not
have log flood and help us read fatal log in travis.
2015-08-23 21:22:21 -07:00
Yicheng Qin
514c4371a9 Merge pull request #3359 from yichengq/storage-test
functional tests for storage package and some related fixes
2015-08-23 21:12:36 -07:00
Yicheng Qin
1e2b0acf6d test: activate test for storage package 2015-08-23 20:59:06 -07:00
Yicheng Qin
9c0c314425 storage: add functional tests for the package
It adds and reorganizes tests to construct functional tests.
2015-08-23 20:59:06 -07:00
Yicheng Qin
9960651c3f storage: let range work in the process of txn
range should work in the process of txn to help check the status during the
txn.
2015-08-23 20:59:06 -07:00
Yicheng Qin
6d97dcaf3f storage: ensure that desired compaction is persisted
It needs to persist the desired compaction, so it won't forget the compaction
if it crashes later.
2015-08-23 20:59:06 -07:00
Yicheng Qin
353f10ca2b storage: reject to compact on future rev
Compaction on future rev is unreasonable.
2015-08-23 20:59:06 -07:00
Yicheng Qin
47b243be5d storage: let TxnDeleteRange return rev if no error
If it doesn't return error, it should return valid rev.
2015-08-23 20:59:06 -07:00
Yicheng Qin
62f7481b19 storage: keyIndex.get returns err when key is tombstoned
Before this commit, it will return wrong create index, mod index.

It lets findGeneration return error when rev is at the gap of two
generations. This leads to the change of compact() code.
2015-08-23 20:59:02 -07:00
Yicheng Qin
3b2fa9f1de storage: fix TestKeyIndexCompact
It fails to pass before.
2015-08-23 17:22:49 -07:00
Xiang Li
97b211c8ba Merge pull request #3357 from ccding/master
go vet
2015-08-22 10:29:29 -07:00
Cong Ding
c09b667d57 *: fix go vet reported issues 2015-08-22 12:19:02 -05:00
Xiang Li
044b23c3ca Merge pull request #3356 from xiang90/travis
*: test gofmt with -s and fix reported issues
2015-08-21 18:59:51 -07:00
Xiang Li
6b23a8131f *: test gofmt with -s and fix reported issues 2015-08-21 18:52:16 -07:00
Yicheng Qin
301b7f57c0 Merge pull request #3355 from yichengq/health-var
etcdctl/cluster_health: set health var when checked healthy
2015-08-21 15:37:15 -07:00
Yicheng Qin
224755855d etcdctl/cluster_health: set health var when checked healthy
This was a typo.
2015-08-21 15:27:35 -07:00
Xiang Li
84b614c508 Merge pull request #3342 from xiang90/travis
travis: test for go 1.5 build
2015-08-21 14:49:00 -07:00
Xiang Li
1dcc145aef client: fix test 2015-08-21 14:36:29 -07:00
Yicheng Qin
8c0610d4f5 Merge pull request #3352 from yichengq/fix-name-url
fix that etcd fails to start if using both IP and hostname when discovery srv
2015-08-21 12:38:38 -07:00
Yicheng Qin
3c1e6b54b3 pkg/netutil: stop resolving in place
It helps to copy out a and b, and not modify the original a and b.
2015-08-21 12:09:17 -07:00
Yicheng Qin
1c334979cd pkg/netutil: not introduce empty url when converting
It should not make slices with length and append elements at the same
time.
2015-08-21 12:08:17 -07:00
Yicheng Qin
7b871aab41 pkg/netutil: not export resolve and urlsEqual functions
They are only used in this package, so there is no need to export them.
2015-08-21 11:58:37 -07:00
Yicheng Qin
b1192e5c48 pkg/netutil: fix false negative comparison
Sort the resolved URLs before DeepEqual, so it will not compare URLs
that may be out of order due to resolution.
2015-08-21 10:15:08 -07:00
Yicheng Qin
72462a72fb etcdserver: remove TODO to delete URLStringsEqual
Discovery SRV supports to compare IP addresses with domain names,
so we need URLStringsEqual function.
2015-08-21 09:52:17 -07:00
Yicheng Qin
8ea3d157c5 Revert "Revert "Treat URLs have same IP address as same""
This reverts commit 3153e635d5.

Conflicts:
	etcdserver/config.go
2015-08-21 09:41:13 -07:00
Xiang Li
07af0b3e5b Merge pull request #3346 from xiang90/auth_skip
etcdserver/auth: cache auth enable result
2015-08-20 23:32:29 -07:00
Xiang Li
11a689d063 etcdserver/auth: cache auth enable result 2015-08-20 23:05:00 -07:00
Xiang Li
e8e507b29b Merge pull request #3348 from xiang90/l
use limited listener from golang
2015-08-20 22:44:51 -07:00
Xiang Li
ff37cc455c pkg/transport: remove home-grown limitedListener 2015-08-20 20:03:27 -07:00
Xiang Li
92634356c1 *: use limitedListener from golang 2015-08-20 20:02:35 -07:00
Xiang Li
da9a12b97c Merge pull request #3344 from xiang90/startup_version
etcdmain: print out version information on startup
2015-08-20 15:10:25 -07:00
Xiang Li
6b77c146ec etcdmain: print out version information on startup 2015-08-20 14:50:16 -07:00
Xiang Li
31395d257c travis: test for go 1.5 build 2015-08-20 11:39:41 -07:00
Xiang Li
7cf9770e12 Merge pull request #3340 from xiang90/fix_perallocate
pkg/fileutil: treat not support error as nil error in preallocate
2015-08-20 11:38:03 -07:00
Xiang Li
3ca5482251 pkg/fileutil: treat not support error as nil error in preallocate 2015-08-20 11:15:02 -07:00
Xiang Li
4a6d6b0052 Merge pull request #3338 from spacejam/master
Reversion->Revision
2015-08-20 10:16:31 -07:00
Tyler Neely
acd7a92f03 storage: reversion -> revision 2015-08-20 08:39:07 -07:00
Yicheng Qin
e1dfcec0ab Merge pull request #3327 from yichengq/bench-2.2
docs/benchmarks: add benchmark result for 2.2
2015-08-20 00:18:32 -07:00
Yicheng Qin
807de81172 docs/benchmarks: add benchmark result for 2.2
And it analyzes the reason for performance changes.
2015-08-19 23:59:33 -07:00
Yicheng Qin
795e962403 Merge pull request #3334 from mitake/snap-marsharing-prometheus
snap: export durations of marshalling cost during snapshot save
2015-08-19 20:59:04 -07:00
Hitoshi Mitake
7a6d33620f snap: export durations of marshalling cost during snapshot save
Currently, total duration of snapshot saving is exported for
prometheus. For more detailed analysis, this commit let etcd export
durations of marshalling for prometheus.
2015-08-20 12:47:07 +09:00
Yicheng Qin
46a2ae77a1 hack/benchmark: add script for benchmark
This is for etcd benchmark.
2015-08-19 20:37:27 -07:00
Xiang Li
b0303e948c Merge pull request #3323 from xiang90/cl_health
etcdctl: use health endpoint to greatly simplify health checking
2015-08-19 17:15:52 -07:00
Xiang Li
568d1c6783 etcdctl: use health endpoint to greatly simplify health checking 2015-08-19 11:47:08 -07:00
Xiang Li
60387dc408 Merge pull request #3320 from yichengq/doc-rtt
docs: document how to set heartbeat interval and election timeout
2015-08-19 11:08:05 -07:00
Xiang Li
28b61acd9e Merge pull request #3324 from xiang90/raft_logging
raft: downgrade the logging around snapshot to debugf
2015-08-18 17:18:08 -07:00
Xiang Li
d01b6cd639 Merge pull request #3326 from elimisteve/master
client: fixed typo in WatcherOptions docs
2015-08-18 16:49:43 -07:00
Steve Phillips
952827157a client: fixed typo in WatcherOptions docs
specifices -> specifies
2015-08-18 16:43:09 -07:00
Xiang Li
b3d2a621ab Merge pull request #3325 from elimisteve/master
client: spelling error in docs (occured -> occurred)
2015-08-18 16:35:13 -07:00
Steve Phillips
69fc796926 client: spelling error in docs (occured -> occurred) 2015-08-18 16:26:52 -07:00
Xiang Li
50c1db3fbf raft: downgrade the logging around snapshot to debugf
Snapshot related logging is spamming when leader trying to
sync a failed peer.
2015-08-18 15:43:53 -07:00
Yicheng Qin
7082d3a765 docs: document how to set heartbeat interval and election timeout
It gives more details about how to set heartbeat interval and election
timeout correctly based on RTT.
2015-08-18 13:54:44 -07:00
Xiang Li
28cec1128d Merge pull request #3322 from philips/use-proxy-as-default-endpoint
Procfile: use proxy as default
2015-08-18 12:38:51 -07:00
Yicheng Qin
087061e434 Merge pull request #3303 from yichengq/auth-path
use canonical path for auth
2015-08-18 12:06:48 -07:00
Yicheng Qin
4778d780a8 pkg/pathutil: change copyright for path.go
The file only contains the function that is borrowed from std http lib,
so we use their copyright.
2015-08-18 11:48:22 -07:00
Brandon Philips
9106675fd4 Procfile: use proxy as default
I think it makes sense to make the proxy listen on the default port so
we can give the proxy more testing by default. Also, this should make it
easy to kill a single etcd member and test that etcdctl still works,
etc.

However, I have hit a bug: the proxy takes several seconds
2015-08-18 09:42:13 -07:00
Yicheng Qin
fab3feab66 etcdctl/role: reject non-canonical permission path
Non-canonical permission path is useless because the path received
by auth is always canonical, which is due to our ServeMux always
redirects request to canonical path().

This helps users to detect path permission setting error early.

Ref: http://godoc.org/net/http#ServeMux
2015-08-18 08:59:53 -07:00
Yicheng Qin
b5ec7f543a client: use canonical url path in request
The main change is that it keeps the trailing slash. This helps
auth feature to judge path permission accurately.
2015-08-18 08:59:48 -07:00
Yicheng Qin
927d5f3d26 Merge pull request #3301 from yichengq/ca-file
etcdmain: update -ca-file description
2015-08-17 23:36:33 -07:00
Yicheng Qin
c0747a7b8b etcdmain: update -ca-file description
so people could deprecate old flags and use new flags much easier.
2015-08-17 22:36:04 -07:00
Yicheng Qin
bcb4d5d53e Merge pull request #3311 from yichengq/request-timeout
extend hardcoded timeout for globally-deployed etcd cluster
2015-08-17 17:00:24 -07:00
Xiang Li
dfc6b4436f Merge pull request #3315 from xiang90/key_err
etcdhttp:write etcderror for all errors in keyhandler
2015-08-17 16:54:12 -07:00
Yicheng Qin
ffae601af5 etcdmain: calculate dial timeout for peer transport
This helps peer communication in globally-deployed cluster.
2015-08-17 16:52:53 -07:00
Yicheng Qin
1375ef8985 etcdserver: remove getVersion timeout
The request can still time out because we have set dial timeout and
read/write timeout. It increases timeout expectation from 1s to 5s,
but it makes it workable in a globally-deployed cluster.
2015-08-17 16:50:40 -07:00
Xiang Li
c7fbc01ef1 Merge pull request #3314 from sebschrader/proxy-loop
Warn about proxy loops with incorrect advertise-client-urls
2015-08-17 16:04:00 -07:00
Xiang Li
d487cf6b63 etcdhttp:write etcderror for all errors in keyhandler 2015-08-17 15:51:29 -07:00
Sebastian Schrader
f70950ff93 docs: warn about proxy loops with incorrect advertise-client-urls 2015-08-18 00:42:48 +02:00
Yicheng Qin
c530385d6d Merge pull request #3313 from yichengq/internal-timeout
etcdserver: use ReqTimeout only
2015-08-17 15:05:46 -07:00
Xiang Li
af6d1d3d95 Merge pull request #3310 from xiang90/http_err
*: key handler should write auth error as etcd error
2015-08-17 14:57:19 -07:00
Yicheng Qin
2d5b95c49f etcdserver: use ReqTimeout only
We cannot infer the RTT value from heartbeat interval, so CommitTimeout
is invalid. Remove it and use ReqTimeout instead.
2015-08-17 14:54:25 -07:00
Xiang Li
87f061bab2 *: key handler should write auth error as etcd error 2015-08-17 14:45:45 -07:00
Xiang Li
ba3a9b5f92 Merge pull request #3309 from xiang90/enforce
etcdserver: add version enforcement when setting cluster version
2015-08-17 12:41:04 -07:00
Xiang Li
15e03d801f etcdserver: add version enforcement when setting cluster version 2015-08-17 11:12:39 -07:00
Xiang Li
f615f9a999 Merge pull request #3305 from xiang90/c_v
*: only print out major.minor version for cluster version
2015-08-17 09:40:01 -07:00
Yicheng Qin
7083828ae3 Godeps: import github.com/ugorji/go/codec 2015-08-16 18:13:44 -07:00
Yicheng Qin
a364af72af client: use ugorji/go/codec to unmarshal key response
This change speeds up response unmarshal ~2x:

```
BenchmarkSmallResponseUnmarshal	   20000	     75243 ns/op
BenchmarkManySmallResponseUnmarshal	     200	   6629661 ns/op
BenchmarkMediumResponseUnmarshal	    1000	   1359041 ns/op
BenchmarkLargeResponseUnmarshal	      20	  61600978 ns/op
```
2015-08-16 18:08:54 -07:00
Yicheng Qin
95d100e957 client: add response unmarshal benchmark
The benchmark result:

```
BenchmarkSmallResponseUnmarshal	  10000	   164524 ns/op
BenchmarkManySmallResponseUnmarshal	    100	 13916636 ns/op
BenchmarkMediumResponseUnmarshal	   1000	  1974295 ns/op
BenchmarkLargeResponseUnmarshal	     20	 80462001 ns/op
ok		github.com/coreos/etcd/client	7.777s
```
2015-08-16 16:44:50 -07:00
Xiang Li
d95c7d8a94 Merge pull request #3307 from ian-kelling/master
documentation: fix misspelled word
2015-08-15 18:53:58 -07:00
Ian Kelling
8dd44465c3 documentation: fix misspelled word 2015-08-15 17:56:17 -07:00
Xiang Li
f199a484af *: only print out major.minor version for cluster version 2015-08-15 08:30:06 -07:00
Xiang Li
bbcb38189c Merge pull request #3302 from xiang90/v
etcdserver: better version detection log output
2015-08-14 16:14:55 -07:00
Xiang Li
0076ab154b etcdserver: better version detection log output
Fix https://github.com/coreos/etcd/issues/3288
2015-08-14 16:08:33 -07:00
Xiang Li
dd56b7e05e Merge pull request #3299 from xiang90/txn
initial support for txn
2015-08-14 16:05:16 -07:00
Xiang Li
5cd109949a etcdctl: support txn 2015-08-14 15:58:38 -07:00
Xiang Li
9233fff48f etcdserver: support txn 2015-08-14 11:45:31 -07:00
Xiang Li
46865fa5a5 etcdserverpb: update proto 2015-08-14 11:45:07 -07:00
Yicheng Qin
d448593bbc Merge pull request #3295 from yichengq/err-example
client: fix clusterError typo in README
2015-08-14 09:35:31 -07:00
Yicheng Qin
5eed141d54 client: fix clusterError typo in README
It helps users to use client better.
2015-08-13 16:38:41 -07:00
Yicheng Qin
fefb273389 *: bump to v2.2.0-alpha.1+git 2015-08-13 16:01:31 -07:00
Yicheng Qin
201bb4b3d8 *: bump to v2.2.0-alpha.1 2015-08-13 16:01:09 -07:00
Yicheng Qin
3cc4957d98 Merge pull request #3293 from yichengq/improve-err
etcdserver: improve error message when timeout due to leader fail
2015-08-13 15:58:48 -07:00
Yicheng Qin
c229e6e655 etcdserver: improve error message when timeout due to leader fail 2015-08-13 15:46:21 -07:00
Yicheng Qin
394894e03e Merge pull request #3291 from yichengq/auth-cap
etcdhttp: add auth capability in 2.2
2015-08-13 15:01:59 -07:00
Yicheng Qin
ceb27b1c48 etcdhttp: add auth capability in 2.2 2015-08-13 14:49:10 -07:00
Yicheng Qin
a17288558e Merge pull request #3289 from yichengq/marshal
etcdserver: go back to marshal request in 2.1 way
2015-08-13 14:20:24 -07:00
Yicheng Qin
334bdd1c26 Merge pull request #3153 from gtank/tls-setup
hack: TLS setup using cfssl
2015-08-13 13:53:14 -07:00
Xiang Li
959feb70d1 Merge pull request #3275 from xiang90/sort
improve in order key generation
2015-08-13 13:51:19 -07:00
Xiang Li
a7b9bff939 store: add 0 as padding for better lexicographic sorting. 2015-08-13 13:42:37 -07:00
Yicheng Qin
0fdb77aea2 etcdserver: go back to marshal request in 2.1 way
It fixes the problem that 2.1 cannot roll upgrade to 2.2 smoothly
because 2.1 cannot understand the bytes marshalled at 2.2.
2015-08-13 13:41:52 -07:00
Yicheng Qin
003d096138 Merge pull request #3286 from yichengq/fit-2.2
*: update MinClusterVersion and supportedStream map
2015-08-13 13:31:37 -07:00
Yicheng Qin
c9cca6a93b *: update MinClusterVersion and supportedStream map 2015-08-13 13:05:14 -07:00
Xiang Li
846b1fdbcd Merge pull request #3287 from xiang90/update_roadmap
Update roadmap
2015-08-13 13:00:01 -07:00
Xiang Li
329647ab62 roadmap: update roadmap 2015-08-13 12:56:23 -07:00
Xiang Li
6a64051245 roadmap: remove 2.1 milestone 2015-08-13 12:51:58 -07:00
Yicheng Qin
80005af5b2 Merge pull request #3285 from yichengq/bump-capnslog
godeps: bump capnslog to 42a8c3b1a6f917bb8346ef738f32712a7ca0ede7
2015-08-13 11:49:38 -07:00
Yicheng Qin
d66ede7186 godeps: bump capnslog to 42a8c3b1a6f917bb8346ef738f32712a7ca0ede7 2015-08-13 11:32:45 -07:00
Yicheng Qin
a46943548a *: bump to v2.2.0-alpha.0+git 2015-08-13 10:21:36 -07:00
Yicheng Qin
ab5a69cb18 *: bump to v2.2.0-alpha.0 2015-08-13 10:20:05 -07:00
Yicheng Qin
976ce93539 Merge pull request #3277 from yichengq/better-log
etcdserver: specify timeout caused by leader election
2015-08-12 17:02:27 -07:00
Yicheng Qin
27170e67b9 etcdserver: specify timeout caused by leader election
Before this PR, the timeout caused by leader election returns:

```
14:45:37 etcd2 | 2015-08-12 14:45:37.786349 E | etcdhttp: got unexpected
response error (etcdserver: request timed out)
```

After this PR:

```
15:52:54 etcd1 | 2015-08-12 15:52:54.389523 E | etcdhttp: etcdserver:
request timed out, possibly due to leader down
```
2015-08-12 16:53:18 -07:00
Xiang Li
ddfe343e77 Merge pull request #3271 from yichengq/doc-discovery
docs: add discovery protocol doc
2015-08-12 13:51:32 -07:00
Yicheng Qin
a45f0ede56 docs: add discovery protocol doc
This document talks about the technical details of discovery service
protocol. It helps users to learn about how discovery service works and
what behavior to expect.
2015-08-12 13:15:21 -07:00
Alex Polvi
7bd9d9aede Merge pull request #3273 from polvi/kube-hack
add etcd on k8s example
2015-08-12 22:13:15 +03:00
Alex Polvi
cfb3522b63 add etcd on k8s example 2015-08-12 22:12:00 +03:00
Xiang Li
f468d8b51a Merge pull request #3270 from xiang90/better_err
Better error message for etcdctl
2015-08-12 10:27:42 -07:00
Xiang Li
7e04a79fb4 etcdctl: print out better error information 2015-08-12 10:09:56 -07:00
Xiang Li
5d06d4ec44 client: print url as string 2015-08-12 10:09:40 -07:00
Xiang Li
e894756144 Merge pull request #3190 from yichengq/adjust-prop-timeout
etcdserver: adjust proposal timeout based on config
2015-08-12 09:41:25 -07:00
Yicheng Qin
c3d4d11402 etcdhttp: adjust request timeout based on config
It uses heartbeat interval and election timeout to estimate the
expected request timeout.

This PR helps etcd survive under high roundtrip-time environment,
e.g., globally-deployed cluster.
2015-08-12 09:22:59 -07:00
Xiang Li
18ecc297bc Merge pull request #3254 from es-chow/log-group
set groupID in multinode as log context so it can be logged
2015-08-12 08:05:50 -07:00
es-chow
cc362ccdad raft: set logger to raft so log context such as multinode groupID can be logged 2015-08-12 22:56:00 +08:00
Yicheng Qin
5a91937367 etcdserver: adjust commit timeout based on config
It uses heartbeat interval and election timeout to estimate the
commit timeout for internal requests.

This PR helps etcd survive under high roundtrip-time environment,
e.g., globally-deployed cluster.
2015-08-11 21:09:03 -07:00
Yicheng Qin
042afcf2a3 Merge pull request #3266 from yichengq/client-readme
client: clean up README
2015-08-11 16:21:13 -07:00
Yicheng Qin
7d618c46ad client: clean up README
Address rob's comments about sentences in README.
2015-08-11 15:33:56 -07:00
Xiang Li
18a1c95f22 Merge pull request #3263 from xiang90/ctl_tr
etcdctl: add per request timeout
2015-08-11 14:17:12 -07:00
Yicheng Qin
dceacacd49 Merge pull request #3194 from yichengq/client-readme
client: add README
2015-08-11 13:35:54 -07:00
Xiang Li
e36c499d0f etcdctl: add per request timeout 2015-08-11 13:33:50 -07:00
Yicheng Qin
8a7cf56e13 client: add README
It describes some basic usage and caveat of etcd/client package.

Write it together with Xiang.
2015-08-11 12:07:24 -07:00
Yicheng Qin
83efc08137 Merge pull request #3262 from yichengq/client-deadline
client: return context.DeadlineExceeded instead of ClusterError
2015-08-11 10:42:29 -07:00
Yicheng Qin
a1ef699aeb client: return context.DeadlineExceeded instead of ClusterError
This is done to match user expectation to see context.DeadlineExceeded
when it reaches deadline.
2015-08-11 10:18:38 -07:00
Yicheng Qin
1fe52e1ec3 Merge pull request #3245 from yichengq/client_timeout
client: set timeout for each request
2015-08-11 10:10:42 -07:00
Yicheng Qin
f4c29a5f55 client: support to set timeout for each request
Add HeaderTimeout field in Config, so users could set timeout for each request.
Before this, one hung request may block the call for a long time. After
this, if the network is good, the user could set short timeout and expect
that API call can attempt next available endpoint quickly.
2015-08-11 10:01:05 -07:00
Xiang Li
a718329ad3 Merge pull request #3248 from xiang90/v3
initial v3 demo
2015-08-10 13:59:03 -07:00
Xiang Li
fb5e1ac548 Merge pull request #3256 from xiang90/update_log
update logger
2015-08-10 13:54:28 -07:00
Xiang Li
6c58333969 etcdmain: use default formatter
The default formatter would use syslog style when running
under init system, and would use pretty format otherwise.
2015-08-10 13:38:22 -07:00
Xiang Li
48e36bbb84 Godep: update capnslog dependency 2015-08-10 13:38:00 -07:00
Xiang Li
b0ea4ab3b1 doc: link to v3 api doc 2015-08-10 11:22:55 -07:00
Xiang Li
c32919e6d1 *: rename v3etcdctl to etcdctlv3 2015-08-10 11:21:37 -07:00
Xiang Li
c1e0b19f9f *: better flag 2015-08-10 09:53:17 -07:00
Xiang Li
48b1cd54f3 Merge pull request #3243 from xiang90/conf
doc: add runtime reconfiguration design doc
2015-08-09 10:56:51 -07:00
Xiang Li
89bf5824c2 Merge pull request #3159 from sofuture/master
use /usr/bin/env to find bash
2015-08-09 10:56:12 -07:00
Xiang Li
601801ced5 doc: add runtime reconfiguration design doc 2015-08-09 10:55:34 -07:00
Brandon Philips
45f3a0c547 Merge pull request #3249 from philips/get-etcd-running-under-arm64
Get etcd running under arm64
2015-08-08 20:32:33 -07:00
Jeff Zellner
1239e1ce6f test, scripts: use /usr/bin/env to find bash
use /usr/bin/env to find bash

add set -e back into scripts it was removed from
2015-08-08 20:52:53 -06:00
Brandon Philips
1b894c6b0b test: race detector doesn't work on armv7l
Test fails without this fix on armv7l:

    go test: -race is only supported on linux/amd64, freebsd/amd64, darwin/amd64 and windows/amd64
2015-08-08 18:11:41 -07:00
Brandon Philips
fb1951204c etcdserver: move atomics to make etcd work on arm64
Follow the simple rule in the atomic package:

"On both ARM and x86-32, it is the caller's responsibility to arrange
for 64-bit alignment of 64-bit words accessed atomically. The first word
in a global variable or in an allocated struct or slice can be relied
upon to be 64-bit aligned."

Tested on a system with /proc/cpuinfo reporting:

processor       : 0
model name      : ARMv7 Processor rev 1 (v7l)
Features        : swp half thumb fastmult vfp edsp thumbee neon vfpv3
tls vfpv4 idiva idivt vfpd32 lpae evtstrm
CPU implementer : 0x41
CPU architecture: 7
CPU variant     : 0x0
CPU part        : 0xc0d
CPU revision    : 1
2015-08-08 18:11:41 -07:00
Xiang Li
9ff7075ce8 etcdserver: use v3server interface 2015-08-08 10:39:04 -07:00
Xiang Li
523567bcc7 v3etcdctl: initial v3 ctl support 2015-08-08 05:58:58 -07:00
Xiang Li
f004b4dac7 *: etcdserver supports v3 demo 2015-08-08 05:58:29 -07:00
Xiang Li
82afadbcc6 etcdserverpb: update proto 2015-08-08 05:31:35 -07:00
Xiang Li
668a8a8367 Merge pull request #3242 from xiang90/typo
*: fix typos vaild->valid
2015-08-07 10:58:39 -07:00
Xiang Li
845c51fedd *: fix typos vaild->valid 2015-08-07 10:57:11 -07:00
Yicheng Qin
f0a5874473 Merge pull request #3241 from yichengq/sync-pin
client: Sync() pin the endpoint when member list doesn't change
2015-08-07 10:24:29 -07:00
Yicheng Qin
0ab16db728 client: Sync() pin the endpoint when member list doesn't change
This helps client to pin the same endpoint as long as cluster doesn't change.
2015-08-07 10:08:28 -07:00
Xiang Li
d7adcc3e65 Merge pull request #3239 from xiang90/improve_probing
rafthttp: use customized transport for probing
2015-08-07 09:37:32 -07:00
Xiang Li
b6580a9591 rafthttp: use customized transport for probing
We need to support TLS verification when probing.
2015-08-06 16:20:44 -07:00
Xiang Li
d2363afd52 Merge pull request #3240 from xiang90/fix_log
etcdmain: fix path printing
2015-08-06 15:56:14 -07:00
Yicheng Qin
f03f048232 Merge pull request #3184 from yichengq/fast-bootstrap
etcdserver: tick ElectionTicks before starting when bootstrap new cluster
2015-08-06 15:54:40 -07:00
Xiang Li
1b572ae2dd etcdmain: fix path printing 2015-08-06 15:53:24 -07:00
Yicheng Qin
21f5b885f2 etcdserver: fast election timeout when bootstrap cluster
The behavior accelerates the occurrence of the first-time leader election,
so the cluster could elect its leader fast. Technically, it could
help to reduce `electionMs - heartbeatMs` wait time for the first leader election.

Main usage:
1. Quick start for the local cluster when setting a little longer
election timeout
2. Quick start for the global cluster, which sets election timeout to
its maximum 50s.
2015-08-06 15:44:26 -07:00
Yicheng Qin
a637e86372 Merge pull request #3220 from yichengq/fix-auth-check
etcdhttp: fix access check for multiple roles in auth
2015-08-06 15:09:04 -07:00
Xiang Li
b9c6b64d61 Merge pull request #3216 from yichengq/cancel-err
client: return context canceled error correctly
2015-08-06 15:04:49 -07:00
Yicheng Qin
b965c4b415 Merge pull request #3217 from yichengq/update-migrate-example
update commands used in admin_guide.md
2015-08-06 15:00:04 -07:00
Yicheng Qin
78af793338 client: return context canceled error correctly
If the body is closed to stop watching, it will ignore the error from
reading body and return context error.

Before this PR, the cancel when watching always returns error `read tcp
127.0.0.1:57824: use of closed network connection`. After this PR, it
will return expected context canceled error.
2015-08-06 14:52:04 -07:00
Xiang Li
b04bb3e0ea Merge pull request #3229 from xiang90/f_cerr
client: return context.Canceled error when user cancels the request
2015-08-06 14:41:19 -07:00
Yicheng Qin
25ad71fbac Merge pull request #3225 from yichengq/client-record-err
client: return correct error for 50x response
2015-08-06 14:40:38 -07:00
Xiang Li
7314310aed Merge pull request #3233 from xiang90/srv_discovery
better dns discovery error and doc
2015-08-06 14:35:22 -07:00
Yicheng Qin
cfeaf3d172 client: return correct error for 50x response
etcd always returns 500/503 response when it may have no leader.
So we should log the other 50x response in a normal way.

This helps to log correctly when discovery meets 504 error. Before this
PR, it logs like this:

```
18:31:58 etcd2 | 2015/08/4 18:31:58 discovery: error #0: client: etcd
member https://discovery.etcd.io has no leader
18:31:58 etcd2 | 2015/08/4 18:31:58 discovery: waiting for other nodes:
error connecting to https://discovery.etcd.io, retrying in 4s
```

After this PR:

```
22:20:25 etcd2 | 2015/08/4 22:20:25 discovery: error #0: client: etcd
member https://discovery.etcd.io returns server error [Gateway Timeout]
22:20:25 etcd2 | 2015/08/4 22:20:25 discovery: waiting for other nodes:
error connecting to https://discovery.etcd.io, retrying in 4s
```
2015-08-06 14:25:03 -07:00
Xiang Li
e9f05e8959 doc: explain srv error 2015-08-06 14:24:58 -07:00
Yicheng Qin
2c2249dadc Merge pull request #3219 from yichengq/limit-listener
etcdmain: stop accepting client conns when it reaches limit
2015-08-06 12:17:49 -07:00
Yicheng Qin
97923ca3fc etcdmain: close client conns when it exceeds limit
This solves the problem that etcd may fatal because its critical path
cannot get file descriptor resource when the number of clients is too
big. The PR lets the client listener close client connections
immediately after they are accepted when
the file descriptor usage in the process reaches some pre-set limit, so
it ensures that the internal critical path could always get file
descriptor when it needs.

When there are tons of clients connecting to the server, the original
behavior is like this:

```
2015/08/4 16:42:08 etcdserver: cannot monitor file descriptor usage
(open /proc/self/fd: too many open files)
2015/08/4 16:42:33 etcdserver: failed to purge snap file open
default2.etcd/member/snap: too many open files
[halted]
```

Current behavior is like this:

```
2015/08/6 19:05:25 transport: accept error: closing connection,
exceed file descriptor usage limitation (fd limit=874)
2015/08/6 19:05:25 transport: accept error: closing connection,
exceed file descriptor usage limitation (fd limit=874)
2015/08/6 19:05:26 transport: accept error: closing connection,
exceed file descriptor usage limitation (fd limit=874)
2015/08/6 19:05:27 transport: accept error: closing connection,
exceed file descriptor usage limitation (fd limit=874)
2015/08/6 19:05:28 transport: accept error: closing connection,
exceed file descriptor usage limitation (fd limit=874)
2015/08/6 19:05:28 etcdserver: 80% of the file descriptor limit is
used [used = 873, limit = 1024]
```

It is available at linux system today because pkg/runtime only has linux
support.
2015-08-06 12:03:20 -07:00
Xiang Li
203e0f178b etcdmain: better error for srv discovery failure 2015-08-06 11:38:53 -07:00
Xiang Li
01c286ccb6 Merge pull request #3231 from xiang90/fallocate
pkg/fileutil: support preallocate
2015-08-06 10:25:28 -07:00
Xiang Li
39a4b6a5e5 pkg/fileutil: support preallocate 2015-08-06 10:10:58 -07:00
Xiang Li
9a8607fce1 Merge pull request #3187 from yichengq/client-keep-sync
client: add KeepSync function
2015-08-06 00:16:28 -07:00
Yicheng Qin
c53b3016ae client: add AutoSync function
AutoSync provides a way for the client to sync the member list from
etcd cluster automatically.
2015-08-05 13:22:56 -07:00
Yicheng Qin
807a6f209e docs/admin_guide: decouple example from CoreOS specific details
This makes the example commands general, while keeping it easy to
understand. It also fixes some name mismatch.
2015-08-05 11:33:46 -07:00
Xiang Li
f38187bbdb client: return context.Canceled error when user cancels the request 2015-08-05 09:52:30 -07:00
Xiang Li
ff0b8723c7 Merge pull request #2688 from xiang90/versioning
etcdserver: internal request union
2015-08-05 09:27:32 -07:00
Xiang Li
58503817ec etcdserver: internal request union 2015-08-05 07:47:10 -07:00
Xiang Li
487639b2d8 Merge pull request #3222 from mitake/wal-log-error
wal: log errors in wal.Close()
2015-08-04 23:19:45 -07:00
Xiang Li
9cbeffc720 Merge pull request #3224 from xiang90/fix_ls
etcdctl: ls takes / as default key arg
2015-08-04 23:15:29 -07:00
Hitoshi Mitake
ba76e27875 wal: log errors in wal.Close()
This patch adds error logging in wal.Close() if unlocking and
destroying fail. Though it is hard to handle the errors, logging
would be helpful for trouble shooting.
2015-08-05 15:03:45 +09:00
Xiang Li
9527a97720 etcdctl: ls takes / as default key arg 2015-08-04 22:56:55 -07:00
Xiang Li
718a42f408 Merge pull request #3210 from xiang90/probing
monitoring connectivity between peers
2015-08-04 16:56:31 -07:00
Yicheng Qin
18169e896c etcdhttp: fix access check for multiple roles in auth
Check access for multiple roles should go through all roles.
2015-08-04 14:31:07 -07:00
Yicheng Qin
0650170a1b Merge pull request #3196 from eyakubovich/fix-watch-timeout
client: handle watch timing out elegantly
2015-08-04 13:52:42 -07:00
Xiang Li
1e048b5c24 rafthttp: cleanup prober when stopping the transport 2015-08-04 17:42:51 +08:00
Xiang Li
709718ed97 godeps: update probing pkg 2015-08-04 17:40:39 +08:00
Xiang Li
0fc764200d rafthttp: monitor connection 2015-08-04 17:39:40 +08:00
Xiang Li
ff5c3469c1 Merge pull request #3197 from xiang90/health
etcdctl: cluster-health supports forever flag
2015-08-03 20:48:06 -07:00
Eugene Yakubovich
6312e22b1d client: handle empty watch responses elegantly
Even though current etcd does not time out
watches, the client could be running against
an old etcd version or the server may close
polling connection for other reasons.
This patch ignores successful (as in 200)
responses with empty bodies instead
of producing JSON errors.
2015-08-03 11:47:21 -07:00
Xiang Li
306085db5f Godeps: add probing dependency 2015-08-03 09:07:43 +08:00
Xiang Li
f7f00b0af6 etcdctl: cluster-health supports forever flag
cluster-health command supports checking the cluster health
forever.
2015-08-01 22:29:08 +08:00
Xiang Li
3da1df2648 Merge pull request #3207 from xiang90/rm_migration
*: remove migration related stuff from 2.2
2015-08-01 19:47:17 +08:00
Xiang Li
2b8abeb093 *: remove migration related stuff from 2.2 2015-08-01 19:37:20 +08:00
Xiang Li
eee1c8b8ee Merge pull request #3200 from xiang90/d_doc
doc: unique names must be specified when using public discovery service
2015-08-01 07:34:25 +08:00
Yicheng Qin
8bd9554338 Merge pull request #3202 from yichengq/fix-etcdctl-watch
etcdctl: fix watch -after-index parsing
2015-07-31 14:41:45 -07:00
Yicheng Qin
4a89b3f8f3 Merge pull request #3116 from offscale/master
build: implemented build shell-script for Windows
2015-07-31 11:55:42 -07:00
Xiang Li
05b2d06788 Merge pull request #3199 from xiang90/sdnotify
etcdmain: support sdnotify for readiness
2015-07-31 19:04:35 +08:00
Samuel Marks
4a0d8ee4bd build: implemented build shell-script for Windows 2015-07-31 17:43:47 +10:00
Xiang Li
0cbac56fa2 etcdmain: support sdnotify for readiness 2015-07-31 13:33:18 +08:00
Xiang Li
beeecc32b0 doc: unique names must be specified when using public discovery service 2015-07-31 09:12:44 +08:00
Barak Michener
c1c5c7c99c Merge pull request #3091 from barakmich/client_auth_cov
etcdhttp: Improve test coverage surrounding auth
2015-07-30 17:00:49 -04:00
Barak Michener
dd1a8fe330 etcdhttp: Improve test coverage surrounding auth 2015-07-30 14:21:08 -04:00
Yicheng Qin
147885078c etcdctl: fix watch -after-index parsing
It uses -after-index incorrectly now:

```
$ ./bin/etcdctl --debug watch -after-index 31 foo
Cluster-Endpoints: http://localhost:2379, http://localhost:4001
cURL Command: curl -X GET
http://localhost:2379/v2/keys/foo?recursive=false&wait=true&waitIndex=33
```

After this PR:

```
$ ./bin/etcdctl --debug watch -after-index 31 foo
Cluster-Endpoints: http://localhost:2379, http://localhost:4001
cURL Command: curl -X GET
http://localhost:2379/v2/keys/foo?recursive=false&wait=true&waitIndex=32
```
2015-07-30 11:15:43 -07:00
Yicheng Qin
219ed1695b Merge pull request #3178 from yichengq/refactor-cluster-health
etcdctl: refactor the way to check cluster health
2015-07-29 18:16:26 -07:00
Xiang Li
80b794dccc Merge pull request #3185 from xiang90/add_debug_endpoint
etcdhttp: add config/local/debug endpoint
2015-07-30 08:46:07 +08:00
Xiang Li
4e31df2c2b etcdhttp: add config/local/log endpoint
PUT on the endpoint sets the GlobalDebugLevel to json level value.
The action overwrites the original log level setting from
users. We need to write doc to warn this.
2015-07-30 08:35:01 +08:00
Yicheng Qin
e62a3b8a62 Merge pull request #2891 from glensc/patch-1
build: use posix shell
2015-07-29 17:15:57 -07:00
Xiang Li
ff945c7404 Merge pull request #3181 from xiang90/2.2-client-error
client: return cluster error if the etcd cluster is not available
2015-07-30 08:08:09 +08:00
Yicheng Qin
f1aaa7a9e3 etcdctl: refactor the way to check cluster health
This method uses raft status exposed at /debug/varz to determine the
health of the cluster. It uses whether commit index increases to
determine the cluster health, and uses whether match index increases to
determine the member health.

This could fix the bug #2711 that fails to detect follower is unhealthy
because it doesn't rely on whether message in long-polling connection is sent.

This health check is stricter than the old one, and reflects the
situation that whether followers are healthy in the view of the leader. One
example is that if the follower is receiving the snapshot, it will turn
out to be unhealthy because it doesn't move forward.

`etcdctl cluster-health` will reflect the healthy view in the raft level,
while connectivity checks reflects the healthy view in transport level.
2015-07-29 17:06:55 -07:00
Xiang Li
a47e661fff discovery: print out detailed cluster error 2015-07-29 23:06:57 +08:00
Xiang Li
5fa8652241 client: return cluster error if the etcd cluster is not available
Add a new ClusterError type. It contains all encountered errors and
return ClusterNotAvailable as the error string.
2015-07-29 22:55:15 +08:00
Yicheng Qin
6b8b507312 Merge pull request #3176 from yichengq/reject-high-election
etcdmain: reject unreasonably high values of -election-timeout
2015-07-28 10:33:58 -07:00
Yicheng Qin
ec214030d0 etcdmain: reject unreasonably high values of -election-timeout
This helps users to detect setting problem early.
2015-07-28 10:07:57 -07:00
George Tankersley
edfec45bf5 hack: TLS setup using cfssl
this demonstrates basic TLS setup with cfssl. it's much easier than other
available tools.
2015-07-27 14:51:17 -07:00
Yicheng Qin
7831a30e46 Merge pull request #3180 from shafreeck/master
Update libraries-and-tools.md
2015-07-27 14:45:31 -07:00
Yicheng Qin
6184e271a4 Merge pull request #3164 from yichengq/pin-endpoint
client: pin itself to an endpoint that given
2015-07-27 14:35:51 -07:00
Yicheng Qin
6fc9dbfe56 Merge pull request #3114 from yichengq/clean-raft-init
etcdserver: clean up start and stop logic of raft
2015-07-27 14:19:25 -07:00
Yicheng Qin
ea2347a40f client: pin itself to an endpoint that given
1. When reset endpoints, client will choose a random endpoint to pin.
2. If the pinned endpoint is healthy, client will keep using it.
3. If the pinned endpoint becomes unhealthy, client will attempt other
endpoints and update its pin.
2015-07-27 13:36:53 -07:00
Yicheng Qin
7696dd3280 etcdserver: clean up start and stop logic of raft
kill TODO and make it more readable.
2015-07-27 13:24:26 -07:00
Yicheng Qin
5e3dc31e6f Merge pull request #3150 from gouyang/master
pkg/mflag: add modified flag package
2015-07-24 15:26:07 -07:00
Xiang Li
a7eef376b7 Merge pull request #3183 from xiang90/txn
*: tnx -> txn
2015-07-25 01:48:06 +08:00
Xiang Li
53a77fa519 *: tnx -> txn 2015-07-24 23:21:09 +08:00
Guohua Ouyang
c9769ee966 etcdmain: Don't print flags when flag parse error
At present it prints the whole usage and flags, which causes the exact
error message to be hidden two screens above.

Fixes #3141

Signed-off-by: Guohua Ouyang <gouyang@redhat.com>
2015-07-24 21:29:21 +08:00
Shafreeck Sea
e75446ca27 docs: add cetcd into libraries-and-tools.md 2015-07-24 12:08:39 +00:00
Yicheng Qin
b407f72766 Merge pull request #3166 from yichengq/publish-timeout
etcdserver: rename defaultPublishRetryInterval -> defaultPublishTimeout
2015-07-23 10:30:41 -07:00
Yicheng Qin
b7892b20c1 etcdserver: rename defaultPublishRetryInterval -> defaultPublishTimeout
This makes code more readable and reasonable.
2015-07-23 10:09:28 -07:00
Xiang Li
58bc617dd0 Merge pull request #3175 from xiang90/2.2-ctl-bug
etcdctl: fix exec watch command
2015-07-23 14:37:38 +08:00
Xiang Li
448ca20cdc etcdctl: fix exec watch command
The previous flag parsing has a small issue. It uses
`recursive == true` and `after-index == 0` to determine
if user specifies the sub flags. This is incorrect since
user can specify `after-index = 0`. Then the flag parsing
would be confused.

This commit explicitly finds the `--` in the remaining args
and determines the key and cmdArgs accordingly.
2015-07-23 13:13:15 +08:00
Xiang Li
43f4b99d52 Merge pull request #3174 from xiang90/2.2_submit_bug
doc: add reporting bug doc
2015-07-23 13:08:35 +08:00
Xiang Li
1b5e41e3f4 doc: add reporting bug doc 2015-07-23 12:55:38 +08:00
Yicheng Qin
93002caca5 Merge pull request #3165 from yichengq/client-quorum
client: add Quorum option in getOption
2015-07-22 16:54:14 -07:00
Yicheng Qin
b20b87893f client: add Quorum option in getOption 2015-07-22 15:19:34 -07:00
Xiang Li
6be02ff5ec etcdmain: fix initialization conflict
Fix #3142

Ignore flags if etcd is already initialized.
2015-07-21 12:53:21 -07:00
Yicheng Qin
24db661401 etcdmain: warn when listening on HTTP if TLS is set
If the user sets TLS info, this implies that he wants to listen on TLS.
If etcd finds that urls to listen is still HTTP schema, it prints out
warning to notify user about possible wrong setting.
2015-07-21 12:53:21 -07:00
Yicheng Qin
604709cad7 etcdctl: update -peers to default to use schema
Change its default value from `127.0.0.1:4001,127.0.0.1:2379` to
`http://127.0.0.1:4001,http://127.0.0.1:2379`

Adding HTTP schema makes its format consistent with etcd's xxx-urls
flags.
2015-07-21 12:53:21 -07:00
Xiang Li
d9c27138fa discovery: return bad discovery endpoint error 2015-07-21 12:53:21 -07:00
Xiang Li
d2dac0fe59 client: consume json error and return ErrInvalidJSON
The default JSON error is not very readable. We let client
consume the error and return a more understandable error in
the context of etcd.

Fix #3120
2015-07-21 12:53:21 -07:00
Yicheng Qin
6317abf7e4 pkg/transport: fix HTTPS downgrade bug for keepalive listener
If TLS config is empty, etcd downgrades keepalive listener from HTTPS to
HTTP without warning. This results in HTTPS downgrade bug for client urls.
The commit returns error if it cannot listen on TLS.
2015-07-21 12:53:21 -07:00
Mohammad Samman
43437e21f9 etcdctl: added domain discovery flag
provided a domain, will look up SRV records for etcd endpoints

Fixes #2636
2015-07-21 12:53:21 -07:00
Xiang Li
dc3f7f5d90 *: detect duplicate name for discovery bootstrap 2015-07-21 12:53:20 -07:00
Xiang Li
b8279b3591 types: add len func for urlmaps 2015-07-21 12:53:20 -07:00
Xiang Li
ee82ee05b4 etcdctl: support member update command 2015-07-21 12:53:20 -07:00
Xiang Li
6e3769d39e client: add member update 2015-07-21 12:53:20 -07:00
Xiang Li
9f9661f513 etcdctl: print out key and action when watching recursively 2015-07-21 12:53:20 -07:00
Xiang Li
87ef0f0b3e godep: remove go-etcd dependency 2015-07-21 12:53:20 -07:00
Xiang Li
071ad9f72b etcdctl: health use etcd/client 2015-07-21 12:53:20 -07:00
Xiang Li
0b1ddce889 etcdctl: import snap use etcd/client 2015-07-21 12:53:20 -07:00
Xiang Li
adeb101e04 etcdctl: remove old stuff 2015-07-21 12:53:20 -07:00
Xiang Li
759c156e3e etcdctl: exec_watch use etcd/client 2015-07-21 12:53:20 -07:00
Xiang Li
5b01b3877f etcdctl: watch use etcd/client 2015-07-21 12:53:20 -07:00
Xiang Li
b20c06348d etcdctl: ls use etcd/client 2015-07-21 12:53:19 -07:00
Xiang Li
ae1669de26 etcdctl: updatedir use etcd/client 2015-07-21 12:53:19 -07:00
Xiang Li
f12ae45c6a etcdctl: update use etcd/client 2015-07-21 12:53:19 -07:00
Xiang Li
58b19a7c1e etcdctl: rmdir use etcd/client 2015-07-21 12:53:19 -07:00
Xiang Li
9d7a8dd2b0 etcdctl: mk use etcd/client 2015-07-21 12:53:19 -07:00
Xiang Li
61befc7ce6 etcdctl: minor cleanup 2015-07-21 12:53:19 -07:00
Xiang Li
e3fcc450cf etcdctl: make rm use etcd/client 2015-07-21 12:53:19 -07:00
Xiang Li
9d9c3a7180 etcdctl: make setdir/mkdir use etcd/client 2015-07-21 12:53:19 -07:00
Xiang Li
db4b18aee3 etcdctl: make set command use etcd/client 2015-07-21 12:53:19 -07:00
Xiang Li
e9478ba630 etcdctl: make get command use etcd/client 2015-07-21 12:53:19 -07:00
Brandon Philips
09b9c30beb pkg/transport: include debug output for trusted-ca
since --peer-ca-file is deprecated we need to update the debug output

before:

```
$ etcd ... --peer-cert-file infra1.crt -peer-key-file
 infra1.key.insecure -peer-trusted-ca-file ca.crt --client-cert-auth
etcdmain: peerTLS: cert = infra1.crt, key = infra1.key.insecure, ca =
```

after:

```
$ etcd ... --peer-cert-file infra1.crt -peer-key-file
 infra1.key.insecure -peer-trusted-ca-file ca.crt --client-cert-auth
etcdmain: peerTLS: cert = infra1.crt, key = infra1.key.insecure, ca = , trusted-ca = ca.crt
```
2015-07-04 14:28:18 -07:00
Elan Ruusamäe
77c3613d94 build: use posix shell 2015-05-30 09:34:54 +03:00
1118 changed files with 194512 additions and 36587 deletions

View File

@@ -2,6 +2,7 @@ language: go
sudo: false
go:
- 1.4
- 1.5
install:
- go get github.com/barakmich/go-nyet

View File

@@ -12,6 +12,14 @@ etcd is Apache 2.0 licensed and accepts contributions via GitHub pull requests.
- Fork the repository on GitHub
- Read the README.md for build instructions
## Reporting Bugs and Creating Issues
Reporting bugs is one of the best ways to contribute. However, a good bug report
has some very specific qualities, so please read over our short document on
[reporting bugs](https://github.com/coreos/etcd/blob/master/Documentation/reporting_bugs.md)
before you submit your bug report. This document might contain links to known
issues, another good reason to take a look there, before reporting your bug.
## Contribution flow
This is a rough outline of what a contributor's workflow looks like:

View File

@@ -0,0 +1,31 @@
## Snapshot Migration
You can migrate a snapshot of your data from a v0.4.9+ cluster into a new etcd 2.2 cluster using a snapshot migration. After snapshot migration, the etcd indexes of your data will change. Many etcd applications rely on these indexes to behave correctly. This operation should only be done while all etcd applications are stopped.
To get started get the newest data snapshot from the 0.4.9+ cluster:
```
curl http://cluster.example.com:4001/v2/migration/snapshot > backup.snap
```
Now, import the snapshot into your new cluster:
```
etcdctl --endpoint new_cluster.example.com import --snap backup.snap
```
If you have a large amount of data, you can specify more concurrent workers to copy data in parallel by using the `-c` flag.
If you have hidden keys to copy, you can use `--hidden` flag to specify.
And the data will quickly copy into the new cluster:
```
entering dir: /
entering dir: /foo
entering dir: /foo/bar
copying key: /foo/bar/1 1
entering dir: /
entering dir: /foo2
entering dir: /foo2/bar2
copying key: /foo2/bar2/2 2
```

View File

@@ -8,14 +8,17 @@ When first started, etcd stores its configuration into a data directory specifie
Configuration is stored in the write ahead log and includes: the local member ID, cluster ID, and initial cluster configuration.
The write ahead log and snapshot files are used during member operation and to recover after a restart.
If a members data directory is ever lost or corrupted then the user should remove the etcd member from the cluster via the [members API][members-api].
Having a dedicated disk to store wal files can improve the throughput and stabilize the cluster.
It is highly recommended to dedicate a wal disk and set `--wal-dir` to point to a directory on that device for a production cluster deployment.
If a member's data directory is ever lost or corrupted then the user should [remove][remove-a-member] the etcd member from the cluster using the `etcdctl` tool.
A user should avoid restarting an etcd member with a data directory from an out-of-date backup.
Using an out-of-date data directory can lead to inconsistency as the member had agreed to store information via raft then re-joins saying it needs that information again.
For maximum safety, if an etcd member suffers any sort of data corruption or loss, it must be removed from the cluster.
Once removed the member can be re-added with an empty data directory.
[members-api]: other_apis.md#members-api
[remove-a-member]: runtime-configuration.md#remove-a-member
#### Contents
@@ -24,6 +27,8 @@ The data directory has two sub-directories in it:
1. wal: write ahead log files are stored here. For details see the [wal package documentation][wal-pkg]
2. snap: log snapshots are stored here. For details see the [snap package documentation][snap-pkg]
If `--wal-dir` flag is set, etcd will write the write ahead log files to the specified directory instead of data directory.
[wal-pkg]: http://godoc.org/github.com/coreos/etcd/wal
[snap-pkg]: http://godoc.org/github.com/coreos/etcd/snap
@@ -34,6 +39,74 @@ The data directory has two sub-directories in it:
If you are spinning up multiple clusters for testing it is recommended that you specify a unique initial-cluster-token for the different clusters.
This can protect you from cluster corruption in case of mis-configuration because two members started with different cluster tokens will refuse members from each other.
#### Monitoring
It is important to monitor your production etcd cluster for health information and runtime metrics.
##### Health Monitoring
At the lowest level, etcd exposes health information via HTTP at `/health` in JSON format. If it returns `{"health": "true"}`, then the cluster is healthy. Please note the `/health` endpoint is still an experimental one as of etcd 2.2.
```
$ curl -L http://127.0.0.1:2379/health
{"health": "true"}
```
You can also use etcdctl to check the cluster-wide health information. It will contact all the members of the cluster and collect the health information for you.
```
$ ./etcdctl cluster-health
member 8211f1d0f64f3269 is healthy: got healthy result from http://127.0.0.1:12379
member 91bc3c398fb3c146 is healthy: got healthy result from http://127.0.0.1:22379
member fd422379fda50e48 is healthy: got healthy result from http://127.0.0.1:32379
cluster is healthy
```
##### Runtime Metrics
etcd uses [Prometheus](http://prometheus.io/) for metrics reporting in the server. You can read more through the runtime metrics [doc](metrics.md).
#### Debugging
Debugging a distributed system can be difficult. etcd provides several ways to make debugging
easier.
##### Enabling Debug Logging
When you want to debug etcd without stopping it, you can enable debug logging at runtime.
etcd exposes logging configuration at `/config/local/log`.
```
$ curl http://127.0.0.1:2379/config/local/log -XPUT -d '{"Level":"DEBUG"}'
$ # debug logging enabled
$
$ curl http://127.0.0.1:2379/config/local/log -XPUT -d '{"Level":"INFO"}'
$ # debug logging disabled
```
##### Debugging Variables
Debug variables are exposed for real-time debugging purposes. Developers who are familiar with etcd can utilize these variables to debug unexpected behavior. etcd exposes debug variables via HTTP at `/debug/vars` in JSON format. The debug variables contain
`cmdline`, `file_descriptor_limit`, `memstats` and `raft.status`.
`cmdline` is the command line arguments passed into etcd.
`file_descriptor_limit` is the max number of file descriptors etcd can utilize.
`memstats` is well explained [here](http://golang.org/pkg/runtime/#MemStats).
`raft.status` is useful when you want to debug low level raft issues if you are familiar with raft internals. In most cases, you do not need to check `raft.status`.
```json
{
"cmdline": ["./etcd"],
"file_descriptor_limit": 0,
"memstats": {"Alloc":4105744,"TotalAlloc":42337320,"Sys":12560632,"...":"..."},
"raft.status": {"id":"ce2a822cea30bfca","term":5,"vote":"ce2a822cea30bfca","commit":23509,"lead":"ce2a822cea30bfca","raftState":"StateLeader","progress":{"ce2a822cea30bfca":{"match":23509,"next":23510,"state":"ProgressStateProbe"}}}
}
```
#### Optimal Cluster Size
The recommended etcd cluster size is 3, 5 or 7, which is decided by the fault tolerance requirement. A 7-member cluster can provide enough fault tolerance in most cases. While larger cluster provides better fault tolerance the write performance reduces since data needs to be replicated to more machines.
@@ -57,7 +130,7 @@ As you can see, adding another member to bring the size of cluster up to an odd
#### Changing Cluster Size
After your cluster is up and running, adding or removing members is done via [runtime reconfiguration](runtime-configuration.md), which allows the cluster to be modified without downtime. The `etcdctl` tool has a `member list`, `member add` and `member remove` commands to complete this process.
After your cluster is up and running, adding or removing members is done via [runtime reconfiguration](runtime-configuration.md#cluster-reconfiguration-operations), which allows the cluster to be modified without downtime. The `etcdctl` tool has a `member list`, `member add` and `member remove` commands to complete this process.
### Member Migration
@@ -67,7 +140,7 @@ The data directory contains all the data to recover a member to its point-in-tim
* Stop the member process
* Copy the data directory of the now-idle member to the new machine
* Update the peer URLs for that member to reflect the new machine according to the [member api] [change peer url]
* Update the peer URLs for that member to reflect the new machine according to the [runtime configuration] [change peer url]
* Start etcd on the new machine, using the same configuration and the copy of the data directory
This example will walk you through the process of migrating the infra1 member to a new machine:
@@ -78,11 +151,11 @@ This example will walk you through the process of migrating the infra1 member to
|infra1|10.0.1.11:2380|
|infra2|10.0.1.12:2380|
```
```sh
$ export ETCDCTL_PEERS=http://10.0.1.10:2379,http://10.0.1.11:2379,http://10.0.1.12:2379
```
```
```sh
$ etcdctl member list
84194f7c5edd8b37: name=infra0 peerURLs=http://10.0.1.10:2380 clientURLs=http://127.0.0.1:2379,http://10.0.1.10:2379
b4db3bf5e495e255: name=infra1 peerURLs=http://10.0.1.11:2380 clientURLs=http://127.0.0.1:2379,http://10.0.1.11:2379
@@ -91,53 +164,59 @@ bc1083c870280d44: name=infra2 peerURLs=http://10.0.1.12:2380 clientURLs=http://1
#### Stop the member etcd process
```
$ ssh core@10.0.1.11
```sh
$ ssh 10.0.1.11
```
```
$ sudo systemctl stop etcd
```sh
$ kill `pgrep etcd`
```
#### Copy the data directory of the now-idle member to the new machine
```
$ tar -cvzf node1.etcd.tar.gz /var/lib/etcd/node1.etcd
$ tar -cvzf infra1.etcd.tar.gz %data_dir%
```
```
$ scp node1.etcd.tar.gz core@10.0.1.13:~/
```sh
$ scp infra1.etcd.tar.gz 10.0.1.13:~/
```
#### Update the peer URLs for that member to reflect the new machine
```
```sh
$ curl http://10.0.1.10:2379/v2/members/b4db3bf5e495e255 -XPUT \
-H "Content-Type: application/json" -d '{"peerURLs":["http://10.0.1.13:2380"]}'
```
Or use `etcdctl member update` command
```sh
$ etcdctl member update b4db3bf5e495e255 http://10.0.1.13:2380
```
#### Start etcd on the new machine, using the same configuration and the copy of the data directory
```sh
$ ssh 10.0.1.13
```
$ ssh core@10.0.1.13
```sh
$ tar -xzvf infra1.etcd.tar.gz -C %data_dir%
```
```
$ tar -xzvf node1.etcd.tar.gz -C /var/lib/etcd
```
```
etcd -name node1 \
etcd -name infra1 \
-listen-peer-urls http://10.0.1.13:2380 \
-listen-client-urls http://10.0.1.13:2379,http://127.0.0.1:2379 \
-advertise-client-urls http://10.0.1.13:2379,http://127.0.0.1:2379
```
[change peer url]: other_apis.md#change-the-peer-urls-of-a-member
[change peer url]: runtime-configuration.md#update-a-member
### Disaster Recovery
etcd is designed to be resilient to machine failures. An etcd cluster can automatically recover from any number of temporary failures (for example, machine reboots), and a cluster of N members can tolerate up to _(N/2)-1_ permanent failures (where a member can no longer access the cluster, due to hardware failure or disk corruption). However, in extreme circumstances, a cluster might permanently lose enough members such that quorum is irrevocably lost. For example, if a three-node cluster suffered two simultaneous and unrecoverable machine failures, it would be normally impossible for the cluster to restore quorum and continue functioning.
etcd is designed to be resilient to machine failures. An etcd cluster can automatically recover from any number of temporary failures (for example, machine reboots), and a cluster of N members can tolerate up to _(N-1)/2_ permanent failures (where a member can no longer access the cluster, due to hardware failure or disk corruption). However, in extreme circumstances, a cluster might permanently lose enough members such that quorum is irrevocably lost. For example, if a three-node cluster suffered two simultaneous and unrecoverable machine failures, it would be normally impossible for the cluster to restore quorum and continue functioning.
To recover from such scenarios, etcd provides functionality to backup and restore the datastore and recreate the cluster without data loss.
@@ -149,8 +228,8 @@ The first step of the recovery is to backup the data directory on a functioning
```sh
etcdctl backup \
--data-dir /var/lib/etcd \
--backup-dir /tmp/etcd_backup
--data-dir %data_dir% \
--backup-dir %backup_data_dir%
```
This command will rewrite some of the metadata contained in the backup (specifically, the node ID and cluster ID), which means that the node will lose its former identity. In order to recreate a cluster from the backup, you will need to start a new, single-node cluster. The metadata is rewritten to prevent the new node from inadvertently being joined onto an existing cluster.
@@ -161,7 +240,7 @@ To restore a backup using the procedure created above, start etcd with the `-for
```sh
etcd \
-data-dir=/tmp/etcd_backup \
-data-dir=%backup_data_dir% \
-force-new-cluster \
...
```
@@ -172,18 +251,18 @@ Once you have verified that etcd has started successfully, shut it down and move
```sh
pkill etcd
rm -fr /var/lib/etcd
mv /tmp/etcd_backup /var/lib/etcd
rm -fr %data_dir%
mv %backup_data_dir% %data_dir%
etcd \
-data-dir=/var/lib/etcd \
-data-dir=%data_dir% \
...
```
#### Restoring the cluster
Now that the node is running successfully, you should [change its advertised peer URLs](other_apis.md#change-the-peer-urls-of-a-member), as the `--force-new-cluster` has set the peer URL to the default (listening on localhost).
Now that the node is running successfully, you should [change its advertised peer URLs](runtime-configuration.md#update-a-member), as the `--force-new-cluster` has set the peer URL to the default (listening on localhost).
You can then add more nodes to the cluster and restore resiliency. See the [runtime configuration](runtime-configuration.md) guide for more details.
You can then add more nodes to the cluster and restore resiliency. See the [add a new member](runtime-configuration.md#add-a-new-member) guide for more details. **NB:** If you are trying to restore your cluster using old failed etcd nodes, please make sure you have stopped old etcd instances and removed their old data directories specified by the data-dir configuration parameter.
### Client Request Timeout

View File

@@ -82,7 +82,7 @@ X-Raft-Term: 1
- `X-Raft-Index` is similar to the etcd index but is for the underlying raft protocol
- `X-Raft-Term` is an integer that will increase whenever an etcd master election happens in the cluster. If this number is increasing rapidly, you may need to tune the election timeout. See the [tuning][tuning] section for details.
[tuning]: #tuning
[tuning]: tuning.md
### Get the value of a key
@@ -356,6 +356,13 @@ So the first watch after the get should be:
curl 'http://127.0.0.1:2379/v2/keys/foo?wait=true&waitIndex=2008'
```
#### Connection being closed prematurely
The server may close a long polling connection before emitting any events.
This can happen due to a timeout or the server being shut down.
Since the HTTP header is sent immediately upon accepting the connection, the response will be seen as empty: `200 OK` and empty body.
The clients should be prepared to deal with this scenario and retry the watch.
### Atomically Creating In-Order Keys
Using `POST` on a directory, you can create keys with key names that are created in-order.
@@ -373,7 +380,7 @@ curl http://127.0.0.1:2379/v2/keys/queue -XPOST -d value=Job1
"action": "create",
"node": {
"createdIndex": 6,
"key": "/queue/6",
"key": "/queue/00000000000000000006",
"modifiedIndex": 6,
"value": "Job1"
}
@@ -392,7 +399,7 @@ curl http://127.0.0.1:2379/v2/keys/queue -XPOST -d value=Job2
"action": "create",
"node": {
"createdIndex": 29,
"key": "/queue/29",
"key": "/queue/00000000000000000029",
"modifiedIndex": 29,
"value": "Job2"
}
@@ -416,13 +423,13 @@ curl -s 'http://127.0.0.1:2379/v2/keys/queue?recursive=true&sorted=true'
"nodes": [
{
"createdIndex": 2,
"key": "/queue/2",
"key": "/queue/00000000000000000002",
"modifiedIndex": 2,
"value": "Job1"
},
{
"createdIndex": 3,
"key": "/queue/3",
"key": "/queue/00000000000000000003",
"modifiedIndex": 3,
"value": "Job2"
}
@@ -465,7 +472,7 @@ curl http://127.0.0.1:2379/v2/keys/dir -XPUT -d ttl=30 -d dir=true -d prevExist=
Keys that are under this directory work as usual, but when the directory expires, a watcher on a key under the directory will get an expire event:
```sh
curl 'http://127.0.0.1:2379/v2/keys/dir/asdf?wait=true'
curl 'http://127.0.0.1:2379/v2/keys/dir?wait=true'
```
```json

View File

@@ -24,49 +24,6 @@ https://github.com/coreos/etcd/blob/master/Documentation/configuration.md.
The default data dir location has changed from {$hostname}.etcd to {name}.etcd.
## Data Directory Migration
The disk format within the data directory changed with etcd 2.0.
If you run etcd 2.0 on an etcd 0.4 data directory it will automatically migrate the data and start.
You will want to coordinate this upgrade by walking through each of your machines in the cluster, stopping etcd 0.4 and then starting etcd 2.0.
If you would rather manually do the migration, to test it out first in another environment, you can use the [migration tool doc][migrationtooldoc].
[migrationtooldoc]: https://github.com/coreos/etcd/blob/master/tools/etcd-migrate/README.md
## Snapshot Migration
If you are only interested in the data in etcd you can migrate a snapshot of your data from a v0.4.9+ cluster into a new etcd 2.0 cluster using a snapshot migration.
The advantage of this method is that you are directly dumping only the etcd data so you can run your old and new cluster side-by-side, snapshot the data, import it and then point your applications at this cluster.
The disadvantage is that the etcd indexes of your data will change which may confuse applications that use etcd.
To get started get the newest data snapshot from the 0.4.9+ cluster:
```
curl http://cluster.example.com:4001/v2/migration/snapshot > backup.snap
```
Now, import the snapshot into your new cluster:
```
etcdctl -C new_cluster.example.com import --snap backup.snap
```
If you have a large amount of data, you can specify more concurrent workers to copy data in parallel by using the `-c` flag.
If you have hidden keys to copy, you can use `--hidden` flag to specify.
And the data will quickly copy into the new cluster:
```
entering dir: /
entering dir: /foo
entering dir: /foo/bar
copying key: /foo/bar/1 1
entering dir: /
entering dir: /foo2
entering dir: /foo2/bar2
copying key: /foo2/bar2/2 2
```
## Key-Value API
### Read consistency flag

View File

@@ -2,4 +2,12 @@
etcd benchmarks will be published regularly and tracked for each release below:
- [etcd v2.1.0](etcd-2-1-0-benchmarks.md)
- [etcd v2.1.0-alpha](./etcd-2-1-0-alpha-benchmarks.md)
- [etcd v2.2.0-rc](./etcd-2-2-0-rc-benchmarks.md)
- [etcd v3 demo](./etcd-3-demo-benchmarks.md)
# Memory Usage Benchmarks
It records expected memory usage in different scenarios.
- [etcd v2.2.0-rc](./etcd-2-2-0-rc-memory-benchmarks.md)

View File

@@ -6,7 +6,7 @@ GCE n1-highcpu-2 machine type
- 1x dedicated slow disk for the OS
- 1.8 GB memory
- 2x CPUs
- etcd version 2.1.0
- etcd version 2.1.0 alpha
## etcd Cluster

View File

@@ -0,0 +1,67 @@
## Physical machines
GCE n1-highcpu-2 machine type
- 1x dedicated local SSD mounted under /var/lib/etcd
- 1x dedicated slow disk for the OS
- 1.8 GB memory
- 2x CPUs
## etcd Cluster
3 etcd 2.2.0-rc members, each runs on a single machine.
Detailed versions:
```
etcd Version: 2.2.0-alpha.1+git
Git SHA: 59a5a7e
Go Version: go1.4.2
Go OS/Arch: linux/amd64
```
Also, we use 3 etcd 2.1.0 alpha-stage members to form cluster to get base performance. etcd's commit head is at [c7146bd5](https://github.com/coreos/etcd/commits/c7146bd5f2c73716091262edc638401bb8229144), which is the same as the one that we use in [etcd 2.1 benchmark](./etcd-2-1-0-benchmarks.md).
## Testing
Bootstrap another machine and use benchmark tool [boom](https://github.com/rakyll/boom) to send requests to each etcd member. Check [here](../../hack/benchmark/) for instructions.
## Performance
### reading one single key
| key size in bytes | number of clients | target etcd server | read QPS | 90th Percentile Latency (ms) |
|-------------------|-------------------|--------------------|----------|---------------|
| 64 | 1 | leader only | 2804 (-5%) | 0.4 (+0%) |
| 64 | 64 | leader only | 17816 (+0%) | 5.7 (-6%) |
| 64 | 256 | leader only | 18667 (-6%) | 20.4 (+2%) |
| 256 | 1 | leader only | 2181 (-15%) | 0.5 (+25%) |
| 256 | 64 | leader only | 17435 (-7%) | 6.0 (+9%) |
| 256 | 256 | leader only | 18180 (-8%) | 21.3 (+3%) |
| 64 | 64 | all servers | 46965 (-4%) | 2.1 (+0%) |
| 64 | 256 | all servers | 55286 (-6%) | 7.4 (+6%) |
| 256 | 64 | all servers | 46603 (-6%) | 2.1 (+5%) |
| 256 | 256 | all servers | 55291 (-6%) | 7.3 (+4%) |
### writing one single key
| key size in bytes | number of clients | target etcd server | write QPS | 90th Percentile Latency (ms) |
|-------------------|-------------------|--------------------|-----------|---------------|
| 64 | 1 | leader only | 76 (+22%) | 19.4 (-15%) |
| 64 | 64 | leader only | 2461 (+45%) | 31.8 (-32%) |
| 64 | 256 | leader only | 4275 (+1%) | 69.6 (-10%) |
| 256 | 1 | leader only | 64 (+20%) | 16.7 (-30%) |
| 256 | 64 | leader only | 2385 (+30%) | 31.5 (-19%) |
| 256 | 256 | leader only | 4353 (-3%) | 74.0 (+9%) |
| 64 | 64 | all servers | 2005 (+81%) | 49.8 (-55%) |
| 64 | 256 | all servers | 4868 (+35%) | 81.5 (-40%) |
| 256 | 64 | all servers | 1925 (+72%) | 47.7 (-59%) |
| 256 | 256 | all servers | 4975 (+36%) | 70.3 (-36%) |
### performance changes explanation
- read QPS in most scenarios is decreased by 5~8%. The reason is that etcd records store metrics for each store operation. The metrics are important for monitoring and debugging, so this is acceptable.
- write QPS to leader is increased by 20~30%. This is because we decouple raft main loop and entry apply loop, which avoids them blocking each other.
- write QPS to all servers is increased by 30~80% because follower could receive latest commit index earlier and commit proposals faster.

View File

@@ -0,0 +1,47 @@
## Physical machine
GCE n1-standard-2 machine type
- 1x dedicated local SSD mounted under /var/lib/etcd
- 1x dedicated slow disk for the OS
- 7.5 GB memory
- 2x CPUs
## etcd
```
etcd Version: 2.2.0-rc.0+git
Git SHA: 103cb5c
Go Version: go1.5
Go OS/Arch: linux/amd64
```
## Testing
Start 3-member etcd cluster, each of which uses 2 cores.
The length of key name is always 64 bytes, which is a reasonable length of average key bytes.
## Memory Maximal Usage
- etcd may use maximal memory if one follower is dead and the leader keeps sending snapshots.
- `max RSS` is the maximal memory usage recorded in 3 runs.
| value bytes | key number | data size(MB) | max RSS(MB) | max RSS/data rate on leader |
|-------------|-------------|---------------|-------------|-----------------------------|
| 128 | 50000 | 6 | 433 | 72x |
| 128 | 100000 | 12 | 659 | 54x |
| 128 | 200000 | 24 | 1466 | 61x |
| 1024 | 50000 | 48 | 1253 | 26x |
| 1024 | 100000 | 96 | 2344 | 24x |
| 1024 | 200000 | 192 | 4361 | 22x |
## Data Size Threshold
- When etcd reaches data size threshold, it may trigger leader election easily and drop part of proposals.
- In most cases, the etcd cluster should work smoothly if it doesn't hit the threshold. If it doesn't work well due to insufficient resources, you need to decrease its data size.
| value bytes | key number limitation | suggested data size threshold(MB) | consumed RSS(MB) |
|-------------|-----------------------|-----------------------------------|------------------|
| 128 | 400K | 48 | 2400 |
| 1024 | 300K | 292 | 6500 |

View File

@@ -0,0 +1,40 @@
## Physical machines
GCE n1-highcpu-2 machine type
- 1x dedicated local SSD mounted under /var/lib/etcd
- 1x dedicated slow disk for the OS
- 1.8 GB memory
- 2x CPUs
- etcd version 2.2.0
## etcd Cluster
1 etcd member running in v3 demo mode
## Testing
Use [etcd v3 benchmark tool](../../hack/v3benchmark/).
## Performance
### reading one single key
| key size in bytes | number of clients | read QPS | 90th Percentile Latency (ms) |
|-------------------|-------------------|----------|---------------|
| 256 | 1 | 2716 | 0.4 |
| 256 | 64 | 16623 | 6.1 |
| 256 | 256 | 16622 | 21.7 |
The performance is nearly the same as the one with empty server handler.
### reading one single key after putting
| key size in bytes | number of clients | read QPS | 90th Percentile Latency (ms) |
|-------------------|-------------------|----------|---------------|
| 256 | 1 | 2269 | 0.5 |
| 256 | 64 | 13582 | 8.6 |
| 256 | 256 | 13262 | 47.5 |
The performance with empty server handler is not affected by one put. So the
performance downgrade should be caused by storage package.

View File

@@ -4,7 +4,7 @@
Starting an etcd cluster statically requires that each member knows another in the cluster. In a number of cases, you might not know the IPs of your cluster members ahead of time. In these cases, you can bootstrap an etcd cluster with the help of a discovery service.
Once an etcd cluster is up and running, adding or removing members is done via [runtime reconfiguration](runtime-configuration.md).
Once an etcd cluster is up and running, adding or removing members is done via [runtime reconfiguration](runtime-configuration.md). To better understand the design behind runtime reconfiguration, we suggest you read [this](runtime-reconf-design.md).
This guide will cover the following mechanisms for bootstrapping an etcd cluster:
@@ -38,6 +38,8 @@ Note that the URLs specified in `initial-cluster` are the _advertised peer URLs_
If you are spinning up multiple clusters (or creating and destroying a single cluster) with same configuration for testing purpose, it is highly recommended that you specify a unique `initial-cluster-token` for the different clusters. By doing this, etcd can generate unique cluster IDs and member IDs for the clusters even if they otherwise have the exact same configuration. This can protect you from cross-cluster-interaction, which might corrupt your clusters.
etcd listens on [`listen-client-urls`](configuration.md#-listen-client-urls) to accept client traffic. Each etcd member advertises the URLs specified in [`advertise-client-urls`](configuration.md#-advertise-client-urls) to other members, proxies, and clients. Please make sure the `advertise-client-urls` are reachable from intended clients. A common mistake is setting `advertise-client-urls` to localhost or leaving it as the default when you want remote clients to reach etcd.
On each machine you would start etcd with these flags:
```
@@ -122,6 +124,8 @@ There two methods that can be used for discovery:
### etcd Discovery
To better understand the design about discovery service protocol, we suggest you read [this](./discovery_protocol.md).
#### Lifetime of a Discovery URL
A discovery URL identifies a unique etcd cluster. Instead of reusing a discovery URL, you should always create discovery URLs for new clusters.
@@ -144,6 +148,8 @@ If you bootstrap an etcd cluster using discovery service with more than the expe
The URL you will use in this case will be `https://myetcd.local/v2/keys/discovery/6c007a14875d53d9bf0ef5a6fc0257c817f0fb83` and the etcd members will use the `https://myetcd.local/v2/keys/discovery/6c007a14875d53d9bf0ef5a6fc0257c817f0fb83` directory for registration as they start.
Each member must have a different name flag specified; otherwise, discovery will fail due to duplicate names.
Now we start etcd with those relevant flags for each member:
```
@@ -194,6 +200,8 @@ ETCD_DISCOVERY=https://discovery.etcd.io/3e86b59982e49066c5d813af1c2e2579cbf573d
-discovery https://discovery.etcd.io/3e86b59982e49066c5d813af1c2e2579cbf573de
```
Each member must have a different name flag specified; otherwise, discovery will fail due to duplicate names.
Now we start etcd with those relevant flags for each member:
```
@@ -296,6 +304,8 @@ infra2.example.com. 300 IN A 10.0.1.12
etcd cluster members can listen on domain names or IP address, the bootstrap process will resolve DNS A records.
The resolved address in `-initial-advertise-peer-urls` *must match* one of the resolved addresses in the SRV targets. The etcd member reads the resolved address to find out if it belongs to the cluster defined in the SRV records.
```
$ etcd -name infra0 \
-discovery-srv example.com \
@@ -372,6 +382,10 @@ DNS SRV records can also be used to configure the list of peers for an etcd serv
$ etcd --proxy on -discovery-srv example.com
```
#### Error Cases
You might see an error like `cannot find local etcd $name from SRV records.`. That means the etcd member fails to find itself from the cluster defined in SRV records. The resolved address in `-initial-advertise-peer-urls` *must match* one of the resolved addresses in the SRV targets.
# 0.4 to 2.0+ Migration Guide
In etcd 2.0 we introduced the ability to listen on more than one address and to advertise multiple addresses. This makes using etcd easier when you have complex networking, such as private and public networks on various cloud providers.

View File

@@ -13,45 +13,64 @@ To start etcd automatically using custom settings at startup in Linux, using a [
##### -name
+ Human-readable name for this member.
+ default: "default"
+ env variable: ETCD_NAME
+ This value is referenced as this node's own entries listed in the `-initial-cluster` flag (Ex: `default=http://localhost:2380` or `default=http://localhost:2380,default=http://localhost:7001`). This needs to match the key used in the flag if you're using [static bootstrapping](clustering.md#static).
##### -data-dir
+ Path to the data directory.
+ default: "${name}.etcd"
+ env variable: ETCD_DATA_DIR
##### -wal-dir
+ Path to the dedicated wal directory. If this flag is set, etcd will write the WAL files to the walDir rather than the dataDir. This allows a dedicated disk to be used, and helps avoid io competition between logging and other IO operations.
+ default: ""
+ env variable: ETCD_WAL_DIR
##### -snapshot-count
+ Number of committed transactions to trigger a snapshot to disk.
+ default: "10000"
+ env variable: ETCD_SNAPSHOT_COUNT
##### -heartbeat-interval
+ Time (in milliseconds) of a heartbeat interval.
+ default: "100"
+ env variable: ETCD_HEARTBEAT_INTERVAL
##### -election-timeout
+ Time (in milliseconds) for an election to timeout. See [Documentation/tuning.md](tuning.md#time-parameters) for details.
+ default: "1000"
+ env variable: ETCD_ELECTION_TIMEOUT
##### -listen-peer-urls
+ List of URLs to listen on for peer traffic.
+ List of URLs to listen on for peer traffic. This flag tells the etcd to accept incoming requests from its peers on the specified scheme://IP:port combinations. Scheme can be either http or https. If 0.0.0.0 is specified as the IP, etcd listens to the given port on all interfaces. If an IP address is given as well as a port, etcd will listen on the given port and interface. Multiple URLs may be used to specify a number of addresses and ports to listen on. The etcd will respond to requests from any of the listed addresses and ports.
+ default: "http://localhost:2380,http://localhost:7001"
+ env variable: ETCD_LISTEN_PEER_URLS
+ example: "http://10.0.0.1:2380"
+ invalid example: "http://example.com:2380" (domain name is invalid for binding)
##### -listen-client-urls
+ List of URLs to listen on for client traffic.
+ List of URLs to listen on for client traffic. This flag tells the etcd to accept incoming requests from the clients on the specified scheme://IP:port combinations. Scheme can be either http or https. If 0.0.0.0 is specified as the IP, etcd listens to the given port on all interfaces. If an IP address is given as well as a port, etcd will listen on the given port and interface. Multiple URLs may be used to specify a number of addresses and ports to listen on. The etcd will respond to requests from any of the listed addresses and ports.
+ default: "http://localhost:2379,http://localhost:4001"
+ env variable: ETCD_LISTEN_CLIENT_URLS
+ example: "http://10.0.0.1:2379"
+ invalid example: "http://example.com:2379" (domain name is invalid for binding)
##### -max-snapshots
+ Maximum number of snapshot files to retain (0 is unlimited)
+ default: 5
+ env variable: ETCD_MAX_SNAPSHOTS
+ The default for users on Windows is unlimited, and manual purging down to 5 (or your preference for safety) is recommended.
##### -max-wals
+ Maximum number of wal files to retain (0 is unlimited)
+ default: 5
+ env variable: ETCD_MAX_WALS
+ The default for users on Windows is unlimited, and manual purging down to 5 (or your preference for safety) is recommended.
##### -cors
+ Comma-separated white list of origins for CORS (cross-origin resource sharing).
+ default: none
+ env variable: ETCD_CORS
### Clustering Flags
@@ -61,43 +80,55 @@ To start etcd automatically using custom settings at startup in Linux, using a [
##### -initial-advertise-peer-urls
+ List of this member's peer URLs to advertise to the rest of the cluster. These addresses are used for communicating etcd data around the cluster. At least one must be routable to all cluster members.
+ List of this member's peer URLs to advertise to the rest of the cluster. These addresses are used for communicating etcd data around the cluster. At least one must be routable to all cluster members. These URLs can contain domain names.
+ default: "http://localhost:2380,http://localhost:7001"
+ env variable: ETCD_INITIAL_ADVERTISE_PEER_URLS
+ example: "http://example.com:2380, http://10.0.0.1:2380"
##### -initial-cluster
+ Initial cluster configuration for bootstrapping.
+ default: "default=http://localhost:2380,default=http://localhost:7001"
+ env variable: ETCD_INITIAL_CLUSTER
+ The key is the value of the `-name` flag for each node provided. The default uses `default` for the key because this is the default for the `-name` flag.
##### -initial-cluster-state
+ Initial cluster state ("new" or "existing"). Set to `new` for all members present during initial static or DNS bootstrapping. If this option is set to `existing`, etcd will attempt to join the existing cluster. If the wrong value is set, etcd will attempt to start but fail safely.
+ default: "new"
+ env variable: ETCD_INITIAL_CLUSTER_STATE
[static bootstrap]: clustering.md#static
##### -initial-cluster-token
+ Initial cluster token for the etcd cluster during bootstrap.
+ default: "etcd-cluster"
+ env variable: ETCD_INITIAL_CLUSTER_TOKEN
##### -advertise-client-urls
+ List of this member's client URLs to advertise to the rest of the cluster.
+ List of this member's client URLs to advertise to the rest of the cluster. These URLs can contain domain names.
+ default: "http://localhost:2379,http://localhost:4001"
+ env variable: ETCD_ADVERTISE_CLIENT_URLS
+ example: "http://example.com:2379, http://10.0.0.1:2379"
+ Be careful if you are advertising URLs such as http://localhost:2379 from a cluster member and are using the proxy feature of etcd. This will cause loops, because the proxy will be forwarding requests to itself until its resources (memory, file descriptors) are eventually depleted.
##### -discovery
+ Discovery URL used to bootstrap the cluster.
+ default: none
+ env variable: ETCD_DISCOVERY
##### -discovery-srv
+ DNS srv domain used to bootstrap the cluster.
+ default: none
+ env variable: ETCD_DISCOVERY_SRV
##### -discovery-fallback
+ Expected behavior ("exit" or "proxy") when the discovery service fails.
+ default: "proxy"
+ env variable: ETCD_DISCOVERY_FALLBACK
##### -discovery-proxy
+ HTTP proxy to use for traffic to discovery service.
+ default: none
+ env variable: ETCD_DISCOVERY_PROXY
### Proxy Flags
@@ -106,81 +137,99 @@ To start etcd automatically using custom settings at startup in Linux, using a [
##### -proxy
+ Proxy mode setting ("off", "readonly" or "on").
+ default: "off"
+ env variable: ETCD_PROXY
##### -proxy-failure-wait
+ Time (in milliseconds) an endpoint will be held in a failed state before being reconsidered for proxied requests.
+ default: 5000
+ env variable: ETCD_PROXY_FAILURE_WAIT
##### -proxy-refresh-interval
+ Time (in milliseconds) of the endpoints refresh interval.
+ default: 30000
+ env variable: ETCD_PROXY_REFRESH_INTERVAL
##### -proxy-dial-timeout
+ Time (in milliseconds) for a dial to timeout or 0 to disable the timeout
+ default: 1000
+ env variable: ETCD_PROXY_DIAL_TIMEOUT
##### -proxy-write-timeout
+ Time (in milliseconds) for a write to timeout or 0 to disable the timeout.
+ default: 5000
+ env variable: ETCD_PROXY_WRITE_TIMEOUT
##### -proxy-read-timeout
+ Time (in milliseconds) for a read to timeout or 0 to disable the timeout.
+ Don't change this value if you use watches because they are using long polling requests.
+ default: 0
+ env variable: ETCD_PROXY_READ_TIMEOUT
### Security Flags
The security flags help to [build a secure etcd cluster][security].
##### -ca-file [DEPRECATED]
+ Path to the client server TLS CA file.
+ Path to the client server TLS CA file. `-ca-file ca.crt` could be replaced by `-trusted-ca-file ca.crt -client-cert-auth` and etcd will perform the same.
+ default: none
+ env variable: ETCD_CA_FILE
##### -cert-file
+ Path to the client server TLS cert file.
+ default: none
+ env variable: ETCD_CERT_FILE
##### -key-file
+ Path to the client server TLS key file.
+ default: none
+ env variable: ETCD_KEY_FILE
##### -client-cert-auth
+ Enable client cert authentication.
+ default: false
+ env variable: ETCD_CLIENT_CERT_AUTH
##### -trusted-ca-file
+ Path to the client server TLS trusted CA key file.
+ default: none
+ env variable: ETCD_TRUSTED_CA_FILE
##### -peer-ca-file [DEPRECATED]
+ Path to the peer server TLS CA file.
+ Path to the peer server TLS CA file. `-peer-ca-file ca.crt` could be replaced by `-peer-trusted-ca-file ca.crt -peer-client-cert-auth` and etcd will perform the same.
+ default: none
+ env variable: ETCD_PEER_CA_FILE
##### -peer-cert-file
+ Path to the peer server TLS cert file.
+ default: none
+ env variable: ETCD_PEER_CERT_FILE
##### -peer-key-file
+ Path to the peer server TLS key file.
+ default: none
+ env variable: ETCD_PEER_KEY_FILE
##### -peer-client-cert-auth
+ Enable peer client cert authentication.
+ default: false
+ env variable: ETCD_PEER_CLIENT_CERT_AUTH
##### -peer-trusted-ca-file
+ Path to the peer server TLS trusted CA file.
+ default: none
+ env variable: ETCD_PEER_TRUSTED_CA_FILE
### Logging Flags
##### -debug
+ Drop the default log level to DEBUG for all subpackages.
+ default: false (INFO for all packages)
+ env variable: ETCD_DEBUG
##### -log-package-levels
+ Set individual etcd subpackages to specific log levels. An example being `etcdserver=WARNING,security=DEBUG`
+ default: none (INFO for all packages)
+ env variable: ETCD_LOG_PACKAGE_LEVELS
### Unsafe Flags
@@ -192,6 +241,14 @@ Follow the instructions when using these flags.
##### -force-new-cluster
+ Force to create a new one-member cluster. It commits configuration changes in force to remove all existing members in the cluster and add itself. It needs to be set to [restore a backup][restore].
+ default: false
+ env variable: ETCD_FORCE_NEW_CLUSTER
### Experimental Flags
##### -experimental-v3demo
+ Enable experimental [v3 demo API](rfc/v3api.proto).
+ default: false
+ env variable: ETCD_EXPERIMENTAL_V3DEMO
### Miscellaneous Flags

View File

@@ -0,0 +1,109 @@
# etcd release guide
The guide talks about how to release a new version of etcd.
The procedure includes some manual steps for sanity checking but it can probably be further scripted. Please keep this document up-to-date if you want to make changes to the release process.
## Prepare Release
Set desired version as environment variable for following steps. Here is an example to release 2.1.3:
```
export VERSION=v2.1.3
export PREV_VERSION=v2.1.2
```
All releases version numbers follow the format of [semantic versioning 2.0.0](http://semver.org/).
### Major, Minor Version Release, or its Pre-release
- Ensure the relevant milestone on GitHub is complete. All referenced issues should be closed, or moved elsewhere.
- Remove this release from [roadmap](https://github.com/coreos/etcd/blob/master/ROADMAP.md), if necessary.
- Ensure the latest upgrade documentation is available.
- Bump [hardcoded MinClusterVersion in the repository](https://github.com/coreos/etcd/blob/master/version/version.go#L29), if necessary.
- Add feature capability maps for the new version, if necessary.
### Patch Version Release
- Discuss which commits should be backported to the patch release. The commits should not include merge commits.
- Cherry-pick these commits starting from the oldest one into stable branch.
## Write Release Note
- Write introduction for the new release. For example, what major bug we fix, what new features we introduce or what performance improvement we make.
- Write changelog for the last release. ChangeLog should be straightforward and easy to understand for the end-user.
- Put `[GH XXXX]` at the head of change line to reference Pull Request that introduces the change. Moreover, add a link on it to jump to the Pull Request.
## Tag Version
- Bump [hardcoded Version in the repository](https://github.com/coreos/etcd/blob/master/version/version.go#L30) to the latest version `${VERSION}`.
- Ensure all tests on CI system are passed.
- Manually check etcd is buildable in Linux, Darwin and Windows.
- Manually check upgrade etcd cluster of previous minor version works well.
- Manually check new features work well.
- Add a signed tag through `git tag -s ${VERSION}`.
- Sanity check tag correctness through `git show tags/$VERSION`.
- Push the tag to GitHub through `git push origin tags/$VERSION`. This assumes `origin` corresponds to "https://github.com/coreos/etcd".
## Build Release Binaries and Images
- Ensure `actool` is available, or install it through `go get github.com/appc/spec/actool`.
- Ensure `docker` is available.
Run release script in root directory:
```
./scripts/release.sh ${VERSION}
```
It generates all release binaries and images under directory ./release.
## Sign Binaries and Images
Choose appropriate private key to sign the generated binaries and images.
The following commands are used for public release sign:
```
cd release
# personal GPG is okay for now
for i in etcd-*{.zip,.tar.gz}; do gpg --sign ${i}; done
# use `CoreOS ACI Builder <release@coreos.com>` secret key
gpg -u 88182190 -a --output etcd-${VERSION}-linux-amd64.aci.asc --detach-sig etcd-${VERSION}-linux-amd64.aci
```
## Publish Release Page in GitHub
- Set release title as the version name.
- Follow the format of previous release pages.
- Attach the generated binaries, aci image and signatures.
- Select whether it is a pre-release.
- Publish the release!
## Publish Docker Image in Quay.io
- Push docker image:
```
docker login quay.io
docker push quay.io/coreos/etcd:${VERSION}
```
- Add `latest` tag to the new image on [quay.io](https://quay.io/repository/coreos/etcd?tag=latest&tab=tags) if this is a stable release.
## Announce to etcd-dev Googlegroup
- Follow the format of [previous release emails](https://groups.google.com/forum/#!forum/etcd-dev).
- Make sure to include a list of authors that contributed since the previous release - something like the following might be handy:
```
git log ...${PREV_VERSION} --pretty=format:"%an" | sort | uniq | tr '\n' ',' | sed -e 's#,#, #g' -e 's#, $##'
```
- Send email to etcd-dev@googlegroups.com
## Post Release
- Create new stable branch through `git push origin ${VERSION_MAJOR}.${VERSION_MINOR}` if this is a major stable release. This assumes `origin` corresponds to "https://github.com/coreos/etcd".
- Bump [hardcoded Version in the repository](https://github.com/coreos/etcd/blob/master/version/version.go#L30) to the version `${VERSION}+git`.

View File

@@ -0,0 +1,109 @@
# Discovery Service Protocol
Discovery service protocol helps new etcd member to discover all other members in cluster bootstrap phase using a shared discovery URL.
Discovery service protocol is _only_ used in cluster bootstrap phase, and cannot be used for runtime reconfiguration or cluster monitoring.
The protocol uses a new discovery token to bootstrap one _unique_ etcd cluster. Remember that one discovery token can represent only one etcd cluster. Once the discovery protocol has started on a token, even if it fails halfway, that token must not be used to bootstrap another etcd cluster.
The rest of this article will walk through the discovery process with examples that correspond to a self-hosted discovery cluster. The public discovery service, discovery.etcd.io, functions the same way, but with a layer of polish to abstract away ugly URLs, generate UUIDs automatically, and provide some protections against excessive requests. At its core, the public discovery service still uses an etcd cluster as the data store as described in this document.
## The Protocol Workflow
The idea of discovery protocol is to use an internal etcd cluster to coordinate bootstrap of a new cluster. First, all new members interact with discovery service and help to generate the expected member list. Then each new member bootstraps its server using this list, which performs the same functionality as -initial-cluster flag.
In the following example workflow, we will list each step of protocol in curl format for ease of understanding.
By convention the etcd discovery protocol uses the key prefix `_etcd/registry`. If `http://example.com` hosts an etcd cluster for the discovery service, a full URL to the discovery keyspace will be `http://example.com/v2/keys/_etcd/registry`. We will use this as the URL prefix in the example.
### Creating a New Discovery Token
Generate a unique token that will identify the new cluster. This will be used as a unique prefix in discovery keyspace in the following steps. An easy way to do this is to use `uuidgen`:
```
UUID=$(uuidgen)
```
### Specifying the Expected Cluster Size
You need to specify the expected cluster size for this discovery token. The size is used by the discovery service to know when it has found all members that will initially form the cluster.
```
curl -X PUT http://example.com/v2/keys/_etcd/registry/${UUID}/_config/size -d value=${cluster_size}
```
Usually the cluster size is 3, 5 or 7. Check [optimal cluster size](admin_guide.md#optimal-cluster-size) for more details.
### Bringing up etcd Processes
Now that you have your discovery URL, you can use it as the `-discovery` flag and bring up etcd processes. Every etcd process will follow the next few steps internally if given a `-discovery` flag.
### Registering itself
The first thing for etcd process is to register itself into the discovery URL as a member. This is done by creating member ID as a key in the discovery URL.
```
curl -X PUT http://example.com/v2/keys/_etcd/registry/${UUID}/${member_id}?prevExist=false -d value="${member_name}=${member_peer_url_1}&${member_name}=${member_peer_url_2}"
```
### Checking the Status
It checks the expected cluster size and registration status in discovery URL, and decides what the next action is.
```
curl -X GET http://example.com/v2/keys/_etcd/registry/${UUID}/_config/size
curl -X GET http://example.com/v2/keys/_etcd/registry/${UUID}
```
If there are not yet enough registered members, it will wait for the remaining members to appear.
If the number of registered members is bigger than the expected size N, it treats the first N registered members as the member list for the cluster. If the member itself is in the member list, the discovery procedure succeeds and it fetches all peers through the member list. If it is not in the member list, the discovery procedure fails with an error indicating that the cluster is full.
In the etcd implementation, the member may check the cluster status even before registering itself, so it can fail quickly if the cluster is already full.
### Waiting for All Members
The wait process is described in detail [here](https://github.com/coreos/etcd/blob/master/Documentation/api.md#waiting-for-a-change).
```
curl -X GET http://example.com/v2/keys/_etcd/registry/${UUID}?wait=true&waitIndex=${current_etcd_index}
```
It keeps waiting until finding all members.
## Public Discovery Service
CoreOS Inc. hosts a public discovery service at https://discovery.etcd.io/ , which provides some nice features for ease of use.
### Mask Key Prefix
Public discovery service will redirect `https://discovery.etcd.io/${UUID}` to etcd cluster behind for the key at `/v2/keys/_etcd/registry`. It masks register key prefix for short and readable discovery url.
### Get new token
```
GET /new
Sent query:
size=${cluster_size}
Possible status codes:
200 OK
400 Bad Request
200 Body:
generated discovery url
```
The generation process in the service follows the step from [Creating a New Discovery Token](#creating-a-new-discovery-token) to [Specifying the Expected Cluster Size](#specifying-the-expected-cluster-size).
### Check Discovery Status
```
GET /${UUID}
```
You can check the status for this discovery token, including the machines that have been registered, by requesting the value of the UUID.
### Open-source repository
The repository is located at https://github.com/coreos/discovery.etcd.io. You could use it to build your own public discovery service.

80
Documentation/faq.md Normal file
View File

@@ -0,0 +1,80 @@
# FAQ
## 1) How come I can read an old version of the data when a majority of the members are down?
In situations where a client connects to a minority, etcd
favors by default availability over consistency. This means that even though
data might be “out of date”, it is still better to return something versus
nothing.
In order to confirm that a read is up to date with a majority of the cluster,
the client can use the `quorum=true` parameter on reads of keys. This means
that a majority of the cluster is checked on reads before returning the data,
otherwise the read will timeout and fail.
## 2) With quorum=false, doesn't this mean that if my client switched the member it was connected to, it could experience a logical ordering where the cluster goes backwards in time?
Yes, but this could be handled at the etcd client implementation via
remembering the last seen index. The “index” is the cluster's single
irrevocable sequence of the entire modification history. The client could
remember the last seen index, and determine via comparing the index returned on
the GET whether or not the state of the key-value pair is before or after its
last seen state.
## 3) What happens if a watch is registered on a minority member?
The watch will stay untriggered, even as modifications are occurring in the
majority quorum. This is an open issue, and is being addressed in v3. There are
multiple ways to work around the watch trigger not firing.
1) build a signaling mechanism independent of etcd. This could be as simple as
a “pulse” to the client to reissue a GET with quorum=true for the most recent
version of the data.
2) poll on the `/v2/keys` endpoint and check that the raft-index is increasing every
timeout.
## 4) What is a proxy used for?
A proxy is a redirection server to the etcd cluster. The proxy handles the
redirection of a client to the current configuration of the etcd cluster. A
typical use case is to start a proxy on a machine, and on first boot up of the
proxy specify both the `--proxy` flag and the `--initial-cluster` flag.
From there, any etcdctl client that starts up automatically speaks to the local
proxy and the proxy redirects operations to the current configuration of the
cluster it was originally paired with.
In the v2 spec of etcd, proxies cannot be promoted to members of the cluster.
They also cannot be promoted to followers or at any point become part of the
replication of the etcd cluster itself.
## 5) How is cluster membership and health handled in etcd v2?
The design goal of etcd is that reconfiguration is simply an API, and health
monitoring and addition/removal of members is up to the individual application
and their integration with the reconfiguration API.
Thus, a member that is down, even infinitely, will never be automatically
removed from the etcd cluster member list.
This makes sense because it's usually an application level / administrative
action to determine whether a reconfiguration should happen based on health.
For more information, refer to [Documentation/runtime-configuration.md](runtime-configuration.md).
## 6) How does --peers work with etcdctl?
The `--peers` flag can specify any number of etcd cluster members in a comma
separated list. This list might be a subset, equal to, or more than the actual
etcd cluster member list itself.
If only one peer is specified via the `--peers` flag, the etcdctl discovers the
rest of the cluster via the member list of that one peer, and then it randomly
chooses a member to use. Again, the client can use the `quorum=true` flag on
reads, which will always fail when using a member in the minority.
If peers from multiple clusters are specified via the `--peers` flag, etcdctl
will randomly choose a peer, and the request will simply get routed to one of
the clusters. This is probably not what you want.

View File

@@ -45,6 +45,7 @@
**C libraries**
- [jdarcy/etcd-api](https://github.com/jdarcy/etcd-api) - Supports v2
- [shafreeck/cetcd](https://github.com/shafreeck/cetcd) - Supports v2
**C++ libraries**
- [edwardcapriolo/etcdcpp](https://github.com/edwardcapriolo/etcdcpp) - Supports v2

View File

@@ -1,12 +1,12 @@
## Proxy
etcd can now run as a transparent proxy. Running etcd as a proxy allows for easily discovery of etcd within your infrastructure, since it can run on each machine as a local service. In this mode, etcd acts as a reverse proxy and forwards client requests to an active etcd cluster. The etcd proxy does not participant in the consensus replication of the etcd cluster, thus it neither increases the resilience nor decreases the write performance of the etcd cluster.
etcd can now run as a transparent proxy. Running etcd as a proxy allows for easy discovery of etcd within your infrastructure, since it can run on each machine as a local service. In this mode, etcd acts as a reverse proxy and forwards client requests to an active etcd cluster. The etcd proxy does not participate in the consensus replication of the etcd cluster, thus it neither increases the resilience nor decreases the write performance of the etcd cluster.
etcd currently supports two proxy modes: `readwrite` and `readonly`. The default mode is `readwrite`, which forwards both read and write requests to the etcd cluster. A `readonly` etcd proxy only forwards read requests to the etcd cluster, and returns `HTTP 501` to all write requests.
The proxy will shuffle the list of cluster members periodically to avoid sending all connections to a single member.
The member list used by proxy consists of all client URLs advertised within the cluster, as specified in each members' `-advertise-client-urls` flag. If this flag is set incorrectly, requests sent to the proxy are forwarded to wrong addresses and then fail. The fix for this problem is to restart etcd member with correct `-advertise-client-urls` flag. After client URLs list in proxy is recalculated, which happens every 30 seconds, requests will be forwarded correctly.
The member list used by proxy consists of all client URLs advertised within the cluster, as specified in each member's `-advertise-client-urls` flag. If this flag is set incorrectly, requests sent to the proxy are forwarded to wrong addresses and then fail. Including URLs in the `-advertise-client-urls` flag that point to the proxy itself, e.g. http://localhost:2379, is even more problematic as it will cause loops, because the proxy keeps trying to forward requests to itself until its resources (memory, file descriptors) are eventually depleted. The fix for this problem is to restart the etcd member with the correct `-advertise-client-urls` flag. After the client URLs list in the proxy is recalculated, which happens every 30 seconds, requests will be forwarded correctly.
### Using an etcd proxy
To start etcd in proxy mode, you need to provide three flags: `proxy`, `listen-client-urls`, and `initial-cluster` (or `discovery`).
@@ -17,6 +17,7 @@ The proxy will be listening on `listen-client-urls` and forward requests to the
#### Start an etcd proxy with a static configuration
To start a proxy that will connect to a statically defined etcd cluster, specify the `initial-cluster` flag:
```
etcd -proxy on -listen-client-urls http://127.0.0.1:8080 -initial-cluster infra0=http://10.0.1.10:2380,infra1=http://10.0.1.11:2380,infra2=http://10.0.1.12:2380
```

View File

@@ -0,0 +1,43 @@
## Reporting Bugs
If you find bugs or documentation mistakes in the etcd project, please let us know by [opening an issue](https://github.com/coreos/etcd/issues/new). We treat bugs and mistakes very seriously and believe no issue is too small. Before creating a bug report, please check that one does not already exist.
To make your bug report accurate and easy to understand, please try to create bug reports that are:
- Specific. Include as much detail as possible: which version, what environment, what configuration, etc. You can also attach the etcd log (the starting log with the etcd configuration is especially important).
- Reproducible. Include the steps to reproduce the problem. We understand some issues might be hard to reproduce; in that case, please include the steps that might lead to the problem. You can also attach the affected etcd data dir and stack trace to the bug report.
- Isolated. Please try to isolate and reproduce the bug with minimum dependencies. It would significantly slow down the speed to fix a bug if too many dependencies are involved in a bug report. Debugging external systems that rely on etcd is out of scope, but we are happy to point you in the right direction or help you interact with etcd in the correct manner.
- Unique. Do not duplicate an existing bug report.
- Scoped. One bug per report. Do not follow up with another bug inside one report.
You might also want to read [Elika Etemad's article on filing good bug reports](http://fantasai.inkedblade.net/style/talks/filing-good-bugs/) before creating a bug report.
We might ask you for further information to locate a bug. A duplicated bug report will be closed.
## Frequently Asked Questions
### How to get stack trace
``` bash
$ kill -QUIT $PID
```
### How to get etcd version
``` bash
$ etcd --version
```
### How to get etcd configuration and log when it runs as systemd service etcd2.service
``` bash
$ sudo systemctl cat etcd2
$ sudo journalctl -u etcd2
```
Due to an upstream systemd bug, journald may miss the last few log lines when its process exits. If journalctl tells you that etcd stopped without a fatal or panic message, you could try `sudo journalctl -f -t etcd2` to get the full log.

View File

@@ -14,14 +14,14 @@
- more efficient/ low cost keep alive
- a logical group of TTL keys
5. Replace CAS/CAD with multi-object Tnx
5. Replace CAS/CAD with multi-object Txn
- MUCH MORE powerful and flexible
6. Support efficient watching with multiple ranges
7. RPC API supports the completed set of APIs.
- more efficient than JSON/HTTP
- additional tnx/lease support
- additional txn/lease support
8. HTTP API supports a subset of APIs.
- easy for people to try out etcd
@@ -42,7 +42,7 @@ Put( PutRequest { key = foo, value = bar } )
PutResponse {
cluster_id = 0x1000,
member_id = 0x1,
index = 1,
revision = 1,
raft_term = 0x1,
}
```
@@ -54,14 +54,14 @@ Get ( RangeRequest { key = foo } )
RangeResponse {
cluster_id = 0x1000,
member_id = 0x1,
index = 1,
revision = 1,
raft_term = 0x1,
kvs = {
{
key = foo,
value = bar,
create_index = 1,
mod_index = 1,
create_revision = 1,
mod_revision = 1,
version = 1;
},
},
@@ -75,35 +75,35 @@ Range ( RangeRequest { key = foo, end_key = foo80, limit = 30 } )
RangeResponse {
cluster_id = 0x1000,
member_id = 0x1,
index = 100,
revision = 100,
raft_term = 0x1,
kvs = {
{
key = foo0,
value = bar0,
create_index = 1,
mod_index = 1,
create_revision = 1,
mod_revision = 1,
version = 1;
},
...,
{
key = foo30,
value = bar30,
create_index = 30,
mod_index = 30,
create_revision = 30,
mod_revision = 30,
version = 1;
},
},
}
```
#### Finish a tnx (assume we have foo0=bar0, foo1=bar1)
#### Finish a txn (assume we have foo0=bar0, foo1=bar1)
```
Tnx(TnxRequest {
// mod_index of foo0 is equal to 1, mod_index of foo1 is greater than 1
Txn(TxnRequest {
// mod_revision of foo0 is equal to 1, mod_revision of foo1 is greater than 1
compare = {
{compareType = equal, key = foo0, mod_index = 1},
{compareType = greater, key = foo1, mod_index = 1}}
{compareType = equal, key = foo0, mod_revision = 1},
{compareType = greater, key = foo1, mod_revision = 1}}
},
// if the comparison succeeds, put foo2 = bar2
success = {PutRequest { key = foo2, value = success }},
@@ -111,10 +111,10 @@ Tnx(TnxRequest {
failure = {PutRequest { key = foo2, value = failure }},
)
TnxResponse {
TxnResponse {
cluster_id = 0x1000,
member_id = 0x1,
index = 3,
revision = 3,
raft_term = 0x1,
succeeded = true,
responses = {
@@ -122,7 +122,7 @@ TnxResponse {
{
cluster_id = 0x1000,
member_id = 0x1,
index = 3,
revision = 3,
raft_term = 0x1,
}
}
@@ -135,8 +135,8 @@ TnxResponse {
Watch( WatchRequest{
key = foo,
end_key = fop, // prefix foo
start_index = 20,
end_index = 10000,
start_revision = 20,
end_revision = 10000,
// server decided notification frequency
progress_notification = true,
}
@@ -147,14 +147,14 @@ Watch( WatchRequest{
WatchResponse {
cluster_id = 0x1000,
member_id = 0x1,
index = 3,
revision = 3,
raft_term = 0x1,
event_type = put,
kv = {
key = foo0,
value = bar0,
create_index = 1,
mod_index = 1,
create_revision = 1,
mod_revision = 1,
version = 1;
},
}
@@ -164,7 +164,7 @@ WatchResponse {
WatchResponse {
cluster_id = 0x1000,
member_id = 0x1,
index = 2000,
revision = 2000,
raft_term = 0x1,
// nil event as notification
}
@@ -175,14 +175,14 @@ WatchResponse {
WatchResponse {
cluster_id = 0x1000,
member_id = 0x1,
index = 3000,
revision = 3000,
raft_term = 0x1,
event_type = put,
kv = {
key = foo0,
value = bar3000,
create_index = 1,
mod_index = 3000,
create_revision = 1,
mod_revision = 3000,
version = 2;
},
}

View File

@@ -6,19 +6,19 @@ service etcd {
rpc Range(RangeRequest) returns (RangeResponse) {}
// Put puts the given key into the store.
// A put request increases the index of the store,
// A put request increases the revision of the store,
// and generates one event in the event history.
rpc Put(PutRequest) returns (PutResponse) {}
// Delete deletes the given range from the store.
// A delete request increase the index of the store,
// A delete request increases the revision of the store,
// and generates one event in the event history.
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse) {}
// Tnx processes all the requests in one transaction.
// A tnx request increases the index of the store,
// and generates events with the same index in the event history.
rpc Tnx(TnxRequest) returns (TnxResponse) {}
// Txn processes all the requests in one transaction.
// A txn request increases the revision of the store,
// and generates events with the same revision in the event history.
rpc Txn(TxnRequest) returns (TxnResponse) {}
// Watch watches the events happening or happened in etcd. Both input and output
// are stream. One watch rpc can watch for multiple ranges and get a stream of
@@ -41,10 +41,10 @@ service etcd {
// LeaseAttach attaches keys with a lease.
rpc LeaseAttach(LeaseAttachRequest) returns (LeaseAttachResponse) {}
// LeaseTnx likes Tnx. It has two addition success and failure LeaseAttachRequest list.
// If the Tnx is successful, then the success list will be executed. Or the failure list
// LeaseTxn is like Txn. It has two additional success and failure LeaseAttachRequest lists.
// If the Txn is successful, then the success list will be executed. Or the failure list
// will be executed.
rpc LeaseTnx(LeaseTnxRequest) returns (LeaseTnxResponse) {}
rpc LeaseTxn(LeaseTxnRequest) returns (LeaseTxnResponse) {}
// KeepAlive keeps the lease alive.
rpc LeaseKeepAlive(stream LeaseKeepAliveRequest) returns (stream LeaseKeepAliveResponse) {}
@@ -52,51 +52,54 @@ service etcd {
message ResponseHeader {
// an error type message?
optional string error = 1;
optional uint64 cluster_id = 2;
optional uint64 member_id = 3;
// index of the store when the request was applied.
optional int64 index = 4;
string error = 1;
uint64 cluster_id = 2;
uint64 member_id = 3;
// revision of the store when the request was applied.
int64 revision = 4;
// term of raft when the request was applied.
optional uint64 raft_term = 5;
uint64 raft_term = 5;
}
message RangeRequest {
// if the range_end is not given, the request returns the key.
optional bytes key = 1;
bytes key = 1;
// if the range_end is given, it gets the keys in range [key, range_end).
optional bytes range_end = 2;
bytes range_end = 2;
// limit the number of keys returned.
optional int64 limit = 3;
// the response will be consistent with previous request with same token if the token is
// given and is vaild.
optional bytes consistent_token = 4;
int64 limit = 3;
// range over the store at the given revision.
// if revision is less or equal to zero, range over the newest store.
// if the revision has been compacted, ErrCompaction will be returned in
// response.
int64 revision = 4;
}
message RangeResponse {
optional ResponseHeader header = 1;
repeated KeyValue kvs = 2;
optional bytes consistent_token = 3;
ResponseHeader header = 1;
repeated storagepb.KeyValue kvs = 2;
// more indicates if there are more keys to return in the requested range.
bool more = 3;
}
message PutRequest {
optional bytes key = 1;
optional bytes value = 2;
bytes key = 1;
bytes value = 2;
}
message PutResponse {
optional ResponseHeader header = 1;
ResponseHeader header = 1;
}
message DeleteRangeRequest {
// if the range_end is not given, the request deletes the key.
optional bytes key = 1;
bytes key = 1;
// if the range_end is given, it deletes the keys in range [key, range_end).
optional bytes range_end = 2;
bytes range_end = 2;
}
message DeleteRangeResponse {
optional ResponseHeader header = 1;
ResponseHeader header = 1;
}
message RequestUnion {
@@ -109,38 +112,44 @@ message RequestUnion {
message ResponseUnion {
oneof response {
RangeResponse reponse_range = 1;
RangeResponse response_range = 1;
PutResponse response_put = 2;
DeleteRangeResponse response_delete_range = 3;
}
}
message Compare {
enum CompareType {
enum CompareResult {
EQUAL = 0;
GREATER = 1;
LESS = 2;
}
optional CompareType type = 1;
enum CompareTarget {
VERSION = 0;
CREATE = 1;
MOD = 2;
VALUE= 3;
}
CompareResult result = 1;
CompareTarget target = 2;
// key path
optional bytes key = 2;
oneof target {
bytes key = 3;
oneof target_union {
// version of the given key
int64 version = 3;
// create index of the given key
int64 create_index = 4;
// last modified index of the given key
int64 mod_index = 5;
int64 version = 4;
// create revision of the given key
int64 create_revision = 5;
// last modified revision of the given key
int64 mod_revision = 6;
// value of the given key
bytes value = 6;
bytes value = 7;
}
}
// First all the compare requests are processed.
// If all the compare succeed, all the success
// requests will be processed.
// Or all the failure requests will be processed and
// all the errors in the comparison will be returned.
// If the comparisons succeed, then the success requests will be processed in order,
// and the response will contain their respective responses in order.
// If the comparisons fail, then the failure requests will be processed in order,
// and the response will contain their respective responses in order.
// From google paxosdb paper:
// Our implementation hinges around a powerful primitive which we call MultiOp. All other database
@@ -157,44 +166,44 @@ message Compare {
// if guard evaluates to
// true.
// 3. A list of database operations called f op. Like t op, but executed if guard evaluates to false.
message TnxRequest {
message TxnRequest {
repeated Compare compare = 1;
repeated RequestUnion success = 2;
repeated RequestUnion failure = 3;
}
message TnxResponse {
optional ResponseHeader header = 1;
optional bool succeeded = 2;
message TxnResponse {
ResponseHeader header = 1;
bool succeeded = 2;
repeated ResponseUnion responses = 3;
}
message KeyValue {
optional bytes key = 1;
// mod_index is the last modified index of the key.
optional int64 create_index = 2;
optional int64 mod_index = 3;
bytes key = 1;
int64 create_revision = 2;
// mod_revision is the last modified revision of the key.
int64 mod_revision = 3;
// version is the version of the key. A deletion resets
// the version to zero and any modification of the key
// increases its version.
optional int64 version = 4;
optional bytes value = 5;
int64 version = 4;
bytes value = 5;
}
message WatchRangeRequest {
// if the range_end is not given, the request returns the key.
optional bytes key = 1;
bytes key = 1;
// if the range_end is given, it gets the keys in range [key, range_end).
optional bytes range_end = 2;
// start_index is an optional index (including) to watch from. No start_index is "now".
optional int64 start_index = 3;
// end_index is an optional index (excluding) to end watch. No end_index is "forever".
optional int64 end_index = 4;
optional bool progress_notification = 5;
bytes range_end = 2;
// start_revision is an optional revision (including) to watch from. No start_revision is "now".
int64 start_revision = 3;
// end_revision is an optional revision (excluding) to end watch. No end_revision is "forever".
int64 end_revision = 4;
bool progress_notification = 5;
}
message WatchRangeResponse {
optional ResponseHeader header = 1;
ResponseHeader header = 1;
repeated Event events = 2;
}
@@ -204,69 +213,73 @@ message Event {
DELETE = 1;
EXPIRE = 2;
}
optional EventType event_type = 1;
EventType event_type = 1;
// a put event contains the current key-value
// a delete/expire event contains the previous
// key-value
optional KeyValue kv = 2;
KeyValue kv = 2;
}
// Compaction compacts the kv store up to the given revision (including).
// It removes the old versions of a key. It keeps the newest version of
// the key even if its latest modification revision is smaller than the given
// revision.
message CompactionRequest {
optional int64 index = 1;
int64 revision = 1;
}
message CompactionResponse {
optional ResponseHeader header = 1;
ResponseHeader header = 1;
}
message LeaseCreateRequest {
// advisory ttl in seconds
optional int64 ttl = 1;
int64 ttl = 1;
}
message LeaseCreateResponse {
optional ResponseHeader header = 1;
optional int64 lease_id = 2;
ResponseHeader header = 1;
int64 lease_id = 2;
// server decided ttl in second
optional int64 ttl = 3;
optional string error = 4;
int64 ttl = 3;
string error = 4;
}
message LeaseRevokeRequest {
optional int64 lease_id = 1;
int64 lease_id = 1;
}
message LeaseRevokeResponse {
optional ResponseHeader header = 1;
ResponseHeader header = 1;
}
message LeaseTnxRequest {
optional TnxRequest request = 1;
message LeaseTxnRequest {
TxnRequest request = 1;
repeated LeaseAttachRequest success = 2;
repeated LeaseAttachRequest failure = 3;
}
message LeaseTnxResponse {
optional ResponseHeader header = 1;
optional TnxResponse response = 2;
message LeaseTxnResponse {
ResponseHeader header = 1;
TxnResponse response = 2;
repeated LeaseAttachResponse attach_responses = 3;
}
message LeaseAttachRequest {
optional int64 lease_id = 1;
optional bytes key = 2;
int64 lease_id = 1;
bytes key = 2;
}
message LeaseAttachResponse {
optional ResponseHeader header = 1;
ResponseHeader header = 1;
}
message LeaseKeepAliveRequest {
optional int64 lease_id = 1;
int64 lease_id = 1;
}
message LeaseKeepAliveResponse {
optional ResponseHeader header = 1;
optional int64 lease_id = 2;
optional int64 ttl = 3;
ResponseHeader header = 1;
int64 lease_id = 2;
int64 ttl = 3;
}

View File

@@ -4,6 +4,8 @@ etcd comes with support for incremental runtime reconfiguration, which allows us
Reconfiguration requests can only be processed when the majority of the cluster members are functioning. It is **highly recommended** to always have a cluster size greater than two in production. It is unsafe to remove a member from a two member cluster. The majority of a two member cluster is also two. If there is a failure during the removal process, the cluster might not be able to make progress and need to [restart from majority failure][majority failure].
To better understand the design behind runtime reconfiguration, we suggest you read [this](runtime-reconf-design.md).
[majority failure]: #restart-cluster-from-majority-failure
## Reconfiguration Use Cases
@@ -37,7 +39,7 @@ To replace the machine, follow the instructions for [removing the member][remove
### Restart Cluster from Majority Failure
If the majority of your cluster is lost, then you need to take manual action in order to recover safely.
If the majority of your cluster is lost or all of your nodes have changed IP addresses, then you need to take manual action in order to recover safely.
The basic steps in the recovery process include [creating a new cluster using the old data][disaster recovery], forcing a single member to act as the leader, and finally using runtime configuration to [add new members][add member] to this new cluster one at a time.
[add member]: #add-a-new-member
@@ -52,28 +54,38 @@ This is essentially the same requirement as for any other write to etcd.
All changes to the cluster are done one at a time:
To replace a single member you will make an add then a remove operation
To increase from 3 to 5 members you will make two add operations
To decrease from 5 to 3 you will make two remove operations
* To update a single member peerURLs you will make an update operation
* To replace a single member you will make an add then a remove operation
* To increase from 3 to 5 members you will make two add operations
* To decrease from 5 to 3 you will make two remove operations
All of these examples will use the `etcdctl` command line tool that ships with etcd.
If you want to use the member API directly you can find the documentation [here](other_apis.md).
### Remove a Member
### Update a Member
First, we need to find the target member's ID. You can list all members with `etcdctl`:
If you would like to update a member IP address (peerURLs), first, we need to find the target member's ID. You can list all members with `etcdctl`:
```
```sh
$ etcdctl member list
6e3bd23ae5f1eae0: name=node2 peerURLs=http://localhost:7002 clientURLs=http://127.0.0.1:4002
924e2e83e93f2560: name=node3 peerURLs=http://localhost:7003 clientURLs=http://127.0.0.1:4003
a8266ecf031671f3: name=node1 peerURLs=http://localhost:7001 clientURLs=http://127.0.0.1:4001
6e3bd23ae5f1eae0: name=node2 peerURLs=http://localhost:23802 clientURLs=http://127.0.0.1:23792
924e2e83e93f2560: name=node3 peerURLs=http://localhost:23803 clientURLs=http://127.0.0.1:23793
a8266ecf031671f3: name=node1 peerURLs=http://localhost:23801 clientURLs=http://127.0.0.1:23791
```
In this example let's `update` a8266ecf031671f3 member ID and change its peerURLs value to http://10.0.1.10:2380
```sh
$ etcdctl member update a8266ecf031671f3 http://10.0.1.10:2380
Updated member with ID a8266ecf031671f3 in cluster
```
### Remove a Member
Let us say the member ID we want to remove is a8266ecf031671f3.
We then use the `remove` command to perform the removal:
```
```sh
$ etcdctl member remove a8266ecf031671f3
Removed member a8266ecf031671f3 from cluster
```
@@ -95,7 +107,7 @@ Adding a member is a two step process:
Using `etcdctl` let's add the new member to the cluster by specifying its [name](configuration.md#-name) and [advertised peer URLs](configuration.md#-initial-advertise-peer-urls):
```
```sh
$ etcdctl member add infra3 http://10.0.1.13:2380
added member 9bf1b35fc7761a23 to cluster
@@ -107,11 +119,11 @@ ETCD_INITIAL_CLUSTER_STATE=existing
`etcdctl` has informed the cluster about the new member and printed out the environment variables needed to successfully start it.
Now start the new etcd process with the relevant flags for the new member:
```
```sh
$ export ETCD_NAME="infra3"
$ export ETCD_INITIAL_CLUSTER="infra0=http://10.0.1.10:2380,infra1=http://10.0.1.11:2380,infra2=http://10.0.1.12:2380,infra3=http://10.0.1.13:2380"
$ export ETCD_INITIAL_CLUSTER_STATE=existing
$ etcd -listen-client-urls http://10.0.1.13:2379 -advertise-client-urls http://10.0.1.13:2379 -listen-peer-urls http://10.0.1.13:2380 -initial-advertise-peer-urls http://10.0.1.13:2380
$ etcd -listen-client-urls http://10.0.1.13:2379 -advertise-client-urls http://10.0.1.13:2379 -listen-peer-urls http://10.0.1.13:2380 -initial-advertise-peer-urls http://10.0.1.13:2380 -data-dir %data_dir%
```
The new member will run as a part of the cluster and immediately begin catching up with the rest of the cluster.
@@ -124,7 +136,7 @@ If you add a new member to a 1-node cluster, the cluster cannot make progress be
In the following case we have not included our new host in the list of enumerated nodes.
If this is a new cluster, the node must be added to the list of initial cluster members.
```
```sh
$ etcd -name infra3 \
-initial-cluster infra0=http://10.0.1.10:2380,infra1=http://10.0.1.11:2380,infra2=http://10.0.1.12:2380 \
-initial-cluster-state existing
@@ -134,7 +146,7 @@ exit 1
In this case we give a different address (10.0.1.14:2380) to the one that we used to join the cluster (10.0.1.13:2380).
```
```sh
$ etcd -name infra4 \
-initial-cluster infra0=http://10.0.1.10:2380,infra1=http://10.0.1.11:2380,infra2=http://10.0.1.12:2380,infra4=http://10.0.1.14:2380 \
-initial-cluster-state existing
@@ -144,7 +156,7 @@ exit 1
When we start etcd using the data directory of a removed member, etcd will exit automatically if it connects to any alive member in the cluster:
```
```sh
$ etcd
etcd: this member has been permanently removed from the cluster. Exiting.
exit 1

View File

@@ -0,0 +1,47 @@
### Design of Runtime Reconfiguration
Runtime reconfiguration is one of the hardest and most error prone features in a distributed system, especially in a consensus based system like etcd.
Read on to learn about the design of etcd's runtime reconfiguration commands and how we tackled these problems.
### Two Phase Config Changes Keep you Safe
In etcd, every runtime reconfiguration has to go through [two phases](Documentation/runtime-configuration.md#add-a-new-member) for safety reasons. For example, to add a member you need to first inform cluster of new configuration and then start the new member.
Phase 1 - Inform cluster of new configuration
To add a member into an etcd cluster, you need to make an API call to request a new member to be added to the cluster. This is the only way that you can add a new member into an existing cluster. The API call returns when the cluster agrees on the configuration change.
Phase 2 - Start new member
To join the etcd member into the existing cluster, you need to specify the correct `initial-cluster` and set `initial-cluster-state` to `existing`. When the member starts, it will contact the existing cluster first and verify the current cluster configuration matches the expected one specified in `initial-cluster`. When the new member successfully starts, you know your cluster reached the expected configuration.
By splitting the process into two discrete phases users are forced to be explicit regarding cluster membership changes. This actually gives users more flexibility and makes things easier to reason about. For example, if there is an attempt to add a new member with the same ID as an existing member in an etcd cluster, the action will fail immediately during phase one without impacting the running cluster. Similar protection is provided to prevent adding new members by mistake. If a new etcd member attempts to join the cluster before the cluster has accepted the configuration change, it will not be accepted by the cluster.
Without the explicit workflow around cluster membership etcd would be vulnerable to unexpected cluster membership changes. For example, if etcd is running under an init system such as systemd, etcd would be restarted after being removed via the membership API, and attempt to rejoin the cluster on startup. This cycle would continue every time a member is removed via the API and systemd is set to restart etcd after failing, which is unexpected.
We think runtime reconfiguration should be an infrequent operation. We made the decision to keep it explicit and user-driven to ensure configuration safety and keep your cluster always running smoothly under your control.
### Permanent Loss of Quorum Requires New Cluster
If a cluster permanently loses a majority of its members, a new cluster will need to be started from an old data directory to recover the previous state.
It is entirely possible to forcibly remove the failed members from the existing cluster to recover. However, we decided not to support this method since it bypasses the normal consensus committing phase, which is unsafe. If the member to remove is not actually dead, or you force-remove different members through different members of the same cluster, you will end up with diverged clusters that share the same clusterID. This is very dangerous and hard to debug/fix afterwards.
If you have a correct deployment, the possibility of permanent majority loss is very low. But it is a severe enough problem that it is worth special care. We strongly suggest you read the [disaster recovery documentation](admin_guide.md#disaster-recovery) and prepare for permanent majority loss before you put etcd into production.
### Do Not Use Public Discovery Service For Runtime Reconfiguration
The public discovery service should only be used for bootstrapping a cluster. To add a member to an existing cluster, you should use the runtime reconfiguration API.
Discovery service is designed for bootstrapping an etcd cluster in the cloud environment, when you do not know the IP addresses of all the members beforehand. After you successfully bootstrap a cluster, the IP addresses of all the members are known. Technically, you should not need the discovery service any more.
It may seem that using the public discovery service is a convenient way to do runtime reconfiguration; after all, the discovery service already has all the cluster configuration information. However, relying on the public discovery service brings problems:
1. it introduces an external dependency for the entire life-cycle of your cluster, not just bootstrap time. If there is a network issue between your cluster and the public discovery service, your cluster will suffer from it.
2. the public discovery service must reflect the correct runtime configuration of your cluster during its life-cycle. It has to provide a security mechanism to avoid bad actions, and that is hard.
3. public discovery service has to keep tens of thousands of cluster configurations. Our public discovery service backend is not ready for that workload.
If you want to have a discovery service that supports runtime reconfiguration, the best choice is to build your private one.

View File

@@ -4,7 +4,7 @@ etcd supports SSL/TLS as well as authentication through client certificates, bot
To get up and running you first need to have a CA certificate and a signed key pair for one member. It is recommended to create and sign a new key pair for every member in a cluster.
For convenience the [etcd-ca](https://github.com/coreos/etcd-ca) tool provides an easy interface to certificate generation, alternatively this site provides a good reference on how to generate self-signed key pairs:
For convenience the [cfssl](https://github.com/cloudflare/cfssl) tool provides an easy interface to certificate generation, and we provide a full example using the tool [here](../hack/tls-setup). Alternatively, this site provides a good reference on how to generate self-signed key pairs:
http://www.g-loaded.eu/2005/11/10/be-your-own-ca/

View File

@@ -10,7 +10,7 @@ The network isn't the only source of latency. Each request and response may be i
The underlying distributed consensus protocol relies on two separate time parameters to ensure that nodes can handoff leadership if one stalls or goes offline.
The first parameter is called the *Heartbeat Interval*.
This is the frequency with which the leader will notify followers that it is still the leader.
etcd batches commands together for higher throughput so this heartbeat interval is also a delay for how long it takes for commands to be committed.
For best practices, the parameter should be set to around the round-trip time between members.
By default, etcd uses a `100ms` heartbeat interval.
The second parameter is the *Election Timeout*.
@@ -18,18 +18,22 @@ This timeout is how long a follower node will go without hearing a heartbeat bef
By default, etcd uses a `1000ms` election timeout.
Adjusting these values is a trade off.
Lowering the heartbeat interval will cause individual commands to be committed faster but it will lower the overall throughput of etcd.
If your etcd instances have low utilization then lowering the heartbeat interval can improve your command response time.
The value of heartbeat interval is recommended to be around the maximum of average round-trip time (RTT) between members, normally around 0.5-1.5x the round-trip time.
If heartbeat interval is too low, etcd will send unnecessary messages that increase the usage of CPU and network resources.
On the other side, a too high heartbeat interval leads to high election timeout. Higher election timeout takes longer time to detect a leader failure.
The easiest way to measure round-trip time (RTT) is to use [PING utility](https://en.wikipedia.org/wiki/Ping_(networking_utility)).
The election timeout should be set based on the heartbeat interval and your network ping time between nodes.
Election timeouts should be at least 10 times your ping time so it can account for variance in your network.
For example, if the ping time between your nodes is 10ms then you should have at least a 100ms election timeout.
The election timeout should be set based on the heartbeat interval and average round-trip time between members.
Election timeouts must be at least 10 times the round-trip time so it can account for variance in your network.
For example, if the round-trip time between your members is 10ms then you should have at least a 100ms election timeout.
The upper limit of election timeout is 50000ms (50s), which should only be used when deploying a global etcd cluster. First, 5s is the upper limit of average global round-trip time. A reasonable round-trip time for the continental United States is 130ms, and the time between the US and Japan is around 350-400ms. Because packets may get delayed a lot, and the network situation may be terrible, 5s is a safe value for it. Then, because the election timeout should be an order of magnitude bigger than the broadcast time, 50s becomes its maximum.
You should also set your election timeout to at least 5 to 10 times your heartbeat interval to account for variance in leader replication.
For a heartbeat interval of 50ms you should set your election timeout to at least 250ms - 500ms.
The heartbeat interval and election timeout value should be the same for all members in one cluster. Setting different values for etcd members may disrupt cluster stability.
You can override the default values on the command line:
```sh

View File

@@ -0,0 +1,128 @@
## Upgrade etcd from 2.1 to 2.2
In the general case, upgrading from etcd 2.1 to 2.2 can be a zero-downtime, rolling upgrade:
- one by one, stop the etcd v2.1 processes and replace them with etcd v2.2 processes
- after you are running all v2.2 processes, new features in v2.2 are available to the cluster
Before [starting an upgrade](#upgrade-procedure), read through the rest of this guide to prepare.
### Upgrade Checklists
#### Upgrade Requirement
To upgrade an existing etcd deployment to 2.2, you must be running 2.1. If you're running a version of etcd before 2.1, you must upgrade to [2.1](https://github.com/coreos/etcd/releases/tag/v2.1.2) before upgrading to 2.2.
Also, to ensure a smooth rolling upgrade, your running cluster must be healthy. You can check the health of the cluster by using `etcdctl cluster-health` command.
#### Preparedness
Before upgrading etcd, always test the services relying on etcd in a staging environment before deploying the upgrade to the production environment.
You might also want to [backup your data directory](admin_guide.md#backing-up-the-datastore) for a potential [downgrade](#downgrade).
#### Mixed Versions
While upgrading, an etcd cluster supports mixed versions of etcd members. The cluster is only considered upgraded once all its members are upgraded to 2.2.
Internally, etcd members negotiate with each other to determine the overall etcd cluster version, which controls the reported cluster version and the supported features.
#### Limitations
If you have a data size larger than 100MB you should contact us before upgrading, so we can make sure the upgrades work smoothly.
Every etcd 2.2 member will do health checking across the cluster periodically. etcd 2.1 member does not support health checking. During the upgrade, etcd 2.2 member will log warning about the unhealthy state of etcd 2.1 member. You can ignore the warning.
#### Downgrade
If all members have been upgraded to v2.2, the cluster will be upgraded to v2.2, and downgrade is **not possible**. If any member is still v2.1, the cluster will remain in v2.1, and you can go back to use v2.1 binary.
Please [backup your data directory](admin_guide.md#backing-up-the-datastore) of all etcd members if you want to downgrade the cluster, even if it is upgraded.
### Upgrade Procedure
In the example, we upgrade a three member v2.1 cluster running on local machine.
#### 1. Check upgrade requirements.
```
$ etcdctl cluster-health
member 6e3bd23ae5f1eae0 is healthy: got healthy result from http://localhost:22379
member 924e2e83e93f2560 is healthy: got healthy result from http://localhost:32379
member a8266ecf031671f3 is healthy: got healthy result from http://localhost:12379
cluster is healthy
$ curl http://localhost:4001/version
{"etcdserver":"2.1.x","etcdcluster":"2.1.0"}
```
#### 2. Stop the existing etcd process
You will see similar error logging from other etcd processes in your cluster. This is normal, since you just shut down a member and the connection is broken.
```
2015/09/2 09:48:35 etcdserver: failed to reach the peerURL(http://localhost:12380) of member a8266ecf031671f3 (Get http://localhost:12380/version: dial tcp [::1]:12380: getsockopt: connection refused)
2015/09/2 09:48:35 etcdserver: cannot get the version of member a8266ecf031671f3 (Get http://localhost:12380/version: dial tcp [::1]:12380: getsockopt: connection refused)
2015/09/2 09:48:35 rafthttp: failed to write a8266ecf031671f3 on stream Message (write tcp 127.0.0.1:32380->127.0.0.1:64394: write: broken pipe)
2015/09/2 09:48:35 rafthttp: failed to write a8266ecf031671f3 on pipeline (dial tcp [::1]:12380: getsockopt: connection refused)
2015/09/2 09:48:40 etcdserver: failed to reach the peerURL(http://localhost:7001) of member a8266ecf031671f3 (Get http://localhost:7001/version: dial tcp [::1]:12380: getsockopt: connection refused)
2015/09/2 09:48:40 etcdserver: cannot get the version of member a8266ecf031671f3 (Get http://localhost:12380/version: dial tcp [::1]:12380: getsockopt: connection refused)
2015/09/2 09:48:40 rafthttp: failed to heartbeat a8266ecf031671f3 on stream MsgApp v2 (write tcp 127.0.0.1:32380->127.0.0.1:64393: write: broken pipe)
```
You will see logging output like this from members that have not yet been upgraded, due to a mixed version cluster. You can ignore this while upgrading.
```
2015/09/2 09:48:45 etcdserver: the etcd version 2.1.2+git is not up-to-date
2015/09/2 09:48:45 etcdserver: member a8266ecf031671f3 has a higher version &{2.2.0-rc.0+git 2.1.0}
```
You will also see logging output like this from the newly upgraded member, since etcd 2.1 member does not support health checking. You can ignore this while upgrading.
```
2015-09-02 09:55:42.691384 W | rafthttp: the connection to peer 6e3bd23ae5f1eae0 is unhealthy
2015-09-02 09:55:42.705626 W | rafthttp: the connection to peer 924e2e83e93f2560 is unhealthy
```
You could [backup your data directory](https://github.com/coreos/etcd/blob/7f7e2cc79d9c5c342a6eb1e48c386b0223cf934e/Documentation/admin_guide.md#backing-up-the-datastore) for data safety.
```
$ etcdctl backup \
--data-dir /var/lib/etcd \
--backup-dir /tmp/etcd_backup
```
#### 3. Drop-in etcd v2.2 binary and start the new etcd process
Now, you can start the etcd v2.2 binary with the previous configuration.
You will see the etcd start and publish its information to the cluster.
```
2015-09-02 09:56:46.117609 I | etcdserver: published {Name:infra2 ClientURLs:[http://localhost:22380]} to cluster e9c7614f68f35fb2
```
You could verify the cluster becomes healthy.
```
$ etcdctl cluster-health
member 6e3bd23ae5f1eae0 is healthy: got healthy result from http://localhost:22379
member 924e2e83e93f2560 is healthy: got healthy result from http://localhost:32379
member a8266ecf031671f3 is healthy: got healthy result from http://localhost:12379
cluster is healthy
```
#### 4. Repeat step 2 to step 3 for all other members
#### 5. Finish
When all members are upgraded, you will see the cluster is upgraded to 2.2 successfully:
```
2015-09-02 09:56:54.896848 N | etcdserver: updated the cluster version from 2.1 to 2.2
```
```
$ curl http://127.0.0.1:4001/version
{"etcdserver":"2.2.x","etcdcluster":"2.2.0"}
```

136
Godeps/Godeps.json generated
View File

@@ -1,6 +1,6 @@
{
"ImportPath": "github.com/coreos/etcd",
"GoVersion": "go1.4.1",
"GoVersion": "go1.5.1",
"Packages": [
"./..."
],
@@ -10,44 +10,68 @@
"Comment": "null-5",
"Rev": "75cd24fc2f2c2a2088577d12123ddee5f54e0675"
},
{
"ImportPath": "github.com/akrennmair/gopcap",
"Rev": "00e11033259acb75598ba416495bb708d864a010"
},
{
"ImportPath": "github.com/beorn7/perks/quantile",
"Rev": "b965b613227fddccbfffe13eae360ed3fa822f8d"
},
{
"ImportPath": "github.com/bgentry/speakeasy",
"Rev": "5dfe43257d1f86b96484e760f2f0c4e2559089c7"
"Rev": "36e9cfdd690967f4f690c6edcc9ffacd006014a0"
},
{
"ImportPath": "github.com/boltdb/bolt",
"Comment": "v1.0-71-g71f28ea",
"Rev": "71f28eaecbebd00604d87bb1de0dae8fcfa54bbd"
"Comment": "v1.1.0-19-g0b00eff",
"Rev": "0b00effdd7a8270ebd91c24297e51643e370dd52"
},
{
"ImportPath": "github.com/bradfitz/http2",
"Rev": "3e36af6d3af0e56fa3da71099f864933dea3d9fb"
"ImportPath": "github.com/cheggaaa/pb",
"Rev": "da1f27ad1d9509b16f65f52fd9d8138b0f2dc7b2"
},
{
"ImportPath": "github.com/codegangsta/cli",
"Comment": "1.2.0-26-gf7ebb76",
"Rev": "f7ebb761e83e21225d1d8954fde853bf8edd46c4"
"Comment": "1.2.0-183-gb5232bb",
"Rev": "b5232bb2934f606f9f27a1305f1eea224e8e8b88"
},
{
"ImportPath": "github.com/coreos/go-etcd/etcd",
"Comment": "v2.0.0-13-g4cceaf7",
"Rev": "4cceaf7283b76f27c4a732b20730dcdb61053bf5"
"ImportPath": "github.com/coreos/gexpect",
"Rev": "5173270e159f5aa8fbc999dc7e3dcb50f4098a69"
},
{
"ImportPath": "github.com/coreos/go-semver/semver",
"Rev": "568e959cd89871e61434c1143528d9162da89ef2"
},
{
"ImportPath": "github.com/coreos/go-systemd/daemon",
"Comment": "v3-6-gcea488b",
"Rev": "cea488b4e6855fee89b6c22a811e3c5baca861b6"
},
{
"ImportPath": "github.com/coreos/go-systemd/journal",
"Comment": "v3-6-gcea488b",
"Rev": "cea488b4e6855fee89b6c22a811e3c5baca861b6"
},
{
"ImportPath": "github.com/coreos/go-systemd/util",
"Comment": "v3-6-gcea488b",
"Rev": "cea488b4e6855fee89b6c22a811e3c5baca861b6"
},
{
"ImportPath": "github.com/coreos/pkg/capnslog",
"Rev": "99f6e6b8f8ea30b0f82769c1411691c44a66d015"
"Rev": "2c77715c4df99b5420ffcae14ead08f52104065d"
},
{
"ImportPath": "github.com/cpuguy83/go-md2man/md2man",
"Comment": "v1.0.4",
"Rev": "71acacd42f85e5e82f70a55327789582a5200a90"
},
{
"ImportPath": "github.com/gogo/protobuf/proto",
"Rev": "64f27bf06efee53589314a6e5a4af34cdd85adf6"
"Comment": "v0.1-118-ge8904f5",
"Rev": "e8904f58e872a473a5b91bc9bf3377d223555263"
},
{
"ImportPath": "github.com/golang/glog",
@@ -55,43 +79,79 @@
},
{
"ImportPath": "github.com/golang/protobuf/proto",
"Rev": "5677a0e3d5e89854c9974e1256839ee23f8233ca"
"Rev": "6aaa8d47701fa6cf07e914ec01fde3d4a1fe79c3"
},
{
"ImportPath": "github.com/google/btree",
"Rev": "cc6329d4279e3f025a53a83c397d2339b5705c45"
},
{
"ImportPath": "github.com/inconshreveable/mousetrap",
"Rev": "76626ae9c91c4f2a10f34cad8ce83ea42c93bb75"
},
{
"ImportPath": "github.com/jonboulle/clockwork",
"Rev": "72f9bd7c4e0c2a40055ab3d0f09654f730cce982"
},
{
"ImportPath": "github.com/kballard/go-shellquote",
"Rev": "d8ec1a69a250a17bb0e419c386eac1f3711dc142"
},
{
"ImportPath": "github.com/kr/pty",
"Comment": "release.r56-29-gf7ee69f",
"Rev": "f7ee69f31298ecbe5d2b349c711e2547a617d398"
},
{
"ImportPath": "github.com/matttproud/golang_protobuf_extensions/pbutil",
"Rev": "fc2b8d3a73c4867e51861bbdd5ae3c1f0869dd6a"
},
{
"ImportPath": "github.com/prometheus/client_golang/model",
"Comment": "0.5.0-10-ga842dc1",
"Rev": "a842dc11e0621c34a71cab634d1d0190a59802a8"
"ImportPath": "github.com/olekukonko/ts",
"Rev": "ecf753e7c962639ab5a1fb46f7da627d4c0a04b8"
},
{
"ImportPath": "github.com/prometheus/client_golang/prometheus",
"Comment": "0.5.0-10-ga842dc1",
"Rev": "a842dc11e0621c34a71cab634d1d0190a59802a8"
},
{
"ImportPath": "github.com/prometheus/client_golang/text",
"Comment": "0.5.0-10-ga842dc1",
"Rev": "a842dc11e0621c34a71cab634d1d0190a59802a8"
"Comment": "0.7.0-52-ge51041b",
"Rev": "e51041b3fa41cece0dca035740ba6411905be473"
},
{
"ImportPath": "github.com/prometheus/client_model/go",
"Comment": "model-0.0.2-12-gfa8ad6f",
"Rev": "fa8ad6fec33561be4280a8f0514318c79d7f6cb6"
},
{
"ImportPath": "github.com/prometheus/common/expfmt",
"Rev": "ffe929a3f4c4faeaa10f2b9535c2b1be3ad15650"
},
{
"ImportPath": "github.com/prometheus/common/model",
"Rev": "ffe929a3f4c4faeaa10f2b9535c2b1be3ad15650"
},
{
"ImportPath": "github.com/prometheus/procfs",
"Rev": "ee2372b58cee877abe07cde670d04d3b3bac5ee6"
"Rev": "454a56f35412459b5e684fd5ec0f9211b94f002a"
},
{
"ImportPath": "github.com/russross/blackfriday",
"Comment": "v1.4-2-g300106c",
"Rev": "300106c228d52c8941d4b3de6054a6062a86dda3"
},
{
"ImportPath": "github.com/shurcooL/sanitized_anchor_name",
"Rev": "10ef21a441db47d8b13ebcc5fd2310f636973c77"
},
{
"ImportPath": "github.com/spacejam/loghisto",
"Rev": "323309774dec8b7430187e46cd0793974ccca04a"
},
{
"ImportPath": "github.com/spf13/cobra",
"Rev": "1c44ec8d3f1552cac48999f9306da23c4d8a288b"
},
{
"ImportPath": "github.com/spf13/pflag",
"Rev": "08b1a584251b5b62f458943640fc8ebd4d50aaa5"
},
{
"ImportPath": "github.com/stretchr/testify/assert",
@@ -99,7 +159,11 @@
},
{
"ImportPath": "github.com/ugorji/go/codec",
"Rev": "821cda7e48749cacf7cad2c6ed01e96457ca7e9d"
"Rev": "f1f1a805ed361a0e078bb537e4ea78cd37dcf065"
},
{
"ImportPath": "github.com/xiang90/probing",
"Rev": "6a0cc1ae81b4cc11db5e491e030e4b98fba79c19"
},
{
"ImportPath": "golang.org/x/crypto/bcrypt",
@@ -111,23 +175,27 @@
},
{
"ImportPath": "golang.org/x/net/context",
"Rev": "7dbad50ab5b31073856416cdcfeb2796d682f844"
"Rev": "04b9de9b512f58addf28c9853d50ebef61c3953e"
},
{
"ImportPath": "golang.org/x/oauth2",
"Rev": "3046bc76d6dfd7d3707f6640f85e42d9c4050f50"
"ImportPath": "golang.org/x/net/http2",
"Rev": "04b9de9b512f58addf28c9853d50ebef61c3953e"
},
{
"ImportPath": "google.golang.org/cloud/compute/metadata",
"Rev": "f20d6dcccb44ed49de45ae3703312cb46e627db1"
"ImportPath": "golang.org/x/net/internal/timeseries",
"Rev": "04b9de9b512f58addf28c9853d50ebef61c3953e"
},
{
"ImportPath": "google.golang.org/cloud/internal",
"Rev": "f20d6dcccb44ed49de45ae3703312cb46e627db1"
"ImportPath": "golang.org/x/net/trace",
"Rev": "04b9de9b512f58addf28c9853d50ebef61c3953e"
},
{
"ImportPath": "golang.org/x/sys/unix",
"Rev": "9c60d1c508f5134d1ca726b4641db998f2523357"
},
{
"ImportPath": "google.golang.org/grpc",
"Rev": "f5ebd86be717593ab029545492c93ddf8914832b"
"Rev": "e29d659177655e589850ba7d3d83f7ce12ef23dd"
}
]
}

View File

@@ -0,0 +1,5 @@
#*
*~
/tools/pass/pass
/tools/pcaptest/pcaptest
/tools/tcpdump/tcpdump

View File

@@ -0,0 +1,27 @@
Copyright (c) 2009-2011 Andreas Krennmair. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Andreas Krennmair nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,11 @@
# PCAP
This is a simple wrapper around libpcap for Go. Originally written by Andreas
Krennmair <ak@synflood.at> and only minorly touched up by Mark Smith <mark@qq.is>.
Please see the included pcaptest.go and tcpdump.go programs for instructions on
how to use this library.
Miek Gieben <miek@miek.nl> has created a more Go-like package and replaced functionality
with standard functions from the standard library. The package has also been renamed to
pcap.

View File

@@ -0,0 +1,527 @@
package pcap
import (
"encoding/binary"
"fmt"
"net"
"reflect"
"strings"
)
// EtherType values (TYPE_*) matched against the frame type in Packet.Decode,
// and IP protocol numbers (IP_*) matched against the IPv4 Protocol and IPv6
// NextHeader fields in decodeIp and decodeIp6.
const (
TYPE_IP = 0x0800
TYPE_ARP = 0x0806
TYPE_IP6 = 0x86DD
TYPE_VLAN = 0x8100
IP_ICMP = 1
IP_INIP = 4
IP_TCP = 6
IP_UDP = 17
)
// ERRBUF_SIZE and the link-layer header type (LINKTYPE_*) values.
const (
	ERRBUF_SIZE = 256

	// According to pcap-linktype(7).
	LINKTYPE_NULL             = 0
	LINKTYPE_ETHERNET         = 1
	LINKTYPE_TOKEN_RING       = 6
	LINKTYPE_ARCNET           = 7
	LINKTYPE_SLIP             = 8
	LINKTYPE_PPP              = 9
	LINKTYPE_FDDI             = 10
	LINKTYPE_ATM_RFC1483      = 100
	LINKTYPE_RAW              = 101
	LINKTYPE_PPP_HDLC         = 50
	LINKTYPE_PPP_ETHER        = 51
	LINKTYPE_C_HDLC           = 104
	LINKTYPE_IEEE802_11       = 105
	LINKTYPE_FRELAY           = 107
	LINKTYPE_LOOP             = 108
	LINKTYPE_LINUX_SLL        = 113
	// LocalTalk is 114; it was previously declared as 104, which collided
	// with LINKTYPE_C_HDLC.
	LINKTYPE_LTALK            = 114
	LINKTYPE_PFLOG            = 117
	LINKTYPE_PRISM_HEADER     = 119
	LINKTYPE_IP_OVER_FC       = 122
	LINKTYPE_SUNATM           = 123
	LINKTYPE_IEEE802_11_RADIO = 127
	LINKTYPE_ARCNET_LINUX     = 129
	LINKTYPE_LINUX_IRDA       = 144
	LINKTYPE_LINUX_LAPD       = 177
)
// addrHdr is a decoded header that carries a source and destination address
// and a length. In this file it is satisfied by *Iphdr and *Ip6hdr.
type addrHdr interface {
SrcAddr() string
DestAddr() string
Len() int
}
// addrStringer is a decoded header that needs the enclosing address header to
// render itself. In this file it is satisfied by *Tcphdr, *Udphdr and *Icmphdr.
type addrStringer interface {
String(addr addrHdr) string
}
// decodemac packs the first six bytes of pkt into the low 48 bits of a
// uint64, most significant byte first. It panics if pkt holds fewer than
// six bytes.
func decodemac(pkt []byte) uint64 {
	var addr uint64
	for idx, shift := 0, uint(40); idx < 6; idx, shift = idx+1, shift-8 {
		addr |= uint64(pkt[idx]) << shift
	}
	return addr
}
// Decode parses the Ethernet header at the start of p.Data, filling in
// p.Type, p.DestMac, p.SrcMac and p.Payload, and then hands the payload to
// the decoder matching the frame type. Frames of 14 bytes or fewer are left
// untouched.
func (p *Packet) Decode() {
	const etherHdrLen = 14
	frame := p.Data
	if len(frame) <= etherHdrLen {
		return
	}

	p.DestMac = decodemac(frame[0:6])
	p.SrcMac = decodemac(frame[6:12])
	p.Type = int(binary.BigEndian.Uint16(frame[12:etherHdrLen]))
	// The guard above guarantees at least one byte follows the header.
	p.Payload = frame[etherHdrLen:]

	switch p.Type {
	case TYPE_VLAN:
		p.decodeVlan()
	case TYPE_ARP:
		p.decodeArp()
	case TYPE_IP6:
		p.decodeIp6()
	case TYPE_IP:
		p.decodeIp()
	}
}
// headerString renders the given chain of decoded headers as a single line.
//
// A lone header is printed through fmt.Stringer. An address header followed
// by a transport header is printed through addrStringer. An address header
// followed by another address header (IP in IP) prints the outer addresses
// and then recurses on the remaining headers. Anything else is reported as
// "unknown" together with the dynamic types of the headers.
func (p *Packet) headerString(headers []interface{}) string {
	// If there's just one header, return that.
	if len(headers) == 1 {
		if hdr, ok := headers[0].(fmt.Stringer); ok {
			return hdr.String()
		}
	}
	// If there are two headers (IPv4/IPv6 -> TCP/UDP/IP..)
	if len(headers) == 2 {
		// Commonly the first header is an address. Index the headers
		// argument rather than p.Headers so that recursive calls on a
		// sub-slice look at the right pair.
		if addr, ok := headers[0].(addrHdr); ok {
			if hdr, ok := headers[1].(addrStringer); ok {
				// NOTE(review): Packet.String already prefixes p.Time, so
				// the timestamp appears twice in its output; kept as-is to
				// preserve the existing format.
				return fmt.Sprintf("%s %s", p.Time, hdr.String(addr))
			}
		}
	}
	// For IP in IP, we do a recursive call.
	if len(headers) >= 2 {
		if addr, ok := headers[0].(addrHdr); ok {
			if _, ok := headers[1].(addrHdr); ok {
				// The third verb carries the inner headers' rendering.
				return fmt.Sprintf("%s > %s IP in IP: %s",
					addr.SrcAddr(), addr.DestAddr(), p.headerString(headers[1:]))
			}
		}
	}
	typeNames := make([]string, 0, len(headers))
	for _, hdr := range headers {
		typeNames = append(typeNames, reflect.TypeOf(hdr).String())
	}
	return fmt.Sprintf("unknown [%s]", strings.Join(typeNames, ","))
}
// String returns a one-line description of the packet: its timestamp followed
// by the rendering of its decoded headers, or by "unsupported protocol" and
// the frame type when no header was decoded.
// The output is suitable for use in a tcpdump program.
func (p *Packet) String() string {
	if len(p.Headers) > 0 {
		return fmt.Sprintf("%s %s", p.Time, p.headerString(p.Headers))
	}
	// Nothing was decoded.
	return fmt.Sprintf("%s unsupported protocol %d", p.Time, int(p.Type))
}
// Arphdr is an ARP packet header.
// The four address slices are filled by decodeArp using the sizes given in
// HwAddressSize and ProtAddressSize.
type Arphdr struct {
Addrtype uint16
Protocol uint16
HwAddressSize uint8
ProtAddressSize uint8
Operation uint16
SourceHwAddress []byte
SourceProtAddress []byte
DestHwAddress []byte
DestProtAddress []byte
}
// String renders the ARP header on one line.
//
// The result starts with "ARP request" or "ARP Reply" when Operation is 1 or
// 2 (other operations get no prefix). For Ethernet/IPv4 ARP it is followed by
// the sender and target hardware addresses in hex and their protocol
// addresses in net.IP notation; for anything else the raw address and
// protocol type numbers are printed instead.
func (arp *Arphdr) String() (s string) {
	switch arp.Operation {
	case 1:
		s = "ARP request"
	case 2:
		s = "ARP Reply"
	}

	var detail string
	if arp.Addrtype == LINKTYPE_ETHERNET && arp.Protocol == TYPE_IP {
		// %x on the byte slices yields the same 12 hex digits as formatting
		// a decoded 6-byte MAC, but cannot panic when a malformed packet
		// advertises a shorter hardware address.
		detail = fmt.Sprintf("%x (%s) > %x (%s)",
			arp.SourceHwAddress, net.IP(arp.SourceProtAddress),
			arp.DestHwAddress, net.IP(arp.DestProtAddress))
	} else {
		detail = fmt.Sprintf("addrtype = %d protocol = %d", arp.Addrtype, arp.Protocol)
	}

	// Keep the operation text instead of overwriting it with the details.
	if s == "" {
		return detail
	}
	return s + " " + detail
}
// decodeArp parses an ARP header from p.Payload. On success it appends the
// header to p.Headers and advances p.Payload past it. Payloads too short for
// the fixed 8-byte part, or for the address sizes the header advertises, are
// left untouched.
func (p *Packet) decodeArp() {
	const fixedLen = 8
	if len(p.Payload) < fixedLen {
		return
	}
	pkt := p.Payload
	arp := new(Arphdr)
	arp.Addrtype = binary.BigEndian.Uint16(pkt[0:2])
	arp.Protocol = binary.BigEndian.Uint16(pkt[2:4])
	arp.HwAddressSize = pkt[4]
	arp.ProtAddressSize = pkt[5]
	arp.Operation = binary.BigEndian.Uint16(pkt[6:8])

	// Do the offset arithmetic in int: in uint8 the sum wraps for large
	// advertised sizes, which let the bounds check pass and the slicing
	// below panic.
	hwLen := int(arp.HwAddressSize)
	protLen := int(arp.ProtAddressSize)
	end := fixedLen + 2*hwLen + 2*protLen
	if len(pkt) < end {
		return
	}

	off := fixedLen
	arp.SourceHwAddress = pkt[off : off+hwLen]
	off += hwLen
	arp.SourceProtAddress = pkt[off : off+protLen]
	off += protLen
	arp.DestHwAddress = pkt[off : off+hwLen]
	off += hwLen
	arp.DestProtAddress = pkt[off : off+protLen]

	p.Headers = append(p.Headers, arp)
	p.Payload = pkt[end:]
}
// Iphdr is the header of an IP packet.
// Ihl is the header length in 32-bit words as read from the packet, and
// Length is the total length field; both are filled by decodeIp.
type Iphdr struct {
Version uint8
Ihl uint8
Tos uint8
Length uint16
Id uint16
Flags uint8
FragOffset uint16
Ttl uint8
Protocol uint8
Checksum uint16
SrcIp []byte
DestIp []byte
}
// decodeIp parses an IPv4 header from p.Payload, appends it to p.Headers,
// stores it in p.IP and sets p.Payload to the bytes between the end of the
// header and the end of the datagram (bounded by the captured data). It then
// dispatches to the decoder for the encapsulated protocol. Payloads shorter
// than 20 bytes are left untouched.
func (p *Packet) decodeIp() {
	const minHdrLen = 20
	if len(p.Payload) < minHdrLen {
		return
	}
	pkt := p.Payload
	ip := new(Iphdr)
	ip.Version = uint8(pkt[0]) >> 4
	ip.Ihl = uint8(pkt[0]) & 0x0F
	ip.Tos = pkt[1]
	ip.Length = binary.BigEndian.Uint16(pkt[2:4])
	ip.Id = binary.BigEndian.Uint16(pkt[4:6])
	flagsfrags := binary.BigEndian.Uint16(pkt[6:8])
	ip.Flags = uint8(flagsfrags >> 13)
	ip.FragOffset = flagsfrags & 0x1FFF
	ip.Ttl = pkt[8]
	ip.Protocol = pkt[9]
	ip.Checksum = binary.BigEndian.Uint16(pkt[10:12])
	ip.SrcIp = pkt[12:16]
	ip.DestIp = pkt[16:20]

	// Never read past the captured bytes, whatever the length field claims.
	pEnd := int(ip.Length)
	if pEnd > len(pkt) {
		pEnd = len(pkt)
	}

	// A header length below 20 bytes (IHL < 5) is invalid. Accepting it left
	// the payload pointing at the same bytes, so a crafted IP-in-IP packet
	// with IHL 0 made the dispatch below recurse without end. With the
	// check, every nesting level consumes at least 20 bytes.
	hdrLen := int(ip.Ihl) * 4
	if hdrLen >= minHdrLen && hdrLen < pEnd {
		p.Payload = pkt[hdrLen:pEnd]
	} else {
		p.Payload = []byte{}
	}

	p.Headers = append(p.Headers, ip)
	p.IP = ip
	switch ip.Protocol {
	case IP_TCP:
		p.decodeTcp()
	case IP_UDP:
		p.decodeUdp()
	case IP_ICMP:
		p.decodeIcmp()
	case IP_INIP:
		p.decodeIp()
	}
}
// SrcAddr returns the source address formatted by net.IP.
func (ip *Iphdr) SrcAddr() string {
	src := net.IP(ip.SrcIp)
	return src.String()
}

// DestAddr returns the destination address formatted by net.IP.
func (ip *Iphdr) DestAddr() string {
	dst := net.IP(ip.DestIp)
	return dst.String()
}

// Len returns the header's Length field as an int.
func (ip *Iphdr) Len() int {
	total := ip.Length
	return int(total)
}
// Vlanhdr is a decoded VLAN tag.
type Vlanhdr struct {
	Priority       byte
	DropEligible   bool
	VlanIdentifier int
	Type           int // Not actually part of the vlan header, but the type of the actual packet
}

// String renders the tag's priority, drop-eligible flag and VLAN identifier.
// It returns the text (previously the formatted string was discarded and the
// method returned nothing), so *Vlanhdr now satisfies fmt.Stringer.
func (v *Vlanhdr) String() string {
	return fmt.Sprintf("VLAN Priority:%d Drop:%v Tag:%d", v.Priority, v.DropEligible, v.VlanIdentifier)
}
// decodeVlan parses a VLAN tag from p.Payload: two bytes of tag control
// information followed by the two-byte type of the encapsulated packet. It
// appends the tag to p.Headers, advances p.Payload past it and dispatches to
// the decoder for the encapsulated type. Payloads shorter than four bytes are
// left untouched.
func (p *Packet) decodeVlan() {
	pkt := p.Payload
	if len(pkt) < 4 {
		return
	}
	vlan := new(Vlanhdr)

	// All three tag fields come from the same 16-bit word: priority in the
	// top three bits, drop-eligible in the next bit, identifier in the low
	// twelve. The previous code read priority and drop-eligible from the
	// type field and shifted a byte by 13, which always produced 0.
	tci := binary.BigEndian.Uint16(pkt[0:2])
	vlan.Priority = byte(tci >> 13)
	vlan.DropEligible = tci&0x1000 != 0
	vlan.VlanIdentifier = int(tci & 0x0FFF)
	vlan.Type = int(binary.BigEndian.Uint16(pkt[2:4]))

	p.Headers = append(p.Headers, vlan)
	// The length check above guarantees the four tag bytes are present, so
	// always strip them, leaving an empty payload when nothing follows.
	p.Payload = pkt[4:]

	switch vlan.Type {
	case TYPE_IP:
		p.decodeIp()
	case TYPE_IP6:
		p.decodeIp6()
	case TYPE_ARP:
		p.decodeArp()
	}
}
// Tcphdr is a decoded TCP header.
// DataOffset is the header length in 32-bit words and Flags holds the low
// nine bits of bytes 12-13 of the header, as extracted by decodeTcp.
type Tcphdr struct {
SrcPort uint16
DestPort uint16
Seq uint32
Ack uint32
DataOffset uint8
Flags uint16
Window uint16
Checksum uint16
Urgent uint16
Data []byte
}
// Bit masks for Tcphdr.Flags, starting at bit 0 for TCP_FIN and doubling for
// each following name.
const (
TCP_FIN = 1 << iota
TCP_SYN
TCP_RST
TCP_PSH
TCP_ACK
TCP_URG
TCP_ECE
TCP_CWR
TCP_NS
)
// decodeTcp parses a TCP header from p.Payload, appends it to p.Headers and
// stores it in p.TCP. When the payload is at least as long as the header's
// data offset, p.Payload is advanced past the header. Payloads shorter than
// 20 bytes are left untouched.
func (p *Packet) decodeTcp() {
	seg := p.Payload
	if len(seg) < 20 {
		return
	}
	be := binary.BigEndian
	hdr := &Tcphdr{
		SrcPort:    be.Uint16(seg[0:2]),
		DestPort:   be.Uint16(seg[2:4]),
		Seq:        be.Uint32(seg[4:8]),
		Ack:        be.Uint32(seg[8:12]),
		DataOffset: (seg[12] & 0xF0) >> 4,
		Flags:      be.Uint16(seg[12:14]) & 0x1FF,
		Window:     be.Uint16(seg[14:16]),
		Checksum:   be.Uint16(seg[16:18]),
		Urgent:     be.Uint16(seg[18:20]),
	}
	// DataOffset counts 32-bit words.
	if dataStart := int(hdr.DataOffset * 4); len(seg) >= dataStart {
		p.Payload = seg[dataStart:]
	}
	p.Headers = append(p.Headers, hdr)
	p.TCP = hdr
}
// String renders the TCP header on one line, taking the endpoint addresses
// and the LEN value from the enclosing address header hdr.
func (tcp *Tcphdr) String(hdr addrHdr) string {
	src, dst := hdr.SrcAddr(), hdr.DestAddr()
	flags := tcp.FlagsString()
	return fmt.Sprintf("TCP %s:%d > %s:%d %s SEQ=%d ACK=%d LEN=%d",
		src, int(tcp.SrcPort), dst, int(tcp.DestPort),
		flags, int64(tcp.Seq), int64(tcp.Ack), hdr.Len())
}
// FlagsString lists the flags set in tcp.Flags as lower-case names separated
// by spaces and wrapped in square brackets, e.g. "[syn ack]". Names appear in
// the fixed order of the table below.
func (tcp *Tcphdr) FlagsString() string {
	ordered := []struct {
		mask  uint16
		label string
	}{
		{TCP_SYN, "syn"},
		{TCP_FIN, "fin"},
		{TCP_ACK, "ack"},
		{TCP_PSH, "psh"},
		{TCP_RST, "rst"},
		{TCP_URG, "urg"},
		{TCP_NS, "ns"},
		{TCP_CWR, "cwr"},
		{TCP_ECE, "ece"},
	}
	var set []string
	for _, f := range ordered {
		if tcp.Flags&f.mask != 0 {
			set = append(set, f.label)
		}
	}
	return "[" + strings.Join(set, " ") + "]"
}
// Udphdr is a decoded UDP header; decodeUdp reads its four 16-bit fields in
// the order declared here.
type Udphdr struct {
SrcPort uint16
DestPort uint16
Length uint16
Checksum uint16
}
// decodeUdp parses the 8-byte UDP header from p.Payload, appends it to
// p.Headers, stores it in p.UDP and advances p.Payload past it. Payloads
// shorter than 8 bytes are left untouched.
func (p *Packet) decodeUdp() {
	dgram := p.Payload
	if len(dgram) < 8 {
		return
	}
	be := binary.BigEndian
	hdr := &Udphdr{
		SrcPort:  be.Uint16(dgram[0:2]),
		DestPort: be.Uint16(dgram[2:4]),
		Length:   be.Uint16(dgram[4:6]),
		Checksum: be.Uint16(dgram[6:8]),
	}
	p.Headers = append(p.Headers, hdr)
	p.UDP = hdr
	// The length guard above already ensures the header is fully present.
	p.Payload = dgram[8:]
}
// String renders the UDP header on one line, taking the endpoint addresses
// from the enclosing address header hdr.
func (udp *Udphdr) String(hdr addrHdr) string {
	src := hdr.SrcAddr()
	dst := hdr.DestAddr()
	return fmt.Sprintf("UDP %s:%d > %s:%d LEN=%d CHKSUM=%d",
		src, int(udp.SrcPort), dst, int(udp.DestPort),
		int(udp.Length), int(udp.Checksum))
}
// Icmphdr is a decoded ICMP header. decodeIcmp fills Type, Code, Checksum, Id
// and Seq from the first eight bytes of the message; it does not set Data.
type Icmphdr struct {
Type uint8
Code uint8
Checksum uint16
Id uint16
Seq uint16
Data []byte
}
// decodeIcmp parses the 8-byte ICMP header from p.Payload, advances p.Payload
// past it, appends the header to p.Headers and returns it. It returns nil and
// changes nothing when the payload is shorter than 8 bytes.
func (p *Packet) decodeIcmp() *Icmphdr {
	msg := p.Payload
	if len(msg) < 8 {
		return nil
	}
	hdr := &Icmphdr{
		Type:     msg[0],
		Code:     msg[1],
		Checksum: binary.BigEndian.Uint16(msg[2:4]),
		Id:       binary.BigEndian.Uint16(msg[4:6]),
		Seq:      binary.BigEndian.Uint16(msg[6:8]),
	}
	p.Payload = msg[8:]
	p.Headers = append(p.Headers, hdr)
	return hdr
}
// String renders the ICMP header on one line, taking the endpoint addresses
// from the enclosing address header hdr. The result ends with a space.
func (icmp *Icmphdr) String(hdr addrHdr) string {
	from := hdr.SrcAddr()
	to := hdr.DestAddr()
	return fmt.Sprintf("ICMP %s > %s Type = %d Code = %d ", from, to, icmp.Type, icmp.Code)
}
// TypeString describes the ICMP message type in words. Echo request/reply
// include the sequence number, type 3 is refined by Code, and types not
// listed here yield the empty string.
func (icmp *Icmphdr) TypeString() (result string) {
	switch icmp.Type {
	case 0:
		return fmt.Sprintf("Echo reply seq=%d", icmp.Seq)
	case 8:
		return fmt.Sprintf("Echo request seq=%d", icmp.Seq)
	case 30:
		return "Traceroute"
	case 3:
		// Codes 0-3 have specific wording; everything else is generic.
		specific := [...]string{
			"Network unreachable",
			"Host unreachable",
			"Protocol unreachable",
			"Port unreachable",
		}
		if int(icmp.Code) < len(specific) {
			return specific[icmp.Code]
		}
		return "Destination unreachable"
	}
	return ""
}
// Ip6hdr is a decoded IPv6 header; decodeIp6 fills it from the first 40 bytes
// of the payload.
type Ip6hdr struct {
// http://www.networksorcery.com/enp/protocol/ipv6.htm
Version uint8 // 4 bits
TrafficClass uint8 // 8 bits
FlowLabel uint32 // 20 bits
Length uint16 // 16 bits
NextHeader uint8 // 8 bits, same as Protocol in Iphdr
HopLimit uint8 // 8 bits
SrcIp []byte // 16 bytes
DestIp []byte // 16 bytes
}
// decodeIp6 parses the 40-byte IPv6 header from p.Payload, advances p.Payload
// past it, appends the header to p.Headers and dispatches on NextHeader to
// the decoder for the encapsulated protocol. Payloads shorter than 40 bytes
// are left untouched.
func (p *Packet) decodeIp6() {
	const hdrLen = 40
	raw := p.Payload
	if len(raw) < hdrLen {
		return
	}

	// The first 32-bit word packs version (4 bits), traffic class (8 bits)
	// and flow label (20 bits).
	word0 := binary.BigEndian.Uint32(raw[0:4])
	hdr := &Ip6hdr{
		Version:      uint8(word0 >> 28),
		TrafficClass: uint8(word0 >> 20),
		FlowLabel:    word0 & 0x000FFFFF,
		Length:       binary.BigEndian.Uint16(raw[4:6]),
		NextHeader:   raw[6],
		HopLimit:     raw[7],
		SrcIp:        raw[8:24],
		DestIp:       raw[24:hdrLen],
	}
	p.Payload = raw[hdrLen:]
	p.Headers = append(p.Headers, hdr)

	switch hdr.NextHeader {
	case IP_INIP:
		p.decodeIp()
	case IP_ICMP:
		p.decodeIcmp()
	case IP_UDP:
		p.decodeUdp()
	case IP_TCP:
		p.decodeTcp()
	}
}
// SrcAddr returns the source address formatted by net.IP.
func (ip6 *Ip6hdr) SrcAddr() string {
	src := net.IP(ip6.SrcIp)
	return src.String()
}

// DestAddr returns the destination address formatted by net.IP.
func (ip6 *Ip6hdr) DestAddr() string {
	dst := net.IP(ip6.DestIp)
	return dst.String()
}

// Len returns the header's Length field as an int.
func (ip6 *Ip6hdr) Len() int {
	length := ip6.Length
	return int(length)
}

View File

@@ -0,0 +1,247 @@
package pcap
import (
"bytes"
"testing"
"time"
)
// testSimpleTcpPacket is a raw Ethernet frame carrying an IPv4/TCP segment
// whose payload is an HTTP GET request. It is shared by the benchmark and the
// test below, both of which call Decode on it.
var testSimpleTcpPacket *Packet = &Packet{
Data: []byte{
0x00, 0x00, 0x0c, 0x9f, 0xf0, 0x20, 0xbc, 0x30, 0x5b, 0xe8, 0xd3, 0x49,
0x08, 0x00, 0x45, 0x00, 0x01, 0xa4, 0x39, 0xdf, 0x40, 0x00, 0x40, 0x06,
0x55, 0x5a, 0xac, 0x11, 0x51, 0x49, 0xad, 0xde, 0xfe, 0xe1, 0xc5, 0xf7,
0x00, 0x50, 0xc5, 0x7e, 0x0e, 0x48, 0x49, 0x07, 0x42, 0x32, 0x80, 0x18,
0x00, 0x73, 0xab, 0xb1, 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0x03, 0x77,
0x37, 0x9c, 0x42, 0x77, 0x5e, 0x3a, 0x47, 0x45, 0x54, 0x20, 0x2f, 0x20,
0x48, 0x54, 0x54, 0x50, 0x2f, 0x31, 0x2e, 0x31, 0x0d, 0x0a, 0x48, 0x6f,
0x73, 0x74, 0x3a, 0x20, 0x77, 0x77, 0x77, 0x2e, 0x66, 0x69, 0x73, 0x68,
0x2e, 0x63, 0x6f, 0x6d, 0x0d, 0x0a, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63,
0x74, 0x69, 0x6f, 0x6e, 0x3a, 0x20, 0x6b, 0x65, 0x65, 0x70, 0x2d, 0x61,
0x6c, 0x69, 0x76, 0x65, 0x0d, 0x0a, 0x55, 0x73, 0x65, 0x72, 0x2d, 0x41,
0x67, 0x65, 0x6e, 0x74, 0x3a, 0x20, 0x4d, 0x6f, 0x7a, 0x69, 0x6c, 0x6c,
0x61, 0x2f, 0x35, 0x2e, 0x30, 0x20, 0x28, 0x58, 0x31, 0x31, 0x3b, 0x20,
0x4c, 0x69, 0x6e, 0x75, 0x78, 0x20, 0x78, 0x38, 0x36, 0x5f, 0x36, 0x34,
0x29, 0x20, 0x41, 0x70, 0x70, 0x6c, 0x65, 0x57, 0x65, 0x62, 0x4b, 0x69,
0x74, 0x2f, 0x35, 0x33, 0x35, 0x2e, 0x32, 0x20, 0x28, 0x4b, 0x48, 0x54,
0x4d, 0x4c, 0x2c, 0x20, 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x47, 0x65, 0x63,
0x6b, 0x6f, 0x29, 0x20, 0x43, 0x68, 0x72, 0x6f, 0x6d, 0x65, 0x2f, 0x31,
0x35, 0x2e, 0x30, 0x2e, 0x38, 0x37, 0x34, 0x2e, 0x31, 0x32, 0x31, 0x20,
0x53, 0x61, 0x66, 0x61, 0x72, 0x69, 0x2f, 0x35, 0x33, 0x35, 0x2e, 0x32,
0x0d, 0x0a, 0x41, 0x63, 0x63, 0x65, 0x70, 0x74, 0x3a, 0x20, 0x74, 0x65,
0x78, 0x74, 0x2f, 0x68, 0x74, 0x6d, 0x6c, 0x2c, 0x61, 0x70, 0x70, 0x6c,
0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x68, 0x74, 0x6d,
0x6c, 0x2b, 0x78, 0x6d, 0x6c, 0x2c, 0x61, 0x70, 0x70, 0x6c, 0x69, 0x63,
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x78, 0x6d, 0x6c, 0x3b, 0x71, 0x3d,
0x30, 0x2e, 0x39, 0x2c, 0x2a, 0x2f, 0x2a, 0x3b, 0x71, 0x3d, 0x30, 0x2e,
0x38, 0x0d, 0x0a, 0x41, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2d, 0x45, 0x6e,
0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x3a, 0x20, 0x67, 0x7a, 0x69, 0x70,
0x2c, 0x64, 0x65, 0x66, 0x6c, 0x61, 0x74, 0x65, 0x2c, 0x73, 0x64, 0x63,
0x68, 0x0d, 0x0a, 0x41, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2d, 0x4c, 0x61,
0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x3a, 0x20, 0x65, 0x6e, 0x2d, 0x55,
0x53, 0x2c, 0x65, 0x6e, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x38, 0x0d, 0x0a,
0x41, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2d, 0x43, 0x68, 0x61, 0x72, 0x73,
0x65, 0x74, 0x3a, 0x20, 0x49, 0x53, 0x4f, 0x2d, 0x38, 0x38, 0x35, 0x39,
0x2d, 0x31, 0x2c, 0x75, 0x74, 0x66, 0x2d, 0x38, 0x3b, 0x71, 0x3d, 0x30,
0x2e, 0x37, 0x2c, 0x2a, 0x3b, 0x71, 0x3d, 0x30, 0x2e, 0x33, 0x0d, 0x0a,
0x0d, 0x0a,
}}
// BenchmarkDecodeSimpleTcpPacket measures the cost of running Decode on the
// shared testSimpleTcpPacket fixture, once per benchmark iteration.
func BenchmarkDecodeSimpleTcpPacket(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		testSimpleTcpPacket.Decode()
	}
}
// TestDecodeSimpleTcpPacket decodes the testSimpleTcpPacket fixture and checks
// every decoded field: the Ethernet MAC addresses, the IP header, the TCP
// header and finally the HTTP request carried as payload.
func TestDecodeSimpleTcpPacket(t *testing.T) {
p := testSimpleTcpPacket
p.Decode()
// Link-layer addresses.
if p.DestMac != 0x00000c9ff020 {
t.Error("Dest mac", p.DestMac)
}
if p.SrcMac != 0xbc305be8d349 {
t.Error("Src mac", p.SrcMac)
}
// Exactly two headers (IP, then TCP) are expected; bail out early so the
// indexing below cannot go out of range.
if len(p.Headers) != 2 {
t.Error("Incorrect number of headers", len(p.Headers))
return
}
// First header: IP.
if ip, ipOk := p.Headers[0].(*Iphdr); ipOk {
if ip.Version != 4 {
t.Error("ip Version", ip.Version)
}
if ip.Ihl != 5 {
t.Error("ip header length", ip.Ihl)
}
if ip.Tos != 0 {
t.Error("ip TOS", ip.Tos)
}
if ip.Length != 420 {
t.Error("ip Length", ip.Length)
}
if ip.Id != 14815 {
t.Error("ip ID", ip.Id)
}
if ip.Flags != 0x02 {
t.Error("ip Flags", ip.Flags)
}
if ip.FragOffset != 0 {
t.Error("ip Fragoffset", ip.FragOffset)
}
if ip.Ttl != 64 {
t.Error("ip TTL", ip.Ttl)
}
if ip.Protocol != 6 {
t.Error("ip Protocol", ip.Protocol)
}
if ip.Checksum != 0x555A {
t.Error("ip Checksum", ip.Checksum)
}
if !bytes.Equal(ip.SrcIp, []byte{172, 17, 81, 73}) {
t.Error("ip Src", ip.SrcIp)
}
if !bytes.Equal(ip.DestIp, []byte{173, 222, 254, 225}) {
t.Error("ip Dest", ip.DestIp)
}
// Second header: TCP. Only inspected when the first header was IP.
if tcp, tcpOk := p.Headers[1].(*Tcphdr); tcpOk {
if tcp.SrcPort != 50679 {
t.Error("tcp srcport", tcp.SrcPort)
}
if tcp.DestPort != 80 {
t.Error("tcp destport", tcp.DestPort)
}
if tcp.Seq != 0xc57e0e48 {
t.Error("tcp seq", tcp.Seq)
}
if tcp.Ack != 0x49074232 {
t.Error("tcp ack", tcp.Ack)
}
if tcp.DataOffset != 8 {
t.Error("tcp dataoffset", tcp.DataOffset)
}
if tcp.Flags != 0x18 {
t.Error("tcp flags", tcp.Flags)
}
if tcp.Window != 0x73 {
t.Error("tcp window", tcp.Window)
}
if tcp.Checksum != 0xabb1 {
t.Error("tcp checksum", tcp.Checksum)
}
if tcp.Urgent != 0 {
t.Error("tcp urgent", tcp.Urgent)
}
} else {
t.Error("Second header is not TCP header")
}
} else {
t.Error("First header is not IP header")
}
// The payload must be exactly the HTTP GET request, CRLF line endings included.
if string(p.Payload) != "GET / HTTP/1.1\r\nHost: www.fish.com\r\nConnection: keep-alive\r\nUser-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Encoding: gzip,deflate,sdch\r\nAccept-Language: en-US,en;q=0.8\r\nAccept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3\r\n\r\n" {
t.Error("--- PAYLOAD STRING ---\n", string(p.Payload), "\n--- PAYLOAD BYTES ---\n", p.Payload)
}
}
// TestDecodeSmallTcpPacketHasEmptyPayload makes sure the packet payload doesn't
// include the 6 trailing null bytes of this packet. They're actually the
// ethernet trailer, not data carried by TCP.
func TestDecodeSmallTcpPacketHasEmptyPayload(t *testing.T) {
p := &Packet{
// This packet is only 54 bytes (an empty TCP RST), thus 6 trailing null
// bytes are added by the ethernet layer to make it the minimum packet size
// (the literal below is 60 bytes long).
Data: []byte{
0xbc, 0x30, 0x5b, 0xe8, 0xd3, 0x49, 0xb8, 0xac, 0x6f, 0x92, 0xd5, 0xbf,
0x08, 0x00, 0x45, 0x00, 0x00, 0x28, 0x00, 0x00, 0x40, 0x00, 0x40, 0x06,
0x3f, 0x9f, 0xac, 0x11, 0x51, 0xc5, 0xac, 0x11, 0x51, 0x49, 0x00, 0x63,
0x9a, 0xef, 0x00, 0x00, 0x00, 0x00, 0x2e, 0xc1, 0x27, 0x83, 0x50, 0x14,
0x00, 0x00, 0xc3, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}}
p.Decode()
// The payload must be present (non-nil) yet empty.
if p.Payload == nil {
t.Error("Nil payload")
}
if len(p.Payload) != 0 {
t.Error("Non-empty payload:", p.Payload)
}
}
// TestDecodeVlanPacket decodes a frame whose EtherType bytes are 0x81 0x00 and
// checks that Decode reports TYPE_VLAN and yields three headers in the order
// VLAN, IP, TCP.
func TestDecodeVlanPacket(t *testing.T) {
p := &Packet{
Data: []byte{
0x00, 0x10, 0xdb, 0xff, 0x10, 0x00, 0x00, 0x15, 0x2c, 0x9d, 0xcc, 0x00, 0x81, 0x00, 0x01, 0xf7,
0x08, 0x00, 0x45, 0x00, 0x00, 0x28, 0x29, 0x8d, 0x40, 0x00, 0x7d, 0x06, 0x83, 0xa0, 0xac, 0x1b,
0xca, 0x8e, 0x45, 0x16, 0x94, 0xe2, 0xd4, 0x0a, 0x00, 0x50, 0xdf, 0xab, 0x9c, 0xc6, 0xcd, 0x1e,
0xe5, 0xd1, 0x50, 0x10, 0x01, 0x00, 0x5a, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}}
p.Decode()
if p.Type != TYPE_VLAN {
t.Error("Didn't detect vlan")
}
// A wrong header count is fatal: dump what was decoded, then stop so the
// index expressions below cannot panic.
if len(p.Headers) != 3 {
t.Error("Incorrect number of headers:", len(p.Headers))
for i, h := range p.Headers {
t.Errorf("Header %d: %#v", i, h)
}
t.FailNow()
}
if _, ok := p.Headers[0].(*Vlanhdr); !ok {
t.Errorf("First header isn't vlan: %q", p.Headers[0])
}
if _, ok := p.Headers[1].(*Iphdr); !ok {
t.Errorf("Second header isn't IP: %q", p.Headers[1])
}
if _, ok := p.Headers[2].(*Tcphdr); !ok {
t.Errorf("Third header isn't TCP: %q", p.Headers[2])
}
}
// TestDecodeFuzzFallout feeds Decode a list of short or malformed frames
// (judging by the name, inputs found by fuzzing). The test makes no
// assertions on the decoded result; it fails only if Decode panics.
func TestDecodeFuzzFallout(t *testing.T) {
testData := []struct {
Data []byte
}{
{[]byte("000000000000\x81\x000")},
{[]byte("000000000000\x81\x00000")},
{[]byte("000000000000\x86\xdd0")},
{[]byte("000000000000\b\x000")},
{[]byte("000000000000\b\x060")},
{[]byte{}},
{[]byte("000000000000\b\x0600000000")},
{[]byte("000000000000\x86\xdd000000\x01000000000000000000000000000000000")},
{[]byte("000000000000\x81\x0000\b\x0600000000")},
{[]byte("000000000000\b\x00n0000000000000000000")},
{[]byte("000000000000\x86\xdd000000\x0100000000000000000000000000000000000")},
{[]byte("000000000000\x81\x0000\b\x00g0000000000000000000")},
//{[]byte()},
{[]byte("000000000000\b\x00400000000\x110000000000")},
{[]byte("0nMء\xfe\x13\x13\x81\x00gr\b\x00&x\xc9\xe5b'\x1e0\x00\x04\x00\x0020596224")},
{[]byte("000000000000\x81\x0000\b\x00400000000\x110000000000")},
{[]byte("000000000000\b\x00000000000\x0600\xff0000000")},
{[]byte("000000000000\x86\xdd000000\x06000000000000000000000000000000000")},
{[]byte("000000000000\x81\x0000\b\x00000000000\x0600b0000000")},
{[]byte("000000000000\x81\x0000\b\x00400000000\x060000000000")},
{[]byte("000000000000\x86\xdd000000\x11000000000000000000000000000000000")},
{[]byte("000000000000\x86\xdd000000\x0600000000000000000000000000000000000000000000M")},
{[]byte("000000000000\b\x00500000000\x0600000000000")},
{[]byte("0nM\xd80\xfe\x13\x13\x81\x00gr\b\x00&x\xc9\xe5b'\x1e0\x00\x04\x00\x0020596224")},
}
for _, entry := range testData {
// Caplen and Len are both set to the actual data length.
pkt := &Packet{
Time: time.Now(),
Caplen: uint32(len(entry.Data)),
Len: uint32(len(entry.Data)),
Data: entry.Data,
}
pkt.Decode()
// The disabled variant below would report which input panicked.
// NOTE(review): it refers to an index variable `idx` that the range
// clause above does not declare, so it cannot be re-enabled as-is.
/*
func() {
defer func() {
if err := recover(); err != nil {
t.Fatalf("%d. %q failed: %v", idx, string(entry.Data), err)
}
}()
pkt.Decode()
}()
*/
}
}

View File

@@ -0,0 +1,206 @@
package pcap
import (
"encoding/binary"
"fmt"
"io"
"time"
)
// FileHeader is the parsed header of a pcap file.
// http://wiki.wireshark.org/Development/LibpcapFileFormat
//
// NewReader fills the fields in declaration order, and NewWriter serializes
// them in the same order as a 24-byte little-endian record.
type FileHeader struct {
MagicNumber uint32 // byte-order marker; NewReader always stores 0xa1b2c3d4 here
VersionMajor uint16 // file format version, major part
VersionMinor uint16 // file format version, minor part
TimeZone int32 // presumably the time-zone correction field of the format -- see the linked page
SigFigs uint32 // presumably the timestamp-accuracy field of the format -- see the linked page
SnapLen uint32 // snapshot length recorded in the file header
Network uint32 // link-layer type recorded in the file header
}
// PacketTime is a timestamp split into whole seconds and a microsecond
// remainder.
type PacketTime struct {
	Sec  int32
	Usec int32
}

// Time converts the PacketTime to a go Time struct, scaling the microsecond
// part to nanoseconds.
func (p *PacketTime) Time() time.Time {
	seconds := int64(p.Sec)
	nanos := int64(p.Usec) * 1000
	return time.Unix(seconds, nanos)
}
// Packet is a single packet parsed from a pcap file.
//
// Convenient access to IP, TCP, and UDP headers is provided after Decode()
// is called if the packet is of the appropriate type.
//
// Reader.Next and Pcap.NextEx populate only Time, Caplen, Len and Data; the
// remaining fields stay at their zero value until Decode() is called.
type Packet struct {
Time time.Time // packet send/receive time
Caplen uint32 // bytes stored in the file (caplen <= len)
Len uint32 // bytes sent/received
Data []byte // packet data
Type int // protocol type, see LINKTYPE_*
DestMac uint64
SrcMac uint64
Headers []interface{} // decoded headers, in order
Payload []byte // remaining non-header bytes
IP *Iphdr // IP header (for IP packets, after decoding)
TCP *Tcphdr // TCP header (for TCP packets, after decoding)
UDP *Udphdr // UDP header (for UDP packets after decoding)
}
// Reader parses pcap files.
type Reader struct {
flip bool // true when the magic number was read as 0xd4c3b2a1; integers are then decoded big-endian
buf io.Reader // underlying data source
err error // result of the most recent read; overwritten by every read helper
fourBytes []byte // scratch buffer reused for 32-bit reads
twoBytes []byte // scratch buffer reused for 16-bit reads
sixteenBytes []byte // scratch buffer reused for per-packet record headers
Header FileHeader // file header parsed by NewReader
}
// NewReader reads pcap data from an io.Reader.
//
// It consumes the 24-byte file header immediately. The magic number selects
// the byte order used for every later integer: 0xa1b2c3d4 keeps the default
// decoding, 0xd4c3b2a1 flips it. An error is returned when the magic number
// is unknown or when the header cannot be read in full; a truncated file is
// therefore reported here instead of yielding a Reader with a zeroed header.
func NewReader(reader io.Reader) (*Reader, error) {
	r := &Reader{
		buf:          reader,
		fourBytes:    make([]byte, 4),
		twoBytes:     make([]byte, 2),
		sixteenBytes: make([]byte, 16),
	}

	magic := r.readUint32()
	if r.err != nil {
		return nil, fmt.Errorf("pcap: reading magic number: %v", r.err)
	}
	switch magic {
	case 0xa1b2c3d4:
		r.flip = false
	case 0xd4c3b2a1:
		r.flip = true
	default:
		return nil, fmt.Errorf("pcap: bad magic number: %0x", magic)
	}

	// Read the remaining 20 header bytes in one go so that a short read is
	// detected exactly once, instead of being overwritten by later reads.
	rest := make([]byte, 20)
	if r.err = r.read(rest); r.err != nil {
		return nil, fmt.Errorf("pcap: reading file header: %v", r.err)
	}
	r.Header = FileHeader{
		MagicNumber:  0xa1b2c3d4,
		VersionMajor: asUint16(rest[0:2], r.flip),
		VersionMinor: asUint16(rest[2:4], r.flip),
		TimeZone:     int32(asUint32(rest[4:8], r.flip)),
		SigFigs:      asUint32(rest[8:12], r.flip),
		SnapLen:      asUint32(rest[12:16], r.flip),
		Network:      asUint32(rest[16:20], r.flip),
	}
	return r, nil
}
// Next returns the next packet or nil if no more packets can be read.
//
// Each record starts with a 16-byte header: timestamp seconds, timestamp
// microseconds, captured length and original length. The reason for a nil
// result (end of file or a read failure) is kept in r.err.
func (r *Reader) Next() *Packet {
	hdr := r.sixteenBytes
	if r.err = r.read(hdr); r.err != nil {
		return nil
	}
	timeSec := asUint32(hdr[0:4], r.flip)
	timeUsec := asUint32(hdr[4:8], r.flip)
	capLen := asUint32(hdr[8:12], r.flip)
	origLen := asUint32(hdr[12:16], r.flip)

	data := make([]byte, capLen)
	if r.err = r.read(data); r.err != nil {
		return nil
	}
	return &Packet{
		// The file stores microseconds while time.Unix expects nanoseconds,
		// hence the scaling.
		Time:   time.Unix(int64(timeSec), int64(timeUsec)*1000),
		Caplen: capLen,
		Len:    origLen,
		Data:   data,
	}
}
// read fills data completely from the underlying reader.
//
// It returns nil only when len(data) bytes were read. A stream that ends
// before the first byte yields io.EOF; one that ends part-way through yields
// io.ErrUnexpectedEOF, so truncation can be told apart from a clean end.
func (r *Reader) read(data []byte) error {
	_, err := io.ReadFull(r.buf, data)
	return err
}
// readUint32 reads four bytes and decodes them using the reader's byte order.
// On a read failure it returns 0 and leaves the error in r.err.
func (r *Reader) readUint32() uint32 {
	scratch := r.fourBytes
	r.err = r.read(scratch)
	if r.err == nil {
		return asUint32(scratch, r.flip)
	}
	return 0
}
// readInt32 reads four bytes and decodes them as a signed 32-bit integer using
// the reader's byte order. On a read failure it returns 0 and leaves the
// error in r.err.
func (r *Reader) readInt32() int32 {
	scratch := r.fourBytes
	r.err = r.read(scratch)
	if r.err == nil {
		unsigned := asUint32(scratch, r.flip)
		return int32(unsigned)
	}
	return 0
}
// readUint16 reads two bytes and decodes them using the reader's byte order.
// On a read failure it returns 0 and leaves the error in r.err.
func (r *Reader) readUint16() uint16 {
	scratch := r.twoBytes
	r.err = r.read(scratch)
	if r.err == nil {
		return asUint16(scratch, r.flip)
	}
	return 0
}
// Writer writes a pcap file.
type Writer struct {
writer io.Writer // destination of the serialized file
buf []byte // 24-byte scratch buffer: holds the file header in NewWriter, then per-packet record headers in Write
}
// NewWriter creates a Writer that stores output in an io.Writer.
// The FileHeader is written immediately, as 24 little-endian bytes; if that
// write fails no Writer is returned.
func NewWriter(writer io.Writer, header *FileHeader) (*Writer, error) {
	le := binary.LittleEndian
	scratch := make([]byte, 24)

	le.PutUint32(scratch[0:], header.MagicNumber)
	le.PutUint16(scratch[4:], header.VersionMajor)
	le.PutUint16(scratch[6:], header.VersionMinor)
	le.PutUint32(scratch[8:], uint32(header.TimeZone))
	le.PutUint32(scratch[12:], header.SigFigs)
	le.PutUint32(scratch[16:], header.SnapLen)
	le.PutUint32(scratch[20:], header.Network)

	_, err := writer.Write(scratch)
	if err != nil {
		return nil, err
	}
	return &Writer{writer: writer, buf: scratch}, nil
}
// Write writes a packet to the underlying writer: a 16-byte little-endian
// record header (timestamp seconds, timestamp microseconds, captured length,
// original length) followed by the packet data.
func (w *Writer) Write(pkt *Packet) error {
	hdr := w.buf[:16]
	binary.LittleEndian.PutUint32(hdr[0:], uint32(pkt.Time.Unix()))
	// The record format stores the sub-second part in microseconds.
	binary.LittleEndian.PutUint32(hdr[4:], uint32(pkt.Time.Nanosecond()/1000))
	// The captured length must describe exactly the bytes written below,
	// otherwise every following record in the file is misaligned.
	binary.LittleEndian.PutUint32(hdr[8:], uint32(len(pkt.Data)))
	binary.LittleEndian.PutUint32(hdr[12:], pkt.Len)
	if _, err := w.writer.Write(hdr); err != nil {
		return err
	}
	_, err := w.writer.Write(pkt.Data)
	return err
}
// asUint32 decodes the first four bytes of data as an unsigned 32-bit integer:
// big-endian when flip is set, little-endian otherwise.
func asUint32(data []byte, flip bool) uint32 {
	var order binary.ByteOrder = binary.LittleEndian
	if flip {
		order = binary.BigEndian
	}
	return order.Uint32(data)
}
// asUint16 decodes the first two bytes of data as an unsigned 16-bit integer:
// big-endian when flip is set, little-endian otherwise.
func asUint16(data []byte, flip bool) uint16 {
	var order binary.ByteOrder = binary.LittleEndian
	if flip {
		order = binary.BigEndian
	}
	return order.Uint16(data)
}

View File

@@ -0,0 +1,266 @@
// Interface to both live and offline pcap parsing.
package pcap
/*
#cgo linux LDFLAGS: -lpcap
#cgo freebsd LDFLAGS: -lpcap
#cgo darwin LDFLAGS: -lpcap
#cgo windows CFLAGS: -I C:/WpdPack/Include
#cgo windows,386 LDFLAGS: -L C:/WpdPack/Lib -lwpcap
#cgo windows,amd64 LDFLAGS: -L C:/WpdPack/Lib/x64 -lwpcap
#include <stdlib.h>
#include <pcap.h>
// Workaround for not knowing how to cast to const u_char**
int hack_pcap_next_ex(pcap_t *p, struct pcap_pkthdr **pkt_header,
u_char **pkt_data) {
return pcap_next_ex(p, pkt_header, (const u_char **)pkt_data);
}
*/
import "C"
import (
"errors"
"net"
"syscall"
"time"
"unsafe"
)
// Pcap wraps a libpcap capture handle, as returned by Openlive or Openoffline.
type Pcap struct {
cptr *C.pcap_t // underlying C handle; released by Close
}
// Stat holds the capture counters returned by Getstats.
type Stat struct {
PacketsReceived uint32 // copied from pcap_stat.ps_recv
PacketsDropped uint32 // copied from pcap_stat.ps_drop
PacketsIfDropped uint32 // copied from pcap_stat.ps_ifdrop
}
// Interface describes one capture device reported by Findalldevs.
type Interface struct {
Name string // device name as reported by libpcap
Description string // device description as reported by libpcap (may be empty)
Addresses []IFAddress // addresses attached to the device
// TODO: add more elements
}
// IFAddress is one address entry of an Interface.
type IFAddress struct {
IP net.IP // 4 bytes for AF_INET addresses, 16 bytes for AF_INET6
Netmask net.IPMask // netmask associated with IP
// TODO: add broadcast + PtP dst ?
}
// Next returns the packet produced by NextEx and discards its result code.
// The return value is nil whenever NextEx delivered no packet.
func (p *Pcap) Next() (pkt *Packet) {
	pkt, _ = p.NextEx()
	return pkt
}
// Openlive opens a device and returns a *Pcap handler.
//
// snaplen, promisc and timeout_ms are passed through to pcap_open_live. On
// failure the handle is nil and err carries the message libpcap wrote into
// its error buffer.
func Openlive(device string, snaplen int32, promisc bool, timeout_ms int32) (handle *Pcap, err error) {
	errbuf := (*C.char)(C.calloc(ERRBUF_SIZE, 1))
	defer C.free(unsafe.Pointer(errbuf))

	cdev := C.CString(device)
	defer C.free(unsafe.Pointer(cdev))

	// pcap_open_live takes the promiscuous flag as an int.
	promiscFlag := C.int(0)
	if promisc {
		promiscFlag = 1
	}

	cptr := C.pcap_open_live(cdev, C.int(snaplen), promiscFlag, C.int(timeout_ms), errbuf)
	if cptr == nil {
		return nil, errors.New(C.GoString(errbuf))
	}
	return &Pcap{cptr: cptr}, nil
}
// Openoffline opens a saved capture file and returns a *Pcap handler.
// On failure the handle is nil and err carries the message libpcap wrote into
// its error buffer.
func Openoffline(file string) (handle *Pcap, err error) {
	errbuf := (*C.char)(C.calloc(ERRBUF_SIZE, 1))
	defer C.free(unsafe.Pointer(errbuf))

	cfile := C.CString(file)
	defer C.free(unsafe.Pointer(cfile))

	cptr := C.pcap_open_offline(cfile, errbuf)
	if cptr == nil {
		return nil, errors.New(C.GoString(errbuf))
	}
	return &Pcap{cptr: cptr}, nil
}
// NextEx fetches the next packet through pcap_next_ex and returns it together
// with the raw result code of that call. pkt is nil whenever libpcap handed
// back no data pointer. The packet bytes are copied into Go memory, so the
// returned Packet does not alias libpcap's buffer.
func (p *Pcap) NextEx() (pkt *Packet, result int32) {
	var hdr *C.struct_pcap_pkthdr
	var data *C.u_char

	result = int32(C.hack_pcap_next_ex(p.cptr, &hdr, &data))
	if data == nil {
		return nil, result
	}

	pkt = &Packet{
		// tv_usec is in microseconds; time.Unix wants nanoseconds.
		Time:   time.Unix(int64(hdr.ts.tv_sec), int64(hdr.ts.tv_usec)*1000),
		Caplen: uint32(hdr.caplen),
		Len:    uint32(hdr.len),
		Data:   C.GoBytes(unsafe.Pointer(data), C.int(hdr.caplen)),
	}
	return pkt, result
}
// Close releases the underlying libpcap handle via pcap_close.
func (p *Pcap) Close() {
C.pcap_close(p.cptr)
}
// Geterror wraps the handle's current pcap_geterr message in a Go error.
// The result is never nil, even when the message is empty.
func (p *Pcap) Geterror() error {
return errors.New(C.GoString(C.pcap_geterr(p.cptr)))
}
// Getstats returns the capture counters reported by pcap_stats. When that
// call fails, stat is nil and err carries the handle's error message.
func (p *Pcap) Getstats() (stat *Stat, err error) {
	// Use the documented cgo spelling C.struct_pcap_stat: the mangled
	// _Ctype_ form is rejected by cgo in Go 1.12 and later.
	var cstats C.struct_pcap_stat
	if C.pcap_stats(p.cptr, &cstats) == -1 {
		return nil, p.Geterror()
	}
	return &Stat{
		PacketsReceived:  uint32(cstats.ps_recv),
		PacketsDropped:   uint32(cstats.ps_drop),
		PacketsIfDropped: uint32(cstats.ps_ifdrop),
	}, nil
}
// Setfilter compiles the filter expression expr and installs it on the
// handle. It returns the handle's error message if either compiling or
// installing the filter fails.
func (p *Pcap) Setfilter(expr string) (err error) {
	cexpr := C.CString(expr)
	defer C.free(unsafe.Pointer(cexpr))

	// Use the documented cgo spelling C.struct_bpf_program: the mangled
	// _Ctype_ form is rejected by cgo in Go 1.12 and later.
	var bpf C.struct_bpf_program
	if C.pcap_compile(p.cptr, &bpf, cexpr, 1, 0) == -1 {
		return p.Geterror()
	}
	// The compiled program is released on both the success and failure paths.
	defer C.pcap_freecode(&bpf)

	if C.pcap_setfilter(p.cptr, &bpf) == -1 {
		return p.Geterror()
	}
	return nil
}
// Version returns the string reported by pcap_lib_version.
func Version() string {
return C.GoString(C.pcap_lib_version())
}
// Datalink returns the handle's link-layer type as reported by pcap_datalink.
func (p *Pcap) Datalink() int {
return int(C.pcap_datalink(p.cptr))
}
// Setdatalink switches the handle to link-layer type dlt through
// pcap_set_datalink and returns the handle's error message when that fails.
func (p *Pcap) Setdatalink(dlt int) error {
	rc := C.pcap_set_datalink(p.cptr, C.int(dlt))
	if rc != -1 {
		return nil
	}
	return p.Geterror()
}
// DatalinkValueToName returns the name libpcap associates with link-layer
// type dlt, or the empty string when libpcap returns no name.
func DatalinkValueToName(dlt int) string {
	name := C.pcap_datalink_val_to_name(C.int(dlt))
	if name == nil {
		return ""
	}
	return C.GoString(name)
}
// DatalinkValueToDescription returns the description libpcap associates with
// link-layer type dlt, or the empty string when libpcap returns none.
func DatalinkValueToDescription(dlt int) string {
	desc := C.pcap_datalink_val_to_description(C.int(dlt))
	if desc == nil {
		return ""
	}
	return C.GoString(desc)
}
// Findalldevs lists the capture devices known to libpcap, with the addresses
// attached to each. When pcap_findalldevs fails, ifs is nil and err carries
// the message from libpcap's error buffer.
func Findalldevs() (ifs []Interface, err error) {
	errbuf := (*C.char)(C.calloc(ERRBUF_SIZE, 1))
	defer C.free(unsafe.Pointer(errbuf))

	var head *C.pcap_if_t
	if C.pcap_findalldevs(&head, errbuf) == -1 {
		return nil, errors.New(C.GoString(errbuf))
	}
	defer C.pcap_freealldevs(head)

	// First pass: count the list entries so the slice is sized exactly.
	count := 0
	for d := head; d != nil; d = (*C.pcap_if_t)(d.next) {
		count++
	}

	// Second pass: copy every entry into Go memory before the list is freed.
	ifs = make([]Interface, 0, count)
	for d := head; d != nil; d = (*C.pcap_if_t)(d.next) {
		// TODO: add more elements
		ifs = append(ifs, Interface{
			Name:        C.GoString(d.name),
			Description: C.GoString(d.description),
			Addresses:   findalladdresses(d.addresses),
		})
	}
	return ifs, nil
}
func findalladdresses(addresses *_Ctype_struct_pcap_addr) (retval []IFAddress) {
// TODO - make it support more than IPv4 and IPv6?
retval = make([]IFAddress, 0, 1)
for curaddr := addresses; curaddr != nil; curaddr = (*_Ctype_struct_pcap_addr)(curaddr.next) {
var a IFAddress
var err error
if a.IP, err = sockaddr_to_IP((*syscall.RawSockaddr)(unsafe.Pointer(curaddr.addr))); err != nil {
continue
}
if a.Netmask, err = sockaddr_to_IP((*syscall.RawSockaddr)(unsafe.Pointer(curaddr.addr))); err != nil {
continue
}
retval = append(retval, a)
}
return
}
// sockaddr_to_IP copies the address bytes out of a raw socket address:
// 4 bytes for AF_INET, 16 bytes for AF_INET6. Any other family yields a nil
// slice and an error.
func sockaddr_to_IP(rsa *syscall.RawSockaddr) (IP []byte, err error) {
	if rsa.Family == syscall.AF_INET {
		v4 := (*syscall.RawSockaddrInet4)(unsafe.Pointer(rsa))
		IP = make([]byte, 4)
		copy(IP, v4.Addr[:])
		return IP, nil
	}
	if rsa.Family == syscall.AF_INET6 {
		v6 := (*syscall.RawSockaddrInet6)(unsafe.Pointer(rsa))
		IP = make([]byte, 16)
		copy(IP, v6.Addr[:])
		return IP, nil
	}
	return nil, errors.New("Unsupported address type")
}
// Inject sends data as a single packet through pcap_sendpacket and returns
// the handle's error message if that call fails. The bytes are first copied
// into a temporary C buffer, which is freed before returning.
func (p *Pcap) Inject(data []byte) (err error) {
	cbuf := (*C.char)(C.malloc((C.size_t)(len(data))))
	defer C.free(unsafe.Pointer(cbuf))

	// Byte-wise copy from Go memory into the C allocation.
	for i, b := range data {
		*(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(cbuf)) + uintptr(i))) = b
	}

	rc := C.pcap_sendpacket(p.cptr, (*C.u_char)(unsafe.Pointer(cbuf)), (C.int)(len(data)))
	if rc == -1 {
		err = p.Geterror()
	}
	return err
}

View File

@@ -0,0 +1,49 @@
package main
import (
"flag"
"fmt"
"os"
"runtime/pprof"
"time"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/akrennmair/gopcap"
)
// main times how long it takes to iterate over (and optionally decode) every
// packet of a capture file.
//
// Flags:
//
//	-file        capture file to read
//	-d           decode each packet
//	-cpuprofile  write a CPU profile to this file
func main() {
	filename := flag.String("file", "", "filename")
	decode := flag.Bool("d", false, "If true, decode each packet")
	cpuprofile := flag.String("cpuprofile", "", "filename")
	flag.Parse()

	h, err := pcap.Openoffline(*filename)
	if err != nil {
		fmt.Printf("Couldn't create pcap reader: %v", err)
		// Without a handle there is nothing to benchmark; continuing would
		// dereference a nil *pcap.Pcap.
		return
	}
	defer h.Close()

	if *cpuprofile != "" {
		out, err := os.Create(*cpuprofile)
		if err != nil {
			panic(err)
		}
		if err := pprof.StartCPUProfile(out); err != nil {
			panic(err)
		}
		defer func() {
			pprof.StopCPUProfile()
			out.Close()
		}()
	}

	i, nilPackets := 0, 0
	start := time.Now()
	// Every negative result code ends the loop: -2 is the end of the file
	// and -1 is a read error, which would otherwise be retried forever.
	for pkt, code := h.NextEx(); code >= 0; pkt, code = h.NextEx() {
		if pkt == nil {
			nilPackets++
		} else if *decode {
			pkt.Decode()
		}
		i++
	}
	duration := time.Since(start)

	// Guard the per-packet average against an empty capture.
	if i == 0 {
		fmt.Printf("Took %v to process 0 packets\n", duration)
		return
	}
	fmt.Printf("Took %v to process %v packets, %v per packet, %d nil packets\n", duration, i, duration/time.Duration(i), nilPackets)
}

View File

@@ -0,0 +1,96 @@
package main
// Parses a pcap file, writes it back to disk, then verifies the files
// are the same.
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/akrennmair/gopcap"
)
// Command-line flags: the capture file to read, the file to write the copy
// to, and whether to print every packet in decoded form while copying.
var (
	input  = flag.String("input", "", "input file")
	output = flag.String("output", "", "output file")
	decode = flag.Bool("decode", false, "print decoded packets")
)
// copyPcap reads every packet from the pcap file src and writes it to a new
// pcap file dest, reusing the source file's header. With -decode set, each
// packet is also decoded and printed. Failures are reported on standard
// output and abort the copy.
func copyPcap(dest, src string) {
	f, err := os.Open(src)
	if err != nil {
		fmt.Printf("couldn't open %q: %v\n", src, err)
		return
	}
	defer f.Close()
	reader, err := pcap.NewReader(bufio.NewReader(f))
	if err != nil {
		fmt.Printf("couldn't create reader: %v\n", err)
		return
	}

	w, err := os.Create(dest)
	if err != nil {
		fmt.Printf("couldn't open %q: %v\n", dest, err)
		return
	}
	defer w.Close()
	buf := bufio.NewWriter(w)
	writer, err := pcap.NewWriter(buf, &reader.Header)
	if err != nil {
		fmt.Printf("couldn't create writer: %v\n", err)
		return
	}

	for {
		pkt := reader.Next()
		if pkt == nil {
			break
		}
		if *decode {
			pkt.Decode()
			fmt.Println(pkt.String())
		}
		// A failed write leaves dest incomplete; report it instead of
		// silently producing a truncated copy.
		if err := writer.Write(pkt); err != nil {
			fmt.Printf("couldn't write packet to %q: %v\n", dest, err)
			return
		}
	}
	// Buffered data only reaches the file on Flush, so its error matters too.
	if err := buf.Flush(); err != nil {
		fmt.Printf("couldn't flush %q: %v\n", dest, err)
	}
}
// check compares the files src and dest byte by byte and prints "PASS" when
// they are identical, "FAIL" otherwise. A file that cannot be opened is
// reported by name and no verdict is printed.
func check(dest, src string) {
	f, err := os.Open(src)
	if err != nil {
		fmt.Printf("couldn't open %q: %v\n", src, err)
		return
	}
	defer f.Close()
	freader := bufio.NewReader(f)

	g, err := os.Open(dest)
	if err != nil {
		fmt.Printf("couldn't open %q: %v\n", dest, err)
		return
	}
	defer g.Close()
	greader := bufio.NewReader(g)

	for {
		fb, ferr := freader.ReadByte()
		gb, gerr := greader.ReadByte()
		if ferr == io.EOF && gerr == io.EOF {
			break
		}
		// Any remaining error means one file ended early or could not be
		// read. ReadByte yields 0 in that case, so comparing the bytes alone
		// would wrongly accept a file padded with trailing NUL bytes.
		if ferr != nil || gerr != nil || fb != gb {
			fmt.Println("FAIL")
			return
		}
	}
	fmt.Println("PASS")
}
// main copies the -input capture to -output and then verifies that the two
// files are byte-for-byte identical.
func main() {
flag.Parse()
copyPcap(*output, *input)
check(*output, *input)
}

View File

@@ -0,0 +1,82 @@
package main
import (
"flag"
"fmt"
"time"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/akrennmair/gopcap"
)
// min returns the smaller of x and y.
func min(x uint32, y uint32) uint32 {
	if y <= x {
		return y
	}
	return x
}
// main lists the available capture devices, opens either a live device (-d)
// or a capture file (-r), optionally installs a filter expression (-e), and
// prints every packet as a timestamp line followed by an ASCII dump of the
// captured bytes, 32 per line.
func main() {
	device := flag.String("d", "", "device")
	file := flag.String("r", "", "file")
	expr := flag.String("e", "", "filter expression")
	flag.Parse()

	var h *pcap.Pcap
	var err error

	ifs, err := pcap.Findalldevs()
	if len(ifs) == 0 {
		// %v rather than %s: err may be nil when the list is merely empty.
		fmt.Printf("Warning: no devices found : %v\n", err)
	} else {
		for i := 0; i < len(ifs); i++ {
			fmt.Printf("dev %d: %s (%s)\n", i+1, ifs[i].Name, ifs[i].Description)
		}
	}

	switch {
	case *device != "":
		h, err = pcap.Openlive(*device, 65535, true, 0)
		if h == nil {
			fmt.Printf("Openlive(%s) failed: %s\n", *device, err)
			return
		}
	case *file != "":
		h, err = pcap.Openoffline(*file)
		if h == nil {
			fmt.Printf("Openoffline(%s) failed: %s\n", *file, err)
			return
		}
	default:
		fmt.Printf("usage: pcaptest [-d <device> | -r <file>]\n")
		return
	}
	defer h.Close()

	fmt.Printf("pcap version: %s\n", pcap.Version())

	if *expr != "" {
		fmt.Printf("Setting filter: %s\n", *expr)
		if err := h.Setfilter(*expr); err != nil {
			fmt.Printf("Warning: setting filter failed: %s\n", err)
		}
	}

	for pkt := h.Next(); pkt != nil; pkt = h.Next() {
		// Print seconds since the epoch and the microsecond remainder.
		// Time.Second() is only the 0-59 seconds-within-the-minute component
		// and Nanosecond() does not fit a six-digit microsecond field.
		sec := pkt.Time.Unix()
		usec := pkt.Time.Nanosecond() / 1000
		fmt.Printf("time: %d.%06d (%s) caplen: %d len: %d\nData:",
			sec, usec, time.Unix(sec, 0).String(), int64(pkt.Caplen), int64(pkt.Len))
		for i, c := range pkt.Data {
			if i%32 == 0 {
				fmt.Printf("\n")
			}
			// Printable ASCII is shown as-is, everything else as a dot.
			if 32 <= c && c <= 126 {
				fmt.Printf("%c", c)
			} else {
				fmt.Printf(".")
			}
		}
		fmt.Printf("\n\n")
	}
}

View File

@@ -0,0 +1,121 @@
package main
import (
"bufio"
"flag"
"fmt"
"os"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/akrennmair/gopcap"
)
// Protocol identifiers. The TYPE_* values match the registered EtherType
// numbers for IPv4, ARP and IPv6; the IP_* values match the IP protocol
// numbers for ICMP, IP-in-IP, TCP and UDP.
// NOTE(review): none of these constants is referenced elsewhere in this file.
const (
TYPE_IP = 0x0800
TYPE_ARP = 0x0806
TYPE_IP6 = 0x86DD
IP_ICMP = 1
IP_INIP = 4
IP_TCP = 6
IP_UDP = 17
)
// out and errout are buffered writers over os.Stdout and os.Stderr. Both are
// initialized in main and must be flushed explicitly for output to appear.
var out *bufio.Writer
var errout *bufio.Writer
// main is a minimal tcpdump-like tool: it opens a live capture on the chosen
// interface (or the first one found), optionally installs the filter
// expression given as the first positional argument, and prints one decoded
// line per packet, followed by a hex dump when -X is set.
func main() {
	device := flag.String("i", "", "interface")
	snaplen := flag.Int("s", 65535, "snaplen")
	hexdump := flag.Bool("X", false, "hexdump")
	expr := ""

	out = bufio.NewWriter(os.Stdout)
	errout = bufio.NewWriter(os.Stderr)

	flag.Usage = func() {
		fmt.Fprintf(errout, "usage: %s [ -i interface ] [ -s snaplen ] [ -X ] [ expression ]\n", os.Args[0])
		errout.Flush()
		os.Exit(1)
	}

	flag.Parse()

	if len(flag.Args()) > 0 {
		expr = flag.Arg(0)
	}

	if *device == "" {
		devs, err := pcap.Findalldevs()
		if err != nil {
			fmt.Fprintf(errout, "tcpdump: couldn't find any devices: %s\n", err)
			// errout is buffered; flush so the message is not lost.
			errout.Flush()
		}
		if 0 == len(devs) {
			flag.Usage()
			// Usage exits the process; the return only guards the index
			// below should Usage ever be changed to return.
			return
		}
		*device = devs[0].Name
	}

	h, err := pcap.Openlive(*device, int32(*snaplen), true, 0)
	if h == nil {
		fmt.Fprintf(errout, "tcpdump: %s\n", err)
		errout.Flush()
		return
	}
	defer h.Close()

	if expr != "" {
		// A rejected filter is a diagnostic, so it goes to stderr like the
		// other errors; capturing then continues unfiltered.
		if ferr := h.Setfilter(expr); ferr != nil {
			fmt.Fprintf(errout, "tcpdump: %s\n", ferr)
			errout.Flush()
		}
	}

	for pkt := h.Next(); pkt != nil; pkt = h.Next() {
		pkt.Decode()
		fmt.Fprintf(out, "%s\n", pkt.String())
		if *hexdump {
			Hexdump(pkt)
		}
		out.Flush()
	}
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// Hexdump prints the packet's captured bytes through Dumpline, 16 bytes per
// line, passing each line's starting offset as its address.
func Hexdump(pkt *pcap.Packet) {
	data := pkt.Data
	for offset := 0; offset < len(data); offset += 16 {
		end := min(offset+16, len(data))
		Dumpline(uint32(offset), data[offset:end])
	}
}
// Dumpline writes one hex-dump line to out: the address, up to 16 bytes of
// line in hex with a separator before every even-indexed byte, padding for a
// short line, and finally the same bytes as ASCII with non-printable bytes
// shown as ".".
// NOTE(review): the widths of the space-only string literals below could not
// be confirmed against an original copy of this file -- verify the column
// alignment before relying on it.
func Dumpline(addr uint32, line []byte) {
fmt.Fprintf(out, "\t0x%04x: ", int32(addr))
var i uint16
// Hex column.
for i = 0; i < 16 && i < uint16(len(line)); i++ {
if i%2 == 0 {
out.WriteString(" ")
}
fmt.Fprintf(out, "%02x", line[i])
}
// Padding for the byte positions a short line does not fill; i still holds
// the number of bytes printed above.
for j := i; j <= 16; j++ {
if j%2 == 0 {
out.WriteString(" ")
}
out.WriteString(" ")
}
out.WriteString(" ")
// ASCII column: bytes 32..126 are printed as-is.
for i = 0; i < 16 && i < uint16(len(line)); i++ {
if line[i] >= 32 && line[i] <= 126 {
fmt.Fprintf(out, "%c", line[i])
} else {
out.WriteString(".")
}
}
out.WriteString("\n")
}

View File

@@ -4,7 +4,7 @@
// Original code is based on code by RogerV in the golang-nuts thread:
// https://groups.google.com/group/golang-nuts/browse_thread/thread/40cc41e9d9fc9247
// +build darwin freebsd linux netbsd openbsd
// +build darwin freebsd linux netbsd openbsd solaris
package speakeasy
@@ -19,9 +19,8 @@ import (
const sttyArg0 = "/bin/stty"
var (
sttyArgvEOff []string = []string{"stty", "-echo"}
sttyArgvEOn []string = []string{"stty", "echo"}
ws syscall.WaitStatus = 0
sttyArgvEOff = []string{"stty", "-echo"}
sttyArgvEOn = []string{"stty", "echo"}
)
// getPassword gets input hidden from the terminal from a user. This is
@@ -47,10 +46,11 @@ func getPassword() (password string, err error) {
}
// Turn on the terminal echo and stop listening for signals.
defer signal.Stop(sig)
defer close(brk)
defer echoOn(fd)
syscall.Wait4(pid, &ws, 0, nil)
syscall.Wait4(pid, nil, 0, nil)
line, err := readline()
if err == nil {
@@ -76,7 +76,7 @@ func echoOn(fd []uintptr) {
// Turn on the terminal echo.
pid, e := syscall.ForkExec(sttyArg0, sttyArgvEOn, &syscall.ProcAttr{Dir: "", Files: fd})
if e == nil {
syscall.Wait4(pid, &ws, 0, nil)
syscall.Wait4(pid, nil, 0, nil)
}
}

View File

@@ -1,3 +1,4 @@
*.prof
*.test
*.swp
/bin/

View File

@@ -1,8 +1,8 @@
Bolt [![Build Status](https://drone.io/github.com/boltdb/bolt/status.png)](https://drone.io/github.com/boltdb/bolt/latest) [![Coverage Status](https://coveralls.io/repos/boltdb/bolt/badge.png?branch=master)](https://coveralls.io/r/boltdb/bolt?branch=master) [![GoDoc](https://godoc.org/github.com/boltdb/bolt?status.png)](https://godoc.org/github.com/boltdb/bolt) ![Version](http://img.shields.io/badge/version-1.0-green.png)
====
Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas] and
the [LMDB project][lmdb]. The goal of the project is to provide a simple,
Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas]
[LMDB project][lmdb]. The goal of the project is to provide a simple,
fast, and reliable database for projects that don't require a full database
server such as Postgres or MySQL.
@@ -87,6 +87,11 @@ are not thread safe. To work with data in multiple goroutines you must start
a transaction for each one or use locking to ensure only one goroutine accesses
a transaction at a time. Creating transaction from the `DB` is thread safe.
Read-only transactions and read-write transactions should not depend on one
another and generally shouldn't be opened simultaneously in the same goroutine.
This can cause a deadlock as the read-write transaction needs to periodically
re-map the data file but it cannot do so while a read-only transaction is open.
#### Read-write transactions
@@ -175,8 +180,8 @@ and then safely close your transaction if an error is returned. This is the
recommended way to use Bolt transactions.
However, sometimes you may want to manually start and end your transactions.
You can use the `Tx.Begin()` function directly but _please_ be sure to close the
transaction.
You can use the `Tx.Begin()` function directly but **please** be sure to close
the transaction.
```go
// Start a writable transaction.
@@ -251,7 +256,7 @@ db.View(func(tx *bolt.Tx) error {
```
The `Get()` function does not return an error because its operation is
guarenteed to work (unless there is some kind of system failure). If the key
guaranteed to work (unless there is some kind of system failure). If the key
exists then it will return its byte slice value. If it doesn't exist then it
will return `nil`. It's important to note that you can have a zero-length value
set to a key which is different than the key not existing.
@@ -263,6 +268,50 @@ transaction is open. If you need to use a value outside of the transaction
then you must use `copy()` to copy it to another byte slice.
### Autoincrementing integer for the bucket
By using the NextSequence() function, you can let Bolt determine a sequence
which can be used as the unique identifier for your key/value pairs. See the
example below.
```go
// CreateUser saves u to the store. The new user ID is set on u once the data is persisted.
func (s *Store) CreateUser(u *User) error {
return s.db.Update(func(tx *bolt.Tx) error {
// Retrieve the users bucket.
// This should be created when the DB is first opened.
b := tx.Bucket([]byte("users"))
// Generate ID for the user.
// This returns an error only if the Tx is closed or not writeable.
// That can't happen in an Update() call so I ignore the error check.
id, _ := b.NextSequence()
u.ID = int(id)
// Marshal user data into bytes.
buf, err := json.Marshal(u)
if err != nil {
return err
}
// Persist bytes to users bucket.
return b.Put(itob(u.ID), buf)
})
}
// itob returns an 8-byte big endian representation of v.
func itob(v int) []byte {
b := make([]byte, 8)
binary.BigEndian.PutUint64(b, uint64(v))
return b
}
type User struct {
ID int
...
}
```
### Iterating over keys
Bolt stores its keys in byte-sorted order within a bucket. This makes sequential
@@ -377,8 +426,11 @@ func (*Bucket) DeleteBucket(key []byte) error
Bolt is a single file so it's easy to backup. You can use the `Tx.WriteTo()`
function to write a consistent view of the database to a writer. If you call
this from a read-only transaction, it will perform a hot backup and not block
your other database reads and writes. It will also use `O_DIRECT` when available
to prevent page cache trashing.
your other database reads and writes.
By default, it will use a regular file handle which will utilize the operating
system's page cache. See the [`Tx`](https://godoc.org/github.com/boltdb/bolt#Tx)
documentation for information about optimizing for larger-than-RAM datasets.
One common use case is to backup over HTTP so you can use tools like `cURL` to
do database backups:
@@ -446,6 +498,21 @@ It's also useful to pipe these stats to a service such as statsd for monitoring
or to provide an HTTP endpoint that will perform a fixed-length sample.
### Read-Only Mode
Sometimes it is useful to create a shared, read-only Bolt database. To do this,
set the `Options.ReadOnly` flag when opening your database. Read-only mode
uses a shared lock to allow multiple processes to read from the database but
it will block any processes from opening the database in read-write mode.
```go
db, err := bolt.Open("my.db", 0666, &bolt.Options{ReadOnly: true})
if err != nil {
log.Fatal(err)
}
```
## Resources
For more information on getting started with Bolt, check out the following articles:
@@ -480,7 +547,7 @@ they are libraries bundled into the application, however, their underlying
structure is a log-structured merge-tree (LSM tree). An LSM tree optimizes
random writes by using a write ahead log and multi-tiered, sorted files called
SSTables. Bolt uses a B+tree internally and only a single file. Both approaches
have trade offs.
have trade-offs.
If you require a high random write throughput (>10,000 w/sec) or you need to use
spinning disks then LevelDB could be a good choice. If your application is
@@ -548,7 +615,14 @@ Here are a few things to note when evaluating and using Bolt:
can in memory and will release memory as needed to other processes. This means
that Bolt can show very high memory usage when working with large databases.
However, this is expected and the OS will release memory as needed. Bolt can
handle databases much larger than the available physical RAM.
handle databases much larger than the available physical RAM, provided its
memory-map fits in the process virtual address space. It may be problematic
on 32-bit systems.
* The data structures in the Bolt database are memory mapped so the data file
will be endian specific. This means that you cannot copy a Bolt file from a
little endian machine to a big endian machine and have it work. For most
users this is not a concern since most modern CPUs are little endian.
* Because of the way pages are laid out on disk, Bolt cannot truncate data files
and return free pages back to the disk. Instead, Bolt maintains a free list
@@ -562,12 +636,62 @@ Here are a few things to note when evaluating and using Bolt:
[page-allocation]: https://github.com/boltdb/bolt/issues/308#issuecomment-74811638
## Reading the Source
Bolt is a relatively small code base (<3KLOC) for an embedded, serializable,
transactional key/value database so it can be a good starting point for people
interested in how databases work.
The best places to start are the main entry points into Bolt:
- `Open()` - Initializes the reference to the database. It's responsible for
creating the database if it doesn't exist, obtaining an exclusive lock on the
file, reading the meta pages, & memory-mapping the file.
- `DB.Begin()` - Starts a read-only or read-write transaction depending on the
value of the `writable` argument. This requires briefly obtaining the "meta"
lock to keep track of open transactions. Only one read-write transaction can
exist at a time so the "rwlock" is acquired during the life of a read-write
transaction.
- `Bucket.Put()` - Writes a key/value pair into a bucket. After validating the
arguments, a cursor is used to traverse the B+tree to the page and position
where the key & value will be written. Once the position is found, the bucket
materializes the underlying page and the page's parent pages into memory as
"nodes". These nodes are where mutations occur during read-write transactions.
These changes get flushed to disk during commit.
- `Bucket.Get()` - Retrieves a key/value pair from a bucket. This uses a cursor
to move to the page & position of a key/value pair. During a read-only
transaction, the key and value data is returned as a direct reference to the
underlying mmap file so there's no allocation overhead. For read-write
transactions, this data may reference the mmap file or one of the in-memory
node values.
- `Cursor` - This object is simply for traversing the B+tree of on-disk pages
or in-memory nodes. It can seek to a specific key, move to the first or last
value, or it can move forward or backward. The cursor handles the movement up
and down the B+tree transparently to the end user.
- `Tx.Commit()` - Converts the in-memory dirty nodes and the list of free pages
into pages to be written to disk. Writing to disk then occurs in two phases.
First, the dirty pages are written to disk and an `fsync()` occurs. Second, a
new meta page with an incremented transaction ID is written and another
`fsync()` occurs. This two phase write ensures that partially written data
pages are ignored in the event of a crash since the meta page pointing to them
is never written. Partially written meta pages are invalidated because they
are written with a checksum.
If you have additional notes that could be helpful for others, please submit
them via pull request.
## Other Projects Using Bolt
Below is a list of public, open source projects that use Bolt:
* [Operation Go: A Routine Mission](http://gocode.io) - An online programming game for Golang using Bolt for user accounts and a leaderboard.
* [Bazil](https://github.com/bazillion/bazil) - A file system that lets your data reside where it is most convenient for it to reside.
* [Bazil](https://bazil.org/) - A file system that lets your data reside where it is most convenient for it to reside.
* [DVID](https://github.com/janelia-flyem/dvid) - Added Bolt as optional storage engine and testing it against Basho-tuned leveldb.
* [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics.
* [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects.
@@ -587,5 +711,14 @@ Below is a list of public, open source projects that use Bolt:
* [SkyDB](https://github.com/skydb/sky) - Behavioral analytics database.
* [Seaweed File System](https://github.com/chrislusf/weed-fs) - Highly scalable distributed key~file system with O(1) disk read.
* [InfluxDB](http://influxdb.com) - Scalable datastore for metrics, events, and real-time analytics.
* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data.
* [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system.
* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware.
* [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs.
* [drive](https://github.com/odeke-em/drive) - drive is an unofficial Google Drive command line client for \*NIX operating systems.
* [stow](https://github.com/djherbis/stow) - a persistence manager for objects
backed by boltdb.
* [buckets](https://github.com/joyrexus/buckets) - a bolt wrapper streamlining
simple tx and key scans.
If you are using Bolt in a project please send a pull request to add it to the list.

View File

@@ -20,6 +20,9 @@ import (
// take permanent effect only after a successful return is seen in
// caller.
//
// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
// and DB.MaxBatchDelay, respectively.
//
// Batch is only useful when there are multiple goroutines calling it.
func (db *DB) Batch(fn func(*Tx) error) error {
errCh := make(chan error, 1)

View File

@@ -0,0 +1,9 @@
// +build arm64
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

View File

@@ -4,8 +4,6 @@ import (
"syscall"
)
var odirect = syscall.O_DIRECT
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return syscall.Fdatasync(int(db.file.Fd()))

View File

@@ -11,8 +11,6 @@ const (
msInvalidate // invalidate cached data
)
var odirect int
func msync(db *DB) error {
_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate)
if errno != 0 {

View File

@@ -0,0 +1,9 @@
// +build ppc64le
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

View File

@@ -0,0 +1,9 @@
// +build s390x
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

View File

@@ -1,4 +1,4 @@
// +build !windows,!plan9
// +build !windows,!plan9,!solaris
package bolt
@@ -11,7 +11,7 @@ import (
)
// flock acquires an advisory lock on a file descriptor.
func flock(f *os.File, timeout time.Duration) error {
func flock(f *os.File, exclusive bool, timeout time.Duration) error {
var t time.Time
for {
// If we're beyond our timeout then return an error.
@@ -21,9 +21,13 @@ func flock(f *os.File, timeout time.Duration) error {
} else if timeout > 0 && time.Since(t) > timeout {
return ErrTimeout
}
flag := syscall.LOCK_SH
if exclusive {
flag = syscall.LOCK_EX
}
// Otherwise attempt to obtain an exclusive lock.
err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
err := syscall.Flock(int(f.Fd()), flag|syscall.LOCK_NB)
if err == nil {
return nil
} else if err != syscall.EWOULDBLOCK {
@@ -44,19 +48,26 @@ func funlock(f *os.File) error {
func mmap(db *DB, sz int) error {
// Truncate and fsync to ensure file size metadata is flushed.
// https://github.com/boltdb/bolt/issues/284
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("file resize error: %s", err)
}
if err := db.file.Sync(); err != nil {
return fmt.Errorf("file sync error: %s", err)
if !db.NoGrowSync && !db.readOnly {
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("file resize error: %s", err)
}
if err := db.file.Sync(); err != nil {
return fmt.Errorf("file sync error: %s", err)
}
}
// Map the data file to memory.
b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED)
b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
if err != nil {
return err
}
// Advise the kernel that the mmap is accessed randomly.
if err := madvise(b, syscall.MADV_RANDOM); err != nil {
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
@@ -78,3 +89,12 @@ func munmap(db *DB) error {
db.datasz = 0
return err
}
// madvise passes advice for the memory region backing b to the kernel by
// invoking the madvise system call directly. b must not be empty because the
// address of its first element is handed to the kernel.
//
// NOTE: This function is copied from stdlib because it is not available on darwin.
func madvise(b []byte, advice int) (err error) {
	// The pointer-to-uintptr conversion stays inside the call expression, as
	// required for arguments passed to syscall.Syscall.
	_, _, errno := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
	if errno == 0 {
		return nil
	}
	return errno
}

View File

@@ -0,0 +1,101 @@
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
"github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/sys/unix"
)
// flock acquires an advisory lock on a file descriptor.
//
// The lock is requested with fcntl(F_SETLK): a write lock when exclusive is
// true, a read lock otherwise. F_SETLK does not wait, so the request is
// retried every 50ms while it fails with EAGAIN. If timeout is positive and
// has elapsed since the first attempt, ErrTimeout is returned; with a
// non-positive timeout the retries continue indefinitely. Any error other
// than EAGAIN is returned to the caller unchanged.
func flock(f *os.File, exclusive bool, timeout time.Duration) error {
	var t time.Time
	for {
		// If we're beyond our timeout then return an error.
		// This can only occur after we've attempted a flock once.
		if t.IsZero() {
			t = time.Now()
		} else if timeout > 0 && time.Since(t) > timeout {
			return ErrTimeout
		}

		// Start, Len, Whence and Pid keep their zero values; only the lock
		// type depends on the requested mode.
		lock := syscall.Flock_t{Type: syscall.F_RDLCK}
		if exclusive {
			lock.Type = syscall.F_WRLCK
		}

		err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &lock)
		if err == nil {
			return nil
		} else if err != syscall.EAGAIN {
			return err
		}

		// Wait for a bit and try again.
		time.Sleep(50 * time.Millisecond)
	}
}
// funlock releases an advisory lock on a file descriptor.
//
// It issues fcntl(F_SETLK) with an F_UNLCK request whose Start, Len and
// Whence are all zero, and returns whatever error that call reports.
func funlock(f *os.File) error {
	unlock := syscall.Flock_t{Type: syscall.F_UNLCK}
	return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &unlock)
}
// mmap memory maps a DB's data file.
//
// Unless the database is read-only or NoGrowSync is set, the file is first
// resized to sz bytes and synced. The file is then mapped read-only and
// shared (plus any db.MmapFlags), the kernel is advised that access will be
// random, and the mapping is recorded on db.
func mmap(db *DB, sz int) error {
	// Truncate and fsync to ensure file size metadata is flushed.
	// https://github.com/boltdb/bolt/issues/284
	if !(db.NoGrowSync || db.readOnly) {
		if resizeErr := db.file.Truncate(int64(sz)); resizeErr != nil {
			return fmt.Errorf("file resize error: %s", resizeErr)
		}
		if syncErr := db.file.Sync(); syncErr != nil {
			return fmt.Errorf("file sync error: %s", syncErr)
		}
	}

	// Map the data file to memory.
	fd := int(db.file.Fd())
	mapped, err := unix.Mmap(fd, 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
	if err != nil {
		return err
	}

	// Advise the kernel that the mmap is accessed randomly.
	if adviseErr := unix.Madvise(mapped, syscall.MADV_RANDOM); adviseErr != nil {
		return fmt.Errorf("madvise: %s", adviseErr)
	}

	// Keep the original slice (needed for unmapping) and expose the mapping
	// through a byte array pointer.
	db.dataref = mapped
	db.data = (*[maxMapSize]byte)(unsafe.Pointer(&mapped[0]))
	db.datasz = sz
	return nil
}
// munmap unmaps a DB's data file from memory.
//
// It is a no-op when nothing is mapped. Otherwise the mapping bookkeeping on
// db is cleared whether or not the unmap succeeds, and the unmap error (if
// any) is returned.
func munmap(db *DB) error {
	// Nothing mapped, nothing to do.
	if db.dataref == nil {
		return nil
	}

	// Unmap through the slice that the mapping call originally returned.
	unmapErr := unix.Munmap(db.dataref)
	db.dataref, db.data, db.datasz = nil, nil, 0
	return unmapErr
}

View File

@@ -8,7 +8,37 @@ import (
"unsafe"
)
var odirect int
// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1
var (
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
procLockFileEx = modkernel32.NewProc("LockFileEx")
procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
)
const (
// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
flagLockExclusive = 2
flagLockFailImmediately = 1
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
errLockViolation syscall.Errno = 0x21
)
// lockFileEx invokes the LockFileEx procedure from kernel32.dll with the given
// handle, flags, reserved value, low/high lock length words and overlapped
// structure. A zero result from the call is treated as failure and the error
// reported by the call is returned; any other result yields nil.
func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
	ret, _, callErr := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)))
	if ret != 0 {
		return nil
	}
	return callErr
}
// unlockFileEx invokes the UnlockFileEx procedure from kernel32.dll with the
// given handle, reserved value, low/high lock length words and overlapped
// structure. A zero result from the call is treated as failure and the error
// reported by the call is returned; any other result yields nil.
func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
	ret, _, callErr := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0)
	if ret != 0 {
		return nil
	}
	return callErr
}
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
@@ -16,21 +46,47 @@ func fdatasync(db *DB) error {
}
// flock acquires an advisory lock on a file descriptor.
func flock(f *os.File, _ time.Duration) error {
return nil
func flock(f *os.File, exclusive bool, timeout time.Duration) error {
	var started time.Time
	for {
		// The first pass only records the start time; later passes give up
		// with ErrTimeout once a positive timeout has elapsed.
		switch {
		case started.IsZero():
			started = time.Now()
		case timeout > 0 && time.Since(started) > timeout:
			return ErrTimeout
		}

		// Always ask for an immediate failure instead of blocking, and add
		// the exclusive bit when a write lock is wanted.
		flags := uint32(flagLockFailImmediately)
		if exclusive {
			flags |= flagLockExclusive
		}

		switch err := lockFileEx(syscall.Handle(f.Fd()), flags, 0, 1, 0, &syscall.Overlapped{}); err {
		case nil:
			return nil
		case errLockViolation:
			// Lock is held elsewhere; fall through to the retry below.
		default:
			return err
		}

		// Wait for a bit and try again.
		time.Sleep(50 * time.Millisecond)
	}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(f *os.File) error {
return nil
return unlockFileEx(syscall.Handle(f.Fd()), 0, 1, 0, &syscall.Overlapped{})
}
// mmap memory maps a DB's data file.
// Based on: https://github.com/edsrzf/mmap-go
func mmap(db *DB, sz int) error {
// Truncate the database to the size of the mmap.
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("truncate: %s", err)
if !db.readOnly {
// Truncate the database to the size of the mmap.
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("truncate: %s", err)
}
}
// Open a file mapping handle.

View File

@@ -2,8 +2,6 @@
package bolt
var odirect int
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return db.file.Sync()

View File

@@ -11,7 +11,7 @@ const (
MaxKeySize = 32768
// MaxValueSize is the maximum length of a value, in bytes.
MaxValueSize = 4294967295
MaxValueSize = (1 << 31) - 2
)
const (
@@ -99,6 +99,7 @@ func (b *Bucket) Cursor() *Cursor {
// Bucket retrieves a nested bucket by name.
// Returns nil if the bucket does not exist.
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) Bucket(name []byte) *Bucket {
if b.buckets != nil {
if child := b.buckets[string(name)]; child != nil {
@@ -148,6 +149,7 @@ func (b *Bucket) openBucket(value []byte) *Bucket {
// CreateBucket creates a new bucket at the given key and returns the new bucket.
// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
if b.tx.db == nil {
return nil, ErrTxClosed
@@ -192,6 +194,7 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
// Returns an error if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
child, err := b.CreateBucket(key)
if err == ErrBucketExists {
@@ -270,6 +273,7 @@ func (b *Bucket) Get(key []byte) []byte {
// Put sets the value for a key in the bucket.
// If the key exist then its previous value will be overwritten.
// Supplied value must remain valid for the life of the transaction.
// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
func (b *Bucket) Put(key []byte, value []byte) error {
if b.tx.db == nil {
@@ -346,7 +350,8 @@ func (b *Bucket) NextSequence() (uint64, error) {
// ForEach executes a function for each key/value pair in a bucket.
// If the provided function returns an error then the iteration is stopped and
// the error is returned to the caller.
// the error is returned to the caller. The provided function must not modify
// the bucket; this will result in undefined behavior.
func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
if b.tx.db == nil {
return ErrTxClosed

View File

@@ -253,7 +253,7 @@ func TestBucket_Delete_FreelistOverflow(t *testing.T) {
b := tx.Bucket([]byte("0"))
c := b.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
b.Delete(k)
c.Delete()
}
return nil
})
@@ -640,6 +640,22 @@ func TestBucket_Put_KeyTooLarge(t *testing.T) {
})
}
// Ensure that an error is returned when inserting a value that's too large.
func TestBucket_Put_ValueTooLarge(t *testing.T) {
	// The oversized value is a bolt.MaxValueSize+1 byte allocation, which the
	// DRONE CI environment cannot provide.
	if os.Getenv("DRONE") == "true" {
		t.Skip("not enough RAM for test")
	}

	db := NewTestDB()
	defer db.Close()

	err := db.Update(func(tx *bolt.Tx) error {
		// Fail with the real cause if the bucket cannot be created instead of
		// dereferencing a nil bucket below.
		b, err := tx.CreateBucket([]byte("widgets"))
		if err != nil {
			return err
		}
		err = b.Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1))
		equals(t, err, bolt.ErrValueTooLarge)
		return nil
	})
	if err != nil {
		t.Fatal(err)
	}
}
// Ensure a bucket can calculate stats.
func TestBucket_Stats(t *testing.T) {
db := NewTestDB()

View File

@@ -344,7 +344,7 @@ func (cmd *DumpCommand) Run(args ...string) error {
for i, pageID := range pageIDs {
// Print a separator.
if i > 0 {
fmt.Fprintln(cmd.Stdout, "===============================================\n")
fmt.Fprintln(cmd.Stdout, "===============================================")
}
// Print page to stdout.
@@ -465,7 +465,7 @@ func (cmd *PageCommand) Run(args ...string) error {
for i, pageID := range pageIDs {
// Print a separator.
if i > 0 {
fmt.Fprintln(cmd.Stdout, "===============================================\n")
fmt.Fprintln(cmd.Stdout, "===============================================")
}
// Retrieve page info and page size.
@@ -917,7 +917,7 @@ func (cmd *BenchCommand) Run(args ...string) error {
// Write to the database.
var results BenchResults
if err := cmd.runWrites(db, options, &results); err != nil {
return fmt.Errorf("write: ", err)
return fmt.Errorf("write: %v", err)
}
// Read from the database.

View File

@@ -34,6 +34,13 @@ func (c *Cursor) First() (key []byte, value []byte) {
p, n := c.bucket.pageNode(c.bucket.root)
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
c.first()
// If we land on an empty page then move to the next value.
// https://github.com/boltdb/bolt/issues/450
if c.stack[len(c.stack)-1].count() == 0 {
c.next()
}
k, v, flags := c.keyValue()
if (flags & uint32(bucketLeafFlag)) != 0 {
return k, nil
@@ -209,28 +216,37 @@ func (c *Cursor) last() {
// next moves to the next leaf element and returns the key and value.
// If the cursor is at the last leaf element then it stays there and returns nil.
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
// Attempt to move over one element until we're successful.
// Move up the stack as we hit the end of each page in our stack.
var i int
for i = len(c.stack) - 1; i >= 0; i-- {
elem := &c.stack[i]
if elem.index < elem.count()-1 {
elem.index++
break
for {
// Attempt to move over one element until we're successful.
// Move up the stack as we hit the end of each page in our stack.
var i int
for i = len(c.stack) - 1; i >= 0; i-- {
elem := &c.stack[i]
if elem.index < elem.count()-1 {
elem.index++
break
}
}
}
// If we've hit the root page then stop and return. This will leave the
// cursor on the last element of the last page.
if i == -1 {
return nil, nil, 0
}
// If we've hit the root page then stop and return. This will leave the
// cursor on the last element of the last page.
if i == -1 {
return nil, nil, 0
}
// Otherwise start from where we left off in the stack and find the
// first element of the first leaf page.
c.stack = c.stack[:i+1]
c.first()
return c.keyValue()
// Otherwise start from where we left off in the stack and find the
// first element of the first leaf page.
c.stack = c.stack[:i+1]
c.first()
// If this is an empty page then restart and move back up the stack.
// https://github.com/boltdb/bolt/issues/450
if c.stack[len(c.stack)-1].count() == 0 {
continue
}
return c.keyValue()
}
}
// search recursively performs a binary search against a given page/node until it finds a given key.

View File

@@ -303,6 +303,49 @@ func TestCursor_Restart(t *testing.T) {
tx.Rollback()
}
// Ensure that a cursor can skip over empty pages that have been deleted.
func TestCursor_First_EmptyPages(t *testing.T) {
	const (
		total   = 1000 // keys inserted
		removed = 600  // keys deleted again before iterating
	)

	db := NewTestDB()
	defer db.Close()

	// Populate the "widgets" bucket.
	db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucket([]byte("widgets"))
		if err != nil {
			t.Fatal(err)
		}
		for i := uint64(0); i < total; i++ {
			if err := b.Put(u64tob(i), []byte{}); err != nil {
				t.Fatal(err)
			}
		}
		return nil
	})

	// Delete keys 0..removed-1 and then iterate over what is left.
	db.Update(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte("widgets"))
		for i := uint64(0); i < removed; i++ {
			if err := b.Delete(u64tob(i)); err != nil {
				t.Fatal(err)
			}
		}

		remaining := 0
		c := b.Cursor()
		for k, _ := c.First(); k != nil; k, _ = c.Next() {
			remaining++
		}
		if remaining != total-removed {
			t.Fatalf("unexpected key count: %d", remaining)
		}
		return nil
	})
}
// Ensure that a Tx can iterate over all elements in a bucket.
func TestCursor_QuickCheck(t *testing.T) {
f := func(items testdata) bool {

View File

@@ -55,6 +55,18 @@ type DB struct {
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool
// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
// bypasses a truncate() and fsync() syscall on remapping.
//
// https://github.com/boltdb/bolt/issues/284
NoGrowSync bool
// If you want to read the entire database fast, you can set MmapFlags to
// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
MmapFlags int
// MaxBatchSize is the maximum size of a batch. Default value is
// copied from DefaultMaxBatchSize in Open.
//
@@ -96,6 +108,10 @@ type DB struct {
ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}
// Read only mode.
// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
readOnly bool
}
// Path returns the path to currently open database file.
@@ -123,24 +139,35 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
if options == nil {
options = DefaultOptions
}
db.NoGrowSync = options.NoGrowSync
db.MmapFlags = options.MmapFlags
// Set default values for later DB operations.
db.MaxBatchSize = DefaultMaxBatchSize
db.MaxBatchDelay = DefaultMaxBatchDelay
flag := os.O_RDWR
if options.ReadOnly {
flag = os.O_RDONLY
db.readOnly = true
}
// Open data file and separate sync handler for metadata writes.
db.path = path
var err error
if db.file, err = os.OpenFile(db.path, os.O_RDWR|os.O_CREATE, mode); err != nil {
if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
_ = db.close()
return nil, err
}
// Lock file so that other processes using Bolt cannot use the database
// at the same time. This would cause corruption since the two processes
// would write meta pages and free pages separately.
if err := flock(db.file, options.Timeout); err != nil {
// Lock file so that other processes using Bolt in read-write mode cannot
// use the database at the same time. This would cause corruption since
// the two processes would write meta pages and free pages separately.
// The database file is locked exclusively (only one process can grab the lock)
// if !options.ReadOnly.
// The database file is locked using the shared lock (more than one process may
// hold a lock at the same time) otherwise (options.ReadOnly is set).
if err := flock(db.file, !db.readOnly, options.Timeout); err != nil {
_ = db.close()
return nil, err
}
@@ -247,8 +274,8 @@ func (db *DB) munmap() error {
// of the database. The minimum size is 1MB and doubles until it reaches 1GB.
// Returns an error if the new mmap size is greater than the max allowed.
func (db *DB) mmapSize(size int) (int, error) {
// Double the size from 1MB until 1GB.
for i := uint(20); i <= 30; i++ {
// Double the size from 32KB until 1GB.
for i := uint(15); i <= 30; i++ {
if size <= 1<<i {
return 1 << i, nil
}
@@ -329,8 +356,15 @@ func (db *DB) init() error {
// Close releases all database resources.
// All transactions must be closed before closing the database.
//
// Close acquires rwlock, metalock and mmaplock (in that order) before
// releasing resources. mmaplock is taken exclusively: a read lock would be
// granted alongside other read-lock holders, so it could not make Close wait
// for them.
// NOTE(review): presumably read-only transactions hold mmaplock's read lock
// for their lifetime, which is what makes the exclusive lock wait for them —
// confirm against beginTx/removeTx.
func (db *DB) Close() error {
	db.rwlock.Lock()
	defer db.rwlock.Unlock()

	db.metalock.Lock()
	defer db.metalock.Unlock()

	db.mmaplock.Lock()
	defer db.mmaplock.Unlock()

	return db.close()
}
@@ -350,8 +384,11 @@ func (db *DB) close() error {
// Close file handles.
if db.file != nil {
// Unlock the file.
_ = funlock(db.file)
// No need to unlock read-only file.
if !db.readOnly {
// Unlock the file.
_ = funlock(db.file)
}
// Close the file descriptor.
if err := db.file.Close(); err != nil {
@@ -369,6 +406,11 @@ func (db *DB) close() error {
// will cause the calls to block and be serialized until the current write
// transaction finishes.
//
// Transactions should not be dependent on one another. Opening a read
// transaction and a write transaction in the same goroutine can cause the
// writer to deadlock because the database periodically needs to re-mmap itself
// as it grows and it cannot do that while a read transaction is open.
//
// IMPORTANT: You must close read-only transactions after you are finished or
// else the database will not reclaim old pages.
func (db *DB) Begin(writable bool) (*Tx, error) {
@@ -417,6 +459,11 @@ func (db *DB) beginTx() (*Tx, error) {
}
func (db *DB) beginRWTx() (*Tx, error) {
// If the database was opened with Options.ReadOnly, return an error.
if db.readOnly {
return nil, ErrDatabaseReadOnly
}
// Obtain writer lock. This is released by the transaction when it closes.
// This enforces only one writer transaction at a time.
db.rwlock.Lock()
@@ -547,6 +594,12 @@ func (db *DB) View(fn func(*Tx) error) error {
return nil
}
// Sync executes fdatasync() against the database file handle.
//
// This is not necessary under normal operation, however, if you use NoSync
// then it allows you to force the database file to sync against the disk.
func (db *DB) Sync() error { return fdatasync(db) }
// Stats retrieves ongoing performance stats for the database.
// This is only updated when a transaction closes.
func (db *DB) Stats() Stats {
@@ -607,18 +660,33 @@ func (db *DB) allocate(count int) (*page, error) {
return p, nil
}
// IsReadOnly reports whether the database is in read-only mode, i.e. whether
// its readOnly flag is set.
func (db *DB) IsReadOnly() bool {
return db.readOnly
}
// Options represents the options that can be set when opening a database.
type Options struct {
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely. The Unix, Solaris and
// Windows flock implementations retry until this timeout elapses and then
// return ErrTimeout.
Timeout time.Duration
// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool
// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX). Starting a read-write transaction on a
// database opened this way returns ErrDatabaseReadOnly.
ReadOnly bool
// Sets the DB.MmapFlags flag before memory mapping the file. The Unix and
// Solaris mmap implementations OR it into the flags passed to mmap.
MmapFlags int
}
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
var DefaultOptions = &Options{
Timeout: 0,
Timeout: 0,
NoGrowSync: false,
}
// Stats represents statistics about the database.

View File

@@ -39,8 +39,8 @@ func TestOpen(t *testing.T) {
// Ensure that opening an already open database file will timeout.
func TestOpen_Timeout(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("timeout not supported on windows")
if runtime.GOOS == "solaris" {
t.Skip("solaris fcntl locks don't support intra-process locking")
}
path := tempfile()
@@ -63,8 +63,8 @@ func TestOpen_Timeout(t *testing.T) {
// Ensure that opening an already open database file will wait until its closed.
func TestOpen_Wait(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("timeout not supported on windows")
if runtime.GOOS == "solaris" {
t.Skip("solaris fcntl locks don't support intra-process locking")
}
path := tempfile()
@@ -224,6 +224,80 @@ func TestDB_Open_FileTooSmall(t *testing.T) {
equals(t, errors.New("file size too small"), err)
}
// Ensure that a database can be opened in read-only mode by multiple processes
// and that a database can not be opened in read-write mode and in read-only
// mode at the same time.
func TestOpen_ReadOnly(t *testing.T) {
	if runtime.GOOS == "solaris" {
		t.Skip("solaris fcntl locks don't support intra-process locking")
	}

	bucket, key, value := []byte(`bucket`), []byte(`key`), []byte(`value`)

	path := tempfile()
	defer os.Remove(path)

	// Open in read-write mode. The open error is checked before db is used so
	// that a failed open is reported as such rather than as a nil dereference.
	db, err := bolt.Open(path, 0666, nil)
	ok(t, err)
	assert(t, db != nil, "")
	assert(t, !db.IsReadOnly(), "")

	// Seed the database with a single key for the read-only handles to verify.
	ok(t, db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucket(bucket)
		if err != nil {
			return err
		}
		return b.Put(key, value)
	}))
	ok(t, db.Close())

	// Open in read-only mode.
	db0, err := bolt.Open(path, 0666, &bolt.Options{ReadOnly: true})
	ok(t, err)
	defer db0.Close()

	// Opening in read-write mode should return an error.
	_, err = bolt.Open(path, 0666, &bolt.Options{Timeout: time.Millisecond * 100})
	assert(t, err != nil, "")

	// And again (in read-only mode).
	db1, err := bolt.Open(path, 0666, &bolt.Options{ReadOnly: true})
	ok(t, err)
	defer db1.Close()

	// Verify both read-only databases are accessible.
	for _, db := range []*bolt.DB{db0, db1} {
		// Verify it is in read-only mode indeed.
		assert(t, db.IsReadOnly(), "")

		// Read-only databases should not allow updates.
		assert(t,
			bolt.ErrDatabaseReadOnly == db.Update(func(*bolt.Tx) error {
				panic(`should never get here`)
			}),
			"")

		// Read-only databases should not allow beginning writable txns.
		_, err = db.Begin(true)
		assert(t, bolt.ErrDatabaseReadOnly == err, "")

		// Verify the data.
		ok(t, db.View(func(tx *bolt.Tx) error {
			b := tx.Bucket(bucket)
			if b == nil {
				return fmt.Errorf("expected bucket `%s`", string(bucket))
			}

			got := string(b.Get(key))
			expected := string(value)
			if got != expected {
				return fmt.Errorf("expected `%s`, got `%s`", expected, got)
			}
			return nil
		}))
	}
}
// TODO(benbjohnson): Test corruption at every byte of the first two pages.
// Ensure that a database cannot open a transaction when it's not open.
@@ -254,6 +328,49 @@ func TestDB_BeginRW_Closed(t *testing.T) {
assert(t, tx == nil, "")
}
func TestDB_Close_PendingTx_RW(t *testing.T) { testDB_Close_PendingTx(t, true) }
func TestDB_Close_PendingTx_RO(t *testing.T) { testDB_Close_PendingTx(t, false) }
// Ensure that a database cannot close while transactions are open.
//
// The test opens a transaction, calls db.Close() from a second goroutine,
// checks that Close has not returned after 100ms, commits the transaction and
// then checks that Close has returned within another 100ms.
//
// NOTE(review): the writable parameter is never read; Begin(true) is always
// used, so the _RW and _RO wrappers currently run the same read-write
// scenario. Switching to Begin(writable) would also require ending a
// read-only transaction with Rollback instead of Commit, and relies on
// DB.Close blocking on open read-only transactions — confirm before changing.
func testDB_Close_PendingTx(t *testing.T, writable bool) {
db := NewTestDB()
defer db.Close()
// Start transaction.
tx, err := db.Begin(true)
if err != nil {
t.Fatal(err)
}
// Close the database in a separate goroutine; done is closed once Close
// returns.
done := make(chan struct{})
go func() {
db.Close()
close(done)
}()
// Ensure database hasn't closed.
time.Sleep(100 * time.Millisecond)
select {
case <-done:
t.Fatal("database closed too early")
default:
}
// Commit transaction.
if err := tx.Commit(); err != nil {
t.Fatal(err)
}
// Ensure database closed now.
time.Sleep(100 * time.Millisecond)
select {
case <-done:
default:
t.Fatal("database did not close")
}
}
// Ensure a database can provide a transactional block.
func TestDB_Update(t *testing.T) {
db := NewTestDB()
@@ -499,7 +616,7 @@ func TestDB_Consistency(t *testing.T) {
})
}
// Ensure that DB stats can be substracted from one another.
// Ensure that DB stats can be subtracted from one another.
func TestDBStats_Sub(t *testing.T) {
var a, b bolt.Stats
a.TxStats.PageCount = 3
@@ -678,7 +795,7 @@ func (db *TestDB) PrintStats() {
// MustCheck runs a consistency check on the database and panics if any errors are found.
func (db *TestDB) MustCheck() {
db.View(func(tx *bolt.Tx) error {
db.Update(func(tx *bolt.Tx) error {
// Collect all the errors.
var errors []error
for err := range tx.Check() {

View File

@@ -36,6 +36,10 @@ var (
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
ErrTxClosed = errors.New("tx closed")
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
)
// These errors can occur when putting or deleting a value or a bucket.

View File

@@ -48,15 +48,14 @@ func (f *freelist) pending_count() int {
// all returns a list of all free ids and all pending ids in one sorted list.
func (f *freelist) all() []pgid {
ids := make([]pgid, len(f.ids))
copy(ids, f.ids)
m := make(pgids, 0)
for _, list := range f.pending {
ids = append(ids, list...)
m = append(m, list...)
}
sort.Sort(pgids(ids))
return ids
sort.Sort(m)
return pgids(f.ids).merge(m)
}
// allocate returns the starting page id of a contiguous list of pages of a given size.
@@ -127,15 +126,17 @@ func (f *freelist) free(txid txid, p *page) {
// release moves all page ids for a transaction id (or older) to the freelist.
func (f *freelist) release(txid txid) {
m := make(pgids, 0)
for tid, ids := range f.pending {
if tid <= txid {
// Move transaction's pending pages to the available freelist.
// Don't remove from the cache since the page is still free.
f.ids = append(f.ids, ids...)
m = append(m, ids...)
delete(f.pending, tid)
}
}
sort.Sort(pgids(f.ids))
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
}
// rollback removes the pages from a given pending tx.

View File

@@ -1,7 +1,9 @@
package bolt
import (
"math/rand"
"reflect"
"sort"
"testing"
"unsafe"
)
@@ -127,3 +129,28 @@ func TestFreelist_write(t *testing.T) {
t.Fatalf("exp=%v; got=%v", exp, f2.ids)
}
}
func Benchmark_FreelistRelease10K(b *testing.B) { benchmark_FreelistRelease(b, 10000) }
func Benchmark_FreelistRelease100K(b *testing.B) { benchmark_FreelistRelease(b, 100000) }
func Benchmark_FreelistRelease1000K(b *testing.B) { benchmark_FreelistRelease(b, 1000000) }
func Benchmark_FreelistRelease10000K(b *testing.B) { benchmark_FreelistRelease(b, 10000000) }
// benchmark_FreelistRelease times freelist.release for a free list holding
// size page ids and a single pending transaction holding size/400 page ids.
// Fixture generation happens before the timer is reset.
func benchmark_FreelistRelease(b *testing.B, size int) {
	free := randomPgids(size)
	released := randomPgids(len(free) / 400)

	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		// Build a fresh freelist each round so every release has the same
		// pending set to move.
		fl := &freelist{
			ids:     free,
			pending: map[txid][]pgid{1: released},
		}
		fl.release(1)
	}
}
// randomPgids returns n pseudo-random page ids in ascending order. The
// generator is re-seeded with a fixed value on every call, so the same n
// always yields the same list.
func randomPgids(n int) []pgid {
	rand.Seed(42)

	ids := make(pgids, 0, n)
	for len(ids) < n {
		ids = append(ids, pgid(rand.Int63()))
	}

	sort.Sort(ids)
	return ids
}

View File

@@ -221,11 +221,20 @@ func (n *node) write(p *page) {
_assert(elem.pgid != p.id, "write: circular dependency occurred")
}
// If the length of key+value is larger than the max allocation size
// then we need to reallocate the byte array pointer.
//
// See: https://github.com/boltdb/bolt/pull/335
klen, vlen := len(item.key), len(item.value)
if len(b) < klen+vlen {
b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
}
// Write data for the element to the end of the page.
copy(b[0:], item.key)
b = b[len(item.key):]
b = b[klen:]
copy(b[0:], item.value)
b = b[len(item.value):]
b = b[vlen:]
}
// DEBUG ONLY: n.dump()

View File

@@ -3,6 +3,7 @@ package bolt
import (
"fmt"
"os"
"sort"
"unsafe"
)
@@ -96,7 +97,7 @@ type branchPageElement struct {
// key returns a byte slice of the node key.
func (n *branchPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos : n.pos+n.ksize]
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// leafPageElement represents a node on a leaf page.
@@ -110,13 +111,13 @@ type leafPageElement struct {
// key returns a byte slice of the node key.
func (n *leafPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos : n.pos+n.ksize]
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// value returns a byte slice of the node value.
func (n *leafPageElement) value() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize]
}
// PageInfo represents human readable information about a page.
@@ -132,3 +133,40 @@ type pgids []pgid
func (s pgids) Len() int { return len(s) }
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
// merge combines two ascending lists into one ascending list. Duplicates are
// kept. Both inputs must already be sorted.
//
// When either input is empty the other input is returned as-is (not copied);
// otherwise a newly allocated slice is returned.
func (a pgids) merge(b pgids) pgids {
	switch {
	case len(a) == 0:
		return b
	case len(b) == 0:
		return a
	}

	// Room for every element of both inputs, so no append below reallocates.
	out := make(pgids, 0, len(a)+len(b))

	// head is whichever list currently starts with the smaller value; tail is
	// the other one.
	head, tail := a, b
	if b[0] < a[0] {
		head, tail = b, a
	}

	for len(head) > 0 {
		// Binary-search for the longest run at the front of head that does
		// not exceed the first element of tail, and emit it in one append.
		limit := tail[0]
		cut := sort.Search(len(head), func(i int) bool { return head[i] > limit })
		out = append(out, head[:cut]...)
		if cut >= len(head) {
			break
		}

		// The remainder of head now starts above tail[0]: swap roles.
		head, tail = tail, head[cut:]
	}

	// head is exhausted; whatever remains in tail sorts after everything
	// emitted so far.
	return append(out, tail...)
}

View File

@@ -1,7 +1,10 @@
package bolt
import (
"reflect"
"sort"
"testing"
"testing/quick"
)
// Ensure that the page type can be returned in human readable format.
@@ -27,3 +30,43 @@ func TestPage_typ(t *testing.T) {
func TestPage_dump(t *testing.T) {
(&page{id: 256}).hexdump(16)
}
// TestPgids_merge checks merge against hand-written fixtures: one where the
// argument starts lower than the receiver, and one where the receiver both
// starts lower and ends higher than the argument.
func TestPgids_merge(t *testing.T) {
	fixtures := []struct {
		a, b, want pgids
	}{
		{
			a:    pgids{4, 5, 6, 10, 11, 12, 13, 27},
			b:    pgids{1, 3, 8, 9, 25, 30},
			want: pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30},
		},
		{
			a:    pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36},
			b:    pgids{8, 9, 25, 30},
			want: pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36},
		},
	}

	for _, fx := range fixtures {
		if got := fx.a.merge(fx.b); !reflect.DeepEqual(got, fx.want) {
			t.Errorf("mismatch: %v", got)
		}
	}
}
// TestPgids_merge_quick property-tests merge: for any two generated lists,
// merging their sorted forms must equal sorting their concatenation.
func TestPgids_merge_quick(t *testing.T) {
	property := func(a, b pgids) bool {
		// merge requires sorted inputs.
		sort.Sort(a)
		sort.Sort(b)

		got := a.merge(b)

		// Reference result: concatenate, then sort.
		exp := append(a, b...)
		sort.Sort(exp)

		if reflect.DeepEqual(exp, got) {
			return true
		}
		t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got)
		return false
	}

	if err := quick.Check(property, nil); err != nil {
		t.Fatal(err)
	}
}

View File

@@ -29,6 +29,14 @@ type Tx struct {
pages map[pgid]*page
stats TxStats
commitHandlers []func()
// WriteFlag specifies the flag for write-related methods like WriteTo().
// Tx opens the database file with the specified flag to copy the data.
//
// By default, the flag is unset, which works well for mostly in-memory
// workloads. For databases that are much larger than available RAM,
// set the flag to syscall.O_DIRECT to avoid thrashing the page cache.
WriteFlag int
}
// init initializes the transaction.
@@ -87,18 +95,21 @@ func (tx *Tx) Stats() TxStats {
// Bucket retrieves a bucket by name.
// Returns nil if the bucket does not exist.
// The bucket instance is only valid for the lifetime of the transaction.
func (tx *Tx) Bucket(name []byte) *Bucket {
return tx.root.Bucket(name)
}
// CreateBucket creates a new bucket.
// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
return tx.root.CreateBucket(name)
}
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
// Returns an error if the bucket name is blank, or if the bucket name is too long.
// The bucket instance is only valid for the lifetime of the transaction.
func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
return tx.root.CreateBucketIfNotExists(name)
}
@@ -127,7 +138,8 @@ func (tx *Tx) OnCommit(fn func()) {
}
// Commit writes all changes to disk and updates the meta page.
// Returns an error if a disk write error occurs.
// Returns an error if a disk write error occurs, or if Commit is
// called on a read-only transaction.
func (tx *Tx) Commit() error {
_assert(!tx.managed, "managed tx commit not allowed")
if tx.db == nil {
@@ -203,7 +215,8 @@ func (tx *Tx) Commit() error {
return nil
}
// Rollback closes the transaction and ignores all previous updates.
// Rollback closes the transaction and ignores all previous updates. Read-only
// transactions must be rolled back and not committed.
func (tx *Tx) Rollback() error {
_assert(!tx.managed, "managed tx rollback not allowed")
if tx.db == nil {
@@ -234,7 +247,8 @@ func (tx *Tx) close() {
var freelistPendingN = tx.db.freelist.pending_count()
var freelistAlloc = tx.db.freelist.size()
// Remove writer lock.
// Remove transaction ref & writer lock.
tx.db.rwtx = nil
tx.db.rwlock.Unlock()
// Merge statistics.
@@ -248,7 +262,12 @@ func (tx *Tx) close() {
} else {
tx.db.removeTx(tx)
}
// Clear all references.
tx.db = nil
tx.meta = nil
tx.root = Bucket{tx: tx}
tx.pages = nil
}
// Copy writes the entire database to a writer.
@@ -261,21 +280,18 @@ func (tx *Tx) Copy(w io.Writer) error {
// WriteTo writes the entire database to a writer.
// If err == nil then exactly tx.Size() bytes will be written into the writer.
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
// Attempt to open reader directly.
var f *os.File
if f, err = os.OpenFile(tx.db.path, os.O_RDONLY|odirect, 0); err != nil {
// Fallback to a regular open if that doesn't work.
if f, err = os.OpenFile(tx.db.path, os.O_RDONLY, 0); err != nil {
return 0, err
}
// Attempt to open reader with WriteFlag
f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
if err != nil {
return 0, err
}
defer f.Close()
// Copy the meta pages.
tx.db.metalock.Lock()
n, err = io.CopyN(w, f, int64(tx.db.pageSize*2))
tx.db.metalock.Unlock()
if err != nil {
_ = f.Close()
return n, fmt.Errorf("meta copy: %s", err)
}
@@ -283,7 +299,6 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
n += wn
if err != nil {
_ = f.Close()
return n, err
}
@@ -421,15 +436,39 @@ func (tx *Tx) write() error {
// Write pages to disk in order.
for _, p := range pages {
size := (int(p.overflow) + 1) * tx.db.pageSize
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:size]
offset := int64(p.id) * int64(tx.db.pageSize)
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
}
// Update statistics.
tx.stats.Write++
// Write out page in "max allocation" sized chunks.
ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
for {
// Limit our write to our max allocation size.
sz := size
if sz > maxAllocSize-1 {
sz = maxAllocSize - 1
}
// Write chunk to disk.
buf := ptr[:sz]
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
}
// Update statistics.
tx.stats.Write++
// Exit inner for loop if we've written all the chunks.
size -= sz
if size == 0 {
break
}
// Otherwise move offset forward and move pointer to next chunk.
offset += int64(sz)
ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
}
}
// Ignore file sync if flag is set on DB.
if !tx.db.NoSync || IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err

View File

@@ -252,6 +252,38 @@ func TestTx_DeleteBucket_NotFound(t *testing.T) {
})
}
// Ensure that no error is returned when a tx.ForEach function does not return
// an error.
func TestTx_ForEach_NoError(t *testing.T) {
db := NewTestDB()
defer db.Close()
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar"))
equals(t, nil, tx.ForEach(func(name []byte, b *bolt.Bucket) error {
return nil
}))
return nil
})
}
// Ensure that an error is returned when a tx.ForEach function returns an error.
func TestTx_ForEach_WithError(t *testing.T) {
db := NewTestDB()
defer db.Close()
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar"))
err := errors.New("foo")
equals(t, err, tx.ForEach(func(name []byte, b *bolt.Bucket) error {
return err
}))
return nil
})
}
// Ensure that Tx commit handlers are called after a transaction successfully commits.
func TestTx_OnCommit(t *testing.T) {
var x int

View File

@@ -1 +0,0 @@
*~

View File

@@ -1,19 +0,0 @@
# This file is like Go's AUTHORS file: it lists Copyright holders.
# The list of humans who have contributed is in the CONTRIBUTORS file.
#
# To contribute to this project, because it will eventually be folded
# back in to Go itself, you need to submit a CLA:
#
# http://golang.org/doc/contribute.html#copyright
#
# Then you get added to CONTRIBUTORS and you or your company get added
# to the AUTHORS file.
Blake Mizerany <blake.mizerany@gmail.com> github=bmizerany
Daniel Morsing <daniel.morsing@gmail.com> github=DanielMorsing
Gabriel Aszalos <gabriel.aszalos@gmail.com> github=gbbr
Google, Inc.
Keith Rarick <kr@xph.us> github=kr
Matthew Keenan <tank.en.mate@gmail.com> <github@mattkeenan.net> github=mattkeenan
Matt Layher <mdlayher@gmail.com> github=mdlayher
Tatsuhiro Tsujikawa <tatsuhiro.t@gmail.com> github=tatsuhiro-t

View File

@@ -1,19 +0,0 @@
# This file is like Go's CONTRIBUTORS file: it lists humans.
# The list of copyright holders (which may be companies) is in the AUTHORS file.
#
# To contribute to this project, because it will eventually be folded
# back in to Go itself, you need to submit a CLA:
#
# http://golang.org/doc/contribute.html#copyright
#
# Then you get added to CONTRIBUTORS and you or your company get added
# to the AUTHORS file.
Blake Mizerany <blake.mizerany@gmail.com> github=bmizerany
Brad Fitzpatrick <bradfitz@golang.org> github=bradfitz
Daniel Morsing <daniel.morsing@gmail.com> github=DanielMorsing
Gabriel Aszalos <gabriel.aszalos@gmail.com> github=gbbr
Keith Rarick <kr@xph.us> github=kr
Matthew Keenan <tank.en.mate@gmail.com> <github@mattkeenan.net> github=mattkeenan
Matt Layher <mdlayher@gmail.com> github=mdlayher
Tatsuhiro Tsujikawa <tatsuhiro.t@gmail.com> github=tatsuhiro-t

View File

@@ -1,5 +0,0 @@
We only accept contributions from users who have gone through Go's
contribution process (signed a CLA).
Please acknowledge whether you have (and use the same email) if
sending a pull request.

View File

@@ -1,7 +0,0 @@
Copyright 2014 Google & the Go AUTHORS
Go AUTHORS are:
See https://code.google.com/p/go/source/browse/AUTHORS
Licensed under the terms of Go itself:
https://code.google.com/p/go/source/browse/LICENSE

View File

@@ -1,75 +0,0 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import (
"errors"
)
// buffer is a reader/writer/closer over a fixed-size byte slice. It never
// grows buf; when a write needs more room it slides the unread bytes to the
// front instead.
//
// The unread data is buf[r:w].
type buffer struct {
	buf    []byte
	r, w   int
	closed bool
	err    error // err to return to reader
}

var (
	errReadEmpty = errors.New("read from empty buffer")
	errWriteFull = errors.New("write on full buffer")
)

// Read copies unread bytes into p and advances the read position.
//
// Once the buffer is drained it returns the error given to Close if the
// buffer has been closed; otherwise, if nothing at all was copied, it returns
// errReadEmpty.
func (b *buffer) Read(p []byte) (n int, err error) {
	n = copy(p, b.buf[b.r:b.w])
	b.r += n
	if b.r != b.w {
		// Unread data remains.
		return n, nil
	}
	switch {
	case b.closed:
		return n, b.err
	case n == 0:
		return n, errReadEmpty
	}
	return n, nil
}

// Len reports how many bytes are written but not yet read.
func (b *buffer) Len() int {
	unread := b.w - b.r
	return unread
}

// Write copies as much of p as fits into the buffer. It returns errWriteFull
// when p did not fit completely, and an error when the buffer is closed.
func (b *buffer) Write(p []byte) (n int, err error) {
	if b.closed {
		return 0, errors.New("closed")
	}

	// If p does not fit behind the write position and bytes at the front have
	// already been consumed, slide the unread data to the start.
	if b.r > 0 && len(b.buf)-b.w < len(p) {
		copy(b.buf, b.buf[b.r:b.w])
		b.r, b.w = 0, b.w-b.r
	}

	n = copy(b.buf[b.w:], p)
	b.w += n
	if n < len(p) {
		return n, errWriteFull
	}
	return n, nil
}

// Close marks the buffer closed and records err for readers. Only the first
// call has any effect. Afterwards Write fails, and Read returns err once the
// buffer is drained.
func (b *buffer) Close(err error) {
	if b.closed {
		return
	}
	b.closed, b.err = true, err
}

View File

@@ -1,73 +0,0 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import (
"io"
"reflect"
"testing"
)
var bufferReadTests = []struct {
buf buffer
read, wn int
werr error
wp []byte
wbuf buffer
}{
{
buffer{[]byte{'a', 0}, 0, 1, false, nil},
5, 1, nil, []byte{'a'},
buffer{[]byte{'a', 0}, 1, 1, false, nil},
},
{
buffer{[]byte{'a', 0}, 0, 1, true, io.EOF},
5, 1, io.EOF, []byte{'a'},
buffer{[]byte{'a', 0}, 1, 1, true, io.EOF},
},
{
buffer{[]byte{0, 'a'}, 1, 2, false, nil},
5, 1, nil, []byte{'a'},
buffer{[]byte{0, 'a'}, 2, 2, false, nil},
},
{
buffer{[]byte{0, 'a'}, 1, 2, true, io.EOF},
5, 1, io.EOF, []byte{'a'},
buffer{[]byte{0, 'a'}, 2, 2, true, io.EOF},
},
{
buffer{[]byte{}, 0, 0, false, nil},
5, 0, errReadEmpty, []byte{},
buffer{[]byte{}, 0, 0, false, nil},
},
{
buffer{[]byte{}, 0, 0, true, io.EOF},
5, 0, io.EOF, []byte{},
buffer{[]byte{}, 0, 0, true, io.EOF},
},
}
// TestBufferRead runs every bufferReadTests case: it reads tt.read bytes from
// the case's buffer and compares the byte count, the error, the bytes read and
// the resulting buffer state with the expected values.
func TestBufferRead(t *testing.T) {
	for idx := range bufferReadTests {
		// Work on a copy of the table entry; Read moves the buffer's read
		// position.
		tc := bufferReadTests[idx]

		dst := make([]byte, tc.read)
		n, err := tc.buf.Read(dst)

		switch {
		case n != tc.wn:
			t.Errorf("#%d: wn = %d want %d", idx, n, tc.wn)
			continue
		case err != tc.werr:
			t.Errorf("#%d: werr = %v want %v", idx, err, tc.werr)
			continue
		}

		if got := dst[:n]; !reflect.DeepEqual(got, tc.wp) {
			t.Errorf("#%d: read = %+v want %+v", idx, got, tc.wp)
		}
		if !reflect.DeepEqual(tc.buf, tc.wbuf) {
			t.Errorf("#%d: buf = %+v want %+v", idx, tc.buf, tc.wbuf)
		}
	}
}

View File

@@ -1,5 +0,0 @@
h2demo.linux: h2demo.go
GOOS=linux go build --tags=h2demo -o h2demo.linux .
upload: h2demo.linux
cat h2demo.linux | go run launch.go --write_object=http2-demo-server-tls/h2demo --write_object_is_public

View File

@@ -1,43 +0,0 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import (
"sync"
)
// pipe wraps a buffer with a condition variable so that Read can block until
// data is written or the pipe is closed.
//
// The zero value is not ready for use: c.L must be set (the package's test
// sets it to &m) before any method is called.
type pipe struct {
	b buffer
	c sync.Cond
	m sync.Mutex
}

// Read blocks until the buffer holds data or has been closed, then reads
// from the buffer into p.
func (pp *pipe) Read(p []byte) (n int, err error) {
	pp.c.L.Lock()
	defer pp.c.L.Unlock()
	for {
		if pp.b.Len() != 0 || pp.b.closed {
			return pp.b.Read(p)
		}
		pp.c.Wait()
	}
}

// Write copies bytes from p into the buffer and signals one waiting reader.
// It is an error to write more data than the buffer can hold.
func (pp *pipe) Write(p []byte) (n int, err error) {
	pp.c.L.Lock()
	defer pp.c.L.Unlock()
	// Deferred so the signal is sent after the buffer write and before the
	// lock is released.
	defer pp.c.Signal()
	return pp.b.Write(p)
}

// Close closes the underlying buffer with err and signals one waiting reader.
func (pp *pipe) Close(err error) {
	pp.c.L.Lock()
	defer pp.c.L.Unlock()
	defer pp.c.Signal()
	pp.b.Close(err)
}

View File

@@ -1,24 +0,0 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import (
"errors"
"testing"
)
func TestPipeClose(t *testing.T) {
var p pipe
p.c.L = &p.m
a := errors.New("a")
b := errors.New("b")
p.Close(a)
p.Close(b)
_, err := p.Read(make([]byte, 1))
if err != a {
t.Errorf("err = %v want %v", err, a)
}
}

View File

@@ -1,553 +0,0 @@
// Copyright 2015 The Go Authors.
// See https://go.googlesource.com/go/+/master/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://go.googlesource.com/go/+/master/LICENSE
package http2
import (
"bufio"
"bytes"
"crypto/tls"
"errors"
"fmt"
"io"
"log"
"net"
"net/http"
"strconv"
"strings"
"sync"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/bradfitz/http2/hpack"
)
type Transport struct {
Fallback http.RoundTripper
// TODO: remove this and make more general with a TLS dial hook, like http
InsecureTLSDial bool
connMu sync.Mutex
conns map[string][]*clientConn // key is host:port
}
type clientConn struct {
t *Transport
tconn *tls.Conn
tlsState *tls.ConnectionState
connKey []string // key(s) this connection is cached in, in t.conns
readerDone chan struct{} // closed on error
readerErr error // set before readerDone is closed
hdec *hpack.Decoder
nextRes *http.Response
mu sync.Mutex
closed bool
goAway *GoAwayFrame // if non-nil, the GoAwayFrame we received
streams map[uint32]*clientStream
nextStreamID uint32
bw *bufio.Writer
werr error // first write error that has occurred
br *bufio.Reader
fr *Framer
// Settings from peer:
maxFrameSize uint32
maxConcurrentStreams uint32
initialWindowSize uint32
hbuf bytes.Buffer // HPACK encoder writes into this
henc *hpack.Encoder
}
type clientStream struct {
ID uint32
resc chan resAndError
pw *io.PipeWriter
pr *io.PipeReader
}
// stickyErrWriter forwards writes to w until one fails. The failure is stored
// through err, and every later Write returns that stored error without
// touching w again.
type stickyErrWriter struct {
	w   io.Writer
	err *error
}

// Write writes p to the wrapped writer unless an earlier error is recorded,
// in which case it returns (0, that error). The result of the underlying
// write, nil or not, is stored in *err.
func (sew stickyErrWriter) Write(p []byte) (n int, err error) {
	if prev := *sew.err; prev != nil {
		return 0, prev
	}
	n, err = sew.w.Write(p)
	*sew.err = err
	return n, err
}
// RoundTrip sends req over an HTTP/2 connection and returns the response.
//
// Requests whose scheme is not "https" are handed to t.Fallback, or rejected
// when no Fallback is configured. When the URL host has no usable port, port
// 443 is assumed. A request that fails because its connection was already
// closed is retried on another connection.
func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
	if req.URL.Scheme != "https" {
		if t.Fallback != nil {
			return t.Fallback.RoundTrip(req)
		}
		return nil, errors.New("http2: unsupported scheme and no Fallback")
	}

	host, port, splitErr := net.SplitHostPort(req.URL.Host)
	if splitErr != nil {
		host, port = req.URL.Host, "443"
	}

	for {
		cc, err := t.getClientConn(host, port)
		if err != nil {
			return nil, err
		}
		res, err := cc.roundTrip(req)
		switch {
		case shouldRetryRequest(err): // TODO: or clientconn is overloaded (too many outstanding requests)?
			continue
		case err != nil:
			return nil, err
		}
		return res, nil
	}
}
// CloseIdleConnections asks every cached connection to close itself if it is
// idle. Connections that still have streams in flight are left alone.
func (t *Transport) CloseIdleConnections() {
	t.connMu.Lock()
	defer t.connMu.Unlock()

	for key := range t.conns {
		for _, cc := range t.conns[key] {
			cc.closeIfIdle()
		}
	}
}
// errClientConnClosed is returned by a connection that was closed before the
// request could be written to it.
var errClientConnClosed = errors.New("http2: client conn is closed")

// shouldRetryRequest reports whether a request that failed with err may be
// retried on another connection. Only errClientConnClosed qualifies.
func shouldRetryRequest(err error) bool {
	// TODO: or GOAWAY graceful shutdown stuff
	switch err {
	case errClientConnClosed:
		return true
	default:
		return false
	}
}
// removeClientConn drops cc from the connection cache under every key it is
// registered with. A key whose list becomes empty is deleted from the map.
func (t *Transport) removeClientConn(cc *clientConn) {
	t.connMu.Lock()
	defer t.connMu.Unlock()

	for _, key := range cc.connKey {
		cached, found := t.conns[key]
		if !found {
			continue
		}
		if remaining := filterOutClientConn(cached, cc); len(remaining) == 0 {
			delete(t.conns, key)
		} else {
			t.conns[key] = remaining
		}
	}
}
// filterOutClientConn returns in with every occurrence of exclude removed.
// The filtering is done in place: the result shares in's backing array.
func filterOutClientConn(in []*clientConn, exclude *clientConn) []*clientConn {
	kept := 0
	for _, cc := range in {
		if cc == exclude {
			continue
		}
		in[kept] = cc
		kept++
	}
	return in[:kept]
}
// getClientConn returns a connection to host:port that can accept another
// request. A cached connection is reused when one is available; otherwise a
// new connection is dialed and added to the cache.
//
// t.connMu is held for the whole call, including the dial.
func (t *Transport) getClientConn(host, port string) (*clientConn, error) {
	t.connMu.Lock()
	defer t.connMu.Unlock()

	addr := net.JoinHostPort(host, port)
	pool := t.conns[addr]
	for i := range pool {
		if candidate := pool[i]; candidate.canTakeNewRequest() {
			return candidate, nil
		}
	}

	// Nothing reusable: make sure the cache exists, then dial.
	if t.conns == nil {
		t.conns = make(map[string][]*clientConn)
	}
	fresh, err := t.newClientConn(host, port, addr)
	if err != nil {
		return nil, err
	}
	t.conns[addr] = append(t.conns[addr], fresh)
	return fresh, nil
}
// newClientConn dials host:port over TLS, checks that the HTTP/2 protocol was
// negotiated, performs the initial HTTP/2 exchange (client preface, SETTINGS,
// connection-level WINDOW_UPDATE, reading and acknowledging the server's
// SETTINGS) and starts the connection's read loop.
//
// key is the cache key the connection will be stored under in t.conns.
//
// On any failure after the dial succeeded, the TLS connection is closed before
// the error is returned, so no socket is leaked.
func (t *Transport) newClientConn(host, port, key string) (*clientConn, error) {
	cfg := &tls.Config{
		ServerName:         host,
		NextProtos:         []string{NextProtoTLS},
		InsecureSkipVerify: t.InsecureTLSDial,
	}
	tconn, err := tls.Dial("tcp", host+":"+port, cfg)
	if err != nil {
		return nil, err
	}

	// From here on every early return must release the socket. established
	// is flipped only once the connection is handed to the read loop.
	established := false
	defer func() {
		if !established {
			tconn.Close()
		}
	}()

	if err := tconn.Handshake(); err != nil {
		return nil, err
	}
	if !t.InsecureTLSDial {
		if err := tconn.VerifyHostname(cfg.ServerName); err != nil {
			return nil, err
		}
	}
	state := tconn.ConnectionState()
	if p := state.NegotiatedProtocol; p != NextProtoTLS {
		// TODO(bradfitz): fall back to Fallback
		return nil, fmt.Errorf("bad protocol: %v", p)
	}
	if !state.NegotiatedProtocolIsMutual {
		return nil, errors.New("could not negotiate protocol mutually")
	}
	if _, err := tconn.Write(clientPreface); err != nil {
		return nil, err
	}

	cc := &clientConn{
		t:                    t,
		tconn:                tconn,
		connKey:              []string{key}, // TODO: cert's validated hostnames too
		tlsState:             &state,
		readerDone:           make(chan struct{}),
		nextStreamID:         1,
		maxFrameSize:         16 << 10, // spec default
		initialWindowSize:    65535,    // spec default
		maxConcurrentStreams: 1000,     // "infinite", per spec. 1000 seems good enough.
		streams:              make(map[uint32]*clientStream),
	}
	// All writes go through a sticky-error writer, so a failure of any frame
	// write below surfaces in cc.werr after the next Flush.
	cc.bw = bufio.NewWriter(stickyErrWriter{tconn, &cc.werr})
	cc.br = bufio.NewReader(tconn)
	cc.fr = NewFramer(cc.bw, cc.br)
	cc.henc = hpack.NewEncoder(&cc.hbuf)

	cc.fr.WriteSettings()
	// TODO: re-send more conn-level flow control tokens when server uses all these.
	cc.fr.WriteWindowUpdate(0, 1<<30) // um, 0x7fffffff doesn't work to Google? it hangs?
	cc.bw.Flush()
	if cc.werr != nil {
		return nil, cc.werr
	}

	// Read the obligatory SETTINGS frame
	f, err := cc.fr.ReadFrame()
	if err != nil {
		return nil, err
	}
	sf, ok := f.(*SettingsFrame)
	if !ok {
		return nil, fmt.Errorf("expected settings frame, got: %T", f)
	}
	cc.fr.WriteSettingsAck()
	cc.bw.Flush()
	// Don't hand out a connection whose SETTINGS ACK could not be written.
	if cc.werr != nil {
		return nil, cc.werr
	}

	sf.ForeachSetting(func(s Setting) error {
		switch s.ID {
		case SettingMaxFrameSize:
			cc.maxFrameSize = s.Val
		case SettingMaxConcurrentStreams:
			cc.maxConcurrentStreams = s.Val
		case SettingInitialWindowSize:
			cc.initialWindowSize = s.Val
		default:
			// TODO(bradfitz): handle more
			log.Printf("Unhandled Setting: %v", s)
		}
		return nil
	})
	// TODO: figure out henc size
	cc.hdec = hpack.NewDecoder(initialHeaderTableSize, cc.onNewHeaderField)

	established = true
	go cc.readLoop()
	return cc, nil
}
func (cc *clientConn) setGoAway(f *GoAwayFrame) {
cc.mu.Lock()
defer cc.mu.Unlock()
cc.goAway = f
}
// canTakeNewRequest reports whether another request may be started on cc.
//
// That requires all of the following:
//   - the connection has not been closed locally,
//   - no GOAWAY frame has been recorded for it,
//   - one more stream still fits within the peer's concurrent-stream limit
//     (a limit of N admits N streams),
//   - the client stream-ID space is not exhausted.
func (cc *clientConn) canTakeNewRequest() bool {
	cc.mu.Lock()
	defer cc.mu.Unlock()
	return !cc.closed &&
		cc.goAway == nil &&
		int64(len(cc.streams))+1 <= int64(cc.maxConcurrentStreams) &&
		cc.nextStreamID < 2147483647
}
// closeIfIdle closes the connection when it has no streams in flight, and
// does nothing otherwise. The closed flag is set under cc.mu; the socket is
// closed after the lock has been released.
func (cc *clientConn) closeIfIdle() {
	cc.mu.Lock()
	idle := len(cc.streams) == 0
	if idle {
		cc.closed = true
	}
	cc.mu.Unlock()

	if !idle {
		return
	}
	// TODO: do clients send GOAWAY too? maybe? Just Close:
	cc.tconn.Close()
}
// roundTrip writes req's headers on a new stream and waits for the response
// headers delivered by the read loop.
//
// It returns errClientConnClosed when the connection was already closed, the
// first write error when the request could not be written, and the read
// loop's error when the connection dies before a response for this stream
// arrived. In the last two cases the stream is removed from cc.streams.
// Request bodies are not sent yet.
func (cc *clientConn) roundTrip(req *http.Request) (*http.Response, error) {
	cc.mu.Lock()

	if cc.closed {
		cc.mu.Unlock()
		return nil, errClientConnClosed
	}

	cs := cc.newStream()
	hasBody := false // TODO

	// we send: HEADERS[+CONTINUATION] + (DATA?)
	hdrs := cc.encodeHeaders(req)
	first := true
	for len(hdrs) > 0 {
		// Cut the header block into frames no larger than the peer's
		// maximum frame size.
		chunk := hdrs
		if len(chunk) > int(cc.maxFrameSize) {
			chunk = chunk[:cc.maxFrameSize]
		}
		hdrs = hdrs[len(chunk):]
		endHeaders := len(hdrs) == 0
		if first {
			cc.fr.WriteHeaders(HeadersFrameParam{
				StreamID:      cs.ID,
				BlockFragment: chunk,
				EndStream:     !hasBody,
				EndHeaders:    endHeaders,
			})
			first = false
		} else {
			cc.fr.WriteContinuation(cs.ID, endHeaders, chunk)
		}
	}
	cc.bw.Flush()
	// Frame write errors are sticky and end up in cc.werr.
	werr := cc.werr
	if werr != nil {
		// The request never made it out; don't leave the stream registered.
		delete(cc.streams, cs.ID)
	}
	cc.mu.Unlock()

	if hasBody {
		// TODO: write data. and it should probably be interleaved:
		// go ... io.Copy(dataFrameWriter{cc, cs, ...}, req.Body) ... etc
	}

	if werr != nil {
		return nil, werr
	}

	var re resAndError
	select {
	case re = <-cs.resc:
	case <-cc.readerDone:
		// The read loop has exited. It may have delivered our response just
		// before exiting, so prefer that over the connection error.
		select {
		case re = <-cs.resc:
		default:
			cc.streamByID(cs.ID, true)
			// readerErr is set before readerDone is closed.
			err := cc.readerErr
			if err == nil || err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return nil, err
		}
	}
	if re.err != nil {
		return nil, re.err
	}
	res := re.res
	res.Request = req
	res.TLS = cc.tlsState
	return res, nil
}
// encodeHeaders HPACK-encodes the header block for req and returns it.
// It requires cc.mu be held.
//
// The returned slice is the content of cc.hbuf, which is reset at the start of
// every call.
//
// The pseudo-headers are written first. :path is the URL's request URI, so it
// carries the escaped path together with the query string and is "/" when the
// path is empty. Regular headers follow with lower-cased names; a "Host"
// header is skipped because the host is already sent as :authority.
func (cc *clientConn) encodeHeaders(req *http.Request) []byte {
	cc.hbuf.Reset()

	// TODO(bradfitz): figure out :authority-vs-Host stuff between http2 and Go
	host := req.Host
	if host == "" {
		host = req.URL.Host
	}

	// RequestURI keeps the query string, which req.URL.Path alone would drop.
	path := req.URL.RequestURI()

	cc.writeHeader(":authority", host) // probably not right for all sites
	cc.writeHeader(":method", req.Method)
	cc.writeHeader(":path", path)
	cc.writeHeader(":scheme", "https")

	for k, vv := range req.Header {
		lowKey := strings.ToLower(k)
		if lowKey == "host" {
			continue
		}
		for _, v := range vv {
			cc.writeHeader(lowKey, v)
		}
	}
	return cc.hbuf.Bytes()
}
func (cc *clientConn) writeHeader(name, value string) {
log.Printf("sending %q = %q", name, value)
cc.henc.WriteField(hpack.HeaderField{Name: name, Value: value})
}
type resAndError struct {
res *http.Response
err error
}
// requires cc.mu be held.
func (cc *clientConn) newStream() *clientStream {
cs := &clientStream{
ID: cc.nextStreamID,
resc: make(chan resAndError, 1),
}
cc.nextStreamID += 2
cc.streams[cs.ID] = cs
return cs
}
func (cc *clientConn) streamByID(id uint32, andRemove bool) *clientStream {
cc.mu.Lock()
defer cc.mu.Unlock()
cs := cc.streams[id]
if andRemove {
delete(cc.streams, id)
}
return cs
}
// readLoop reads frames from the connection until ReadFrame fails or a
// protocol violation is detected, and turns HEADERS/CONTINUATION/DATA frames
// into responses for the streams registered in cc.streams.
// It runs in its own goroutine.
//
// On exit the deferred calls run in reverse order of registration: first the
// response body pipes of active streams are closed with the read error, then
// readerDone is closed, then the connection is removed from the Transport's
// cache. cc.readerErr is set before every return.
func (cc *clientConn) readLoop() {
defer cc.t.removeClientConn(cc)
defer close(cc.readerDone)
// Streams whose response headers have been delivered and whose body pipe may
// still need closing on exit.
activeRes := map[uint32]*clientStream{} // keyed by streamID
// Close any response bodies if the server closes prematurely.
// TODO: also do this if we've written the headers but not
// gotten a response yet.
defer func() {
err := cc.readerErr
// A clean EOF in the middle of a response is unexpected for the body reader.
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
for _, cs := range activeRes {
cs.pw.CloseWithError(err)
}
}()
// continueStreamID is the stream ID we're waiting for
// continuation frames for.
var continueStreamID uint32
for {
f, err := cc.fr.ReadFrame()
if err != nil {
cc.readerErr = err
return
}
log.Printf("Transport received %v: %#v", f.Header(), f)
streamID := f.Header().StreamID
// While a header block is open (continueStreamID != 0) only CONTINUATION
// frames for that same stream are accepted.
_, isContinue := f.(*ContinuationFrame)
if isContinue {
if streamID != continueStreamID {
log.Printf("Protocol violation: got CONTINUATION with id %d; want %d", streamID, continueStreamID)
cc.readerErr = ConnectionError(ErrCodeProtocol)
return
}
} else if continueStreamID != 0 {
// Continue frames need to be adjacent in the stream
// and we were in the middle of headers.
log.Printf("Protocol violation: got %T for stream %d, want CONTINUATION for %d", f, streamID, continueStreamID)
cc.readerErr = ConnectionError(ErrCodeProtocol)
return
}
// NOTE(review): this check also skips every frame on stream 0. GOAWAY is
// sent on stream 0 (RFC 7540 section 6.8), so the *GoAwayFrame case in the
// switch below presumably never runs -- confirm and handle stream-0 frames
// before this check if GOAWAY is meant to be honored.
if streamID%2 == 0 {
// Ignore streams pushed from the server for now.
// These always have an even stream id.
continue
}
streamEnded := false
if ff, ok := f.(streamEnder); ok {
streamEnded = ff.StreamEnded()
}
// Look the stream up; it is removed from cc.streams when this frame ends it.
cs := cc.streamByID(streamID, streamEnded)
if cs == nil {
log.Printf("Received frame for untracked stream ID %d", streamID)
continue
}
switch f := f.(type) {
case *HeadersFrame:
// Start a new response. cc.nextRes is filled in by the header decoder
// callback as header fields are decoded.
cc.nextRes = &http.Response{
Proto: "HTTP/2.0",
ProtoMajor: 2,
Header: make(http.Header),
}
cs.pr, cs.pw = io.Pipe()
cc.hdec.Write(f.HeaderBlockFragment())
case *ContinuationFrame:
cc.hdec.Write(f.HeaderBlockFragment())
case *DataFrame:
log.Printf("DATA: %q", f.Data())
// NOTE(review): cs.pw is only assigned in the *HeadersFrame case above; a
// DATA frame arriving for a tracked stream before its HEADERS would call
// Write on a nil pipe writer -- confirm whether that can happen.
cs.pw.Write(f.Data())
case *GoAwayFrame:
cc.t.removeClientConn(cc)
if f.ErrCode != 0 {
// TODO: deal with GOAWAY more. particularly the error code
log.Printf("transport got GOAWAY with error code = %v", f.ErrCode)
}
cc.setGoAway(f)
default:
log.Printf("Transport: unhandled response frame type %T", f)
}
// Track whether a header block is still open after this frame.
headersEnded := false
if he, ok := f.(headersEnder); ok {
headersEnded = he.HeadersEnded()
if headersEnded {
continueStreamID = 0
} else {
continueStreamID = streamID
}
}
if streamEnded {
cs.pw.Close()
delete(activeRes, streamID)
}
// Note the order: when one frame ends both the stream and the headers, the
// stream is deleted from activeRes just above and added back just below.
if headersEnded {
// cs cannot be nil here: the nil case already hit the continue above.
if cs == nil {
panic("couldn't find stream") // TODO be graceful
}
// TODO: set the Body to one which notes the
// Close and also sends the server a
// RST_STREAM
cc.nextRes.Body = cs.pr
res := cc.nextRes
activeRes[streamID] = cs
// Hand the response to the goroutine waiting in roundTrip.
cs.resc <- resAndError{res: res}
}
}
}
// onNewHeaderField is the HPACK decoder callback: it records one decoded
// header field on the response currently being assembled in cc.nextRes.
//
// ":status" sets the status code and status line (and panics when the value
// is not an integer), any other pseudo-header is ignored, and every regular
// field is added to the response header under its canonical key.
func (cc *clientConn) onNewHeaderField(f hpack.HeaderField) {
	// TODO: verify pseudo headers come before non-pseudo headers
	// TODO: verify the status is set
	log.Printf("Header field: %+v", f)

	switch {
	case f.Name == ":status":
		code, err := strconv.Atoi(f.Value)
		if err != nil {
			panic("TODO: be graceful")
		}
		cc.nextRes.StatusCode = code
		cc.nextRes.Status = f.Value + " " + http.StatusText(code)
	case strings.HasPrefix(f.Name, ":"):
		// "Endpoints MUST NOT generate pseudo-header fields other than those defined in this document."
		// TODO: treat as invalid?
	default:
		cc.nextRes.Header.Add(http.CanonicalHeaderKey(f.Name), f.Value)
	}
}

View File

@@ -1,168 +0,0 @@
// Copyright 2015 The Go Authors.
// See https://go.googlesource.com/go/+/master/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://go.googlesource.com/go/+/master/LICENSE
package http2
import (
"flag"
"io"
"io/ioutil"
"net/http"
"os"
"reflect"
"strings"
"testing"
"time"
)
var (
extNet = flag.Bool("extnet", false, "do external network tests")
transportHost = flag.String("transporthost", "http2.golang.org", "hostname to use for TestTransport")
insecure = flag.Bool("insecure", false, "insecure TLS dials")
)
// TestTransportExternal issues a GET to https://<transporthost>/ through a
// Transport and dumps the response to stdout. It is skipped unless the
// -extnet flag is set.
func TestTransportExternal(t *testing.T) {
	if !*extNet {
		t.Skip("skipping external network test")
	}
	// The host comes from a flag, so a malformed value must fail the test
	// here rather than surface as a nil request further down.
	req, err := http.NewRequest("GET", "https://"+*transportHost+"/", nil)
	if err != nil {
		t.Fatalf("%v", err)
	}
	rt := &Transport{
		InsecureTLSDial: *insecure,
	}
	res, err := rt.RoundTrip(req)
	if err != nil {
		t.Fatalf("%v", err)
	}
	res.Write(os.Stdout)
}
// TestTransport performs one GET through Transport against a local test
// server and checks the status, headers, Request back-pointer, TLS state
// and body of the response.
func TestTransport(t *testing.T) {
	const body = "sup"
	st := newServerTester(t, func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, body)
	})
	defer st.Close()

	tr := &Transport{InsecureTLSDial: true}
	defer tr.CloseIdleConnections()

	req, err := http.NewRequest("GET", st.ts.URL, nil)
	if err != nil {
		t.Fatal(err)
	}
	res, err := tr.RoundTrip(req)
	if err != nil {
		t.Fatal(err)
	}
	defer res.Body.Close()

	t.Logf("Got res: %+v", res)
	if g, w := res.StatusCode, 200; g != w {
		t.Errorf("StatusCode = %v; want %v", g, w)
	}
	if g, w := res.Status, "200 OK"; g != w {
		t.Errorf("Status = %q; want %q", g, w)
	}
	wantHeader := http.Header{
		"Content-Length": []string{"3"},
		"Content-Type":   []string{"text/plain; charset=utf-8"},
	}
	if !reflect.DeepEqual(res.Header, wantHeader) {
		t.Errorf("res Header = %v; want %v", res.Header, wantHeader)
	}
	if res.Request != req {
		t.Errorf("Response.Request = %p; want %p", res.Request, req)
	}
	if res.TLS == nil {
		// No argument: the message has no formatting verb to consume one.
		t.Errorf("Response.TLS = nil; want non-nil")
	}
	slurp, err := ioutil.ReadAll(res.Body)
	if err != nil {
		// Errorf, not Error: the message contains a %v verb.
		t.Errorf("Body read: %v", err)
	} else if string(slurp) != body {
		t.Errorf("Body = %q; want %q", slurp, body)
	}
}
// TestTransportReusesConns makes two sequential requests through the same
// Transport to a handler that echoes the client's remote address, and
// expects both responses to report the same address.
func TestTransportReusesConns(t *testing.T) {
	echoRemoteAddr := func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, r.RemoteAddr)
	}
	st := newServerTester(t, echoRemoteAddr, optOnlyServer)
	defer st.Close()

	tr := &Transport{InsecureTLSDial: true}
	defer tr.CloseIdleConnections()

	// remoteAddr performs one GET and returns the address the server saw.
	remoteAddr := func() string {
		req, err := http.NewRequest("GET", st.ts.URL, nil)
		if err != nil {
			t.Fatal(err)
		}
		res, err := tr.RoundTrip(req)
		if err != nil {
			t.Fatal(err)
		}
		defer res.Body.Close()
		raw, err := ioutil.ReadAll(res.Body)
		if err != nil {
			t.Fatalf("Body read: %v", err)
		}
		trimmed := strings.TrimSpace(string(raw))
		if trimmed == "" {
			t.Fatalf("didn't get an addr in response")
		}
		return trimmed
	}

	first := remoteAddr()
	second := remoteAddr()
	if second != first {
		t.Errorf("first and second responses were on different connections: %q vs %q", first, second)
	}
}
// TestTransportAbortClosesPipes checks that a client blocked reading a
// response body is released with an error when the server connection is
// closed underneath it, instead of hanging forever.
func TestTransportAbortClosesPipes(t *testing.T) {
	shutdown := make(chan struct{})
	st := newServerTester(t,
		func(w http.ResponseWriter, r *http.Request) {
			w.(http.Flusher).Flush()
			<-shutdown
		},
		optOnlyServer,
	)
	defer st.Close()
	defer close(shutdown) // we must shutdown before st.Close() to avoid hanging

	done := make(chan struct{})
	requestMade := make(chan struct{})
	go func() {
		defer close(done)
		tr := &Transport{
			InsecureTLSDial: true,
		}
		// t.Fatal must only be called from the goroutine running the test,
		// so failures here are recorded with t.Error followed by return.
		req, err := http.NewRequest("GET", st.ts.URL, nil)
		if err != nil {
			t.Error(err)
			return
		}
		res, err := tr.RoundTrip(req)
		if err != nil {
			t.Error(err)
			return
		}
		defer res.Body.Close()
		close(requestMade)
		_, err = ioutil.ReadAll(res.Body)
		if err == nil {
			t.Error("expected error from res.Body.Read")
		}
	}()

	select {
	case <-requestMade:
	case <-done:
		// The goroutine gave up before the request was made; its error is
		// already recorded, and waiting on requestMade would block forever.
		return
	}
	// Now force the serve loop to end, via closing the connection.
	st.closeConn()
	// deadlock? that's a bug.
	select {
	case <-done:
	case <-time.After(3 * time.Second):
		t.Fatal("timeout")
	}
}

View File

@@ -0,0 +1,4 @@
language: go
go:
- 1.4.2
sudo: false

12
Godeps/_workspace/src/github.com/cheggaaa/pb/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,12 @@
Copyright (c) 2012, Sergey Cherepanov
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

98
Godeps/_workspace/src/github.com/cheggaaa/pb/README.md generated vendored Normal file
View File

@@ -0,0 +1,98 @@
## Terminal progress bar for Go
Simple progress bar for console programs.
### Installation
```
go get github.com/cheggaaa/pb
```
### Usage
```Go
package main
import (
"github.com/cheggaaa/pb"
"time"
)
func main() {
count := 100000
bar := pb.StartNew(count)
for i := 0; i < count; i++ {
bar.Increment()
time.Sleep(time.Millisecond)
}
bar.FinishPrint("The End!")
}
```
Result will be like this:
```
> go run test.go
37158 / 100000 [================>_______________________________] 37.16% 1m11s
```
More functions?
```Go
// create bar
bar := pb.New(count)
// refresh info every second (default 200ms)
bar.SetRefreshRate(time.Second)
// show percents (by default already true)
bar.ShowPercent = true
// show bar (by default already true)
bar.ShowBar = true
// no need counters
bar.ShowCounters = false
// show "time left"
bar.ShowTimeLeft = true
// show average speed
bar.ShowSpeed = true
// sets the width of the progress bar
bar.SetWidth(80)
// sets the width of the progress bar, but if terminal size smaller will be ignored
bar.SetMaxWidth(80)
// convert output to readable format (like KB, MB)
bar.SetUnits(pb.U_BYTES)
// and start
bar.Start()
```
Want to handle progress of I/O operations?
```Go
// create and start bar
bar := pb.New(myDataLen).SetUnits(pb.U_BYTES)
bar.Start()
// my io.Reader
r := myReader
// my io.Writer
w := myWriter
// create multi writer
writer := io.MultiWriter(w, bar)
// and copy
io.Copy(writer, r)
// show example/copy/copy.go for advanced example
```
Not like the looks?
```Go
bar.Format("<.- >")
```

View File

@@ -0,0 +1,81 @@
package main
import (
"fmt"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/cheggaaa/pb"
"io"
"net/http"
"os"
"strconv"
"strings"
"time"
)
// main copies a source (a local file, or an http:// or https:// URL) to a
// destination file while showing a byte-based progress bar.
//
// Usage: copy [source file or url] [dest file]
//
// Every failure is reported on stdout and ends the program early.
func main() {
	// check args
	if len(os.Args) < 3 {
		printUsage()
		return
	}
	sourceName, destName := os.Args[1], os.Args[2]

	// check source
	var source io.Reader
	var sourceSize int64
	if strings.HasPrefix(sourceName, "http://") || strings.HasPrefix(sourceName, "https://") {
		// open as url
		resp, err := http.Get(sourceName)
		if err != nil {
			fmt.Printf("Can't get %s: %v\n", sourceName, err)
			return
		}
		defer resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			fmt.Printf("Server return non-200 status: %v\n", resp.Status)
			return
		}
		// Parse as 64-bit so large downloads are not truncated on 32-bit
		// platforms. A missing or malformed Content-Length leaves the size
		// at 0, which the bar treats as "total unknown".
		if n, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64); err == nil {
			sourceSize = n
		}
		source = resp.Body
	} else {
		// open as file
		s, err := os.Open(sourceName)
		if err != nil {
			fmt.Printf("Can't open %s: %v\n", sourceName, err)
			return
		}
		defer s.Close()
		// get source size
		sourceStat, err := s.Stat()
		if err != nil {
			fmt.Printf("Can't stat %s: %v\n", sourceName, err)
			return
		}
		sourceSize = sourceStat.Size()
		source = s
	}

	// create dest
	dest, err := os.Create(destName)
	if err != nil {
		fmt.Printf("Can't create %s: %v\n", destName, err)
		return
	}
	defer dest.Close()

	// create bar; New64 keeps the full 64-bit size
	bar := pb.New64(sourceSize).SetUnits(pb.U_BYTES).SetRefreshRate(time.Millisecond * 10)
	bar.ShowSpeed = true
	bar.Start()

	// create multi writer so every written chunk also advances the bar
	writer := io.MultiWriter(dest, bar)

	// and copy
	_, err = io.Copy(writer, source)
	bar.Finish()
	if err != nil {
		fmt.Printf("Can't copy %s to %s: %v\n", sourceName, destName, err)
	}
}
// printUsage writes the one-line command synopsis to stdout.
func printUsage() {
	const usage = "copy [source file or url] [dest file]"
	fmt.Print(usage + "\n")
}

View File

@@ -0,0 +1,30 @@
package main
import (
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/cheggaaa/pb"
"time"
)
// main runs a 5000-step progress bar, sleeping one millisecond per step,
// and prints "The End!" once the bar has finished.
func main() {
count := 5000
bar := pb.New(count)
// show percents (by default already true)
bar.ShowPercent = true
// show bar (by default already true)
bar.ShowBar = true
// show counters (the flag is set to true here, so the counters ARE displayed)
bar.ShowCounters = true
// show the estimated time left
bar.ShowTimeLeft = true
// and start
bar.Start()
for i := 0; i < count; i++ {
bar.Increment()
time.Sleep(time.Millisecond)
}
bar.FinishPrint("The End!")
}

45
Godeps/_workspace/src/github.com/cheggaaa/pb/format.go generated vendored Normal file
View File

@@ -0,0 +1,45 @@
package pb
import (
"fmt"
"strconv"
"strings"
)
// Units selects how Format renders a numeric value.
type Units int
const (
// U_NO renders the value as a plain integer (the default).
U_NO Units = iota
// U_BYTES renders the value as a byte size (B, KB, MB, ...) via FormatBytes.
U_BYTES
)
// Format renders i as a string according to units: U_BYTES yields a
// human-readable byte size, and any other value yields the plain base-10
// integer.
func Format(i int64, units Units) string {
	if units == U_BYTES {
		return FormatBytes(i)
	}
	// Everything else falls back to a straight integer conversion.
	return strconv.FormatInt(i, 10)
}
// FormatBytes converts a byte count to a human-readable string such as
// "2.00 MB", "64.20 KB" or "52 B".
//
// Units are powers of 1024. A value that is exactly one unit is shown in
// that unit (1024 -> "1.00 KB"), not as the largest value of the smaller
// one. Values below 1024, including negatives, are printed as whole bytes.
func FormatBytes(i int64) (result string) {
	const (
		kb = 1024
		mb = 1024 * kb
		gb = 1024 * mb
		tb = 1024 * gb
	)
	switch {
	case i >= tb:
		result = fmt.Sprintf("%.02f TB", float64(i)/tb)
	case i >= gb:
		result = fmt.Sprintf("%.02f GB", float64(i)/gb)
	case i >= mb:
		result = fmt.Sprintf("%.02f MB", float64(i)/mb)
	case i >= kb:
		result = fmt.Sprintf("%.02f KB", float64(i)/kb)
	default:
		result = fmt.Sprintf("%d B", i)
	}
	result = strings.Trim(result, " ")
	return
}

View File

@@ -0,0 +1,37 @@
package pb
import (
"fmt"
"strconv"
"testing"
)
// Test_DefaultsToInteger checks that an unrecognised Units value makes
// Format fall back to plain integer output.
func Test_DefaultsToInteger(t *testing.T) {
	const input int64 = 1000
	want := strconv.Itoa(int(input))
	if got := Format(input, -1); got != want {
		t.Error(fmt.Sprintf("Expected {%s} was {%s}", want, got))
	}
}
// Test_CanFormatAsInteger checks that U_NO produces plain integer output.
func Test_CanFormatAsInteger(t *testing.T) {
	const input int64 = 1000
	want := strconv.Itoa(int(input))
	if got := Format(input, U_NO); got != want {
		t.Error(fmt.Sprintf("Expected {%s} was {%s}", want, got))
	}
}
// Test_CanFormatAsBytes checks that U_BYTES renders a sub-kilobyte value
// with the "B" suffix.
func Test_CanFormatAsBytes(t *testing.T) {
	const (
		input int64 = 1000
		want        = "1000 B"
	)
	if got := Format(input, U_BYTES); got != want {
		t.Error(fmt.Sprintf("Expected {%s} was {%s}", want, got))
	}
}

367
Godeps/_workspace/src/github.com/cheggaaa/pb/pb.go generated vendored Normal file
View File

@@ -0,0 +1,367 @@
package pb
import (
"fmt"
"io"
"math"
"strings"
"sync"
"sync/atomic"
"time"
"unicode/utf8"
)
const (
// DEFAULT_REFRESH_RATE is the redraw interval given to new bars (200ms).
DEFAULT_REFRESH_RATE = time.Millisecond * 200
// FORMAT is the default five-character bar layout, in the order
// start, filled, head, empty, end (see ProgressBar.Format).
FORMAT = "[=>-]"
)
// Deprecated: these package-level variables are kept only for backward
// compatibility and no longer have any effect. Use the ProgressBar.Format
// and ProgressBar.SetRefreshRate methods instead.
var (
DefaultRefreshRate = DEFAULT_REFRESH_RATE
BarStart, BarEnd, Empty, Current, CurrentN string
)
// New creates a progress bar with the given total. It is a convenience
// wrapper around New64 for int totals; the bar is not started.
func New(total int) *ProgressBar {
return New64(int64(total))
}
// New64 creates a progress bar using an int64 total. The bar starts with
// the default refresh rate and bar format, with percent, counters, bar,
// time left and final time all enabled. It is not started.
func New64(total int64) *ProgressBar {
pb := &ProgressBar{
Total: total,
RefreshRate: DEFAULT_REFRESH_RATE,
ShowPercent: true,
ShowCounters: true,
ShowBar: true,
ShowTimeLeft: true,
ShowFinalTime: true,
Units: U_NO,
ManualUpdate: false,
isFinish: make(chan struct{}),
// -1 differs from the initial counter value (0), so the first Update
// always renders the bar.
currentValue: -1,
}
return pb.Format(FORMAT)
}
// StartNew creates a progress bar with the given total and starts it
// immediately. It is shorthand for New(total).Start().
func StartNew(total int) *ProgressBar {
return New(total).Start()
}
// Callback receives each rendered bar line for custom output. It is used
// only when ProgressBar.Output is nil.
// For example:
// bar.Callback = func(s string) {
// mySuperPrint(s)
// }
//
type Callback func(out string)
// ProgressBar is a terminal progress bar. The exported fields configure
// what is rendered and where; the counter itself is advanced through the
// Add/Set family of methods, which access it atomically.
type ProgressBar struct {
current int64 // current must be first member of struct (https://code.google.com/p/go/issues/detail?id=5278)
// Total is the value that corresponds to 100%.
Total int64
// RefreshRate is the delay between redraws in the background writer loop.
RefreshRate time.Duration
// The Show* flags toggle the individual parts of the rendered line.
ShowPercent, ShowCounters bool
ShowSpeed, ShowTimeLeft, ShowBar bool
ShowFinalTime bool
// Output, when non-nil, receives the rendered line.
Output io.Writer
// Callback, when non-nil and Output is nil, receives the rendered line.
Callback Callback
// NotPrint suppresses printing to stdout when neither Output nor Callback is set.
NotPrint bool
// Units selects how counters and speed are formatted.
Units Units
// Width is the requested line width; see SetWidth and SetMaxWidth.
Width int
// ForceWidth makes GetWidth return Width without consulting the terminal.
ForceWidth bool
// ManualUpdate stops Start from launching the background writer loop;
// the caller is then expected to call Update itself.
ManualUpdate bool
finishOnce sync.Once //Guards isFinish
isFinish chan struct{} // closed by Finish
startTime time.Time // set by Start
startValue int64 // counter value captured by Start
currentValue int64 // last counter value rendered by Update
prefix, postfix string
// Bar glyphs, assigned by Format.
BarStart string
BarEnd string
Empty string
Current string
CurrentN string
}
// Start records the start time and the current counter value, then begins
// rendering. Unless ManualUpdate is set, a background goroutine redraws
// the bar until Finish is called. When Total is zero the percentage and
// time-left displays are switched off. It returns pb to allow chaining.
func (pb *ProgressBar) Start() *ProgressBar {
	pb.startTime = time.Now()
	// Load atomically, like every other access to the counter, so a
	// concurrent Add/Set issued before Start does not race with this read.
	pb.startValue = atomic.LoadInt64(&pb.current)
	if pb.Total == 0 {
		pb.ShowTimeLeft = false
		pb.ShowPercent = false
	}
	if !pb.ManualUpdate {
		go pb.writer()
	}
	return pb
}
// Increment adds one to the current value and returns the new value.
func (pb *ProgressBar) Increment() int {
return pb.Add(1)
}
// Set replaces the current value with an int. It delegates to Set64 and
// returns pb to allow chaining.
func (pb *ProgressBar) Set(current int) *ProgressBar {
return pb.Set64(int64(current))
}
// Set64 atomically replaces the current value with an int64 and returns
// pb to allow chaining.
func (pb *ProgressBar) Set64(current int64) *ProgressBar {
atomic.StoreInt64(&pb.current, current)
return pb
}
// Add adds add to the current value and returns the new value converted
// to int. It delegates to Add64.
func (pb *ProgressBar) Add(add int) int {
return int(pb.Add64(int64(add)))
}
// Add64 atomically adds add to the current value and returns the new value.
func (pb *ProgressBar) Add64(add int64) int64 {
return atomic.AddInt64(&pb.current, add)
}
// Prefix sets the string rendered before the rest of the line and returns
// pb to allow chaining.
func (pb *ProgressBar) Prefix(prefix string) *ProgressBar {
pb.prefix = prefix
return pb
}
// Postfix sets the string rendered after the rest of the line and returns
// pb to allow chaining.
func (pb *ProgressBar) Postfix(postfix string) *ProgressBar {
pb.postfix = postfix
return pb
}
// Format sets the five glyphs used to draw the bar, in the order
// start, filled, head, empty, end.
// Example: bar.Format("[=>_]")
// A format that does not split into exactly five characters is ignored
// and leaves the current glyphs untouched. It returns pb to allow chaining.
func (pb *ProgressBar) Format(format string) *ProgressBar {
	glyphs := strings.Split(format, "")
	if len(glyphs) != 5 {
		return pb
	}
	pb.BarStart, pb.Current, pb.CurrentN, pb.Empty, pb.BarEnd =
		glyphs[0], glyphs[1], glyphs[2], glyphs[3], glyphs[4]
	return pb
}
// SetRefreshRate sets the delay between redraws and returns pb to allow
// chaining.
func (pb *ProgressBar) SetRefreshRate(rate time.Duration) *ProgressBar {
pb.RefreshRate = rate
return pb
}
// SetUnits selects how counters and speed are formatted and returns pb to
// allow chaining.
// bar.SetUnits(U_NO) - by default
// bar.SetUnits(U_BYTES) - for Mb, Kb, etc
func (pb *ProgressBar) SetUnits(units Units) *ProgressBar {
pb.Units = units
return pb
}
// SetMaxWidth sets an upper bound on the line width. Because ForceWidth is
// cleared, GetWidth uses the terminal width instead whenever the terminal
// is no wider than this value. It returns pb to allow chaining.
func (pb *ProgressBar) SetMaxWidth(width int) *ProgressBar {
pb.Width = width
pb.ForceWidth = false
return pb
}
// SetWidth fixes the line width to width regardless of the terminal size
// (ForceWidth is set). It returns pb to allow chaining.
func (pb *ProgressBar) SetWidth(width int) *ProgressBar {
pb.Width = width
pb.ForceWidth = true
return pb
}
// Finish ends rendering: it closes isFinish (which stops the background
// writer loop), draws the bar one last time with the current value and,
// unless NotPrint is set, prints a trailing newline to stdout.
// It is safe to call Finish more than once; only the first call has any
// effect.
func (pb *ProgressBar) Finish() {
//Protect multiple calls
pb.finishOnce.Do(func() {
close(pb.isFinish)
pb.write(atomic.LoadInt64(&pb.current))
if !pb.NotPrint {
fmt.Println()
}
})
}
// FinishPrint calls Finish and then prints str on its own line to stdout.
func (pb *ProgressBar) FinishPrint(str string) {
pb.Finish()
fmt.Println(str)
}
// Write implements io.Writer. The data itself is discarded: the bar is
// advanced by len(p), and len(p) is reported as written with a nil error.
func (pb *ProgressBar) Write(p []byte) (n int, err error) {
	pb.Add(len(p))
	return len(p), nil
}
// Read implements io.Reader. It does not fill p: the bar is advanced by
// len(p), and len(p) is reported as read with a nil error.
// NOTE(review): callers that need a real data source presumably want
// NewProxyReader instead - confirm.
func (pb *ProgressBar) Read(p []byte) (n int, err error) {
	pb.Add(len(p))
	return len(p), nil
}
// NewProxyReader wraps r in a Reader that is bound to this bar.
func (pb *ProgressBar) NewProxyReader(r io.Reader) *Reader {
	proxy := &Reader{
		Reader: r,
		bar:    pb,
	}
	return proxy
}
// write renders one full line for the given counter value and emits it.
// The line is built as
// prefix + counters + bar + percent + speed + time + postfix
// and padded with spaces up to the width reported by GetWidth. It goes to
// Output if set, otherwise to Callback if set, otherwise to stdout unless
// NotPrint is set.
func (pb *ProgressBar) write(current int64) {
width := pb.GetWidth()
var percentBox, countersBox, timeLeftBox, speedBox, barBox, end, out string
// percents
if pb.ShowPercent {
// current divided by one percent of Total.
percent := float64(current) / (float64(pb.Total) / float64(100))
percentBox = fmt.Sprintf(" %.02f %% ", percent)
}
// counters
if pb.ShowCounters {
if pb.Total > 0 {
countersBox = fmt.Sprintf("%s / %s ", Format(current, pb.Units), Format(pb.Total, pb.Units))
} else {
// Total unknown: show "?" in place of the total.
countersBox = Format(current, pb.Units) + " / ? "
}
}
// time left
fromStart := time.Now().Sub(pb.startTime)
// Progress made since Start was called.
currentFromStart := current - pb.startValue
select {
case <-pb.isFinish:
// Finished: show the elapsed time, truncated to whole seconds.
if pb.ShowFinalTime {
left := (fromStart / time.Second) * time.Second
timeLeftBox = left.String()
}
default:
// Still running: estimate the remaining time from the average time
// per entry so far, truncated to whole seconds.
// NOTE(review): the remaining count is Total-currentFromStart rather
// than Total-current; the two differ when startValue is non-zero - confirm intent.
if pb.ShowTimeLeft && currentFromStart > 0 {
perEntry := fromStart / time.Duration(currentFromStart)
left := time.Duration(pb.Total-currentFromStart) * perEntry
left = (left / time.Second) * time.Second
timeLeftBox = left.String()
}
}
// speed
if pb.ShowSpeed && currentFromStart > 0 {
// Elapsed time is sampled again here; this shadows the outer fromStart.
fromStart := time.Now().Sub(pb.startTime)
speed := float64(currentFromStart) / (float64(fromStart) / float64(time.Second))
speedBox = Format(int64(speed), pb.Units) + "/s "
}
// Despite its name, barWidth is the rune count of everything EXCEPT the
// bar body; the bar fills whatever is left of width.
barWidth := utf8.RuneCountInString(countersBox + pb.BarStart + pb.BarEnd + percentBox + timeLeftBox + speedBox + pb.prefix + pb.postfix)
// bar
if pb.ShowBar {
size := width - barWidth
if size > 0 {
if pb.Total > 0 {
// Known total: the filled part is proportional to current/Total,
// rounded up, and clamped to the available size.
curCount := int(math.Ceil((float64(current) / float64(pb.Total)) * float64(size)))
emptCount := size - curCount
barBox = pb.BarStart
if emptCount < 0 {
emptCount = 0
}
if curCount > size {
curCount = size
}
if emptCount <= 0 {
// Completely full: no head glyph.
barBox += strings.Repeat(pb.Current, curCount)
} else if curCount > 0 {
// Partially full: the last filled cell is drawn with the head glyph.
barBox += strings.Repeat(pb.Current, curCount-1) + pb.CurrentN
}
barBox += strings.Repeat(pb.Empty, emptCount) + pb.BarEnd
} else {
// Unknown total: draw a single marker whose position is derived
// from current modulo size, so it moves as the counter advances.
barBox = pb.BarStart
pos := size - int(current)%int(size)
if pos-1 > 0 {
barBox += strings.Repeat(pb.Empty, pos-1)
}
barBox += pb.Current
if size-pos-1 > 0 {
barBox += strings.Repeat(pb.Empty, size-pos-1)
}
barBox += pb.BarEnd
}
}
}
// check len
out = pb.prefix + countersBox + barBox + percentBox + speedBox + timeLeftBox + pb.postfix
if utf8.RuneCountInString(out) < width {
// Pad with spaces up to the full width.
end = strings.Repeat(" ", width-utf8.RuneCountInString(out))
}
// and print!
switch {
case pb.Output != nil:
fmt.Fprint(pb.Output, "\r"+out+end)
case pb.Callback != nil:
// Unlike the other two sinks, the callback gets no leading "\r".
pb.Callback(out + end)
case !pb.NotPrint:
fmt.Print("\r" + out + end)
}
}
// GetWidth returns the line width to render at. With ForceWidth set it is
// exactly Width. Otherwise it is the terminal width when Width is zero or
// the terminal is no wider than Width, and Width in all other cases. An
// error from the terminal size query is ignored.
func (pb *ProgressBar) GetWidth() int {
	if pb.ForceWidth {
		return pb.Width
	}
	requested := pb.Width
	termWidth, _ := terminalWidth()
	if requested != 0 && termWidth > requested {
		return requested
	}
	return termWidth
}
// Update redraws the bar if the counter has changed since the last value
// that was rendered; otherwise it does nothing.
func (pb *ProgressBar) Update() {
	latest := atomic.LoadInt64(&pb.current)
	if latest == pb.currentValue {
		return
	}
	pb.write(latest)
	pb.currentValue = latest
}
// writer is the background redraw loop launched by Start. It draws the bar
// once immediately, then again after every RefreshRate interval, and
// returns once isFinish has been closed.
func (pb *ProgressBar) writer() {
pb.Update()
for {
select {
case <-pb.isFinish:
return
// RefreshRate is re-read on every iteration of the loop.
case <-time.After(pb.RefreshRate):
pb.Update()
}
}
}
// window receives the result of the terminal-size ioctl issued by
// terminalWidth; only Col is read afterwards.
// NOTE(review): the layout presumably mirrors the C struct winsize - confirm.
type window struct {
Row uint16
Col uint16
Xpixel uint16
Ypixel uint16
}

View File

@@ -0,0 +1,7 @@
// +build linux darwin freebsd netbsd openbsd
package pb
import "syscall"
// sys_ioctl is the ioctl system call number, taken from the syscall
// package on the platforms listed in this file's build tag.
const sys_ioctl = syscall.SYS_IOCTL

View File

@@ -0,0 +1,5 @@
// +build solaris
package pb
// sys_ioctl is hard-coded for Solaris builds.
// NOTE(review): 54 is presumably the Solaris ioctl system call number - confirm.
const sys_ioctl = 54

View File

@@ -0,0 +1,37 @@
package pb
import (
"testing"
)
// Test_IncrementAddsOne checks that the first Increment on a fresh bar
// returns 1.
func Test_IncrementAddsOne(t *testing.T) {
	bar := New(5000)
	const expected = 1
	if actual := bar.Increment(); actual != expected {
		t.Errorf("Expected {%d} was {%d}", expected, actual)
	}
}
// Test_Width checks that every line handed to the callback is exactly as
// long as the width forced with SetWidth.
func Test_Width(t *testing.T) {
	count := 5000
	bar := New(count)
	width := 100
	// Use the width variable for both the setting and the check so the two
	// cannot drift apart.
	bar.SetWidth(width).Callback = func(out string) {
		if len(out) != width {
			// Arguments follow the message order: expected first, actual second.
			t.Errorf("Bar width expected {%d} was {%d}", width, len(out))
		}
	}
	bar.Start()
	bar.Increment()
	bar.Finish()
}
// Test_MultipleFinish checks that calling Finish twice does not panic.
// Finish closes a channel, so an unguarded second call would panic.
func Test_MultipleFinish(t *testing.T) {
bar := New(5000)
bar.Add(2000)
bar.Finish()
bar.Finish()
}

16
Godeps/_workspace/src/github.com/cheggaaa/pb/pb_win.go generated vendored Normal file
View File

@@ -0,0 +1,16 @@
// +build windows
package pb
import (
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/olekukonko/ts"
)
// bold returns str unchanged in this Windows build; no escape sequences
// are added.
func bold(str string) string {
return str
}
// terminalWidth returns the column count reported by ts.GetSize together
// with any error from that call.
func terminalWidth() (int, error) {
size, err := ts.GetSize()
return size.Col(), err
}

46
Godeps/_workspace/src/github.com/cheggaaa/pb/pb_x.go generated vendored Normal file
View File

@@ -0,0 +1,46 @@
// +build linux darwin freebsd netbsd openbsd solaris
package pb
import (
"os"
"runtime"
"syscall"
"unsafe"
)
// ioctl request codes for querying the terminal window size.
const (
// TIOCGWINSZ is declared here, but terminalWidth in this file uses
// syscall.TIOCGWINSZ instead.
TIOCGWINSZ = 0x5413
// TIOCGWINSZ_OSX is the request code terminalWidth uses when
// runtime.GOOS is "darwin".
TIOCGWINSZ_OSX = 1074295912
)
// tty is the file whose descriptor is queried for the terminal size.
var tty *os.File

// init points tty at /dev/tty, falling back to standard input when the
// device cannot be opened.
func init() {
	f, err := os.Open("/dev/tty")
	if err != nil {
		f = os.Stdin
	}
	tty = f
}
// bold wraps str in the ANSI escape sequences that switch bold rendering
// on before the text and reset attributes after it.
func bold(str string) string {
	const (
		boldOn = "\033[1m"
		reset  = "\033[0m"
	)
	return boldOn + str + reset
}
// terminalWidth asks the terminal behind tty for its window size through
// an ioctl system call and returns the column count. When the call
// reports -1 it returns 0 and the error value from the system call.
func terminalWidth() (int, error) {
w := new(window)
tio := syscall.TIOCGWINSZ
// On darwin the request code is replaced by this file's own constant.
if runtime.GOOS == "darwin" {
tio = TIOCGWINSZ_OSX
}
// The kernel writes the window size into w through the raw pointer.
res, _, err := syscall.Syscall(sys_ioctl,
tty.Fd(),
uintptr(tio),
uintptr(unsafe.Pointer(w)),
)
if int(res) == -1 {
return 0, err
}
return int(w.Col), nil
}

17
Godeps/_workspace/src/github.com/cheggaaa/pb/reader.go generated vendored Normal file
View File

@@ -0,0 +1,17 @@
package pb
import (
"io"
)
// Reader is a proxy io.Reader: it forwards reads to the embedded Reader
// and advances bar by the number of bytes each read returns.
type Reader struct {
io.Reader
bar *ProgressBar
}
// Read reads from the wrapped reader into p, adds the number of bytes
// read to the progress bar, and returns the wrapped reader's results
// unchanged.
func (r *Reader) Read(p []byte) (n int, err error) {
n, err = r.Reader.Read(p)
// The bar is advanced even when err is non-nil, since n bytes were still delivered.
r.bar.Add(n)
return
}

View File

@@ -1,5 +1,18 @@
language: go
go: 1.1
sudo: false
go:
- 1.0.3
- 1.1.2
- 1.2.2
- 1.3.3
- 1.4.2
- 1.5.1
- tip
matrix:
allow_failures:
- go: tip
script:
- go vet ./...

View File

@@ -1,18 +1,17 @@
[![Coverage](http://gocover.io/_badge/github.com/codegangsta/cli?0)](http://gocover.io/github.com/codegangsta/cli)
[![Build Status](https://travis-ci.org/codegangsta/cli.png?branch=master)](https://travis-ci.org/codegangsta/cli)
[![GoDoc](https://godoc.org/github.com/codegangsta/cli?status.svg)](https://godoc.org/github.com/codegangsta/cli)
# cli.go
cli.go is simple, fast, and fun package for building command line apps in Go. The goal is to enable developers to write fast and distributable command line applications in an expressive way.
You can view the API docs here:
http://godoc.org/github.com/codegangsta/cli
`cli.go` is simple, fast, and fun package for building command line apps in Go. The goal is to enable developers to write fast and distributable command line applications in an expressive way.
## Overview
Command line apps are usually so tiny that there is absolutely no reason why your code should *not* be self-documenting. Things like generating help text and parsing command flags/options should not hinder productivity when writing a command line app.
**This is where cli.go comes into play.** cli.go makes command line programming fun, organized, and expressive!
**This is where `cli.go` comes into play.** `cli.go` makes command line programming fun, organized, and expressive!
## Installation
Make sure you have a working Go environment (go 1.1 is *required*). [See the install instructions](http://golang.org/doc/install.html).
Make sure you have a working Go environment (go 1.1+ is *required*). [See the install instructions](http://golang.org/doc/install.html).
To install `cli.go`, simply run:
```
@@ -25,7 +24,7 @@ export PATH=$PATH:$GOPATH/bin
```
## Getting Started
One of the philosophies behind cli.go is that an API should be playful and full of discovery. So a cli.go app can be as little as one line of code in `main()`.
One of the philosophies behind `cli.go` is that an API should be playful and full of discovery. So a `cli.go` app can be as little as one line of code in `main()`.
``` go
package main
@@ -68,8 +67,9 @@ Running this already gives you a ton of functionality, plus support for things l
Being a programmer can be a lonely job. Thankfully by the power of automation that is not the case! Let's create a greeter app to fend off our demons of loneliness!
Start by creating a directory named `greet`, and within it, add a file, `greet.go` with the following code in it:
``` go
/* greet.go */
package main
import (
@@ -84,7 +84,7 @@ func main() {
app.Action = func(c *cli.Context) {
println("Hello friend!")
}
app.Run(os.Args)
}
```
@@ -102,7 +102,8 @@ $ greet
Hello friend!
```
cli.go also generates some bitchass help text:
`cli.go` also generates neat help text:
```
$ greet help
NAME:
@@ -157,6 +158,34 @@ app.Action = func(c *cli.Context) {
...
```
You can also set a destination variable for a flag, to which the content will be scanned.
``` go
...
var language string
app.Flags = []cli.Flag {
cli.StringFlag{
Name: "lang",
Value: "english",
Usage: "language for the greeting",
Destination: &language,
},
}
app.Action = func(c *cli.Context) {
name := "someone"
if len(c.Args()) > 0 {
name = c.Args()[0]
}
if language == "spanish" {
println("Hola", name)
} else {
println("Hello", name)
}
}
...
```
See full list of flags at http://godoc.org/github.com/codegangsta/cli
#### Alternate Names
You can set alternate (or short) names for flags by providing a comma-delimited list for the `Name`. e.g.
@@ -171,6 +200,8 @@ app.Flags = []cli.Flag {
}
```
That flag can then be set with `--lang spanish` or `-l spanish`. Note that giving two different forms of the same flag in the same command invocation is an error.
#### Values from the Environment
You can also have the default value set from the environment via `EnvVar`. e.g.
@@ -186,7 +217,18 @@ app.Flags = []cli.Flag {
}
```
That flag can then be set with `--lang spanish` or `-l spanish`. Note that giving two different forms of the same flag in the same command invocation is an error.
The `EnvVar` may also be given as a comma-delimited "cascade", where the first environment variable that resolves is used as the default.
``` go
app.Flags = []cli.Flag {
cli.StringFlag{
Name: "lang, l",
Value: "english",
Usage: "language for the greeting",
EnvVar: "LEGACY_COMPAT_LANG,APP_LANG,LANG",
},
}
```
### Subcommands
@@ -196,7 +238,7 @@ Subcommands can be defined for a more git-like command line app.
app.Commands = []cli.Command{
{
Name: "add",
ShortName: "a",
Aliases: []string{"a"},
Usage: "add a task to the list",
Action: func(c *cli.Context) {
println("added task: ", c.Args().First())
@@ -204,7 +246,7 @@ app.Commands = []cli.Command{
},
{
Name: "complete",
ShortName: "c",
Aliases: []string{"c"},
Usage: "complete a task on the list",
Action: func(c *cli.Context) {
println("completed task: ", c.Args().First())
@@ -212,7 +254,7 @@ app.Commands = []cli.Command{
},
{
Name: "template",
ShortName: "r",
Aliases: []string{"r"},
Usage: "options for task templates",
Subcommands: []cli.Command{
{
@@ -230,7 +272,7 @@ app.Commands = []cli.Command{
},
},
},
},
},
}
...
```
@@ -248,8 +290,8 @@ app := cli.NewApp()
app.EnableBashCompletion = true
app.Commands = []cli.Command{
{
Name: "complete",
ShortName: "c",
Name: "complete",
Aliases: []string{"c"},
Usage: "complete a task on the list",
Action: func(c *cli.Context) {
println("completed task: ", c.Args().First())
@@ -275,13 +317,25 @@ setting the `PROG` variable to the name of your program:
`PROG=myprogram source /.../cli/autocomplete/bash_autocomplete`
#### To Distribute
Copy `autocomplete/bash_autocomplete` into `/etc/bash_completion.d/` and rename
it to the name of the program you wish to add autocomplete support for (or
automatically install it there if you are distributing a package). Don't forget
to source the file to make it active in the current shell.
```
sudo cp src/bash_autocomplete /etc/bash_completion.d/<myprogram>
source /etc/bash_completion.d/<myprogram>
```
Alternatively, you can just document that users should source the generic
`autocomplete/bash_autocomplete` in their bash configuration with `$PROG` set
to the name of their program (as above).
## Contribution Guidelines
Feel free to put up a pull request to fix a bug or maybe add a feature. I will give it a code review and make sure that it does not break backwards compatibility. If I or any other collaborators agree that it is in line with the vision of the project, we will work with you to get the code into a mergeable state and merge it into the master branch.
If you are have contributed something significant to the project, I will most likely add you as a collaborator. As a collaborator you are given the ability to merge others pull requests. It is very important that new code does not break existing code, so be careful about what code you do choose to merge. If you have any questions feel free to link @codegangsta to the issue in question and we can review it together.
If you have contributed something significant to the project, I will most likely add you as a collaborator. As a collaborator you are given the ability to merge others pull requests. It is very important that new code does not break existing code, so be careful about what code you do choose to merge. If you have any questions feel free to link @codegangsta to the issue in question and we can review it together.
If you feel like you have contributed to the project but have not yet been added as a collaborator, I probably forgot to add you. Hit @codegangsta up over email and we will get it figured out.
## About
cli.go is written by none other than the [Code Gangsta](http://codegangsta.io)

View File

@@ -2,18 +2,23 @@ package cli
import (
"fmt"
"io"
"io/ioutil"
"os"
"time"
)
// App is the main structure of a cli application. It is recommended that
// and app be created with the cli.NewApp() function
// an app be created with the cli.NewApp() function
type App struct {
// The name of the program. Defaults to os.Args[0]
Name string
// Full name of command for help, defaults to Name
HelpName string
// Description of the program.
Usage string
// Description of the program argument format.
ArgsUsage string
// Version of the program
Version string
// List of commands to execute
@@ -24,21 +29,32 @@ type App struct {
EnableBashCompletion bool
// Boolean to hide built-in help command
HideHelp bool
// Boolean to hide built-in version flag
HideVersion bool
// An action to execute when the bash-completion flag is set
BashComplete func(context *Context)
// An action to execute before any subcommands are run, but after the context is ready
// If a non-nil error is returned, no subcommands are run
Before func(context *Context) error
// An action to execute after any subcommands are run, but after the subcommand has finished
// It is run even if Action() panics
After func(context *Context) error
// The action to execute when no subcommands are specified
Action func(context *Context)
// Execute this function if the proper command cannot be found
CommandNotFound func(context *Context, command string)
// Compilation date
Compiled time.Time
// Author
// List of all authors who contributed
Authors []Author
// Copyright of the binary if any
Copyright string
// Name of Author (Note: Use App.Authors, this is deprecated)
Author string
// Author e-mail
// Email of Author (Note: Use App.Authors, this is deprecated)
Email string
// Writer writer to write output to
Writer io.Writer
}
// Tries to find out when this binary was compiled.
@@ -55,61 +71,95 @@ func compileTime() time.Time {
func NewApp() *App {
return &App{
Name: os.Args[0],
HelpName: os.Args[0],
Usage: "A new cli application",
Version: "0.0.0",
BashComplete: DefaultAppComplete,
Action: helpCommand.Action,
Compiled: compileTime(),
Writer: os.Stdout,
}
}
// Entry point to the cli app. Parses the arguments slice and routes to the proper flag/args combination
func (a *App) Run(arguments []string) error {
func (a *App) Run(arguments []string) (err error) {
if a.Author != "" || a.Email != "" {
a.Authors = append(a.Authors, Author{Name: a.Author, Email: a.Email})
}
newCmds := []Command{}
for _, c := range a.Commands {
if c.HelpName == "" {
c.HelpName = fmt.Sprintf("%s %s", a.HelpName, c.Name)
}
newCmds = append(newCmds, c)
}
a.Commands = newCmds
// append help to commands
if a.Command(helpCommand.Name) == nil && !a.HideHelp {
a.Commands = append(a.Commands, helpCommand)
a.appendFlag(HelpFlag)
if (HelpFlag != BoolFlag{}) {
a.appendFlag(HelpFlag)
}
}
//append version/help flags
if a.EnableBashCompletion {
a.appendFlag(BashCompletionFlag)
}
a.appendFlag(VersionFlag)
if !a.HideVersion {
a.appendFlag(VersionFlag)
}
// parse flags
set := flagSet(a.Name, a.Flags)
set.SetOutput(ioutil.Discard)
err := set.Parse(arguments[1:])
err = set.Parse(arguments[1:])
nerr := normalizeFlags(a.Flags, set)
if nerr != nil {
fmt.Println(nerr)
context := NewContext(a, set, set)
fmt.Fprintln(a.Writer, nerr)
context := NewContext(a, set, nil)
ShowAppHelp(context)
fmt.Println("")
return nerr
}
context := NewContext(a, set, set)
if err != nil {
fmt.Printf("Incorrect Usage.\n\n")
ShowAppHelp(context)
fmt.Println("")
return err
}
context := NewContext(a, set, nil)
if checkCompletions(context) {
return nil
}
if checkHelp(context) {
if err != nil {
fmt.Fprintln(a.Writer, "Incorrect Usage.")
fmt.Fprintln(a.Writer)
ShowAppHelp(context)
return err
}
if !a.HideHelp && checkHelp(context) {
ShowAppHelp(context)
return nil
}
if checkVersion(context) {
if !a.HideVersion && checkVersion(context) {
ShowVersion(context)
return nil
}
if a.After != nil {
defer func() {
afterErr := a.After(context)
if afterErr != nil {
if err != nil {
err = NewMultiError(err, afterErr)
} else {
err = afterErr
}
}
}()
}
if a.Before != nil {
err := a.Before(context)
if err != nil {
@@ -134,21 +184,32 @@ func (a *App) Run(arguments []string) error {
// RunAndExitOnError is another entry point to the cli app. It runs the app
// with os.Args and, when Run returns an error, reports that error on stderr
// and terminates the process with exit status 1.
func (a *App) RunAndExitOnError() {
	if err := a.Run(os.Args); err != nil {
		// Report the failure exactly once; writing it through both
		// os.Stderr.WriteString and fmt.Fprintln would duplicate the message.
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
// Invokes the subcommand given the context, parses ctx.Args() to generate command-specific flags
func (a *App) RunAsSubcommand(ctx *Context) error {
func (a *App) RunAsSubcommand(ctx *Context) (err error) {
// append help to commands
if len(a.Commands) > 0 {
if a.Command(helpCommand.Name) == nil && !a.HideHelp {
a.Commands = append(a.Commands, helpCommand)
a.appendFlag(HelpFlag)
if (HelpFlag != BoolFlag{}) {
a.appendFlag(HelpFlag)
}
}
}
newCmds := []Command{}
for _, c := range a.Commands {
if c.HelpName == "" {
c.HelpName = fmt.Sprintf("%s %s", a.HelpName, c.Name)
}
newCmds = append(newCmds, c)
}
a.Commands = newCmds
// append flags
if a.EnableBashCompletion {
a.appendFlag(BashCompletionFlag)
@@ -157,31 +218,32 @@ func (a *App) RunAsSubcommand(ctx *Context) error {
// parse flags
set := flagSet(a.Name, a.Flags)
set.SetOutput(ioutil.Discard)
err := set.Parse(ctx.Args().Tail())
err = set.Parse(ctx.Args().Tail())
nerr := normalizeFlags(a.Flags, set)
context := NewContext(a, set, ctx.globalSet)
context := NewContext(a, set, ctx)
if nerr != nil {
fmt.Println(nerr)
fmt.Fprintln(a.Writer, nerr)
fmt.Fprintln(a.Writer)
if len(a.Commands) > 0 {
ShowSubcommandHelp(context)
} else {
ShowCommandHelp(ctx, context.Args().First())
}
fmt.Println("")
return nerr
}
if err != nil {
fmt.Printf("Incorrect Usage.\n\n")
ShowSubcommandHelp(context)
return err
}
if checkCompletions(context) {
return nil
}
if err != nil {
fmt.Fprintln(a.Writer, "Incorrect Usage.")
fmt.Fprintln(a.Writer)
ShowSubcommandHelp(context)
return err
}
if len(a.Commands) > 0 {
if checkSubcommandHelp(context) {
return nil
@@ -192,6 +254,19 @@ func (a *App) RunAsSubcommand(ctx *Context) error {
}
}
if a.After != nil {
defer func() {
afterErr := a.After(context)
if afterErr != nil {
if err != nil {
err = NewMultiError(err, afterErr)
} else {
err = afterErr
}
}
}()
}
if a.Before != nil {
err := a.Before(context)
if err != nil {
@@ -209,11 +284,7 @@ func (a *App) RunAsSubcommand(ctx *Context) error {
}
// Run default Action
if len(a.Commands) > 0 {
a.Action(context)
} else {
a.Action(ctx)
}
a.Action(context)
return nil
}
@@ -244,3 +315,19 @@ func (a *App) appendFlag(flag Flag) {
a.Flags = append(a.Flags, flag)
}
}
// Author describes a person who has contributed to a cli project.
type Author struct {
	Name  string // The author's name
	Email string // The author's email address
}

// String implements fmt.Stringer so an Author can be printed directly from
// the help templates. The result is the name followed by a space and, when an
// email is set, the email wrapped in angle brackets with a trailing space.
func (a Author) String() string {
	var contact string
	if len(a.Email) > 0 {
		contact = "<" + a.Email + "> "
	}
	return fmt.Sprintf("%v %v", a.Name, contact)
}

View File

@@ -1,423 +0,0 @@
package cli_test
import (
"fmt"
"os"
"testing"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
)
// ExampleApp builds a minimal app with a single string flag and runs it with
// a fixed argument list so the printed greeting is deterministic.
func ExampleApp() {
	// set args for examples sake
	os.Args = []string{"greet", "--name", "Jeremy"}

	greeter := cli.NewApp()
	greeter.Name = "greet"
	greeter.Flags = []cli.Flag{
		cli.StringFlag{Name: "name", Value: "bob", Usage: "a name to say"},
	}
	greeter.Action = func(ctx *cli.Context) {
		fmt.Printf("Hello %v\n", ctx.String("name"))
	}

	greeter.Run(os.Args)
	// Output:
	// Hello Jeremy
}
// ExampleAppSubcommand wires a nested subcommand ("hello" -> "english") and
// invokes it through the short name of the parent command.
func ExampleAppSubcommand() {
	// set args for examples sake
	os.Args = []string{"say", "hi", "english", "--name", "Jeremy"}

	// Innermost command: prints the greeting using its own "name" flag.
	english := cli.Command{
		Name:        "english",
		ShortName:   "en",
		Usage:       "sends a greeting in english",
		Description: "greets someone in english",
		Flags: []cli.Flag{
			cli.StringFlag{
				Name:  "name",
				Value: "Bob",
				Usage: "Name of the person to greet",
			},
		},
		Action: func(ctx *cli.Context) {
			fmt.Println("Hello,", ctx.String("name"))
		},
	}

	// Parent command that only groups its subcommands.
	hello := cli.Command{
		Name:        "hello",
		ShortName:   "hi",
		Usage:       "use it to see a description",
		Description: "This is how we describe hello the function",
		Subcommands: []cli.Command{english},
	}

	say := cli.NewApp()
	say.Name = "say"
	say.Commands = []cli.Command{hello}

	say.Run(os.Args)
	// Output:
	// Hello, Jeremy
}
// ExampleAppHelp asks the built-in help command (via its short name "h") to
// describe a single registered command.
func ExampleAppHelp() {
	// set args for examples sake
	os.Args = []string{"greet", "h", "describeit"}

	describe := cli.Command{
		Name:        "describeit",
		ShortName:   "d",
		Usage:       "use it to see a description",
		Description: "This is how we describe describeit the function",
		Action: func(ctx *cli.Context) {
			fmt.Printf("i like to describe things")
		},
	}

	greeter := cli.NewApp()
	greeter.Name = "greet"
	greeter.Flags = []cli.Flag{
		cli.StringFlag{Name: "name", Value: "bob", Usage: "a name to say"},
	}
	greeter.Commands = []cli.Command{describe}

	greeter.Run(os.Args)
	// Output:
	// NAME:
	// describeit - use it to see a description
	//
	// USAGE:
	// command describeit [arguments...]
	//
	// DESCRIPTION:
	// This is how we describe describeit the function
}
// ExampleAppBashComplete enables bash completion and passes the hidden
// --generate-bash-completion flag, which makes the app list every command
// name and short name instead of running an action.
func ExampleAppBashComplete() {
	// set args for examples sake
	os.Args = []string{"greet", "--generate-bash-completion"}

	describe := cli.Command{
		Name:        "describeit",
		ShortName:   "d",
		Usage:       "use it to see a description",
		Description: "This is how we describe describeit the function",
		Action: func(ctx *cli.Context) {
			fmt.Printf("i like to describe things")
		},
	}
	next := cli.Command{
		Name:        "next",
		Usage:       "next example",
		Description: "more stuff to see when generating bash completion",
		Action: func(ctx *cli.Context) {
			fmt.Printf("the next example")
		},
	}

	greeter := cli.NewApp()
	greeter.Name = "greet"
	greeter.EnableBashCompletion = true
	greeter.Commands = []cli.Command{describe, next}

	greeter.Run(os.Args)
	// Output:
	// describeit
	// d
	// next
	// help
	// h
}
// TestApp_Run checks that the same App can be run more than once and that the
// default Action receives the first positional argument on every run.
func TestApp_Run(t *testing.T) {
	var seen string

	app := cli.NewApp()
	app.Action = func(c *cli.Context) {
		seen += c.Args().First()
	}

	// Run twice with different arguments; each run must succeed.
	for _, arg := range []string{"foo", "bar"} {
		err := app.Run([]string{"command", arg})
		expect(t, err, nil)
	}

	expect(t, seen, "foobar")
}
// commandAppTests is the lookup table used by TestApp_Command: each entry is
// a name passed to App.Command and whether a command is expected to be found.
var commandAppTests = []struct {
name string
expected bool
}{
// full names and short names of the registered commands are found
{"foobar", true},
{"batbaz", true},
{"b", true},
{"f", true},
// a prefix of a name and an unrelated name are not found
{"bat", false},
{"nothing", false},
}
// TestApp_Command registers two commands and verifies, for every entry of
// commandAppTests, whether App.Command resolves the given name.
func TestApp_Command(t *testing.T) {
	app := cli.NewApp()
	app.Commands = []cli.Command{
		{Name: "foobar", ShortName: "f"},
		{Name: "batbaz", ShortName: "b"},
	}

	for _, tc := range commandAppTests {
		found := app.Command(tc.name) != nil
		expect(t, found, tc.expected)
	}
}
// TestApp_CommandWithArgBeforeFlags verifies that a command still parses its
// flags when a positional argument appears before them on the command line.
func TestApp_CommandWithArgBeforeFlags(t *testing.T) {
	var (
		gotOption string
		gotArg    string
	)

	app := cli.NewApp()
	app.Commands = []cli.Command{
		{
			Name: "cmd",
			Flags: []cli.Flag{
				cli.StringFlag{Name: "option", Value: "", Usage: "some option"},
			},
			Action: func(c *cli.Context) {
				gotOption = c.String("option")
				gotArg = c.Args().First()
			},
		},
	}

	app.Run([]string{"", "cmd", "my-arg", "--option", "my-option"})

	expect(t, gotOption, "my-option")
	expect(t, gotArg, "my-arg")
}
// TestApp_Float64Flag verifies that a float64 flag given on the command line
// overrides the flag's default value.
func TestApp_Float64Flag(t *testing.T) {
	var height float64

	app := cli.NewApp()
	app.Flags = []cli.Flag{
		cli.Float64Flag{Name: "height", Value: 1.5, Usage: "Set the height, in meters"},
	}
	app.Action = func(c *cli.Context) {
		height = c.Float64("height")
	}

	args := []string{"", "--height", "1.93"}
	app.Run(args)

	expect(t, height, 1.93)
}
// TestApp_ParseSliceFlags verifies that repeated int and string slice flags
// are accumulated in order, and that a positional argument placed before the
// flags is still reported as the first argument.
func TestApp_ParseSliceFlags(t *testing.T) {
	var firstArg string
	var parsedIntSlice []int
	var parsedStringSlice []string

	app := cli.NewApp()
	command := cli.Command{
		Name: "cmd",
		Flags: []cli.Flag{
			// "p" collects ports and "ip" collects addresses; the usage
			// texts were previously attached to the wrong flag.
			cli.IntSliceFlag{Name: "p", Value: &cli.IntSlice{}, Usage: "set one or more ports to open"},
			cli.StringSliceFlag{Name: "ip", Value: &cli.StringSlice{}, Usage: "set one or more ip addr"},
		},
		Action: func(c *cli.Context) {
			parsedIntSlice = c.IntSlice("p")
			parsedStringSlice = c.StringSlice("ip")
			firstArg = c.Args().First()
		},
	}
	app.Commands = []cli.Command{command}

	app.Run([]string{"", "cmd", "my-arg", "-p", "22", "-p", "80", "-ip", "8.8.8.8", "-ip", "8.8.4.4"})

	intsEqual := func(a, b []int) bool {
		if len(a) != len(b) {
			return false
		}
		for i, v := range a {
			if v != b[i] {
				return false
			}
		}
		return true
	}

	strsEqual := func(a, b []string) bool {
		if len(a) != len(b) {
			return false
		}
		for i, v := range a {
			if v != b[i] {
				return false
			}
		}
		return true
	}

	expectedIntSlice := []int{22, 80}
	expectedStringSlice := []string{"8.8.8.8", "8.8.4.4"}

	if !intsEqual(parsedIntSlice, expectedIntSlice) {
		t.Errorf("%v does not match %v", parsedIntSlice, expectedIntSlice)
	}

	if !strsEqual(parsedStringSlice, expectedStringSlice) {
		t.Errorf("%v does not match %v", parsedStringSlice, expectedStringSlice)
	}

	// The positional argument was captured but never checked before.
	expect(t, firstArg, "my-arg")
}
// TestApp_BeforeFunc verifies the Before hook: it always runs, a nil result
// lets the subcommand execute, and a non-nil result is returned from Run
// unchanged while the subcommand is skipped.
func TestApp_BeforeFunc(t *testing.T) {
	var (
		beforeCalled bool
		subCalled    bool
	)
	errBefore := fmt.Errorf("fail")

	app := cli.NewApp()
	app.Flags = []cli.Flag{
		cli.StringFlag{Name: "opt"},
	}
	app.Before = func(c *cli.Context) error {
		beforeCalled = true
		if c.String("opt") == "fail" {
			return errBefore
		}
		return nil
	}
	app.Commands = []cli.Command{
		{
			Name: "sub",
			Action: func(c *cli.Context) {
				subCalled = true
			},
		},
	}

	// run with the Before() func succeeding
	if err := app.Run([]string{"command", "--opt", "succeed", "sub"}); err != nil {
		t.Fatalf("Run error: %s", err)
	}
	if !beforeCalled {
		t.Errorf("Before() not executed when expected")
	}
	if !subCalled {
		t.Errorf("Subcommand not executed when expected")
	}

	// reset
	beforeCalled, subCalled = false, false

	// run with the Before() func failing
	err := app.Run([]string{"command", "--opt", "fail", "sub"})

	// should be the same error produced by the Before func
	if err != errBefore {
		t.Errorf("Run error expected, but not received")
	}
	if !beforeCalled {
		t.Errorf("Before() not executed when expected")
	}
	if subCalled {
		t.Errorf("Subcommand executed when NOT expected")
	}
}
// TestAppHelpPrinter swaps in a custom cli.HelpPrinter and verifies that
// running the app invokes it. The original printer is restored on exit.
func TestAppHelpPrinter(t *testing.T) {
	saved := cli.HelpPrinter
	defer func() {
		cli.HelpPrinter = saved
	}()

	called := false
	cli.HelpPrinter = func(template string, data interface{}) {
		called = true
	}

	cli.NewApp().Run([]string{"-h"})

	if !called {
		t.Errorf("Help printer expected to be called, but was not")
	}
}
// TestAppVersionPrinter swaps in a custom cli.VersionPrinter and verifies
// that cli.ShowVersion delegates to it. The original printer is restored on
// exit.
func TestAppVersionPrinter(t *testing.T) {
	saved := cli.VersionPrinter
	defer func() {
		cli.VersionPrinter = saved
	}()

	called := false
	cli.VersionPrinter = func(c *cli.Context) {
		called = true
	}

	cli.ShowVersion(cli.NewContext(cli.NewApp(), nil, nil))

	if !called {
		t.Errorf("Version printer expected to be called, but was not")
	}
}
// TestAppCommandNotFound verifies that an unknown command name triggers the
// CommandNotFound hook and that no registered command action runs.
func TestAppCommandNotFound(t *testing.T) {
	var (
		notFoundCalled bool
		actionCalled   bool
	)

	app := cli.NewApp()
	app.CommandNotFound = func(c *cli.Context, command string) {
		notFoundCalled = true
	}
	app.Commands = []cli.Command{
		{
			Name: "bar",
			Action: func(c *cli.Context) {
				actionCalled = true
			},
		},
	}

	app.Run([]string{"command", "foo"})

	expect(t, notFoundCalled, true)
	expect(t, actionCalled, false)
}
// TestGlobalFlagsInSubcommands verifies that a flag defined on the app is
// visible through GlobalBool from a nested subcommand's action.
func TestGlobalFlagsInSubcommands(t *testing.T) {
	sawDebug := false

	// Leaf command: records whether the app-level "debug" flag is set.
	bar := cli.Command{
		Name: "bar",
		Action: func(c *cli.Context) {
			if c.GlobalBool("debug") {
				sawDebug = true
			}
		},
	}

	app := cli.NewApp()
	app.Flags = []cli.Flag{
		cli.BoolFlag{Name: "debug, d", Usage: "Enable debugging"},
	}
	app.Commands = []cli.Command{
		{
			Name:        "foo",
			Subcommands: []cli.Command{bar},
		},
	}

	app.Run([]string{"command", "-d", "foo", "bar"})

	expect(t, sawDebug, true)
}

View File

@@ -1,13 +1,14 @@
#! /bin/bash

# Bash completion for programs built with the cli package.
# PROG defaults to the name of this script; set it before sourcing the file to
# register completion for a differently named program.
: "${PROG:=$(basename "${BASH_SOURCE}")}"

# Completion function: re-invokes the command line typed so far with
# --generate-bash-completion appended and offers the words it prints that
# match the word under the cursor.
_cli_bash_autocomplete() {
    local cur opts
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    # Intentionally unquoted: the words typed so far are executed as a command.
    opts=$( ${COMP_WORDS[@]:0:$COMP_CWORD} --generate-bash-completion )
    COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
    return 0
}

# Register the completion function once for the program.
complete -F _cli_bash_autocomplete "$PROG"

View File

@@ -17,3 +17,24 @@
// app.Run(os.Args)
// }
package cli
import (
"strings"
)
// MultiError bundles several errors into a single value that satisfies the
// error interface.
type MultiError struct {
	Errors []error
}

// NewMultiError returns a MultiError holding the given errors in the order
// they were passed.
func NewMultiError(err ...error) MultiError {
	return MultiError{Errors: err}
}

// Error implements the error interface. It returns the messages of all
// contained errors joined by newlines. Nil entries are skipped so that a
// missing error cannot cause a nil-pointer panic while formatting.
func (m MultiError) Error() string {
	errs := make([]string, 0, len(m.Errors))
	for _, err := range m.Errors {
		if err == nil {
			continue
		}
		errs = append(errs, err.Error())
	}

	return strings.Join(errs, "\n")
}

View File

@@ -1,100 +0,0 @@
package cli_test
import (
"os"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
)
// Example builds a small "todo" app with two commands, each of which prints
// the first positional argument it receives.
func Example() {
	add := cli.Command{
		Name:      "add",
		ShortName: "a",
		Usage:     "add a task to the list",
		Action: func(ctx *cli.Context) {
			println("added task: ", ctx.Args().First())
		},
	}
	complete := cli.Command{
		Name:      "complete",
		ShortName: "c",
		Usage:     "complete a task on the list",
		Action: func(ctx *cli.Context) {
			println("completed task: ", ctx.Args().First())
		},
	}

	todo := cli.NewApp()
	todo.Name = "todo"
	todo.Usage = "task list on the command line"
	todo.Commands = []cli.Command{add, complete}

	todo.Run(os.Args)
}
// ExampleSubcommand builds a "say" app whose "hello" command groups three
// language-specific greeting subcommands, next to a plain "bye" command.
func ExampleSubcommand() {
	english := cli.Command{
		Name:        "english",
		ShortName:   "en",
		Usage:       "sends a greeting in english",
		Description: "greets someone in english",
		Flags: []cli.Flag{
			cli.StringFlag{
				Name:  "name",
				Value: "Bob",
				Usage: "Name of the person to greet",
			},
		},
		Action: func(ctx *cli.Context) {
			println("Hello, ", ctx.String("name"))
		},
	}
	spanish := cli.Command{
		Name:      "spanish",
		ShortName: "sp",
		Usage:     "sends a greeting in spanish",
		Flags: []cli.Flag{
			cli.StringFlag{
				Name:  "surname",
				Value: "Jones",
				Usage: "Surname of the person to greet",
			},
		},
		Action: func(ctx *cli.Context) {
			println("Hola, ", ctx.String("surname"))
		},
	}
	french := cli.Command{
		Name:      "french",
		ShortName: "fr",
		Usage:     "sends a greeting in french",
		Flags: []cli.Flag{
			cli.StringFlag{
				Name:  "nickname",
				Value: "Stevie",
				Usage: "Nickname of the person to greet",
			},
		},
		Action: func(ctx *cli.Context) {
			println("Bonjour, ", ctx.String("nickname"))
		},
	}

	// Parent command that only groups the greetings.
	hello := cli.Command{
		Name:        "hello",
		ShortName:   "hi",
		Usage:       "use it to see a description",
		Description: "This is how we describe hello the function",
		Subcommands: []cli.Command{english, spanish, french},
	}
	bye := cli.Command{
		Name:  "bye",
		Usage: "says goodbye",
		Action: func(ctx *cli.Context) {
			println("bye")
		},
	}

	say := cli.NewApp()
	say.Name = "say"
	say.Commands = []cli.Command{hello, bye}

	say.Run(os.Args)
}

View File

@@ -10,17 +10,24 @@ import (
type Command struct {
// The name of the command
Name string
// short name of the command. Typically one character
// short name of the command. Typically one character (deprecated, use `Aliases`)
ShortName string
// A list of aliases for the command
Aliases []string
// A short description of the usage of this command
Usage string
// A longer explanation of how the command works
Description string
// A short description of the arguments of this command
ArgsUsage string
// The function to call when checking for bash command completions
BashComplete func(context *Context)
// An action to execute before any sub-subcommands are run, but after the context is ready
// If a non-nil error is returned, no sub-subcommands are run
Before func(context *Context) error
// An action to execute after any subcommands are run, once the subcommand has finished
// It is run even if Action() panics
After func(context *Context) error
// The function to call when this command is invoked
Action func(context *Context)
// List of child commands
@@ -31,16 +38,28 @@ type Command struct {
SkipFlagParsing bool
// Boolean to hide built-in help command
HideHelp bool
// Full name of command for help, defaults to full command name, including parent commands.
HelpName string
commandNamePath []string
}
// FullName returns the full name of the command. When a command name path
// has been recorded (for subcommands, so parent commands are included) its
// elements are joined with spaces; otherwise the plain Name is returned.
func (c Command) FullName() string {
	if path := c.commandNamePath; path != nil {
		return strings.Join(path, " ")
	}
	return c.Name
}
// Invokes the command given the context, parses ctx.Args() to generate command-specific flags
func (c Command) Run(ctx *Context) error {
if len(c.Subcommands) > 0 || c.Before != nil {
if len(c.Subcommands) > 0 || c.Before != nil || c.After != nil {
return c.startApp(ctx)
}
if !c.HideHelp {
if !c.HideHelp && (HelpFlag != BoolFlag{}) {
// append help to flags
c.Flags = append(
c.Flags,
@@ -55,40 +74,57 @@ func (c Command) Run(ctx *Context) error {
set := flagSet(c.Name, c.Flags)
set.SetOutput(ioutil.Discard)
firstFlagIndex := -1
for index, arg := range ctx.Args() {
if strings.HasPrefix(arg, "-") {
firstFlagIndex = index
break
var err error
if !c.SkipFlagParsing {
firstFlagIndex := -1
terminatorIndex := -1
for index, arg := range ctx.Args() {
if arg == "--" {
terminatorIndex = index
break
} else if strings.HasPrefix(arg, "-") && firstFlagIndex == -1 {
firstFlagIndex = index
}
}
if firstFlagIndex > -1 {
args := ctx.Args()
regularArgs := make([]string, len(args[1:firstFlagIndex]))
copy(regularArgs, args[1:firstFlagIndex])
var flagArgs []string
if terminatorIndex > -1 {
flagArgs = args[firstFlagIndex:terminatorIndex]
regularArgs = append(regularArgs, args[terminatorIndex:]...)
} else {
flagArgs = args[firstFlagIndex:]
}
err = set.Parse(append(flagArgs, regularArgs...))
} else {
err = set.Parse(ctx.Args().Tail())
}
} else {
if c.SkipFlagParsing {
err = set.Parse(append([]string{"--"}, ctx.Args().Tail()...))
}
}
var err error
if firstFlagIndex > -1 && !c.SkipFlagParsing {
args := ctx.Args()
regularArgs := args[1:firstFlagIndex]
flagArgs := args[firstFlagIndex:]
err = set.Parse(append(flagArgs, regularArgs...))
} else {
err = set.Parse(ctx.Args().Tail())
}
if err != nil {
fmt.Printf("Incorrect Usage.\n\n")
fmt.Fprintln(ctx.App.Writer, "Incorrect Usage.")
fmt.Fprintln(ctx.App.Writer)
ShowCommandHelp(ctx, c.Name)
fmt.Println("")
return err
}
nerr := normalizeFlags(c.Flags, set)
if nerr != nil {
fmt.Println(nerr)
fmt.Println("")
fmt.Fprintln(ctx.App.Writer, nerr)
fmt.Fprintln(ctx.App.Writer)
ShowCommandHelp(ctx, c.Name)
fmt.Println("")
return nerr
}
context := NewContext(ctx.App, set, ctx.globalSet)
context := NewContext(ctx.App, set, ctx)
if checkCommandCompletions(context, c.Name) {
return nil
@@ -102,9 +138,24 @@ func (c Command) Run(ctx *Context) error {
return nil
}
// Names returns every name the command answers to: its Name first, then its
// ShortName when one is set, then all of its Aliases.
func (c Command) Names() []string {
	all := make([]string, 0, 2+len(c.Aliases))
	all = append(all, c.Name)

	if c.ShortName != "" {
		all = append(all, c.ShortName)
	}

	return append(all, c.Aliases...)
}
// HasName reports whether name matches any of the names returned by
// c.Names(), i.e. the command's Name, its ShortName, or one of its Aliases.
func (c Command) HasName(name string) bool {
	// Check every known name; returning early on Name/ShortName alone would
	// leave the aliases unreachable.
	for _, n := range c.Names() {
		if n == name {
			return true
		}
	}
	return false
}
func (c Command) startApp(ctx *Context) error {
@@ -112,6 +163,12 @@ func (c Command) startApp(ctx *Context) error {
// set the name and usage
app.Name = fmt.Sprintf("%s %s", ctx.App.Name, c.Name)
if c.HelpName == "" {
app.HelpName = c.HelpName
} else {
app.HelpName = fmt.Sprintf("%s %s", ctx.App.Name, c.Name)
}
if c.Description != "" {
app.Usage = c.Description
} else {
@@ -126,6 +183,13 @@ func (c Command) startApp(ctx *Context) error {
app.Flags = c.Flags
app.HideHelp = c.HideHelp
app.Version = ctx.App.Version
app.HideVersion = ctx.App.HideVersion
app.Compiled = ctx.App.Compiled
app.Author = ctx.App.Author
app.Email = ctx.App.Email
app.Writer = ctx.App.Writer
// bash completion
app.EnableBashCompletion = ctx.App.EnableBashCompletion
if c.BashComplete != nil {
@@ -134,11 +198,19 @@ func (c Command) startApp(ctx *Context) error {
// set the actions
app.Before = c.Before
app.After = c.After
if c.Action != nil {
app.Action = c.Action
} else {
app.Action = helpSubcommand.Action
}
var newCmds []Command
for _, cc := range app.Commands {
cc.commandNamePath = []string{c.Name, cc.Name}
newCmds = append(newCmds, cc)
}
app.Commands = newCmds
return app.RunAsSubcommand(ctx)
}

Some files were not shown because too many files have changed in this diff Show More