etcd/wal/wal.go

264 lines
6.5 KiB
Go
Raw Normal View History

2014-07-28 07:58:39 +04:00
/*
Copyright 2014 CoreOS Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
2014-07-26 01:20:21 +04:00
package wal
import (
2014-09-04 02:10:15 +04:00
"errors"
2014-07-26 01:20:21 +04:00
"fmt"
2014-09-04 02:10:15 +04:00
"hash/crc32"
2014-07-26 11:10:59 +04:00
"io"
2014-08-03 20:51:10 +04:00
"log"
2014-07-26 01:20:21 +04:00
"os"
2014-08-22 23:44:18 +04:00
"path"
"sort"
2014-07-26 01:20:21 +04:00
2014-09-04 02:10:15 +04:00
"github.com/coreos/etcd/raft/raftpb"
2014-09-04 03:46:42 +04:00
"github.com/coreos/etcd/wal/walpb"
2014-07-26 01:20:21 +04:00
)
2014-08-22 23:44:18 +04:00
// Record types stored in the Type field of a WAL record.
// Numbering starts at 1, so a zero Type never names a valid record kind.
const (
	infoType  int64 = iota + 1 // 1: record data is a marshaled raftpb.Info
	entryType                  // 2: record data is a marshaled raftpb.Entry
	stateType                  // 3: record data is a marshaled raftpb.State
	crcType                    // 4: record carries a CRC used to chain checksums
)
2014-07-26 01:20:21 +04:00
var (
	// ErrIdMismatch is returned by ReadAll when an info record carries an id
	// that differs from the id of an earlier info record.
	ErrIdMismatch = errors.New("wal: unmatch id")
	// ErrNotFound is returned by OpenFromIndex when no suitable WAL file
	// exists for the requested index.
	ErrNotFound = errors.New("wal: file is not found")
	// ErrCRCMismatch is returned by ReadAll when a crc record does not
	// validate against the decoder's running CRC.
	ErrCRCMismatch = errors.New("wal: crc mismatch")

	// crcTable is the CRC-32 table for the Castagnoli polynomial.
	crcTable = crc32.MakeTable(crc32.Castagnoli)
)
2014-09-04 02:10:15 +04:00
// WAL is a logical repersentation of the stable storage.
// WAL is either in read mode or append mode but not both.
// A newly created WAL is in append mode, and ready for appending records.
// A just opened WAL is in read mode, and ready for reading records.
// The WAL will be ready for appending after reading out all the previous records.
2014-07-26 01:20:21 +04:00
type WAL struct {
2014-09-04 02:10:15 +04:00
dir string // the living directory of the underlay files
2014-07-26 01:20:21 +04:00
2014-09-04 02:10:15 +04:00
ri int64 // index of entry to start reading
decoder *decoder // decoder to decode records
2014-07-28 07:32:23 +04:00
2014-09-04 02:10:15 +04:00
f *os.File // underlay file opened for appending, sync
seq int64 // current sequence of the wal file
encoder *encoder // encoder to encode records
2014-08-22 23:44:18 +04:00
}
2014-09-04 02:10:15 +04:00
// Create creates a WAL ready for appending records.
2014-08-22 23:44:18 +04:00
func Create(dirpath string) (*WAL, error) {
log.Printf("path=%s wal.create", dirpath)
if Exist(dirpath) {
2014-07-26 01:54:08 +04:00
return nil, os.ErrExist
}
2014-08-22 23:44:18 +04:00
p := path.Join(dirpath, fmt.Sprintf("%016x-%016x.wal", 0, 0))
f, err := os.OpenFile(p, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0600)
2014-07-26 01:20:21 +04:00
if err != nil {
return nil, err
}
2014-09-04 02:10:15 +04:00
w := &WAL{
dir: dirpath,
seq: 0,
f: f,
encoder: newEncoder(f, 0),
}
if err := w.saveCrc(0); err != nil {
return nil, err
}
return w, nil
2014-07-26 01:20:21 +04:00
}
2014-09-04 02:10:15 +04:00
// OpenFromIndex opens the WAL files containing all the entries after
// the given index.
// The returned WAL is ready to read. The WAL cannot be appended to before
// reading out all of its previous records.
func OpenFromIndex(dirpath string, index int64) (*WAL, error) {
log.Printf("path=%s wal.load index=%d", dirpath, index)
2014-08-22 23:44:18 +04:00
names, err := readDir(dirpath)
if err != nil {
return nil, err
}
names = checkWalNames(names)
if len(names) == 0 {
return nil, ErrNotFound
}
2014-09-04 02:10:15 +04:00
sort.Sort(sort.StringSlice(names))
2014-07-28 08:01:39 +04:00
2014-09-04 02:10:15 +04:00
nameIndex, ok := searchIndex(names, index)
if !ok || !isValidSeq(names[nameIndex:]) {
return nil, ErrNotFound
2014-07-26 01:54:08 +04:00
}
2014-09-04 02:10:15 +04:00
// open the wal files for reading
rcs := make([]io.ReadCloser, 0)
for _, name := range names[nameIndex:] {
f, err := os.Open(path.Join(dirpath, name))
if err != nil {
return nil, err
}
rcs = append(rcs, f)
2014-07-26 01:20:21 +04:00
}
2014-09-04 02:10:15 +04:00
rc := MultiReadCloser(rcs...)
2014-07-26 01:20:21 +04:00
2014-09-04 02:10:15 +04:00
// open the lastest wal file for appending
last := path.Join(dirpath, names[len(names)-1])
f, err := os.OpenFile(last, os.O_WRONLY|os.O_APPEND, 0)
2014-07-26 01:20:21 +04:00
if err != nil {
2014-09-04 02:10:15 +04:00
rc.Close()
return nil, err
2014-07-26 01:20:21 +04:00
}
2014-09-04 02:10:15 +04:00
// create a WAL ready for reading
w := &WAL{
ri: index,
decoder: newDecoder(rc),
2014-07-26 01:20:21 +04:00
2014-09-04 02:10:15 +04:00
f: f,
2014-07-26 11:10:59 +04:00
}
2014-09-04 02:10:15 +04:00
return w, nil
2014-07-26 11:10:59 +04:00
}
2014-09-04 02:10:15 +04:00
// ReadAll reads out all records of the current WAL.
// After ReadAll, the WAL will be ready for appending new records.
func (w *WAL) ReadAll() (int64, raftpb.State, []raftpb.Entry, error) {
var id int64
var state raftpb.State
var entries []raftpb.Entry
2014-08-22 23:44:18 +04:00
2014-09-04 03:46:42 +04:00
rec := &walpb.Record{}
2014-09-04 02:10:15 +04:00
decoder := w.decoder
var err error
for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) {
2014-08-05 02:46:12 +04:00
switch rec.Type {
2014-07-26 11:10:59 +04:00
case entryType:
2014-09-04 02:10:15 +04:00
e := mustUnmarshalEntry(rec.Data)
if e.Index > w.ri {
entries = append(entries[:e.Index-w.ri-1], e)
2014-07-26 11:10:59 +04:00
}
case stateType:
2014-09-04 02:10:15 +04:00
state = mustUnmarshalState(rec.Data)
case infoType:
i := mustUnmarshalInfo(rec.Data)
if id != 0 && id != i.Id {
state.Reset()
return 0, state, nil, ErrIdMismatch
2014-07-26 11:10:59 +04:00
}
2014-09-04 02:10:15 +04:00
id = i.Id
case crcType:
crc := decoder.crc.Sum32()
// current crc of decoder must match the crc of the record.
// do no need to match 0 crc, since the decoder is a new one at this case.
2014-09-04 03:46:42 +04:00
if crc != 0 && rec.Validate(crc) != nil {
2014-09-04 02:10:15 +04:00
state.Reset()
return 0, state, nil, ErrCRCMismatch
}
decoder.updateCRC(rec.Crc)
2014-07-26 11:10:59 +04:00
default:
2014-09-04 02:10:15 +04:00
state.Reset()
return 0, state, nil, fmt.Errorf("unexpected block type %d", rec.Type)
2014-07-26 11:10:59 +04:00
}
}
2014-07-28 04:23:04 +04:00
if err != io.EOF {
2014-09-04 02:10:15 +04:00
state.Reset()
return 0, state, nil, err
2014-08-22 23:44:18 +04:00
}
2014-09-04 02:10:15 +04:00
// close decoder, disable reading
w.decoder.close()
w.ri = 0
// create encoder (chain crc with the decoder), enable appending
w.encoder = newEncoder(w.f, w.decoder.lastCRC())
w.decoder = nil
return id, state, entries, nil
2014-08-22 23:44:18 +04:00
}
2014-09-04 02:10:15 +04:00
// index should be the index of last log entry.
// Cut closes current file written and creates a new one ready to append.
func (w *WAL) Cut(index int64) error {
log.Printf("wal.cut index=%d", index)
// create a new wal file with name sequence + 1
fpath := path.Join(w.dir, fmt.Sprintf("%016x-%016x.wal", w.seq+1, index))
f, err := os.OpenFile(fpath, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0600)
2014-08-22 23:44:18 +04:00
if err != nil {
2014-09-04 02:10:15 +04:00
return err
2014-08-22 23:44:18 +04:00
}
2014-09-04 02:10:15 +04:00
w.Sync()
w.f.Close()
2014-08-22 23:44:18 +04:00
2014-09-04 02:10:15 +04:00
// update writer and save the previous crc
w.f = f
w.seq++
prevCrc := w.encoder.crc.Sum32()
w.encoder = newEncoder(w.f, prevCrc)
return w.saveCrc(prevCrc)
2014-08-22 23:44:18 +04:00
}
2014-09-04 02:10:15 +04:00
func (w *WAL) Sync() error {
if w.encoder != nil {
if err := w.encoder.flush(); err != nil {
return err
2014-08-22 23:44:18 +04:00
}
}
2014-09-04 02:10:15 +04:00
return w.f.Sync()
2014-08-22 23:44:18 +04:00
}
2014-09-04 02:10:15 +04:00
func (w *WAL) Close() {
log.Printf("path=%s wal.close", w.f.Name())
if w.f != nil {
w.Sync()
w.f.Close()
2014-08-22 23:44:18 +04:00
}
2014-07-26 11:10:59 +04:00
}
2014-09-04 02:10:15 +04:00
func (w *WAL) SaveInfo(i *raftpb.Info) error {
log.Printf("path=%s wal.saveInfo id=%d", w.f.Name(), i.Id)
b, err := i.Marshal()
2014-08-04 07:44:02 +04:00
if err != nil {
panic(err)
2014-07-26 11:10:59 +04:00
}
2014-09-04 03:46:42 +04:00
rec := &walpb.Record{Type: infoType, Data: b}
2014-09-04 02:10:15 +04:00
return w.encoder.encode(rec)
2014-07-26 11:10:59 +04:00
}
2014-09-04 02:10:15 +04:00
func (w *WAL) SaveEntry(e *raftpb.Entry) error {
b, err := e.Marshal()
2014-08-03 05:21:25 +04:00
if err != nil {
panic(err)
}
2014-09-04 03:46:42 +04:00
rec := &walpb.Record{Type: entryType, Data: b}
2014-09-04 02:10:15 +04:00
return w.encoder.encode(rec)
2014-07-26 11:10:59 +04:00
}
2014-09-04 02:10:15 +04:00
func (w *WAL) SaveState(s *raftpb.State) error {
log.Printf("path=%s wal.saveState state=\"%+v\"", w.f.Name(), s)
b, err := s.Marshal()
2014-08-22 23:44:18 +04:00
if err != nil {
2014-09-04 02:10:15 +04:00
panic(err)
2014-08-22 23:44:18 +04:00
}
2014-09-04 03:46:42 +04:00
rec := &walpb.Record{Type: stateType, Data: b}
2014-09-04 02:10:15 +04:00
return w.encoder.encode(rec)
2014-08-22 23:44:18 +04:00
}
2014-09-04 02:10:15 +04:00
func (w *WAL) saveCrc(prevCrc uint32) error {
2014-09-04 03:46:42 +04:00
return w.encoder.encode(&walpb.Record{Type: crcType, Crc: prevCrc})
2014-07-26 02:21:04 +04:00
}