|
|
|
@ -53,6 +53,7 @@ typedef struct VitastorClient
|
|
|
|
|
char *etcd_host;
|
|
|
|
|
char *etcd_prefix;
|
|
|
|
|
char *image;
|
|
|
|
|
int skip_parents;
|
|
|
|
|
uint64_t inode;
|
|
|
|
|
uint64_t pool;
|
|
|
|
|
uint64_t size;
|
|
|
|
@ -63,6 +64,10 @@ typedef struct VitastorClient
|
|
|
|
|
int rdma_gid_index;
|
|
|
|
|
int rdma_mtu;
|
|
|
|
|
QemuMutex mutex;
|
|
|
|
|
|
|
|
|
|
uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
|
|
|
|
|
uint32_t last_bitmap_granularity;
|
|
|
|
|
uint8_t *last_bitmap;
|
|
|
|
|
} VitastorClient;
|
|
|
|
|
|
|
|
|
|
typedef struct VitastorRPC
|
|
|
|
@ -72,6 +77,9 @@ typedef struct VitastorRPC
|
|
|
|
|
QEMUIOVector *iov;
|
|
|
|
|
long ret;
|
|
|
|
|
int complete;
|
|
|
|
|
uint64_t inode, offset, len;
|
|
|
|
|
uint32_t bitmap_granularity;
|
|
|
|
|
uint8_t *bitmap;
|
|
|
|
|
} VitastorRPC;
|
|
|
|
|
|
|
|
|
|
static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
|
|
|
@ -147,6 +155,7 @@ static void vitastor_parse_filename(const char *filename, QDict *options, Error
|
|
|
|
|
if (!strcmp(name, "inode") ||
|
|
|
|
|
!strcmp(name, "pool") ||
|
|
|
|
|
!strcmp(name, "size") ||
|
|
|
|
|
!strcmp(name, "skip-parents") ||
|
|
|
|
|
!strcmp(name, "use-rdma") ||
|
|
|
|
|
!strcmp(name, "rdma-port_num") ||
|
|
|
|
|
!strcmp(name, "rdma-gid-index") ||
|
|
|
|
@ -227,13 +236,16 @@ static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandle
|
|
|
|
|
|
|
|
|
|
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
|
|
|
|
{
|
|
|
|
|
VitastorRPC task;
|
|
|
|
|
VitastorClient *client = bs->opaque;
|
|
|
|
|
void *image = NULL;
|
|
|
|
|
int64_t ret = 0;
|
|
|
|
|
qemu_mutex_init(&client->mutex);
|
|
|
|
|
client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
|
|
|
|
|
// FIXME: Rename to etcd_address
|
|
|
|
|
client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
|
|
|
|
|
client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
|
|
|
|
|
client->skip_parents = qdict_get_try_int(options, "skip-parents", 0);
|
|
|
|
|
client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
|
|
|
|
|
client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
|
|
|
|
|
client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
|
|
|
|
@ -243,23 +255,25 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
|
|
|
|
vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
|
|
|
|
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
|
|
|
|
);
|
|
|
|
|
client->image = g_strdup(qdict_get_try_str(options, "image"));
|
|
|
|
|
image = client->image = g_strdup(qdict_get_try_str(options, "image"));
|
|
|
|
|
client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
|
|
|
|
// Get image metadata (size and readonly flag) or just wait until the client is ready
|
|
|
|
|
if (!image)
|
|
|
|
|
client->image = "x";
|
|
|
|
|
task.complete = 0;
|
|
|
|
|
task.bs = bs;
|
|
|
|
|
if (qemu_in_coroutine())
|
|
|
|
|
{
|
|
|
|
|
vitastor_co_get_metadata(&task);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
|
|
|
|
|
BDRV_POLL_WHILE(bs, !task.complete);
|
|
|
|
|
}
|
|
|
|
|
client->image = image;
|
|
|
|
|
if (client->image)
|
|
|
|
|
{
|
|
|
|
|
// Get image metadata (size and readonly flag)
|
|
|
|
|
VitastorRPC task;
|
|
|
|
|
task.complete = 0;
|
|
|
|
|
task.bs = bs;
|
|
|
|
|
if (qemu_in_coroutine())
|
|
|
|
|
{
|
|
|
|
|
vitastor_co_get_metadata(&task);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
|
|
|
|
|
BDRV_POLL_WHILE(bs, !task.complete);
|
|
|
|
|
}
|
|
|
|
|
client->watch = (void*)task.ret;
|
|
|
|
|
client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
|
|
|
|
|
client->size = vitastor_c_inode_get_size(client->watch);
|
|
|
|
@ -284,6 +298,7 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
|
|
|
|
client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
|
|
|
|
|
}
|
|
|
|
|
client->size = qdict_get_try_int(options, "size", 0);
|
|
|
|
|
vitastor_c_close_watch(client->proxy, (void*)task.ret);
|
|
|
|
|
}
|
|
|
|
|
if (!client->size)
|
|
|
|
|
{
|
|
|
|
@ -305,6 +320,7 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
|
|
|
|
qdict_del(options, "inode");
|
|
|
|
|
qdict_del(options, "pool");
|
|
|
|
|
qdict_del(options, "size");
|
|
|
|
|
qdict_del(options, "skip-parents");
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -321,6 +337,8 @@ static void vitastor_close(BlockDriverState *bs)
|
|
|
|
|
g_free(client->etcd_prefix);
|
|
|
|
|
if (client->image)
|
|
|
|
|
g_free(client->image);
|
|
|
|
|
free(client->last_bitmap);
|
|
|
|
|
client->last_bitmap = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
|
|
|
|
@ -486,6 +504,13 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
|
|
|
|
|
vitastor_co_init_task(bs, &task);
|
|
|
|
|
task.iov = iov;
|
|
|
|
|
|
|
|
|
|
if (client->last_bitmap)
|
|
|
|
|
{
|
|
|
|
|
// Invalidate last bitmap on write
|
|
|
|
|
free(client->last_bitmap);
|
|
|
|
|
client->last_bitmap = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
|
|
|
|
qemu_mutex_lock(&client->mutex);
|
|
|
|
|
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
|
|
|
|
@ -499,6 +524,140 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
|
|
|
|
|
return task.ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
|
|
|
|
|
#if QEMU_VERSION_MAJOR >= 2 || QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7
|
|
|
|
|
static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitmap)
|
|
|
|
|
{
|
|
|
|
|
VitastorRPC *task = opaque;
|
|
|
|
|
VitastorClient *client = task->bs->opaque;
|
|
|
|
|
task->ret = retval;
|
|
|
|
|
task->complete = 1;
|
|
|
|
|
if (retval >= 0)
|
|
|
|
|
{
|
|
|
|
|
task->bitmap = bitmap;
|
|
|
|
|
if (client->last_bitmap_inode == task->inode &&
|
|
|
|
|
client->last_bitmap_offset == task->offset &&
|
|
|
|
|
client->last_bitmap_len == task->len)
|
|
|
|
|
{
|
|
|
|
|
free(client->last_bitmap);
|
|
|
|
|
client->last_bitmap = bitmap;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (qemu_coroutine_self() != task->co)
|
|
|
|
|
{
|
|
|
|
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
|
|
|
|
aio_co_wake(task->co);
|
|
|
|
|
#else
|
|
|
|
|
qemu_coroutine_enter(task->co, NULL);
|
|
|
|
|
qemu_aio_release(task);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Report the allocation status of the range starting at `offset`.
 *
 * Return value:
 *   allocated     => BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID
 *   not allocated => 0
 *   error         => negative errno-style value (-EAGAIN when the inode's
 *                    block size or bitmap granularity is not known)
 *
 * Outputs:
 *   *pnum - length in bytes of the extent the status applies to
 *   *map  - set to `offset` when the extent is allocated
 *   *file - set to `bs` when the extent is allocated
 *
 * With want_zero set, the bitmap is scanned bit by bit and the returned extent
 * ends at the first granule whose state differs from the first one. Without
 * it, the whole requested range is reported as allocated if any granule it
 * touches is allocated (false positives are acceptable, false negatives are
 * not).
 *
 * The most recently read bitmap is cached in the client and reused when it
 * covers the requested range.
 */
static int coroutine_fn vitastor_co_block_status(
    BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
    int64_t *pnum, int64_t *map, BlockDriverState **file)
{
    VitastorRPC task;
    VitastorClient *client = bs->opaque;
    uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
    uint8_t bit = 0;
    if (client->last_bitmap && client->last_bitmap_inode == inode &&
        client->last_bitmap_offset <= offset &&
        client->last_bitmap_offset+client->last_bitmap_len >= (want_zero ? offset+1 : offset+bytes))
    {
        // Use the previously read bitmap
        task.bitmap_granularity = client->last_bitmap_granularity;
        task.offset = client->last_bitmap_offset;
        task.len = client->last_bitmap_len;
        task.bitmap = client->last_bitmap;
    }
    else
    {
        // Read bitmap from this position, rounding to full inode PG blocks
        uint32_t block_size = vitastor_c_inode_get_block_size(client->proxy, inode);
        if (!block_size)
        {
            return -EAGAIN;
        }
        // Init coroutine
        vitastor_co_init_task(bs, &task);
        free(client->last_bitmap);
        task.inode = client->last_bitmap_inode = inode;
        task.bitmap_granularity = client->last_bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(client->proxy, inode);
        task.offset = client->last_bitmap_offset = offset / block_size * block_size;
        task.len = client->last_bitmap_len = (offset+bytes+block_size-1) / block_size * block_size - task.offset;
        task.bitmap = client->last_bitmap = NULL;
        if (!task.bitmap_granularity)
        {
            // Granularity is used as a divisor below; treat an unknown
            // (zero) value like an unknown block size instead of dividing by 0
            return -EAGAIN;
        }
        qemu_mutex_lock(&client->mutex);
        // NOTE(review): the 5th argument is presumably "include parent layers" - confirm against the C API
        vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
        qemu_mutex_unlock(&client->mutex);
        while (!task.complete)
        {
            qemu_coroutine_yield();
        }
        if (task.ret < 0)
        {
            // Error
            return task.ret;
        }
    }
    if (!task.bitmap_granularity)
    {
        // Cached granularity may also be zero if an earlier read bailed out
        return -EAGAIN;
    }
    if (want_zero)
    {
        // Get precise mapping with all holes
        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
        uint64_t bmp_len = task.len / task.bitmap_granularity;
        uint64_t bmp_end = bmp_pos+1;
        bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
        while (bmp_end < bmp_len && ((task.bitmap[bmp_end >> 3] >> (bmp_end & 0x7)) & 1) == bit)
        {
            bmp_end++;
        }
        // The extent covers granules [bmp_pos, bmp_end), but the reported
        // length must be counted from `offset`, which may lie inside the
        // first granule. For granule-aligned offsets this equals
        // (bmp_end-bmp_pos) * granularity.
        *pnum = task.offset + bmp_end * task.bitmap_granularity - offset;
    }
    else
    {
        // Get larger allocated extents, possibly with false positives
        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
        uint64_t bmp_len = task.len / task.bitmap_granularity;
        // Absolute index one past the last granule touched by the request.
        // Rounded up so that a partially covered trailing granule is checked
        // too, and clamped to the number of granules the bitmap describes.
        uint64_t bmp_end = (offset+bytes-task.offset+task.bitmap_granularity-1) / task.bitmap_granularity;
        if (bmp_end > bmp_len)
        {
            bmp_end = bmp_len;
        }
        // Stop as soon as one allocated granule is found: the answer cannot change
        while (bmp_pos < bmp_end && !bit)
        {
            if (!(bmp_pos & 7) && bmp_end >= bmp_pos+8)
            {
                // Byte-aligned with at least 8 granules left: test a whole byte
                bit = bit || task.bitmap[bmp_pos >> 3];
                bmp_pos += 8;
            }
            else
            {
                bit = bit || ((task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1);
                bmp_pos++;
            }
        }
        *pnum = bytes;
    }
    if (bit)
    {
        *map = offset;
        *file = bs;
    }
    return (bit ? (BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID) : 0);
}
|
|
|
|
|
#endif
|
|
|
|
|
#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
|
|
|
|
|
// QEMU 1.7-2.11
|
|
|
|
|
/*
 * Sector-based block-status entry point (registered as
 * .bdrv_co_get_block_status for the QEMU versions selected by the
 * surrounding #if).
 *
 * Converts the sector range to bytes, delegates to
 * vitastor_co_block_status() with want_zero enabled, and converts the
 * resulting extent length back to sectors.
 *
 * sector_num, nb_sectors - range to query, in BDRV_SECTOR_SIZE units
 * pnum - receives the extent length in sectors, never more than nb_sectors
 * file - receives the BlockDriverState the data lives in when allocated
 *
 * Returns the status flags from vitastor_co_block_status() or a negative
 * error code.
 *
 * NOTE(review): the returned flags include BDRV_BLOCK_OFFSET_VALID but the
 * offset held in `map` is not folded into the return value - confirm whether
 * the sector-based API of the targeted QEMU versions expects it there.
 */
static int64_t coroutine_fn vitastor_co_get_block_status(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
    int64_t map = 0;
    int64_t pnumbytes = 0;
    int64_t sectors = 0;
    // Fallback target so that a NULL `file` from the caller is never dereferenced
    BlockDriverState *unused_file = NULL;
    // Pass the caller's pointer itself: taking its address would make the
    // callee write to this function's local parameter instead of *file
    int r = vitastor_co_block_status(bs, 1, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE,
        &pnumbytes, &map, file ? file : &unused_file);
    sectors = pnumbytes/BDRV_SECTOR_SIZE;
    if (sectors > nb_sectors)
    {
        // The byte-based helper may report an extent longer than requested
        sectors = nb_sectors;
    }
    *pnum = sectors;
    return r;
}
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
|
|
|
|
|
static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
|
|
|
|
|
{
|
|
|
|
@ -606,6 +765,15 @@ static BlockDriver bdrv_vitastor = {
|
|
|
|
|
.bdrv_co_truncate = vitastor_co_truncate,
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
|
|
|
|
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
|
|
|
|
|
// For snapshot export
|
|
|
|
|
.bdrv_co_block_status = vitastor_co_block_status,
|
|
|
|
|
#elif QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
|
|
|
|
|
.bdrv_co_get_block_status = vitastor_co_get_block_status,
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
|
|
|
|
|
.bdrv_co_preadv = vitastor_co_preadv,
|
|
|
|
|
.bdrv_co_pwritev = vitastor_co_pwritev,
|
|
|
|
|