Merge remote-tracking branch 'kwolf/for-anthony' into staging

* kwolf/for-anthony: (46 commits)
  qed: remove incoming live migration blocker
  qed: honor BDRV_O_INCOMING for incoming live migration
  migration: clear BDRV_O_INCOMING flags on end of incoming live migration
  qed: add bdrv_invalidate_cache to be called after incoming live migration
  blockdev: open images with BDRV_O_INCOMING on incoming live migration
  block: add a function to clear incoming live migration flags
  block: Add new BDRV_O_INCOMING flag to notice incoming live migration
  block stream: close unused files and update ->backing_hd
  qemu-iotests: Fix call syntax for qemu-io
  qemu-iotests: Fix call syntax for qemu-img
  qemu-iotests: Test unknown qcow2 header extensions
  qemu-iotests: qcow2.py
  sheepdog: fix send req helpers
  sheepdog: implement SD_OP_FLUSH_VDI operation
  block: bdrv_append() fixes
  qed: track dirty flag status
  qemu-img: add dirty flag status
  qed: image fragmentation statistics
  qemu-img: add image fragmentation statistics
  block: document job API
  ...
master
Anthony Liguori 2012-04-10 08:16:12 -05:00
commit bb5d8dd757
44 changed files with 1089 additions and 502 deletions

88
block.c
View File

@ -813,6 +813,9 @@ unlink_and_fail:
void bdrv_close(BlockDriverState *bs)
{
if (bs->drv) {
if (bs->job) {
block_job_cancel_sync(bs->job);
}
if (bs == bs_snapshots) {
bs_snapshots = NULL;
}
@ -889,14 +892,16 @@ void bdrv_make_anon(BlockDriverState *bs)
* This will modify the BlockDriverState fields, and swap contents
* between bs_new and bs_top. Both bs_new and bs_top are modified.
*
* bs_new is required to be anonymous.
*
* This function does not create any image files.
*/
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
BlockDriverState tmp;
/* the new bs must not be in bdrv_states */
bdrv_make_anon(bs_new);
/* bs_new must be anonymous */
assert(bs_new->device_name[0] == '\0');
tmp = *bs_new;
@ -941,11 +946,18 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
* swapping bs_new and bs_top contents. */
tmp.backing_hd = bs_new;
pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
/* swap contents of the fixed new bs and the current top */
*bs_new = *bs_top;
*bs_top = tmp;
/* device_name[] was carried over from the old bs_top. bs_new
* shouldn't be in bdrv_states, so we need to make device_name[]
* reflect the anonymity of bs_new
*/
bs_new->device_name[0] = '\0';
/* clear the copied fields in the new backing file */
bdrv_detach_dev(bs_new, bs_new->dev);
@ -966,6 +978,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
void bdrv_delete(BlockDriverState *bs)
{
assert(!bs->dev);
assert(!bs->job);
assert(!bs->in_use);
/* remove from list, if necessary */
bdrv_make_anon(bs);
@ -1463,6 +1477,17 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
qemu_iovec_init_external(&qiov, &iov, 1);
/**
* In sync call context, when the vcpu is blocked, this throttling timer
* will not fire; so the I/O throttling function has to be disabled here
* if it has been enabled.
*/
if (bs->io_limits_enabled) {
fprintf(stderr, "Disabling I/O throttling on '%s' due "
"to synchronous I/O.\n", bdrv_get_device_name(bs));
bdrv_io_limits_disable(bs);
}
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_rw_co_entry(&rwco);
@ -1969,10 +1994,19 @@ static int guess_disk_lchs(BlockDriverState *bs,
struct partition *p;
uint32_t nr_sects;
uint64_t nb_sectors;
bool enabled;
bdrv_get_geometry(bs, &nb_sectors);
/**
* The function will be invoked during startup not only in sync I/O mode,
* but also in async I/O mode. So the I/O throttling function has to
* be disabled temporarily here, not permanently.
*/
enabled = bs->io_limits_enabled;
bs->io_limits_enabled = false;
ret = bdrv_read(bs, 0, buf, 1);
bs->io_limits_enabled = enabled;
if (ret < 0)
return -1;
/* test msdos magic */
@ -2331,9 +2365,7 @@ void bdrv_flush_all(void)
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, list) {
if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
bdrv_flush(bs);
}
bdrv_flush(bs);
}
}
@ -3520,7 +3552,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
int ret;
if (!bs->drv) {
if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
return 0;
}
@ -3538,7 +3570,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
}
if (bs->drv->bdrv_co_flush_to_disk) {
return bs->drv->bdrv_co_flush_to_disk(bs);
ret = bs->drv->bdrv_co_flush_to_disk(bs);
} else if (bs->drv->bdrv_aio_flush) {
BlockDriverAIOCB *acb;
CoroutineIOCompletion co = {
@ -3547,10 +3579,10 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
if (acb == NULL) {
return -EIO;
ret = -EIO;
} else {
qemu_coroutine_yield();
return co.ret;
ret = co.ret;
}
} else {
/*
@ -3564,8 +3596,16 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
*
* Let's hope the user knows what he's doing.
*/
return 0;
ret = 0;
}
if (ret < 0) {
return ret;
}
/* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
* in the case of cache=unsafe, so there are no useless flushes.
*/
return bdrv_co_flush(bs->file);
}
void bdrv_invalidate_cache(BlockDriverState *bs)
@ -3584,6 +3624,15 @@ void bdrv_invalidate_cache_all(void)
}
}
void bdrv_clear_incoming_migration_all(void)
{
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, list) {
bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
}
}
int bdrv_flush(BlockDriverState *bs)
{
Coroutine *co;
@ -4054,10 +4103,16 @@ void block_job_complete(BlockJob *job, int ret)
int block_job_set_speed(BlockJob *job, int64_t value)
{
int rc;
if (!job->job_type->set_speed) {
return -ENOTSUP;
}
return job->job_type->set_speed(job, value);
rc = job->job_type->set_speed(job, value);
if (rc == 0) {
job->speed = value;
}
return rc;
}
void block_job_cancel(BlockJob *job)
@ -4069,3 +4124,14 @@ bool block_job_is_cancelled(BlockJob *job)
{
return job->cancelled;
}
void block_job_cancel_sync(BlockJob *job)
{
BlockDriverState *bs = job->bs;
assert(bs->job == job);
block_job_cancel(job);
while (bs->job != NULL && bs->job->busy) {
qemu_aio_wait();
}
}

21
block.h
View File

@ -15,8 +15,15 @@ typedef struct BlockDriverInfo {
int cluster_size;
/* offset at which the VM state can be saved (0 if not possible) */
int64_t vm_state_offset;
bool is_dirty;
} BlockDriverInfo;
typedef struct BlockFragInfo {
uint64_t allocated_clusters;
uint64_t total_clusters;
uint64_t fragmented_clusters;
} BlockFragInfo;
typedef struct QEMUSnapshotInfo {
char id_str[128]; /* unique snapshot id */
/* the following fields are informative. They are not needed for
@ -71,6 +78,7 @@ typedef struct BlockDevOps {
#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
@ -175,13 +183,12 @@ typedef struct BdrvCheckResult {
int corruptions;
int leaks;
int check_errors;
BlockFragInfo bfi;
} BdrvCheckResult;
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res);
/* async block I/O */
typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret);
typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
int sector_num);
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
@ -222,6 +229,8 @@ BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
void bdrv_invalidate_cache(BlockDriverState *bs);
void bdrv_invalidate_cache_all(void);
void bdrv_clear_incoming_migration_all(void);
/* Ensure contents are flushed to disk. */
int bdrv_flush(BlockDriverState *bs);
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
@ -437,10 +446,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
#define DEFINE_BLOCK_PROPERTIES(_state, _conf) \
DEFINE_PROP_DRIVE("drive", _state, _conf.bs), \
DEFINE_PROP_UINT16("logical_block_size", _state, \
_conf.logical_block_size, 512), \
DEFINE_PROP_UINT16("physical_block_size", _state, \
_conf.physical_block_size, 512), \
DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \
_conf.logical_block_size, 512), \
DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \
_conf.physical_block_size, 512), \
DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0), \
DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
DEFINE_PROP_INT32("bootindex", _state, _conf.bootindex, -1), \

View File

@ -397,12 +397,6 @@ static void blkdebug_close(BlockDriverState *bs)
}
}
static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
return bdrv_aio_flush(bs->file, cb, opaque);
}
static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
BlkdebugVars *old_vars)
{
@ -452,7 +446,6 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
.bdrv_aio_flush = blkdebug_aio_flush,
.bdrv_debug_event = blkdebug_debug_event,
};

View File

@ -318,11 +318,6 @@ exit:
return ret;
}
static coroutine_fn int cow_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
static QEMUOptionParameter cow_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@ -348,7 +343,6 @@ static BlockDriver bdrv_cow = {
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
.bdrv_co_flush_to_disk = cow_co_flush,
.bdrv_co_is_allocated = cow_co_is_allocated,
.create_options = cow_create_options,

View File

@ -835,11 +835,6 @@ fail:
return ret;
}
static coroutine_fn int qcow_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVQcowState *s = bs->opaque;
@ -877,7 +872,6 @@ static BlockDriver bdrv_qcow = {
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
.bdrv_co_flush_to_disk = qcow_co_flush,
.bdrv_co_is_allocated = qcow_co_is_allocated,
.bdrv_set_key = qcow_set_key,

View File

@ -466,7 +466,6 @@ out:
*/
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
uint64_t **new_l2_table,
uint64_t *new_l2_offset,
int *new_l2_index)
{
BDRVQcowState *s = bs->opaque;
@ -514,7 +513,6 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
*new_l2_table = l2_table;
*new_l2_offset = l2_offset;
*new_l2_index = l2_index;
return 0;
@ -539,11 +537,11 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret;
uint64_t l2_offset, *l2_table;
uint64_t *l2_table;
int64_t cluster_offset;
int nb_csectors;
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return 0;
}
@ -588,7 +586,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
BDRVQcowState *s = bs->opaque;
int i, j = 0, l2_index, ret;
uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
uint64_t *old_cluster, start_sect, *l2_table;
uint64_t cluster_offset = m->alloc_offset;
bool cow = false;
@ -633,7 +631,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
}
qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
if (ret < 0) {
goto err;
}
@ -817,7 +815,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret, sectors;
uint64_t l2_offset, *l2_table;
uint64_t *l2_table;
unsigned int nb_clusters, keep_clusters;
uint64_t cluster_offset;
@ -825,7 +823,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
n_start, n_end);
/* Find L2 entry for the first involved cluster */
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return ret;
}
@ -1000,12 +998,12 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
unsigned int nb_clusters)
{
BDRVQcowState *s = bs->opaque;
uint64_t l2_offset, *l2_table;
uint64_t *l2_table;
int l2_index;
int ret;
int i;
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return ret;
}

View File

@ -1253,11 +1253,6 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
return 0;
}
static coroutine_fn int qcow2_co_flush_to_disk(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
{
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
@ -1377,7 +1372,6 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_co_readv = qcow2_co_readv,
.bdrv_co_writev = qcow2_co_writev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
.bdrv_co_flush_to_disk = qcow2_co_flush_to_disk,
.bdrv_co_discard = qcow2_co_discard,
.bdrv_truncate = qcow2_truncate,

View File

@ -68,6 +68,7 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
{
BDRVQEDState *s = check->s;
unsigned int i, num_invalid = 0;
uint64_t last_offset = 0;
for (i = 0; i < s->table_nelems; i++) {
uint64_t offset = table->offsets[i];
@ -76,6 +77,11 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
qed_offset_is_zero_cluster(offset)) {
continue;
}
check->result->bfi.allocated_clusters++;
if (last_offset && (last_offset + s->header.cluster_size != offset)) {
check->result->bfi.fragmented_clusters++;
}
last_offset = offset;
/* Detect invalid cluster offset */
if (!qed_check_cluster_offset(s, offset)) {
@ -200,6 +206,9 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
sizeof(check.used_clusters[0]));
check.result->bfi.total_clusters =
(s->header.image_size + s->header.cluster_size - 1) /
s->header.cluster_size;
ret = qed_check_l1_table(&check, s->l1_table);
if (ret == 0) {
/* Only check for leaks if entire image was scanned successfully */

View File

@ -450,7 +450,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
* feature is no longer valid.
*/
if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
!bdrv_is_read_only(bs->file)) {
!bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
ret = qed_write_header_sync(s);
@ -477,7 +477,8 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
* potentially inconsistent images to be opened read-only. This can
* aid data recovery from an otherwise inconsistent image.
*/
if (!bdrv_is_read_only(bs->file)) {
if (!bdrv_is_read_only(bs->file) &&
!(flags & BDRV_O_INCOMING)) {
BdrvCheckResult result = {0};
ret = qed_check(s, &result, true);
@ -497,12 +498,6 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
s->need_check_timer = qemu_new_timer_ns(vm_clock,
qed_need_check_timer_cb, s);
error_set(&s->migration_blocker,
QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
"qed", bs->device_name, "live migration");
migrate_add_blocker(s->migration_blocker);
out:
if (ret) {
qed_free_l2_cache(&s->l2_cache);
@ -515,9 +510,6 @@ static void bdrv_qed_close(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
migrate_del_blocker(s->migration_blocker);
error_free(s->migration_blocker);
qed_cancel_need_check_timer(s);
qemu_free_timer(s->need_check_timer);
@ -1350,13 +1342,6 @@ static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
opaque, QED_AIOCB_WRITE);
}
static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb,
void *opaque)
{
return bdrv_aio_flush(bs->file, cb, opaque);
}
typedef struct {
Coroutine *co;
int ret;
@ -1441,6 +1426,7 @@ static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
memset(bdi, 0, sizeof(*bdi));
bdi->cluster_size = s->header.cluster_size;
bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
return 0;
}
@ -1516,6 +1502,15 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
return ret;
}
static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
bdrv_qed_close(bs);
memset(s, 0, sizeof(BDRVQEDState));
bdrv_qed_open(bs, bs->open_flags);
}
static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result)
{
BDRVQEDState *s = bs->opaque;
@ -1562,12 +1557,12 @@ static BlockDriver bdrv_qed = {
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
.bdrv_aio_flush = bdrv_qed_aio_flush,
.bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,
.bdrv_change_backing_file = bdrv_qed_change_backing_file,
.bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
.bdrv_check = bdrv_qed_check,
};

View File

@ -169,8 +169,6 @@ typedef struct {
/* Periodic flush and clear need check flag */
QEMUTimer *need_check_timer;
Error *migration_blocker;
} BDRVQEDState;
enum {

View File

@ -25,11 +25,6 @@ static void raw_close(BlockDriverState *bs)
{
}
static int coroutine_fn raw_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
@ -113,7 +108,6 @@ static BlockDriver bdrv_raw = {
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_flush_to_disk = raw_co_flush,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,

View File

@ -32,9 +32,11 @@
#define SD_OP_RELEASE_VDI 0x13
#define SD_OP_GET_VDI_INFO 0x14
#define SD_OP_READ_VDIS 0x15
#define SD_OP_FLUSH_VDI 0x16
#define SD_FLAG_CMD_WRITE 0x01
#define SD_FLAG_CMD_COW 0x02
#define SD_FLAG_CMD_CACHE 0x04
#define SD_RES_SUCCESS 0x00 /* Success */
#define SD_RES_UNKNOWN 0x01 /* Unknown error */
@ -293,10 +295,12 @@ typedef struct BDRVSheepdogState {
char name[SD_MAX_VDI_LEN];
int is_snapshot;
uint8_t cache_enabled;
char *addr;
char *port;
int fd;
int flush_fd;
CoMutex lock;
Coroutine *co_send;
@ -506,6 +510,7 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
ret = qemu_send_full(sockfd, hdr, sizeof(*hdr), 0);
if (ret < sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
return ret;
}
ret = qemu_send_full(sockfd, data, *wlen, 0);
@ -516,6 +521,24 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
return ret;
}
static int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
unsigned int *wlen)
{
int ret;
ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
if (ret < sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
return ret;
}
ret = qemu_co_send(sockfd, data, *wlen);
if (ret < *wlen) {
error_report("failed to send a req, %s", strerror(errno));
}
return ret;
}
static int do_req(int sockfd, SheepdogReq *hdr, void *data,
unsigned int *wlen, unsigned int *rlen)
{
@ -550,6 +573,40 @@ out:
return ret;
}
static int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
unsigned int *wlen, unsigned int *rlen)
{
int ret;
socket_set_block(sockfd);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret < sizeof(*hdr)) {
error_report("failed to get a rsp, %s", strerror(errno));
goto out;
}
if (*rlen > hdr->data_length) {
*rlen = hdr->data_length;
}
if (*rlen) {
ret = qemu_co_recv(sockfd, data, *rlen);
if (ret < *rlen) {
error_report("failed to get the data, %s", strerror(errno));
goto out;
}
}
ret = 0;
out:
socket_set_nonblock(sockfd);
return ret;
}
static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
struct iovec *iov, int niov, int create,
enum AIOCBState aiocb_type);
@ -900,6 +957,10 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
hdr.flags = SD_FLAG_CMD_WRITE | flags;
}
if (s->cache_enabled) {
hdr.flags |= SD_FLAG_CMD_CACHE;
}
hdr.oid = oid;
hdr.cow_oid = old_oid;
hdr.copies = s->inode.nr_copies;
@ -942,7 +1003,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset,
int write, int create)
int write, int create, uint8_t cache)
{
SheepdogObjReq hdr;
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@ -965,6 +1026,11 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
rlen = datalen;
hdr.opcode = SD_OP_READ_OBJ;
}
if (cache) {
hdr.flags |= SD_FLAG_CMD_CACHE;
}
hdr.oid = oid;
hdr.data_length = datalen;
hdr.offset = offset;
@ -986,15 +1052,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
}
static int read_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset)
unsigned int datalen, uint64_t offset, uint8_t cache)
{
return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0);
return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0,
cache);
}
static int write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset, int create)
unsigned int datalen, uint64_t offset, int create,
uint8_t cache)
{
return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create);
return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create,
cache);
}
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@ -1026,6 +1095,15 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
goto out;
}
if (flags & BDRV_O_CACHE_WB) {
s->cache_enabled = 1;
s->flush_fd = connect_to_sdog(s->addr, s->port);
if (s->flush_fd < 0) {
error_report("failed to connect");
goto out;
}
}
if (snapid) {
dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
s->is_snapshot = 1;
@ -1038,7 +1116,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
}
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
s->cache_enabled);
closesocket(fd);
@ -1272,6 +1351,9 @@ static void sd_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
closesocket(s->fd);
if (s->cache_enabled) {
closesocket(s->flush_fd);
}
g_free(s->addr);
}
@ -1305,7 +1387,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
s->inode.vdi_size = offset;
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
s->inode.nr_copies, datalen, 0, 0);
s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
close(fd);
if (ret < 0) {
@ -1387,7 +1469,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
}
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
SD_INODE_SIZE, 0);
SD_INODE_SIZE, 0, s->cache_enabled);
closesocket(fd);
@ -1575,6 +1657,36 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
return acb->ret;
}
static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogObjReq hdr = { 0 };
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
SheepdogInode *inode = &s->inode;
int ret;
unsigned int wlen = 0, rlen = 0;
if (!s->cache_enabled) {
return 0;
}
hdr.opcode = SD_OP_FLUSH_VDI;
hdr.oid = vid_to_vdi_oid(inode->vdi_id);
ret = do_co_req(s->flush_fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);
if (ret) {
error_report("failed to send a request to the sheep");
return ret;
}
if (rsp->result != SD_RES_SUCCESS) {
error_report("%s", sd_strerror(rsp->result));
return -EIO;
}
return 0;
}
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
BDRVSheepdogState *s = bs->opaque;
@ -1610,7 +1722,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
s->inode.nr_copies, datalen, 0, 0);
s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
if (ret < 0) {
error_report("failed to write snapshot's inode.");
ret = -EIO;
@ -1629,7 +1741,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
inode = (SheepdogInode *)g_malloc(datalen);
ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
s->inode.nr_copies, datalen, 0);
s->inode.nr_copies, datalen, 0, s->cache_enabled);
if (ret < 0) {
error_report("failed to read new inode info. %s", strerror(errno));
@ -1684,7 +1796,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
SD_INODE_SIZE, 0);
SD_INODE_SIZE, 0, s->cache_enabled);
closesocket(fd);
@ -1779,7 +1891,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
/* we don't need to read entire object */
ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0);
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
s->cache_enabled);
if (ret) {
continue;
@ -1835,10 +1948,12 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
create = (offset == 0);
if (load) {
ret = read_object(fd, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset);
s->inode.nr_copies, data_len, offset,
s->cache_enabled);
} else {
ret = write_object(fd, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset, create);
s->inode.nr_copies, data_len, offset, create,
s->cache_enabled);
}
if (ret < 0) {
@ -1904,6 +2019,7 @@ BlockDriver bdrv_sheepdog = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,

View File

@ -76,6 +76,39 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
}
static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
const char *base_id)
{
BlockDriverState *intermediate;
intermediate = top->backing_hd;
while (intermediate) {
BlockDriverState *unused;
/* reached base */
if (intermediate == base) {
break;
}
unused = intermediate;
intermediate = intermediate->backing_hd;
unused->backing_hd = NULL;
bdrv_delete(unused);
}
top->backing_hd = base;
pstrcpy(top->backing_file, sizeof(top->backing_file), "");
pstrcpy(top->backing_format, sizeof(top->backing_format), "");
if (base_id) {
pstrcpy(top->backing_file, sizeof(top->backing_file), base_id);
if (base->drv) {
pstrcpy(top->backing_format, sizeof(top->backing_format),
base->drv->format_name);
}
}
}
/*
* Given an image chain: [BASE] -> [INTER1] -> [INTER2] -> [TOP]
*
@ -175,7 +208,7 @@ retry:
break;
}
s->common.busy = true;
if (base) {
ret = is_allocated_base(bs, base, sector_num,
STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
@ -189,6 +222,7 @@ retry:
if (s->common.speed) {
uint64_t delay_ns = ratelimit_calculate_delay(&s->limit, n);
if (delay_ns > 0) {
s->common.busy = false;
co_sleep_ns(rt_clock, delay_ns);
/* Recheck cancellation and that sectors are unallocated */
@ -208,6 +242,7 @@ retry:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that qemu_aio_flush() returns.
*/
s->common.busy = false;
co_sleep_ns(rt_clock, 0);
}
@ -215,12 +250,13 @@ retry:
bdrv_disable_copy_on_read(bs);
}
if (sector_num == end && ret == 0) {
if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
const char *base_id = NULL;
if (base) {
base_id = s->backing_file_id;
}
ret = bdrv_change_backing_file(bs, base_id, NULL);
close_unused_images(bs, base, base_id);
}
qemu_vfree(buf);
@ -234,7 +270,6 @@ static int stream_set_speed(BlockJob *job, int64_t value)
if (value < 0) {
return -EINVAL;
}
job->speed = value;
ratelimit_set_speed(&s->limit, value / BDRV_SECTOR_SIZE);
return 0;
}

View File

@ -143,29 +143,6 @@ void uuid_unparse(const uuid_t uu, char *out)
}
#endif
typedef struct {
BlockDriverAIOCB common;
int64_t sector_num;
QEMUIOVector *qiov;
uint8_t *buf;
/* Total number of sectors. */
int nb_sectors;
/* Number of sectors for current AIO. */
int n_sectors;
/* New allocated block map entry. */
uint32_t bmap_first;
uint32_t bmap_last;
/* Buffer for new allocated block. */
void *block_buffer;
void *orig_buf;
bool is_write;
int header_modified;
BlockDriverAIOCB *hd_aiocb;
struct iovec hd_iov;
QEMUIOVector hd_qiov;
QEMUBH *bh;
} VdiAIOCB;
typedef struct {
char text[0x40];
uint32_t signature;
@ -489,332 +466,150 @@ static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
return VDI_IS_ALLOCATED(bmap_entry);
}
static void vdi_aio_cancel(BlockDriverAIOCB *blockacb)
static int vdi_co_read(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors)
{
/* TODO: This code is untested. How can I get it executed? */
VdiAIOCB *acb = container_of(blockacb, VdiAIOCB, common);
logout("\n");
if (acb->hd_aiocb) {
bdrv_aio_cancel(acb->hd_aiocb);
}
qemu_aio_release(acb);
}
static AIOPool vdi_aio_pool = {
.aiocb_size = sizeof(VdiAIOCB),
.cancel = vdi_aio_cancel,
};
static VdiAIOCB *vdi_aio_setup(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int is_write)
{
VdiAIOCB *acb;
logout("%p, %" PRId64 ", %p, %d, %p, %p, %d\n",
bs, sector_num, qiov, nb_sectors, cb, opaque, is_write);
acb = qemu_aio_get(&vdi_aio_pool, bs, cb, opaque);
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
acb->qiov = qiov;
acb->is_write = is_write;
if (qiov->niov > 1) {
acb->buf = qemu_blockalign(bs, qiov->size);
acb->orig_buf = acb->buf;
if (is_write) {
qemu_iovec_to_buffer(qiov, acb->buf);
}
} else {
acb->buf = (uint8_t *)qiov->iov->iov_base;
}
acb->nb_sectors = nb_sectors;
acb->n_sectors = 0;
acb->bmap_first = VDI_UNALLOCATED;
acb->bmap_last = VDI_UNALLOCATED;
acb->block_buffer = NULL;
acb->header_modified = 0;
return acb;
}
static int vdi_schedule_bh(QEMUBHFunc *cb, VdiAIOCB *acb)
{
logout("\n");
if (acb->bh) {
return -EIO;
}
acb->bh = qemu_bh_new(cb, acb);
if (!acb->bh) {
return -EIO;
}
qemu_bh_schedule(acb->bh);
return 0;
}
static void vdi_aio_read_cb(void *opaque, int ret);
static void vdi_aio_write_cb(void *opaque, int ret);
static void vdi_aio_rw_bh(void *opaque)
{
VdiAIOCB *acb = opaque;
logout("\n");
qemu_bh_delete(acb->bh);
acb->bh = NULL;
if (acb->is_write) {
vdi_aio_write_cb(opaque, 0);
} else {
vdi_aio_read_cb(opaque, 0);
}
}
static void vdi_aio_read_cb(void *opaque, int ret)
{
VdiAIOCB *acb = opaque;
BlockDriverState *bs = acb->common.bs;
BDRVVdiState *s = bs->opaque;
uint32_t bmap_entry;
uint32_t block_index;
uint32_t sector_in_block;
uint32_t n_sectors;
logout("%u sectors read\n", acb->n_sectors);
acb->hd_aiocb = NULL;
if (ret < 0) {
goto done;
}
acb->nb_sectors -= acb->n_sectors;
if (acb->nb_sectors == 0) {
/* request completed */
ret = 0;
goto done;
}
acb->sector_num += acb->n_sectors;
acb->buf += acb->n_sectors * SECTOR_SIZE;
block_index = acb->sector_num / s->block_sectors;
sector_in_block = acb->sector_num % s->block_sectors;
n_sectors = s->block_sectors - sector_in_block;
if (n_sectors > acb->nb_sectors) {
n_sectors = acb->nb_sectors;
}
logout("will read %u sectors starting at sector %" PRIu64 "\n",
n_sectors, acb->sector_num);
/* prepare next AIO request */
acb->n_sectors = n_sectors;
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Block not allocated, return zeros, no need to wait. */
memset(acb->buf, 0, n_sectors * SECTOR_SIZE);
ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
if (ret < 0) {
goto done;
}
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
acb->hd_iov.iov_base = (void *)acb->buf;
acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
acb->hd_aiocb = bdrv_aio_readv(bs->file, offset, &acb->hd_qiov,
n_sectors, vdi_aio_read_cb, acb);
}
return;
done:
if (acb->qiov->niov > 1) {
qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
qemu_vfree(acb->orig_buf);
}
acb->common.cb(acb->common.opaque, ret);
qemu_aio_release(acb);
}
static BlockDriverAIOCB *vdi_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
VdiAIOCB *acb;
int ret;
int ret = 0;
logout("\n");
acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
if (ret < 0) {
if (acb->qiov->niov > 1) {
qemu_vfree(acb->orig_buf);
while (ret >= 0 && nb_sectors > 0) {
block_index = sector_num / s->block_sectors;
sector_in_block = sector_num % s->block_sectors;
n_sectors = s->block_sectors - sector_in_block;
if (n_sectors > nb_sectors) {
n_sectors = nb_sectors;
}
qemu_aio_release(acb);
return NULL;
logout("will read %u sectors starting at sector %" PRIu64 "\n",
n_sectors, sector_num);
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Block not allocated, return zeros, no need to wait. */
memset(buf, 0, n_sectors * SECTOR_SIZE);
ret = 0;
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
ret = bdrv_read(bs->file, offset, buf, n_sectors);
}
logout("%u sectors read\n", n_sectors);
nb_sectors -= n_sectors;
sector_num += n_sectors;
buf += n_sectors * SECTOR_SIZE;
}
return &acb->common;
return ret;
}
static void vdi_aio_write_cb(void *opaque, int ret)
static int vdi_co_write(BlockDriverState *bs,
int64_t sector_num, const uint8_t *buf, int nb_sectors)
{
VdiAIOCB *acb = opaque;
BlockDriverState *bs = acb->common.bs;
BDRVVdiState *s = bs->opaque;
uint32_t bmap_entry;
uint32_t block_index;
uint32_t sector_in_block;
uint32_t n_sectors;
uint32_t bmap_first = VDI_UNALLOCATED;
uint32_t bmap_last = VDI_UNALLOCATED;
uint8_t *block = NULL;
int ret = 0;
acb->hd_aiocb = NULL;
logout("\n");
if (ret < 0) {
goto done;
}
while (ret >= 0 && nb_sectors > 0) {
block_index = sector_num / s->block_sectors;
sector_in_block = sector_num % s->block_sectors;
n_sectors = s->block_sectors - sector_in_block;
if (n_sectors > nb_sectors) {
n_sectors = nb_sectors;
}
acb->nb_sectors -= acb->n_sectors;
acb->sector_num += acb->n_sectors;
acb->buf += acb->n_sectors * SECTOR_SIZE;
logout("will write %u sectors starting at sector %" PRIu64 "\n",
n_sectors, sector_num);
if (acb->nb_sectors == 0) {
logout("finished data write\n");
acb->n_sectors = 0;
if (acb->header_modified) {
VdiHeader *header = acb->block_buffer;
logout("now writing modified header\n");
assert(VDI_IS_ALLOCATED(acb->bmap_first));
*header = s->header;
vdi_header_to_le(header);
acb->header_modified = 0;
acb->hd_iov.iov_base = acb->block_buffer;
acb->hd_iov.iov_len = SECTOR_SIZE;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
acb->hd_aiocb = bdrv_aio_writev(bs->file, 0, &acb->hd_qiov, 1,
vdi_aio_write_cb, acb);
return;
} else if (VDI_IS_ALLOCATED(acb->bmap_first)) {
/* One or more new blocks were allocated. */
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Allocate new block and write to it. */
uint64_t offset;
uint32_t bmap_first;
uint32_t bmap_last;
g_free(acb->block_buffer);
acb->block_buffer = NULL;
bmap_first = acb->bmap_first;
bmap_last = acb->bmap_last;
logout("now writing modified block map entry %u...%u\n",
bmap_first, bmap_last);
/* Write modified sectors from block map. */
bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
n_sectors = bmap_last - bmap_first + 1;
offset = s->bmap_sector + bmap_first;
acb->bmap_first = VDI_UNALLOCATED;
acb->hd_iov.iov_base = (void *)((uint8_t *)&s->bmap[0] +
bmap_first * SECTOR_SIZE);
acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
n_sectors, vdi_aio_write_cb, acb);
return;
bmap_entry = s->header.blocks_allocated;
s->bmap[block_index] = cpu_to_le32(bmap_entry);
s->header.blocks_allocated++;
offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors;
if (block == NULL) {
block = g_malloc(s->block_size);
bmap_first = block_index;
}
bmap_last = block_index;
/* Copy data to be written to new block and zero unused parts. */
memset(block, 0, sector_in_block * SECTOR_SIZE);
memcpy(block + sector_in_block * SECTOR_SIZE,
buf, n_sectors * SECTOR_SIZE);
memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
(s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
ret = bdrv_write(bs->file, offset, block, s->block_sectors);
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
ret = bdrv_write(bs->file, offset, buf, n_sectors);
}
ret = 0;
goto done;
nb_sectors -= n_sectors;
sector_num += n_sectors;
buf += n_sectors * SECTOR_SIZE;
logout("%u sectors written\n", n_sectors);
}
logout("%u sectors written\n", acb->n_sectors);
block_index = acb->sector_num / s->block_sectors;
sector_in_block = acb->sector_num % s->block_sectors;
n_sectors = s->block_sectors - sector_in_block;
if (n_sectors > acb->nb_sectors) {
n_sectors = acb->nb_sectors;
}
logout("will write %u sectors starting at sector %" PRIu64 "\n",
n_sectors, acb->sector_num);
/* prepare next AIO request */
acb->n_sectors = n_sectors;
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Allocate new block and write to it. */
uint64_t offset;
uint8_t *block;
bmap_entry = s->header.blocks_allocated;
s->bmap[block_index] = cpu_to_le32(bmap_entry);
s->header.blocks_allocated++;
offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors;
block = acb->block_buffer;
if (block == NULL) {
block = g_malloc(s->block_size);
acb->block_buffer = block;
acb->bmap_first = block_index;
assert(!acb->header_modified);
acb->header_modified = 1;
}
acb->bmap_last = block_index;
/* Copy data to be written to new block and zero unused parts. */
memset(block, 0, sector_in_block * SECTOR_SIZE);
memcpy(block + sector_in_block * SECTOR_SIZE,
acb->buf, n_sectors * SECTOR_SIZE);
memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
(s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
acb->hd_iov.iov_base = (void *)block;
acb->hd_iov.iov_len = s->block_size;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
acb->hd_aiocb = bdrv_aio_writev(bs->file, offset,
&acb->hd_qiov, s->block_sectors,
vdi_aio_write_cb, acb);
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
acb->hd_iov.iov_base = (void *)acb->buf;
acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
n_sectors, vdi_aio_write_cb, acb);
}
return;
done:
if (acb->qiov->niov > 1) {
qemu_vfree(acb->orig_buf);
}
acb->common.cb(acb->common.opaque, ret);
qemu_aio_release(acb);
}
static BlockDriverAIOCB *vdi_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
VdiAIOCB *acb;
int ret;
logout("\n");
acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
logout("finished data write\n");
if (ret < 0) {
if (acb->qiov->niov > 1) {
qemu_vfree(acb->orig_buf);
}
qemu_aio_release(acb);
return NULL;
return ret;
}
return &acb->common;
if (block) {
/* One or more new blocks were allocated. */
VdiHeader *header = (VdiHeader *) block;
uint8_t *base;
uint64_t offset;
logout("now writing modified header\n");
assert(VDI_IS_ALLOCATED(bmap_first));
*header = s->header;
vdi_header_to_le(header);
ret = bdrv_write(bs->file, 0, block, 1);
g_free(block);
block = NULL;
if (ret < 0) {
return ret;
}
logout("now writing modified block map entry %u...%u\n",
bmap_first, bmap_last);
/* Write modified sectors from block map. */
bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
n_sectors = bmap_last - bmap_first + 1;
offset = s->bmap_sector + bmap_first;
base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
ret = bdrv_write(bs->file, offset, base, n_sectors);
}
return ret;
}
static int vdi_create(const char *filename, QEMUOptionParameter *options)
@ -930,13 +725,6 @@ static void vdi_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
static coroutine_fn int vdi_co_flush(BlockDriverState *bs)
{
logout("\n");
return bdrv_co_flush(bs->file);
}
static QEMUOptionParameter vdi_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@ -969,13 +757,12 @@ static BlockDriver bdrv_vdi = {
.bdrv_open = vdi_open,
.bdrv_close = vdi_close,
.bdrv_create = vdi_create,
.bdrv_co_flush_to_disk = vdi_co_flush,
.bdrv_co_is_allocated = vdi_co_is_allocated,
.bdrv_make_empty = vdi_make_empty,
.bdrv_aio_readv = vdi_aio_readv,
.bdrv_read = vdi_co_read,
#if defined(CONFIG_VDI_WRITE)
.bdrv_aio_writev = vdi_aio_writev,
.bdrv_write = vdi_co_write,
#endif
.bdrv_get_info = vdi_get_info,

View File

@ -1525,10 +1525,10 @@ static void vmdk_close(BlockDriverState *bs)
static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
{
int i, ret, err;
BDRVVmdkState *s = bs->opaque;
int i, err;
int ret = 0;
ret = bdrv_co_flush(bs->file);
for (i = 0; i < s->num_extents; i++) {
err = bdrv_co_flush(s->extents[i].file);
if (err < 0) {

View File

@ -189,6 +189,9 @@ static int vpc_open(BlockDriverState *bs, int flags)
fprintf(stderr, "block-vpc: The header checksum of '%s' is "
"incorrect.\n", bs->filename);
/* Write 'checksum' back to footer, or else will leave it with zero. */
footer->checksum = be32_to_cpu(checksum);
// The visible size of a image in Virtual PC depends on the geometry
// rather than on the size stored in the footer (the size in the footer
// is too large usually)
@ -507,11 +510,6 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
return ret;
}
static coroutine_fn int vpc_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
/*
* Calculates the number of cylinders, heads and sectors per cylinder
* based on a given number of sectors. This is the algorithm described
@ -789,7 +787,6 @@ static BlockDriver bdrv_vpc = {
.bdrv_read = vpc_co_read,
.bdrv_write = vpc_co_write,
.bdrv_co_flush_to_disk = vpc_co_flush,
.create_options = vpc_create_options,
};

View File

@ -53,12 +53,6 @@
typedef struct BdrvTrackedRequest BdrvTrackedRequest;
typedef struct AIOPool {
void (*cancel)(BlockDriverAIOCB *acb);
int aiocb_size;
BlockDriverAIOCB *free_aiocb;
} AIOPool;
typedef struct BlockIOLimit {
int64_t bps[3];
int64_t iops[3];
@ -69,8 +63,13 @@ typedef struct BlockIOBaseValue {
uint64_t ios[2];
} BlockIOBaseValue;
typedef void BlockJobCancelFunc(void *opaque);
typedef struct BlockJob BlockJob;
/**
* BlockJobType:
*
* A class type for block job objects.
*/
typedef struct BlockJobType {
/** Derived BlockJob struct size */
size_t instance_size;
@ -83,19 +82,48 @@ typedef struct BlockJobType {
} BlockJobType;
/**
* Long-running operation on a BlockDriverState
* BlockJob:
*
* Long-running operation on a BlockDriverState.
*/
struct BlockJob {
/** The job type, including the job vtable. */
const BlockJobType *job_type;
/** The block device on which the job is operating. */
BlockDriverState *bs;
/**
* Set to true if the job should cancel itself. The flag must
* always be tested just before toggling the busy flag from false
* to true. After a job has detected that the cancelled flag is
* true, it should not anymore issue any I/O operation to the
* block device.
*/
bool cancelled;
/* These fields are published by the query-block-jobs QMP API */
/**
* Set to false by the job while it is in a quiescent state, where
* no I/O is pending and cancellation can be processed without
* issuing new I/O. The busy flag must be set to false when the
* job goes to sleep on any condition that is not detected by
* #qemu_aio_wait, such as a timer.
*/
bool busy;
/** Offset that is published by the query-block-jobs QMP API */
int64_t offset;
/** Length that is published by the query-block-jobs QMP API */
int64_t len;
/** Speed that was set with @block_job_set_speed. */
int64_t speed;
/** The completion function that will be called when the job completes. */
BlockDriverCompletionFunc *cb;
/** The opaque value that is passed to the completion function. */
void *opaque;
};
@ -302,20 +330,8 @@ struct BlockDriverState {
BlockJob *job;
};
struct BlockDriverAIOCB {
AIOPool *pool;
BlockDriverState *bs;
BlockDriverCompletionFunc *cb;
void *opaque;
BlockDriverAIOCB *next;
};
void get_tmp_filename(char *filename, int size);
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque);
void qemu_aio_release(void *p);
void bdrv_set_io_limits(BlockDriverState *bs,
BlockIOLimit *io_limits);
@ -323,13 +339,90 @@ void bdrv_set_io_limits(BlockDriverState *bs,
int is_windows_drive(const char *filename);
#endif
/**
* block_job_create:
* @job_type: The class object for the newly-created job.
* @bs: The block
* @cb: Completion function for the job.
* @opaque: Opaque pointer value passed to @cb.
*
* Create a new long-running block device job and return it. The job
* will call @cb asynchronously when the job completes. Note that
* @bs may have been closed at the time the @cb it is called. If
* this is the case, the job may be reported as either cancelled or
* completed.
*
* This function is not part of the public job interface; it should be
* called from a wrapper that is specific to the job type.
*/
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque);
/**
* block_job_complete:
* @job: The job being completed.
* @ret: The status code.
*
* Call the completion function that was registered at creation time, and
* free @job.
*/
void block_job_complete(BlockJob *job, int ret);
/**
* block_job_set_speed:
* @job: The job to set the speed for.
* @speed: The new value
*
* Set a rate-limiting parameter for the job; the actual meaning may
* vary depending on the job type.
*/
int block_job_set_speed(BlockJob *job, int64_t value);
/**
* block_job_cancel:
* @job: The job to be canceled.
*
* Asynchronously cancel the specified job.
*/
void block_job_cancel(BlockJob *job);
/**
* block_job_is_cancelled:
* @job: The job being queried.
*
* Returns whether the job is scheduled for cancellation.
*/
bool block_job_is_cancelled(BlockJob *job);
/**
* block_job_cancel:
* @job: The job to be canceled.
*
* Asynchronously cancel the job and wait for it to reach a quiescent
* state. Note that the completion callback will still be called
* asynchronously, hence it is *not* valid to call #bdrv_delete
* immediately after #block_job_cancel_sync. Users of block jobs
* will usually protect the BlockDriverState objects with a reference
* count, should this be a concern.
*/
void block_job_cancel_sync(BlockJob *job);
/**
* stream_start:
* @bs: Block device to operate on.
* @base: Block device that will become the new base, or %NULL to
* flatten the whole backing file chain onto @bs.
* @base_id: The file name that will be written to @bs as the new
* backing file if the job completes. Ignored if @base is %NULL.
* @cb: Completion function for the job.
* @opaque: Opaque pointer value passed to @cb.
*
* Start a streaming operation on @bs. Clusters that are unallocated
* in @bs, but allocated in any image between @base and @bs (both
* exclusive) will be written to @bs. At the end of a successful
* streaming job, the backing file of @bs will be changed to
* @base_id in the written image and to @base in the live BlockDriverState.
*/
int stream_start(BlockDriverState *bs, BlockDriverState *base,
const char *base_id, BlockDriverCompletionFunc *cb,
void *opaque);

View File

@ -64,6 +64,9 @@ void blockdev_mark_auto_del(BlockDriverState *bs)
{
DriveInfo *dinfo = drive_get_by_blockdev(bs);
if (bs->job) {
block_job_cancel(bs->job);
}
if (dinfo) {
dinfo->auto_del = 1;
}
@ -532,8 +535,9 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
dinfo->unit = unit_id;
dinfo->opts = opts;
dinfo->refcount = 1;
if (serial)
strncpy(dinfo->serial, serial, sizeof(dinfo->serial) - 1);
if (serial) {
pstrcpy(dinfo->serial, sizeof(dinfo->serial), serial);
}
QTAILQ_INSERT_TAIL(&drives, dinfo, next);
bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
@ -591,6 +595,10 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
bdrv_flags |= BDRV_O_COPY_ON_READ;
}
if (runstate_check(RUN_STATE_INMIGRATE)) {
bdrv_flags |= BDRV_O_INCOMING;
}
if (media == MEDIA_CDROM) {
/* CDROM is fine for any interface, don't check. */
ro = 1;

View File

@ -8,7 +8,6 @@
*/
#include "dma.h"
#include "block_int.h"
#include "trace.h"
void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
@ -42,7 +41,7 @@ typedef struct {
BlockDriverAIOCB *acb;
QEMUSGList *sg;
uint64_t sector_num;
bool to_dev;
DMADirection dir;
bool in_cancel;
int sg_cur_index;
dma_addr_t sg_cur_byte;
@ -76,7 +75,8 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
for (i = 0; i < dbs->iov.niov; ++i) {
cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
dbs->iov.iov[i].iov_len, !dbs->to_dev,
dbs->iov.iov[i].iov_len,
dbs->dir != DMA_DIRECTION_TO_DEVICE,
dbs->iov.iov[i].iov_len);
}
qemu_iovec_reset(&dbs->iov);
@ -123,7 +123,8 @@ static void dma_bdrv_cb(void *opaque, int ret)
while (dbs->sg_cur_index < dbs->sg->nsg) {
cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->to_dev);
mem = cpu_physical_memory_map(cur_addr, &cur_len,
dbs->dir != DMA_DIRECTION_TO_DEVICE);
if (!mem)
break;
qemu_iovec_add(&dbs->iov, mem, cur_len);
@ -170,11 +171,11 @@ static AIOPool dma_aio_pool = {
BlockDriverAIOCB *dma_bdrv_io(
BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
void *opaque, bool to_dev)
void *opaque, DMADirection dir)
{
DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
trace_dma_bdrv_io(dbs, bs, sector_num, to_dev);
trace_dma_bdrv_io(dbs, bs, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));
dbs->acb = NULL;
dbs->bs = bs;
@ -182,7 +183,7 @@ BlockDriverAIOCB *dma_bdrv_io(
dbs->sector_num = sector_num;
dbs->sg_cur_index = 0;
dbs->sg_cur_byte = 0;
dbs->to_dev = to_dev;
dbs->dir = dir;
dbs->io_func = io_func;
dbs->bh = NULL;
qemu_iovec_init(&dbs->iov, sg->nsg);
@ -195,14 +196,16 @@ BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
void (*cb)(void *opaque, int ret), void *opaque)
{
return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, false);
return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque,
DMA_DIRECTION_FROM_DEVICE);
}
BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
void (*cb)(void *opaque, int ret), void *opaque)
{
return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, true);
return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque,
DMA_DIRECTION_TO_DEVICE);
}

12
dma.h
View File

@ -16,6 +16,11 @@
typedef struct ScatterGatherEntry ScatterGatherEntry;
typedef enum {
DMA_DIRECTION_TO_DEVICE = 0,
DMA_DIRECTION_FROM_DEVICE = 1,
} DMADirection;
struct QEMUSGList {
ScatterGatherEntry *sg;
int nsg;
@ -28,11 +33,6 @@ typedef target_phys_addr_t dma_addr_t;
#define DMA_ADDR_FMT TARGET_FMT_plx
typedef enum {
DMA_DIRECTION_TO_DEVICE = 0,
DMA_DIRECTION_FROM_DEVICE = 1,
} DMADirection;
struct ScatterGatherEntry {
dma_addr_t base;
dma_addr_t len;
@ -50,7 +50,7 @@ typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num,
BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector_num,
DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
void *opaque, bool to_dev);
void *opaque, DMADirection dir);
BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
BlockDriverCompletionFunc *cb, void *opaque);

View File

@ -31,7 +31,6 @@
#include "sysemu.h"
#include "dma.h"
#include "blockdev.h"
#include "block_int.h"
#include <hw/ide/internal.h>
@ -101,7 +100,7 @@ static void ide_identify(IDEState *s)
put_le16(p + 21, 512); /* cache size in sectors */
put_le16(p + 22, 4); /* ecc bytes */
padstr((char *)(p + 23), s->version, 8); /* firmware version */
padstr((char *)(p + 27), "QEMU HARDDISK", 40); /* model */
padstr((char *)(p + 27), s->drive_model_str, 40); /* model */
#if MAX_MULT_SECTORS > 1
put_le16(p + 47, 0x8000 | MAX_MULT_SECTORS);
#endif
@ -143,17 +142,25 @@ static void ide_identify(IDEState *s)
put_le16(p + 82, (1 << 14) | (1 << 5) | 1);
/* 13=flush_cache_ext,12=flush_cache,10=lba48 */
put_le16(p + 83, (1 << 14) | (1 << 13) | (1 <<12) | (1 << 10));
/* 14=set to 1, 1=SMART self test, 0=SMART error logging */
put_le16(p + 84, (1 << 14) | 0);
/* 14=set to 1, 8=has WWN, 1=SMART self test, 0=SMART error logging */
if (s->wwn) {
put_le16(p + 84, (1 << 14) | (1 << 8) | 0);
} else {
put_le16(p + 84, (1 << 14) | 0);
}
/* 14 = NOP supported, 5=WCACHE enabled, 0=SMART feature set enabled */
if (bdrv_enable_write_cache(s->bs))
put_le16(p + 85, (1 << 14) | (1 << 5) | 1);
else
put_le16(p + 85, (1 << 14) | 1);
/* 13=flush_cache_ext,12=flush_cache,10=lba48 */
put_le16(p + 86, (1 << 14) | (1 << 13) | (1 <<12) | (1 << 10));
/* 14=set to 1, 1=smart self test, 0=smart error logging */
put_le16(p + 87, (1 << 14) | 0);
put_le16(p + 86, (1 << 13) | (1 <<12) | (1 << 10));
/* 14=set to 1, 8=has WWN, 1=SMART self test, 0=SMART error logging */
if (s->wwn) {
put_le16(p + 87, (1 << 14) | (1 << 8) | 0);
} else {
put_le16(p + 87, (1 << 14) | 0);
}
put_le16(p + 88, 0x3f | (1 << 13)); /* udma5 set and supported */
put_le16(p + 93, 1 | (1 << 14) | 0x2000);
put_le16(p + 100, s->nb_sectors);
@ -163,6 +170,13 @@ static void ide_identify(IDEState *s)
if (dev && dev->conf.physical_block_size)
put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf));
if (s->wwn) {
/* LE 16-bit words 111-108 contain 64-bit World Wide Name */
put_le16(p + 108, s->wwn >> 48);
put_le16(p + 109, s->wwn >> 32);
put_le16(p + 110, s->wwn >> 16);
put_le16(p + 111, s->wwn);
}
if (dev && dev->conf.discard_granularity) {
put_le16(p + 169, 1); /* TRIM support */
}
@ -189,7 +203,7 @@ static void ide_atapi_identify(IDEState *s)
put_le16(p + 21, 512); /* cache size in sectors */
put_le16(p + 22, 4); /* ecc bytes */
padstr((char *)(p + 23), s->version, 8); /* firmware version */
padstr((char *)(p + 27), "QEMU DVD-ROM", 40); /* model */
padstr((char *)(p + 27), s->drive_model_str, 40); /* model */
put_le16(p + 48, 1); /* dword I/O (XXX: should not be set on CDROM) */
#ifdef USE_DMA_CDROM
put_le16(p + 49, 1 << 9 | 1 << 8); /* DMA and LBA supported */
@ -246,7 +260,7 @@ static void ide_cfata_identify(IDEState *s)
padstr((char *)(p + 10), s->drive_serial_str, 20); /* serial number */
put_le16(p + 22, 0x0004); /* ECC bytes */
padstr((char *) (p + 23), s->version, 8); /* Firmware Revision */
padstr((char *) (p + 27), "QEMU MICRODRIVE", 40);/* Model number */
padstr((char *) (p + 27), s->drive_model_str, 40);/* Model number */
#if MAX_MULT_SECTORS > 1
put_le16(p + 47, 0x8000 | MAX_MULT_SECTORS);
#else
@ -604,7 +618,8 @@ void ide_dma_cb(void *opaque, int ret)
break;
case IDE_DMA_TRIM:
s->bus->dma->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
ide_issue_trim, ide_dma_cb, s, true);
ide_issue_trim, ide_dma_cb, s,
DMA_DIRECTION_TO_DEVICE);
break;
}
return;
@ -1834,7 +1849,8 @@ static const BlockDevOps ide_cd_block_ops = {
};
int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
const char *version, const char *serial)
const char *version, const char *serial, const char *model,
uint64_t wwn)
{
int cylinders, heads, secs;
uint64_t nb_sectors;
@ -1860,6 +1876,7 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
s->heads = heads;
s->sectors = secs;
s->nb_sectors = nb_sectors;
s->wwn = wwn;
/* The SMART values should be preserved across power cycles
but they aren't. */
s->smart_enabled = 1;
@ -1880,11 +1897,27 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
}
}
if (serial) {
strncpy(s->drive_serial_str, serial, sizeof(s->drive_serial_str));
pstrcpy(s->drive_serial_str, sizeof(s->drive_serial_str), serial);
} else {
snprintf(s->drive_serial_str, sizeof(s->drive_serial_str),
"QM%05d", s->drive_serial);
}
if (model) {
pstrcpy(s->drive_model_str, sizeof(s->drive_model_str), model);
} else {
switch (kind) {
case IDE_CD:
strcpy(s->drive_model_str, "QEMU DVD-ROM");
break;
case IDE_CFATA:
strcpy(s->drive_model_str, "QEMU MICRODRIVE");
break;
default:
strcpy(s->drive_model_str, "QEMU HARDDISK");
break;
}
}
if (version) {
pstrcpy(s->version, sizeof(s->version), version);
} else {
@ -1977,7 +2010,8 @@ void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0,
if (dinfo) {
if (ide_init_drive(&bus->ifs[i], dinfo->bdrv,
dinfo->media_cd ? IDE_CD : IDE_HD, NULL,
*dinfo->serial ? dinfo->serial : NULL) < 0) {
*dinfo->serial ? dinfo->serial : NULL,
NULL, 0) < 0) {
error_report("Can't set up IDE drive %s", dinfo->id);
exit(1);
}

View File

@ -348,6 +348,8 @@ struct IDEState {
uint8_t identify_data[512];
int drive_serial;
char drive_serial_str[21];
char drive_model_str[41];
uint64_t wwn;
/* ide regs */
uint8_t feature;
uint8_t error;
@ -468,6 +470,8 @@ struct IDEDevice {
BlockConf conf;
char *version;
char *serial;
char *model;
uint64_t wwn;
};
#define BM_STATUS_DMAING 0x01
@ -534,7 +538,8 @@ void ide_data_writel(void *opaque, uint32_t addr, uint32_t val);
uint32_t ide_data_readl(void *opaque, uint32_t addr);
int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
const char *version, const char *serial);
const char *version, const char *serial, const char *model,
uint64_t wwn);
void ide_init2(IDEBus *bus, qemu_irq irq);
void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0,
DriveInfo *hd1, qemu_irq irq);

View File

@ -149,7 +149,8 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
break;
case IDE_DMA_TRIM:
m->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
ide_issue_trim, pmac_ide_transfer_cb, s, true);
ide_issue_trim, pmac_ide_transfer_cb, s,
DMA_DIRECTION_TO_DEVICE);
break;
}
return;

View File

@ -136,7 +136,8 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
}
}
if (ide_init_drive(s, dev->conf.bs, kind, dev->version, serial) < 0) {
if (ide_init_drive(s, dev->conf.bs, kind,
dev->version, serial, dev->model, dev->wwn) < 0) {
return -1;
}
@ -173,7 +174,9 @@ static int ide_drive_initfn(IDEDevice *dev)
#define DEFINE_IDE_DEV_PROPERTIES() \
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
DEFINE_PROP_STRING("ver", IDEDrive, dev.version), \
DEFINE_PROP_STRING("serial", IDEDrive, dev.serial)
DEFINE_PROP_HEX64("wwn", IDEDrive, dev.wwn, 0), \
DEFINE_PROP_STRING("serial", IDEDrive, dev.serial),\
DEFINE_PROP_STRING("model", IDEDrive, dev.model)
static Property ide_hd_properties[] = {
DEFINE_IDE_DEV_PROPERTIES(),

View File

@ -15,7 +15,6 @@
#include "hw.h"
#include "pci.h"
#include "scsi.h"
#include "block_int.h"
#include "dma.h"
//#define DEBUG_LSI

View File

@ -877,6 +877,52 @@ PropertyInfo qdev_prop_pci_devfn = {
.max = 0xFFFFFFFFULL,
};
/* --- blocksize --- */
static void set_blocksize(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
{
DeviceState *dev = DEVICE(obj);
Property *prop = opaque;
int16_t *ptr = qdev_get_prop_ptr(dev, prop);
Error *local_err = NULL;
int64_t value;
if (dev->state != DEV_STATE_CREATED) {
error_set(errp, QERR_PERMISSION_DENIED);
return;
}
visit_type_int(v, &value, name, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
if (value < prop->info->min || value > prop->info->max) {
error_set(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE,
dev->id?:"", name, value, prop->info->min,
prop->info->max);
return;
}
/* We rely on power-of-2 blocksizes for bitmasks */
if ((value & (value - 1)) != 0) {
error_set(errp, QERR_PROPERTY_VALUE_NOT_POWER_OF_2,
dev->id?:"", name, value);
return;
}
*ptr = value;
}
PropertyInfo qdev_prop_blocksize = {
.name = "blocksize",
.get = get_int16,
.set = set_blocksize,
.min = 512,
.max = 65024,
};
/* --- public helpers --- */
static Property *qdev_prop_walk(Property *props, const char *name)

View File

@ -223,6 +223,7 @@ extern PropertyInfo qdev_prop_drive;
extern PropertyInfo qdev_prop_netdev;
extern PropertyInfo qdev_prop_vlan;
extern PropertyInfo qdev_prop_pci_devfn;
extern PropertyInfo qdev_prop_blocksize;
#define DEFINE_PROP(_name, _state, _field, _prop, _type) { \
.name = (_name), \
@ -284,6 +285,8 @@ extern PropertyInfo qdev_prop_pci_devfn;
#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
LostTickPolicy)
#define DEFINE_PROP_BLOCKSIZE(_n, _s, _f, _d) \
DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_blocksize, uint16_t)
#define DEFINE_PROP_END_OF_LIST() \
{}

View File

@ -9,7 +9,6 @@
*/
#include "qemu-common.h"
#include "qemu-aio.h"
#include "block_int.h"
#include "block/raw-posix-aio.h"
#include <sys/eventfd.h>

View File

@ -91,6 +91,7 @@ void process_incoming_migration(QEMUFile *f)
qemu_announce_self();
DPRINTF("successfully loaded vm state\n");
bdrv_clear_incoming_migration_all();
/* Make sure all file formats flush their mutable metadata */
bdrv_invalidate_cache_all();

View File

@ -17,6 +17,27 @@
#include "qemu-common.h"
#include "qemu-char.h"
typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret);
typedef struct AIOPool {
void (*cancel)(BlockDriverAIOCB *acb);
int aiocb_size;
BlockDriverAIOCB *free_aiocb;
} AIOPool;
struct BlockDriverAIOCB {
AIOPool *pool;
BlockDriverState *bs;
BlockDriverCompletionFunc *cb;
void *opaque;
BlockDriverAIOCB *next;
};
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque);
void qemu_aio_release(void *p);
/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
typedef int (AioFlushHandler)(void *opaque);

View File

@ -428,6 +428,13 @@ static int img_check(int argc, char **argv)
}
}
if (result.bfi.total_clusters != 0 && result.bfi.allocated_clusters != 0) {
printf("%" PRId64 "/%" PRId64 "= %0.2f%% allocated, %0.2f%% fragmented\n",
result.bfi.allocated_clusters, result.bfi.total_clusters,
result.bfi.allocated_clusters * 100.0 / result.bfi.total_clusters,
result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters);
}
bdrv_delete(bs);
if (ret < 0 || result.check_errors) {
@ -716,7 +723,7 @@ static int img_convert(int argc, char **argv)
ret = -1;
goto out;
}
qemu_progress_init(progress, 2.0);
qemu_progress_print(0, 100);
@ -1125,6 +1132,9 @@ static int img_info(int argc, char **argv)
if (bdi.cluster_size != 0) {
printf("cluster_size: %d\n", bdi.cluster_size);
}
if (bdi.is_dirty) {
printf("cleanly shut down: no\n");
}
}
bdrv_get_backing_filename(bs, backing_filename, sizeof(backing_filename));
if (backing_filename[0] != '\0') {

View File

@ -17,6 +17,7 @@
#include "qemu-common.h"
#include "block_int.h"
#include "cmd.h"
#include "trace/control.h"
#define VERSION "0.0.1"
@ -1783,6 +1784,7 @@ static void usage(const char *name)
" -g, --growable allow file to grow (only applies to protocols)\n"
" -m, --misalign misalign allocations for O_DIRECT\n"
" -k, --native-aio use kernel AIO implementation (on Linux only)\n"
" -T, --trace FILE enable trace events listed in the given file\n"
" -h, --help display this help and exit\n"
" -V, --version output version information and exit\n"
"\n",
@ -1794,7 +1796,7 @@ int main(int argc, char **argv)
{
int readonly = 0;
int growable = 0;
const char *sopt = "hVc:rsnmgk";
const char *sopt = "hVc:rsnmgkT:";
const struct option lopt[] = {
{ "help", 0, NULL, 'h' },
{ "version", 0, NULL, 'V' },
@ -1806,6 +1808,7 @@ int main(int argc, char **argv)
{ "misalign", 0, NULL, 'm' },
{ "growable", 0, NULL, 'g' },
{ "native-aio", 0, NULL, 'k' },
{ "trace", 1, NULL, 'T' },
{ NULL, 0, NULL, 0 }
};
int c;
@ -1837,6 +1840,11 @@ int main(int argc, char **argv)
case 'k':
flags |= BDRV_O_NATIVE_AIO;
break;
case 'T':
if (!trace_backend_init(optarg, NULL)) {
exit(1); /* error message will have been printed */
}
break;
case 'V':
printf("%s version %s\n", progname, VERSION);
exit(0);

View File

@ -240,10 +240,15 @@ static const QErrorStringTable qerror_table[] = {
.error_fmt = QERR_PROPERTY_VALUE_NOT_FOUND,
.desc = "Property '%(device).%(property)' can't find value '%(value)'",
},
{
.error_fmt = QERR_PROPERTY_VALUE_NOT_POWER_OF_2,
.desc = "Property '%(device).%(property)' doesn't take "
"value '%(value)', it's not a power of 2",
},
{
.error_fmt = QERR_PROPERTY_VALUE_OUT_OF_RANGE,
.desc = "Property '%(device).%(property)' doesn't take "
"value %(value) (minimum: %(min), maximum: %(max)'",
"value %(value) (minimum: %(min), maximum: %(max))",
},
{
.error_fmt = QERR_QGA_COMMAND_FAILED,

View File

@ -202,6 +202,10 @@ QError *qobject_to_qerror(const QObject *obj);
#define QERR_PROPERTY_VALUE_NOT_FOUND \
"{ 'class': 'PropertyValueNotFound', 'data': { 'device': %s, 'property': %s, 'value': %s } }"
#define QERR_PROPERTY_VALUE_NOT_POWER_OF_2 \
"{ 'class': 'PropertyValueNotPowerOf2', 'data': { " \
"'device': %s, 'property': %s, 'value': %"PRId64" } }"
#define QERR_PROPERTY_VALUE_OUT_OF_RANGE \
"{ 'class': 'PropertyValueOutOfRange', 'data': { 'device': %s, 'property': %s, 'value': %"PRId64", 'min': %"PRId64", 'max': %"PRId64" } }"

View File

@ -81,6 +81,10 @@ get_args()
args=${1#*\(}
args=${args%%\)*}
echo "$args"
if (echo "$args" | grep "[ *]next\($\|[, ]\)" > /dev/null 2>&1); then
echo -e "\n#error 'next' is a bad argument name (clash with systemtap keyword)\n "
fi
}
# Get the argument name list of a trace event

View File

@ -53,10 +53,10 @@ _make_test_img $size
echo
echo "creating pattern"
$QEMU_IO \
-c "write 2048k 4k -P 65" \
-c "write -P 65 2048k 4k" \
-c "write 4k 4k" \
-c "write 9M 4k" \
-c "read 2044k 8k -P 65 -s 4k -l 4k" \
-c "read -P 65 -s 4k -l 4k 2044k 8k" \
$TEST_IMG | _filter_qemu_io
echo

View File

@ -53,11 +53,11 @@ _make_test_img $size
echo
echo "creating pattern"
$QEMU_IO \
-c "write 2048k 4k -P 165" \
-c "write -P 165 2048k 4k" \
-c "write 64k 4k" \
-c "write 9M 4k" \
-c "write 2044k 4k -P 165" \
-c "write 8M 4k -P 99" \
-c "write -P 165 2044k 4k" \
-c "write -P 99 8M 4k" \
-c "read -P 165 2044k 8k" \
$TEST_IMG | _filter_qemu_io

View File

@ -60,7 +60,7 @@ for i in `seq 1 10`; do
# Note that we filter away the actual offset. That's because qemu
# may re-order the two aio requests. We only want to make sure the
# filesystem isn't corrupted afterwards anyway.
$QEMU_IO $TEST_IMG -c "aio_write $off1 1M" -c "aio_write $off2 1M" | \
$QEMU_IO -c "aio_write $off1 1M" -c "aio_write $off2 1M" $TEST_IMG | \
_filter_qemu_io | \
sed -e 's/bytes at offset [0-9]*/bytes at offset XXX/g'
done

72
tests/qemu-iotests/031 Executable file
View File

@ -0,0 +1,72 @@
#!/bin/bash
#
# Test that all qcow2 header extensions survive a header rewrite
#
# Copyright (C) 2011 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=kwolf@redhat.com
seq=`basename $0`
echo "QA output created by $seq"
here=`pwd`
tmp=/tmp/$$
status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
. ./common.pattern
# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto generic
_supported_os Linux
CLUSTER_SIZE=65536
echo
echo === Create image with unknown header extension ===
echo
_make_test_img 64M
./qcow2.py $TEST_IMG add-header-ext 0x12345678 "This is a test header extension"
./qcow2.py $TEST_IMG dump-header
_check_test_img
echo
echo === Rewrite header with no backing file ===
echo
$QEMU_IMG rebase -u -b "" $TEST_IMG
./qcow2.py $TEST_IMG dump-header
_check_test_img
echo
echo === Add a backing file and format ===
echo
$QEMU_IMG rebase -u -b "/some/backing/file/path" -F host_device $TEST_IMG
./qcow2.py $TEST_IMG dump-header
# success, all done
echo "*** done"
rm -f $seq.full
status=0

View File

@ -0,0 +1,76 @@
QA output created by 031
=== Create image with unknown header extension ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 cluster_size=65536
magic 0x514649fb
version 2
backing_file_offset 0x0
backing_file_size 0x0
cluster_bits 16
size 67108864
crypt_method 0
l1_size 1
l1_table_offset 0x30000
refcount_table_offset 0x10000
refcount_table_clusters 1
nb_snapshots 0
snapshot_offset 0x0
Header extension:
magic 0x12345678
length 31
data 'This is a test header extension'
No errors were found on the image.
=== Rewrite header with no backing file ===
magic 0x514649fb
version 2
backing_file_offset 0x0
backing_file_size 0x0
cluster_bits 16
size 67108864
crypt_method 0
l1_size 1
l1_table_offset 0x30000
refcount_table_offset 0x10000
refcount_table_clusters 1
nb_snapshots 0
snapshot_offset 0x0
Header extension:
magic 0x12345678
length 31
data 'This is a test header extension'
No errors were found on the image.
=== Add a backing file and format ===
magic 0x514649fb
version 2
backing_file_offset 0x90
backing_file_size 0x17
cluster_bits 16
size 67108864
crypt_method 0
l1_size 1
l1_table_offset 0x30000
refcount_table_offset 0x10000
refcount_table_clusters 1
nb_snapshots 0
snapshot_offset 0x0
Header extension:
magic 0xe2792aca
length 11
data 'host_device'
Header extension:
magic 0x12345678
length 31
data 'This is a test header extension'
*** done

View File

@ -57,16 +57,21 @@ _make_test_img()
{
# extra qemu-img options can be added by tests
# at least one argument (the image size) needs to be added
local extra_img_options=$*
local extra_img_options=""
local cluster_size_filter="s# cluster_size=[0-9]\\+##g"
local image_size=$*
if [ "$1" = "-b" ]; then
extra_img_options="$1 $2"
image_size=$3
fi
if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" ]; then
extra_img_options="-o cluster_size=$CLUSTER_SIZE $extra_img_options"
cluster_size_filter=""
fi
# XXX(hch): have global image options?
$QEMU_IMG create -f $IMGFMT $TEST_IMG $extra_img_options | \
$QEMU_IMG create -f $IMGFMT $extra_img_options $TEST_IMG $image_size | \
sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" | \
sed -e "s#$TEST_DIR#TEST_DIR#g" | \
sed -e "s#$IMGFMT#IMGFMT#g" | \

View File

@ -37,3 +37,4 @@
028 rw backing auto
029 rw auto quick
030 rw auto
031 rw auto quick

207
tests/qemu-iotests/qcow2.py Executable file
View File

@ -0,0 +1,207 @@
#!/usr/bin/env python
import sys
import struct
import string
class QcowHeaderExtension:
def __init__(self, magic, length, data):
self.magic = magic
self.length = length
self.data = data
@classmethod
def create(cls, magic, data):
return QcowHeaderExtension(magic, len(data), data)
class QcowHeader:
uint32_t = 'I'
uint64_t = 'Q'
fields = [
# Version 2 header fields
[ uint32_t, '%#x', 'magic' ],
[ uint32_t, '%d', 'version' ],
[ uint64_t, '%#x', 'backing_file_offset' ],
[ uint32_t, '%#x', 'backing_file_size' ],
[ uint32_t, '%d', 'cluster_bits' ],
[ uint64_t, '%d', 'size' ],
[ uint32_t, '%d', 'crypt_method' ],
[ uint32_t, '%d', 'l1_size' ],
[ uint64_t, '%#x', 'l1_table_offset' ],
[ uint64_t, '%#x', 'refcount_table_offset' ],
[ uint32_t, '%d', 'refcount_table_clusters' ],
[ uint32_t, '%d', 'nb_snapshots' ],
[ uint64_t, '%#x', 'snapshot_offset' ],
];
fmt = '>' + ''.join(field[0] for field in fields)
def __init__(self, fd):
buf_size = struct.calcsize(QcowHeader.fmt)
fd.seek(0)
buf = fd.read(buf_size)
header = struct.unpack(QcowHeader.fmt, buf)
self.__dict__ = dict((field[2], header[i])
for i, field in enumerate(QcowHeader.fields))
self.cluster_size = 1 << self.cluster_bits
fd.seek(self.get_header_length())
self.load_extensions(fd)
if self.backing_file_offset:
fd.seek(self.backing_file_offset)
self.backing_file = fd.read(self.backing_file_size)
else:
self.backing_file = None
def get_header_length(self):
if self.version == 2:
return 72
else:
raise Exception("version != 2 not supported")
def load_extensions(self, fd):
self.extensions = []
if self.backing_file_offset != 0:
end = min(self.cluster_size, self.backing_file_offset)
else:
end = self.cluster_size
while fd.tell() < end:
(magic, length) = struct.unpack('>II', fd.read(8))
if magic == 0:
break
else:
padded = (length + 7) & ~7
data = fd.read(padded)
self.extensions.append(QcowHeaderExtension(magic, length, data))
def update_extensions(self, fd):
fd.seek(self.get_header_length())
extensions = self.extensions
extensions.append(QcowHeaderExtension(0, 0, ""))
for ex in extensions:
buf = struct.pack('>II', ex.magic, ex.length)
fd.write(buf)
fd.write(ex.data)
if self.backing_file != None:
self.backing_file_offset = fd.tell()
fd.write(self.backing_file)
if fd.tell() > self.cluster_size:
raise Exception("I think I just broke the image...")
def update(self, fd):
header_bytes = self.get_header_length()
self.update_extensions(fd)
fd.seek(0)
header = tuple(self.__dict__[f] for t, p, f in QcowHeader.fields)
buf = struct.pack(QcowHeader.fmt, *header)
buf = buf[0:header_bytes-1]
fd.write(buf)
def dump(self):
for f in QcowHeader.fields:
print "%-25s" % f[2], f[1] % self.__dict__[f[2]]
print ""
def dump_extensions(self):
for ex in self.extensions:
data = ex.data[:ex.length]
if all(c in string.printable for c in data):
data = "'%s'" % data
else:
data = "<binary>"
print "Header extension:"
print "%-25s %#x" % ("magic", ex.magic)
print "%-25s %d" % ("length", ex.length)
print "%-25s %s" % ("data", data)
print ""
def cmd_dump_header(fd):
h = QcowHeader(fd)
h.dump()
h.dump_extensions()
def cmd_add_header_ext(fd, magic, data):
try:
magic = int(magic, 0)
except:
print "'%s' is not a valid magic number" % magic
sys.exit(1)
h = QcowHeader(fd)
h.extensions.append(QcowHeaderExtension.create(magic, data))
h.update(fd)
def cmd_del_header_ext(fd, magic):
try:
magic = int(magic, 0)
except:
print "'%s' is not a valid magic number" % magic
sys.exit(1)
h = QcowHeader(fd)
found = False
for ex in h.extensions:
if ex.magic == magic:
found = True
h.extensions.remove(ex)
if not found:
print "No such header extension"
return
h.update(fd)
cmds = [
[ 'dump-header', cmd_dump_header, 0, 'Dump image header and header extensions' ],
[ 'add-header-ext', cmd_add_header_ext, 2, 'Add a header extension' ],
[ 'del-header-ext', cmd_del_header_ext, 1, 'Delete a header extension' ],
]
def main(filename, cmd, args):
fd = open(filename, "r+b")
try:
for name, handler, num_args, desc in cmds:
if name != cmd:
continue
elif len(args) != num_args:
usage()
return
else:
handler(fd, *args)
return
print "Unknown command '%s'" % cmd
finally:
fd.close()
def usage():
print "Usage: %s <file> <cmd> [<arg>, ...]" % sys.argv[0]
print ""
print "Supported commands:"
for name, handler, num_args, desc in cmds:
print " %-20s - %s" % (name, desc)
if len(sys.argv) < 3:
usage()
sys.exit(1)
main(sys.argv[1], sys.argv[2], sys.argv[3:])

View File

@ -562,7 +562,7 @@ qemu_coroutine_terminate(void *co) "self %p"
# qemu-coroutine-lock.c
qemu_co_queue_next_bh(void) ""
qemu_co_queue_next(void *next) "next %p"
qemu_co_queue_next(void *nxt) "next %p"
qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"