forked from vitalif/vitastor
Compare commits
1 Commits
epoch-dele
...
qemu-libvi
Author | SHA1 | Date | |
---|---|---|---|
590c157c32 |
@@ -10,25 +10,18 @@ function add_pg_history(new_pg_history, new_pg, prev_pgs, prev_pg_history, old_p
|
||||
if (!new_pg_history[new_pg])
|
||||
{
|
||||
new_pg_history[new_pg] = {
|
||||
osd_set_epochs: {},
|
||||
osd_sets: {},
|
||||
all_peers: {},
|
||||
epoch: 0,
|
||||
};
|
||||
}
|
||||
const nh = new_pg_history[new_pg], oh = prev_pg_history[old_pg];
|
||||
nh.osd_set_epochs[prev_pgs[old_pg].join(' ')] = { osd_set: prev_pgs[old_pg] };
|
||||
nh.osd_sets[prev_pgs[old_pg].join(' ')] = prev_pgs[old_pg];
|
||||
if (oh && oh.osd_sets && oh.osd_sets.length)
|
||||
{
|
||||
for (const pg of oh.osd_sets)
|
||||
{
|
||||
nh.osd_set_epochs[pg.join(' ')] = { osd_set: pg };
|
||||
}
|
||||
}
|
||||
if (oh && oh.osd_set_epochs && oh.osd_set_epochs.length)
|
||||
{
|
||||
for (const pg of oh.osd_set_epochs)
|
||||
{
|
||||
nh.osd_set_epochs[pg.osd_set.join(' ')] = { osd_set: pg.osd_set };
|
||||
nh.osd_sets[pg.join(' ')] = pg;
|
||||
}
|
||||
}
|
||||
if (oh && oh.all_peers && oh.all_peers.length)
|
||||
@@ -46,7 +39,7 @@ function add_pg_history(new_pg_history, new_pg, prev_pgs, prev_pg_history, old_p
|
||||
|
||||
function finish_pg_history(merged_history)
|
||||
{
|
||||
merged_history.osd_set_epochs = Object.values(merged_history.osd_set_epochs);
|
||||
merged_history.osd_sets = Object.values(merged_history.osd_sets);
|
||||
merged_history.all_peers = Object.values(merged_history.all_peers);
|
||||
}
|
||||
|
||||
|
19
mon/mon.js
19
mon/mon.js
@@ -276,12 +276,7 @@ const etcd_tree = {
|
||||
history: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
osd_set_epochs: {
|
||||
osd_set: osd_num_t[],
|
||||
min_epoch: uint64_t,
|
||||
max_epoch: uint64_t,
|
||||
}[],
|
||||
osd_sets: osd_num_t[][], // outdated
|
||||
osd_sets: osd_num_t[][],
|
||||
all_peers: osd_num_t[],
|
||||
epoch: uint64_t,
|
||||
},
|
||||
@@ -952,6 +947,18 @@ class Mon
|
||||
osd_set,
|
||||
primary: this.pick_primary(pool_id, osd_set, up_osds, aff_osds),
|
||||
};
|
||||
if (prev_pgs[i] && prev_pgs[i].join(' ') != osd_set.join(' ') &&
|
||||
prev_pgs[i].filter(osd_num => osd_num).length > 0)
|
||||
{
|
||||
pg_history[i] = pg_history[i] || {};
|
||||
pg_history[i].osd_sets = pg_history[i].osd_sets || [];
|
||||
pg_history[i].osd_sets.push(prev_pgs[i]);
|
||||
}
|
||||
if (pg_history[i] && pg_history[i].osd_sets)
|
||||
{
|
||||
pg_history[i].osd_sets = Object.values(pg_history[i].osd_sets
|
||||
.reduce((a, c) => { a[c.join(' ')] = c; return a; }, {}));
|
||||
}
|
||||
});
|
||||
for (let i = 0; i < new_pgs.length || i < prev_pgs.length; i++)
|
||||
{
|
||||
|
659
patches/libvirt-6.0.0-vitastor.diff
Normal file
659
patches/libvirt-6.0.0-vitastor.diff
Normal file
@@ -0,0 +1,659 @@
|
||||
commit 7f01510ef207940b07fac4f5fc8b9f1580b443aa
|
||||
Author: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
Date: Sun Jun 27 12:52:40 2021 +0300
|
||||
|
||||
Add Vitastor support
|
||||
|
||||
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
|
||||
index aa50eac..082b4f8 100644
|
||||
--- a/docs/schemas/domaincommon.rng
|
||||
+++ b/docs/schemas/domaincommon.rng
|
||||
@@ -1766,6 +1766,35 @@
|
||||
</element>
|
||||
</define>
|
||||
|
||||
+ <define name="diskSourceNetworkProtocolVitastor">
|
||||
+ <element name="source">
|
||||
+ <interleave>
|
||||
+ <attribute name="protocol">
|
||||
+ <value>vitastor</value>
|
||||
+ </attribute>
|
||||
+ <ref name="diskSourceCommon"/>
|
||||
+ <optional>
|
||||
+ <attribute name="name"/>
|
||||
+ </optional>
|
||||
+ <optional>
|
||||
+ <attribute name="query"/>
|
||||
+ </optional>
|
||||
+ <zeroOrMore>
|
||||
+ <ref name="diskSourceNetworkHost"/>
|
||||
+ </zeroOrMore>
|
||||
+ <optional>
|
||||
+ <element name="config">
|
||||
+ <attribute name="file">
|
||||
+ <ref name="absFilePath"/>
|
||||
+ </attribute>
|
||||
+ <empty/>
|
||||
+ </element>
|
||||
+ </optional>
|
||||
+ <empty/>
|
||||
+ </interleave>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
<define name="diskSourceNetworkProtocolISCSI">
|
||||
<element name="source">
|
||||
<attribute name="protocol">
|
||||
@@ -1891,6 +1920,7 @@
|
||||
<ref name="diskSourceNetworkProtocolHTTP"/>
|
||||
<ref name="diskSourceNetworkProtocolSimple"/>
|
||||
<ref name="diskSourceNetworkProtocolVxHS"/>
|
||||
+ <ref name="diskSourceNetworkProtocolVitastor"/>
|
||||
</choice>
|
||||
</define>
|
||||
|
||||
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
|
||||
index 4bf2b5f..dbc011b 100644
|
||||
--- a/include/libvirt/libvirt-storage.h
|
||||
+++ b/include/libvirt/libvirt-storage.h
|
||||
@@ -245,6 +245,7 @@ typedef enum {
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17,
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18,
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19,
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20,
|
||||
} virConnectListAllStoragePoolsFlags;
|
||||
|
||||
int virConnectListAllStoragePools(virConnectPtr conn,
|
||||
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
|
||||
index 222bb8c..2c30c55 100644
|
||||
--- a/src/conf/domain_conf.c
|
||||
+++ b/src/conf/domain_conf.c
|
||||
@@ -5114,8 +5114,7 @@ virDomainDiskDefPostParse(virDomainDiskD
|
||||
const virDomainDef *def,
|
||||
virDomainXMLOptionPtr xmlopt)
|
||||
{
|
||||
- /* internal snapshots and config files are currently supported
|
||||
- * only with rbd: */
|
||||
+ /* internal snapshots are currently supported only with rbd: */
|
||||
if (virStorageSourceGetActualType(disk->src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
disk->src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
|
||||
if (disk->src->snapshot) {
|
||||
@@ -5124,11 +5123,15 @@ virDomainDiskDefPostParse(virDomainDiskD
|
||||
"only with 'rbd' disks"));
|
||||
return -1;
|
||||
}
|
||||
-
|
||||
+ }
|
||||
+ /* config files are currently supported only with rbd and vitastor: */
|
||||
+ if (virStorageSourceGetActualType(disk->src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
+ disk->src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
|
||||
+ disk->src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||
if (disk->src->configFile) {
|
||||
virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
_("<config> element is currently supported "
|
||||
- "only with 'rbd' disks"));
|
||||
+ "only with 'rbd' and 'vitastor' disks"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -9258,6 +9261,10 @@ virDomainDiskSourceNetworkParse(xmlNodeP
|
||||
return -1;
|
||||
}
|
||||
|
||||
+ if (src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||
+ src->relPath = virXMLPropString(node, "query");
|
||||
+ }
|
||||
+
|
||||
if ((haveTLS = virXMLPropString(node, "tls")) &&
|
||||
(src->haveTLS = virTristateBoolTypeFromString(haveTLS)) <= 0) {
|
||||
virReportError(VIR_ERR_XML_ERROR,
|
||||
@@ -9303,6 +9310,10 @@ virDomainDiskSourceNetworkParse(xmlNodeP
|
||||
/* config file currently only works with remote disks */
|
||||
src->configFile = virXPathString("string(./config/@file)", ctxt);
|
||||
|
||||
+ if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
|
||||
+ src->query = virXMLPropString(node, "query");
|
||||
+
|
||||
if (virDomainStorageNetworkParseHosts(node, &src->hosts, &src->nhosts) < 0)
|
||||
return -1;
|
||||
|
||||
@@ -24141,6 +24152,10 @@ virDomainDiskSourceFormatNetwork(virBuff
|
||||
|
||||
virBufferEscapeString(attrBuf, " name='%s'", path ? path : src->path);
|
||||
|
||||
+ if (src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR && src->relPath != NULL) {
|
||||
+ virBufferEscapeString(attrBuf, " query='%s'", src->relPath);
|
||||
+ }
|
||||
+
|
||||
if (src->haveTLS != VIR_TRISTATE_BOOL_ABSENT &&
|
||||
!(flags & VIR_DOMAIN_DEF_FORMAT_MIGRATABLE &&
|
||||
src->tlsFromConfig))
|
||||
@@ -31402,6 +31417,7 @@ virDomainDiskTranslateSourcePool(virDomainDiskDefPtr def)
|
||||
|
||||
case VIR_STORAGE_POOL_MPATH:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
|
||||
index 55db7a9..7cbe937 100644
|
||||
--- a/src/conf/storage_conf.c
|
||||
+++ b/src/conf/storage_conf.c
|
||||
@@ -59,7 +59,7 @@ VIR_ENUM_IMPL(virStoragePool,
|
||||
"logical", "disk", "iscsi",
|
||||
"iscsi-direct", "scsi", "mpath",
|
||||
"rbd", "sheepdog", "gluster",
|
||||
- "zfs", "vstorage",
|
||||
+ "zfs", "vstorage", "vitastor",
|
||||
);
|
||||
|
||||
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
|
||||
@@ -248,6 +248,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
|
||||
.formatToString = virStorageFileFormatTypeToString,
|
||||
}
|
||||
},
|
||||
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
|
||||
+ .poolOptions = {
|
||||
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NAME),
|
||||
+ },
|
||||
+ .volOptions = {
|
||||
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
|
||||
+ .formatFromString = virStorageVolumeFormatFromString,
|
||||
+ .formatToString = virStorageFileFormatTypeToString,
|
||||
+ }
|
||||
+ },
|
||||
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
|
||||
.poolOptions = {
|
||||
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
@@ -550,6 +562,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
|
||||
_("element 'name' is mandatory for RBD pool"));
|
||||
goto cleanup;
|
||||
}
|
||||
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
|
||||
+ virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
+ _("element 'name' is mandatory for Vitastor pool"));
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
if (options->formatFromString) {
|
||||
char *format = virXPathString("string(./format/@type)", ctxt);
|
||||
@@ -1173,6 +1190,7 @@ virStoragePoolDefFormatBuf(virBufferPtr buf,
|
||||
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
|
||||
* files, so they don't have a target */
|
||||
if (def->type != VIR_STORAGE_POOL_RBD &&
|
||||
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
|
||||
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
|
||||
def->type != VIR_STORAGE_POOL_GLUSTER &&
|
||||
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
|
||||
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
|
||||
index dc0aa2a..ed4983d 100644
|
||||
--- a/src/conf/storage_conf.h
|
||||
+++ b/src/conf/storage_conf.h
|
||||
@@ -110,6 +110,7 @@ typedef enum {
|
||||
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
|
||||
VIR_STORAGE_POOL_ZFS, /* ZFS */
|
||||
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
|
||||
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
|
||||
|
||||
VIR_STORAGE_POOL_LAST,
|
||||
} virStoragePoolType;
|
||||
@@ -466,6 +467,7 @@ VIR_ENUM_DECL(virStoragePartedFs)
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
|
||||
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
|
||||
index 6ea6a97..3ba45b9 100644
|
||||
--- a/src/conf/virstorageobj.c
|
||||
+++ b/src/conf/virstorageobj.c
|
||||
@@ -1493,6 +1493,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
|
||||
return 1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
break;
|
||||
@@ -1994,6 +1995,8 @@ virStoragePoolObjMatch(virStoragePoolObjPtr obj,
|
||||
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
|
||||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
|
||||
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
|
||||
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
|
||||
index 2ea3e94..d5d2273 100644
|
||||
--- a/src/libvirt-storage.c
|
||||
+++ b/src/libvirt-storage.c
|
||||
@@ -92,6 +92,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
|
||||
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
|
||||
*
|
||||
* Returns the number of storage pools found or -1 and sets @pools to
|
||||
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
|
||||
index 73e988a..ab7bb81 100644
|
||||
--- a/src/libxl/libxl_conf.c
|
||||
+++ b/src/libxl/libxl_conf.c
|
||||
@@ -888,6 +888,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSourcePtr src,
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
|
||||
index 17b93d0..c5a0084 100644
|
||||
--- a/src/libxl/xen_xl.c
|
||||
+++ b/src/libxl/xen_xl.c
|
||||
@@ -1601,6 +1601,7 @@ xenFormatXLDiskSrcNet(virStorageSourcePtr src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
|
||||
index cbf0aa4..f0ca9e7 100644
|
||||
--- a/src/qemu/qemu_block.c
|
||||
+++ b/src/qemu/qemu_block.c
|
||||
@@ -869,6 +869,42 @@ qemuBlockStorageSourceGetRBDProps(virStorageSourcePtr src)
|
||||
}
|
||||
|
||||
|
||||
+static virJSONValuePtr
|
||||
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
|
||||
+{
|
||||
+ virJSONValuePtr ret = NULL;
|
||||
+ virStorageNetHostDefPtr host;
|
||||
+ size_t i;
|
||||
+ virBuffer buf = VIR_BUFFER_INITIALIZER;
|
||||
+ char *etcd = NULL;
|
||||
+
|
||||
+ for (i = 0; i < src->nhosts; i++) {
|
||||
+ host = src->hosts + i;
|
||||
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
|
||||
+ goto cleanup;
|
||||
+ }
|
||||
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
|
||||
+ }
|
||||
+ if (src->nhosts > 0) {
|
||||
+ etcd = virBufferContentAndReset(&buf);
|
||||
+ }
|
||||
+
|
||||
+ if (virJSONValueObjectCreate(&ret,
|
||||
+ "s:driver", "vitastor",
|
||||
+ "S:etcd-host", etcd,
|
||||
+ "S:etcd-prefix", src->relPath,
|
||||
+ "S:config-path", src->configFile,
|
||||
+ "s:image", src->path,
|
||||
+ NULL) < 0)
|
||||
+ goto cleanup;
|
||||
+
|
||||
+cleanup:
|
||||
+ VIR_FREE(etcd);
|
||||
+ virBufferFreeAndReset(&buf);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static virJSONValuePtr
|
||||
qemuBlockStorageSourceGetSheepdogProps(virStorageSourcePtr src)
|
||||
{
|
||||
@@ -1130,6 +1166,11 @@ qemuBlockStorageSourceGetBackendProps(virStorageSourcePtr src,
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return NULL;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
return NULL;
|
||||
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
|
||||
index 822d5f8..abec34e 100644
|
||||
--- a/src/qemu/qemu_command.c
|
||||
+++ b/src/qemu/qemu_command.c
|
||||
@@ -1078,6 +1078,43 @@ qemuBuildNetworkDriveStr(virStorageSourcePtr src,
|
||||
ret = virBufferContentAndReset(&buf);
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (strchr(src->path, ':')) {
|
||||
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
|
||||
+ _("':' not allowed in Vitastor source volume name '%s'"),
|
||||
+ src->path);
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ virBufferStrcat(&buf, "vitastor:image=", src->path, NULL);
|
||||
+
|
||||
+ if (src->nhosts > 0) {
|
||||
+ virBufferAddLit(&buf, ":etcd-host=");
|
||||
+ for (i = 0; i < src->nhosts; i++) {
|
||||
+ if (i)
|
||||
+ virBufferAddLit(&buf, ",");
|
||||
+
|
||||
+ /* assume host containing : is ipv6 */
|
||||
+ if (strchr(src->hosts[i].name, ':'))
|
||||
+ virBufferEscape(&buf, '\\', ":", "[%s]",
|
||||
+ src->hosts[i].name);
|
||||
+ else
|
||||
+ virBufferAsprintf(&buf, "%s", src->hosts[i].name);
|
||||
+
|
||||
+ if (src->hosts[i].port)
|
||||
+ virBufferAsprintf(&buf, "\\:%u", src->hosts[i].port);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (src->configFile)
|
||||
+ virBufferEscape(&buf, '\\', ":", ":config-path=%s", src->configFile);
|
||||
+
|
||||
+ if (src->relPath)
|
||||
+ virBufferEscape(&buf, '\\', ":", ":etcd-prefix=%s", src->relPath);
|
||||
+
|
||||
+ ret = virBufferContentAndReset(&buf);
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
||||
_("VxHS protocol does not support URI syntax"));
|
||||
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
||||
index ec6b340..f399efa 100644
|
||||
--- a/src/qemu/qemu_domain.c
|
||||
+++ b/src/qemu/qemu_domain.c
|
||||
@@ -6862,6 +6862,16 @@ qemuDomainValidateStorageSource(virStora
|
||||
return -1;
|
||||
}
|
||||
|
||||
+ if (src->query &&
|
||||
+ (actualType != VIR_STORAGE_TYPE_NETWORK ||
|
||||
+ (src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
|
||||
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
||||
+ _("query is supported only with HTTP(S) protocols"));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -13836,6 +13846,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSourcePtr src,
|
||||
break;
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
|
||||
index 1d96170..2d24396 100644
|
||||
--- a/src/qemu/qemu_driver.c
|
||||
+++ b/src/qemu/qemu_driver.c
|
||||
@@ -14841,6 +14841,7 @@ qemuDomainSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDefPtr snapdi
|
||||
case VIR_STORAGE_NET_PROTOCOL_TFTP:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
virReportError(VIR_ERR_INTERNAL_ERROR,
|
||||
_("external inactive snapshots are not supported on "
|
||||
@@ -14925,6 +14926,7 @@ qemuDomainSnapshotPrepareDiskExternalActive(virDomainSnapshotDiskDefPtr snapdisk
|
||||
case VIR_STORAGE_NET_PROTOCOL_TFTP:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
virReportError(VIR_ERR_INTERNAL_ERROR,
|
||||
_("external active snapshots are not supported on "
|
||||
@@ -15054,6 +15056,7 @@ qemuDomainSnapshotPrepareDiskInternal(virDomainDiskDefPtr disk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_TFTP:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
virReportError(VIR_ERR_INTERNAL_ERROR,
|
||||
_("internal inactive snapshots are not supported on "
|
||||
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
|
||||
index 4a13e90..33301c7 100644
|
||||
--- a/src/storage/storage_driver.c
|
||||
+++ b/src/storage/storage_driver.c
|
||||
@@ -1641,6 +1641,7 @@ storageVolLookupByPathCallback(virStoragePoolObjPtr obj,
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
ignore_value(VIR_STRDUP(stable_path, data->path));
|
||||
break;
|
||||
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
|
||||
index 29c4c86..a27ad94 100644
|
||||
--- a/src/test/test_driver.c
|
||||
+++ b/src/test/test_driver.c
|
||||
@@ -7086,6 +7086,7 @@ testStorageVolumeTypeForPool(int pooltype)
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
return VIR_STORAGE_VOL_NETWORK;
|
||||
case VIR_STORAGE_POOL_LOGICAL:
|
||||
case VIR_STORAGE_POOL_DISK:
|
||||
diff --git a/src/util/virstoragefile.c b/src/util/virstoragefile.c
|
||||
index 0d3c2af..edb7f9e 100644
|
||||
--- a/src/util/virstoragefile.c
|
||||
+++ b/src/util/virstoragefile.c
|
||||
@@ -90,6 +90,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
|
||||
"tftp",
|
||||
"ssh",
|
||||
"vxhs",
|
||||
+ "vitastor",
|
||||
);
|
||||
|
||||
VIR_ENUM_IMPL(virStorageNetHostTransport,
|
||||
@@ -2927,6 +2928,73 @@ virStorageSourceParseRBDColonString(cons
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseVitastorColonString(const char *colonstr,
|
||||
+ virStorageSourcePtr src)
|
||||
+{
|
||||
+ char *p, *e, *next;
|
||||
+ g_autofree char *options = NULL;
|
||||
+
|
||||
+ /* optionally skip the "vitastor:" prefix if provided */
|
||||
+ if (STRPREFIX(colonstr, "vitastor:"))
|
||||
+ colonstr += strlen("vitastor:");
|
||||
+
|
||||
+ options = g_strdup(colonstr);
|
||||
+
|
||||
+ p = options;
|
||||
+ while (*p) {
|
||||
+ /* find : delimiter or end of string */
|
||||
+ for (e = p; *e && *e != ':'; ++e) {
|
||||
+ if (*e == '\\') {
|
||||
+ e++;
|
||||
+ if (*e == '\0')
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (*e == '\0') {
|
||||
+ next = e; /* last kv pair */
|
||||
+ } else {
|
||||
+ next = e + 1;
|
||||
+ *e = '\0';
|
||||
+ }
|
||||
+
|
||||
+ if (STRPREFIX(p, "image=")) {
|
||||
+ src->path = g_strdup(p + strlen("image="));
|
||||
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
|
||||
+ src->query = g_strdup(p + strlen("etcd-prefix="));
|
||||
+ } else if (STRPREFIX(p, "config-path=")) {
|
||||
+ src->configFile = g_strdup(p + strlen("config-path="));
|
||||
+ } else if (STRPREFIX(p, "etcd-host=")) {
|
||||
+ char *h, *sep;
|
||||
+
|
||||
+ h = p + strlen("etcd-host=");
|
||||
+ while (h < e) {
|
||||
+ for (sep = h; sep < e; ++sep) {
|
||||
+ if (*sep == '\\' && (sep[1] == ',' ||
|
||||
+ sep[1] == ';' ||
|
||||
+ sep[1] == ' ')) {
|
||||
+ *sep = '\0';
|
||||
+ sep += 2;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (virStorageSourceRBDAddHost(src, h) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ h = sep;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ p = next;
|
||||
+ }
|
||||
+
|
||||
+ if (!src->path) {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
|
||||
static int
|
||||
virStorageSourceParseNBDColonString(const char *nbdstr,
|
||||
@@ -3022,6 +3090,11 @@ virStorageSourceParseBackingColon(virSto
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
@@ -3507,6 +3580,54 @@ virStorageSourceParseBackingJSONRBD(virS
|
||||
}
|
||||
|
||||
static int
|
||||
+virStorageSourceParseBackingJSONVitastor(virStorageSourcePtr src,
|
||||
+ virJSONValuePtr json,
|
||||
+ const char *jsonstr G_GNUC_UNUSED,
|
||||
+ int opaque G_GNUC_UNUSED)
|
||||
+{
|
||||
+ const char *filename;
|
||||
+ const char *image = virJSONValueObjectGetString(json, "image");
|
||||
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
|
||||
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
|
||||
+ virJSONValuePtr servers = virJSONValueObjectGetArray(json, "server");
|
||||
+ size_t nservers;
|
||||
+ size_t i;
|
||||
+
|
||||
+ src->type = VIR_STORAGE_TYPE_NETWORK;
|
||||
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
|
||||
+
|
||||
+ /* legacy syntax passed via 'filename' option */
|
||||
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
|
||||
+ return virStorageSourceParseVitastorColonString(filename, src);
|
||||
+
|
||||
+ if (!image) {
|
||||
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
|
||||
+ _("missing image name in Vitastor backing volume "
|
||||
+ "JSON specification"));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ src->path = g_strdup(image);
|
||||
+ src->configFile = g_strdup(conf);
|
||||
+ src->query = g_strdup(etcd_prefix);
|
||||
+
|
||||
+ if (servers) {
|
||||
+ nservers = virJSONValueArraySize(servers);
|
||||
+
|
||||
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
|
||||
+ src->nhosts = nservers;
|
||||
+
|
||||
+ for (i = 0; i < nservers; i++) {
|
||||
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
|
||||
+ virJSONValueArrayGet(servers, i)) < 0)
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
virStorageSourceParseBackingJSONRaw(virStorageSourcePtr src,
|
||||
virJSONValuePtr json,
|
||||
int opaque G_GNUC_UNUSED)
|
||||
@@ -3578,6 +3699,7 @@ static const struct virStorageSourceJSON
|
||||
{"sheepdog", virStorageSourceParseBackingJSONSheepdog, 0},
|
||||
{"ssh", virStorageSourceParseBackingJSONSSH, 0},
|
||||
{"rbd", virStorageSourceParseBackingJSONRBD, 0},
|
||||
+ {"vitastor", virStorageSourceParseBackingJSONVitastor, 0},
|
||||
{"raw", virStorageSourceParseBackingJSONRaw, 0},
|
||||
{"vxhs", virStorageSourceParseBackingJSONVxHS, 0},
|
||||
};
|
||||
@@ -4364,6 +4486,7 @@ virStorageSourceNetworkDefaultPort(virSt
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
return 24007;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
/* we don't provide a default for RBD */
|
||||
return 0;
|
||||
diff --git a/src/util/virstoragefile.h b/src/util/virstoragefile.h
|
||||
index 1d6161a..8d83bf3 100644
|
||||
--- a/src/util/virstoragefile.h
|
||||
+++ b/src/util/virstoragefile.h
|
||||
@@ -134,6 +134,7 @@ typedef enum {
|
||||
VIR_STORAGE_NET_PROTOCOL_TFTP,
|
||||
VIR_STORAGE_NET_PROTOCOL_SSH,
|
||||
VIR_STORAGE_NET_PROTOCOL_VXHS,
|
||||
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
|
||||
|
||||
VIR_STORAGE_NET_PROTOCOL_LAST
|
||||
} virStorageNetProtocol;
|
||||
@@ -265,6 +266,7 @@ struct _virStorageSource {
|
||||
char *snapshot; /* for storage systems supporting internal snapshots */
|
||||
char *configFile; /* some storage systems use config file as part of
|
||||
the source definition */
|
||||
+ char *query; /* query string for HTTP based protocols */
|
||||
size_t nhosts;
|
||||
virStorageNetHostDefPtr hosts;
|
||||
virStorageSourcePoolDefPtr srcpool;
|
||||
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
|
||||
index 70ca39b..9caef51 100644
|
||||
--- a/tools/virsh-pool.c
|
||||
+++ b/tools/virsh-pool.c
|
||||
@@ -1219,6 +1219,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd ATTRIBUTE_UNUSED)
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
|
||||
break;
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
|
||||
+ break;
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
break;
|
||||
}
|
203
patches/qemu-4.2.0-vitastor.patch
Normal file
203
patches/qemu-4.2.0-vitastor.patch
Normal file
@@ -0,0 +1,203 @@
|
||||
diff -NaurpbB qemu-4.2/block/Makefile.objs qemu-4.2/block/Makefile.objs
|
||||
--- qemu-4.2/block/Makefile.objs 2019-12-13 01:20:47.000000000 +0700
|
||||
+++ qemu-4.2/block/Makefile.objs 2022-05-19 09:05:29.047996624 +0700
|
||||
@@ -29,6 +29,7 @@ block-obj-$(if $(CONFIG_LIBISCSI),y,n) +
|
||||
block-obj-$(CONFIG_LIBNFS) += nfs.o
|
||||
block-obj-$(CONFIG_CURL) += curl.o
|
||||
block-obj-$(CONFIG_RBD) += rbd.o
|
||||
+block-obj-$(CONFIG_VITASTOR) += vitastor.o
|
||||
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
|
||||
block-obj-$(CONFIG_VXHS) += vxhs.o
|
||||
block-obj-$(CONFIG_LIBSSH) += ssh.o
|
||||
@@ -53,6 +54,8 @@ curl.o-cflags := $(CURL_CFLAGS)
|
||||
curl.o-libs := $(CURL_LIBS)
|
||||
rbd.o-cflags := $(RBD_CFLAGS)
|
||||
rbd.o-libs := $(RBD_LIBS)
|
||||
+vitastor.o-cflags := $(VITASTOR_CFLAGS)
|
||||
+vitastor.o-libs := $(VITASTOR_LIBS)
|
||||
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
|
||||
gluster.o-libs := $(GLUSTERFS_LIBS)
|
||||
vxhs.o-libs := $(VXHS_LIBS
|
||||
diff -NaurpbB qemu-4.2/configure qemu-4.2/configure
|
||||
--- qemu-4.2/configure 2022-05-19 08:32:27.000000000 +0700
|
||||
+++ qemu-4.2/configure 2022-05-19 08:52:16.844594720 +0700
|
||||
@@ -436,6 +436,7 @@ trace_backends="log"
|
||||
trace_file="trace"
|
||||
spice=""
|
||||
rbd=""
|
||||
+vitastor="yes"
|
||||
smartcard=""
|
||||
libusb=""
|
||||
usb_redir=""
|
||||
@@ -1317,6 +1318,10 @@ for opt do
|
||||
;;
|
||||
--enable-rbd) rbd="yes"
|
||||
;;
|
||||
+ --disable-vitastor) vitastor="no"
|
||||
+ ;;
|
||||
+ --enable-vitastor) vitastor="yes"
|
||||
+ ;;
|
||||
--disable-xfsctl) xfs="no"
|
||||
;;
|
||||
--enable-xfsctl) xfs="yes"
|
||||
@@ -1813,6 +1818,7 @@ disabled with --disable-FEATURE, default
|
||||
vhost-user vhost-user backend support
|
||||
spice spice
|
||||
rbd rados block device (rbd)
|
||||
+ vitastor vitastor block device
|
||||
libiscsi iscsi support
|
||||
libnfs nfs support
|
||||
smartcard smartcard support (libcacard)
|
||||
@@ -4015,6 +4021,27 @@ EOF
|
||||
fi
|
||||
fi
|
||||
|
||||
+#########################################
|
||||
+# vitastor probe
|
||||
+if test "$vitastor" != "no" ; then
|
||||
+ cat > $TMPC <<EOF
|
||||
+#include <vitastor_c.h>
|
||||
+int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+}
|
||||
+EOF
|
||||
+ vitastor_libs="-lvitastor_client"
|
||||
+ if compile_prog "" "$vitastor_libs" ; then
|
||||
+ vitastor=yes
|
||||
+ else
|
||||
+ if test "$vitastor" = "yes" ; then
|
||||
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
|
||||
+ fi
|
||||
+ vitastor=no
|
||||
+ fi
|
||||
+fi
|
||||
+
|
||||
##########################################
|
||||
# libssh probe
|
||||
if test "$libssh" != "no" ; then
|
||||
@@ -6586,6 +6613,7 @@ echo "Trace output file $trace_file-<pid
|
||||
fi
|
||||
echo "spice support $spice $(echo_version $spice $spice_protocol_version/$spice_server_version)"
|
||||
echo "rbd support $rbd"
|
||||
+echo "vitastor support $vitastor"
|
||||
echo "xfsctl support $xfs"
|
||||
echo "smartcard support $smartcard"
|
||||
echo "libusb $libusb"
|
||||
@@ -7222,6 +7250,11 @@ if test "$rbd" = "yes" ; then
|
||||
echo "RBD_CFLAGS=$rbd_cflags" >> $config_host_mak
|
||||
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
|
||||
fi
|
||||
+if test "$vitastor" = "yes" ; then
|
||||
+ echo "CONFIG_VITASTOR=m" >> $config_host_mak
|
||||
+ echo "VITASTOR_CFLAGS=$vitastor_cflags" >> $config_host_mak
|
||||
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
|
||||
+fi
|
||||
|
||||
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
|
||||
if test "$coroutine_pool" = "yes" ; then
|
||||
diff -NaurpbB qemu-4.2/qapi/block-core.json qemu-4.2/qapi/block-core.json
|
||||
--- qemu-4.2/qapi/block-core.json 2022-05-19 08:32:27.000000000 +0700
|
||||
+++ qemu-4.2/qapi/block-core.json 2022-05-19 09:14:38.869133795 +0700
|
||||
@@ -2894,7 +2894,7 @@
|
||||
'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow',
|
||||
'qcow2', 'qed', 'quorum', 'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
|
||||
- 'sheepdog',
|
||||
+ 'sheepdog', 'vitastor',
|
||||
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
|
||||
|
||||
##
|
||||
@@ -3728,6 +3728,28 @@
|
||||
'*tag': 'str' } }
|
||||
|
||||
##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
+##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
# An enumeration of replication modes.
|
||||
@@ -4087,6 +4109,7 @@
|
||||
'replication': { 'type': 'BlockdevOptionsReplication',
|
||||
'if': 'defined(CONFIG_REPLICATION)' },
|
||||
'sheepdog': 'BlockdevOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'ssh': 'BlockdevOptionsSsh',
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
@@ -4457,6 +4480,17 @@
|
||||
'*cluster-size' : 'size' } }
|
||||
|
||||
##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
+##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
# Subformat options for VMDK images
|
||||
@@ -4718,6 +4752,7 @@
|
||||
'qed': 'BlockdevCreateOptionsQed',
|
||||
'rbd': 'BlockdevCreateOptionsRbd',
|
||||
'sheepdog': 'BlockdevCreateOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
diff -NaurpbB qemu-4.2/debian/rules qemu-4.2/debian/rules
|
||||
--- qemu-4.2/debian/rules 2022-05-19 15:34:26.354263017 +0700
|
||||
+++ qemu-4.2/debian/rules 2022-05-19 15:32:15.720952678 +0700
|
||||
@@ -148,7 +148,7 @@ ifneq ($(filter $(DEB_HOST_ARCH),amd64 i
|
||||
--${enable_system}-system \
|
||||
--disable-linux-user \
|
||||
--enable-xen \
|
||||
- --target-list="aarch64-softmmu arm-softmmu i386-softmmu x86_64-softmmu"
|
||||
+ --target-list="aarch64-softmmu arm-softmmu i386-softmmu x86_64-softmmu" \
|
||||
--enable-modules \
|
||||
--enable-module-upgrades \
|
||||
$(shell sh debian/extract-config-opts \
|
||||
@@ -167,7 +167,7 @@ ifneq ($(filter $(DEB_HOST_ARCH),amd64),
|
||||
cd b/qemu-microvm && \
|
||||
../../configure ${common_configure_opts} --disable-user \
|
||||
--enable-system --enable-kvm \
|
||||
- --disable-linux-user --disable-modules --disable-docs \
|
||||
+ --disable-linux-user --enable-modules --disable-docs \
|
||||
--disable-libssh --disable-tcmalloc --disable-glusterfs \
|
||||
--disable-seccomp --disable-bzip2 --disable-slirp --disable-vde \
|
||||
--disable-netmap --disable-hax --disable-hvf --disable-whpx \
|
||||
@@ -201,7 +201,7 @@ ifeq ($(enable_linux_user),enable)
|
||||
rm -rf b/user-static; mkdir b/user-static
|
||||
cd b/user-static && \
|
||||
../../configure ${common_configure_opts} \
|
||||
- --static --disable-system --disable-xen \
|
||||
+ --enable-modules --enable-module-upgrades --disable-system --disable-xen \
|
||||
--target-list="$(addsuffix -linux-user,${user_targets})"
|
||||
endif
|
||||
touch $@
|
@@ -95,7 +95,7 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
||||
}
|
||||
for (auto & hist_item: pg.target_history)
|
||||
{
|
||||
for (auto pg_osd: hist_item.osd_set)
|
||||
for (auto pg_osd: hist_item)
|
||||
{
|
||||
if (pg_osd != 0)
|
||||
{
|
||||
@@ -105,14 +105,11 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
||||
}
|
||||
for (osd_num_t peer_osd: all_peers)
|
||||
{
|
||||
if (st_cli.peer_states.find(peer_osd) != st_cli.peer_states.end())
|
||||
{
|
||||
r->list_osds.push_back((inode_list_osd_t){
|
||||
.pg = r,
|
||||
.osd_num = peer_osd,
|
||||
.sent = false,
|
||||
});
|
||||
}
|
||||
r->list_osds.push_back((inode_list_osd_t){
|
||||
.pg = r,
|
||||
.osd_num = peer_osd,
|
||||
.sent = false,
|
||||
});
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@@ -845,27 +845,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
{
|
||||
history_set.push_back(pg_osd.uint64_value());
|
||||
}
|
||||
pg_cfg.target_history.push_back((pg_history_set_t){ .osd_set = history_set });
|
||||
}
|
||||
// Newer format with epochs
|
||||
for (auto hist_item: value["osd_set_epochs"].array_items())
|
||||
{
|
||||
pg_history_set_t history_set;
|
||||
history_set.min_epoch = hist_item["min_epoch"].uint64_value();
|
||||
history_set.max_epoch = hist_item["max_epoch"].uint64_value();
|
||||
if (history_set.max_epoch < history_set.min_epoch)
|
||||
{
|
||||
history_set.max_epoch = 0;
|
||||
history_set.min_epoch = 0;
|
||||
}
|
||||
for (auto pg_osd: hist_item["osd_set"].array_items())
|
||||
{
|
||||
history_set.osd_set.push_back(pg_osd.uint64_value());
|
||||
}
|
||||
if (history_set.max_epoch || history_set.osd_set.size())
|
||||
{
|
||||
pg_cfg.target_history.push_back(history_set);
|
||||
}
|
||||
pg_cfg.target_history.push_back(history_set);
|
||||
}
|
||||
// Include these additional OSDs when peering the PG
|
||||
for (auto pg_osd: value["all_peers"].array_items())
|
||||
|
@@ -26,7 +26,7 @@ struct pg_config_t
|
||||
bool exists;
|
||||
osd_num_t primary;
|
||||
std::vector<osd_num_t> target_set;
|
||||
std::vector<pg_history_set_t> target_history;
|
||||
std::vector<std::vector<osd_num_t>> target_history;
|
||||
std::vector<osd_num_t> all_peers;
|
||||
bool pause;
|
||||
osd_num_t cur_primary;
|
||||
|
@@ -36,7 +36,6 @@ struct sec_data
|
||||
/* The list of completed io_u structs. */
|
||||
std::vector<io_u*> completed;
|
||||
uint64_t inflight = 0;
|
||||
int mirror_fd = -1;
|
||||
bool trace = false;
|
||||
};
|
||||
|
||||
@@ -47,7 +46,6 @@ struct sec_options
|
||||
char *etcd_host = NULL;
|
||||
char *etcd_prefix = NULL;
|
||||
char *image = NULL;
|
||||
char *mirror_file = NULL;
|
||||
uint64_t pool = 0;
|
||||
uint64_t inode = 0;
|
||||
int cluster_log = 0;
|
||||
@@ -134,15 +132,6 @@ static struct fio_option options[] = {
|
||||
.category = FIO_OPT_C_ENGINE,
|
||||
.group = FIO_OPT_G_FILENAME,
|
||||
},
|
||||
{
|
||||
.name = "mirror_file",
|
||||
.lname = "File name to mirror writes to",
|
||||
.type = FIO_OPT_STR_STORE,
|
||||
.off1 = offsetof(struct sec_options, mirror_file),
|
||||
.help = "File name to mirror writes to (for debug purpose)",
|
||||
.category = FIO_OPT_C_ENGINE,
|
||||
.group = FIO_OPT_G_FILENAME,
|
||||
},
|
||||
{
|
||||
.name = "use_rdma",
|
||||
.lname = "Use RDMA",
|
||||
@@ -223,16 +212,6 @@ static int sec_setup(struct thread_data *td)
|
||||
td->o.open_files++;
|
||||
}
|
||||
|
||||
if (o->mirror_file)
|
||||
{
|
||||
bsd->mirror_fd = open(o->mirror_file, O_CREAT|O_RDWR, 0666);
|
||||
if (bsd->mirror_fd < 0)
|
||||
{
|
||||
td_verror(td, errno, "open mirror file");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!o->image)
|
||||
{
|
||||
if (!(o->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)))
|
||||
@@ -286,10 +265,6 @@ static void sec_cleanup(struct thread_data *td)
|
||||
sec_data *bsd = (sec_data*)td->io_ops_data;
|
||||
if (bsd)
|
||||
{
|
||||
if (bsd->mirror_fd >= 0)
|
||||
{
|
||||
close(bsd->mirror_fd);
|
||||
}
|
||||
if (bsd->watch)
|
||||
{
|
||||
vitastor_c_close_watch(bsd->cli, bsd->watch);
|
||||
@@ -350,24 +325,6 @@ static enum fio_q_status sec_queue(struct thread_data *td, struct io_u *io)
|
||||
bsd->last_sync = false;
|
||||
break;
|
||||
case DDIR_WRITE:
|
||||
if (opt->mirror_file)
|
||||
{
|
||||
size_t done = 0;
|
||||
while (done < io->xfer_buflen)
|
||||
{
|
||||
ssize_t r = pwrite(bsd->mirror_fd, io->xfer_buf+done, io->xfer_buflen-done, io->offset+done);
|
||||
if (r < 0 && errno != EAGAIN)
|
||||
{
|
||||
fprintf(stderr, "Error writing mirror file: %s\n", strerror(errno));
|
||||
io->error = errno;
|
||||
return FIO_Q_COMPLETED;
|
||||
}
|
||||
if (r > 0)
|
||||
{
|
||||
done += r;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (opt->image && vitastor_c_inode_get_readonly(bsd->watch))
|
||||
{
|
||||
io->error = EROFS;
|
||||
|
@@ -333,10 +333,7 @@ void osd_t::on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num)
|
||||
{
|
||||
oid = op.first;
|
||||
first = false;
|
||||
if (op.second->req.hdr.opcode == OSD_OP_DELETE)
|
||||
continue_primary_del(op.second);
|
||||
else
|
||||
continue_primary_write(op.second);
|
||||
continue_primary_write(op.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -611,7 +608,7 @@ void osd_t::apply_pg_config()
|
||||
}
|
||||
for (auto & hist_item: pg_cfg.target_history)
|
||||
{
|
||||
for (auto pg_osd: hist_item.osd_set)
|
||||
for (auto pg_osd: hist_item)
|
||||
{
|
||||
if (pg_osd != 0)
|
||||
{
|
||||
@@ -802,40 +799,11 @@ void osd_t::report_pg_states()
|
||||
// Prevent race conditions (for the case when the monitor is updating this key at the same time)
|
||||
pg.history_changed = false;
|
||||
std::string history_key = base64_encode(st_cli.etcd_prefix+"/pg/history/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num));
|
||||
json11::Json::array target_history;
|
||||
for (auto & pgh: pg.target_history)
|
||||
{
|
||||
target_history.push_back(json11::Json::object {
|
||||
{ "osd_set", pgh.osd_set },
|
||||
{ "min_epoch", pgh.min_epoch },
|
||||
{ "max_epoch", pgh.max_epoch },
|
||||
});
|
||||
}
|
||||
std::vector<osd_num_t> all_peers;
|
||||
for (auto peer_osd: pg.all_peers)
|
||||
{
|
||||
bool found = false;
|
||||
for (auto target_peer: pg.target_set)
|
||||
{
|
||||
if (target_peer == peer_osd)
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
{
|
||||
all_peers.push_back(peer_osd);
|
||||
}
|
||||
}
|
||||
json11::Json::object history_value = {
|
||||
{ "epoch", pg.epoch },
|
||||
{ "osd_set_epochs", target_history },
|
||||
{ "all_peers", pg.all_peers },
|
||||
{ "osd_sets", pg.target_history },
|
||||
};
|
||||
if (all_peers.size())
|
||||
{
|
||||
history_value["all_peers"] = all_peers;
|
||||
}
|
||||
checks.push_back(json11::Json::object {
|
||||
{ "target", "MOD" },
|
||||
{ "key", history_key },
|
||||
|
@@ -268,25 +268,6 @@ bool osd_t::pick_next_recovery(osd_recovery_op_t &op)
|
||||
|
||||
void osd_t::submit_recovery_op(osd_recovery_op_t *op)
|
||||
{
|
||||
// Check if the object is deleted
|
||||
bool is_deleted = false;
|
||||
pool_id_t pool_id = INODE_POOL(op->oid.inode);
|
||||
auto pool_cfg_it = st_cli.pool_config.find(pool_id);
|
||||
if (pool_cfg_it != st_cli.pool_config.end())
|
||||
{
|
||||
pg_num_t pg_num = (op->oid.stripe/pool_cfg_it->second.pg_stripe_size) % pg_counts[pool_id] + 1; // like map_to_pg()
|
||||
auto pg_it = pgs.find({ .pool_id = pool_id, .pg_num = pg_num });
|
||||
if (pg_it != pgs.end())
|
||||
{
|
||||
pg_osd_set_state_t *object_state;
|
||||
get_object_osd_set(pg_it->second, op->oid, pg_it->second.cur_set.data(), &object_state);
|
||||
if (object_state && (object_state->state & OBJ_DELETED))
|
||||
{
|
||||
// Object is deleted, but not from all OSDs - delete remaining copies
|
||||
is_deleted = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
op->osd_op = new osd_op_t();
|
||||
op->osd_op->op_type = OSD_OP_OUT;
|
||||
op->osd_op->req = (osd_any_op_t){
|
||||
@@ -294,7 +275,7 @@ void osd_t::submit_recovery_op(osd_recovery_op_t *op)
|
||||
.header = {
|
||||
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||
.id = 1,
|
||||
.opcode = (uint64_t)(is_deleted ? OSD_OP_DELETE : OSD_OP_WRITE),
|
||||
.opcode = OSD_OP_WRITE,
|
||||
},
|
||||
.inode = op->oid.inode,
|
||||
.offset = op->oid.stripe,
|
||||
|
@@ -3,8 +3,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#define POOL_SCHEME_REPLICATED 1
|
||||
#define POOL_SCHEME_XOR 2
|
||||
#define POOL_SCHEME_JERASURE 3
|
||||
@@ -30,9 +28,3 @@ inline bool operator < (const pool_pg_num_t & a, const pool_pg_num_t & b)
|
||||
{
|
||||
return a.pool_id < b.pool_id || a.pool_id == b.pool_id && a.pg_num < b.pg_num;
|
||||
}
|
||||
|
||||
struct pg_history_set_t
|
||||
{
|
||||
std::vector<osd_num_t> osd_set;
|
||||
uint64_t min_epoch, max_epoch;
|
||||
};
|
||||
|
@@ -228,7 +228,7 @@ void osd_t::start_pg_peering(pg_t & pg)
|
||||
for (auto & history_set: pg.target_history)
|
||||
{
|
||||
bool found = true;
|
||||
for (auto history_osd: history_set.osd_set)
|
||||
for (auto history_osd: history_set)
|
||||
{
|
||||
if (history_osd != 0)
|
||||
{
|
||||
@@ -539,114 +539,47 @@ void osd_t::finish_stop_pg(pg_t & pg)
|
||||
report_pg_state(pg);
|
||||
}
|
||||
|
||||
static int count_nonzero_osds(const std::vector<osd_num_t> & v)
|
||||
{
|
||||
int n = 0;
|
||||
for (auto & osd_num: v)
|
||||
{
|
||||
if (osd_num != 0)
|
||||
{
|
||||
n++;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
void osd_t::report_pg_state(pg_t & pg)
|
||||
{
|
||||
pg.print_state();
|
||||
this->pg_state_dirty.insert({ .pool_id = pg.pool_id, .pg_num = pg.pg_num });
|
||||
if ((pg.state == PG_ACTIVE || pg.state == (PG_ACTIVE|PG_LEFT_ON_DEAD)) &&
|
||||
(pg.target_history.size() != 1 ||
|
||||
pg.target_history[0].osd_set != pg.target_set ||
|
||||
pg.target_history[0].min_epoch != 0 ||
|
||||
pg.target_history[0].max_epoch != pg.epoch ||
|
||||
pg.all_peers.size() > count_nonzero_osds(pg.target_set)))
|
||||
if (pg.state == PG_ACTIVE && (pg.target_history.size() > 0 || pg.all_peers.size() > pg.target_set.size()))
|
||||
{
|
||||
// Clear history of active+clean PGs
|
||||
pg.history_changed = true;
|
||||
pg.target_history.clear();
|
||||
pg.target_history.push_back((pg_history_set_t){
|
||||
.osd_set = pg.cur_set,
|
||||
.min_epoch = 0,
|
||||
.max_epoch = pg.epoch,
|
||||
});
|
||||
if (pg.state == PG_ACTIVE)
|
||||
pg.all_peers = pg.target_set;
|
||||
pg.cur_peers = pg.target_set;
|
||||
}
|
||||
else if (pg.state == (PG_ACTIVE|PG_LEFT_ON_DEAD))
|
||||
{
|
||||
// Clear history of active+left_on_dead PGs, but leave dead OSDs in all_peers
|
||||
pg.history_changed = true;
|
||||
pg.target_history.clear();
|
||||
std::set<osd_num_t> dead_peers;
|
||||
for (auto pg_osd: pg.all_peers)
|
||||
{
|
||||
pg.all_peers.clear();
|
||||
for (auto pg_osd: pg.target_set)
|
||||
dead_peers.insert(pg_osd);
|
||||
}
|
||||
for (auto pg_osd: pg.cur_peers)
|
||||
{
|
||||
dead_peers.erase(pg_osd);
|
||||
}
|
||||
for (auto pg_osd: pg.target_set)
|
||||
{
|
||||
if (pg_osd)
|
||||
{
|
||||
if (pg_osd)
|
||||
pg.all_peers.push_back(pg_osd);
|
||||
dead_peers.insert(pg_osd);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Clear history of active+left_on_dead PGs, but leave dead OSDs in all_peers
|
||||
std::set<osd_num_t> dead_peers(pg.all_peers.begin(), pg.all_peers.end());
|
||||
for (auto pg_osd: pg.cur_peers)
|
||||
{
|
||||
dead_peers.erase(pg_osd);
|
||||
}
|
||||
for (auto pg_osd: pg.target_set)
|
||||
{
|
||||
if (pg_osd)
|
||||
dead_peers.insert(pg_osd);
|
||||
}
|
||||
pg.all_peers.clear();
|
||||
pg.all_peers.insert(pg.all_peers.begin(), dead_peers.begin(), dead_peers.end());
|
||||
}
|
||||
pg.all_peers.clear();
|
||||
pg.all_peers.insert(pg.all_peers.begin(), dead_peers.begin(), dead_peers.end());
|
||||
pg.cur_peers.clear();
|
||||
for (auto pg_osd: pg.target_set)
|
||||
{
|
||||
if (pg_osd)
|
||||
{
|
||||
pg.cur_peers.push_back(pg_osd);
|
||||
}
|
||||
}
|
||||
if (pg.history_changed)
|
||||
{
|
||||
bool epoch_already_reported = false;
|
||||
int max_epoch_pos = -1;
|
||||
for (int i = pg.target_history.size()-1; i >= 0; i--)
|
||||
{
|
||||
if (pg.target_history[i].min_epoch > pg.epoch)
|
||||
{
|
||||
printf("[PG %u/%u] Invalid PG history: there is an entry with min_epoch (%lu) > current epoch (%lu)\n",
|
||||
pg.pool_id, pg.pg_num, pg.target_history[i].min_epoch, pg.epoch);
|
||||
force_stop(1);
|
||||
return;
|
||||
}
|
||||
if (max_epoch_pos < 0 || pg.target_history[i].max_epoch > pg.target_history[max_epoch_pos].max_epoch)
|
||||
{
|
||||
max_epoch_pos = i;
|
||||
}
|
||||
if (pg.target_history[i].min_epoch <= pg.epoch &&
|
||||
pg.target_history[i].max_epoch >= pg.epoch)
|
||||
{
|
||||
if (pg.target_history[i].osd_set != pg.cur_set)
|
||||
{
|
||||
printf("[PG %u/%u] Invalid target_history: epoch %lu has another OSD set already registered\n", pg.pool_id, pg.pg_num, pg.epoch);
|
||||
force_stop(1);
|
||||
return;
|
||||
}
|
||||
// Already reported
|
||||
epoch_already_reported = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!epoch_already_reported)
|
||||
{
|
||||
if (max_epoch_pos >= 0 && pg.target_history[max_epoch_pos].osd_set == pg.cur_set)
|
||||
{
|
||||
pg.target_history[max_epoch_pos].max_epoch = pg.epoch;
|
||||
}
|
||||
else
|
||||
{
|
||||
pg.target_history.push_back((pg_history_set_t){
|
||||
.osd_set = pg.cur_set,
|
||||
.min_epoch = pg.epoch,
|
||||
.max_epoch = pg.epoch,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -52,7 +52,6 @@ struct pg_obj_state_check_t
|
||||
|
||||
void walk();
|
||||
void start_object();
|
||||
void recheck_version_osd_set();
|
||||
void handle_version();
|
||||
void finish_object();
|
||||
};
|
||||
@@ -85,19 +84,27 @@ void pg_obj_state_check_t::walk()
|
||||
pg->state = PG_INCOMPLETE | PG_HAS_INVALID;
|
||||
return;
|
||||
}
|
||||
// Activate PG
|
||||
if (pg->pg_cursize < pg->pg_size)
|
||||
{
|
||||
// History will be reported on first write
|
||||
// Report PG history and activate
|
||||
pg->state |= PG_DEGRADED | PG_PEERED;
|
||||
std::vector<osd_num_t> history_set;
|
||||
for (auto peer_osd: pg->cur_set)
|
||||
{
|
||||
if (peer_osd != 0)
|
||||
{
|
||||
history_set.push_back(peer_osd);
|
||||
}
|
||||
}
|
||||
pg->target_history.push_back(history_set);
|
||||
pg->history_changed = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Just activate
|
||||
pg->state |= PG_ACTIVE;
|
||||
// Clear history
|
||||
pg->history_changed = true;
|
||||
}
|
||||
if (pg->cur_peers.size() < pg->all_peers.size())
|
||||
if (pg->state == PG_ACTIVE && pg->cur_peers.size() < pg->all_peers.size())
|
||||
{
|
||||
pg->state |= PG_LEFT_ON_DEAD;
|
||||
}
|
||||
@@ -114,82 +121,13 @@ void pg_obj_state_check_t::start_object()
|
||||
n_unstable = n_invalid = 0;
|
||||
}
|
||||
|
||||
void pg_obj_state_check_t::recheck_version_osd_set()
|
||||
{
|
||||
uint64_t epoch = (last_ver >> (64-PG_EPOCH_BITS));
|
||||
if (!pg->epoch_sizes_differ && n_copies >= pg->pg_size)
|
||||
{
|
||||
// Enough copies
|
||||
return;
|
||||
}
|
||||
auto epoch_it = pg->target_by_epoch.lower_bound(epoch);
|
||||
if (epoch_it == pg->target_by_epoch.end() || epoch_it->second.min_epoch > epoch)
|
||||
{
|
||||
// Epoch info not found
|
||||
return;
|
||||
}
|
||||
if (pg->epoch_sizes_differ && n_copies >= epoch_it->second.osd_set.size())
|
||||
{
|
||||
// For the (unlikely) case of PG size change - enough copies
|
||||
return;
|
||||
}
|
||||
// Recheck version against the OSD set corresponding to epoch if it's known
|
||||
if (epoch_it != pg->target_by_epoch.end() && epoch_it->second.min_epoch <= epoch)
|
||||
{
|
||||
for (int j = 0; j < epoch_it->second.osd_set.size(); j++)
|
||||
{
|
||||
osd_num_t cur_osd = epoch_it->second.osd_set[j];
|
||||
bool found = false;
|
||||
for (int i = ver_start; i < ver_end; i++)
|
||||
{
|
||||
if (cur_osd == list[i].osd_num)
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
{
|
||||
// Check if a newer version is present on the same OSD and masks the older one
|
||||
// It happens for overwritten replicas in the following case:
|
||||
// Version 1 is present on OSD 1,2,3
|
||||
// Client tries to write Version 2
|
||||
// OSD 3 succeeds to write Version 2, others don't. OSD 3 crashes, then starts again
|
||||
// OSD 1 sees: version 1 on OSD 1,2 and version 2 on OSD 3
|
||||
// (version 1 on OSD 3 is already masked/removed)
|
||||
// Version 1 is not present on a full set, but it must not be removed
|
||||
if (replicated)
|
||||
{
|
||||
for (int i = obj_start; i < ver_start; i++)
|
||||
{
|
||||
if (cur_osd == list[i].osd_num)
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
{
|
||||
// Object is missing from one of the OSDs of that set.
|
||||
// This means it's deleted or moved and we can safely drop this version.
|
||||
target_ver = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void pg_obj_state_check_t::handle_version()
|
||||
{
|
||||
if (!target_ver && last_ver != list[list_pos].version && (n_stable > 0 || n_roles >= pg->pg_data_size))
|
||||
{
|
||||
// Version is either stable or recoverable
|
||||
ver_end = list_pos;
|
||||
target_ver = last_ver;
|
||||
// Skip versions that are not present on any of OSDs for the corresponding PG epoch
|
||||
recheck_version_osd_set();
|
||||
ver_end = list_pos;
|
||||
}
|
||||
if (!target_ver)
|
||||
{
|
||||
@@ -253,8 +191,6 @@ void pg_obj_state_check_t::finish_object()
|
||||
// Version is either stable or recoverable
|
||||
target_ver = last_ver;
|
||||
ver_end = list_pos;
|
||||
// Skip versions that are not present on any of OSDs for the corresponding PG epoch
|
||||
recheck_version_osd_set();
|
||||
}
|
||||
obj_end = list_pos;
|
||||
// Remember the decision
|
||||
@@ -308,23 +244,11 @@ void pg_obj_state_check_t::finish_object()
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!target_ver && (n_unstable >= obj_end-obj_start))
|
||||
if (!target_ver)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (!target_ver)
|
||||
{
|
||||
// Object is present, but should not be :) i.e. it's a deleted object that reappeared
|
||||
if (log_level > 1)
|
||||
{
|
||||
printf("Object is deleted: %lx:%lx version=%lu/%lu\n", oid.inode, oid.stripe, target_ver, max_ver);
|
||||
}
|
||||
state = OBJ_DELETED;
|
||||
pg->state = pg->state | PG_HAS_MISPLACED;
|
||||
// To record all versions as outdated:
|
||||
ver_end = obj_start;
|
||||
}
|
||||
else if (!replicated && n_roles < pg->pg_data_size)
|
||||
if (!replicated && n_roles < pg->pg_data_size)
|
||||
{
|
||||
if (log_level > 1)
|
||||
{
|
||||
@@ -352,7 +276,7 @@ void pg_obj_state_check_t::finish_object()
|
||||
pg->state = pg->state | PG_HAS_MISPLACED;
|
||||
}
|
||||
if (log_level > 1 && (state & (OBJ_INCOMPLETE | OBJ_DEGRADED)) ||
|
||||
log_level > 2 && (state & (OBJ_MISPLACED | OBJ_DELETED)))
|
||||
log_level > 2 && (state & OBJ_MISPLACED))
|
||||
{
|
||||
for (int i = obj_start; i < obj_end; i++)
|
||||
{
|
||||
@@ -361,9 +285,9 @@ void pg_obj_state_check_t::finish_object()
|
||||
}
|
||||
}
|
||||
pg->total_count++;
|
||||
osd_set.clear();
|
||||
if (target_ver != 0 && (state != 0 || ver_end < obj_end))
|
||||
if (state != 0 || ver_end < obj_end)
|
||||
{
|
||||
osd_set.clear();
|
||||
for (int i = ver_start; i < ver_end; i++)
|
||||
{
|
||||
osd_set.push_back((pg_obj_loc_t){
|
||||
@@ -386,8 +310,7 @@ void pg_obj_state_check_t::finish_object()
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j >= osd_set.size() && ((state & OBJ_DELETED) ||
|
||||
pg->cur_set[list[i].oid.stripe & STRIPE_MASK] != list[i].osd_num))
|
||||
if (j >= osd_set.size() && pg->cur_set[list[i].oid.stripe & STRIPE_MASK] != list[i].osd_num)
|
||||
{
|
||||
osd_set.push_back((pg_obj_loc_t){
|
||||
.role = (list[i].oid.stripe & STRIPE_MASK),
|
||||
@@ -402,11 +325,7 @@ void pg_obj_state_check_t::finish_object()
|
||||
}
|
||||
}
|
||||
}
|
||||
if (state & OBJ_DELETED)
|
||||
{
|
||||
pg->ver_override[oid] = max_ver;
|
||||
}
|
||||
else if (target_ver < max_ver)
|
||||
if (target_ver < max_ver)
|
||||
{
|
||||
pg->ver_override[oid] = target_ver;
|
||||
}
|
||||
@@ -460,7 +379,6 @@ void pg_obj_state_check_t::finish_object()
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(it->second.state == state);
|
||||
it->second.object_count++;
|
||||
}
|
||||
if (state & OBJ_INCOMPLETE)
|
||||
@@ -481,34 +399,6 @@ void pg_obj_state_check_t::finish_object()
|
||||
// FIXME: Write at least some tests for this function
|
||||
void pg_t::calc_object_states(int log_level)
|
||||
{
|
||||
// Calculate intersections of target_history with cur_peers
|
||||
for (auto & history_item: target_history)
|
||||
{
|
||||
if (history_item.max_epoch)
|
||||
{
|
||||
pg_history_set_t & set_copy = target_by_epoch[history_item.max_epoch];
|
||||
set_copy.min_epoch = history_item.min_epoch;
|
||||
set_copy.max_epoch = history_item.max_epoch;
|
||||
for (int i = 0; i < history_item.osd_set.size(); i++)
|
||||
{
|
||||
if (history_item.osd_set[i] != 0)
|
||||
{
|
||||
for (int j = 0; j < cur_set.size(); j++)
|
||||
{
|
||||
if (cur_set[j] == history_item.osd_set[i])
|
||||
{
|
||||
set_copy.osd_set.push_back(history_item.osd_set[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (set_copy.osd_set.size() != pg_size)
|
||||
{
|
||||
epoch_sizes_differ = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Copy all object lists into one array
|
||||
pg_obj_state_check_t st;
|
||||
st.log_level = log_level;
|
||||
@@ -545,18 +435,10 @@ void pg_t::calc_object_states(int log_level)
|
||||
std::sort(st.list.begin(), st.list.end());
|
||||
// Walk over it and check object states
|
||||
st.walk();
|
||||
target_by_epoch.clear();
|
||||
if (this->state != PG_ACTIVE)
|
||||
if (this->state & (PG_DEGRADED|PG_LEFT_ON_DEAD))
|
||||
{
|
||||
assert(epoch != (((uint64_t)1 << PG_EPOCH_BITS)-1));
|
||||
epoch++;
|
||||
for (auto & pgh: target_history)
|
||||
{
|
||||
if (epoch <= pgh.max_epoch)
|
||||
{
|
||||
epoch = pgh.max_epoch+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -89,9 +89,7 @@ struct pg_t
|
||||
// epoch number - should increase with each non-clean activation of the PG
|
||||
uint64_t epoch = 0, reported_epoch = 0;
|
||||
// target history and all potential peers
|
||||
std::vector<pg_history_set_t> target_history;
|
||||
std::map<uint64_t, pg_history_set_t> target_by_epoch;
|
||||
bool epoch_sizes_differ = false;
|
||||
std::vector<std::vector<osd_num_t>> target_history;
|
||||
std::vector<osd_num_t> all_peers;
|
||||
bool history_changed = false;
|
||||
// peer list from the last peering event
|
||||
|
@@ -199,21 +199,6 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
|
||||
{
|
||||
// PG may be degraded or have misplaced objects
|
||||
op_data->prev_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), &op_data->object_state);
|
||||
if (op_data->object_state && (op_data->object_state->state & OBJ_DELETED))
|
||||
{
|
||||
// Object is deleted, just return zeroes
|
||||
cur_op->reply.rw.version = 0;
|
||||
cur_op->reply.rw.bitmap_len = op_data->pg_data_size * clean_entry_bitmap_size;
|
||||
uint64_t zero_len = cur_op->reply.rw.bitmap_len + cur_op->req.rw.len;
|
||||
while (zero_len >= 0)
|
||||
{
|
||||
uint64_t cur_zero_len = zero_buffer_size > zero_len ? zero_len : zero_buffer_size;
|
||||
cur_op->iov.push_back(zero_buffer, cur_zero_len);
|
||||
zero_len -= cur_zero_len;
|
||||
}
|
||||
finish_op(cur_op, cur_op->req.rw.len);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (pg.state == PG_ACTIVE || op_data->scheme == POOL_SCHEME_REPLICATED)
|
||||
{
|
||||
@@ -305,7 +290,7 @@ void osd_t::remove_object_from_state(object_id & oid, pg_osd_set_state_t *object
|
||||
report_pg_state(pg);
|
||||
}
|
||||
}
|
||||
else if (object_state->state & (OBJ_MISPLACED | OBJ_DELETED))
|
||||
else if (object_state->state & OBJ_MISPLACED)
|
||||
{
|
||||
this->misplaced_objects--;
|
||||
pg.misplaced_objects.erase(oid);
|
||||
@@ -344,6 +329,12 @@ void osd_t::continue_primary_del(osd_op_t *cur_op)
|
||||
else if (op_data->st == 4) goto resume_4;
|
||||
else if (op_data->st == 5) goto resume_5;
|
||||
assert(op_data->st == 0);
|
||||
// Delete is forbidden even in active PGs if they're also degraded or have previous dead OSDs
|
||||
if (pg.state & (PG_DEGRADED | PG_LEFT_ON_DEAD))
|
||||
{
|
||||
finish_op(cur_op, -EBUSY);
|
||||
return;
|
||||
}
|
||||
if (!check_write_queue(cur_op, pg))
|
||||
{
|
||||
return;
|
||||
@@ -351,18 +342,11 @@ void osd_t::continue_primary_del(osd_op_t *cur_op)
|
||||
resume_1:
|
||||
// Determine which OSDs contain this object and delete it
|
||||
op_data->prev_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), &op_data->object_state);
|
||||
if (op_data->object_state && (op_data->object_state->state & OBJ_DELETED))
|
||||
{
|
||||
op_data->fact_ver = pg.ver_override[op_data->oid];
|
||||
}
|
||||
else
|
||||
{
|
||||
// Submit 1 read to determine the actual version number
|
||||
submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, op_data->prev_set, cur_op);
|
||||
// Submit 1 read to determine the actual version number
|
||||
submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, op_data->prev_set, cur_op);
|
||||
resume_2:
|
||||
op_data->st = 2;
|
||||
return;
|
||||
}
|
||||
op_data->st = 2;
|
||||
return;
|
||||
resume_3:
|
||||
if (op_data->errors > 0)
|
||||
{
|
||||
|
@@ -133,12 +133,6 @@ int osd_t::collect_bitmap_requests(osd_op_t *cur_op, pg_t & pg, std::vector<bitm
|
||||
uint64_t target_version = vo_it != pg.ver_override.end() ? vo_it->second : UINT64_MAX;
|
||||
pg_osd_set_state_t *object_state;
|
||||
uint64_t* cur_set = get_object_osd_set(pg, cur_oid, pg.cur_set.data(), &object_state);
|
||||
if (object_state && (object_state->state & OBJ_DELETED))
|
||||
{
|
||||
// Object is deleted, zero out the bitmap
|
||||
memset((uint8_t*)op_data->snapshot_bitmaps + chain_num*clean_entry_bitmap_size, 0, clean_entry_bitmap_size);
|
||||
continue;
|
||||
}
|
||||
if (pg.scheme == POOL_SCHEME_REPLICATED)
|
||||
{
|
||||
osd_num_t read_target = 0;
|
||||
|
@@ -116,19 +116,17 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, const ui
|
||||
if (osd_set[role] != 0 && (wr || !rep && stripes[role].read_end != 0))
|
||||
n_subops++;
|
||||
}
|
||||
if (!n_subops && (submit_type == SUBMIT_RMW_READ || rep) && zero_read >= 0)
|
||||
if (!n_subops && (submit_type == SUBMIT_RMW_READ || rep))
|
||||
n_subops = 1;
|
||||
else
|
||||
zero_read = -1;
|
||||
osd_op_t *subops = new osd_op_t[n_subops];
|
||||
op_data->fact_ver = 0;
|
||||
op_data->done = op_data->errors = 0;
|
||||
op_data->n_subops = n_subops;
|
||||
if (n_subops > 0)
|
||||
{
|
||||
op_data->subops = new osd_op_t[n_subops];
|
||||
int sent = submit_primary_subop_batch(submit_type, op_data->oid.inode, op_version, op_data->stripes, osd_set, cur_op, 0, zero_read);
|
||||
assert(sent == n_subops);
|
||||
}
|
||||
op_data->subops = subops;
|
||||
int sent = submit_primary_subop_batch(submit_type, op_data->oid.inode, op_version, op_data->stripes, osd_set, cur_op, 0, zero_read);
|
||||
assert(sent == n_subops);
|
||||
}
|
||||
|
||||
int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t op_version,
|
||||
|
@@ -154,9 +154,10 @@ resume_3:
|
||||
if (pg.epoch > pg.reported_epoch)
|
||||
{
|
||||
// Report newer epoch before writing
|
||||
// FIXME: We don't have to report all changed PG states here
|
||||
// FIXME: We may report only one PG state here...
|
||||
this->pg_state_dirty.insert({ .pool_id = pg.pool_id, .pg_num = pg.pg_num });
|
||||
pg.history_changed = true;
|
||||
report_pg_state(pg);
|
||||
report_pg_states();
|
||||
resume_10:
|
||||
if (pg.epoch > pg.reported_epoch)
|
||||
{
|
||||
|
@@ -34,7 +34,6 @@
|
||||
#define OBJ_DEGRADED 0x02
|
||||
#define OBJ_INCOMPLETE 0x04
|
||||
#define OBJ_MISPLACED 0x08
|
||||
#define OBJ_DELETED 0x10
|
||||
#define OBJ_NEEDS_STABLE 0x10000
|
||||
#define OBJ_NEEDS_ROLLBACK 0x20000
|
||||
|
||||
|
@@ -18,7 +18,7 @@ done
|
||||
cd mon
|
||||
npm install
|
||||
cd ..
|
||||
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 &>./testdata/mon.log &
|
||||
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
|
||||
MON_PID=$!
|
||||
|
||||
if [ -n "$GLOBAL_CONF" ]; then
|
||||
@@ -31,31 +31,15 @@ else
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"xor","pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"parity_chunks":1,"pg_count":'$PG_COUNT',"failure_domain":"osd"}}'
|
||||
fi
|
||||
|
||||
wait_up()
|
||||
{
|
||||
local sec=$1
|
||||
local i=0
|
||||
local configured=0
|
||||
while [[ $i -lt $sec ]]; do
|
||||
if $ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
|
||||
select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT; then
|
||||
configured=1
|
||||
if $ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT; then
|
||||
break
|
||||
fi
|
||||
fi
|
||||
sleep 1
|
||||
i=$((i+1))
|
||||
if [ $i -eq $sec ]; then
|
||||
if [[ $configured -ne 0 ]]; then
|
||||
format_error "FAILED: $PG_COUNT PG(s) NOT CONFIGURED"
|
||||
fi
|
||||
format_error "FAILED: $PG_COUNT PG(s) NOT UP"
|
||||
fi
|
||||
done
|
||||
}
|
||||
sleep 3
|
||||
|
||||
wait_up 60
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] | select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT); then
|
||||
format_error "FAILED: $PG_COUNT PG(s) NOT CONFIGURED"
|
||||
fi
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT); then
|
||||
format_error "FAILED: $PG_COUNT PG(s) NOT UP"
|
||||
fi
|
||||
|
||||
if ! cmp build/src/block-vitastor.so /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so; then
|
||||
sudo rm -f /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so
|
||||
|
@@ -4,25 +4,19 @@
|
||||
|
||||
if [ "$IMMEDIATE_COMMIT" != "" ]; then
|
||||
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 1"
|
||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":1,"immediate_commit":"all"}'
|
||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":5,"immediate_commit":"all"}'
|
||||
else
|
||||
NO_SAME="--journal_sector_buffer_count 1024 --log_level 1"
|
||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":1}'
|
||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":5}'
|
||||
fi
|
||||
|
||||
start_osd()
|
||||
{
|
||||
local i=$1
|
||||
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
|
||||
eval OSD${i}_PID=$!
|
||||
}
|
||||
|
||||
OSD_SIZE=1024
|
||||
OSD_COUNT=7
|
||||
OSD_ARGS=
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
start_osd $i
|
||||
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
|
||||
eval OSD${i}_PID=$!
|
||||
done
|
||||
|
||||
cd mon
|
||||
@@ -32,11 +26,11 @@ node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1
|
||||
MON_PID=$!
|
||||
|
||||
if [ "$EC" != "" ]; then
|
||||
PG_SIZE=3
|
||||
POOLCFG='"scheme":"xor","pg_size":3,"pg_minsize":2,"parity_chunks":1'
|
||||
PG_SIZE=3
|
||||
else
|
||||
PG_SIZE=${PG_SIZE:-2}
|
||||
POOLCFG='"scheme":"replicated","pg_size":'$PG_SIZE',"pg_minsize":2'
|
||||
POOLCFG='"scheme":"replicated","pg_size":2,"pg_minsize":2'
|
||||
PG_SIZE=2
|
||||
fi
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool",'$POOLCFG',"pg_count":32,"failure_domain":"osd"}}'
|
||||
|
||||
@@ -65,10 +59,10 @@ wait_finish_rebalance()
|
||||
while [[ $i -lt $sec ]]; do
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == 32') && \
|
||||
break
|
||||
sleep 1
|
||||
i=$((i+1))
|
||||
if [ $i -eq $sec ]; then
|
||||
if [ $i -eq 60 ]; then
|
||||
format_error "Rebalance couldn't finish in $sec seconds"
|
||||
fi
|
||||
sleep 1
|
||||
i=$((i+1))
|
||||
done
|
||||
}
|
||||
|
@@ -12,8 +12,6 @@ EC=1 ./test_change_pg_count.sh
|
||||
|
||||
./test_change_pg_size.sh
|
||||
|
||||
./test_degraded_delete.sh
|
||||
|
||||
./test_etcd_fail.sh
|
||||
|
||||
./test_failure_domain.sh
|
||||
|
@@ -1,119 +0,0 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
# Run 3 OSDs
|
||||
|
||||
. `dirname $0`/run_3osds.sh
|
||||
|
||||
# Write inodes 1 and 2
|
||||
|
||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
||||
-rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10
|
||||
|
||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
||||
-rw=write -etcd=$ETCD_URL -pool=1 -inode=2 -size=128M -runtime=10
|
||||
|
||||
# Stop OSD 1
|
||||
|
||||
kill -INT $OSD1_PID
|
||||
sleep 2
|
||||
|
||||
# Remove inode 2
|
||||
|
||||
build/src/vitastor-cli rm-data --etcd_address $ETCD_URL --pool 1 --inode 2
|
||||
|
||||
# Run 3 more OSDs and move PG to 4,5,6
|
||||
|
||||
for i in $(seq 4 6); do
|
||||
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
|
||||
eval OSD${i}_PID=$!
|
||||
done
|
||||
|
||||
$ETCDCTL put /vitastor/config/osd/1 '{"reweight":0}'
|
||||
$ETCDCTL put /vitastor/config/osd/2 '{"reweight":0}'
|
||||
$ETCDCTL put /vitastor/config/osd/3 '{"reweight":0}'
|
||||
|
||||
# Wait for rebalance to finish
|
||||
|
||||
wait_finish_rebalance()
|
||||
{
|
||||
local sec=$1
|
||||
local st=$2
|
||||
local i=0
|
||||
while [[ $i -lt $sec ]]; do
|
||||
if $ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e \
|
||||
'([ .[] | select(.state == ['$st'] and (.peers | contains([1]) | not) and (.peers | contains([2,3]) | not)) ] | length) == '$PG_COUNT; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
i=$((i+1))
|
||||
if [ $i -eq $sec ]; then
|
||||
format_error "Rebalance couldn't finish in $sec seconds"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
wait_finish_rebalance 60 '"active","left_on_dead"'
|
||||
|
||||
# Stop OSD 2,3
|
||||
|
||||
kill -INT $OSD2_PID
|
||||
kill -INT $OSD3_PID
|
||||
sleep 2
|
||||
|
||||
# Verify that PGs are still active
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active","left_on_dead"]) ] | length == '$PG_COUNT); then
|
||||
format_error "FAILED: $PG_COUNT PG(s) NOT UP"
|
||||
fi
|
||||
|
||||
# Start OSD 1
|
||||
|
||||
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd1.bin 2>/dev/null) &>./testdata/osd1.log &
|
||||
OSD1_PID=$!
|
||||
|
||||
# Verify that inode 2 is removed and inode 1 is in place
|
||||
|
||||
wait_repeer_1()
|
||||
{
|
||||
local sec=$1
|
||||
local i=0
|
||||
while [[ $i -lt $sec ]]; do
|
||||
if grep -q 'Repeer because of OSD 1' testdata/osd4.log testdata/osd5.log testdata/osd6.log; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
i=$((i+1))
|
||||
if [ $i -eq $sec ]; then
|
||||
format_error "OSD 4/5/6 do not peer with older OSD 1"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
wait_repeer_1 15
|
||||
|
||||
wait_finish_rebalance 15 '"active"'
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/pg/stats/1/1 --print-value-only | jq -s -e '.[0].object_count == 512'); then
|
||||
format_error "FAILED: PG SHOULD CONTAIN EXACTLY 128 MB OF DATA, BUT IT DOESN'T"
|
||||
fi
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=4096" \
|
||||
-O raw ./testdata/inode1.bin
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=2:size="$((128*1024*1024)) \
|
||||
-O raw ./testdata/inode2.bin
|
||||
|
||||
if (dd if=/dev/zero bs=4096 count=1 | diff - ./testdata/inode1.bin); then
|
||||
format_error "FAILED: INODE 1 SEEMS LOST"
|
||||
fi
|
||||
|
||||
if ! (dd if=/dev/zero bs=1M count=128 | diff - ./testdata/inode2.bin); then
|
||||
format_error "FAILED: INODE 2 SEEMS RESTORED"
|
||||
fi
|
||||
|
||||
format_green OK
|
@@ -1,52 +0,0 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
# Kill OSDs while writing
|
||||
|
||||
PG_SIZE=3
|
||||
|
||||
. `dirname $0`/run_7osds.sh
|
||||
|
||||
IMG_SIZE=960
|
||||
|
||||
$ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1024*1024))'}'
|
||||
|
||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
|
||||
|
||||
kill_osds()
|
||||
{
|
||||
sleep 5
|
||||
|
||||
kill -9 $OSD1_PID
|
||||
$ETCDCTL del /vitastor/osd/state/1
|
||||
|
||||
for i in 2 3 4 5 6 7; do
|
||||
sleep 15
|
||||
echo Killing OSD $i and starting OSD $((i-1))
|
||||
p=OSD${i}_PID
|
||||
kill -9 ${!p}
|
||||
$ETCDCTL del /vitastor/osd/state/$i
|
||||
start_osd $((i-1))
|
||||
sleep 15
|
||||
done
|
||||
|
||||
sleep 5
|
||||
start_osd 7
|
||||
|
||||
sleep 5
|
||||
}
|
||||
|
||||
kill_osds &
|
||||
|
||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=16 -fsync=256 -rw=randwrite \
|
||||
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120 2>/dev/null
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/read.bin
|
||||
|
||||
diff ./testdata/read.bin ./testdata/mirror.bin
|
||||
|
||||
format_green OK
|
Reference in New Issue
Block a user