-----BEGIN PGP SIGNATURE-----

Version: GnuPG v1
 
 iQEcBAABAgAGBQJeXiwOAAoJEO8Ells5jWIRvD4H/2acXnglnA2wrCUcIq9j/8/D
 QvzPh77LSzTTvgvyF2n/K2Z7gtM9HwVq0i159fYYJCqZSy330gK//qe8yE37qx5Z
 Rgqu/p1VKxasnN8jwaiOaneNx/O/B89BvPfG5Azi7HVO9Zdep+d3VrbWWjQymEoM
 yf90XUu/DX1PPiNJkEUlsrjCZrzBU5zfyNr7CpFOfi1gzbJMV1wblHIcOIadyisY
 tvMfGfp+K2TgKBlSkrJd7TSZrnqmPi5NKQ8hJRlXDuNFrXsgxuUa2WbNqOwzArKn
 ZL4eXPRB+Sp2/Zy8BVENgSjOvU5Tho8dJm2rSdGXKfF78Ox8uAHbZ8LeTAYUlpM=
 =jL0I
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging

# gpg: Signature made Tue 03 Mar 2020 10:06:06 GMT
# gpg:                using RSA key EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request: (23 commits)
  l2tpv3: fix RFC number typo in qemu-options.hx
  colo: Update Documentation for continuous replication
  net/filter.c: Add Options to insert filters anywhere in the filter list
  tests/test-replication.c: Add test for for secondary node continuing replication
  block/replication.c: Ignore requests after failover
  hw: net: cadence_gem: Fix build errors in DB_PRINT()
  NetRxPkt: fix hash calculation of IPV6 TCP
  NetRxPkt: Introduce support for additional hash types
  e1000e: Avoid hw_error if legacy mode used
  dp8393x: Don't stop reception upon RBE interrupt assertion
  dp8393x: Don't reset Silicon Revision register
  dp8393x: Always update RRA pointers and sequence numbers
  dp8393x: Clear descriptor in_use field to release packet
  dp8393x: Pad frames to word or long word boundary
  dp8393x: Use long-word-aligned RRA pointers in 32-bit mode
  dp8393x: Don't clobber packet checksum
  dp8393x: Implement packet size limit and RBAE interrupt
  dp8393x: Clear RRRA command register bit only when appropriate
  dp8393x: Update LLFA and CRDA registers from rx descriptor
  dp8393x: Have dp8393x_receive() return the packet size
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
master
Peter Maydell 2020-03-03 12:03:59 +00:00
commit 104933c4a9
13 changed files with 590 additions and 156 deletions

View File

@ -450,6 +450,17 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
aio_context_acquire(aio_context);
s = bs->opaque;
if (s->stage == BLOCK_REPLICATION_DONE ||
s->stage == BLOCK_REPLICATION_FAILOVER) {
/*
* This case happens when a secondary is promoted to primary.
* Ignore the request because the secondary side of replication
* doesn't have to do anything anymore.
*/
aio_context_release(aio_context);
return;
}
if (s->stage != BLOCK_REPLICATION_NONE) {
error_setg(errp, "Block replication is running or done");
aio_context_release(aio_context);
@ -574,6 +585,17 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
aio_context_acquire(aio_context);
s = bs->opaque;
if (s->stage == BLOCK_REPLICATION_DONE ||
s->stage == BLOCK_REPLICATION_FAILOVER) {
/*
* This case happens when a secondary was promoted to primary.
* Ignore the request because the secondary side of replication
* doesn't have to do anything anymore.
*/
aio_context_release(aio_context);
return;
}
if (s->mode == REPLICATION_MODE_SECONDARY) {
secondary_do_checkpoint(s, errp);
}
@ -590,7 +612,7 @@ static void replication_get_error(ReplicationState *rs, Error **errp)
aio_context_acquire(aio_context);
s = bs->opaque;
if (s->stage != BLOCK_REPLICATION_RUNNING) {
if (s->stage == BLOCK_REPLICATION_NONE) {
error_setg(errp, "Block replication is not running");
aio_context_release(aio_context);
return;
@ -632,6 +654,17 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
aio_context_acquire(aio_context);
s = bs->opaque;
if (s->stage == BLOCK_REPLICATION_DONE ||
s->stage == BLOCK_REPLICATION_FAILOVER) {
/*
* This case happens when a secondary was promoted to primary.
* Ignore the request because the secondary side of replication
* doesn't have to do anything anymore.
*/
aio_context_release(aio_context);
return;
}
if (s->stage != BLOCK_REPLICATION_RUNNING) {
error_setg(errp, "Block replication is not running");
aio_context_release(aio_context);

View File

@ -145,81 +145,189 @@ The diagram just shows the main qmp command, you can get the detail
in test procedure.
== Test procedure ==
1. Startup qemu
Primary:
# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name primary \
-device piix3-usb-uhci -vnc :7 \
-device usb-tablet -netdev tap,id=hn0,vhost=off \
-device virtio-net-pci,id=net-pci0,netdev=hn0 \
-drive if=virtio,id=primary-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
children.0.file.filename=1.raw,\
children.0.driver=raw -S
Secondary:
# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name secondary \
-device piix3-usb-uhci -vnc :7 \
-device usb-tablet -netdev tap,id=hn0,vhost=off \
-device virtio-net-pci,id=net-pci0,netdev=hn0 \
-drive if=none,id=secondary-disk0,file.filename=1.raw,driver=raw,node-name=node0 \
-drive if=virtio,id=active-disk0,driver=replication,mode=secondary,\
file.driver=qcow2,top-id=active-disk0,\
file.file.filename=/mnt/ramfs/active_disk.img,\
file.backing.driver=qcow2,\
file.backing.file.filename=/mnt/ramfs/hidden_disk.img,\
file.backing.backing=secondary-disk0 \
-incoming tcp:0:8888
Note: Here we are running both instances on the same host for testing,
change the IP Addresses if you want to run it on two hosts. Initally
127.0.0.1 is the Primary Host and 127.0.0.2 is the Secondary Host.
2. On Secondary VM's QEMU monitor, issue command
== Startup qemu ==
1. Primary:
Note: Initally, $imagefolder/primary.qcow2 needs to be copied to all hosts.
You don't need to change any IP's here, because 0.0.0.0 listens on any
interface. The chardev's with 127.0.0.1 IP's loopback to the local qemu
instance.
# imagefolder="/mnt/vms/colo-test-primary"
# qemu-system-x86_64 -enable-kvm -cpu qemu64,+kvmclock -m 512 -smp 1 -qmp stdio \
-device piix3-usb-uhci -device usb-tablet -name primary \
-netdev tap,id=hn0,vhost=off,helper=/usr/lib/qemu/qemu-bridge-helper \
-device rtl8139,id=e0,netdev=hn0 \
-chardev socket,id=mirror0,host=0.0.0.0,port=9003,server,nowait \
-chardev socket,id=compare1,host=0.0.0.0,port=9004,server,wait \
-chardev socket,id=compare0,host=127.0.0.1,port=9001,server,nowait \
-chardev socket,id=compare0-0,host=127.0.0.1,port=9001 \
-chardev socket,id=compare_out,host=127.0.0.1,port=9005,server,nowait \
-chardev socket,id=compare_out0,host=127.0.0.1,port=9005 \
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 \
-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out \
-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 \
-object iothread,id=iothread1 \
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,\
outdev=compare_out0,iothread=iothread1 \
-drive if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
children.0.file.filename=$imagefolder/primary.qcow2,children.0.driver=qcow2 -S
2. Secondary:
Note: Active and hidden images need to be created only once and the
size should be the same as primary.qcow2. Again, you don't need to change
any IP's here, except for the $primary_ip variable.
# imagefolder="/mnt/vms/colo-test-secondary"
# primary_ip=127.0.0.1
# qemu-img create -f qcow2 $imagefolder/secondary-active.qcow2 10G
# qemu-img create -f qcow2 $imagefolder/secondary-hidden.qcow2 10G
# qemu-system-x86_64 -enable-kvm -cpu qemu64,+kvmclock -m 512 -smp 1 -qmp stdio \
-device piix3-usb-uhci -device usb-tablet -name secondary \
-netdev tap,id=hn0,vhost=off,helper=/usr/lib/qemu/qemu-bridge-helper \
-device rtl8139,id=e0,netdev=hn0 \
-chardev socket,id=red0,host=$primary_ip,port=9003,reconnect=1 \
-chardev socket,id=red1,host=$primary_ip,port=9004,reconnect=1 \
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 \
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 \
-object filter-rewriter,id=rew0,netdev=hn0,queue=all \
-drive if=none,id=parent0,file.filename=$imagefolder/primary.qcow2,driver=qcow2 \
-drive if=none,id=childs0,driver=replication,mode=secondary,file.driver=qcow2,\
top-id=colo-disk0,file.file.filename=$imagefolder/secondary-active.qcow2,\
file.backing.driver=qcow2,file.backing.file.filename=$imagefolder/secondary-hidden.qcow2,\
file.backing.backing=parent0 \
-drive if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
children.0=childs0 \
-incoming tcp:0.0.0.0:9998
3. On Secondary VM's QEMU monitor, issue command
{'execute':'qmp_capabilities'}
{ 'execute': 'nbd-server-start',
'arguments': {'addr': {'type': 'inet', 'data': {'host': 'xx.xx.xx.xx', 'port': '8889'} } }
}
{'execute': 'nbd-server-add', 'arguments': {'device': 'secondary-disk0', 'writable': true } }
{'execute': 'nbd-server-start', 'arguments': {'addr': {'type': 'inet', 'data': {'host': '0.0.0.0', 'port': '9999'} } } }
{'execute': 'nbd-server-add', 'arguments': {'device': 'parent0', 'writable': true } }
Note:
a. The qmp command nbd-server-start and nbd-server-add must be run
before running the qmp command migrate on primary QEMU
b. Active disk, hidden disk and nbd target's length should be the
same.
c. It is better to put active disk and hidden disk in ramdisk.
c. It is better to put active disk and hidden disk in ramdisk. They
will be merged into the parent disk on failover.
3. On Primary VM's QEMU monitor, issue command:
4. On Primary VM's QEMU monitor, issue command:
{'execute':'qmp_capabilities'}
{ 'execute': 'human-monitor-command',
'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xx.xx.xx.xx,file.port=8889,file.export=secondary-disk0,node-name=nbd_client0'}}
{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'primary-disk0', 'node': 'nbd_client0' } }
{ 'execute': 'migrate-set-capabilities',
'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:xx.xx.xx.xx:8888' } }
{'execute': 'human-monitor-command', 'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}}
{'execute': 'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': 'replication0' } }
{'execute': 'migrate-set-capabilities', 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
{'execute': 'migrate', 'arguments': {'uri': 'tcp:127.0.0.2:9998' } }
Note:
a. There should be only one NBD Client for each primary disk.
b. xx.xx.xx.xx is the secondary physical machine's hostname or IP
c. The qmp command line must be run after running qmp command line in
b. The qmp command line must be run after running qmp command line in
secondary qemu.
4. After the above steps, you will see, whenever you make changes to PVM, SVM will be synced.
5. After the above steps, you will see, whenever you make changes to PVM, SVM will be synced.
You can issue command '{ "execute": "migrate-set-parameters" , "arguments":{ "x-checkpoint-delay": 2000 } }'
to change the checkpoint period time
to change the idle checkpoint period time
5. Failover test
You can kill Primary VM and run 'x_colo_lost_heartbeat' in Secondary VM's
monitor at the same time, then SVM will failover and client will not detect this
change.
6. Failover test
You can kill one of the VMs and Failover on the surviving VM:
Before issuing '{ "execute": "x-colo-lost-heartbeat" }' command, we have to
issue block related command to stop block replication.
Primary:
Remove the nbd child from the quorum:
{ 'execute': 'x-blockdev-change', 'arguments': {'parent': 'colo-disk0', 'child': 'children.1'}}
{ 'execute': 'human-monitor-command','arguments': {'command-line': 'drive_del blk-buddy0'}}
Note: there is no qmp command to remove the blockdev now
If you killed the Secondary, then follow "Primary Failover". After that,
if you want to resume the replication, follow "Primary resume replication"
Secondary:
The primary host is down, so we should do the following thing:
{ 'execute': 'nbd-server-stop' }
If you killed the Primary, then follow "Secondary Failover". After that,
if you want to resume the replication, follow "Secondary resume replication"
== Primary Failover ==
The Secondary died, resume on the Primary
{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'child': 'children.1'} }
{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_del replication0' } }
{'execute': 'object-del', 'arguments':{ 'id': 'comp0' } }
{'execute': 'object-del', 'arguments':{ 'id': 'iothread1' } }
{'execute': 'object-del', 'arguments':{ 'id': 'm0' } }
{'execute': 'object-del', 'arguments':{ 'id': 'redire0' } }
{'execute': 'object-del', 'arguments':{ 'id': 'redire1' } }
{'execute': 'x-colo-lost-heartbeat' }
== Secondary Failover ==
The Primary died, resume on the Secondary and prepare to become the new Primary
{'execute': 'nbd-server-stop'}
{'execute': 'x-colo-lost-heartbeat'}
{'execute': 'object-del', 'arguments':{ 'id': 'f2' } }
{'execute': 'object-del', 'arguments':{ 'id': 'f1' } }
{'execute': 'chardev-remove', 'arguments':{ 'id': 'red1' } }
{'execute': 'chardev-remove', 'arguments':{ 'id': 'red0' } }
{'execute': 'chardev-add', 'arguments':{ 'id': 'mirror0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9003' } }, 'server': true } } } }
{'execute': 'chardev-add', 'arguments':{ 'id': 'compare1', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9004' } }, 'server': true } } } }
{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': true } } } }
{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0-0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': false } } } }
{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': true } } } }
{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': false } } } }
== Primary resume replication ==
Resume replication after new Secondary is up.
Start the new Secondary (Steps 2 and 3 above), then on the Primary:
{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.2:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} }
Wait until disk is synced, then:
{'execute': 'stop'}
{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync'} }
{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}}
{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } }
{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.2:9998' } }
Note:
If this Primary previously was a Secondary, then we need to insert the
filters before the filter-rewriter by using the
"'insert': 'before', 'position': 'id=rew0'" Options. See below.
== Secondary resume replication ==
Become Primary and resume replication after new Secondary is up. Note
that now 127.0.0.1 is the Secondary and 127.0.0.2 is the Primary.
Start the new Secondary (Steps 2 and 3 above, but with primary_ip=127.0.0.2),
then on the old Secondary:
{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.1:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} }
Wait until disk is synced, then:
{'execute': 'stop'}
{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync' } }
{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.1,file.port=9999,file.export=parent0,node-name=replication0'}}
{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } }
{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } }
{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.1:9998' } }
== TODO ==
1. Support continuous VM replication.
2. Support shared storage.
3. Develop the heartbeat part.
4. Reduce checkpoint VMs downtime while doing checkpoint.
1. Support shared storage.
2. Develop the heartbeat part.
3. Reduce checkpoint VMs downtime while doing checkpoint.

View File

@ -65,12 +65,12 @@ blocks that are already in QEMU.
^ || .----------
| || | Secondary
1 Quorum || '----------
/ \ ||
/ \ ||
Primary 2 filter
disk ^ virtio-blk
| ^
3 NBD -------> 3 NBD |
/ \ || virtio-blk
/ \ || ^
Primary 2 filter |
disk ^ 7 Quorum
| /
3 NBD -------> 3 NBD /
client || server 2 filter
|| ^ ^
--------. || | |
@ -106,6 +106,10 @@ any state that would otherwise be lost by the speculative write-through
of the NBD server into the secondary disk. So before block replication,
the primary disk and secondary disk should contain the same data.
7) The secondary also has a quorum node, so after secondary failover it
can become the new primary and continue replication.
== Failure Handling ==
There are 7 internal errors when block replication is running:
1. I/O error on primary disk
@ -171,16 +175,18 @@ Primary:
leading whitespace.
5. The qmp command line must be run after running qmp command line in
secondary qemu.
6. After failover we need remove children.1 (replication driver).
6. After primary failover we need remove children.1 (replication driver).
Secondary:
-drive if=none,driver=raw,file.filename=1.raw,id=colo1 \
-drive if=xxx,id=topxxx,driver=replication,mode=secondary,top-id=topxxx\
-drive if=none,id=childs1,driver=replication,mode=secondary,top-id=childs1
file.file.filename=active_disk.qcow2,\
file.driver=qcow2,\
file.backing.file.filename=hidden_disk.qcow2,\
file.backing.driver=qcow2,\
file.backing.backing=colo1
-drive if=xxx,driver=quorum,read-pattern=fifo,id=top-disk1,\
vote-threshold=1,children.0=childs1
Then run qmp command in secondary qemu:
{ 'execute': 'nbd-server-start',
@ -234,6 +240,8 @@ Secondary:
The primary host is down, so we should do the following thing:
{ 'execute': 'nbd-server-stop' }
Promote Secondary to Primary:
see COLO-FT.txt
TODO:
1. Continuous block replication
2. Shared disk
1. Shared disk

View File

@ -987,8 +987,9 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size)
return -1;
}
DB_PRINT("copy %d bytes to 0x%x\n", MIN(bytes_to_copy, rxbufsize),
rx_desc_get_buffer(s->rx_desc[q]));
DB_PRINT("copy %u bytes to 0x%" PRIx64 "\n",
MIN(bytes_to_copy, rxbufsize),
rx_desc_get_buffer(s, s->rx_desc[q]));
/* Copy packet data to emulated DMA buffer */
address_space_write(&s->dma_as, rx_desc_get_buffer(s, s->rx_desc[q]) +
@ -1159,9 +1160,9 @@ static void gem_transmit(CadenceGEMState *s)
if (tx_desc_get_length(desc) > sizeof(tx_packet) -
(p - tx_packet)) {
DB_PRINT("TX descriptor @ 0x%x too large: size 0x%x space " \
"0x%x\n", (unsigned)packet_desc_addr,
(unsigned)tx_desc_get_length(desc),
DB_PRINT("TX descriptor @ 0x%" HWADDR_PRIx \
" too large: size 0x%x space 0x%zx\n",
packet_desc_addr, tx_desc_get_length(desc),
sizeof(tx_packet) - (p - tx_packet));
break;
}

View File

@ -137,6 +137,7 @@ do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
#define SONIC_TCR_CRCI 0x2000
#define SONIC_TCR_PINT 0x8000
#define SONIC_ISR_RBAE 0x0010
#define SONIC_ISR_RBE 0x0020
#define SONIC_ISR_RDE 0x0040
#define SONIC_ISR_TC 0x0080
@ -145,6 +146,9 @@ do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
#define SONIC_ISR_PINT 0x0800
#define SONIC_ISR_LCD 0x1000
#define SONIC_DESC_EOL 0x0001
#define SONIC_DESC_ADDR 0xFFFE
#define TYPE_DP8393X "dp8393x"
#define DP8393X(obj) OBJECT_CHECK(dp8393xState, (obj), TYPE_DP8393X)
@ -154,6 +158,7 @@ typedef struct dp8393xState {
/* Hardware */
uint8_t it_shift;
bool big_endian;
bool last_rba_is_full;
qemu_irq irq;
#ifdef DEBUG_SONIC
int irq_level;
@ -197,7 +202,8 @@ static uint32_t dp8393x_crba(dp8393xState *s)
static uint32_t dp8393x_crda(dp8393xState *s)
{
return (s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA];
return (s->regs[SONIC_URDA] << 16) |
(s->regs[SONIC_CRDA] & SONIC_DESC_ADDR);
}
static uint32_t dp8393x_rbwc(dp8393xState *s)
@ -217,7 +223,8 @@ static uint32_t dp8393x_tsa(dp8393xState *s)
static uint32_t dp8393x_ttda(dp8393xState *s)
{
return (s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA];
return (s->regs[SONIC_UTDA] << 16) |
(s->regs[SONIC_TTDA] & SONIC_DESC_ADDR);
}
static uint32_t dp8393x_wt(dp8393xState *s)
@ -241,9 +248,19 @@ static void dp8393x_put(dp8393xState *s, int width, int offset,
uint16_t val)
{
if (s->big_endian) {
s->data[offset * width + width - 1] = cpu_to_be16(val);
if (width == 2) {
s->data[offset * 2] = 0;
s->data[offset * 2 + 1] = cpu_to_be16(val);
} else {
s->data[offset] = cpu_to_be16(val);
}
} else {
s->data[offset * width] = cpu_to_le16(val);
if (width == 2) {
s->data[offset * 2] = cpu_to_le16(val);
s->data[offset * 2 + 1] = 0;
} else {
s->data[offset] = cpu_to_le16(val);
}
}
}
@ -331,15 +348,15 @@ static void dp8393x_do_read_rra(dp8393xState *s)
s->regs[SONIC_RRP] = s->regs[SONIC_RSA];
}
/* Check resource exhaustion */
/* Warn the host if CRBA now has the last available resource */
if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP])
{
s->regs[SONIC_ISR] |= SONIC_ISR_RBE;
dp8393x_update_irq(s);
}
/* Done */
s->regs[SONIC_CR] &= ~SONIC_CR_RRRA;
/* Allow packet reception */
s->last_rba_is_full = false;
}
static void dp8393x_do_software_reset(dp8393xState *s)
@ -509,7 +526,7 @@ static void dp8393x_do_transmit_packets(dp8393xState *s)
MEMTXATTRS_UNSPECIFIED, s->data,
size);
s->regs[SONIC_CTDA] = dp8393x_get(s, width, 0) & ~0x1;
if (dp8393x_get(s, width, 0) & 0x1) {
if (dp8393x_get(s, width, 0) & SONIC_DESC_EOL) {
/* EOL detected */
break;
}
@ -550,8 +567,10 @@ static void dp8393x_do_command(dp8393xState *s, uint16_t command)
dp8393x_do_start_timer(s);
if (command & SONIC_CR_RST)
dp8393x_do_software_reset(s);
if (command & SONIC_CR_RRRA)
if (command & SONIC_CR_RRRA) {
dp8393x_do_read_rra(s);
s->regs[SONIC_CR] &= ~SONIC_CR_RRRA;
}
if (command & SONIC_CR_LCAM)
dp8393x_do_load_cam(s);
}
@ -585,7 +604,7 @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size)
DPRINTF("read 0x%04x from reg %s\n", val, reg_names[reg]);
return val;
return s->big_endian ? val << 16 : val;
}
static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
@ -593,13 +612,14 @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
{
dp8393xState *s = opaque;
int reg = addr >> s->it_shift;
uint32_t val = s->big_endian ? data >> 16 : data;
DPRINTF("write 0x%04x to reg %s\n", (uint16_t)data, reg_names[reg]);
DPRINTF("write 0x%04x to reg %s\n", (uint16_t)val, reg_names[reg]);
switch (reg) {
/* Command register */
case SONIC_CR:
dp8393x_do_command(s, data);
dp8393x_do_command(s, val);
break;
/* Prevent write to read-only registers */
case SONIC_CAP2:
@ -612,59 +632,60 @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
/* Accept write to some registers only when in reset mode */
case SONIC_DCR:
if (s->regs[SONIC_CR] & SONIC_CR_RST) {
s->regs[reg] = data & 0xbfff;
s->regs[reg] = val & 0xbfff;
} else {
DPRINTF("writing to DCR invalid\n");
}
break;
case SONIC_DCR2:
if (s->regs[SONIC_CR] & SONIC_CR_RST) {
s->regs[reg] = data & 0xf017;
s->regs[reg] = val & 0xf017;
} else {
DPRINTF("writing to DCR2 invalid\n");
}
break;
/* 12 lower bytes are Read Only */
case SONIC_TCR:
s->regs[reg] = data & 0xf000;
s->regs[reg] = val & 0xf000;
break;
/* 9 lower bytes are Read Only */
case SONIC_RCR:
s->regs[reg] = data & 0xffe0;
s->regs[reg] = val & 0xffe0;
break;
/* Ignore most significant bit */
case SONIC_IMR:
s->regs[reg] = data & 0x7fff;
s->regs[reg] = val & 0x7fff;
dp8393x_update_irq(s);
break;
/* Clear bits by writing 1 to them */
case SONIC_ISR:
data &= s->regs[reg];
s->regs[reg] &= ~data;
if (data & SONIC_ISR_RBE) {
val &= s->regs[reg];
s->regs[reg] &= ~val;
if (val & SONIC_ISR_RBE) {
dp8393x_do_read_rra(s);
}
dp8393x_update_irq(s);
if (dp8393x_can_receive(s->nic->ncs)) {
qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
break;
/* Ignore least significant bit */
/* The guest is required to store aligned pointers here */
case SONIC_RSA:
case SONIC_REA:
case SONIC_RRP:
case SONIC_RWP:
s->regs[reg] = data & 0xfffe;
if (s->regs[SONIC_DCR] & SONIC_DCR_DW) {
s->regs[reg] = val & 0xfffc;
} else {
s->regs[reg] = val & 0xfffe;
}
break;
/* Invert written value for some registers */
case SONIC_CRCT:
case SONIC_FAET:
case SONIC_MPT:
s->regs[reg] = data ^ 0xffff;
s->regs[reg] = val ^ 0xffff;
break;
/* All other registers have no special contrainst */
default:
s->regs[reg] = data;
s->regs[reg] = val;
}
if (reg == SONIC_WT0 || reg == SONIC_WT1) {
@ -675,8 +696,8 @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
static const MemoryRegionOps dp8393x_ops = {
.read = dp8393x_read,
.write = dp8393x_write,
.impl.min_access_size = 2,
.impl.max_access_size = 2,
.impl.min_access_size = 4,
.impl.max_access_size = 4,
.endianness = DEVICE_NATIVE_ENDIAN,
};
@ -703,8 +724,6 @@ static int dp8393x_can_receive(NetClientState *nc)
if (!(s->regs[SONIC_CR] & SONIC_CR_RXEN))
return 0;
if (s->regs[SONIC_ISR] & SONIC_ISR_RBE)
return 0;
return 1;
}
@ -743,40 +762,69 @@ static int dp8393x_receive_filter(dp8393xState *s, const uint8_t * buf,
}
static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
size_t size)
size_t pkt_size)
{
dp8393xState *s = qemu_get_nic_opaque(nc);
int packet_type;
uint32_t available, address;
int width, rx_len = size;
int width, rx_len, padded_len;
uint32_t checksum;
width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1;
int size;
s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
packet_type = dp8393x_receive_filter(s, buf, size);
if (s->last_rba_is_full) {
return pkt_size;
}
rx_len = pkt_size + sizeof(checksum);
if (s->regs[SONIC_DCR] & SONIC_DCR_DW) {
width = 2;
padded_len = ((rx_len - 1) | 3) + 1;
} else {
width = 1;
padded_len = ((rx_len - 1) | 1) + 1;
}
if (padded_len > dp8393x_rbwc(s) * 2) {
DPRINTF("oversize packet, pkt_size is %d\n", pkt_size);
s->regs[SONIC_ISR] |= SONIC_ISR_RBAE;
dp8393x_update_irq(s);
s->regs[SONIC_RCR] |= SONIC_RCR_LPKT;
goto done;
}
packet_type = dp8393x_receive_filter(s, buf, pkt_size);
if (packet_type < 0) {
DPRINTF("packet not for netcard\n");
return -1;
}
/* XXX: Check byte ordering */
/* Check for EOL */
if (s->regs[SONIC_LLFA] & 0x1) {
if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
/* Are we still in resource exhaustion? */
size = sizeof(uint16_t) * 1 * width;
address = dp8393x_crda(s) + sizeof(uint16_t) * 5 * width;
address_space_read(&s->as, address, MEMTXATTRS_UNSPECIFIED,
s->data, size);
if (dp8393x_get(s, width, 0) & 0x1) {
s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0);
if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
/* Still EOL ; stop reception */
return -1;
} else {
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
}
/* Link has been updated by host */
/* Clear in_use */
size = sizeof(uint16_t) * width;
address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width;
dp8393x_put(s, width, 0, 0);
address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED,
(uint8_t *)s->data, size, 1);
/* Move to next descriptor */
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
}
/* Save current position */
@ -784,21 +832,32 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
s->regs[SONIC_TRBA0] = s->regs[SONIC_CRBA0];
/* Calculate the ethernet checksum */
checksum = cpu_to_le32(crc32(0, buf, rx_len));
checksum = cpu_to_le32(crc32(0, buf, pkt_size));
/* Put packet into RBA */
DPRINTF("Receive packet at %08x\n", dp8393x_crba(s));
address = dp8393x_crba(s);
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
buf, rx_len);
address += rx_len;
buf, pkt_size);
address += pkt_size;
/* Put frame checksum into RBA */
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
&checksum, 4);
rx_len += 4;
&checksum, sizeof(checksum));
address += sizeof(checksum);
/* Pad short packets to keep pointers aligned */
if (rx_len < padded_len) {
size = padded_len - rx_len;
address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED,
(uint8_t *)"\xFF\xFF\xFF", size, 1);
address += size;
}
s->regs[SONIC_CRBA1] = address >> 16;
s->regs[SONIC_CRBA0] = address & 0xffff;
available = dp8393x_rbwc(s);
available -= rx_len / 2;
available -= padded_len >> 1;
s->regs[SONIC_RBWC1] = available >> 16;
s->regs[SONIC_RBWC0] = available & 0xffff;
@ -825,39 +884,46 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
MEMTXATTRS_UNSPECIFIED,
s->data, size);
/* Move to next descriptor */
/* Check link field */
size = sizeof(uint16_t) * width;
address_space_read(&s->as,
dp8393x_crda(s) + sizeof(uint16_t) * 5 * width,
MEMTXATTRS_UNSPECIFIED, s->data, size);
s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0);
if (s->regs[SONIC_LLFA] & 0x1) {
if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
/* EOL detected */
s->regs[SONIC_ISR] |= SONIC_ISR_RDE;
} else {
/* Clear in_use, but it is always 16bit wide */
int offset = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width;
if (s->big_endian && width == 2) {
/* we need to adjust the offset of the 16bit field */
offset += sizeof(uint16_t);
}
s->data[0] = 0;
address_space_write(&s->as, offset, MEMTXATTRS_UNSPECIFIED,
s->data, sizeof(uint16_t));
/* Clear in_use */
size = sizeof(uint16_t) * width;
address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width;
dp8393x_put(s, width, 0, 0);
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
s->data, size);
/* Move to next descriptor */
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) | (((s->regs[SONIC_RSC] & 0x00ff) + 1) & 0x00ff);
}
if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) {
/* Read next RRA */
dp8393x_update_irq(s);
s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) |
((s->regs[SONIC_RSC] + 1) & 0x00ff);
done:
if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) {
if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP]) {
/* Stop packet reception */
s->last_rba_is_full = true;
} else {
/* Read next resource */
dp8393x_do_read_rra(s);
}
}
/* Done */
dp8393x_update_irq(s);
return size;
return pkt_size;
}
static void dp8393x_reset(DeviceState *dev)
@ -866,6 +932,7 @@ static void dp8393x_reset(DeviceState *dev)
timer_del(s->watchdog);
memset(s->regs, 0, sizeof(s->regs));
s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux/mips */
s->regs[SONIC_CR] = SONIC_CR_RST | SONIC_CR_STP | SONIC_CR_RXDIS;
s->regs[SONIC_DCR] &= ~(SONIC_DCR_EXBUS | SONIC_DCR_LBR);
s->regs[SONIC_RCR] &= ~(SONIC_RCR_LB0 | SONIC_RCR_LB1 | SONIC_RCR_BRD | SONIC_RCR_RNT);
@ -918,7 +985,6 @@ static void dp8393x_realize(DeviceState *dev, Error **errp)
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s);
s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */
memory_region_init_ram(&s->prom, OBJECT(dev),
"dp8393x-prom", SONIC_PROM_SIZE, &local_err);

View File

@ -582,7 +582,7 @@ e1000e_rss_calc_hash(E1000ECore *core,
type = NetPktRssIpV4Tcp;
break;
case E1000_MRQ_RSS_TYPE_IPV6TCP:
type = NetPktRssIpV6Tcp;
type = NetPktRssIpV6TcpEx;
break;
case E1000_MRQ_RSS_TYPE_IPV6:
type = NetPktRssIpV6;
@ -2813,12 +2813,15 @@ e1000e_set_eitr(E1000ECore *core, int index, uint32_t val)
static void
e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val)
{
if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) {
hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero");
}
if (core->mac[RCTL] & E1000_RCTL_DTYP_MASK) {
if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) {
hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero");
if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) {
hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero");
}
if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) {
hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero");
}
}
core->mac[PSRCTL] = val;

View File

@ -307,6 +307,20 @@ _net_rx_rss_prepare_tcp(uint8_t *rss_input,
&tcphdr->th_dport, sizeof(uint16_t));
}
static inline void
_net_rx_rss_prepare_udp(uint8_t *rss_input,
struct NetRxPkt *pkt,
size_t *bytes_written)
{
struct udp_header *udphdr = &pkt->l4hdr_info.hdr.udp;
_net_rx_rss_add_chunk(rss_input, bytes_written,
&udphdr->uh_sport, sizeof(uint16_t));
_net_rx_rss_add_chunk(rss_input, bytes_written,
&udphdr->uh_dport, sizeof(uint16_t));
}
uint32_t
net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
NetRxPktRssType type,
@ -334,7 +348,7 @@ net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
assert(pkt->isip6);
assert(pkt->istcp);
trace_net_rx_pkt_rss_ip6_tcp();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
_net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV6:
@ -347,6 +361,34 @@ net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
trace_net_rx_pkt_rss_ip6_ex();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
break;
case NetPktRssIpV6TcpEx:
assert(pkt->isip6);
assert(pkt->istcp);
trace_net_rx_pkt_rss_ip6_ex_tcp();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
_net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV4Udp:
assert(pkt->isip4);
assert(pkt->isudp);
trace_net_rx_pkt_rss_ip4_udp();
_net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
_net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV6Udp:
assert(pkt->isip6);
assert(pkt->isudp);
trace_net_rx_pkt_rss_ip6_udp();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
_net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
break;
case NetPktRssIpV6UdpEx:
assert(pkt->isip6);
assert(pkt->isudp);
trace_net_rx_pkt_rss_ip6_ex_udp();
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
_net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
break;
default:
assert(false);
break;

View File

@ -133,7 +133,11 @@ typedef enum {
NetPktRssIpV4Tcp,
NetPktRssIpV6Tcp,
NetPktRssIpV6,
NetPktRssIpV6Ex
NetPktRssIpV6Ex,
NetPktRssIpV6TcpEx,
NetPktRssIpV4Udp,
NetPktRssIpV6Udp,
NetPktRssIpV6UdpEx,
} NetRxPktRssType;
/**

View File

@ -92,9 +92,13 @@ net_rx_pkt_l3_csum_validate_csum(size_t l3hdr_off, uint32_t csl, uint32_t cntr,
net_rx_pkt_rss_ip4(void) "Calculating IPv4 RSS hash"
net_rx_pkt_rss_ip4_tcp(void) "Calculating IPv4/TCP RSS hash"
net_rx_pkt_rss_ip4_udp(void) "Calculating IPv4/UDP RSS hash"
net_rx_pkt_rss_ip6_tcp(void) "Calculating IPv6/TCP RSS hash"
net_rx_pkt_rss_ip6_udp(void) "Calculating IPv6/UDP RSS hash"
net_rx_pkt_rss_ip6(void) "Calculating IPv6 RSS hash"
net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash"
net_rx_pkt_rss_ip6_ex_tcp(void) "Calculating IPv6/EX/TCP RSS hash"
net_rx_pkt_rss_ip6_ex_udp(void) "Calculating IPv6/EX/UDP RSS hash"
net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X"
net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes"

View File

@ -62,6 +62,8 @@ struct NetFilterState {
NetClientState *netdev;
NetFilterDirection direction;
bool on;
char *position;
bool insert_before_flag;
QTAILQ_ENTRY(NetFilterState) next;
};

View File

@ -171,11 +171,47 @@ static void netfilter_set_status(Object *obj, const char *str, Error **errp)
}
}
static char *netfilter_get_position(Object *obj, Error **errp)
{
NetFilterState *nf = NETFILTER(obj);
return g_strdup(nf->position);
}
static void netfilter_set_position(Object *obj, const char *str, Error **errp)
{
NetFilterState *nf = NETFILTER(obj);
nf->position = g_strdup(str);
}
static char *netfilter_get_insert(Object *obj, Error **errp)
{
NetFilterState *nf = NETFILTER(obj);
return nf->insert_before_flag ? g_strdup("before") : g_strdup("behind");
}
static void netfilter_set_insert(Object *obj, const char *str, Error **errp)
{
NetFilterState *nf = NETFILTER(obj);
if (strcmp(str, "before") && strcmp(str, "behind")) {
error_setg(errp, "Invalid value for netfilter insert, "
"should be 'before' or 'behind'");
return;
}
nf->insert_before_flag = !strcmp(str, "before");
}
static void netfilter_init(Object *obj)
{
NetFilterState *nf = NETFILTER(obj);
nf->on = true;
nf->insert_before_flag = false;
nf->position = g_strdup("tail");
object_property_add_str(obj, "netdev",
netfilter_get_netdev_id, netfilter_set_netdev_id,
@ -187,11 +223,18 @@ static void netfilter_init(Object *obj)
object_property_add_str(obj, "status",
netfilter_get_status, netfilter_set_status,
NULL);
object_property_add_str(obj, "position",
netfilter_get_position, netfilter_set_position,
NULL);
object_property_add_str(obj, "insert",
netfilter_get_insert, netfilter_set_insert,
NULL);
}
static void netfilter_complete(UserCreatable *uc, Error **errp)
{
NetFilterState *nf = NETFILTER(uc);
NetFilterState *position = NULL;
NetClientState *ncs[MAX_QUEUE_NUM];
NetFilterClass *nfc = NETFILTER_GET_CLASS(uc);
int queues;
@ -219,6 +262,41 @@ static void netfilter_complete(UserCreatable *uc, Error **errp)
return;
}
if (strcmp(nf->position, "head") && strcmp(nf->position, "tail")) {
Object *container;
Object *obj;
char *position_id;
if (!g_str_has_prefix(nf->position, "id=")) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "position",
"'head', 'tail' or 'id=<id>'");
return;
}
/* get the id from the string */
position_id = g_strndup(nf->position + 3, strlen(nf->position) - 3);
/* Search for the position to insert before/behind */
container = object_get_objects_root();
obj = object_resolve_path_component(container, position_id);
if (!obj) {
error_setg(errp, "filter '%s' not found", position_id);
g_free(position_id);
return;
}
position = NETFILTER(obj);
if (position->netdev != ncs[0]) {
error_setg(errp, "filter '%s' belongs to a different netdev",
position_id);
g_free(position_id);
return;
}
g_free(position_id);
}
nf->netdev = ncs[0];
if (nfc->setup) {
@ -228,7 +306,18 @@ static void netfilter_complete(UserCreatable *uc, Error **errp)
return;
}
}
QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next);
if (position) {
if (nf->insert_before_flag) {
QTAILQ_INSERT_BEFORE(position, nf, next);
} else {
QTAILQ_INSERT_AFTER(&nf->netdev->filters, position, nf, next);
}
} else if (!strcmp(nf->position, "head")) {
QTAILQ_INSERT_HEAD(&nf->netdev->filters, nf, next);
} else if (!strcmp(nf->position, "tail")) {
QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next);
}
}
static void netfilter_finalize(Object *obj)
@ -245,6 +334,7 @@ static void netfilter_finalize(Object *obj)
QTAILQ_REMOVE(&nf->netdev->filters, nf, next);
}
g_free(nf->netdev_id);
g_free(nf->position);
}
static void default_handle_event(NetFilterState *nf, int event, Error **errp)

View File

@ -2330,7 +2330,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
" Linux kernel 3.3+ as well as most routers can talk\n"
" L2TPv3. This transport allows connecting a VM to a VM,\n"
" VM to a router and even VM to Host. It is a nearly-universal\n"
" standard (RFC3391). Note - this implementation uses static\n"
" standard (RFC3931). Note - this implementation uses static\n"
" pre-configured tunnels (same as the Linux kernel).\n"
" use 'src=' to specify source address\n"
" use 'dst=' to specify destination address\n"
@ -2737,7 +2737,7 @@ Example (send packets from host's 1.2.3.4):
@end example
@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
Configure a L2TPv3 pseudowire host network backend. L2TPv3 (RFC3391) is a
Configure a L2TPv3 pseudowire host network backend. L2TPv3 (RFC3931) is a
popular protocol to transport Ethernet (and other Layer 2) data frames between
two systems. It is present in routers, firewalls and the Linux kernel
(from version 3.3 onwards).
@ -4546,7 +4546,7 @@ applications, they can do this through this parameter. Its format is
a gnutls priority string as described at
@url{https://gnutls.org/manual/html_node/Priority-Strings.html}.
@item -object filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}][,status=@var{on|off}]
@item -object filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}][,status=@var{on|off}][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
Interval @var{t} can't be 0, this filter batches the packet delivery: all
packets arriving in a given interval on netdev @var{netdevid} are delayed
@ -4565,11 +4565,32 @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
@option{tx}: the filter is attached to the transmit queue of the netdev,
where it will receive packets sent by the netdev.
@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
position @var{head|tail|id=<id>} is an option to specify where the
filter should be inserted in the filter list. It can be applied to any
netfilter.
@option{head}: the filter is inserted at the head of the filter
list, before any existing filters.
@option{tail}: the filter is inserted at the tail of the filter
list, behind any existing filters (default).
@option{id=<id>}: the filter is inserted before or behind the filter
specified by <id>, see the insert option below.
insert @var{behind|before} is an option to specify where to insert the
new filter relative to the one specified with position=id=<id>. It can
be applied to any netfilter.
@option{before}: insert before the specified filter.
@option{behind}: insert behind the specified filter (default).
@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev
@var{chardevid},and redirect indev's packet to filter.if it has the vnet_hdr_support flag,
@ -4578,7 +4599,7 @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
be the same. we can just use indev or outdev, but at least one of indev or outdev
need to be specified.
@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support]
@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
secondary from primary to keep secondary tcp connection,and rewrite
@ -4591,7 +4612,7 @@ colo secondary:
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
-object filter-rewriter,id=rew0,netdev=hn0,queue=all
@item -object filter-dump,id=@var{id},netdev=@var{dev}[,file=@var{filename}][,maxlen=@var{len}]
@item -object filter-dump,id=@var{id},netdev=@var{dev}[,file=@var{filename}][,maxlen=@var{len}][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
Dump the network traffic on netdev @var{dev} to the file specified by
@var{filename}. At most @var{len} bytes (64k by default) per packet are stored.

View File

@ -490,6 +490,56 @@ static void test_secondary_stop(void)
teardown_secondary();
}
static void test_secondary_continuous_replication(void)
{
BlockBackend *top_blk, *local_blk;
Error *local_err = NULL;
top_blk = start_secondary();
replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
g_assert(!local_err);
/* write 0x22 to s_local_disk (IMG_SIZE / 2, IMG_SIZE) */
local_blk = blk_by_name(S_LOCAL_DISK_ID);
test_blk_write(local_blk, 0x22, IMG_SIZE / 2, IMG_SIZE / 2, false);
/* replication will backup s_local_disk to s_hidden_disk */
test_blk_read(top_blk, 0x11, IMG_SIZE / 2,
IMG_SIZE / 2, 0, IMG_SIZE, false);
/* write 0x33 to s_active_disk (0, IMG_SIZE / 2) */
test_blk_write(top_blk, 0x33, 0, IMG_SIZE / 2, false);
/* do failover (active commit) */
replication_stop_all(true, &local_err);
g_assert(!local_err);
/* it should ignore all requests from now on */
/* start after failover */
replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
g_assert(!local_err);
/* checkpoint */
replication_do_checkpoint_all(&local_err);
g_assert(!local_err);
/* stop */
replication_stop_all(true, &local_err);
g_assert(!local_err);
/* read from s_local_disk (0, IMG_SIZE / 2) */
test_blk_read(top_blk, 0x33, 0, IMG_SIZE / 2,
0, IMG_SIZE / 2, false);
/* read from s_local_disk (IMG_SIZE / 2, IMG_SIZE) */
test_blk_read(top_blk, 0x22, IMG_SIZE / 2,
IMG_SIZE / 2, 0, IMG_SIZE, false);
teardown_secondary();
}
static void test_secondary_do_checkpoint(void)
{
BlockBackend *top_blk, *local_blk;
@ -585,6 +635,8 @@ int main(int argc, char **argv)
g_test_add_func("/replication/secondary/write", test_secondary_write);
g_test_add_func("/replication/secondary/start", test_secondary_start);
g_test_add_func("/replication/secondary/stop", test_secondary_stop);
g_test_add_func("/replication/secondary/continuous_replication",
test_secondary_continuous_replication);
g_test_add_func("/replication/secondary/do_checkpoint",
test_secondary_do_checkpoint);
g_test_add_func("/replication/secondary/get_error_all",