diff --git a/mon/mon.js b/mon/mon.js index 66f8b6cd..564fdbac 100644 --- a/mon/mon.js +++ b/mon/mon.js @@ -110,7 +110,15 @@ const etcd_tree = { autosync_interval: 5, autosync_writes: 128, client_queue_depth: 128, // unused - recovery_queue_depth: 4, + recovery_queue_depth: 1, + recovery_sleep_us: 0, + recovery_tune_min_util: 0.1, + recovery_tune_min_client_util: 0, + recovery_tune_max_util: 1.0, + recovery_tune_max_client_util: 0.5, + recovery_tune_interval: 1, + recovery_tune_ewma_rate: 0.5, + recovery_tune_sleep_min_us: 10, // 10 microseconds recovery_pg_switch: 128, recovery_sync_batch: 16, no_recovery: false, diff --git a/src/osd.cpp b/src/osd.cpp index 6513e417..943da8ba 100644 --- a/src/osd.cpp +++ b/src/osd.cpp @@ -68,14 +68,21 @@ osd_t::osd_t(const json11::Json & config, ring_loop_t *ringloop) } } - print_stats_timer_id = this->tfd->set_timer(print_stats_interval*1000, true, [this](int timer_id) + if (print_stats_timer_id == -1) { - print_stats(); - }); - slow_log_timer_id = this->tfd->set_timer(slow_log_interval*1000, true, [this](int timer_id) + print_stats_timer_id = this->tfd->set_timer(print_stats_interval*1000, true, [this](int timer_id) + { + print_stats(); + }); + } + if (slow_log_timer_id == -1) { - print_slow(); - }); + slow_log_timer_id = this->tfd->set_timer(slow_log_interval*1000, true, [this](int timer_id) + { + print_slow(); + }); + } + apply_recovery_tune_interval(); msgr.tfd = this->tfd; msgr.ringloop = this->ringloop; @@ -97,6 +104,11 @@ osd_t::~osd_t() tfd->clear_timer(slow_log_timer_id); slow_log_timer_id = -1; } + if (rtune_timer_id >= 0) + { + tfd->clear_timer(rtune_timer_id); + rtune_timer_id = -1; + } if (print_stats_timer_id >= 0) { tfd->clear_timer(print_stats_timer_id); @@ -196,6 +208,22 @@ void osd_t::parse_config(bool init) recovery_queue_depth = config["recovery_queue_depth"].uint64_value(); if (recovery_queue_depth < 1 || recovery_queue_depth > MAX_RECOVERY_QUEUE) recovery_queue_depth = DEFAULT_RECOVERY_QUEUE; + 
recovery_sleep_us = config["recovery_sleep_us"].uint64_value(); + recovery_tune_min_util = config["recovery_tune_min_util"].is_null() + ? 0.1 : config["recovery_tune_min_util"].number_value(); + recovery_tune_max_util = config["recovery_tune_max_util"].is_null() + ? 1.0 : config["recovery_tune_max_util"].number_value(); + recovery_tune_min_client_util = config["recovery_tune_min_client_util"].is_null() + ? 0 : config["recovery_tune_min_client_util"].number_value(); + recovery_tune_max_client_util = config["recovery_tune_max_client_util"].is_null() + ? 0.5 : config["recovery_tune_max_client_util"].number_value(); + auto old_recovery_tune_interval = recovery_tune_interval; + recovery_tune_interval = config["recovery_tune_interval"].is_null() + ? 1 : config["recovery_tune_interval"].uint64_value(); + recovery_tune_ewma_rate = config["recovery_tune_ewma_rate"].is_null() + ? 0.5 : config["recovery_tune_ewma_rate"].number_value(); + recovery_tune_sleep_min_us = config["recovery_tune_sleep_min_us"].is_null() + ? 
10 : config["recovery_tune_sleep_min_us"].uint64_value(); recovery_pg_switch = config["recovery_pg_switch"].uint64_value(); if (recovery_pg_switch < 1) recovery_pg_switch = DEFAULT_RECOVERY_PG_SWITCH; @@ -274,6 +302,10 @@ void osd_t::parse_config(bool init) print_slow(); }); } + if (old_recovery_tune_interval != recovery_tune_interval) + { + apply_recovery_tune_interval(); + } } void osd_t::bind_socket() diff --git a/src/osd.h b/src/osd.h index 36a62423..fd60d021 100644 --- a/src/osd.h +++ b/src/osd.h @@ -34,7 +34,7 @@ #define DEFAULT_AUTOSYNC_INTERVAL 5 #define DEFAULT_AUTOSYNC_WRITES 128 #define MAX_RECOVERY_QUEUE 2048 -#define DEFAULT_RECOVERY_QUEUE 4 +#define DEFAULT_RECOVERY_QUEUE 1 #define DEFAULT_RECOVERY_PG_SWITCH 128 #define DEFAULT_RECOVERY_BATCH 16 @@ -116,7 +116,15 @@ class osd_t int immediate_commit = IMMEDIATE_NONE; int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // "emergency" sync every 5 seconds int autosync_writes = DEFAULT_AUTOSYNC_WRITES; - int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE; + uint64_t recovery_queue_depth = 1; + uint64_t recovery_sleep_us = 0; + double recovery_tune_min_util = 0.1; + double recovery_tune_min_client_util = 0; + double recovery_tune_max_util = 1.0; + double recovery_tune_max_client_util = 0.5; + int recovery_tune_interval = 1; + double recovery_tune_ewma_rate = 0.5; + int recovery_tune_sleep_min_us = 10; int recovery_pg_switch = DEFAULT_RECOVERY_PG_SWITCH; int recovery_sync_batch = DEFAULT_RECOVERY_BATCH; int inode_vanish_time = 60; @@ -197,6 +205,16 @@ class osd_t recovery_stat_t recovery_stat[2]; recovery_stat_t recovery_print_prev[2]; + // recovery auto-tuning + int rtune_timer_id = -1; + uint64_t rtune_avg_lat = 0; + double rtune_avg_count = 0; + double rtune_client_util = 0, rtune_target_util = 1; + osd_op_stats_t rtune_prev_stats; + recovery_stat_t rtune_prev_recovery[2]; + uint64_t recovery_target_queue_depth = 1; + uint64_t recovery_target_sleep_us = 0; + // cluster connection void parse_config(bool 
init); void init_cluster(); @@ -213,6 +231,8 @@ class osd_t void create_osd_state(); void renew_lease(bool reload); void print_stats(); + void tune_recovery(); + void apply_recovery_tune_interval(); void print_slow(); json11::Json get_statistics(); void report_statistics(); @@ -242,6 +262,7 @@ class osd_t bool submit_flush_op(pool_id_t pool_id, pg_num_t pg_num, pg_flush_batch_t *fb, bool rollback, osd_num_t peer_osd, int count, obj_ver_id *data); bool pick_next_recovery(osd_recovery_op_t &op); void submit_recovery_op(osd_recovery_op_t *op); + void finish_recovery_op(osd_recovery_op_t *op); bool continue_recovery(); pg_osd_set_state_t* change_osd_set(pg_osd_set_state_t *st, pg_t *pg); diff --git a/src/osd_flush.cpp b/src/osd_flush.cpp index a8e5ff48..0bacf3c5 100644 --- a/src/osd_flush.cpp +++ b/src/osd_flush.cpp @@ -325,30 +325,116 @@ void osd_t::submit_recovery_op(osd_recovery_op_t *op) { printf("Recovery operation done for %lx:%lx\n", op->oid.inode, op->oid.stripe); } - // CAREFUL! op = &recovery_ops[op->oid]. 
Don't access op->* after recovery_ops.erase() - op->osd_op = NULL; - recovery_ops.erase(op->oid); - delete osd_op; - if (immediate_commit != IMMEDIATE_ALL) + if (recovery_target_sleep_us) { - recovery_done++; - if (recovery_done >= recovery_sync_batch) + this->tfd->set_timer_us(recovery_target_sleep_us, false, [this, op](int timer_id) { - // Force sync every operations - // This is required not to pile up an excessive amount of delete operations - autosync(); - recovery_done = 0; - } + finish_recovery_op(op); + }); + } + else + { + finish_recovery_op(op); } - continue_recovery(); }; exec_op(op->osd_op); } +void osd_t::apply_recovery_tune_interval() +{ + if (rtune_timer_id >= 0) + { + tfd->clear_timer(rtune_timer_id); + rtune_timer_id = -1; + } + if (recovery_tune_interval != 0) + { + rtune_timer_id = this->tfd->set_timer(recovery_tune_interval*1000, true, [this](int timer_id) + { + tune_recovery(); + }); + } + else + { + recovery_target_queue_depth = recovery_queue_depth; + recovery_target_sleep_us = recovery_sleep_us; + } +} + +void osd_t::finish_recovery_op(osd_recovery_op_t *op) +{ + // CAREFUL! op = &recovery_ops[op->oid]. 
Don't access op->* after recovery_ops.erase() + delete op->osd_op; + op->osd_op = NULL; + recovery_ops.erase(op->oid); + if (immediate_commit != IMMEDIATE_ALL) + { + recovery_done++; + if (recovery_done >= recovery_sync_batch) + { + // Force sync every recovery_sync_batch operations + // This is required not to pile up an excessive amount of delete operations + autosync(); + recovery_done = 0; + } + } + continue_recovery(); +} + +void osd_t::tune_recovery() +{ + static int total_client_ops[] = { OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE }; + uint64_t total_client_usec = 0; + for (int i = 0; i < sizeof(total_client_ops)/sizeof(total_client_ops[0]); i++) + { + total_client_usec += (msgr.stats.op_stat_sum[total_client_ops[i]] - rtune_prev_stats.op_stat_sum[total_client_ops[i]]); + rtune_prev_stats.op_stat_sum[total_client_ops[i]] = msgr.stats.op_stat_sum[total_client_ops[i]]; + } + uint64_t total_recovery_usec = 0, recovery_count = 0; + total_recovery_usec += recovery_stat[0].usec-rtune_prev_recovery[0].usec; + total_recovery_usec += recovery_stat[1].usec-rtune_prev_recovery[1].usec; + recovery_count += recovery_stat[0].count-rtune_prev_recovery[0].count; + recovery_count += recovery_stat[1].count-rtune_prev_recovery[1].count; + memcpy(rtune_prev_recovery, recovery_stat, sizeof(recovery_stat)); + if (recovery_count == 0) + { + return; + } + rtune_avg_lat = total_recovery_usec/recovery_count*recovery_tune_ewma_rate + + rtune_avg_lat*(1-recovery_tune_ewma_rate); + rtune_avg_count = recovery_count*recovery_tune_ewma_rate + + rtune_avg_count*(1-recovery_tune_ewma_rate); + // client_util = count/interval * usec/1000000.0/count = usec/1000000.0/interval :-) + double client_util = total_client_usec/1000000.0/recovery_tune_interval; + rtune_client_util = rtune_client_util*(1-recovery_tune_ewma_rate) + client_util*recovery_tune_ewma_rate; + rtune_target_util = (rtune_client_util < recovery_tune_min_client_util + ?
recovery_tune_max_util + : recovery_tune_min_util + (rtune_client_util >= recovery_tune_max_client_util + ? 0 : (recovery_tune_max_util-recovery_tune_min_util)* + (recovery_tune_max_client_util-rtune_client_util)/(recovery_tune_max_client_util-recovery_tune_min_client_util) + ) + ); + // for example: utilisation = 8, target = 1 + // intuitively target latency should be 8x of real + // target_lat = rtune_avg_lat * utilisation / target_util + // = rtune_avg_lat * rtune_avg_lat * rtune_avg_iops / target_util + // = 0.0625 + recovery_target_queue_depth = (int)rtune_target_util + (rtune_target_util < 1 || rtune_target_util-(int)rtune_target_util >= 0.1 ? 1 : 0); + uint64_t target_lat = rtune_avg_lat * rtune_avg_lat/1000000.0*rtune_avg_count/recovery_tune_interval/rtune_target_util; + recovery_target_sleep_us = target_lat > rtune_avg_lat+recovery_tune_sleep_min_us ? target_lat-rtune_avg_lat : 0; + if (log_level > 3) + { + printf( + "recovery tune: client util %.2f (ewma %.2f), target util %.2f -> queue %ld, lat %lu us, real %lu us, pause %lu us\n", + client_util, rtune_client_util, rtune_target_util, recovery_target_queue_depth, target_lat, rtune_avg_lat, recovery_target_sleep_us + ); + } +} + // Just trigger write requests for degraded objects. 
They'll be recovered during writing bool osd_t::continue_recovery() { - while (recovery_ops.size() < recovery_queue_depth) + while (recovery_ops.size() < recovery_target_queue_depth) { osd_recovery_op_t op; if (pick_next_recovery(op)) diff --git a/src/osd_primary_write.cpp b/src/osd_primary_write.cpp index dee2da01..5a92f142 100644 --- a/src/osd_primary_write.cpp +++ b/src/osd_primary_write.cpp @@ -296,6 +296,7 @@ resume_7: if (!recovery_stat[recovery_type].count) // wrapped { memset(&recovery_print_prev[recovery_type], 0, sizeof(recovery_print_prev[recovery_type])); + memset(&rtune_prev_recovery[recovery_type], 0, sizeof(rtune_prev_recovery[recovery_type])); memset(&recovery_stat[recovery_type], 0, sizeof(recovery_stat[recovery_type])); recovery_stat[recovery_type].count++; }