drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int
If there is no replication traffic within the idle timeout (ping-int seconds), DRBD sends a P_PING and adjusts the receive timeout to ping-timeout. If no P_PING_ACK is received within this ping-timeout, DRBD drops the connection and tries to re-establish it. To decide which timeout was active, we compared the socket's current receive timeout with the ping-timeout, and dropped the connection if the two were equal. By default, ping-int is 10 seconds and ping-timeout is 500 ms. Unfortunately, if ping-timeout is configured to the same value as ping-int, expiry of the idle timeout was mistaken for a missing ping ack, which caused an immediate reconnection attempt. Fix: allow both timeouts to be equal, and use a local variable to record which timeout is active. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
parent
53ea433145
commit
f36af18c7b
@ -4554,6 +4554,7 @@ int drbd_asender(struct drbd_thread *thi)
|
||||
int received = 0;
|
||||
int expect = sizeof(struct p_header80);
|
||||
int empty;
|
||||
int ping_timeout_active = 0;
|
||||
|
||||
sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
|
||||
|
||||
@ -4566,6 +4567,7 @@ int drbd_asender(struct drbd_thread *thi)
|
||||
ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
|
||||
mdev->meta.socket->sk->sk_rcvtimeo =
|
||||
mdev->net_conf->ping_timeo*HZ/10;
|
||||
ping_timeout_active = 1;
|
||||
}
|
||||
|
||||
/* conditionally cork;
|
||||
@ -4620,8 +4622,7 @@ int drbd_asender(struct drbd_thread *thi)
|
||||
dev_err(DEV, "meta connection shut down by peer.\n");
|
||||
goto reconnect;
|
||||
} else if (rv == -EAGAIN) {
|
||||
if (mdev->meta.socket->sk->sk_rcvtimeo ==
|
||||
mdev->net_conf->ping_timeo*HZ/10) {
|
||||
if (ping_timeout_active) {
|
||||
dev_err(DEV, "PingAck did not arrive in time.\n");
|
||||
goto reconnect;
|
||||
}
|
||||
@ -4660,6 +4661,11 @@ int drbd_asender(struct drbd_thread *thi)
|
||||
if (!cmd->process(mdev, h))
|
||||
goto reconnect;
|
||||
|
||||
/* the idle_timeout (ping-int)
|
||||
* has been restored in got_PingAck() */
|
||||
if (cmd == get_asender_cmd(P_PING_ACK))
|
||||
ping_timeout_active = 0;
|
||||
|
||||
buf = h;
|
||||
received = 0;
|
||||
expect = sizeof(struct p_header80);
|
||||
|
Loading…
Reference in New Issue
Block a user