[PATCH v1 1/1] [SRU][J:linux-bluefield] platform/mellanox: mlxbf-tmfifo: Drop Tx network packet when Tx TmFIFO is full
Hans de Goede
hdegoede at redhat.com
Thu Feb 1 09:34:41 UTC 2024
Hi Liming,
It is unclear to me why you are resending this patch ?
It has already been merged upstream:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/platform/mellanox?id=8cbc756b802605dee3dd40019bd75960772bacf5
Or perhaps your intend was to send this only to the
Ubuntu kernel team?
Regards,
Hans
On 1/31/24 19:32, Liming Sun wrote:
> BugLink: https://bugs.launchpad.net/bugs/2051871
>
> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> in the virtio_net driver which prints the "Tx timeout" warning
> message when a packet stays in Tx queue for too long. Below is an
> example of the reported message:
>
> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> last trans: 3079892256".
>
> This issue could happen when external host driver which drains the
> FIFO is restared, stopped or upgraded. To avoid such confusing
> "Tx timeout" messages, this commit adds logic to drop the outstanding
> Tx packet if it's not able to transmit in two seconds due to Tx FIFO
> full, which can be considered as congestion or out-of-resource drop.
>
> This commit also handles the special case that the packet is half-
> transmitted into the Tx FIFO. In such case, the packet is discarded
> with remaining length stored in vring->rem_padding. So paddings with
> zeros can be sent out when Tx space is available to maintain the
> integrity of the packet format. The padded packet will be dropped on
> the receiving side.
>
> Signed-off-by: Liming Sun <limings at nvidia.com>
> Reviewed-by: Ilpo Järvinen <ilpo.jarvinen at linux.intel.com>
> Link: https://lore.kernel.org/r/20240111173106.96958-1-limings@nvidia.com
> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
> (cherry picked from commit 8cbc756b802605dee3dd40019bd75960772bacf5)
> Signed-off-by: Liming Sun <limings at nvidia.com>
> ---
> drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++
> 1 file changed, 67 insertions(+)
>
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
> index a83f800f8e5e..58b5dc83d6c4 100644
> --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> @@ -50,6 +50,9 @@
> /* ACPI chip identifier for BlueField-3. */
> #define MLXBF_TMFIFO_BF3_UID "1"
>
> +/* Tx timeout in milliseconds. */
> +#define TMFIFO_TX_TIMEOUT 2000
> +
> struct mlxbf_tmfifo;
>
> /**
> @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
> * @drop_desc: dummy desc for packet dropping
> * @cur_len: processed length of the current descriptor
> * @rem_len: remaining length of the pending packet
> + * @rem_padding: remaining bytes to send as paddings
> * @pkt_len: total length of the pending packet
> * @next_avail: next avail descriptor id
> * @num: vring size (number of descriptors)
> * @align: vring alignment size
> * @index: vring index
> * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> + * @tx_timeout: expire time of last tx packet
> * @fifo: pointer to the tmfifo structure
> */
> struct mlxbf_tmfifo_vring {
> @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
> struct vring_desc drop_desc;
> int cur_len;
> int rem_len;
> + int rem_padding;
> u32 pkt_len;
> u16 next_avail;
> int num;
> int align;
> int index;
> int vdev_id;
> + unsigned long tx_timeout;
> struct mlxbf_tmfifo *fifo;
> };
>
> @@ -954,6 +961,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
> return true;
> }
>
> +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
> +{
> + unsigned long flags;
> +
> + /* Only handle Tx timeout for network vdev. */
> + if (vring->vdev_id != VIRTIO_ID_NET)
> + return;
> +
> + /* Initialize the timeout or return if not expired. */
> + if (!vring->tx_timeout) {
> + /* Initialize the timeout. */
> + vring->tx_timeout = jiffies +
> + msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
> + return;
> + } else if (time_before(jiffies, vring->tx_timeout)) {
> + /* Return if not timeout yet. */
> + return;
> + }
> +
> + /*
> + * Drop the packet after timeout. The outstanding packet is
> + * released and the remaining bytes will be sent with padding byte 0x00
> + * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
> + * either dropped directly, or appended into existing outstanding packet
> + * thus dropped as corrupted network packet.
> + */
> + vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
> + mlxbf_tmfifo_release_pkt(vring);
> + vring->cur_len = 0;
> + vring->rem_len = 0;
> + vring->fifo->vring[0] = NULL;
> +
> + /*
> + * Make sure the load/store are in order before
> + * returning back to virtio.
> + */
> + virtio_mb(false);
> +
> + /* Notify upper layer. */
> + spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
> + vring_interrupt(0, vring->vq);
> + spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
> +}
> +
> /* Rx & Tx processing of a queue. */
> static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> {
> @@ -976,6 +1027,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> return;
>
> do {
> +retry:
> /* Get available FIFO space. */
> if (avail == 0) {
> if (is_rx)
> @@ -986,6 +1038,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> break;
> }
>
> + /* Insert paddings for discarded Tx packet. */
> + if (!is_rx) {
> + vring->tx_timeout = 0;
> + while (vring->rem_padding >= sizeof(u64)) {
> + writeq(0, vring->fifo->tx_data);
> + vring->rem_padding -= sizeof(u64);
> + if (--avail == 0)
> + goto retry;
> + }
> + }
> +
> /* Console output always comes from the Tx buffer. */
> if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
> /* Check if there is any control data to send. */
> @@ -998,6 +1061,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> /* Handle one descriptor. */
> more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
> } while (more);
> +
> + /* Check Tx timeout. */
> + if (avail <= 0 && !is_rx)
> + mlxbf_tmfifo_check_tx_timeout(vring);
> }
>
> /* Handle Rx or Tx queues. */
More information about the kernel-team
mailing list