From 3c95c567322e1dfaa7e0348171181aaf63aa6049 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sun, 25 Mar 2018 10:39:15 -0400 Subject: [PATCH 1/7] net: qla3xxx: Eliminate duplicate barriers on weakly-ordered archs Code includes wmb() followed by writel(). writel() already has a barrier on some architectures like arm64. This ends up CPU observing two barriers back to back before executing the register write. Since code already has an explicit barrier call, changing code to wmb() writel_relaxed() mmiowb() for multi-arch support. Signed-off-by: Sinan Kaya Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qla3xxx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 9e5264d8773b..b48f76182049 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -1858,8 +1858,9 @@ static void ql_update_small_bufq_prod_index(struct ql3_adapter *qdev) qdev->small_buf_release_cnt -= 8; } wmb(); - writel(qdev->small_buf_q_producer_index, - &port_regs->CommonRegs.rxSmallQProducerIndex); + writel_relaxed(qdev->small_buf_q_producer_index, + &port_regs->CommonRegs.rxSmallQProducerIndex); + mmiowb(); } } From f9442ac498a86b1b0ec383a332be2635fbb1ed98 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sun, 25 Mar 2018 10:39:16 -0400 Subject: [PATCH 2/7] qlcnic: Eliminate duplicate barriers on weakly-ordered archs Code includes wmb() followed by writel(). writel() already has a barrier on some architectures like arm64. This ends up CPU observing two barriers back to back before executing the register write. Since code already has an explicit barrier call, changing writel() to writel_relaxed(). Signed-off-by: Sinan Kaya Acked-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 46b0372dd032..97c146e7698a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -478,7 +478,7 @@ irqreturn_t qlcnic_83xx_clear_legacy_intr(struct qlcnic_adapter *adapter) wmb(); /* clear the interrupt trigger control register */ - writel(0, adapter->isr_int_vec); + writel_relaxed(0, adapter->isr_int_vec); intr_val = readl(adapter->isr_int_vec); do { intr_val = readl(adapter->tgt_status_reg); From edd874235ac20a04483f15b27251203e9508dec5 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sun, 25 Mar 2018 10:39:17 -0400 Subject: [PATCH 3/7] bnx2x: Replace doorbell barrier() with wmb() barrier() doesn't guarantee memory writes to be observed by the hardware on all architectures. barrier() only tells compiler not to move this code with respect to other read/writes. If memory write needs to be observed by the HW, wmb() is the right choice. Signed-off-by: Sinan Kaya Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 3 ++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index d7c98e807ca8..0f86f1850de8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4153,7 +4153,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) wmb(); txdata->tx_db.data.prod += nbd; - barrier(); + /* make sure descriptor update is observed by HW */ + wmb(); DOORBELL(bp, txdata->cid, txdata->tx_db.raw); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 1e33abde4a3e..39af4f85379d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2591,7 +2591,8 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) wmb(); txdata->tx_db.data.prod += 2; - barrier(); + /* make sure descriptor update is observed by the HW */ + wmb(); DOORBELL(bp, txdata->cid, txdata->tx_db.raw); mmiowb(); From 7f883c774e1bad34d5474d8a74989336631ce178 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sun, 25 Mar 2018 10:39:18 -0400 Subject: [PATCH 4/7] bnx2x: Eliminate duplicate barriers on weakly-ordered archs Code includes wmb() followed by writel(). writel() already has a barrier on some architectures like arm64. This ends up CPU observing two barriers back to back before executing the register write. Since code already has an explicit barrier call, changing writel() to writel_relaxed(). Signed-off-by: Sinan Kaya Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 12 ++++++++---- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 4 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 4 +++- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 352beff796ae..d847e1b9c37b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -166,6 +166,12 @@ do { \ #define REG_RD8(bp, offset) readb(REG_ADDR(bp, offset)) #define REG_RD16(bp, offset) readw(REG_ADDR(bp, offset)) +#define REG_WR_RELAXED(bp, offset, val) \ + writel_relaxed((u32)val, REG_ADDR(bp, offset)) + +#define REG_WR16_RELAXED(bp, offset, val) \ + writew_relaxed((u16)val, REG_ADDR(bp, offset)) + #define REG_WR(bp, offset, val) writel((u32)val, REG_ADDR(bp, offset)) #define REG_WR8(bp, offset, val) writeb((u8)val, REG_ADDR(bp, offset)) #define REG_WR16(bp, offset, val) writew((u16)val, REG_ADDR(bp, offset)) @@ -758,10 +764,8 @@ struct bnx2x_fastpath { #if (BNX2X_DB_SHIFT < BNX2X_DB_MIN_SHIFT) #error "Min DB doorbell stride is 8" #endif -#define DOORBELL(bp, cid, val) \ - do { \ - writel((u32)(val), bp->doorbells + (bp->db_size * (cid))); \ - } while (0) +#define DOORBELL_RELAXED(bp, cid, val) \ + writel_relaxed((u32)(val), (bp)->doorbells + ((bp)->db_size * (cid))) /* TX CSUM helpers */ #define SKB_CS_OFF(skb) (offsetof(struct tcphdr, check) - \ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 0f86f1850de8..95871576ab92 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4156,7 +4156,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) /* make sure descriptor update is observed by HW */ wmb(); - DOORBELL(bp, txdata->cid, txdata->tx_db.raw); + DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw); mmiowb(); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index a5265e1344f1..a8ce5c55bbb0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -522,8 +522,8 @@ static inline void bnx2x_update_rx_prod(struct bnx2x *bp, wmb(); for (i = 0; i < sizeof(rx_prods)/4; i++) - REG_WR(bp, fp->ustorm_rx_prods_offset + i*4, - ((u32 *)&rx_prods)[i]); + REG_WR_RELAXED(bp, fp->ustorm_rx_prods_offset + i * 4, + ((u32 *)&rx_prods)[i]); mmiowb(); /* keep prod updates ordered */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 39af4f85379d..da18aa239acb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2593,7 +2593,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) txdata->tx_db.data.prod += 2; /* make sure descriptor update is observed by the HW */ wmb(); - DOORBELL(bp, txdata->cid, txdata->tx_db.raw); + DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw); mmiowb(); barrier(); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index fd97250d6371..c766ae23bc74 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -3817,8 +3817,8 @@ static void bnx2x_sp_prod_update(struct bnx2x *bp) */ mb(); - REG_WR16(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func), - bp->spq_prod_idx); + REG_WR16_RELAXED(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func), + bp->spq_prod_idx); mmiowb(); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 76a4668c50fe..8e0a317b31f7 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -170,7 +170,9 @@ static int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping) wmb(); /* Trigger the PF FW */ - writeb(1, &zone_data->trigger.vf_pf_channel.addr_valid); + writeb_relaxed(1, &zone_data->trigger.vf_pf_channel.addr_valid); + + mmiowb(); /* Wait for PF to complete */ while ((tout >= 0) && (!*done)) { From e42d8cee343a545ac2d9557a3b28708bbca2bd31 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sun, 25 Mar 2018 10:39:19 -0400 Subject: [PATCH 5/7] net: qlge: Eliminate duplicate barriers on weakly-ordered archs Code includes wmb() followed by writel(). writel() already has a barrier on some architectures like arm64. This ends up CPU observing two barriers back to back before executing the register write. Create a new wrapper function with relaxed write operator. Use the new wrapper when a write is following a wmb(). Signed-off-by: Sinan Kaya Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge.h | 16 ++++++++++++++++ drivers/net/ethernet/qlogic/qlge/qlge_main.c | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 84ac50f92c9c..3e71b65a9546 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -2184,6 +2184,22 @@ static inline void ql_write_db_reg(u32 val, void __iomem *addr) mmiowb(); } +/* + * Doorbell Registers: + * Doorbell registers are virtual registers in the PCI memory space. + * The space is allocated by the chip during PCI initialization. The + * device driver finds the doorbell address in BAR 3 in PCI config space. + * The registers are used to control outbound and inbound queues. For + * example, the producer index for an outbound queue. Each queue uses + * 1 4k chunk of memory. The lower half of the space is for outbound + * queues. The upper half is for inbound queues. + * Caller has to guarantee ordering. + */ +static inline void ql_write_db_reg_relaxed(u32 val, void __iomem *addr) +{ + writel_relaxed(val, addr); +} + /* * Shadow Registers: * Outbound queues have a consumer index that is maintained by the chip. diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 50038d946ced..8293c2028002 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2700,7 +2700,8 @@ static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev) tx_ring->prod_idx = 0; wmb(); - ql_write_db_reg(tx_ring->prod_idx, tx_ring->prod_idx_db_reg); + ql_write_db_reg_relaxed(tx_ring->prod_idx, tx_ring->prod_idx_db_reg); + mmiowb(); netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev, "tx queued, slot %d, len %d\n", tx_ring->prod_idx, skb->len); From fd141fa47c03018aa1f77c335b0f444493e145d5 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sun, 25 Mar 2018 10:39:20 -0400 Subject: [PATCH 6/7] bnxt_en: Eliminate duplicate barriers on weakly-ordered archs Code includes wmb() followed by writel(). writel() already has a barrier on some architectures like arm64. This ends up CPU observing two barriers back to back before executing the register write. Create a new wrapper function with relaxed write operator. Use the new wrapper when a write is following a wmb(). Since code already has an explicit barrier call, changing writel() to writel_relaxed(). Also add mmiowb() so that write code doesn't move outside of scope. Signed-off-by: Sinan Kaya Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c7e5e6f09647..3ff5f65758a3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1922,7 +1922,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) /* Sync BD data before updating doorbell */ wmb(); - bnxt_db_write(bp, db, DB_KEY_TX | prod); + bnxt_db_write_relaxed(bp, db, DB_KEY_TX | prod); } cpr->cp_raw_cons = raw_cons; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 5e3d62189cab..960162c9386c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1402,6 +1402,15 @@ static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr) ((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask); } +/* For TX and RX ring doorbells with no ordering guarantee*/ +static inline void bnxt_db_write_relaxed(struct bnxt *bp, void __iomem *db, + u32 val) +{ + writel_relaxed(val, db); + if (bp->flags & BNXT_FLAG_DOUBLE_DB) + writel_relaxed(val, db); +} + /* For TX and RX ring doorbells */ static inline void bnxt_db_write(struct bnxt *bp, void __iomem *db, u32 val) { From 6d2e1a8d5e25e5f4563f5ea24bcb5da1ae261b26 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sun, 25 Mar 2018 10:39:21 -0400 Subject: [PATCH 7/7] net: ena: Eliminate duplicate barriers on weakly-ordered archs Code includes barrier() followed by writel(). writel() already has a barrier on some architectures like arm64. This ends up CPU observing two barriers back to back before executing the register write. Create a new wrapper function with relaxed write operator. Use the new wrapper when a write is following a barrier(). Since code already has an explicit barrier call, changing writel() to writel_relaxed() and adding mmiowb() for ordering protection. Signed-off-by: Sinan Kaya Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 8 ++++++-- drivers/net/ethernet/amazon/ena/ena_eth_com.h | 8 ++++++-- drivers/net/ethernet/amazon/ena/ena_netdev.c | 5 +++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index bf2de5298005..1b9d3130af4d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -631,8 +631,10 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) */ wmb(); - writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); + writel_relaxed(mmio_read_reg, + ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); + mmiowb(); for (i = 0; i < timeout; i++) { if (read_resp->req_id == mmio_read->seq_num) break; @@ -1826,7 +1828,9 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) /* write the aenq doorbell after all AENQ descriptors were read */ mb(); - writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); + writel_relaxed((u32)aenq->head, + dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); + mmiowb(); } int ena_com_dev_reset(struct ena_com_dev *ena_dev, diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index 2f7657227cfe..6fdc753d9483 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -107,7 +107,8 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq) return io_sq->q_depth - 1 - cnt; } -static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq, + bool relaxed) { u16 tail; @@ -116,7 +117,10 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) pr_debug("write submission queue doorbell for queue: %d tail: %d\n", io_sq->qid, tail); - writel(tail, io_sq->db_addr); + if (relaxed) + writel_relaxed(tail, io_sq->db_addr); + else + writel(tail, io_sq->db_addr); return 0; } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 6975150d144e..a822e70c2af3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -556,7 +556,8 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) * issue a doorbell */ wmb(); - ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); + ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true); + mmiowb(); } rx_ring->next_to_use = next_to_use; @@ -2151,7 +2152,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) if (netif_xmit_stopped(txq) || !skb->xmit_more) { /* trigger the dma engine */ - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false); u64_stats_update_begin(&tx_ring->syncp); tx_ring->tx_stats.doorbells++; u64_stats_update_end(&tx_ring->syncp);