diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml index 95c3fade7a8d..1024297b3851 100644 --- a/Documentation/netlink/specs/handshake.yaml +++ b/Documentation/netlink/specs/handshake.yaml @@ -12,6 +12,12 @@ protocol: genetlink doc: Netlink protocol to request a transport layer security handshake. definitions: + - + type: const + name: max-errno + value: 4095 + header: linux/err.h + scope: kernel - type: enum name: handler-class @@ -80,6 +86,8 @@ attribute-sets: - name: status type: u32 + checks: + max: max-errno - name: sockfd type: s32 diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 0ff1658c2dc1..75e3ae0c16d0 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -829,12 +829,21 @@ int dpll_device_delete_ntf(struct dpll_device *dpll) return dpll_device_event_send(DPLL_CMD_DEVICE_DELETE_NTF, dpll); } -static int -__dpll_device_change_ntf(struct dpll_device *dpll) +/** + * __dpll_device_change_ntf - notify that the dpll device has been changed + * @dpll: registered dpll pointer + * + * Context: caller must hold dpll_lock. Suitable for use inside device + * callbacks which are already invoked under dpll_lock. + * Return: 0 if succeeds, error code otherwise. + */ +int __dpll_device_change_ntf(struct dpll_device *dpll) { + lockdep_assert_held(&dpll_lock); dpll_device_notify(dpll, DPLL_DEVICE_CHANGED); return dpll_device_event_send(DPLL_CMD_DEVICE_CHANGE_NTF, dpll); } +EXPORT_SYMBOL_GPL(__dpll_device_change_ntf); /** * dpll_device_change_ntf - notify that the dpll device has been changed diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 5f1e70f3e40a..0a133b0f2d97 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -762,18 +762,15 @@ zl3073x_dev_periodic_work(struct kthread_work *work) dev_warn(zldev->dev, "Failed to update phase offsets: %pe\n", ERR_PTR(rc)); - /* Update measured input reference frequencies if any DPLL has - * frequency monitoring enabled. + /* Update measured input reference frequencies if frequency + * monitoring is enabled. */ - list_for_each_entry(zldpll, &zldev->dplls, list) { - if (zldpll->freq_monitor) { - rc = zl3073x_ref_freq_meas_update(zldev); - if (rc) - dev_warn(zldev->dev, - "Failed to update measured frequencies: %pe\n", - ERR_PTR(rc)); - break; - } + if (zldev->freq_monitor) { + rc = zl3073x_ref_freq_meas_update(zldev); + if (rc) + dev_warn(zldev->dev, + "Failed to update measured frequencies: %pe\n", + ERR_PTR(rc)); } /* Update references' fractional frequency offsets */ diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h index 99440620407d..addba378b0df 100644 --- a/drivers/dpll/zl3073x/core.h +++ b/drivers/dpll/zl3073x/core.h @@ -57,6 +57,7 @@ struct zl3073x_chip_info { * @work: periodic work * @clock_id: clock id of the device * @phase_avg_factor: phase offset measurement averaging factor + * @freq_monitor: is frequency monitor enabled */ struct zl3073x_dev { struct device *dev; @@ -77,9 +78,10 @@ struct zl3073x_dev { struct kthread_worker *kworker; struct kthread_delayed_work work; - /* Devlink parameters */ + /* Per-chip parameters */ u64 clock_id; u8 phase_avg_factor; + bool freq_monitor; }; extern const struct regmap_config zl3073x_regmap_config; diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 64b4e9e3e8fe..0bfcbae2109f 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -1079,15 +1079,6 @@ zl3073x_dpll_phase_offset_avg_factor_get(const struct dpll_device *dpll, return 0; } -static void -zl3073x_dpll_change_work(struct work_struct *work) -{ - struct zl3073x_dpll *zldpll; - - zldpll = container_of(work, struct zl3073x_dpll, change_work); - dpll_device_change_ntf(zldpll->dpll_dev); -} - static int zl3073x_dpll_phase_offset_avg_factor_set(const struct dpll_device *dpll, void *dpll_priv, u32 factor, @@ -1113,8 +1104,10 @@ zl3073x_dpll_phase_offset_avg_factor_set(const struct dpll_device *dpll, * we have to send a notification for other DPLL devices. */ list_for_each_entry(item, &zldpll->dev->dplls, list) { - if (item != zldpll) - schedule_work(&item->change_work); + struct dpll_device *dpll_dev = READ_ONCE(item->dpll_dev); + + if (item != zldpll && dpll_dev) + __dpll_device_change_ntf(dpll_dev); } return 0; @@ -1219,7 +1212,7 @@ zl3073x_dpll_freq_monitor_get(const struct dpll_device *dpll, { struct zl3073x_dpll *zldpll = dpll_priv; - if (zldpll->freq_monitor) + if (zldpll->dev->freq_monitor) *state = DPLL_FEATURE_STATE_ENABLE; else *state = DPLL_FEATURE_STATE_DISABLE; @@ -1233,9 +1226,19 @@ zl3073x_dpll_freq_monitor_set(const struct dpll_device *dpll, enum dpll_feature_state state, struct netlink_ext_ack *extack) { - struct zl3073x_dpll *zldpll = dpll_priv; + struct zl3073x_dpll *item, *zldpll = dpll_priv; - zldpll->freq_monitor = (state == DPLL_FEATURE_STATE_ENABLE); + zldpll->dev->freq_monitor = (state == DPLL_FEATURE_STATE_ENABLE); + + /* The frequency monitoring is common for all DPLL channels so after + * change we have to send a notification for other DPLL devices. + */ + list_for_each_entry(item, &zldpll->dev->dplls, list) { + struct dpll_device *dpll_dev = READ_ONCE(item->dpll_dev); + + if (item != zldpll && dpll_dev) + __dpll_device_change_ntf(dpll_dev); + } return 0; } @@ -1627,13 +1630,13 @@ zl3073x_dpll_device_register(struct zl3073x_dpll *zldpll) static void zl3073x_dpll_device_unregister(struct zl3073x_dpll *zldpll) { - WARN(!zldpll->dpll_dev, "DPLL device is not registered\n"); + struct dpll_device *dpll_dev = READ_ONCE(zldpll->dpll_dev); - cancel_work_sync(&zldpll->change_work); + WARN(!dpll_dev, "DPLL device is not registered\n"); - dpll_device_unregister(zldpll->dpll_dev, &zldpll->ops, zldpll); - dpll_device_put(zldpll->dpll_dev, &zldpll->tracker); - zldpll->dpll_dev = NULL; + WRITE_ONCE(zldpll->dpll_dev, NULL); + dpll_device_unregister(dpll_dev, &zldpll->ops, zldpll); + dpll_device_put(dpll_dev, &zldpll->tracker); } /** @@ -1752,7 +1755,7 @@ zl3073x_dpll_pin_measured_freq_check(struct zl3073x_dpll_pin *pin) u8 ref_id; u32 freq; - if (!zldpll->freq_monitor) + if (!zldpll->dev->freq_monitor) return false; ref_id = zl3073x_input_pin_ref_get(pin->id); @@ -1785,10 +1788,8 @@ zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll) struct zl3073x_dev *zldev = zldpll->dev; enum dpll_lock_status lock_status; struct device *dev = zldev->dev; - const struct zl3073x_chan *chan; struct zl3073x_dpll_pin *pin; int rc; - u8 mode; zldpll->check_count++; @@ -1807,15 +1808,6 @@ zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll) dpll_device_change_ntf(zldpll->dpll_dev); } - /* Input pin monitoring does make sense only in automatic - * or forced reference modes. - */ - chan = zl3073x_chan_state_get(zldev, zldpll->id); - mode = zl3073x_chan_mode_get(chan); - if (mode != ZL_DPLL_MODE_REFSEL_MODE_AUTO && - mode != ZL_DPLL_MODE_REFSEL_MODE_REFLOCK) - return; - /* Update phase offset latch registers for this DPLL if the phase * offset monitor feature is enabled. */ @@ -1926,7 +1918,6 @@ zl3073x_dpll_alloc(struct zl3073x_dev *zldev, u8 ch) zldpll->dev = zldev; zldpll->id = ch; INIT_LIST_HEAD(&zldpll->pins); - INIT_WORK(&zldpll->change_work, zl3073x_dpll_change_work); return zldpll; } diff --git a/drivers/dpll/zl3073x/dpll.h b/drivers/dpll/zl3073x/dpll.h index 434c32a7db12..21adcc18e45e 100644 --- a/drivers/dpll/zl3073x/dpll.h +++ b/drivers/dpll/zl3073x/dpll.h @@ -15,13 +15,11 @@ * @id: DPLL index * @check_count: periodic check counter * @phase_monitor: is phase offset monitor enabled - * @freq_monitor: is frequency monitor enabled * @ops: DPLL device operations for this instance * @dpll_dev: pointer to registered DPLL device * @tracker: tracking object for the acquired reference * @lock_status: last saved DPLL lock status * @pins: list of pins - * @change_work: device change notification work */ struct zl3073x_dpll { struct list_head list; @@ -29,13 +27,11 @@ struct zl3073x_dpll { u8 id; u8 check_count; bool phase_monitor; - bool freq_monitor; struct dpll_device_ops ops; struct dpll_device *dpll_dev; dpll_tracker tracker; enum dpll_lock_status lock_status; struct list_head pins; - struct work_struct change_work; }; struct zl3073x_dpll *zl3073x_dpll_alloc(struct zl3073x_dev *zldev, u8 ch); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index af82a3df2c5d..82e779f7916b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1890,6 +1890,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, struct sockaddr_storage ss; int res = 0, i; + if (slave_dev->type == ARPHRD_CAN) { + BOND_NL_ERR(bond_dev, extack, + "CAN devices cannot be enslaved"); + return -EPERM; + } + if (slave_dev->flags & IFF_MASTER && !netif_is_bond_master(slave_dev)) { BOND_NL_ERR(bond_dev, extack, diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 068da2fd1fea..f721e9893804 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -420,6 +420,9 @@ static int hbg_pci_init(struct pci_dev *pdev) return -ENOMEM; pci_set_master(pdev); + pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_RELAX_EN); + pci_save_state(pdev); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c index a4ea92c31c2f..0ae314994676 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c @@ -452,12 +452,12 @@ static bool hbg_sync_data_from_hw(struct hbg_priv *priv, { struct hbg_rx_desc *rx_desc; - /* make sure HW write desc complete */ - dma_rmb(); - dma_sync_single_for_cpu(&priv->pdev->dev, buffer->page_dma, buffer->page_size, DMA_FROM_DEVICE); + /* make sure HW write desc complete */ + dma_rmb(); + rx_desc = (struct hbg_rx_desc *)buffer->page_addr; return FIELD_GET(HBG_RX_DESC_W2_PKT_LEN_M, rx_desc->word2) != 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index e40b79076358..3cf131508ecf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -436,7 +436,7 @@ struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc) return &rvu->pf[rvu_get_pf(rvu->pdev, pcifunc)]; } -static bool is_pf_func_valid(struct rvu *rvu, u16 pcifunc) +bool is_pf_func_valid(struct rvu *rvu, u16 pcifunc) { int pf, vf, nvfs; u64 cfg; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index a466181cf908..de3fbd3d15d6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -917,6 +917,7 @@ u16 rvu_get_rsrc_mapcount(struct rvu_pfvf *pfvf, int blkaddr); struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc); void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf); bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr); +bool is_pf_func_valid(struct rvu *rvu, u16 pcifunc); bool is_pffunc_map_valid(struct rvu *rvu, u16 pcifunc, int blktype); int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot); int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c index 901f6fd40fd4..a2781e0f504e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c @@ -97,6 +97,14 @@ int rvu_mbox_handler_rep_event_notify(struct rvu *rvu, struct rep_event *req, { struct rep_evtq_ent *qentry; + /* The mailbox dispatcher normalises only the header pcifunc; the + * nested struct rep_event::pcifunc body field is sender-controlled + * and is later used by rvu_rep_up_notify() to index rvu->pf[] / + * rvu->hwvf[]. Reject out-of-range body selectors before queueing. + */ + if (!is_pf_func_valid(rvu, req->pcifunc)) + return -EINVAL; + qentry = kmalloc_obj(*qentry, GFP_ATOMIC); if (!qentry) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index aca77853abb8..5a172c572a68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -1320,8 +1320,10 @@ mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, break; case MLX5_REFORMAT_TYPE_REMOVE_HDR: hws_action = mlx5_fs_get_action_remove_header_vlan(fs_ctx, params); - if (!hws_action) + if (!hws_action) { mlx5_core_err(dev, "Only vlan remove header supported\n"); + return -EOPNOTSUPP; + } break; default: mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n", diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index f904f4d16b45..fb009120a924 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -808,7 +808,8 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u if (pn + 1 > rx_sa->next_pn_halves.lower) { rx_sa->next_pn_halves.lower = pn + 1; } else if (secy->xpn && - !pn_same_half(pn, rx_sa->next_pn_halves.lower)) { + (pn + 1 == 0 || + !pn_same_half(pn, rx_sa->next_pn_halves.lower))) { rx_sa->next_pn_halves.upper++; rx_sa->next_pn_halves.lower = pn + 1; } diff --git a/drivers/net/phy/air_en8811h.c b/drivers/net/phy/air_en8811h.c index 29ae73e65caa..a86129ce693c 100644 --- a/drivers/net/phy/air_en8811h.c +++ b/drivers/net/phy/air_en8811h.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -170,9 +171,23 @@ #define AN8811HB_CLK_DRV_CKO_LDPWD BIT(13) #define AN8811HB_CLK_DRV_CKO_LPPWD BIT(14) +#define AN8811HB_MCU_SW_RST 0x5cf9f8 +#define AN8811HB_MCU_SW_RST_HOLD BIT(16) +#define AN8811HB_MCU_SW_RST_RUN (BIT(16) | BIT(0)) +#define AN8811HB_MCU_SW_START 0x5cf9fc +#define AN8811HB_MCU_SW_START_EN BIT(16) + +/* MII register constants for PBUS access (PHY addr + 8) */ +#define AIR_PBUS_ADDR_HIGH 0x1c +#define AIR_PBUS_DATA_HIGH 0x10 +#define AIR_PBUS_REG_ADDR_HIGH_MASK GENMASK(15, 6) +#define AIR_PBUS_REG_ADDR_LOW_MASK GENMASK(5, 2) + /* Led definitions */ #define EN8811H_LED_COUNT 3 +#define EN8811H_PBUS_ADDR_OFFS 8 + /* Default LED setup: * GPIO5 <-> LED0 On: Link detected, blink Rx/Tx * GPIO4 <-> LED1 On: Link detected at 2500 or 1000 Mbps @@ -201,6 +216,7 @@ struct en8811h_priv { struct clk_hw hw; struct phy_device *phydev; unsigned int cko_is_enabled; + struct mdio_device *pbusdev; }; enum { @@ -254,6 +270,31 @@ static int air_phy_write_page(struct phy_device *phydev, int page) return __phy_write(phydev, AIR_EXT_PAGE_ACCESS, page); } +static int __air_pbus_reg_write(struct mdio_device *mdiodev, + u32 pbus_reg, u32 pbus_data) +{ + int ret; + + ret = __mdiobus_write(mdiodev->bus, mdiodev->addr, AIR_EXT_PAGE_ACCESS, + upper_16_bits(pbus_reg)); + if (ret < 0) + return ret; + + ret = __mdiobus_write(mdiodev->bus, mdiodev->addr, AIR_PBUS_ADDR_HIGH, + FIELD_GET(AIR_PBUS_REG_ADDR_HIGH_MASK, pbus_reg)); + if (ret < 0) + return ret; + + ret = __mdiobus_write(mdiodev->bus, mdiodev->addr, + FIELD_GET(AIR_PBUS_REG_ADDR_LOW_MASK, pbus_reg), + lower_16_bits(pbus_data)); + if (ret < 0) + return ret; + + return __mdiobus_write(mdiodev->bus, mdiodev->addr, AIR_PBUS_DATA_HIGH, + upper_16_bits(pbus_data)); +} + static int __air_buckpbus_reg_write(struct phy_device *phydev, u32 pbus_address, u32 pbus_data) { @@ -570,10 +611,67 @@ static int an8811hb_load_file(struct phy_device *phydev, const char *name, return ret; } +static int an8811hb_mcu_assert(struct phy_device *phydev) +{ + struct en8811h_priv *priv = phydev->priv; + int ret; + + phy_lock_mdio_bus(phydev); + + ret = __air_pbus_reg_write(priv->pbusdev, AN8811HB_MCU_SW_RST, + AN8811HB_MCU_SW_RST_HOLD); + if (ret < 0) + goto unlock; + + ret = __air_pbus_reg_write(priv->pbusdev, AN8811HB_MCU_SW_START, 0); + if (ret < 0) + goto unlock; + + msleep(50); + phydev_dbg(phydev, "MCU asserted\n"); + +unlock: + phy_unlock_mdio_bus(phydev); + return ret; +} + +static int an8811hb_mcu_deassert(struct phy_device *phydev) +{ + struct en8811h_priv *priv = phydev->priv; + int ret; + + phy_lock_mdio_bus(phydev); + + ret = __air_pbus_reg_write(priv->pbusdev, AN8811HB_MCU_SW_START, + AN8811HB_MCU_SW_START_EN); + if (ret < 0) + goto unlock; + + ret = __air_pbus_reg_write(priv->pbusdev, AN8811HB_MCU_SW_RST, + AN8811HB_MCU_SW_RST_RUN); + if (ret < 0) + goto unlock; + + msleep(50); + phydev_dbg(phydev, "MCU deasserted\n"); + +unlock: + phy_unlock_mdio_bus(phydev); + return ret; +} + static int an8811hb_load_firmware(struct phy_device *phydev) { int ret; + ret = an8811hb_mcu_assert(phydev); + if (ret < 0) + return ret; + + ret = an8811hb_mcu_deassert(phydev); + if (ret < 0) + return ret; + ret = air_buckpbus_reg_write(phydev, EN8811H_FW_CTRL_1, EN8811H_FW_CTRL_1_START); if (ret < 0) @@ -662,6 +760,16 @@ static int en8811h_restart_mcu(struct phy_device *phydev) { int ret; + if (phy_id_compare_model(phydev->phy_id, AN8811HB_PHY_ID)) { + ret = an8811hb_mcu_assert(phydev); + if (ret < 0) + return ret; + + ret = an8811hb_mcu_deassert(phydev); + if (ret < 0) + return ret; + } + ret = air_buckpbus_reg_write(phydev, EN8811H_FW_CTRL_1, EN8811H_FW_CTRL_1_START); if (ret < 0) @@ -1166,6 +1274,7 @@ static int en8811h_leds_setup(struct phy_device *phydev) static int an8811hb_probe(struct phy_device *phydev) { + struct mdio_device *mdiodev; struct en8811h_priv *priv; int ret; @@ -1175,10 +1284,28 @@ static int an8811hb_probe(struct phy_device *phydev) return -ENOMEM; phydev->priv = priv; + /* + * The AN8811HB PHY address is restricted to 8-15 (decimal), + * depending on the board hardware strapping. + * This means the PBUS address is only in the range 16-21 (decimal), + * so we do not need to handle the case + * where the PBUS address exceeds 31 (decimal). + */ + mdiodev = mdio_device_create(phydev->mdio.bus, + phydev->mdio.addr + EN8811H_PBUS_ADDR_OFFS); + if (IS_ERR(mdiodev)) + return PTR_ERR(mdiodev); + + ret = mdio_device_register(mdiodev); + if (ret) + goto err_dev_free; + + priv->pbusdev = mdiodev; + ret = an8811hb_load_firmware(phydev); if (ret < 0) { phydev_err(phydev, "Load firmware failed: %d\n", ret); - return ret; + goto err_dev_create; } en8811h_print_fw_version(phydev); @@ -1191,22 +1318,29 @@ static int an8811hb_probe(struct phy_device *phydev) ret = en8811h_leds_setup(phydev); if (ret < 0) - return ret; + goto err_dev_create; priv->phydev = phydev; /* Co-Clock Output */ ret = an8811hb_clk_provider_setup(&phydev->mdio.dev, &priv->hw); if (ret) - return ret; + goto err_dev_create; /* Configure led gpio pins as output */ ret = air_buckpbus_reg_modify(phydev, AN8811HB_GPIO_OUTPUT, AN8811HB_GPIO_OUTPUT_345, AN8811HB_GPIO_OUTPUT_345); if (ret < 0) - return ret; + goto err_dev_create; return 0; + +err_dev_create: + mdio_device_remove(mdiodev); + +err_dev_free: + mdio_device_free(mdiodev); + return ret; } static int en8811h_probe(struct phy_device *phydev) @@ -1561,6 +1695,16 @@ static int en8811h_suspend(struct phy_device *phydev) return genphy_suspend(phydev); } +static void an8811hb_remove(struct phy_device *phydev) +{ + struct en8811h_priv *priv = phydev->priv; + + if (priv->pbusdev) { + mdio_device_remove(priv->pbusdev); + mdio_device_free(priv->pbusdev); + } +} + static struct phy_driver en8811h_driver[] = { { PHY_ID_MATCH_MODEL(EN8811H_PHY_ID), @@ -1587,6 +1731,7 @@ static struct phy_driver en8811h_driver[] = { PHY_ID_MATCH_MODEL(AN8811HB_PHY_ID), .name = "Airoha AN8811HB", .probe = an8811hb_probe, + .remove = an8811hb_remove, .get_features = en8811h_get_features, .config_init = an8811hb_config_init, .get_rate_matching = en8811h_get_rate_matching, diff --git a/drivers/net/tap.c b/drivers/net/tap.c index a590e07ce0a9..fae115915c8e 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1052,6 +1052,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) int err, depth; if (unlikely(xdp->data_end - xdp->data < ETH_HLEN)) { + put_page(virt_to_head_page(xdp->data)); err = -EINVAL; goto err; } @@ -1061,6 +1062,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) skb = build_skb(xdp->data_hard_start, buflen); if (!skb) { + put_page(virt_to_head_page(xdp->data)); err = -ENOMEM; goto err; } diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 0c87f9972457..f51388d50307 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -534,21 +534,23 @@ static void team_adjust_ops(struct team *team) if (!team->tx_en_port_count || !team_is_mode_set(team) || !team->mode->ops->transmit) - team->ops.transmit = team_dummy_transmit; + WRITE_ONCE(team->ops.transmit, team_dummy_transmit); else - team->ops.transmit = team->mode->ops->transmit; + WRITE_ONCE(team->ops.transmit, team->mode->ops->transmit); if (!team->rx_en_port_count || !team_is_mode_set(team) || !team->mode->ops->receive) - team->ops.receive = team_dummy_receive; + WRITE_ONCE(team->ops.receive, team_dummy_receive); else - team->ops.receive = team->mode->ops->receive; + WRITE_ONCE(team->ops.receive, team->mode->ops->receive); } /* - * We can benefit from the fact that it's ensured no port is present - * at the time of mode change. Therefore no packets are in fly so there's no - * need to set mode operations in any special way. + * team_change_mode() ensures no ports are present during mode change, + * but lockless readers can still reach team_xmit(). Avoid touching + * transmit/receive -- they are already set to dummies by + * team_adjust_ops() since no ports are enabled. synchronize_net() + * drains in-flight readers before destroying old mode state. */ static int __team_change_mode(struct team *team, const struct team_mode *new_mode) @@ -557,9 +559,21 @@ static int __team_change_mode(struct team *team, if (team_is_mode_set(team)) { void (*exit_op)(struct team *team) = team->ops.exit; - /* Clear ops area so no callback is called any longer */ - memset(&team->ops, 0, sizeof(struct team_mode_ops)); - team_adjust_ops(team); + /* Clear cold-path ops used only under RTNL. transmit and + * receive are already dummies (no ports) so leave them + * alone -- overwriting them is the source of the race. + */ + team->ops.init = NULL; + team->ops.exit = NULL; + team->ops.port_enter = NULL; + team->ops.port_leave = NULL; + team->ops.port_change_dev_addr = NULL; + team->ops.port_tx_disabled = NULL; + + /* Wait for in-flight readers before tearing down mode + * state they may reference. + */ + synchronize_net(); if (exit_op) exit_op(team); @@ -582,7 +596,12 @@ static int __team_change_mode(struct team *team, } team->mode = new_mode; - memcpy(&team->ops, new_mode->ops, sizeof(struct team_mode_ops)); + team->ops.init = new_mode->ops->init; + team->ops.exit = new_mode->ops->exit; + team->ops.port_enter = new_mode->ops->port_enter; + team->ops.port_leave = new_mode->ops->port_leave; + team->ops.port_change_dev_addr = new_mode->ops->port_change_dev_addr; + team->ops.port_tx_disabled = new_mode->ops->port_tx_disabled; team_adjust_ops(team); return 0; @@ -743,7 +762,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) /* allow exact match delivery for disabled ports */ res = RX_HANDLER_EXACT; } else { - res = team->ops.receive(team, port, skb); + res = READ_ONCE(team->ops.receive)(team, port, skb); } if (res == RX_HANDLER_ANOTHER) { struct team_pcpu_stats *pcpu_stats; @@ -1845,7 +1864,7 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev) tx_success = team_queue_override_transmit(team, skb); if (!tx_success) - tx_success = team->ops.transmit(team, skb); + tx_success = READ_ONCE(team->ops.transmit)(team, skb); if (tx_success) { struct team_pcpu_stats *pcpu_stats; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b183189f1853..9e7744eb57a3 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2394,8 +2394,10 @@ static int tun_xdp_one(struct tun_struct *tun, bool skb_xdp = false; struct page *page; - if (unlikely(datasize < ETH_HLEN)) + if (unlikely(datasize < ETH_HLEN)) { + put_page(virt_to_head_page(xdp->data)); return -EINVAL; + } xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) { @@ -2437,6 +2439,7 @@ static int tun_xdp_one(struct tun_struct *tun, build: skb = build_skb(xdp->data_hard_start, buflen); if (!skb) { + put_page(virt_to_head_page(xdp->data)); ret = -ENOMEM; goto out; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index e88798497503..b5b1253ac08b 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2531,7 +2531,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), vni, md, flags, udp_sum); @@ -2605,7 +2605,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); skb_scrub_packet(skb, xnet); err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index b3d34433bd14..a6c08175d9dd 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -267,6 +268,7 @@ static int nxp_nci_i2c_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct nxp_nci_i2c_phy *phy; + unsigned long irqflags; int r; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { @@ -303,9 +305,26 @@ static int nxp_nci_i2c_probe(struct i2c_client *client) if (r < 0) return r; + /* + * ACPI platforms may report incorrect IRQ trigger types + * (e.g. level-high), which can lead to interrupt storms. + * + * Use the historically stable rising-edge trigger for ACPI devices. + * + * On non-ACPI systems (e.g. Device Tree), prefer the firmware- + * provided trigger type, falling back to rising-edge if not set. + */ + if (ACPI_COMPANION(dev)) { + irqflags = IRQF_TRIGGER_RISING; + } else { + irqflags = irq_get_trigger_type(client->irq); + if (!irqflags) + irqflags = IRQF_TRIGGER_RISING; + } + r = request_threaded_irq(client->irq, NULL, nxp_nci_i2c_irq_thread_fn, - IRQF_ONESHOT, + irqflags | IRQF_ONESHOT, NXP_NCI_I2C_DRIVER_NAME, phy); if (r < 0) nfc_err(&client->dev, "Unable to register IRQ handler\n"); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 15d36d6a728e..68a1d7640494 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1702,7 +1702,7 @@ static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) qid, pskid, status); if (status) { - queue->tls_err = -status; + queue->tls_err = status; goto out_complete; } diff --git a/include/linux/dpll.h b/include/linux/dpll.h index f8037f1ab20b..2dbe8567eafc 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -284,6 +284,7 @@ void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, int dpll_pin_ref_sync_pair_add(struct dpll_pin *pin, struct dpll_pin *ref_sync_pin); +int __dpll_device_change_ntf(struct dpll_device *dpll); int dpll_device_change_ntf(struct dpll_device *dpll); int __dpll_pin_change_ntf(struct dpll_pin *pin); diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 9ee7014400e8..ad5563f0f864 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -18,9 +18,10 @@ struct nf_ct_gre_keymap { struct rcu_head rcu; }; -/* add new tuple->key_reply pair to keymap */ -int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, - struct nf_conntrack_tuple *t); +/* add tuple->key_reply pairs to keymap */ +bool nf_ct_gre_keymap_add(struct nf_conn *ct, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl); /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2bcf78a4de7b..3f06254ab1b7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -821,6 +821,7 @@ enum skb_tstamp_type { * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) * @skb_iif: ifindex of device we arrived on + * @tc_depth: counter for packet duplication * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices @@ -1030,6 +1031,7 @@ struct sk_buff { __u8 csum_not_inet:1; #endif __u8 unreadable:1; + __u8 tc_depth:2; #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index cff7b773e972..9d844354c4d9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -180,6 +180,13 @@ static inline u64 nft_reg_load64(const u32 *sreg) return get_unaligned((u64 *)sreg); } +static inline bool nft_reg_overlap(u8 src, u8 dst, u32 len) +{ + unsigned int n = DIV_ROUND_UP(len, sizeof(u32)); + + return src != dst && src < dst + n && dst < src + n; +} + static inline void nft_data_copy(u32 *dst, const struct nft_data *src, unsigned int len) { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c04a4d0889ae..b9591dd755f9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1000,19 +1000,25 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[], br_port_flags_change(p, changed_mask); if (tb[IFLA_BRPORT_COST]) { + spin_lock_bh(&p->br->lock); err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); + spin_unlock_bh(&p->br->lock); if (err) return err; } if (tb[IFLA_BRPORT_PRIORITY]) { + spin_lock_bh(&p->br->lock); err = br_stp_set_port_priority(p, nla_get_u16(tb[IFLA_BRPORT_PRIORITY])); + spin_unlock_bh(&p->br->lock); if (err) return err; } if (tb[IFLA_BRPORT_STATE]) { + spin_lock_bh(&p->br->lock); err = br_set_port_state(p, nla_get_u8(tb[IFLA_BRPORT_STATE])); + spin_unlock_bh(&p->br->lock); if (err) return err; } @@ -1114,9 +1120,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, if (err) return err; - spin_lock_bh(&p->br->lock); err = br_setport(p, tb, extack); - spin_unlock_bh(&p->br->lock); } else { /* Binary compatibility with old RSTP */ if (nla_len(protinfo) < sizeof(u8)) @@ -1203,17 +1207,10 @@ static int br_port_slave_changelink(struct net_device *brdev, struct nlattr *data[], struct netlink_ext_ack *extack) { - struct net_bridge *br = netdev_priv(brdev); - int ret; - if (!data) return 0; - spin_lock_bh(&br->lock); - ret = br_setport(br_port_get_rtnl(dev), data, extack); - spin_unlock_bh(&br->lock); - - return ret; + return br_setport(br_port_get_rtnl(dev), data, extack); } static int br_port_fill_slave_info(struct sk_buff *skb, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 18b558a931ad..ee3ad9dfbab9 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -99,7 +99,6 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, attr.u.brport_flags.val = flags; attr.u.brport_flags.mask = mask; - /* We run from atomic context here */ err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev, &info.info, extack); err = notifier_to_errno(err); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 1f57c36a7fc0..d6df81fa0d13 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -86,16 +86,34 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) return sysfs_emit(buf, "%d\n", p->path_cost); } -static BRPORT_ATTR(path_cost, 0644, - show_path_cost, br_stp_set_path_cost); +static int store_path_cost(struct net_bridge_port *p, unsigned long v) +{ + int ret; + + spin_lock_bh(&p->br->lock); + ret = br_stp_set_path_cost(p, v); + spin_unlock_bh(&p->br->lock); + return ret; +} + +static BRPORT_ATTR(path_cost, 0644, show_path_cost, store_path_cost); static ssize_t show_priority(struct net_bridge_port *p, char *buf) { return sysfs_emit(buf, "%d\n", p->priority); } -static BRPORT_ATTR(priority, 0644, - show_priority, br_stp_set_port_priority); +static int store_priority(struct net_bridge_port *p, unsigned long v) +{ + int ret; + + spin_lock_bh(&p->br->lock); + ret = br_stp_set_port_priority(p, v); + spin_unlock_bh(&p->br->lock); + return ret; +} + +static BRPORT_ATTR(priority, 0644, show_priority, store_priority); static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) { @@ -334,17 +352,13 @@ static ssize_t brport_store(struct kobject *kobj, ret = -ENOMEM; goto out_unlock; } - spin_lock_bh(&p->br->lock); ret = brport_attr->store_raw(p, buf_copy); - spin_unlock_bh(&p->br->lock); kfree(buf_copy); } else if (brport_attr->store) { val = simple_strtoul(buf, &endp, 0); if (endp == buf) goto out_unlock; - spin_lock_bh(&p->br->lock); ret = brport_attr->store(p, val); - spin_unlock_bh(&p->br->lock); } if (!ret) { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index b9f4daac09af..8a6a069329d2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1956,6 +1956,25 @@ enum compat_mwt { EBT_COMPAT_TARGET, }; +static bool match_size_ok(const struct xt_match *match, unsigned int match_size) +{ + u16 csize; + + if (match->matchsize == -1) /* cannot validate ebt_among */ + return true; + + csize = match->compatsize ? : match->matchsize; + + return match_size >= csize; +} + +static bool tgt_size_ok(const struct xt_target *tgt, unsigned int tgt_size) +{ + u16 csize = tgt->compatsize ? : tgt->targetsize; + + return tgt_size >= csize; +} + static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, enum compat_mwt compat_mwt, struct ebt_entries_buf_state *state, @@ -1981,6 +2000,11 @@ static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, if (IS_ERR(match)) return PTR_ERR(match); + if (!match_size_ok(match, match_size)) { + module_put(match->me); + return -EINVAL; + } + off = ebt_compat_match_offset(match, match_size); if (dst) { if (match->compat_from_user) @@ -2000,6 +2024,12 @@ static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, mwt->u.revision); if (IS_ERR(wt)) return PTR_ERR(wt); + + if (!tgt_size_ok(wt, match_size)) { + module_put(wt->me); + return -EINVAL; + } + off = xt_compat_target_offset(wt); if (dst) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 44ac121cfccb..0d3cc115f2e7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2787,6 +2787,8 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) skb->data_len = 0; skb_set_tail_pointer(skb, len); } + if (!skb_shinfo(skb)->nr_frags && !skb_has_frag_list(skb)) + skb->unreadable = 0; if (!skb->sk || skb->destructor == sock_edemux) skb_condense(skb); @@ -2794,16 +2796,37 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) } EXPORT_SYMBOL(___pskb_trim); +static int pskb_trim_rcsum_complete(struct sk_buff *skb, unsigned int len) +{ + int delta = skb->len - len; + + if (skb_frags_readable(skb)) { + skb->csum = csum_block_sub(skb->csum, + skb_checksum(skb, len, delta, 0), + len); + return 0; + } + + if (len > skb_headlen(skb)) + return -EFAULT; + + /* The trimmed bytes are unreadable, but the remaining packet can be + * checksummed by software after trimming. + */ + skb->ip_summed = CHECKSUM_NONE; + return 0; +} + /* Note : use pskb_trim_rcsum() instead of calling this directly */ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) { - int delta = skb->len - len; + int err; - skb->csum = csum_block_sub(skb->csum, - skb_checksum(skb, len, delta, 0), - len); + err = pskb_trim_rcsum_complete(skb, len); + if (err) + return err; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len; int offset = skb_checksum_start_offset(skb) + skb->csum_offset; @@ -6810,6 +6833,8 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb_kfree_head(data); return -ENOMEM; } + if (skb_zcopy(skb)) + net_zcopy_get(skb_zcopy(skb)); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) @@ -6953,6 +6978,8 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb_kfree_head(data); return -ENOMEM; } + if (skb_zcopy(skb)) + net_zcopy_get(skb_zcopy(skb)); skb_release_data(skb, SKB_CONSUMED); skb->head = data; diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h index 4a9a946cabf0..778783a0f23c 100644 --- a/net/ethtool/cmis.h +++ b/net/ethtool/cmis.h @@ -63,9 +63,9 @@ struct ethtool_cmis_cdb_request { * struct ethtool_cmis_cdb_cmd_args - CDB commands execution arguments * @req: CDB command fields as described in the CMIS standard. * @max_duration: Maximum duration time for command completion in msec. + * @msleep_pre_rpl: Waiting time before checking reply in msec. * @read_write_len_ext: Allowable additional number of byte octets to the LPL * in a READ or a WRITE commands. - * @msleep_pre_rpl: Waiting time before checking reply in msec. * @rpl_exp_len: Expected reply length in bytes. * @flags: Validation flags for CDB commands. * @err_msg: Error message to be sent to user space. @@ -73,8 +73,8 @@ struct ethtool_cmis_cdb_request { struct ethtool_cmis_cdb_cmd_args { struct ethtool_cmis_cdb_request req; u16 max_duration; + u16 msleep_pre_rpl; u8 read_write_len_ext; - u8 msleep_pre_rpl; u8 rpl_exp_len; u8 flags; char *err_msg; diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c index 3670ca42dd40..f3a53a984460 100644 --- a/net/ethtool/cmis_cdb.c +++ b/net/ethtool/cmis_cdb.c @@ -513,8 +513,13 @@ static int cmis_cdb_process_reply(struct net_device *dev, } rpl = (struct ethtool_cmis_cdb_rpl *)page_data->data; - if ((args->rpl_exp_len > rpl->hdr.rpl_len + rpl_hdr_len) || - !rpl->hdr.rpl_chk_code) { + if (rpl->hdr.rpl_len != args->rpl_exp_len) { + netdev_warn(dev, "CDB reply length mismatch, expected %u got %u\n", + args->rpl_exp_len, rpl->hdr.rpl_len); + err = -EIO; + goto out; + } + if (!rpl->hdr.rpl_chk_code) { err = -EIO; goto out; } diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index df5f344209c4..291d04d2776a 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -44,6 +44,20 @@ enum cmis_cdb_fw_write_mechanism { CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11, }; +/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard + * revision 5.2. + * struct cmis_cdb_start_fw_download_pl is a structured layout of the + * flat array, ethtool_cmis_cdb_request::payload. + */ +struct cmis_cdb_start_fw_download_pl { + __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */, + __be32 image_size; + __be32 resv1; + ); + u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - + sizeof(struct cmis_cdb_start_fw_download_pl_h)]; +}; + static int cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, struct net_device *dev, @@ -86,6 +100,14 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, */ cdb->read_write_len_ext = rpl->read_write_len_ext; fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size; + if (fw_mng->start_cmd_payload_size > + sizeof_field(struct cmis_cdb_start_fw_download_pl, vendor_data)) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Start cmd payload size exceeds max LPL payload", + NULL); + return -EINVAL; + } + fw_mng->write_mechanism = rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL ? CMIS_CDB_FW_WRITE_MECHANISM_LPL : @@ -97,20 +119,6 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, return 0; } -/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard - * revision 5.2. - * struct cmis_cdb_start_fw_download_pl is a structured layout of the - * flat array, ethtool_cmis_cdb_request::payload. - */ -struct cmis_cdb_start_fw_download_pl { - __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */, - __be32 image_size; - __be32 resv1; - ); - u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - - sizeof(struct cmis_cdb_start_fw_download_pl_h)]; -}; - static int cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb, struct ethtool_cmis_fw_update_params *fw_update, @@ -122,6 +130,14 @@ cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb, u8 lpl_len; int err; + if (fw_update->fw->size < vendor_data_size) { + ethnl_module_fw_flash_ntf_err(fw_update->dev, + &fw_update->ntf_params, + "Firmware image too small for module's start payload", + NULL); + return -EINVAL; + } + pl.image_size = cpu_to_be32(fw_update->fw->size); memcpy(pl.vendor_data, fw_update->fw->data, vendor_data_size); diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 1e2c5c7048a8..e73fc3e5a02b 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -472,6 +472,12 @@ static int ethnl_update_profile(struct net_device *dev, nla_for_each_nested_type(nest, ETHTOOL_A_PROFILE_IRQ_MODERATION, nests, rem) { + if (i >= NET_DIM_PARAMS_NUM_PROFILES) { + NL_SET_BAD_ATTR(extack, nest); + ret = -E2BIG; + goto err_out; + } + ret = nla_parse_nested(tb, len_irq_moder - 1, nest, coalesce_irq_moderation_policy, extack); diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index a557e3996c85..0b8cfeddb014 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -44,6 +44,9 @@ static int fallback_set_params(struct eeprom_req_info *request, if (offset >= modinfo->eeprom_len) return -EINVAL; + if (length > modinfo->eeprom_len - offset) + return -EINVAL; + eeprom->cmd = ETHTOOL_GMODULEEEPROM; eeprom->len = length; eeprom->offset = offset; @@ -69,7 +72,7 @@ static int eeprom_fallback(struct eeprom_req_info *request, if (err < 0) return err; - data = kmalloc(eeprom.len, GFP_KERNEL); + data = kzalloc(eeprom.len, GFP_KERNEL); if (!data) return -ENOMEM; err = ethtool_get_module_eeprom_call(dev, &eeprom, data); @@ -141,12 +144,11 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base, return 0; err_ops: + if (ret == -EOPNOTSUPP) + ret = eeprom_fallback(request, reply); ethnl_ops_complete(dev); err_free: kfree(page_data.data); - - if (ret == -EOPNOTSUPP) - return eeprom_fallback(request, reply); return ret; } diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 8a5985fd7712..24569e92942c 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -106,10 +106,8 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER, info->extack); - if (IS_ERR(phydev)) { - ret = PTR_ERR(phydev); - goto out; - } + if (IS_ERR(phydev)) + return PTR_ERR(phydev); ret = ethnl_ops_begin(dev); if (ret < 0) diff --git a/net/ethtool/module.c b/net/ethtool/module.c index cad2eb25b5a4..ea4fb2a76650 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -120,12 +120,6 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info, if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]) return 0; - if (req_info->dev->ethtool->module_fw_flash_in_progress) { - NL_SET_ERR_MSG(info->extack, - "Module firmware flashing is in progress"); - return -EBUSY; - } - if (!ops->get_module_power_mode || !ops->set_module_power_mode) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY], @@ -148,6 +142,12 @@ ethnl_set_module(struct ethnl_req_info *req_info, struct genl_info *info) ops = dev->ethtool_ops; + if (dev->ethtool->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(info->extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]); ret = ops->get_module_power_mode(dev, &power, info->extack); if (ret < 0) @@ -221,14 +221,22 @@ static void module_flash_fw_work_list_del(struct list_head *list) static void module_flash_fw_work(struct work_struct *work) { struct ethtool_module_fw_flash *module_fw; + struct net_device *dev; module_fw = container_of(work, struct ethtool_module_fw_flash, work); + dev = module_fw->fw_update.dev; ethtool_cmis_fw_update(&module_fw->fw_update); module_flash_fw_work_list_del(&module_fw->list); - module_fw->fw_update.dev->ethtool->module_fw_flash_in_progress = false; - netdev_put(module_fw->fw_update.dev, &module_fw->dev_tracker); + + rtnl_lock(); + netdev_lock_ops(dev); + dev->ethtool->module_fw_flash_in_progress = false; + netdev_unlock_ops(dev); + rtnl_unlock(); + + netdev_put(dev, &module_fw->dev_tracker); release_firmware(module_fw->fw_update.fw); kfree(module_fw); } @@ -283,11 +291,9 @@ void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv) spin_lock(&module_fw_flash_work_list_lock); list_for_each_entry(work, &module_fw_flash_work_list, list) { - if (work->fw_update.dev == sk_priv->dev && - work->fw_update.ntf_params.portid == sk_priv->portid) { + if (work->fw_update.ntf_params.portid == sk_priv->portid && + dev_net(work->fw_update.dev) == sk_priv->net) work->fw_update.ntf_params.closed_sock = true; - break; - } } spin_unlock(&module_fw_flash_work_list_lock); } @@ -319,14 +325,13 @@ module_flash_fw_schedule(struct net_device *dev, const char *file_name, if (err < 0) goto err_release_firmware; - dev->ethtool->module_fw_flash_in_progress = true; - netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); fw_update->dev = dev; fw_update->ntf_params.portid = info->snd_portid; fw_update->ntf_params.seq = info->snd_seq; fw_update->ntf_params.closed_sock = false; - err = ethnl_sock_priv_set(skb, dev, fw_update->ntf_params.portid, + err = ethnl_sock_priv_set(skb, dev_net(dev), + fw_update->ntf_params.portid, ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH); if (err < 0) goto err_release_firmware; @@ -335,6 +340,9 @@ module_flash_fw_schedule(struct net_device *dev, const char *file_name, if (err < 0) goto err_release_firmware; + dev->ethtool->module_fw_flash_in_progress = true; + netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); + schedule_work(&module_fw->work); return 0; @@ -427,10 +435,11 @@ int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info) ret = ethnl_module_fw_flash_validate(dev, info->extack); if (ret < 0) - goto out_unlock; + goto out_complete; ret = module_flash_fw(dev, tb, skb, info); +out_complete: ethnl_ops_complete(dev); out_unlock: diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5046023a30b1..7d45f9a884e5 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -53,7 +53,7 @@ const struct nla_policy ethnl_header_policy_phy_stats[] = { [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; -int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, +int ethnl_sock_priv_set(struct sk_buff *skb, struct net *net, u32 portid, enum ethnl_sock_type type) { struct ethnl_sock_priv *sk_priv; @@ -62,7 +62,7 @@ int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, if (IS_ERR(sk_priv)) return PTR_ERR(sk_priv); - sk_priv->dev = dev; + sk_priv->net = net; sk_priv->portid = portid; sk_priv->type = type; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index aaf6f2468768..fd2198e45d2b 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -318,12 +318,12 @@ enum ethnl_sock_type { }; struct ethnl_sock_priv { - struct net_device *dev; + struct net *net; u32 portid; enum ethnl_sock_type type; }; -int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, +int ethnl_sock_priv_set(struct sk_buff *skb, struct net *net, u32 portid, enum ethnl_sock_type type); /** diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2eb9bdc2dcb9..757c9e0cc856 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -62,14 +62,14 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - ret = ethnl_ops_begin(dev); - if (ret < 0) - return ret; - phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PSE_HEADER, info->extack); if (IS_ERR(phydev)) - return -ENODEV; + return PTR_ERR(phydev); + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; ret = pse_get_pse_attributes(phydev, info->extack, data); diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 353110b862ab..53792f53f922 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -134,8 +134,7 @@ rss_get_data_alloc(struct net_device *dev, struct rss_reply_data *data) if (!rss_config) return -ENOMEM; - if (data->indir_size) - data->indir_table = (u32 *)rss_config; + data->indir_table = (u32 *)rss_config; if (data->hkey_size) data->hkey = rss_config + indir_bytes; @@ -170,8 +169,10 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, rxfh.key = data->hkey; ret = ops->get_rxfh(dev, &rxfh); - if (ret) + if (ret) { + rss_get_data_free(data); goto out_unlock; + } data->hfunc = rxfh.hfunc; data->input_xfrm = rxfh.input_xfrm; @@ -686,7 +687,7 @@ rss_set_prep_indir(struct net_device *dev, struct genl_info *info, ethtool_rxfh_indir_default(i, num_rx_rings); } - *mod |= memcmp(rxfh->indir, data->indir_table, data->indir_size); + *mod |= memcmp(rxfh->indir, data->indir_table, alloc_size); return user_size; @@ -981,11 +982,17 @@ ethnl_rss_create_validate(struct net_device *dev, struct genl_info *info) } static void -ethnl_rss_create_send_ntf(struct sk_buff *rsp, struct net_device *dev) +ethnl_rss_create_send_ntf(const struct sk_buff *rsp, struct net_device *dev) { - struct nlmsghdr *nlh = (void *)rsp->data; struct genlmsghdr *genl_hdr; + struct nlmsghdr *nlh; + struct sk_buff *ntf; + ntf = skb_copy_expand(rsp, 0, 0, GFP_KERNEL); + if (!ntf) + return; + + nlh = nlmsg_hdr(ntf); /* Convert the reply into a notification */ nlh->nlmsg_pid = 0; nlh->nlmsg_seq = ethnl_bcast_seq_next(); @@ -993,7 +1000,7 @@ ethnl_rss_create_send_ntf(struct sk_buff *rsp, struct net_device *dev) genl_hdr = nlmsg_data(nlh); genl_hdr->cmd = ETHTOOL_MSG_RSS_CREATE_NTF; - ethnl_multicast(rsp, dev); + ethnl_multicast(ntf, dev); } int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) @@ -1099,17 +1106,13 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) ntf_fail |= rss_fill_reply(rsp, &req.base, &data.base); if (WARN_ON(!hdr || ntf_fail)) { ret = -EMSGSIZE; - goto exit_unlock; + goto err_remove_ctx; } genlmsg_end(rsp, hdr); - /* Use the same skb for the response and the notification, - * genlmsg_reply() will copy the skb if it has elevated user count. - */ - skb_get(rsp); - ret = genlmsg_reply(rsp, info); ethnl_rss_create_send_ntf(rsp, dev); + ret = genlmsg_reply(rsp, info); rsp = NULL; exit_unlock: @@ -1131,6 +1134,10 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) nlmsg_free(rsp); return ret; +err_remove_ctx: + if (ops->remove_rxfh_context(dev, ctx, req.rss_context, NULL)) + /* leave the context on failure, like ethnl_rss_delete_doit() */ + goto exit_unlock; err_ctx_id_free: xa_erase(&dev->ethtool->rss_ctx, req.rss_context); err_unlock_free_ctx: @@ -1168,8 +1175,10 @@ int ethnl_rss_delete_doit(struct sk_buff *skb, struct genl_info *info) dev = req.dev; ops = dev->ethtool_ops; - if (!ops->create_rxfh_context) + if (!ops->create_rxfh_context) { + ret = -EOPNOTSUPP; goto exit_free_dev; + } rtnl_lock(); netdev_lock_ops(dev); diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index bb1e829ba099..94c4718d31ae 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -311,7 +311,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base, return 0; } - phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS, + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_STRSET_HEADER, info->extack); /* phydev can be NULL, check for errors only */ diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c index e4f518e49d4c..fc4f93cfa459 100644 --- a/net/ethtool/tsconfig.c +++ b/net/ethtool/tsconfig.c @@ -69,8 +69,10 @@ static int tsconfig_prepare_data(const struct ethnl_req_info *req_base, if (ret) goto out; - if (ts_info.phc_index == -1) - return -ENODEV; + if (ts_info.phc_index == -1) { + ret = -ENODEV; + goto out; + } data->hwprov_desc.index = ts_info.phc_index; data->hwprov_desc.qualifier = ts_info.phc_qualifier; @@ -224,16 +226,21 @@ static int tsconfig_send_reply(struct net_device *dev, struct genl_info *info) reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_TSCONFIG_SET_REPLY, ETHTOOL_A_TSCONFIG_HEADER, info, &reply_payload); - if (!rskb) + if (!rskb) { + ret = -ENOMEM; goto err_cleanup; + } ret = tsconfig_fill_reply(rskb, &req_info->base, &reply_data->base); if (ret < 0) - goto err_cleanup; + goto err_free_msg; genlmsg_end(rskb, reply_payload); ret = genlmsg_reply(rskb, info); + rskb = NULL; +err_free_msg: + nlmsg_free(rskb); err_cleanup: kfree(reply_data); kfree(req_info); diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index a865f0fdd26b..14bf01e3b55c 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -83,6 +83,11 @@ tsinfo_parse_request(struct ethnl_req_info *req_base, if (!tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER]) return 0; + if (req_base->flags & ETHTOOL_FLAG_STATS) { + NL_SET_ERR_MSG(extack, "can't query statistics for a provider"); + return -EOPNOTSUPP; + } + return ts_parse_hwtst_provider(tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER], &req->hwprov_desc, extack, &mod); } @@ -402,10 +407,8 @@ static int ethnl_tsinfo_dump_one_netdev(struct sk_buff *skb, continue; ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); - if (IS_ERR(ehdr)) { - ret = PTR_ERR(ehdr); - goto err; - } + if (IS_ERR(ehdr)) + return PTR_ERR(ehdr); reply_data->ts_info.phc_qualifier = ctx->pos_phcqualifier; ret = ops->get_ts_info(dev, &reply_data->ts_info); @@ -523,6 +526,12 @@ int ethnl_tsinfo_start(struct netlink_callback *cb) if (ret < 0) goto free_reply_data; + if (req_info->base.flags & ETHTOOL_FLAG_STATS) { + NL_SET_ERR_MSG(cb->extack, "stats not supported in dump"); + ret = -EOPNOTSUPP; + goto err_dev_put; + } + ctx->req_info = req_info; ctx->reply_data = reply_data; ctx->pos_ifindex = 0; @@ -532,6 +541,8 @@ int ethnl_tsinfo_start(struct netlink_callback *cb) return 0; +err_dev_put: + ethnl_parse_header_dev_put(&req_info->base); free_reply_data: kfree(reply_data); free_req_info: diff --git a/net/handshake/genl.c b/net/handshake/genl.c index 870612609491..4b20cd9cdd0e 100644 --- a/net/handshake/genl.c +++ b/net/handshake/genl.c @@ -10,6 +10,7 @@ #include "genl.h" #include +#include /* HANDSHAKE_CMD_ACCEPT - do */ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = { @@ -18,7 +19,7 @@ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HAN /* HANDSHAKE_CMD_DONE - do */ static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = { - [HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, }, + [HANDSHAKE_A_DONE_STATUS] = NLA_POLICY_MAX(NLA_U32, MAX_ERRNO), [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_S32, }, [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, }, }; diff --git a/net/handshake/genl.h b/net/handshake/genl.h index 8d3e18672daf..46b65f131669 100644 --- a/net/handshake/genl.h +++ b/net/handshake/genl.h @@ -11,6 +11,7 @@ #include #include +#include int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info); int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info); diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c index 55442b2f518a..3dd507470d5f 100644 --- a/net/handshake/handshake-test.c +++ b/net/handshake/handshake-test.c @@ -25,7 +25,7 @@ static int test_accept_func(struct handshake_req *req, struct genl_info *info, return 0; } -static void test_done_func(struct handshake_req *req, unsigned int status, +static void test_done_func(struct handshake_req *req, int status, struct genl_info *info) { } @@ -208,6 +208,7 @@ static void handshake_req_submit_test3(struct kunit *test) static void handshake_req_submit_test4(struct kunit *test) { struct handshake_req *req, *result; + unsigned long fcount_before; struct socket *sock; struct file *filp; int err; @@ -224,8 +225,10 @@ static void handshake_req_submit_test4(struct kunit *test) KUNIT_ASSERT_NOT_NULL(test, sock->sk); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); /* Act */ result = handshake_req_hash_lookup(sock->sk); @@ -235,11 +238,13 @@ static void handshake_req_submit_test4(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, req, result); handshake_req_cancel(sock->sk); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_submit_test5(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req; struct handshake_net *hn; struct socket *sock; @@ -265,12 +270,14 @@ static void handshake_req_submit_test5(struct kunit *test) saved = hn->hn_pending; hn->hn_pending = hn->hn_pending_max + 1; + fcount_before = file_count(filp); /* Act */ err = handshake_req_submit(sock, req, GFP_KERNEL); /* Assert */ KUNIT_EXPECT_EQ(test, err, -EAGAIN); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); hn->hn_pending = saved; @@ -279,6 +286,7 @@ static void handshake_req_submit_test5(struct kunit *test) static void handshake_req_submit_test6(struct kunit *test) { struct handshake_req *req1, *req2; + unsigned long fcount_before; struct socket *sock; struct file *filp; int err; @@ -296,21 +304,26 @@ static void handshake_req_submit_test6(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); KUNIT_ASSERT_NOT_NULL(test, sock->sk); sock->file = filp; + fcount_before = file_count(filp); /* Act */ err = handshake_req_submit(sock, req1, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); err = handshake_req_submit(sock, req2, GFP_KERNEL); /* Assert */ KUNIT_EXPECT_EQ(test, err, -EBUSY); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); handshake_req_cancel(sock->sk); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_cancel_test1(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req; struct socket *sock; struct file *filp; @@ -329,8 +342,10 @@ static void handshake_req_cancel_test1(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); /* NB: handshake_req hasn't been accepted */ @@ -339,12 +354,14 @@ static void handshake_req_cancel_test1(struct kunit *test) /* Assert */ KUNIT_EXPECT_TRUE(test, result); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_cancel_test2(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req, *next; struct handshake_net *hn; struct socket *sock; @@ -365,8 +382,10 @@ static void handshake_req_cancel_test2(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); net = sock_net(sock->sk); hn = handshake_pernet(net); @@ -375,18 +394,24 @@ static void handshake_req_cancel_test2(struct kunit *test) /* Pretend to accept this request */ next = handshake_req_next(hn, HANDSHAKE_HANDLER_CLASS_TLSHD); KUNIT_ASSERT_PTR_EQ(test, req, next); + /* Simulate FD_PREPARE() consuming the file reference handed + * off by handshake_req_next(); see handshake_nl_accept_doit(). + */ + fput(filp); /* Act */ result = handshake_req_cancel(sock->sk); /* Assert */ KUNIT_EXPECT_TRUE(test, result); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_cancel_test3(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req, *next; struct handshake_net *hn; struct socket *sock; @@ -407,8 +432,10 @@ static void handshake_req_cancel_test3(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); net = sock_net(sock->sk); hn = handshake_pernet(net); @@ -417,15 +444,21 @@ static void handshake_req_cancel_test3(struct kunit *test) /* Pretend to accept this request */ next = handshake_req_next(hn, HANDSHAKE_HANDLER_CLASS_TLSHD); KUNIT_ASSERT_PTR_EQ(test, req, next); + /* Simulate FD_PREPARE() consuming the file reference handed + * off by handshake_req_next(); see handshake_nl_accept_doit(). + */ + fput(filp); /* Pretend to complete this request */ handshake_complete(next, -ETIMEDOUT, NULL); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); /* Act */ result = handshake_req_cancel(sock->sk); /* Assert */ KUNIT_EXPECT_FALSE(test, result); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } @@ -446,6 +479,7 @@ static struct handshake_proto handshake_req_alloc_proto_destroy = { static void handshake_req_destroy_test1(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req; struct socket *sock; struct file *filp; @@ -465,10 +499,12 @@ static void handshake_req_destroy_test1(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); handshake_req_cancel(sock->sk); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); /* Act */ /* Ensure the close/release/put process has run to diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h index a48163765a7a..da61cadd1ad3 100644 --- a/net/handshake/handshake.h +++ b/net/handshake/handshake.h @@ -24,6 +24,7 @@ enum hn_flags_bits { HANDSHAKE_F_NET_DRAINING, }; +struct file; struct handshake_proto; /* One handshake request */ @@ -32,6 +33,7 @@ struct handshake_req { struct rhash_head hr_rhash; unsigned long hr_flags; const struct handshake_proto *hr_proto; + struct file *hr_file; struct sock *hr_sk; void (*hr_odestruct)(struct sock *sk); @@ -57,7 +59,7 @@ struct handshake_proto { int (*hp_accept)(struct handshake_req *req, struct genl_info *info, int fd); void (*hp_done)(struct handshake_req *req, - unsigned int status, + int status, struct genl_info *info); void (*hp_destroy)(struct handshake_req *req); }; @@ -86,7 +88,7 @@ struct handshake_req *handshake_req_hash_lookup(struct sock *sk); struct handshake_req *handshake_req_next(struct handshake_net *hn, int class); int handshake_req_submit(struct socket *sock, struct handshake_req *req, gfp_t flags); -void handshake_complete(struct handshake_req *req, unsigned int status, +void handshake_complete(struct handshake_req *req, int status, struct genl_info *info); bool handshake_req_cancel(struct sock *sk); diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index b989456fc4c5..3fd4fef9bab1 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -92,7 +92,6 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct handshake_net *hn = handshake_pernet(net); struct handshake_req *req = NULL; - struct socket *sock; int class, err; err = -EOPNOTSUPP; @@ -107,15 +106,13 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) err = -EAGAIN; req = handshake_req_next(hn, class); if (req) { - sock = req->hr_sk->sk_socket; - - FD_PREPARE(fdf, O_CLOEXEC, sock->file); + FD_PREPARE(fdf, O_CLOEXEC, req->hr_file); if (fdf.err) { + fput(req->hr_file); /* drop ref from handshake_req_next() */ err = fdf.err; goto out_complete; } - get_file(sock->file); /* FD_PREPARE() consumes a reference. */ err = req->hr_proto->hp_accept(req, info, fd_prepare_fd(fdf)); if (err) goto out_complete; /* Automatic cleanup handles fput */ @@ -160,7 +157,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info) status = -EIO; if (info->attrs[HANDSHAKE_A_DONE_STATUS]) - status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); + status = -(int)nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); handshake_complete(req, status, info); sockfd_put(sock); @@ -202,21 +199,21 @@ static void __net_exit handshake_net_exit(struct net *net) * accepted and are in progress will be destroyed when * the socket is closed. */ - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags); - list_splice_init(&requests, &hn->hn_requests); - spin_unlock(&hn->hn_lock); + list_splice_init(&hn->hn_requests, &requests); + list_for_each_entry(req, &requests, hr_list) + get_file(req->hr_file); + spin_unlock_bh(&hn->hn_lock); while (!list_empty(&requests)) { + struct file *file; + req = list_first_entry(&requests, struct handshake_req, hr_list); - list_del(&req->hr_list); - - /* - * Requests on this list have not yet been - * accepted, so they do not have an fd to put. - */ - + file = req->hr_file; + list_del_init(&req->hr_list); handshake_complete(req, -ETIMEDOUT, NULL); + fput(file); } } diff --git a/net/handshake/request.c b/net/handshake/request.c index 2829adbeb149..cd30d54d0501 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -162,35 +163,56 @@ static void __remove_pending_locked(struct handshake_net *hn, * otherwise %false. * * If @req was on a pending list, it has not yet been accepted. + * Returns %false when the net namespace is draining; the drain + * loop has taken ownership of the pending list. */ static bool remove_pending(struct handshake_net *hn, struct handshake_req *req) { bool ret = false; - spin_lock(&hn->hn_lock); - if (!list_empty(&req->hr_list)) { + spin_lock_bh(&hn->hn_lock); + if (!test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags) && + !list_empty(&req->hr_list)) { __remove_pending_locked(hn, req); ret = true; } - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); return ret; } +/** + * handshake_req_next - Return the next queued handshake request + * @hn: per-net handshake state + * @class: handler class to match + * + * On a non-NULL return, the caller owns an extra reference + * on @req->hr_file. FD_PREPARE() consumes it on success; on + * the FD_PREPARE() failure path the caller must fput() it. + * + * Return: pointer to a removed handshake_req, or NULL. + */ struct handshake_req *handshake_req_next(struct handshake_net *hn, int class) { struct handshake_req *req, *pos; req = NULL; - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); list_for_each_entry(pos, &hn->hn_requests, hr_list) { if (pos->hr_proto->hp_handler_class != class) continue; __remove_pending_locked(hn, pos); + /* Hand off a file reference to the accept side under + * hn_lock. A concurrent handshake_req_cancel() can drop + * hr_file before accept reaches FD_PREPARE(); this extra + * reference keeps the file alive until FD_PREPARE() takes + * ownership. + */ + get_file(pos->hr_file); req = pos; break; } - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); return req; } @@ -215,9 +237,16 @@ EXPORT_SYMBOL_IF_KUNIT(handshake_req_next); * A zero return value from handshake_req_submit() means that * exactly one subsequent completion callback is guaranteed. * - * A negative return value from handshake_req_submit() means that - * no completion callback will be done and that @req has been - * destroyed. + * A negative return value from handshake_req_submit() guarantees that + * no completion callback will occur and that @req is no longer owned by + * the caller. If cancellation wins the completion race after the request + * has been published, final destruction is deferred until socket teardown. + * + * The caller must hold a reference on @sock->file for the duration + * of this call. Once the request is published to the accept side, a + * concurrent completion or cancellation may release the request's pin on + * @sock->file; the caller's reference is what keeps @sock->sk valid until + * handshake_req_submit() returns. */ int handshake_req_submit(struct socket *sock, struct handshake_req *req, gfp_t flags) @@ -236,6 +265,14 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, kfree(req); return -EINVAL; } + + /* + * Pin sock->file for the lifetime of the request so the + * accept side does not race a consumer that releases the + * socket while a handshake is pending. + */ + req->hr_file = get_file(sock->file); + req->hr_odestruct = req->hr_sk->sk_destruct; req->hr_sk->sk_destruct = handshake_sk_destruct; @@ -249,7 +286,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, if (READ_ONCE(hn->hn_pending) >= hn->hn_pending_max) goto out_err; - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); ret = -EOPNOTSUPP; if (test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags)) goto out_unlock; @@ -258,7 +295,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, goto out_unlock; if (!__add_pending_locked(hn, req)) goto out_unlock; - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); ret = handshake_genl_notify(net, req->hr_proto, flags); if (ret) { @@ -267,35 +304,36 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, goto out_err; } - /* Prevent socket release while a handshake request is pending */ - sock_hold(req->hr_sk); - trace_handshake_submit(net, req, req->hr_sk); return 0; out_unlock: - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); out_err: - /* Restore original destructor so socket teardown still runs on failure */ - req->hr_sk->sk_destruct = req->hr_odestruct; trace_handshake_submit_err(net, req, req->hr_sk, ret); - handshake_req_destroy(req); + if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + /* Restore original destructor so socket teardown still runs. */ + req->hr_sk->sk_destruct = req->hr_odestruct; + fput(req->hr_file); + handshake_req_destroy(req); + } return ret; } EXPORT_SYMBOL(handshake_req_submit); -void handshake_complete(struct handshake_req *req, unsigned int status, +void handshake_complete(struct handshake_req *req, int status, struct genl_info *info) { struct sock *sk = req->hr_sk; struct net *net = sock_net(sk); if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + struct file *file = req->hr_file; + trace_handshake_complete(net, req, sk, status); req->hr_proto->hp_done(req, status, info); - /* Handshake request is no longer pending */ - sock_put(sk); + fput(file); } } EXPORT_SYMBOL_IF_KUNIT(handshake_complete); @@ -342,8 +380,7 @@ bool handshake_req_cancel(struct sock *sk) out_true: trace_handshake_cancel(net, req, sk); - /* Handshake request is no longer pending */ - sock_put(sk); + fput(req->hr_file); return true; } EXPORT_SYMBOL(handshake_req_cancel); diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index 8f9532a15f43..7567150c2a4f 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -93,7 +93,7 @@ static void tls_handshake_remote_peerids(struct tls_handshake_req *treq, * */ static void tls_handshake_done(struct handshake_req *req, - unsigned int status, struct genl_info *info) + int status, struct genl_info *info) { struct tls_handshake_req *treq = handshake_req_private(req); @@ -104,7 +104,7 @@ static void tls_handshake_done(struct handshake_req *req, if (!status) set_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags); - treq->th_consumer_done(treq->th_consumer_data, -status, + treq->th_consumer_done(treq->th_consumer_data, status, treq->th_peerid[0]); } @@ -425,6 +425,8 @@ EXPORT_SYMBOL(tls_server_hello_psk); * Request cancellation races with request completion. To determine * who won, callers examine the return value from this function. * + * Context: May be called from process or softirq context. + * * Return values: * %true - Uncompleted handshake request was canceled * %false - Handshake request already completed or not found diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 0aca859c88cb..f669a226d728 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -84,7 +84,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) /* Get next tlv */ total_length += hsr_sup_tag->tlv.HSR_TLV_length; - if (!pskb_may_pull(skb, total_length)) + if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_tlv))) return false; skb_pull(skb, total_length); hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data; @@ -100,7 +100,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) /* make sure another tlv follows */ total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tlv->HSR_TLV_length; - if (!pskb_may_pull(skb, total_length)) + if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_tlv))) return false; /* get next tlv */ diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 2667f53482bd..d3c677e9bff2 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); */ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; struct icmphdr *icmph; struct iphdr *niph; struct ethhdr eh; @@ -226,7 +226,6 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); - skb_reset_network_header(skb); err = pskb_trim(skb, 576 - sizeof(*niph) - sizeof(*icmph)); if (err) @@ -236,7 +235,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) err = skb_cow(skb, sizeof(*niph) + sizeof(*icmph) + ETH_HLEN); if (err) return err; - + iph = ip_hdr(skb); icmph = skb_push(skb, sizeof(*icmph)); *icmph = (struct icmphdr) { .type = ICMP_DEST_UNREACH, @@ -281,7 +280,6 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) */ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) { - const struct icmphdr *icmph = icmp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); if (mtu < 576 || iph->frag_off != htons(IP_DF)) @@ -292,9 +290,17 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) ipv4_is_lbcast(iph->saddr) || ipv4_is_multicast(iph->saddr)) return 0; - if (iph->protocol == IPPROTO_ICMP && icmp_is_err(icmph->type)) - return 0; + if (iph->protocol == IPPROTO_ICMP) { + const struct icmphdr *icmph; + if (!pskb_network_may_pull(skb, iph->ihl * 4 + + offsetofend(struct icmphdr, type))) + return 0; + iph = ip_hdr(skb); + icmph = (void *)iph + iph->ihl * 4; + if (icmp_is_err(icmph->type)) + return 0; + } return iptunnel_pmtud_build_icmp(skb, mtu); } @@ -308,7 +314,7 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) */ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); + const struct ipv6hdr *ip6h; struct icmp6hdr *icmp6h; struct ipv6hdr *nip6h; struct ethhdr eh; @@ -323,7 +329,6 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); - skb_reset_network_header(skb); err = pskb_trim(skb, IPV6_MIN_MTU - sizeof(*nip6h) - sizeof(*icmp6h)); if (err) @@ -334,6 +339,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) if (err) return err; + ip6h = ipv6_hdr(skb); icmp6h = skb_push(skb, sizeof(*icmp6h)); *icmp6h = (struct icmp6hdr) { .icmp6_type = ICMPV6_PKT_TOOBIG, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d8bdb1bdbff1..c0e85cc171ae 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1705,10 +1705,10 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) { const struct ctl_table *table; - kfree(net->ipv4.sysctl_local_reserved_ports); table = net->ipv4.ipv4_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.ipv4_hdr); kfree(table); + kfree(net->ipv4.sysctl_local_reserved_ports); } static __net_initdata struct pernet_operations ipv4_sysctl_ops = { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ca3605acb433..38d7b4845281 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -617,6 +617,18 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, } } +static u16 ipv6_get_exthdr_len(const struct sk_buff *skb, const u8 *ptr) +{ + u16 len; + + if (ptr + 2 > skb_tail_pointer(skb)) + return 0; + + len = (ptr[1] + 1) << 3; + + return (len <= skb_tail_pointer(skb) - ptr) ? len : 0; +} + void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { @@ -643,7 +655,10 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, /* HbH is allowed only once */ if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) { u8 *ptr = nh + sizeof(struct ipv6hdr); - put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, len, ptr); } if (opt->lastopt && @@ -664,26 +679,37 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, unsigned int len; u8 *ptr = nh + off; + if (ptr + 2 > skb_tail_pointer(skb)) + return; + switch (nexthdr) { case IPPROTO_DSTOPTS: nexthdr = ptr[0]; - len = (ptr[1] + 1) << 3; + len = ipv6_get_exthdr_len(skb, ptr); + if (!len) + return; if (np->rxopt.bits.dstopts) put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr); break; case IPPROTO_ROUTING: nexthdr = ptr[0]; - len = (ptr[1] + 1) << 3; + len = ipv6_get_exthdr_len(skb, ptr); + if (!len) + return; if (np->rxopt.bits.srcrt) put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr); break; case IPPROTO_AH: nexthdr = ptr[0]; len = (ptr[1] + 2) << 2; + if (ptr + len > skb_tail_pointer(skb)) + return; break; default: nexthdr = ptr[0]; - len = (ptr[1] + 1) << 3; + len = ipv6_get_exthdr_len(skb, ptr); + if (!len) + return; break; } @@ -705,19 +731,31 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) { u8 *ptr = nh + sizeof(struct ipv6hdr); - put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, len, ptr); } if (np->rxopt.bits.odstopts && opt->dst0) { u8 *ptr = nh + opt->dst0; - put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, len, ptr); } if (np->rxopt.bits.osrcrt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt); - put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr); + u16 len = ipv6_get_exthdr_len(skb, (u8 *)rthdr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, len, rthdr); } if (np->rxopt.bits.odstopts && opt->dst1) { u8 *ptr = nh + opt->dst1; - put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, len, ptr); } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index cf90f933ca1a..43f46ef9c53b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -184,6 +184,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_CALIPSO: if (!ipv6_hop_calipso(skb, off)) @@ -201,6 +203,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_HAO: if (!ipv6_dest_hao(skb, off)) return false; + + nh = skb_network_header(skb); break; #endif default: @@ -544,7 +548,7 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) * unsigned char which is segments_left field. Should not be * higher than that. */ - if (r || (n + 1) > 255) { + if (r || (n + 1) > 127) { kfree_skb(skb); return -1; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ad5290be4dd6..df793c8bfffb 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -722,10 +722,11 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p, static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p, bool keep_mtu) { - struct net *net = dev_net(t->dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct net *net = t->net; + struct vti6_net *ip6n; int err; + ip6n = net_generic(net, vti6_net_id); vti6_tnl_unlink(ip6n, t); synchronize_net(); err = vti6_tnl_change(t, p, keep_mtu); @@ -834,17 +835,24 @@ vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data if (p.proto != IPPROTO_IPV6 && p.proto != 0) break; vti6_parm_from_user(&p1, &p); - t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { + struct ip6_tnl *self = netdev_priv(dev); + + err = -EPERM; + if (!ns_capable(self->net->user_ns, CAP_NET_ADMIN)) + break; + t = vti6_locate(self->net, &p1, false); if (t) { if (t->dev != dev) { err = -EEXIST; break; } } else - t = netdev_priv(dev); + t = self; err = vti6_update(t, &p1, false); + } else { + t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); } if (t) { err = 0; @@ -1031,11 +1039,12 @@ static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6_tnl *t; + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct __ip6_tnl_parm p; - struct net *net = dev_net(dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n; + ip6n = net_generic(net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) return -EINVAL; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 8b2dba88ee96..c0a0075e2590 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -160,17 +160,33 @@ static bool nft_fib6_info_nh_dev_match(const struct net_device *nh_dev, l3mdev_master_ifindex_rcu(nh_dev) == dev->ifindex; } +static int nft_fib6_nh_match_dev_cb(struct fib6_nh *nh, void *arg) +{ + const struct net_device *dev = arg; + + return nft_fib6_info_nh_dev_match(nh->fib_nh_dev, dev); +} + static bool nft_fib6_info_nh_uses_dev(struct fib6_info *rt, const struct net_device *dev) { const struct net_device *nh_dev; struct fib6_info *iter; + /* External nexthop: fib6_siblings slot aliases nh_list, walk via nh. */ + if (rt->nh) + return nexthop_for_each_fib6_nh(rt->nh, + nft_fib6_nh_match_dev_cb, + (void *)dev); + nh_dev = fib6_info_nh_dev(rt); if (nft_fib6_info_nh_dev_match(nh_dev, dev)) return true; - list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (!READ_ONCE(rt->fib6_nsiblings)) + return false; + + list_for_each_entry_rcu(iter, &rt->fib6_siblings, fib6_siblings) { nh_dev = fib6_info_nh_dev(iter); if (nft_fib6_info_nh_dev_match(nh_dev, dev)) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 72dfccd4e3d5..c2dc3338670e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1540,7 +1540,7 @@ static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned int val; - int len; + int len, rc; if (level != SOL_IUCV) return -ENOPROTOOPT; @@ -1553,26 +1553,34 @@ static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, len = min_t(unsigned int, len, sizeof(int)); + rc = 0; + + lock_sock(sk); switch (optname) { case SO_IPRMDATA_MSG: val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0; break; case SO_MSGLIMIT: - lock_sock(sk); val = (iucv->path != NULL) ? iucv->path->msglim /* connected */ : iucv->msglimit; /* default */ - release_sock(sk); break; case SO_MSGSIZE: - if (sk->sk_state == IUCV_OPEN) - return -EBADFD; + if (sk->sk_state == IUCV_OPEN) { + rc = -EBADFD; + break; + } val = (iucv->hs_dev) ? iucv->hs_dev->mtu - sizeof(struct af_iucv_trans_hdr) - ETH_HLEN : 0x7fffffff; break; default: - return -ENOPROTOOPT; + rc = -ENOPROTOOPT; + break; } + release_sock(sk); + + if (rc) + return rc; if (put_user(len, optlen)) return -EFAULT; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1455f67e01dd..9419c8555d22 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -441,12 +441,13 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { if (tunnel) { list_for_each_entry_rcu(session, &tunnel->session_list, list) { - if (!strcmp(session->ifname, ifname)) { - refcount_inc(&session->ref_count); - rcu_read_unlock_bh(); + if (strcmp(session->ifname, ifname)) + continue; + if (!refcount_inc_not_zero(&session->ref_count)) + continue; + rcu_read_unlock_bh(); - return session; - } + return session; } } } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8ba5b22a1eef..b521b5ebd664 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -568,6 +568,13 @@ static void destroy_gre_conntrack(struct nf_conn *ct) #endif } +static void warn_on_keymap_list_leak(const struct net *net) +{ +#ifdef CONFIG_NF_CT_PROTO_GRE + WARN_ON_ONCE(!list_empty(&net->ct.nf_ct_proto.gre.keymap_list)); +#endif +} + void nf_ct_destroy(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; @@ -2510,6 +2517,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) } list_for_each_entry(net, net_exit_list, exit_list) { + warn_on_keymap_list_leak(net); nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 4c679638df06..dc23e4181618 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -225,13 +225,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) if (nf_ct_expect_related(exp_reply, 0) != 0) goto out_unexpect_orig; - /* Add GRE keymap entries */ - if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_ORIGINAL, &exp_orig->tuple) != 0) + if (!nf_ct_gre_keymap_add(ct, &exp_orig->tuple, + &exp_reply->tuple)) goto out_unexpect_both; - if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_REPLY, &exp_reply->tuple) != 0) { - nf_ct_gre_keymap_destroy(ct); - goto out_unexpect_both; - } ret = 0; out_put_both: diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 94c19bc4edc5..35e22082d65a 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -87,41 +87,97 @@ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) return key; } -/* add a single keymap entry, associate with specified master ct */ -int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, - struct nf_conntrack_tuple *t) +enum nf_ct_gre_km_act { + NF_CT_GRE_KM_NEW, + NF_CT_GRE_KM_BAD, + NF_CT_GRE_KM_DUP +}; + +static enum nf_ct_gre_km_act +nf_ct_gre_km_acceptable(const struct nf_ct_pptp_master *ct_pptp_info, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl) +{ + struct nf_ct_gre_keymap *km_orig, *km_repl; + + lockdep_assert_held(&keymap_lock); + + km_orig = ct_pptp_info->keymap[IP_CT_DIR_ORIGINAL]; + km_repl = ct_pptp_info->keymap[IP_CT_DIR_REPLY]; + + if (km_orig && km_repl) { + if (!gre_key_cmpfn(km_orig, orig)) + return NF_CT_GRE_KM_BAD; + + if (!gre_key_cmpfn(km_repl, repl)) + return NF_CT_GRE_KM_BAD; + + return NF_CT_GRE_KM_DUP; + } + + DEBUG_NET_WARN_ON_ONCE(km_orig); + DEBUG_NET_WARN_ON_ONCE(km_repl); + return NF_CT_GRE_KM_NEW; +} + +/* add keymap entries, associate with specified master ct */ +bool nf_ct_gre_keymap_add(struct nf_conn *ct, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl) { struct net *net = nf_ct_net(ct); struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); - struct nf_ct_gre_keymap **kmp, *km; + struct nf_ct_gre_keymap *km_orig, *km_repl; + bool ret = false; - kmp = &ct_pptp_info->keymap[dir]; - if (*kmp) { - /* check whether it's a retransmission */ - list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { - if (gre_key_cmpfn(km, t) && km == *kmp) - return 0; - } - pr_debug("trying to override keymap_%s for ct %p\n", - dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); - return -EEXIST; - } + km_orig = kmalloc_obj(*km_orig, GFP_ATOMIC); + if (!km_orig) + return false; + km_repl = kmalloc_obj(*km_repl, GFP_ATOMIC); + if (!km_repl) + goto km_free; - km = kmalloc_obj(*km, GFP_ATOMIC); - if (!km) - return -ENOMEM; - memcpy(&km->tuple, t, sizeof(*t)); - *kmp = km; - - pr_debug("adding new entry %p: ", km); - nf_ct_dump_tuple(&km->tuple); + memcpy(&km_orig->tuple, orig, sizeof(*orig)); + memcpy(&km_repl->tuple, repl, sizeof(*repl)); spin_lock_bh(&keymap_lock); - list_add_tail(&km->list, &net_gre->keymap_list); + if (nf_ct_is_dying(ct)) + goto unlock_free; + + switch (nf_ct_gre_km_acceptable(ct_pptp_info, orig, repl)) { + case NF_CT_GRE_KM_NEW: + break; + case NF_CT_GRE_KM_DUP: + ret = true; + goto unlock_free; + case NF_CT_GRE_KM_BAD: + pr_debug("trying to override keymap for ct %p\n", ct); + goto unlock_free; + } + + if (ct_pptp_info->keymap[IP_CT_DIR_ORIGINAL] || + ct_pptp_info->keymap[IP_CT_DIR_REPLY]) + goto unlock_free; + + pr_debug("adding new entries %p,%p: ", km_orig, km_repl); + nf_ct_dump_tuple(&km_orig->tuple); + nf_ct_dump_tuple(&km_repl->tuple); + + list_add_tail_rcu(&km_orig->list, &net_gre->keymap_list); + list_add_tail_rcu(&km_repl->list, &net_gre->keymap_list); + ct_pptp_info->keymap[IP_CT_DIR_ORIGINAL] = km_orig; + ct_pptp_info->keymap[IP_CT_DIR_REPLY] = km_repl; spin_unlock_bh(&keymap_lock); - return 0; + return true; + +unlock_free: + spin_unlock_bh(&keymap_lock); +km_free: + kfree(km_orig); + kfree(km_repl); + return ret; } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b67426c2189b..e99ab1e88e9f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1221,7 +1221,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, new_state = old_state; } if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) - && ct->proto.tcp.last_index == TCP_SYN_SET) + && ct->proto.tcp.last_index == TCP_SYN_SET + && ct->proto.tcp.last_dir != dir) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 57f57e2fc80a..036c8586f49b 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -200,6 +200,8 @@ synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff, if (skb_ensure_writable(skb, optend)) return 0; + th = (struct tcphdr *)(skb->data + protoff); + while (optoff < optend) { unsigned char *op = skb->data + optoff; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 984a0eb9e149..60ab88d45096 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1141,6 +1141,9 @@ nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int di { struct sk_buff *nskb; + if (e->state.net->user_ns != &init_user_ns) + return -EPERM; + if (diff < 0) { unsigned int min_len = skb_transport_offset(e->skb); @@ -1537,8 +1540,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), payload_len, entry, diff) < 0) verdict = NF_DROP; - - if (ct && diff) + else if (ct && diff) nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff); } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 94dccdcfa06b..785b8e9731d1 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -43,8 +43,10 @@ static void nft_bitwise_eval_lshift(u32 *dst, const u32 *src, u32 carry = 0; for (i = DIV_ROUND_UP(priv->len, sizeof(u32)); i > 0; i--) { - dst[i - 1] = (src[i - 1] << shift) | carry; - carry = src[i - 1] >> (BITS_PER_TYPE(u32) - shift); + u32 tmp_src = src[i - 1]; + + dst[i - 1] = (tmp_src << shift) | carry; + carry = tmp_src >> (BITS_PER_TYPE(u32) - shift); } } @@ -56,8 +58,10 @@ static void nft_bitwise_eval_rshift(u32 *dst, const u32 *src, u32 carry = 0; for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++) { - dst[i] = carry | (src[i] >> shift); - carry = src[i] << (BITS_PER_TYPE(u32) - shift); + u32 tmp_src = src[i]; + + dst[i] = carry | (tmp_src >> shift); + carry = tmp_src << (BITS_PER_TYPE(u32) - shift); } } @@ -235,6 +239,9 @@ static int nft_bitwise_init_bool(const struct nft_ctx *ctx, &priv->sreg2, priv->len); if (err < 0) return err; + + if (nft_reg_overlap(priv->sreg2, priv->dreg, priv->len)) + return -EINVAL; } return 0; @@ -265,6 +272,9 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (nft_reg_overlap(priv->sreg, priv->dreg, priv->len)) + return -EINVAL; + if (tb[NFTA_BITWISE_OP]) { priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])); switch (priv->op) { diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e00dddfa2fc0..2316c77f4228 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -144,9 +144,16 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, if (err < 0) return err; - return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], - &priv->dreg, NULL, NFT_DATA_VALUE, - priv->len); + err = nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); + if (err < 0) + return err; + + if (nft_reg_overlap(priv->sreg, priv->dreg, priv->len)) + return -EINVAL; + + return 0; } static int nft_byteorder_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 01e13e5255a9..484a5490832e 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -917,6 +917,9 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, struct nft_payload_set *priv = nft_expr_priv(expr); int err; + if (ctx->net->user_ns != &init_user_ns) + return -EPERM; + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c index 3bdc302a0f91..9cb259902a58 100644 --- a/net/netfilter/xt_cpu.c +++ b/net/netfilter/xt_cpu.c @@ -34,7 +34,7 @@ static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_cpu_info *info = par->matchinfo; - return (info->cpu == smp_processor_id()) ^ info->invert; + return (info->cpu == raw_smp_processor_id()) ^ info->invert; } static struct xt_match cpu_mt_reg __read_mostly = { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2aeb0680807d..7269e23b578d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1482,9 +1482,14 @@ static void do_one_broadcast(struct sock *sk, p->skb2 = NULL; goto out; } - NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); - if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) - NETLINK_CB(p->skb2).nsid_is_set = true; + + NETLINK_CB(p->skb2).nsid_is_set = false; + if (!net_eq(sock_net(sk), p->net)) { + NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); + if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) + NETLINK_CB(p->skb2).nsid_is_set = true; + } + val = netlink_broadcast_deliver(sk, p->skb2); if (val < 0) { netlink_overrun(sk); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 0d33c81a15fe..ba6f0310ffd7 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -861,6 +861,11 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) struct sk_buff *frag_skb; int msg_len; + if (!pskb_may_pull(skb, NFC_HCI_HCP_PACKET_HEADER_LEN)) { + kfree_skb(skb); + return; + } + packet = (struct hcp_packet *)skb->data; if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) { skb_queue_tail(&hdev->rx_hcp_frags, skb); @@ -904,6 +909,11 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. */ + if (!pskb_may_pull(hcp_skb, NFC_HCI_HCP_HEADER_LEN)) { + kfree_skb(hcp_skb); + return; + } + packet = (struct hcp_packet *)hcp_skb->data; type = HCP_MSG_GET_TYPE(packet->message.header); if (type == NFC_HCI_HCP_RESPONSE) { diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index db5bc6a878dd..dc65c719f35f 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1218,6 +1218,15 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, sk = &llcp_sock->sk; + lock_sock(sk); + + /* Check if socket was destroyed whilst waiting for the lock */ + if (!sk_hashed(sk)) { + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); + return; + } + /* Unlink from connecting and link to the client array */ nfc_llcp_sock_unlink(&local->connecting_sockets, sk); nfc_llcp_sock_link(&local->sockets, sk); @@ -1229,6 +1238,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, sk->sk_state = LLCP_CONNECTED; sk->sk_state_change(sk); + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); } diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index f1be1e84f665..feab29fc62f4 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -633,6 +633,8 @@ static int llcp_sock_release(struct socket *sock) if (sock->type == SOCK_RAW) nfc_llcp_sock_unlink(&local->raw_sockets, sk); + else if (sk->sk_state == LLCP_CONNECTING) + nfc_llcp_sock_unlink(&local->connecting_sockets, sk); else nfc_llcp_sock_unlink(&local->sockets, sk); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 40ae8e5a7ec7..c03e8a0bd3bd 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -439,6 +439,11 @@ void nci_hci_data_received_cb(void *context, return; } + if (!pskb_may_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN)) { + kfree_skb(skb); + return; + } + packet = (struct nci_hcp_packet *)skb->data; if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); @@ -482,6 +487,11 @@ void nci_hci_data_received_cb(void *context, * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. */ + if (!pskb_may_pull(hcp_skb, NCI_HCI_HCP_HEADER_LEN)) { + kfree_skb(hcp_skb); + return; + } + packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2c5a7a321a94..553342c55cf7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -26,6 +26,10 @@ #include #include +#define MIRRED_DEFER_LIMIT 3 +_Static_assert(MIRRED_DEFER_LIMIT <= 3, + "MIRRED_DEFER_LIMIT exceeds tc_depth bitfield width"); + static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); @@ -234,12 +238,15 @@ tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; - if (!want_ingress) + if (!want_ingress) { err = tcf_dev_queue_xmit(skb, dev_queue_xmit); - else if (!at_ingress) - err = netif_rx(skb); - else - err = netif_receive_skb(skb); + } else { + skb->tc_depth++; + if (!at_ingress) + err = netif_rx(skb); + else + err = netif_receive_skb(skb); + } return err; } @@ -365,7 +372,8 @@ static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, dev_is_mac_header_xmit(dev_prev), m_eaction, retval); - return retval; + /* If the packet wasn't redirected, we have to register as a drop */ + return TC_ACT_SHOT; } static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, @@ -389,14 +397,12 @@ static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, const u32 blockid, struct tcf_result *res, - int retval) + int m_eaction, int retval) { const u32 exception_ifindex = skb->dev->ifindex; struct tcf_block *block; bool is_redirect; - int m_eaction; - m_eaction = READ_ONCE(m->tcfm_eaction); is_redirect = tcf_mirred_is_act_redirect(m_eaction); /* we are already under rcu protection, so can call block lookup @@ -405,7 +411,7 @@ static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, block = tcf_block_lookup(dev_net(skb->dev), blockid); if (!block || xa_empty(&block->ports)) { tcf_action_inc_overlimit_qstats(&m->common); - return retval; + return is_redirect ? TC_ACT_SHOT : retval; } if (is_redirect) @@ -423,9 +429,10 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, { struct tcf_mirred *m = to_mirred(a); int retval = READ_ONCE(m->tcf_action); + bool m_mac_header_xmit, is_redirect; struct netdev_xmit *xmit; - bool m_mac_header_xmit; struct net_device *dev; + bool want_ingress; int i, m_eaction; u32 blockid; @@ -434,7 +441,8 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, #else xmit = this_cpu_ptr(&softnet_data.xmit); #endif - if (unlikely(xmit->sched_mirred_nest >= MIRRED_NEST_LIMIT)) { + if (unlikely(xmit->sched_mirred_nest >= MIRRED_NEST_LIMIT || + skb->tc_depth >= MIRRED_DEFER_LIMIT)) { net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", netdev_name(skb->dev)); return TC_ACT_SHOT; @@ -444,34 +452,51 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, tcf_action_update_bstats(&m->common, skb); blockid = READ_ONCE(m->tcfm_blockid); - if (blockid) - return tcf_blockcast(skb, m, blockid, res, retval); + m_eaction = READ_ONCE(m->tcfm_eaction); + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (blockid) { + if (!want_ingress) + xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = NULL; + retval = tcf_blockcast(skb, m, blockid, res, m_eaction, retval); + if (!want_ingress) + xmit->sched_mirred_nest--; + return retval; + } + + is_redirect = tcf_mirred_is_act_redirect(m_eaction); dev = rcu_dereference_bh(m->tcfm_dev); if (unlikely(!dev)) { pr_notice_once("tc mirred: target device is gone\n"); tcf_action_inc_overlimit_qstats(&m->common); - return retval; - } - for (i = 0; i < xmit->sched_mirred_nest; i++) { - if (xmit->sched_mirred_dev[i] != dev) - continue; - pr_notice_once("tc mirred: loop on device %s\n", - netdev_name(dev)); - tcf_action_inc_overlimit_qstats(&m->common); - return retval; + goto err_out; } - xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = dev; + if (!want_ingress) { + for (i = 0; i < xmit->sched_mirred_nest; i++) { + if (xmit->sched_mirred_dev[i] != dev) + continue; + pr_notice_once("tc mirred: loop on device %s\n", + netdev_name(dev)); + tcf_action_inc_overlimit_qstats(&m->common); + goto err_out; + } + xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = dev; + } m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); - m_eaction = READ_ONCE(m->tcfm_eaction); retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, retval); - xmit->sched_mirred_nest--; + if (!want_ingress) + xmit->sched_mirred_nest--; return retval; + +err_out: + if (is_redirect) + retval = TC_ACT_SHOT; + return retval; } static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bc18e1976b6e..17a79fe2f091 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -461,7 +461,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb->prev = NULL; /* Random duplication */ - if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) + if (q->duplicate && skb->tc_depth == 0 && + q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) ++count; /* Drop packet? */ @@ -540,11 +541,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, */ if (skb2) { struct Qdisc *rootq = qdisc_root_bh(sch); - u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ - q->duplicate = 0; + skb2->tc_depth++; /* prevent duplicating a dup... */ rootq->enqueue(skb2, rootq, to_free); - q->duplicate = dupsave; skb2 = NULL; } @@ -1007,41 +1006,6 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, return 0; } -static const struct Qdisc_class_ops netem_class_ops; - -static int check_netem_in_tree(struct Qdisc *sch, bool duplicates, - struct netlink_ext_ack *extack) -{ - struct Qdisc *root, *q; - unsigned int i; - - root = qdisc_root_sleeping(sch); - - if (sch != root && root->ops->cl_ops == &netem_class_ops) { - if (duplicates || - ((struct netem_sched_data *)qdisc_priv(root))->duplicate) - goto err; - } - - if (!qdisc_dev(root)) - return 0; - - hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) { - if (sch != q && q->ops->cl_ops == &netem_class_ops) { - if (duplicates || - ((struct netem_sched_data *)qdisc_priv(q))->duplicate) - goto err; - } - } - - return 0; - -err: - NL_SET_ERR_MSG(extack, - "netem: cannot mix duplicating netems with other netems in tree"); - return -EINVAL; -} - /* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) @@ -1118,11 +1082,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, q->gap = qopt->gap; q->counter = 0; q->loss = qopt->loss; - - ret = check_netem_in_tree(sch, qopt->duplicate, extack); - if (ret) - goto unlock; - q->duplicate = qopt->duplicate; /* for compatibility with earlier versions. diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index dffbd529762d..b5db69073e20 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -188,10 +188,12 @@ static bool smc_hs_congested(const struct sock *sk) struct smc_hashinfo smc_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock), + .ht = HLIST_HEAD_INIT, }; struct smc_hashinfo smc_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock), + .ht = HLIST_HEAD_INIT, }; int smc_hash_sk(struct sock *sk) @@ -3517,8 +3519,6 @@ static int __init smc_init(void) pr_err("%s: sock_register fails with %d\n", __func__, rc); goto out_proto6; } - INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); - INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); rc = smc_ib_register_client(); if (rc) { diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 44037b066a5f..2ce1063d4a67 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -642,7 +642,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) */ sock_reset_flag(sk, SOCK_DONE); sk->sk_state = TCP_CLOSE; - vsk->peer_shutdown = 0; + WRITE_ONCE(vsk->peer_shutdown, 0); } if (sk->sk_type == SOCK_SEQPACKET) { @@ -933,7 +933,7 @@ static struct sock *__vsock_create(struct net *net, vsk->rejected = false; vsk->sent_request = false; vsk->ignore_connecting_rst = false; - vsk->peer_shutdown = 0; + WRITE_ONCE(vsk->peer_shutdown, 0); INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); @@ -1241,6 +1241,25 @@ static int vsock_shutdown(struct socket *sock, int mode) return err; } +static __poll_t vsock_poll_shutdown(struct sock *sk, u32 peer_shutdown) +{ + __poll_t mask = 0; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of EPOLLHUP set. + */ + if (sk->sk_shutdown == SHUTDOWN_MASK || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (peer_shutdown & SEND_SHUTDOWN))) + mask |= EPOLLHUP; + + if (sk->sk_shutdown & RCV_SHUTDOWN || + peer_shutdown & SEND_SHUTDOWN) + mask |= EPOLLRDHUP; + + return mask; +} + static __poll_t vsock_poll(struct file *file, struct socket *sock, poll_table *wait) { @@ -1258,24 +1277,17 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, /* Signify that there has been an error on this socket. */ mask |= EPOLLERR; - /* INET sockets treat local write shutdown and peer write shutdown as a - * case of EPOLLHUP set. - */ - if ((sk->sk_shutdown == SHUTDOWN_MASK) || - ((sk->sk_shutdown & SEND_SHUTDOWN) && - (vsk->peer_shutdown & SEND_SHUTDOWN))) { - mask |= EPOLLHUP; - } - - if (sk->sk_shutdown & RCV_SHUTDOWN || - vsk->peer_shutdown & SEND_SHUTDOWN) { - mask |= EPOLLRDHUP; - } - if (sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; if (sock->type == SOCK_DGRAM) { + u32 peer_shutdown = READ_ONCE(vsk->peer_shutdown); + + /* DGRAM sockets do not take lock_sock() in poll(), so use one + * lockless snapshot for all shutdown-derived mask bits. + */ + mask |= vsock_poll_shutdown(sk, peer_shutdown); + /* For datagram sockets we can read if there is something in * the queue and write as long as the socket isn't shutdown for * sending. @@ -1290,6 +1302,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, } else if (sock_type_connectible(sk->sk_type)) { const struct vsock_transport *transport; + u32 peer_shutdown; lock_sock(sk); @@ -1322,8 +1335,10 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, * terminated should also be considered read, and we check the * shutdown flag for that. */ + peer_shutdown = READ_ONCE(vsk->peer_shutdown); + mask |= vsock_poll_shutdown(sk, peer_shutdown); if (sk->sk_shutdown & RCV_SHUTDOWN || - vsk->peer_shutdown & SEND_SHUTDOWN) { + peer_shutdown & SEND_SHUTDOWN) { mask |= EPOLLIN | EPOLLRDNORM; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 7a8963595bf9..b3394946b2ed 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -264,7 +264,7 @@ static void hvs_do_close_lock_held(struct vsock_sock *vsk, struct sock *sk = sk_vsock(vsk); sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); if (vsock_stream_has_data(vsk) <= 0) sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); @@ -593,7 +593,9 @@ static int hvs_update_recv_data(struct hvsock *hvs) return -EIO; if (payload_len == 0) - hvs->vsk->peer_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(hvs->vsk->peer_shutdown, + READ_ONCE(hvs->vsk->peer_shutdown) | + SEND_SHUTDOWN); hvs->recv_data_len = payload_len; hvs->recv_data_off = 0; @@ -736,7 +738,8 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk) return ret; return hvs->recv_data_len; case 0: - vsk->peer_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(vsk->peer_shutdown, + READ_ONCE(vsk->peer_shutdown) | SEND_SHUTDOWN); ret = 0; break; default: /* -1 */ diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index df3b418e0392..b143290a311d 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -417,7 +417,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); + u64 skb_overhead = ((u64)skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); /* Allow at most buf_alloc * 2 total budget (payload + overhead), * similar to how SO_RCVBUF is doubled to reserve space for sk_buff @@ -1228,7 +1228,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, struct sock *sk = sk_vsock(vsk); sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); if (vsock_stream_has_data(vsk) <= 0) sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); @@ -1431,12 +1431,15 @@ virtio_transport_recv_connected(struct sock *sk, case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; - case VIRTIO_VSOCK_OP_SHUTDOWN: + case VIRTIO_VSOCK_OP_SHUTDOWN: { + u32 peer_shutdown = READ_ONCE(vsk->peer_shutdown); + if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) - vsk->peer_shutdown |= RCV_SHUTDOWN; + peer_shutdown |= RCV_SHUTDOWN; if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) - vsk->peer_shutdown |= SEND_SHUTDOWN; - if (vsk->peer_shutdown == SHUTDOWN_MASK) { + peer_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(vsk->peer_shutdown, peer_shutdown); + if (peer_shutdown == SHUTDOWN_MASK) { if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) { (void)virtio_transport_reset(vsk, NULL); virtio_transport_do_close(vsk, true); @@ -1451,6 +1454,7 @@ virtio_transport_recv_connected(struct sock *sk, if (le32_to_cpu(virtio_vsock_hdr(skb)->flags)) sk->sk_state_change(sk); break; + } case VIRTIO_VSOCK_OP_RST: virtio_transport_do_close(vsk, true); break; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index d2579380f51e..5c1ecd5bfdbc 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -819,7 +819,7 @@ static void vmci_transport_handle_detach(struct sock *sk) /* On a detach the peer will not be sending or receiving * anymore. */ - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); /* We should not be sending anymore since the peer won't be * there to receive, but we can still receive if there is data @@ -1542,7 +1542,9 @@ static int vmci_transport_recv_connected(struct sock *sk, if (pkt->u.mode) { vsk = vsock_sk(sk); - vsk->peer_shutdown |= pkt->u.mode; + WRITE_ONCE(vsk->peer_shutdown, + READ_ONCE(vsk->peer_shutdown) | + pkt->u.mode); sk->sk_state_change(sk); } break; @@ -1559,7 +1561,7 @@ static int vmci_transport_recv_connected(struct sock *sk, * a clean shutdown. */ sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); if (vsock_stream_has_data(vsk) <= 0) sk->sk_state = TCP_CLOSING; diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py index aab043c59d69..6d1f863b6262 100755 --- a/tools/testing/selftests/net/link_netns.py +++ b/tools/testing/selftests/net/link_netns.py @@ -3,13 +3,14 @@ import time -from lib.py import ksft_run, ksft_exit, ksft_true +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true from lib.py import ip from lib.py import NetNS, NetNSEnter from lib.py import RtnlFamily LINK_NETNSID = 100 +LINK_NETNSID2 = 200 def test_event() -> None: @@ -32,6 +33,57 @@ def test_event() -> None: "Received unexpected link notification") +def test_event_all_nsid() -> None: + """NETLINK_LISTEN_ALL_NSID notifications: local events must not + carry nsid even with a self-referential mapping. Remote events + must carry the correct nsid.""" + + with NetNS() as ns1, NetNS() as ns2: + net1, net2 = str(ns1), str(ns2) + + with NetNSEnter(net1): + rtnl = RtnlFamily() + rtnl.ntf_listen_all_nsid() + rtnl.ntf_subscribe("rtnlgrp-link") + + # Case 1: no nsid assigned, local event, no nsid expected. + ip("link add dummy-lo type dummy", ns=net1) + + # Case 2: self-referential nsid, local event, still no nsid. + ip(f"netns set {net1} {LINK_NETNSID}", ns=net1) + ip("link add dummy-sr type dummy", ns=net1) + + # Case 3: remote event, nsid present. + ip(f"netns set {net2} {LINK_NETNSID2}", ns=net1) + ip("link add dummy-re type dummy", ns=net2) + + # Collect the three newlink events, ignoring unrelated noise. + events = {} + for msg in rtnl.poll_ntf(duration=1): + if msg['name'] == 'getlink': + ifname = msg['msg'].get('ifname') + if ifname in ('dummy-lo', 'dummy-sr', 'dummy-re'): + events[ifname] = msg + if len(events) == 3: + break + + ksft_true('dummy-lo' in events, "missing local event") + ksft_true(events['dummy-lo'].get('nsid') is None, + "local event without nsid should not carry nsid") + + ksft_true('dummy-sr' in events, "missing self-ref event") + ksft_true(events['dummy-sr'].get('nsid') is None, + "local event with self-ref nsid should not carry nsid") + + ksft_true('dummy-re' in events, "missing remote event") + ksft_eq(events['dummy-re'].get('nsid'), LINK_NETNSID2, + "remote event should carry nsid") + + ip("link del dummy-lo", ns=net1) + ip("link del dummy-sr", ns=net1) + ip("link del dummy-re", ns=net2) + + def validate_link_netns(netns, ifname, link_netnsid) -> bool: link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True) if not link_info: @@ -133,7 +185,12 @@ def test_peer_net() -> None: def main() -> None: - ksft_run([test_event, test_link_net, test_peer_net]) + ksft_run([ + test_event, + test_event_all_nsid, + test_link_net, + test_peer_net, + ]) ksft_exit() diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index ee2d1a5254f8..d953ee218c0f 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -26,6 +26,7 @@ TEST_PROGS := \ nft_concat_range.sh \ nft_conntrack_helper.sh \ nft_fib.sh \ + nft_fib_nexthop.sh \ nft_flowtable.sh \ nft_interface_stress.sh \ nft_meta.sh \ diff --git a/tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh b/tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh new file mode 100755 index 000000000000..c4f203057382 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 +# +# Exercise nft_fib6_eval()'s sibling/nh enumeration on three route shapes: +# 1) route via a single external nexthop (nhid) +# 2) route via an external nexthop group (nhid -> group, two members) +# 3) route via old-style multipath (nexthop ... nexthop ...) +# +# In each scenario the route's nexthop set contains veth0 (the iif of the +# test packet). nft_fib6_info_nh_uses_dev() must walk the set and report +# veth0 as a valid oif. For (2) and (3) the matching nexthop is the second +# member, so the walk has to traverse beyond the primary nh. +# +# After sending $PKTS ICMPv6 echo requests from ns1, check two counters on +# nsrouter: +# nf_ok -- `fib daddr . iif oif eq "veth0"` must equal $PKTS +# nf_bad -- `fib daddr . iif oif missing` must stay at 0 +# Both rules also match on iif veth0 and ip6 daddr dead:dead::/64 so that +# kernel-generated ND/MLD/RA traffic cannot pollute the counters. +# +# Topology similar to nft_fib.sh, without ns2; two dummy interfaces on +# nsrouter host extra nh devices: +# +# dead:1::99 dead:1::1 +# ns1 <----veth----> nsrouter --- dummy0 dead:2::1 +# \-- dummy1 dead:9::1 + +source lib.sh + +ret=0 +PKTS=3 + +checktool "nft --version" "run test without nft" +checktool "ip -V" "run test without iproute2" + +setup_ns nsrouter ns1 +trap cleanup_all_ns EXIT + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" \ + > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi + +ip -net "$ns1" link set lo up +ip -net "$ns1" link set eth0 up +ip -net "$ns1" -6 addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" -6 route add default via dead:1::1 + +ip -net "$nsrouter" link set lo up +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" -6 addr add dead:1::1/64 dev veth0 nodad + +if ! ip -net "$nsrouter" link add dummy0 type dummy 2>/dev/null; then + echo "SKIP: dummy netdev not available" + exit $ksft_skip +fi +ip -net "$nsrouter" link set dummy0 up +ip -net "$nsrouter" -6 addr add dead:2::1/64 dev dummy0 nodad + +ip -net "$nsrouter" link add dummy1 type dummy +ip -net "$nsrouter" link set dummy1 up +ip -net "$nsrouter" -6 addr add dead:9::1/64 dev dummy1 nodad + +ip netns exec "$nsrouter" sysctl -q net.ipv6.conf.all.forwarding=1 + +load_fib_rule() { + # filter on iif + daddr so the counters only see our test packets + ip netns exec "$nsrouter" nft -f /dev/stdin <&2 + ip netns exec "$nsrouter" nft list counter ip6 t "$counter" 1>&2 +} + +run_scenario() { + local what="$1"; shift + # counter output format is "packets PACKET_NUM bytes BYTES_NUM"; + # we only care about the packet count + local expect_ok="packets $PKTS bytes" + local expect_bad="packets 0 bytes" + local lret=0 + + # reset route + nexthop state between scenarios + ip -net "$nsrouter" -6 route del dead:dead::/64 > /dev/null 2>&1 || true + ip -net "$nsrouter" nexthop flush > /dev/null 2>&1 || true + + # run the scenario function passed by the caller + "$@" || echo "WARN ($what): scenario setup returned non-zero" + + load_fib_rule || { echo "FAIL ($what): nft load"; ret=1; return; } + + # ping a daddr inside dead:dead::/64 so fib has to walk the nh set + ip netns exec "$ns1" ping -6 -c "$PKTS" -i 0.1 -W 1 dead:dead::1 \ + > /dev/null 2>&1 || true + + # verify the packets went through the expected fib path + if ! ip netns exec "$nsrouter" nft list counter ip6 t nf_ok | grep -q "$expect_ok"; then + bad_counter nf_ok "$expect_ok" "$what" + lret=1 + fi + if ! ip netns exec "$nsrouter" nft list counter ip6 t nf_bad | grep -q "$expect_bad"; then + bad_counter nf_bad "$expect_bad" "$what" + lret=1 + fi + + if [ $lret -eq 0 ]; then + echo "PASS: $what" + else + ret=1 + fi +} + +scenario_single_nh() { + ip -net "$nsrouter" nexthop add id 1 via dead:1::99 dev veth0 + ip -net "$nsrouter" -6 route add dead:dead::/64 nhid 1 +} +run_scenario "single external nexthop (nhid -> veth0)" scenario_single_nh + +scenario_nh_group() { + ip -net "$nsrouter" nexthop add id 1 via dead:2::2 dev dummy0 + ip -net "$nsrouter" nexthop add id 2 via dead:1::99 dev veth0 + ip -net "$nsrouter" nexthop add id 100 group 1/2 + ip -net "$nsrouter" -6 route add dead:dead::/64 nhid 100 +} +run_scenario "nexthop group (dummy0 + veth0)" scenario_nh_group + +scenario_old_multipath() { + ip -net "$nsrouter" -6 route add dead:dead::/64 \ + nexthop via dead:2::2 dev dummy0 \ + nexthop via dead:1::99 dev veth0 +} +run_scenario "old-style multipath (sibling on veth0)" scenario_old_multipath + +exit $ret diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index c499953d4885..ace3a99023ed 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -24,6 +24,8 @@ ALL_TESTS=" kci_test_macsec kci_test_macsec_vlan kci_test_team_bridge_macvlan + kci_test_bridge_promisc_netlink + kci_test_bridge_promisc_sysfs kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get @@ -61,6 +63,14 @@ check_fail() fi } +sysfs_write() +{ + local val="$1" + local path="$2" + + echo "$val" > "$path" +} + run_cmd_common() { local cmd="$*" @@ -680,6 +690,59 @@ kci_test_team_bridge_macvlan() end_test "PASS: team_bridge_macvlan" } +# Test that changing bridge port flags via the netlink path does not sleep with +# the bridge spin lock held. +kci_test_bridge_promisc_netlink() +{ + local dummy="test_dummy1" + local bridge="test_br1" + local team="test_team1" + local ret=0 + + run_cmd ip link add $team up type team + run_cmd ip link add $bridge up type bridge vlan_filtering 1 + run_cmd ip link add $dummy up type dummy + run_cmd ip link set $dummy master $bridge + run_cmd ip link set $team master $bridge + + # This causes the bridge driver to sync all the static FDB entries to + # the team device (which supports unicast filtering) and remove it from + # promiscuous mode. The call to dev_set_promiscuity() can sleep due to + # Rx mode inlining, which is a problem if the bridge spin lock is held. + run_cmd bridge link set dev $dummy flood off learning off + + run_cmd ip link del $dummy + run_cmd ip link del $bridge + run_cmd ip link del $team + + end_test "PASS: bridge_promisc_netlink" +} + +# Same as kci_test_bridge_promisc_netlink(), but the flags are changed via the +# sysfs path. +kci_test_bridge_promisc_sysfs() +{ + local dummy="test_dummy1" + local bridge="test_br1" + local team="test_team1" + local ret=0 + + run_cmd ip link add $team up type team + run_cmd ip link add $bridge up type bridge vlan_filtering 1 + run_cmd ip link add $dummy up type dummy + run_cmd ip link set $dummy master $bridge + run_cmd ip link set $team master $bridge + + run_cmd sysfs_write 0 /sys/class/net/$dummy/brport/unicast_flood + run_cmd sysfs_write 0 /sys/class/net/$dummy/brport/learning + + run_cmd ip link del $dummy + run_cmd ip link del $bridge + run_cmd ip link del $team + + end_test "PASS: bridge_promisc_sysfs" +} + #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index b056eb966871..d0cad6571691 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -1144,6 +1144,620 @@ "teardown": [ "$TC qdisc del dev $DUMMY clsact" ] + }, + { + "id": "531c", + "name": "Redirect multiport: dummy egress -> dev1 ingress -> dummy egress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 10 matchall action mirred ingress redirect dev $DEV1 index 1", + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 2" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact", + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "b1d7", + "name": "Redirect singleport: dev1 ingress -> dev1 egress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DEV1 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 egress protocol ip prio 11 matchall action mirred ingress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "c66d", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dev1 egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred egress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "aa99", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred ingress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 2, + "overlimits": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "37d7", + "name": "Redirect multiport: dev1 ingress -> dummy egress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred ingress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "6d02", + "name": "Redirect multiport: dummy egress -> dev1 ingress -> dummy egress, different prios (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 10 matchall action mirred ingress redirect dev $DEV1 index 1", + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 11 matchall action mirred egress redirect dev $DUMMY index 2" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact", + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "8115", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dummy egress -> dev1 egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred egress redirect dev $DUMMY index 2" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 12 matchall action mirred egress redirect dev $DEV1 index 3", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "9eb3", + "name": "Redirect multiport: dev1 ingress -> dummy egress -> dev1 egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred egress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "d837", + "name": "Redirect multiport: dev1 ingress -> dummy egress -> dummy ingress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred ingress redirect dev $DUMMY index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "2071", + "name": "Redirect singleport: dev1 ingress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DEV1 index 1", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1, + "overlimits": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "0101", + "name": "Redirect singleport: dummy egress -> dummy ingress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred ingress redirect dev $DUMMY index 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "cf97", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dummy egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred egress redirect dev $DUMMY index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] } - ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 848696c373fc..82c38a13dfbf 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -702,6 +702,7 @@ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", @@ -714,8 +715,8 @@ { "kind": "hfsc", "handle": "1:", - "bytes": 294, - "packets": 3 + "bytes": 392, + "packets": 4 } ], "matchCount": "1", diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json index 718d2df2aafa..472b672a600d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json @@ -338,84 +338,34 @@ ] }, { - "id": "d34d", - "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change root", - "category": ["qdisc", "netem"], + "id": "8c17", + "name": "Test netem's recursive duplicate", + "category": [ + "qdisc", + "netem" + ], "plugins": { "requires": "nsPlugin" }, "setup": [ - "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", - "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1000 duplicate 100%", + "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1000 duplicate 100%" ], - "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: netem duplicate 50%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "2", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] - }, - { - "id": "b33f", - "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change non-root", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", - "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" + "cmdUnderTest": "ping -c 1 10.10.11.11 -W 0.01", + "expExitCode": "1", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root", + "matchJSON": [ + { + "kind": "netem", + "handle": "1:", + "bytes": 294, + "packets": 3 + } ], - "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 2: netem duplicate 50%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "2", "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" + "$TC qdisc del dev $DUMMY handle 1: root" ] - }, - { - "id": "cafe", - "name": "NETEM test qdisc duplication restriction in qdisc tree", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY root handle 1: netem limit 1 duplicate 100%" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1: handle 2: netem duplicate 100%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] - }, - { - "id": "1337", - "name": "NETEM test qdisc duplication restriction in qdisc tree across branches", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY parent root handle 1:0 hfsc", - "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc rt m2 10Mbit", - "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem", - "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc rt m2 10Mbit" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] - } + } ]