From 08193d1a893c802c4b807e4d522865061f4e9f4f Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 27 Jul 2018 15:26:55 +0300 Subject: [PATCH 1/8] net: dcb: For wild-card lookups, use priority -1, not 0 The function dcb_app_lookup walks the list of specified DCB APP entries, looking for one that matches a given criteria: ifindex, selector, protocol ID and optionally also priority. The "don't care" value for priority is set to 0, because that priority has not been allowed under CEE regime, which predates the IEEE standardization. Under IEEE, 0 is a valid priority number. But because dcb_app_lookup considers zero a wild card, attempts to add an APP entry with priority 0 fail when other entries exist for a given ifindex / selector / PID triplet. Fix by changing the wild-card value to -1. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 2589a6b78aa1..013fdb6fa07a 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1786,7 +1786,7 @@ static struct dcb_app_type *dcb_app_lookup(const struct dcb_app *app, if (itr->app.selector == app->selector && itr->app.protocol == app->protocol && itr->ifindex == ifindex && - (!prio || itr->app.priority == prio)) + ((prio == -1) || itr->app.priority == prio)) return itr; } @@ -1821,7 +1821,8 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) u8 prio = 0; spin_lock_bh(&dcb_lock); - if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) + itr = dcb_app_lookup(app, dev->ifindex, -1); + if (itr) prio = itr->app.priority; spin_unlock_bh(&dcb_lock); @@ -1849,7 +1850,8 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) spin_lock_bh(&dcb_lock); /* Search for existing match and replace */ - if ((itr = dcb_app_lookup(new, dev->ifindex, 0))) { + itr = dcb_app_lookup(new, dev->ifindex, -1); + if (itr) { if (new->priority) itr->app.priority = new->priority; else { @@ -1882,7 +1884,8 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) u8 prio = 0; spin_lock_bh(&dcb_lock); - if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) + itr = dcb_app_lookup(app, dev->ifindex, -1); + if (itr) prio |= 1 << itr->app.priority; spin_unlock_bh(&dcb_lock); From b67c540b8a987e365dc548e5b2ddf023946e3d63 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 27 Jul 2018 15:26:56 +0300 Subject: [PATCH 2/8] net: dcb: Add priority-to-DSCP map getters On ingress, a network device such as a switch assigns to packets priority based on various criteria. Common options include interpreting PCP and DSCP fields according to user configuration. When a packet egresses the switch, a reverse process may rewrite PCP and/or DSCP values according to packet priority. The following three functions support a) obtaining a DSCP-to-priority map or vice versa, and b) finding default-priority entries in APP database. The DCB subsystem supports for APP entries a very generous M:N mapping between priorities and protocol identifiers. Understandably, several (say) DSCP values can map to the same priority. But this asymmetry holds the other way around as well--one priority can map to several DSCP values. For this reason, the following functions operate in terms of bitmaps, with ones in positions that match some APP entry. - dcb_ieee_getapp_dscp_prio_mask_map() to compute for a given netdevice a map of DSCP-to-priority-mask, which gives for each DSCP value a bitmap of priorities related to that DSCP value by APP, along the lines of dcb_ieee_getapp_mask(). - dcb_ieee_getapp_prio_dscp_mask_map() similarly to compute for a given netdevice a map from priorities to a bitmap of DSCPs. - dcb_ieee_getapp_default_prio_mask() which finds all default-priority rules for a given port in APP database, and returns a mask of priorities allowed by these default-priority rules. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/dcbnl.h | 13 +++++++ net/dcb/dcbnl.c | 86 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 0e5e91be2d30..e22a8a3c089b 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -34,6 +34,19 @@ int dcb_ieee_setapp(struct net_device *, struct dcb_app *); int dcb_ieee_delapp(struct net_device *, struct dcb_app *); u8 dcb_ieee_getapp_mask(struct net_device *, struct dcb_app *); +struct dcb_ieee_app_prio_map { + u64 map[IEEE_8021QAZ_MAX_TCS]; +}; +void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev, + struct dcb_ieee_app_prio_map *p_map); + +struct dcb_ieee_app_dscp_map { + u8 map[64]; +}; +void dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev, + struct dcb_ieee_app_dscp_map *p_map); +u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev); + int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd, u32 seq, u32 pid); int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 013fdb6fa07a..a556cd708885 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1958,6 +1958,92 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) } EXPORT_SYMBOL(dcb_ieee_delapp); +/** + * dcb_ieee_getapp_prio_dscp_mask_map - For a given device, find mapping from + * priorities to the DSCP values assigned to that priority. Initialize p_map + * such that each map element holds a bit mask of DSCP values configured for + * that priority by APP entries. + */ +void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev, + struct dcb_ieee_app_prio_map *p_map) +{ + int ifindex = dev->ifindex; + struct dcb_app_type *itr; + u8 prio; + + memset(p_map->map, 0, sizeof(p_map->map)); + + spin_lock_bh(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->ifindex == ifindex && + itr->app.selector == IEEE_8021QAZ_APP_SEL_DSCP && + itr->app.protocol < 64 && + itr->app.priority < IEEE_8021QAZ_MAX_TCS) { + prio = itr->app.priority; + p_map->map[prio] |= 1ULL << itr->app.protocol; + } + } + spin_unlock_bh(&dcb_lock); +} +EXPORT_SYMBOL(dcb_ieee_getapp_prio_dscp_mask_map); + +/** + * dcb_ieee_getapp_dscp_prio_mask_map - For a given device, find mapping from + * DSCP values to the priorities assigned to that DSCP value. Initialize p_map + * such that each map element holds a bit mask of priorities configured for a + * given DSCP value by APP entries. + */ +void +dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev, + struct dcb_ieee_app_dscp_map *p_map) +{ + int ifindex = dev->ifindex; + struct dcb_app_type *itr; + + memset(p_map->map, 0, sizeof(p_map->map)); + + spin_lock_bh(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->ifindex == ifindex && + itr->app.selector == IEEE_8021QAZ_APP_SEL_DSCP && + itr->app.protocol < 64 && + itr->app.priority < IEEE_8021QAZ_MAX_TCS) + p_map->map[itr->app.protocol] |= 1 << itr->app.priority; + } + spin_unlock_bh(&dcb_lock); +} +EXPORT_SYMBOL(dcb_ieee_getapp_dscp_prio_mask_map); + +/** + * Per 802.1Q-2014, the selector value of 1 is used for matching on Ethernet + * type, with valid PID values >= 1536. A special meaning is then assigned to + * protocol value of 0: "default priority. For use when priority is not + * otherwise specified". + * + * dcb_ieee_getapp_default_prio_mask - For a given device, find all APP entries + * of the form {$PRIO, ETHERTYPE, 0} and construct a bit mask of all default + * priorities set by these entries. + */ +u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev) +{ + int ifindex = dev->ifindex; + struct dcb_app_type *itr; + u8 mask = 0; + + spin_lock_bh(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->ifindex == ifindex && + itr->app.selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE && + itr->app.protocol == 0 && + itr->app.priority < IEEE_8021QAZ_MAX_TCS) + mask |= 1 << itr->app.priority; + } + spin_unlock_bh(&dcb_lock); + + return mask; +} +EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask); + static int __init dcbnl_init(void) { INIT_LIST_HEAD(&dcb_app_list); From 02837d726721cbc87629741e6b2570580ce47fae Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 27 Jul 2018 15:26:57 +0300 Subject: [PATCH 3/8] mlxsw: reg: Add QoS Port DSCP to Priority Mapping Register The QPDPM register controls the mapping from DSCP field to Switch Priority for IP packets. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 52 +++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index fd2e3dd166d2..411d06b5aaae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3329,6 +3329,57 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, mlxsw_reg_qeec_next_element_index_set(payload, next_index); } +/* QPDPM - QoS Port DSCP to Priority Mapping Register + * -------------------------------------------------- + * This register controls the mapping from DSCP field to + * Switch Priority for IP packets. + */ +#define MLXSW_REG_QPDPM_ID 0x4013 +#define MLXSW_REG_QPDPM_BASE_LEN 0x4 /* base length, without records */ +#define MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN 0x2 /* record length */ +#define MLXSW_REG_QPDPM_DSCP_ENTRY_REC_MAX_COUNT 64 +#define MLXSW_REG_QPDPM_LEN (MLXSW_REG_QPDPM_BASE_LEN + \ + MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN * \ + MLXSW_REG_QPDPM_DSCP_ENTRY_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(qpdpm, MLXSW_REG_QPDPM_ID, MLXSW_REG_QPDPM_LEN); + +/* reg_qpdpm_local_port + * Local Port. Supported for data packets from CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, qpdpm, local_port, 0x00, 16, 8); + +/* reg_qpdpm_dscp_e + * Enable update of the specific entry. When cleared, the switch_prio and color + * fields are ignored and the previous switch_prio and color values are + * preserved. + * Access: WO + */ +MLXSW_ITEM16_INDEXED(reg, qpdpm, dscp_entry_e, MLXSW_REG_QPDPM_BASE_LEN, 15, 1, + MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdpm_dscp_prio + * The new Switch Priority value for the relevant DSCP value. + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, qpdpm, dscp_entry_prio, + MLXSW_REG_QPDPM_BASE_LEN, 0, 4, + MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_qpdpm_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(qpdpm, payload); + mlxsw_reg_qpdpm_local_port_set(payload, local_port); +} + +static inline void +mlxsw_reg_qpdpm_dscp_pack(char *payload, unsigned short dscp, u8 prio) +{ + mlxsw_reg_qpdpm_dscp_entry_e_set(payload, dscp, 1); + mlxsw_reg_qpdpm_dscp_entry_prio_set(payload, dscp, prio); +} + /* PMLP - Ports Module to Local Port Register * ------------------------------------------ * Configures the assignment of modules to local ports. @@ -8542,6 +8593,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(qpcr), MLXSW_REG(qtct), MLXSW_REG(qeec), + MLXSW_REG(qpdpm), MLXSW_REG(pmlp), MLXSW_REG(pmtu), MLXSW_REG(ptys), From 746da42a1f60728fc0f3ba7818ffe8d1aa69cacd Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 27 Jul 2018 15:26:58 +0300 Subject: [PATCH 4/8] mlxsw: reg: Add QoS Priority Trust State Register The QPTS register controls the port policy to calculate the switch priority and packet color based on incoming packet fields. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 39 +++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 411d06b5aaae..c50e754dd725 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3017,6 +3017,44 @@ static inline void mlxsw_reg_iedr_rec_pack(char *payload, int rec_index, mlxsw_reg_iedr_rec_index_start_set(payload, rec_index, rec_index_start); } +/* QPTS - QoS Priority Trust State Register + * ---------------------------------------- + * This register controls the port policy to calculate the switch priority and + * packet color based on incoming packet fields. + */ +#define MLXSW_REG_QPTS_ID 0x4002 +#define MLXSW_REG_QPTS_LEN 0x8 + +MLXSW_REG_DEFINE(qpts, MLXSW_REG_QPTS_ID, MLXSW_REG_QPTS_LEN); + +/* reg_qpts_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is supported. + */ +MLXSW_ITEM32(reg, qpts, local_port, 0x00, 16, 8); + +enum mlxsw_reg_qpts_trust_state { + MLXSW_REG_QPTS_TRUST_STATE_PCP = 1, + MLXSW_REG_QPTS_TRUST_STATE_DSCP = 2, /* For MPLS, trust EXP. */ +}; + +/* reg_qpts_trust_state + * Trust state for a given port. + * Access: RW + */ +MLXSW_ITEM32(reg, qpts, trust_state, 0x04, 0, 3); + +static inline void mlxsw_reg_qpts_pack(char *payload, u8 local_port, + enum mlxsw_reg_qpts_trust_state ts) +{ + MLXSW_REG_ZERO(qpts, payload); + + mlxsw_reg_qpts_local_port_set(payload, local_port); + mlxsw_reg_qpts_trust_state_set(payload, ts); +} + /* QPCR - QoS Policer Configuration Register * ----------------------------------------- * The QPCR register is used to create policers - that limit @@ -8590,6 +8628,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(percr), MLXSW_REG(pererp), MLXSW_REG(iedr), + MLXSW_REG(qpts), MLXSW_REG(qpcr), MLXSW_REG(qtct), MLXSW_REG(qeec), From e67131d9b861eb753b077961e291fc21a59daa28 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 27 Jul 2018 15:26:59 +0300 Subject: [PATCH 5/8] mlxsw: reg: Add QoS ReWrite Enable Register This register configures the rewrite enable (whether PCP or DSCP value in packet should be updated according to packet priority) per receive port. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 39 +++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index c50e754dd725..02c0e1531ed2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3367,6 +3367,44 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, mlxsw_reg_qeec_next_element_index_set(payload, next_index); } +/* QRWE - QoS ReWrite Enable + * ------------------------- + * This register configures the rewrite enable per receive port. + */ +#define MLXSW_REG_QRWE_ID 0x400F +#define MLXSW_REG_QRWE_LEN 0x08 + +MLXSW_REG_DEFINE(qrwe, MLXSW_REG_QRWE_ID, MLXSW_REG_QRWE_LEN); + +/* reg_qrwe_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is supported. No support for router port. + */ +MLXSW_ITEM32(reg, qrwe, local_port, 0x00, 16, 8); + +/* reg_qrwe_dscp + * Whether to enable DSCP rewrite (default is 0, don't rewrite). + * Access: RW + */ +MLXSW_ITEM32(reg, qrwe, dscp, 0x04, 1, 1); + +/* reg_qrwe_pcp + * Whether to enable PCP and DEI rewrite (default is 0, don't rewrite). + * Access: RW + */ +MLXSW_ITEM32(reg, qrwe, pcp, 0x04, 0, 1); + +static inline void mlxsw_reg_qrwe_pack(char *payload, u8 local_port, + bool rewrite_pcp, bool rewrite_dscp) +{ + MLXSW_REG_ZERO(qrwe, payload); + mlxsw_reg_qrwe_local_port_set(payload, local_port); + mlxsw_reg_qrwe_pcp_set(payload, rewrite_pcp); + mlxsw_reg_qrwe_dscp_set(payload, rewrite_dscp); +} + /* QPDPM - QoS Port DSCP to Priority Mapping Register * -------------------------------------------------- * This register controls the mapping from DSCP field to @@ -8632,6 +8670,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(qpcr), MLXSW_REG(qtct), MLXSW_REG(qeec), + MLXSW_REG(qrwe), MLXSW_REG(qpdpm), MLXSW_REG(pmlp), MLXSW_REG(pmtu), From 55fb71f481aac930ba87dc0f99a3060ced0326d3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 27 Jul 2018 15:27:00 +0300 Subject: [PATCH 6/8] mlxsw: reg: Add QoS Priority to DSCP Mapping Register This register controls mapping from Priority to DSCP for purposes of rewrite. Note that rewrite happens as the packet is transmitted provided that the DSCP rewrite bit is enabled for the packet. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 89 +++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 02c0e1531ed2..e52841627966 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3405,6 +3405,94 @@ static inline void mlxsw_reg_qrwe_pack(char *payload, u8 local_port, mlxsw_reg_qrwe_dscp_set(payload, rewrite_dscp); } +/* QPDSM - QoS Priority to DSCP Mapping + * ------------------------------------ + * QoS Priority to DSCP Mapping Register + */ +#define MLXSW_REG_QPDSM_ID 0x4011 +#define MLXSW_REG_QPDSM_BASE_LEN 0x04 /* base length, without records */ +#define MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN 0x4 /* record length */ +#define MLXSW_REG_QPDSM_PRIO_ENTRY_REC_MAX_COUNT 16 +#define MLXSW_REG_QPDSM_LEN (MLXSW_REG_QPDSM_BASE_LEN + \ + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN * \ + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(qpdsm, MLXSW_REG_QPDSM_ID, MLXSW_REG_QPDSM_LEN); + +/* reg_qpdsm_local_port + * Local Port. Supported for data packets from CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, qpdsm, local_port, 0x00, 16, 8); + +/* reg_qpdsm_prio_entry_color0_e + * Enable update of the entry for color 0 and a given port. + * Access: WO + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color0_e, + MLXSW_REG_QPDSM_BASE_LEN, 31, 1, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color0_dscp + * DSCP field in the outer label of the packet for color 0 and a given port. + * Reserved when e=0. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color0_dscp, + MLXSW_REG_QPDSM_BASE_LEN, 24, 6, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color1_e + * Enable update of the entry for color 1 and a given port. + * Access: WO + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color1_e, + MLXSW_REG_QPDSM_BASE_LEN, 23, 1, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color1_dscp + * DSCP field in the outer label of the packet for color 1 and a given port. + * Reserved when e=0. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color1_dscp, + MLXSW_REG_QPDSM_BASE_LEN, 16, 6, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color2_e + * Enable update of the entry for color 2 and a given port. + * Access: WO + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color2_e, + MLXSW_REG_QPDSM_BASE_LEN, 15, 1, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +/* reg_qpdsm_prio_entry_color2_dscp + * DSCP field in the outer label of the packet for color 2 and a given port. + * Reserved when e=0. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color2_dscp, + MLXSW_REG_QPDSM_BASE_LEN, 8, 6, + MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_qpdsm_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(qpdsm, payload); + mlxsw_reg_qpdsm_local_port_set(payload, local_port); +} + +static inline void +mlxsw_reg_qpdsm_prio_pack(char *payload, unsigned short prio, u8 dscp) +{ + mlxsw_reg_qpdsm_prio_entry_color0_e_set(payload, prio, 1); + mlxsw_reg_qpdsm_prio_entry_color0_dscp_set(payload, prio, dscp); + mlxsw_reg_qpdsm_prio_entry_color1_e_set(payload, prio, 1); + mlxsw_reg_qpdsm_prio_entry_color1_dscp_set(payload, prio, dscp); + mlxsw_reg_qpdsm_prio_entry_color2_e_set(payload, prio, 1); + mlxsw_reg_qpdsm_prio_entry_color2_dscp_set(payload, prio, dscp); +} + /* QPDPM - QoS Port DSCP to Priority Mapping Register * -------------------------------------------------- * This register controls the mapping from DSCP field to @@ -8671,6 +8759,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(qtct), MLXSW_REG(qeec), MLXSW_REG(qrwe), + MLXSW_REG(qpdsm), MLXSW_REG(qpdpm), MLXSW_REG(pmlp), MLXSW_REG(pmtu), From b2b1dab6884e39c9cea2650b0c399e1990cd855a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 27 Jul 2018 15:27:01 +0300 Subject: [PATCH 7/8] mlxsw: spectrum: Support ieee_setapp, ieee_delapp The APP TLVs are used for communicating priority-to-protocol ID maps for a given netdevice. Support the following APP TLVs: - DSCP (selector 5) to configure priority-to-DSCP code point maps. Use these maps to configure packet priority on ingress, and DSCP code point rewrite on egress. - Default priority (selector 1, PID 0) to configure priority for the DSCP code points that don't have one assigned by the DSCP selector. In future this could also be used for assigning default port priority when a packet arrives without DSCP tagging. Besides setting up the maps themselves, also configure port trust level and rewrite bits. Port trust level determines whether, for a packet arriving through a certain port, the priority should be determined based on PCP or DSCP header fields. So far, mlxsw kept the device default of trust-PCP. Now, as soon as the first DSCP APP TLV is configured, switch to trust-DSCP. Only when all DSCP APP TLVs are removed, switch back to trust-PCP again. Note that the default priority APP TLV doesn't impact the trust level configuration. Rewrite bits determine whether DSCP and PCP fields of egressing packets should be updated according to switch priority. When port trust is switched to DSCP, enable rewrite of DSCP field. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 4 +- .../ethernet/mellanox/mlxsw/spectrum_dcb.c | 269 +++++++++++++++++- 2 files changed, 271 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bc2704193666..13eca1a79d52 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -1,6 +1,6 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum.h - * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved. * Copyright (c) 2015-2017 Jiri Pirko * Copyright (c) 2015 Ido Schimmel * Copyright (c) 2015 Elad Raz @@ -54,6 +54,7 @@ #include "core.h" #include "core_acl_flex_keys.h" #include "core_acl_flex_actions.h" +#include "reg.h" #define MLXSW_SP_FID_8021D_MAX 1024 @@ -243,6 +244,7 @@ struct mlxsw_sp_port { struct ieee_ets *ets; struct ieee_maxrate *maxrate; struct ieee_pfc *pfc; + enum mlxsw_reg_qpts_trust_state trust_state; } dcb; struct { u8 module; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index b6ed7f7c531e..c31aeb25ab5a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -1,6 +1,6 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c - * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Ido Schimmel * * Redistribution and use in source and binary forms, with or without @@ -255,6 +255,270 @@ static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev, return 0; } +static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev, + struct dcb_app *app) +{ + int prio; + + if (app->priority >= IEEE_8021QAZ_MAX_TCS) { + netdev_err(dev, "APP entry with priority value %u is invalid\n", + app->priority); + return -EINVAL; + } + + switch (app->selector) { + case IEEE_8021QAZ_APP_SEL_DSCP: + if (app->protocol >= 64) { + netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n", + app->protocol); + return -EINVAL; + } + + /* Warn about any DSCP APP entries with the same PID. */ + prio = fls(dcb_ieee_getapp_mask(dev, app)); + if (prio--) { + if (prio < app->priority) + netdev_warn(dev, "Choosing priority %d for DSCP %d in favor of previously-active value of %d\n", + app->priority, app->protocol, prio); + else if (prio > app->priority) + netdev_warn(dev, "Ignoring new priority %d for DSCP %d in favor of current value of %d\n", + app->priority, app->protocol, prio); + } + break; + + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + if (app->protocol) { + netdev_err(dev, "EtherType APP entries with protocol value != 0 not supported\n"); + return -EINVAL; + } + break; + + default: + netdev_err(dev, "APP entries with selector %u not supported\n", + app->selector); + return -EINVAL; + } + + return 0; +} + +static u8 +mlxsw_sp_port_dcb_app_default_prio(struct mlxsw_sp_port *mlxsw_sp_port) +{ + u8 prio_mask; + + prio_mask = dcb_ieee_getapp_default_prio_mask(mlxsw_sp_port->dev); + if (prio_mask) + /* Take the highest configured priority. */ + return fls(prio_mask) - 1; + + return 0; +} + +static void +mlxsw_sp_port_dcb_app_dscp_prio_map(struct mlxsw_sp_port *mlxsw_sp_port, + u8 default_prio, + struct dcb_ieee_app_dscp_map *map) +{ + int i; + + dcb_ieee_getapp_dscp_prio_mask_map(mlxsw_sp_port->dev, map); + for (i = 0; i < ARRAY_SIZE(map->map); ++i) { + if (map->map[i]) + map->map[i] = fls(map->map[i]) - 1; + else + map->map[i] = default_prio; + } +} + +static bool +mlxsw_sp_port_dcb_app_prio_dscp_map(struct mlxsw_sp_port *mlxsw_sp_port, + struct dcb_ieee_app_prio_map *map) +{ + bool have_dscp = false; + int i; + + dcb_ieee_getapp_prio_dscp_mask_map(mlxsw_sp_port->dev, map); + for (i = 0; i < ARRAY_SIZE(map->map); ++i) { + if (map->map[i]) { + map->map[i] = fls64(map->map[i]) - 1; + have_dscp = true; + } + } + + return have_dscp; +} + +static int +mlxsw_sp_port_dcb_app_update_qpts(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qpts_trust_state ts) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpts_pl[MLXSW_REG_QPTS_LEN]; + + mlxsw_reg_qpts_pack(qpts_pl, mlxsw_sp_port->local_port, ts); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpts), qpts_pl); +} + +static int +mlxsw_sp_port_dcb_app_update_qrwe(struct mlxsw_sp_port *mlxsw_sp_port, + bool rewrite_dscp) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qrwe_pl[MLXSW_REG_QRWE_LEN]; + + mlxsw_reg_qrwe_pack(qrwe_pl, mlxsw_sp_port->local_port, + false, rewrite_dscp); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qrwe), qrwe_pl); +} + +static int +mlxsw_sp_port_dcb_toggle_trust(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qpts_trust_state ts) +{ + bool rewrite_dscp = ts == MLXSW_REG_QPTS_TRUST_STATE_DSCP; + int err; + + if (mlxsw_sp_port->dcb.trust_state == ts) + return 0; + + err = mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port, ts); + if (err) + return err; + + err = mlxsw_sp_port_dcb_app_update_qrwe(mlxsw_sp_port, rewrite_dscp); + if (err) + goto err_update_qrwe; + + mlxsw_sp_port->dcb.trust_state = ts; + return 0; + +err_update_qrwe: + mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port, + mlxsw_sp_port->dcb.trust_state); + return err; +} + +static int +mlxsw_sp_port_dcb_app_update_qpdpm(struct mlxsw_sp_port *mlxsw_sp_port, + struct dcb_ieee_app_dscp_map *map) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpdpm_pl[MLXSW_REG_QPDPM_LEN]; + short int i; + + mlxsw_reg_qpdpm_pack(qpdpm_pl, mlxsw_sp_port->local_port); + for (i = 0; i < ARRAY_SIZE(map->map); ++i) + mlxsw_reg_qpdpm_dscp_pack(qpdpm_pl, i, map->map[i]); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdpm), qpdpm_pl); +} + +static int +mlxsw_sp_port_dcb_app_update_qpdsm(struct mlxsw_sp_port *mlxsw_sp_port, + struct dcb_ieee_app_prio_map *map) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpdsm_pl[MLXSW_REG_QPDSM_LEN]; + short int i; + + mlxsw_reg_qpdsm_pack(qpdsm_pl, mlxsw_sp_port->local_port); + for (i = 0; i < ARRAY_SIZE(map->map); ++i) + mlxsw_reg_qpdsm_prio_pack(qpdsm_pl, i, map->map[i]); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdsm), qpdsm_pl); +} + +static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct dcb_ieee_app_prio_map prio_map; + struct dcb_ieee_app_dscp_map dscp_map; + u8 default_prio; + bool have_dscp; + int err; + + default_prio = mlxsw_sp_port_dcb_app_default_prio(mlxsw_sp_port); + have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port, + &prio_map); + + if (!have_dscp) { + err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port, + MLXSW_REG_QPTS_TRUST_STATE_PCP); + if (err) + netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n"); + return err; + } + + mlxsw_sp_port_dcb_app_dscp_prio_map(mlxsw_sp_port, default_prio, + &dscp_map); + err = mlxsw_sp_port_dcb_app_update_qpdpm(mlxsw_sp_port, + &dscp_map); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Couldn't configure priority map\n"); + return err; + } + + err = mlxsw_sp_port_dcb_app_update_qpdsm(mlxsw_sp_port, + &prio_map); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Couldn't configure DSCP rewrite map\n"); + return err; + } + + err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port, + MLXSW_REG_QPTS_TRUST_STATE_DSCP); + if (err) { + /* A failure to set trust DSCP means that the QPDPM and QPDSM + * maps installed above are not in effect. And since we are here + * attempting to set trust DSCP, we couldn't have attempted to + * switch trust to PCP. Thus no cleanup is necessary. + */ + netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L3\n"); + return err; + } + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_setapp(struct net_device *dev, + struct dcb_app *app) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = mlxsw_sp_dcbnl_app_validate(dev, app); + if (err) + return err; + + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + err = mlxsw_sp_port_dcb_app_update(mlxsw_sp_port); + if (err) + goto err_update; + + return 0; + +err_update: + dcb_ieee_delapp(dev, app); + return err; +} + +static int mlxsw_sp_dcbnl_ieee_delapp(struct net_device *dev, + struct dcb_app *app) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + err = mlxsw_sp_port_dcb_app_update(mlxsw_sp_port); + if (err) + netdev_err(dev, "Failed to update DCB APP configuration\n"); + return 0; +} + static int mlxsw_sp_dcbnl_ieee_getmaxrate(struct net_device *dev, struct ieee_maxrate *maxrate) { @@ -394,6 +658,8 @@ static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = { .ieee_setmaxrate = mlxsw_sp_dcbnl_ieee_setmaxrate, .ieee_getpfc = mlxsw_sp_dcbnl_ieee_getpfc, .ieee_setpfc = mlxsw_sp_dcbnl_ieee_setpfc, + .ieee_setapp = mlxsw_sp_dcbnl_ieee_setapp, + .ieee_delapp = mlxsw_sp_dcbnl_ieee_delapp, .getdcbx = mlxsw_sp_dcbnl_getdcbx, .setdcbx = mlxsw_sp_dcbnl_setdcbx, @@ -467,6 +733,7 @@ int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) if (err) goto err_port_pfc_init; + mlxsw_sp_port->dcb.trust_state = MLXSW_REG_QPTS_TRUST_STATE_PCP; mlxsw_sp_port->dev->dcbnl_ops = &mlxsw_sp_dcbnl_ops; return 0; From d159261f3662a89a5cd4fae041107ee511d9552e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 27 Jul 2018 15:27:02 +0300 Subject: [PATCH 8/8] selftests: mlxsw: Add test for trust-DSCP Add a test that exercises the new code. Send DSCP-tagged packets, and observe how they are prioritized in the switch and the DSCP is updated on egress again. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/qos_dscp_bridge.sh | 248 ++++++++++++++++++ 1 file changed, 248 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh new file mode 100755 index 000000000000..418319f19108 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh @@ -0,0 +1,248 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP +# tag and are prioritized according to the map at $swp1. They egress $swp2 and +# the DSCP value is updated to match the map at that interface. The updated DSCP +# tag is verified at $h2. +# +# ICMP responses are produced with the same DSCP tag that arrived at $h2. They +# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is +# verified at $h1--it should match the original tag. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | | APP=0,5,10 .. 7,5,17 APP=0,5,20 .. 7,5,27 | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_dscp +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=4 +source $lib_dir/lib.sh + +__dscp_capture_add_del() +{ + local add_del=$1; shift + local dev=$1; shift + local base=$1; shift + local dscp; + + for prio in {0..7}; do + dscp=$((base + prio)) + __icmp_capture_add_del $add_del $dscp "" $dev \ + "ip_tos $((dscp << 2))" + done +} + +dscp_capture_install() +{ + local dev=$1; shift + local base=$1; shift + + __dscp_capture_add_del add $dev $base +} + +dscp_capture_uninstall() +{ + local dev=$1; shift + local base=$1; shift + + __dscp_capture_add_del del $dev $base +} + +h1_create() +{ + local dscp; + + simple_if_init $h1 192.0.2.1/28 + tc qdisc add dev $h1 clsact + dscp_capture_install $h1 10 +} + +h1_destroy() +{ + dscp_capture_uninstall $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 + tc qdisc add dev $h2 clsact + dscp_capture_install $h2 20 +} + +h2_destroy() +{ + dscp_capture_uninstall $h2 20 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 +} + +dscp_map() +{ + local base=$1; shift + + for prio in {0..7}; do + echo app=$prio,5,$((base + prio)) + done +} + +lldpad_wait() +{ + local dev=$1; shift + + while lldptool -t -i $dev -V APP -c app | grep -q pending; do + echo "$dev: waiting for lldpad to push pending APP updates" + sleep 5 + done +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 up + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null + lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null + lldpad_wait $swp1 + lldpad_wait $swp2 +} + +switch_destroy() +{ + lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null + lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null + + # Give lldpad a chance to push down the changes. If the device is downed + # too soon, the updates will be left pending, but will have been struck + # off the lldpad's DB already, and we won't be able to tell. Then on + # next test iteration this would cause weirdness as newly-added APP + # rules conflict with the old ones, sometimes getting stuck in an + # "unknown" state. + sleep 5 + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +dscp_fetch_stats() +{ + local dev=$1; shift + local base=$1; shift + + for prio in {0..7}; do + local dscp=$((base + prio)) + local t=$(tc_rule_stats_get $dev $dscp) + echo "[$dscp]=$t " + done +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +dscp_ping_test() +{ + local vrf_name=$1; shift + local sip=$1; shift + local dip=$1; shift + local prio=$1; shift + local dev_10=$1; shift + local dev_20=$1; shift + + local dscp_10=$(((prio + 10) << 2)) + local dscp_20=$(((prio + 20) << 2)) + + RET=0 + + local -A t0s + eval "t0s=($(dscp_fetch_stats $dev_10 10) + $(dscp_fetch_stats $dev_20 20))" + + ip vrf exec $vrf_name \ + ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \ + -c 10 -i 0.1 -w 2 &> /dev/null + + local -A t1s + eval "t1s=($(dscp_fetch_stats $dev_10 10) + $(dscp_fetch_stats $dev_20 20))" + + for key in ${!t0s[@]}; do + local expect + if ((key == dscp_10 || key == dscp_20)); then + expect=10 + else + expect=0 + fi + + local delta=$((t1s[key] - t0s[key])) + ((expect == delta)) + check_err $? "DSCP $key: Expected to capture $expect packets, got $delta." + done + + log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20" +} + +test_dscp() +{ + for prio in {0..7}; do + dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2 + done +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS