From 3c71006d15fd3a99071a2b20d01de3edabc85767 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Mar 2017 09:08:12 +0100 Subject: [PATCH 1/9] ipv4: fib_rules: Check if rule is a default rule Currently, when non-default (custom) FIB rules are used, devices capable of layer 3 offloading flush their tables and let the kernel do the forwarding instead. When these devices' drivers are loaded they register to the FIB notification chain, which lets them know about the existence of any custom FIB rules. This is done by sending a RULE_ADD notification based on the value of 'net->ipv4.fib_has_custom_rules'. This approach is problematic when VRF offload is taken into account, as upon the creation of the first VRF netdev, an l3mdev rule is programmed to direct skbs to the VRF's table. Instead of merely reading the above value and sending a single RULE_ADD notification, we should iterate over all the FIB rules and send a detailed notification for each, thereby allowing offloading drivers to sanitize the rules they don't support and potentially flush their tables. While l3mdev rules are uniquely marked, the default rules are not. Therefore, when they are being notified they might invoke offloading drivers to unnecessarily flush their tables. Solve this by adding a helper to check if a FIB rule is a default rule. Namely, its selector should match all packets and its action should point to the local, main or default tables. As noted by David Ahern, uniquely marking the default rules is insufficient. 
When using VRFs, it's common to avoid false hits by moving the rule for the local table to just before the main table: Default configuration: $ ip rule show 0: from all lookup local 32766: from all lookup main 32767: from all lookup default Common configuration with VRFs: $ ip rule show 1000: from all lookup [l3mdev-table] 32765: from all lookup local 32766: from all lookup main 32767: from all lookup default Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/fib_rules.h | 1 + include/net/ip_fib.h | 7 +++++++ net/core/fib_rules.c | 14 ++++++++++++++ net/ipv4/fib_rules.c | 21 +++++++++++++++++++++ 4 files changed, 43 insertions(+) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 8dbfdf728cd8..1243b9c7694e 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -141,6 +141,7 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); +bool fib_rule_matchall(const struct fib_rule *rule); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh); int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index d9cee9659978..da6fa7b15558 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -311,6 +311,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, return err; } +static inline bool fib4_rule_default(const struct fib_rule *rule) +{ + return true; +} + #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); @@ -355,6 +360,8 @@ out: return err; } +bool fib4_rule_default(const struct fib_rule *rule); + #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index b6791d94841d..816e3ccb0ec9 
100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -23,6 +23,20 @@ static const struct fib_kuid_range fib_kuid_range_unset = { KUIDT_INIT(~0), }; +bool fib_rule_matchall(const struct fib_rule *rule) +{ + if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id || + rule->flags) + return false; + if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1) + return false; + if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) || + !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end)) + return false; + return true; +} +EXPORT_SYMBOL_GPL(fib_rule_matchall); + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 289210903d58..d531bc94b15e 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,6 +47,27 @@ struct fib4_rule { #endif }; +static bool fib4_rule_matchall(const struct fib_rule *rule) +{ + struct fib4_rule *r = container_of(rule, struct fib4_rule, common); + + if (r->dst_len || r->src_len || r->tos) + return false; + return fib_rule_matchall(rule); +} + +bool fib4_rule_default(const struct fib_rule *rule) +{ + if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || + rule->l3mdev) + return false; + if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN && + rule->table != RT_TABLE_DEFAULT) + return false; + return true; +} +EXPORT_SYMBOL_GPL(fib4_rule_default); + int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { From 6a003a5ff29499a94373110202631743663569c6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Mar 2017 09:08:13 +0100 Subject: [PATCH 2/9] ipv4: fib_rules: Add notifier info to FIB rules notifications Whenever a FIB rule is added or removed, a notification is sent in the FIB notification chain. However, listeners don't have a way to tell which rule was added or removed. 
This is problematic as we would like to give listeners the ability to decide which action to execute based on the notified rule. Specifically, offloading drivers should be able to determine if they support the reflection of the notified FIB rule and flush their LPM tables in case they don't. Do that by adding a notifier info to these notifications and embed the common FIB rule struct in it. Signed-off-by: Ido Schimmel Acked-by: David Ahern Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip_fib.h | 5 +++++ net/ipv4/fib_rules.c | 13 ++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index da6fa7b15558..272e62e139e0 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -213,6 +213,11 @@ struct fib_entry_notifier_info { u32 tb_id; }; +struct fib_rule_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct fib_rule *rule; +}; + struct fib_nh_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_nh *fib_nh; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index d531bc94b15e..310d24a2e097 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -186,11 +186,14 @@ static struct fib_table *fib_empty_table(struct net *net) } static int call_fib_rule_notifiers(struct net *net, - enum fib_event_type event_type) + enum fib_event_type event_type, + struct fib_rule *rule) { - struct fib_notifier_info info; + struct fib_rule_notifier_info info = { + .rule = rule, + }; - return call_fib_notifiers(net, event_type, &info); + return call_fib_notifiers(net, event_type, &info.info); } void fib_rules_notify(struct net *net, struct notifier_block *nb) @@ -257,7 +260,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule); err = 0; 
errout: @@ -279,7 +282,7 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule); errout: return err; } From 5d7bfd141924a5ece21eb612ad3c56612f041c1e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Mar 2017 09:08:14 +0100 Subject: [PATCH 3/9] ipv4: fib_rules: Dump FIB rules when registering FIB notifier In commit c3852ef7f2f8 ("ipv4: fib: Replay events when registering FIB notifier") we dumped the FIB tables and replayed the events to the passed notification block. However, we merely sent a RULE_ADD notification in case custom rules were in use. As explained in previous patches, this approach won't work anymore. Instead, we should notify the caller about all the FIB rules and let it act accordingly. Upon registration to the FIB notification chain, replay a RULE_ADD notification for each programmed FIB rule, custom or not. The integrity of the dump is ensured by the mechanism introduced in the above mentioned commit. Prevent regressions by making sure current listeners correctly sanitize the notified rules. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 13 ++++++++++++- drivers/net/ethernet/rocker/rocker_main.c | 17 +++++++++++++++-- net/ipv4/fib_rules.c | 19 ++++++++++++++++--- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 80345a1ddf17..9ab41c47c263 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "spectrum.h" #include "core.h" @@ -2514,6 +2515,7 @@ struct mlxsw_sp_fib_event_work { struct work_struct work; union { struct fib_entry_notifier_info fen_info; + struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; }; struct mlxsw_sp *mlxsw_sp; @@ -2525,6 +2527,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; bool replace, append; int err; @@ -2548,7 +2551,10 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) break; case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - mlxsw_sp_router_fib4_abort(mlxsw_sp); + rule = fib_work->fr_info.rule; + if (!fib4_rule_default(rule)) + mlxsw_sp_router_fib4_abort(mlxsw_sp); + fib_rule_put(rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: @@ -2591,6 +2597,11 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, */ fib_info_hold(fib_work->fen_info.fi); break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: memcpy(&fib_work->fnh_info, ptr, 
sizeof(fib_work->fnh_info)); diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index b712ec23075b..bab13613b138 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -2175,7 +2176,10 @@ static const struct switchdev_ops rocker_port_switchdev_ops = { struct rocker_fib_event_work { struct work_struct work; - struct fib_entry_notifier_info fen_info; + union { + struct fib_entry_notifier_info fen_info; + struct fib_rule_notifier_info fr_info; + }; struct rocker *rocker; unsigned long event; }; @@ -2185,6 +2189,7 @@ static void rocker_router_fib_event_work(struct work_struct *work) struct rocker_fib_event_work *fib_work = container_of(work, struct rocker_fib_event_work, work); struct rocker *rocker = fib_work->rocker; + struct fib_rule *rule; int err; /* Protect internal structures from changes */ @@ -2202,7 +2207,10 @@ static void rocker_router_fib_event_work(struct work_struct *work) break; case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - rocker_world_fib4_abort(rocker); + rule = fib_work->fr_info.rule; + if (!fib4_rule_default(rule)) + rocker_world_fib4_abort(rocker); + fib_rule_put(rule); break; } rtnl_unlock(); @@ -2233,6 +2241,11 @@ static int rocker_router_fib_event(struct notifier_block *nb, */ fib_info_hold(fib_work->fen_info.fi); break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; } queue_work(rocker->rocker_owq, &fib_work->work); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 310d24a2e097..778ecf977eb2 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -185,6 +185,17 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } +static int call_fib_rule_notifier(struct notifier_block *nb, 
struct net *net, + enum fib_event_type event_type, + struct fib_rule *rule) +{ + struct fib_rule_notifier_info info = { + .rule = rule, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + static int call_fib_rule_notifiers(struct net *net, enum fib_event_type event_type, struct fib_rule *rule) @@ -196,12 +207,14 @@ static int call_fib_rule_notifiers(struct net *net, return call_fib_notifiers(net, event_type, &info.info); } +/* Called with rcu_read_lock() */ void fib_rules_notify(struct net *net, struct notifier_block *nb) { - struct fib_notifier_info info; + struct fib_rules_ops *ops = net->ipv4.rules_ops; + struct fib_rule *rule; - if (net->ipv4.fib_has_custom_rules) - call_fib_notifier(nb, net, FIB_EVENT_RULE_ADD, &info); + list_for_each_entry_rcu(rule, &ops->rules_list, list) + call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule); } static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { From fdeea7be88b12742bfd50d9e19a06c0d2e702400 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Mar 2017 09:08:15 +0100 Subject: [PATCH 4/9] net: vrf: Set slave's private flag before linking Allow listeners of the subsequent CHANGEUPPER notification to retrieve the VRF's table ID by calling l3mdev_fib_table() with the slave netdev. Without this change, the netdev won't be considered an L3 slave and the function would return 0. This is consistent with other master device such as bridge and bond that set the slave's private flag before linking. It also makes do_vrf_{add,del}_slave() symmetric. Signed-off-by: Ido Schimmel Acked-by: David Ahern Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index fea687f35b5a..7f28021d9d93 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -747,14 +747,18 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { int ret; + port_dev->priv_flags |= IFF_L3MDEV_SLAVE; ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (ret < 0) - return ret; + goto err; - port_dev->priv_flags |= IFF_L3MDEV_SLAVE; cycle_netdev(port_dev); return 0; + +err: + port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; + return ret; } static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) From 57837885e3c74e42596a8d8c3e6831be1c0a7974 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Mar 2017 09:08:16 +0100 Subject: [PATCH 5/9] mlxsw: spectrum_router: Associate RIFs with correct VR When a router interface (RIF) is created due to a netdev being enslaved to a VRF master, then it should be associated with the appropriate virtual router (VR) and not the default one. If netdev is a VRF slave, lookup the VR based on the VRF's table ID. Otherwise default to the MAIN table. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9ab41c47c263..5aad0aef1ed3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "spectrum.h" #include "core.h" @@ -2762,6 +2763,7 @@ mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, struct net_device *l3_dev) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + u32 tb_id = l3mdev_fib_table(l3_dev); struct mlxsw_sp_vr *vr; struct mlxsw_sp_fid *f; struct mlxsw_sp_rif *r; @@ -2772,7 +2774,7 @@ mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, if (rif == MLXSW_SP_INVALID_RIF) return ERR_PTR(-ERANGE); - vr = mlxsw_sp_vr_get(mlxsw_sp, RT_TABLE_MAIN); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN); if (IS_ERR(vr)) return ERR_CAST(vr); @@ -3010,6 +3012,7 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, struct net_device *l3_dev, struct mlxsw_sp_fid *f) { + u32 tb_id = l3mdev_fib_table(l3_dev); struct mlxsw_sp_vr *vr; struct mlxsw_sp_rif *r; u16 rif; @@ -3019,7 +3022,7 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, if (rif == MLXSW_SP_INVALID_RIF) return -ERANGE; - vr = mlxsw_sp_vr_get(mlxsw_sp, RT_TABLE_MAIN); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN); if (IS_ERR(vr)) return PTR_ERR(vr); From 9db032bb1e8eed0721884c01a6c87f55f3d400fa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Mar 2017 09:08:17 +0100 Subject: [PATCH 6/9] mlxsw: spectrum_router: Don't destroy RIF if L3 slave We usually destroy the netdev's router interface (RIF) when the last IP address is removed from it. However, we shouldn't do that if it's enslaved to an L3 master device. 
Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 5aad0aef1ed3..91ec62a2db48 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -2660,7 +2661,7 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, return true; return false; case NETDEV_DOWN: - if (r && !in_dev->ifa_list) + if (r && !in_dev->ifa_list && !netif_is_l3_slave(r->dev)) return true; /* It is possible we already removed the RIF ourselves * if it was assigned to a netdev that is now a bridge From 7179eb5acd59d64cb8bef2d8788af8e9647f6986 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Mar 2017 09:08:18 +0100 Subject: [PATCH 7/9] mlxsw: spectrum_router: Add support for VRFs Allow port netdevs, LAG and VLAN devices stacked on top of these to be enslaved to a VRF master device. Upon enslavement, create a router interface (RIF) for the enslaved netdev and associate it with a virtual router (VR) based on the VRF's table ID. If a RIF already exists for the netdev (f.e., due to the existence of an IP address), then it's deleted and a new one is created with the appropriate VR binding. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 19 +++++++-- .../net/ethernet/mellanox/mlxsw/spectrum.h | 4 ++ .../ethernet/mellanox/mlxsw/spectrum_router.c | 41 +++++++++++++++++++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 475499b6c989..659df3225a24 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3951,7 +3951,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, upper_dev = info->upper_dev; if (!is_vlan_dev(upper_dev) && !netif_is_lag_master(upper_dev) && - !netif_is_bridge_master(upper_dev)) + !netif_is_bridge_master(upper_dev) && + !netif_is_l3_master(upper_dev)) return -EINVAL; if (!info->linking) break; @@ -3991,6 +3992,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, else mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); + } else if (netif_is_l3_master(upper_dev)) { + if (info->linking) + err = mlxsw_sp_port_vrf_join(mlxsw_sp_port); + else + mlxsw_sp_port_vrf_leave(mlxsw_sp_port); } else { err = -EINVAL; WARN_ON(1); @@ -4353,14 +4359,16 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!netif_is_bridge_master(upper_dev)) + if (!netif_is_bridge_master(upper_dev) && + !netif_is_l3_master(upper_dev)) return -EINVAL; if (!info->linking) break; /* We can't have multiple VLAN interfaces configured on * the same port and being members in the same bridge. 
*/ - if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, upper_dev)) return -EINVAL; break; @@ -4372,6 +4380,11 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, upper_dev); else mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport); + } else if (netif_is_l3_master(upper_dev)) { + if (info->linking) + err = mlxsw_sp_vport_vrf_join(mlxsw_sp_vport); + else + mlxsw_sp_vport_vrf_leave(mlxsw_sp_vport); } else { err = -EINVAL; WARN_ON(1); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 5502232b06cf..60004d97e631 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -578,6 +578,10 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *r); +int mlxsw_sp_vport_vrf_join(struct mlxsw_sp_port *mlxsw_sp_vport); +void mlxsw_sp_vport_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_vport); +int mlxsw_sp_port_vrf_join(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 91ec62a2db48..e26268805fcc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3226,6 +3226,47 @@ err_rif_edit: return err; } +int mlxsw_sp_vport_vrf_join(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + struct net_device *dev = mlxsw_sp_vport->dev; + + /* In case vPort already has a RIF, then we need to drop it. 
+ * A new one will be created using the VRF's VR. + */ + if (f && f->r) + mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); + + return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, dev); +} + +void mlxsw_sp_vport_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); +} + +int mlxsw_sp_port_vrf_join(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); + if (WARN_ON(!mlxsw_sp_vport)) + return -EINVAL; + + return mlxsw_sp_vport_vrf_join(mlxsw_sp_vport); +} + +void mlxsw_sp_port_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); + if (WARN_ON(!mlxsw_sp_vport)) + return; + + mlxsw_sp_vport_vrf_leave(mlxsw_sp_vport); +} + static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) { struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb); From 3d70e458be3af878216f8c97fb4c0926cc003e31 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Mar 2017 09:08:19 +0100 Subject: [PATCH 8/9] mlxsw: spectrum_router: Add support for VRFs on top of bridges In a similar fashion to the previous patch, allow bridges and VLAN devices on top of bridges to be enslaved to a VRF master device. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 52 ++++++++++++++++++- .../net/ethernet/mellanox/mlxsw/spectrum.h | 4 ++ .../ethernet/mellanox/mlxsw/spectrum_router.c | 26 ++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 659df3225a24..28019f8b70f7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4111,7 +4111,7 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!is_vlan_dev(upper_dev)) + if (!is_vlan_dev(upper_dev) && !netif_is_l3_master(upper_dev)) return -EINVAL; if (is_vlan_dev(upper_dev) && br_dev != mlxsw_sp->master_bridge.dev) @@ -4126,6 +4126,12 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, else mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, upper_dev); + } else if (netif_is_l3_master(upper_dev)) { + if (info->linking) + err = mlxsw_sp_bridge_vrf_join(mlxsw_sp, + br_dev); + else + mlxsw_sp_bridge_vrf_leave(mlxsw_sp, br_dev); } else { err = -EINVAL; WARN_ON(1); @@ -4415,6 +4421,47 @@ static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev, return 0; } +static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *info; + struct mlxsw_sp *mlxsw_sp; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); + if (!mlxsw_sp) + return 0; + + info = ptr; + + switch (event) { + case NETDEV_PRECHANGEUPPER: + /* VLAN devices are only allowed on top of the + * VLAN-aware bridge. 
+ */ + if (WARN_ON(vlan_dev_real_dev(vlan_dev) != + mlxsw_sp->master_bridge.dev)) + return -EINVAL; + if (!netif_is_l3_master(info->upper_dev)) + return -EINVAL; + break; + case NETDEV_CHANGEUPPER: + if (netif_is_l3_master(info->upper_dev)) { + if (info->linking) + err = mlxsw_sp_bridge_vrf_join(mlxsw_sp, + vlan_dev); + else + mlxsw_sp_bridge_vrf_leave(mlxsw_sp, vlan_dev); + } else { + err = -EINVAL; + WARN_ON(1); + } + break; + } + + return err; +} + static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, unsigned long event, void *ptr) { @@ -4427,6 +4474,9 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, else if (netif_is_lag_master(real_dev)) return mlxsw_sp_netdevice_lag_vport_event(real_dev, event, ptr, vid); + else if (netif_is_bridge_master(real_dev)) + return mlxsw_sp_netdevice_bridge_vlan_event(vlan_dev, event, + ptr); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 60004d97e631..0e223f6983c5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -582,6 +582,10 @@ int mlxsw_sp_vport_vrf_join(struct mlxsw_sp_port *mlxsw_sp_vport); void mlxsw_sp_vport_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_vport); int mlxsw_sp_port_vrf_join(struct mlxsw_sp_port *mlxsw_sp_port); void mlxsw_sp_port_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_bridge_vrf_join(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev); +void mlxsw_sp_bridge_vrf_leave(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev); int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e26268805fcc..c89f4b4d70b1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3267,6 +3267,32 @@ void mlxsw_sp_port_vrf_leave(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_sp_vport_vrf_leave(mlxsw_sp_vport); } +int mlxsw_sp_bridge_vrf_join(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + struct mlxsw_sp_fid *f; + + f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); + if (WARN_ON(!f)) + return -EINVAL; + + if (f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + + return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f); +} + +void mlxsw_sp_bridge_vrf_leave(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + struct mlxsw_sp_fid *f; + + f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); + if (WARN_ON(!f)) + return; + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); +} + static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) { struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb); From c7f6e6658b771c64a07b69852ac4362a8e781ab6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Mar 2017 09:08:20 +0100 Subject: [PATCH 9/9] mlxsw: spectrum_router: Don't abort on l3mdev rules Now that port netdevs can be enslaved to a VRF master we need to make sure the device's routing tables won't be flushed upon the insertion of a l3mdev rule. Note that we assume the notified l3mdev rule is a simple rule as used by the VRF master. We don't check for the presence of other selectors such as 'iif' and 'oif'. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c89f4b4d70b1..488bc1fd7868 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2554,7 +2554,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: rule = fib_work->fr_info.rule; - if (!fib4_rule_default(rule)) + if (!fib4_rule_default(rule) && !rule->l3mdev) mlxsw_sp_router_fib4_abort(mlxsw_sp); fib_rule_put(rule); break;