// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

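/* Multipath offload is only attempted when the LAG is ready, is not already
 * active in a non-multipath mode, and the eswitch multipath prerequisites
 * hold for both PFs.
 */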
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

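/* Report whether @dev belongs to a LAG that is currently running in
 * multipath mode.
 */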
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = mlx5_lag_dev(dev);

	return ldev && __mlx5_lag_is_multipath(ldev);
}

/**
 * mlx5_lag_set_port_affinity - set the LAG TX port affinity
 * @ldev: lag device
 * @port: affinity to set:
 *        0 - set normal affinity.
 *        1 - set affinity to port 1.
 *        2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker = {};

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

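/* Flush callback passed to register_fib_notifier(): wait for all queued FIB
 * event work items to finish before the notifier is unregistered.
 */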
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

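/* Remember the route (and its priority) that multipath LAG is currently
 * tracking, so later FIB events can be matched against it.
 */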
static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.mfi = fi;
	mp->fib.priority = fi->fib_priority;
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
}

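/* FIB events are copied into this work item by the notifier and handled
 * later on the multipath workqueue.
 */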
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

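/* Walk the nexthops of @fi, starting after @current_dev (or from the first
 * nexthop when @current_dev is NULL), and return the first nexthop netdev
 * that belongs to this LAG, or NULL if none is found.
 */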
static struct net_device*
mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
			  struct fib_info *fi,
			  struct net_device *current_dev)
{
	struct net_device *fib_dev;
	int i, ldev_idx, nhs;

	nhs = fib_info_num_path(fi);
	i = 0;
	if (current_dev) {
		for (; i < nhs; i++) {
			fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
			if (fib_dev == current_dev) {
				i++;
				break;
			}
		}
	}
	for (; i < nhs; i++) {
		fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
		ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
		if (ldev_idx >= 0)
			return ldev->pf[ldev_idx].netdev;
	}

	return NULL;
}

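/* Handle an IPv4 route replace/delete event: track the route, activate
 * multipath LAG the first time a route with a nexthop on each LAG port shows
 * up, and set the TX port affinity according to which nexthops are present.
 */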
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	struct net_device *nh_dev0, *nh_dev1;
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop track */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Handle multipath entry with lower priority value */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

	nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
	nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);

	/* Handle add/replace event */
	if (!nh_dev0) {
		if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
			mp->fib.mfi = NULL;
		return;
	}

	if (nh_dev0 == nh_dev1) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload doesn't support routes with multiple nexthops of the same device");
		return;
	}

	if (!nh_dev1) {
		if (__mlx5_lag_is_active(ldev)) {
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0);

			i++;
			mlx5_lag_set_port_affinity(ldev, i);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	/* First time we see multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

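/* Handle a nexthop add/delete event for the tracked multipath route: shift
 * traffic to the peer port when a nexthop on one of the LAG ports is removed,
 * and restore normal affinity once the route has two nexthops again.
 */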
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

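/* Work item handler: process a queued FIB event under RTNL and drop the
 * fib_info reference taken when the event was enqueued.
 */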
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

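/* Allocate and initialize a work item for a deferred FIB event; GFP_ATOMIC
 * is used since the notifier may run in atomic context.
 */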
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

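/* FIB notifier callback: filter out events that multipath LAG does not care
 * about, copy the rest into a work item (holding a fib_info reference) and
 * queue it for processing in mlx5_lag_fib_update().
 */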
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;

		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}

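/* Set up multipath LAG support: create the event workqueue and register the
 * FIB notifier. Returns 0 if the notifier is already registered.
 */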
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->fib.mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

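/* Tear down multipath LAG support: unregister the FIB notifier, destroy the
 * workqueue and forget the tracked route.
 */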
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}