登录
首页 >  Golang >  Go问答

netlink 序列号乱序

来源:stackoverflow

时间:2024-04-14 22:09:33 486浏览 收藏

珍惜时间,勤奋学习!今天给大家带来《netlink 序列号乱序》,正文内容主要涉及 netlink 请求与响应序列号不匹配的问题。如果你正在学习 Golang,或者对 Golang 有疑问,欢迎大家关注我!后面我会持续更新相关内容,希望能帮到正在学习的大家!

问题内容

我正在使用 netlink/genetlink Go 包编写一个实用程序,用来与 mac80211_hwsim 这个 netlink 族(family)进行交互。它运行良好……几乎。

我发现,如果我执行一次 get_radio 调用,那么对该族的下一次调用将无法通过校验,因为响应的序列号与请求的序列号不匹配。具体来说,我看到的模式是:

get_radio request:  seq=655
get_radio response: seq=655
del_radio request:  seq=656
del_radio response: seq=655

无论我在 get_radio 之后发出哪个命令,都会出现相同的模式。如果整个过程中不涉及 get_radio 调用,则序列号与预期完全一致。

有一个用 C 语言编写的现有命令,它使用 libnl/libgenl,我对其进行了修改以运行多个命令。据我所知,该实现做了非常相似的事情,但没有序列号问题,所以我不太愿意认为问题出在内核模块上。但 strace 清楚地表明,内核返回的响应带有乱序的序列号(请原谅过度的横向滚动):

socket(af_netlink, sock_raw, netlink_generic) = 3
bind(3, {sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, 12) = 0
getsockname(3, {sa_family=af_netlink, nl_pid=18357, nl_groups=00000000}, [112->12]) = 0
sendmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=40, type=0x10 /* nlmsg_??? */, flags=nlm_f_request, seq=2596996163, pid=18357}, "\3\1\0\0\23\0\2\0mac80211_hwsim\0\0"}, iov_len=40}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 40
recvmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=224, type=0x10 /* nlmsg_??? */, flags=0, seq=2596996163, pid=18357}, "\1\2\0\0\23\0\2\0mac80211_hwsim\0\0\6\0\1\0\31\0\0\0"...}, {{len=0, type=0 /* nlmsg_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, msg_peek) = 224
recvmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=224, type=0x10 /* nlmsg_??? */, flags=0, seq=2596996163, pid=18357}, "\1\2\0\0\23\0\2\0mac80211_hwsim\0\0\6\0\1\0\31\0\0\0"...}, {{len=0, type=0 /* nlmsg_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 224
sendmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* nlmsg_??? */, flags=nlm_f_request|nlm_f_ack, seq=2596996164, pid=18357}, "\6\1\0\0\10\0\n\0t\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
recvmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=48, type=0x19 /* nlmsg_??? */, flags=0, seq=2596996164, pid=18357}, "\6\1\0\0\10\0\n\0t\0\0\0\10\0\t\0\1\0\0\0\t\0\21\0phy84\0\0\0"}, {{len=0, type=0 /* nlmsg_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, msg_peek) = 48
recvmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=48, type=0x19 /* nlmsg_??? */, flags=0, seq=2596996164, pid=18357}, "\6\1\0\0\10\0\n\0t\0\0\0\10\0\t\0\1\0\0\0\t\0\21\0phy84\0\0\0"}, {{len=0, type=0 /* nlmsg_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48
sendmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* nlmsg_??? */, flags=nlm_f_request|nlm_f_ack, seq=2596996165, pid=18357}, "\5\1\0\0\10\0\n\0t\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
recvmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=36, type=nlmsg_error, flags=0, seq=2596996164, pid=18357}, "\0\0\0\0\34\0\0\0\31\0\5\0d\4\313\232\265g\0\0"}, {{len=0, type=0 /* nlmsg_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, msg_peek) = 36
recvmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=36, type=nlmsg_error, flags=0, seq=2596996164, pid=18357}, "\0\0\0\0\34\0\0\0\31\0\5\0d\4\313\232\265g\0\0"}, {{len=0, type=0 /* nlmsg_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
close(3)                                = 0
+++ exited with 0 +++

请注意,每个 sendmsg() 对应两个 recvmsg() 调用,因为第一个调用只是用 msg_peek 来查看响应是否可用。在三组请求和响应中,第一组(序列号 ...163)只是获取族(family)信息。第二组(序列号 ...164)执行 get_radio (\6) 调用,第三组就是响应乱序的那一次:这个 del_radio (\5) 调用发送了一条序列号为 ...165 的消息,但返回的响应(其他方面格式完全正常)所带的序列号却与上一个命令的相同:...164。

因为这是 Go 程序,所以有多个线程同时处于活动状态,但是上面 strace 输出中的所有调用都来自同一个线程。跟踪中唯一被省略的调用是对 futex() 的调用。

C 版本由于使用了 libnl/libgenl 提供的回调机制,确实是在与发出请求的线程不同的线程中处理响应。请求跟踪:

socket(af_netlink, sock_raw|sock_cloexec, netlink_generic) = 3
setsockopt(3, sol_socket, so_sndbuf, [32768], 4) = 0
setsockopt(3, sol_socket, so_rcvbuf, [32768], 4) = 0
getpid()                                = 21338
bind(3, {sa_family=af_netlink, nl_pid=-1300212902, nl_groups=00000000}, 12) = 0
getsockname(3, {sa_family=af_netlink, nl_pid=-1300212902, nl_groups=00000000}, [12]) = 0
sendmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=20, type=0x10 /* nlmsg_??? */, flags=nlm_f_request|nlm_f_ack|0x300, seq=1530817156, pid=2994754394}, "\3\1\0\0"}, iov_len=20}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
recvmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=116, type=0x10 /* nlmsg_??? */, flags=nlm_f_multi, seq=1530817156, pid=2994754394}, "\1\2\0\0\v\0\2\0nlctrl\0\0\6\0\1\0\20\0\0\0\10\0\3\0\2\0\0\0"...}, {{len=96, type=0x10 /* nlmsg_??? */, flags=nlm_f_multi, seq=1530817156, pid=2994754394}, "\1\2\0\0\16\0\2\0vfs_dquot\0\0\0\6\0\1\0\21\0\0\0\10\0\3\0"...}, {{len=104, type=0x10 /* nlmsg_??? */, flags=nlm_f_multi, seq=1530817156, pid=2994754394}, "\1\2\0\0\17\0\2\0acpi_event\0\0\6\0\1\0\23\0\0\0\10\0\3\0"...}, {{len=108, type=0x10 /* nlmsg_??? */, flags=nlm_f_multi, seq=1530817156, pid=2994754394}, "\1\2\0\0\22\0\2\0thermal_event\0\0\0\6\0\1\0\24\0\0\0"...}, {{len=112, type=0x10 /* nlmsg_??? */, flags=nlm_f_multi, seq=1530817156, pid=2994754394}, "\1\2\0\0\20\0\2\0tcp_metrics\0\6\0\1\0\25\0\0\0\10\0\3\0"...}, {{len=112, type=0x10 /* nlmsg_??? */, flags=nlm_f_multi, seq=1530817156, pid=2994754394}, "\1\2\0\0\16\0\2\0taskstats\0\0\0\6\0\1\0\26\0\0\0\10\0\3\0"...}, {{len=2076, type=0x10 /* nlmsg_??? */, flags=nlm_f_multi, seq=1530817156, pid=2994754394}, "\1\2\0\0\f\0\2\0nl80211\0\6\0\1\0\30\0\0\0\10\0\3\0\1\0\0\0"...}, {{len=224, type=0x10 /* nlmsg_??? */, flags=nlm_f_multi, seq=1530817156, pid=2994754394}, "\1\2\0\0\23\0\2\0mac80211_hwsim\0\0\6\0\1\0\31\0\0\0"...}, {{len=0, type=0 /* nlmsg_??? */, flags=0, seq=0, pid=0}}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 2948
recvmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=20, type=nlmsg_done, flags=nlm_f_multi, seq=1530817156, pid=2994754394}, "\0\0\0\0"}, {{len=0, type=0 /* nlmsg_??? */, flags=0, seq=0, pid=65542}}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
mmap(null, 8392704, prot_read|prot_write, map_private|map_anonymous|map_stack, -1, 0) = 0x7fdb752ce000
mprotect(0x7fdb752ce000, 4096, prot_none) = 0
clone(child_stack=0x7fdb75acdff0, flags=clone_vm|clone_fs|clone_files|clone_sighand|clone_thread|clone_sysvsem|clone_settls|clone_parent_settid|clone_child_cleartid, parent_tidptr=0x7fdb75ace9d0, tls=0x7fdb75ace700, child_tidptr=0x7fdb75ace9d0) = 21339
sendmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* nlmsg_??? */, flags=nlm_f_request|nlm_f_ack, seq=1530817157, pid=2994754394}, "\6\1\0\0\10\0\n\0_\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
nanosleep({tv_sec=2, tv_nsec=0}, 0x7ffff64ac200) = 0
write(1, "deleting radio with id '95'...\n", 31) = 31
sendmsg(3, {msg_name={sa_family=af_netlink, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* nlmsg_??? */, flags=nlm_f_request|nlm_f_ack, seq=1530817158, pid=2994754394}, "\5\1\0\0\10\0\n\0_\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
nanosleep({tv_sec=2, tv_nsec=0},  <unfinished ...>) = ?
+++ exited with 0 +++

和响应跟踪:

recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=48, type=0x19 /* NLMSG_??? */, flags=0, seq=1530817157, pid=2994754394}, "\6\1\0\0\10\0\n\0_\0\0\0\10\0\t\0\1\0\0\0\t\0\21\0phy95\0\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48
epoll_wait(4, [{EPOLLIN, {u32=3, u64=3}}], 32, -1) = 1
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=1530817157, pid=2994754394}, "\0\0\0\0\34\0\0\0\31\0\5\0\205j>[ZS\200\262"}, {{len=1114121, type=0x6870 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ECHO|NLM_F_DUMP_INTR|NLM_F_DUMP_FILTERED|0x3940, seq=53, pid=0}, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
write(1, "new SSID defined to interface 95"..., 33) = 33
epoll_wait(4, [{EPOLLIN, {u32=3, u64=3}}], 32, -1) = 1
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=1530817158, pid=2994754394}, "\0\0\0\0\34\0\0\0\31\0\5\0\206j>[ZS\200\262"}, {{len=1114121, type=0x6870 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ECHO|NLM_F_DUMP_INTR|NLM_F_DUMP_FILTERED|0x3940, seq=53, pid=0}, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
write(1, "Successfully deleted device with"..., 39) = 39
exit_group(0)                                       = ?
+++ exited with 0 +++

唯一引起我注意的地方(除了线程结构之外)是两者使用了不同的标志(C 代码中是 nlm_f_dump_intr|nlm_f_dump_filtered,Go 代码中是 0)。

我不确定还可以使用哪些工具来排查这个问题,因为我不知道这些序列号是如何生成的。在我看来,内核模块、Go 包以及我自己的 Go 代码似乎都没有明显的错误,但接口的实际行为与它被使用的方式之间总有可能存在不匹配,这里或许正是这种情况。是否有人对这个问题感到熟悉,或者对如何在这方面取得进展有任何想法?


解决方案


我仍然不明白为什么会发生这种情况,但简而言之(TL;DR),修复方法似乎是继续读取下一个数据包:它包含相同的数据,并且带有正确的序列号。

我尝试使用另一个 netlink 包编写程序,发现它虽然也有相同的问题,但在遇到这种情况时会重复调用 recvmsg()。执行此操作的代码在这里:

https://github.com/vishvananda/netlink/blob/a06dabf/nl/nl_linux.go#L425

引入该代码的提交并没有解释为什么这个循环是正确的,但是在其他包中仿照这一做法(序列号不匹配时继续读取下一条消息)可以解决这个问题。

本篇关于《netlink 序列号乱序》的介绍就到此结束啦,但是学无止境,想要了解学习更多关于Golang的相关知识,请关注golang学习网公众号!

声明:本文转载于:stackoverflow 如有侵犯,请联系study_golang@163.com删除
相关阅读
更多>
最新阅读
更多>
课程推荐
更多>