1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > Linux数据报文接收发送总结6

Linux数据报文接收发送总结6

时间:2022-12-12 04:56:45

相关推荐

Linux数据报文接收发送总结6

2.3 协议栈注册

内核实现了网络层的ip协议,也实现了传输层的tcp协议和udp协议。这些协议对应的实现函数分别是ip_rcv(),tcp_v4_rcv()和udp_rcv()。和我们平时写代码的方式不一样的是,内核是通过注册的方式来实现的。Linux内核中的fs_initcall和subsys_initcall类似,也是初始化模块的入口。fs_initcall调用inet_init后开始网络协议栈注册。通过inet_init,将这些函数注册到了inet_protos(传输层协议)和ptype_base(网络/链路层协议)数据结构中了。如下图:

相关代码如下

//file: net/ipv4/af_inet.cstatic const struct net_proto_family inet_family_ops = {.family = PF_INET,.create = inet_create,.owner= THIS_MODULE,};/* Upon startup we insert all the elements in inetsw_array[] into* the linked list inetsw.*/static struct inet_protosw inetsw_array[] ={{.type = SOCK_STREAM,.protocol = IPPROTO_TCP,.prot = &tcp_prot,.ops = &inet_stream_ops,.flags =INET_PROTOSW_PERMANENT |INET_PROTOSW_ICSK,},{.type = SOCK_DGRAM,.protocol = IPPROTO_UDP,.prot = &udp_prot,.ops = &inet_dgram_ops,.flags =INET_PROTOSW_PERMANENT,},{.type = SOCK_DGRAM,.protocol = IPPROTO_ICMP,.prot = &ping_prot,.ops = &inet_sockraw_ops,.flags =INET_PROTOSW_REUSE,},{.type = SOCK_RAW,.protocol = IPPROTO_IP,/* wild card */.prot = &raw_prot,.ops = &inet_sockraw_ops,.flags =INET_PROTOSW_REUSE,}};static struct packet_type ip_packet_type __read_mostly = {.type = cpu_to_be16(ETH_P_IP),.func = ip_rcv,};static const struct net_protocol tcp_protocol = {.early_demux=tcp_v4_early_demux,.handler=tcp_v4_rcv,.err_handler=tcp_v4_err,.no_policy=1,.netns_ok=1,.icmp_strict_tag_validation = 1,};static const struct net_protocol udp_protocol = {.early_demux =udp_v4_early_demux,.handler =udp_rcv,.err_handler =udp_err,.no_policy =1,.netns_ok =1,};static const struct net_protocol icmp_protocol = {.handler =icmp_rcv,.err_handler =icmp_err,.no_policy =1,.netns_ok =1,};static int __init inet_init(void){struct inet_protosw *q;struct list_head *r;int rc = -EINVAL;sock_skb_cb_check_size(sizeof(struct inet_skb_parm));rc = proto_register(&tcp_prot, 1);if (rc)goto out;rc = proto_register(&udp_prot, 1);if (rc)goto out_unregister_tcp_proto;rc = proto_register(&raw_prot, 1);if (rc)goto out_unregister_udp_proto;rc = proto_register(&ping_prot, 1);if (rc)goto out_unregister_raw_proto;/**Tell SOCKET that we are alive...*/(void)sock_register(&inet_family_ops); // 协议族注册, socket函数的第一个参数#ifdef CONFIG_SYSCTLip_static_sysctl_init();#endif/**Add all the base protocols.*/if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) 
//协议注册,ip层协议解析后,再向上解析传输层调用 pr_crit("%s: Cannot add ICMP protocol\n", __func__);if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)pr_crit("%s: Cannot add UDP protocol\n", __func__);if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)pr_crit("%s: Cannot add TCP protocol\n", __func__);#ifdef CONFIG_IP_MULTICASTif (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)pr_crit("%s: Cannot add IGMP protocol\n", __func__);#endif/* Register the socket-side information for inet_create. */for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)INIT_LIST_HEAD(r);for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) // 类型注册,对应socket中的第二个参数inet_register_protosw(q);/**Set the ARP module up*/arp_init();/**Set the IP module up*/ip_init();tcp_v4_init();/* Setup TCP slab cache for open requests. */tcp_init();/* Setup UDP memory threshold */udp_init();/* Add UDP-Lite (RFC 3828) */udplite4_register();ping_init();/**Set the ICMP layer up*/if (icmp_init() < 0)panic("Failed to create the ICMP control socket.\n");/**Initialise the multicast router*/#if defined(CONFIG_IP_MROUTE)if (ip_mr_init())pr_crit("%s: Cannot init ipv4 mroute\n", __func__);#endifif (init_inet_pernet_ops())pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);/**Initialise per-cpu ipv4 mibs*/if (init_ipv4_mibs())pr_crit("%s: Cannot init ipv4 mibs\n", __func__);ipv4_proc_init();ipfrag_init();dev_add_pack(&ip_packet_type); // 注册到ptype_base, 接收报文时,根据协议进行相应的处理函数ip_tunnel_core_init();rc = 0;out:return rc;out_unregister_raw_proto:proto_unregister(&raw_prot);out_unregister_udp_proto:proto_unregister(&udp_prot);out_unregister_tcp_proto:proto_unregister(&tcp_prot);goto out;}fs_initcall(inet_init);

proto_register注册函数,将对应协议加到proto_list链表中。proto_list是一个全局的静态链表,inet域支持的所有协议全部在这个链表中,但这个链表在协议栈中并没有太大用途,它只是用于在/proc/net/protocols文件中输出当前系统所支持的所有协议。

inet_register_protosw注册函数,将对应协议加到 inetsw 数组中,在socket系统调用选择具体协议时会用到,发送报文时也会用到。

/*
 * proto_register - add a struct proto to the global proto_list.
 *
 * The body of the alloc_slab branch is elided ("......") in this excerpt.
 * On the path shown, prot->node is linked into proto_list and
 * assign_proto_idx() is called, both under proto_list_mutex, and 0 is
 * returned.
 */
int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		......
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;
}
EXPORT_SYMBOL(proto_register);

/*
 * inet_register_protosw - link a protosw entry into inetsw[p->type].
 *
 * Runs under inetsw_lock (spin_lock_bh). A type >= SOCK_MAX jumps to
 * out_illegal, and a clash with an existing permanent entry for the same
 * protocol jumps to out_permanent; both labels are in the part of the
 * function elided ("......") from this excerpt.
 */
void inet_register_protosw(struct inet_protosw *p)
{
	struct list_head *lh;
	struct inet_protosw *answer;
	int protocol = p->protocol;
	struct list_head *last_perm;

	spin_lock_bh(&inetsw_lock);

	if (p->type >= SOCK_MAX)
		goto out_illegal;

	/* If we are trying to override a permanent protocol, bail. */
	last_perm = &inetsw[p->type];
	list_for_each(lh, &inetsw[p->type]) {
		answer = list_entry(lh, struct inet_protosw, list);
		/* Check only the non-wild match. */
		/* Stop scanning at the first entry that is not permanent. */
		if ((INET_PROTOSW_PERMANENT & answer->flags) == 0)
			break;
		if (protocol == answer->protocol)
			goto out_permanent;
		/* Remember the most recent permanent entry seen so far. */
		last_perm = lh;
	}

	/* Add the new entry after the last permanent entry if any, so that
	 * the new entry does not override a permanent entry when matched with
	 * a wild-card protocol. But it is allowed to override any existing
	 * non-permanent entry. This means that when we remove this entry, the
	 * system automatically returns to the old behavior.
	 */
	list_add_rcu(&p->list, last_perm);
out:
	spin_unlock_bh(&inetsw_lock);

	return;
	......
}

上面的代码中我们可以看到,udp_protocol结构体中的handler是udp_rcv,tcp_protocol结构体中的handler是tcp_v4_rcv,它们都是通过inet_add_protocol被注册进来的。

/*
 * inet_add_protocol - claim slot inet_protos[protocol] for a handler table.
 *
 * @prot:     handler table to install
 * @protocol: protocol number, used as the index into inet_protos[]
 *
 * Returns -EINVAL (after logging) when prot->netns_ok is not set, 0 when the
 * slot held NULL and prot was stored, and -1 when the slot was already taken.
 */
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
	const struct net_protocol **slot;
	const struct net_protocol *previous;

	if (!prot->netns_ok) {
		pr_err("Protocol %u is not namespace aware, cannot register.\n",
		       protocol);
		return -EINVAL;
	}

	slot = (const struct net_protocol **)&inet_protos[protocol];

	/* cmpxchg() stores prot only if the slot currently holds NULL. */
	previous = cmpxchg(slot, NULL, prot);
	if (previous != NULL)
		return -1;

	return 0;
}

inet_add_protocol函数将tcp和udp对应的处理函数都注册到了inet_protos(接收报文时,解析传输层时使用)数组中。

再看dev_add_pack(&ip_packet_type);这一行,ip_packet_type结构体中的type是协议类型(ETH_P_IP),func是ip_rcv函数,在dev_add_pack中会被注册到ptype_base(接收报文时,解析网络层时使用)哈希表中。(ptype_base在net_dev_init中初始化,共有16个哈希桶;每个桶是一个链表,因此可注册的协议数量并不限于16个)

//file: net/core/dev.cvoid dev_add_pack(struct packet_type *pt){struct list_head *head = ptype_head(pt);......}static inline struct list_head *ptype_head(const struct packet_type *pt){if (pt->type == htons(ETH_P_ALL))return &ptype_all;elsereturn &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];}

这里我们需要记住inet_protos记录着udp,tcp的处理函数地址,ptype_base存储着ip_rcv()函数的地址。后面我们会看到软中断中会通过ptype_base找到ip_rcv函数地址,进而将ip包正确地送到ip_rcv()中执行。在ip_rcv中将会通过inet_protos找到tcp或者udp的处理函数,进而把包转发给udp_rcv()或tcp_v4_rcv()函数。

扩展一下,如果看一下ip_rcv和udp_rcv等函数的代码,能看到很多协议的处理过程。例如,ip_rcv中会处理netfilter和iptables过滤,如果你有很多或者很复杂的 netfilter 或 iptables 规则,这些规则都是在软中断的上下文中执行的,会加大网络延迟。再例如,udp_rcv中会判断socket接收队列是否满了,对应的相关内核参数是net.core.rmem_max和net.core.rmem_default。如果有兴趣,建议大家好好读一下inet_init这个函数的代码。

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。