本文由腾讯技术kernel分享,原题“TCP经典异常问题探讨与解决”,即时通讯网进行了排版和内容优化等。
cover-opti.png (12.88 KB, 下载次数: 364)
下载附件 保存到相册
7 个月前 上传
1.png (53.59 KB, 下载次数: 397)
2.png (8.07 KB, 下载次数: 384)
tcp_send_active_reset() -> skb = alloc_skb(MAX_TCP_HEADER, priority); -> tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), TCPHDR_ACK | TCPHDR_RST); -> tcp_transmit_skb()
3.png (9.28 KB, 下载次数: 389)
tcp_v4_send_reset()
    if (th->ack) {
        // 这里对应的就是上图中为何出现Seq==Ack
        rep.th.seq = th->ack_seq;
    } else {
        // 极小概率,如果出现,那么RST包就没有Seq序列号
        rep.th.ack = 1;
        rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2));
    }
sudo bpftrace -e 'k:tcp_send_active_reset { @[kstack()] = count(); }'
4.png (33.93 KB, 下载次数: 391)
sudo bpftrace -e 'k:tcp_v4_send_reset { @[kstack()] = count(); }'
5.png (23.83 KB, 下载次数: 390)
6.png (10.04 KB, 下载次数: 397)
7.png (45.43 KB, 下载次数: 390)
8.png (40.98 KB, 下载次数: 421)
9.png (18.51 KB, 下载次数: 404)
10.png (30.61 KB, 下载次数: 377)
CPU 0                           CPU 1
-----                           -----
tcp_v4_rcv()                    syn_recv_sock()
                                  inet_ehash_insert()
                                  -> sk_nulls_del_node_init_rcu(osk)
__inet_lookup_established()
                                  -> __sk_nulls_add_node_rcu(sk, list)
11.png (27.85 KB, 下载次数: 429)
12.png (29.99 KB, 下载次数: 385)
13.png (35.81 KB, 下载次数: 388)
14.png (31.42 KB, 下载次数: 450)
// iptables A port -> B port
iptables ... -p tcp --dport 1111 -j REDIRECT --to-ports 80
iptables ... -p tcp --dport 1112 -j REDIRECT --to-ports 80
15.png (29.54 KB, 下载次数: 401)
1. saddr:12345 -> daddr:80 // 正常连接
2. saddr:12345 -> daddr:1112 -> daddr:80 // NAT参与转换
(对内核细节不感兴趣的同学可以跳过此段)
// 2.2.2.2是脱敏后的server端ip地址,另外两个是client的ip
sk info: 1.1.1.1:1111 <-> 2.2.2.2:80 // 我们可以知道真实的socket的建立是使用了80端口
skb info: 1.1.1.2:2222 <-> 2.2.2.2:1112 // 异常的skb未成功将1112端口转换为80端口
#!/usr/bin/env python from __future__ import print_function from bcc import BPF import argparse from time import strftime from socket import inet_ntop, AF_INET, AF_INET6 from struct import pack import ctypes as ct from time import sleep from bcc import tcp # arguments examples = """examples: ./tcpdrop # trace kernel TCP drops """ parser = argparse.ArgumentParser( description="Trace TCP drops by the kernel", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=examples) parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS) args = parser.parse_args() debug = 0 # define BPF program bpf_text = """ #include <uapi/linux/ptrace.h> #include <uapi/linux/tcp.h> #include <uapi/linux/ip.h> #include <net/sock.h> #include <bcc/proto.h> BPF_STACK_TRACE(stack_traces, 1024); struct ipv4_data_t { u32 pid; u64 is_sknull; u32 saddr; u32 daddr; u16 sport; u16 dport; u8 state; u8 tcpflags; u32 stack_id; }; BPF_PERF_OUTPUT(ipv4_events); struct active_data_t { u32 pid; u32 saddr; u32 daddr; u16 sport; u16 dport; u32 stack_id; }; BPF_PERF_OUTPUT(active_events); static struct tcphdr *skb_to_tcphdr(const struct sk_buff *skb) { // unstable API. verify logic in tcp_hdr() -> skb_transport_header(). return (struct tcphdr *)(skb->head + skb->transport_header); } static inline struct iphdr *skb_to_iphdr(const struct sk_buff *skb) { // unstable API. verify logic in ip_hdr() -> skb_network_header(). return (struct iphdr *)(skb->head + skb->network_header); } // from include/net/tcp.h: #ifndef tcp_flag_byte #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) #endif int trace_tcp_v4_send_reset(struct pt_regs *ctx, struct sock *sk, struct sk_buff *skb) { u8 is_sk_null = sk ? 0 : 1; u8 state = sk ? 
(u8)sk->__sk_common.skc_state : 1; u32 pid = bpf_get_current_pid_tgid(); struct iphdr *ip = skb_to_iphdr(skb); u32 daddr = ip->daddr; u32 saddr = ip->saddr; // pull in details from the packet headers and the sock struct u16 family = sk->__sk_common.skc_family; u16 sport = 0, dport = 0; struct tcphdr *tcp = skb_to_tcphdr(skb); u8 tcpflags = ((u_int8_t *)tcp)[13]; sport = tcp->source; dport = tcp->dest; sport = ntohs(sport); dport = ntohs(dport); if (family == AF_INET && (saddr == 16777343 && daddr == 16777343) && (sport == 8004 || dport == 8004)) { struct ipv4_data_t data4 = {}; data4.pid = pid; data4.saddr = saddr; data4.daddr = daddr; data4.dport = dport; data4.sport = sport; data4.state = state; data4.tcpflags = tcpflags; data4.stack_id = stack_traces.get_stackid(ctx, 0); ipv4_events.perf_submit(ctx, &data4, sizeof(data4)); } return 0; } int trace_tcp_send_active_reset(struct pt_regs *ctx, struct sock *sk, unsigned int priority) { u32 pid = bpf_get_current_pid_tgid() >> 32; u32 saddr = 0, daddr = 0; u16 family = AF_INET; u16 sport = 0, dport = 0; // sport is not right sport = sk->__sk_common.skc_num; dport = sk->__sk_common.skc_dport; dport = ntohs(dport); saddr = sk->__sk_common.skc_rcv_saddr; daddr = sk->__sk_common.skc_daddr; if (family == AF_INET && (saddr == 16777343 && daddr == 16777343)) { struct active_data_t data4 = {}; data4.pid = pid; data4.saddr = saddr; data4.daddr = daddr; data4.dport = dport; data4.sport = sport; data4.stack_id = stack_traces.get_stackid(ctx, 0); active_events.perf_submit(ctx, &data4, sizeof(data4)); } return 0; } """ if debug or args.ebpf: print(bpf_text) if args.ebpf: exit() # event data class Data_ipv4(ct.Structure): _fields_ = [ ("pid", ct.c_uint), ("is_sknull", ct.c_ulonglong), ("saddr", ct.c_uint), ("daddr", ct.c_uint), ("sport", ct.c_ushort), ("dport", ct.c_ushort), ("state", ct.c_ubyte), ("tcpflags", ct.c_ubyte), ("stack_id", ct.c_ulong) ] class Data_active(ct.Structure): _fields_ = [ ("pid", ct.c_uint), ("saddr", 
ct.c_uint), ("daddr", ct.c_uint), ("sport", ct.c_ushort), ("dport", ct.c_ushort), ("stack_id", ct.c_ulong) ] # process event def print_ipv4_event(cpu, data, size): event = ct.cast(data, ct.POINTER(Data_ipv4)).contents if event.is_sknull is 1: print("%-8s %-7d %-20s > %-20s %s (%s)" % ( strftime("%H:%M:%S"), event.pid, "%s:%d" % (inet_ntop(AF_INET, pack('I', event.saddr)), event.sport), "%s:%s" % (inet_ntop(AF_INET, pack('I', event.daddr)), event.dport), "sk-is-null", tcp.flags2str(event.tcpflags))) else: print("%-8s %-7d %-20s > %-20s %s (%s)" % ( strftime("%H:%M:%S"), event.pid, "%s:%d" % (inet_ntop(AF_INET, pack('I', event.saddr)), event.sport), "%s:%s" % (inet_ntop(AF_INET, pack('I', event.daddr)), event.dport), tcp.tcpstate[event.state], tcp.flags2str(event.tcpflags))) for addr in stack_traces.walk(event.stack_id): sym = b.ksym(addr, show_offset=True) print("\t%s" % sym) print("") def print_active_event(cpu, data, size): event = ct.cast(data, ct.POINTER(Data_active)).contents print("%-8s %-7d %-20s > %-20s" % ( strftime("%H:%M:%S"), event.pid, "%s:%d" % (inet_ntop(AF_INET, pack('I', event.saddr)), event.sport), "%s:%d" % (inet_ntop(AF_INET, pack('I', event.daddr)), event.dport))) for addr in stack_traces.walk(event.stack_id): sym = b.ksym(addr, show_offset=True) print("\t%s" % sym) print("") # initialize BPF b = BPF(text=bpf_text) if b.get_kprobe_functions(b"tcp_v4_send_reset"): b.attach_kprobe(event="tcp_v4_send_reset", fn_name="trace_tcp_v4_send_reset") else: print("ERROR: tcp_drop() kernel function not found or traceable. 
" "Older kernel versions not supported.") exit() if b.get_kprobe_functions(b"tcp_send_active_reset"): b.attach_kprobe(event="tcp_send_active_reset", fn_name="trace_tcp_send_active_reset") else: print("ERROR: tcp_v4_send_reset() kernel function") exit() stack_traces = b.get_table("stack_traces") # header print("%-8s %-6s %-2s %-20s > %-20s %s (%s)" % ("TIME", "PID", "IP", "SADDR:SPORT", "DADDR:DPORT", "STATE", "FLAGS")) # read events b["ipv4_events"].open_perf_buffer(print_ipv4_event) #b["active_events"].open_perf_buffer(print_active_event) while 1: try: b.perf_buffer_poll() except KeyboardInterrupt: exit()
来源:即时通讯网 - 即时通讯开发者社区!
轻量级开源移动端即时通讯框架。
快速入门 / 性能 / 指南 / 提问
轻量级Web端即时通讯框架。
详细介绍 / 精编源码 / 手册教程
移动端实时音视频框架。
详细介绍 / 性能测试 / 安装体验
基于MobileIMSDK的移动IM系统。
详细介绍 / 产品截图 / 安装体验
一套产品级Web端IM系统。
详细介绍 / 产品截图 / 演示视频
精华主题数超过100个。
连续任职达2年以上的合格正式版主
为论区做出突出贡献的开发者、版主等。
Copyright © 2014-2024 即时通讯网 - 即时通讯开发者社区 / 版本 V4.4
苏州网际时代信息科技有限公司 (苏ICP备16005070号-1)
Processed in 0.151358 second(s), 43 queries , Gzip On.