深入理解ping原理及源码实现
ping原理
发起端发送ICMP探测报文,发起端和中间设备根据路由表进行转发,中间设备将报文TTL减1,并更新下一跳二层头信息转发,目的节点收到发给自己的ping包之后,检查报文是否合法,完成检查后给发起端发送ICMP响应报文。
A ->R1 -> R2 -> R3 -> B (ICMP echo request)
A <- R1 <- R2 <- R3 <-B (ICMP echo reply)
ICMP的报文格式定义在RFC 792中,它的IP Proto字段是1,从这里也可以看出ICMP是IP网络的基础协议。具体echo request和echo reply的报文格式请参考具体的RFC协议。
一般来说,上面两个方向的报文走的路径是一样的,所以ping报文可以测量发起端到接收端的往返延时(RTT),同时会计算出最小最大平均时延,有些工具还会计算出方差。对于ping包发送个数,Windows上默认是4个, Linux上默认一直发送除非ctrl+c停止,但是都提供配置发送个数选项。默认发送的icmp报文长度不会很大,Windows上是94字节,Linux上是118字节(上述是IPv6报文ICMPv6对应的长度),净荷字段的填充因为没有限制,更是五花八门,这里不再一一列出。因为ping包如果超过接口的MTU会默认分片,所以ping也提供了不分片设置标志,以便探测网络对大包的支持情况。TTL配置则会限制报文最大经过的中间设备数量,如果不设置Windows是128,Linux是64,一般场景足够了。每个ICMP回复等待的超时时间默认是1秒,如果有特殊使用,可以设置ICMP探测的超时时间。因为ping包发送时候使用的是发送端口的接口地址,如果有需要也可以配置发送报文的源地址。
上面说的都是Windows和Linux都支持的功能,除此之外每个工具还有自己特定的一些配置,另外IPv6和IPv4在使用上还有一些差异。
在实现上,发起端的ping工具一般实现在用户态,而接收端的echo request处理以及响应echo reply发送都是在内核态自动处理的,不需要用户态干预。在Windows比较新的版本上,比如Windows 7及之后的版本,默认不开启echo request报文上送,导致不会自动响应ping请求,需要手动开启一下。
ping源码实现
Talk is cheap. Show me the code. 下面我们来分析ping的源码实现,因为Windows没有开源Ping的实现,我们现在以Linux下的ping工具为对象分析具体源码实现。ping工具在iputils中实现,Ubuntu使用的源码在这里。
ping工具的实现在目录ping中,源文件如下所示:
path: root/ping
Mode Name Size
-rw-r--r-- meson.build 518 log plain
-rw-r--r-- node_info.c 12858 log plain
-rw-r--r-- ping.c 50280 log plain
-rw-r--r-- ping.h 12646 log plain
-rw-r--r-- ping6_common.c 26860 log plain
-rw-r--r-- ping_common.c 28183 log plain
主要的代码实现在ping.c, ping_common.c和ping6_common.c中。
ping.c实现了ping工具中对于ipv4和ipv6的通用处理入口,包括创建socket的create_socket, 主入口函数main。关于ipv4的主入口ping4_run,收发包处理ping4_receive_error_msg, ping4_parse_reply, ping4_send_probe, ping4_install_filter也实现在ping.c中。
ping_common.c实现了ping工具用法usage, 权限操作接口limit_capabilities, modify_capability, drop_capabilities, ping包发送入口pinger, 配置入口setup, 主循环main_loop, 统计信息更新gather_statistics, 以及最后结束处理的finish和status。
ping6_common.c实现了ipv6的一些相关接口,包括主入口ping6_run, 以及针对ipv6的收发包处理ping6_receive_error_msg, ping6_parse_reply, ping6_send_probe和ping6_install_filter。
整体流程图如下:

上图图片是用Mermaid Live Editor生成,地址如下:https://mermaid.live/
源码分析
main (ping.c)
/*
 * main -- entry point of the ping tool (excerpt: every "..." line marks
 * code that was elided from this listing).
 *
 * Visible flow: pick the default address family from the program name,
 * create the ICMP socket(s), resolve the target with getaddrinfo(), then
 * hand each result of a matching family to ping4_run()/ping6_run() until
 * one of them returns >= 0. That value is returned from main().
 */
int main(int argc, char **argv)
{
    ...
    struct addrinfo hints = {
        .ai_family = AF_UNSPEC,     /* default address family */
        .ai_protocol = IPPROTO_UDP,
        .ai_socktype = SOCK_DGRAM,  /* DGRAM is the default socket type */
        .ai_flags = getaddrinfo_flags
    };
    ...
    static struct ping_rts rts = {
        .interval = 1000,           /* default interval: 1000 ms */
        .preload = 1,
        .lingertime = MAXWAIT * 1000,
        .confirm_flag = MSG_CONFIRM,
        .tmin = LONG_MAX,
        .pipesize = -1,
        .datalen = DEFDATALEN,
        .ident = -1,
        .screen_width = INT_MAX,
#ifdef HAVE_LIBCAP
        .cap_raw = CAP_NET_RAW,
        .cap_admin = CAP_NET_ADMIN,
#endif
        .pmtudisc = -1,
        .source.sin_family = AF_INET,
        .source6.sin6_family = AF_INET6,
        .ni.query = -1,
        .ni.subject_type = -1,
    };
    ...
    /* ping4/ping6 symlinks are supported, so the last character of the
     * program name selects the default address family. */
    /* Support being called using `ping4` or `ping6` symlinks */
    if (argv[0][strlen(argv[0]) - 1] == '4')
        hints.ai_family = AF_INET;
    else if (argv[0][strlen(argv[0]) - 1] == '6')
        hints.ai_family = AF_INET6;
    ...
    /* Skip the options that were already parsed; only hops and the
     * target remain in argv. */
    argc -= optind;
    argv += optind;

    /* A destination is mandatory. With argc == 1 it is the target; with
     * argc > 1 every argument except the last one is a hop. */
    if (!argc)
        error(2, EDESTADDRREQ, "usage error");

    target = argv[argc - 1];    /* the ping target is always the last argument */

    /* Create sockets */
    enable_capability_raw();    /* raise the capability around socket creation */
    if (hints.ai_family != AF_INET6) {
        /* Create the IPv4 socket; it is requisite only when IPv4 was
         * explicitly selected. */
        create_socket(&rts, &sock4, AF_INET, hints.ai_socktype,
                      IPPROTO_ICMP, hints.ai_family == AF_INET);
    }
    if (hints.ai_family != AF_INET) {
        /* Create the IPv6 socket; it is requisite when no IPv4 socket
         * exists. */
        create_socket(&rts, &sock6, AF_INET6, hints.ai_socktype,
                      IPPROTO_ICMPV6, sock4.fd == -1);
        /* This may not be needed if both protocol versions always had the same value, but
         * since I don't know that, it's better to be safe than sorry. */
        rts.pmtudisc = rts.pmtudisc == IP_PMTUDISC_DO    ? IPV6_PMTUDISC_DO :
                       rts.pmtudisc == IP_PMTUDISC_DONT  ? IPV6_PMTUDISC_DONT :
                       rts.pmtudisc == IP_PMTUDISC_WANT  ? IPV6_PMTUDISC_WANT :
                       rts.pmtudisc == IP_PMTUDISC_PROBE ? IPV6_PMTUDISC_PROBE :
                       rts.pmtudisc;
    }
    disable_capability_raw();   /* drop the capability again */

    /* When started as plain `ping` the family is still AF_UNSPEC here.
     * If only one of the two sockets could be created, restrict the
     * family to that one; otherwise both sockets exist. */
    /* Limit address family on single-protocol systems */
    if (hints.ai_family == AF_UNSPEC) {
        if (sock4.fd == -1)
            hints.ai_family = AF_INET6;
        else if (sock6.fd == -1)
            hints.ai_family = AF_INET;
    }

    /* Derive the maximum ICMP payload length from the address family
     * and, for a literal address, from the target itself. */
    int max_s = MAX(ICMP_MAX_DATALEN, ICMPV6_MAX_DATALEN);

    /* Detect based on -4 / -6 */
    if (hints.ai_family == AF_INET)
        max_s = ICMP_MAX_DATALEN - get_ipv4_optlen(&rts);
    else if (hints.ai_family == AF_INET6)
        max_s = ICMPV6_MAX_DATALEN;

    /* Force limit on IPv4/IPv6 adresses */
    if (inet_pton(AF_INET, target, buf))
        max_s = ICMP_MAX_DATALEN - get_ipv4_optlen(&rts);
    else if (inet_pton(AF_INET6, target, buf))
        max_s = ICMPV6_MAX_DATALEN;

    /* TOS (IPv4) and traffic class (IPv6) can be configured. */
    /* Set socket options */
    if (rts.settos)
        set_socket_option(&sock4, IPPROTO_IP, IP_TOS, &rts.settos, sizeof(rts.settos));
    if (rts.tclass)
        set_socket_option(&sock6, IPPROTO_IPV6, IPV6_TCLASS, &rts.tclass, sizeof(rts.tclass));

    /* Resolve the target. */
    /* getaddrinfo fails to indicate a scopeid when not used in dual-stack mode.
     * Work around by always using dual-stack name resolution.
     *
     * https://github.com/iputils/iputils/issues/252
     */
    int target_ai_family = hints.ai_family;
    hints.ai_family = AF_UNSPEC;

    /* An IPv6 link-local address needs an interface name or scope-id. */
    if (!strchr(target, '%') && sock6.socktype == SOCK_DGRAM &&
        inet_pton(AF_INET6, target, buf) > 0 &&
        (IN6_IS_ADDR_LINKLOCAL(buf) || IN6_IS_ADDR_MC_LINKLOCAL(buf))) {
        error(0, 0, _(
            "Warning: IPv6 link-local address on ICMP datagram socket may require ifname or scope-id"
            " => use: address%%<ifname|scope-id>"));
    }

    /* Resolve with getaddrinfo().
     * NOTE(review): worth checking how a target that is already a literal
     * IP address is handled by this call. */
    ret_val = getaddrinfo(target, NULL, &hints, &result);
    if (ret_val)
        error(2, 0, "%s: %s", target, gai_strerror(ret_val));

    /* Walk the resolved addresses. Not every address is pinged: the loop
     * stops at the first run that returns >= 0 (see the break below). */
    for (ai = result; ai; ai = ai->ai_next) {
        if (rts.opt_verbose)
            printf("ai->ai_family: %s, ai->ai_canonname: '%s'\n",
                   str_family(ai->ai_family),
                   ai->ai_canonname ? ai->ai_canonname : "");

        /* Skip results whose family differs from the requested one; if
         * this was the last result, report the failure. */
        if (target_ai_family != AF_UNSPEC &&
            target_ai_family != ai->ai_family) {
            if (!ai->ai_next) {
                /* An address was found, but not of the family we really want.
                 * Throw the appropriate gai error.
                 */
                error(2, 0, "%s: %s", target, gai_strerror(EAI_ADDRFAMILY));
            }
            continue;
        }

        /* Dispatch to the per-family implementation. */
        switch (ai->ai_family) {
        case AF_INET:
            ret_val = ping4_run(&rts, argc, argv, ai, &sock4);
            break;
        case AF_INET6:
            ret_val = ping6_run(&rts, argc, argv, ai, &sock6);
            break;
        default:
            error(2, 0, _("unknown protocol family: %d"), ai->ai_family);
        }

        /* >= 0 is the final exit code; < 0 asks for the next result. */
        if (ret_val >= 0)
            break;
        /* ret_val < 0 means to go on to next addrinfo result, there
         * better be one. */
        assert(ai->ai_next);
    }

    freeaddrinfo(result);

    /* The exit status comes from ping4_run() or ping6_run(). */
    return ret_val;
}
create_socket (ping.c)
/*
 * create_socket -- open the ICMP socket for one address family.
 *
 * rts:       run-time state; only opt_verbose is read here.
 * sock:      receives the descriptor and the socket type actually used;
 *            sock->fd must be -1 on entry.
 * family:    AF_INET or AF_INET6 as passed by the caller.
 * socktype:  SOCK_DGRAM (ping socket) or SOCK_RAW.
 * protocol:  protocol argument handed to socket().
 * requisite: non-zero if failure to create the socket is fatal (exit(2)).
 *
 * On failure with requisite == 0 the function returns with sock->fd == -1.
 */
static void create_socket(struct ping_rts *rts, socket_st *sock, int family,
                          int socktype, int protocol, int requisite)
{
    int do_fallback = 0;

    errno = 0;

    assert(sock->fd == -1);
    assert(socktype == SOCK_DGRAM || socktype == SOCK_RAW);

    /* Attempt to create a ping socket if requested. Attempt to create a raw
     * socket otherwise or as a fallback. Well known errno values follow.
     *
     * 1) EACCES
     *
     * Kernel returns EACCES for all ping socket creation attempts when the
     * user isn't allowed to use ping socket. A range of group ids is
     * configured using the `net.ipv4.ping_group_range` sysctl. Fallback
     * to raw socket is necessary.
     *
     * Kernel returns EACCES for all raw socket creation attempts when the
     * process doesn't have the `CAP_NET_RAW` capability.
     *
     * 2) EAFNOSUPPORT
     *
     * Kernel returns EAFNOSUPPORT for IPv6 ping or raw socket creation
     * attempts when run with IPv6 support disabled (e.g. via `ipv6.disable=1`
     * kernel command-line option).
     *
     * https://github.com/iputils/iputils/issues/32
     *
     * OpenVZ 2.6.32-042stab113.11 and possibly other older kernels return
     * EAFNOSUPPORT for all IPv4 ping socket creation attempts due to lack
     * of support in the kernel. Fallback to raw socket is necessary.
     *
     * https://github.com/iputils/iputils/issues/54
     *
     * 3) EPROTONOSUPPORT
     *
     * OpenVZ 2.6.32-042stab113.11 and possibly other older kernels return
     * EPROTONOSUPPORT for all IPv6 ping socket creation attempts due to lack
     * of support in the kernel [1]. Debian 9.5 based container with kernel 4.10
     * returns EPROTONOSUPPORT also for IPv4 [2]. Fallback to raw socket is
     * necessary.
     *
     * [1] https://github.com/iputils/iputils/issues/54
     * [2] https://github.com/iputils/iputils/issues/129
     */

    /* Unless RAW was requested explicitly, try a DGRAM (ping) socket first. */
    if (socktype == SOCK_DGRAM)
        sock->fd = socket(family, socktype, protocol);

    /* Creation failed because the kernel lacks ping-socket support
     * (family unsupported for IPv4, or protocol unsupported): fall back
     * to a raw socket. */
    /* Kernel doesn't support ping sockets. */
    if (sock->fd == -1 && errno == EAFNOSUPPORT && family == AF_INET)
        do_fallback = 1;
    if (sock->fd == -1 && errno == EPROTONOSUPPORT)
        do_fallback = 1;

    /* Permission denied: fall back to a raw socket as well. */
    /* User is not allowed to use ping sockets. */
    if (sock->fd == -1 && errno == EACCES)
        do_fallback = 1;

    /* RAW was requested explicitly or is the fallback: create it now. */
    if (socktype == SOCK_RAW || do_fallback) {
        socktype = SOCK_RAW;
        sock->fd = socket(family, SOCK_RAW, protocol);
    }

    sock->socktype = socktype;

    /* valid socket */
    if (sock->fd != -1)
        return;

    /* Report the failure (socket type and errno) when the socket is
     * requisite or verbose output is enabled. */
    /* failed to create socket */
    if (requisite || rts->opt_verbose) {
        error(0, 0, "socktype: %s", str_socktype(socktype));
        error(0, errno, "socket");
    }

    if (requisite) {
        if (socktype == SOCK_RAW && geteuid() != 0)
            error(0, 0, _("=> missing cap_net_raw+p capability or setuid?"));
        exit(2);
    }
}
bind_to_device (ping.c)
static void bind_to_device(struct ping_rts *rts, int fd, in_addr_t addr) { int rc; int errno_save; enable_capability_raw(); /*绑定指定接口*/ rc = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, rts->device, strlen(rts->device) + 1); errno_save = errno; disable_capability_raw(); if (rc != -1) return; /*组播地址支持绑定到指定接口设备*/ if (IN_MULTICAST(ntohl(addr))) { struct ip_mreqn imr; memset(&imr, 0, sizeof(imr)); imr.imr_ifindex = iface_name2index(rts, fd); if (setsockopt(fd, SOL_IP, IP_MULTICAST_IF, &imr, sizeof(imr)) == -1) error(2, errno, "IP_MULTICAST_IF"); } else { error(2, errno_save, "SO_BINDTODEVICE %s", rts->device); } }
ping4_run (ping.c)
/*
 * ping4_run -- run an IPv4 ping against one resolved address (excerpt:
 * every "..." line marks code that was elided from this listing).
 *
 * argv holds the optional hops followed by the target; ai is the
 * getaddrinfo() result for the target; sock is the IPv4 ICMP socket.
 *
 * return >= 0: exit with this code, < 0: go on to next addrinfo result
 */
int ping4_run(struct ping_rts *rts, int argc, char **argv, struct addrinfo *ai,
              socket_st *sock)
{
    static const struct addrinfo hints = {
        .ai_family = AF_INET,
        .ai_protocol = IPPROTO_UDP,
        .ai_flags = getaddrinfo_flags
    };
    ...
    if (argc > 1) {
        /* Hops cannot be combined with the record-route option. */
        if (rts->opt_rroute)
            usage();
        else if (rts->opt_timestamp) {
            /* With the timestamp option only the prespec type accepts
             * hops, and at most 5 arguments (hops + target) are allowed. */
            if (rts->ts_type != IPOPT_TS_PRESPEC)
                usage();
            if (argc > 5)
                usage();
        } else {
            /* Source routing: at most 10 arguments (hops + target). */
            if (argc > 10)
                usage();
            rts->opt_sourceroute = 1;
        }
    }

    /* Resolve every hop and finally the target. hostname and whereto are
     * overwritten on each iteration, so they end up describing the last
     * argument, i.e. the final destination. */
    while (argc > 0) {
        target = *argv;

        memset((char *)&rts->whereto, 0, sizeof(rts->whereto));
        rts->whereto.sin_family = AF_INET;
        if (inet_aton(target, &rts->whereto.sin_addr) == 1) {
            /* A literal IPv4 address is tried first. When it is the only
             * argument, the numeric option is switched on. */
            rts->hostname = target;
            if (argc == 1)
                rts->opt_numeric = 1;
        } else {
            struct addrinfo *result = ai;
            int ret_val;

            /* Hops are resolved here; the target itself (argc == 1)
             * reuses the addrinfo passed in by the caller. */
            if (argc > 1) {
                ret_val = getaddrinfo(target, NULL, &hints, &result);
                if (ret_val)
                    error(2, 0, "%s: %s", target, gai_strerror(ret_val));
            }

            memcpy(&rts->whereto, result->ai_addr, sizeof rts->whereto);
            memset(hnamebuf, 0, sizeof hnamebuf);

            /* Prefer ai_canonname for the "PING" line when it is set,
             * otherwise fall back to the name given on the command line. */
            /*
             * On certain network setup getaddrinfo() can return empty
             * ai_canonname. Instead of printing nothing in "PING"
             * line use the target.
             */
            if (result->ai_canonname)
                strncpy(hnamebuf, result->ai_canonname, sizeof hnamebuf - 1);
            else
                strncpy(hnamebuf, target, sizeof hnamebuf - 1);

            rts->hostname = hnamebuf;

            /* argc > 1 means result came from the getaddrinfo() call
             * above, so it is released here. */
            if (argc > 1)
                freeaddrinfo(result);
        }
        /* Every address except the last one is recorded as a route hop. */
        if (argc > 1)
            rts->route[rts->nroute++] = rts->whereto.sin_addr.s_addr;

        /* Move on to the next argument. */
        argc--;
        argv++;
    }

    /* Source address selection. */
    if (rts->source.sin_addr.s_addr == 0) {
        /* No source IP address has been configured (an interface may
         * still have been given). */
        socklen_t alen;
        /* Throw-away UDP socket used only to let the kernel choose the
         * source address. */
        int probe_fd = socket(AF_INET, SOCK_DGRAM, 0);

        /* Start from the destination resolved above. */
        dst = rts->whereto;

        if (probe_fd < 0)
            error(2, errno, "socket");
        /* With an interface given, bind both the probe socket and the
         * real socket to it. */
        if (rts->device) {
            bind_to_device(rts, probe_fd, dst.sin_addr.s_addr);
            bind_to_device(rts, sock->fd, dst.sin_addr.s_addr);
        }

        if (rts->settos &&
            setsockopt(probe_fd, IPPROTO_IP, IP_TOS, (char *)&rts->settos, sizeof(int)) < 0)
            error(0, errno, _("warning: QOS sockopts"));

        sock_setmark(rts, probe_fd);

        /* Probe towards destination port 1025. */
        dst.sin_port = htons(1025);
        /* With source routing the first hop is the address to reach. */
        if (rts->nroute)
            dst.sin_addr.s_addr = rts->route[0];
        /* connect() on the UDP probe socket makes the kernel do the
         * route lookup; failure means the destination is not usable. */
        if (connect(probe_fd, (struct sockaddr *)&dst, sizeof(dst)) == -1) {
            if (errno == EACCES) {
                if (rts->broadcast_pings == 0)
                    error(2, 0,
                          _("Do you want to ping broadcast? Then -b. If not, check your local firewall rules"));
                fprintf(stderr, _("WARNING: pinging broadcast address\n"));
                if (setsockopt(probe_fd, SOL_SOCKET, SO_BROADCAST,
                               &rts->broadcast_pings, sizeof(rts->broadcast_pings)) < 0)
                    error(2, errno, _("cannot set broadcasting"));
                if (connect(probe_fd, (struct sockaddr *)&dst, sizeof(dst)) == -1)
                    error(2, errno, "connect");
            } else if ((errno == EHOSTUNREACH || errno == ENETUNREACH) && ai->ai_next) {
                /* Unreachable, but another addrinfo result exists: let
                 * the caller try that one. */
                close(probe_fd);
                return -1;
            } else {
                error(2, errno, "connect");
            }
        }
        /* connect() succeeded: read back the source address chosen by
         * the kernel. */
        alen = sizeof(rts->source);
        if (getsockname(probe_fd, (struct sockaddr *)&rts->source, &alen) == -1)
            error(2, errno, "getsockname");
        rts->source.sin_port = 0;

        /* With an interface given, warn when the chosen source address
         * does not belong to that interface. */
        if (rts->device) {
            struct ifaddrs *ifa0, *ifa;
            int ret;

            ret = getifaddrs(&ifa0);
            /* NOTE(review): "gatifaddrs" is a typo for getifaddrs; left
             * untouched because the string is a gettext msgid. */
            if (ret)
                error(2, errno, _("gatifaddrs failed"));
            for (ifa = ifa0; ifa; ifa = ifa->ifa_next) {
                if (!ifa->ifa_name || !ifa->ifa_addr ||
                    ifa->ifa_addr->sa_family != AF_INET)
                    continue;
                if (!strcmp(ifa->ifa_name, rts->device) &&
                    !memcmp(&((struct sockaddr_in *)ifa->ifa_addr)->sin_addr,
                            &rts->source.sin_addr, sizeof(rts->source.sin_addr)))
                    break;
            }
            /* Only the pointer value of ifa is tested after the free. */
            freeifaddrs(ifa0);
            if (!ifa)
                error(0, 0, _("Warning: source address might be selected on device other than: %s"),
                      rts->device);
        }
        close(probe_fd);
    } else if (rts->device) {
        /* A source address is already set: just bind to the interface. */
        bind_to_device(rts, sock->fd, rts->whereto.sin_addr.s_addr);
    }

    /* A zero destination is replaced by the source address.
     * NOTE(review): presumably reached when the target is given literally
     * as 0 / 0.0.0.0 — confirm. */
    if (rts->whereto.sin_addr.s_addr == 0)
        rts->whereto.sin_addr.s_addr = rts->source.sin_addr.s_addr;
    ...
    /* Install the requested IP option: record route, timestamp (with the
     * collected rts->route entries for prespec), or source route (again
     * built from rts->route). */
    /* record route option */
    if (rts->opt_rroute) {
        memset(rspace, 0, sizeof(rspace));
        rspace[0] = IPOPT_NOP;
        rspace[1 + IPOPT_OPTVAL] = IPOPT_RR;
        rspace[1 + IPOPT_OLEN] = sizeof(rspace) - 1;
        rspace[1 + IPOPT_OFFSET] = IPOPT_MINOFF;
        if (setsockopt(sock->fd, IPPROTO_IP, IP_OPTIONS, rspace, sizeof rspace) < 0)
            error(2, errno, "record route");
    }
    if (rts->opt_timestamp) {
        memset(rspace, 0, sizeof(rspace));
        rspace[0] = IPOPT_TIMESTAMP;
        rspace[1] = (rts->ts_type == IPOPT_TS_TSONLY ? 40 : 36);
        rspace[2] = 5;
        rspace[3] = rts->ts_type;
        if (rts->ts_type == IPOPT_TS_PRESPEC) {
            int i;

            rspace[1] = 4 + rts->nroute * 8;
            for (i = 0; i < rts->nroute; i++) {
                tmp_rspace = (uint32_t *)&rspace[4 + i * 8];
                *tmp_rspace = rts->route[i];
            }
        }
        if (setsockopt(sock->fd, IPPROTO_IP, IP_OPTIONS, rspace, rspace[1]) < 0) {
            /* Retry once with the flag byte forced to 2. */
            rspace[3] = 2;
            if (setsockopt(sock->fd, IPPROTO_IP, IP_OPTIONS, rspace, rspace[1]) < 0)
                error(2, errno, "ts option");
        }
    }
    if (rts->opt_sourceroute) {
        int i;

        memset(rspace, 0, sizeof(rspace));
        rspace[0] = IPOPT_NOOP;
        rspace[1 + IPOPT_OPTVAL] = rts->opt_so_dontroute ? IPOPT_SSRR : IPOPT_LSRR;
        rspace[1 + IPOPT_OLEN] = 3 + rts->nroute * 4;
        rspace[1 + IPOPT_OFFSET] = IPOPT_MINOFF;
        for (i = 0; i < rts->nroute; i++) {
            tmp_rspace = (uint32_t *)&rspace[4 + i * 4];
            *tmp_rspace = rts->route[i];
        }
        if (setsockopt(sock->fd, IPPROTO_IP, IP_OPTIONS, rspace, 4 + rts->nroute * 4) < 0)
            error(2, errno, "record route");
    }
    ...
    /* A payload at least as large as a struct timeval can carry the send
     * timestamp, which enables timing. */
    if (rts->datalen >= (int)sizeof(struct timeval)) /* can we time transfer */
        rts->timing = 1;
    /* The buffer must also have room for the IP and ICMP headers. */
    packlen = rts->datalen + MAXIPLEN + MAXICMPLEN;
    /* Allocate the packet buffer that is handed to main_loop(). */
    if (!(packet = (unsigned char *)malloc((unsigned int)packlen)))
        error(2, errno, _("memory allocation failed"));

    /* Print the destination name and IPv4 address. The name can differ
     * from the one typed on the command line because it may be the
     * resolver's ai_canonname, e.g. `ping www.baidu.com` prints
     * "PING www.a.shifen.com (39.156.70.239) 56(84) bytes of data." */
    printf(_("PING %s (%s) "), rts->hostname, inet_ntoa(rts->whereto.sin_addr));
    if (rts->device || rts->opt_strictsource)
        printf(_("from %s %s: "), inet_ntoa(rts->source.sin_addr),
               rts->device ? rts->device : "");
    printf(_("%d(%d) bytes of data.\n"), rts->datalen, rts->datalen + 8 + rts->optlen + 20);

    /* Protocol-independent setup and parameter checks. */
    setup(rts, sock);
    /* When a connected socket was requested (opt_connect_sk), a failing
     * connect() is fatal. */
    if (rts->opt_connect_sk &&
        connect(sock->fd, (struct sockaddr *)&dst, sizeof(dst)) == -1)
        error(2, errno, "connect failed");

    /* Give up privileges before entering the loop. */
    drop_capabilities();

    /* Send and receive until done. */
    ret = main_loop(rts, &ping4_func_set, sock, packet, packlen);
    free(packet);

    /* Propagate main_loop()'s result. */
    return ret;
}
ping6_run (ping6_common.c)
TODO
setup (ping_common.c)
/*
 * setup -- protocol independent setup and parameter checks.
 *
 * Validates interval/preload, applies the generic socket options
 * (debug, dontroute, timestamping, mark, send/receive timeouts), fills
 * the default payload pattern, chooses the ICMP ident for raw sockets,
 * installs the signal handlers, records the start time, arms the
 * deadline timer and reads the terminal width.
 */
void setup(struct ping_rts *rts, socket_st *sock)
{
    int hold;
    struct timeval tv;
    sigset_t sset;

    /* Flood ping without an explicit interval runs with interval 0. */
    if (rts->opt_flood && !rts->opt_interval)
        rts->interval = 0;

    /* A non-zero uid may not use an interval below MIN_USER_INTERVAL_MS. */
    if (rts->uid && rts->interval < MIN_USER_INTERVAL_MS)
        error(2, 0, _("cannot flood, minimal interval for user must be >= %d ms, use -i %s (or higher)"),
              MIN_USER_INTERVAL_MS, str_interval(MIN_USER_INTERVAL_MS));

    if (rts->interval >= INT_MAX / rts->preload)
        error(2, 0, _("illegal preload and/or interval: %d"), rts->interval);

    hold = 1;
    /* Optional socket-level debugging and route bypass. */
    if (rts->opt_so_debug)
        setsockopt(sock->fd, SOL_SOCKET, SO_DEBUG, (char *)&hold, sizeof(hold));
    if (rts->opt_so_dontroute)
        setsockopt(sock->fd, SOL_SOCKET, SO_DONTROUTE, (char *)&hold, sizeof(hold));

#ifdef SO_TIMESTAMP
    if (!rts->opt_latency) {
        int on = 1;

        if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)))
            error(0, 0, _("Warning: no SO_TIMESTAMP support, falling back to SIOCGSTAMP"));
    }
#endif

    sock_setmark(rts, sock->fd);

    /* Send timeout: at most one second; receive timeout: the interval. */
    /* Set some SNDTIMEO to prevent blocking forever
     * on sends, when device is too slow or stalls. Just put limit
     * of one second, or "interval", if it is less.
     */
    tv.tv_sec = 1;
    tv.tv_usec = 0;
    if (rts->interval < 1000) {
        tv.tv_sec = 0;
        tv.tv_usec = 1000 * SCHINT(rts->interval);
    }
    setsockopt(sock->fd, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, sizeof(tv));

    /* Set RCVTIMEO to "interval". Note, it is just an optimization
     * allowing to avoid redundant poll(). */
    tv.tv_sec = SCHINT(rts->interval) / 1000;
    tv.tv_usec = 1000 * (SCHINT(rts->interval) % 1000);
    /* If the receive timeout cannot be set, main_loop() has to poll. */
    if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)))
        rts->opt_flood_poll = 1;

    /* Without a user-supplied pattern, fill the payload (which starts 8
     * bytes into outpack) with an incrementing byte sequence. */
    if (!rts->opt_pingfilled) {
        int i;
        unsigned char *p = rts->outpack + 8;

        /* Do not forget about case of small datalen, fill timestamp area too! */
        for (i = 0; i < rts->datalen; ++i)
            *p++ = i;
    }

    /* Raw socket and no ident chosen yet (-1): use the low 16 bits of
     * the pid as the ICMP ident. */
    if (sock->socktype == SOCK_RAW && rts->ident == -1)
        rts->ident = htons(getpid() & 0xFFFF);

    /* Install the signal handlers. */
    set_signal(SIGINT, sigexit);
    set_signal(SIGALRM, sigexit);
    set_signal(SIGQUIT, sigstatus);

    /* Unblock all signals. */
    sigemptyset(&sset);
    sigprocmask(SIG_SETMASK, &sset, NULL);

    /* Record the start time from the raw monotonic clock. */
    clock_gettime(CLOCK_MONOTONIC_RAW, &rts->start_time);

    /* With a deadline configured, arm a one-shot real-time timer. */
    if (rts->deadline) {
        struct itimerval it;

        it.it_interval.tv_sec = 0;
        it.it_interval.tv_usec = 0;
        it.it_value.tv_sec = rts->deadline;
        it.it_value.tv_usec = 0;
        setitimer(ITIMER_REAL, &it, NULL);
    }

    /* When stdout is a terminal, use its column count as screen width. */
    if (isatty(STDOUT_FILENO)) {
        struct winsize w;

        if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) != -1) {
            if (w.ws_col > 0)
                rts->screen_width = w.ws_col;
        }
    }
}
main_loop (ping_common.c)
/*
 * main_loop -- send probes and receive replies until an exit condition
 * is met.
 *
 * rts:     run-time state.
 * fset:    per-protocol callbacks; receive_error_msg, parse_reply and
 *          install_filter are called here, and fset is passed on to
 *          pinger().
 * sock:    the socket to send on and receive from.
 * packet:  receive buffer of packlen bytes.
 *
 * Returns the value of finish(rts).
 */
int main_loop(struct ping_rts *rts, ping_func_set_st *fset, socket_st *sock,
              uint8_t *packet, int packlen)
{
    char addrbuf[128];
    char ans_data[4096];
    struct iovec iov;
    struct msghdr msg;
    int cc;
    int next;
    int polling;
    int recv_error;

    iov.iov_base = (char *)packet;

    for (;;) {
        /* Check exit conditions. */
        /* Exit was requested (presumably by a signal handler such as
         * sigexit -- not shown here). */
        if (rts->exiting)
            break;
        /* A packet count is configured and received + errors reached it. */
        if (rts->npackets && rts->nreceived + rts->nerrors >= rts->npackets)
            break;
        /* A deadline is configured and an error has been seen. */
        if (rts->deadline && rts->nerrors)
            break;
        /* Check for and do special actions. */
        /* A status snapshot was requested: print the statistics so far
         * without stopping. */
        if (rts->status_snapshot)
            status(rts);

        /* Send probes scheduled to this time. */
        /* Keep sending while the returned delay is <= 0. */
        do {
            next = pinger(rts, fset, sock);
            next = schedule_exit(rts, next);
        } while (next <= 0);

        /* "next" is time to send next probe, if positive.
         * If next<=0 send now or as soon as possible. */

        /* Technical part. Looks wicked. Could be dropped,
         * if everyone used the newest kernel. :-)
         * Its purpose is:
         * 1. Provide intervals less than resolution of scheduler.
         *    Solution: spinning.
         * 2. Avoid use of poll(), when recvmsg() can provide
         *    timed waiting (SO_RCVTIMEO). */
        polling = 0;    /* flags for the first recvmsg(): 0 = blocking */
        recv_error = 0;
        /* Adaptive mode, receive timeout unavailable (opt_flood_poll),
         * or the next send is due sooner than one interval. */
        if (rts->opt_adaptive || rts->opt_flood_poll || next < SCHINT(rts->interval)) {
            /* Number of probes still in flight. */
            int recv_expected = in_flight(rts);

            /* If we are here, recvmsg() is unable to wait for
             * required timeout. */
            if (1000 % HZ == 0 ? next <= 1000 / HZ : (next < INT_MAX / HZ && next * HZ <= 1000)) {
                /* Very short timeout... So, if we wait for
                 * something, we sleep for MIN_INTERVAL_MS.
                 * Otherwise, spin! */
                if (recv_expected) {
                    /* Replies are outstanding: wait MIN_INTERVAL_MS. */
                    next = MIN_INTERVAL_MS;
                } else {
                    /* Nothing outstanding: spin until the next send. */
                    next = 0;
                    /* When spinning, no reasons to poll.
                     * Use nonblocking recvmsg() instead. */
                    polling = MSG_DONTWAIT;
                    /* But yield yet. */
                    sched_yield();
                }
            }

            if (!polling && (rts->opt_adaptive || rts->opt_flood_poll || rts->interval)) {
                /* Not spinning: wait for input with poll(), timeout
                 * `next`. */
                struct pollfd pset;

                pset.fd = sock->fd;
                pset.events = POLLIN;
                pset.revents = 0;
                /* On poll() failure, timeout, or when neither POLLIN nor
                 * POLLERR is set, go back and send the next probe. */
                if (poll(&pset, 1, next) < 1 || !(pset.revents & (POLLIN | POLLERR)))
                    continue;
                /* Data or an error is pending: read without blocking and
                 * remember whether POLLERR was reported. */
                polling = MSG_DONTWAIT;
                recv_error = pset.revents & POLLERR;
            }
        }

        for (;;) {
            struct timeval *recv_timep = NULL;
            struct timeval recv_time;
            int not_ours = 0; /* Raw socket can receive messages
                               * destined to other running pings. */

            iov.iov_len = packlen;
            memset(&msg, 0, sizeof(msg));
            msg.msg_name = addrbuf;
            msg.msg_namelen = sizeof(addrbuf);
            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            msg.msg_control = ans_data;
            msg.msg_controllen = sizeof(ans_data);

            cc = recvmsg(sock->fd, &msg, polling);
            /* Every later recvmsg() in this inner loop is nonblocking. */
            polling = MSG_DONTWAIT;

            /* cc < 0: recvmsg() failed; decide by errno and recv_error. */
            if (cc < 0) {
                /* If there was a POLLERR and there is no packet
                 * on the socket, try to read the error queue.
                 * Otherwise, give up.
                 */
                /* EAGAIN without a pending POLLERR, or EINTR: leave the
                 * receive loop and go back to sending. */
                if ((errno == EAGAIN && !recv_error) || errno == EINTR)
                    break;
                /* Anything else: let the protocol handler process the
                 * error. */
                recv_error = 0;
                if (!fset->receive_error_msg(rts, sock)) {
                    if (errno) {
                        error(0, errno, "recvmsg");
                        break;
                    }
                    not_ours = 1;
                }
            } else {

#ifdef SO_TIMESTAMP
                struct cmsghdr *c;

                /* Look for an SO_TIMESTAMP control message. */
                for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
                    if (c->cmsg_level != SOL_SOCKET || c->cmsg_type != SO_TIMESTAMP)
                        continue;
                    if (c->cmsg_len < CMSG_LEN(sizeof(struct timeval)))
                        continue;
                    recv_timep = (struct timeval *)CMSG_DATA(c);
                }
#endif

                if (rts->opt_latency || recv_timep == NULL) {
                    /* No usable timestamp (or opt_latency is set): try
                     * SIOCGSTAMP, else fall back to gettimeofday(). */
                    if (rts->opt_latency || ioctl(sock->fd, SIOCGSTAMP, &recv_time))
                        gettimeofday(&recv_time, NULL);
                    recv_timep = &recv_time;
                }

                /* Let the protocol handler parse the packet; the result
                 * is stored as not_ours. */
                not_ours = fset->parse_reply(rts, sock, &msg, cc, addrbuf, recv_timep);
            }

            /* See? ... someone runs another ping on this host. */
            /* A foreign packet arrived on a raw socket: install the
             * protocol's filter. */
            if (not_ours && sock->socktype == SOCK_RAW)
                fset->install_filter(rts, sock);

            /* If nothing is in flight, "break" returns us to pinger. */
            if (in_flight(rts) == 0)
                break;

            /* Otherwise, try to recvmsg() again. recvmsg()
             * is nonblocking after the first iteration, so that
             * if nothing is queued, it will receive EAGAIN
             * and return to pinger. */
        }
    }
    /* Left the send/receive loop: return finish()'s result. */
    return finish(rts);
}
pinger (ping_common.c)
/*
 * pinger --
 * 	Compose and transmit an ICMP ECHO REQUEST packet.  The IP packet
 * will be added on by the kernel.  The ID field is our UNIX process ID,
 * and the sequence number is an ascending integer.  The first several bytes
 * of the data portion are used to hold a UNIX "timeval" struct in VAX
 * byte-order, to compute the round-trip time.
 *
 * The actual packet is built and sent by fset->send_probe(). The return
 * value is handed back to main_loop() as the delay before the next probe;
 * a value <= 0 makes main_loop() call pinger() again immediately.
 */
int pinger(struct ping_rts *rts, ping_func_set_st *fset, socket_st *sock)
{
    static int oom_count;
    static int tokens;
    int i;

    /* Have we already sent enough? If we have, return an arbitrary positive value. */
    if (rts->exiting || (rts->npackets && rts->ntransmitted >= rts->npackets && !rts->deadline))
        return 1000;

    /* Check that packets < rate*time + preload */
    if (rts->cur_time.tv_sec == 0 && rts->cur_time.tv_nsec == 0) {
        /* First call: take the reference time and seed the balance with
         * interval * (preload - 1) tokens. */
        clock_gettime(CLOCK_MONOTONIC_RAW, &rts->cur_time);
        tokens = rts->interval * (rts->preload - 1);
    } else {
        long ntokens, tmp;
        struct timespec tv;

        /* Later calls: milliseconds elapsed since cur_time; one token
         * accrues per millisecond. */
        clock_gettime(CLOCK_MONOTONIC_RAW, &tv);
        ntokens = (tv.tv_sec - rts->cur_time.tv_sec) * 1000 +
                  (tv.tv_nsec - rts->cur_time.tv_nsec) / 1000000;
        if (!rts->interval) {
            /* Case of unlimited flood is special;
             * if we see no reply, they are limited to 100pps */
            if (ntokens < MIN_INTERVAL_MS && in_flight(rts) >= rts->preload)
                return MIN_INTERVAL_MS - ntokens;
        }
        /* Add the tokens left over from earlier calls. */
        ntokens += tokens;
        /* Cap the balance at interval * preload tokens. */
        tmp = (long)rts->interval * (long)rts->preload;
        if (tmp < ntokens)
            ntokens = tmp;
        /* Fewer than `interval` tokens: too early to send; return the
         * number of milliseconds still missing. */
        if (ntokens < rts->interval)
            return rts->interval - ntokens;

        /* Enough tokens: restart the clock and spend `interval` tokens
         * on this probe. */
        rts->cur_time = tv;
        tokens = ntokens - rts->interval;
    }

    /* With opt_outstanding set, report a previous probe that has not
     * been answered yet. */
    if (rts->opt_outstanding) {
        if (rts->ntransmitted > 0 && !rcvd_test(rts, rts->ntransmitted)) {
            print_timestamp(rts);
            printf(_("no answer yet for icmp_seq=%lu\n"), (rts->ntransmitted % MAX_DUP_CHK));
            fflush(stdout);
        }
    }

    /* Send through the per-protocol send_probe callback. */
resend:
    i = fset->send_probe(rts, sock, rts->outpack, sizeof(rts->outpack));

    /* 0 means the probe was sent. */
    if (i == 0) {
        oom_count = 0;
        advance_ntransmitted(rts);  /* count the transmitted probe */
        if (!rts->opt_quiet && rts->opt_flood) {
            /* Very silly, but without this output with
             * high preload or pipe size is very confusing. */
            /* Flood mode prints one '.' per probe sent. */
            if ((rts->preload < rts->screen_width && rts->pipesize < rts->screen_width) ||
                in_flight(rts) < rts->screen_width)
                write_stdout(".", 1);
        }
        /* Delay until the next probe: the interval minus the tokens that
         * are still banked. */
        return rts->interval - tokens;
    }

    /* And handle various errors... */
    if (i > 0) {
        /* Apparently, it is some fatal bug. */
        abort();
    } else if (errno == ENOBUFS || errno == ENOMEM) {
        int nores_interval;

        /* Device queue overflow or OOM. Packet is not sent. */
        tokens = 0;
        /* Slowdown. This works only in adaptive mode (option -A) */
        rts->rtt_addend += (rts->rtt < 8 * 50000 ? rts->rtt / 8 : 50000);
        if (rts->opt_adaptive)
            update_interval(rts);
        /* Retry after half an interval, capped at 500. */
        nores_interval = SCHINT(rts->interval / 2);
        if (nores_interval > 500)
            nores_interval = 500;
        oom_count++;
        if (oom_count * nores_interval < rts->lingertime)
            return nores_interval;
        i = 0;
        /* Fall to hard error. It is to avoid complete deadlock
         * on stuck output device even when deadline was not requested.
         * Expected timings are screwed up in any case, but we will
         * exit some day. :-) */
    } else if (errno == EAGAIN) {
        /* Socket buffer is full. */
        tokens += rts->interval;
        return MIN_INTERVAL_MS;
    } else if (errno == EMSGSIZE) {
        /* For example, sendto with len > 65527 on SOCK_DGRAM fails with this errno. */
        rts->nerrors++;
        i = 0;
    } else {
        /* Any other errno: ask the per-protocol error handler. */
        if ((i = fset->receive_error_msg(rts, sock)) > 0) {
            /* An ICMP error arrived. In this case, we've received
             * an error from sendto(), but we've also received an
             * ICMP message, which means the packet did in fact
             * send in some capacity. So, in this odd case, report
             * the more specific errno as the error, and treat this
             * as a hard local error. */
            i = 0;
            goto hard_local_error;
        }
        /* Compatibility with old linuces. */
        if (i == 0 && rts->confirm_flag && errno == EINVAL) {
            rts->confirm_flag = 0;
            errno = 0;
        }
        if (!errno)
            goto resend;
    }

hard_local_error:
    /* Hard local error. Pretend we sent packet. */
    advance_ntransmitted(rts);

    if (i == 0 && !rts->opt_quiet) {
        if (rts->opt_flood)
            write_stdout("E", 1);
        else
            error(0, errno, "sendmsg");
    }
    tokens = 0;
    return SCHINT(rts->interval);
}
ping4_send_probe (ping.c)
/*
 * ping4_send_probe --
 * 	Build one ICMP ECHO REQUEST in `packet` and transmit it to
 * rts->whereto; the IP header is left to the kernel. The sequence number
 * is rts->ntransmitted + 1 and the id is rts->ident. When timing is
 * enabled, the bytes right after the ICMP header carry a struct timeval
 * holding the send time.
 *
 * Returns 0 when sendto() reports the full length (datalen + 8) as sent,
 * otherwise the raw sendto() result.
 */
int ping4_send_probe(struct ping_rts *rts, socket_st *sock, void *packet,
                     unsigned packet_size __attribute__((__unused__)))
{
    struct icmphdr *hdr = (struct icmphdr *)packet;
    void *payload = hdr + 1;
    struct timeval now;
    int stamp_early;
    int stamp_late;
    int wire_len;
    int sent;

    /* Fill in the echo header; the checksum is zero while it is computed. */
    hdr->type = ICMP_ECHO;
    hdr->code = 0;
    hdr->checksum = 0;
    hdr->un.echo.sequence = htons(rts->ntransmitted + 1);
    hdr->un.echo.id = rts->ident;	/* ID */

    rcvd_clear(rts, rts->ntransmitted + 1);

    /*
     * With opt_latency the timestamp is written before the checksum is
     * computed; otherwise the timestamp area is zeroed first and the real
     * time is patched in after the checksum pass.
     */
    stamp_early = rts->timing && rts->opt_latency;
    stamp_late = rts->timing && !rts->opt_latency;

    if (stamp_early) {
        gettimeofday(&now, NULL);
        memcpy(payload, &now, sizeof(now));
    } else if (stamp_late) {
        memset(payload, 0, sizeof(struct timeval));
    }

    wire_len = rts->datalen + 8;	/* skips ICMP portion */

    /* compute ICMP checksum here */
    hdr->checksum = in_cksum((unsigned short *)hdr, wire_len, 0);

    if (stamp_late) {
        /* Write the timestamp as late as possible and fold it into the
         * checksum that was computed over the zeroed area. */
        gettimeofday(&now, NULL);
        memcpy(payload, &now, sizeof(now));
        hdr->checksum = in_cksum((unsigned short *)&now, sizeof(now), ~hdr->checksum);
    }

    sent = sendto(sock->fd, hdr, wire_len, 0,
                  (struct sockaddr *)&rts->whereto, sizeof(rts->whereto));

    if (sent == wire_len)
        return 0;
    return sent;
}
ping6_send_probe (ping6_common.c)
TODO
__schedule_exit (ping_common.c)
/*
 * __schedule_exit -- arm the final timer once all probes have been sent.
 *
 * On the first call the linger time is chosen (in microseconds, as the
 * division by 1000000 for tv_sec shows): twice tmax but not less than the
 * interval if something was received, otherwise lingertime. A one-shot
 * ITIMER_REAL is armed for that time, and `next` is raised to at least
 * linger/1000. Later calls return `next` unchanged.
 */
int __schedule_exit(int next)
{
    static unsigned long linger_us;

    /* The timer has already been armed by an earlier call. */
    if (linger_us)
        return next;

    if (!global_rts->nreceived) {
        linger_us = global_rts->lingertime * 1000;
    } else {
        linger_us = 2 * global_rts->tmax;
        if (linger_us < 1000 * (unsigned long)global_rts->interval)
            linger_us = 1000 * global_rts->interval;
    }

    /* Never report a delay shorter than the linger time. */
    if (next < 0 || (unsigned long)next < linger_us / 1000)
        next = linger_us / 1000;

    struct itimerval timer = {
        .it_interval = { .tv_sec = 0, .tv_usec = 0 },
        .it_value = {
            .tv_sec = linger_us / 1000000,
            .tv_usec = linger_us % 1000000,
        },
    };
    setitimer(ITIMER_REAL, &timer, NULL);

    return next;
}

/* schedule_exit lives in ping.h. */
/*
 * schedule_exit -- once a packet count is set, at least that many probes
 * have been transmitted and no deadline is configured, pass `next`
 * through __schedule_exit(); otherwise return it unchanged.
 */
static inline int schedule_exit(struct ping_rts *rts, int next)
{
    if (!rts->npackets || rts->ntransmitted < rts->npackets || rts->deadline)
        return next;

    return __schedule_exit(next);
}
ping4_receive_error_msg (ping.c)
/*
 * ping4_receive_error_msg --
 *	Read one message from the socket's error queue (MSG_ERRQUEUE) without
 *	blocking, and account for / report it.
 *
 *	Returns the number of network (ICMP-originated) errors handled when
 *	there is one, otherwise minus the number of local errors (so 0 when
 *	nothing was counted).
 *
 *	errno is restored on exit to the value it had on entry, except when
 *	the queued ICMP error turns out not to belong to this ping session,
 *	in which case errno is set to 0.
 */
int ping4_receive_error_msg(struct ping_rts *rts, socket_st *sock)
{
	ssize_t res;
	char cbuf[512];
	struct iovec iov;
	struct msghdr msg;
	struct cmsghdr *cmsgh;
	struct sock_extended_err *e;
	struct icmphdr icmph;
	struct sockaddr_in target;
	int net_errors = 0;
	int local_errors = 0;
	int saved_errno = errno;

	/* The data part receives the ICMP header, msg_name receives `target`. */
	iov.iov_base = &icmph;
	iov.iov_len = sizeof(icmph);
	msg.msg_name = (void *)&target;
	msg.msg_namelen = sizeof(target);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_flags = 0;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	/* Read the pending entry from MSG_ERRQUEUE for error handling. */
	res = recvmsg(sock->fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT);
	if (res < 0) {
		if (errno == EAGAIN || errno == EINTR)
			local_errors++;
		goto out;
	}

	/* Locate the IP_RECVERR control message carrying the extended error. */
	e = NULL;
	for (cmsgh = CMSG_FIRSTHDR(&msg); cmsgh; cmsgh = CMSG_NXTHDR(&msg, cmsgh)) {
		if (cmsgh->cmsg_level == SOL_IP) {
			if (cmsgh->cmsg_type == IP_RECVERR)
				e = (struct sock_extended_err *)CMSG_DATA(cmsgh);
		}
	}
	if (e == NULL)
		abort();

	/* Error origins: see https://www.man7.org/linux/man-pages/man7/ip.7.html */
	if (e->ee_origin == SO_EE_ORIGIN_LOCAL) {
		local_errors++;
		if (rts->opt_quiet)
			goto out;
		if (rts->opt_flood)
			write_stdout("E", 1);
		else if (e->ee_errno != EMSGSIZE)
			error(0, e->ee_errno, _("local error"));
		else
			error(0, 0, _("local error: message too long, mtu=%u"), e->ee_info);
		rts->nerrors++;
	} else if (e->ee_origin == SO_EE_ORIGIN_ICMP) {
		/* Address stored immediately after the sock_extended_err. */
		struct sockaddr_in *sin = (struct sockaddr_in *)(e + 1);

		if (res < (ssize_t) sizeof(icmph) ||
		    target.sin_addr.s_addr != rts->whereto.sin_addr.s_addr ||
		    icmph.type != ICMP_ECHO ||
		    !is_ours(rts, sock, icmph.un.echo.id)) {
			/* Not our error, not an error at all. Clear. */
			saved_errno = 0;
			goto out;
		}

		acknowledge(rts, ntohs(icmph.un.echo.sequence));

		if (sock->socktype == SOCK_RAW) {
			/*
			 * Reprogram the raw-socket ICMP filter: every bit set
			 * except source quench, redirect and echo reply.
			 */
			struct icmp_filter filt;

			filt.data = ~((1 << ICMP_SOURCE_QUENCH) |
				      (1 << ICMP_REDIRECT) |
				      (1 << ICMP_ECHOREPLY));
			if (setsockopt(sock->fd, SOL_RAW, ICMP_FILTER,
				       (const void *)&filt, sizeof(filt)) == -1)
				error(2, errno, "setsockopt(ICMP_FILTER)");
		}
		net_errors++;
		rts->nerrors++;
		if (rts->opt_quiet)
			goto out;
		if (rts->opt_flood) {
			write_stdout("\bE", 2);
		} else {
			print_timestamp(rts);
			printf(_("From %s icmp_seq=%u "),
			       pr_addr(rts, sin, sizeof *sin),
			       ntohs(icmph.un.echo.sequence));
			pr_icmph(rts, e->ee_type, e->ee_code, e->ee_info, NULL);
			fflush(stdout);
		}
	}

out:
	errno = saved_errno;
	return net_errors ? net_errors : -local_errors;
}
ping6_receive_error_msg (ping6_common.c)
TODO
ping4_parse_reply (ping.c)
/*
 * ping4_parse_reply --
 *	Inspect one received IPv4 message and, when it is an echo reply for
 *	this session, feed it to gather_statistics() and print the result.
 *
 *	msg  - received message; the packet bytes are read from the first
 *	       iovec, and for non-raw sockets TTL / IP options are read from
 *	       its control messages.
 *	cc   - number of bytes received.
 *	addr - sender address, used as a struct sockaddr_in.
 *	tv   - receive timestamp, passed on to gather_statistics().
 *
 *	Returns 1 on the paths marked below (short packet, echo reply or
 *	embedded echo that is not ours, stray echo request, redirect/source
 *	quench), and 0 on all other paths.
 *	NOTE(review): presumably a non-zero return tells the caller the
 *	packet was not an answer to one of our probes - confirm against the
 *	main loop.
 */
int ping4_parse_reply(struct ping_rts *rts, struct socket_st *sock,
		      struct msghdr *msg, int cc, void *addr,
		      struct timeval *tv)
{
	struct sockaddr_in *from = addr;
	uint8_t *buf = msg->msg_iov->iov_base;
	struct icmphdr *icp;
	struct iphdr *ip;
	int hlen;
	int csfailed;
	struct cmsghdr *cmsgh;
	int reply_ttl;
	uint8_t *opts, *tmp_ttl;
	int olen;
	int wrong_source = 0;

	/* Validate the IP header and the ICMP header in turn. */
	/* Check the IP header */
	ip = (struct iphdr *)buf;
	if (sock->socktype == SOCK_RAW) {
		/* Raw socket: the buffer starts with the IP header. */
		hlen = ip->ihl * 4;
		if (cc < hlen + 8 || ip->ihl < 5) {
			if (rts->opt_verbose)
				error(0, 0, _("packet too short (%d bytes) from %s"), cc,
				      pr_addr(rts,from, sizeof *from));
			return 1;
		}
		reply_ttl = ip->ttl;
		opts = buf + sizeof(struct iphdr);
		olen = hlen - sizeof(struct iphdr);
	} else {
		/*
		 * Other socket types: no IP header is skipped in the buffer
		 * (hlen = 0); TTL and options are taken from SOL_IP control
		 * messages when present.
		 */
		hlen = 0;
		reply_ttl = 0;
		opts = buf;
		olen = 0;
		for (cmsgh = CMSG_FIRSTHDR(msg); cmsgh; cmsgh = CMSG_NXTHDR(msg, cmsgh)) {
			if (cmsgh->cmsg_level != SOL_IP)
				continue;
			if (cmsgh->cmsg_type == IP_TTL) {
				if (cmsgh->cmsg_len < sizeof(int))
					continue;
				tmp_ttl = (uint8_t *)CMSG_DATA(cmsgh);
				reply_ttl = (int)*tmp_ttl;
			} else if (cmsgh->cmsg_type == IP_RETOPTS) {
				opts = (uint8_t *)CMSG_DATA(cmsgh);
				olen = cmsgh->cmsg_len;
			}
		}
	}

	/* Now the ICMP part */
	cc -= hlen;
	icp = (struct icmphdr *)(buf + hlen);
	/* Non-zero result is treated as a checksum failure below. */
	csfailed = in_cksum((unsigned short *)icp, cc, 0);

	if (icp->type == ICMP_ECHOREPLY) {
		if (!is_ours(rts, sock, icp->un.echo.id))
			return 1;	/* 'Twas not our ECHO */

		/* Unicast ping answered from an address other than the target. */
		if (!rts->broadcast_pings && !rts->multicast &&
		    from->sin_addr.s_addr != rts->whereto.sin_addr.s_addr)
			wrong_source = 1;
		/*
		 * A non-zero result from gather_statistics() skips the
		 * trailing bell / IP-options / newline output at the end of
		 * this function.
		 */
		if (gather_statistics(rts, (uint8_t *)icp, sizeof(*icp), cc,
				      ntohs(icp->un.echo.sequence),
				      reply_ttl, csfailed, tv, pr_addr(rts, from, sizeof *from),
				      pr_echo_reply, rts->multicast, wrong_source)) {
			fflush(stdout);
			return 0;
		}
	} else {
		/* We fall here when a redirect or source quench arrived. */

		switch (icp->type) {
		case ICMP_ECHO:
			/* MUST NOT */
			return 1;
		case ICMP_SOURCE_QUENCH:
		case ICMP_REDIRECT:
		case ICMP_DEST_UNREACH:
		case ICMP_TIME_EXCEEDED:
		case ICMP_PARAMETERPROB:
			{
				/*
				 * These messages embed the offending packet:
				 * an IP header right after the outer ICMP
				 * header, followed by the inner ICMP header.
				 */
				struct iphdr *iph = (struct iphdr *)(&icp[1]);
				struct icmphdr *icp1 = (struct icmphdr *)
						((unsigned char *)iph + iph->ihl * 4);
				int error_pkt;

				/* Need outer ICMP + inner IP + inner ICMP headers. */
				if (cc < (int)(8 + sizeof(struct iphdr) + 8) ||
				    cc < 8 + iph->ihl * 4 + 8)
					return 1;
				/* Inner packet must be one of our echo requests to the target. */
				if (icp1->type != ICMP_ECHO ||
				    iph->daddr != rts->whereto.sin_addr.s_addr ||
				    !is_ours(rts, sock, icp1->un.echo.id))
					return 1;
				/* Everything except redirect / source quench is acknowledged silently here. */
				error_pkt = (icp->type != ICMP_REDIRECT &&
					     icp->type != ICMP_SOURCE_QUENCH);
				if (error_pkt) {
					acknowledge(rts, ntohs(icp1->un.echo.sequence));
					return 0;
				}
				if (rts->opt_quiet || rts->opt_flood)
					return 1;
				print_timestamp(rts);
				printf(_("From %s: icmp_seq=%u "),
				       pr_addr(rts, from, sizeof *from),
				       ntohs(icp1->un.echo.sequence));
				if (csfailed)
					printf(_("(BAD CHECKSUM)"));
				pr_icmph(rts, icp->type, icp->code, ntohl(icp->un.gateway), icp);
				return 1;
			}
		default:
			/* MUST NOT */
			break;
		}
		/* Any other ICMP type: flood marker, or verbose dump for uid 0 only. */
		if (rts->opt_flood && !(rts->opt_verbose || rts->opt_quiet)) {
			if (!csfailed)
				write_stdout("!E", 2);
			else
				write_stdout("!EC", 3);
			return 0;
		}
		if (!rts->opt_verbose || rts->uid)
			return 0;
		if (rts->opt_ptimeofday) {
			struct timeval recv_time;
			gettimeofday(&recv_time, NULL);
			printf("%lu.%06lu ", (unsigned long)recv_time.tv_sec, (unsigned long)recv_time.tv_usec);
		}
		printf(_("From %s: "), pr_addr(rts, from, sizeof *from));
		if (csfailed) {
			printf(_("(BAD CHECKSUM)\n"));
			return 0;
		}
		pr_icmph(rts, icp->type, icp->code, ntohl(icp->un.gateway), icp);
		return 0;
	}

	/* Reached only for an echo reply for which gather_statistics() returned 0. */
	if (rts->opt_audible) {
		putchar('\a');
		if (rts->opt_flood)
			fflush(stdout);
	}
	if (!rts->opt_flood) {
		pr_options(rts, opts, olen + sizeof(struct iphdr));

		putchar('\n');
		fflush(stdout);
	}
	return 0;
}
ping6_parse_reply (ping6_common.c)
TODO
ping4_install_filter (ping.c)
/*
 * ping4_install_filter --
 *	Attach a small BPF program to the socket (SO_ATTACH_FILTER) so that
 *	echo replies carrying a foreign identifier are rejected.
 *
 *	Only the first call does anything; the guard is set before the
 *	setsockopt() attempt, so a failed attempt is not retried. Failure is
 *	reported as a warning only.
 */
void ping4_install_filter(struct ping_rts *rts, socket_st *sock)
{
	static int attempted;
	static struct sock_filter prog[] = {
		/* Skip IP header due BSD, see ping6. */
		BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0),
		/* Load icmp echo ident */
		BPF_STMT(BPF_LD | BPF_H | BPF_IND, 4),
		/* Ours? (0xAAAA is a placeholder, patched below) */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xAAAA, 0, 1),
		/* Yes, it passes. */
		BPF_STMT(BPF_RET | BPF_K, ~0U),
		/* Load icmp type */
		BPF_STMT(BPF_LD | BPF_B | BPF_IND, 0),
		/* Echo? */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ICMP_ECHOREPLY, 1, 0),
		/* No. It passes. */
		BPF_STMT(BPF_RET | BPF_K, 0xFFFFFFF),
		/* Echo with wrong ident. Reject. */
		BPF_STMT(BPF_RET | BPF_K, 0)
	};
	static struct sock_fprog fprog = {
		sizeof prog / sizeof(prog[0]),
		prog
	};

	if (attempted)
		return;
	attempted = 1;

	/* Patch bpflet for current identifier. */
	prog[2] = (struct sock_filter)BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
					       htons(rts->ident), 0, 1);

	if (setsockopt(sock->fd, SOL_SOCKET, SO_ATTACH_FILTER,
		       &fprog, sizeof(fprog)) != 0)
		error(0, errno, _("WARNING: failed to install socket filter"));
}
ping6_install_filter (ping6_common.c)
TODO
finish (ping_common.c)
/* * finish -- * Print out statistics, and give up. */ int finish(struct ping_rts *rts) { struct timespec tv = rts->cur_time; char *comma = ""; tssub(&tv, &rts->start_time); /*打印ping统计结果信息*/ putchar('\n'); fflush(stdout); printf(_("--- %s ping statistics ---\n"), rts->hostname); printf(_("%ld packets transmitted, "), rts->ntransmitted); printf(_("%ld received"), rts->nreceived); if (rts->nrepeats) printf(_(", +%ld duplicates"), rts->nrepeats); if (rts->nchecksum) printf(_(", +%ld corrupted"), rts->nchecksum); if (rts->nerrors) printf(_(", +%ld errors"), rts->nerrors); if (rts->ntransmitted) { #ifdef USE_IDN setlocale(LC_ALL, "C"); #endif /*打印丢包数据统计百分比*/ printf(_(", %g%% packet loss"), (float)((((long long)(rts->ntransmitted - rts->nreceived)) * 100.0) / rts->ntransmitted)); printf(_(", time %llums"), (unsigned long long)(1000 * tv.tv_sec + (tv.tv_nsec + 500000) / 1000000)); } putchar('\n'); if (rts->nreceived && rts->timing) { double tmdev; long total = rts->nreceived + rts->nrepeats; long tmavg = rts->tsum / total; long long tmvar; /*计算时间统计信息*/ if (rts->tsum < INT_MAX) /* This slightly clumsy computation order is important to avoid * integer rounding errors for small ping times. 
*/ tmvar = (rts->tsum2 - ((rts->tsum * rts->tsum) / total)) / total; else tmvar = (rts->tsum2 / total) - (tmavg * tmavg); tmdev = llsqrt(tmvar); printf(_("rtt min/avg/max/mdev = %ld.%03ld/%lu.%03ld/%ld.%03ld/%ld.%03ld ms"), (long)rts->tmin / 1000, (long)rts->tmin % 1000, (unsigned long)(tmavg / 1000), (long)(tmavg % 1000), (long)rts->tmax / 1000, (long)rts->tmax % 1000, (long)tmdev / 1000, (long)tmdev % 1000); comma = ", "; } if (rts->pipesize > 1) { printf(_("%spipe %d"), comma, rts->pipesize); comma = ", "; } /*打印rtt信息*/ if (rts->nreceived && (!rts->interval || rts->opt_flood || rts->opt_adaptive) && rts->ntransmitted > 1) { int ipg = (1000000 * (long long)tv.tv_sec + tv.tv_nsec / 1000) / (rts->ntransmitted - 1); printf(_("%sipg/ewma %d.%03d/%d.%03d ms"), comma, ipg / 1000, ipg % 1000, rts->rtt / 8000, (rts->rtt / 8) % 1000); } putchar('\n'); return (!rts->nreceived || (rts->deadline && rts->nreceived < rts->npackets)); }
status (ping_common.c)
/*
 * status --
 *	Print a one-line progress summary to stderr: received/transmitted
 *	counters, integer loss percentage and, when replies were timed,
 *	min/avg/ewma/max round-trip times. Carries less detail than
 *	finish().
 *
 *	Clears rts->status_snapshot before printing.
 */
void status(struct ping_rts *rts)
{
	int loss = 0;
	long tavg = 0;

	rts->status_snapshot = 0;

	/* Integer percentage of transmitted packets without a reply. */
	if (rts->ntransmitted)
		loss = (((long long)(rts->ntransmitted - rts->nreceived)) * 100) / rts->ntransmitted;

	fprintf(stderr, "\r");
	fprintf(stderr, _("%ld/%ld packets, %d%% loss"), rts->nreceived, rts->ntransmitted, loss);

	if (rts->nreceived && rts->timing) {
		tavg = rts->tsum / (rts->nreceived + rts->nrepeats);

		/*
		 * The average's integer part is printed with %lu, so pass it
		 * as unsigned long (as finish() does) to match the specifier.
		 */
		fprintf(stderr, _(", min/avg/ewma/max = %ld.%03ld/%lu.%03ld/%d.%03d/%ld.%03ld ms"),
			(long)rts->tmin / 1000, (long)rts->tmin % 1000,
			(unsigned long)(tavg / 1000), tavg % 1000,
			rts->rtt / 8000, (rts->rtt / 8) % 1000,
			(long)rts->tmax / 1000, (long)rts->tmax % 1000);
	}
	fprintf(stderr, "\n");
}
gather_statistics (ping_common.c)
/*
 * gather_statistics --
 *	Account for one received echo reply and print the per-packet line.
 *
 *	icmph/icmplen - start and length of the ICMP header; the payload
 *	                (timestamp first) is read at icmph + icmplen.
 *	cc            - length of the ICMP message in bytes.
 *	seq           - echo sequence number of the reply.
 *	hops          - TTL value to print; not printed when negative.
 *	csfailed      - non-zero when the checksum did not verify.
 *	tv            - receive time; overwritten with the difference to the
 *	                timestamp carried in the packet.
 *	from          - printable sender address.
 *	pr_reply      - optional callback printing protocol-specific fields.
 *
 *	Returns 1 in quiet mode or when the reply is truncated, 0 otherwise.
 */
int gather_statistics(struct ping_rts *rts, uint8_t *icmph, int icmplen,
		      int cc, uint16_t seq, int hops,
		      int csfailed, struct timeval *tv, char *from,
		      void (*pr_reply)(uint8_t *icmph, int cc), int multicast,
		      int wrong_source)
{
	int dupflag = 0;
	long triptime = 0;
	uint8_t *ptr = icmph + icmplen;

	/* Update the statistics; nreceived is decremented again below for bad or duplicate replies. */
	++rts->nreceived;
	if (!csfailed)
		acknowledge(rts, seq);

	/* Only compute a round-trip time when the packet is long enough to hold a timestamp. */
	if (rts->timing && cc >= (int)(8 + sizeof(struct timeval))) {
		struct timeval tmp_tv;
		memcpy(&tmp_tv, ptr, sizeof(tmp_tv));

restamp:
		/* triptime in microseconds = receive time - time stored in the packet */
		tvsub(tv, &tmp_tv);
		triptime = tv->tv_sec * 1000000 + tv->tv_usec;
		if (triptime < 0) {
			error(0, 0, _("Warning: time of day goes back (%ldus), taking countermeasures"), triptime);
			triptime = 0;
			/*
			 * First occurrence: take a fresh receive time, switch
			 * on opt_latency and recompute once. With opt_latency
			 * already set, triptime simply stays 0.
			 */
			if (!rts->opt_latency) {
				gettimeofday(tv, NULL);
				rts->opt_latency = 1;
				goto restamp;
			}
		}
		if (!csfailed) {
			/* Running sum, sum of squares, min and max. */
			rts->tsum += triptime;
			rts->tsum2 += (double)((long long)triptime * (long long)triptime);
			if (triptime < rts->tmin)
				rts->tmin = triptime;
			if (triptime > rts->tmax)
				rts->tmax = triptime;
			/* rts->rtt holds 8x the smoothed value (it is printed as rtt / 8). */
			if (!rts->rtt)
				rts->rtt = triptime * 8;
			else
				rts->rtt += triptime - rts->rtt / 8;
			if (rts->opt_adaptive)
				update_interval(rts);
		}
	}

	/* Classify the reply: corrupted, duplicate, or first arrival of this sequence number. */
	if (csfailed) {
		++rts->nchecksum;
		--rts->nreceived;
	} else if (rcvd_test(rts, seq)) {
		++rts->nrepeats;
		--rts->nreceived;
		dupflag = 1;
	} else {
		rcvd_set(rts, seq);
		dupflag = 0;
	}
	rts->confirm = rts->confirm_flag;

	if (rts->opt_quiet)
		return 1;

	if (rts->opt_flood) {
		/* Flood mode prints only a short marker instead of a full line. */
		if (!csfailed)
			write_stdout("\b \b", 3);
		else
			write_stdout("\bC", 2);
	} else {
		int i;
		uint8_t *cp, *dp;

		print_timestamp(rts);
		printf(_("%d bytes from %s:"), cc, from);

		if (pr_reply)
			pr_reply(icmph, cc);

		if (rts->opt_verbose && rts->ident != -1)
			printf(_(" ident=%d"), ntohs(rts->ident));

		if (hops >= 0)
			printf(_(" ttl=%d"), hops);

		if (cc < rts->datalen + 8) {
			printf(_(" (truncated)\n"));
			return 1;
		}
		if (rts->timing) {
			/* Fewer decimals are printed as the time grows, unless full precision was requested. */
			if (rts->opt_rtt_precision)
				printf(_(" time=%ld.%03ld ms"), triptime / 1000, triptime % 1000);
			else if (triptime >= 100000 - 50)
				printf(_(" time=%ld ms"), (triptime + 500) / 1000);
			else if (triptime >= 10000 - 5)
				printf(_(" time=%ld.%01ld ms"), (triptime + 50) / 1000,
				       ((triptime + 50) % 1000) / 100);
			else if (triptime >= 1000)
				printf(_(" time=%ld.%02ld ms"), (triptime + 5) / 1000,
				       ((triptime + 5) % 1000) / 10);
			else
				printf(_(" time=%ld.%03ld ms"), triptime / 1000,
				       triptime % 1000);
		}
		if (dupflag && (!multicast || rts->opt_verbose))
			printf(_(" (DUP!)"));
		if (csfailed)
			printf(_(" (BAD CHECKSUM!)"));
		if (wrong_source)
			printf(_(" (DIFFERENT ADDRESS!)"));

		/* check the data */
		/* Compare the payload after the timestamp with the bytes kept in rts->outpack. */
		cp = ((unsigned char *)ptr) + sizeof(struct timeval);
		dp = &rts->outpack[8 + sizeof(struct timeval)];
		for (i = sizeof(struct timeval); i < rts->datalen; ++i, ++cp, ++dp) {
			if (*cp != *dp) {
				printf(_("\nwrong data byte #%d should be 0x%x but was 0x%x"),
				       i, *dp, *cp);
				/* On the first mismatch, hex-dump the received payload, 32 bytes per row. */
				cp = (unsigned char *)ptr + sizeof(struct timeval);
				for (i = sizeof(struct timeval); i < rts->datalen; ++i, ++cp) {
					if ((i % 32) == sizeof(struct timeval))
						printf("\n#%d\t", i);
					printf("%x ", *cp);
				}
				break;
			}
		}
	}
	return 0;
}
in_cksum (ping.c)
/*
 * in_cksum --
 *	16-bit ones'-complement checksum over `len` bytes starting at `addr`
 *	(a checksum, not a CRC). `csum` seeds the accumulator, which lets a
 *	caller continue from a previously computed partial sum.
 *
 *	16-bit words are added into a wider accumulator, a trailing odd byte
 *	is added through ODDBYTE(), the carries above bit 15 are folded back
 *	into the low 16 bits, and the complement of the result is returned.
 */
static unsigned short in_cksum(const unsigned short *addr, int len, unsigned short csum)
{
	const unsigned short *word = addr;
	int remaining;
	int acc = csum;

	/* Sum all complete 16-bit words. */
	for (remaining = len; remaining > 1; remaining -= 2)
		acc += *word++;

	/* mop up an odd byte, if necessary */
	if (remaining == 1)
		acc += ODDBYTE(*(unsigned char *)word);	/* le16toh() may be unavailable on old systems */

	/* Fold the carry-outs from the top 16 bits into the low 16 bits. */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	/* Complement and truncate to 16 bits. */
	return (unsigned short)~acc;
}
参考源码
Ubuntu源码
https://git.launchpad.net/ubuntu/+source/iputils/tree/ping?h=ubuntu/plucky
其他源码