dns 根据/etc/resolv.conf发包处理

gethostbyname #

dns的一些系统行为 #

1. 什么情况下使用下一个dns服务器,当前结果直接丢弃 #

  1. 当前dns服务器接收失败(网络错误,收包错误等)
  2. dns服务器返回的响应码(rcode)为 SERVFAIL、NOTIMP 或 REFUSED
  3. dns服务器返回正常的情况下,下面条件都满足就尝试下一个
    • answer为空
    • 响应不是权威应答(authoritative answer,即 AA 标志位为 0)
    • 当前dns服务器不支持递归查询(即 RA 标志位为 0)
    • 附加记录(additional resource records)为空,即 arcount 为 0

2. 什么情况下使用tcp进行发送dns #

  • 返回的响应报文头中 TC(truncated,截断)标志位为 1 时,改用 tcp 重新发送请求

源码 #

  • _nss_dns_gethostbyname3_r开始
 1// resolv/nss_dns/dns-host.c
 2enum nss_status
 3_nss_dns_gethostbyname3_r (const char *name, int af, struct hostent *result,
 4			   char *buffer, size_t buflen, int *errnop,
 5			   int *h_errnop, int32_t *ttlp, char **canonp)
 6{
 7  struct resolv_context *ctx = __resolv_context_get ();
 8  if (ctx == NULL)
 9    {
10      *errnop = errno;
11      *h_errnop = NETDB_INTERNAL;
12      return NSS_STATUS_UNAVAIL;
13    }
14  enum nss_status status = gethostbyname3_context
15    (ctx, name, af, result, buffer, buflen, errnop, h_errnop, ttlp, canonp);
16  __resolv_context_put (ctx);
17  return status;
18}
19libc_hidden_def (_nss_dns_gethostbyname3_r)

1. 如何读取/etc/resolv.conf #

  • 获取struct resolv_context *ctx
 1// resolv/resolv_context.c
 2struct resolv_context *
 3__resolv_context_get (void)
 4{
 5  return context_get (false);
 6}
 7libc_hidden_def (__resolv_context_get)
 8
 9// resolv/resolv_context.c
10/* Backing function for the __resolv_context_get family of
11   functions.  */
12static struct resolv_context *
13context_get (bool preinit)
14{
15  if (current != NULL)
16    return context_reuse ();
17
18	// 这里的_res是宏 (*__res_state ()),取到的是当前线程自己的解析器状态(每个线程一份),并不是靠加锁保护的全局变量
19  struct resolv_context *ctx = context_alloc (&_res);
20  if (ctx == NULL)
21    return NULL;
22  if (!maybe_init (ctx, preinit))
23    {
24      context_free (ctx);
25      return NULL;
26    }
27  return ctx;
28}
29
30// resolv/resolv_context.c
31/* Initialize *RESP if RES_INIT is not yet set in RESP->options, or if
32   res_init in some other thread requested re-initializing.  */
33static __attribute__ ((warn_unused_result)) bool
34maybe_init (struct resolv_context *ctx, bool preinit)
35{
36  struct __res_state *resp = ctx->resp;
37  if (resp->options & RES_INIT)
38    {
39      if (resp->options & RES_NORELOAD)
40        /* Configuration reloading was explicitly disabled.  */
41        return true;
42
43      /* If there is no associated resolv_conf object despite the
44         initialization, something modified *ctx->resp.  Do not
45         override those changes.  */
46      if (ctx->conf != NULL && replicated_configuration_matches (ctx))
47        {
48          struct resolv_conf *current = __resolv_conf_get_current ();
49          if (current == NULL)
50            return false;
51
52          /* Check if the configuration changed.  */
53          if (current != ctx->conf)
54            {
55              /* This call will detach the extended resolver state.  */
56              if (resp->nscount > 0)
57                __res_iclose (resp, true);
58              /* Reattach the current configuration.  */
59              if (__resolv_conf_attach (ctx->resp, current))
60                {
61                  __resolv_conf_put (ctx->conf);
62                  /* ctx takes ownership, so we do not release current.  */
63                  ctx->conf = current;
64                }
65            }
66          else
67            /* No change.  Drop the reference count for current.  */
68            __resolv_conf_put (current);
69        }
70      return true;
71    }
72
73  assert (ctx->conf == NULL);
74  if (preinit)
75    {
76      if (!resp->retrans)
77        resp->retrans = RES_TIMEOUT;
78      if (!resp->retry)
79        resp->retry = RES_DFLRETRY;
80      resp->options = RES_DEFAULT;
81      if (!resp->id)
82        resp->id = res_randomid ();
83    }
84
85  if (__res_vinit (resp, preinit) < 0)
86    return false;
87  ctx->conf = __resolv_conf_get (ctx->resp);
88  return true;
89}
  • 调用__resolv_conf_get_current,里面会检测/etc/resolv.conf是否改变,改变则重新加载
 1struct resolv_conf *
 2__resolv_conf_get_current (void)
 3{
 4  struct file_change_detection initial;
 5  // 检测一下/etc/resolv.conf是否更改,结果存到initial中,返回的只是函数调用的成功或失败
 6  if (!__file_change_detection_for_path (&initial, _PATH_RESCONF))
 7    return NULL;
 8
 9  struct resolv_conf_global *global_copy = get_locked_global ();
10  if (global_copy == NULL)
11    return NULL;
12  struct resolv_conf *conf;
13  if (global_copy->conf_current != NULL
14      && __file_is_unchanged (&initial, &global_copy->file_resolve_conf))
15    /* We can reuse the cached configuration object.  */
16    conf = global_copy->conf_current;
17  else
18    {
19      /* Parse configuration while holding the lock.  This avoids
20         duplicate work.  */
21      struct file_change_detection after_load;
22      conf = __resolv_conf_load (NULL, &after_load);
23      if (conf != NULL)
24        {
25          if (global_copy->conf_current != NULL)
26            conf_decrement (global_copy->conf_current);
27          global_copy->conf_current = conf; /* Takes ownership.  */
28
29          /* Update file change detection data, but only if it matches
30             the initial measurement.  This avoids an ABA race in case
31             /etc/resolv.conf is temporarily replaced while the file
32             is read (after the initial measurement), and restored to
33             the initial version later.  */
34          if (__file_is_unchanged (&initial, &after_load))
35            global_copy->file_resolve_conf = after_load;
36          else
37            /* If there is a discrepancy, trigger a reload during the
38               next use.  */
39            global_copy->file_resolve_conf.size = -1;
40        }
41    }
42
43  if (conf != NULL)
44    {
45      /* Return an additional reference.  */
46      assert (conf->__refcount > 0);
47      ++conf->__refcount;
48      assert (conf->__refcount > 0);
49    }
50  put_locked_global (global_copy);
51  return conf;
52}
  • 重新加载时会调用__resolv_conf_load,由它打开并解析/etc/resolv.conf
 1// resolv/resolv.h
 2#ifndef _PATH_RESCONF
 3#define _PATH_RESCONF        "/etc/resolv.conf"
 4#endif
 5
 6// resolv/res_init.c
 7struct resolv_conf *
 8__resolv_conf_load (struct __res_state *preinit,
 9                    struct file_change_detection *change)
10{
11  /* Ensure that /etc/hosts.conf has been loaded (once).  */
12  _res_hconf_init ();
13
14  FILE *fp = fopen (_PATH_RESCONF, "rce");
15  if (fp == NULL)
16    switch (errno)
17      {
18      case EACCES:
19      case EISDIR:
20      case ELOOP:
21      case ENOENT:
22      case ENOTDIR:
23      case EPERM:
24        /* Ignore these errors.  They are persistent errors caused
25           by file system contents.  */
26        break;
27      default:
28        /* Other errors refer to resource allocation problems and
29           need to be handled by the application.  */
30        return NULL;
31      }
32
33  struct resolv_conf_parser parser;
34  resolv_conf_parser_init (&parser, preinit);
35
36  struct resolv_conf *conf = NULL;
37  bool ok = res_vinit_1 (fp, &parser);
38  if (ok && change != NULL)
39    /* Update the file change information if the configuration was
40       loaded successfully.  */
41    ok = __file_change_detection_for_fp (change, fp);
42
43  if (ok)
44    {
45      parser.template.nameserver_list
46        = nameserver_list_begin (&parser.nameserver_list);
47      parser.template.nameserver_list_size
48        = nameserver_list_size (&parser.nameserver_list);
49      parser.template.search_list = search_list_begin (&parser.search_list);
50      parser.template.search_list_size
51        = search_list_size (&parser.search_list);
52      parser.template.sort_list = sort_list_begin (&parser.sort_list);
53      parser.template.sort_list_size = sort_list_size (&parser.sort_list);
54      conf = __resolv_conf_allocate (&parser.template);
55    }
56  resolv_conf_parser_free (&parser);
57
58  if (fp != NULL)
59    {
60      int saved_errno = errno;
61      fclose (fp);
62      __set_errno (saved_errno);
63    }
64
65  return conf;
66}

处理/etc/resolv.conf #

  • 下面是 res_vinit_1 的代码(为了便于阅读,重新格式化过)
  1// resolv/res_init.c
  2/* Internal helper function for __res_vinit, to aid with resource
  3   deallocation and error handling.  Return true on success, false on
  4   failure.  */
  5static bool res_vinit_1(FILE *fp, struct resolv_conf_parser *parser) {
  6    char *cp;
  7    size_t buffer_size = 0;
  8    bool haveenv = false;
  9
 10    /* Allow user to override the local domain definition.  */
 11    if ((cp = getenv("LOCALDOMAIN")) != NULL) {
 12        /* The code below splits the string in place.  */
 13        cp = __strdup(cp);
 14        if (cp == NULL) return false;
 15        free(parser->search_list_store);
 16        parser->search_list_store = cp;
 17        haveenv = true;
 18
 19        /* The string will be truncated as needed below.  */
 20        search_list_add(&parser->search_list, cp);
 21
 22        /* Set search list to be blank-separated strings from rest of
 23           env value.  Permits users of LOCALDOMAIN to still have a
 24           search list, and anyone to set the one that they want to use
 25           as an individual (even more important now that the rfc1535
 26           stuff restricts searches).  */
 27        for (bool in_name = true; *cp != '\0'; cp++) {
 28            if (*cp == '\n') {
 29                *cp = '\0';
 30                break;
 31            } else if (*cp == ' ' || *cp == '\t') {
 32                *cp = '\0';
 33                in_name = false;
 34            } else if (!in_name) {
 35                search_list_add(&parser->search_list, cp);
 36                in_name = true;
 37            }
 38        }
 39    }
 40
 41#define MATCH(line, name) \
 42    (!strncmp((line), name, sizeof(name) - 1) && ((line)[sizeof(name) - 1] == ' ' || (line)[sizeof(name) - 1] == '\t'))
 43
 44    if (fp != NULL) {
 45        /* No threads use this stream.  */
 46        __fsetlocking(fp, FSETLOCKING_BYCALLER);
 47        /* Read the config file.  */
 48        while (true) {
 49            {
 50                ssize_t ret = __getline(&parser->buffer, &buffer_size, fp);
 51                if (ret <= 0) {
 52                    if (_IO_ferror_unlocked(fp))
 53                        return false;
 54                    else
 55                        break;
 56                }
 57            }
 58
 59            /* Skip comments.  */
 60            if (*parser->buffer == ';' || *parser->buffer == '#') continue;
 61            /* Read default domain name.  */
 62            if (MATCH(parser->buffer, "domain")) {
 63                if (haveenv) /* LOCALDOMAIN overrides the configuration file.  */
 64                    continue;
 65                cp = parser->buffer + sizeof("domain") - 1;
 66                while (*cp == ' ' || *cp == '\t') cp++;
 67                if ((*cp == '\0') || (*cp == '\n')) continue;
 68
 69                cp = __strdup(cp);
 70                if (cp == NULL) return false;
 71                free(parser->search_list_store);
 72                parser->search_list_store = cp;
 73                search_list_clear(&parser->search_list);
 74                search_list_add(&parser->search_list, cp);
 75                /* Replace trailing whitespace.  */
 76                if ((cp = strpbrk(cp, " \t\n")) != NULL) *cp = '\0';
 77                continue;
 78            }
 79            /* Set search list.  */
 80            if (MATCH(parser->buffer, "search")) {
 81                if (haveenv) /* LOCALDOMAIN overrides the configuration file.  */
 82                    continue;
 83                cp = parser->buffer + sizeof("search") - 1;
 84                while (*cp == ' ' || *cp == '\t') cp++;
 85                if ((*cp == '\0') || (*cp == '\n')) continue;
 86
 87                {
 88                    char *p = strchr(cp, '\n');
 89                    if (p != NULL) *p = '\0';
 90                }
 91                cp = __strdup(cp);
 92                if (cp == NULL) return false;
 93                free(parser->search_list_store);
 94                parser->search_list_store = cp;
 95
 96                /* The string is truncated below.  */
 97                search_list_clear(&parser->search_list);
 98                search_list_add(&parser->search_list, cp);
 99
100                /* Set search list to be blank-separated strings on rest
101                   of line.  */
102                for (bool in_name = true; *cp != '\0'; cp++) {
103                    if (*cp == ' ' || *cp == '\t') {
104                        *cp = '\0';
105                        in_name = false;
106                    } else if (!in_name) {
107                        search_list_add(&parser->search_list, cp);
108                        in_name = true;
109                    }
110                }
111                continue;
112            }
113            /* Read nameservers to query.  */
114            if (MATCH(parser->buffer, "nameserver")) {
115                struct in_addr a;
116
117                cp = parser->buffer + sizeof("nameserver") - 1;
118                while (*cp == ' ' || *cp == '\t') cp++;
119
120                /* Ignore trailing contents on the name server line.  */
121                {
122                    char *el;
123                    if ((el = strpbrk(cp, " \t\n")) != NULL) *el = '\0';
124                }
125
126                struct sockaddr *sa;
127                if ((*cp != '\0') && (*cp != '\n') && __inet_aton_exact(cp, &a)) {
128                    sa = allocate_address_v4(a, NAMESERVER_PORT);
129                    if (sa == NULL) return false;
130                } else {
131                    struct in6_addr a6;
132                    char *el;
133                    if ((el = strchr(cp, SCOPE_DELIMITER)) != NULL) *el = '\0';
134                    if ((*cp != '\0') && (__inet_pton(AF_INET6, cp, &a6) > 0)) {
135                        struct sockaddr_in6 *sa6;
136
137                        sa6 = malloc(sizeof(*sa6));
138                        if (sa6 == NULL) return false;
139
140                        sa6->sin6_family = AF_INET6;
141                        sa6->sin6_port = htons(NAMESERVER_PORT);
142                        sa6->sin6_flowinfo = 0;
143                        sa6->sin6_addr = a6;
144
145                        sa6->sin6_scope_id = 0;
146                        if (__glibc_likely(el != NULL))
147                            /* Ignore errors, for backwards
148                               compatibility.  */
149                            __inet6_scopeid_pton(&a6, el + 1, &sa6->sin6_scope_id);
150                        sa = (struct sockaddr *)sa6;
151                    } else
152                        /* IPv6 address parse failure.  */
153                        sa = NULL;
154                }
155                if (sa != NULL) {
156                    const struct sockaddr **p = nameserver_list_emplace(&parser->nameserver_list);
157                    if (p != NULL)
158                        *p = sa;
159                    else {
160                        free(sa);
161                        return false;
162                    }
163                }
164                continue;
165            }
166            if (MATCH(parser->buffer, "sortlist")) {
167                struct in_addr a;
168
169                cp = parser->buffer + sizeof("sortlist") - 1;
170                while (true) {
171                    while (*cp == ' ' || *cp == '\t') cp++;
172                    if (*cp == '\0' || *cp == '\n' || *cp == ';') break;
173                    char *net = cp;
174                    while (*cp && !is_sort_mask(*cp) && *cp != ';' && isascii(*cp) && !isspace(*cp)) cp++;
175                    char separator = *cp;
176                    *cp = 0;
177                    struct resolv_sortlist_entry e;
178                    if (__inet_aton_exact(net, &a)) {
179                        e.addr = a;
180                        if (is_sort_mask(separator)) {
181                            *cp++ = separator;
182                            net = cp;
183                            while (*cp && *cp != ';' && isascii(*cp) && !isspace(*cp)) cp++;
184                            separator = *cp;
185                            *cp = 0;
186                            if (__inet_aton_exact(net, &a))
187                                e.mask = a.s_addr;
188                            else
189                                e.mask = net_mask(e.addr);
190                        } else
191                            e.mask = net_mask(e.addr);
192                        sort_list_add(&parser->sort_list, e);
193                    }
194                    *cp = separator;
195                }
196                continue;
197            }
198            if (MATCH(parser->buffer, "options")) {
199                res_setoptions(parser, parser->buffer + sizeof("options") - 1);
200                continue;
201            }
202        }
203    }
204    if (__glibc_unlikely(nameserver_list_size(&parser->nameserver_list) == 0)) {
205        const struct sockaddr **p = nameserver_list_emplace(&parser->nameserver_list);
206        if (p == NULL) return false;
207        *p = allocate_address_v4(__inet_makeaddr(IN_LOOPBACKNET, 1), NAMESERVER_PORT);
208        if (*p == NULL) return false;
209    }
210
211    if (search_list_size(&parser->search_list) == 0) {
212        char *domain;
213        if (!domain_from_hostname(&domain)) return false;
214        if (domain != NULL) {
215            free(parser->search_list_store);
216            parser->search_list_store = domain;
217            search_list_add(&parser->search_list, domain);
218        }
219    }
220
221    if ((cp = getenv("RES_OPTIONS")) != NULL) res_setoptions(parser, cp);
222
223    if (nameserver_list_has_failed(&parser->nameserver_list) || search_list_has_failed(&parser->search_list) ||
224        sort_list_has_failed(&parser->sort_list)) {
225        __set_errno(ENOMEM);
226        return false;
227    }
228
229    return true;
230}

2. 如何发包 #

  • 调用gethostbyname3_context,格式化了一下,之前的缩进太难看懂
 1// resolv/nss_dns/dns-host.c
 2static enum nss_status gethostbyname3_context(struct resolv_context *ctx, const char *name, int af,
 3                                              struct hostent *result, char *buffer, size_t buflen, int *errnop,
 4                                              int *h_errnop, int32_t *ttlp, char **canonp) {
 5    union {
 6        querybuf *buf;
 7        u_char *ptr;
 8    } host_buffer;
 9    querybuf *orig_host_buffer;
10    char tmp[NS_MAXDNAME];
11    int size, type, n;
12    const char *cp;
13    int map = 0;
14    int olderr = errno;
15    enum nss_status status;
16
17    switch (af) {
18        case AF_INET:
19            size = INADDRSZ;
20            type = T_A;
21            break;
22        case AF_INET6:
23            size = IN6ADDRSZ;
24            type = T_AAAA;
25            break;
26        default:
27            *h_errnop = NO_DATA;
28            *errnop = EAFNOSUPPORT;
29            return NSS_STATUS_UNAVAIL;
30    }
31
32    result->h_addrtype = af;
33    result->h_length = size;
34
35    /*
36     * if there aren't any dots, it could be a user-level alias.
37     * this is also done in res_query() since we are not the only
38     * function that looks up host names.
39     */
40    if (strchr(name, '.') == NULL && (cp = __res_context_hostalias(ctx, name, tmp, sizeof(tmp))) != NULL) name = cp;
41
42    host_buffer.buf = orig_host_buffer = (querybuf *)alloca(1024);
43
44    // 这里发起dns请求
45    n = __res_context_search(ctx, name, C_IN, type, host_buffer.buf->buf, 1024, &host_buffer.ptr, NULL, NULL, NULL,
46                             NULL);
47    if (n < 0) {
48        switch (errno) {
49            case ESRCH:
50                status = NSS_STATUS_TRYAGAIN;
51                h_errno = TRY_AGAIN;
52                break;
53            /* System has run out of file descriptors.  */
54            case EMFILE:
55            case ENFILE:
56                h_errno = NETDB_INTERNAL;
57                /* Fall through.  */
58            case ECONNREFUSED:
59            case ETIMEDOUT:
60                status = NSS_STATUS_UNAVAIL;
61                break;
62            default:
63                status = NSS_STATUS_NOTFOUND;
64                break;
65        }
66        *h_errnop = h_errno;
67        if (h_errno == TRY_AGAIN)
68            *errnop = EAGAIN;
69        else
70            __set_errno(olderr);
71
72        /* If we are looking for an IPv6 address and mapping is enabled
73       by having the RES_USE_INET6 bit in _res.options set, we try
74       another lookup.  */
75        if (af == AF_INET6 && res_use_inet6())
76            n = __res_context_search(ctx, name, C_IN, T_A, host_buffer.buf->buf,
77                                     host_buffer.buf != orig_host_buffer ? MAXPACKET : 1024, &host_buffer.ptr, NULL,
78                                     NULL, NULL, NULL);
79
80        if (n < 0) {
81            if (host_buffer.buf != orig_host_buffer) free(host_buffer.buf);
82            return status;
83        }
84
85        map = 1;
86
87        result->h_addrtype = AF_INET;
88        result->h_length = INADDRSZ;
89    }
90
91    // 处理一下dns响应结果
92    status =
93        getanswer_r(ctx, host_buffer.buf, n, name, type, result, buffer, buflen, errnop, h_errnop, map, ttlp, canonp);
94    if (host_buffer.buf != orig_host_buffer) free(host_buffer.buf);
95    return status;
96}
  • __res_context_search负责按搜索规则(ndots、search list)依次拼出要查询的域名,并逐个发送dns请求
  1// resolv/res_query.c
  2/* Formulate a normal query, send, and retrieve answer in supplied
  3   buffer.  Return the size of the response on success, -1 on error.
  4   If enabled, implement search rules until answer or unrecoverable
  5   failure is detected.  Error code, if any, is left in h_errno.  */
  6int
  7__res_context_search (struct resolv_context *ctx,
  8		      const char *name, int class, int type,
  9		      unsigned char *answer, int anslen,
 10		      unsigned char **answerp, unsigned char **answerp2,
 11		      int *nanswerp2, int *resplen2, int *answerp2_malloced)
 12{
 13	struct __res_state *statp = ctx->resp;
 14	const char *cp;
 15	UHEADER *hp = (UHEADER *) answer;
 16	char tmp[NS_MAXDNAME];
 17	u_int dots;
 18	int trailing_dot, ret, saved_herrno;
 19	int got_nodata = 0, got_servfail = 0, root_on_list = 0;
 20	int tried_as_is = 0;
 21	int searched = 0;
 22
 23	__set_errno (0);
 24	RES_SET_H_ERRNO(statp, HOST_NOT_FOUND);  /* True if we never query. */
 25
 26	dots = 0;
 27	for (cp = name; *cp != '\0'; cp++)
 28		dots += (*cp == '.');
 29	trailing_dot = 0;
 30	if (cp > name && *--cp == '.')
 31		trailing_dot++;
 32
 33	/* If there aren't any dots, it could be a user-level alias. */
 34	if (!dots && (cp = __res_context_hostalias
 35		      (ctx, name, tmp, sizeof tmp))!= NULL)
 36	  return __res_context_query (ctx, cp, class, type, answer,
 37				      anslen, answerp, answerp2,
 38				      nanswerp2, resplen2, answerp2_malloced);
 39
 40	/*
 41	 * If there are enough dots in the name, let's just give it a
 42	 * try 'as is'. The threshold can be set with the "ndots" option.
 43	 * Also, query 'as is', if there is a trailing dot in the name.
 44	 */
 45	saved_herrno = -1;
 46	if (dots >= statp->ndots || trailing_dot) {
 47		ret = __res_context_querydomain (ctx, name, NULL, class, type,
 48						 answer, anslen, answerp,
 49						 answerp2, nanswerp2, resplen2,
 50						 answerp2_malloced);
 51		if (ret > 0 || trailing_dot
 52		    /* If the second response is valid then we use that.  */
 53		    || (ret == 0 && resplen2 != NULL && *resplen2 > 0))
 54			return (ret);
 55		saved_herrno = h_errno;
 56		tried_as_is++;
 57		if (answerp && *answerp != answer) {
 58			answer = *answerp;
 59			anslen = MAXPACKET;
 60		}
 61		if (answerp2 && *answerp2_malloced)
 62		  {
 63		    free (*answerp2);
 64		    *answerp2 = NULL;
 65		    *nanswerp2 = 0;
 66		    *answerp2_malloced = 0;
 67		  }
 68	}
 69
 70	/*
 71	 * We do at least one level of search if
 72	 *	- there is no dot and RES_DEFNAME is set, or
 73	 *	- there is at least one dot, there is no trailing dot,
 74	 *	  and RES_DNSRCH is set.
 75	 */
 76	if ((!dots && (statp->options & RES_DEFNAMES) != 0) ||
 77	    (dots && !trailing_dot && (statp->options & RES_DNSRCH) != 0)) {
 78		int done = 0;
 79
 80		for (size_t domain_index = 0; !done; ++domain_index) {
 81			const char *dname = __resolv_context_search_list
 82			  (ctx, domain_index);
 83			if (dname == NULL)
 84			  break;
 85			searched = 1;
 86
 87			/* __res_context_querydoman concatenates name
 88			   with dname with a "." in between.  If we
 89			   pass it in dname the "." we got from the
 90			   configured default search path, we'll end
 91			   up with "name..", which won't resolve.
 92			   OTOH, passing it "" will result in "name.",
 93			   which has the intended effect for both
 94			   possible representations of the root
 95			   domain.  */
 96			if (dname[0] == '.')
 97				dname++;
 98			if (dname[0] == '\0')
 99				root_on_list++;
100
101			ret = __res_context_querydomain
102			  (ctx, name, dname, class, type,
103			   answer, anslen, answerp, answerp2, nanswerp2,
104			   resplen2, answerp2_malloced);
105			if (ret > 0 || (ret == 0 && resplen2 != NULL
106					&& *resplen2 > 0))
107				return (ret);
108
109			if (answerp && *answerp != answer) {
110				answer = *answerp;
111				anslen = MAXPACKET;
112			}
113			if (answerp2 && *answerp2_malloced)
114			  {
115			    free (*answerp2);
116			    *answerp2 = NULL;
117			    *nanswerp2 = 0;
118			    *answerp2_malloced = 0;
119			  }
120
121			/*
122			 * If no server present, give up.
123			 * If name isn't found in this domain,
124			 * keep trying higher domains in the search list
125			 * (if that's enabled).
126			 * On a NO_DATA error, keep trying, otherwise
127			 * a wildcard entry of another type could keep us
128			 * from finding this entry higher in the domain.
129			 * If we get some other error (negative answer or
130			 * server failure), then stop searching up,
131			 * but try the input name below in case it's
132			 * fully-qualified.
133			 */
134			if (errno == ECONNREFUSED) {
135				RES_SET_H_ERRNO(statp, TRY_AGAIN);
136				return (-1);
137			}
138
139			switch (statp->res_h_errno) {
140			case NO_DATA:
141				got_nodata++;
142				/* FALLTHROUGH */
143			case HOST_NOT_FOUND:
144				/* keep trying */
145				break;
146			case TRY_AGAIN:
147				if (hp->rcode == SERVFAIL) {
148					/* try next search element, if any */
149					got_servfail++;
150					break;
151				}
152				/* FALLTHROUGH */
153			default:
154				/* anything else implies that we're done */
155				done++;
156			}
157
158			/* if we got here for some reason other than DNSRCH,
159			 * we only wanted one iteration of the loop, so stop.
160			 */
161			if ((statp->options & RES_DNSRCH) == 0)
162				done++;
163		}
164	}
165
166	/*
167	 * If the query has not already been tried as is then try it
168	 * unless RES_NOTLDQUERY is set and there were no dots.
169	 */
170	if ((dots || !searched || (statp->options & RES_NOTLDQUERY) == 0)
171	    && !(tried_as_is || root_on_list)) {
172		ret = __res_context_querydomain
173		  (ctx, name, NULL, class, type,
174		   answer, anslen, answerp, answerp2, nanswerp2,
175		   resplen2, answerp2_malloced);
176		if (ret > 0 || (ret == 0 && resplen2 != NULL
177				&& *resplen2 > 0))
178			return (ret);
179	}
180
181	/* if we got here, we didn't satisfy the search.
182	 * if we did an initial full query, return that query's H_ERRNO
183	 * (note that we wouldn't be here if that query had succeeded).
184	 * else if we ever got a nodata, send that back as the reason.
185	 * else send back meaningless H_ERRNO, that being the one from
186	 * the last DNSRCH we did.
187	 */
188	if (answerp2 && *answerp2_malloced)
189	  {
190	    free (*answerp2);
191	    *answerp2 = NULL;
192	    *nanswerp2 = 0;
193	    *answerp2_malloced = 0;
194	  }
195	if (saved_herrno != -1)
196		RES_SET_H_ERRNO(statp, saved_herrno);
197	else if (got_nodata)
198		RES_SET_H_ERRNO(statp, NO_DATA);
199	else if (got_servfail)
200		RES_SET_H_ERRNO(statp, TRY_AGAIN);
201	return (-1);
202}
203libc_hidden_def (__res_context_search)
  • __res_context_querydomain也会调用到__res_context_query
  • 里面主要做两件事:domain不为空时把name和domain用"."拼接起来;并检查(拼接后的)域名长度不超过MAXDNAME(1025),超过则返回NO_RECOVERY
 1// resolv/arpa/nameser_compat.h
 2#define MAXDNAME	NS_MAXDNAME
 3
 4// resolv/arpa/nameser.h
 5#define NS_MAXDNAME	1025	/*%< maximum domain name */
 6
 7// resolv/res_query.c
 8/*  Perform a call on res_query on the concatenation of name and
 9    domain.  */
10static int
11__res_context_querydomain (struct resolv_context *ctx,
12			   const char *name, const char *domain,
13			   int class, int type,
14			   unsigned char *answer, int anslen,
15			   unsigned char **answerp, unsigned char **answerp2,
16			   int *nanswerp2, int *resplen2,
17			   int *answerp2_malloced)
18{
19	struct __res_state *statp = ctx->resp;
20	char nbuf[MAXDNAME];
21	const char *longname = nbuf;
22	size_t n, d;
23
24	if (domain == NULL) {
25		n = strlen(name);
26
27		/* Decrement N prior to checking it against MAXDNAME
28		   so that we detect a wrap to SIZE_MAX and return
29		   a reasonable error.  */
30		n--;
31		if (n >= MAXDNAME - 1) {
32			RES_SET_H_ERRNO(statp, NO_RECOVERY);
33			return (-1);
34		}
35		longname = name;
36	} else {
37		n = strlen(name);
38		d = strlen(domain);
39        // 判断一下是否超过最长域名大小
40		if (n + d + 1 >= MAXDNAME) {
41			RES_SET_H_ERRNO(statp, NO_RECOVERY);
42			return (-1);
43		}
44		char *p = __stpcpy (nbuf, name);
45		*p++ = '.';
46		strcpy (p, domain);
47	}
48	return __res_context_query (ctx, longname, class, type, answer,
49				    anslen, answerp, answerp2, nanswerp2,
50				    resplen2, answerp2_malloced);
51}
  • __res_context_query发起dns请求
  1// resolv/res_query.c
  2/* Formulate a normal query, send, and await answer.  Returned answer
  3   is placed in supplied buffer ANSWER.  Perform preliminary check of
  4   answer, returning success only if no error is indicated and the
  5   answer count is nonzero.  Return the size of the response on
  6   success, -1 on error.  Error number is left in h_errno.
  7
  8   Caller must parse answer and determine whether it answers the
  9   question.  */
 10int
 11__res_context_query (struct resolv_context *ctx, const char *name,
 12		     int class, int type,
 13		     unsigned char *answer, int anslen,
 14		     unsigned char **answerp, unsigned char **answerp2,
 15		     int *nanswerp2, int *resplen2, int *answerp2_malloced)
 16{
 17	struct __res_state *statp = ctx->resp;
 18	UHEADER *hp = (UHEADER *) answer;
 19	UHEADER *hp2;
 20	int n, use_malloc = 0;
 21
 22	size_t bufsize = (type == T_QUERY_A_AND_AAAA ? 2 : 1) * QUERYSIZE;
 23	u_char *buf = alloca (bufsize);
 24	u_char *query1 = buf;
 25	int nquery1 = -1;
 26	u_char *query2 = NULL;	// holds the AAAA query (only used for T_QUERY_A_AND_AAAA)
 27	int nquery2 = 0;		// length of query2
 28
 29 again:
 30    // Build the DNS query packet(s); we come back here once with a larger malloc'ed buffer if building fails.
 31	hp->rcode = NOERROR;	/* default */
 32
 33	if (type == T_QUERY_A_AND_AAAA)
 34	  {
 35		// Both A and AAAA are wanted: the A query goes into query1, the AAAA query into query2.
 36		// Build the A query in query1.
 37	    n = __res_context_mkquery (ctx, QUERY, name, class, T_A, NULL,
 38				       query1, bufsize);
 39	    if (n > 0)
 40	      {
 41		if ((statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC)) != 0)
 42		  {
 43		    /* Use RESOLV_EDNS_BUFFER_SIZE because the receive
 44		       buffer can be reallocated.  */
 45		    n = __res_nopt (ctx, n, query1, bufsize,
 46				    RESOLV_EDNS_BUFFER_SIZE);
 47		    if (n < 0)
 48		      goto unspec_nomem;
 49		  }
 50
 51		nquery1 = n;
 52		/* Align the buffer.  */
 53		int npad = ((nquery1 + __alignof__ (HEADER) - 1)
 54			    & ~(__alignof__ (HEADER) - 1)) - nquery1;
 55		if (n > bufsize - npad)
 56		  {
 57		    n = -1;
 58		    goto unspec_nomem;
 59		  }
 60		int nused = n + npad;
 61		query2 = buf + nused;
 62		// Build the AAAA query in query2, i.e. in the same buffer right after the padded first query.
 63		n = __res_context_mkquery (ctx, QUERY, name, class, T_AAAA,
 64					   NULL, query2, bufsize - nused);
 65		if (n > 0
 66		    && (statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC)) != 0)
 67		  /* Use RESOLV_EDNS_BUFFER_SIZE because the receive
 68		     buffer can be reallocated.  */
 69		  n = __res_nopt (ctx, n, query2, bufsize,
 70				  RESOLV_EDNS_BUFFER_SIZE);
 71		nquery2 = n;
 72	      }
 73
 74	  unspec_nomem:;
 75	  }
 76	else
 77	  {
 78		// Only one record type is wanted: build a single query of that type in query1.
 79	    n = __res_context_mkquery (ctx, QUERY, name, class, type, NULL,
 80				       query1, bufsize);
 81
 82	    if (n > 0
 83		&& (statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC)) != 0)
 84	      {
 85		/* Use RESOLV_EDNS_BUFFER_SIZE if the receive buffer
 86		   can be reallocated.  */
 87		size_t advertise;
 88		if (answerp == NULL)
 89		  advertise = anslen;
 90		else
 91		  advertise = RESOLV_EDNS_BUFFER_SIZE;
 92		n = __res_nopt (ctx, n, query1, bufsize, advertise);
 93	      }
 94
 95	    nquery1 = n;
 96	  }
 97
 98	if (__glibc_unlikely (n <= 0) && !use_malloc) {
 99		/* Retry just in case res_nmkquery failed because of too
100		   short buffer.  Shouldn't happen.  */
101		bufsize = (type == T_QUERY_A_AND_AAAA ? 2 : 1) * MAXPACKET;
102		buf = malloc (bufsize);
103		if (buf != NULL) {
104			query1 = buf;
105			use_malloc = 1;
106			goto again;
107		}
108	}
109	if (__glibc_unlikely (n <= 0))       {
110		RES_SET_H_ERRNO(statp, NO_RECOVERY);
111		if (use_malloc)
112			free (buf);
113		return (n);
114	}
115
116	/* Suppress AAAA lookups if required.  __res_handle_no_aaaa
117	   checks RES_NOAAAA first, so avoids parsing the
118	   just-generated query packet in most cases.  nss_dns avoids
119	   using T_QUERY_A_AND_AAAA in RES_NOAAAA mode, so there is no
120	   need to handle it here.  */
121	if (type == T_AAAA && __res_handle_no_aaaa (ctx, query1, nquery1,
122						    answer, anslen, &n))
123	  /* There must be no second query for AAAA queries.  The code
124	     below is still needed to translate NODATA responses.  */
125	  assert (query2 == NULL);
126	else
127	  {
128	    assert (answerp == NULL || (void *) *answerp == (void *) answer);
129        // Send the DNS query (or query pair); n receives the length of the first response, or a negative value on failure.
130	    n = __res_context_send (ctx, query1, nquery1, query2, nquery2,
131				    answer, anslen,
132				    answerp, answerp2, nanswerp2, resplen2,
133				    answerp2_malloced);
134	  }
135
136    // Process the DNS response: release the query buffer, then map the response codes to an h_errno value.
137	if (use_malloc)
138		free (buf);
139	if (n < 0) {
140		RES_SET_H_ERRNO(statp, TRY_AGAIN);
141		return (n);
142	}
143
144	if (answerp != NULL)
145	  /* __res_context_send might have reallocated the buffer.  */
146	  hp = (UHEADER *) *answerp;
147
148	/* We simplify the following tests by assigning HP to HP2 or
149	   vice versa.  It is easy to verify that this is the same as
150	   ignoring all tests of HP or HP2.  */
151	if (answerp2 == NULL || *resplen2 < (int) sizeof (HEADER))
152	  {
153	    hp2 = hp;
154	  }
155	else
156	  {
157	    hp2 = (UHEADER *) *answerp2;
158	    if (n < (int) sizeof (HEADER))
159	      {
160	        hp = hp2;
161	      }
162	  }
163
164	/* Make sure both hp and hp2 are defined */
165	assert((hp != NULL) && (hp2 != NULL));
166
167	if ((hp->rcode != NOERROR || ntohs(hp->ancount) == 0)
168	    && (hp2->rcode != NOERROR || ntohs(hp2->ancount) == 0)) {
169		switch (hp->rcode == NOERROR ? hp2->rcode : hp->rcode) {
170		case NXDOMAIN:
171			if ((hp->rcode == NOERROR && ntohs (hp->ancount) != 0)
172			    || (hp2->rcode == NOERROR
173				&& ntohs (hp2->ancount) != 0))
174				goto success;
175			RES_SET_H_ERRNO(statp, HOST_NOT_FOUND);
176			break;
177		case SERVFAIL:
178			RES_SET_H_ERRNO(statp, TRY_AGAIN);
179			break;
180		case NOERROR:
181			if (ntohs (hp->ancount) != 0
182			    || ntohs (hp2->ancount) != 0)
183				goto success;
184			RES_SET_H_ERRNO(statp, NO_DATA);
185			break;
186		case FORMERR:
187		case NOTIMP:
188			/* Servers must not reply to AAAA queries with
189			   NOTIMP etc but some of them do.  */
190			if ((hp->rcode == NOERROR && ntohs (hp->ancount) != 0)
191			    || (hp2->rcode == NOERROR
192				&& ntohs (hp2->ancount) != 0))
193				goto success;
194			/* FALLTHROUGH */
195		case REFUSED:
196		default:
197			RES_SET_H_ERRNO(statp, NO_RECOVERY);
198			break;
199		}
200		return (-1);
201	}
202 success:
203	return (n);
204}
205libc_hidden_def (__res_context_query)
  • __res_context_send发送dns请求包
  1// resolv/res_send.c
  2int
  3__res_context_send (struct resolv_context *ctx,
  4		    const unsigned char *buf, int buflen,
  5		    const unsigned char *buf2, int buflen2,
  6		    unsigned char *ans, int anssiz,
  7		    unsigned char **ansp, unsigned char **ansp2,
  8		    int *nansp2, int *resplen2, int *ansp2_malloced)
  9{
 10	struct __res_state *statp = ctx->resp;
 11	int gotsomewhere, terrno, try, v_circuit, resplen;
 12	/* On some architectures send_vc is inlined and the compiler might emit
 13	   a warning indicating 'resplen' may be used uninitialized.  Note that
 14	   the warning belongs to resplen in send_vc which is used as return
 15	   value!  There the maybe-uninitialized warning is already ignored as
 16	   it is a false-positive - see comment in send_vc.
 17	   Here the variable n is set to the return value of send_vc.
 18	   See below.  */
 19	DIAG_PUSH_NEEDS_COMMENT;
 20	DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
 21	int n;
 22	DIAG_POP_NEEDS_COMMENT;
 23
 24	if (statp->nscount == 0) {
 25		__set_errno (ESRCH);
 26		return (-1);
 27	}
 28
 29	if (anssiz < (buf2 == NULL ? 1 : 2) * HFIXEDSZ) {
 30		__set_errno (EINVAL);
 31		return (-1);
 32	}
 33
 34	v_circuit = ((statp->options & RES_USEVC)
 35		     || buflen > PACKETSZ
 36		     || buflen2 > PACKETSZ);
 37	gotsomewhere = 0;
 38	terrno = ETIMEDOUT;
 39
 40	/*
 41	 * If the ns_addr_list in the resolver context has changed, then
 42	 * invalidate our cached copy and the associated timing data.
 43	 */
 44	if (EXT(statp).nscount != 0) {
 45		int needclose = 0;
 46
 47		if (EXT(statp).nscount != statp->nscount)
 48			needclose++;
 49		else
 50			for (unsigned int ns = 0; ns < statp->nscount; ns++) {
 51				if (statp->nsaddr_list[ns].sin_family != 0
 52				    && !sock_eq((struct sockaddr_in6 *)
 53						&statp->nsaddr_list[ns],
 54						EXT(statp).nsaddrs[ns]))
 55				{
 56					needclose++;
 57					break;
 58				}
 59			}
 60		if (needclose) {
 61			__res_iclose(statp, false);
 62			EXT(statp).nscount = 0;
 63		}
 64	}
 65
 66	/*
 67	 * Maybe initialize our private copy of the ns_addr_list.
 68	 */
 69	if (EXT(statp).nscount == 0) {
 70		for (unsigned int ns = 0; ns < statp->nscount; ns++) {
 71			EXT(statp).nssocks[ns] = -1;
 72			if (statp->nsaddr_list[ns].sin_family == 0)
 73				continue;
 74			if (EXT(statp).nsaddrs[ns] == NULL)
 75				EXT(statp).nsaddrs[ns] =
 76				    malloc(sizeof (struct sockaddr_in6));
 77			if (EXT(statp).nsaddrs[ns] != NULL)
 78				memset (mempcpy(EXT(statp).nsaddrs[ns],
 79						&statp->nsaddr_list[ns],
 80						sizeof (struct sockaddr_in)),
 81					'\0',
 82					sizeof (struct sockaddr_in6)
 83					- sizeof (struct sockaddr_in));
 84			else
 85				return -1;
 86		}
 87		EXT(statp).nscount = statp->nscount;
 88	}
 89
 90	/* Name server index offset.  Used to implement
 91	   RES_ROTATE.  */
 92	unsigned int ns_offset = nameserver_offset (statp);
 93
 94	/*
 95	 * Send request, RETRY times, or until successful.
 96	 */
 97    // Outer loop: up to statp->retry rounds, stopping at the first usable answer.
 98    // Inner loop: walk the name servers by index; a server that yields nothing (n == 0, e.g. a timeout) is skipped via next_ns.
 99	for (try = 0; try < statp->retry; try++) {
100	    for (unsigned ns_shift = 0; ns_shift < statp->nscount; ns_shift++)
101	    {
102		/* The actual name server index.  This implements
103		   RES_ROTATE.  */
104		unsigned int ns = ns_shift + ns_offset;
105		if (ns >= statp->nscount)
106			ns -= statp->nscount;
107
108	    same_ns:
109		if (__glibc_unlikely (v_circuit))       {
110            // DNS over TCP. NOTE(review): send_vc is not shown here; it presumably keeps its socket in statp->_vcsock -- confirm.
111			/* Use VC; at most one attempt per server. */
112			try = statp->retry;
113			n = send_vc(statp, buf, buflen, buf2, buflen2,
114				    &ans, &anssiz, &terrno,
115				    ns, ansp, ansp2, nansp2, resplen2,
116				    ansp2_malloced);
117			if (n < 0)
118				return (-1);
119			/* See comment at the declaration of n.  */
120			DIAG_PUSH_NEEDS_COMMENT;
121			DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
122			if (n == 0 && (buf2 == NULL || *resplen2 == 0))
123				goto next_ns;
124			DIAG_POP_NEEDS_COMMENT;
125		} else {
126            // DNS over UDP; send_dg uses the per-server socket EXT(statp).nssocks[ns].
127			/* Use datagrams. */
128			n = send_dg(statp, buf, buflen, buf2, buflen2,
129				    &ans, &anssiz, &terrno,
130				    ns, &v_circuit, &gotsomewhere, ansp,
131				    ansp2, nansp2, resplen2, ansp2_malloced);
132			if (n < 0)
133				return (-1);
134			// No first response (n == 0) and either no second query or no second response: try the next server.
135			if (n == 0 && (buf2 == NULL || *resplen2 == 0))
136				goto next_ns;
137			// send_dg asked for TCP (v_circuit set): resend to the same name server over TCP.
138			if (v_circuit)
139			  // XXX Check whether both requests failed or
140			  // XXX whether one has been answered successfully
141				goto same_ns;
142		}
143
144		resplen = n;
145
146		/* See comment at the declaration of n.  Note: resplen = n;  */
147		DIAG_PUSH_NEEDS_COMMENT;
148		DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
149		/* Mask the AD bit in both responses unless it is
150		   marked trusted.  */
151		if (resplen > HFIXEDSZ)
152		  {
153		    if (ansp != NULL)
154		      mask_ad_bit (ctx, *ansp);
155		    else
156		      mask_ad_bit (ctx, ans);
157		  }
158		DIAG_POP_NEEDS_COMMENT;
159		if (resplen2 != NULL && *resplen2 > HFIXEDSZ)
160		  mask_ad_bit (ctx, *ansp2);
161
162		/*
163		 * If we have temporarily opened a virtual circuit,
164		 * or if we haven't been asked to keep a socket open,
165		 * close the socket.
166		 */
167		if ((v_circuit && (statp->options & RES_USEVC) == 0) ||
168		    (statp->options & RES_STAYOPEN) == 0) {
169			__res_iclose(statp, false);
170		}
171		return (resplen);
172 next_ns: ;
173	   } /*foreach ns*/
174	} /*foreach retry*/
175	__res_iclose(statp, false);
176	if (!v_circuit) {
177		if (!gotsomewhere)
178			__set_errno (ECONNREFUSED);	/* no nameservers found */
179		else
180			__set_errno (ETIMEDOUT);	/* no answer obtained */
181	} else
182		__set_errno (terrno);
183	return (-1);
184}
185libc_hidden_def (__res_context_send)

2.1. 发送udp包给一个dns服务器 #

  • 调用send_dg
  • 从这里可以看到什么情况下使用下一个dns服务器,什么情况下使用tcp重新发送
  1// /resolv/res_send.c
  2/* The send_dg function is responsible for sending a DNS query over UDP
  3   to the nameserver numbered NS from the res_state STATP i.e.
  4   EXT(statp).nssocks[ns].  The function supports IPv4 and IPv6 queries
  5   along with the ability to send the query in parallel for both stacks
  6   (default) or serially (RES_SNGLKUP).  It also supports serial lookup
  7   with a close and reopen of the socket used to talk to the server
  8   (RES_SNGLKUPREOP) to work around broken name servers.
  9
 10   The query stored in BUF of BUFLEN length is sent first followed by
 11   the query stored in BUF2 of BUFLEN2 length.  Queries are sent
 12   in parallel (default) or serially (RES_SNGLKUP or RES_SNGLKUPREOP).
 13
 14   Answers to the query are stored firstly in *ANSP up to a max of
 15   *ANSSIZP bytes.  If more than *ANSSIZP bytes are needed and ANSCP
 16   is non-NULL (to indicate that modifying the answer buffer is allowed)
 17   then malloc is used to allocate a new response buffer and ANSCP and
 18   ANSP will both point to the new buffer.  If more than *ANSSIZP bytes
 19   are needed but ANSCP is NULL, then as much of the response as
 20   possible is read into the buffer, but the results will be truncated.
 21   When truncation happens because of a small answer buffer the DNS
 22   packets header field TC will be set to 1, indicating a truncated
 23   message, while the rest of the UDP packet is discarded.
 24
 25   Answers to the query are stored secondly in *ANSP2 up to a max of
 26   *ANSSIZP2 bytes, with the actual response length stored in
 27   *RESPLEN2.  If more than *ANSSIZP2 bytes are needed and ANSP2
 28   is non-NULL (required for a second query) then malloc is used to
 29   allocate a new response buffer, *ANSSIZP2 is set to the new buffer
 30   size and *ANSP2_MALLOCED is set to 1.
 31
 32   The ANSP2_MALLOCED argument will eventually be removed as the
 33   change in buffer pointer can be used to detect the buffer has
 34   changed and that the caller should use free on the new buffer.
 35
 36   Note that the answers may arrive in any order from the server and
 37   therefore the first and second answer buffers may not correspond to
 38   the first and second queries.
 39
 40   It is not supported to call this function with a non-NULL ANSP2
 41   but a NULL ANSCP.  Put another way, you can call this function with a
 42   single unmodifiable buffer or two modifiable buffers, but no other
 43   combination is supported.
 44
 45   It is the caller's responsibility to free the malloc allocated
 46   buffers by detecting that the pointers have changed from their
 47   original values i.e. *ANSCP or *ANSP2 has changed.
 48
 49   If an answer is truncated because of UDP datagram DNS limits then
 50   *V_CIRCUIT is set to 1 and the return value non-zero to indicate to
 51   the caller to retry with TCP.  The value *GOTSOMEWHERE is set to 1
 52   if any progress was made reading a response from the nameserver and
 53   is used by the caller to distinguish between ECONNREFUSED and
 54   ETIMEDOUT (the latter if *GOTSOMEWHERE is 1).
 55
 56   If errors are encountered then *TERRNO is set to an appropriate
 57   errno value and a zero result is returned for a recoverable error,
 58   and a less-than zero result is returned for a non-recoverable error.
 59
 60   If no errors are encountered then *TERRNO is left unmodified and
 61   the length of the first response in bytes is returned.  */
 62/**
 63 * @brief Send the query (or query pair) over UDP to one name server and collect the reply or replies.
 64 *
 65 * @param statp resolver state; the socket for this server is EXT(statp).nssocks[ns]
 66 * @param buf first query packet (the A query when the caller wants both A and AAAA, otherwise the only query)
 67 * @param buflen length of buf in bytes
 68 * @param buf2 second query packet (the AAAA query when both are wanted); NULL when there is no second query
 69 * @param buflen2 length of buf2 in bytes
 70 * @param ansp first answer buffer (the caller's fixed buffer)
 71 * @param anssizp size of the first answer buffer
 72 * @param terrno receives an errno value when an error is detected
 73 * @param ns index of the name server to talk to
 74 * @param v_circuit set to 1 when the reply has TC set and the query must be repeated over TCP
 75 * @param gotsomewhere set to 1 once a datagram was received or the wait timed out; restored when switching to single-request mode
 76 * @param anscp when non-NULL, the first answer buffer may be replaced by a malloc'ed one through this pointer
 77 * @param ansp2 second answer buffer (may be replaced by a malloc'ed one)
 78 * @param anssizp2 size of the second answer buffer
 79 * @param resplen2 receives the length of the second response; set to 0 on the failure paths
 80 * @param ansp2_malloced set to 1 when *ansp2 was replaced by a malloc'ed buffer
 81 * @return int 1 with *v_circuit set: the caller must resend over TCP;
 82				0: recoverable failure -- the caller tries the next name server when buf2 is NULL or *resplen2 is 0;
 83				any other value > 0: length of the first response;
 84				< 0: unrecoverable error
 85 */
 86static int send_dg(res_state statp, const u_char *buf, int buflen, const u_char *buf2, int buflen2, u_char **ansp,
 87                   int *anssizp, int *terrno, int ns, int *v_circuit, int *gotsomewhere, u_char **anscp, u_char **ansp2,
 88                   int *anssizp2, int *resplen2, int *ansp2_malloced) {
 89    const UHEADER *hp = (UHEADER *)buf;
 90    const UHEADER *hp2 = (UHEADER *)buf2;
 91    struct timespec now, timeout, finish;
 92    struct pollfd pfd[1];
 93    int ptimeout;
 94    struct sockaddr_in6 from;
 95    int resplen = 0;
 96    int n;
 97
 98    /*
 99     * Compute time for the total operation.
100     */
101    // Timeout budget in seconds. retrans is the base timeout (5 by default per resolv.conf(5); change it with "options timeout:10").
102    int seconds = (statp->retrans << ns);
103    // nscount is the number of configured name servers; the caller always passes ns < nscount.
104    // With retrans = 5: ns = 0 gives 5 seconds (no division).
105    // ns = 1 gives 10 / nscount: 5 for nscount 2, 3 for nscount 3, 2 for nscount 4.
106    // ns = 2 gives 20 / nscount: 6 for nscount 3, 5 for nscount 4 (ns = 2 cannot occur with only 2 servers).
107    if (ns > 0) seconds /= statp->nscount;
108    if (seconds <= 0) seconds = 1;
109    bool single_request_reopen = (statp->options & RES_SNGLKUPREOP) != 0;
110    bool single_request = (((statp->options & RES_SNGLKUP) != 0) | single_request_reopen);
111    int save_gotsomewhere = *gotsomewhere;
112
113    int retval;
114retry_reopen:
115    // (Re)open the socket for this name server; a result <= 0 is handed back to the caller with *resplen2 zeroed.
116    retval = reopen(statp, terrno, ns);
117    if (retval <= 0) {
118        if (resplen2 != NULL) *resplen2 = 0;
119        return retval;
120    }
121retry:
122    // Compute the deadline from the budget above: finish = now + seconds.
123    evNowTime(&now);
124    evConsTime(&timeout, seconds, 0);
125    evAddTime(&finish, &now, &timeout);
126    int need_recompute = 0; // non-zero when the remaining time must be recomputed before the next poll
127    int nwritten = 0;
128    int recvresp1 = 0;  // set once the response to buf has been received
129    /* Skip the second response if there is no second query.
130       To do that we mark the second response as received.  */
131    int recvresp2 = buf2 == NULL; // set once the response to buf2 has been received; starts at 1 when there is no buf2
132    pfd[0].fd = EXT(statp).nssocks[ns];
133    pfd[0].events = POLLOUT;
134wait:
135    if (need_recompute) {
136        // Recompute how much of the budget is left.
137    recompute_resend:
138        evNowTime(&now);
139        if (evCmpTime(finish, now) <= 0) {
140        poll_err_out:
141            // Deadline passed (or poll failed). NOTE(review): close_and_return_error is defined elsewhere; presumably it closes the socket, zeroes *resplen2 and returns 0 so the caller tries the next server -- confirm.
142            return close_and_return_error(statp, resplen2);
143        }
144        evSubTime(&timeout, &finish, &now);
145        need_recompute = 0;
146    }
147    /* Convert struct timespec in milliseconds.  */
148    ptimeout = timeout.tv_sec * 1000 + timeout.tv_nsec / 1000000;
149
150	// Wait with poll: a zero-timeout check first (only while nothing has been written yet), then a blocking wait of up to ptimeout ms.
151    n = 0;
152    if (nwritten == 0) n = __poll(pfd, 1, 0);
153    if (__glibc_unlikely(n == 0)) {
154        n = __poll(pfd, 1, ptimeout);
155        need_recompute = 1;
156    }
157    if (n == 0) {
158        if (resplen > 1 && (recvresp1 || (buf2 != NULL && recvresp2))) {
159            /* There are quite a few broken name servers out
160               there which don't handle two outstanding
161               requests from the same source.  There are also
162               broken firewall settings.  If we time out after
163               having received one answer switch to the mode
164               where we send the second request only once we
165               have received the first answer.  */
166            if (!single_request) {
167                statp->options |= RES_SNGLKUP;
168                single_request = true;
169                *gotsomewhere = save_gotsomewhere;
170                goto retry;
171            } else if (!single_request_reopen) {
172                statp->options |= RES_SNGLKUPREOP;
173                single_request_reopen = true;
174                *gotsomewhere = save_gotsomewhere;
175                __res_iclose(statp, false);
176                goto retry_reopen;
177            }
178
179            *resplen2 = 1;
180            return resplen;
181        }
182
183        *gotsomewhere = 1;
184        if (resplen2 != NULL) *resplen2 = 0;
185        return 0;
186    }
187    if (n < 0) {
188        if (errno == EINTR) goto recompute_resend;
189
190        goto poll_err_out;
191    }
192    __set_errno(0);
193    if (pfd[0].revents & POLLOUT) {
194        // POLLOUT: the socket is writable. Send both queries at once with __sendmmsg when possible, otherwise one query per pass with __send.
195#ifndef __ASSUME_SENDMMSG
196        static int have_sendmmsg;
197#else
198#define have_sendmmsg 1
199#endif
200        if (have_sendmmsg >= 0 && nwritten == 0 && buf2 != NULL && !single_request) {
201            struct iovec iov = {.iov_base = (void *)buf, .iov_len = buflen};
202            struct iovec iov2 = {.iov_base = (void *)buf2, .iov_len = buflen2};
203            struct mmsghdr reqs[2] = {
204                {
205                    .msg_hdr =
206                        {
207                            .msg_iov = &iov,
208                            .msg_iovlen = 1,
209                        },
210                },
211                {.msg_hdr =
212                     {
213                         .msg_iov = &iov2,
214                         .msg_iovlen = 1,
215                     }},
216            };
217
218            int ndg = __sendmmsg(pfd[0].fd, reqs, 2, MSG_NOSIGNAL);
219            if (__glibc_likely(ndg == 2)) {
220                if (reqs[0].msg_len != buflen || reqs[1].msg_len != buflen2) goto fail_sendmmsg;
221
222                pfd[0].events = POLLIN;
223                nwritten += 2;
224            } else if (ndg == 1 && reqs[0].msg_len == buflen)
225                goto just_one;
226            else if (ndg < 0 && (errno == EINTR || errno == EAGAIN))
227                goto recompute_resend;
228            else {
229#ifndef __ASSUME_SENDMMSG
230                if (__glibc_unlikely(have_sendmmsg == 0)) {
231                    if (ndg < 0 && errno == ENOSYS) {
232                        have_sendmmsg = -1;
233                        goto try_send;
234                    }
235                    have_sendmmsg = 1;
236                }
237#endif
238
239            fail_sendmmsg:
240                return close_and_return_error(statp, resplen2);
241            }
242        } else {
243            ssize_t sr;
244#ifndef __ASSUME_SENDMMSG
245        try_send:
246#endif
247            if (nwritten != 0)
248                sr = __send(pfd[0].fd, buf2, buflen2, MSG_NOSIGNAL);
249            else
250                sr = __send(pfd[0].fd, buf, buflen, MSG_NOSIGNAL);
251
252            if (sr != (nwritten != 0 ? buflen2 : buflen)) {
253                if (errno == EINTR || errno == EAGAIN) goto recompute_resend;
254                return close_and_return_error(statp, resplen2);
255            }
256        just_one:
257            if (nwritten != 0 || buf2 == NULL || single_request)
258                pfd[0].events = POLLIN;
259            else
260                pfd[0].events = POLLIN | POLLOUT;
261            ++nwritten;
262        }
263        goto wait;
264    } else if (pfd[0].revents & POLLIN) {
265        // POLLIN: a datagram is waiting; this branch receives and validates it.
266        int *thisanssizp;
267        u_char **thisansp;
268        int *thisresplenp;  // where the length of the datagram being received is stored
269
270        // Nothing received yet, or there is no second query: receive into the first buffer and record the length in resplen.
271        // One response already received and buf2 is set: receive into the second buffer and record the length in *resplen2.
272        if ((recvresp1 | recvresp2) == 0 || buf2 == NULL) {
273            /* We have not received any responses
274               yet or we only have one response to
275               receive.  */
276            thisanssizp = anssizp;
277            thisansp = anscp ?: ansp;
278            assert(anscp != NULL || ansp2 == NULL);
279            thisresplenp = &resplen;
280        } else {
281            thisanssizp = anssizp2;
282            thisansp = ansp2;
283            thisresplenp = resplen2;
284        }
285
286        if (*thisanssizp < MAXPACKET
287            /* If the current buffer is not the static
288               user-supplied buffer then we can reallocate
289               it.  */
290            && (thisansp != NULL && thisansp != ansp)
291#ifdef FIONREAD
292            /* Is the size too small?  */
293            && (__ioctl(pfd[0].fd, FIONREAD, thisresplenp) < 0 || *thisanssizp < *thisresplenp)
294#endif
295        ) {
296            /* Always allocate MAXPACKET, callers expect
297               this specific size.  */
298            u_char *newp = malloc(MAXPACKET);
299            if (newp != NULL) {
300                *thisanssizp = MAXPACKET;
301                *thisansp = newp;
302                if (thisansp == ansp2) *ansp2_malloced = 1;
303            }
304        }
305        /* We could end up with truncation if anscp was NULL
306           (not allowed to change caller's buffer) and the
307           response buffer size is too small.  This isn't a
308           reliable way to detect truncation because the ioctl
309           may be an inaccurate report of the UDP message size.
310           Therefore we use this only to issue debug output.
311           To do truncation accurately with UDP we need
312           MSG_TRUNC which is only available on Linux.  We
313           can abstract out the Linux-specific feature in the
314           future to detect truncation.  */
315        UHEADER *anhp = (UHEADER *)*thisansp;   // header view of the buffer the datagram is received into
316        socklen_t fromlen = sizeof(struct sockaddr_in6);
317        assert(sizeof(from) <= fromlen);
318        *thisresplenp = __recvfrom(pfd[0].fd, (char *)*thisansp, *thisanssizp, 0, (struct sockaddr *)&from, &fromlen);
319        if (__glibc_unlikely(*thisresplenp <= 0)) {
320            if (errno == EINTR || errno == EAGAIN) {
321                // EINTR or EAGAIN: go back to wait, which first rechecks the deadline, and receive again.
322                need_recompute = 1;
323                goto wait;
324            }
325            // Any other receive failure: give up on this server (see the note at poll_err_out about close_and_return_error).
326            return close_and_return_error(statp, resplen2);
327        }
328        *gotsomewhere = 1;
329        if (__glibc_unlikely(*thisresplenp < HFIXEDSZ)) {
330            // Datagram shorter than HFIXEDSZ (the fixed DNS header): record EMSGSIZE and give up on this server.
331            /*
332             * Undersized message.
333             */
334            *terrno = EMSGSIZE;
335            return close_and_return_error(statp, resplen2);
336        }
337
338        /* Check for the correct header layout and a matching
339           question.  */
340        int matching_query = 0; /* Default to no matching query.  */
341        // Match the reply against each query that is still unanswered:
342        // it must carry that query's id and pass __libc_res_queriesmatch against that query packet.
343        if (!recvresp1 && anhp->id == hp->id &&
344            __libc_res_queriesmatch(buf, buf + buflen, *thisansp, *thisansp + *thisanssizp))
345            matching_query = 1;
346        if (!recvresp2 && anhp->id == hp2->id &&
347            __libc_res_queriesmatch(buf2, buf2 + buflen2, *thisansp, *thisansp + *thisanssizp))
348            matching_query = 2;
349        if (matching_query == 0)
350        /* Spurious UDP packet.  Drop it and continue
351           waiting.  */
352        {
353            // No match: drop the datagram and keep waiting, rechecking the deadline first.
354            need_recompute = 1;
355            goto wait;
356        }
357
358        if (anhp->rcode == SERVFAIL || anhp->rcode == NOTIMP || anhp->rcode == REFUSED) {
359            // These three response codes mean this reply is discarded and the next name server should be tried.
360        next_ns:
361            // Common "this reply is unusable" handling.
362            if (recvresp1 || (buf2 != NULL && recvresp2)) {
363                // The other query was already answered earlier (not by this datagram):
364                // zero *resplen2 and return the stored first-response length.
365				// If the earlier reply was itself unusable, resplen was reset to 0, so the caller moves to the next server.
366				// If the earlier reply was usable, resplen is its length and the caller does not try another server.
367                *resplen2 = 0;
368                return resplen;
369            }
370            if (buf2 != NULL) {
371                // First reply of a two-query exchange and it is unusable: keep waiting for the other reply.
372                // Reset resplen to 0 and mark the matched query as answered.
373                /* No data from the first reply.  */
374                resplen = 0;
375                /* We are waiting for a possible second reply.  */
376                if (matching_query == 1)
377                    recvresp1 = 1;
378                else
379                    recvresp2 = 1;
380
381                goto wait;
382            }
383
384            /* don't retry if called from dig */
385            // NOTE(review): per the comment above, a non-zero pfcode presumably means the caller is dig -- confirm.
386            // With pfcode == 0 (ordinary resolution) give up on this server right away.
387            if (!statp->pfcode) return close_and_return_error(statp, resplen2);
388            __res_iclose(statp, false);
389        }
390        if (anhp->rcode == NOERROR && anhp->ancount == 0 && anhp->aa == 0 && anhp->ra == 0 && anhp->arcount == 0) {
391            // NOERROR with no answers, not authoritative (aa == 0), no recursion available (ra == 0) and arcount == 0: treat as unusable.
392            goto next_ns;
393        }
394        if (!(statp->options & RES_IGNTC) && anhp->tc) {
395            // Unless RES_IGNTC is set, a reply with the TC bit sets *v_circuit to 1 and returns 1,
396            // telling the caller to repeat the query over TCP; *resplen2 is zeroed.
397            /*
398             * To get the rest of answer,
399             * use TCP with same server.
400             */
401            *v_circuit = 1;
402            __res_iclose(statp, false);
403            // XXX if we have received one reply we could
404            // XXX use it and not repeat it over TCP...
405            if (resplen2 != NULL) *resplen2 = 0;
406            return (1);
407        }
408        /* Mark which reply we received.  */
409        if (matching_query == 1)
410            recvresp1 = 1;
411        else
412            recvresp2 = 1;
413        /* Repeat waiting if we have a second answer to arrive.  */
414        if ((recvresp1 & recvresp2) == 0) {
415            // One response is still outstanding: keep waiting (in single-request mode, switch back to POLLOUT so the second query is sent now).
416            if (single_request) {
417                pfd[0].events = POLLOUT;
418                if (single_request_reopen) {
419                    __res_iclose(statp, false);
420                    retval = reopen(statp, terrno, ns);
421                    if (retval <= 0) {
422                        if (resplen2 != NULL) *resplen2 = 0;
423                        return retval;
424                    }
425                    pfd[0].fd = EXT(statp).nssocks[ns];
426                }
427            }
428            goto wait;
429        }
430        /* All is well.  We have received both responses (if
431           two responses were requested).  */
432        return (resplen);
433    } else if (pfd[0].revents & (POLLERR | POLLHUP | POLLNVAL))
434        /* Something went wrong.  We can stop trying.  */
435        return close_and_return_error(statp, resplen2);
436    else {
437        /* poll should not have returned > 0 in this case.  */
438        abort();
439    }
440}