VirtualBox

source: vbox/trunk/src/libs/curl-7.83.1/lib/urlapi.c@ 98341

最後變更 在這個檔案從98341是 95312,由 vboxsync 提交於 3 年 前

libs/{curl,libxml2}: OSE export fixes, bugref:8515

  • 屬性 svn:eol-style 設為 native
檔案大小: 44.3 KB
 
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/
22
23#include "curl_setup.h"
24
25#include "urldata.h"
26#include "urlapi-int.h"
27#include "strcase.h"
28#include "dotdot.h"
29#include "url.h"
30#include "escape.h"
31#include "curl_ctype.h"
32#include "inet_pton.h"
33#include "inet_ntop.h"
34
35/* The last 3 #include files should be in this order */
36#include "curl_printf.h"
37#include "curl_memory.h"
38#include "memdebug.h"
39
/* MSDOS/Windows style drive prefix, eg c: in c:foo.
 * Evaluates to non-zero when the first byte is an ASCII letter and the
 * second byte is ':'. The argument is parenthesized so that pointer
 * expressions such as (p + 1) are indexed as a whole. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
45
/* MSDOS/Windows style drive prefix as it may appear in a URL: an ASCII
 * letter, then ':' or '|', then a slash, a backslash or the end of the
 * string. The checks short-circuit left to right, so byte 2 is only read
 * when bytes 0 and 1 matched. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
53
/* Upper bound on the number of scheme characters scanned and stored.
 * Schemes are not URL encoded; buffers receiving a scheme must hold
 * MAX_SCHEME_LEN + 1 bytes to fit the terminating zero. */
#define MAX_SCHEME_LEN 40
56
/* Internal representation of CURLU. The part pointers reference
 * individually allocated, URL-encoded strings; a NULL pointer means the
 * part is not set. */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;

  char *scratch; /* temporary scratch area, only used while parsing */
  char *temppath; /* temporary path pointer, only used while parsing */
  long portnum; /* the numerical version of 'port' */
};
74
/* Scheme assumed for scheme-less URLs when CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"
76
77static void free_urlhandle(struct Curl_URL *u)
78{
79 free(u->scheme);
80 free(u->user);
81 free(u->password);
82 free(u->options);
83 free(u->host);
84 free(u->zoneid);
85 free(u->port);
86 free(u->path);
87 free(u->query);
88 free(u->fragment);
89 free(u->scratch);
90 free(u->temppath);
91}
92
/*
 * Find the separator at the end of the host name, or the '?' in cases like
 * http://www.url.com?id=2380
 *
 * The search starts after the first "//" if there is one, otherwise at the
 * start of the string. Returns a pointer to whichever of '/' and '?' comes
 * first, or to the terminating zero when neither is present.
 */
static const char *find_host_sep(const char *url)
{
  const char *sep;
  const char *query;

  /* Find the start of the hostname */
  sep = strstr(url, "//");
  if(!sep)
    sep = url;
  else
    sep += 2;

  query = strchr(sep, '?');
  sep = strchr(sep, '/');

  /* a missing separator is treated as being at the end of the string */
  if(!sep)
    sep = url + strlen(url);

  if(!query)
    query = url + strlen(url);

  return sep < query ? sep : query;
}
120
/*
 * Decide in an encoding-independent manner whether a character in an
 * URL must be escaped. The same criterion must be used in strlen_url()
 * and strcpy_url().
 *
 * Returns TRUE for any value that is neither a control character, nor
 * white space, nor a graphical character according to the ISCNTRL,
 * ISSPACE and ISGRAPH macros.
 */
static bool urlchar_needs_escaping(int c)
{
  return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
}
130
131/*
132 * strlen_url() returns the length of the given URL if the spaces within the
133 * URL were properly URL encoded.
134 * URL encoding should be skipped for host names, otherwise IDN resolution
135 * will fail.
136 */
137static size_t strlen_url(const char *url, bool relative)
138{
139 const unsigned char *ptr;
140 size_t newlen = 0;
141 bool left = TRUE; /* left side of the ? */
142 const unsigned char *host_sep = (const unsigned char *) url;
143
144 if(!relative)
145 host_sep = (const unsigned char *) find_host_sep(url);
146
147 for(ptr = (unsigned char *)url; *ptr; ptr++) {
148
149 if(ptr < host_sep) {
150 ++newlen;
151 continue;
152 }
153
154 if(*ptr == ' ') {
155 if(left)
156 newlen += 3;
157 else
158 newlen++;
159 continue;
160 }
161
162 if (*ptr == '?')
163 left = FALSE;
164
165 if(urlchar_needs_escaping(*ptr))
166 newlen += 2;
167
168 newlen++;
169 }
170
171 return newlen;
172}
173
174/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
175 * the source URL accordingly.
176 * URL encoding should be skipped for host names, otherwise IDN resolution
177 * will fail.
178 */
179static void strcpy_url(char *output, const char *url, bool relative)
180{
181 /* we must add this with whitespace-replacing */
182 bool left = TRUE;
183 const unsigned char *iptr;
184 char *optr = output;
185 const unsigned char *host_sep = (const unsigned char *) url;
186
187 if(!relative)
188 host_sep = (const unsigned char *) find_host_sep(url);
189
190 for(iptr = (unsigned char *)url; /* read from here */
191 *iptr; /* until zero byte */
192 iptr++) {
193
194 if(iptr < host_sep) {
195 *optr++ = *iptr;
196 continue;
197 }
198
199 if(*iptr == ' ') {
200 if(left) {
201 *optr++='%'; /* add a '%' */
202 *optr++='2'; /* add a '2' */
203 *optr++='0'; /* add a '0' */
204 }
205 else
206 *optr++='+'; /* add a '+' here */
207 continue;
208 }
209
210 if(*iptr == '?')
211 left = FALSE;
212
213 if(urlchar_needs_escaping(*iptr)) {
214 msnprintf(optr, 4, "%%%02x", *iptr);
215 optr += 3;
216 }
217 else
218 *optr++ = *iptr;
219 }
220 *optr = 0; /* null-terminate output buffer */
221
222}
223
224/*
225 * Returns true if the given URL is absolute (as opposed to relative). Returns
226 * the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must
227 * be larger than MAX_SCHEME_LEN if buf is set.
228 */
229bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
230{
231 int i;
232 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
233 (void)buflen; /* only used in debug-builds */
234 if(buf)
235 buf[0] = 0; /* always leave a defined value in buf */
236#ifdef WIN32
237 if(STARTS_WITH_DRIVE_PREFIX(url))
238 return FALSE;
239#endif
240 for(i = 0; i < MAX_SCHEME_LEN; ++i) {
241 char s = url[i];
242 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
243 /* RFC 3986 3.1 explains:
244 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
245 */
246 }
247 else {
248 break;
249 }
250 }
251 if(i && (url[i] == ':') && (url[i + 1] == '/')) {
252 if(buf) {
253 buf[i] = 0;
254 while(i--) {
255 buf[i] = (char)TOLOWER(url[i]);
256 }
257 }
258 return TRUE;
259 }
260 return FALSE;
261}
262
263/*
264 * Concatenate a relative URL to a base URL making it absolute.
265 * URL-encodes any spaces.
266 * The returned pointer must be freed by the caller unless NULL
267 * (returns NULL on out of memory).
268 */
269static char *concat_url(const char *base, const char *relurl)
270{
271 /***
272 TRY to append this new path to the old URL
273 to the right of the host part. Oh crap, this is doomed to cause
274 problems in the future...
275 */
276 char *newest;
277 char *protsep;
278 char *pathsep;
279 size_t newlen;
280 bool host_changed = FALSE;
281
282 const char *useurl = relurl;
283 size_t urllen;
284
285 /* we must make our own copy of the URL to play with, as it may
286 point to read-only data */
287 char *url_clone = strdup(base);
288
289 if(!url_clone)
290 return NULL; /* skip out of this NOW */
291
292 /* protsep points to the start of the host name */
293 protsep = strstr(url_clone, "//");
294 if(!protsep)
295 protsep = url_clone;
296 else
297 protsep += 2; /* pass the slashes */
298
299 if('/' != relurl[0]) {
300 int level = 0;
301
302 /* First we need to find out if there's a ?-letter in the URL,
303 and cut it and the right-side of that off */
304 pathsep = strchr(protsep, '?');
305 if(pathsep)
306 *pathsep = 0;
307
308 /* we have a relative path to append to the last slash if there's one
309 available, or if the new URL is just a query string (starts with a
310 '?') we append the new one at the end of the entire currently worked
311 out URL */
312 if(useurl[0] != '?') {
313 pathsep = strrchr(protsep, '/');
314 if(pathsep)
315 *pathsep = 0;
316 }
317
318 /* Check if there's any slash after the host name, and if so, remember
319 that position instead */
320 pathsep = strchr(protsep, '/');
321 if(pathsep)
322 protsep = pathsep + 1;
323 else
324 protsep = NULL;
325
326 /* now deal with one "./" or any amount of "../" in the newurl
327 and act accordingly */
328
329 if((useurl[0] == '.') && (useurl[1] == '/'))
330 useurl += 2; /* just skip the "./" */
331
332 while((useurl[0] == '.') &&
333 (useurl[1] == '.') &&
334 (useurl[2] == '/')) {
335 level++;
336 useurl += 3; /* pass the "../" */
337 }
338
339 if(protsep) {
340 while(level--) {
341 /* cut off one more level from the right of the original URL */
342 pathsep = strrchr(protsep, '/');
343 if(pathsep)
344 *pathsep = 0;
345 else {
346 *protsep = 0;
347 break;
348 }
349 }
350 }
351 }
352 else {
353 /* We got a new absolute path for this server */
354
355 if(relurl[1] == '/') {
356 /* the new URL starts with //, just keep the protocol part from the
357 original one */
358 *protsep = 0;
359 useurl = &relurl[2]; /* we keep the slashes from the original, so we
360 skip the new ones */
361 host_changed = TRUE;
362 }
363 else {
364 /* cut off the original URL from the first slash, or deal with URLs
365 without slash */
366 pathsep = strchr(protsep, '/');
367 if(pathsep) {
368 /* When people use badly formatted URLs, such as
369 "http://www.url.com?dir=/home/daniel" we must not use the first
370 slash, if there's a ?-letter before it! */
371 char *sep = strchr(protsep, '?');
372 if(sep && (sep < pathsep))
373 pathsep = sep;
374 *pathsep = 0;
375 }
376 else {
377 /* There was no slash. Now, since we might be operating on a badly
378 formatted URL, such as "http://www.url.com?id=2380" which doesn't
379 use a slash separator as it is supposed to, we need to check for a
380 ?-letter as well! */
381 pathsep = strchr(protsep, '?');
382 if(pathsep)
383 *pathsep = 0;
384 }
385 }
386 }
387
388 /* If the new part contains a space, this is a mighty stupid redirect
389 but we still make an effort to do "right". To the left of a '?'
390 letter we replace each space with %20 while it is replaced with '+'
391 on the right side of the '?' letter.
392 */
393 newlen = strlen_url(useurl, !host_changed);
394
395 urllen = strlen(url_clone);
396
397 newest = malloc(urllen + 1 + /* possible slash */
398 newlen + 1 /* zero byte */);
399
400 if(!newest) {
401 free(url_clone); /* don't leak this */
402 return NULL;
403 }
404
405 /* copy over the root url part */
406 memcpy(newest, url_clone, urllen);
407
408 /* check if we need to append a slash */
409 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
410 ;
411 else
412 newest[urllen++]='/';
413
414 /* then append the new piece on the right side */
415 strcpy_url(&newest[urllen], useurl, !host_changed);
416
417 free(url_clone);
418
419 return newest;
420}
421
422/* scan for byte values < 31 or 127 */
423static bool junkscan(const char *part, unsigned int flags)
424{
425 if(part) {
426 static const char badbytes[]={
427 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
428 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
429 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
430 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
431 0x7f, 0x00 /* null-terminate */
432 };
433 size_t n = strlen(part);
434 size_t nfine = strcspn(part, badbytes);
435 if(nfine != n)
436 /* since we don't know which part is scanned, return a generic error
437 code */
438 return TRUE;
439 if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
440 return TRUE;
441 }
442 return FALSE;
443}
444
445/*
446 * parse_hostname_login()
447 *
448 * Parse the login details (user name, password and options) from the URL and
449 * strip them out of the host name
450 *
451 */
452static CURLUcode parse_hostname_login(struct Curl_URL *u,
453 char **hostname,
454 unsigned int flags)
455{
456 CURLUcode result = CURLUE_OK;
457 CURLcode ccode;
458 char *userp = NULL;
459 char *passwdp = NULL;
460 char *optionsp = NULL;
461 const struct Curl_handler *h = NULL;
462
463 /* At this point, we're hoping all the other special cases have
464 * been taken care of, so conn->host.name is at most
465 * [user[:password][;options]]@]hostname
466 *
467 * We need somewhere to put the embedded details, so do that first.
468 */
469
470 char *ptr = strchr(*hostname, '@');
471 char *login = *hostname;
472
473 if(!ptr)
474 goto out;
475
476 /* We will now try to extract the
477 * possible login information in a string like:
478 * ftp://user:[email protected]:8021/README */
479 *hostname = ++ptr;
480
481 /* if this is a known scheme, get some details */
482 if(u->scheme)
483 h = Curl_builtin_scheme(u->scheme);
484
485 /* We could use the login information in the URL so extract it. Only parse
486 options if the handler says we should. Note that 'h' might be NULL! */
487 ccode = Curl_parse_login_details(login, ptr - login - 1,
488 &userp, &passwdp,
489 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
490 &optionsp:NULL);
491 if(ccode) {
492 result = CURLUE_BAD_LOGIN;
493 goto out;
494 }
495
496 if(userp) {
497 if(flags & CURLU_DISALLOW_USER) {
498 /* Option DISALLOW_USER is set and url contains username. */
499 result = CURLUE_USER_NOT_ALLOWED;
500 goto out;
501 }
502 if(junkscan(userp, flags)) {
503 result = CURLUE_BAD_USER;
504 goto out;
505 }
506 u->user = userp;
507 }
508
509 if(passwdp) {
510 if(junkscan(passwdp, flags)) {
511 result = CURLUE_BAD_PASSWORD;
512 goto out;
513 }
514 u->password = passwdp;
515 }
516
517 if(optionsp) {
518 if(junkscan(optionsp, flags)) {
519 result = CURLUE_BAD_LOGIN;
520 goto out;
521 }
522 u->options = optionsp;
523 }
524
525 return CURLUE_OK;
526 out:
527
528 free(userp);
529 free(passwdp);
530 free(optionsp);
531 u->user = NULL;
532 u->password = NULL;
533 u->options = NULL;
534
535 return result;
536}
537
538UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
539 bool has_scheme)
540{
541 char *portptr = NULL;
542 char endbracket;
543 int len;
544
545 /*
546 * Find the end of an IPv6 address, either on the ']' ending bracket or
547 * a percent-encoded zone index.
548 */
549 if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
550 &endbracket, &len)) {
551 if(']' == endbracket)
552 portptr = &hostname[len];
553 else if('%' == endbracket) {
554 int zonelen = len;
555 if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
556 if(']' != endbracket)
557 return CURLUE_BAD_IPV6;
558 portptr = &hostname[--zonelen + len + 1];
559 }
560 else
561 return CURLUE_BAD_IPV6;
562 }
563 else
564 return CURLUE_BAD_IPV6;
565
566 /* this is a RFC2732-style specified IP-address */
567 if(portptr && *portptr) {
568 if(*portptr != ':')
569 return CURLUE_BAD_IPV6;
570 }
571 else
572 portptr = NULL;
573 }
574 else
575 portptr = strchr(hostname, ':');
576
577 if(portptr) {
578 char *rest;
579 long port;
580 char portbuf[7];
581
582 /* Browser behavior adaptation. If there's a colon with no digits after,
583 just cut off the name there which makes us ignore the colon and just
584 use the default port. Firefox, Chrome and Safari all do that.
585
586 Don't do it if the URL has no scheme, to make something that looks like
587 a scheme not work!
588 */
589 if(!portptr[1]) {
590 *portptr = '\0';
591 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
592 }
593
594 if(!ISDIGIT(portptr[1]))
595 return CURLUE_BAD_PORT_NUMBER;
596
597 port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
598
599 if(port > 0xffff)
600 return CURLUE_BAD_PORT_NUMBER;
601
602 if(rest[0])
603 return CURLUE_BAD_PORT_NUMBER;
604
605 *portptr++ = '\0'; /* cut off the name there */
606 *rest = 0;
607 /* generate a new port number string to get rid of leading zeroes etc */
608 msnprintf(portbuf, sizeof(portbuf), "%ld", port);
609 u->portnum = port;
610 u->port = strdup(portbuf);
611 if(!u->port)
612 return CURLUE_OUT_OF_MEMORY;
613 }
614
615 return CURLUE_OK;
616}
617
618static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
619{
620 size_t len;
621 size_t hlen = strlen(hostname);
622
623 if(hostname[0] == '[') {
624 const char *l = "0123456789abcdefABCDEF:.";
625 if(hlen < 4) /* '[::]' is the shortest possible valid string */
626 return CURLUE_BAD_IPV6;
627 hostname++;
628 hlen -= 2;
629
630 if(hostname[hlen] != ']')
631 return CURLUE_BAD_IPV6;
632
633 /* only valid letters are ok */
634 len = strspn(hostname, l);
635 if(hlen != len) {
636 hlen = len;
637 if(hostname[len] == '%') {
638 /* this could now be '%[zone id]' */
639 char zoneid[16];
640 int i = 0;
641 char *h = &hostname[len + 1];
642 /* pass '25' if present and is a url encoded percent sign */
643 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
644 h += 2;
645 while(*h && (*h != ']') && (i < 15))
646 zoneid[i++] = *h++;
647 if(!i || (']' != *h))
648 /* impossible to reach? */
649 return CURLUE_MALFORMED_INPUT;
650 zoneid[i] = 0;
651 u->zoneid = strdup(zoneid);
652 if(!u->zoneid)
653 return CURLUE_OUT_OF_MEMORY;
654 hostname[len] = ']'; /* insert end bracket */
655 hostname[len + 1] = 0; /* terminate the hostname */
656 }
657 else
658 return CURLUE_BAD_IPV6;
659 /* hostname is fine */
660 }
661#ifdef ENABLE_IPV6
662 {
663 char dest[16]; /* fits a binary IPv6 address */
664 char norm[MAX_IPADR_LEN];
665 hostname[hlen] = 0; /* end the address there */
666 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
667 return CURLUE_BAD_IPV6;
668
669 /* check if it can be done shorter */
670 if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
671 (strlen(norm) < hlen)) {
672 strcpy(hostname, norm);
673 hlen = strlen(norm);
674 hostname[hlen + 1] = 0;
675 }
676 hostname[hlen] = ']'; /* restore ending bracket */
677 }
678#endif
679 }
680 else {
681 /* letters from the second string are not ok */
682 len = strcspn(hostname, " \r\n\t/:#?!@");
683 if(hlen != len)
684 /* hostname with bad content */
685 return CURLUE_BAD_HOSTNAME;
686 }
687 if(!hostname[0])
688 return CURLUE_NO_HOST;
689 return CURLUE_OK;
690}
691
/* TRUE for a byte that ends the authority part of a URL: the start of the
   path ('/'), the query ('?') or the fragment ('#') */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
693
694/*
695 * Handle partial IPv4 numerical addresses and different bases, like
696 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
697 *
698 * If the given input string is syntactically wrong or any part for example is
699 * too big, this function returns FALSE and doesn't create any output.
700 *
701 * Output the "normalized" version of that input string in plain quad decimal
702 * integers and return TRUE.
703 */
704static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
705{
706 bool done = FALSE;
707 int n = 0;
708 const char *c = hostname;
709 unsigned long parts[4] = {0, 0, 0, 0};
710
711 while(!done) {
712 char *endp;
713 unsigned long l;
714 if((*c < '0') || (*c > '9'))
715 /* most importantly this doesn't allow a leading plus or minus */
716 return FALSE;
717 l = strtoul(c, &endp, 0);
718
719 /* overflow or nothing parsed at all */
720 if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
721 return FALSE;
722
723#if SIZEOF_LONG > 4
724 /* a value larger than 32 bits */
725 if(l > UINT_MAX)
726 return FALSE;
727#endif
728
729 parts[n] = l;
730 c = endp;
731
732 switch (*c) {
733 case '.' :
734 if(n == 3)
735 return FALSE;
736 n++;
737 c++;
738 break;
739
740 case '\0':
741 done = TRUE;
742 break;
743
744 default:
745 return FALSE;
746 }
747 }
748
749 /* this is deemed a valid IPv4 numerical address */
750
751 switch(n) {
752 case 0: /* a -- 32 bits */
753 msnprintf(outp, olen, "%u.%u.%u.%u",
754 parts[0] >> 24, (parts[0] >> 16) & 0xff,
755 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
756 break;
757 case 1: /* a.b -- 8.24 bits */
758 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
759 return FALSE;
760 msnprintf(outp, olen, "%u.%u.%u.%u",
761 parts[0], (parts[1] >> 16) & 0xff,
762 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
763 break;
764 case 2: /* a.b.c -- 8.8.16 bits */
765 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
766 return FALSE;
767 msnprintf(outp, olen, "%u.%u.%u.%u",
768 parts[0], parts[1], (parts[2] >> 8) & 0xff,
769 parts[2] & 0xff);
770 break;
771 case 3: /* a.b.c.d -- 8.8.8.8 bits */
772 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
773 (parts[3] > 0xff))
774 return FALSE;
775 msnprintf(outp, olen, "%u.%u.%u.%u",
776 parts[0], parts[1], parts[2], parts[3]);
777 break;
778 }
779 return TRUE;
780}
781
782/* return strdup'ed version in 'outp', possibly percent decoded */
783static CURLUcode decode_host(char *hostname, char **outp)
784{
785 char *per = NULL;
786 if(hostname[0] != '[')
787 /* only decode if not an ipv6 numerical */
788 per = strchr(hostname, '%');
789 if(!per) {
790 *outp = strdup(hostname);
791 if(!*outp)
792 return CURLUE_OUT_OF_MEMORY;
793 }
794 else {
795 /* might be encoded */
796 size_t dlen;
797 CURLcode result = Curl_urldecode(hostname, 0, outp, &dlen, REJECT_CTRL);
798 if(result)
799 return CURLUE_BAD_HOSTNAME;
800 }
801
802 return CURLUE_OK;
803}
804
805static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
806{
807 char *path;
808 bool path_alloced = FALSE;
809 bool uncpath = FALSE;
810 char *hostname;
811 char *query = NULL;
812 char *fragment = NULL;
813 CURLUcode result;
814 bool url_has_scheme = FALSE;
815 char schemebuf[MAX_SCHEME_LEN + 1];
816 const char *schemep = NULL;
817 size_t schemelen = 0;
818 size_t urllen;
819
820 DEBUGASSERT(url);
821
822 /*************************************************************
823 * Parse the URL.
824 ************************************************************/
825 /* allocate scratch area */
826 urllen = strlen(url);
827 if(urllen > CURL_MAX_INPUT_LENGTH)
828 /* excessive input length */
829 return CURLUE_MALFORMED_INPUT;
830
831 path = u->scratch = malloc(urllen * 2 + 2);
832 if(!path)
833 return CURLUE_OUT_OF_MEMORY;
834
835 hostname = &path[urllen + 1];
836 hostname[0] = 0;
837
838 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
839 url_has_scheme = TRUE;
840 schemelen = strlen(schemebuf);
841 }
842
843 /* handle the file: scheme */
844 if(url_has_scheme && !strcmp(schemebuf, "file")) {
845 if(urllen <= 6)
846 /* file:/ is not enough to actually be a complete file: URL */
847 return CURLUE_BAD_FILE_URL;
848
849 /* path has been allocated large enough to hold this */
850 strcpy(path, &url[5]);
851
852 u->scheme = strdup("file");
853 if(!u->scheme)
854 return CURLUE_OUT_OF_MEMORY;
855
856 /* Extra handling URLs with an authority component (i.e. that start with
857 * "file://")
858 *
859 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
860 * RFC 8089, but not the (current) WHAT-WG URL spec.
861 */
862 if(path[0] == '/' && path[1] == '/') {
863 /* swallow the two slashes */
864 char *ptr = &path[2];
865
866 /*
867 * According to RFC 8089, a file: URL can be reliably dereferenced if:
868 *
869 * o it has no/blank hostname, or
870 *
871 * o the hostname matches "localhost" (case-insensitively), or
872 *
873 * o the hostname is a FQDN that resolves to this machine, or
874 *
875 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
876 * Appendix E.3).
877 *
878 * For brevity, we only consider URLs with empty, "localhost", or
879 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
880 *
881 * Additionally, there is an exception for URLs with a Windows drive
882 * letter in the authority (which was accidentally omitted from RFC 8089
883 * Appendix E, but believe me, it was meant to be there. --MK)
884 */
885 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
886 /* the URL includes a host name, it must match "localhost" or
887 "127.0.0.1" to be valid */
888 if(checkprefix("localhost/", ptr) ||
889 checkprefix("127.0.0.1/", ptr)) {
890 ptr += 9; /* now points to the slash after the host */
891 }
892 else {
893#if defined(WIN32)
894 size_t len;
895
896 /* the host name, NetBIOS computer name, can not contain disallowed
897 chars, and the delimiting slash character must be appended to the
898 host name */
899 path = strpbrk(ptr, "/\\:*?\"<>|");
900 if(!path || *path != '/')
901 return CURLUE_BAD_FILE_URL;
902
903 len = path - ptr;
904 if(len) {
905 memcpy(hostname, ptr, len);
906 hostname[len] = 0;
907 uncpath = TRUE;
908 }
909
910 ptr -= 2; /* now points to the // before the host in UNC */
911#else
912 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
913 none */
914 return CURLUE_BAD_FILE_URL;
915#endif
916 }
917 }
918
919 path = ptr;
920 }
921
922 if(!uncpath)
923 hostname = NULL; /* no host for file: URLs by default */
924
925#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
926 /* Don't allow Windows drive letters when not in Windows.
927 * This catches both "file:/c:" and "file:c:" */
928 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
929 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
930 /* File drive letters are only accepted in MSDOS/Windows */
931 return CURLUE_BAD_FILE_URL;
932 }
933#else
934 /* If the path starts with a slash and a drive letter, ditch the slash */
935 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
936 /* This cannot be done with strcpy, as the memory chunks overlap! */
937 memmove(path, &path[1], strlen(&path[1]) + 1);
938 }
939#endif
940
941 }
942 else {
943 /* clear path */
944 const char *p;
945 const char *hostp;
946 size_t len;
947 path[0] = 0;
948
949 if(url_has_scheme) {
950 int i = 0;
951 p = &url[schemelen + 1];
952 while(p && (*p == '/') && (i < 4)) {
953 p++;
954 i++;
955 }
956 if((i < 1) || (i>3))
957 /* less than one or more than three slashes */
958 return CURLUE_BAD_SLASHES;
959
960 schemep = schemebuf;
961 if(!Curl_builtin_scheme(schemep) &&
962 !(flags & CURLU_NON_SUPPORT_SCHEME))
963 return CURLUE_UNSUPPORTED_SCHEME;
964
965 if(junkscan(schemep, flags))
966 return CURLUE_BAD_SCHEME;
967 }
968 else {
969 /* no scheme! */
970
971 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
972 return CURLUE_BAD_SCHEME;
973 if(flags & CURLU_DEFAULT_SCHEME)
974 schemep = DEFAULT_SCHEME;
975
976 /*
977 * The URL was badly formatted, let's try without scheme specified.
978 */
979 p = url;
980 }
981 hostp = p; /* host name starts here */
982
983 /* find the end of the host name + port number */
984 while(*p && !HOSTNAME_END(*p))
985 p++;
986
987 len = p - hostp;
988 if(len) {
989 memcpy(hostname, hostp, len);
990 hostname[len] = 0;
991 }
992 else {
993 if(!(flags & CURLU_NO_AUTHORITY))
994 return CURLUE_NO_HOST;
995 }
996
997 strcpy(path, p);
998
999 if(schemep) {
1000 u->scheme = strdup(schemep);
1001 if(!u->scheme)
1002 return CURLUE_OUT_OF_MEMORY;
1003 }
1004 }
1005
1006 if((flags & CURLU_URLENCODE) && path[0]) {
1007 /* worst case output length is 3x the original! */
1008 char *newp = malloc(strlen(path) * 3);
1009 if(!newp)
1010 return CURLUE_OUT_OF_MEMORY;
1011 path_alloced = TRUE;
1012 strcpy_url(newp, path, TRUE); /* consider it relative */
1013 u->temppath = path = newp;
1014 }
1015
1016 fragment = strchr(path, '#');
1017 if(fragment) {
1018 *fragment++ = 0;
1019 if(junkscan(fragment, flags))
1020 return CURLUE_BAD_FRAGMENT;
1021 if(fragment[0]) {
1022 u->fragment = strdup(fragment);
1023 if(!u->fragment)
1024 return CURLUE_OUT_OF_MEMORY;
1025 }
1026 }
1027
1028 query = strchr(path, '?');
1029 if(query) {
1030 *query++ = 0;
1031 if(junkscan(query, flags))
1032 return CURLUE_BAD_QUERY;
1033 /* done even if the query part is a blank string */
1034 u->query = strdup(query);
1035 if(!u->query)
1036 return CURLUE_OUT_OF_MEMORY;
1037 }
1038
1039 if(junkscan(path, flags))
1040 return CURLUE_BAD_PATH;
1041
1042 if(!path[0])
1043 /* if there's no path left set, unset */
1044 path = NULL;
1045 else {
1046 if(!(flags & CURLU_PATH_AS_IS)) {
1047 /* remove ../ and ./ sequences according to RFC3986 */
1048 char *newp = Curl_dedotdotify(path);
1049 if(!newp)
1050 return CURLUE_OUT_OF_MEMORY;
1051
1052 if(strcmp(newp, path)) {
1053 /* if we got a new version */
1054 if(path_alloced)
1055 Curl_safefree(u->temppath);
1056 u->temppath = path = newp;
1057 path_alloced = TRUE;
1058 }
1059 else
1060 free(newp);
1061 }
1062
1063 u->path = path_alloced?path:strdup(path);
1064 if(!u->path)
1065 return CURLUE_OUT_OF_MEMORY;
1066 u->temppath = NULL; /* used now */
1067 }
1068
1069 if(hostname) {
1070 char normalized_ipv4[sizeof("255.255.255.255") + 1];
1071
1072 /*
1073 * Parse the login details and strip them out of the host name.
1074 */
1075 result = parse_hostname_login(u, &hostname, flags);
1076 if(result)
1077 return result;
1078
1079 result = Curl_parse_port(u, hostname, url_has_scheme);
1080 if(result)
1081 return result;
1082
1083 if(junkscan(hostname, flags))
1084 return CURLUE_BAD_HOSTNAME;
1085
1086 if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
1087 /* Skip hostname check, it's allowed to be empty. */
1088 u->host = strdup("");
1089 }
1090 else {
1091 if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
1092 u->host = strdup(normalized_ipv4);
1093 else {
1094 result = decode_host(hostname, &u->host);
1095 if(result)
1096 return result;
1097 result = hostname_check(u, u->host);
1098 if(result)
1099 return result;
1100 }
1101 }
1102 if(!u->host)
1103 return CURLUE_OUT_OF_MEMORY;
1104 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1105 /* legacy curl-style guess based on host name */
1106 if(checkprefix("ftp.", hostname))
1107 schemep = "ftp";
1108 else if(checkprefix("dict.", hostname))
1109 schemep = "dict";
1110 else if(checkprefix("ldap.", hostname))
1111 schemep = "ldap";
1112 else if(checkprefix("imap.", hostname))
1113 schemep = "imap";
1114 else if(checkprefix("smtp.", hostname))
1115 schemep = "smtp";
1116 else if(checkprefix("pop3.", hostname))
1117 schemep = "pop3";
1118 else
1119 schemep = "http";
1120
1121 u->scheme = strdup(schemep);
1122 if(!u->scheme)
1123 return CURLUE_OUT_OF_MEMORY;
1124 }
1125 }
1126
1127 Curl_safefree(u->scratch);
1128 Curl_safefree(u->temppath);
1129
1130 return CURLUE_OK;
1131}
1132
1133/*
1134 * Parse the URL and set the relevant members of the Curl_URL struct.
1135 */
1136static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1137{
1138 CURLUcode result = seturl(url, u, flags);
1139 if(result) {
1140 free_urlhandle(u);
1141 memset(u, 0, sizeof(struct Curl_URL));
1142 }
1143 return result;
1144}
1145
1146/*
1147 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1148 */
1149static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1150 unsigned int flags)
1151{
1152 CURLUcode result;
1153 CURLU tmpurl;
1154 memset(&tmpurl, 0, sizeof(tmpurl));
1155 result = parseurl(url, &tmpurl, flags);
1156 if(!result) {
1157 free_urlhandle(u);
1158 *u = tmpurl;
1159 }
1160 else
1161 free_urlhandle(&tmpurl);
1162 return result;
1163}
1164
1165/*
1166 */
1167CURLU *curl_url(void)
1168{
1169 return calloc(sizeof(struct Curl_URL), 1);
1170}
1171
1172void curl_url_cleanup(CURLU *u)
1173{
1174 if(u) {
1175 free_urlhandle(u);
1176 free(u);
1177 }
1178}
1179
/*
 * Duplicate the string member 'name' from 'src' into 'dest' if it is set in
 * 'src'. Jumps to a label called 'fail', which the enclosing function must
 * provide, when strdup() returns NULL.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1188
1189CURLU *curl_url_dup(CURLU *in)
1190{
1191 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1192 if(u) {
1193 DUP(u, in, scheme);
1194 DUP(u, in, user);
1195 DUP(u, in, password);
1196 DUP(u, in, options);
1197 DUP(u, in, host);
1198 DUP(u, in, port);
1199 DUP(u, in, path);
1200 DUP(u, in, query);
1201 DUP(u, in, fragment);
1202 u->portnum = in->portnum;
1203 }
1204 return u;
1205 fail:
1206 curl_url_cleanup(u);
1207 return NULL;
1208}
1209
1210CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1211 char **part, unsigned int flags)
1212{
1213 char *ptr;
1214 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1215 char portbuf[7];
1216 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1217 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1218 bool plusdecode = FALSE;
1219 (void)flags;
1220 if(!u)
1221 return CURLUE_BAD_HANDLE;
1222 if(!part)
1223 return CURLUE_BAD_PARTPOINTER;
1224 *part = NULL;
1225
1226 switch(what) {
1227 case CURLUPART_SCHEME:
1228 ptr = u->scheme;
1229 ifmissing = CURLUE_NO_SCHEME;
1230 urldecode = FALSE; /* never for schemes */
1231 break;
1232 case CURLUPART_USER:
1233 ptr = u->user;
1234 ifmissing = CURLUE_NO_USER;
1235 break;
1236 case CURLUPART_PASSWORD:
1237 ptr = u->password;
1238 ifmissing = CURLUE_NO_PASSWORD;
1239 break;
1240 case CURLUPART_OPTIONS:
1241 ptr = u->options;
1242 ifmissing = CURLUE_NO_OPTIONS;
1243 break;
1244 case CURLUPART_HOST:
1245 ptr = u->host;
1246 ifmissing = CURLUE_NO_HOST;
1247 break;
1248 case CURLUPART_ZONEID:
1249 ptr = u->zoneid;
1250 ifmissing = CURLUE_NO_ZONEID;
1251 break;
1252 case CURLUPART_PORT:
1253 ptr = u->port;
1254 ifmissing = CURLUE_NO_PORT;
1255 urldecode = FALSE; /* never for port */
1256 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1257 /* there's no stored port number, but asked to deliver
1258 a default one for the scheme */
1259 const struct Curl_handler *h =
1260 Curl_builtin_scheme(u->scheme);
1261 if(h) {
1262 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1263 ptr = portbuf;
1264 }
1265 }
1266 else if(ptr && u->scheme) {
1267 /* there is a stored port number, but ask to inhibit if
1268 it matches the default one for the scheme */
1269 const struct Curl_handler *h =
1270 Curl_builtin_scheme(u->scheme);
1271 if(h && (h->defport == u->portnum) &&
1272 (flags & CURLU_NO_DEFAULT_PORT))
1273 ptr = NULL;
1274 }
1275 break;
1276 case CURLUPART_PATH:
1277 ptr = u->path;
1278 if(!ptr) {
1279 ptr = u->path = strdup("/");
1280 if(!u->path)
1281 return CURLUE_OUT_OF_MEMORY;
1282 }
1283 break;
1284 case CURLUPART_QUERY:
1285 ptr = u->query;
1286 ifmissing = CURLUE_NO_QUERY;
1287 plusdecode = urldecode;
1288 break;
1289 case CURLUPART_FRAGMENT:
1290 ptr = u->fragment;
1291 ifmissing = CURLUE_NO_FRAGMENT;
1292 break;
1293 case CURLUPART_URL: {
1294 char *url;
1295 char *scheme;
1296 char *options = u->options;
1297 char *port = u->port;
1298 char *allochost = NULL;
1299 if(u->scheme && strcasecompare("file", u->scheme)) {
1300 url = aprintf("file://%s%s%s",
1301 u->path,
1302 u->fragment? "#": "",
1303 u->fragment? u->fragment : "");
1304 }
1305 else if(!u->host)
1306 return CURLUE_NO_HOST;
1307 else {
1308 const struct Curl_handler *h = NULL;
1309 if(u->scheme)
1310 scheme = u->scheme;
1311 else if(flags & CURLU_DEFAULT_SCHEME)
1312 scheme = (char *) DEFAULT_SCHEME;
1313 else
1314 return CURLUE_NO_SCHEME;
1315
1316 h = Curl_builtin_scheme(scheme);
1317 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1318 /* there's no stored port number, but asked to deliver
1319 a default one for the scheme */
1320 if(h) {
1321 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1322 port = portbuf;
1323 }
1324 }
1325 else if(port) {
1326 /* there is a stored port number, but asked to inhibit if it matches
1327 the default one for the scheme */
1328 if(h && (h->defport == u->portnum) &&
1329 (flags & CURLU_NO_DEFAULT_PORT))
1330 port = NULL;
1331 }
1332
1333 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1334 options = NULL;
1335
1336 if(u->host[0] == '[') {
1337 if(u->zoneid) {
1338 /* make it '[ host %25 zoneid ]' */
1339 size_t hostlen = strlen(u->host);
1340 size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
1341 allochost = malloc(alen);
1342 if(!allochost)
1343 return CURLUE_OUT_OF_MEMORY;
1344 memcpy(allochost, u->host, hostlen - 1);
1345 msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
1346 "%%25%s]", u->zoneid);
1347 }
1348 }
1349 else if(urlencode) {
1350 allochost = curl_easy_escape(NULL, u->host, 0);
1351 if(!allochost)
1352 return CURLUE_OUT_OF_MEMORY;
1353 }
1354 else {
1355 /* only encode '%' in output host name */
1356 char *host = u->host;
1357 size_t pcount = 0;
1358 /* first, count number of percents present in the name */
1359 while(*host) {
1360 if(*host == '%')
1361 pcount++;
1362 host++;
1363 }
1364 /* if there were percents, encode the host name */
1365 if(pcount) {
1366 size_t hostlen = strlen(u->host);
1367 size_t alen = hostlen + 2 * pcount + 1;
1368 char *o = allochost = malloc(alen);
1369 if(!allochost)
1370 return CURLUE_OUT_OF_MEMORY;
1371
1372 host = u->host;
1373 while(*host) {
1374 if(*host == '%') {
1375 memcpy(o, "%25", 3);
1376 o += 3;
1377 host++;
1378 continue;
1379 }
1380 *o++ = *host++;
1381 }
1382 *o = '\0';
1383 }
1384 }
1385
1386 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1387 scheme,
1388 u->user ? u->user : "",
1389 u->password ? ":": "",
1390 u->password ? u->password : "",
1391 options ? ";" : "",
1392 options ? options : "",
1393 (u->user || u->password || options) ? "@": "",
1394 allochost ? allochost : u->host,
1395 port ? ":": "",
1396 port ? port : "",
1397 (u->path && (u->path[0] != '/')) ? "/": "",
1398 u->path ? u->path : "/",
1399 (u->query && u->query[0]) ? "?": "",
1400 (u->query && u->query[0]) ? u->query : "",
1401 u->fragment? "#": "",
1402 u->fragment? u->fragment : "");
1403 free(allochost);
1404 }
1405 if(!url)
1406 return CURLUE_OUT_OF_MEMORY;
1407 *part = url;
1408 return CURLUE_OK;
1409 }
1410 default:
1411 ptr = NULL;
1412 break;
1413 }
1414 if(ptr) {
1415 *part = strdup(ptr);
1416 if(!*part)
1417 return CURLUE_OUT_OF_MEMORY;
1418 if(plusdecode) {
1419 /* convert + to space */
1420 char *plus;
1421 for(plus = *part; *plus; ++plus) {
1422 if(*plus == '+')
1423 *plus = ' ';
1424 }
1425 }
1426 if(urldecode) {
1427 char *decoded;
1428 size_t dlen;
1429 /* this unconditional rejection of control bytes is documented
1430 API behavior */
1431 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1432 free(*part);
1433 if(res) {
1434 *part = NULL;
1435 return CURLUE_URLDECODE;
1436 }
1437 *part = decoded;
1438 }
1439 return CURLUE_OK;
1440 }
1441 else
1442 return ifmissing;
1443}
1444
1445CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1446 const char *part, unsigned int flags)
1447{
1448 char **storep = NULL;
1449 long port = 0;
1450 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1451 bool plusencode = FALSE;
1452 bool urlskipslash = FALSE;
1453 bool appendquery = FALSE;
1454 bool equalsencode = FALSE;
1455
1456 if(!u)
1457 return CURLUE_BAD_HANDLE;
1458 if(!part) {
1459 /* setting a part to NULL clears it */
1460 switch(what) {
1461 case CURLUPART_URL:
1462 break;
1463 case CURLUPART_SCHEME:
1464 storep = &u->scheme;
1465 break;
1466 case CURLUPART_USER:
1467 storep = &u->user;
1468 break;
1469 case CURLUPART_PASSWORD:
1470 storep = &u->password;
1471 break;
1472 case CURLUPART_OPTIONS:
1473 storep = &u->options;
1474 break;
1475 case CURLUPART_HOST:
1476 storep = &u->host;
1477 break;
1478 case CURLUPART_ZONEID:
1479 storep = &u->zoneid;
1480 break;
1481 case CURLUPART_PORT:
1482 u->portnum = 0;
1483 storep = &u->port;
1484 break;
1485 case CURLUPART_PATH:
1486 storep = &u->path;
1487 break;
1488 case CURLUPART_QUERY:
1489 storep = &u->query;
1490 break;
1491 case CURLUPART_FRAGMENT:
1492 storep = &u->fragment;
1493 break;
1494 default:
1495 return CURLUE_UNKNOWN_PART;
1496 }
1497 if(storep && *storep) {
1498 Curl_safefree(*storep);
1499 }
1500 return CURLUE_OK;
1501 }
1502
1503 switch(what) {
1504 case CURLUPART_SCHEME:
1505 if(strlen(part) > MAX_SCHEME_LEN)
1506 /* too long */
1507 return CURLUE_BAD_SCHEME;
1508 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1509 /* verify that it is a fine scheme */
1510 !Curl_builtin_scheme(part))
1511 return CURLUE_UNSUPPORTED_SCHEME;
1512 storep = &u->scheme;
1513 urlencode = FALSE; /* never */
1514 break;
1515 case CURLUPART_USER:
1516 storep = &u->user;
1517 break;
1518 case CURLUPART_PASSWORD:
1519 storep = &u->password;
1520 break;
1521 case CURLUPART_OPTIONS:
1522 storep = &u->options;
1523 break;
1524 case CURLUPART_HOST: {
1525 size_t len = strcspn(part, " \r\n");
1526 if(strlen(part) != len)
1527 /* hostname with bad content */
1528 return CURLUE_BAD_HOSTNAME;
1529 storep = &u->host;
1530 Curl_safefree(u->zoneid);
1531 break;
1532 }
1533 case CURLUPART_ZONEID:
1534 storep = &u->zoneid;
1535 break;
1536 case CURLUPART_PORT:
1537 {
1538 char *endp;
1539 urlencode = FALSE; /* never */
1540 port = strtol(part, &endp, 10); /* Port number must be decimal */
1541 if((port <= 0) || (port > 0xffff))
1542 return CURLUE_BAD_PORT_NUMBER;
1543 if(*endp)
1544 /* weirdly provided number, not good! */
1545 return CURLUE_BAD_PORT_NUMBER;
1546 storep = &u->port;
1547 }
1548 break;
1549 case CURLUPART_PATH:
1550 urlskipslash = TRUE;
1551 storep = &u->path;
1552 break;
1553 case CURLUPART_QUERY:
1554 plusencode = urlencode;
1555 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1556 equalsencode = appendquery;
1557 storep = &u->query;
1558 break;
1559 case CURLUPART_FRAGMENT:
1560 storep = &u->fragment;
1561 break;
1562 case CURLUPART_URL: {
1563 /*
1564 * Allow a new URL to replace the existing (if any) contents.
1565 *
1566 * If the existing contents is enough for a URL, allow a relative URL to
1567 * replace it.
1568 */
1569 CURLUcode result;
1570 char *oldurl;
1571 char *redired_url;
1572
1573 /* if the new thing is absolute or the old one is not
1574 * (we could not get an absolute url in 'oldurl'),
1575 * then replace the existing with the new. */
1576 if(Curl_is_absolute_url(part, NULL, 0)
1577 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1578 return parseurl_and_replace(part, u, flags);
1579 }
1580
1581 /* apply the relative part to create a new URL
1582 * and replace the existing one with it. */
1583 redired_url = concat_url(oldurl, part);
1584 free(oldurl);
1585 if(!redired_url)
1586 return CURLUE_OUT_OF_MEMORY;
1587
1588 result = parseurl_and_replace(redired_url, u, flags);
1589 free(redired_url);
1590 return result;
1591 }
1592 default:
1593 return CURLUE_UNKNOWN_PART;
1594 }
1595 DEBUGASSERT(storep);
1596 {
1597 const char *newp = part;
1598 size_t nalloc = strlen(part);
1599
1600 if(nalloc > CURL_MAX_INPUT_LENGTH)
1601 /* excessive input length */
1602 return CURLUE_MALFORMED_INPUT;
1603
1604 if(urlencode) {
1605 const unsigned char *i;
1606 char *o;
1607 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
1608 if(!enc)
1609 return CURLUE_OUT_OF_MEMORY;
1610 for(i = (const unsigned char *)part, o = enc; *i; i++) {
1611 if((*i == ' ') && plusencode) {
1612 *o = '+';
1613 o++;
1614 }
1615 else if(Curl_isunreserved(*i) ||
1616 ((*i == '/') && urlskipslash) ||
1617 ((*i == '=') && equalsencode)) {
1618 if((*i == '=') && equalsencode)
1619 /* only skip the first equals sign */
1620 equalsencode = FALSE;
1621 *o = *i;
1622 o++;
1623 }
1624 else {
1625 msnprintf(o, 4, "%%%02x", *i);
1626 o += 3;
1627 }
1628 }
1629 *o = 0; /* null-terminate */
1630 newp = enc;
1631 }
1632 else {
1633 char *p;
1634 newp = strdup(part);
1635 if(!newp)
1636 return CURLUE_OUT_OF_MEMORY;
1637 p = (char *)newp;
1638 while(*p) {
1639 /* make sure percent encoded are lower case */
1640 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1641 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1642 p[1] = (char)TOLOWER(p[1]);
1643 p[2] = (char)TOLOWER(p[2]);
1644 p += 3;
1645 }
1646 else
1647 p++;
1648 }
1649 }
1650
1651 if(appendquery) {
1652 /* Append the string onto the old query. Add a '&' separator if none is
1653 present at the end of the exsting query already */
1654 size_t querylen = u->query ? strlen(u->query) : 0;
1655 bool addamperand = querylen && (u->query[querylen -1] != '&');
1656 if(querylen) {
1657 size_t newplen = strlen(newp);
1658 char *p = malloc(querylen + addamperand + newplen + 1);
1659 if(!p) {
1660 free((char *)newp);
1661 return CURLUE_OUT_OF_MEMORY;
1662 }
1663 strcpy(p, u->query); /* original query */
1664 if(addamperand)
1665 p[querylen] = '&'; /* ampersand */
1666 strcpy(&p[querylen + addamperand], newp); /* new suffix */
1667 free((char *)newp);
1668 free(*storep);
1669 *storep = p;
1670 return CURLUE_OK;
1671 }
1672 }
1673
1674 if(what == CURLUPART_HOST) {
1675 if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1676 /* Skip hostname check, it's allowed to be empty. */
1677 }
1678 else {
1679 if(hostname_check(u, (char *)newp)) {
1680 free((char *)newp);
1681 return CURLUE_BAD_HOSTNAME;
1682 }
1683 }
1684 }
1685
1686 free(*storep);
1687 *storep = (char *)newp;
1688 }
1689 /* set after the string, to make it not assigned if the allocation above
1690 fails */
1691 if(port)
1692 u->portnum = port;
1693 return CURLUE_OK;
1694}
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette