urlapi.c@ 108333

最後變更在這個檔案從108333是 108048,由 vboxsync 提交於 7 週前
curl-8.11.1: Applied and adjusted our curl changes to 8.7.1. jiraref:VBP-1535
屬性 svn:eol-style 設為 `native`
檔案大小: 54.6 KB

行
1	/***************************************************************************
2	 *                                  _   _ ____  _
3	 *  Project                     ___| | | |  _ \| |
4	 *                             / __| | | | |_) | |
5	 *                            | (__| |_| |  _ <| |___
6	 *                             \___|\___/|_| \_\_____|
7	*
8	 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9	*
10	* This software is licensed as described in the file COPYING, which
11	* you should have received as part of this distribution. The terms
12	* are also available at https://curl.se/docs/copyright.html.
13	*
14	* You may opt to use, copy, modify, merge, publish, distribute and/or sell
15	* copies of the Software, and permit persons to whom the Software is
16	* furnished to do so, under the terms of the COPYING file.
17	*
18	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19	* KIND, either express or implied.
20	*
21	* SPDX-License-Identifier: curl
22	*
23	***************************************************************************/
24
25	#include "curl_setup.h"
26
27	#include "urldata.h"
28	#include "urlapi-int.h"
29	#include "strcase.h"
30	#include "url.h"
31	#include "escape.h"
32	#include "curl_ctype.h"
33	#include "inet_pton.h"
34	#include "inet_ntop.h"
35	#include "strdup.h"
36	#include "idn.h"
37
38	/* The last 3 #include files should be in this order */
39	#include "curl_printf.h"
40	#include "curl_memory.h"
41	#include "memdebug.h"
42
/* MS-DOS/Windows style drive prefix, eg c: in c:foo
 *
 * 'str' is evaluated several times, so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))

/* MS-DOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash or NUL
 *
 * 'str' is evaluated several times, so it must not have side effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))

/* scheme is not URL encoded, the longest libcurl supported ones are... */
#define MAX_SCHEME_LEN 40

/*
 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
 * sure we have _some_ value for AF_INET6 without polluting our fake value
 * everywhere.
 */
#if !defined(USE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
68
69	/* Internal representation of CURLU. Point to URL-encoded strings. */
70	struct Curl_URL {
71	char *scheme;
72	char *user;
73	char *password;
74	char options; / IMAP only? */
75	char *host;
76	char zoneid; / for numerical IPv6 addresses */
77	char *port;
78	char *path;
79	char *query;
80	char *fragment;
81	unsigned short portnum; /* the numerical version (if 'port' is set) */
82	BIT(query_present); /* to support blank */
83	BIT(fragment_present); /* to support blank */
84	BIT(guessed_scheme); /* when a URL without scheme is parsed */
85	};
86
87	#define DEFAULT_SCHEME "https"
88
89	static void free_urlhandle(struct Curl_URL *u)
90	{
91	free(u->scheme);
92	free(u->user);
93	free(u->password);
94	free(u->options);
95	free(u->host);
96	free(u->zoneid);
97	free(u->port);
98	free(u->path);
99	free(u->query);
100	free(u->fragment);
101	}
102
/*
 * Find the separator at the end of the hostname, or the '?' in cases like
 * http://www.example.com?id=2380
 *
 * The search starts right after the first "//" in 'url', or at the start of
 * 'url' when there is no "//". Returns a pointer into 'url': whichever of the
 * first '/' and the first '?' comes first, or the terminating null byte when
 * neither is present.
 */
static const char *find_host_sep(const char *url)
{
  const char *sep;
  const char *query;

  /* Find the start of the hostname */
  sep = strstr(url, "//");
  if(!sep)
    sep = url;
  else
    sep += 2;

  query = strchr(sep, '?');
  sep = strchr(sep, '/');

  /* a missing separator is treated as being at the end of the string */
  if(!sep)
    sep = url + strlen(url);

  if(!query)
    query = url + strlen(url);

  return sep < query ? sep : query;
}
130
/* convert CURLcode to CURLUcode: CURLE_TOO_LARGE maps to CURLUE_TOO_LARGE,
   every other value maps to CURLUE_OUT_OF_MEMORY */
#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
                  CURLUE_OUT_OF_MEMORY)
/*
 * Decide whether a character in a URL must be escaped.
 */
#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))

/* lowercase hex digits, indexed by nibble value when percent-encoding */
static const char hexdigits[] = "0123456789abcdef";
140	/* urlencode_str() writes data into an output dynbuf and URL-encodes the
141	* spaces in the source URL accordingly.
142	*
143	* URL encoding should be skipped for hostnames, otherwise IDN resolution
144	* will fail.
145	*/
146	static CURLUcode urlencode_str(struct dynbuf o, const char url,
147	size_t len, bool relative,
148	bool query)
149	{
150	/* we must add this with whitespace-replacing */
151	bool left = !query;
152	const unsigned char *iptr;
153	const unsigned char host_sep = (const unsigned char ) url;
154	CURLcode result;
155
156	if(!relative)
157	host_sep = (const unsigned char *) find_host_sep(url);
158
159	for(iptr = (unsigned char )url; / read from here */
160	len; iptr++, len--) {
161
162	if(iptr < host_sep) {
163	result = Curl_dyn_addn(o, iptr, 1);
164	if(result)
165	return cc2cu(result);
166	continue;
167	}
168
169	if(*iptr == ' ') {
170	if(left)
171	result = Curl_dyn_addn(o, "%20", 3);
172	else
173	result = Curl_dyn_addn(o, "+", 1);
174	if(result)
175	return cc2cu(result);
176	continue;
177	}
178
179	if(*iptr == '?')
180	left = FALSE;
181
182	if(urlchar_needs_escaping(*iptr)) {
183	char out[3]={'%'};
184	out[1] = hexdigits[*iptr >> 4];
185	out[2] = hexdigits[*iptr & 0xf];
186	result = Curl_dyn_addn(o, out, 3);
187	}
188	else
189	result = Curl_dyn_addn(o, iptr, 1);
190	if(result)
191	return cc2cu(result);
192	}
193
194	return CURLUE_OK;
195	}
196
197	/*
198	* Returns the length of the scheme if the given URL is absolute (as opposed
199	* to relative). Stores the scheme in the buffer if TRUE and 'buf' is
200	* non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
201	*
202	* If 'guess_scheme' is TRUE, it means the URL might be provided without
203	* scheme.
204	*/
205	size_t Curl_is_absolute_url(const char url, char buf, size_t buflen,
206	bool guess_scheme)
207	{
208	size_t i = 0;
209	DEBUGASSERT(!buf \|\| (buflen > MAX_SCHEME_LEN));
210	(void)buflen; /* only used in debug-builds */
211	if(buf)
212	buf[0] = 0; /* always leave a defined value in buf */
213	#ifdef _WIN32
214	if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
215	return 0;
216	#endif
217	if(ISALPHA(url[0]))
218	for(i = 1; i < MAX_SCHEME_LEN; ++i) {
219	char s = url[i];
220	if(s && (ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.') )) {
221	/* RFC 3986 3.1 explains:
222	scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223	*/
224	}
225	else {
226	break;
227	}
228	}
229	if(i && (url[i] == ':') && ((url[i + 1] == '/') \|\| !guess_scheme)) {
230	/* If this does not guess scheme, the scheme always ends with the colon so
231	that this also detects data: URLs etc. In guessing mode, data: could
232	be the hostname "data" with a specified port number. */
233
234	/* the length of the scheme is the name part only */
235	size_t len = i;
236	if(buf) {
237	Curl_strntolower(buf, url, i);
238	buf[i] = 0;
239	}
240	return len;
241	}
242	return 0;
243	}
244
245	/*
246	* Concatenate a relative URL to a base URL making it absolute.
247	* URL-encodes any spaces.
248	* The returned pointer must be freed by the caller unless NULL
249	* (returns NULL on out of memory).
250	*
251	* Note that this function destroys the 'base' string.
252	*/
253	static CURLcode concat_url(char base, const char relurl, char **newurl)
254	{
255	/***
256	TRY to append this new path to the old URL
257	to the right of the host part. Oh crap, this is doomed to cause
258	problems in the future...
259	*/
260	struct dynbuf newest;
261	char *protsep;
262	char *pathsep;
263	bool host_changed = FALSE;
264	const char *useurl = relurl;
265	CURLcode result = CURLE_OK;
266	CURLUcode uc;
267	bool skip_slash = FALSE;
268	*newurl = NULL;
269
270	/* protsep points to the start of the hostname */
271	protsep = strstr(base, "//");
272	if(!protsep)
273	protsep = base;
274	else
275	protsep += 2; /* pass the slashes */
276
277	if('/' != relurl[0]) {
278	int level = 0;
279
280	/* First we need to find out if there is a ?-letter in the URL,
281	and cut it and the right-side of that off */
282	pathsep = strchr(protsep, '?');
283	if(pathsep)
284	*pathsep = 0;
285
286	/* we have a relative path to append to the last slash if there is one
287	available, or the new URL is just a query string (starts with a '?') or
288	a fragment (starts with '#') we append the new one at the end of the
289	current URL */
290	if((useurl[0] != '?') && (useurl[0] != '#')) {
291	pathsep = strrchr(protsep, '/');
292	if(pathsep)
293	*pathsep = 0;
294
295	/* Check if there is any slash after the hostname, and if so, remember
296	that position instead */
297	pathsep = strchr(protsep, '/');
298	if(pathsep)
299	protsep = pathsep + 1;
300	else
301	protsep = NULL;
302
303	/* now deal with one "./" or any amount of "../" in the newurl
304	and act accordingly */
305
306	if((useurl[0] == '.') && (useurl[1] == '/'))
307	useurl += 2; /* just skip the "./" */
308
309	while((useurl[0] == '.') &&
310	(useurl[1] == '.') &&
311	(useurl[2] == '/')) {
312	level++;
313	useurl += 3; /* pass the "../" */
314	}
315
316	if(protsep) {
317	while(level--) {
318	/* cut off one more level from the right of the original URL */
319	pathsep = strrchr(protsep, '/');
320	if(pathsep)
321	*pathsep = 0;
322	else {
323	*protsep = 0;
324	break;
325	}
326	}
327	}
328	}
329	else
330	skip_slash = TRUE;
331	}
332	else {
333	/* We got a new absolute path for this server */
334
335	if(relurl[1] == '/') {
336	/* the new URL starts with //, just keep the protocol part from the
337	original one */
338	*protsep = 0;
339	useurl = &relurl[2]; /* we keep the slashes from the original, so we
340	skip the new ones */
341	host_changed = TRUE;
342	}
343	else {
344	/* cut off the original URL from the first slash, or deal with URLs
345	without slash */
346	pathsep = strchr(protsep, '/');
347	if(pathsep) {
348	/* When people use badly formatted URLs, such as
349	"http://www.example.com?dir=/home/daniel" we must not use the first
350	slash, if there is a ?-letter before it! */
351	char *sep = strchr(protsep, '?');
352	if(sep && (sep < pathsep))
353	pathsep = sep;
354	*pathsep = 0;
355	}
356	else {
357	/* There was no slash. Now, since we might be operating on a badly
358	formatted URL, such as "http://www.example.com?id=2380" which does
359	not use a slash separator as it is supposed to, we need to check
360	for a ?-letter as well! */
361	pathsep = strchr(protsep, '?');
362	if(pathsep)
363	*pathsep = 0;
364	}
365	}
366	}
367
368	Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
369
370	/* copy over the root URL part */
371	result = Curl_dyn_add(&newest, base);
372	if(result)
373	return result;
374
375	/* check if we need to append a slash */
376	if(('/' == useurl[0]) \|\| (protsep && !*protsep) \|\| skip_slash)
377	;
378	else {
379	result = Curl_dyn_addn(&newest, "/", 1);
380	if(result)
381	return result;
382	}
383
384	/* then append the new piece on the right side */
385	uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
386	FALSE);
387	if(uc)
388	return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;
389
390	*newurl = Curl_dyn_ptr(&newest);
391	return CURLE_OK;
392	}
393
394	/* scan for byte values <= 31, 127 and sometimes space */
395	static CURLUcode junkscan(const char url, size_t urllen, unsigned int flags)
396	{
397	static const char badbytes[]={
398	/* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
399	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
400	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
401	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
402	0x7f, 0x00 /* null-terminate */
403	};
404	size_t n = strlen(url);
405	size_t nfine;
406
407	if(n > CURL_MAX_INPUT_LENGTH)
408	/* excessive input length */
409	return CURLUE_MALFORMED_INPUT;
410
411	nfine = strcspn(url, badbytes);
412	if((nfine != n) \|\|
413	(!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
414	return CURLUE_MALFORMED_INPUT;
415
416	*urllen = n;
417	return CURLUE_OK;
418	}
419
420	/*
421	* parse_hostname_login()
422	*
423	* Parse the login details (username, password and options) from the URL and
424	* strip them out of the hostname
425	*
426	*/
427	static CURLUcode parse_hostname_login(struct Curl_URL *u,
428	const char *login,
429	size_t len,
430	unsigned int flags,
431	size_t offset) / to the hostname */
432	{
433	CURLUcode result = CURLUE_OK;
434	CURLcode ccode;
435	char *userp = NULL;
436	char *passwdp = NULL;
437	char *optionsp = NULL;
438	const struct Curl_handler *h = NULL;
439
440	/* At this point, we assume all the other special cases have been taken
441	* care of, so the host is at most
442	*
443	* [user[:password][;options]]@]hostname
444	*
445	* We need somewhere to put the embedded details, so do that first.
446	*/
447	char *ptr;
448
449	DEBUGASSERT(login);
450
451	*offset = 0;
452	ptr = memchr(login, '@', len);
453	if(!ptr)
454	goto out;
455
456	/* We will now try to extract the
457	* possible login information in a string like:
458	* ftp://user:[email protected]:8021/README */
459	ptr++;
460
461	/* if this is a known scheme, get some details */
462	if(u->scheme)
463	h = Curl_get_scheme_handler(u->scheme);
464
465	/* We could use the login information in the URL so extract it. Only parse
466	options if the handler says we should. Note that 'h' might be NULL! */
467	ccode = Curl_parse_login_details(login, ptr - login - 1,
468	&userp, &passwdp,
469	(h && (h->flags & PROTOPT_URLOPTIONS)) ?
470	&optionsp : NULL);
471	if(ccode) {
472	result = CURLUE_BAD_LOGIN;
473	goto out;
474	}
475
476	if(userp) {
477	if(flags & CURLU_DISALLOW_USER) {
478	/* Option DISALLOW_USER is set and URL contains username. */
479	result = CURLUE_USER_NOT_ALLOWED;
480	goto out;
481	}
482	free(u->user);
483	u->user = userp;
484	}
485
486	if(passwdp) {
487	free(u->password);
488	u->password = passwdp;
489	}
490
491	if(optionsp) {
492	free(u->options);
493	u->options = optionsp;
494	}
495
496	/* the hostname starts at this offset */
497	*offset = ptr - login;
498	return CURLUE_OK;
499
500	out:
501
502	free(userp);
503	free(passwdp);
504	free(optionsp);
505	u->user = NULL;
506	u->password = NULL;
507	u->options = NULL;
508
509	return result;
510	}
511
512	UNITTEST CURLUcode Curl_parse_port(struct Curl_URL u, struct dynbuf host,
513	bool has_scheme)
514	{
515	char *portptr;
516	char *hostname = Curl_dyn_ptr(host);
517	/*
518	* Find the end of an IPv6 address on the ']' ending bracket.
519	*/
520	if(hostname[0] == '[') {
521	portptr = strchr(hostname, ']');
522	if(!portptr)
523	return CURLUE_BAD_IPV6;
524	portptr++;
525	/* this is a RFC2732-style specified IP-address */
526	if(*portptr) {
527	if(*portptr != ':')
528	return CURLUE_BAD_PORT_NUMBER;
529	}
530	else
531	portptr = NULL;
532	}
533	else
534	portptr = strchr(hostname, ':');
535
536	if(portptr) {
537	char *rest = NULL;
538	unsigned long port;
539	size_t keep = portptr - hostname;
540
541	/* Browser behavior adaptation. If there is a colon with no digits after,
542	just cut off the name there which makes us ignore the colon and just
543	use the default port. Firefox, Chrome and Safari all do that.
544
545	Do not do it if the URL has no scheme, to make something that looks like
546	a scheme not work!
547	*/
548	Curl_dyn_setlen(host, keep);
549	portptr++;
550	if(!*portptr)
551	return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
552
553	if(!ISDIGIT(*portptr))
554	return CURLUE_BAD_PORT_NUMBER;
555
556	errno = 0;
557	port = strtoul(portptr, &rest, 10); /* Port number must be decimal */
558
559	if(errno \|\| (port > 0xffff) \|\| *rest)
560	return CURLUE_BAD_PORT_NUMBER;
561
562	u->portnum = (unsigned short) port;
563	/* generate a new port number string to get rid of leading zeroes etc */
564	free(u->port);
565	u->port = aprintf("%ld", port);
566	if(!u->port)
567	return CURLUE_OUT_OF_MEMORY;
568	}
569
570	return CURLUE_OK;
571	}
572
573	/* this assumes 'hostname' now starts with [ */
574	static CURLUcode ipv6_parse(struct Curl_URL u, char hostname,
575	size_t hlen) /* length of hostname */
576	{
577	size_t len;
578	DEBUGASSERT(*hostname == '[');
579	if(hlen < 4) /* '[::]' is the shortest possible valid string */
580	return CURLUE_BAD_IPV6;
581	hostname++;
582	hlen -= 2;
583
584	/* only valid IPv6 letters are ok */
585	len = strspn(hostname, "0123456789abcdefABCDEF:.");
586
587	if(hlen != len) {
588	hlen = len;
589	if(hostname[len] == '%') {
590	/* this could now be '%[zone id]' */
591	char zoneid[16];
592	int i = 0;
593	char *h = &hostname[len + 1];
594	/* pass '25' if present and is a URL encoded percent sign */
595	if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
596	h += 2;
597	while(h && (h != ']') && (i < 15))
598	zoneid[i++] = *h++;
599	if(!i \|\| (']' != *h))
600	return CURLUE_BAD_IPV6;
601	zoneid[i] = 0;
602	u->zoneid = strdup(zoneid);
603	if(!u->zoneid)
604	return CURLUE_OUT_OF_MEMORY;
605	hostname[len] = ']'; /* insert end bracket */
606	hostname[len + 1] = 0; /* terminate the hostname */
607	}
608	else
609	return CURLUE_BAD_IPV6;
610	/* hostname is fine */
611	}
612
613	/* Normalize the IPv6 address */
614	{
615	char dest[16]; /* fits a binary IPv6 address */
616	hostname[hlen] = 0; /* end the address there */
617	if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618	return CURLUE_BAD_IPV6;
619	if(Curl_inet_ntop(AF_INET6, dest, hostname, hlen)) {
620	hlen = strlen(hostname); /* might be shorter now */
621	hostname[hlen + 1] = 0;
622	}
623	hostname[hlen] = ']'; /* restore ending bracket */
624	}
625	return CURLUE_OK;
626	}
627
628	static CURLUcode hostname_check(struct Curl_URL u, char hostname,
629	size_t hlen) /* length of hostname */
630	{
631	size_t len;
632	DEBUGASSERT(hostname);
633
634	if(!hlen)
635	return CURLUE_NO_HOST;
636	else if(hostname[0] == '[')
637	return ipv6_parse(u, hostname, hlen);
638	else {
639	/* letters from the second string are not ok */
640	len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
641	if(hlen != len)
642	/* hostname with bad content */
643	return CURLUE_BAD_HOSTNAME;
644	}
645	return CURLUE_OK;
646	}
647
648	/*
649	* Handle partial IPv4 numerical addresses and different bases, like
650	* '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
651	*
652	* If the given input string is syntactically wrong IPv4 or any part for
653	* example is too big, this function returns HOST_NAME.
654	*
655	* Output the "normalized" version of that input string in plain quad decimal
656	* integers.
657	*
658	* Returns the host type.
659	*/
660
661	#define HOST_ERROR -1 /* out of memory */
662
663	#define HOST_NAME 1
664	#define HOST_IPV4 2
665	#define HOST_IPV6 3
666
667	static int ipv4_normalize(struct dynbuf *host)
668	{
669	bool done = FALSE;
670	int n = 0;
671	const char *c = Curl_dyn_ptr(host);
672	unsigned long parts[4] = {0, 0, 0, 0};
673	CURLcode result = CURLE_OK;
674
675	if(*c == '[')
676	return HOST_IPV6;
677
678	errno = 0; /* for strtoul */
679	while(!done) {
680	char *endp = NULL;
681	unsigned long l;
682	if(!ISDIGIT(*c))
683	/* most importantly this does not allow a leading plus or minus */
684	return HOST_NAME;
685	l = strtoul(c, &endp, 0);
686	if(errno)
687	return HOST_NAME;
688	#if SIZEOF_LONG > 4
689	/* a value larger than 32 bits */
690	if(l > UINT_MAX)
691	return HOST_NAME;
692	#endif
693
694	parts[n] = l;
695	c = endp;
696
697	switch(*c) {
698	case '.':
699	if(n == 3)
700	return HOST_NAME;
701	n++;
702	c++;
703	break;
704
705	case '\0':
706	done = TRUE;
707	break;
708
709	default:
710	return HOST_NAME;
711	}
712	}
713
714	switch(n) {
715	case 0: /* a -- 32 bits */
716	Curl_dyn_reset(host);
717
718	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
719	(unsigned int)(parts[0] >> 24),
720	(unsigned int)((parts[0] >> 16) & 0xff),
721	(unsigned int)((parts[0] >> 8) & 0xff),
722	(unsigned int)(parts[0] & 0xff));
723	break;
724	case 1: /* a.b -- 8.24 bits */
725	if((parts[0] > 0xff) \|\| (parts[1] > 0xffffff))
726	return HOST_NAME;
727	Curl_dyn_reset(host);
728	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
729	(unsigned int)(parts[0]),
730	(unsigned int)((parts[1] >> 16) & 0xff),
731	(unsigned int)((parts[1] >> 8) & 0xff),
732	(unsigned int)(parts[1] & 0xff));
733	break;
734	case 2: /* a.b.c -- 8.8.16 bits */
735	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xffff))
736	return HOST_NAME;
737	Curl_dyn_reset(host);
738	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
739	(unsigned int)(parts[0]),
740	(unsigned int)(parts[1]),
741	(unsigned int)((parts[2] >> 8) & 0xff),
742	(unsigned int)(parts[2] & 0xff));
743	break;
744	case 3: /* a.b.c.d -- 8.8.8.8 bits */
745	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xff) \|\|
746	(parts[3] > 0xff))
747	return HOST_NAME;
748	Curl_dyn_reset(host);
749	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
750	(unsigned int)(parts[0]),
751	(unsigned int)(parts[1]),
752	(unsigned int)(parts[2]),
753	(unsigned int)(parts[3]));
754	break;
755	}
756	if(result)
757	return HOST_ERROR;
758	return HOST_IPV4;
759	}
760
761	/* if necessary, replace the host content with a URL decoded version */
762	static CURLUcode urldecode_host(struct dynbuf *host)
763	{
764	char *per = NULL;
765	const char *hostname = Curl_dyn_ptr(host);
766	per = strchr(hostname, '%');
767	if(!per)
768	/* nothing to decode */
769	return CURLUE_OK;
770	else {
771	/* encoded */
772	size_t dlen;
773	char *decoded;
774	CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
775	REJECT_CTRL);
776	if(result)
777	return CURLUE_BAD_HOSTNAME;
778	Curl_dyn_reset(host);
779	result = Curl_dyn_addn(host, decoded, dlen);
780	free(decoded);
781	if(result)
782	return cc2cu(result);
783	}
784
785	return CURLUE_OK;
786	}
787
788	static CURLUcode parse_authority(struct Curl_URL *u,
789	const char *auth, size_t authlen,
790	unsigned int flags,
791	struct dynbuf *host,
792	bool has_scheme)
793	{
794	size_t offset;
795	CURLUcode uc;
796	CURLcode result;
797
798	/*
799	* Parse the login details and strip them out of the hostname.
800	*/
801	uc = parse_hostname_login(u, auth, authlen, flags, &offset);
802	if(uc)
803	goto out;
804
805	result = Curl_dyn_addn(host, auth + offset, authlen - offset);
806	if(result) {
807	uc = cc2cu(result);
808	goto out;
809	}
810
811	uc = Curl_parse_port(u, host, has_scheme);
812	if(uc)
813	goto out;
814
815	if(!Curl_dyn_len(host))
816	return CURLUE_NO_HOST;
817
818	switch(ipv4_normalize(host)) {
819	case HOST_IPV4:
820	break;
821	case HOST_IPV6:
822	uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
823	break;
824	case HOST_NAME:
825	uc = urldecode_host(host);
826	if(!uc)
827	uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
828	break;
829	case HOST_ERROR:
830	uc = CURLUE_OUT_OF_MEMORY;
831	break;
832	default:
833	uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
834	break;
835	}
836
837	out:
838	return uc;
839	}
840
841	/* used for HTTP/2 server push */
842	CURLUcode Curl_url_set_authority(CURLU u, const char authority)
843	{
844	CURLUcode result;
845	struct dynbuf host;
846
847	DEBUGASSERT(authority);
848	Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
849
850	result = parse_authority(u, authority, strlen(authority),
851	CURLU_DISALLOW_USER, &host, !!u->scheme);
852	if(result)
853	Curl_dyn_free(&host);
854	else {
855	free(u->host);
856	u->host = Curl_dyn_ptr(&host);
857	}
858	return result;
859	}
860
861	/*
862	* "Remove Dot Segments"
863	* https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
864	*/
865
866	/*
867	* dedotdotify()
868	* @unittest: 1395
869	*
870	* This function gets a null-terminated path with dot and dotdot sequences
871	* passed in and strips them off according to the rules in RFC 3986 section
872	* 5.2.4.
873	*
874	* The function handles a query part ('?' + stuff) appended but it expects
875	* that fragments ('#' + stuff) have already been cut off.
876	*
877	* RETURNS
878	*
879	* Zero for success and 'out' set to an allocated dedotdotified string.
880	*/
881	UNITTEST int dedotdotify(const char input, size_t clen, char *outp);
882	UNITTEST int dedotdotify(const char input, size_t clen, char *outp)
883	{
884	char *outptr;
885	const char *endp = &input[clen];
886	char *out;
887
888	*outp = NULL;
889	/* the path always starts with a slash, and a slash has not dot */
890	if((clen < 2) \|\| !memchr(input, '.', clen))
891	return 0;
892
893	out = malloc(clen + 1);
894	if(!out)
895	return 1; /* out of memory */
896
897	out = 0; / null-terminates, for inputs like "./" */
898	outptr = out;
899
900	do {
901	bool dotdot = TRUE;
902	if(*input == '.') {
903	/* A. If the input buffer begins with a prefix of "../" or "./", then
904	remove that prefix from the input buffer; otherwise, */
905
906	if(!strncmp("./", input, 2)) {
907	input += 2;
908	clen -= 2;
909	}
910	else if(!strncmp("../", input, 3)) {
911	input += 3;
912	clen -= 3;
913	}
914	/* D. if the input buffer consists only of "." or "..", then remove
915	that from the input buffer; otherwise, */
916
917	else if(!strcmp(".", input) \|\| !strcmp("..", input) \|\|
918	!strncmp(".?", input, 2) \|\| !strncmp("..?", input, 3)) {
919	*out = 0;
920	break;
921	}
922	else
923	dotdot = FALSE;
924	}
925	else if(*input == '/') {
926	/* B. if the input buffer begins with a prefix of "/./" or "/.", where
927	"." is a complete path segment, then replace that prefix with "/" in
928	the input buffer; otherwise, */
929	if(!strncmp("/./", input, 3)) {
930	input += 2;
931	clen -= 2;
932	}
933	else if(!strcmp("/.", input) \|\| !strncmp("/.?", input, 3)) {
934	*outptr++ = '/';
935	*outptr = 0;
936	break;
937	}
938
939	/* C. if the input buffer begins with a prefix of "/../" or "/..",
940	where ".." is a complete path segment, then replace that prefix with
941	"/" in the input buffer and remove the last segment and its
942	preceding "/" (if any) from the output buffer; otherwise, */
943
944	else if(!strncmp("/../", input, 4)) {
945	input += 3;
946	clen -= 3;
947	/* remove the last segment from the output buffer */
948	while(outptr > out) {
949	outptr--;
950	if(*outptr == '/')
951	break;
952	}
953	outptr = 0; / null-terminate where it stops */
954	}
955	else if(!strcmp("/..", input) \|\| !strncmp("/..?", input, 4)) {
956	/* remove the last segment from the output buffer */
957	while(outptr > out) {
958	outptr--;
959	if(*outptr == '/')
960	break;
961	}
962	*outptr++ = '/';
963	outptr = 0; / null-terminate where it stops */
964	break;
965	}
966	else
967	dotdot = FALSE;
968	}
969	else
970	dotdot = FALSE;
971
972	if(!dotdot) {
973	/* E. move the first path segment in the input buffer to the end of
974	the output buffer, including the initial "/" character (if any) and
975	any subsequent characters up to, but not including, the next "/"
976	character or the end of the input buffer. */
977
978	do {
979	outptr++ = input++;
980	clen--;
981	} while(input && (input != '/') && (*input != '?'));
982	*outptr = 0;
983	}
984
985	/* continue until end of path */
986	} while(input < endp);
987
988	*outp = out;
989	return 0; /* success */
990	}
991
992	static CURLUcode parseurl(const char url, CURLU u, unsigned int flags)
993	{
994	const char *path;
995	size_t pathlen;
996	char *query = NULL;
997	char *fragment = NULL;
998	char schemebuf[MAX_SCHEME_LEN + 1];
999	size_t schemelen = 0;
1000	size_t urllen;
1001	CURLUcode result = CURLUE_OK;
1002	size_t fraglen = 0;
1003	struct dynbuf host;
1004
1005	DEBUGASSERT(url);
1006
1007	Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1008
1009	result = junkscan(url, &urllen, flags);
1010	if(result)
1011	goto fail;
1012
1013	schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1014	flags & (CURLU_GUESS_SCHEME\|
1015	CURLU_DEFAULT_SCHEME));
1016
1017	/* handle the file: scheme */
1018	if(schemelen && !strcmp(schemebuf, "file")) {
1019	bool uncpath = FALSE;
1020	if(urllen <= 6) {
1021	/* file:/ is not enough to actually be a complete file: URL */
1022	result = CURLUE_BAD_FILE_URL;
1023	goto fail;
1024	}
1025
1026	/* path has been allocated large enough to hold this */
1027	path = (char *)&url[5];
1028	pathlen = urllen - 5;
1029
1030	u->scheme = strdup("file");
1031	if(!u->scheme) {
1032	result = CURLUE_OUT_OF_MEMORY;
1033	goto fail;
1034	}
1035
1036	/* Extra handling URLs with an authority component (i.e. that start with
1037	* "file://")
1038	*
1039	* We allow omitted hostname (e.g. file:/<path>) -- valid according to
1040	* RFC 8089, but not the (current) WHAT-WG URL spec.
1041	*/
1042	if(path[0] == '/' && path[1] == '/') {
1043	/* swallow the two slashes */
1044	const char *ptr = &path[2];
1045
1046	/*
1047	* According to RFC 8089, a file: URL can be reliably dereferenced if:
1048	*
1049	* o it has no/blank hostname, or
1050	*
1051	* o the hostname matches "localhost" (case-insensitively), or
1052	*
1053	* o the hostname is a FQDN that resolves to this machine, or
1054	*
1055	* o it is an UNC String transformed to an URI (Windows only, RFC 8089
1056	* Appendix E.3).
1057	*
1058	* For brevity, we only consider URLs with empty, "localhost", or
1059	* "127.0.0.1" hostnames as local, otherwise as an UNC String.
1060	*
1061	* Additionally, there is an exception for URLs with a Windows drive
1062	* letter in the authority (which was accidentally omitted from RFC 8089
1063	* Appendix E, but believe me, it was meant to be there. --MK)
1064	*/
1065	if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1066	/* the URL includes a hostname, it must match "localhost" or
1067	"127.0.0.1" to be valid */
1068	if(checkprefix("localhost/", ptr) \|\|
1069	checkprefix("127.0.0.1/", ptr)) {
1070	ptr += 9; /* now points to the slash after the host */
1071	}
1072	else {
1073	#if defined(_WIN32)
1074	size_t len;
1075
1076	/* the hostname, NetBIOS computer name, can not contain disallowed
1077	chars, and the delimiting slash character must be appended to the
1078	hostname */
1079	path = strpbrk(ptr, "/\\:*?\"<>\|");
1080	if(!path \|\| *path != '/') {
1081	result = CURLUE_BAD_FILE_URL;
1082	goto fail;
1083	}
1084
1085	len = path - ptr;
1086	if(len) {
1087	CURLcode code = Curl_dyn_addn(&host, ptr, len);
1088	if(code) {
1089	result = cc2cu(code);
1090	goto fail;
1091	}
1092	uncpath = TRUE;
1093	}
1094
1095	ptr -= 2; /* now points to the // before the host in UNC */
1096	#else
1097	/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1098	none */
1099	result = CURLUE_BAD_FILE_URL;
1100	goto fail;
1101	#endif
1102	}
1103	}
1104
1105	path = ptr;
1106	pathlen = urllen - (ptr - url);
1107	}
1108
1109	if(!uncpath)
1110	/* no host for file: URLs by default */
1111	Curl_dyn_reset(&host);
1112
1113	#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1114	/* Do not allow Windows drive letters when not in Windows.
1115	* This catches both "file:/c:" and "file:c:" */
1116	if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) \|\|
1117	STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1118	/* File drive letters are only accepted in MS-DOS/Windows */
1119	result = CURLUE_BAD_FILE_URL;
1120	goto fail;
1121	}
1122	#else
1123	/* If the path starts with a slash and a drive letter, ditch the slash */
1124	if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1125	/* This cannot be done with strcpy, as the memory chunks overlap! */
1126	path++;
1127	pathlen--;
1128	}
1129	#endif
1130
1131	}
1132	else {
1133	/* clear path */
1134	const char *schemep = NULL;
1135	const char *hostp;
1136	size_t hostlen;
1137
1138	if(schemelen) {
1139	int i = 0;
1140	const char *p = &url[schemelen + 1];
1141	while((*p == '/') && (i < 4)) {
1142	p++;
1143	i++;
1144	}
1145
1146	schemep = schemebuf;
1147	if(!Curl_get_scheme_handler(schemep) &&
1148	!(flags & CURLU_NON_SUPPORT_SCHEME)) {
1149	result = CURLUE_UNSUPPORTED_SCHEME;
1150	goto fail;
1151	}
1152
1153	if((i < 1) \|\| (i > 3)) {
1154	/* less than one or more than three slashes */
1155	result = CURLUE_BAD_SLASHES;
1156	goto fail;
1157	}
1158	hostp = p; /* hostname starts here */
1159	}
1160	else {
1161	/* no scheme! */
1162
1163	if(!(flags & (CURLU_DEFAULT_SCHEME\|CURLU_GUESS_SCHEME))) {
1164	result = CURLUE_BAD_SCHEME;
1165	goto fail;
1166	}
1167	if(flags & CURLU_DEFAULT_SCHEME)
1168	schemep = DEFAULT_SCHEME;
1169
1170	/*
1171	* The URL was badly formatted, let's try without scheme specified.
1172	*/
1173	hostp = url;
1174	}
1175
1176	if(schemep) {
1177	u->scheme = strdup(schemep);
1178	if(!u->scheme) {
1179	result = CURLUE_OUT_OF_MEMORY;
1180	goto fail;
1181	}
1182	}
1183
1184	/* find the end of the hostname + port number */
1185	hostlen = strcspn(hostp, "/?#");
1186	path = &hostp[hostlen];
1187
1188	/* this pathlen also contains the query and the fragment */
1189	pathlen = urllen - (path - url);
1190	if(hostlen) {
1191
1192	result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1193	if(result)
1194	goto fail;
1195
1196	if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1197	const char *hostname = Curl_dyn_ptr(&host);
1198	/* legacy curl-style guess based on hostname */
1199	if(checkprefix("ftp.", hostname))
1200	schemep = "ftp";
1201	else if(checkprefix("dict.", hostname))
1202	schemep = "dict";
1203	else if(checkprefix("ldap.", hostname))
1204	schemep = "ldap";
1205	else if(checkprefix("imap.", hostname))
1206	schemep = "imap";
1207	else if(checkprefix("smtp.", hostname))
1208	schemep = "smtp";
1209	else if(checkprefix("pop3.", hostname))
1210	schemep = "pop3";
1211	else
1212	schemep = "http";
1213
1214	u->scheme = strdup(schemep);
1215	if(!u->scheme) {
1216	result = CURLUE_OUT_OF_MEMORY;
1217	goto fail;
1218	}
1219	u->guessed_scheme = TRUE;
1220	}
1221	}
1222	else if(flags & CURLU_NO_AUTHORITY) {
1223	/* allowed to be empty. */
1224	if(Curl_dyn_add(&host, "")) {
1225	result = CURLUE_OUT_OF_MEMORY;
1226	goto fail;
1227	}
1228	}
1229	else {
1230	result = CURLUE_NO_HOST;
1231	goto fail;
1232	}
1233	}
1234
1235	fragment = strchr(path, '#');
1236	if(fragment) {
1237	fraglen = pathlen - (fragment - path);
1238	u->fragment_present = TRUE;
1239	if(fraglen > 1) {
1240	/* skip the leading '#' in the copy but include the terminating null */
1241	if(flags & CURLU_URLENCODE) {
1242	struct dynbuf enc;
1243	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1244	result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1245	if(result)
1246	goto fail;
1247	u->fragment = Curl_dyn_ptr(&enc);
1248	}
1249	else {
1250	u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1251	if(!u->fragment) {
1252	result = CURLUE_OUT_OF_MEMORY;
1253	goto fail;
1254	}
1255	}
1256	}
1257	/* after this, pathlen still contains the query */
1258	pathlen -= fraglen;
1259	}
1260
1261	query = memchr(path, '?', pathlen);
1262	if(query) {
1263	size_t qlen = fragment ? (size_t)(fragment - query) :
1264	pathlen - (query - path);
1265	pathlen -= qlen;
1266	u->query_present = TRUE;
1267	if(qlen > 1) {
1268	if(flags & CURLU_URLENCODE) {
1269	struct dynbuf enc;
1270	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1271	/* skip the leading question mark */
1272	result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1273	if(result)
1274	goto fail;
1275	u->query = Curl_dyn_ptr(&enc);
1276	}
1277	else {
1278	u->query = Curl_memdup0(query + 1, qlen - 1);
1279	if(!u->query) {
1280	result = CURLUE_OUT_OF_MEMORY;
1281	goto fail;
1282	}
1283	}
1284	}
1285	else {
1286	/* single byte query */
1287	u->query = strdup("");
1288	if(!u->query) {
1289	result = CURLUE_OUT_OF_MEMORY;
1290	goto fail;
1291	}
1292	}
1293	}
1294
1295	if(pathlen && (flags & CURLU_URLENCODE)) {
1296	struct dynbuf enc;
1297	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1298	result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1299	if(result)
1300	goto fail;
1301	pathlen = Curl_dyn_len(&enc);
1302	path = u->path = Curl_dyn_ptr(&enc);
1303	}
1304
1305	if(pathlen <= 1) {
1306	/* there is no path left or just the slash, unset */
1307	path = NULL;
1308	}
1309	else {
1310	if(!u->path) {
1311	u->path = Curl_memdup0(path, pathlen);
1312	if(!u->path) {
1313	result = CURLUE_OUT_OF_MEMORY;
1314	goto fail;
1315	}
1316	path = u->path;
1317	}
1318	else if(flags & CURLU_URLENCODE)
1319	/* it might have encoded more than just the path so cut it */
1320	u->path[pathlen] = 0;
1321
1322	if(!(flags & CURLU_PATH_AS_IS)) {
1323	/* remove ../ and ./ sequences according to RFC3986 */
1324	char *dedot;
1325	int err = dedotdotify((char *)path, pathlen, &dedot);
1326	if(err) {
1327	result = CURLUE_OUT_OF_MEMORY;
1328	goto fail;
1329	}
1330	if(dedot) {
1331	free(u->path);
1332	u->path = dedot;
1333	}
1334	}
1335	}
1336
1337	u->host = Curl_dyn_ptr(&host);
1338
1339	return result;
1340	fail:
1341	Curl_dyn_free(&host);
1342	free_urlhandle(u);
1343	return result;
1344	}
1345
1346	/*
1347	* Parse the URL and, if successful, replace everything in the Curl_URL struct.
1348	*/
1349	static CURLUcode parseurl_and_replace(const char url, CURLU u,
1350	unsigned int flags)
1351	{
1352	CURLUcode result;
1353	CURLU tmpurl;
1354	memset(&tmpurl, 0, sizeof(tmpurl));
1355	result = parseurl(url, &tmpurl, flags);
1356	if(!result) {
1357	free_urlhandle(u);
1358	*u = tmpurl;
1359	}
1360	return result;
1361	}
1362
1363	/*
1364	*/
1365	CURLU *curl_url(void)
1366	{
1367	return calloc(1, sizeof(struct Curl_URL));
1368	}
1369
1370	void curl_url_cleanup(CURLU *u)
1371	{
1372	if(u) {
1373	free_urlhandle(u);
1374	free(u);
1375	}
1376	}
1377
/*
 * DUP() duplicates the string member 'name' from the struct pointed to by
 * 'src' into the struct pointed to by 'dest', if the source member is set.
 * On allocation failure it jumps to a 'fail' label that the calling function
 * must provide. The pointer arguments are parenthesized so that expressions
 * can be passed safely.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1386
1387	CURLU curl_url_dup(const CURLU in)
1388	{
1389	struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1390	if(u) {
1391	DUP(u, in, scheme);
1392	DUP(u, in, user);
1393	DUP(u, in, password);
1394	DUP(u, in, options);
1395	DUP(u, in, host);
1396	DUP(u, in, port);
1397	DUP(u, in, path);
1398	DUP(u, in, query);
1399	DUP(u, in, fragment);
1400	DUP(u, in, zoneid);
1401	u->portnum = in->portnum;
1402	u->fragment_present = in->fragment_present;
1403	u->query_present = in->query_present;
1404	}
1405	return u;
1406	fail:
1407	curl_url_cleanup(u);
1408	return NULL;
1409	}
1410
1411	CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1412	char **part, unsigned int flags)
1413	{
1414	const char *ptr;
1415	CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1416	char portbuf[7];
1417	bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
1418	bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1419	bool punycode = FALSE;
1420	bool depunyfy = FALSE;
1421	bool plusdecode = FALSE;
1422	(void)flags;
1423	if(!u)
1424	return CURLUE_BAD_HANDLE;
1425	if(!part)
1426	return CURLUE_BAD_PARTPOINTER;
1427	*part = NULL;
1428
1429	switch(what) {
1430	case CURLUPART_SCHEME:
1431	ptr = u->scheme;
1432	ifmissing = CURLUE_NO_SCHEME;
1433	urldecode = FALSE; /* never for schemes */
1434	if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
1435	return CURLUE_NO_SCHEME;
1436	break;
1437	case CURLUPART_USER:
1438	ptr = u->user;
1439	ifmissing = CURLUE_NO_USER;
1440	break;
1441	case CURLUPART_PASSWORD:
1442	ptr = u->password;
1443	ifmissing = CURLUE_NO_PASSWORD;
1444	break;
1445	case CURLUPART_OPTIONS:
1446	ptr = u->options;
1447	ifmissing = CURLUE_NO_OPTIONS;
1448	break;
1449	case CURLUPART_HOST:
1450	ptr = u->host;
1451	ifmissing = CURLUE_NO_HOST;
1452	punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1453	depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1454	break;
1455	case CURLUPART_ZONEID:
1456	ptr = u->zoneid;
1457	ifmissing = CURLUE_NO_ZONEID;
1458	break;
1459	case CURLUPART_PORT:
1460	ptr = u->port;
1461	ifmissing = CURLUE_NO_PORT;
1462	urldecode = FALSE; /* never for port */
1463	if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1464	/* there is no stored port number, but asked to deliver
1465	a default one for the scheme */
1466	const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1467	if(h) {
1468	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1469	ptr = portbuf;
1470	}
1471	}
1472	else if(ptr && u->scheme) {
1473	/* there is a stored port number, but ask to inhibit if
1474	it matches the default one for the scheme */
1475	const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1476	if(h && (h->defport == u->portnum) &&
1477	(flags & CURLU_NO_DEFAULT_PORT))
1478	ptr = NULL;
1479	}
1480	break;
1481	case CURLUPART_PATH:
1482	ptr = u->path;
1483	if(!ptr)
1484	ptr = "/";
1485	break;
1486	case CURLUPART_QUERY:
1487	ptr = u->query;
1488	ifmissing = CURLUE_NO_QUERY;
1489	plusdecode = urldecode;
1490	if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
1491	/* there was a blank query and the user do not ask for it */
1492	ptr = NULL;
1493	break;
1494	case CURLUPART_FRAGMENT:
1495	ptr = u->fragment;
1496	ifmissing = CURLUE_NO_FRAGMENT;
1497	if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
1498	/* there was a blank fragment and the user asks for it */
1499	ptr = "";
1500	break;
1501	case CURLUPART_URL: {
1502	char *url;
1503	char *scheme;
1504	char *options = u->options;
1505	char *port = u->port;
1506	char *allochost = NULL;
1507	bool show_fragment =
1508	u->fragment \|\| (u->fragment_present && flags & CURLU_GET_EMPTY);
1509	bool show_query =
1510	(u->query && u->query[0]) \|\|
1511	(u->query_present && flags & CURLU_GET_EMPTY);
1512	punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1513	depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1514	if(u->scheme && strcasecompare("file", u->scheme)) {
1515	url = aprintf("file://%s%s%s",
1516	u->path,
1517	show_fragment ? "#": "",
1518	u->fragment ? u->fragment : "");
1519	}
1520	else if(!u->host)
1521	return CURLUE_NO_HOST;
1522	else {
1523	const struct Curl_handler *h = NULL;
1524	char schemebuf[MAX_SCHEME_LEN + 5];
1525	if(u->scheme)
1526	scheme = u->scheme;
1527	else if(flags & CURLU_DEFAULT_SCHEME)
1528	scheme = (char *) DEFAULT_SCHEME;
1529	else
1530	return CURLUE_NO_SCHEME;
1531
1532	h = Curl_get_scheme_handler(scheme);
1533	if(!port && (flags & CURLU_DEFAULT_PORT)) {
1534	/* there is no stored port number, but asked to deliver
1535	a default one for the scheme */
1536	if(h) {
1537	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1538	port = portbuf;
1539	}
1540	}
1541	else if(port) {
1542	/* there is a stored port number, but asked to inhibit if it matches
1543	the default one for the scheme */
1544	if(h && (h->defport == u->portnum) &&
1545	(flags & CURLU_NO_DEFAULT_PORT))
1546	port = NULL;
1547	}
1548
1549	if(h && !(h->flags & PROTOPT_URLOPTIONS))
1550	options = NULL;
1551
1552	if(u->host[0] == '[') {
1553	if(u->zoneid) {
1554	/* make it '[ host %25 zoneid ]' */
1555	struct dynbuf enc;
1556	size_t hostlen = strlen(u->host);
1557	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1558	if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1559	u->zoneid))
1560	return CURLUE_OUT_OF_MEMORY;
1561	allochost = Curl_dyn_ptr(&enc);
1562	}
1563	}
1564	else if(urlencode) {
1565	allochost = curl_easy_escape(NULL, u->host, 0);
1566	if(!allochost)
1567	return CURLUE_OUT_OF_MEMORY;
1568	}
1569	else if(punycode) {
1570	if(!Curl_is_ASCII_name(u->host)) {
1571	#ifndef USE_IDN
1572	return CURLUE_LACKS_IDN;
1573	#else
1574	CURLcode result = Curl_idn_decode(u->host, &allochost);
1575	if(result)
1576	return (result == CURLE_OUT_OF_MEMORY) ?
1577	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1578	#endif
1579	}
1580	}
1581	else if(depunyfy) {
1582	if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1583	#ifndef USE_IDN
1584	return CURLUE_LACKS_IDN;
1585	#else
1586	CURLcode result = Curl_idn_encode(u->host, &allochost);
1587	if(result)
1588	/* this is the most likely error */
1589	return (result == CURLE_OUT_OF_MEMORY) ?
1590	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1591	#endif
1592	}
1593	}
1594
1595	if(!(flags & CURLU_NO_GUESS_SCHEME) \|\| !u->guessed_scheme)
1596	msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1597	else
1598	schemebuf[0] = 0;
1599
1600	url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1601	schemebuf,
1602	u->user ? u->user : "",
1603	u->password ? ":": "",
1604	u->password ? u->password : "",
1605	options ? ";" : "",
1606	options ? options : "",
1607	(u->user \|\| u->password \|\| options) ? "@": "",
1608	allochost ? allochost : u->host,
1609	port ? ":": "",
1610	port ? port : "",
1611	u->path ? u->path : "/",
1612	show_query ? "?": "",
1613	u->query ? u->query : "",
1614	show_fragment ? "#": "",
1615	u->fragment ? u->fragment : "");
1616	free(allochost);
1617	}
1618	if(!url)
1619	return CURLUE_OUT_OF_MEMORY;
1620	*part = url;
1621	return CURLUE_OK;
1622	}
1623	default:
1624	ptr = NULL;
1625	break;
1626	}
1627	if(ptr) {
1628	size_t partlen = strlen(ptr);
1629	size_t i = 0;
1630	*part = Curl_memdup0(ptr, partlen);
1631	if(!*part)
1632	return CURLUE_OUT_OF_MEMORY;
1633	if(plusdecode) {
1634	/* convert + to space */
1635	char plus = part;
1636	for(i = 0; i < partlen; ++plus, i++) {
1637	if(*plus == '+')
1638	*plus = ' ';
1639	}
1640	}
1641	if(urldecode) {
1642	char *decoded;
1643	size_t dlen;
1644	/* this unconditional rejection of control bytes is documented
1645	API behavior */
1646	CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1647	free(*part);
1648	if(res) {
1649	*part = NULL;
1650	return CURLUE_URLDECODE;
1651	}
1652	*part = decoded;
1653	partlen = dlen;
1654	}
1655	if(urlencode) {
1656	struct dynbuf enc;
1657	CURLUcode uc;
1658	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1659	uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1660	if(uc)
1661	return uc;
1662	free(*part);
1663	*part = Curl_dyn_ptr(&enc);
1664	}
1665	else if(punycode) {
1666	if(!Curl_is_ASCII_name(u->host)) {
1667	#ifndef USE_IDN
1668	return CURLUE_LACKS_IDN;
1669	#else
1670	char *allochost;
1671	CURLcode result = Curl_idn_decode(*part, &allochost);
1672	if(result)
1673	return (result == CURLE_OUT_OF_MEMORY) ?
1674	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1675	free(*part);
1676	*part = allochost;
1677	#endif
1678	}
1679	}
1680	else if(depunyfy) {
1681	if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1682	#ifndef USE_IDN
1683	return CURLUE_LACKS_IDN;
1684	#else
1685	char *allochost;
1686	CURLcode result = Curl_idn_encode(*part, &allochost);
1687	if(result)
1688	return (result == CURLE_OUT_OF_MEMORY) ?
1689	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1690	free(*part);
1691	*part = allochost;
1692	#endif
1693	}
1694	}
1695
1696	return CURLUE_OK;
1697	}
1698	else
1699	return ifmissing;
1700	}
1701
1702	CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1703	const char *part, unsigned int flags)
1704	{
1705	char **storep = NULL;
1706	bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1707	bool plusencode = FALSE;
1708	bool urlskipslash = FALSE;
1709	bool leadingslash = FALSE;
1710	bool appendquery = FALSE;
1711	bool equalsencode = FALSE;
1712	size_t nalloc;
1713
1714	if(!u)
1715	return CURLUE_BAD_HANDLE;
1716	if(!part) {
1717	/* setting a part to NULL clears it */
1718	switch(what) {
1719	case CURLUPART_URL:
1720	break;
1721	case CURLUPART_SCHEME:
1722	storep = &u->scheme;
1723	u->guessed_scheme = FALSE;
1724	break;
1725	case CURLUPART_USER:
1726	storep = &u->user;
1727	break;
1728	case CURLUPART_PASSWORD:
1729	storep = &u->password;
1730	break;
1731	case CURLUPART_OPTIONS:
1732	storep = &u->options;
1733	break;
1734	case CURLUPART_HOST:
1735	storep = &u->host;
1736	break;
1737	case CURLUPART_ZONEID:
1738	storep = &u->zoneid;
1739	break;
1740	case CURLUPART_PORT:
1741	u->portnum = 0;
1742	storep = &u->port;
1743	break;
1744	case CURLUPART_PATH:
1745	storep = &u->path;
1746	break;
1747	case CURLUPART_QUERY:
1748	storep = &u->query;
1749	u->query_present = FALSE;
1750	break;
1751	case CURLUPART_FRAGMENT:
1752	storep = &u->fragment;
1753	u->fragment_present = FALSE;
1754	break;
1755	default:
1756	return CURLUE_UNKNOWN_PART;
1757	}
1758	if(storep && *storep) {
1759	Curl_safefree(*storep);
1760	}
1761	else if(!storep) {
1762	free_urlhandle(u);
1763	memset(u, 0, sizeof(struct Curl_URL));
1764	}
1765	return CURLUE_OK;
1766	}
1767
1768	nalloc = strlen(part);
1769	if(nalloc > CURL_MAX_INPUT_LENGTH)
1770	/* excessive input length */
1771	return CURLUE_MALFORMED_INPUT;
1772
1773	switch(what) {
1774	case CURLUPART_SCHEME: {
1775	size_t plen = strlen(part);
1776	const char *s = part;
1777	if((plen > MAX_SCHEME_LEN) \|\| (plen < 1))
1778	/* too long or too short */
1779	return CURLUE_BAD_SCHEME;
1780	/* verify that it is a fine scheme */
1781	if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1782	return CURLUE_UNSUPPORTED_SCHEME;
1783	storep = &u->scheme;
1784	urlencode = FALSE; /* never */
1785	if(ISALPHA(*s)) {
1786	/* ALPHA ( ALPHA / DIGIT / "+" / "-" / "." ) /
1787	while(--plen) {
1788	if(ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.'))
1789	s++; /* fine */
1790	else
1791	return CURLUE_BAD_SCHEME;
1792	}
1793	}
1794	else
1795	return CURLUE_BAD_SCHEME;
1796	u->guessed_scheme = FALSE;
1797	break;
1798	}
1799	case CURLUPART_USER:
1800	storep = &u->user;
1801	break;
1802	case CURLUPART_PASSWORD:
1803	storep = &u->password;
1804	break;
1805	case CURLUPART_OPTIONS:
1806	storep = &u->options;
1807	break;
1808	case CURLUPART_HOST:
1809	storep = &u->host;
1810	Curl_safefree(u->zoneid);
1811	break;
1812	case CURLUPART_ZONEID:
1813	storep = &u->zoneid;
1814	break;
1815	case CURLUPART_PORT:
1816	if(!ISDIGIT(part[0]))
1817	/* not a number */
1818	return CURLUE_BAD_PORT_NUMBER;
1819	else {
1820	char *tmp;
1821	char *endp;
1822	unsigned long port;
1823	errno = 0;
1824	port = strtoul(part, &endp, 10); /* must be decimal */
1825	if(errno \|\| (port > 0xffff) \|\| *endp)
1826	/* weirdly provided number, not good! */
1827	return CURLUE_BAD_PORT_NUMBER;
1828	tmp = strdup(part);
1829	if(!tmp)
1830	return CURLUE_OUT_OF_MEMORY;
1831	free(u->port);
1832	u->port = tmp;
1833	u->portnum = (unsigned short)port;
1834	return CURLUE_OK;
1835	}
1836	case CURLUPART_PATH:
1837	urlskipslash = TRUE;
1838	leadingslash = TRUE; /* enforce */
1839	storep = &u->path;
1840	break;
1841	case CURLUPART_QUERY:
1842	plusencode = urlencode;
1843	appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
1844	equalsencode = appendquery;
1845	storep = &u->query;
1846	u->query_present = TRUE;
1847	break;
1848	case CURLUPART_FRAGMENT:
1849	storep = &u->fragment;
1850	u->fragment_present = TRUE;
1851	break;
1852	case CURLUPART_URL: {
1853	/*
1854	* Allow a new URL to replace the existing (if any) contents.
1855	*
1856	* If the existing contents is enough for a URL, allow a relative URL to
1857	* replace it.
1858	*/
1859	CURLcode result;
1860	CURLUcode uc;
1861	char *oldurl;
1862	char *redired_url;
1863
1864	if(!nalloc)
1865	/* a blank URL is not a valid URL */
1866	return CURLUE_MALFORMED_INPUT;
1867
1868	/* if the new thing is absolute or the old one is not
1869	* (we could not get an absolute URL in 'oldurl'),
1870	* then replace the existing with the new. */
1871	if(Curl_is_absolute_url(part, NULL, 0,
1872	flags & (CURLU_GUESS_SCHEME\|
1873	CURLU_DEFAULT_SCHEME))
1874	\|\| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1875	return parseurl_and_replace(part, u, flags);
1876	}
1877
1878	/* apply the relative part to create a new URL
1879	* and replace the existing one with it. */
1880	result = concat_url(oldurl, part, &redired_url);
1881	free(oldurl);
1882	if(result)
1883	return cc2cu(result);
1884
1885	uc = parseurl_and_replace(redired_url, u, flags);
1886	free(redired_url);
1887	return uc;
1888	}
1889	default:
1890	return CURLUE_UNKNOWN_PART;
1891	}
1892	DEBUGASSERT(storep);
1893	{
1894	const char *newp;
1895	struct dynbuf enc;
1896	Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1897
1898	if(leadingslash && (part[0] != '/')) {
1899	CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1900	if(result)
1901	return cc2cu(result);
1902	}
1903	if(urlencode) {
1904	const unsigned char *i;
1905
1906	for(i = (const unsigned char )part; i; i++) {
1907	CURLcode result;
1908	if((*i == ' ') && plusencode) {
1909	result = Curl_dyn_addn(&enc, "+", 1);
1910	if(result)
1911	return CURLUE_OUT_OF_MEMORY;
1912	}
1913	else if(ISUNRESERVED(*i) \|\|
1914	((*i == '/') && urlskipslash) \|\|
1915	((*i == '=') && equalsencode)) {
1916	if((*i == '=') && equalsencode)
1917	/* only skip the first equals sign */
1918	equalsencode = FALSE;
1919	result = Curl_dyn_addn(&enc, i, 1);
1920	if(result)
1921	return cc2cu(result);
1922	}
1923	else {
1924	char out[3]={'%'};
1925	out[1] = hexdigits[*i >> 4];
1926	out[2] = hexdigits[*i & 0xf];
1927	result = Curl_dyn_addn(&enc, out, 3);
1928	if(result)
1929	return cc2cu(result);
1930	}
1931	}
1932	}
1933	else {
1934	char *p;
1935	CURLcode result = Curl_dyn_add(&enc, part);
1936	if(result)
1937	return cc2cu(result);
1938	p = Curl_dyn_ptr(&enc);
1939	while(*p) {
1940	/* make sure percent encoded are lower case */
1941	if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1942	(ISUPPER(p[1]) \|\| ISUPPER(p[2]))) {
1943	p[1] = Curl_raw_tolower(p[1]);
1944	p[2] = Curl_raw_tolower(p[2]);
1945	p += 3;
1946	}
1947	else
1948	p++;
1949	}
1950	}
1951	newp = Curl_dyn_ptr(&enc);
1952
1953	if(appendquery && newp) {
1954	/* Append the 'newp' string onto the old query. Add a '&' separator if
1955	none is present at the end of the existing query already */
1956
1957	size_t querylen = u->query ? strlen(u->query) : 0;
1958	bool addamperand = querylen && (u->query[querylen -1] != '&');
1959	if(querylen) {
1960	struct dynbuf qbuf;
1961	Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1962
1963	if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1964	goto nomem;
1965
1966	if(addamperand) {
1967	if(Curl_dyn_addn(&qbuf, "&", 1))
1968	goto nomem;
1969	}
1970	if(Curl_dyn_add(&qbuf, newp))
1971	goto nomem;
1972	Curl_dyn_free(&enc);
1973	free(*storep);
1974	*storep = Curl_dyn_ptr(&qbuf);
1975	return CURLUE_OK;
1976	nomem:
1977	Curl_dyn_free(&enc);
1978	return CURLUE_OUT_OF_MEMORY;
1979	}
1980	}
1981
1982	else if(what == CURLUPART_HOST) {
1983	size_t n = Curl_dyn_len(&enc);
1984	if(!n && (flags & CURLU_NO_AUTHORITY)) {
1985	/* Skip hostname check, it is allowed to be empty. */
1986	}
1987	else {
1988	bool bad = FALSE;
1989	if(!n)
1990	bad = TRUE; /* empty hostname is not okay */
1991	else if(!urlencode) {
1992	/* if the host name part was not URL encoded here, it was set ready
1993	URL encoded so we need to decode it to check */
1994	size_t dlen;
1995	char *decoded = NULL;
1996	CURLcode result =
1997	Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
1998	if(result \|\| hostname_check(u, decoded, dlen))
1999	bad = TRUE;
2000	free(decoded);
2001	}
2002	else if(hostname_check(u, (char *)newp, n))
2003	bad = TRUE;
2004	if(bad) {
2005	Curl_dyn_free(&enc);
2006	return CURLUE_BAD_HOSTNAME;
2007	}
2008	}
2009	}
2010
2011	free(*storep);
2012	storep = (char )newp;
2013	}
2014	return CURLUE_OK;
2015	}

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/curl-8.11.1/lib/urlapi.c@ 108333

以其他格式下載: