urlapi.c@ 98341

最後變更在這個檔案從98341是 95312,由 vboxsync 提交於 3 年前
libs/{curl,libxml2}: OSE export fixes, bugref:8515
屬性 svn:eol-style 設為 `native`
檔案大小: 44.3 KB

行
1	/***************************************************************************
2	* _ _ ____ _
3	* Project ___\| \| \| \| _ \\| \|
4	* / __\| \| \| \| \|_) \| \|
5	* \| (__\| \|_\| \| _ <\| \|___
6	* \___\|\___/\|_\| \_\_____\|
7	*
8	* Copyright (C) 1998 - 2022, Daniel Stenberg, <[email protected]>, et al.
9	*
10	* This software is licensed as described in the file COPYING, which
11	* you should have received as part of this distribution. The terms
12	* are also available at https://curl.se/docs/copyright.html.
13	*
14	* You may opt to use, copy, modify, merge, publish, distribute and/or sell
15	* copies of the Software, and permit persons to whom the Software is
16	* furnished to do so, under the terms of the COPYING file.
17	*
18	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19	* KIND, either express or implied.
20	*
21	***************************************************************************/
22
23	#include "curl_setup.h"
24
25	#include "urldata.h"
26	#include "urlapi-int.h"
27	#include "strcase.h"
28	#include "dotdot.h"
29	#include "url.h"
30	#include "escape.h"
31	#include "curl_ctype.h"
32	#include "inet_pton.h"
33	#include "inet_ntop.h"
34
35	/* The last 3 #include files should be in this order */
36	#include "curl_printf.h"
37	#include "curl_memory.h"
38	#include "memdebug.h"
39
/* MSDOS/Windows style drive prefix, eg c: in c:foo.
 * Evaluates to non-zero when 'str' begins with an ASCII letter followed by
 * a colon. The argument is evaluated several times, so it must be free of
 * side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))

/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash or NUL.
 * The argument is evaluated several times, so it must be free of side
 * effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))

/* scheme is not URL encoded, the longest libcurl supported ones are... */
#define MAX_SCHEME_LEN 40
56
/* Internal representation of CURLU. Point to URL-encoded strings.
 * Every char pointer is either NULL or a heap allocation owned by the
 * handle; free_urlhandle() releases all of them. */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;

  char *scratch; /* temporary scratch area */
  char *temppath; /* temporary path pointer */
  long portnum; /* the numerical version */
};

/* scheme used by seturl() when CURLU_DEFAULT_SCHEME is set and the URL has
   none */
#define DEFAULT_SCHEME "https"
76
77	static void free_urlhandle(struct Curl_URL *u)
78	{
79	free(u->scheme);
80	free(u->user);
81	free(u->password);
82	free(u->options);
83	free(u->host);
84	free(u->zoneid);
85	free(u->port);
86	free(u->path);
87	free(u->query);
88	free(u->fragment);
89	free(u->scratch);
90	free(u->temppath);
91	}
92
/*
 * Find the separator at the end of the host name, or the '?' in cases like
 * http://www.url.com?id=2380
 *
 * Returns a pointer into 'url': the first '/' or '?' found after the
 * leading "//" (or after the start of the string when there is no "//"),
 * whichever comes first. When neither exists, the returned pointer is the
 * terminating zero byte.
 */
static const char *find_host_sep(const char *url)
{
  const char *sep;
  const char *query;

  /* Find the start of the hostname */
  sep = strstr(url, "//");
  if(!sep)
    sep = url;
  else
    sep += 2;

  query = strchr(sep, '?');
  sep = strchr(sep, '/');

  if(!sep)
    sep = url + strlen(url);

  if(!query)
    query = url + strlen(url);

  return sep < query ? sep : query;
}
120
121	/*
122	* Decide in an encoding-independent manner whether a character in an
123	* URL must be escaped. The same criterion must be used in strlen_url()
124	* and strcpy_url().
125	*/
126	static bool urlchar_needs_escaping(int c)
127	{
128	return !(ISCNTRL(c) \|\| ISSPACE(c) \|\| ISGRAPH(c));
129	}
130
131	/*
132	* strlen_url() returns the length of the given URL if the spaces within the
133	* URL were properly URL encoded.
134	* URL encoding should be skipped for host names, otherwise IDN resolution
135	* will fail.
136	*/
137	static size_t strlen_url(const char *url, bool relative)
138	{
139	const unsigned char *ptr;
140	size_t newlen = 0;
141	bool left = TRUE; /* left side of the ? */
142	const unsigned char host_sep = (const unsigned char ) url;
143
144	if(!relative)
145	host_sep = (const unsigned char *) find_host_sep(url);
146
147	for(ptr = (unsigned char )url; ptr; ptr++) {
148
149	if(ptr < host_sep) {
150	++newlen;
151	continue;
152	}
153
154	if(*ptr == ' ') {
155	if(left)
156	newlen += 3;
157	else
158	newlen++;
159	continue;
160	}
161
162	if (*ptr == '?')
163	left = FALSE;
164
165	if(urlchar_needs_escaping(*ptr))
166	newlen += 2;
167
168	newlen++;
169	}
170
171	return newlen;
172	}
173
174	/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
175	* the source URL accordingly.
176	* URL encoding should be skipped for host names, otherwise IDN resolution
177	* will fail.
178	*/
179	static void strcpy_url(char output, const char url, bool relative)
180	{
181	/* we must add this with whitespace-replacing */
182	bool left = TRUE;
183	const unsigned char *iptr;
184	char *optr = output;
185	const unsigned char host_sep = (const unsigned char ) url;
186
187	if(!relative)
188	host_sep = (const unsigned char *) find_host_sep(url);
189
190	for(iptr = (unsigned char )url; / read from here */
191	iptr; / until zero byte */
192	iptr++) {
193
194	if(iptr < host_sep) {
195	optr++ = iptr;
196	continue;
197	}
198
199	if(*iptr == ' ') {
200	if(left) {
201	optr++='%'; / add a '%' */
202	optr++='2'; / add a '2' */
203	optr++='0'; / add a '0' */
204	}
205	else
206	optr++='+'; / add a '+' here */
207	continue;
208	}
209
210	if(*iptr == '?')
211	left = FALSE;
212
213	if(urlchar_needs_escaping(*iptr)) {
214	msnprintf(optr, 4, "%%%02x", *iptr);
215	optr += 3;
216	}
217	else
218	optr++ = iptr;
219	}
220	optr = 0; / null-terminate output buffer */
221
222	}
223
224	/*
225	* Returns true if the given URL is absolute (as opposed to relative). Returns
226	* the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must
227	* be larger than MAX_SCHEME_LEN if buf is set.
228	*/
229	bool Curl_is_absolute_url(const char url, char buf, size_t buflen)
230	{
231	int i;
232	DEBUGASSERT(!buf \|\| (buflen > MAX_SCHEME_LEN));
233	(void)buflen; /* only used in debug-builds */
234	if(buf)
235	buf[0] = 0; /* always leave a defined value in buf */
236	#ifdef WIN32
237	if(STARTS_WITH_DRIVE_PREFIX(url))
238	return FALSE;
239	#endif
240	for(i = 0; i < MAX_SCHEME_LEN; ++i) {
241	char s = url[i];
242	if(s && (ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.') )) {
243	/* RFC 3986 3.1 explains:
244	scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
245	*/
246	}
247	else {
248	break;
249	}
250	}
251	if(i && (url[i] == ':') && (url[i + 1] == '/')) {
252	if(buf) {
253	buf[i] = 0;
254	while(i--) {
255	buf[i] = (char)TOLOWER(url[i]);
256	}
257	}
258	return TRUE;
259	}
260	return FALSE;
261	}
262
263	/*
264	* Concatenate a relative URL to a base URL making it absolute.
265	* URL-encodes any spaces.
266	* The returned pointer must be freed by the caller unless NULL
267	* (returns NULL on out of memory).
268	*/
269	static char concat_url(const char base, const char *relurl)
270	{
271	/***
272	TRY to append this new path to the old URL
273	to the right of the host part. Oh crap, this is doomed to cause
274	problems in the future...
275	*/
276	char *newest;
277	char *protsep;
278	char *pathsep;
279	size_t newlen;
280	bool host_changed = FALSE;
281
282	const char *useurl = relurl;
283	size_t urllen;
284
285	/* we must make our own copy of the URL to play with, as it may
286	point to read-only data */
287	char *url_clone = strdup(base);
288
289	if(!url_clone)
290	return NULL; /* skip out of this NOW */
291
292	/* protsep points to the start of the host name */
293	protsep = strstr(url_clone, "//");
294	if(!protsep)
295	protsep = url_clone;
296	else
297	protsep += 2; /* pass the slashes */
298
299	if('/' != relurl[0]) {
300	int level = 0;
301
302	/* First we need to find out if there's a ?-letter in the URL,
303	and cut it and the right-side of that off */
304	pathsep = strchr(protsep, '?');
305	if(pathsep)
306	*pathsep = 0;
307
308	/* we have a relative path to append to the last slash if there's one
309	available, or if the new URL is just a query string (starts with a
310	'?') we append the new one at the end of the entire currently worked
311	out URL */
312	if(useurl[0] != '?') {
313	pathsep = strrchr(protsep, '/');
314	if(pathsep)
315	*pathsep = 0;
316	}
317
318	/* Check if there's any slash after the host name, and if so, remember
319	that position instead */
320	pathsep = strchr(protsep, '/');
321	if(pathsep)
322	protsep = pathsep + 1;
323	else
324	protsep = NULL;
325
326	/* now deal with one "./" or any amount of "../" in the newurl
327	and act accordingly */
328
329	if((useurl[0] == '.') && (useurl[1] == '/'))
330	useurl += 2; /* just skip the "./" */
331
332	while((useurl[0] == '.') &&
333	(useurl[1] == '.') &&
334	(useurl[2] == '/')) {
335	level++;
336	useurl += 3; /* pass the "../" */
337	}
338
339	if(protsep) {
340	while(level--) {
341	/* cut off one more level from the right of the original URL */
342	pathsep = strrchr(protsep, '/');
343	if(pathsep)
344	*pathsep = 0;
345	else {
346	*protsep = 0;
347	break;
348	}
349	}
350	}
351	}
352	else {
353	/* We got a new absolute path for this server */
354
355	if(relurl[1] == '/') {
356	/* the new URL starts with //, just keep the protocol part from the
357	original one */
358	*protsep = 0;
359	useurl = &relurl[2]; /* we keep the slashes from the original, so we
360	skip the new ones */
361	host_changed = TRUE;
362	}
363	else {
364	/* cut off the original URL from the first slash, or deal with URLs
365	without slash */
366	pathsep = strchr(protsep, '/');
367	if(pathsep) {
368	/* When people use badly formatted URLs, such as
369	"http://www.url.com?dir=/home/daniel" we must not use the first
370	slash, if there's a ?-letter before it! */
371	char *sep = strchr(protsep, '?');
372	if(sep && (sep < pathsep))
373	pathsep = sep;
374	*pathsep = 0;
375	}
376	else {
377	/* There was no slash. Now, since we might be operating on a badly
378	formatted URL, such as "http://www.url.com?id=2380" which doesn't
379	use a slash separator as it is supposed to, we need to check for a
380	?-letter as well! */
381	pathsep = strchr(protsep, '?');
382	if(pathsep)
383	*pathsep = 0;
384	}
385	}
386	}
387
388	/* If the new part contains a space, this is a mighty stupid redirect
389	but we still make an effort to do "right". To the left of a '?'
390	letter we replace each space with %20 while it is replaced with '+'
391	on the right side of the '?' letter.
392	*/
393	newlen = strlen_url(useurl, !host_changed);
394
395	urllen = strlen(url_clone);
396
397	newest = malloc(urllen + 1 + /* possible slash */
398	newlen + 1 /* zero byte */);
399
400	if(!newest) {
401	free(url_clone); /* don't leak this */
402	return NULL;
403	}
404
405	/* copy over the root url part */
406	memcpy(newest, url_clone, urllen);
407
408	/* check if we need to append a slash */
409	if(('/' == useurl[0]) \|\| (protsep && !*protsep) \|\| ('?' == useurl[0]))
410	;
411	else
412	newest[urllen++]='/';
413
414	/* then append the new piece on the right side */
415	strcpy_url(&newest[urllen], useurl, !host_changed);
416
417	free(url_clone);
418
419	return newest;
420	}
421
422	/* scan for byte values < 31 or 127 */
423	static bool junkscan(const char *part, unsigned int flags)
424	{
425	if(part) {
426	static const char badbytes[]={
427	/* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
428	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
429	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
430	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
431	0x7f, 0x00 /* null-terminate */
432	};
433	size_t n = strlen(part);
434	size_t nfine = strcspn(part, badbytes);
435	if(nfine != n)
436	/* since we don't know which part is scanned, return a generic error
437	code */
438	return TRUE;
439	if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
440	return TRUE;
441	}
442	return FALSE;
443	}
444
445	/*
446	* parse_hostname_login()
447	*
448	* Parse the login details (user name, password and options) from the URL and
449	* strip them out of the host name
450	*
451	*/
452	static CURLUcode parse_hostname_login(struct Curl_URL *u,
453	char **hostname,
454	unsigned int flags)
455	{
456	CURLUcode result = CURLUE_OK;
457	CURLcode ccode;
458	char *userp = NULL;
459	char *passwdp = NULL;
460	char *optionsp = NULL;
461	const struct Curl_handler *h = NULL;
462
463	/* At this point, we're hoping all the other special cases have
464	* been taken care of, so conn->host.name is at most
465	* [user[:password][;options]]@]hostname
466	*
467	* We need somewhere to put the embedded details, so do that first.
468	*/
469
470	char ptr = strchr(hostname, '@');
471	char login = hostname;
472
473	if(!ptr)
474	goto out;
475
476	/* We will now try to extract the
477	* possible login information in a string like:
478	* ftp://user:[email protected]:8021/README */
479	*hostname = ++ptr;
480
481	/* if this is a known scheme, get some details */
482	if(u->scheme)
483	h = Curl_builtin_scheme(u->scheme);
484
485	/* We could use the login information in the URL so extract it. Only parse
486	options if the handler says we should. Note that 'h' might be NULL! */
487	ccode = Curl_parse_login_details(login, ptr - login - 1,
488	&userp, &passwdp,
489	(h && (h->flags & PROTOPT_URLOPTIONS)) ?
490	&optionsp:NULL);
491	if(ccode) {
492	result = CURLUE_BAD_LOGIN;
493	goto out;
494	}
495
496	if(userp) {
497	if(flags & CURLU_DISALLOW_USER) {
498	/* Option DISALLOW_USER is set and url contains username. */
499	result = CURLUE_USER_NOT_ALLOWED;
500	goto out;
501	}
502	if(junkscan(userp, flags)) {
503	result = CURLUE_BAD_USER;
504	goto out;
505	}
506	u->user = userp;
507	}
508
509	if(passwdp) {
510	if(junkscan(passwdp, flags)) {
511	result = CURLUE_BAD_PASSWORD;
512	goto out;
513	}
514	u->password = passwdp;
515	}
516
517	if(optionsp) {
518	if(junkscan(optionsp, flags)) {
519	result = CURLUE_BAD_LOGIN;
520	goto out;
521	}
522	u->options = optionsp;
523	}
524
525	return CURLUE_OK;
526	out:
527
528	free(userp);
529	free(passwdp);
530	free(optionsp);
531	u->user = NULL;
532	u->password = NULL;
533	u->options = NULL;
534
535	return result;
536	}
537
538	UNITTEST CURLUcode Curl_parse_port(struct Curl_URL u, char hostname,
539	bool has_scheme)
540	{
541	char *portptr = NULL;
542	char endbracket;
543	int len;
544
545	/*
546	* Find the end of an IPv6 address, either on the ']' ending bracket or
547	* a percent-encoded zone index.
548	*/
549	if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
550	&endbracket, &len)) {
551	if(']' == endbracket)
552	portptr = &hostname[len];
553	else if('%' == endbracket) {
554	int zonelen = len;
555	if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
556	if(']' != endbracket)
557	return CURLUE_BAD_IPV6;
558	portptr = &hostname[--zonelen + len + 1];
559	}
560	else
561	return CURLUE_BAD_IPV6;
562	}
563	else
564	return CURLUE_BAD_IPV6;
565
566	/* this is a RFC2732-style specified IP-address */
567	if(portptr && *portptr) {
568	if(*portptr != ':')
569	return CURLUE_BAD_IPV6;
570	}
571	else
572	portptr = NULL;
573	}
574	else
575	portptr = strchr(hostname, ':');
576
577	if(portptr) {
578	char *rest;
579	long port;
580	char portbuf[7];
581
582	/* Browser behavior adaptation. If there's a colon with no digits after,
583	just cut off the name there which makes us ignore the colon and just
584	use the default port. Firefox, Chrome and Safari all do that.
585
586	Don't do it if the URL has no scheme, to make something that looks like
587	a scheme not work!
588	*/
589	if(!portptr[1]) {
590	*portptr = '\0';
591	return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
592	}
593
594	if(!ISDIGIT(portptr[1]))
595	return CURLUE_BAD_PORT_NUMBER;
596
597	port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
598
599	if(port > 0xffff)
600	return CURLUE_BAD_PORT_NUMBER;
601
602	if(rest[0])
603	return CURLUE_BAD_PORT_NUMBER;
604
605	portptr++ = '\0'; / cut off the name there */
606	*rest = 0;
607	/* generate a new port number string to get rid of leading zeroes etc */
608	msnprintf(portbuf, sizeof(portbuf), "%ld", port);
609	u->portnum = port;
610	u->port = strdup(portbuf);
611	if(!u->port)
612	return CURLUE_OUT_OF_MEMORY;
613	}
614
615	return CURLUE_OK;
616	}
617
618	static CURLUcode hostname_check(struct Curl_URL u, char hostname)
619	{
620	size_t len;
621	size_t hlen = strlen(hostname);
622
623	if(hostname[0] == '[') {
624	const char *l = "0123456789abcdefABCDEF:.";
625	if(hlen < 4) /* '[::]' is the shortest possible valid string */
626	return CURLUE_BAD_IPV6;
627	hostname++;
628	hlen -= 2;
629
630	if(hostname[hlen] != ']')
631	return CURLUE_BAD_IPV6;
632
633	/* only valid letters are ok */
634	len = strspn(hostname, l);
635	if(hlen != len) {
636	hlen = len;
637	if(hostname[len] == '%') {
638	/* this could now be '%[zone id]' */
639	char zoneid[16];
640	int i = 0;
641	char *h = &hostname[len + 1];
642	/* pass '25' if present and is a url encoded percent sign */
643	if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
644	h += 2;
645	while(h && (h != ']') && (i < 15))
646	zoneid[i++] = *h++;
647	if(!i \|\| (']' != *h))
648	/* impossible to reach? */
649	return CURLUE_MALFORMED_INPUT;
650	zoneid[i] = 0;
651	u->zoneid = strdup(zoneid);
652	if(!u->zoneid)
653	return CURLUE_OUT_OF_MEMORY;
654	hostname[len] = ']'; /* insert end bracket */
655	hostname[len + 1] = 0; /* terminate the hostname */
656	}
657	else
658	return CURLUE_BAD_IPV6;
659	/* hostname is fine */
660	}
661	#ifdef ENABLE_IPV6
662	{
663	char dest[16]; /* fits a binary IPv6 address */
664	char norm[MAX_IPADR_LEN];
665	hostname[hlen] = 0; /* end the address there */
666	if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
667	return CURLUE_BAD_IPV6;
668
669	/* check if it can be done shorter */
670	if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
671	(strlen(norm) < hlen)) {
672	strcpy(hostname, norm);
673	hlen = strlen(norm);
674	hostname[hlen + 1] = 0;
675	}
676	hostname[hlen] = ']'; /* restore ending bracket */
677	}
678	#endif
679	}
680	else {
681	/* letters from the second string are not ok */
682	len = strcspn(hostname, " \r\n\t/:#?!@");
683	if(hlen != len)
684	/* hostname with bad content */
685	return CURLUE_BAD_HOSTNAME;
686	}
687	if(!hostname[0])
688	return CURLUE_NO_HOST;
689	return CURLUE_OK;
690	}
691
/* true for the characters that end the host name (authority) in a URL */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
693
694	/*
695	* Handle partial IPv4 numerical addresses and different bases, like
696	* '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
697	*
698	* If the given input string is syntactically wrong or any part for example is
699	* too big, this function returns FALSE and doesn't create any output.
700	*
701	* Output the "normalized" version of that input string in plain quad decimal
702	* integers and return TRUE.
703	*/
704	static bool ipv4_normalize(const char hostname, char outp, size_t olen)
705	{
706	bool done = FALSE;
707	int n = 0;
708	const char *c = hostname;
709	unsigned long parts[4] = {0, 0, 0, 0};
710
711	while(!done) {
712	char *endp;
713	unsigned long l;
714	if((c < '0') \|\| (c > '9'))
715	/* most importantly this doesn't allow a leading plus or minus */
716	return FALSE;
717	l = strtoul(c, &endp, 0);
718
719	/* overflow or nothing parsed at all */
720	if(((l == ULONG_MAX) && (errno == ERANGE)) \|\| (endp == c))
721	return FALSE;
722
723	#if SIZEOF_LONG > 4
724	/* a value larger than 32 bits */
725	if(l > UINT_MAX)
726	return FALSE;
727	#endif
728
729	parts[n] = l;
730	c = endp;
731
732	switch (*c) {
733	case '.' :
734	if(n == 3)
735	return FALSE;
736	n++;
737	c++;
738	break;
739
740	case '\0':
741	done = TRUE;
742	break;
743
744	default:
745	return FALSE;
746	}
747	}
748
749	/* this is deemed a valid IPv4 numerical address */
750
751	switch(n) {
752	case 0: /* a -- 32 bits */
753	msnprintf(outp, olen, "%u.%u.%u.%u",
754	parts[0] >> 24, (parts[0] >> 16) & 0xff,
755	(parts[0] >> 8) & 0xff, parts[0] & 0xff);
756	break;
757	case 1: /* a.b -- 8.24 bits */
758	if((parts[0] > 0xff) \|\| (parts[1] > 0xffffff))
759	return FALSE;
760	msnprintf(outp, olen, "%u.%u.%u.%u",
761	parts[0], (parts[1] >> 16) & 0xff,
762	(parts[1] >> 8) & 0xff, parts[1] & 0xff);
763	break;
764	case 2: /* a.b.c -- 8.8.16 bits */
765	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xffff))
766	return FALSE;
767	msnprintf(outp, olen, "%u.%u.%u.%u",
768	parts[0], parts[1], (parts[2] >> 8) & 0xff,
769	parts[2] & 0xff);
770	break;
771	case 3: /* a.b.c.d -- 8.8.8.8 bits */
772	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xff) \|\|
773	(parts[3] > 0xff))
774	return FALSE;
775	msnprintf(outp, olen, "%u.%u.%u.%u",
776	parts[0], parts[1], parts[2], parts[3]);
777	break;
778	}
779	return TRUE;
780	}
781
782	/* return strdup'ed version in 'outp', possibly percent decoded */
783	static CURLUcode decode_host(char hostname, char *outp)
784	{
785	char *per = NULL;
786	if(hostname[0] != '[')
787	/* only decode if not an ipv6 numerical */
788	per = strchr(hostname, '%');
789	if(!per) {
790	*outp = strdup(hostname);
791	if(!*outp)
792	return CURLUE_OUT_OF_MEMORY;
793	}
794	else {
795	/* might be encoded */
796	size_t dlen;
797	CURLcode result = Curl_urldecode(hostname, 0, outp, &dlen, REJECT_CTRL);
798	if(result)
799	return CURLUE_BAD_HOSTNAME;
800	}
801
802	return CURLUE_OK;
803	}
804
805	static CURLUcode seturl(const char url, CURLU u, unsigned int flags)
806	{
807	char *path;
808	bool path_alloced = FALSE;
809	bool uncpath = FALSE;
810	char *hostname;
811	char *query = NULL;
812	char *fragment = NULL;
813	CURLUcode result;
814	bool url_has_scheme = FALSE;
815	char schemebuf[MAX_SCHEME_LEN + 1];
816	const char *schemep = NULL;
817	size_t schemelen = 0;
818	size_t urllen;
819
820	DEBUGASSERT(url);
821
822	/*************************************************************
823	* Parse the URL.
824	************************************************************/
825	/* allocate scratch area */
826	urllen = strlen(url);
827	if(urllen > CURL_MAX_INPUT_LENGTH)
828	/* excessive input length */
829	return CURLUE_MALFORMED_INPUT;
830
831	path = u->scratch = malloc(urllen * 2 + 2);
832	if(!path)
833	return CURLUE_OUT_OF_MEMORY;
834
835	hostname = &path[urllen + 1];
836	hostname[0] = 0;
837
838	if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
839	url_has_scheme = TRUE;
840	schemelen = strlen(schemebuf);
841	}
842
843	/* handle the file: scheme */
844	if(url_has_scheme && !strcmp(schemebuf, "file")) {
845	if(urllen <= 6)
846	/* file:/ is not enough to actually be a complete file: URL */
847	return CURLUE_BAD_FILE_URL;
848
849	/* path has been allocated large enough to hold this */
850	strcpy(path, &url[5]);
851
852	u->scheme = strdup("file");
853	if(!u->scheme)
854	return CURLUE_OUT_OF_MEMORY;
855
856	/* Extra handling URLs with an authority component (i.e. that start with
857	* "file://")
858	*
859	* We allow omitted hostname (e.g. file:/<path>) -- valid according to
860	* RFC 8089, but not the (current) WHAT-WG URL spec.
861	*/
862	if(path[0] == '/' && path[1] == '/') {
863	/* swallow the two slashes */
864	char *ptr = &path[2];
865
866	/*
867	* According to RFC 8089, a file: URL can be reliably dereferenced if:
868	*
869	* o it has no/blank hostname, or
870	*
871	* o the hostname matches "localhost" (case-insensitively), or
872	*
873	* o the hostname is a FQDN that resolves to this machine, or
874	*
875	* o it is an UNC String transformed to an URI (Windows only, RFC 8089
876	* Appendix E.3).
877	*
878	* For brevity, we only consider URLs with empty, "localhost", or
879	* "127.0.0.1" hostnames as local, otherwise as an UNC String.
880	*
881	* Additionally, there is an exception for URLs with a Windows drive
882	* letter in the authority (which was accidentally omitted from RFC 8089
883	* Appendix E, but believe me, it was meant to be there. --MK)
884	*/
885	if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
886	/* the URL includes a host name, it must match "localhost" or
887	"127.0.0.1" to be valid */
888	if(checkprefix("localhost/", ptr) \|\|
889	checkprefix("127.0.0.1/", ptr)) {
890	ptr += 9; /* now points to the slash after the host */
891	}
892	else {
893	#if defined(WIN32)
894	size_t len;
895
896	/* the host name, NetBIOS computer name, can not contain disallowed
897	chars, and the delimiting slash character must be appended to the
898	host name */
899	path = strpbrk(ptr, "/\\:*?\"<>\|");
900	if(!path \|\| *path != '/')
901	return CURLUE_BAD_FILE_URL;
902
903	len = path - ptr;
904	if(len) {
905	memcpy(hostname, ptr, len);
906	hostname[len] = 0;
907	uncpath = TRUE;
908	}
909
910	ptr -= 2; /* now points to the // before the host in UNC */
911	#else
912	/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
913	none */
914	return CURLUE_BAD_FILE_URL;
915	#endif
916	}
917	}
918
919	path = ptr;
920	}
921
922	if(!uncpath)
923	hostname = NULL; /* no host for file: URLs by default */
924
925	#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
926	/* Don't allow Windows drive letters when not in Windows.
927	* This catches both "file:/c:" and "file:c:" */
928	if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) \|\|
929	STARTS_WITH_URL_DRIVE_PREFIX(path)) {
930	/* File drive letters are only accepted in MSDOS/Windows */
931	return CURLUE_BAD_FILE_URL;
932	}
933	#else
934	/* If the path starts with a slash and a drive letter, ditch the slash */
935	if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
936	/* This cannot be done with strcpy, as the memory chunks overlap! */
937	memmove(path, &path[1], strlen(&path[1]) + 1);
938	}
939	#endif
940
941	}
942	else {
943	/* clear path */
944	const char *p;
945	const char *hostp;
946	size_t len;
947	path[0] = 0;
948
949	if(url_has_scheme) {
950	int i = 0;
951	p = &url[schemelen + 1];
952	while(p && (*p == '/') && (i < 4)) {
953	p++;
954	i++;
955	}
956	if((i < 1) \|\| (i>3))
957	/* less than one or more than three slashes */
958	return CURLUE_BAD_SLASHES;
959
960	schemep = schemebuf;
961	if(!Curl_builtin_scheme(schemep) &&
962	!(flags & CURLU_NON_SUPPORT_SCHEME))
963	return CURLUE_UNSUPPORTED_SCHEME;
964
965	if(junkscan(schemep, flags))
966	return CURLUE_BAD_SCHEME;
967	}
968	else {
969	/* no scheme! */
970
971	if(!(flags & (CURLU_DEFAULT_SCHEME\|CURLU_GUESS_SCHEME)))
972	return CURLUE_BAD_SCHEME;
973	if(flags & CURLU_DEFAULT_SCHEME)
974	schemep = DEFAULT_SCHEME;
975
976	/*
977	* The URL was badly formatted, let's try without scheme specified.
978	*/
979	p = url;
980	}
981	hostp = p; /* host name starts here */
982
983	/* find the end of the host name + port number */
984	while(p && !HOSTNAME_END(p))
985	p++;
986
987	len = p - hostp;
988	if(len) {
989	memcpy(hostname, hostp, len);
990	hostname[len] = 0;
991	}
992	else {
993	if(!(flags & CURLU_NO_AUTHORITY))
994	return CURLUE_NO_HOST;
995	}
996
997	strcpy(path, p);
998
999	if(schemep) {
1000	u->scheme = strdup(schemep);
1001	if(!u->scheme)
1002	return CURLUE_OUT_OF_MEMORY;
1003	}
1004	}
1005
1006	if((flags & CURLU_URLENCODE) && path[0]) {
1007	/* worst case output length is 3x the original! */
1008	char newp = malloc(strlen(path) 3);
1009	if(!newp)
1010	return CURLUE_OUT_OF_MEMORY;
1011	path_alloced = TRUE;
1012	strcpy_url(newp, path, TRUE); /* consider it relative */
1013	u->temppath = path = newp;
1014	}
1015
1016	fragment = strchr(path, '#');
1017	if(fragment) {
1018	*fragment++ = 0;
1019	if(junkscan(fragment, flags))
1020	return CURLUE_BAD_FRAGMENT;
1021	if(fragment[0]) {
1022	u->fragment = strdup(fragment);
1023	if(!u->fragment)
1024	return CURLUE_OUT_OF_MEMORY;
1025	}
1026	}
1027
1028	query = strchr(path, '?');
1029	if(query) {
1030	*query++ = 0;
1031	if(junkscan(query, flags))
1032	return CURLUE_BAD_QUERY;
1033	/* done even if the query part is a blank string */
1034	u->query = strdup(query);
1035	if(!u->query)
1036	return CURLUE_OUT_OF_MEMORY;
1037	}
1038
1039	if(junkscan(path, flags))
1040	return CURLUE_BAD_PATH;
1041
1042	if(!path[0])
1043	/* if there's no path left set, unset */
1044	path = NULL;
1045	else {
1046	if(!(flags & CURLU_PATH_AS_IS)) {
1047	/* remove ../ and ./ sequences according to RFC3986 */
1048	char *newp = Curl_dedotdotify(path);
1049	if(!newp)
1050	return CURLUE_OUT_OF_MEMORY;
1051
1052	if(strcmp(newp, path)) {
1053	/* if we got a new version */
1054	if(path_alloced)
1055	Curl_safefree(u->temppath);
1056	u->temppath = path = newp;
1057	path_alloced = TRUE;
1058	}
1059	else
1060	free(newp);
1061	}
1062
1063	u->path = path_alloced?path:strdup(path);
1064	if(!u->path)
1065	return CURLUE_OUT_OF_MEMORY;
1066	u->temppath = NULL; /* used now */
1067	}
1068
1069	if(hostname) {
1070	char normalized_ipv4[sizeof("255.255.255.255") + 1];
1071
1072	/*
1073	* Parse the login details and strip them out of the host name.
1074	*/
1075	result = parse_hostname_login(u, &hostname, flags);
1076	if(result)
1077	return result;
1078
1079	result = Curl_parse_port(u, hostname, url_has_scheme);
1080	if(result)
1081	return result;
1082
1083	if(junkscan(hostname, flags))
1084	return CURLUE_BAD_HOSTNAME;
1085
1086	if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
1087	/* Skip hostname check, it's allowed to be empty. */
1088	u->host = strdup("");
1089	}
1090	else {
1091	if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
1092	u->host = strdup(normalized_ipv4);
1093	else {
1094	result = decode_host(hostname, &u->host);
1095	if(result)
1096	return result;
1097	result = hostname_check(u, u->host);
1098	if(result)
1099	return result;
1100	}
1101	}
1102	if(!u->host)
1103	return CURLUE_OUT_OF_MEMORY;
1104	if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1105	/* legacy curl-style guess based on host name */
1106	if(checkprefix("ftp.", hostname))
1107	schemep = "ftp";
1108	else if(checkprefix("dict.", hostname))
1109	schemep = "dict";
1110	else if(checkprefix("ldap.", hostname))
1111	schemep = "ldap";
1112	else if(checkprefix("imap.", hostname))
1113	schemep = "imap";
1114	else if(checkprefix("smtp.", hostname))
1115	schemep = "smtp";
1116	else if(checkprefix("pop3.", hostname))
1117	schemep = "pop3";
1118	else
1119	schemep = "http";
1120
1121	u->scheme = strdup(schemep);
1122	if(!u->scheme)
1123	return CURLUE_OUT_OF_MEMORY;
1124	}
1125	}
1126
1127	Curl_safefree(u->scratch);
1128	Curl_safefree(u->temppath);
1129
1130	return CURLUE_OK;
1131	}
1132
1133	/*
1134	* Parse the URL and set the relevant members of the Curl_URL struct.
1135	*/
1136	static CURLUcode parseurl(const char url, CURLU u, unsigned int flags)
1137	{
1138	CURLUcode result = seturl(url, u, flags);
1139	if(result) {
1140	free_urlhandle(u);
1141	memset(u, 0, sizeof(struct Curl_URL));
1142	}
1143	return result;
1144	}
1145
1146	/*
1147	* Parse the URL and, if successful, replace everything in the Curl_URL struct.
1148	*/
1149	static CURLUcode parseurl_and_replace(const char url, CURLU u,
1150	unsigned int flags)
1151	{
1152	CURLUcode result;
1153	CURLU tmpurl;
1154	memset(&tmpurl, 0, sizeof(tmpurl));
1155	result = parseurl(url, &tmpurl, flags);
1156	if(!result) {
1157	free_urlhandle(u);
1158	*u = tmpurl;
1159	}
1160	else
1161	free_urlhandle(&tmpurl);
1162	return result;
1163	}
1164
1165	/*
1166	*/
1167	CURLU *curl_url(void)
1168	{
1169	return calloc(sizeof(struct Curl_URL), 1);
1170	}
1171
1172	void curl_url_cleanup(CURLU *u)
1173	{
1174	if(u) {
1175	free_urlhandle(u);
1176	free(u);
1177	}
1178	}
1179
1180	#define DUP(dest, src, name) \
1181	do { \
1182	if(src->name) { \
1183	dest->name = strdup(src->name); \
1184	if(!dest->name) \
1185	goto fail; \
1186	} \
1187	} while(0)
1188
1189	CURLU curl_url_dup(CURLU in)
1190	{
1191	struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1192	if(u) {
1193	DUP(u, in, scheme);
1194	DUP(u, in, user);
1195	DUP(u, in, password);
1196	DUP(u, in, options);
1197	DUP(u, in, host);
1198	DUP(u, in, port);
1199	DUP(u, in, path);
1200	DUP(u, in, query);
1201	DUP(u, in, fragment);
1202	u->portnum = in->portnum;
1203	}
1204	return u;
1205	fail:
1206	curl_url_cleanup(u);
1207	return NULL;
1208	}
1209
1210	CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1211	char **part, unsigned int flags)
1212	{
1213	char *ptr;
1214	CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1215	char portbuf[7];
1216	bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1217	bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1218	bool plusdecode = FALSE;
1219	(void)flags;
1220	if(!u)
1221	return CURLUE_BAD_HANDLE;
1222	if(!part)
1223	return CURLUE_BAD_PARTPOINTER;
1224	*part = NULL;
1225
1226	switch(what) {
1227	case CURLUPART_SCHEME:
1228	ptr = u->scheme;
1229	ifmissing = CURLUE_NO_SCHEME;
1230	urldecode = FALSE; /* never for schemes */
1231	break;
1232	case CURLUPART_USER:
1233	ptr = u->user;
1234	ifmissing = CURLUE_NO_USER;
1235	break;
1236	case CURLUPART_PASSWORD:
1237	ptr = u->password;
1238	ifmissing = CURLUE_NO_PASSWORD;
1239	break;
1240	case CURLUPART_OPTIONS:
1241	ptr = u->options;
1242	ifmissing = CURLUE_NO_OPTIONS;
1243	break;
1244	case CURLUPART_HOST:
1245	ptr = u->host;
1246	ifmissing = CURLUE_NO_HOST;
1247	break;
1248	case CURLUPART_ZONEID:
1249	ptr = u->zoneid;
1250	ifmissing = CURLUE_NO_ZONEID;
1251	break;
1252	case CURLUPART_PORT:
1253	ptr = u->port;
1254	ifmissing = CURLUE_NO_PORT;
1255	urldecode = FALSE; /* never for port */
1256	if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1257	/* there's no stored port number, but asked to deliver
1258	a default one for the scheme */
1259	const struct Curl_handler *h =
1260	Curl_builtin_scheme(u->scheme);
1261	if(h) {
1262	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1263	ptr = portbuf;
1264	}
1265	}
1266	else if(ptr && u->scheme) {
1267	/* there is a stored port number, but ask to inhibit if
1268	it matches the default one for the scheme */
1269	const struct Curl_handler *h =
1270	Curl_builtin_scheme(u->scheme);
1271	if(h && (h->defport == u->portnum) &&
1272	(flags & CURLU_NO_DEFAULT_PORT))
1273	ptr = NULL;
1274	}
1275	break;
1276	case CURLUPART_PATH:
1277	ptr = u->path;
1278	if(!ptr) {
1279	ptr = u->path = strdup("/");
1280	if(!u->path)
1281	return CURLUE_OUT_OF_MEMORY;
1282	}
1283	break;
1284	case CURLUPART_QUERY:
1285	ptr = u->query;
1286	ifmissing = CURLUE_NO_QUERY;
1287	plusdecode = urldecode;
1288	break;
1289	case CURLUPART_FRAGMENT:
1290	ptr = u->fragment;
1291	ifmissing = CURLUE_NO_FRAGMENT;
1292	break;
1293	case CURLUPART_URL: {
1294	char *url;
1295	char *scheme;
1296	char *options = u->options;
1297	char *port = u->port;
1298	char *allochost = NULL;
1299	if(u->scheme && strcasecompare("file", u->scheme)) {
1300	url = aprintf("file://%s%s%s",
1301	u->path,
1302	u->fragment? "#": "",
1303	u->fragment? u->fragment : "");
1304	}
1305	else if(!u->host)
1306	return CURLUE_NO_HOST;
1307	else {
1308	const struct Curl_handler *h = NULL;
1309	if(u->scheme)
1310	scheme = u->scheme;
1311	else if(flags & CURLU_DEFAULT_SCHEME)
1312	scheme = (char *) DEFAULT_SCHEME;
1313	else
1314	return CURLUE_NO_SCHEME;
1315
1316	h = Curl_builtin_scheme(scheme);
1317	if(!port && (flags & CURLU_DEFAULT_PORT)) {
1318	/* there's no stored port number, but asked to deliver
1319	a default one for the scheme */
1320	if(h) {
1321	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1322	port = portbuf;
1323	}
1324	}
1325	else if(port) {
1326	/* there is a stored port number, but asked to inhibit if it matches
1327	the default one for the scheme */
1328	if(h && (h->defport == u->portnum) &&
1329	(flags & CURLU_NO_DEFAULT_PORT))
1330	port = NULL;
1331	}
1332
1333	if(h && !(h->flags & PROTOPT_URLOPTIONS))
1334	options = NULL;
1335
1336	if(u->host[0] == '[') {
1337	if(u->zoneid) {
1338	/* make it '[ host %25 zoneid ]' */
1339	size_t hostlen = strlen(u->host);
1340	size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
1341	allochost = malloc(alen);
1342	if(!allochost)
1343	return CURLUE_OUT_OF_MEMORY;
1344	memcpy(allochost, u->host, hostlen - 1);
1345	msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
1346	"%%25%s]", u->zoneid);
1347	}
1348	}
1349	else if(urlencode) {
1350	allochost = curl_easy_escape(NULL, u->host, 0);
1351	if(!allochost)
1352	return CURLUE_OUT_OF_MEMORY;
1353	}
1354	else {
1355	/* only encode '%' in output host name */
1356	char *host = u->host;
1357	size_t pcount = 0;
1358	/* first, count number of percents present in the name */
1359	while(*host) {
1360	if(*host == '%')
1361	pcount++;
1362	host++;
1363	}
1364	/* if there were percents, encode the host name */
1365	if(pcount) {
1366	size_t hostlen = strlen(u->host);
1367	size_t alen = hostlen + 2 * pcount + 1;
1368	char *o = allochost = malloc(alen);
1369	if(!allochost)
1370	return CURLUE_OUT_OF_MEMORY;
1371
1372	host = u->host;
1373	while(*host) {
1374	if(*host == '%') {
1375	memcpy(o, "%25", 3);
1376	o += 3;
1377	host++;
1378	continue;
1379	}
1380	o++ = host++;
1381	}
1382	*o = '\0';
1383	}
1384	}
1385
1386	url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1387	scheme,
1388	u->user ? u->user : "",
1389	u->password ? ":": "",
1390	u->password ? u->password : "",
1391	options ? ";" : "",
1392	options ? options : "",
1393	(u->user \|\| u->password \|\| options) ? "@": "",
1394	allochost ? allochost : u->host,
1395	port ? ":": "",
1396	port ? port : "",
1397	(u->path && (u->path[0] != '/')) ? "/": "",
1398	u->path ? u->path : "/",
1399	(u->query && u->query[0]) ? "?": "",
1400	(u->query && u->query[0]) ? u->query : "",
1401	u->fragment? "#": "",
1402	u->fragment? u->fragment : "");
1403	free(allochost);
1404	}
1405	if(!url)
1406	return CURLUE_OUT_OF_MEMORY;
1407	*part = url;
1408	return CURLUE_OK;
1409	}
1410	default:
1411	ptr = NULL;
1412	break;
1413	}
1414	if(ptr) {
1415	*part = strdup(ptr);
1416	if(!*part)
1417	return CURLUE_OUT_OF_MEMORY;
1418	if(plusdecode) {
1419	/* convert + to space */
1420	char *plus;
1421	for(plus = part; plus; ++plus) {
1422	if(*plus == '+')
1423	*plus = ' ';
1424	}
1425	}
1426	if(urldecode) {
1427	char *decoded;
1428	size_t dlen;
1429	/* this unconditional rejection of control bytes is documented
1430	API behavior */
1431	CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1432	free(*part);
1433	if(res) {
1434	*part = NULL;
1435	return CURLUE_URLDECODE;
1436	}
1437	*part = decoded;
1438	}
1439	return CURLUE_OK;
1440	}
1441	else
1442	return ifmissing;
1443	}
1444
1445	CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1446	const char *part, unsigned int flags)
1447	{
1448	char **storep = NULL;
1449	long port = 0;
1450	bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1451	bool plusencode = FALSE;
1452	bool urlskipslash = FALSE;
1453	bool appendquery = FALSE;
1454	bool equalsencode = FALSE;
1455
1456	if(!u)
1457	return CURLUE_BAD_HANDLE;
1458	if(!part) {
1459	/* setting a part to NULL clears it */
1460	switch(what) {
1461	case CURLUPART_URL:
1462	break;
1463	case CURLUPART_SCHEME:
1464	storep = &u->scheme;
1465	break;
1466	case CURLUPART_USER:
1467	storep = &u->user;
1468	break;
1469	case CURLUPART_PASSWORD:
1470	storep = &u->password;
1471	break;
1472	case CURLUPART_OPTIONS:
1473	storep = &u->options;
1474	break;
1475	case CURLUPART_HOST:
1476	storep = &u->host;
1477	break;
1478	case CURLUPART_ZONEID:
1479	storep = &u->zoneid;
1480	break;
1481	case CURLUPART_PORT:
1482	u->portnum = 0;
1483	storep = &u->port;
1484	break;
1485	case CURLUPART_PATH:
1486	storep = &u->path;
1487	break;
1488	case CURLUPART_QUERY:
1489	storep = &u->query;
1490	break;
1491	case CURLUPART_FRAGMENT:
1492	storep = &u->fragment;
1493	break;
1494	default:
1495	return CURLUE_UNKNOWN_PART;
1496	}
1497	if(storep && *storep) {
1498	Curl_safefree(*storep);
1499	}
1500	return CURLUE_OK;
1501	}
1502
1503	switch(what) {
1504	case CURLUPART_SCHEME:
1505	if(strlen(part) > MAX_SCHEME_LEN)
1506	/* too long */
1507	return CURLUE_BAD_SCHEME;
1508	if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1509	/* verify that it is a fine scheme */
1510	!Curl_builtin_scheme(part))
1511	return CURLUE_UNSUPPORTED_SCHEME;
1512	storep = &u->scheme;
1513	urlencode = FALSE; /* never */
1514	break;
1515	case CURLUPART_USER:
1516	storep = &u->user;
1517	break;
1518	case CURLUPART_PASSWORD:
1519	storep = &u->password;
1520	break;
1521	case CURLUPART_OPTIONS:
1522	storep = &u->options;
1523	break;
1524	case CURLUPART_HOST: {
1525	size_t len = strcspn(part, " \r\n");
1526	if(strlen(part) != len)
1527	/* hostname with bad content */
1528	return CURLUE_BAD_HOSTNAME;
1529	storep = &u->host;
1530	Curl_safefree(u->zoneid);
1531	break;
1532	}
1533	case CURLUPART_ZONEID:
1534	storep = &u->zoneid;
1535	break;
1536	case CURLUPART_PORT:
1537	{
1538	char *endp;
1539	urlencode = FALSE; /* never */
1540	port = strtol(part, &endp, 10); /* Port number must be decimal */
1541	if((port <= 0) \|\| (port > 0xffff))
1542	return CURLUE_BAD_PORT_NUMBER;
1543	if(*endp)
1544	/* weirdly provided number, not good! */
1545	return CURLUE_BAD_PORT_NUMBER;
1546	storep = &u->port;
1547	}
1548	break;
1549	case CURLUPART_PATH:
1550	urlskipslash = TRUE;
1551	storep = &u->path;
1552	break;
1553	case CURLUPART_QUERY:
1554	plusencode = urlencode;
1555	appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1556	equalsencode = appendquery;
1557	storep = &u->query;
1558	break;
1559	case CURLUPART_FRAGMENT:
1560	storep = &u->fragment;
1561	break;
1562	case CURLUPART_URL: {
1563	/*
1564	* Allow a new URL to replace the existing (if any) contents.
1565	*
1566	* If the existing contents is enough for a URL, allow a relative URL to
1567	* replace it.
1568	*/
1569	CURLUcode result;
1570	char *oldurl;
1571	char *redired_url;
1572
1573	/* if the new thing is absolute or the old one is not
1574	* (we could not get an absolute url in 'oldurl'),
1575	* then replace the existing with the new. */
1576	if(Curl_is_absolute_url(part, NULL, 0)
1577	\|\| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1578	return parseurl_and_replace(part, u, flags);
1579	}
1580
1581	/* apply the relative part to create a new URL
1582	* and replace the existing one with it. */
1583	redired_url = concat_url(oldurl, part);
1584	free(oldurl);
1585	if(!redired_url)
1586	return CURLUE_OUT_OF_MEMORY;
1587
1588	result = parseurl_and_replace(redired_url, u, flags);
1589	free(redired_url);
1590	return result;
1591	}
1592	default:
1593	return CURLUE_UNKNOWN_PART;
1594	}
1595	DEBUGASSERT(storep);
1596	{
1597	const char *newp = part;
1598	size_t nalloc = strlen(part);
1599
1600	if(nalloc > CURL_MAX_INPUT_LENGTH)
1601	/* excessive input length */
1602	return CURLUE_MALFORMED_INPUT;
1603
1604	if(urlencode) {
1605	const unsigned char *i;
1606	char *o;
1607	char enc = malloc(nalloc 3 + 1); /* for worst case! */
1608	if(!enc)
1609	return CURLUE_OUT_OF_MEMORY;
1610	for(i = (const unsigned char )part, o = enc; i; i++) {
1611	if((*i == ' ') && plusencode) {
1612	*o = '+';
1613	o++;
1614	}
1615	else if(Curl_isunreserved(*i) \|\|
1616	((*i == '/') && urlskipslash) \|\|
1617	((*i == '=') && equalsencode)) {
1618	if((*i == '=') && equalsencode)
1619	/* only skip the first equals sign */
1620	equalsencode = FALSE;
1621	o = i;
1622	o++;
1623	}
1624	else {
1625	msnprintf(o, 4, "%%%02x", *i);
1626	o += 3;
1627	}
1628	}
1629	o = 0; / null-terminate */
1630	newp = enc;
1631	}
1632	else {
1633	char *p;
1634	newp = strdup(part);
1635	if(!newp)
1636	return CURLUE_OUT_OF_MEMORY;
1637	p = (char *)newp;
1638	while(*p) {
1639	/* make sure percent encoded are lower case */
1640	if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1641	(ISUPPER(p[1]) \|\| ISUPPER(p[2]))) {
1642	p[1] = (char)TOLOWER(p[1]);
1643	p[2] = (char)TOLOWER(p[2]);
1644	p += 3;
1645	}
1646	else
1647	p++;
1648	}
1649	}
1650
1651	if(appendquery) {
1652	/* Append the string onto the old query. Add a '&' separator if none is
1653	present at the end of the exsting query already */
1654	size_t querylen = u->query ? strlen(u->query) : 0;
1655	bool addamperand = querylen && (u->query[querylen -1] != '&');
1656	if(querylen) {
1657	size_t newplen = strlen(newp);
1658	char *p = malloc(querylen + addamperand + newplen + 1);
1659	if(!p) {
1660	free((char *)newp);
1661	return CURLUE_OUT_OF_MEMORY;
1662	}
1663	strcpy(p, u->query); /* original query */
1664	if(addamperand)
1665	p[querylen] = '&'; /* ampersand */
1666	strcpy(&p[querylen + addamperand], newp); /* new suffix */
1667	free((char *)newp);
1668	free(*storep);
1669	*storep = p;
1670	return CURLUE_OK;
1671	}
1672	}
1673
1674	if(what == CURLUPART_HOST) {
1675	if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1676	/* Skip hostname check, it's allowed to be empty. */
1677	}
1678	else {
1679	if(hostname_check(u, (char *)newp)) {
1680	free((char *)newp);
1681	return CURLUE_BAD_HOSTNAME;
1682	}
1683	}
1684	}
1685
1686	free(*storep);
1687	storep = (char )newp;
1688	}
1689	/* set after the string, to make it not assigned if the allocation above
1690	fails */
1691	if(port)
1692	u->portnum = port;
1693	return CURLUE_OK;
1694	}

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/curl-7.83.1/lib/urlapi.c@ 98341

以其他格式下載: