serve

serve.git
git clone git://git.lenczewski.org/serve.git
Log | Files | Refs

uri.c (4461B)


      1 #include "website.h"
      2 
      3 static inline unsigned char
      4 fromxdigit(char c)
      5 {
      6 	static unsigned char lut[1 << CHAR_BIT] = {
      7 		['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4,
      8 		['5'] = 5, ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9,
      9 
     10 		['a'] = 10, ['A'] = 10, ['b'] = 11, ['B'] = 11,
     11 		['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13,
     12 		['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15,
     13 	};
     14 
     15 	return lut[(unsigned char) c];
     16 }
     17 
     18 bool
     19 urifrag_try_normalise(struct urifrag *frag)
     20 {
     21 	char *cur = frag->ptr;
     22 
     23 	for (int i = 0; i < frag->len; i++) {
     24 		char buf[3] = {
     25 			frag->ptr[i],
     26 			(i+1 < frag->len) ? frag->ptr[i+1] : '\0',
     27 			(i+2 < frag->len) ? frag->ptr[i+2] : '\0',
     28 		};
     29 
     30 		unsigned char decoded = buf[0];
     31 		if (decoded == '%') {
     32 			if (!isxdigit(buf[1]) || !isxdigit(buf[2]))
     33 				return false;
     34 
     35 			decoded = (fromxdigit(buf[1]) << 4) | fromxdigit(buf[2]);
     36 		}
     37 
     38 		*cur++ = decoded;
     39 	}
     40 
     41 	frag->len = cur - frag->ptr;
     42 
     43 	return true;
     44 }
     45 
     46 static inline char *
     47 prev_dot_segment(char *begin, char *cur)
     48 {
     49 	while (begin < cur) {
     50 		if (*(--cur) == '/')
     51 			return cur;
     52 	}
     53 
     54 	return begin;
     55 }
     56 
     57 void
     58 urifrag_remove_dot_segments(struct urifrag *frag)
     59 {
     60 	/* TODO: fix me! */
     61 
     62 	char *wrcur = frag->ptr, *rdcur = frag->ptr, *end = frag->ptr + frag->len;
     63 
     64 	while (rdcur < end) {
     65 		char buf[3] = {
     66 			rdcur[0],
     67 			((rdcur+1) < end) ? rdcur[1] : '\0',
     68 			((rdcur+2) < end) ? rdcur[2] : '\0',
     69 		};
     70 
     71 		if (buf[0] == '.' && buf[1] == '.' && (buf[2] == '/' || buf[2] == '\0')) {
     72 			wrcur = prev_dot_segment(frag->ptr, wrcur - 1);
     73 			rdcur += 2;
     74 		} else if (buf[0] == '.' && (buf[1] == '/' || buf[1] == '\0')) {
     75 			rdcur += 2;
     76 		} else {
     77 			*wrcur++ = *rdcur++;
     78 		}
     79 	}
     80 
     81 	frag->len = wrcur - frag->ptr;
     82 }
     83 
     84 struct uri
     85 uri_parse(char *buf, size_t len)
     86 {
     87 	/* uri = [scheme ':'] ['//' authority] path ['?' query] ['#' fragment]
     88 	 * authority = [user '@'] host [':' port]
     89 	 */
     90 
     91 	struct uri result;
     92 	memset(&result, 0, sizeof result);
     93 
     94 	char *cur = buf, *end = buf + len;
     95 
     96 	int state = 0;
     97 	struct urifrag frag = {
     98 		.ptr = cur,
     99 		.len = 0,
    100 	};
    101 
    102 	while (true) {
    103 #define __STATE(n, toks) \
    104 		{ state = (n); cur += toks; frag.ptr = cur; frag.len = 0; continue; }
    105 
    106 		char buf[3] = {
    107 			(cur < end) ? cur[0] : '\0',
    108 			(cur+1 < end) ? cur[1] : '\0',
    109 			(cur+2 < end) ? cur[2] : '\0',
    110 		};
    111 
    112 		switch (state) {
    113 		case 0: { /* scheme or authority or path */
    114 			if (buf[0] == ':') {
    115 				result.scheme = frag;
    116 				__STATE(1, 1)
    117 			} else if (buf[0] == '/' && buf[1] == '/') {
    118 				__STATE(2, 2)
    119 			} else if (buf[0] == '?') {
    120 				result.path = frag;
    121 				__STATE(6, 1)
    122 			} else if (buf[0] == '#') {
    123 				result.path = frag;
    124 				__STATE(7, 1)
    125 			} else if (buf[0] == '\0') {
    126 				result.path = frag;
    127 				__STATE(99, 0)
    128 			}
    129 		} break;
    130 
    131 		case 1: { /* authority or path */
    132 			if (buf[0] == '/' && buf[1] == '/') {
    133 				__STATE(2, 2)
    134 			} else if (buf[0] == '?') {
    135 				result.path = frag;
    136 				__STATE(6, 1)
    137 			} else if (buf[0] == '#') {
    138 				result.path = frag;
    139 				__STATE(7, 1)
    140 			} else if (buf[0] == '\0') {
    141 				result.path = frag;
    142 				__STATE(99, 0)
    143 			}
    144 		} break;
    145 
    146 		case 2: { /* authority */
    147 			if (buf[0] == '@') {
    148 				result.user = frag;
    149 				__STATE(3, 1)
    150 			} else if (buf[0] == ':') {
    151 				result.host = frag;
    152 				__STATE(4, 1)
    153 			} else if (buf[0] == '/') {
    154 				result.host = frag;
    155 				__STATE(5, 0)
    156 			} else if (buf[0] == '\0') {
    157 				result.host = frag;
    158 				__STATE(99, 0)
    159 			}
    160 		} break;
    161 
    162 		case 3: { /* host */
    163 			if (buf[0] == ':') {
    164 				result.host = frag;
    165 				__STATE(4, 1)
    166 			} else if (buf[0] == '/') {
    167 				result.host = frag;
    168 				__STATE(5, 0)
    169 			} else if (buf[0] == '\0') {
    170 				result.host = frag;
    171 				__STATE(99, 0)
    172 			}
    173 		} break;
    174 
    175 		case 4: { /* port */
    176 			if (!isdigit(buf[0])) {
    177 				result.port = frag;
    178 				__STATE(5, 0)
    179 			} else if (buf[0] == '\0') {
    180 				result.port = frag;
    181 				__STATE(99, 0)
    182 			}
    183 		} break;
    184 
    185 		case 5: { /* path */
    186 			if (buf[0] == '?') {
    187 				result.path = frag;
    188 				__STATE(6, 1)
    189 			} else if (buf[0] == '#') {
    190 				result.path = frag;
    191 				__STATE(7, 1)
    192 			} else if (buf[0] == '\0') {
    193 				result.path = frag;
    194 				__STATE(99, 0)
    195 			}
    196 		} break;
    197 
    198 		case 6: { /* query */
    199 			if (buf[0] == '#') {
    200 				result.query = frag;
    201 				__STATE(7, 1)
    202 			} else if (buf[0] == '\0') {
    203 				result.query = frag;
    204 				__STATE(99, 0)
    205 			}
    206 		} break;
    207 
    208 		case 7: { /* fragment */
    209 			if (buf[0] == '\0') {
    210 				result.fragment = frag;
    211 				__STATE(99, 0)
    212 			}
    213 		} break;
    214 
    215 		case 99: /* end-of-string */
    216 			goto end;
    217 
    218 		}
    219 
    220 		frag.len++;
    221 
    222 		cur++;
    223 
    224 #undef __STATE
    225 	}
    226 
    227 end:
    228 	return result;
    229 }