uri.c (4461B)
1 #include "website.h" 2 3 static inline unsigned char 4 fromxdigit(char c) 5 { 6 static unsigned char lut[1 << CHAR_BIT] = { 7 ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, 8 ['5'] = 5, ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, 9 10 ['a'] = 10, ['A'] = 10, ['b'] = 11, ['B'] = 11, 11 ['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13, 12 ['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15, 13 }; 14 15 return lut[(unsigned char) c]; 16 } 17 18 bool 19 urifrag_try_normalise(struct urifrag *frag) 20 { 21 char *cur = frag->ptr; 22 23 for (int i = 0; i < frag->len; i++) { 24 char buf[3] = { 25 frag->ptr[i], 26 (i+1 < frag->len) ? frag->ptr[i+1] : '\0', 27 (i+2 < frag->len) ? frag->ptr[i+2] : '\0', 28 }; 29 30 unsigned char decoded = buf[0]; 31 if (decoded == '%') { 32 if (!isxdigit(buf[1]) || !isxdigit(buf[2])) 33 return false; 34 35 decoded = (fromxdigit(buf[1]) << 4) | fromxdigit(buf[2]); 36 } 37 38 *cur++ = decoded; 39 } 40 41 frag->len = cur - frag->ptr; 42 43 return true; 44 } 45 46 static inline char * 47 prev_dot_segment(char *begin, char *cur) 48 { 49 while (begin < cur) { 50 if (*(--cur) == '/') 51 return cur; 52 } 53 54 return begin; 55 } 56 57 void 58 urifrag_remove_dot_segments(struct urifrag *frag) 59 { 60 /* TODO: fix me! */ 61 62 char *wrcur = frag->ptr, *rdcur = frag->ptr, *end = frag->ptr + frag->len; 63 64 while (rdcur < end) { 65 char buf[3] = { 66 rdcur[0], 67 ((rdcur+1) < end) ? rdcur[1] : '\0', 68 ((rdcur+2) < end) ? rdcur[2] : '\0', 69 }; 70 71 if (buf[0] == '.' && buf[1] == '.' && (buf[2] == '/' || buf[2] == '\0')) { 72 wrcur = prev_dot_segment(frag->ptr, wrcur - 1); 73 rdcur += 2; 74 } else if (buf[0] == '.' && (buf[1] == '/' || buf[1] == '\0')) { 75 rdcur += 2; 76 } else { 77 *wrcur++ = *rdcur++; 78 } 79 } 80 81 frag->len = wrcur - frag->ptr; 82 } 83 84 struct uri 85 uri_parse(char *buf, size_t len) 86 { 87 /* uri = [scheme ':'] ['//' authority] path ['?' query] ['#' fragment] 88 * authority = [user '@'] host [':' port] 89 */ 90 91 struct uri result; 92 memset(&result, 0, sizeof result); 93 94 char *cur = buf, *end = buf + len; 95 96 int state = 0; 97 struct urifrag frag = { 98 .ptr = cur, 99 .len = 0, 100 }; 101 102 while (true) { 103 #define __STATE(n, toks) \ 104 { state = (n); cur += toks; frag.ptr = cur; frag.len = 0; continue; } 105 106 char buf[3] = { 107 (cur < end) ? cur[0] : '\0', 108 (cur+1 < end) ? cur[1] : '\0', 109 (cur+2 < end) ? cur[2] : '\0', 110 }; 111 112 switch (state) { 113 case 0: { /* scheme or authority or path */ 114 if (buf[0] == ':') { 115 result.scheme = frag; 116 __STATE(1, 1) 117 } else if (buf[0] == '/' && buf[1] == '/') { 118 __STATE(2, 2) 119 } else if (buf[0] == '?') { 120 result.path = frag; 121 __STATE(6, 1) 122 } else if (buf[0] == '#') { 123 result.path = frag; 124 __STATE(7, 1) 125 } else if (buf[0] == '\0') { 126 result.path = frag; 127 __STATE(99, 0) 128 } 129 } break; 130 131 case 1: { /* authority or path */ 132 if (buf[0] == '/' && buf[1] == '/') { 133 __STATE(2, 2) 134 } else if (buf[0] == '?') { 135 result.path = frag; 136 __STATE(6, 1) 137 } else if (buf[0] == '#') { 138 result.path = frag; 139 __STATE(7, 1) 140 } else if (buf[0] == '\0') { 141 result.path = frag; 142 __STATE(99, 0) 143 } 144 } break; 145 146 case 2: { /* authority */ 147 if (buf[0] == '@') { 148 result.user = frag; 149 __STATE(3, 1) 150 } else if (buf[0] == ':') { 151 result.host = frag; 152 __STATE(4, 1) 153 } else if (buf[0] == '/') { 154 result.host = frag; 155 __STATE(5, 0) 156 } else if (buf[0] == '\0') { 157 result.host = frag; 158 __STATE(99, 0) 159 } 160 } break; 161 162 case 3: { /* host */ 163 if (buf[0] == ':') { 164 result.host = frag; 165 __STATE(4, 1) 166 } else if (buf[0] == '/') { 167 result.host = frag; 168 __STATE(5, 0) 169 } else if (buf[0] == '\0') { 170 result.host = frag; 171 __STATE(99, 0) 172 } 173 } break; 174 175 case 4: { /* port */ 176 if (!isdigit(buf[0])) { 177 result.port = frag; 178 __STATE(5, 0) 179 } else if (buf[0] == '\0') { 180 result.port = frag; 181 __STATE(99, 0) 182 } 183 } break; 184 185 case 5: { /* path */ 186 if (buf[0] == '?') { 187 result.path = frag; 188 __STATE(6, 1) 189 } else if (buf[0] == '#') { 190 result.path = frag; 191 __STATE(7, 1) 192 } else if (buf[0] == '\0') { 193 result.path = frag; 194 __STATE(99, 0) 195 } 196 } break; 197 198 case 6: { /* query */ 199 if (buf[0] == '#') { 200 result.query = frag; 201 __STATE(7, 1) 202 } else if (buf[0] == '\0') { 203 result.query = frag; 204 __STATE(99, 0) 205 } 206 } break; 207 208 case 7: { /* fragment */ 209 if (buf[0] == '\0') { 210 result.fragment = frag; 211 __STATE(99, 0) 212 } 213 } break; 214 215 case 99: /* end-of-string */ 216 goto end; 217 218 } 219 220 frag.len++; 221 222 cur++; 223 224 #undef __STATE 225 } 226 227 end: 228 return result; 229 }