// Written in the D programming language.
/**
Haystack trio decode.

Copyright: Copyright (c) 2019, Radu Racariu <radu.racariu@gmail.com>
License:   $(LINK2 www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
Authors:   Radu Racariu
**/
module haystack.trio.decode;

import std.traits : isSomeChar;
import std.range.primitives : empty, isInputRange, ElementEncodingType;

import haystack.tag;
import haystack.zinc.lexer;
import haystack.zinc.decode;

/**
Parses a Trio encoded `InputRange`.

The parser is itself an input range of `Dict`: `front` is the dict that was
parsed last, `popFront` parses the next one and `empty` becomes true once the
parser reached the `done` or `fault` state.

Tokenization and the decoding of scalars, lists and dicts are delegated to
`ZincLexer` / `ZincParser`; this type only adds the Trio line structure
(`key`, `key:value`, `---` separators, `//` comments, unquoted strings and
multi-line strings).
*/
struct TrioParser(Range)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range))
{
    alias Lexer     = ZincLexer!Range;
    alias Parser    = ZincParser!Range;

    /**
    Constructs the parser over `r` and eagerly parses the first `Dict`.

    If `r` is empty nothing is parsed and the parser goes to `State.fault`.
    */
    this(Range r)
    {
        this.parser = Parser(r);
        if (!r.empty)
            popFront();
        else
            state = State.fault;
    }
    @disable this();
    @disable this(this);

    /// True once a parsing error occurred or parsing is complete
    @property bool empty()
    {
        return (state == State.done || state == State.fault);
    }

    /// The last parsed `Dict`
    @property Dict front()
    {
        assert(!empty, "Attempting to access front of an empty Trio.");
        return dict;
    }

    /**
    Parse next `Dict`.

    When the lexer has no more tokens the state becomes `State.done`.
    Otherwise the state machine below runs until the end of the input, a
    dict separator line, or a parsing error (`State.fault`).
    */
    void popFront()
    {
        if (lexer.empty)
        {
            state = State.done;
            return;
        }

        dict = Dict.init;
        string key;

        // trio parsing state machine
        // note: the loop condition tests the parser state, not the lexer,
        // so every state has to check `lexer.empty` before using `lexer.front`
    loop:
        for (; !empty; lexer.popFront())
        {
        start:
            switch (state)
            {
                case State.key:
                    // must start with valid key
                    if (lexer.empty || !lexer.front.isId)
                    {
                        // only look at the current token when there is one
                        if (!lexer.empty && isCommentChar)
                        {
                            state = State.comment;
                            goto start;
                        }
                        state = State.fault;
                        break loop;
                    }
                    key = lexer.front.value!Str;
                    state = State.valueSep;
                    break;

                case State.valueSep:
                    // check for end of input before touching the token
                    if (!lexer.empty && lexer.front.isSpace)
                        continue;
                    // marker symbol
                    if (lexer.empty || lexer.front.isNewLine)
                    {
                        dict[key] = marker;
                        state = State.nextLine;
                        goto start;
                    }
                    else if (isValueSeparatorChar)
                    {
                        state = State.value;
                        continue;
                    }
                    break;

                case State.value:
                    // a separator with nothing after it is an error;
                    // checked first so `front` is never read on an empty lexer
                    if (lexer.empty)
                    {
                        state = State.fault;
                        break loop;
                    }

                    if (lexer.front.isSpace)
                        continue;

                    // Special casing for str and xstr
                    if ((!lexer.front.isValid && !isComplexType)
                        || lexer.front.isId)
                    {
                        // multiline string
                        if (lexer.front.isNewLine)
                        {
                            auto val = lexXStr();
                            dict[key] = Str(val);
                        }
                        else
                        {
                            // unquoted string start
                            // could match an id
                            if (lexer.front.isId)
                            {
                                // put the already lexed id chars back in the stash
                                // so they become the start of the string
                                foreach (c; lexer.front.value!Str)
                                    lexer.buffer.stash(c);
                            }
                            auto type = lexStr();
                            // get the separator char
                            lexer.popFront();
                            // XStr encoding
                            if (isValueSeparatorChar)
                            {
                                lexer.popFront();
                                // expect newline
                                if (lexer.empty || !lexer.front.isNewLine)
                                {
                                    state = State.fault;
                                    break loop;
                                }
                                lexer.popFront();
                                if (lexer.empty)
                                {
                                    state = State.fault;
                                    break loop;
                                }
                                auto val = lexXStr();
                                dict[key] = XStr(type, val);
                                state = State.nextLine;
                                goto start;
                            }
                            else // simple unquoted string
                            {
                                dict[key] = Str(type);
                            }
                        }
                    }
                    else // scalar, list, dict
                    {
                        auto el = Parser.AnyTag(parser);
                        dict[key] = cast() el.asTag;
                    }

                    state = State.nextLine;
                    break;

                case State.nextLine:
                    if (lexer.empty)
                        break loop;

                    if (lexer.front.isSpace)
                        continue;

                    if (lexer.front.isValid)
                    {
                        state = State.key;
                        goto start;
                    }
                    else if (isNextDictChar)
                    {
                        state = State.nextDict;
                        continue;
                    }
                    else if (isCommentChar)
                    {
                        // a comment needs two consecutive comment chars
                        lexer.popFront();
                        if (lexer.empty || !isCommentChar)
                        {
                            state = State.fault;
                            break loop;
                        }
                        state = State.comment;
                        continue;
                    }
                    else if (!lexer.front.isNewLine)
                    {
                        state = State.fault;
                        break loop;
                    }
                    break;

                case State.nextDict:
                    if (lexer.empty)
                        break loop;

                    // consume the rest of the separator chars
                    if (isNextDictChar)
                        continue;

                    if (lexer.front.isNewLine)
                    {
                        // current dict is complete, the next `popFront`
                        // starts with the key of the following dict
                        state = State.key;
                        lexer.popFront();
                        break loop;
                    }
                    else
                    {
                        state = State.fault;
                        break loop;
                    }

                case State.comment:
                    if (lexer.empty)
                        break loop;

                    // skip everything up to the end of the line
                    if (!lexer.front.isNewLine)
                        continue;

                    state = State.nextLine;
                    goto start;

                default:
                    assert(false, "Invalid parser state.");
            }
        }
    }

    /// Definitions of parser states
    enum State { key, valueSep, value, nextLine, nextDict, comment, fault, done }
    /// Current parser state
    State state;

private:

    // Pointer to the lexer owned by the underlying zinc parser
    @property scope Lexer* lexer() return
    {
        return &this.parser.lexer;
    }

    // Read a single line of unquoted text.
    // Stashes chars from the lexer buffer while they are letters, '_', '#'
    // or space separators; stops at the first other char (digits and
    // punctuation included) and returns what `commitStash` yields.
    string lexStr()
    {
        import std.uni : isAlpha, isSpace;

        scope range = &lexer.buffer();
        range.save();

        for (; !range.empty; range.popFront())
        {
            if (range.front.isAlpha
                || range.front == '_'
                || range.front == '#'
                || range.front.isSpace)
                range.stash();
            else
                break;
        }
        return range.commitStash();
    }

    // Read multiline string.
    // Two states: `line` stashes the chars of the current line, a white
    // control char (line break) switches to `nextLine`; in `nextLine` a
    // leading space means the value continues, anything else ends it.
    // NOTE(review): in the `line` state every space char is skipped, not only
    // the indentation, and line breaks are not stashed either - so it looks
    // like inner spaces and newlines are dropped from the result; confirm
    // whether that is intended.
    string lexXStr()
    {
        import std.uni : isControl, isSpace, isWhite;

        enum XStrState { line, nextLine }
        XStrState thisState;

        scope range = &lexer.buffer();
        range.save();

    loop:
        for (; !range.empty; range.popFront())
        {
            final switch (thisState)
            {
                case XStrState.line:
                    if (range.front.isSpace)
                        continue;
                    if (range.front.isWhite && range.front.isControl)
                    {
                        thisState = XStrState.nextLine;
                        continue;
                    }
                    range.stash();
                    break;

                case XStrState.nextLine:
                    if (range.front.isSpace)
                    {
                        thisState = XStrState.line;
                        continue;
                    }
                    break loop;
            }
        }
        return range.commitStash();
    }

    // True if the current token is the '/' char
    @property bool isCommentChar() pure
    {
        return lexer.front.hasChr('/');
    }

    // True if the current token is the '-' char
    @property bool isNextDictChar() pure
    {
        return lexer.front.hasChr('-');
    }

    // True if the current token is the ':' char
    @property bool isValueSeparatorChar() pure
    {
        return lexer.front.hasChr(':');
    }

    // True if the current token opens a list ('[') or a dict ('{')
    @property bool isComplexType() pure
    {
        return lexer.front.hasChr('[') || lexer.front.hasChr('{');
    }

    // The dict being built by `popFront` and returned by `front`
    Dict dict;
    // Initialized by the constructor (default construction is disabled)
    Parser parser = void;
}

/// Trio parser over a `string`
alias TrioStringDecoder = TrioParser!string;

unittest
{
    import haystack.util.tzdata : timeZone;
    auto scalars = q"{marker
na: NA
bool: T
number: 1234.5$
str: "ana are mere si pere"
strSimple:a simple string
xstr:Foo:
  blah
coord: C(37.545826,-77.449188)
uri: `/a/b/c`
ref:@someId
date:2019-06-06
time: 15:23:03
dateTime:2019-04-09T15:24:00+02:00 Europe/Athens
}";

    auto decoder = TrioStringDecoder(scalars);
    auto dict = decoder.front();

    assert(dict.has!Marker("marker"));
    assert(dict.has!Na("na"));
    assert(dict.get!Bool("bool") == true);
    assert(dict.get!Num("number") == Num(1234.5, "$"));
    assert(dict.get!Str("str") == "ana are mere si pere");
    assert(dict.get!Str("strSimple") == "a simple string");
    assert(dict.get!XStr("xstr") == XStr("Foo", "blah"));
    assert(dict.get!Coord("coord") == Coord(37.545826,-77.449188));
    assert(dict.get!Uri("uri") == Uri(`/a/b/c`));
    assert(dict.get!Ref("ref") == Ref("someId"));
    assert(dict.get!Date("date") == Date(2019, 6, 6));
    assert(dict.get!Time("time") == Time(15, 23, 3));
    assert(dict.get!SysTime("dateTime") == SysTime(DateTime(Date(2019, 4, 9), Time(15, 24, 0)), timeZone("Europe/Athens")));
}


unittest
{
    import std.algorithm: move;
    auto scalars =
q"{foo
bar
---
n:3}";

    auto decoder = TrioStringDecoder(scalars);
    Dict[] dicts;

    foreach (d; decoder.move)
        dicts ~= d;

    assert(dicts.length == 2);
    assert(dicts[0].has!Marker("foo"));
    assert(dicts[1].get!Num("n").to!int == 3);
}

unittest
{
    auto complex = q"{list:[1,"str",T]
dict: {ana:"are" mere}
// some comment
grid:Zinc:
  ver:"3.0"
  empty
}";

    auto decoder = TrioStringDecoder(complex);
    auto dict = decoder.front();
    assert(dict.has!List("list"));
    assert(dict.has!Dict("dict"));
    assert(dict.has!XStr("grid"));
}
unittest
{
    import std.algorithm: move;
    auto comment =
q"{// A comment
value:1}";

    auto decoder = TrioStringDecoder(comment);

    assert(decoder.front.length == 1);
    assert(decoder.front.get!Num("value").to!int == 1);
}