Ruby  2.0.0p648(2015-12-16revision53162)
scanner.c
Go to the documentation of this file.
1 
2 /*
3  * Introduction
4  * ************
5  *
6  * The following notes assume that you are familiar with the YAML specification
7  * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in
8  * some cases we are less restrictive than it requires.
9  *
10  * The process of transforming a YAML stream into a sequence of events is
11  * divided into two steps: Scanning and Parsing.
12  *
13  * The Scanner transforms the input stream into a sequence of tokens, while the
14  * Parser transforms the sequence of tokens produced by the Scanner into a
15  * sequence of parsing events.
16  *
17  * The Scanner is rather clever and complicated. The Parser, on the contrary,
18  * is a straightforward implementation of a recursive-descent parser (or,
19  * LL(1) parser, as it is usually called).
20  *
21  * Actually there are two issues of Scanning that might be called "clever", the
22  * rest is quite straightforward. The issues are "block collection start" and
23  * "simple keys". Both issues are explained below in detail.
24  *
25  * Here the Scanning step is explained and implemented. We start with the list
26  * of all the tokens produced by the Scanner together with short descriptions.
27  *
28  * Now, tokens:
29  *
30  * STREAM-START(encoding) # The stream start.
31  * STREAM-END # The stream end.
32  * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
33  * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
34  * DOCUMENT-START # '---'
35  * DOCUMENT-END # '...'
36  * BLOCK-SEQUENCE-START # Indentation increase denoting a block
37  * BLOCK-MAPPING-START # sequence or a block mapping.
38  * BLOCK-END # Indentation decrease.
39  * FLOW-SEQUENCE-START # '['
40  * FLOW-SEQUENCE-END # ']'
41  * FLOW-MAPPING-START # '{'
42  * FLOW-MAPPING-END # '}'
43  * BLOCK-ENTRY # '-'
44  * FLOW-ENTRY # ','
45  * KEY # '?' or nothing (simple keys).
46  * VALUE # ':'
47  * ALIAS(anchor) # '*anchor'
48  * ANCHOR(anchor) # '&anchor'
49  * TAG(handle,suffix) # '!handle!suffix'
50  * SCALAR(value,style) # A scalar.
51  *
52  * The following two tokens are "virtual" tokens denoting the beginning and the
53  * end of the stream:
54  *
55  * STREAM-START(encoding)
56  * STREAM-END
57  *
58  * We pass the information about the input stream encoding with the
59  * STREAM-START token.
60  *
61  * The next two tokens are responsible for directives:
62  *
63  * VERSION-DIRECTIVE(major,minor)
64  * TAG-DIRECTIVE(handle,prefix)
65  *
66  * Example:
67  *
68  * %YAML 1.1
69  * %TAG ! !foo
70  * %TAG !yaml! tag:yaml.org,2002:
71  * ---
72  *
73  * The corresponding sequence of tokens:
74  *
75  * STREAM-START(utf-8)
76  * VERSION-DIRECTIVE(1,1)
77  * TAG-DIRECTIVE("!","!foo")
78  * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79  * DOCUMENT-START
80  * STREAM-END
81  *
82  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83  * line.
84  *
85  * The document start and end indicators are represented by:
86  *
87  * DOCUMENT-START
88  * DOCUMENT-END
89  *
90  * Note that if a YAML stream contains an implicit document (without '---'
91  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92  * produced.
93  *
94  * In the following examples, we present whole documents together with the
95  * produced tokens.
96  *
97  * 1. An implicit document:
98  *
99  * 'a scalar'
100  *
101  * Tokens:
102  *
103  * STREAM-START(utf-8)
104  * SCALAR("a scalar",single-quoted)
105  * STREAM-END
106  *
107  * 2. An explicit document:
108  *
109  * ---
110  * 'a scalar'
111  * ...
112  *
113  * Tokens:
114  *
115  * STREAM-START(utf-8)
116  * DOCUMENT-START
117  * SCALAR("a scalar",single-quoted)
118  * DOCUMENT-END
119  * STREAM-END
120  *
121  * 3. Several documents in a stream:
122  *
123  * 'a scalar'
124  * ---
125  * 'another scalar'
126  * ---
127  * 'yet another scalar'
128  *
129  * Tokens:
130  *
131  * STREAM-START(utf-8)
132  * SCALAR("a scalar",single-quoted)
133  * DOCUMENT-START
134  * SCALAR("another scalar",single-quoted)
135  * DOCUMENT-START
136  * SCALAR("yet another scalar",single-quoted)
137  * STREAM-END
138  *
139  * We have already introduced the SCALAR token above. The following tokens are
140  * used to describe aliases, anchors, tags, and scalars:
141  *
142  * ALIAS(anchor)
143  * ANCHOR(anchor)
144  * TAG(handle,suffix)
145  * SCALAR(value,style)
146  *
147  * The following series of examples illustrates the usage of these tokens:
148  *
149  * 1. A recursive sequence:
150  *
151  * &A [ *A ]
152  *
153  * Tokens:
154  *
155  * STREAM-START(utf-8)
156  * ANCHOR("A")
157  * FLOW-SEQUENCE-START
158  * ALIAS("A")
159  * FLOW-SEQUENCE-END
160  * STREAM-END
161  *
162  * 2. A tagged scalar:
163  *
164  * !!float "3.14" # A good approximation.
165  *
166  * Tokens:
167  *
168  * STREAM-START(utf-8)
169  * TAG("!!","float")
170  * SCALAR("3.14",double-quoted)
171  * STREAM-END
172  *
173  * 3. Various scalar styles:
174  *
175  * --- # Implicit empty plain scalars do not produce tokens.
176  * --- a plain scalar
177  * --- 'a single-quoted scalar'
178  * --- "a double-quoted scalar"
179  * --- |-
180  * a literal scalar
181  * --- >-
182  * a folded
183  * scalar
184  *
185  * Tokens:
186  *
187  * STREAM-START(utf-8)
188  * DOCUMENT-START
189  * DOCUMENT-START
190  * SCALAR("a plain scalar",plain)
191  * DOCUMENT-START
192  * SCALAR("a single-quoted scalar",single-quoted)
193  * DOCUMENT-START
194  * SCALAR("a double-quoted scalar",double-quoted)
195  * DOCUMENT-START
196  * SCALAR("a literal scalar",literal)
197  * DOCUMENT-START
198  * SCALAR("a folded scalar",folded)
199  * STREAM-END
200  *
201  * Now it's time to review collection-related tokens. We will start with
202  * flow collections:
203  *
204  * FLOW-SEQUENCE-START
205  * FLOW-SEQUENCE-END
206  * FLOW-MAPPING-START
207  * FLOW-MAPPING-END
208  * FLOW-ENTRY
209  * KEY
210  * VALUE
211  *
212  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214  * correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
215  * indicators '?' and ':', which are used for denoting mapping keys and values,
216  * are represented by the KEY and VALUE tokens.
217  *
218  * The following examples show flow collections:
219  *
220  * 1. A flow sequence:
221  *
222  * [item 1, item 2, item 3]
223  *
224  * Tokens:
225  *
226  * STREAM-START(utf-8)
227  * FLOW-SEQUENCE-START
228  * SCALAR("item 1",plain)
229  * FLOW-ENTRY
230  * SCALAR("item 2",plain)
231  * FLOW-ENTRY
232  * SCALAR("item 3",plain)
233  * FLOW-SEQUENCE-END
234  * STREAM-END
235  *
236  * 2. A flow mapping:
237  *
238  * {
239  * a simple key: a value, # Note that the KEY token is produced.
240  * ? a complex key: another value,
241  * }
242  *
243  * Tokens:
244  *
245  * STREAM-START(utf-8)
246  * FLOW-MAPPING-START
247  * KEY
248  * SCALAR("a simple key",plain)
249  * VALUE
250  * SCALAR("a value",plain)
251  * FLOW-ENTRY
252  * KEY
253  * SCALAR("a complex key",plain)
254  * VALUE
255  * SCALAR("another value",plain)
256  * FLOW-ENTRY
257  * FLOW-MAPPING-END
258  * STREAM-END
259  *
260  * A simple key is a key which is not denoted by the '?' indicator. Note that
261  * the Scanner still produces the KEY token whenever it encounters a simple key.
262  *
263  * For scanning block collections, the following tokens are used (note that we
264  * repeat KEY and VALUE here):
265  *
266  * BLOCK-SEQUENCE-START
267  * BLOCK-MAPPING-START
268  * BLOCK-END
269  * BLOCK-ENTRY
270  * KEY
271  * VALUE
272  *
273  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274  * increase that precedes a block collection (cf. the INDENT token in Python).
275  * The token BLOCK-END denotes indentation decrease that ends a block collection
276  * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
277  * that make detection of these tokens more complex.
278  *
279  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280  * '-', '?', and ':' correspondingly.
281  *
282  * The following examples show how the tokens BLOCK-SEQUENCE-START,
283  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284  *
285  * 1. Block sequences:
286  *
287  * - item 1
288  * - item 2
289  * -
290  * - item 3.1
291  * - item 3.2
292  * -
293  * key 1: value 1
294  * key 2: value 2
295  *
296  * Tokens:
297  *
298  * STREAM-START(utf-8)
299  * BLOCK-SEQUENCE-START
300  * BLOCK-ENTRY
301  * SCALAR("item 1",plain)
302  * BLOCK-ENTRY
303  * SCALAR("item 2",plain)
304  * BLOCK-ENTRY
305  * BLOCK-SEQUENCE-START
306  * BLOCK-ENTRY
307  * SCALAR("item 3.1",plain)
308  * BLOCK-ENTRY
309  * SCALAR("item 3.2",plain)
310  * BLOCK-END
311  * BLOCK-ENTRY
312  * BLOCK-MAPPING-START
313  * KEY
314  * SCALAR("key 1",plain)
315  * VALUE
316  * SCALAR("value 1",plain)
317  * KEY
318  * SCALAR("key 2",plain)
319  * VALUE
320  * SCALAR("value 2",plain)
321  * BLOCK-END
322  * BLOCK-END
323  * STREAM-END
324  *
325  * 2. Block mappings:
326  *
327  * a simple key: a value # The KEY token is produced here.
328  * ? a complex key
329  * : another value
330  * a mapping:
331  * key 1: value 1
332  * key 2: value 2
333  * a sequence:
334  * - item 1
335  * - item 2
336  *
337  * Tokens:
338  *
339  * STREAM-START(utf-8)
340  * BLOCK-MAPPING-START
341  * KEY
342  * SCALAR("a simple key",plain)
343  * VALUE
344  * SCALAR("a value",plain)
345  * KEY
346  * SCALAR("a complex key",plain)
347  * VALUE
348  * SCALAR("another value",plain)
349  * KEY
350  * SCALAR("a mapping",plain)
 * VALUE
351  * BLOCK-MAPPING-START
352  * KEY
353  * SCALAR("key 1",plain)
354  * VALUE
355  * SCALAR("value 1",plain)
356  * KEY
357  * SCALAR("key 2",plain)
358  * VALUE
359  * SCALAR("value 2",plain)
360  * BLOCK-END
361  * KEY
362  * SCALAR("a sequence",plain)
363  * VALUE
364  * BLOCK-SEQUENCE-START
365  * BLOCK-ENTRY
366  * SCALAR("item 1",plain)
367  * BLOCK-ENTRY
368  * SCALAR("item 2",plain)
369  * BLOCK-END
370  * BLOCK-END
371  * STREAM-END
372  *
373  * YAML does not always require to start a new block collection from a new
374  * line. If the current line contains only '-', '?', and ':' indicators, a new
375  * block collection may start at the current line. The following examples
376  * illustrate this case:
377  *
378  * 1. Collections in a sequence:
379  *
380  * - - item 1
381  * - item 2
382  * - key 1: value 1
383  * key 2: value 2
384  * - ? complex key
385  * : complex value
386  *
387  * Tokens:
388  *
389  * STREAM-START(utf-8)
390  * BLOCK-SEQUENCE-START
391  * BLOCK-ENTRY
392  * BLOCK-SEQUENCE-START
393  * BLOCK-ENTRY
394  * SCALAR("item 1",plain)
395  * BLOCK-ENTRY
396  * SCALAR("item 2",plain)
397  * BLOCK-END
398  * BLOCK-ENTRY
399  * BLOCK-MAPPING-START
400  * KEY
401  * SCALAR("key 1",plain)
402  * VALUE
403  * SCALAR("value 1",plain)
404  * KEY
405  * SCALAR("key 2",plain)
406  * VALUE
407  * SCALAR("value 2",plain)
408  * BLOCK-END
409  * BLOCK-ENTRY
410  * BLOCK-MAPPING-START
411  * KEY
412  * SCALAR("complex key",plain)
413  * VALUE
414  * SCALAR("complex value",plain)
415  * BLOCK-END
416  * BLOCK-END
417  * STREAM-END
418  *
419  * 2. Collections in a mapping:
420  *
421  * ? a sequence
422  * : - item 1
423  * - item 2
424  * ? a mapping
425  * : key 1: value 1
426  * key 2: value 2
427  *
428  * Tokens:
429  *
430  * STREAM-START(utf-8)
431  * BLOCK-MAPPING-START
432  * KEY
433  * SCALAR("a sequence",plain)
434  * VALUE
435  * BLOCK-SEQUENCE-START
436  * BLOCK-ENTRY
437  * SCALAR("item 1",plain)
438  * BLOCK-ENTRY
439  * SCALAR("item 2",plain)
440  * BLOCK-END
441  * KEY
442  * SCALAR("a mapping",plain)
443  * VALUE
444  * BLOCK-MAPPING-START
445  * KEY
446  * SCALAR("key 1",plain)
447  * VALUE
448  * SCALAR("value 1",plain)
449  * KEY
450  * SCALAR("key 2",plain)
451  * VALUE
452  * SCALAR("value 2",plain)
453  * BLOCK-END
454  * BLOCK-END
455  * STREAM-END
456  *
457  * YAML also permits non-indented sequences if they are included into a block
458  * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
459  *
460  * key:
461  * - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
462  * - item 2
463  *
464  * Tokens:
465  *
466  * STREAM-START(utf-8)
467  * BLOCK-MAPPING-START
468  * KEY
469  * SCALAR("key",plain)
470  * VALUE
471  * BLOCK-ENTRY
472  * SCALAR("item 1",plain)
473  * BLOCK-ENTRY
474  * SCALAR("item 2",plain)
475  * BLOCK-END
476  */
477 
478 #include "yaml_private.h"
479 
480 /*
481  * Ensure that the buffer contains the required number of characters.
482  * Return 1 on success, 0 on failure (reader error or memory error).
 *
 * When parser->unread already covers `length` the macro evaluates to 1
 * without calling anything; otherwise it evaluates to the result of
 * yaml_parser_update_buffer(parser, length).
 *
 * Both arguments appear more than once in the expansion and `parser` is
 * not parenthesized, so pass a plain pointer variable and a side-effect
 * free length.
483  */
484 
485 #define CACHE(parser,length) \
486  (parser->unread >= (length) \
487  ? 1 \
488  : yaml_parser_update_buffer(parser, (length)))
489 
490 /*
491  * Advance the buffer pointer.
 *
 * Consumes one character that is not a line break: mark.index and
 * mark.column go up by one, unread goes down by one, and buffer.pointer
 * moves forward by WIDTH(parser->buffer) bytes. mark.line is not touched,
 * so line breaks have to go through SKIP_LINE instead.
492  */
493 
494 #define SKIP(parser) \
495  (parser->mark.index ++, \
496  parser->mark.column ++, \
497  parser->unread --, \
498  parser->buffer.pointer += WIDTH(parser->buffer))
499 
/*
 * Advance the buffer pointer past a line break.
 *
 * A CR LF pair is consumed as a unit: index and unread change by 2 and the
 * pointer moves 2 bytes. Any other break accepted by IS_BREAK is consumed
 * as one character of WIDTH(parser->buffer) bytes. In both cases
 * mark.column is reset to 0 and mark.line is incremented.
 *
 * If the buffer is not positioned at a line break the macro evaluates to 0
 * and changes nothing.
 */
500 #define SKIP_LINE(parser) \
501  (IS_CRLF(parser->buffer) ? \
502  (parser->mark.index += 2, \
503  parser->mark.column = 0, \
504  parser->mark.line ++, \
505  parser->unread -= 2, \
506  parser->buffer.pointer += 2) : \
507  IS_BREAK(parser->buffer) ? \
508  (parser->mark.index ++, \
509  parser->mark.column = 0, \
510  parser->mark.line ++, \
511  parser->unread --, \
512  parser->buffer.pointer += WIDTH(parser->buffer)) : 0)
513 
514 /*
515  * Copy a character to a string buffer and advance pointers.
 *
 * Evaluates to 1 on success and to 0 when STRING_EXTEND(parser,string)
 * fails, in which case nothing is copied and no counter changes.
 *
 * On success mark.index and mark.column go up by one and unread goes down
 * by one. This macro does not itself move parser->buffer.pointer or
 * string.pointer; presumably COPY advances both -- confirm against its
 * definition in yaml_private.h.
516  */
517 
518 #define READ(parser,string) \
519  (STRING_EXTEND(parser,string) ? \
520  (COPY(string,parser->buffer), \
521  parser->mark.index ++, \
522  parser->mark.column ++, \
523  parser->unread --, \
524  1) : 0)
525 
526 /*
527  * Copy a line break character to a string buffer and advance pointers.
 *
 * The break is normalized while it is copied:
 *
 *   CR LF         -> a single '\n' (2 input bytes, index and unread by 2)
 *   CR or LF      -> '\n'          (1 input byte)
 *   NEL (C2 85)   -> '\n'          (2 input bytes, counted as 1 character)
 *   LS/PS (E2 80 A8 / E2 80 A9) -> copied unchanged, all 3 bytes
 *                                  (counted as 1 character)
 *
 * Every recognized break resets mark.column to 0 and increments mark.line.
 *
 * Evaluates to 0 only when STRING_EXTEND(parser,string) fails. If the
 * extension succeeds the result is 1 even when the buffer is not at a
 * line break: the inner conditional then yields 0, but that value is
 * discarded by the comma operator and nothing is copied or advanced.
528  */
529 
530 #define READ_LINE(parser,string) \
531  (STRING_EXTEND(parser,string) ? \
532  (((CHECK_AT(parser->buffer,'\r',0) \
533  && CHECK_AT(parser->buffer,'\n',1)) ? /* CR LF -> LF */ \
534  (*((string).pointer++) = (yaml_char_t) '\n', \
535  parser->buffer.pointer += 2, \
536  parser->mark.index += 2, \
537  parser->mark.column = 0, \
538  parser->mark.line ++, \
539  parser->unread -= 2) : \
540  (CHECK_AT(parser->buffer,'\r',0) \
541  || CHECK_AT(parser->buffer,'\n',0)) ? /* CR|LF -> LF */ \
542  (*((string).pointer++) = (yaml_char_t) '\n', \
543  parser->buffer.pointer ++, \
544  parser->mark.index ++, \
545  parser->mark.column = 0, \
546  parser->mark.line ++, \
547  parser->unread --) : \
548  (CHECK_AT(parser->buffer,'\xC2',0) \
549  && CHECK_AT(parser->buffer,'\x85',1)) ? /* NEL -> LF */ \
550  (*((string).pointer++) = (yaml_char_t) '\n', \
551  parser->buffer.pointer += 2, \
552  parser->mark.index ++, \
553  parser->mark.column = 0, \
554  parser->mark.line ++, \
555  parser->unread --) : \
556  (CHECK_AT(parser->buffer,'\xE2',0) && \
557  CHECK_AT(parser->buffer,'\x80',1) && \
558  (CHECK_AT(parser->buffer,'\xA8',2) || \
559  CHECK_AT(parser->buffer,'\xA9',2))) ? /* LS|PS -> LS|PS */ \
560  (*((string).pointer++) = *(parser->buffer.pointer++), \
561  *((string).pointer++) = *(parser->buffer.pointer++), \
562  *((string).pointer++) = *(parser->buffer.pointer++), \
563  parser->mark.index ++, \
564  parser->mark.column = 0, \
565  parser->mark.line ++, \
566  parser->unread --) : 0), \
567  1) : 0)
568 
569 /*
570  * Public API declarations.
571  */
572 
573 YAML_DECLARE(int)
575 
576 /*
577  * Error handling.
578  */
579 
580 static int
581 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
582  yaml_mark_t context_mark, const char *problem);
583 
584 /*
585  * High-level token API.
586  */
587 
588 YAML_DECLARE(int)
590 
591 static int
593 
594 /*
595  * Potential simple keys.
596  */
597 
598 static int
600 
601 static int
603 
604 static int
606 
607 static int
609 
610 static int
612 
613 /*
614  * Indentation treatment.
615  */
616 
617 static int
618 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
619  ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);
620 
621 static int
622 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);
623 
624 /*
625  * Token fetchers.
626  */
627 
628 static int
630 
631 static int
633 
634 static int
636 
637 static int
640 
641 static int
644 
645 static int
648 
649 static int
651 
652 static int
654 
655 static int
657 
658 static int
660 
661 static int
663 
664 static int
666 
667 static int
668 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
669 
670 static int
671 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
672 
673 static int
675 
676 /*
677  * Token scanners.
678  */
679 
680 static int
682 
683 static int
685 
686 static int
688  yaml_mark_t start_mark, yaml_char_t **name);
689 
690 static int
692  yaml_mark_t start_mark, int *major, int *minor);
693 
694 static int
696  yaml_mark_t start_mark, int *number);
697 
698 static int
700  yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
701 
702 static int
705 
706 static int
708 
709 static int
710 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
711  yaml_mark_t start_mark, yaml_char_t **handle);
712 
713 static int
714 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
715  yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
716 
717 static int
718 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
719  yaml_mark_t start_mark, yaml_string_t *string);
720 
721 static int
723  int literal);
724 
725 static int
727  int *indent, yaml_string_t *breaks,
728  yaml_mark_t start_mark, yaml_mark_t *end_mark);
729 
730 static int
732  int single);
733 
734 static int
736 
737 /*
738  * Get the next token.
 *
 * Zero-fills *token, then hands back the token at the head of
 * parser->tokens, fetching more tokens first when none is flagged as
 * available.
 *
 * Returns 1 on success and 0 when fetching more tokens fails. Once
 * STREAM-END has been produced, or once parser->error is set, the function
 * still returns 1 but leaves *token zero-filled.
 *
 * NOTE(review): listing line 742, which carried the function name and
 * parameter list, is missing from this listing; `parser` and `token` are
 * presumably its two parameters -- confirm against the upstream file.
739  */
740 
741 YAML_DECLARE(int)
743 {
744  assert(parser); /* Non-NULL parser object is expected. */
745  assert(token); /* Non-NULL token object is expected. */
746 
747  /* Erase the token object. */
748 
749  memset(token, 0, sizeof(yaml_token_t));
750 
751  /* No tokens after STREAM-END or error. */
752 
753  if (parser->stream_end_produced || parser->error) {
754  return 1;
755  }
756 
757  /* Ensure that the tokens queue contains enough tokens. */
758 
759  if (!parser->token_available) {
760  if (!yaml_parser_fetch_more_tokens(parser))
761  return 0;
762  }
763 
764  /* Fetch the next token from the queue. */
765 
766  *token = DEQUEUE(parser, parser->tokens);
 /* The flag is cleared so the next call re-checks the queue. */
767  parser->token_available = 0;
768  parser->tokens_parsed ++;
769 
 /* Remember that the stream is finished; later calls return early. */
770  if (token->type == YAML_STREAM_END_TOKEN) {
771  parser->stream_end_produced = 1;
772  }
773 
774  return 1;
775 }
776 
777 /*
778  * Set the scanner error and return 0.
 *
 * Records YAML_SCANNER_ERROR in parser->error together with the caller's
 * `context` / `context_mark` and `problem`. The problem position is taken
 * from the parser's current mark, not from an argument. The two strings
 * are stored as pointers, not copied.
 *
 * Always returns 0, so callers can write
 * `return yaml_parser_set_scanner_error(...)`.
779  */
780 
781 static int
782 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
783  yaml_mark_t context_mark, const char *problem)
784 {
785  parser->error = YAML_SCANNER_ERROR;
786  parser->context = context;
787  parser->context_mark = context_mark;
788  parser->problem = problem;
789  parser->problem_mark = parser->mark;
790 
791  return 0;
792 }
793 
794 /*
795  * Ensure that the tokens queue contains at least one token which can be
796  * returned to the Parser.
 *
 * Keeps calling yaml_parser_fetch_next_token() while the queue is empty,
 * or while some still-possible simple key has a token_number equal to
 * parser->tokens_parsed (that is, it refers to the token currently at the
 * head of the queue).
 *
 * On success sets parser->token_available and returns 1. Returns 0 as soon
 * as yaml_parser_stale_simple_keys() or yaml_parser_fetch_next_token()
 * fails.
 *
 * NOTE(review): listing line 800, which carried the function name and
 * parameter list, is missing from this listing -- confirm against the
 * upstream file.
797  */
798 
799 YAML_DECLARE(int)
801 {
802  int need_more_tokens;
803 
804  /* While we need more tokens to fetch, do it. */
805 
806  while (1)
807  {
808  /*
809  * Check if we really need to fetch more tokens.
810  */
811 
812  need_more_tokens = 0;
813 
814  if (parser->tokens.head == parser->tokens.tail)
815  {
816  /* Queue is empty. */
817 
818  need_more_tokens = 1;
819  }
820  else
821  {
822  yaml_simple_key_t *simple_key;
823 
824  /* Check if any potential simple key may occupy the head position. */
825 
826  if (!yaml_parser_stale_simple_keys(parser))
827  return 0;
828 
829  for (simple_key = parser->simple_keys.start;
830  simple_key != parser->simple_keys.top; simple_key++) {
831  if (simple_key->possible
832  && simple_key->token_number == parser->tokens_parsed) {
833  need_more_tokens = 1;
834  break;
835  }
836  }
837  }
838 
839  /* We are finished. */
840 
841  if (!need_more_tokens)
842  break;
843 
844  /* Fetch the next token. */
845 
846  if (!yaml_parser_fetch_next_token(parser))
847  return 0;
848  }
849 
850  parser->token_available = 1;
851 
852  return 1;
853 }
854 
855 /*
856  * The dispatcher for token fetchers.
 *
 * Prepares the scanner state (buffer filled, whitespace and comments
 * skipped, stale simple keys dropped, indentation unrolled to the current
 * column), then looks at up to four characters at the current position and
 * hands over to the matching fetcher. The tests are ordered; the first one
 * that matches decides the token.
 *
 * Returns the fetcher's result, 0 if any preparation step fails, or 0 via
 * yaml_parser_set_scanner_error() when no token can start with the current
 * character.
 *
 * NOTE(review): this listing has lost several lines here. Listing line 860
 * (function name and parameters) is absent, and so are the statements that
 * followed eight of the tests below: 912-913 (document start), 922-923
 * (document end), 928-929 ('['), 934-935 ('{'), 940-941 (']'), 946-947
 * ('}'), 974 ('*') and 979 ('&'). Presumably each was a `return` into the
 * corresponding fetcher; as printed, those `if`s would govern whatever
 * statement comes next. Confirm against the upstream file before relying
 * on this text.
857  */
858 
859 static int
861 {
862  /* Ensure that the buffer is initialized. */
863 
864  if (!CACHE(parser, 1))
865  return 0;
866 
867  /* Check if we just started scanning. Fetch STREAM-START then. */
868 
869  if (!parser->stream_start_produced)
870  return yaml_parser_fetch_stream_start(parser);
871 
872  /* Eat whitespaces and comments until we reach the next token. */
873 
874  if (!yaml_parser_scan_to_next_token(parser))
875  return 0;
876 
877  /* Remove obsolete potential simple keys. */
878 
879  if (!yaml_parser_stale_simple_keys(parser))
880  return 0;
881 
882  /* Check the indentation level against the current column. */
883 
884  if (!yaml_parser_unroll_indent(parser, parser->mark.column))
885  return 0;
886 
887  /*
888  * Ensure that the buffer contains at least 4 characters. 4 is the length
889  * of the longest indicators ('--- ' and '... ').
890  */
891 
892  if (!CACHE(parser, 4))
893  return 0;
894 
895  /* Is it the end of the stream? */
896 
897  if (IS_Z(parser->buffer))
898  return yaml_parser_fetch_stream_end(parser);
899 
900  /* Is it a directive? */
901 
902  if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
903  return yaml_parser_fetch_directive(parser);
904 
905  /* Is it the document start indicator? */
906 
907  if (parser->mark.column == 0
908  && CHECK_AT(parser->buffer, '-', 0)
909  && CHECK_AT(parser->buffer, '-', 1)
910  && CHECK_AT(parser->buffer, '-', 2)
911  && IS_BLANKZ_AT(parser->buffer, 3))
914 
915  /* Is it the document end indicator? */
916 
917  if (parser->mark.column == 0
918  && CHECK_AT(parser->buffer, '.', 0)
919  && CHECK_AT(parser->buffer, '.', 1)
920  && CHECK_AT(parser->buffer, '.', 2)
921  && IS_BLANKZ_AT(parser->buffer, 3))
924 
925  /* Is it the flow sequence start indicator? */
926 
927  if (CHECK(parser->buffer, '['))
930 
931  /* Is it the flow mapping start indicator? */
932 
933  if (CHECK(parser->buffer, '{'))
936 
937  /* Is it the flow sequence end indicator? */
938 
939  if (CHECK(parser->buffer, ']'))
942 
943  /* Is it the flow mapping end indicator? */
944 
945  if (CHECK(parser->buffer, '}'))
948 
949  /* Is it the flow entry indicator? */
950 
951  if (CHECK(parser->buffer, ','))
952  return yaml_parser_fetch_flow_entry(parser);
953 
954  /* Is it the block entry indicator? */
955 
956  if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
957  return yaml_parser_fetch_block_entry(parser);
958 
959  /* Is it the key indicator? */
960 
961  if (CHECK(parser->buffer, '?')
962  && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
963  return yaml_parser_fetch_key(parser);
964 
965  /* Is it the value indicator? */
966 
967  if (CHECK(parser->buffer, ':')
968  && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
969  return yaml_parser_fetch_value(parser);
970 
971  /* Is it an alias? */
972 
973  if (CHECK(parser->buffer, '*'))
975 
976  /* Is it an anchor? */
977 
978  if (CHECK(parser->buffer, '&'))
980 
981  /* Is it a tag? */
982 
983  if (CHECK(parser->buffer, '!'))
984  return yaml_parser_fetch_tag(parser);
985 
986  /* Is it a literal scalar? */
987 
988  if (CHECK(parser->buffer, '|') && !parser->flow_level)
989  return yaml_parser_fetch_block_scalar(parser, 1);
990 
991  /* Is it a folded scalar? */
992 
993  if (CHECK(parser->buffer, '>') && !parser->flow_level)
994  return yaml_parser_fetch_block_scalar(parser, 0);
995 
996  /* Is it a single-quoted scalar? */
997 
998  if (CHECK(parser->buffer, '\''))
999  return yaml_parser_fetch_flow_scalar(parser, 1);
1000 
1001  /* Is it a double-quoted scalar? */
1002 
1003  if (CHECK(parser->buffer, '"'))
1004  return yaml_parser_fetch_flow_scalar(parser, 0);
1005 
1006  /*
1007  * Is it a plain scalar?
1008  *
1009  * A plain scalar may start with any non-blank characters except
1010  *
1011  * '-', '?', ':', ',', '[', ']', '{', '}',
1012  * '#', '&', '*', '!', '|', '>', '\'', '\"',
1013  * '%', '@', '`'.
1014  *
1015  * In the block context (and, for the '-' indicator, in the flow context
1016  * too), it may also start with the characters
1017  *
1018  * '-', '?', ':'
1019  *
1020  * if it is followed by a non-space character.
1021  *
1022  * The last rule is more restrictive than the specification requires.
1023  */
1024 
1025  if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1026  || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1027  || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1028  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1029  || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1030  || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1031  || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1032  || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1033  || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1034  || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1035  (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1036  (!parser->flow_level &&
1037  (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1038  && !IS_BLANKZ_AT(parser->buffer, 1)))
1039  return yaml_parser_fetch_plain_scalar(parser);
1040 
1041  /*
1042  * If we don't determine the token type so far, it is an error.
1043  */
1044 
1045  return yaml_parser_set_scanner_error(parser,
1046  "while scanning for the next token", parser->mark,
1047  "found character that cannot start any token");
1048 }
1049 
1050 /*
1051  * Check the list of potential simple keys and remove the positions that
1052  * cannot contain simple keys anymore.
 *
 * A candidate is dropped once the scanner is on a later line than the
 * candidate's mark, or more than 1024 index positions past it. Dropping a
 * candidate that is flagged `required` is a scanner error
 * ("could not find expected ':'") and makes the function return 0;
 * otherwise it returns 1. Entries are only marked impossible, never popped
 * from the stack.
 *
 * NOTE(review): listing line 1056, which carried the function name and
 * parameter list, is missing from this listing -- confirm against the
 * upstream file.
1053  */
1054 
1055 static int
1057 {
1058  yaml_simple_key_t *simple_key;
1059 
1060  /* Check for a potential simple key for each flow level. */
1061 
1062  for (simple_key = parser->simple_keys.start;
1063  simple_key != parser->simple_keys.top; simple_key ++)
1064  {
1065  /*
1066  * The specification requires that a simple key
1067  *
1068  * - is limited to a single line,
1069  * - is shorter than 1024 characters.
1070  */
1071 
1072  if (simple_key->possible
1073  && (simple_key->mark.line < parser->mark.line
1074  || simple_key->mark.index+1024 < parser->mark.index)) {
1075 
1076  /* Check if the potential simple key to be removed is required. */
1077 
1078  if (simple_key->required) {
1079  return yaml_parser_set_scanner_error(parser,
1080  "while scanning a simple key", simple_key->mark,
1081  "could not find expected ':'");
1082  }
1083 
1084  simple_key->possible = 0;
1085  }
1086  }
1087 
1088  return 1;
1089 }
1090 
1091 /*
1092  * Check if a simple key may start at the current position and add it if
1093  * needed.
 *
 * Does nothing unless parser->simple_key_allowed is set. Otherwise it
 * records the current mark and the number the next queued token will get
 * (tokens already parsed plus tokens still in the queue) in the top entry
 * of parser->simple_keys, replacing whatever candidate was there.
 *
 * Returns 1 on success, 0 if the candidate being replaced was required
 * (the error is set by yaml_parser_remove_simple_key()).
 *
 * NOTE(review): listing line 1097, which carried the function name and
 * parameter list, is missing from this listing -- confirm against the
 * upstream file.
1094  */
1095 
1096 static int
1098 {
1099  /*
1100  * A simple key is required at the current position if the scanner is in
1101  * the block context and the current column coincides with the indentation
1102  * level.
1103  */
1104 
1105  int required = (!parser->flow_level
1106  && parser->indent == (ptrdiff_t)parser->mark.column);
1107 
1108  /*
1109  * A simple key is required only when it is the first token in the current
1110  * line. Therefore it is always allowed. But we add a check anyway.
1111  */
1112 
1113  assert(parser->simple_key_allowed || !required); /* Impossible. */
1114 
1115  /*
1116  * If the current position may start a simple key, save it.
1117  */
1118 
1119  if (parser->simple_key_allowed)
1120  {
1121  yaml_simple_key_t simple_key;
1122  simple_key.possible = 1;
1123  simple_key.required = required;
1124  simple_key.token_number =
1125  parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1126  simple_key.mark = parser->mark;
1127 
 /* Clear the previous candidate first; this fails if it was required. */
1128  if (!yaml_parser_remove_simple_key(parser)) return 0;
1129 
1130  *(parser->simple_keys.top-1) = simple_key;
1131  }
1132 
1133  return 1;
1134 }
1135 
1136 /*
1137  * Remove a potential simple key at the current flow level.
 *
 * Works on the top entry of parser->simple_keys. If that entry is both
 * possible and required, a scanner error ("could not find expected ':'")
 * is set and 0 is returned, leaving the entry untouched. Otherwise the
 * entry is marked impossible and 1 is returned. The entry itself stays on
 * the stack.
 *
 * The stack must not be empty, since top-1 is dereferenced unconditionally.
 *
 * NOTE(review): listing line 1141, which carried the function name and
 * parameter list, is missing from this listing -- confirm against the
 * upstream file.
1138  */
1139 
1140 static int
1142 {
1143  yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1144 
1145  if (simple_key->possible)
1146  {
1147  /* If the key is required, it is an error. */
1148 
1149  if (simple_key->required) {
1150  return yaml_parser_set_scanner_error(parser,
1151  "while scanning a simple key", simple_key->mark,
1152  "could not find expected ':'");
1153  }
1154  }
1155 
1156  /* Remove the key from the stack. */
1157 
1158  simple_key->possible = 0;
1159 
1160  return 1;
1161 }
1162 
1163 /*
1164  * Increase the flow level and resize the simple key list if needed.
 *
 * Pushes an all-zero simple key entry for the new level, then increments
 * parser->flow_level. Returns 1 on success and 0 if the push fails or the
 * flow level is already INT_MAX (parser->error is then set to
 * YAML_MEMORY_ERROR).
 *
 * NOTE(review): the push happens before the INT_MAX check, so on that
 * failure path the extra entry stays on parser->simple_keys while
 * flow_level is unchanged.
 *
 * NOTE(review): listing line 1168, which carried the function name and
 * parameter list, is missing from this listing -- confirm against the
 * upstream file.
1165  */
1166 
1167 static int
1169 {
1170  yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
1171 
1172  /* Reset the simple key on the next level. */
1173 
1174  if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1175  return 0;
1176 
1177  /* Increase the flow level. */
1178 
1179  if (parser->flow_level == INT_MAX) {
1180  parser->error = YAML_MEMORY_ERROR;
1181  return 0;
1182  }
1183 
1184  parser->flow_level++;
1185 
1186  return 1;
1187 }
1188 
1189 /*
1190  * Decrease the flow level.
 *
 * When parser->flow_level is non-zero it is decremented and the top entry
 * of parser->simple_keys is popped and discarded. At flow level 0 nothing
 * happens. Always returns 1.
 *
 * NOTE(review): listing line 1194, which carried the function name and
 * parameter list, is missing from this listing -- confirm against the
 * upstream file.
1191  */
1192 
1193 static int
1195 {
1196  if (parser->flow_level) {
1197  parser->flow_level --;
1198  (void)POP(parser, parser->simple_keys);
1199  }
1200 
1201  return 1;
1202 }
1203 
1204 /*
1205  * Push the current indentation level to the stack and set the new level
1206  * if the current column is greater than the indentation level. In this
1207  * case, append or insert the specified token into the token queue.
 *
 * column - the column that becomes the new indentation level.
 * number - -1 to append the token at the tail of the queue; otherwise the
 *          token is inserted at queue position number - tokens_parsed.
 * type   - the token type passed by the caller (not referenced in the
 *          lines that survive in this listing, see the note below).
 * mark   - the mark passed by the caller (likewise not referenced in the
 *          surviving lines).
 *
 * Does nothing and returns 1 in the flow context or when the column does
 * not exceed the current indentation. Returns 0 if a stack or queue
 * operation fails, or if the column does not fit in an int (parser->error
 * is then set to YAML_MEMORY_ERROR).
 *
 * NOTE(review): the old indentation is pushed before the INT_MAX check, so
 * on that failure path it stays on parser->indents.
 *
 * NOTE(review): listing lines 1215 and 1243 are missing from this listing.
 * `token` is used below without a visible declaration or initialization,
 * and `type` / `mark` are never referenced; presumably those two lines
 * declared the token and initialized it from `type` and `mark`. Confirm
 * against the upstream file.
1208  *
1209  */
1210 
1211 static int
1212 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
1213  ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1214 {
1216 
1217  /* In the flow context, do nothing. */
1218 
1219  if (parser->flow_level)
1220  return 1;
1221 
1222  if (parser->indent < column)
1223  {
1224  /*
1225  * Push the current indentation level to the stack and set the new
1226  * indentation level.
1227  */
1228 
1229  if (!PUSH(parser, parser->indents, parser->indent))
1230  return 0;
1231 
1232 #if PTRDIFF_MAX > INT_MAX
1233  if (column > INT_MAX) {
1234  parser->error = YAML_MEMORY_ERROR;
1235  return 0;
1236  }
1237 #endif
1238 
1239  parser->indent = (int)column;
1240 
1241  /* Create a token and insert it into the queue. */
1242 
1244 
1245  if (number == -1) {
1246  if (!ENQUEUE(parser, parser->tokens, token))
1247  return 0;
1248  }
1249  else {
1250  if (!QUEUE_INSERT(parser,
1251  parser->tokens, number - parser->tokens_parsed, token))
1252  return 0;
1253  }
1254  }
1255 
1256  return 1;
1257 }
1258 
1259 /*
1260  * Pop indentation levels from the indents stack until the current level
1261  * becomes less or equal to the column. For each indentation level, append
1262  * the BLOCK-END token.
 *
 * Each BLOCK-END token uses the parser's current mark for both its start
 * and its end. In the flow context nothing is done. Returns 1 on success
 * and 0 if appending a token fails.
 *
 * Callers in this file pass -1 as `column` to close every open block.
 *
 * NOTE(review): listing line 1269 is missing from this listing and `token`
 * is used below without a visible declaration; presumably that line
 * declared it. Confirm against the upstream file.
1263  */
1264 
1265 
1266 static int
1267 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
1268 {
1270 
1271  /* In the flow context, do nothing. */
1272 
1273  if (parser->flow_level)
1274  return 1;
1275 
1276  /* Loop through the indentation levels in the stack. */
1277 
1278  while (parser->indent > column)
1279  {
1280  /* Create a token and append it to the queue. */
1281 
1282  TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
1283 
1284  if (!ENQUEUE(parser, parser->tokens, token))
1285  return 0;
1286 
1287  /* Pop the indentation level. */
1288 
1289  parser->indent = POP(parser, parser->indents);
1290  }
1291 
1292  return 1;
1293 }
1294 
1295 /*
1296  * Initialize the scanner and produce the STREAM-START token.
 *
 * Sets the indentation to -1, pushes the first (all-zero) entry onto
 * parser->simple_keys, allows a simple key at the current position, marks
 * the stream as started and appends a token to the queue. Returns 1 on
 * success and 0 if the push or the enqueue fails.
 *
 * NOTE(review): stream_start_produced is set before the enqueue, so it
 * stays set even when the enqueue fails.
 *
 * NOTE(review): listing lines 1300 (function name and parameters), 1303
 * and 1324 are missing from this listing. `token` is used without a
 * visible declaration, and the line ending in `parser->mark, parser->mark);`
 * is the tail of a call whose first line was lost -- presumably the
 * STREAM-START token initializer. Confirm against the upstream file.
1297  */
1298 
1299 static int
1301 {
1302  yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1304 
1305  /* Set the initial indentation. */
1306 
1307  parser->indent = -1;
1308 
1309  /* Initialize the simple key stack. */
1310 
1311  if (!PUSH(parser, parser->simple_keys, simple_key))
1312  return 0;
1313 
1314  /* A simple key is allowed at the beginning of the stream. */
1315 
1316  parser->simple_key_allowed = 1;
1317 
1318  /* We have started. */
1319 
1320  parser->stream_start_produced = 1;
1321 
1322  /* Create the STREAM-START token and append it to the queue. */
1323 
1325  parser->mark, parser->mark);
1326 
1327  if (!ENQUEUE(parser, parser->tokens, token))
1328  return 0;
1329 
1330  return 1;
1331 }
1332 
1333 /*
1334  * Produce the STREAM-END token and shut down the scanner.
 *
 * Returns 1 on success, 0 if unrolling the indentation, removing the simple
 * key or appending the token fails.
 *
 * NOTE(review): the signature line and the `token` declaration are not
 * visible in this listing.
1335  */
1336 
1337 static int
1339 {
1341 
1342  /* Force new line: if the mark is mid-line, move it to column 0 of the next line. */
1343 
1344  if (parser->mark.column != 0) {
1345  parser->mark.column = 0;
1346  parser->mark.line ++;
1347  }
1348 
1349  /* Reset the indentation level (unroll down to column -1). */
1350 
1351  if (!yaml_parser_unroll_indent(parser, -1))
1352  return 0;
1353 
1354  /* Reset simple keys. */
1355 
1356  if (!yaml_parser_remove_simple_key(parser))
1357  return 0;
1358 
1359  parser->simple_key_allowed = 0;
1360 
1361  /* Create the STREAM-END token (zero-width, at the adjusted mark) and append it to the queue. */
1362 
1363  STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
1364 
1365  if (!ENQUEUE(parser, parser->tokens, token))
1366  return 0;
1367 
1368  return 1;
1369 }
1370 
1371 /*
1372  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
 *
 * The token itself is built by yaml_parser_scan_directive(); this function
 * first unrolls the indentation and clears any pending simple key.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line, the `token` declaration and one line
 * inside the ENQUEUE failure branch are not visible in this listing.
1373  */
1374 
1375 static int
1377 {
1379 
1380  /* Reset the indentation level (unroll down to column -1). */
1381 
1382  if (!yaml_parser_unroll_indent(parser, -1))
1383  return 0;
1384 
1385  /* Reset simple keys. */
1386 
1387  if (!yaml_parser_remove_simple_key(parser))
1388  return 0;
1389 
1390  parser->simple_key_allowed = 0;
1391 
1392  /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1393 
1394  if (!yaml_parser_scan_directive(parser, &token))
1395  return 0;
1396 
1397  /* Append the token to the queue. */
1398 
1399  if (!ENQUEUE(parser, parser->tokens, token)) {
1401  return 0;
1402  }
1403 
1404  return 1;
1405 }
1406 
1407 /*
1408  * Produce the DOCUMENT-START or DOCUMENT-END token.
 *
 * The token type is taken from `type`; the token spans the three characters
 * consumed below.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line (which declares `parser` and `type`) and
 * the `token` declaration are not visible in this listing.
1409  */
1410 
1411 static int
1414 {
1415  yaml_mark_t start_mark, end_mark;
1417 
1418  /* Reset the indentation level (unroll down to column -1). */
1419 
1420  if (!yaml_parser_unroll_indent(parser, -1))
1421  return 0;
1422 
1423  /* Reset simple keys. */
1424 
1425  if (!yaml_parser_remove_simple_key(parser))
1426  return 0;
1427 
1428  parser->simple_key_allowed = 0;
1429 
1430  /* Consume the three-character indicator ('---' or '...'). */
1431 
1432  start_mark = parser->mark;
1433 
1434  SKIP(parser);
1435  SKIP(parser);
1436  SKIP(parser);
1437 
1438  end_mark = parser->mark;
1439 
1440  /* Create the DOCUMENT-START or DOCUMENT-END token. */
1441 
1442  TOKEN_INIT(token, type, start_mark, end_mark);
1443 
1444  /* Append the token to the queue. */
1445 
1446  if (!ENQUEUE(parser, parser->tokens, token))
1447  return 0;
1448 
1449  return 1;
1450 }
1451 
1452 /*
1453  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
 *
 * The token type is taken from `type`; the token spans the single indicator
 * character consumed below.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line and the `token` declaration are not
 * visible in this listing.
1454  */
1455 
1456 static int
1459 {
1460  yaml_mark_t start_mark, end_mark;
1462 
1463  /* The indicators '[' and '{' may start a simple key. */
1464 
1465  if (!yaml_parser_save_simple_key(parser))
1466  return 0;
1467 
1468  /* Increase the flow level. */
1469 
1470  if (!yaml_parser_increase_flow_level(parser))
1471  return 0;
1472 
1473  /* A simple key may follow the indicators '[' and '{'. */
1474 
1475  parser->simple_key_allowed = 1;
1476 
1477  /* Consume the token. */
1478 
1479  start_mark = parser->mark;
1480  SKIP(parser);
1481  end_mark = parser->mark;
1482 
1483  /* Create the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. */
1484 
1485  TOKEN_INIT(token, type, start_mark, end_mark);
1486 
1487  /* Append the token to the queue. */
1488 
1489  if (!ENQUEUE(parser, parser->tokens, token))
1490  return 0;
1491 
1492  return 1;
1493 }
1494 
1495 /*
1496  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
 *
 * The token type is taken from `type`; the token spans the single indicator
 * character consumed below.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line and the `token` declaration are not
 * visible in this listing.
1497  */
1498 
1499 static int
1502 {
1503  yaml_mark_t start_mark, end_mark;
1505 
1506  /* Reset any potential simple key on the current flow level. */
1507 
1508  if (!yaml_parser_remove_simple_key(parser))
1509  return 0;
1510 
1511  /* Decrease the flow level. */
1512 
1513  if (!yaml_parser_decrease_flow_level(parser))
1514  return 0;
1515 
1516  /* No simple keys after the indicators ']' and '}'. */
1517 
1518  parser->simple_key_allowed = 0;
1519 
1520  /* Consume the token. */
1521 
1522  start_mark = parser->mark;
1523  SKIP(parser);
1524  end_mark = parser->mark;
1525 
1526  /* Create the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. */
1527 
1528  TOKEN_INIT(token, type, start_mark, end_mark);
1529 
1530  /* Append the token to the queue. */
1531 
1532  if (!ENQUEUE(parser, parser->tokens, token))
1533  return 0;
1534 
1535  return 1;
1536 }
1537 
1538 /*
1539  * Produce the FLOW-ENTRY token (the ',' indicator).
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line and the `token` declaration are not
 * visible in this listing.
1540  */
1541 
1542 static int
1544 {
1545  yaml_mark_t start_mark, end_mark;
1547 
1548  /* Reset any potential simple keys on the current flow level. */
1549 
1550  if (!yaml_parser_remove_simple_key(parser))
1551  return 0;
1552 
1553  /* Simple keys are allowed after ','. */
1554 
1555  parser->simple_key_allowed = 1;
1556 
1557  /* Consume the single-character token. */
1558 
1559  start_mark = parser->mark;
1560  SKIP(parser);
1561  end_mark = parser->mark;
1562 
1563  /* Create the FLOW-ENTRY token and append it to the queue. */
1564 
1565  TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1566 
1567  if (!ENQUEUE(parser, parser->tokens, token))
1568  return 0;
1569 
1570  return 1;
1571 }
1572 
1573 /*
1574  * Produce the BLOCK-ENTRY token (the '-' indicator).
 *
 * In the block context this fails with a scanner error unless
 * parser->simple_key_allowed is set, and may roll the indentation first.
 * In the flow context the token is emitted without any check.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line, the `token` declaration and the second
 * line of the yaml_parser_roll_indent() call are not visible in this listing.
1575  */
1576 
1577 static int
1579 {
1580  yaml_mark_t start_mark, end_mark;
1582 
1583  /* Check if the scanner is in the block context. */
1584 
1585  if (!parser->flow_level)
1586  {
1587  /* Check if we are allowed to start a new entry. */
1588 
1589  if (!parser->simple_key_allowed) {
1590  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1591  "block sequence entries are not allowed in this context");
1592  }
1593 
1594  /* Add the BLOCK-SEQUENCE-START token if needed. */
1595 
1596  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1598  return 0;
1599  }
1600  else
1601  {
1602  /*
1603  * It is an error for the '-' indicator to occur in the flow context,
1604  * but we let the Parser detect and report it because the Parser
1605  * is able to point to the context.
1606  */
1607  }
1608 
1609  /* Reset any potential simple keys on the current flow level. */
1610 
1611  if (!yaml_parser_remove_simple_key(parser))
1612  return 0;
1613 
1614  /* Simple keys are allowed after '-'. */
1615 
1616  parser->simple_key_allowed = 1;
1617 
1618  /* Consume the single-character token. */
1619 
1620  start_mark = parser->mark;
1621  SKIP(parser);
1622  end_mark = parser->mark;
1623 
1624  /* Create the BLOCK-ENTRY token and append it to the queue. */
1625 
1626  TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1627 
1628  if (!ENQUEUE(parser, parser->tokens, token))
1629  return 0;
1630 
1631  return 1;
1632 }
1633 
1634 /*
1635  * Produce the KEY token.
1636  */
1637 
1638 static int
1640 {
1641  yaml_mark_t start_mark, end_mark;
1643 
1644  /* In the block context, additional checks are required. */
1645 
1646  if (!parser->flow_level)
1647  {
1648  /* Check if we are allowed to start a new key (not nessesary simple). */
1649 
1650  if (!parser->simple_key_allowed) {
1651  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1652  "mapping keys are not allowed in this context");
1653  }
1654 
1655  /* Add the BLOCK-MAPPING-START token if needed. */
1656 
1657  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1659  return 0;
1660  }
1661 
1662  /* Reset any potential simple keys on the current flow level. */
1663 
1664  if (!yaml_parser_remove_simple_key(parser))
1665  return 0;
1666 
1667  /* Simple keys are allowed after '?' in the block context. */
1668 
1669  parser->simple_key_allowed = (!parser->flow_level);
1670 
1671  /* Consume the token. */
1672 
1673  start_mark = parser->mark;
1674  SKIP(parser);
1675  end_mark = parser->mark;
1676 
1677  /* Create the KEY token and append it to the queue. */
1678 
1679  TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
1680 
1681  if (!ENQUEUE(parser, parser->tokens, token))
1682  return 0;
1683 
1684  return 1;
1685 }
1686 
1687 /*
1688  * Produce the VALUE token (the ':' indicator).
 *
 * If the top entry of the simple key stack is marked possible, a KEY token
 * is first inserted retroactively at that key's position in the queue (and
 * the indentation rolled from that position); otherwise the ':' is treated
 * as following a complex key.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line, the `token` declaration and the second
 * line of the last yaml_parser_roll_indent() call are not visible in this
 * listing.
1689  */
1690 
1691 static int
1693 {
1694  yaml_mark_t start_mark, end_mark;
1696  yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1697 
1698  /* Have we found a simple key? */
1699 
1700  if (simple_key->possible)
1701  {
1702 
1703  /* Create a zero-width KEY token at the key's mark and insert it into the queue. */
1704 
1705  TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1706 
1707  if (!QUEUE_INSERT(parser, parser->tokens,
1708  simple_key->token_number - parser->tokens_parsed, token))
1709  return 0;
1710 
1711  /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1712 
1713  if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1714  simple_key->token_number,
1715  YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1716  return 0;
1717 
1718  /* Remove the simple key. */
1719 
1720  simple_key->possible = 0;
1721 
1722  /* A simple key cannot follow another simple key. */
1723 
1724  parser->simple_key_allowed = 0;
1725  }
1726  else
1727  {
1728  /* The ':' indicator follows a complex key. */
1729 
1730  /* In the block context, extra checks are required. */
1731 
1732  if (!parser->flow_level)
1733  {
1734  /* Check if we are allowed to start a complex value. */
1735 
1736  if (!parser->simple_key_allowed) {
1737  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1738  "mapping values are not allowed in this context");
1739  }
1740 
1741  /* Add the BLOCK-MAPPING-START token if needed. */
1742 
1743  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1745  return 0;
1746  }
1747 
1748  /* Simple keys after ':' are allowed in the block context only. */
1749 
1750  parser->simple_key_allowed = (!parser->flow_level);
1751  }
1752 
1753  /* Consume the single-character token. */
1754 
1755  start_mark = parser->mark;
1756  SKIP(parser);
1757  end_mark = parser->mark;
1758 
1759  /* Create the VALUE token and append it to the queue. */
1760 
1761  TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
1762 
1763  if (!ENQUEUE(parser, parser->tokens, token))
1764  return 0;
1765 
1766  return 1;
1767 }
1768 
1769 /*
1770  * Produce the ALIAS or ANCHOR token.
 *
 * Which of the two is produced is selected by `type`, which is passed on to
 * yaml_parser_scan_anchor().
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line, the `token` declaration and one line
 * inside the ENQUEUE failure branch are not visible in this listing.
1771  */
1772 
1773 static int
1775 {
1777 
1778  /* An anchor or an alias could be a simple key. */
1779 
1780  if (!yaml_parser_save_simple_key(parser))
1781  return 0;
1782 
1783  /* A simple key cannot follow an anchor or an alias. */
1784 
1785  parser->simple_key_allowed = 0;
1786 
1787  /* Create the ALIAS or ANCHOR token and append it to the queue. */
1788 
1789  if (!yaml_parser_scan_anchor(parser, &token, type))
1790  return 0;
1791 
1792  if (!ENQUEUE(parser, parser->tokens, token)) {
1794  return 0;
1795  }
1796  return 1;
1797 }
1798 
1799 /*
1800  * Produce the TAG token.
 *
 * The token itself is built by yaml_parser_scan_tag().
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line, the `token` declaration and one line
 * inside the ENQUEUE failure branch are not visible in this listing.
1801  */
1802 
1803 static int
1805 {
1807 
1808  /* A tag could be a simple key. */
1809 
1810  if (!yaml_parser_save_simple_key(parser))
1811  return 0;
1812 
1813  /* A simple key cannot follow a tag. */
1814 
1815  parser->simple_key_allowed = 0;
1816 
1817  /* Create the TAG token and append it to the queue. */
1818 
1819  if (!yaml_parser_scan_tag(parser, &token))
1820  return 0;
1821 
1822  if (!ENQUEUE(parser, parser->tokens, token)) {
1824  return 0;
1825  }
1826 
1827  return 1;
1828 }
1829 
1830 /*
1831  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
 *
 * `literal` is passed through to yaml_parser_scan_block_scalar(), which
 * builds the token.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line, the `token` declaration and one line
 * inside the ENQUEUE failure branch are not visible in this listing.
1832  */
1833 
1834 static int
1836 {
1838 
1839  /* Remove any potential simple keys. */
1840 
1841  if (!yaml_parser_remove_simple_key(parser))
1842  return 0;
1843 
1844  /* A simple key may follow a block scalar. */
1845 
1846  parser->simple_key_allowed = 1;
1847 
1848  /* Create the SCALAR token and append it to the queue. */
1849 
1850  if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1851  return 0;
1852 
1853  if (!ENQUEUE(parser, parser->tokens, token)) {
1855  return 0;
1856  }
1857 
1858  return 1;
1859 }
1860 
1861 /*
1862  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
 *
 * `single` is passed through to yaml_parser_scan_flow_scalar(), which builds
 * the token.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line, the `token` declaration and one line
 * inside the ENQUEUE failure branch are not visible in this listing.
1863  */
1864 
1865 static int
1867 {
1869 
1870  /* A flow (quoted) scalar could be a simple key. */
1871 
1872  if (!yaml_parser_save_simple_key(parser))
1873  return 0;
1874 
1875  /* A simple key cannot follow a flow scalar. */
1876 
1877  parser->simple_key_allowed = 0;
1878 
1879  /* Create the SCALAR token and append it to the queue. */
1880 
1881  if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1882  return 0;
1883 
1884  if (!ENQUEUE(parser, parser->tokens, token)) {
1886  return 0;
1887  }
1888 
1889  return 1;
1890 }
1891 
1892 /*
1893  * Produce the SCALAR(...,plain) token.
 *
 * The token itself is built by yaml_parser_scan_plain_scalar().
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the signature line, the `token` declaration and one line
 * inside the ENQUEUE failure branch are not visible in this listing.
1894  */
1895 
1896 static int
1898 {
1900 
1901  /* A plain scalar could be a simple key. */
1902 
1903  if (!yaml_parser_save_simple_key(parser))
1904  return 0;
1905 
1906  /* A simple key cannot follow a plain scalar. */
1907 
1908  parser->simple_key_allowed = 0;
1909 
1910  /* Create the SCALAR token and append it to the queue. */
1911 
1912  if (!yaml_parser_scan_plain_scalar(parser, &token))
1913  return 0;
1914 
1915  if (!ENQUEUE(parser, parser->tokens, token)) {
1917  return 0;
1918  }
1919 
1920  return 1;
1921 }
1922 
1923 /*
1924  * Eat whitespaces and comments until the next token is found.
 *
 * Each pass of the loop skips an optional BOM at column 0, then blanks, then
 * a '#' comment, then a line break. The loop ends at the first position that
 * is not a line break after those steps.
 *
 * Returns 1 on success, 0 if a CACHE() call fails.
 *
 * NOTE(review): the signature line is not visible in this listing.
1925  */
1926 
1927 static int
1929 {
1930  /* Loop until the next token is found. */
1931 
1932  while (1)
1933  {
1934  /* Allow the BOM mark to start a line. */
1935 
1936  if (!CACHE(parser, 1)) return 0;
1937 
1938  if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1939  SKIP(parser);
1940 
1941  /*
1942  * Eat whitespaces.
1943  *
1944  * Tabs are allowed:
1945  *
1946  * - in the flow context;
1947  * - in the block context, but not at the beginning of the line or
1948  * after '-', '?', or ':' (complex value).
 *
 * The second rule is implemented by testing simple_key_allowed: a tab
 * is skipped in the block context only while that flag is clear.
1949  */
1950 
1951  if (!CACHE(parser, 1)) return 0;
1952 
1953  while (CHECK(parser->buffer,' ') ||
1954  ((parser->flow_level || !parser->simple_key_allowed) &&
1955  CHECK(parser->buffer, '\t'))) {
1956  SKIP(parser);
1957  if (!CACHE(parser, 1)) return 0;
1958  }
1959 
1960  /* Eat a comment until a line break (the break itself is not consumed here). */
1961 
1962  if (CHECK(parser->buffer, '#')) {
1963  while (!IS_BREAKZ(parser->buffer)) {
1964  SKIP(parser);
1965  if (!CACHE(parser, 1)) return 0;
1966  }
1967  }
1968 
1969  /* If it is a line break, eat it. */
1970 
1971  if (IS_BREAK(parser->buffer))
1972  {
1973  if (!CACHE(parser, 2)) return 0;
1974  SKIP_LINE(parser);
1975 
1976  /* In the block context, a new line may start a simple key. */
1977 
1978  if (!parser->flow_level) {
1979  parser->simple_key_allowed = 1;
1980  }
1981  }
1982  else
1983  {
1984  /* We have found a token. */
1985 
1986  break;
1987  }
1988  }
1989 
1990  return 1;
1991 }
1992 
1993 /*
1994  * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1995  *
1996  * Scope:
1997  * %YAML 1.1 # a comment \n
1998  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1999  * %TAG !yaml! tag:yaml.org,2002: \n
2000  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 *
 * On success *token is initialized and 1 is returned. For a TAG directive
 * the handle and prefix strings are handed to the token and are not freed
 * here. On failure 0 is returned and name, handle and prefix are freed.
 *
 * NOTE(review): the signature line and the first line of the
 * VERSION-DIRECTIVE token initializer are not visible in this listing.
2001  */
2002 
2003 int
2005 {
2006  yaml_mark_t start_mark, end_mark;
2007  yaml_char_t *name = NULL;
2008  int major, minor;
2009  yaml_char_t *handle = NULL, *prefix = NULL;
2010 
2011  /* Eat '%'. */
2012 
2013  start_mark = parser->mark;
2014 
2015  SKIP(parser);
2016 
2017  /* Scan the directive name. */
2018 
2019  if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2020  goto error;
2021 
2022  /* Is it a YAML directive? */
2023 
2024  if (strcmp((char *)name, "YAML") == 0)
2025  {
2026  /* Scan the VERSION directive value. */
2027 
2028  if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2029  &major, &minor))
2030  goto error;
2031 
2032  end_mark = parser->mark;
2033 
2034  /* Create a VERSION-DIRECTIVE token. */
2035 
2037  start_mark, end_mark);
2038  }
2039 
2040  /* Is it a TAG directive? */
2041 
2042  else if (strcmp((char *)name, "TAG") == 0)
2043  {
2044  /* Scan the TAG directive value. */
2045 
2046  if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2047  &handle, &prefix))
2048  goto error;
2049 
2050  end_mark = parser->mark;
2051 
2052  /* Create a TAG-DIRECTIVE token. */
2053 
2054  TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2055  start_mark, end_mark);
2056  }
2057 
2058  /* Unknown directive. */
2059 
2060  else
2061  {
2062  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2063  start_mark, "found unknown directive name");
2064  goto error;
2065  }
2066 
2067  /* Eat the rest of the line including any comments. */
2068 
2069  if (!CACHE(parser, 1)) goto error;
2070 
2071  while (IS_BLANK(parser->buffer)) {
2072  SKIP(parser);
2073  if (!CACHE(parser, 1)) goto error;
2074  }
2075 
2076  if (CHECK(parser->buffer, '#')) {
2077  while (!IS_BREAKZ(parser->buffer)) {
2078  SKIP(parser);
2079  if (!CACHE(parser, 1)) goto error;
2080  }
2081  }
2082 
2083  /* Check if we are at the end of the line. */
2084 
2085  if (!IS_BREAKZ(parser->buffer)) {
2086  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2087  start_mark, "did not find expected comment or line break");
2088  goto error;
2089  }
2090 
2091  /* Eat a line break. */
2092 
2093  if (IS_BREAK(parser->buffer)) {
2094  if (!CACHE(parser, 2)) goto error;
2095  SKIP_LINE(parser);
2096  }
2097 
2098  /* The name was only needed to select the directive kind. */
2099 
2100  yaml_free(name);
2101 
2102  return 1;
2103 
2104 error:
2105  yaml_free(prefix);
2106  yaml_free(handle);
2107  yaml_free(name);
2108  return 0;
2109 }
2108 
2109 /*
2110  * Scan the directive name.
2111  *
2112  * Scope:
2113  * %YAML 1.1 # a comment \n
2114  * ^^^^
2115  * %TAG !yaml! tag:yaml.org,2002: \n
2116  * ^^^
 *
 * On success stores the scanned string in *name and returns 1; the buffer is
 * not released here, so it is left to the caller. On failure the string is
 * released and 0 is returned.
 *
 * NOTE(review): the first line of the signature is not visible in this
 * listing.
2117  */
2118 
2119 static int
2121  yaml_mark_t start_mark, yaml_char_t **name)
2122 {
2123  yaml_string_t string = NULL_STRING;
2124 
2125  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2126 
2127  /* Consume the directive name. */
2128 
2129  if (!CACHE(parser, 1)) goto error;
2130 
2131  while (IS_ALPHA(parser->buffer))
2132  {
2133  if (!READ(parser, string)) goto error;
2134  if (!CACHE(parser, 1)) goto error;
2135  }
2136 
2137  /* Check if the name is empty (nothing was read into the string). */
2138 
2139  if (string.start == string.pointer) {
2140  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2141  start_mark, "could not find expected directive name");
2142  goto error;
2143  }
2144 
2145  /* Check for a blank character after the name. */
2146 
2147  if (!IS_BLANKZ(parser->buffer)) {
2148  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2149  start_mark, "found unexpected non-alphabetical character");
2150  goto error;
2151  }
2152 
2153  *name = string.start;
2154 
2155  return 1;
2156 
2157 error:
2158  STRING_DEL(parser, string);
2159  return 0;
2160 }
2161 
2162 /*
2163  * Scan the value of VERSION-DIRECTIVE.
2164  *
2165  * Scope:
2166  * %YAML 1.1 # a comment \n
2167  * ^^^^^^
 *
 * Skips leading blanks, then reads <major> '.' <minor>. On success stores
 * the two numbers in *major and *minor and returns 1; returns 0 on failure.
 *
 * NOTE(review): the first line of the signature is not visible in this
 * listing.
2168  */
2169 
2170 static int
2172  yaml_mark_t start_mark, int *major, int *minor)
2173 {
2174  /* Eat whitespaces. */
2175 
2176  if (!CACHE(parser, 1)) return 0;
2177 
2178  while (IS_BLANK(parser->buffer)) {
2179  SKIP(parser);
2180  if (!CACHE(parser, 1)) return 0;
2181  }
2182 
2183  /* Consume the major version number. */
2184 
2185  if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2186  return 0;
2187 
2188  /* Eat '.'. */
2189 
2190  if (!CHECK(parser->buffer, '.')) {
2191  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2192  start_mark, "did not find expected digit or '.' character");
2193  }
2194 
2195  SKIP(parser);
2196 
2197  /* Consume the minor version number. */
2198 
2199  if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2200  return 0;
2201 
2202  return 1;
2203 }
2204 
/*
 * Maximum number of decimal digits accepted for one version component.
 * With at most 9 digits the accumulated value is at most 999999999, which
 * fits in an int that is at least 32 bits wide.
 */
2205 #define MAX_NUMBER_LENGTH 9
2206 
2207 /*
2208  * Scan the version number of VERSION-DIRECTIVE.
2209  *
2210  * Scope:
2211  * %YAML 1.1 # a comment \n
2212  * ^
2213  * %YAML 1.1 # a comment \n
2214  * ^
 *
 * Reads a run of decimal digits. On success stores the value in *number and
 * returns 1. Returns 0 if no digit is present, if more than
 * MAX_NUMBER_LENGTH digits are present, or if a CACHE() call fails.
 *
 * NOTE(review): the first line of the signature is not visible in this
 * listing.
2215  */
2216 
2217 static int
2219  yaml_mark_t start_mark, int *number)
2220 {
2221  int value = 0;
2222  size_t length = 0;
2223 
2224  /* Repeat while the next character is digit. */
2225 
2226  if (!CACHE(parser, 1)) return 0;
2227 
2228  while (IS_DIGIT(parser->buffer))
2229  {
2230  /* Check if the number is too long (the check runs before the digit is accumulated). */
2231 
2232  if (++length > MAX_NUMBER_LENGTH) {
2233  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2234  start_mark, "found extremely long version number");
2235  }
2236 
2237  value = value*10 + AS_DIGIT(parser->buffer);
2238 
2239  SKIP(parser);
2240 
2241  if (!CACHE(parser, 1)) return 0;
2242  }
2243 
2244  /* Check if the number was present. */
2245 
2246  if (!length) {
2247  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2248  start_mark, "did not find expected version number");
2249  }
2250 
2251  *number = value;
2252 
2253  return 1;
2254 }
2255 
2256 /*
2257  * Scan the value of a TAG-DIRECTIVE token.
2258  *
2259  * Scope:
2260  * %TAG !yaml! tag:yaml.org,2002: \n
2261  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 *
 * Reads blanks, a tag handle, at least one blank, and a tag prefix, which
 * must be followed by a blank, line break or end of input. On success the
 * two strings are stored in *handle and *prefix and 1 is returned; they are
 * not freed here. On failure both are freed and 0 is returned.
 *
 * NOTE(review): the first line of the signature is not visible in this
 * listing.
2262  */
2263 
2264 static int
2266  yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2267 {
2268  yaml_char_t *handle_value = NULL;
2269  yaml_char_t *prefix_value = NULL;
2270 
2271  /* Eat whitespaces. */
2272 
2273  if (!CACHE(parser, 1)) goto error;
2274 
2275  while (IS_BLANK(parser->buffer)) {
2276  SKIP(parser);
2277  if (!CACHE(parser, 1)) goto error;
2278  }
2279 
2280  /* Scan a handle (second argument 1: directive mode). */
2281 
2282  if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2283  goto error;
2284 
2285  /* Expect a whitespace. */
2286 
2287  if (!CACHE(parser, 1)) goto error;
2288 
2289  if (!IS_BLANK(parser->buffer)) {
2290  yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2291  start_mark, "did not find expected whitespace");
2292  goto error;
2293  }
2294 
2295  /* Eat whitespaces. */
2296 
2297  while (IS_BLANK(parser->buffer)) {
2298  SKIP(parser);
2299  if (!CACHE(parser, 1)) goto error;
2300  }
2301 
2302  /* Scan a prefix (no head string is prepended). */
2303 
2304  if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value))
2305  goto error;
2306 
2307  /* Expect a whitespace or line break. */
2308 
2309  if (!CACHE(parser, 1)) goto error;
2310 
2311  if (!IS_BLANKZ(parser->buffer)) {
2312  yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2313  start_mark, "did not find expected whitespace or line break");
2314  goto error;
2315  }
2316 
2317  *handle = handle_value;
2318  *prefix = prefix_value;
2319 
2320  return 1;
2321 
2322 error:
2323  yaml_free(handle_value);
2324  yaml_free(prefix_value);
2325  return 0;
2326 }
2327 
/*
 * Scan an ANCHOR or ALIAS token.
 *
 * Skips the single indicator character, then reads the name. `type` selects
 * which of the two tokens is created. On success *token is initialized with
 * the scanned string and 1 is returned; on failure the string is released
 * and 0 is returned.
 *
 * NOTE(review): the signature lines and the first line of the
 * error-reporting call are not visible in this listing.
 */
2328 static int
2331 {
2332  int length = 0;
2333  yaml_mark_t start_mark, end_mark;
2334  yaml_string_t string = NULL_STRING;
2335 
2336  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2337 
2338  /* Eat the indicator character. */
2339 
2340  start_mark = parser->mark;
2341 
2342  SKIP(parser);
2343 
2344  /* Consume the value, counting the characters read. */
2345 
2346  if (!CACHE(parser, 1)) goto error;
2347 
2348  while (IS_ALPHA(parser->buffer)) {
2349  if (!READ(parser, string)) goto error;
2350  if (!CACHE(parser, 1)) goto error;
2351  length ++;
2352  }
2353 
2354  end_mark = parser->mark;
2355 
2356  /*
2357  * Check if length of the anchor is greater than 0 and it is followed by
2358  * a whitespace character or one of the indicators:
2359  *
2360  * '?', ':', ',', ']', '}', '%', '@', '`'.
2361  */
2362 
2363  if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2364  || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2365  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2366  || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2367  || CHECK(parser->buffer, '`'))) {
2369  "while scanning an anchor" : "while scanning an alias", start_mark,
2370  "did not find expected alphabetic or numeric character");
2371  goto error;
2372  }
2373 
2374  /* Create a token; the scanned string is handed to it. */
2375 
2376  if (type == YAML_ANCHOR_TOKEN) {
2377  ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2378  }
2379  else {
2380  ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2381  }
2382 
2383  return 1;
2384 
2385 error:
2386  STRING_DEL(parser, string);
2387  return 0;
2388 }
2389 
2390 /*
2391  * Scan a TAG token.
 *
 * Three forms are handled:
 *   '!<uri>'          handle = "",        suffix = uri
 *   '!handle!suffix'  handle = "!handle!", suffix = suffix
 *   '!suffix'         handle = "!",       suffix = suffix
 * plus the bare '!' tag, which yields handle = "" and suffix = "!".
 *
 * On success *token is initialized with handle and suffix and 1 is returned.
 * On failure both strings are freed and 0 is returned.
 *
 * NOTE(review): the signature line is not visible in this listing.
2392  */
2393 
2394 static int
2396 {
2397  yaml_char_t *handle = NULL;
2398  yaml_char_t *suffix = NULL;
2399  yaml_mark_t start_mark, end_mark;
2400 
2401  start_mark = parser->mark;
2402 
2403  /* Check if the tag is in the canonical form (second character is '<'). */
2404 
2405  if (!CACHE(parser, 2)) goto error;
2406 
2407  if (CHECK_AT(parser->buffer, '<', 1))
2408  {
2409  /* Set the handle to '' */
2410 
2411  handle = yaml_malloc(1);
2412  if (!handle) goto error;
2413  handle[0] = '\0';
2414 
2415  /* Eat '!<' */
2416 
2417  SKIP(parser);
2418  SKIP(parser);
2419 
2420  /* Consume the tag value. */
2421 
2422  if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2423  goto error;
2424 
2425  /* Check for '>' and eat it. */
2426 
2427  if (!CHECK(parser->buffer, '>')) {
2428  yaml_parser_set_scanner_error(parser, "while scanning a tag",
2429  start_mark, "did not find the expected '>'");
2430  goto error;
2431  }
2432 
2433  SKIP(parser);
2434  }
2435  else
2436  {
2437  /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2438 
2439  /* First, try to scan a handle. */
2440 
2441  if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2442  goto error;
2443 
2444  /* Check if it is, indeed, a handle: starts with '!', is longer than one character and ends with '!'. */
2445 
2446  if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2447  {
2448  /* Scan the suffix now. */
2449 
2450  if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2451  goto error;
2452  }
2453  else
2454  {
2455  /*
 * It wasn't a handle after all. Scan the rest of the tag, passing
 * what was read so far as the head of the URI.
 */
2456 
2457  if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix))
2458  goto error;
2459 
2460  /* Set the handle to '!'. */
2461 
2462  yaml_free(handle);
2463  handle = yaml_malloc(2);
2464  if (!handle) goto error;
2465  handle[0] = '!';
2466  handle[1] = '\0';
2467 
2468  /*
2469  * A special case: the '!' tag. Set the handle to '' and the
2470  * suffix to '!'. This is done by swapping the two buffers: the
 * suffix is the empty string and the handle is "!".
2471  */
2472 
2473  if (suffix[0] == '\0') {
2474  yaml_char_t *tmp = handle;
2475  handle = suffix;
2476  suffix = tmp;
2477  }
2478  }
2479  }
2480 
2481  /* Check the character which ends the tag. */
2482 
2483  if (!CACHE(parser, 1)) goto error;
2484 
2485  if (!IS_BLANKZ(parser->buffer)) {
2486  yaml_parser_set_scanner_error(parser, "while scanning a tag",
2487  start_mark, "did not find expected whitespace or line break");
2488  goto error;
2489  }
2490 
2491  end_mark = parser->mark;
2492 
2493  /* Create a token. */
2494 
2495  TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
2496 
2497  return 1;
2498 
2499 error:
2500  yaml_free(handle);
2501  yaml_free(suffix);
2502  return 0;
2503 }
2504 
2505 /*
2506  * Scan a tag handle.
 *
 * Reads '!', then a run of IS_ALPHA characters, then an optional closing
 * '!'. `directive` is non-zero when scanning a %TAG directive: in that case
 * a missing closing '!' is an error unless the handle is exactly "!".
 * Otherwise the partial text is returned as is and the caller decides
 * whether it is a handle.
 *
 * On success stores the string in *handle and returns 1; on failure the
 * string is released and 0 is returned.
 *
 * NOTE(review): the first line of the signature is not visible in this
 * listing.
2507  */
2508 
2509 static int
2511  yaml_mark_t start_mark, yaml_char_t **handle)
2512 {
2513  yaml_string_t string = NULL_STRING;
2514 
2515  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2516 
2517  /* Check the initial '!' character. */
2518 
2519  if (!CACHE(parser, 1)) goto error;
2520 
2521  if (!CHECK(parser->buffer, '!')) {
2522  yaml_parser_set_scanner_error(parser, directive ?
2523  "while scanning a tag directive" : "while scanning a tag",
2524  start_mark, "did not find expected '!'");
2525  goto error;
2526  }
2527 
2528  /* Copy the '!' character. */
2529 
2530  if (!READ(parser, string)) goto error;
2531 
2532  /* Copy all subsequent alphabetical and numerical characters. */
2533 
2534  if (!CACHE(parser, 1)) goto error;
2535 
2536  while (IS_ALPHA(parser->buffer))
2537  {
2538  if (!READ(parser, string)) goto error;
2539  if (!CACHE(parser, 1)) goto error;
2540  }
2541 
2542  /* Check if the trailing character is '!' and copy it. */
2543 
2544  if (CHECK(parser->buffer, '!'))
2545  {
2546  if (!READ(parser, string)) goto error;
2547  }
2548  else
2549  {
2550  /*
2551  * It's either the '!' tag or not really a tag handle. If it's a %TAG
2552  * directive, it's an error. If it's a tag token, it must be a part of
2553  * URI.
2554  */
2555 
2556  if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2557  yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2558  start_mark, "did not find expected '!'");
2559  goto error;
2560  }
2561  }
2562 
2563  *handle = string.start;
2564 
2565  return 1;
2566 
2567 error:
2568  STRING_DEL(parser, string);
2569  return 0;
2570 }
2571 
2572 /*
2573  * Scan a tag URI (used for a tag suffix and for a %TAG directive prefix).
 *
 * `head`, when not NULL, is text already consumed by the caller; it is
 * copied to the start of the result without its leading '!'. `directive`
 * only selects the wording of error messages.
 *
 * `length` starts at strlen(head), including the leading '!', and is
 * incremented once per loop iteration. So a head of just "!" followed by no
 * URI characters passes the non-empty check and yields an empty string.
 *
 * On success stores the string in *uri and returns 1; on failure the string
 * is released and 0 is returned.
 *
 * NOTE(review): the first line of the signature is not visible in this
 * listing.
2574  */
2575 
2576 static int
2578  yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2579 {
2580  size_t length = head ? strlen((char *)head) : 0;
2581  yaml_string_t string = NULL_STRING;
2582 
2583  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2584 
2585  /* Resize the string until its capacity exceeds the head length. */
2586 
2587  while ((size_t)(string.end - string.start) <= length) {
2588  if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2589  parser->error = YAML_MEMORY_ERROR;
2590  goto error;
2591  }
2592  }
2593 
2594  /*
2595  * Copy the head if needed.
2596  *
2597  * Note that we don't copy the leading '!' character.
2598  */
2599 
2600  if (length > 1) {
2601  memcpy(string.start, head+1, length-1);
2602  string.pointer += length-1;
2603  }
2604 
2605  /* Scan the tag. */
2606 
2607  if (!CACHE(parser, 1)) goto error;
2608 
2609  /*
2610  * The set of characters that may appear in URI is as follows:
2611  *
2612  * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2613  * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
2614  * '%'.
 *
 * (presumably IS_ALPHA covers the alphanumerics, '_' and '-' - TODO
 * confirm against the macro definition.)
2615  */
2616 
2617  while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2618  || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2619  || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2620  || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2621  || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2622  || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.')
2623  || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2624  || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2625  || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2626  || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2627  || CHECK(parser->buffer, '%'))
2628  {
2629  /* Check if it is a URI-escape sequence. */
2630 
2631  if (CHECK(parser->buffer, '%')) {
2632  /* Make room first: the escape decoder writes without extending. */
2632  if (!STRING_EXTEND(parser, string))
2633  goto error;
2634 
2635  if (!yaml_parser_scan_uri_escapes(parser,
2636  directive, start_mark, &string)) goto error;
2637  }
2638  else {
2639  if (!READ(parser, string)) goto error;
2640  }
2641 
2642  length ++;
2643  if (!CACHE(parser, 1)) goto error;
2644  }
2645 
2646  /* Check if the tag is non-empty. */
2647 
2648  if (!length) {
2649  if (!STRING_EXTEND(parser, string))
2650  goto error;
2651 
2652  yaml_parser_set_scanner_error(parser, directive ?
2653  "while parsing a %TAG directive" : "while parsing a tag",
2654  start_mark, "did not find expected tag URI");
2655  goto error;
2656  }
2657 
2658  *uri = string.start;
2659 
2660  return 1;
2661 
2662 error:
2663  STRING_DEL(parser, string);
2664  return 0;
2665 }
2666 
2667 /*
2668  * Decode a URI-escape sequence corresponding to a single UTF-8 character.
 *
 * Consumes one or more '%XX' groups: the first decoded octet determines how
 * many octets the UTF-8 sequence has (1 to 4), and that many groups are read
 * in total. Each decoded octet is appended to *string.
 *
 * Returns 1 on success, 0 on a malformed escape, an invalid leading or
 * trailing UTF-8 octet, or a CACHE() failure.
 *
 * NOTE(review): this function writes through string->pointer without
 * growing the string; the visible caller invokes STRING_EXTEND first.
 * Presumably that reserves room for up to 4 octets - TODO confirm.
 *
 * NOTE(review): the first line of the signature is not visible in this
 * listing.
2669  */
2670 
2671 static int
2673  yaml_mark_t start_mark, yaml_string_t *string)
2674 {
2675  int width = 0;
2676 
2677  /* Decode the required number of characters. */
2678 
2679  do {
2680 
2681  unsigned char octet = 0;
2682 
2683  /* Check for a URI-escaped octet ('%' followed by two hex digits). */
2684 
2685  if (!CACHE(parser, 3)) return 0;
2686 
2687  if (!(CHECK(parser->buffer, '%')
2688  && IS_HEX_AT(parser->buffer, 1)
2689  && IS_HEX_AT(parser->buffer, 2))) {
2690  return yaml_parser_set_scanner_error(parser, directive ?
2691  "while parsing a %TAG directive" : "while parsing a tag",
2692  start_mark, "did not find URI escaped octet");
2693  }
2694 
2695  /* Get the octet: high nibble from the first digit, low nibble from the second. */
2696 
2697  octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
2698 
2699  /* If it is the leading octet, determine the length of the UTF-8 sequence. */
2700 
2701  if (!width)
2702  {
 /* 0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4, else invalid. */
2703  width = (octet & 0x80) == 0x00 ? 1 :
2704  (octet & 0xE0) == 0xC0 ? 2 :
2705  (octet & 0xF0) == 0xE0 ? 3 :
2706  (octet & 0xF8) == 0xF0 ? 4 : 0;
2707  if (!width) {
2708  return yaml_parser_set_scanner_error(parser, directive ?
2709  "while parsing a %TAG directive" : "while parsing a tag",
2710  start_mark, "found an incorrect leading UTF-8 octet");
2711  }
2712  }
2713  else
2714  {
2715  /* Check if the trailing octet is correct (must have the form 10xxxxxx). */
2716 
2717  if ((octet & 0xC0) != 0x80) {
2718  return yaml_parser_set_scanner_error(parser, directive ?
2719  "while parsing a %TAG directive" : "while parsing a tag",
2720  start_mark, "found an incorrect trailing UTF-8 octet");
2721  }
2722  }
2723 
2724  /* Copy the octet and move the pointers past the three input characters. */
2725 
2726  *(string->pointer++) = octet;
2727  SKIP(parser);
2728  SKIP(parser);
2729  SKIP(parser);
2730 
2731  } while (--width);
2732 
2733  return 1;
2734 }
2735 
2736 /*
2737  * Scan a block scalar.
2738  */
2739 
2740 static int
2742  int literal)
2743 {
2744  yaml_mark_t start_mark;
2745  yaml_mark_t end_mark;
2746  yaml_string_t string = NULL_STRING;
2747  yaml_string_t leading_break = NULL_STRING;
2748  yaml_string_t trailing_breaks = NULL_STRING;
2749  int chomping = 0;
2750  int increment = 0;
2751  int indent = 0;
2752  int leading_blank = 0;
2753  int trailing_blank = 0;
2754 
2755  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2756  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
2757  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
2758 
2759  /* Eat the indicator '|' or '>'. */
2760 
2761  start_mark = parser->mark;
2762 
2763  SKIP(parser);
2764 
2765  /* Scan the additional block scalar indicators. */
2766 
2767  if (!CACHE(parser, 1)) goto error;
2768 
2769  /* Check for a chomping indicator. */
2770 
2771  if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
2772  {
2773  /* Set the chomping method and eat the indicator. */
2774 
2775  chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2776 
2777  SKIP(parser);
2778 
2779  /* Check for an indentation indicator. */
2780 
2781  if (!CACHE(parser, 1)) goto error;
2782 
2783  if (IS_DIGIT(parser->buffer))
2784  {
2785  /* Check that the intendation is greater than 0. */
2786 
2787  if (CHECK(parser->buffer, '0')) {
2788  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2789  start_mark, "found an intendation indicator equal to 0");
2790  goto error;
2791  }
2792 
2793  /* Get the intendation level and eat the indicator. */
2794 
2795  increment = AS_DIGIT(parser->buffer);
2796 
2797  SKIP(parser);
2798  }
2799  }
2800 
2801  /* Do the same as above, but in the opposite order. */
2802 
2803  else if (IS_DIGIT(parser->buffer))
2804  {
2805  if (CHECK(parser->buffer, '0')) {
2806  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2807  start_mark, "found an intendation indicator equal to 0");
2808  goto error;
2809  }
2810 
2811  increment = AS_DIGIT(parser->buffer);
2812 
2813  SKIP(parser);
2814 
2815  if (!CACHE(parser, 1)) goto error;
2816 
2817  if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
2818  chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2819 
2820  SKIP(parser);
2821  }
2822  }
2823 
2824  /* Eat whitespaces and comments to the end of the line. */
2825 
2826  if (!CACHE(parser, 1)) goto error;
2827 
2828  while (IS_BLANK(parser->buffer)) {
2829  SKIP(parser);
2830  if (!CACHE(parser, 1)) goto error;
2831  }
2832 
2833  if (CHECK(parser->buffer, '#')) {
2834  while (!IS_BREAKZ(parser->buffer)) {
2835  SKIP(parser);
2836  if (!CACHE(parser, 1)) goto error;
2837  }
2838  }
2839 
2840  /* Check if we are at the end of the line. */
2841 
2842  if (!IS_BREAKZ(parser->buffer)) {
2843  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2844  start_mark, "did not find expected comment or line break");
2845  goto error;
2846  }
2847 
2848  /* Eat a line break. */
2849 
2850  if (IS_BREAK(parser->buffer)) {
2851  if (!CACHE(parser, 2)) goto error;
2852  SKIP_LINE(parser);
2853  }
2854 
2855  end_mark = parser->mark;
2856 
2857  /* Set the intendation level if it was specified. */
2858 
2859  if (increment) {
2860  indent = parser->indent >= 0 ? parser->indent+increment : increment;
2861  }
2862 
2863  /* Scan the leading line breaks and determine the indentation level if needed. */
2864 
2865  if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
2866  start_mark, &end_mark)) goto error;
2867 
2868  /* Scan the block scalar content. */
2869 
2870  if (!CACHE(parser, 1)) goto error;
2871 
2872  while ((int)parser->mark.column == indent && !IS_Z(parser->buffer))
2873  {
2874  /*
2875  * We are at the beginning of a non-empty line.
2876  */
2877 
2878  /* Is it a trailing whitespace? */
2879 
2880  trailing_blank = IS_BLANK(parser->buffer);
2881 
2882  /* Check if we need to fold the leading line break. */
2883 
2884  if (!literal && (*leading_break.start == '\n')
2885  && !leading_blank && !trailing_blank)
2886  {
2887  /* Do we need to join the lines by space? */
2888 
2889  if (*trailing_breaks.start == '\0') {
2890  if (!STRING_EXTEND(parser, string)) goto error;
2891  *(string.pointer ++) = ' ';
2892  }
2893 
2894  CLEAR(parser, leading_break);
2895  }
2896  else {
2897  if (!JOIN(parser, string, leading_break)) goto error;
2898  CLEAR(parser, leading_break);
2899  }
2900 
2901  /* Append the remaining line breaks. */
2902 
2903  if (!JOIN(parser, string, trailing_breaks)) goto error;
2904  CLEAR(parser, trailing_breaks);
2905 
2906  /* Is it a leading whitespace? */
2907 
2908  leading_blank = IS_BLANK(parser->buffer);
2909 
2910  /* Consume the current line. */
2911 
2912  while (!IS_BREAKZ(parser->buffer)) {
2913  if (!READ(parser, string)) goto error;
2914  if (!CACHE(parser, 1)) goto error;
2915  }
2916 
2917  /* Consume the line break. */
2918 
2919  if (!CACHE(parser, 2)) goto error;
2920 
2921  if (!READ_LINE(parser, leading_break)) goto error;
2922 
2923  /* Eat the following intendation spaces and line breaks. */
2924 
2926  &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
2927  }
2928 
2929  /* Chomp the tail. */
2930 
2931  if (chomping != -1) {
2932  if (!JOIN(parser, string, leading_break)) goto error;
2933  }
2934  if (chomping == 1) {
2935  if (!JOIN(parser, string, trailing_breaks)) goto error;
2936  }
2937 
2938  /* Create a token. */
2939 
2940  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
2942  start_mark, end_mark);
2943 
2944  STRING_DEL(parser, leading_break);
2945  STRING_DEL(parser, trailing_breaks);
2946 
2947  return 1;
2948 
2949 error:
2950  STRING_DEL(parser, string);
2951  STRING_DEL(parser, leading_break);
2952  STRING_DEL(parser, trailing_breaks);
2953 
2954  return 0;
2955 }
2956 
2957 /*
2958  * Scan intendation spaces and line breaks for a block scalar. Determine the
2959  * intendation level if needed.
2960  */
2961 
2962 static int
2964  int *indent, yaml_string_t *breaks,
2965  yaml_mark_t start_mark, yaml_mark_t *end_mark)
2966 {
2967  int max_indent = 0;
2968 
2969  *end_mark = parser->mark;
2970 
2971  /* Eat the intendation spaces and line breaks. */
2972 
2973  while (1)
2974  {
2975  /* Eat the intendation spaces. */
2976 
2977  if (!CACHE(parser, 1)) return 0;
2978 
2979  while ((!*indent || (int)parser->mark.column < *indent)
2980  && IS_SPACE(parser->buffer)) {
2981  SKIP(parser);
2982  if (!CACHE(parser, 1)) return 0;
2983  }
2984 
2985  if ((int)parser->mark.column > max_indent)
2986  max_indent = (int)parser->mark.column;
2987 
2988  /* Check for a tab character messing the intendation. */
2989 
2990  if ((!*indent || (int)parser->mark.column < *indent)
2991  && IS_TAB(parser->buffer)) {
2992  return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2993  start_mark, "found a tab character where an intendation space is expected");
2994  }
2995 
2996  /* Have we found a non-empty line? */
2997 
2998  if (!IS_BREAK(parser->buffer)) break;
2999 
3000  /* Consume the line break. */
3001 
3002  if (!CACHE(parser, 2)) return 0;
3003  if (!READ_LINE(parser, *breaks)) return 0;
3004  *end_mark = parser->mark;
3005  }
3006 
3007  /* Determine the indentation level if needed. */
3008 
3009  if (!*indent) {
3010  *indent = max_indent;
3011  if (*indent < parser->indent + 1)
3012  *indent = parser->indent + 1;
3013  if (*indent < 1)
3014  *indent = 1;
3015  }
3016 
3017  return 1;
3018 }
3019 
3020 /*
3021  * Scan a quoted scalar.
3022  */
3023 
3024 static int
3026  int single)
3027 {
3028  yaml_mark_t start_mark;
3029  yaml_mark_t end_mark;
3030  yaml_string_t string = NULL_STRING;
3031  yaml_string_t leading_break = NULL_STRING;
3032  yaml_string_t trailing_breaks = NULL_STRING;
3033  yaml_string_t whitespaces = NULL_STRING;
3034  int leading_blanks;
3035 
3036  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3037  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3038  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3039  if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3040 
3041  /* Eat the left quote. */
3042 
3043  start_mark = parser->mark;
3044 
3045  SKIP(parser);
3046 
3047  /* Consume the content of the quoted scalar. */
3048 
3049  while (1)
3050  {
3051  /* Check that there are no document indicators at the beginning of the line. */
3052 
3053  if (!CACHE(parser, 4)) goto error;
3054 
3055  if (parser->mark.column == 0 &&
3056  ((CHECK_AT(parser->buffer, '-', 0) &&
3057  CHECK_AT(parser->buffer, '-', 1) &&
3058  CHECK_AT(parser->buffer, '-', 2)) ||
3059  (CHECK_AT(parser->buffer, '.', 0) &&
3060  CHECK_AT(parser->buffer, '.', 1) &&
3061  CHECK_AT(parser->buffer, '.', 2))) &&
3062  IS_BLANKZ_AT(parser->buffer, 3))
3063  {
3064  yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3065  start_mark, "found unexpected document indicator");
3066  goto error;
3067  }
3068 
3069  /* Check for EOF. */
3070 
3071  if (IS_Z(parser->buffer)) {
3072  yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3073  start_mark, "found unexpected end of stream");
3074  goto error;
3075  }
3076 
3077  /* Consume non-blank characters. */
3078 
3079  if (!CACHE(parser, 2)) goto error;
3080 
3081  leading_blanks = 0;
3082 
3083  while (!IS_BLANKZ(parser->buffer))
3084  {
3085  /* Check for an escaped single quote. */
3086 
3087  if (single && CHECK_AT(parser->buffer, '\'', 0)
3088  && CHECK_AT(parser->buffer, '\'', 1))
3089  {
3090  if (!STRING_EXTEND(parser, string)) goto error;
3091  *(string.pointer++) = '\'';
3092  SKIP(parser);
3093  SKIP(parser);
3094  }
3095 
3096  /* Check for the right quote. */
3097 
3098  else if (CHECK(parser->buffer, single ? '\'' : '"'))
3099  {
3100  break;
3101  }
3102 
3103  /* Check for an escaped line break. */
3104 
3105  else if (!single && CHECK(parser->buffer, '\\')
3106  && IS_BREAK_AT(parser->buffer, 1))
3107  {
3108  if (!CACHE(parser, 3)) goto error;
3109  SKIP(parser);
3110  SKIP_LINE(parser);
3111  leading_blanks = 1;
3112  break;
3113  }
3114 
3115  /* Check for an escape sequence. */
3116 
3117  else if (!single && CHECK(parser->buffer, '\\'))
3118  {
3119  size_t code_length = 0;
3120 
3121  if (!STRING_EXTEND(parser, string)) goto error;
3122 
3123  /* Check the escape character. */
3124 
3125  switch (parser->buffer.pointer[1])
3126  {
3127  case '0':
3128  *(string.pointer++) = '\0';
3129  break;
3130 
3131  case 'a':
3132  *(string.pointer++) = '\x07';
3133  break;
3134 
3135  case 'b':
3136  *(string.pointer++) = '\x08';
3137  break;
3138 
3139  case 't':
3140  case '\t':
3141  *(string.pointer++) = '\x09';
3142  break;
3143 
3144  case 'n':
3145  *(string.pointer++) = '\x0A';
3146  break;
3147 
3148  case 'v':
3149  *(string.pointer++) = '\x0B';
3150  break;
3151 
3152  case 'f':
3153  *(string.pointer++) = '\x0C';
3154  break;
3155 
3156  case 'r':
3157  *(string.pointer++) = '\x0D';
3158  break;
3159 
3160  case 'e':
3161  *(string.pointer++) = '\x1B';
3162  break;
3163 
3164  case ' ':
3165  *(string.pointer++) = '\x20';
3166  break;
3167 
3168  case '"':
3169  *(string.pointer++) = '"';
3170  break;
3171 
3172  case '\'':
3173  *(string.pointer++) = '\'';
3174  break;
3175 
3176  case '\\':
3177  *(string.pointer++) = '\\';
3178  break;
3179 
3180  case 'N': /* NEL (#x85) */
3181  *(string.pointer++) = '\xC2';
3182  *(string.pointer++) = '\x85';
3183  break;
3184 
3185  case '_': /* #xA0 */
3186  *(string.pointer++) = '\xC2';
3187  *(string.pointer++) = '\xA0';
3188  break;
3189 
3190  case 'L': /* LS (#x2028) */
3191  *(string.pointer++) = '\xE2';
3192  *(string.pointer++) = '\x80';
3193  *(string.pointer++) = '\xA8';
3194  break;
3195 
3196  case 'P': /* PS (#x2029) */
3197  *(string.pointer++) = '\xE2';
3198  *(string.pointer++) = '\x80';
3199  *(string.pointer++) = '\xA9';
3200  break;
3201 
3202  case 'x':
3203  code_length = 2;
3204  break;
3205 
3206  case 'u':
3207  code_length = 4;
3208  break;
3209 
3210  case 'U':
3211  code_length = 8;
3212  break;
3213 
3214  default:
3215  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3216  start_mark, "found unknown escape character");
3217  goto error;
3218  }
3219 
3220  SKIP(parser);
3221  SKIP(parser);
3222 
3223  /* Consume an arbitrary escape code. */
3224 
3225  if (code_length)
3226  {
3227  unsigned int value = 0;
3228  size_t k;
3229 
3230  /* Scan the character value. */
3231 
3232  if (!CACHE(parser, code_length)) goto error;
3233 
3234  for (k = 0; k < code_length; k ++) {
3235  if (!IS_HEX_AT(parser->buffer, k)) {
3236  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3237  start_mark, "did not find expected hexdecimal number");
3238  goto error;
3239  }
3240  value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3241  }
3242 
3243  /* Check the value and write the character. */
3244 
3245  if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3246  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3247  start_mark, "found invalid Unicode character escape code");
3248  goto error;
3249  }
3250 
3251  if (value <= 0x7F) {
3252  *(string.pointer++) = value;
3253  }
3254  else if (value <= 0x7FF) {
3255  *(string.pointer++) = 0xC0 + (value >> 6);
3256  *(string.pointer++) = 0x80 + (value & 0x3F);
3257  }
3258  else if (value <= 0xFFFF) {
3259  *(string.pointer++) = 0xE0 + (value >> 12);
3260  *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3261  *(string.pointer++) = 0x80 + (value & 0x3F);
3262  }
3263  else {
3264  *(string.pointer++) = 0xF0 + (value >> 18);
3265  *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3266  *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3267  *(string.pointer++) = 0x80 + (value & 0x3F);
3268  }
3269 
3270  /* Advance the pointer. */
3271 
3272  for (k = 0; k < code_length; k ++) {
3273  SKIP(parser);
3274  }
3275  }
3276  }
3277 
3278  else
3279  {
3280  /* It is a non-escaped non-blank character. */
3281 
3282  if (!READ(parser, string)) goto error;
3283  }
3284 
3285  if (!CACHE(parser, 2)) goto error;
3286  }
3287 
3288  /* Check if we are at the end of the scalar. */
3289 
3290  if (CHECK(parser->buffer, single ? '\'' : '"'))
3291  break;
3292 
3293  /* Consume blank characters. */
3294 
3295  if (!CACHE(parser, 1)) goto error;
3296 
3297  while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3298  {
3299  if (IS_BLANK(parser->buffer))
3300  {
3301  /* Consume a space or a tab character. */
3302 
3303  if (!leading_blanks) {
3304  if (!READ(parser, whitespaces)) goto error;
3305  }
3306  else {
3307  SKIP(parser);
3308  }
3309  }
3310  else
3311  {
3312  if (!CACHE(parser, 2)) goto error;
3313 
3314  /* Check if it is a first line break. */
3315 
3316  if (!leading_blanks)
3317  {
3318  CLEAR(parser, whitespaces);
3319  if (!READ_LINE(parser, leading_break)) goto error;
3320  leading_blanks = 1;
3321  }
3322  else
3323  {
3324  if (!READ_LINE(parser, trailing_breaks)) goto error;
3325  }
3326  }
3327  if (!CACHE(parser, 1)) goto error;
3328  }
3329 
3330  /* Join the whitespaces or fold line breaks. */
3331 
3332  if (leading_blanks)
3333  {
3334  /* Do we need to fold line breaks? */
3335 
3336  if (leading_break.start[0] == '\n') {
3337  if (trailing_breaks.start[0] == '\0') {
3338  if (!STRING_EXTEND(parser, string)) goto error;
3339  *(string.pointer++) = ' ';
3340  }
3341  else {
3342  if (!JOIN(parser, string, trailing_breaks)) goto error;
3343  CLEAR(parser, trailing_breaks);
3344  }
3345  CLEAR(parser, leading_break);
3346  }
3347  else {
3348  if (!JOIN(parser, string, leading_break)) goto error;
3349  if (!JOIN(parser, string, trailing_breaks)) goto error;
3350  CLEAR(parser, leading_break);
3351  CLEAR(parser, trailing_breaks);
3352  }
3353  }
3354  else
3355  {
3356  if (!JOIN(parser, string, whitespaces)) goto error;
3357  CLEAR(parser, whitespaces);
3358  }
3359  }
3360 
3361  /* Eat the right quote. */
3362 
3363  SKIP(parser);
3364 
3365  end_mark = parser->mark;
3366 
3367  /* Create a token. */
3368 
3369  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3371  start_mark, end_mark);
3372 
3373  STRING_DEL(parser, leading_break);
3374  STRING_DEL(parser, trailing_breaks);
3375  STRING_DEL(parser, whitespaces);
3376 
3377  return 1;
3378 
3379 error:
3380  STRING_DEL(parser, string);
3381  STRING_DEL(parser, leading_break);
3382  STRING_DEL(parser, trailing_breaks);
3383  STRING_DEL(parser, whitespaces);
3384 
3385  return 0;
3386 }
3387 
3388 /*
3389  * Scan a plain scalar.
3390  */
3391 
3392 static int
3394 {
3395  yaml_mark_t start_mark;
3396  yaml_mark_t end_mark;
3397  yaml_string_t string = NULL_STRING;
3398  yaml_string_t leading_break = NULL_STRING;
3399  yaml_string_t trailing_breaks = NULL_STRING;
3400  yaml_string_t whitespaces = NULL_STRING;
3401  int leading_blanks = 0;
3402  int indent = parser->indent+1;
3403 
3404  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3405  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3406  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3407  if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3408 
3409  start_mark = end_mark = parser->mark;
3410 
3411  /* Consume the content of the plain scalar. */
3412 
3413  while (1)
3414  {
3415  /* Check for a document indicator. */
3416 
3417  if (!CACHE(parser, 4)) goto error;
3418 
3419  if (parser->mark.column == 0 &&
3420  ((CHECK_AT(parser->buffer, '-', 0) &&
3421  CHECK_AT(parser->buffer, '-', 1) &&
3422  CHECK_AT(parser->buffer, '-', 2)) ||
3423  (CHECK_AT(parser->buffer, '.', 0) &&
3424  CHECK_AT(parser->buffer, '.', 1) &&
3425  CHECK_AT(parser->buffer, '.', 2))) &&
3426  IS_BLANKZ_AT(parser->buffer, 3)) break;
3427 
3428  /* Check for a comment. */
3429 
3430  if (CHECK(parser->buffer, '#'))
3431  break;
3432 
3433  /* Consume non-blank characters. */
3434 
3435  while (!IS_BLANKZ(parser->buffer))
3436  {
3437  /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */
3438 
3439  if (parser->flow_level
3440  && CHECK(parser->buffer, ':')
3441  && !IS_BLANKZ_AT(parser->buffer, 1)) {
3442  yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3443  start_mark, "found unexpected ':'");
3444  goto error;
3445  }
3446 
3447  /* Check for indicators that may end a plain scalar. */
3448 
3449  if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3450  || (parser->flow_level &&
3451  (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':')
3452  || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[')
3453  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3454  || CHECK(parser->buffer, '}'))))
3455  break;
3456 
3457  /* Check if we need to join whitespaces and breaks. */
3458 
3459  if (leading_blanks || whitespaces.start != whitespaces.pointer)
3460  {
3461  if (leading_blanks)
3462  {
3463  /* Do we need to fold line breaks? */
3464 
3465  if (leading_break.start[0] == '\n') {
3466  if (trailing_breaks.start[0] == '\0') {
3467  if (!STRING_EXTEND(parser, string)) goto error;
3468  *(string.pointer++) = ' ';
3469  }
3470  else {
3471  if (!JOIN(parser, string, trailing_breaks)) goto error;
3472  CLEAR(parser, trailing_breaks);
3473  }
3474  CLEAR(parser, leading_break);
3475  }
3476  else {
3477  if (!JOIN(parser, string, leading_break)) goto error;
3478  if (!JOIN(parser, string, trailing_breaks)) goto error;
3479  CLEAR(parser, leading_break);
3480  CLEAR(parser, trailing_breaks);
3481  }
3482 
3483  leading_blanks = 0;
3484  }
3485  else
3486  {
3487  if (!JOIN(parser, string, whitespaces)) goto error;
3488  CLEAR(parser, whitespaces);
3489  }
3490  }
3491 
3492  /* Copy the character. */
3493 
3494  if (!READ(parser, string)) goto error;
3495 
3496  end_mark = parser->mark;
3497 
3498  if (!CACHE(parser, 2)) goto error;
3499  }
3500 
3501  /* Is it the end? */
3502 
3503  if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3504  break;
3505 
3506  /* Consume blank characters. */
3507 
3508  if (!CACHE(parser, 1)) goto error;
3509 
3510  while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3511  {
3512  if (IS_BLANK(parser->buffer))
3513  {
3514  /* Check for tab character that abuse intendation. */
3515 
3516  if (leading_blanks && (int)parser->mark.column < indent
3517  && IS_TAB(parser->buffer)) {
3518  yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3519  start_mark, "found a tab character that violate intendation");
3520  goto error;
3521  }
3522 
3523  /* Consume a space or a tab character. */
3524 
3525  if (!leading_blanks) {
3526  if (!READ(parser, whitespaces)) goto error;
3527  }
3528  else {
3529  SKIP(parser);
3530  }
3531  }
3532  else
3533  {
3534  if (!CACHE(parser, 2)) goto error;
3535 
3536  /* Check if it is a first line break. */
3537 
3538  if (!leading_blanks)
3539  {
3540  CLEAR(parser, whitespaces);
3541  if (!READ_LINE(parser, leading_break)) goto error;
3542  leading_blanks = 1;
3543  }
3544  else
3545  {
3546  if (!READ_LINE(parser, trailing_breaks)) goto error;
3547  }
3548  }
3549  if (!CACHE(parser, 1)) goto error;
3550  }
3551 
3552  /* Check intendation level. */
3553 
3554  if (!parser->flow_level && (int)parser->mark.column < indent)
3555  break;
3556  }
3557 
3558  /* Create a token. */
3559 
3560  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3561  YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
3562 
3563  /* Note that we change the 'simple_key_allowed' flag. */
3564 
3565  if (leading_blanks) {
3566  parser->simple_key_allowed = 1;
3567  }
3568 
3569  STRING_DEL(parser, leading_break);
3570  STRING_DEL(parser, trailing_breaks);
3571  STRING_DEL(parser, whitespaces);
3572 
3573  return 1;
3574 
3575 error:
3576  STRING_DEL(parser, string);
3577  STRING_DEL(parser, leading_break);
3578  STRING_DEL(parser, trailing_breaks);
3579  STRING_DEL(parser, whitespaces);
3580 
3581  return 0;
3582 }
3583 
The double-quoted scalar style.
Definition: yaml.h:174
#define PUSH(x)
Definition: bigdecimal.c:64
A BLOCK-SEQUENCE-START token.
Definition: yaml.h:232
The pointer position.
Definition: yaml.h:145
static int yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
Definition: scanner.c:2004
static int yaml_parser_fetch_directive(yaml_parser_t *parser)
Definition: scanner.c:1376
size_t strlen(const char *)
yaml_token_t * tail
The tail of the tokens queue.
Definition: yaml.h:1200
int minor
Definition: tcltklib.c:110
struct yaml_parser_s::@38 buffer
The working buffer.
if(dispIdMember==DISPID_VALUE)
Definition: win32ole.c:791
A FLOW-SEQUENCE-START token.
Definition: yaml.h:239
#define NULL_STRING
Definition: yaml_private.h:126
A VALUE token.
Definition: yaml.h:254
#define IS_BOM(string)
Definition: yaml_private.h:301
static int yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, int single)
Definition: scanner.c:3025
Cannot allocate or reallocate a block of memory.
Definition: yaml.h:127
static int yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
Definition: scanner.c:2395
#define ALIAS_TOKEN_INIT(token, token_value, start_mark, end_mark)
Definition: yaml_private.h:516
static int yaml_parser_fetch_stream_start(yaml_parser_t *parser)
Definition: scanner.c:1300
A BLOCK-END token.
Definition: yaml.h:236
yaml_string_extend(yaml_char_t **start, yaml_char_t **pointer, yaml_char_t **end)
Definition: api.c:74
#define AS_DIGIT(string)
Definition: yaml_private.h:220
struct yaml_parser_s::@40 tokens
The tokens queue.
int stream_start_produced
Have we started to scan the input stream?
Definition: yaml.h:1183
static int yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
Definition: scanner.c:1835
static int yaml_parser_fetch_block_entry(yaml_parser_t *parser)
Definition: scanner.c:1578
yaml_encoding_t encoding
The input encoding.
Definition: yaml.h:1165
The parser structure.
Definition: yaml.h:1081
A BLOCK-SEQUENCE-END token.
Definition: yaml.h:234
#define SKIP(parser)
Definition: scanner.c:494
A FLOW-ENTRY token.
Definition: yaml.h:250
#define READ(parser, string)
Definition: scanner.c:518
static int yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1500
static int yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix)
Definition: scanner.c:2265
static int yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, int literal)
Definition: scanner.c:2741
unsigned char yaml_char_t
The character type (UTF-8 octet).
Definition: yaml.h:78
const unsigned char * start
The string start pointer.
Definition: yaml.h:1123
const char * context
The error context.
Definition: yaml.h:1099
yaml_mark_t mark
The position mark.
Definition: yaml.h:1004
#define IS_BREAK_AT(string, offset)
Definition: yaml_private.h:332
yaml_char_t * pointer
Definition: yaml_private.h:114
static int yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
Definition: scanner.c:1866
#define READ_LINE(parser, string)
Definition: scanner.c:530
int indent
The current indentation level.
Definition: yaml.h:1220
#define IS_BLANK(string)
Definition: yaml_private.h:326
static int yaml_parser_scan_version_directive_value(yaml_parser_t *parser, yaml_mark_t start_mark, int *major, int *minor)
Definition: scanner.c:2171
#define TAG_DIRECTIVE_TOKEN_INIT(token, token_handle, token_prefix, start_mark, end_mark)
Definition: yaml_private.h:540
static int yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1457
yaml_char_t * pointer
The current position of the buffer.
Definition: yaml.h:1144
An ALIAS token.
Definition: yaml.h:257
yaml_mark_t mark
The mark of the current position.
Definition: yaml.h:1171
#define head
Definition: st.c:107
#define IS_HEX_AT(string, offset)
Definition: yaml_private.h:226
yaml_char_t * start
Definition: yaml_private.h:112
static int yaml_parser_remove_simple_key(yaml_parser_t *parser)
Definition: scanner.c:1141
#define DEQUEUE(context, queue)
Definition: yaml_private.h:484
A FLOW-SEQUENCE-END token.
Definition: yaml.h:241
#define INITIAL_STRING_SIZE
Definition: yaml_private.h:89
#define STRING_DEL(context, string)
Definition: yaml_private.h:144
#define IS_SPACE(string)
Definition: yaml_private.h:309
yaml_free(void *ptr)
Definition: api.c:51
The folded scalar style.
Definition: yaml.h:179
#define AS_HEX_AT(string, offset)
Definition: yaml_private.h:240
int * top
The top of the stack.
Definition: yaml.h:1216
#define QUEUE_INSERT(context, queue, index, value)
Definition: yaml_private.h:487
static int yaml_parser_fetch_next_token(yaml_parser_t *parser)
Definition: scanner.c:860
int required
Is a simple key required?
Definition: yaml.h:998
int simple_key_allowed
May a simple key occur at the current position?
Definition: yaml.h:1223
#define CACHE(parser, length)
Definition: scanner.c:485
#define IS_TAB(string)
Definition: yaml_private.h:317
static int yaml_parser_fetch_document_indicator(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1412
static int yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, yaml_mark_t start_mark, yaml_string_t *string)
Definition: scanner.c:2672
static int yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
Definition: scanner.c:1267
static int yaml_parser_fetch_stream_end(yaml_parser_t *parser)
Definition: scanner.c:1338
static int yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, yaml_mark_t context_mark, const char *problem)
Definition: scanner.c:782
#define IS_BREAKZ(string)
Definition: yaml_private.h:358
#define CHECK(string, octet)
Definition: yaml_private.h:184
#define IS_ALPHA(string)
Definition: yaml_private.h:201
static int yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, int *indent, yaml_string_t *breaks, yaml_mark_t start_mark, yaml_mark_t *end_mark)
Definition: scanner.c:2963
yaml_token_delete(yaml_token_t *token)
Free any memory allocated for a token object.
Definition: api.c:578
#define TOKEN_INIT(token, token_type, token_start_mark, token_end_mark)
Definition: yaml_private.h:503
static int yaml_parser_fetch_tag(yaml_parser_t *parser)
Definition: scanner.c:1804
struct yaml_parser_s::@41 indents
The indentation levels stack.
#define YAML_DECLARE(type)
The public API declaration.
Definition: yaml.h:38
ID token
Definition: ripper.c:16487
A FLOW-MAPPING-START token.
Definition: yaml.h:243
static int yaml_parser_decrease_flow_level(yaml_parser_t *parser)
Definition: scanner.c:1194
#define TAG_TOKEN_INIT(token, token_handle, token_suffix, start_mark, end_mark)
Definition: yaml_private.h:524
#define IS_BLANK_AT(string, offset)
Definition: yaml_private.h:323
yaml_error_type_t error
Error type.
Definition: yaml.h:1089
#define IS_DIGIT(string)
Definition: yaml_private.h:211
static int yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, yaml_mark_t start_mark, yaml_char_t **handle)
Definition: scanner.c:2510
#define STREAM_START_TOKEN_INIT(token, token_encoding, start_mark, end_mark)
Definition: yaml_private.h:509
static int yaml_parser_fetch_value(yaml_parser_t *parser)
Definition: scanner.c:1692
#define CHECK_AT(string, octet, offset)
Definition: yaml_private.h:177
size_t token_number
The number of the token.
Definition: yaml.h:1001
static int yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
Definition: scanner.c:1543
static int yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
Definition: scanner.c:3393
static int yaml_parser_increase_flow_level(yaml_parser_t *parser)
Definition: scanner.c:1168
int type
Definition: tcltklib.c:111
A STREAM-END token.
Definition: yaml.h:220
enum yaml_token_type_e yaml_token_type_t
Token types.
yaml_token_t * head
The head of the tokens queue.
Definition: yaml.h:1198
#define ENQUEUE(context, queue, value)
Definition: yaml_private.h:475
An ANCHOR token.
Definition: yaml.h:259
#define JOIN(context, string_a, string_b)
Definition: yaml_private.h:160
#define ANCHOR_TOKEN_INIT(token, token_value, start_mark, end_mark)
Definition: yaml_private.h:520
#define SCALAR_TOKEN_INIT(token, token_value, token_length, token_style, start_mark, end_mark)
Definition: yaml_private.h:529
yaml_mark_t context_mark
The context position.
Definition: yaml.h:1101
static int yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1774
struct yaml_parser_s::@42 simple_keys
The stack of simple keys.
A FLOW-MAPPING-END token.
Definition: yaml.h:245
yaml_mark_t problem_mark
The problem position.
Definition: yaml.h:1097
size_t tokens_parsed
The number of tokens fetched from the queue.
Definition: yaml.h:1204
static int yaml_parser_save_simple_key(yaml_parser_t *parser)
Definition: scanner.c:1097
#define STRING_EXTEND(context, string)
Definition: yaml_private.h:148
#define IS_BLANKZ_AT(string, offset)
Definition: yaml_private.h:373
A DOCUMENT-START token.
Definition: yaml.h:227
static int yaml_parser_scan_to_next_token(yaml_parser_t *parser)
Definition: scanner.c:1928
#define STRING_INIT(context, string, size)
Definition: yaml_private.h:135
#define STREAM_END_TOKEN_INIT(token, start_mark, end_mark)
Definition: yaml_private.h:513
static int yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
Definition: scanner.c:2577
This structure holds information about a potential simple key.
Definition: yaml.h:993
#define MAX_NUMBER_LENGTH
Definition: scanner.c:2205
The plain scalar style.
Definition: yaml.h:169
The literal scalar style.
Definition: yaml.h:177
static int yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, yaml_token_type_t type)
Definition: scanner.c:2329
size_t line
The position line.
Definition: yaml.h:150
#define POP(context, stack)
Definition: yaml_private.h:457
A DOCUMENT-END token.
Definition: yaml.h:229
#define IS_Z(string)
Definition: yaml_private.h:290
static VALUE mark(VALUE self)
Definition: psych_parser.c:523
static int yaml_parser_stale_simple_keys(yaml_parser_t *parser)
Definition: scanner.c:1056
Cannot scan the input stream.
Definition: yaml.h:132
static int yaml_parser_fetch_key(yaml_parser_t *parser)
Definition: scanner.c:1639
size_t index
The position index.
Definition: yaml.h:147
static int yaml_parser_scan_version_directive_number(yaml_parser_t *parser, yaml_mark_t start_mark, int *number)
Definition: scanner.c:2218
#define IS_BLANKZ(string)
Definition: yaml_private.h:376
yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
Scan the input stream and produce the next token.
Definition: scanner.c:742
#define assert(condition)
Definition: ossl.h:45
const char * name
Definition: nkf.c:208
yaml_malloc(size_t size)
Definition: api.c:31
const char * problem
Error description.
Definition: yaml.h:1091
int possible
Is a simple key possible?
Definition: yaml.h:995
A BLOCK-ENTRY token.
Definition: yaml.h:248
yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
Definition: scanner.c:800
int major
Definition: tcltklib.c:109
static int yaml_parser_scan_directive_name(yaml_parser_t *parser, yaml_mark_t start_mark, yaml_char_t **name)
Definition: scanner.c:2120
#define VERSION_DIRECTIVE_TOKEN_INIT(token, token_major, token_minor, start_mark, end_mark)
Definition: yaml_private.h:535
#define IS_BREAK(string)
Definition: yaml_private.h:344
size_t column
The position column.
Definition: yaml.h:153
#define NULL
Definition: _sdbm.c:102
A KEY token.
Definition: yaml.h:252
int flow_level
The number of unclosed '[' and '{' indicators.
Definition: yaml.h:1189
static int yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column, ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
Definition: scanner.c:1212
#define SKIP_LINE(parser)
Definition: scanner.c:500
The token structure.
Definition: yaml.h:267
#define CLEAR(context, string)
Definition: yaml_private.h:156
static int yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
Definition: scanner.c:1897
The single-quoted scalar style.
Definition: yaml.h:172