1 /* csv.y - CSV parser using Bison */
3 /* a "pure" api means communication variables like yylval
4 won't be global variables, and yylex is assumed to
5 have a different signature */
9 /* change prefix of symbols from yy to "csv" to avoid
10 clashes with any other parsers we may want to link */
12 %define api.prefix {csv}
14 /* generate much more meaningful errors rather than the
15 uninformative string "syntax error" */
17 %define parse.error verbose
19 /* enable location tracking for better error reporting */
22 /* Bison offers different %code insertion locations in
23 addition to yacc's %{ %} construct.
25 The "top" location is good for headers and feature
26 flags like the _XOPEN_SOURCE we use here */
29 /* XOPEN for strdup */
30 #define _XOPEN_SOURCE 600
35 /* Bison versions 3.7.5 and above provide the YYNOMEM
36 macro to allow our actions to signal the unlikely
37 event that they couldn't allocate memory. Thanks
38 to the Bison team for adding this feature at my
39 request. :) YYNOMEM causes yyparse() to return 2.
41 The following conditional define allows us to use
42 the functionality in earlier versions too. */
45 #define YYNOMEM goto yyexhaustedlab
55 /* Simple linked list structures for CSV parsing */
56 typedef struct field_node {
58 struct field_node *next;
61 typedef struct record_node {
63 struct record_node *next;
67 /* Add another argument in yyparse() so that we
68 can communicate any parser state to the caller.
69 We can't return the result directly, since the
70 return value is already reserved as an int, with
71 0=success, 1=error, 2=nomem */
73 %parse-param {void *parser_state}
75 /* param adds an extra param to yyparse (like parse-param)
76 but also causes yyparse to send the value to yylex.
77 In our case the caller will initialize their own scanner
78 instance and pass it through */
80 %param {void *scanner}
83 /* Function declarations - updated for pure API */
/* Error callback and lexer entry point. Both receive the scanner that is
   passed through %param; csverror additionally receives the %parse-param
   parser_state. */
84 int csverror(CSVLTYPE *locp, void *parser_state, void *scanner, const char *s);
85 int csvlex(void *lval, CSVLTYPE *locp, void *scanner);
/* Field-list helpers, defined later in this file. create_field and
   append_field_to_list store the given content pointer in the new node
   rather than copying the string. */
86 bool one_empty_field(field_node_t *fields);
87 field_node_t *create_field(char *content);
88 void append_field_to_list(field_node_t **head, char *content);
89 void free_field_list(field_node_t *fields);
90 size_t count_fields(field_node_t *fields);
92 /* Interface with the CSV parser library */
/* Called from the grammar action (completed record) and from csverror
   (error message plus line/column). */
93 void csv_parser_add_record(field_node_t *fields);
94 void csv_parser_set_error_with_location(const char *error, int line, int column);
100 field_node_t *fields;
105 %type <str> field.opt
106 %type <fields> record
108 /* Bison memory management - automatically free memory on errors */
109 %destructor { free($$); } <str>
110 %destructor { free_field_list($$); } <fields>
116 | file CRLF consumed_record
119 /* A record can be constructed in two ways, but we want to
120 run the same side effect for either case. We add an
121 intermediate non-terminal symbol "consumed_record" just
122 to perform the action. In library code, this would be a
123 good place to send the record to a callback function. */
127 /* a record comprised of exactly one blank field is a
128 blank record, which we can skip */
129 if (!one_empty_field($1))
131 /* Send the record to the parser library */
132 csv_parser_add_record($1);
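134 /* NOTE(review): %destructor only runs for symbols Bison discards (error recovery or an aborted parse), not after a normal reduction - confirm who frees the record here */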
140 /* Create first field node */
141 $$ = create_field($1);
144 | record ',' field.opt {
145 /* Append field to existing list */
146 append_field_to_list(&$1, $3);
/*
 * Allocate a single field node with malloc and store `content` in it.
 * The pointer is stored as-is (no copy), so the node takes ownership of
 * the string.
 * Presumably returns NULL when malloc fails - append_field_to_list checks
 * the result for NULL; confirm.
 */
161 field_node_t *create_field(char *content)
163 field_node_t *field = malloc(sizeof(field_node_t));
166 /* Can't use YYNOMEM here since we're not in a parser action */
170 field->content = content; /* Take ownership of the string */
/*
 * Append a new node holding `content` to the list whose head pointer is
 * *head. The node is built with create_field; if that yields NULL the
 * function returns immediately and gives the caller no indication of the
 * failure.
 * NOTE(review): nothing on that early-return path frees `content` - confirm
 * the string is not leaked.
 * The tail is found by walking from *head, so each append is linear in the
 * current list length.
 */
175 void append_field_to_list(field_node_t **head, char *content)
177 field_node_t *new_field = create_field(content);
178 if (!new_field) return;
185 /* Find the last field in the list */
186 field_node_t *current = *head;
187 while (current->next) {
188 current = current->next;
190 current->next = new_field;
/*
 * Release a field list starting at `fields`. For each node the `next`
 * pointer is saved before the node's content string is freed, so the
 * traversal never reads through released memory.
 * NOTE(review): presumably the node itself is freed as well - confirm.
 */
193 void free_field_list(field_node_t *fields)
195 field_node_t *current = fields;
197 field_node_t *next = current->next;
198 free(current->content);
/*
 * Walk the list from `fields` by following `next` links.
 * Presumably returns the number of nodes visited (return type is size_t) -
 * confirm.
 */
204 size_t count_fields(field_node_t *fields)
207 field_node_t *current = fields;
210 current = current->next;
/*
 * Return true when `fields` is a non-NULL list of exactly one node whose
 * content is a non-NULL, empty string. The grammar action uses this to
 * recognise a blank record so that it can be skipped.
 */
215 bool one_empty_field(field_node_t *fields)
217 return fields && !fields->next &&
218 fields->content && fields->content[0] == '\0';
/*
 * Bison error callback: forwards the message `s` to
 * csv_parser_set_error_with_location. When location information is used,
 * the line and column come from locp->first_line and locp->first_column;
 * the fallback call passes 0 for both.
 * NOTE(review): presumably the choice depends on whether locp is non-NULL -
 * confirm.
 */
221 int csverror(CSVLTYPE *locp, void *parser_state, void *scanner, const char *s)
226 /* Use location information if available */
228 csv_parser_set_error_with_location(s, locp->first_line, locp->first_column);
230 csv_parser_set_error_with_location(s, 0, 0);