1 /* csv.y - CSV parser using Bison */
3 /* a "pure" api means communication variables like yylval
4 won't be global variables, and yylex is assumed to
5 have a different signature */
9 /* change prefix of symbols from yy to "csv" to avoid
10 clashes with any other parsers we may want to link */
12 %define api.prefix {csv}
14 /* generate much more meaningful errors rather than the
15 uninformative string "syntax error" */
17 %define parse.error verbose
19 /* Bison offers different %code insertion locations in
20 addition to yacc's %{ %} construct.
22 The "top" location is good for headers and feature
23 flags like the _XOPEN_SOURCE we use here */
26 /* XOPEN for strdup */
27 #define _XOPEN_SOURCE 600
32 /* Bison versions 3.7.5 and above provide the YYNOMEM
33 macro to allow our actions to signal the unlikely
34 event that they couldn't allocate memory. Thanks
35 to the Bison team for adding this feature at my
36 request. :) YYNOMEM causes yyparse() to return 2.
38 The following conditional define allows us to use
39 the functionality in earlier versions too. */
42 #define YYNOMEM goto yyexhaustedlab
52 /* Simple linked list structures for CSV parsing */
/* Node type for the linked list of CSV fields; nodes are chained through `next`. */
53 typedef struct field_node {
55 struct field_node *next;
/* Node type for the linked list of records; nodes are chained through `next`. */
58 typedef struct record_node {
60 struct record_node *next;
64 /* Add another argument in yyparse() so that we
65 can communicate any parser state to the caller.
66 We can't return the result directly, since the
67 return value is already reserved as an int, with
68 0=success, 1=error, 2=nomem */
70 %parse-param {void *parser_state}
72 /* %param adds an extra param to yyparse (like %parse-param)
73 but also causes yyparse to send the value to yylex.
74 In our case the caller will initialize their own scanner
75 instance and pass it through */
77 %param {void *scanner}
80 /* Function declarations - updated for pure API */
/* Error callback: takes the %parse-param and %param values, then the message. */
81 int csverror(void *parser_state, void *scanner, const char *s);
/* Lexer entry point; lval is presumably the pure-API semantic value
   pointer, scanner is the %param scanner instance. */
82 int csvlex(void *lval, void *scanner);
/* True when the list is exactly one field whose content is the empty string. */
83 bool one_empty_field(field_node_t *fields);
/* Allocates a list node that takes ownership of content. */
84 field_node_t *create_field(char *content);
/* Creates a node for content and links it after the last node of *head. */
85 void append_field_to_list(field_node_t **head, char *content);
/* Walks a field list, freeing each node's content string
   (used by the <fields> %destructor). */
86 void free_field_list(field_node_t *fields);
/* Walks the list and reports a size_t count (presumably the number of fields). */
87 size_t count_fields(field_node_t *fields);
89 /* Interface with the CSV parser library */
/* Called from the consumed_record action for each non-blank record. */
90 void csv_parser_add_record(field_node_t *fields);
/* Called from csverror() with the parser's error message. */
91 void csv_parser_set_error(const char *error);
102 %type <str> field.opt
103 %type <fields> record
105 /* Bison memory management - automatically free memory on errors */
106 %destructor { free($$); } <str>
107 %destructor { free_field_list($$); } <fields>
113 | file CRLF consumed_record
116 /* A record can be constructed in two ways, but we want to
117 run the same side effect for either case. We add an
118 intermediate non-terminal symbol "consumed_record" just
119 to perform the action. In library code, this would be a
120 good place to send the record to a callback function. */
124 /* a record comprised of exactly one blank field is a
125 blank record, which we can skip */
126 if (!one_empty_field($1))
128 /* Send the record to the parser library */
129 csv_parser_add_record($1);
131 /* Memory is automatically freed by %destructor */
/* NOTE(review): Bison runs %destructor only for symbols it discards
   (error recovery, YYABORT/YYACCEPT, memory exhaustion), not for
   right-hand-side values after a normal reduction. Confirm that $1 is
   released elsewhere, e.g. by csv_parser_add_record() taking ownership
   or by an explicit free_field_list($1); otherwise every record,
   including skipped blank ones, is leaked. */
137 /* Create first field node */
138 $$ = create_field($1);
/* NOTE(review): create_field() allocates with malloc and its other
   caller tests the result for NULL; confirm this action signals
   YYNOMEM when $$ comes back NULL. */
141 | record ',' field.opt {
142 /* Append field to existing list */
143 append_field_to_list(&$1, $3);
/* NOTE(review): append_field_to_list() returns void and returns early
   when node allocation fails, so this action cannot detect that the
   field was dropped; $3 does not appear to be freed on that path. */
/* create_field: allocate one field-list node with malloc and store the
   caller's content pointer in it without copying, so the node owns the
   string from then on.  Allocation failure cannot be reported through
   YYNOMEM from here; append_field_to_list() tests the result for NULL,
   so callers should do the same. */
158 field_node_t *create_field(char *content)
160 field_node_t *field = malloc(sizeof(field_node_t));
163 /* Can't use YYNOMEM here since we're not in a parser action */
167 field->content = content; /* Take ownership of the string */
/* append_field_to_list: wrap content in a new node via create_field()
   and link it after the last node reachable from *head.  The tail is
   found by walking the list from *head on every call, so each append
   is linear in the current list length.
   NOTE(review): when create_field() returns NULL this function returns
   silently; the caller gets no error indication and content does not
   appear to be freed on that path -- confirm. */
172 void append_field_to_list(field_node_t **head, char *content)
174 field_node_t *new_field = create_field(content);
175 if (!new_field) return;
182 /* Find the last field in the list */
183 field_node_t *current = *head;
184 while (current->next) {
185 current = current->next;
187 current->next = new_field;
/* free_field_list: release a field list starting at fields.  For each
   node the successor pointer is read first, then the node's owned
   content string is freed.  Registered as the %destructor for <fields>
   values. */
190 void free_field_list(field_node_t *fields)
192 field_node_t *current = fields;
/* Save the link before releasing anything that belongs to this node. */
194 field_node_t *next = current->next;
195 free(current->content);
/* count_fields: traverse the list from fields by following next links
   and return a size_t result (presumably the number of nodes visited --
   confirm against the full body). */
201 size_t count_fields(field_node_t *fields)
204 field_node_t *current = fields;
207 current = current->next;
/* one_empty_field: true only when fields is non-NULL, has no successor
   node, and its content is a non-NULL string whose first character is
   the terminator (i.e. the empty string).  A NULL list or a NULL
   content pointer yields false.  Used to recognise blank records. */
212 bool one_empty_field(field_node_t *fields)
214 return fields && !fields->next &&
215 fields->content && fields->content[0] == '\0';
/* csverror: error callback for the generated parser (the yy prefix is
   replaced by "csv" via api.prefix).  parser_state and scanner are the
   extra %parse-param / %param arguments; s is the error message, which
   is forwarded to the CSV parser library. */
218 int csverror(void *parser_state, void *scanner, const char *s)
222 csv_parser_set_error(s);