begriffs open source - sa-parse/blob - src/csv.y

   1 /* csv.y - CSV parser using Bison */
   2
   3 /* a "pure" api means communication variables like yylval
   4    won't be global variables, and yylex is assumed to
   5    have a different signature */
   6
   7 %define api.pure true
   8
   9 /* change prefix of symbols from yy to "csv" to avoid
  10    clashes with any other parsers we may want to link */
  11
  12 %define api.prefix {csv}
  13
  14 /* generate much more meaningful errors rather than the
  15    uninformative string "syntax error" */
  16
  17 %define parse.error verbose
  18
  19 /* Bison offers different %code insertion locations in
  20    addition to yacc's %{ %} construct.
  21
  22    The "top" location is good for headers and feature
  23    flags like the _XOPEN_SOURCE we use here */
  24
  25 %code top {
  26         /* XOPEN for strdup */
  27         #define _XOPEN_SOURCE 600
  28         #include <stdio.h>
  29         #include <stdlib.h>
  30         #include <string.h>
  31
  32         /* Bison versions 3.7.5 and above provide the YYNOMEM
  33            macro to allow our actions to signal the unlikely
  34            event that they couldn't allocate memory. Thanks
  35            to the Bison team for adding this feature at my
  36            request. :) YYNOMEM causes yyparse() to return 2.
  37
  38            The following conditional define allows us to use
  39            the functionality in earlier versions too. */
  40
  41         #ifndef YYNOMEM
  42         #define YYNOMEM goto yyexhaustedlab
  43         #endif
  44 }
  45
  46 %code requires {
  47         #include <stdbool.h>
  48         #include <stdio.h>
  49         #include <stdlib.h>
  50         #include <string.h>
  51
  52         /* Simple linked list structures for CSV parsing */
  53         typedef struct field_node {
  54                 char *content;
  55                 struct field_node *next;
  56         } field_node_t;
  57
  58         typedef struct record_node {
  59                 field_node_t *fields;
  60                 struct record_node *next;
  61         } record_node_t;
  62 }
  63
  64 /* Add another argument in yyparse() so that we
  65    can communicate any parser state to the caller.
  66    We can't return the result directly, since the
  67    return value is already reserved as an int, with
  68    0=success, 1=error, 2=nomem */
  69
  70 %parse-param {void *parser_state}
  71
  72 /* param adds an extra param to yyparse (like parse-param)
  73    but also causes yyparse to send the value to yylex.
  74    In our case the caller will initialize their own scanner
  75    instance and pass it through */
  76
  77 %param {void *scanner}
  78
  79 %code {
  80         /* Function declarations - updated for pure API */
  81         int csverror(void *parser_state, void *scanner, const char *s);
  82         int csvlex(void *lval, void *scanner);
  83         bool one_empty_field(field_node_t *fields);
  84         field_node_t *create_field(char *content);
  85         void append_field_to_list(field_node_t **head, char *content);
  86         void free_field_list(field_node_t *fields);
  87         size_t count_fields(field_node_t *fields);
  88
  89         /* Interface with the CSV parser library */
  90         void csv_parser_add_record(field_node_t *fields);
  91         void csv_parser_set_error(const char *error);
  92 }
  93
  94 %union
  95 {
  96         char *str;
  97         field_node_t *fields;
  98 }
  99
 100 %token CRLF
 101 %token <str> FIELD
 102 %type <str> field.opt
 103 %type <fields> record
 104
 105 /* Bison memory management - automatically free memory on errors */
 106 %destructor { free($$); } <str>
 107 %destructor { free_field_list($$); } <fields>
 108
 109 %%
 110
 111 file :
 112   consumed_record
 113 | file CRLF consumed_record
 114 ;
 115
 116 /* A record can be constructed in two ways, but we want to
 117    run the same side effect for either case. We add an
 118    intermediate non-terminal symbol "consumed_record" just
 119    to perform the action. In library code, this would be a
 120    good place to send the record to a callback function. */
 121
 122 consumed_record :
 123   record {
 124         /* a record comprised of exactly one blank field is a
 125            blank record, which we can skip */
 126         if (!one_empty_field($1))
 127         {
 128                 /* Send the record to the parser library */
 129                 csv_parser_add_record($1);
 130         }
 131         /* Memory is automatically freed by %destructor */
 132   }
 133 ;
 134
 135 record :
 136   field.opt {
 137         /* Create first field node */
 138         $$ = create_field($1);
 139         if (!$$) YYNOMEM;
 140   }
 141 | record ',' field.opt {
 142         /* Append field to existing list */
 143         append_field_to_list(&$1, $3);
 144         $$ = $1;
 145   }
 146 ;
 147
 148 field.opt :
 149   %empty {
 150         $$ = calloc(1,1);
 151         if (!$$) YYNOMEM;
 152   }
 153 | FIELD
 154 ;
 155
 156 %%
 157
 158 field_node_t *create_field(char *content)
 159 {
 160         field_node_t *field = malloc(sizeof(field_node_t));
 161         if (!field) {
 162                 free(content);
 163                 /* Can't use YYNOMEM here since we're not in a parser action */
 164                 return NULL;
 165         }
 166
 167         field->content = content; /* Take ownership of the string */
 168         field->next = NULL;
 169         return field;
 170 }
 171
 172 void append_field_to_list(field_node_t **head, char *content)
 173 {
 174         field_node_t *new_field = create_field(content);
 175         if (!new_field) return;
 176
 177         if (!*head) {
 178                 *head = new_field;
 179                 return;
 180         }
 181
 182         /* Find the last field in the list */
 183         field_node_t *current = *head;
 184         while (current->next) {
 185                 current = current->next;
 186         }
 187         current->next = new_field;
 188 }
 189
 190 void free_field_list(field_node_t *fields)
 191 {
 192         field_node_t *current = fields;
 193         while (current) {
 194                 field_node_t *next = current->next;
 195                 free(current->content);
 196                 free(current);
 197                 current = next;
 198         }
 199 }
 200
 201 size_t count_fields(field_node_t *fields)
 202 {
 203         size_t count = 0;
 204         field_node_t *current = fields;
 205         while (current) {
 206                 count++;
 207                 current = current->next;
 208         }
 209         return count;
 210 }
 211
 212 bool one_empty_field(field_node_t *fields)
 213 {
 214         return fields && !fields->next &&
 215                fields->content && fields->content[0] == '\0';
 216 }
 217
 218 int csverror(void *parser_state, void *scanner, const char *s)
 219 {
 220         (void)parser_state;
 221         (void)scanner;
 222         csv_parser_set_error(s);
 223         return 0;
 224 }