begriffs open source - sa-parse/blob - src/csv.y

   1 /* csv.y - CSV parser using Bison */
   2
   3 %code requires {
   4         #include <stdbool.h>
   5         #include <stdio.h>
   6         #include <stdlib.h>
   7         #include <string.h>
   8
   9         /* Simple linked list structures for CSV parsing */
  10         typedef struct field_node { /* one CSV field, kept as a node of a singly linked list */
  11                 char *content; /* field text; released with free() by free_field_list() */
  12                 struct field_node *next; /* following field, NULL on the last node */
  13         } field_node_t;
  14
  15         typedef struct record_node { /* NOTE(review): not referenced in the visible part of this file; presumably used by the parser library */
  16                 field_node_t *fields; /* field list belonging to this record (presumably the list head; confirm) */
  17                 struct record_node *next; /* link to another record node */
  18         } record_node_t;
  19 }
  20
  21 %code {
  22         /* Prototypes for the lexer/error hooks and for the list helpers defined in the epilogue */
  23         int yylex(void); /* lexer entry point; not defined in the visible part of this file */
  24         int yyerror(const char *s); /* passes s to csv_parser_set_error() and returns 0 */
  25         bool one_empty_field(field_node_t *fields); /* true for a list of exactly one field whose content is "" */
  26         field_node_t *create_field(char *content); /* new node owning content; frees content and returns NULL if malloc fails */
  27         void append_field_to_list(field_node_t **head, char *content); /* links a new node for content at the tail of *head */
  28         void free_field_list(field_node_t *fields); /* frees every node together with its content */
  29         size_t count_fields(field_node_t *fields); /* number of nodes in the list */
  30
  31         /* Interface with the CSV parser library (not defined in the visible part of this file) */
  32         void csv_parser_add_record(field_node_t *fields);
  33         void csv_parser_set_error(const char *error);
  34 }
  35
  36 %union
  37 {
  38         char *str; /* text of a FIELD token or field.opt; freed with free() by the <str> destructor */
  39         field_node_t *fields; /* field list built by the record rule */
  40 }
  41
  42 %token CRLF /* separates records in the file rule; carries no value */
  43 %token <str> FIELD /* field text, stored in the str union member */
  44 %type <str> field.opt
  45 %type <fields> record
  46
  47 /* Bison memory management: destructors run only for symbols Bison discards (error recovery, aborted parse); values consumed by a rule action are not freed automatically */
  48 %destructor { free($$); } <str>
  49 %destructor { free_field_list($$); } <fields>
  50
  51 %%
  52
  53 file : /* first rule of the grammar: one or more records separated by CRLF */
  54   consumed_record
  55 | file CRLF consumed_record /* left recursion: each further record follows a CRLF */
  56 ;
  57
  58 /* A record can be constructed in two ways, but we want to
  59    run the same side effect for either case. We add an
  60    intermediate non-terminal symbol "consumed_record" just
  61    to perform the action. In library code, this would be a
  62    good place to send the the record to a callback function. */
  63
  64 consumed_record :
  65   record {
  66         /* a record comprised of exactly one blank field is a
  67            blank record, which we can skip */
  68         if (!one_empty_field($1))
  69         {
  70                 /* Send the record to the parser library */
  71                 csv_parser_add_record($1);
  72         }
  73         /* Memory is automatically freed by %destructor */
  74   }
  75 ;
  76
  77 record :
  78   field.opt {
  79         /* Create first field node */
  80         $$ = create_field($1);
  81   }
  82 | record ',' field.opt {
  83         /* Append field to existing list */
  84         append_field_to_list(&$1, $3);
  85         $$ = $1;
  86   }
  87 ;
  88
  89 field.opt :
  90   %empty { $$ = calloc(1,1); }
  91 | FIELD
  92 ;
  93
  94 %%
  95
  96 field_node_t *create_field(char *content)
  97 {
  98         field_node_t *field = malloc(sizeof(field_node_t));
  99         if (!field) {
 100                 free(content);
 101                 return NULL;
 102         }
 103
 104         field->content = content; /* Take ownership of the string */
 105         field->next = NULL;
 106         return field;
 107 }
 108
 109 void append_field_to_list(field_node_t **head, char *content)
 110 {
 111         field_node_t *new_field = create_field(content);
 112         if (!new_field) return;
 113
 114         if (!*head) {
 115                 *head = new_field;
 116                 return;
 117         }
 118
 119         /* Find the last field in the list */
 120         field_node_t *current = *head;
 121         while (current->next) {
 122                 current = current->next;
 123         }
 124         current->next = new_field;
 125 }
 126
 127 void free_field_list(field_node_t *fields)
 128 {
 129         field_node_t *current = fields;
 130         while (current) {
 131                 field_node_t *next = current->next;
 132                 free(current->content);
 133                 free(current);
 134                 current = next;
 135         }
 136 }
 137
 138 size_t count_fields(field_node_t *fields)
 139 {
 140         size_t count = 0;
 141         field_node_t *current = fields;
 142         while (current) {
 143                 count++;
 144                 current = current->next;
 145         }
 146         return count;
 147 }
 148
 149 bool one_empty_field(field_node_t *fields)
 150 {
 151         return fields && !fields->next &&
 152                fields->content && fields->content[0] == '\0';
 153 }
 154
 155 int yyerror(const char *s) /* error-reporting function; s is the message to report */
 156 {
 157         csv_parser_set_error(s); /* hand the message to the parser library instead of printing it */
 158         return 0; /* always 0, regardless of the message */
 159 }