begriffs open source - sa-parse/blob - src/csv_parser.c

   1 #include "csv_parser.h"
   2 #include "csv.tab.h"
   3 #include <stdlib.h>
   4 #include <string.h>
   5
   6 /* External declarations from Bison/Flex */
   7 extern int yyparse(void);
   8 extern FILE *yyin;
   9 extern char *yytext;
  10 extern int yylex_destroy(void);
  11
  12 /* Global parser state - used to communicate with Bison parser */
  13 static csv_document_t *g_current_document = NULL;
  14 static csv_parser_t *g_current_parser = NULL;
  15 static bool g_parse_error = false;
  16 static char g_error_message[256] = "";
  17
  18 /* CSV parser structure */
  19 struct csv_parser {
  20     char error_message[256];
  21     bool has_error;
  22 };
  23
  24 /* Helper functions */
  25 static csv_field_t *convert_field_list(field_node_t *field_nodes);
  26 static csv_record_t *create_record_from_fields(field_node_t *field_nodes);
  27 static void free_document_internal(csv_document_t *doc);
  28
  29 /* Functions called by the Bison parser */
  30 void csv_parser_add_record(field_node_t *fields);
  31 void csv_parser_set_error(const char *error);
  32
  33 /* Implementation */
  34
  35 csv_parser_t *csv_parser_create(void)
  36 {
  37     csv_parser_t *parser = malloc(sizeof(csv_parser_t));
  38     if (!parser) {
  39         return NULL;
  40     }
  41
  42     parser->error_message[0] = '\0';
  43     parser->has_error = false;
  44
  45     return parser;
  46 }
  47
  48 void csv_parser_destroy(csv_parser_t *parser)
  49 {
  50     if (parser) {
  51         free(parser);
  52     }
  53 }
  54
  55 csv_error_t csv_parser_parse_string(csv_parser_t *parser, const char *input, csv_document_t **document)
  56 {
  57     if (!parser || !input || !document) {
  58         return CSV_ERROR_INVALID_PARAM;
  59     }
  60
  61     *document = NULL;
  62
  63     /* Create a temporary file from the string input */
  64     FILE *temp_file = tmpfile();
  65     if (!temp_file) {
  66         strncpy(parser->error_message, "Failed to create temporary file", sizeof(parser->error_message) - 1);
  67         parser->has_error = true;
  68         return CSV_ERROR_IO;
  69     }
  70
  71     /* Write input to temporary file */
  72     if (fputs(input, temp_file) == EOF) {
  73         fclose(temp_file);
  74         strncpy(parser->error_message, "Failed to write to temporary file", sizeof(parser->error_message) - 1);
  75         parser->has_error = true;
  76         return CSV_ERROR_IO;
  77     }
  78
  79     /* Reset file position to beginning */
  80     rewind(temp_file);
  81
  82     /* Parse from the temporary file */
  83     csv_error_t result = csv_parser_parse_file(parser, temp_file, document);
  84     fclose(temp_file);
  85
  86     return result;
  87 }
  88
  89 csv_error_t csv_parser_parse_file(csv_parser_t *parser, FILE *file, csv_document_t **document)
  90 {
  91     if (!parser || !file || !document) {
  92         return CSV_ERROR_INVALID_PARAM;
  93     }
  94
  95     *document = NULL;
  96     parser->has_error = false;
  97     parser->error_message[0] = '\0';
  98
  99     /* Initialize global state */
 100     g_current_document = malloc(sizeof(csv_document_t));
 101     if (!g_current_document) {
 102         strncpy(parser->error_message, "Memory allocation failed", sizeof(parser->error_message) - 1);
 103         parser->has_error = true;
 104         return CSV_ERROR_MEMORY;
 105     }
 106
 107     g_current_document->records = NULL;
 108     g_current_document->record_count = 0;
 109     g_current_document->header = NULL;
 110     g_current_parser = parser;
 111     g_parse_error = false;
 112     g_error_message[0] = '\0';
 113
 114     /* Set up the lexer input */
 115     yyin = file;
 116
 117     /* Parse the input */
 118     int parse_result = yyparse();
 119
 120     /* Clean up lexer state */
 121     yylex_destroy();
 122
 123     /* Check for errors */
 124     if (parse_result != 0 || g_parse_error) {
 125         if (g_error_message[0] != '\0') {
 126             strncpy(parser->error_message, g_error_message, sizeof(parser->error_message) - 1);
 127         } else {
 128             strncpy(parser->error_message, "Parse error", sizeof(parser->error_message) - 1);
 129         }
 130         parser->has_error = true;
 131
 132         free_document_internal(g_current_document);
 133         g_current_document = NULL;
 134         g_current_parser = NULL;
 135         return CSV_ERROR_PARSE;
 136     }
 137
 138     /* Success - transfer ownership of document */
 139     *document = g_current_document;
 140     g_current_document = NULL;
 141     g_current_parser = NULL;
 142
 143     return CSV_SUCCESS;
 144 }
 145
 146 const char *csv_parser_get_error(csv_parser_t *parser)
 147 {
 148     if (!parser) {
 149         return "Invalid parser";
 150     }
 151
 152     return parser->has_error ? parser->error_message : "No error";
 153 }
 154
 155 void csv_document_free(csv_document_t *document)
 156 {
 157     free_document_internal(document);
 158 }
 159
 160 const char *csv_error_string(csv_error_t error)
 161 {
 162     switch (error) {
 163         case CSV_SUCCESS:
 164             return "Success";
 165         case CSV_ERROR_MEMORY:
 166             return "Memory allocation error";
 167         case CSV_ERROR_PARSE:
 168             return "Parse error";
 169         case CSV_ERROR_INVALID_PARAM:
 170             return "Invalid parameter";
 171         case CSV_ERROR_IO:
 172             return "I/O error";
 173         default:
 174             return "Unknown error";
 175     }
 176 }
 177
 178 /* Data access helper functions */
 179
 180 bool csv_document_has_header(const csv_document_t *document)
 181 {
 182     return document && document->header != NULL;
 183 }
 184
 185 csv_record_t *csv_document_get_first_record(const csv_document_t *document)
 186 {
 187     return document ? document->records : NULL;
 188 }
 189
 190 csv_record_t *csv_record_get_next(const csv_record_t *record)
 191 {
 192     return record ? record->next : NULL;
 193 }
 194
 195 csv_field_t *csv_record_get_first_field(const csv_record_t *record)
 196 {
 197     return record ? record->fields : NULL;
 198 }
 199
 200 csv_field_t *csv_field_get_next(const csv_field_t *field)
 201 {
 202     return field ? field->next : NULL;
 203 }
 204
 205 const char *csv_field_get_content(const csv_field_t *field)
 206 {
 207     return field ? field->content : NULL;
 208 }
 209
 210 /* Internal helper functions */
 211
 212 static csv_field_t *convert_field_list(field_node_t *field_nodes)
 213 {
 214     if (!field_nodes) {
 215         return NULL;
 216     }
 217
 218     csv_field_t *first_field = NULL;
 219     csv_field_t *last_field = NULL;
 220
 221     field_node_t *current_node = field_nodes;
 222     while (current_node) {
 223         csv_field_t *field = malloc(sizeof(csv_field_t));
 224         if (!field) {
 225             /* Clean up already allocated fields */
 226             while (first_field) {
 227                 csv_field_t *next = first_field->next;
 228                 free(first_field->content);
 229                 free(first_field);
 230                 first_field = next;
 231             }
 232             return NULL;
 233         }
 234
 235         /* Copy content (take ownership) */
 236         field->content = current_node->content;
 237         current_node->content = NULL; /* Transfer ownership */
 238         field->next = NULL;
 239
 240         if (!first_field) {
 241             first_field = field;
 242             last_field = field;
 243         } else {
 244             last_field->next = field;
 245             last_field = field;
 246         }
 247
 248         current_node = current_node->next;
 249     }
 250
 251     return first_field;
 252 }
 253
 254 static csv_record_t *create_record_from_fields(field_node_t *field_nodes)
 255 {
 256     csv_record_t *record = malloc(sizeof(csv_record_t));
 257     if (!record) {
 258         return NULL;
 259     }
 260
 261     record->fields = convert_field_list(field_nodes);
 262     record->next = NULL;
 263
 264     /* Count fields */
 265     record->field_count = 0;
 266     csv_field_t *current = record->fields;
 267     while (current) {
 268         record->field_count++;
 269         current = current->next;
 270     }
 271
 272     return record;
 273 }
 274
 275 static void free_document_internal(csv_document_t *doc)
 276 {
 277     if (!doc) {
 278         return;
 279     }
 280
 281     /* Free header */
 282     if (doc->header) {
 283         csv_field_t *field = doc->header->fields;
 284         while (field) {
 285             csv_field_t *next = field->next;
 286             free(field->content);
 287             free(field);
 288             field = next;
 289         }
 290         free(doc->header);
 291     }
 292
 293     /* Free records */
 294     csv_record_t *record = doc->records;
 295     while (record) {
 296         csv_record_t *next_record = record->next;
 297
 298         csv_field_t *field = record->fields;
 299         while (field) {
 300             csv_field_t *next_field = field->next;
 301             free(field->content);
 302             free(field);
 303             field = next_field;
 304         }
 305
 306         free(record);
 307         record = next_record;
 308     }
 309
 310     free(doc);
 311 }
 312
 313 /* Functions called by the Bison parser */
 314
 315 void csv_parser_add_record(field_node_t *fields)
 316 {
 317     if (!g_current_document || g_parse_error) {
 318         return;
 319     }
 320
 321     /* Skip empty records (single empty field) */
 322     if (fields && !fields->next && fields->content && fields->content[0] == '\0') {
 323         return;
 324     }
 325
 326     csv_record_t *record = create_record_from_fields(fields);
 327     if (!record) {
 328         g_parse_error = true;
 329         strncpy(g_error_message, "Memory allocation failed while creating record", sizeof(g_error_message) - 1);
 330         return;
 331     }
 332
 333     /* Add to document */
 334     if (!g_current_document->records) {
 335         g_current_document->records = record;
 336     } else {
 337         /* Find the last record and append */
 338         csv_record_t *last = g_current_document->records;
 339         while (last->next) {
 340             last = last->next;
 341         }
 342         last->next = record;
 343     }
 344
 345     g_current_document->record_count++;
 346 }
 347
 348 void csv_parser_set_error(const char *error)
 349 {
 350     g_parse_error = true;
 351     if (error) {
 352         strncpy(g_error_message, error, sizeof(g_error_message) - 1);
 353         g_error_message[sizeof(g_error_message) - 1] = '\0';
 354     }
 355 }
 356
 357 csv_error_t csv_document_set_first_record_as_header(csv_document_t *document)
 358 {
 359     if (!document || !document->records) {
 360         return CSV_ERROR_INVALID_PARAM;
 361     }
 362
 363     /* Move first record to header */
 364     document->header = document->records;
 365     document->records = document->records->next;
 366     document->header->next = NULL;
 367     document->record_count--;
 368
 369     return CSV_SUCCESS;
 370 }