begriffs open source - sa-parse/blob - src/csv_parser.c

#include "csv_parser.h"
#include "csv.tab.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
   5
   6 /* External declarations from Bison/Flex - updated for reentrant API */
   7 typedef void *yyscan_t;
   8 extern int csvlex_init(yyscan_t *scanner);
   9 extern int csvlex_destroy(yyscan_t scanner);
  10 extern void csvset_in(FILE *file, yyscan_t scanner);
  11 extern int csvparse(void *parser_state, yyscan_t scanner);
  12
  13 /* Global parser state - used to communicate with Bison parser */
  14 static csv_document_t *g_current_document = NULL;
  15 static csv_parser_t *g_current_parser = NULL;
  16 static bool g_parse_error = false;
  17 static char g_error_message[256] = "";
  18
  19 /* CSV parser structure */
  20 struct csv_parser {
  21     char error_message[256];
  22     bool has_error;
  23 };
  24
  25 /* Helper functions */
  26 static csv_field_t *convert_field_list(field_node_t *field_nodes);
  27 static csv_record_t *create_record_from_fields(field_node_t *field_nodes);
  28 static void free_document_internal(csv_document_t *doc);
  29
  30 /* Functions called by the Bison parser */
  31 void csv_parser_add_record(field_node_t *fields);
  32 void csv_parser_set_error(const char *error);
  33
  34 /* Implementation */
  35
  36 csv_parser_t *csv_parser_create(void)
  37 {
  38     csv_parser_t *parser = malloc(sizeof(csv_parser_t));
  39     if (!parser) {
  40         return NULL;
  41     }
  42
  43     parser->error_message[0] = '\0';
  44     parser->has_error = false;
  45
  46     return parser;
  47 }
  48
  49 void csv_parser_destroy(csv_parser_t *parser)
  50 {
  51     if (parser) {
  52         free(parser);
  53     }
  54 }
  55
  56 csv_error_t csv_parser_parse_string(csv_parser_t *parser, const char *input, csv_document_t **document)
  57 {
  58     if (!parser || !input || !document) {
  59         return CSV_ERROR_INVALID_PARAM;
  60     }
  61
  62     *document = NULL;
  63
  64     /* Create a temporary file from the string input */
  65     FILE *temp_file = tmpfile();
  66     if (!temp_file) {
  67         strncpy(parser->error_message, "Failed to create temporary file", sizeof(parser->error_message) - 1);
  68         parser->has_error = true;
  69         return CSV_ERROR_IO;
  70     }
  71
  72     /* Write input to temporary file */
  73     if (fputs(input, temp_file) == EOF) {
  74         fclose(temp_file);
  75         strncpy(parser->error_message, "Failed to write to temporary file", sizeof(parser->error_message) - 1);
  76         parser->has_error = true;
  77         return CSV_ERROR_IO;
  78     }
  79
  80     /* Reset file position to beginning */
  81     rewind(temp_file);
  82
  83     /* Parse from the temporary file */
  84     csv_error_t result = csv_parser_parse_file(parser, temp_file, document);
  85     fclose(temp_file);
  86
  87     return result;
  88 }
  89
  90 csv_error_t csv_parser_parse_file(csv_parser_t *parser, FILE *file, csv_document_t **document)
  91 {
  92     if (!parser || !file || !document) {
  93         return CSV_ERROR_INVALID_PARAM;
  94     }
  95
  96     *document = NULL;
  97     parser->has_error = false;
  98     parser->error_message[0] = '\0';
  99
 100     /* Initialize global state */
 101     g_current_document = malloc(sizeof(csv_document_t));
 102     if (!g_current_document) {
 103         strncpy(parser->error_message, "Memory allocation failed", sizeof(parser->error_message) - 1);
 104         parser->has_error = true;
 105         return CSV_ERROR_MEMORY;
 106     }
 107
 108     g_current_document->records = NULL;
 109     g_current_document->record_count = 0;
 110     g_current_document->header = NULL;
 111     g_current_parser = parser;
 112     g_parse_error = false;
 113     g_error_message[0] = '\0';
 114
 115     /* Initialize the reentrant scanner */
 116     yyscan_t scanner;
 117     int scanner_init_result = csvlex_init(&scanner);
 118     if (scanner_init_result != 0) {
 119         free(g_current_document);
 120         g_current_document = NULL;
 121         return CSV_ERROR_MEMORY;
 122     }
 123
 124     /* Set up the lexer input */
 125     csvset_in(file, scanner);
 126
 127     /* Parse the input */
 128     int parse_result = csvparse(parser, scanner);
 129
 130     /* Clean up lexer state */
 131     csvlex_destroy(scanner);
 132
 133     /* Check for errors */
 134     if (parse_result != 0 || g_parse_error) {
 135         csv_error_t error_code;
 136
 137         switch (parse_result) {
 138             case 2:
 139                 /* Memory exhaustion in parser */
 140                 strncpy(parser->error_message, "Parser memory exhaustion", sizeof(parser->error_message) - 1);
 141                 error_code = CSV_ERROR_MEMORY;
 142                 break;
 143
 144             case 1:
 145                 /* Parse error (syntax error or YYABORT) */
 146                 if (g_error_message[0] != '\0') {
 147                     strncpy(parser->error_message, g_error_message, sizeof(parser->error_message) - 1);
 148                 } else {
 149                     strncpy(parser->error_message, "Syntax error", sizeof(parser->error_message) - 1);
 150                 }
 151                 error_code = CSV_ERROR_PARSE;
 152                 break;
 153
 154             default:
 155                 /* Other non-zero return or g_parse_error flag set */
 156                 if (g_error_message[0] != '\0') {
 157                     strncpy(parser->error_message, g_error_message, sizeof(parser->error_message) - 1);
 158                 } else {
 159                     strncpy(parser->error_message, "Parse error", sizeof(parser->error_message) - 1);
 160                 }
 161                 error_code = CSV_ERROR_PARSE;
 162                 break;
 163         }
 164
 165         parser->has_error = true;
 166         free_document_internal(g_current_document);
 167         g_current_document = NULL;
 168         g_current_parser = NULL;
 169         return error_code;
 170     }
 171
 172     /* Success - transfer ownership of document */
 173     *document = g_current_document;
 174     g_current_document = NULL;
 175     g_current_parser = NULL;
 176
 177     return CSV_SUCCESS;
 178 }
 179
 180 const char *csv_parser_get_error(csv_parser_t *parser)
 181 {
 182     if (!parser) {
 183         return "Invalid parser";
 184     }
 185
 186     return parser->has_error ? parser->error_message : "No error";
 187 }
 188
 189 void csv_document_free(csv_document_t *document)
 190 {
 191     free_document_internal(document);
 192 }
 193
 194 const char *csv_error_string(csv_error_t error)
 195 {
 196     switch (error) {
 197         case CSV_SUCCESS:
 198             return "Success";
 199         case CSV_ERROR_MEMORY:
 200             return "Memory allocation error";
 201         case CSV_ERROR_PARSE:
 202             return "Parse error";
 203         case CSV_ERROR_INVALID_PARAM:
 204             return "Invalid parameter";
 205         case CSV_ERROR_IO:
 206             return "I/O error";
 207         default:
 208             return "Unknown error";
 209     }
 210 }
 211
 212 /* Data access helper functions */
 213
 214 bool csv_document_has_header(const csv_document_t *document)
 215 {
 216     return document && document->header != NULL;
 217 }
 218
 219 csv_record_t *csv_document_get_first_record(const csv_document_t *document)
 220 {
 221     return document ? document->records : NULL;
 222 }
 223
 224 csv_record_t *csv_record_get_next(const csv_record_t *record)
 225 {
 226     return record ? record->next : NULL;
 227 }
 228
 229 csv_field_t *csv_record_get_first_field(const csv_record_t *record)
 230 {
 231     return record ? record->fields : NULL;
 232 }
 233
 234 csv_field_t *csv_field_get_next(const csv_field_t *field)
 235 {
 236     return field ? field->next : NULL;
 237 }
 238
 239 const char *csv_field_get_content(const csv_field_t *field)
 240 {
 241     return field ? field->content : NULL;
 242 }
 243
 244 /* Internal helper functions */
 245
 246 static csv_field_t *convert_field_list(field_node_t *field_nodes)
 247 {
 248     if (!field_nodes) {
 249         return NULL;
 250     }
 251
 252     csv_field_t *first_field = NULL;
 253     csv_field_t *last_field = NULL;
 254
 255     field_node_t *current_node = field_nodes;
 256     while (current_node) {
 257         csv_field_t *field = malloc(sizeof(csv_field_t));
 258         if (!field) {
 259             /* Clean up already allocated fields */
 260             while (first_field) {
 261                 csv_field_t *next = first_field->next;
 262                 free(first_field->content);
 263                 free(first_field);
 264                 first_field = next;
 265             }
 266             return NULL;
 267         }
 268
 269         /* Copy content (take ownership) */
 270         field->content = current_node->content;
 271         current_node->content = NULL; /* Transfer ownership */
 272         field->next = NULL;
 273
 274         if (!first_field) {
 275             first_field = field;
 276             last_field = field;
 277         } else {
 278             last_field->next = field;
 279             last_field = field;
 280         }
 281
 282         current_node = current_node->next;
 283     }
 284
 285     return first_field;
 286 }
 287
 288 static csv_record_t *create_record_from_fields(field_node_t *field_nodes)
 289 {
 290     csv_record_t *record = malloc(sizeof(csv_record_t));
 291     if (!record) {
 292         return NULL;
 293     }
 294
 295     record->fields = convert_field_list(field_nodes);
 296     record->next = NULL;
 297
 298     /* Count fields */
 299     record->field_count = 0;
 300     csv_field_t *current = record->fields;
 301     while (current) {
 302         record->field_count++;
 303         current = current->next;
 304     }
 305
 306     return record;
 307 }
 308
 309 static void free_document_internal(csv_document_t *doc)
 310 {
 311     if (!doc) {
 312         return;
 313     }
 314
 315     /* Free header */
 316     if (doc->header) {
 317         csv_field_t *field = doc->header->fields;
 318         while (field) {
 319             csv_field_t *next = field->next;
 320             free(field->content);
 321             free(field);
 322             field = next;
 323         }
 324         free(doc->header);
 325     }
 326
 327     /* Free records */
 328     csv_record_t *record = doc->records;
 329     while (record) {
 330         csv_record_t *next_record = record->next;
 331
 332         csv_field_t *field = record->fields;
 333         while (field) {
 334             csv_field_t *next_field = field->next;
 335             free(field->content);
 336             free(field);
 337             field = next_field;
 338         }
 339
 340         free(record);
 341         record = next_record;
 342     }
 343
 344     free(doc);
 345 }
 346
 347 /* Functions called by the Bison parser */
 348
 349 void csv_parser_add_record(field_node_t *fields)
 350 {
 351     if (!g_current_document || g_parse_error) {
 352         return;
 353     }
 354
 355     /* Skip empty records (single empty field) */
 356     if (fields && !fields->next && fields->content && fields->content[0] == '\0') {
 357         return;
 358     }
 359
 360     csv_record_t *record = create_record_from_fields(fields);
 361     if (!record) {
 362         g_parse_error = true;
 363         strncpy(g_error_message, "Memory allocation failed while creating record", sizeof(g_error_message) - 1);
 364         return;
 365     }
 366
 367     /* Add to document */
 368     if (!g_current_document->records) {
 369         g_current_document->records = record;
 370     } else {
 371         /* Find the last record and append */
 372         csv_record_t *last = g_current_document->records;
 373         while (last->next) {
 374             last = last->next;
 375         }
 376         last->next = record;
 377     }
 378
 379     g_current_document->record_count++;
 380 }
 381
 382 void csv_parser_set_error(const char *error)
 383 {
 384     g_parse_error = true;
 385     if (error) {
 386         strncpy(g_error_message, error, sizeof(g_error_message) - 1);
 387         g_error_message[sizeof(g_error_message) - 1] = '\0';
 388     }
 389 }
 390
 391 csv_error_t csv_document_set_first_record_as_header(csv_document_t *document)
 392 {
 393     if (!document || !document->records) {
 394         return CSV_ERROR_INVALID_PARAM;
 395     }
 396
 397     /* Move first record to header */
 398     document->header = document->records;
 399     document->records = document->records->next;
 400     document->header->next = NULL;
 401     document->record_count--;
 402
 403     return CSV_SUCCESS;
 404 }