begriffs open source - sa-parse/blob - src/csv_parser.c

   1 #include "csv_parser.h"
   2 #include "csv.tab.h"
   3 #include <stdlib.h>
   4 #include <string.h>
   5
/*
 * External declarations from Bison/Flex - updated for reentrant API.
 * These are declared by hand here; presumably the definitions come from the
 * generated scanner and parser sources (confirm against the build).
 */
typedef void *yyscan_t; /* scanner handle, passed to every scanner/parser call below */

extern int csvlex_init(yyscan_t *scanner);            /* a non-zero result is treated as failure by the caller in this file */
extern int csvlex_destroy(yyscan_t scanner);          /* called once the parse has finished */
extern void csvset_in(FILE *file, yyscan_t scanner);  /* selects the stream the scanner reads from */
extern int csvparse(void *parser_state, yyscan_t scanner); /* parser_state is the csv_parser_t; 0 is treated as success */
  13
  14 /* CSV parser structure - now contains all the state that was previously global */
  15 struct csv_parser {
  16     char error_message[256];
  17     bool has_error;
  18     csv_document_t *current_document;
  19     bool parse_error;
  20     char parse_error_message[256];
  21     int error_line;
  22     int error_column;
  23 };
  24
/* Helper functions (file-local; defined further down in this file) */
static csv_field_t *convert_field_list(field_node_t *field_nodes);
static csv_record_t *create_record_from_fields(field_node_t *field_nodes);
static void free_document_internal(csv_document_t *doc);

/*
 * Functions called by the Bison parser. parser_state is the csv_parser_t,
 * passed as a void pointer and cast back inside each function.
 */
void csv_parser_add_record(void *parser_state, field_node_t *fields);
void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column);
  33
  34 /* Implementation */
  35
  36 csv_parser_t *csv_parser_create(void)
  37 {
  38     csv_parser_t *parser = malloc(sizeof(csv_parser_t));
  39     if (!parser) {
  40         return NULL;
  41     }
  42
  43     parser->error_message[0] = '\0';
  44     parser->has_error = false;
  45     parser->current_document = NULL;
  46     parser->parse_error = false;
  47     parser->parse_error_message[0] = '\0';
  48     parser->error_line = 0;
  49     parser->error_column = 0;
  50
  51     return parser;
  52 }
  53
  54 void csv_parser_destroy(csv_parser_t *parser)
  55 {
  56     if (parser) {
  57         free(parser);
  58     }
  59 }
  60
  61 csv_error_t csv_parser_parse_string(csv_parser_t *parser, const char *input, csv_document_t **document)
  62 {
  63     if (!parser || !input || !document) {
  64         return CSV_ERROR_INVALID_PARAM;
  65     }
  66
  67     *document = NULL;
  68
  69     /* Create a temporary file from the string input */
  70     FILE *temp_file = tmpfile();
  71     if (!temp_file) {
  72         strncpy(parser->error_message, "Failed to create temporary file", sizeof(parser->error_message) - 1);
  73         parser->has_error = true;
  74         return CSV_ERROR_IO;
  75     }
  76
  77     /* Write input to temporary file */
  78     if (fputs(input, temp_file) == EOF) {
  79         fclose(temp_file);
  80         strncpy(parser->error_message, "Failed to write to temporary file", sizeof(parser->error_message) - 1);
  81         parser->has_error = true;
  82         return CSV_ERROR_IO;
  83     }
  84
  85     /* Reset file position to beginning */
  86     rewind(temp_file);
  87
  88     /* Parse from the temporary file */
  89     csv_error_t result = csv_parser_parse_file(parser, temp_file, document);
  90     fclose(temp_file);
  91
  92     return result;
  93 }
  94
  95 csv_error_t csv_parser_parse_file(csv_parser_t *parser, FILE *file, csv_document_t **document)
  96 {
  97     if (!parser || !file || !document) {
  98         return CSV_ERROR_INVALID_PARAM;
  99     }
 100
 101     *document = NULL;
 102     parser->has_error = false;
 103     parser->error_message[0] = '\0';
 104
 105     /* Initialize parser state */
 106     parser->current_document = malloc(sizeof(csv_document_t));
 107     if (!parser->current_document) {
 108         strncpy(parser->error_message, "Memory allocation failed", sizeof(parser->error_message) - 1);
 109         parser->has_error = true;
 110         return CSV_ERROR_MEMORY;
 111     }
 112
 113     parser->current_document->records = NULL;
 114     parser->current_document->record_count = 0;
 115     parser->current_document->header = NULL;
 116     parser->parse_error = false;
 117     parser->parse_error_message[0] = '\0';
 118     parser->error_line = 0;
 119     parser->error_column = 0;
 120
 121     /* Initialize the reentrant scanner */
 122     yyscan_t scanner;
 123     int scanner_init_result = csvlex_init(&scanner);
 124     if (scanner_init_result != 0) {
 125         free(parser->current_document);
 126         parser->current_document = NULL;
 127         return CSV_ERROR_MEMORY;
 128     }
 129
 130     /* Set up the lexer input */
 131     csvset_in(file, scanner);
 132
 133     /* Parse the input */
 134     int parse_result = csvparse(parser, scanner);
 135
 136     /* Clean up lexer state */
 137     csvlex_destroy(scanner);
 138
 139     /* Check for errors */
 140     if (parse_result != 0 || parser->parse_error) {
 141         csv_error_t error_code;
 142
 143         switch (parse_result) {
 144             case 2:
 145                 /* Memory exhaustion in parser */
 146                 strncpy(parser->error_message, "Parser memory exhaustion", sizeof(parser->error_message) - 1);
 147                 error_code = CSV_ERROR_MEMORY;
 148                 break;
 149
 150             case 1:
 151                 /* Parse error (syntax error or YYABORT) */
 152                 if (parser->parse_error_message[0] != '\0') {
 153                     strncpy(parser->error_message, parser->parse_error_message, sizeof(parser->error_message) - 1);
 154                 } else {
 155                     strncpy(parser->error_message, "Syntax error", sizeof(parser->error_message) - 1);
 156                 }
 157                 error_code = CSV_ERROR_PARSE;
 158                 break;
 159
 160             default:
 161                 /* Other non-zero return or parser->parse_error flag set */
 162                 if (parser->parse_error_message[0] != '\0') {
 163                     strncpy(parser->error_message, parser->parse_error_message, sizeof(parser->error_message) - 1);
 164                 } else {
 165                     strncpy(parser->error_message, "Parse error", sizeof(parser->error_message) - 1);
 166                 }
 167                 error_code = CSV_ERROR_PARSE;
 168                 break;
 169         }
 170
 171         parser->has_error = true;
 172         free_document_internal(parser->current_document);
 173         parser->current_document = NULL;
 174         return error_code;
 175     }
 176
 177     /* Success - transfer ownership of document */
 178     *document = parser->current_document;
 179     parser->current_document = NULL;
 180
 181     return CSV_SUCCESS;
 182 }
 183
 184 csv_error_info_t csv_parser_get_error_info(csv_parser_t *parser)
 185 {
 186     csv_error_info_t info = {0};
 187
 188     if (!parser) {
 189         info.message = "Invalid parser";
 190         info.line = 0;
 191         info.column = 0;
 192         info.has_location = false;
 193         return info;
 194     }
 195
 196     if (parser->has_error) {
 197         info.message = parser->error_message;
 198         info.line = parser->error_line;
 199         info.column = parser->error_column;
 200         info.has_location = (parser->error_line > 0);
 201     } else {
 202         info.message = "No error";
 203         info.line = 0;
 204         info.column = 0;
 205         info.has_location = false;
 206     }
 207
 208     return info;
 209 }
 210
 211 void csv_document_free(csv_document_t *document)
 212 {
 213     free_document_internal(document);
 214 }
 215
 216 const char *csv_error_string(csv_error_t error)
 217 {
 218     switch (error) {
 219         case CSV_SUCCESS:
 220             return "Success";
 221         case CSV_ERROR_MEMORY:
 222             return "Memory allocation error";
 223         case CSV_ERROR_PARSE:
 224             return "Parse error";
 225         case CSV_ERROR_INVALID_PARAM:
 226             return "Invalid parameter";
 227         case CSV_ERROR_IO:
 228             return "I/O error";
 229         default:
 230             return "Unknown error";
 231     }
 232 }
 233
 234 /* Data access helper functions */
 235
 236 bool csv_document_has_header(const csv_document_t *document)
 237 {
 238     return document && document->header != NULL;
 239 }
 240
 241 csv_record_t *csv_document_get_first_record(const csv_document_t *document)
 242 {
 243     return document ? document->records : NULL;
 244 }
 245
 246 csv_record_t *csv_record_get_next(const csv_record_t *record)
 247 {
 248     return record ? record->next : NULL;
 249 }
 250
 251 csv_field_t *csv_record_get_first_field(const csv_record_t *record)
 252 {
 253     return record ? record->fields : NULL;
 254 }
 255
 256 csv_field_t *csv_field_get_next(const csv_field_t *field)
 257 {
 258     return field ? field->next : NULL;
 259 }
 260
 261 const char *csv_field_get_content(const csv_field_t *field)
 262 {
 263     return field ? field->content : NULL;
 264 }
 265
 266 /* Internal helper functions */
 267
 268 static csv_field_t *convert_field_list(field_node_t *field_nodes)
 269 {
 270     if (!field_nodes) {
 271         return NULL;
 272     }
 273
 274     csv_field_t *first_field = NULL;
 275     csv_field_t *last_field = NULL;
 276
 277     field_node_t *current_node = field_nodes;
 278     while (current_node) {
 279         csv_field_t *field = malloc(sizeof(csv_field_t));
 280         if (!field) {
 281             /* Clean up already allocated fields */
 282             while (first_field) {
 283                 csv_field_t *next = first_field->next;
 284                 free(first_field->content);
 285                 free(first_field);
 286                 first_field = next;
 287             }
 288             return NULL;
 289         }
 290
 291         /* Copy content (take ownership) */
 292         field->content = current_node->content;
 293         current_node->content = NULL; /* Transfer ownership */
 294         field->next = NULL;
 295
 296         if (!first_field) {
 297             first_field = field;
 298             last_field = field;
 299         } else {
 300             last_field->next = field;
 301             last_field = field;
 302         }
 303
 304         current_node = current_node->next;
 305     }
 306
 307     return first_field;
 308 }
 309
 310 static csv_record_t *create_record_from_fields(field_node_t *field_nodes)
 311 {
 312     csv_record_t *record = malloc(sizeof(csv_record_t));
 313     if (!record) {
 314         return NULL;
 315     }
 316
 317     record->fields = convert_field_list(field_nodes);
 318     record->next = NULL;
 319
 320     /* Count fields */
 321     record->field_count = 0;
 322     csv_field_t *current = record->fields;
 323     while (current) {
 324         record->field_count++;
 325         current = current->next;
 326     }
 327
 328     return record;
 329 }
 330
 331 static void free_document_internal(csv_document_t *doc)
 332 {
 333     if (!doc) {
 334         return;
 335     }
 336
 337     /* Free header */
 338     if (doc->header) {
 339         csv_field_t *field = doc->header->fields;
 340         while (field) {
 341             csv_field_t *next = field->next;
 342             free(field->content);
 343             free(field);
 344             field = next;
 345         }
 346         free(doc->header);
 347     }
 348
 349     /* Free records */
 350     csv_record_t *record = doc->records;
 351     while (record) {
 352         csv_record_t *next_record = record->next;
 353
 354         csv_field_t *field = record->fields;
 355         while (field) {
 356             csv_field_t *next_field = field->next;
 357             free(field->content);
 358             free(field);
 359             field = next_field;
 360         }
 361
 362         free(record);
 363         record = next_record;
 364     }
 365
 366     free(doc);
 367 }
 368
 369 /* Functions called by the Bison parser */
 370
 371 void csv_parser_add_record(void *parser_state, field_node_t *fields)
 372 {
 373     csv_parser_t *parser = (csv_parser_t *)parser_state;
 374     if (!parser || !parser->current_document || parser->parse_error) {
 375         return;
 376     }
 377
 378     /* Skip empty records (single empty field) */
 379     if (fields && !fields->next && fields->content && fields->content[0] == '\0') {
 380         return;
 381     }
 382
 383     csv_record_t *record = create_record_from_fields(fields);
 384     if (!record) {
 385         parser->parse_error = true;
 386         strncpy(parser->parse_error_message, "Memory allocation failed while creating record", sizeof(parser->parse_error_message) - 1);
 387         return;
 388     }
 389
 390     /* Add to document */
 391     if (!parser->current_document->records) {
 392         parser->current_document->records = record;
 393     } else {
 394         /* Find the last record and append */
 395         csv_record_t *last = parser->current_document->records;
 396         while (last->next) {
 397             last = last->next;
 398         }
 399         last->next = record;
 400     }
 401
 402     parser->current_document->record_count++;
 403 }
 404
 405 void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column)
 406 {
 407     csv_parser_t *parser = (csv_parser_t *)parser_state;
 408     if (!parser) {
 409         return;
 410     }
 411
 412     parser->parse_error = true;
 413     parser->error_line = line;
 414     parser->error_column = column;
 415     if (error) {
 416         strncpy(parser->parse_error_message, error, sizeof(parser->parse_error_message) - 1);
 417         parser->parse_error_message[sizeof(parser->parse_error_message) - 1] = '\0';
 418     }
 419 }
 420
 421 csv_error_t csv_document_set_first_record_as_header(csv_document_t *document)
 422 {
 423     if (!document || !document->records) {
 424         return CSV_ERROR_INVALID_PARAM;
 425     }
 426
 427     /* Move first record to header */
 428     document->header = document->records;
 429     document->records = document->records->next;
 430     document->header->next = NULL;
 431     document->record_count--;
 432
 433     return CSV_SUCCESS;
 434 }