#include "csv_parser.h"
#include "csv.tab.h"

/*
 * NOTE(review): two standard-header names were missing from the original
 * include lines; <stdlib.h> and <string.h> are restored as the most likely
 * targets, and <stdio.h>/<stdbool.h> are listed because this file uses
 * FILE, snprintf and bool directly. Confirm against the project history.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* External declarations from Bison/Flex - updated for reentrant API */
typedef void *yyscan_t;
extern int csvlex_init(yyscan_t *scanner);
extern int csvlex_destroy(yyscan_t scanner);
extern void csvset_in(FILE *file, yyscan_t scanner);
extern int csvparse(void *parser_state, yyscan_t scanner);

/* csvparse() return values that receive dedicated handling. */
enum {
    CSVPARSE_RC_SYNTAX = 1, /* parse error (syntax error or YYABORT) */
    CSVPARSE_RC_NOMEM = 2   /* memory exhaustion in parser */
};

/* CSV parser structure - contains all the state that was previously global */
struct csv_parser {
    char error_message[256];       /* message reported by csv_parser_get_error_info() */
    bool has_error;                /* true once error_message holds a reportable error */
    csv_document_t *current_document; /* document under construction; NULL between parses */
    csv_record_t *last_record;     /* tail of current_document->records, for O(1) append */
    bool parse_error;              /* set by the parser callbacks while csvparse() runs */
    char parse_error_message[256]; /* message supplied by the parser callbacks */
    int error_line;
    int error_column;
};

/* Helper functions */
static void reset_error_state(csv_parser_t *parser);
static void report_error(csv_parser_t *parser, const char *message);
static void discard_current_document(csv_parser_t *parser);
static void free_field_list(csv_field_t *field);
static bool convert_field_list(field_node_t *field_nodes, csv_field_t **out_fields);
static csv_record_t *create_record_from_fields(field_node_t *field_nodes);
static void free_document_internal(csv_document_t *doc);

/* Functions called by the Bison parser */
void csv_parser_add_record(void *parser_state, field_node_t *fields);
void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column);

/* Implementation */

/*
 * Allocate a parser with no pending error and no document.
 * Returns NULL when the allocation fails. Release with csv_parser_destroy().
 */
csv_parser_t *csv_parser_create(void)
{
    /* calloc zero-fills both message buffers, so they are always terminated. */
    csv_parser_t *parser = calloc(1, sizeof *parser);
    if (!parser) {
        return NULL;
    }

    parser->current_document = NULL;
    parser->last_record = NULL;
    reset_error_state(parser);
    return parser;
}

/*
 * Release a parser created by csv_parser_create(). NULL is accepted.
 * Documents already handed to the caller are not touched.
 */
void csv_parser_destroy(csv_parser_t *parser)
{
    if (!parser) {
        return;
    }

    /* Normally NULL here; freed defensively so a half-built document cannot leak. */
    free_document_internal(parser->current_document);
    free(parser);
}

/*
 * Parse CSV text held in a NUL-terminated string.
 *
 * The text is written to a temporary file and handed to
 * csv_parser_parse_file(). On success *document receives a document the
 * caller must release with csv_document_free(); on failure *document is NULL
 * and details are available through csv_parser_get_error_info().
 *
 * Returns CSV_ERROR_INVALID_PARAMETER for a NULL argument, CSV_ERROR_IO when
 * the temporary file cannot be created or written, otherwise the result of
 * csv_parser_parse_file().
 */
csv_error_t csv_parser_parse_string(csv_parser_t *parser, const char *input, csv_document_t **document)
{
    if (!parser || !input || !document) {
        return CSV_ERROR_INVALID_PARAMETER;
    }

    *document = NULL;

    /* Clear leftovers of a previous parse so an I/O failure below is not
     * reported together with a stale line/column. */
    reset_error_state(parser);

    /* Create a temporary file from the string input */
    FILE *temp_file = tmpfile();
    if (!temp_file) {
        report_error(parser, "Failed to create temporary file");
        return CSV_ERROR_IO;
    }

    /* Write the input and seek back to the start. fseek() flushes the
     * buffered data and, unlike rewind(), reports a failure to do so. */
    if (fputs(input, temp_file) == EOF || fseek(temp_file, 0L, SEEK_SET) != 0) {
        fclose(temp_file);
        report_error(parser, "Failed to write to temporary file");
        return CSV_ERROR_IO;
    }

    /* Parse from the temporary file */
    csv_error_t result = csv_parser_parse_file(parser, temp_file, document);

    fclose(temp_file);
    return result;
}

/*
 * Parse CSV data from an open stream.
 *
 * The stream is read through the reentrant scanner; it is not closed here.
 * On success ownership of the built document moves to *document (release it
 * with csv_document_free()). On failure *document is NULL, the partially
 * built document is freed and the parser keeps an error description for
 * csv_parser_get_error_info().
 *
 * Returns CSV_SUCCESS, CSV_ERROR_INVALID_PARAMETER (NULL argument),
 * CSV_ERROR_MEMORY (allocation/scanner setup failure or parser memory
 * exhaustion) or CSV_ERROR_PARSE.
 */
csv_error_t csv_parser_parse_file(csv_parser_t *parser, FILE *file, csv_document_t **document)
{
    if (!parser || !file || !document) {
        return CSV_ERROR_INVALID_PARAMETER;
    }

    *document = NULL;
    reset_error_state(parser);

    /* Initialize parser state */
    csv_document_t *doc = malloc(sizeof *doc);
    if (!doc) {
        report_error(parser, "Memory allocation failed");
        return CSV_ERROR_MEMORY;
    }
    doc->records = NULL;
    doc->record_count = 0;
    parser->current_document = doc;
    parser->last_record = NULL;

    /* Initialize the reentrant scanner */
    yyscan_t scanner;
    if (csvlex_init(&scanner) != 0) {
        report_error(parser, "Failed to initialize scanner");
        discard_current_document(parser);
        return CSV_ERROR_MEMORY;
    }

    /* Set up the lexer input, run the parser, then clean up lexer state */
    csvset_in(file, scanner);
    int parse_result = csvparse(parser, scanner);
    csvlex_destroy(scanner);

    /* A callback may flag an error even when csvparse() itself returns 0. */
    if (parse_result != 0 || parser->parse_error) {
        csv_error_t error_code = CSV_ERROR_PARSE;

        if (parse_result == CSVPARSE_RC_NOMEM) {
            report_error(parser, "Parser memory exhaustion");
            error_code = CSV_ERROR_MEMORY;
        } else {
            /* Prefer the message recorded by the parser callbacks. */
            const char *fallback =
                (parse_result == CSVPARSE_RC_SYNTAX) ? "Syntax error" : "Parse error";
            const char *message =
                (parser->parse_error_message[0] != '\0') ? parser->parse_error_message : fallback;
            report_error(parser, message);
        }

        discard_current_document(parser);
        return error_code;
    }

    /* Success - transfer ownership of document */
    *document = parser->current_document;
    parser->current_document = NULL;
    parser->last_record = NULL;
    return CSV_SUCCESS;
}

/*
 * Describe the most recent error recorded on the parser.
 *
 * The returned message points either at a string literal or at the parser's
 * own buffer, so it must not be used after the parser is destroyed or
 * after another parse call overwrites it. has_location is true only when a
 * line number greater than zero was recorded.
 */
csv_error_info_t csv_parser_get_error_info(csv_parser_t *parser)
{
    /* Zero-initialised: line 0, column 0, no location. */
    csv_error_info_t info = {0};

    if (!parser) {
        info.message = "Invalid parser";
        return info;
    }

    if (!parser->has_error) {
        info.message = "No error";
        return info;
    }

    info.message = parser->error_message;
    info.line = parser->error_line;
    info.column = parser->error_column;
    info.has_location = (parser->error_line > 0);
    return info;
}

/* Free a document returned by a parse call, with all its records and fields. NULL is accepted. */
void csv_document_free(csv_document_t *document)
{
    free_document_internal(document);
}

/* Map an error code to a static, human-readable description. */
const char *csv_error_string(csv_error_t error)
{
    switch (error) {
    case CSV_SUCCESS:
        return "Success";
    case CSV_ERROR_MEMORY:
        return "Memory allocation error";
    case CSV_ERROR_PARSE:
        return "Parse error";
    case CSV_ERROR_INVALID_PARAMETER:
        return "Invalid parameter";
    case CSV_ERROR_IO:
        return "I/O error";
    default:
        return "Unknown error";
    }
}

/* Data access helper functions: each returns NULL when given NULL. */

/* First record of the document, or NULL when there is none. */
csv_record_t *csv_document_get_first_record(const csv_document_t *document)
{
    return document ? document->records : NULL;
}

/* Record following `record`, or NULL at the end of the list. */
csv_record_t *csv_record_get_next(const csv_record_t *record)
{
    return record ? record->next : NULL;
}

/* First field of the record, or NULL when it has none. */
csv_field_t *csv_record_get_first_field(const csv_record_t *record)
{
    return record ? record->fields : NULL;
}

/* Field following `field`, or NULL at the end of the list. */
csv_field_t *csv_field_get_next(const csv_field_t *field)
{
    return field ? field->next : NULL;
}

/* Text of the field; the string stays owned by the document. */
const char *csv_field_get_value(const csv_field_t *field)
{
    return field ? field->value : NULL;
}

/* Internal helper functions */

/* Return the parser to the "no error" state (flags, messages, location). */
static void reset_error_state(csv_parser_t *parser)
{
    parser->has_error = false;
    parser->error_message[0] = '\0';
    parser->parse_error = false;
    parser->parse_error_message[0] = '\0';
    parser->error_line = 0;
    parser->error_column = 0;
}

/*
 * Store `message` as the reportable error and raise has_error.
 * snprintf() truncates if needed and always NUL-terminates, which
 * strncpy(dst, src, size - 1) does not guarantee for a full-length source.
 */
static void report_error(csv_parser_t *parser, const char *message)
{
    snprintf(parser->error_message, sizeof parser->error_message, "%s", message);
    parser->has_error = true;
}

/* Free the document under construction and forget its tail pointer. */
static void discard_current_document(csv_parser_t *parser)
{
    free_document_internal(parser->current_document);
    parser->current_document = NULL;
    parser->last_record = NULL;
}

/* Free a chain of fields together with the strings they own. */
static void free_field_list(csv_field_t *field)
{
    while (field) {
        csv_field_t *next = field->next;
        free(field->value);
        free(field);
        field = next;
    }
}

/*
 * Build a csv_field_t list from the parser's field nodes.
 *
 * Each node's content string is moved into the new field (the node's
 * content is set to NULL); the nodes themselves are left to the caller.
 * Returns true and stores the list (NULL for an empty input) in *out_fields.
 * Returns false on allocation failure, after freeing the fields built so
 * far - including the strings already moved into them - and storing NULL.
 */
static bool convert_field_list(field_node_t *field_nodes, csv_field_t **out_fields)
{
    csv_field_t *head = NULL;
    csv_field_t **link = &head; /* where the next field gets attached */

    for (field_node_t *node = field_nodes; node; node = node->next) {
        csv_field_t *field = malloc(sizeof *field);
        if (!field) {
            free_field_list(head);
            *out_fields = NULL;
            return false;
        }

        /* Take ownership of the content string */
        field->value = node->content;
        node->content = NULL;
        field->next = NULL;

        *link = field;
        link = &field->next;
    }

    *out_fields = head;
    return true;
}

/*
 * Allocate a record holding the converted field list and its field count.
 * Returns NULL when either the record or any of its fields cannot be
 * allocated, so an out-of-memory condition never yields a truncated record.
 */
static csv_record_t *create_record_from_fields(field_node_t *field_nodes)
{
    csv_record_t *record = malloc(sizeof *record);
    if (!record) {
        return NULL;
    }

    if (!convert_field_list(field_nodes, &record->fields)) {
        free(record);
        return NULL;
    }
    record->next = NULL;

    /* Count fields */
    record->field_count = 0;
    for (const csv_field_t *field = record->fields; field; field = field->next) {
        record->field_count++;
    }

    return record;
}

/* Free a document, every record in it and every field of those records. */
static void free_document_internal(csv_document_t *doc)
{
    if (!doc) {
        return;
    }

    csv_record_t *record = doc->records;
    while (record) {
        csv_record_t *next_record = record->next;
        free_field_list(record->fields);
        free(record);
        record = next_record;
    }

    free(doc);
}

/* Functions called by the Bison parser */

/*
 * Append one record, built from `fields`, to the document being parsed.
 *
 * Does nothing when there is no parser, no document under construction, or
 * an error has already been flagged. A record consisting of exactly one
 * empty field is skipped. If the record cannot be allocated the parse is
 * flagged as failed with an explanatory message.
 */
void csv_parser_add_record(void *parser_state, field_node_t *fields)
{
    csv_parser_t *parser = (csv_parser_t *)parser_state;
    if (!parser || !parser->current_document || parser->parse_error) {
        return;
    }

    /* Skip empty records (single empty field) */
    if (fields && !fields->next && fields->content && fields->content[0] == '\0') {
        return;
    }

    csv_record_t *record = create_record_from_fields(fields);
    if (!record) {
        parser->parse_error = true;
        snprintf(parser->parse_error_message, sizeof parser->parse_error_message, "%s",
                 "Memory allocation failed while creating record");
        return;
    }

    /* Append through the tail pointer instead of walking the whole list. */
    csv_document_t *doc = parser->current_document;
    if (parser->last_record) {
        parser->last_record->next = record;
    } else {
        doc->records = record;
    }
    parser->last_record = record;
    doc->record_count++;
}

/*
 * Flag the current parse as failed at the given line and column.
 * When `error` is non-NULL it replaces the stored parser message
 * (truncated to fit); a NULL `error` leaves the stored message unchanged.
 */
void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column)
{
    csv_parser_t *parser = (csv_parser_t *)parser_state;
    if (!parser) {
        return;
    }

    parser->parse_error = true;
    parser->error_line = line;
    parser->error_column = column;

    if (error) {
        snprintf(parser->parse_error_message, sizeof parser->parse_error_message, "%s", error);
    }
}