1 #include "csv_parser.h"
6 /* External declarations from Bison/Flex - updated for reentrant API */
/* Opaque scanner handle. csv_parser_parse_file() obtains one per call from
   csvlex_init() and hands it back to csvlex_destroy() when parsing is done. */
7 typedef void *yyscan_t;
/* Initializes *scanner. csv_parser_parse_file() treats any non-zero return
   as a failure. */
9 extern int csvlex_init(yyscan_t *scanner);
/* Called by csv_parser_parse_file() to clean up the lexer state once
   csvparse() has returned. */
10 extern int csvlex_destroy(yyscan_t scanner);
/* Selects the stream the scanner reads its input from. */
11 extern void csvset_in(FILE *file, yyscan_t scanner);
/* Runs the generated parser. csv_parser_parse_file() passes its csv_parser_t
   as parser_state and uses the return value to tell memory exhaustion apart
   from syntax errors; zero is required (but not sufficient) for success. */
12 extern int csvparse(void *parser_state, yyscan_t scanner);
14 /* CSV parser structure - now contains all the state that was previously global */
/* Message for the most recent failure. csv_parser_get_error_info() exposes
   this buffer as info.message while the error flag is set. */
16 char error_message[256];
/* Records collected by csv_parser_add_record() during a parse. On success
   csv_parser_parse_file() hands the vector to the caller and resets this
   pointer to NULL; on failure the vector is freed. */
18 vector *current_records; /* Vector of csv_record_t* */
/* Message recorded by the Bison-facing callbacks. csv_parser_parse_file()
   copies it into error_message after a failed parse. */
20 char parse_error_message[256];
25 /* Helper functions */
/* Wraps a field vector in a newly allocated csv_record_t. */
26 static csv_record_t *create_record_from_vector(vector *field_vector);
/* Element destructor installed on the records vector through v_dtor(). */
27 static void csv_record_destructor(void *record_ptr, void *aux);
29 /* Functions called by the Bison parser */
/* Appends one parsed record, given as its field vector, to the parser's
   record list. */
30 void csv_parser_add_record(void *parser_state, vector *fields);
/* Records a parse error message together with its line and column. */
31 void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column);
/*
 * Allocates a csv_parser_t with malloc() and puts it into a clean state:
 * both message buffers are emptied, both error flags are cleared, no record
 * vector is attached and the error location is zeroed.
 *
 * NOTE(review): the allocation-failure check and the return statement are
 * not shown here; presumably the new parser is returned, or NULL when
 * malloc() fails - confirm.
 */
35 csv_parser_t *csv_parser_create(void)
37 csv_parser_t *parser = malloc(sizeof(csv_parser_t));
/* malloc() does not zero the allocation, so each field is set explicitly.
   Only the first byte of each message buffer is written here; the remaining
   bytes stay uninitialized. */
42 parser->error_message[0] = '\0';
43 parser->has_error = false;
44 parser->current_records = NULL;
45 parser->parse_error = false;
46 parser->parse_error_message[0] = '\0';
47 parser->error_line = 0;
48 parser->error_column = 0;
/*
 * Destroys a parser.
 *
 * NOTE(review): only the signature is shown here. Presumably this releases
 * the csv_parser_t allocated by csv_parser_create() - confirm against the
 * body, including whether a NULL parser is accepted.
 */
53 void csv_parser_destroy(csv_parser_t *parser)
/*
 * Parses CSV text held in a NUL-terminated string.
 *
 * The text is written to a tmpfile() stream and that stream is passed to
 * csv_parser_parse_file(), which does the actual parsing and fills *records.
 *
 * parser:  parser whose error_message / has_error are updated on failure.
 * input:   NUL-terminated CSV text.
 * records: out-parameter forwarded unchanged to csv_parser_parse_file().
 *
 * Returns CSV_ERROR_INVALID_PARAMETER when any argument is NULL.
 * NOTE(review): the codes returned for the two temporary-file failures and
 * the final return are not shown here; presumably the result of
 * csv_parser_parse_file() is returned after the stream is closed - confirm.
 */
60 csv_error_t csv_parser_parse_string(csv_parser_t *parser, const char *input, vector **records)
62 if (!parser || !input || !records) {
63 return CSV_ERROR_INVALID_PARAMETER;
68 /* Create a temporary file from the string input */
69 FILE *temp_file = tmpfile();
/* Failure is reported through the parser's error state as well as the
   return code. */
71 strncpy(parser->error_message, "Failed to create temporary file", sizeof(parser->error_message) - 1);
72 parser->has_error = true;
76 /* Write input to temporary file */
/* fputs() stops at the first NUL byte, so only the C-string portion of
   input reaches the parser. */
77 if (fputs(input, temp_file) == EOF) {
79 strncpy(parser->error_message, "Failed to write to temporary file", sizeof(parser->error_message) - 1);
80 parser->has_error = true;
84 /* Reset file position to beginning */
87 /* Parse from the temporary file */
/* csv_parser_parse_file() clears has_error and error_message on entry, so
   the error state after this call reflects the parse itself. */
88 csv_error_t result = csv_parser_parse_file(parser, temp_file, records);
/*
 * Parses CSV data from an open stream into a vector of csv_record_t*.
 *
 * parser:  receives the error state (message, flag, line/column) for this
 *          call; any state left over from a previous call is cleared first.
 * file:    stream handed to the scanner through csvset_in().
 * records: assigned on the success path only. The vector placed in *records
 *          has csv_record_destructor installed as its element destructor,
 *          and the parser drops its own pointer to it.
 *
 * Error codes chosen by this function:
 *   CSV_ERROR_INVALID_PARAMETER - an argument is NULL.
 *   CSV_ERROR_MEMORY            - the record vector or the scanner could not
 *                                 be created, or the parser ran out of memory.
 *   CSV_ERROR_PARSE             - any other parse failure.
 * NOTE(review): the statements returning error_code and the success value
 * are not shown here - confirm.
 */
94 csv_error_t csv_parser_parse_file(csv_parser_t *parser, FILE *file, vector **records)
96 if (!parser || !file || !records) {
97 return CSV_ERROR_INVALID_PARAMETER;
/* Drop any error left over from a previous parse so it cannot be reported
   for this call. */
101 parser->has_error = false;
102 parser->error_message[0] = '\0';
104 /* Initialize parser state */
105 parser->current_records = v_new();
106 if (!parser->current_records) {
107 strncpy(parser->error_message, "Memory allocation failed", sizeof(parser->error_message) - 1);
108 parser->has_error = true;
109 return CSV_ERROR_MEMORY;
112 /* Set destructor for automatic cleanup */
113 v_dtor(parser->current_records, csv_record_destructor, NULL);
/* Reset the state that the Bison callbacks write into. */
115 parser->parse_error = false;
116 parser->parse_error_message[0] = '\0';
117 parser->error_line = 0;
118 parser->error_column = 0;
120 /* Initialize the reentrant scanner */
122 int scanner_init_result = csvlex_init(&scanner);
/* NOTE(review): unlike the v_new() failure above, this path returns
   CSV_ERROR_MEMORY without setting has_error or error_message, so
   csv_parser_get_error_info() will report "No error" afterwards. */
123 if (scanner_init_result != 0) {
124 v_free(parser->current_records);
125 parser->current_records = NULL;
126 return CSV_ERROR_MEMORY;
129 /* Set up the lexer input */
130 csvset_in(file, scanner);
132 /* Parse the input */
/* The parser is passed as the opaque state; csv_parser_add_record() and
   csv_parser_set_error_with_location() cast it back to csv_parser_t. */
133 int parse_result = csvparse(parser, scanner);
135 /* Clean up lexer state */
/* The scanner is released before the result is inspected, so none of the
   paths below has to clean it up. */
136 csvlex_destroy(scanner);
138 /* Check for errors */
/* parse_error is checked as well, so an error recorded by a callback fails
   the parse even if csvparse() itself returned 0. */
139 if (parse_result != 0 || parser->parse_error) {
140 csv_error_t error_code;
142 switch (parse_result) {
144 /* Memory exhaustion in parser */
145 strncpy(parser->error_message, "Parser memory exhaustion", sizeof(parser->error_message) - 1);
146 error_code = CSV_ERROR_MEMORY;
150 /* Parse error (syntax error or YYABORT) */
/* Prefer the message recorded by the callbacks; fall back to a generic one.
   NOTE(review): strncpy() writes no terminator when the source fills all 255
   copied bytes, and nothing shown here ever writes error_message[255] - a
   maximum-length parse_error_message would leave this buffer unterminated. */
151 if (parser->parse_error_message[0] != '\0') {
152 strncpy(parser->error_message, parser->parse_error_message, sizeof(parser->error_message) - 1);
154 strncpy(parser->error_message, "Syntax error", sizeof(parser->error_message) - 1);
156 error_code = CSV_ERROR_PARSE;
160 /* Other non-zero return or parser->parse_error flag set */
161 if (parser->parse_error_message[0] != '\0') {
162 strncpy(parser->error_message, parser->parse_error_message, sizeof(parser->error_message) - 1);
164 strncpy(parser->error_message, "Parse error", sizeof(parser->error_message) - 1);
166 error_code = CSV_ERROR_PARSE;
/* Common failure tail: flag the error and discard the partially built
   record vector. */
170 parser->has_error = true;
171 v_free(parser->current_records);
172 parser->current_records = NULL;
176 /* Success - transfer ownership of records vector */
177 *records = parser->current_records;
/* Clear the parser's pointer so the vector handed to the caller is no longer
   reachable through the parser. */
178 parser->current_records = NULL;
/*
 * Returns a csv_error_info_t describing the parser's current error state.
 *
 * When the parser has an error, info.message points at the parser's own
 * error_message buffer (it is not copied), and info.line / info.column carry
 * the recorded location. has_location is true only when the recorded line is
 * greater than zero. Otherwise the message is the literal "No error".
 * The literal "Invalid parser" is presumably used when parser is NULL - the
 * guarding condition is not shown here; confirm.
 */
183 csv_error_info_t csv_parser_get_error_info(csv_parser_t *parser)
/* Zero-initialize so fields not assigned below have a defined value. */
185 csv_error_info_t info = {0};
188 info.message = "Invalid parser";
191 info.has_location = false;
195 if (parser->has_error) {
/* Aliases the parser's buffer: the text changes when the parser records a
   new error. */
196 info.message = parser->error_message;
197 info.line = parser->error_line;
198 info.column = parser->error_column;
/* A line of 0 is the reset value, so it means no location was recorded. */
199 info.has_location = (parser->error_line > 0);
201 info.message = "No error";
204 info.has_location = false;
/*
 * Maps a csv_error_t code to a human-readable description.
 *
 * Every result shown here is a string literal, so the caller must not modify
 * or free it. "Unknown error" is presumably the fallback for codes without a
 * dedicated case - the label guarding it is not shown here; confirm.
 */
210 const char *csv_error_string(csv_error_t error)
215 case CSV_ERROR_MEMORY:
216 return "Memory allocation error";
217 case CSV_ERROR_PARSE:
218 return "Parse error";
219 case CSV_ERROR_INVALID_PARAMETER:
220 return "Invalid parameter";
224 return "Unknown error";
228 /* Internal helper functions */
/*
 * Wraps a field vector in a newly malloc()'d csv_record_t.
 *
 * The vector pointer is stored as-is (no copy), so the record becomes
 * responsible for it; csv_record_destructor() later releases it with v_free().
 * csv_parser_add_record() treats a NULL result as an allocation failure.
 * NOTE(review): the failure check and return statement are not shown here -
 * confirm.
 */
230 static csv_record_t *create_record_from_vector(vector *field_vector)
236 csv_record_t *record = malloc(sizeof(csv_record_t));
241 /* Take ownership of the vector directly */
242 record->fields = field_vector;
/*
 * Destructor for one csv_record_t, with the (element, aux) signature that
 * v_dtor() expects. Releases the record's field vector with v_free().
 *
 * record_ptr: the csv_record_t to destroy, passed as void *.
 * aux:        unused.
 *
 * NOTE(review): whether the csv_record_t itself is freed, and whether a NULL
 * record_ptr is tolerated, is not shown here - confirm.
 */
247 static void csv_record_destructor(void *record_ptr, void *aux)
249 (void)aux; /* Unused parameter */
250 csv_record_t *record = (csv_record_t *)record_ptr;
252 if (record->fields) {
253 /* The fields vector already has derp_free as its destructor set in yacc,
254 so v_free will automatically clean up the strings */
255 v_free(record->fields);
261 /* Functions called by the Bison parser */
/*
 * Bison callback: turns one parsed field vector into a csv_record_t and
 * appends it to parser->current_records.
 *
 * parser_state: the csv_parser_t that was passed to csvparse(), as void *.
 * fields:       field vector for the record; on the normal path it is handed
 *               to the new record, which stores the pointer directly.
 *
 * Failures are reported by setting parser->parse_error and
 * parser->parse_error_message; csv_parser_parse_file() checks that flag
 * after csvparse() returns.
 */
263 void csv_parser_add_record(void *parser_state, vector *fields)
265 csv_parser_t *parser = (csv_parser_t *)parser_state;
/* Nothing is added when there is no parser, no active record vector, or an
   error has already been recorded.
   NOTE(review): what happens to `fields` on this path is not shown here -
   confirm it is not leaked. */
266 if (!parser || !parser->current_records || parser->parse_error) {
270 /* Skip empty records (single empty field) */
271 if (fields && v_length(fields) == 1) {
272 char *first_field = v_at(fields, 0);
273 if (first_field && first_field[0] == '\0') {
278 csv_record_t *record = create_record_from_vector(fields);
/* The message is a short literal, so strncpy() zero-pads the rest of the
   bytes it copies and the result is terminated. */
280 parser->parse_error = true;
281 strncpy(parser->parse_error_message, "Memory allocation failed while creating record", sizeof(parser->parse_error_message) - 1);
285 /* Add to records vector */
286 if (!v_append(parser->current_records, record)) {
287 parser->parse_error = true;
288 strncpy(parser->parse_error_message, "Failed to add record to vector", sizeof(parser->parse_error_message) - 1);
/* The record never made it into the vector, so the vector's destructor will
   not see it; destroy it here instead. */
289 csv_record_destructor(record, NULL);
/*
 * Bison callback: records a parse error and where it occurred.
 *
 * parser_state: the csv_parser_t that was passed to csvparse(), as void *.
 * error:        message text, copied into parser->parse_error_message and
 *               truncated to fit the buffer.
 * line, column: stored as-is in error_line / error_column.
 *               csv_parser_get_error_info() reports a location only when
 *               line is greater than zero.
 *
 * NOTE(review): `error` is passed straight to strncpy(); no NULL check for
 * it is shown here - confirm callers never pass NULL.
 */
294 void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column)
296 csv_parser_t *parser = (csv_parser_t *)parser_state;
301 parser->parse_error = true;
302 parser->error_line = line;
303 parser->error_column = column;
/* strncpy() does not terminate when the source fills the whole count, so the
   last byte is set explicitly. */
305 strncpy(parser->parse_error_message, error, sizeof(parser->parse_error_message) - 1);
306 parser->parse_error_message[sizeof(parser->parse_error_message) - 1] = '\0';