1 #include "csv_parser.h"
6 /* External declarations from Bison/Flex - updated for reentrant API */
/* Opaque scanner handle; each lexer call below takes one. */
7 typedef void *yyscan_t;
/* Creates a scanner in *scanner. This file treats a non-zero return as failure. */
9 extern int csvlex_init(yyscan_t *scanner);
/* Called once parsing is finished to clean up the lexer state. */
10 extern int csvlex_destroy(yyscan_t scanner);
/* Sets `file` as the lexer input for the given scanner. */
11 extern void csvset_in(FILE *file, yyscan_t scanner);
/*
 * Runs the generated parser. This file passes its csv_parser_t as
 * parser_state and treats a non-zero return as failure.
 * NOTE(review): presumably parser_state is handed back unchanged to the
 * csv_parser_* callbacks - confirm against the grammar file.
 */
12 extern int csvparse(void *parser_state, yyscan_t scanner);
14 /* CSV parser structure - now contains all the state that was previously global */
/*
 * NOTE(review): the struct's opening and closing lines and some members are
 * not visible in this extract. Code in this file also reads and writes
 * has_error, parse_error, error_line and error_column.
 */
/* Message handed to callers by csv_parser_get_error() while has_error is set. */
16 char error_message[256];
/* Document being built during a parse; reset to NULL when a parse finishes or fails. */
18 csv_document_t *current_document;
/* Raw message recorded by the parser callbacks, before any "Line/column" prefix is added. */
20 char parse_error_message[256];
25 /* Helper functions */
/* Builds a csv_field_t list from parser field nodes, taking over each node's content pointer. */
26 static csv_field_t *convert_field_list(field_node_t *field_nodes);
/* Allocates a record, attaches the converted field list and counts its fields. */
27 static csv_record_t *create_record_from_fields(field_node_t *field_nodes);
/* Releases the field contents held by a document's header and records. */
28 static void free_document_internal(csv_document_t *doc);
30 /* Functions called by the Bison parser */
/* Adds one parsed record to the parser's current document. */
31 void csv_parser_add_record(void *parser_state, field_node_t *fields);
/* Records a parse error message together with the line and column it refers to. */
32 void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column);
36 csv_parser_t *csv_parser_create(void)
38 csv_parser_t *parser = malloc(sizeof(csv_parser_t));
43 parser->error_message[0] = '\0';
44 parser->has_error = false;
45 parser->current_document = NULL;
46 parser->parse_error = false;
47 parser->parse_error_message[0] = '\0';
48 parser->error_line = 0;
49 parser->error_column = 0;
/*
 * Counterpart to csv_parser_create().
 * NOTE(review): the body is not visible in this extract. Presumably it frees
 * the parser - confirm it tolerates NULL and also releases any
 * current_document still attached.
 */
54 void csv_parser_destroy(csv_parser_t *parser)
/*
 * Parse CSV text held in a NUL-terminated string.
 *
 * The text is written to a tmpfile() stream, which is then passed to
 * csv_parser_parse_file() together with `parser` and `document`.
 *
 * parser   - parser that receives error state
 * input    - NUL-terminated CSV text
 * document - forwarded unchanged to csv_parser_parse_file()
 *
 * Evaluates to CSV_ERROR_INVALID_PARAM when any argument is NULL.
 * NOTE(review): the return statements for the tmpfile/fputs failure paths,
 * the rewind step and the final return are not visible in this extract -
 * confirm the temporary file is closed on every path.
 */
61 csv_error_t csv_parser_parse_string(csv_parser_t *parser, const char *input, csv_document_t **document)
63 if (!parser || !input || !document) {
64 return CSV_ERROR_INVALID_PARAM;
69 /* Create a temporary file from the string input */
70 FILE *temp_file = tmpfile();
/* strncpy zero-pads a short source up to the bound, so this literal ends up terminated. */
72 strncpy(parser->error_message, "Failed to create temporary file", sizeof(parser->error_message) - 1);
73 parser->has_error = true;
77 /* Write input to temporary file */
/* fputs stops at the first '\0', so only the C-string portion of input is written. */
78 if (fputs(input, temp_file) == EOF) {
80 strncpy(parser->error_message, "Failed to write to temporary file", sizeof(parser->error_message) - 1);
81 parser->has_error = true;
85 /* Reset file position to beginning */
88 /* Parse from the temporary file */
89 csv_error_t result = csv_parser_parse_file(parser, temp_file, document);
/*
 * Parse CSV from an already-open stream into a newly allocated document.
 *
 * parser   - parser whose error state is reset and then updated by this call
 * file     - stream given to the lexer via csvset_in()
 * document - on success receives the parsed document; ownership moves to
 *            the caller and parser->current_document is cleared
 *
 * Error codes set in this function:
 *   CSV_ERROR_INVALID_PARAM - any argument is NULL
 *   CSV_ERROR_MEMORY        - document allocation or csvlex_init() failed,
 *                             or the branch marked "Memory exhaustion in parser"
 *   CSV_ERROR_PARSE         - the other failure branches
 * On a parse failure the partially built document is released with
 * free_document_internal() and has_error is set.
 *
 * NOTE(review): the switch case labels and the function's return statements
 * after the error/success handling are not visible in this extract.
 */
95 csv_error_t csv_parser_parse_file(csv_parser_t *parser, FILE *file, csv_document_t **document)
97 if (!parser || !file || !document) {
98 return CSV_ERROR_INVALID_PARAM;
/* Drop any error left over from a previous parse. */
102 parser->has_error = false;
103 parser->error_message[0] = '\0';
105 /* Initialize parser state */
106 parser->current_document = malloc(sizeof(csv_document_t));
107 if (!parser->current_document) {
108 strncpy(parser->error_message, "Memory allocation failed", sizeof(parser->error_message) - 1);
109 parser->has_error = true;
110 return CSV_ERROR_MEMORY;
/* Start from an empty document and a clean callback error state. */
113 parser->current_document->records = NULL;
114 parser->current_document->record_count = 0;
115 parser->current_document->header = NULL;
116 parser->parse_error = false;
117 parser->parse_error_message[0] = '\0';
118 parser->error_line = 0;
119 parser->error_column = 0;
121 /* Initialize the reentrant scanner */
123 int scanner_init_result = csvlex_init(&scanner);
124 if (scanner_init_result != 0) {
/*
 * NOTE(review): this path returns without setting error_message or
 * has_error (has_error was cleared above), so csv_parser_get_error()
 * reports "No error" after a scanner init failure.
 */
125 free(parser->current_document);
126 parser->current_document = NULL;
127 return CSV_ERROR_MEMORY;
130 /* Set up the lexer input */
131 csvset_in(file, scanner);
133 /* Parse the input */
/* The parser itself is the state object the callbacks cast back to csv_parser_t. */
134 int parse_result = csvparse(parser, scanner);
136 /* Clean up lexer state */
137 csvlex_destroy(scanner);
139 /* Check for errors */
/* parse_error catches failures flagged by callbacks even when csvparse() returned 0. */
140 if (parse_result != 0 || parser->parse_error) {
141 csv_error_t error_code;
143 switch (parse_result) {
145 /* Memory exhaustion in parser */
146 strncpy(parser->error_message, "Parser memory exhaustion", sizeof(parser->error_message) - 1);
147 error_code = CSV_ERROR_MEMORY;
151 /* Parse error (syntax error or YYABORT) */
/* Prefer the callback's message; add a "Line/column" prefix when a position was recorded. */
152 if (parser->parse_error_message[0] != '\0') {
153 if (parser->error_line > 0) {
154 snprintf(parser->error_message, sizeof(parser->error_message) - 1,
155 "Line %d, column %d: %s", parser->error_line, parser->error_column, parser->parse_error_message);
/*
 * NOTE(review): a 255-character parse_error_message fills every byte
 * strncpy copies here, so termination depends on what
 * error_message[255] already holds.
 */
157 strncpy(parser->error_message, parser->parse_error_message, sizeof(parser->error_message) - 1);
160 if (parser->error_line > 0) {
161 snprintf(parser->error_message, sizeof(parser->error_message) - 1,
162 "Line %d, column %d: Syntax error", parser->error_line, parser->error_column);
164 strncpy(parser->error_message, "Syntax error", sizeof(parser->error_message) - 1);
167 error_code = CSV_ERROR_PARSE;
171 /* Other non-zero return or parser->parse_error flag set */
/* Same message selection as above, with "Parse error" as the generic fallback text. */
172 if (parser->parse_error_message[0] != '\0') {
173 if (parser->error_line > 0) {
174 snprintf(parser->error_message, sizeof(parser->error_message) - 1,
175 "Line %d, column %d: %s", parser->error_line, parser->error_column, parser->parse_error_message);
/* NOTE(review): same termination caveat as the strncpy from parse_error_message above. */
177 strncpy(parser->error_message, parser->parse_error_message, sizeof(parser->error_message) - 1);
180 if (parser->error_line > 0) {
181 snprintf(parser->error_message, sizeof(parser->error_message) - 1,
182 "Line %d, column %d: Parse error", parser->error_line, parser->error_column);
184 strncpy(parser->error_message, "Parse error", sizeof(parser->error_message) - 1);
187 error_code = CSV_ERROR_PARSE;
/* Failure: publish the error and discard whatever was parsed so far. */
191 parser->has_error = true;
192 free_document_internal(parser->current_document);
193 parser->current_document = NULL;
197 /* Success - transfer ownership of document */
198 *document = parser->current_document;
199 parser->current_document = NULL;
204 const char *csv_parser_get_error(csv_parser_t *parser)
207 return "Invalid parser";
210 return parser->has_error ? parser->error_message : "No error";
213 void csv_document_free(csv_document_t *document)
215 free_document_internal(document);
/*
 * Map a csv_error_t code to a fixed, human-readable description.
 *
 * The strings are literals, so the caller must not free or modify them.
 * NOTE(review): the switch header and the case labels before
 * CSV_ERROR_MEMORY and after CSV_ERROR_INVALID_PARAM are not visible in
 * this extract. "Unknown error" is presumably the default branch.
 */
218 const char *csv_error_string(csv_error_t error)
223 case CSV_ERROR_MEMORY:
224 return "Memory allocation error";
225 case CSV_ERROR_PARSE:
226 return "Parse error";
227 case CSV_ERROR_INVALID_PARAM:
228 return "Invalid parameter";
232 return "Unknown error";
236 /* Data access helper functions */
238 bool csv_document_has_header(const csv_document_t *document)
240 return document && document->header != NULL;
243 csv_record_t *csv_document_get_first_record(const csv_document_t *document)
245 return document ? document->records : NULL;
248 csv_record_t *csv_record_get_next(const csv_record_t *record)
250 return record ? record->next : NULL;
253 csv_field_t *csv_record_get_first_field(const csv_record_t *record)
255 return record ? record->fields : NULL;
258 csv_field_t *csv_field_get_next(const csv_field_t *field)
260 return field ? field->next : NULL;
263 const char *csv_field_get_content(const csv_field_t *field)
265 return field ? field->content : NULL;
268 /* Internal helper functions */
/*
 * Convert the parser's field_node_t list into a csv_field_t list.
 *
 * One csv_field_t is allocated per node. The node's content pointer is moved
 * into the new field and the node's own pointer is set to NULL, so no string
 * data is duplicated.
 *
 * NOTE(review): the empty-input guard, the allocation-failure check, the
 * list-linking lines and the final return are not visible in this extract.
 * Presumably the head of the new list is returned, and NULL on allocation
 * failure - confirm.
 */
270 static csv_field_t *convert_field_list(field_node_t *field_nodes)
/* Head and tail of the list being built. */
276 csv_field_t *first_field = NULL;
277 csv_field_t *last_field = NULL;
279 field_node_t *current_node = field_nodes;
280 while (current_node) {
281 csv_field_t *field = malloc(sizeof(csv_field_t));
283 /* Clean up already allocated fields */
/*
 * The contents freed here were already moved out of their nodes, so those
 * nodes keep a NULL content pointer once this cleanup has run.
 */
284 while (first_field) {
285 csv_field_t *next = first_field->next;
286 free(first_field->content);
293 /* Move the content pointer (no string copy); the field now owns it */
294 field->content = current_node->content;
295 current_node->content = NULL; /* Transfer ownership */
/* Append behind the current tail. */
302 last_field->next = field;
306 current_node = current_node->next;
/*
 * Allocate a csv_record_t for one parsed row.
 *
 * The record's field list comes from convert_field_list(), and field_count
 * is set by walking that list.
 *
 * NOTE(review): the allocation-failure check, any handling of a NULL field
 * list, the counting loop header and the return are not visible in this
 * extract.
 */
312 static csv_record_t *create_record_from_fields(field_node_t *field_nodes)
314 csv_record_t *record = malloc(sizeof(csv_record_t));
319 record->fields = convert_field_list(field_nodes);
/* Count the converted fields by walking the list from its head. */
323 record->field_count = 0;
324 csv_field_t *current = record->fields;
326 record->field_count++;
327 current = current->next;
/*
 * Release the memory held by a document.
 *
 * Walks the header's fields and then every record's fields, freeing each
 * field's content string. The next pointer is always read before the
 * current element is released.
 *
 * NOTE(review): the NULL guards, loop headers and the frees of the field,
 * record and document structures themselves are not visible in this
 * extract - confirm they exist.
 */
333 static void free_document_internal(csv_document_t *doc)
/* Header fields first. */
341 csv_field_t *field = doc->header->fields;
343 csv_field_t *next = field->next;
344 free(field->content);
/* Then every record in the list. */
352 csv_record_t *record = doc->records;
/* Saved up front so the walk can continue after this record is released. */
354 csv_record_t *next_record = record->next;
356 csv_field_t *field = record->fields;
358 csv_field_t *next_field = field->next;
359 free(field->content);
365 record = next_record;
371 /* Functions called by the Bison parser */
/*
 * Parser callback: append one parsed row to the document being built.
 *
 * parser_state - the csv_parser_t passed to csvparse(), as void *
 * fields       - field nodes for the row; their content pointers are moved
 *                into the new record by create_record_from_fields()
 *
 * A failure to create the record sets parse_error and stores a message in
 * parse_error_message.
 * NOTE(review): the bodies of the first two guards (presumably plain
 * returns), the record NULL check and the tail-walk loop are not visible in
 * this extract.
 */
373 void csv_parser_add_record(void *parser_state, field_node_t *fields)
375 csv_parser_t *parser = (csv_parser_t *)parser_state;
/* Guard: no parser, no document being built, or an error already recorded. */
376 if (!parser || !parser->current_document || parser->parse_error) {
380 /* Skip empty records (single empty field) */
381 if (fields && !fields->next && fields->content && fields->content[0] == '\0') {
385 csv_record_t *record = create_record_from_fields(fields);
387 parser->parse_error = true;
388 strncpy(parser->parse_error_message, "Memory allocation failed while creating record", sizeof(parser->parse_error_message) - 1);
392 /* Add to document */
393 if (!parser->current_document->records) {
394 parser->current_document->records = record;
396 /* Find the last record and append */
/*
 * NOTE(review): the search starts from the list head on every call, so each
 * append costs time proportional to the number of records already stored.
 */
397 csv_record_t *last = parser->current_document->records;
404 parser->current_document->record_count++;
/*
 * Parser callback: record a parse error and where it occurred.
 *
 * parser_state - the csv_parser_t passed to csvparse(), as void *
 * error        - message text, copied (truncated to 255 characters) into
 *                parse_error_message
 * line, column - stored in error_line and error_column
 *
 * NOTE(review): the guard between the cast and the assignments is not
 * visible in this extract - confirm it rejects a NULL parser and a NULL
 * error string before the strncpy.
 */
407 void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column)
409 csv_parser_t *parser = (csv_parser_t *)parser_state;
414 parser->parse_error = true;
415 parser->error_line = line;
416 parser->error_column = column;
/* strncpy alone does not terminate a full-length copy; the next line does. */
418 strncpy(parser->parse_error_message, error, sizeof(parser->parse_error_message) - 1);
419 parser->parse_error_message[sizeof(parser->parse_error_message) - 1] = '\0';
423 csv_error_t csv_document_set_first_record_as_header(csv_document_t *document)
425 if (!document || !document->records) {
426 return CSV_ERROR_INVALID_PARAM;
429 /* Move first record to header */
430 document->header = document->records;
431 document->records = document->records->next;
432 document->header->next = NULL;
433 document->record_count--;