1 #include "csv_parser.h"
6 /* External declarations from Bison/Flex - updated for reentrant API */
7 typedef void *yyscan_t;
9 extern int csvlex_init(yyscan_t *scanner);
10 extern int csvlex_destroy(yyscan_t scanner);
11 extern void csvset_in(FILE *file, yyscan_t scanner);
12 extern int csvparse(void *parser_state, yyscan_t scanner);
14 /* Global parser state - used to communicate with Bison parser */
15 static csv_document_t *g_current_document = NULL;
16 static csv_parser_t *g_current_parser = NULL;
17 static bool g_parse_error = false;
18 static char g_error_message[256] = "";
19 static int g_error_line = 0;
20 static int g_error_column = 0;
22 /* CSV parser structure */
24 char error_message[256];
28 /* Helper functions */
29 static csv_field_t *convert_field_list(field_node_t *field_nodes);
30 static csv_record_t *create_record_from_fields(field_node_t *field_nodes);
31 static void free_document_internal(csv_document_t *doc);
33 /* Functions called by the Bison parser */
34 void csv_parser_add_record(field_node_t *fields);
35 void csv_parser_set_error_with_location(const char *error, int line, int column);
39 csv_parser_t *csv_parser_create(void)
41 csv_parser_t *parser = malloc(sizeof(csv_parser_t));
46 parser->error_message[0] = '\0';
47 parser->has_error = false;
52 void csv_parser_destroy(csv_parser_t *parser)
59 csv_error_t csv_parser_parse_string(csv_parser_t *parser, const char *input, csv_document_t **document)
61 if (!parser || !input || !document) {
62 return CSV_ERROR_INVALID_PARAM;
67 /* Create a temporary file from the string input */
68 FILE *temp_file = tmpfile();
70 strncpy(parser->error_message, "Failed to create temporary file", sizeof(parser->error_message) - 1);
71 parser->has_error = true;
75 /* Write input to temporary file */
76 if (fputs(input, temp_file) == EOF) {
78 strncpy(parser->error_message, "Failed to write to temporary file", sizeof(parser->error_message) - 1);
79 parser->has_error = true;
83 /* Reset file position to beginning */
86 /* Parse from the temporary file */
87 csv_error_t result = csv_parser_parse_file(parser, temp_file, document);
93 csv_error_t csv_parser_parse_file(csv_parser_t *parser, FILE *file, csv_document_t **document)
95 if (!parser || !file || !document) {
96 return CSV_ERROR_INVALID_PARAM;
100 parser->has_error = false;
101 parser->error_message[0] = '\0';
103 /* Initialize global state */
104 g_current_document = malloc(sizeof(csv_document_t));
105 if (!g_current_document) {
106 strncpy(parser->error_message, "Memory allocation failed", sizeof(parser->error_message) - 1);
107 parser->has_error = true;
108 return CSV_ERROR_MEMORY;
111 g_current_document->records = NULL;
112 g_current_document->record_count = 0;
113 g_current_document->header = NULL;
114 g_current_parser = parser;
115 g_parse_error = false;
116 g_error_message[0] = '\0';
120 /* Initialize the reentrant scanner */
122 int scanner_init_result = csvlex_init(&scanner);
123 if (scanner_init_result != 0) {
124 free(g_current_document);
125 g_current_document = NULL;
126 return CSV_ERROR_MEMORY;
129 /* Set up the lexer input */
130 csvset_in(file, scanner);
132 /* Parse the input */
133 int parse_result = csvparse(parser, scanner);
135 /* Clean up lexer state */
136 csvlex_destroy(scanner);
138 /* Check for errors */
139 if (parse_result != 0 || g_parse_error) {
140 csv_error_t error_code;
142 switch (parse_result) {
144 /* Memory exhaustion in parser */
145 strncpy(parser->error_message, "Parser memory exhaustion", sizeof(parser->error_message) - 1);
146 error_code = CSV_ERROR_MEMORY;
150 /* Parse error (syntax error or YYABORT) */
151 if (g_error_message[0] != '\0') {
152 if (g_error_line > 0) {
153 snprintf(parser->error_message, sizeof(parser->error_message) - 1,
154 "Line %d, column %d: %s", g_error_line, g_error_column, g_error_message);
156 strncpy(parser->error_message, g_error_message, sizeof(parser->error_message) - 1);
159 if (g_error_line > 0) {
160 snprintf(parser->error_message, sizeof(parser->error_message) - 1,
161 "Line %d, column %d: Syntax error", g_error_line, g_error_column);
163 strncpy(parser->error_message, "Syntax error", sizeof(parser->error_message) - 1);
166 error_code = CSV_ERROR_PARSE;
170 /* Other non-zero return or g_parse_error flag set */
171 if (g_error_message[0] != '\0') {
172 if (g_error_line > 0) {
173 snprintf(parser->error_message, sizeof(parser->error_message) - 1,
174 "Line %d, column %d: %s", g_error_line, g_error_column, g_error_message);
176 strncpy(parser->error_message, g_error_message, sizeof(parser->error_message) - 1);
179 if (g_error_line > 0) {
180 snprintf(parser->error_message, sizeof(parser->error_message) - 1,
181 "Line %d, column %d: Parse error", g_error_line, g_error_column);
183 strncpy(parser->error_message, "Parse error", sizeof(parser->error_message) - 1);
186 error_code = CSV_ERROR_PARSE;
190 parser->has_error = true;
191 free_document_internal(g_current_document);
192 g_current_document = NULL;
193 g_current_parser = NULL;
197 /* Success - transfer ownership of document */
198 *document = g_current_document;
199 g_current_document = NULL;
200 g_current_parser = NULL;
205 const char *csv_parser_get_error(csv_parser_t *parser)
208 return "Invalid parser";
211 return parser->has_error ? parser->error_message : "No error";
214 void csv_document_free(csv_document_t *document)
216 free_document_internal(document);
219 const char *csv_error_string(csv_error_t error)
224 case CSV_ERROR_MEMORY:
225 return "Memory allocation error";
226 case CSV_ERROR_PARSE:
227 return "Parse error";
228 case CSV_ERROR_INVALID_PARAM:
229 return "Invalid parameter";
233 return "Unknown error";
237 /* Data access helper functions */
239 bool csv_document_has_header(const csv_document_t *document)
241 return document && document->header != NULL;
244 csv_record_t *csv_document_get_first_record(const csv_document_t *document)
246 return document ? document->records : NULL;
249 csv_record_t *csv_record_get_next(const csv_record_t *record)
251 return record ? record->next : NULL;
254 csv_field_t *csv_record_get_first_field(const csv_record_t *record)
256 return record ? record->fields : NULL;
259 csv_field_t *csv_field_get_next(const csv_field_t *field)
261 return field ? field->next : NULL;
264 const char *csv_field_get_content(const csv_field_t *field)
266 return field ? field->content : NULL;
269 /* Internal helper functions */
271 static csv_field_t *convert_field_list(field_node_t *field_nodes)
277 csv_field_t *first_field = NULL;
278 csv_field_t *last_field = NULL;
280 field_node_t *current_node = field_nodes;
281 while (current_node) {
282 csv_field_t *field = malloc(sizeof(csv_field_t));
284 /* Clean up already allocated fields */
285 while (first_field) {
286 csv_field_t *next = first_field->next;
287 free(first_field->content);
294 /* Copy content (take ownership) */
295 field->content = current_node->content;
296 current_node->content = NULL; /* Transfer ownership */
303 last_field->next = field;
307 current_node = current_node->next;
313 static csv_record_t *create_record_from_fields(field_node_t *field_nodes)
315 csv_record_t *record = malloc(sizeof(csv_record_t));
320 record->fields = convert_field_list(field_nodes);
324 record->field_count = 0;
325 csv_field_t *current = record->fields;
327 record->field_count++;
328 current = current->next;
334 static void free_document_internal(csv_document_t *doc)
342 csv_field_t *field = doc->header->fields;
344 csv_field_t *next = field->next;
345 free(field->content);
353 csv_record_t *record = doc->records;
355 csv_record_t *next_record = record->next;
357 csv_field_t *field = record->fields;
359 csv_field_t *next_field = field->next;
360 free(field->content);
366 record = next_record;
372 /* Functions called by the Bison parser */
374 void csv_parser_add_record(field_node_t *fields)
376 if (!g_current_document || g_parse_error) {
380 /* Skip empty records (single empty field) */
381 if (fields && !fields->next && fields->content && fields->content[0] == '\0') {
385 csv_record_t *record = create_record_from_fields(fields);
387 g_parse_error = true;
388 strncpy(g_error_message, "Memory allocation failed while creating record", sizeof(g_error_message) - 1);
392 /* Add to document */
393 if (!g_current_document->records) {
394 g_current_document->records = record;
396 /* Find the last record and append */
397 csv_record_t *last = g_current_document->records;
404 g_current_document->record_count++;
407 void csv_parser_set_error_with_location(const char *error, int line, int column)
409 g_parse_error = true;
411 g_error_column = column;
413 strncpy(g_error_message, error, sizeof(g_error_message) - 1);
414 g_error_message[sizeof(g_error_message) - 1] = '\0';
418 csv_error_t csv_document_set_first_record_as_header(csv_document_t *document)
420 if (!document || !document->records) {
421 return CSV_ERROR_INVALID_PARAM;
424 /* Move first record to header */
425 document->header = document->records;
426 document->records = document->records->next;
427 document->header->next = NULL;
428 document->record_count--;