1 #include "csv_parser.h"
/*
 * Hand-written declarations for the generated scanner/parser entry points
 * used by csv_parser_parse_file(). yyscan_t is the opaque scanner handle
 * passed to each of the calls below.
 */
6 /* External declarations from Bison/Flex - updated for reentrant API */
7 typedef void *yyscan_t;
/* Creates a scanner handle; csv_parser_parse_file() treats a non-zero result as failure. */
8 extern int csvlex_init(yyscan_t *scanner);
/* Called once csvparse() has returned, to release the scanner handle. */
9 extern int csvlex_destroy(yyscan_t scanner);
/* Selects the FILE stream the scanner reads from. */
10 extern void csvset_in(FILE *file, yyscan_t scanner);
/* Runs the parser; this file passes its csv_parser_t as parser_state and treats 0 as success. */
11 extern int csvparse(void *parser_state, yyscan_t scanner);
/*
 * File-scope state for the parse currently in progress. It is initialised by
 * csv_parser_parse_file() and written by the csv_parser_add_record() and
 * csv_parser_set_error() callbacks.
 * NOTE(review): because these are file-scope statics, two parses running at
 * the same time would share them - confirm callers never parse concurrently.
 */
13 /* Global parser state - used to communicate with Bison parser */
/* Document being built; handed to the caller on success, freed on failure. */
14 static csv_document_t *g_current_document = NULL;
/* Parser handle of the active parse; assigned and cleared by csv_parser_parse_file(). */
15 static csv_parser_t *g_current_parser = NULL;
/* Set when a callback reports a failure; checked after csvparse() returns. */
16 static bool g_parse_error = false;
/* Message accompanying g_parse_error; copied into the parser's error_message. */
17 static char g_error_message[256] = "";
19 /* CSV parser structure */
/*
 * Text of the most recent error, returned by csv_parser_get_error().
 * NOTE(review): the struct header and remaining members are not visible in
 * this excerpt; the code also uses parser->has_error, so at least that
 * member presumably exists alongside this one.
 */
21 char error_message[256];
25 /* Helper functions */
/* Builds a csv_field_t list from parser nodes, taking over each node's content pointer. */
26 static csv_field_t *convert_field_list(field_node_t *field_nodes);
/* Allocates a record, converts its fields and counts them. */
27 static csv_record_t *create_record_from_fields(field_node_t *field_nodes);
/* Releases the field strings held by a document's header and records. */
28 static void free_document_internal(csv_document_t *doc);
30 /* Functions called by the Bison parser */
/* Appends one record to the document under construction. */
31 void csv_parser_add_record(field_node_t *fields);
/* Marks the current parse as failed and stores the message. */
32 void csv_parser_set_error(const char *error);
36 csv_parser_t *csv_parser_create(void)
38 csv_parser_t *parser = malloc(sizeof(csv_parser_t));
43 parser->error_message[0] = '\0';
44 parser->has_error = false;
/*
 * Counterpart to csv_parser_create().
 * NOTE(review): the body is not visible in this excerpt - confirm that it
 * accepts NULL and releases only the parser object itself.
 */
49 void csv_parser_destroy(csv_parser_t *parser)
/*
 * Parse CSV text held in a NUL-terminated string.
 *
 * The text is written to an anonymous tmpfile() stream, which is then handed
 * to csv_parser_parse_file(); the result of that call is stored in `result`.
 * Any NULL argument is rejected with CSV_ERROR_INVALID_PARAM.
 *
 * NOTE(review): several short lines of this function (the tmpfile() failure
 * test, the return statements of the two failure branches, the rewind and
 * the stream close) are not visible in this excerpt - confirm the stream is
 * closed on every path.
 */
56 csv_error_t csv_parser_parse_string(csv_parser_t *parser, const char *input, csv_document_t **document)
58 if (!parser || !input || !document) {
59 return CSV_ERROR_INVALID_PARAM;
64 /* Create a temporary file from the string input */
65 FILE *temp_file = tmpfile();
/* strncpy is bounded to size - 1 and never writes the last byte of error_message. */
67 strncpy(parser->error_message, "Failed to create temporary file", sizeof(parser->error_message) - 1);
68 parser->has_error = true;
72 /* Write input to temporary file */
73 if (fputs(input, temp_file) == EOF) {
75 strncpy(parser->error_message, "Failed to write to temporary file", sizeof(parser->error_message) - 1);
76 parser->has_error = true;
80 /* Reset file position to beginning */
83 /* Parse from the temporary file */
84 csv_error_t result = csv_parser_parse_file(parser, temp_file, document);
/*
 * Parse CSV from an open stream into a newly allocated document.
 *
 * Resets the parser's error state, allocates an empty document into the
 * file-scope parse state, runs the generated scanner/parser over `file`,
 * and then either hands the document to the caller through *document or
 * frees it and records an error message on the parser.
 * Any NULL argument is rejected with CSV_ERROR_INVALID_PARAM.
 *
 * NOTE(review): the case labels of the switch, the else branches, and the
 * final return statements are not visible in this excerpt.
 */
90 csv_error_t csv_parser_parse_file(csv_parser_t *parser, FILE *file, csv_document_t **document)
92 if (!parser || !file || !document) {
93 return CSV_ERROR_INVALID_PARAM;
/* Start from a clean error state so a previous failure is not reported again. */
97 parser->has_error = false;
98 parser->error_message[0] = '\0';
100 /* Initialize global state */
101 g_current_document = malloc(sizeof(csv_document_t));
102 if (!g_current_document) {
103 strncpy(parser->error_message, "Memory allocation failed", sizeof(parser->error_message) - 1);
104 parser->has_error = true;
105 return CSV_ERROR_MEMORY;
/* The document starts empty; csv_parser_add_record() fills it during csvparse(). */
108 g_current_document->records = NULL;
109 g_current_document->record_count = 0;
110 g_current_document->header = NULL;
111 g_current_parser = parser;
112 g_parse_error = false;
113 g_error_message[0] = '\0';
115 /* Initialize the reentrant scanner */
117 int scanner_init_result = csvlex_init(&scanner);
/*
 * NOTE(review): this failure branch frees the document but, unlike the other
 * failure paths, does not clear g_current_parser and does not set
 * parser->has_error or an error message.
 */
118 if (scanner_init_result != 0) {
119 free(g_current_document);
120 g_current_document = NULL;
121 return CSV_ERROR_MEMORY;
124 /* Set up the lexer input */
125 csvset_in(file, scanner);
127 /* Parse the input */
128 int parse_result = csvparse(parser, scanner);
130 /* Clean up lexer state */
131 csvlex_destroy(scanner);
133 /* Check for errors */
/* A callback may flag failure through g_parse_error even when csvparse() returned 0. */
134 if (parse_result != 0 || g_parse_error) {
135 csv_error_t error_code;
/* Presumably keyed on Bison's yyparse() result codes - TODO confirm against the hidden case labels. */
137 switch (parse_result) {
139 /* Memory exhaustion in parser */
140 strncpy(parser->error_message, "Parser memory exhaustion", sizeof(parser->error_message) - 1);
141 error_code = CSV_ERROR_MEMORY;
145 /* Parse error (syntax error or YYABORT) */
146 if (g_error_message[0] != '\0') {
/*
 * NOTE(review): a 255-byte g_error_message fills every byte strncpy may write;
 * termination then depends on the last byte of error_message already being NUL.
 */
147 strncpy(parser->error_message, g_error_message, sizeof(parser->error_message) - 1);
149 strncpy(parser->error_message, "Syntax error", sizeof(parser->error_message) - 1);
151 error_code = CSV_ERROR_PARSE;
155 /* Other non-zero return or g_parse_error flag set */
156 if (g_error_message[0] != '\0') {
157 strncpy(parser->error_message, g_error_message, sizeof(parser->error_message) - 1);
159 strncpy(parser->error_message, "Parse error", sizeof(parser->error_message) - 1);
161 error_code = CSV_ERROR_PARSE;
/* Failure: discard the partially built document; *document is not assigned in the visible lines. */
165 parser->has_error = true;
166 free_document_internal(g_current_document);
167 g_current_document = NULL;
168 g_current_parser = NULL;
172 /* Success - transfer ownership of document */
173 *document = g_current_document;
174 g_current_document = NULL;
175 g_current_parser = NULL;
180 const char *csv_parser_get_error(csv_parser_t *parser)
183 return "Invalid parser";
186 return parser->has_error ? parser->error_message : "No error";
189 void csv_document_free(csv_document_t *document)
191 free_document_internal(document);
/*
 * Map a csv_error_t code to a constant English description.
 * NOTE(review): the switch header and some case labels are not visible in
 * this excerpt; only the mappings shown below are confirmed.
 */
194 const char *csv_error_string(csv_error_t error)
199 case CSV_ERROR_MEMORY:
200 return "Memory allocation error";
201 case CSV_ERROR_PARSE:
202 return "Parse error";
203 case CSV_ERROR_INVALID_PARAM:
204 return "Invalid parameter";
/* Fallback text; presumably reached for codes without their own case - TODO confirm. */
208 return "Unknown error";
212 /* Data access helper functions */
214 bool csv_document_has_header(const csv_document_t *document)
216 return document && document->header != NULL;
219 csv_record_t *csv_document_get_first_record(const csv_document_t *document)
221 return document ? document->records : NULL;
224 csv_record_t *csv_record_get_next(const csv_record_t *record)
226 return record ? record->next : NULL;
229 csv_field_t *csv_record_get_first_field(const csv_record_t *record)
231 return record ? record->fields : NULL;
234 csv_field_t *csv_field_get_next(const csv_field_t *field)
236 return field ? field->next : NULL;
239 const char *csv_field_get_content(const csv_field_t *field)
241 return field ? field->content : NULL;
244 /* Internal helper functions */
/*
 * Convert a list of parser field nodes into a list of csv_field_t.
 *
 * One csv_field_t is allocated per node, and each node's content pointer is
 * moved into the new field (the node's pointer is cleared), so the strings
 * are not copied. If an allocation fails, the fields built so far are
 * released together with the strings they took over.
 *
 * NOTE(review): several short lines (the allocation-failure test, the list
 * linking around last_field, and the return statements) are not visible in
 * this excerpt.
 */
246 static csv_field_t *convert_field_list(field_node_t *field_nodes)
/* Head and tail of the output list; the tail is used to append the next field. */
252 csv_field_t *first_field = NULL;
253 csv_field_t *last_field = NULL;
255 field_node_t *current_node = field_nodes;
256 while (current_node) {
257 csv_field_t *field = malloc(sizeof(csv_field_t));
259 /* Clean up already allocated fields */
260 while (first_field) {
261 csv_field_t *next = first_field->next;
262 free(first_field->content);
269 /* Copy content (take ownership) */
270 field->content = current_node->content;
/* Clearing the source pointer leaves the new field as the only holder of the string. */
271 current_node->content = NULL; /* Transfer ownership */
278 last_field->next = field;
282 current_node = current_node->next;
288 static csv_record_t *create_record_from_fields(field_node_t *field_nodes)
290 csv_record_t *record = malloc(sizeof(csv_record_t));
295 record->fields = convert_field_list(field_nodes);
299 record->field_count = 0;
300 csv_field_t *current = record->fields;
302 record->field_count++;
303 current = current->next;
309 static void free_document_internal(csv_document_t *doc)
317 csv_field_t *field = doc->header->fields;
319 csv_field_t *next = field->next;
320 free(field->content);
328 csv_record_t *record = doc->records;
330 csv_record_t *next_record = record->next;
332 csv_field_t *field = record->fields;
334 csv_field_t *next_field = field->next;
335 free(field->content);
341 record = next_record;
347 /* Functions called by the Bison parser */
349 void csv_parser_add_record(field_node_t *fields)
351 if (!g_current_document || g_parse_error) {
355 /* Skip empty records (single empty field) */
356 if (fields && !fields->next && fields->content && fields->content[0] == '\0') {
360 csv_record_t *record = create_record_from_fields(fields);
362 g_parse_error = true;
363 strncpy(g_error_message, "Memory allocation failed while creating record", sizeof(g_error_message) - 1);
367 /* Add to document */
368 if (!g_current_document->records) {
369 g_current_document->records = record;
371 /* Find the last record and append */
372 csv_record_t *last = g_current_document->records;
379 g_current_document->record_count++;
382 void csv_parser_set_error(const char *error)
384 g_parse_error = true;
386 strncpy(g_error_message, error, sizeof(g_error_message) - 1);
387 g_error_message[sizeof(g_error_message) - 1] = '\0';
/*
 * Detach the first data record and make it the document's header.
 *
 * Fails with CSV_ERROR_INVALID_PARAM when the document is NULL or has no
 * records. Otherwise the head of the record list becomes document->header,
 * the list advances to the following record, and record_count drops by one.
 *
 * NOTE(review): the end of this function is not visible in this excerpt.
 * NOTE(review): an already-set document->header is overwritten here without
 * being released in the visible lines - confirm this cannot leak when the
 * function is called twice on the same document.
 */
391 csv_error_t csv_document_set_first_record_as_header(csv_document_t *document)
393 if (!document || !document->records) {
394 return CSV_ERROR_INVALID_PARAM;
397 /* Move first record to header */
398 document->header = document->records;
399 document->records = document->records->next;
/* Cut the header off from the remaining records so it stands alone. */
400 document->header->next = NULL;
401 document->record_count--;