1 #include "csv_parser.h"
6 /* External declarations from Bison/Flex - updated for reentrant API */
/*
 * Hand-written prototypes for the generated scanner/parser entry points
 * (csv* prefix). The scanner handle is an opaque pointer that is passed
 * explicitly to every call below.
 * NOTE(review): these must stay in sync with the signatures the generated
 * sources actually export - confirm against the Bison/Flex output.
 */
7 typedef void *yyscan_t;
/* Creates a scanner and stores its handle in *scanner; this file treats a
   non-zero return as failure. */
9 extern int csvlex_init(yyscan_t *scanner);
/* Called by this file once csvparse() has returned, to clean up lexer state. */
10 extern int csvlex_destroy(yyscan_t scanner);
/* Sets the stream the scanner reads its input from. */
11 extern void csvset_in(FILE *file, yyscan_t scanner);
/* Runs the parser. This file passes its csv_parser_t as parser_state and
   treats a return value of 0 as success. */
12 extern int csvparse(void *parser_state, yyscan_t scanner);
14 /* CSV parser structure - now contains all the state that was previously global */
/*
 * NOTE(review): only some members are visible in this excerpt. Other code in
 * this file also uses has_error, parse_error, error_line and error_column.
 */
/* Message handed to callers through csv_parser_get_error_info(). */
16 char error_message[256];
/* Records collected by the parse in progress; set to NULL by
   csv_parser_create() and again whenever csv_parser_parse_file() finishes. */
18 vector *current_records; /* Vector of csv_record_t* */
/* Message recorded by the parser callbacks; csv_parser_parse_file() copies
   it into error_message when a parse fails. */
20 char parse_error_message[256];
23 int expected_field_count; /* Field count from first record (-1 if not set) */
24 int current_record_number; /* Track record number for error reporting */
27 /* Helper functions */
/* Allocates a csv_record_t and stores field_vector in its fields member. */
28 static csv_record_t *create_record_from_vector(vector *field_vector);
/* Element destructor installed on the records vector with v_dtor(). */
29 static void csv_record_destructor(void *record_ptr, void *aux);
31 /* Functions called by the Bison parser */
/* Both callbacks receive the csv_parser_t as an untyped pointer and cast it
   back internally; they have external linkage (no static). */
32 void csv_parser_add_record(void *parser_state, vector *fields);
33 void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column);
/*
 * Allocates a csv_parser_t with malloc() and puts it into its idle state:
 * empty messages, cleared error flags, no records vector, zero error
 * location, no expected field count (-1) and a record counter of 0.
 *
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this excerpt - confirm a failed malloc() is handled before
 * the first dereference below.
 */
37 csv_parser_t *csv_parser_create(void)
39 csv_parser_t *parser = malloc(sizeof(csv_parser_t));
/* Only the first byte of each message buffer is cleared; the remaining bytes
   of the malloc'd storage stay uninitialized. */
44 parser->error_message[0] = '\0';
45 parser->has_error = false;
46 parser->current_records = NULL;
47 parser->parse_error = false;
48 parser->parse_error_message[0] = '\0';
49 parser->error_line = 0;
50 parser->error_column = 0;
/* -1 means "no record seen yet"; csv_parser_add_record() replaces it with
   the field count of the first record it accepts. */
51 parser->expected_field_count = -1;
52 parser->current_record_number = 0;
/*
 * NOTE(review): only the signature is visible in this excerpt. Presumably
 * this releases a parser obtained from csv_parser_create() (which allocates
 * it with malloc()) - confirm against the full body, including how a NULL
 * parser and a non-NULL current_records are handled.
 */
57 void csv_parser_destroy(csv_parser_t *parser)
/*
 * Parses CSV text held in a NUL-terminated string by writing it to a
 * tmpfile() stream and handing that stream to csv_parser_parse_file().
 *
 * parser   parser object; must not be NULL
 * input    text to parse; must not be NULL
 * records  out-parameter forwarded to csv_parser_parse_file(); must not be NULL
 *
 * Returns CSV_ERROR_INVALID_PARAMETER when any argument is NULL. When the
 * temporary file could be filled, the result is whatever
 * csv_parser_parse_file() produced.
 *
 * NOTE(review): the return codes of the tmpfile()/fputs() failure paths, the
 * rewind of the stream and the fclose() calls are not visible in this
 * excerpt - confirm the stream is closed on every path.
 */
64 csv_error_t csv_parser_parse_string(csv_parser_t *parser, const char *input, vector **records)
66 if (!parser || !input || !records) {
67 return CSV_ERROR_INVALID_PARAMETER;
72 /* Create a temporary file from the string input */
73 FILE *temp_file = tmpfile();
/* strncpy() with size-1 never writes the last byte of the buffer; these
   literals are shorter than the buffer, so strncpy()'s zero padding is what
   terminates them. */
75 strncpy(parser->error_message, "Failed to create temporary file", sizeof(parser->error_message) - 1);
76 parser->has_error = true;
80 /* Write input to temporary file */
/* fputs() stops at the first NUL byte, so only the text up to that point
   reaches the temporary file. */
81 if (fputs(input, temp_file) == EOF) {
83 strncpy(parser->error_message, "Failed to write to temporary file", sizeof(parser->error_message) - 1);
84 parser->has_error = true;
88 /* Reset file position to beginning */
91 /* Parse from the temporary file */
92 csv_error_t result = csv_parser_parse_file(parser, temp_file, records);
/*
 * Parses CSV from an open stream with a freshly created reentrant scanner
 * and csvparse().
 *
 * parser   parser object; its error state and per-parse fields are reset here
 * file     stream to read from; must not be NULL
 * records  on success receives the vector of parsed records, whose ownership
 *          passes to the caller; must not be NULL
 *
 * Returns CSV_ERROR_INVALID_PARAMETER when any argument is NULL,
 * CSV_ERROR_MEMORY when the records vector or the scanner cannot be created
 * or the parser reports memory exhaustion, and CSV_ERROR_PARSE for every
 * other failure. On a parse failure the partially built records vector is
 * freed and a message is left in parser->error_message.
 *
 * NOTE(review): the case labels of the switch and the success return value
 * are not visible in this excerpt.
 */
98 csv_error_t csv_parser_parse_file(csv_parser_t *parser, FILE *file, vector **records)
100 if (!parser || !file || !records) {
101 return CSV_ERROR_INVALID_PARAMETER;
105 parser->has_error = false;
106 parser->error_message[0] = '\0';
108 /* Initialize parser state */
109 parser->current_records = v_new();
110 if (!parser->current_records) {
111 strncpy(parser->error_message, "Memory allocation failed", sizeof(parser->error_message) - 1);
112 parser->has_error = true;
113 return CSV_ERROR_MEMORY;
116 /* Set destructor for automatic cleanup */
/* With the destructor installed, the v_free() calls on the failure paths
   below also release every record already appended. */
117 v_dtor(parser->current_records, csv_record_destructor, NULL);
119 parser->parse_error = false;
120 parser->parse_error_message[0] = '\0';
121 parser->error_line = 0;
122 parser->error_column = 0;
123 parser->expected_field_count = -1;
124 parser->current_record_number = 0;
126 /* Initialize the reentrant scanner */
128 int scanner_init_result = csvlex_init(&scanner);
/* NOTE(review): this path returns CSV_ERROR_MEMORY without setting
   has_error or an error message, so csv_parser_get_error_info() reports
   "No error" afterwards - consider recording a message here as the
   v_new() failure path does. */
129 if (scanner_init_result != 0) {
130 v_free(parser->current_records);
131 parser->current_records = NULL;
132 return CSV_ERROR_MEMORY;
135 /* Set up the lexer input */
136 csvset_in(file, scanner);
138 /* Parse the input */
139 int parse_result = csvparse(parser, scanner);
141 /* Clean up lexer state */
142 csvlex_destroy(scanner);
144 /* Check for errors */
/* The callbacks can flag an error through parser->parse_error, which is why
   a zero return from csvparse() alone is not taken as success. */
145 if (parse_result != 0 || parser->parse_error) {
146 csv_error_t error_code;
/* Bison documents yyparse() as returning 1 for a syntax error or YYABORT and
   2 for memory exhaustion; presumably the hidden case labels follow that -
   confirm. */
148 switch (parse_result) {
150 /* Memory exhaustion in parser */
151 strncpy(parser->error_message, "Parser memory exhaustion", sizeof(parser->error_message) - 1);
152 error_code = CSV_ERROR_MEMORY;
156 /* Parse error (syntax error or YYABORT) */
157 if (parser->parse_error_message[0] != '\0') {
/* NOTE(review): strncpy() copies at most 255 bytes here and nothing visible
   ever writes error_message[255]. A parse_error_message of full length (255
   characters) therefore leaves error_message without a terminator. Terminate
   explicitly or use snprintf(dst, sizeof dst, "%s", src). */
158 strncpy(parser->error_message, parser->parse_error_message, sizeof(parser->error_message) - 1);
160 strncpy(parser->error_message, "Syntax error", sizeof(parser->error_message) - 1);
162 error_code = CSV_ERROR_PARSE;
166 /* Other non-zero return or parser->parse_error flag set */
167 if (parser->parse_error_message[0] != '\0') {
/* NOTE(review): same missing-terminator hazard as the copy above. */
168 strncpy(parser->error_message, parser->parse_error_message, sizeof(parser->error_message) - 1);
170 strncpy(parser->error_message, "Parse error", sizeof(parser->error_message) - 1);
172 error_code = CSV_ERROR_PARSE;
/* Failure: publish the error and discard the partial result. error_line and
   error_column are left as the callbacks set them. */
176 parser->has_error = true;
177 v_free(parser->current_records);
178 parser->current_records = NULL;
182 /* Success - transfer ownership of records vector */
183 *records = parser->current_records;
/* Drop the parser's reference so the vector now belongs to the caller only. */
184 parser->current_records = NULL;
/*
 * Returns a description of the error state currently stored in the parser.
 *
 * When an error is recorded, message points directly at
 * parser->error_message (it is not a copy), so it is only meaningful while
 * the parser exists and until that buffer is next overwritten. In the other
 * cases message points at a string literal.
 * has_location is true only when a positive error line was recorded.
 *
 * NOTE(review): the guard that selects the "Invalid parser" result is not
 * visible in this excerpt; presumably it is a NULL check on parser.
 */
189 csv_error_info_t csv_parser_get_error_info(csv_parser_t *parser)
/* Zero-initialize so members not assigned below have defined values. */
191 csv_error_info_t info = {0};
194 info.message = "Invalid parser";
197 info.has_location = false;
201 if (parser->has_error) {
202 info.message = parser->error_message;
203 info.line = parser->error_line;
204 info.column = parser->error_column;
/* A line of 0 is the reset value and means no location was recorded. */
205 info.has_location = (parser->error_line > 0);
207 info.message = "No error";
210 info.has_location = false;
/*
 * Maps a csv_error_t code to a short human-readable description.
 * Every result is a string literal, so the caller must not modify or free it.
 *
 * NOTE(review): the switch header and some case labels are not visible in
 * this excerpt; "Unknown error" is presumably the fallback for codes without
 * a case of their own.
 */
216 const char *csv_error_string(csv_error_t error)
221 case CSV_ERROR_MEMORY:
222 return "Memory allocation error";
223 case CSV_ERROR_PARSE:
224 return "Parse error";
225 case CSV_ERROR_INVALID_PARAMETER:
226 return "Invalid parameter";
230 return "Unknown error";
234 /* Internal helper functions */
/*
 * Allocates a csv_record_t with malloc() and stores field_vector in its
 * fields member. The vector is not copied: the record refers to the very
 * same vector, which csv_record_destructor() later releases with v_free().
 *
 * NOTE(review): the failure checks and the return statement are not visible
 * in this excerpt. csv_parser_add_record() has an error branch directly
 * after its call to this function, so a failed allocation is presumably
 * reported as NULL - confirm.
 */
236 static csv_record_t *create_record_from_vector(vector *field_vector)
242 csv_record_t *record = malloc(sizeof(csv_record_t));
247 /* Take ownership of the vector directly */
248 record->fields = field_vector;
/*
 * Destructor for one csv_record_t, in the (element, aux) form that is
 * registered on the records vector with v_dtor(). It is also called directly
 * by csv_parser_add_record() when appending a record fails.
 * Releases the record's fields vector when one is set.
 *
 * NOTE(review): the release of the record struct itself (allocated with
 * malloc() in create_record_from_vector()) and any NULL check on record_ptr
 * are not visible in this excerpt - confirm both.
 */
253 static void csv_record_destructor(void *record_ptr, void *aux)
255 (void)aux; /* Unused parameter */
256 csv_record_t *record = (csv_record_t *)record_ptr;
258 if (record->fields) {
259 /* The fields vector already has derp_free as its destructor set in yacc,
260 so v_free will automatically clean up the strings */
261 v_free(record->fields);
267 /* Functions called by the Bison parser */
/*
 * Parser callback: wraps one parsed row in a csv_record_t and appends it to
 * parser->current_records.
 *
 * parser_state  the csv_parser_t that was passed to csvparse()
 * fields        vector holding the row's field strings
 *
 * The row is ignored when there is no parser, no records vector, or an error
 * has already been recorded. A row made of a single empty field is treated
 * as an empty record. The first accepted row fixes the expected field count;
 * a later row with a different count records a "Field count mismatch" error.
 * Failures are reported only through parser->parse_error and
 * parser->parse_error_message; the function returns nothing.
 *
 * NOTE(review): the statements that leave the function on each early-exit
 * branch are not visible in this excerpt - confirm who releases `fields`
 * when the row is not stored.
 */
269 void csv_parser_add_record(void *parser_state, vector *fields)
271 csv_parser_t *parser = (csv_parser_t *)parser_state;
272 if (!parser || !parser->current_records || parser->parse_error) {
276 /* Skip empty records (single empty field) */
277 if (fields && v_length(fields) == 1) {
278 char *first_field = v_at(fields, 0);
279 if (first_field && first_field[0] == '\0') {
/* The counter is 1-based in error messages: it is incremented before use. */
284 parser->current_record_number++;
/* NOTE(review): fields is NULL-checked in the empty-record test above but
   passed to v_length() unguarded here - confirm v_length() tolerates NULL or
   that the grammar never passes NULL. */
285 int field_count = v_length(fields);
287 /* Set expected field count from first record */
288 if (parser->expected_field_count == -1) {
289 parser->expected_field_count = field_count;
290 } else if (field_count != parser->expected_field_count) {
291 /* Field count mismatch - RFC 4180 violation */
292 parser->parse_error = true;
/* snprintf() already reserves room for the terminator within the size it is
   given, so the "- 1" only shortens the usable buffer by one byte. */
293 snprintf(parser->parse_error_message, sizeof(parser->parse_error_message) - 1,
294 "Field count mismatch: record %d has %d fields, expected %d",
295 parser->current_record_number, field_count, parser->expected_field_count);
299 csv_record_t *record = create_record_from_vector(fields);
301 parser->parse_error = true;
302 strncpy(parser->parse_error_message, "Memory allocation failed while creating record", sizeof(parser->parse_error_message) - 1);
306 /* Add to records vector */
/* A false result from v_append() is treated as failure. */
307 if (!v_append(parser->current_records, record)) {
308 parser->parse_error = true;
309 strncpy(parser->parse_error_message, "Failed to add record to vector", sizeof(parser->parse_error_message) - 1);
/* The record never made it into the vector, so the vector's destructor will
   not see it; run the destructor by hand. */
310 csv_record_destructor(record, NULL);
/*
 * Parser callback: records an error message together with the input position
 * it refers to.
 *
 * parser_state  the csv_parser_t that was passed to csvparse()
 * error         message text; copied, truncated to 255 characters if longer
 * line, column  position stored in error_line / error_column
 *
 * Sets parser->parse_error so that csv_parser_parse_file() reports the parse
 * as failed.
 *
 * NOTE(review): the guard between the cast and the first store, and the end
 * of the function, are not visible in this excerpt. `error` is passed to
 * strncpy() as-is in the visible lines - confirm it can never be NULL.
 */
315 void csv_parser_set_error_with_location(void *parser_state, const char *error, int line, int column)
317 csv_parser_t *parser = (csv_parser_t *)parser_state;
322 parser->parse_error = true;
323 parser->error_line = line;
324 parser->error_column = column;
/* strncpy() does not terminate the destination when the source fills the
   whole count, hence the explicit terminator on the next line. */
326 strncpy(parser->parse_error_message, error, sizeof(parser->parse_error_message) - 1);
327 parser->parse_error_message[sizeof(parser->parse_error_message) - 1] = '\0';