begriffs open source - sa-parse/blob - src/csv.l
Basics CSV parser and test cases
[sa-parse] / src / csv.l
1 %{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"
5
/*
 * Scratch buffer in which the text of a quoted field is accumulated while
 * the scanner is inside the ESCAPED_FIELD start condition.
 *
 * field_buffer is kept NUL-terminated at all times; field_pos is the number
 * of bytes stored, not counting the terminator.  field_pos is a size_t so
 * that it compares against sizeof(field_buffer) without a signed/unsigned
 * conversion.
 */
static char field_buffer[8192];
static size_t field_pos = 0;

/* Discard any accumulated content, leaving the buffer as an empty string. */
void reset_field_buffer(void) {
    field_pos = 0;
    field_buffer[0] = '\0';
}

/*
 * Append the single byte c to the buffer.
 *
 * One slot is always reserved for the NUL terminator, so once the buffer
 * holds sizeof(field_buffer) - 1 bytes any further byte is silently dropped.
 */
void append_to_field(char c) {
    if (field_pos < sizeof(field_buffer) - 1) {
        field_buffer[field_pos++] = c;
        field_buffer[field_pos] = '\0';
    }
}

/*
 * Append the NUL-terminated string s to the buffer.
 *
 * Copying stops at the end of s or when the buffer is full, whichever comes
 * first; bytes that do not fit are silently dropped.  The buffer is
 * NUL-terminated on return in either case.
 */
void append_string_to_field(const char *s) {
    while (*s != '\0' && field_pos < sizeof(field_buffer) - 1) {
        field_buffer[field_pos++] = *s++;
    }
    field_buffer[field_pos] = '\0';
}

/*
 * Return a heap-allocated copy of the current buffer content as a C string.
 *
 * Ownership passes to the caller, who must free() the result.  The buffer
 * itself is left untouched.  If the allocation fails, a message is written
 * to stderr and the process exits with status 1.
 *
 * malloc + memcpy is used rather than strdup because strdup is a POSIX
 * function that is not declared in strict ISO C modes before C23.
 */
char *get_field_content(void) {
    /* strlen (not field_pos) so the copy ends at the first NUL, as strdup's would. */
    size_t len = strlen(field_buffer);
    char *result = malloc(len + 1);
    if (!result) {
        fprintf(stderr, "Memory allocation failed in get_field_content()\n");
        exit(1);  /* Conservative approach: exit on memory failure */
    }
    memcpy(result, field_buffer, len + 1);
    return result;
}
37 %}
38
/* No yywrap() is supplied; the scanner stops at the end of its input. */
%option noyywrap
/* Have flex keep the current input line number in yylineno. */
%option yylineno

/* Exclusive start condition: active between the opening and closing quote
 * of a quoted field, where only <ESCAPED_FIELD> rules apply. */
%x ESCAPED_FIELD

/* Named character classes.  The names appear to follow the RFC 4180 grammar
 * (TODO confirm).  TEXTDATA covers 0x20-0x7E except DQUOTE (0x22) and
 * COMMA (0x2C); note that it includes the space character. */
TEXTDATA    [\x20-\x21\x23-\x2B\x2D-\x7E]
COMMA       \x2C
CR          \x0D
LF          \x0A
DQUOTE      \x22
CRLF        {CR}{LF}
50
51 %%
52
{COMMA}                 {
                          /* Field separator. */
                          return COMMA_TOK;
                        }

{CRLF}                  |
{LF}                    {
                          /* A CR LF pair and a bare LF both end a record
                           * and produce the same token. */
                          return CRLF_TOK;
                        }

{DQUOTE}                {
                          /* Opening quote: switch to the quoted-field rules
                           * and start collecting into an empty buffer. */
                          BEGIN(ESCAPED_FIELD);
                          reset_field_buffer();
                        }
63
<ESCAPED_FIELD>{DQUOTE}{DQUOTE}  {
                                   /* A doubled quote inside a quoted field
                                    * stands for one literal quote character. */
                                   append_to_field('"');
                                 }

<ESCAPED_FIELD>{DQUOTE}          {
                                   /* A single quote closes the field.  The token
                                    * value is a heap copy of the collected text;
                                    * whoever consumes it must free() it. */
                                   yylval.str = get_field_content();
                                   BEGIN(INITIAL);
                                   return FIELD_TOK;
                                 }

<ESCAPED_FIELD>({TEXTDATA}|{COMMA})+  {
                                   /* Ordinary text and commas are plain content
                                    * here.  Take the whole run in one match and
                                    * append it in one call instead of matching
                                    * character by character. */
                                   append_string_to_field(yytext);
                                 }

<ESCAPED_FIELD>{CR}              { append_to_field('\r'); }

<ESCAPED_FIELD>{LF}              { append_to_field('\n'); }

<ESCAPED_FIELD>.                 {
                                   /* Any remaining byte other than LF (tab, other
                                    * control characters, bytes above 0x7E) is
                                    * kept verbatim. */
                                   append_to_field(yytext[0]);
                                 }

<ESCAPED_FIELD><<EOF>>           {
                                   /* Unterminated quoted field: be lenient and
                                    * return what was collected so far as a field.
                                    * Leaving the start condition lets the next
                                    * call see end of input in INITIAL. */
                                   yylval.str = get_field_content();
                                   BEGIN(INITIAL);
                                   return FIELD_TOK;
                                 }
88
{TEXTDATA}+             {
                          /* Unquoted field: the matched text itself is the
                           * field value.  The token carries a heap copy that
                           * the consumer must free(). */
                          char *copy = strdup(yytext);
                          if (copy == NULL) {
                              fprintf(stderr, "Memory allocation failed in TEXTDATA rule\n");
                              exit(1);  /* out of memory: give up */
                          }
                          yylval.str = copy;
                          return FIELD_TOK;
                        }
97
\t                      {
                          /* Skip tabs that appear outside of fields.  Spaces
                           * are not skipped: 0x20 is part of TEXTDATA, so the
                           * earlier TEXTDATA+ rule always claims them as
                           * field text and they never reach this rule. */
                        }

.                       {
                          /* Anything else is handed to the parser as a
                           * single-character token.  Cast through unsigned
                           * char so that bytes >= 0x80 do not become negative
                           * where char is signed; a yacc/bison parser treats
                           * a non-positive token as end of input. */
                          return (unsigned char) yytext[0];
                        }
101
102 %%