]> begriffs open source - sa-parse/blob - src/csv.l
Switch to Meson build system
[sa-parse] / src / csv.l
1 %{
2 #include <stdio.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include <unistd.h>
6 #include "csv.tab.h"
7
/*
 * Accumulator for the field currently being built.
 *
 * The storage is a heap buffer that grows on demand, so a field of any
 * length is kept intact instead of being silently cut off at a fixed size.
 * It is allocated on first use and then reused for every later field.
 */
enum { FIELD_INITIAL_CAPACITY = 8192 };

static char *field_buffer = NULL;   /* NUL-terminated once allocated */
static size_t field_capacity = 0;   /* bytes allocated for field_buffer */
static size_t field_pos = 0;        /* bytes stored, excluding the NUL */

/*
 * Guarantee room for `extra` more bytes plus the terminating NUL.
 * Exits the process if the size would overflow or memory cannot be
 * obtained, matching the exit-on-failure policy of get_field_content().
 */
static void reserve_field_space(size_t extra) {
    const size_t max_size = (size_t)-1;
    size_t needed;
    size_t new_capacity;
    char *grown;

    /* field_pos + extra + 1 must be representable in size_t. */
    if (extra >= max_size - field_pos) {
        fprintf(stderr, "Field too large in reserve_field_space()\n");
        exit(1);
    }

    needed = field_pos + extra + 1;
    if (needed <= field_capacity) {
        return;
    }

    /* Double the capacity so repeated one-byte appends stay cheap. */
    new_capacity = field_capacity ? field_capacity : FIELD_INITIAL_CAPACITY;
    while (new_capacity < needed) {
        if (new_capacity > max_size / 2) {
            new_capacity = needed;
            break;
        }
        new_capacity *= 2;
    }

    /* Keep the old pointer until realloc is known to have succeeded. */
    grown = realloc(field_buffer, new_capacity);
    if (!grown) {
        fprintf(stderr, "Memory allocation failed in reserve_field_space()\n");
        exit(1);
    }
    field_buffer = grown;
    field_capacity = new_capacity;
}

/* Discard the accumulated content so a new field can be started. */
void reset_field_buffer(void) {
    field_pos = 0;
    if (field_buffer != NULL) {
        field_buffer[0] = '\0';
    }
}

/* Append one character to the field, keeping it NUL-terminated. */
void append_to_field(char c) {
    reserve_field_space(1);
    field_buffer[field_pos++] = c;
    field_buffer[field_pos] = '\0';
}

/* Append the NUL-terminated string `s` to the field. */
void append_string_to_field(const char *s) {
    size_t len = strlen(s);

    reserve_field_space(len);
    memcpy(field_buffer + field_pos, s, len + 1);  /* +1 copies the NUL too */
    field_pos += len;
}

/*
 * Return a heap-allocated copy of the accumulated field.
 * The caller owns the result and must free() it.
 * Exits the process if the copy cannot be allocated.
 */
char *get_field_content(void) {
    size_t size = field_pos + 1;
    char *result = malloc(size);

    if (!result) {
        fprintf(stderr, "Memory allocation failed in get_field_content()\n");
        exit(1);  /* Conservative approach: exit on memory failure */
    }
    if (field_buffer != NULL) {
        memcpy(result, field_buffer, size);
    } else {
        result[0] = '\0';  /* nothing was ever appended */
    }
    return result;
}
39 %}
40
/*
 * Scanner configuration:
 *   noyywrap  - end of input ends the scan; yywrap() is not called
 *   yylineno  - keep the current input line number in yylineno
 *   nounput   - do not generate the unput() routine
 *   noinput   - do not generate the input() routine
 */
%option noyywrap yylineno
%option nounput noinput

/* Exclusive start condition used while scanning inside a quoted field. */
%x ESCAPED_FIELD

/*
 * Named patterns. The names and ranges appear to follow the CSV grammar of
 * RFC 4180. TEXTDATA is printable ASCII (0x20-0x7E) without the double
 * quote (0x22) and the comma (0x2C).
 */
DQUOTE      \"
COMMA       ","
CR          \r
LF          \n
CRLF        {CR}{LF}
TEXTDATA    [\x20-\x21\x23-\x2B\x2D-\x7E]
54
55 %%
56
{COMMA}                 {
                          /* Field separator. No token is produced here for an
                             empty field; that case is left to the parser grammar. */
                          return COMMA_TOK;
                        }

{CR}?{LF}               {
                          /* A record ends at CRLF or at a bare LF; both forms
                             are reported as CRLF_TOK. */
                          return CRLF_TOK;
                        }
66
{DQUOTE}                {
                          /* Opening quote: start a fresh field and switch to
                             the quoted-field rules. */
                          reset_field_buffer();
                          BEGIN(ESCAPED_FIELD);
                        }

<ESCAPED_FIELD>{
{DQUOTE}{DQUOTE}        {
                          /* A doubled quote stands for one literal quote. */
                          append_to_field('"');
                        }
{DQUOTE}                {
                          /* Closing quote: hand the collected text to the
                             parser as a heap copy it must free(). */
                          yylval.str = get_field_content();
                          BEGIN(INITIAL);
                          return FIELD_TOK;
                        }
[^"]                    {
                          /* Anything else, including comma, CR and LF, is
                             field data and is stored unchanged. */
                          append_to_field(yytext[0]);
                        }
}

<ESCAPED_FIELD><<EOF>>  {
                          /* Input ended before the closing quote: return what
                             was collected so far as the field. */
                          yylval.str = get_field_content();
                          BEGIN(INITIAL);
                          return FIELD_TOK;
                        }
96
{TEXTDATA}+             {
                          /* Unquoted field: pass a heap copy of the matched
                             text to the parser. NOTE: the caller must free() it. */
                          yylval.str = strdup(yytext);
                          if (!yylval.str) {
                              fprintf(stderr, "Memory allocation failed in TEXTDATA rule\n");
                              exit(1);  /* Conservative approach: exit on memory failure */
                          }
                          return FIELD_TOK;
                        }

[ \t]                   {
                          /* Skipped. A space is also TEXTDATA and that rule
                             comes first, so spaces end up in fields and only
                             a tab actually reaches this rule. */
                        }

.                       {
                          /* Any other byte is returned as its own token code.
                             The cast keeps bytes >= 0x80 positive where char
                             is signed; a zero or negative code would be read
                             by the parser as end of input. */
                          return (unsigned char)yytext[0];
                        }
109
110 %%