begriffs open source - sa-parse/blob - src/csv.l
initial
[sa-parse] / src / csv.l
1 %{
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "csv.tab.h"
5
/*
 * Scratch buffer used to accumulate the content of a quoted field one piece
 * at a time.  It lives on the heap and grows on demand, so a quoted field of
 * any length is kept intact instead of being cut off at a fixed size.  The
 * buffer is reused from field to field and is never released.
 */
enum { FIELD_INITIAL_CAPACITY = 8192 };

static char *field_buffer = NULL;   /* NUL-terminated whenever it is non-NULL */
static size_t field_capacity = 0;   /* bytes currently allocated */
static size_t field_pos = 0;        /* bytes stored, not counting the NUL */

/*
 * Make sure `extra` more bytes plus the terminating NUL fit in the buffer.
 * Returns 1 on success.  Returns 0 when the required size would overflow
 * size_t or memory cannot be obtained; the buffer is left untouched then.
 */
static int reserve_field_space(size_t extra) {
    size_t needed;
    size_t grown_capacity;
    char *grown;

    /* field_pos is always < SIZE_MAX, so the subtraction cannot wrap. */
    if (extra > SIZE_MAX - field_pos - 1) {
        return 0;
    }
    needed = field_pos + extra + 1;
    if (needed <= field_capacity) {
        return 1;
    }

    grown_capacity = field_capacity != 0 ? field_capacity
                                         : (size_t)FIELD_INITIAL_CAPACITY;
    while (grown_capacity < needed) {
        if (grown_capacity > SIZE_MAX / 2) {
            /* Doubling would overflow; ask for exactly what is needed. */
            grown_capacity = needed;
            break;
        }
        grown_capacity *= 2;
    }

    /* Keep the old pointer until realloc succeeds so nothing is lost. */
    grown = realloc(field_buffer, grown_capacity);
    if (grown == NULL) {
        return 0;
    }
    field_buffer = grown;
    field_capacity = grown_capacity;
    return 1;
}

/* Discard any accumulated content so a new field can be collected. */
void reset_field_buffer(void) {
    field_pos = 0;
    if (field_buffer != NULL) {
        field_buffer[0] = '\0';
    }
}

/*
 * Append the single character `c` to the field being collected.
 * If memory for it cannot be obtained the character is dropped.
 */
void append_to_field(char c) {
    if (!reserve_field_space(1)) {
        return;
    }
    field_buffer[field_pos++] = c;
    field_buffer[field_pos] = '\0';
}

/*
 * Append the NUL-terminated string `s` to the field being collected.
 * A NULL or empty `s` leaves the field unchanged; if memory for the whole
 * string cannot be obtained, nothing is appended.
 */
void append_string_to_field(const char *s) {
    size_t len;

    if (s == NULL) {
        return;
    }
    len = strlen(s);
    if (len == 0 || !reserve_field_space(len)) {
        return;
    }
    memcpy(field_buffer + field_pos, s, len);
    field_pos += len;
    field_buffer[field_pos] = '\0';
}

/*
 * Return a freshly allocated, NUL-terminated copy of the collected field.
 * The caller owns the result and must free() it.  Returns NULL if the copy
 * cannot be allocated.  An empty field yields an empty string, not NULL.
 */
char *get_field_content(void) {
    char *copy = malloc(field_pos + 1);

    if (copy == NULL) {
        return NULL;
    }
    if (field_pos > 0) {
        memcpy(copy, field_buffer, field_pos);
    }
    copy[field_pos] = '\0';
    return copy;
}
32 %}
33
/* Scanner options: no yywrap() callback at end of input, and let flex keep
   the current input line number in yylineno. */
%option noyywrap yylineno

/* Exclusive start condition that is active between the opening and the
   closing double quote of a quoted field. */
%x ESCAPED_FIELD

/* Single-byte building blocks, given as hexadecimal byte values. */
DQUOTE      \x22
COMMA       \x2C
CR          \x0D
LF          \x0A
CRLF        {CR}{LF}

/* Printable ASCII (0x20-0x7E) except the double quote (0x22) and the
   comma (0x2C). */
TEXTDATA    [\x20-\x21\x23-\x2B\x2D-\x7E]
45
46 %%
47
{COMMA}                 { return COMMA_TOK; }

    /* A record ends at CR LF or at a bare LF; both yield the same token. */
{CRLF}                  |
{LF}                    { return CRLF_TOK; }

{DQUOTE}                {
                          /* Opening quote: switch to the quoted-field state
                             and start collecting with an empty buffer. */
                          BEGIN(ESCAPED_FIELD);
                          reset_field_buffer();
                        }
58
59 <ESCAPED_FIELD>{DQUOTE}{DQUOTE}  { append_to_field('"'); }
60
61 <ESCAPED_FIELD>{DQUOTE}          { 
62                                    yylval.str = get_field_content();
63                                    BEGIN(INITIAL);
64                                    return FIELD_TOK;
65                                  }
66
67 <ESCAPED_FIELD>{TEXTDATA}        { append_to_field(yytext[0]); }
68
69 <ESCAPED_FIELD>{COMMA}           { append_to_field(','); }
70
71 <ESCAPED_FIELD>{CR}              { append_to_field('\r'); }
72
73 <ESCAPED_FIELD>{LF}              { append_to_field('\n'); }
74
75 <ESCAPED_FIELD>.                 { append_to_field(yytext[0]); }
76
{TEXTDATA}+             {
                          /* Unquoted field.  TEXTDATA includes the space
                             character, so spaces are kept as field content.
                             The parser receives a heap copy in yylval.str. */
                          yylval.str = strdup(yytext);
                          return FIELD_TOK;
                        }

\t                      {
                          /* A tab outside of a quoted field is skipped.
                             Spaces never reach this rule: they are matched
                             by the TEXTDATA rule above. */
                        }

.                       {
                          /* Any other character is passed to the parser as
                             its own character code. */
                          return yytext[0];
                        }
85
86 %%