/* ical.l - flex scanner for RFC 5545 (iCalendar) content lines */

/*
 * Feature-test macros must be seen before the first system header.  flex
 * emits its own #includes ahead of any %{ %} block, so the macro has to live
 * in a %top block, which is copied to the very top of the generated file.
 */
%top{
/* XOPEN for strdup */
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600
#endif
}

/* disable unused functions so we don't get compiler warnings about them */
%option noyywrap nounput noinput

/* change our prefix from yy to ical */
%option prefix="ical"

/* use the pure parser calling convention */
%option reentrant bison-bridge bison-locations

/* patterns below name bytes 0x80-0xFF explicitly (UTF-8 text in values) */
%option 8bit

%{
#include "ical.tab.h"

#define YY_EXIT_FAILURE ((void)yyscanner, EXIT_FAILURE)

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/* seems like a bug that I have to do this, since flex
   should know prefix=ical and match bison's ICALSTYPE */
#define YYSTYPE ICALSTYPE
#define YYLTYPE ICALLTYPE

/*
 * Start a new token at the point where the previous one ended and extend it
 * by `columns` columns on the same line.
 */
static void loc_advance(ICALLTYPE *loc, int columns)
{
    loc->first_line = loc->last_line;
    loc->first_column = loc->last_column;
    loc->last_column += columns;
}

/*
 * Start a new token at the point where the previous one ended and finish it
 * on the following line at column `column`.
 */
static void loc_next_line(ICALLTYPE *loc, int column)
{
    loc->first_line = loc->last_line;
    loc->first_column = loc->last_column;
    loc->last_line++;
    loc->last_column = column;
}

/*
 * Return a newly allocated upper-case copy of `str` (used so the parser can
 * compare names case-insensitively).  The caller owns the result; NULL is
 * returned when the allocation fails.
 */
static char *str_toupper(const char *str)
{
    char *result = strdup(str);
    if (!result) {
        return NULL;
    }
    for (size_t i = 0; result[i] != '\0'; i++) {
        /* <ctype.h> functions require a value representable as unsigned char */
        result[i] = (char)toupper((unsigned char)result[i]);
    }
    return result;
}

/*
 * Strip the surrounding double quotes from a quoted parameter value and
 * collapse every \" escape into a bare ".
 *
 * `quoted` is the complete lexeme of `len` bytes, including both quotes, so
 * len >= 2.  Returns a newly allocated NUL-terminated string owned by the
 * caller, or NULL when the allocation fails.
 */
static char *unquote_param(const char *quoted, size_t len)
{
    const char *src = quoted + 1;
    const char *end = quoted + len - 1;   /* the closing quote */
    char *out = malloc(len - 1);          /* inner text plus terminator */
    char *dst = out;

    if (!out) {
        return NULL;
    }
    while (src < end) {
        if (src[0] == '\\' && src + 1 < end && src[1] == '"') {
            *dst++ = '"';
            src += 2;
        } else {
            *dst++ = *src++;
        }
    }
    *dst = '\0';
    return out;
}
%}

/* Lexer states for RFC 5545 content line parsing */
%x PARAM_STATE VALUE_STATE COMPONENT_STATE

/* Define patterns for RFC 5545 tokens */
ALPHA       [A-Za-z]
DIGIT       [0-9]
CRLF        \r?\n
WSP         [ \t]
IANA_TOKEN  ({ALPHA}|{DIGIT}|"-")+
X_NAME      "X-"[A-Za-z0-9-]*"-"?{IANA_TOKEN}
/* Component names that are also recognised at the start of a content line */
KNOWN_COMP  (?i:"VCALENDAR"|"VEVENT"|"VTODO"|"VJOURNAL"|"VFREEBUSY"|"VTIMEZONE"|"VALARM")
/* Unquoted parameter text: printable ASCII except = ; : , " plus non-ASCII bytes */
PARAM_CHAR  [ !#-+\--9<>?@A-Z\[\\\]^_`a-z{|}~\x80-\xFF]
/* VALUE-CHAR per RFC 5545: WSP, printable ASCII and non-ASCII bytes */
VALUE_CHAR  [ \t!-~\x80-\xFF]

%%

    /* Line folding - continuation lines start with whitespace (all states).
       The token ends on the next line, just past the leading whitespace;
       the line terminator itself (1 or 2 bytes) occupies no column there. */
<*>{CRLF}{WSP}+ {
    int eol_len = (yytext[0] == '\r') ? 2 : 1;
    loc_next_line(yylloc, 1 + ((int)yyleng - eol_len));
    /* Folded lines are handed to the parser as a single space.
       NOTE(review): RFC 5545 unfolding removes the CRLF and the whitespace
       entirely - confirm the parser discards FOLDING_WS where required. */
    yylval->str = strdup(" ");
    return FOLDING_WS;
}

    /* INITIAL state: expecting property name at start of content line */

    /* Component BEGIN/END keywords - case insensitive.
       Either keyword is followed by ':' and a component name. */
(?i:"BEGIN") {
    loc_advance(yylloc, (int)yyleng);
    BEGIN(COMPONENT_STATE);
    return BEGIN_TOKEN;
}

(?i:"END") {
    loc_advance(yylloc, (int)yyleng);
    BEGIN(COMPONENT_STATE);
    return END_TOKEN;
}

    /* Component names - case insensitive, reported in upper case */
{KNOWN_COMP} {
    loc_advance(yylloc, (int)yyleng);
    yylval->str = str_toupper(yytext);
    return COMPONENT_NAME;
}

    /* Property names - only in INITIAL state at start of content line.
       Listed after the keywords above so that those win on equal length. */
{X_NAME}|{IANA_TOKEN} {
    loc_advance(yylloc, (int)yyleng);
    yylval->str = str_toupper(yytext);
    return PROPERTY_NAME;
}

    /* Semicolon starts parameter parsing */
";" {
    loc_advance(yylloc, (int)yyleng);
    BEGIN(PARAM_STATE);
    return ';';
}

    /* Colon starts value parsing */
":" {
    loc_advance(yylloc, (int)yyleng);
    BEGIN(VALUE_STATE);
    return ':';
}

    /* PARAM_STATE: parsing parameters between ; and : */
<PARAM_STATE>{
    /* Parameter names.  Must stay ahead of the unquoted-value rule so a
       name-shaped lexeme of equal length is reported as a name. */
    {X_NAME}|{IANA_TOKEN} {
        loc_advance(yylloc, (int)yyleng);
        yylval->str = str_toupper(yytext);
        return PROPERTY_NAME; /* Reuse token for param names */
    }

    /* Quoted parameter values: quotes removed, \" unescaped.
       yylval->str is NULL if the copy could not be allocated. */
    \"([^"]|\\\")*\" {
        loc_advance(yylloc, (int)yyleng);
        yylval->str = unquote_param(yytext, (size_t)yyleng);
        return PARAM_VALUE;
    }

    /* Unquoted parameter values - excludes = ; : , " */
    {PARAM_CHAR}+ {
        loc_advance(yylloc, (int)yyleng);
        yylval->str = strdup(yytext);
        return PARAM_VALUE;
    }

    "=" {
        loc_advance(yylloc, (int)yyleng);
        return '=';
    }

    "," {
        loc_advance(yylloc, (int)yyleng);
        return ',';
    }

    ";" {
        loc_advance(yylloc, (int)yyleng);
        return ';';
    }

    ":" {
        loc_advance(yylloc, (int)yyleng);
        BEGIN(VALUE_STATE);
        return ':';
    }
}

    /* COMPONENT_STATE: parsing component name after BEGIN: or END: */
<COMPONENT_STATE>{
    ":" {
        loc_advance(yylloc, (int)yyleng);
        /* Stay in COMPONENT_STATE - we expect a component name next */
        return ':';
    }

    /* Any name token is a component name here.  Besides the well-known
       VCALENDAR/VEVENT/... this covers STANDARD and DAYLIGHT inside
       VTIMEZONE as well as IANA-registered and X- components; the
       upper-cased text lets the parser tell them apart. */
    {X_NAME}|{IANA_TOKEN} {
        loc_advance(yylloc, (int)yyleng);
        yylval->str = str_toupper(yytext);
        return COMPONENT_NAME;
    }
}

    /* VALUE_STATE: parsing property value after : until CRLF */
<VALUE_STATE>{
    /* Property value - VALUE-CHAR per RFC (any textual character) */
    {VALUE_CHAR}+ {
        loc_advance(yylloc, (int)yyleng);
        yylval->str = strdup(yytext);
        return PARAM_VALUE; /* Reuse token for property values */
    }
}

    /* CRLF - end of content line: the next token starts at column 1 of the
       following line and the scanner goes back to INITIAL */
<*>{CRLF} {
    loc_next_line(yylloc, 1);
    BEGIN(INITIAL);
    return CRLF;
}

    /* Whitespace within lines - tracked for locations, otherwise ignored */
<*>{WSP}+ {
    loc_advance(yylloc, (int)yyleng);
}

    /* Everything else is returned as a single-character token.  The cast
       keeps bytes >= 0x80 positive: a zero or negative return value would
       be taken by the parser as end of input. */
<*>. {
    loc_advance(yylloc, (int)yyleng);
    return (unsigned char)yytext[0];
}

%%