/* csv.y - CSV parser using Bison */

/* a "pure" api means communication variables like yylval
   won't be global variables, and yylex is assumed to
   have a different signature */

%define api.pure true

/* change prefix of symbols from yy to "csv" to avoid
   clashes with any other parsers we may want to link */

%define api.prefix {csv}

/* generate much more meaningful errors rather than the
   uninformative string "syntax error" */

%define parse.error verbose

/* Bison offers different %code insertion locations in
   addition to yacc's %{ %} construct.

   The "top" location is good for headers and feature
   flags like the _XOPEN_SOURCE we use here */

%code top {
	/* _XOPEN_SOURCE is a feature-test macro, so it has to be
	   defined before the first system header is included;
	   keep it as the first line of this block.  It was added
	   to expose strdup.
	   NOTE(review): strdup is not called in the part of the
	   file visible here -- confirm it is still needed. */
	#define _XOPEN_SOURCE 600
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Bison versions 3.7.5 and above provide the YYNOMEM
	   macro to allow our actions to signal the unlikely
	   event that they couldn't allocate memory. Thanks
	   to the Bison team for adding this feature at my
	   request. :) YYNOMEM causes yyparse() to return 2.

	   The following conditional define allows us to use
	   the functionality in earlier versions too, by jumping
	   straight to the label the generated parser uses for
	   memory exhaustion.

	   NOTE(review): this relies on the generated parser
	   having a label named yyexhaustedlab, which is an
	   internal detail of Bison's C skeleton -- confirm it
	   against the Bison versions you build with. */

	#ifndef YYNOMEM
	#define YYNOMEM goto yyexhaustedlab
	#endif
}

/* "requires" code is placed before the semantic value type is
   defined, which is what lets the union below mention
   field_node_t.  Anything that value type depends on belongs
   here. */
%code requires {
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Simple linked list structures for CSV parsing */

	/* One field of a record, as a singly linked list node.
	   content is a NUL-terminated string owned by the node:
	   create_field() stores the pointer it is given and
	   free_field_list() passes it to free().
	   next is NULL on the last field of the record. */
	typedef struct field_node {
		char *content;
		struct field_node *next;
	} field_node_t;

	/* One record (row): the head of its field list plus a link
	   to the following record.  Not used by the grammar in this
	   file; presumably consumed by the CSV parser library. */
	typedef struct record_node {
		field_node_t *fields;
		struct record_node *next;
	} record_node_t;
}

/* Add another argument in yyparse() so that we
   can communicate any parser state to the caller.
   We can't return the result directly, since the
   return value is already reserved as an int, with
   0=success, 1=error, 2=nomem.

   The value is opaque (void *) to this grammar; the
   error callback csverror() receives it and ignores it. */

%parse-param {void *parser_state}

/* param adds an extra param to yyparse (like parse-param)
   but also causes yyparse to send the value to yylex.
   In our case the caller will initialize their own scanner
   instance and pass it through.

   Extra parameters are passed in the order they are declared
   here, so with the "csv" prefix the entry points are
   csvparse(parser_state, scanner), csvlex(&lval, scanner) and
   csverror(parser_state, scanner, message). */

%param {void *scanner}

%code {
	/* Function declarations - updated for pure API */

	/* Error callback invoked by the generated parser; the two
	   leading arguments mirror the extra yyparse parameters. */
	int csverror(void *parser_state, void *scanner, const char *s);

	/* Scanner entry point, defined outside this file.  lval is
	   where the scanner stores the token's semantic value.
	   NOTE(review): declared as void * here; the scanner's own
	   definition should be checked for a matching signature. */
	int csvlex(void *lval, void *scanner);

	/* List helpers, defined in the epilogue of this file. */
	bool one_empty_field(field_node_t *fields);
	field_node_t *create_field(char *content);
	void append_field_to_list(field_node_t **head, char *content);
	void free_field_list(field_node_t *fields);
	size_t count_fields(field_node_t *fields);
	
	/* Interface with the CSV parser library (defined elsewhere):
	   one call per non-blank record, and one call per reported
	   parse error. */
	void csv_parser_add_record(field_node_t *fields);
	void csv_parser_set_error(const char *error);
}

/* Semantic value type shared by the scanner and the parser. */
%union
{
	/* text of a single field (FIELD token, field.opt) */
	char *str;
	/* head of the field list built up for a record */
	field_node_t *fields;
}

/* CRLF separates records and carries no value; FIELD carries
   the field text.  The ',' separator is used as a literal
   character token in the rules below. */
%token CRLF
%token <str> FIELD
%type <str> field.opt
%type <fields> record

/* Bison memory management: destructors run only for symbols
   the parser discards (during error recovery, or when the
   parse is aborted with values still on the stack).  They are
   NOT run for right-hand-side values after a normal reduction;
   the rule's action owns those and must free or hand off each
   one itself. */
%destructor { free($$); } <str>
%destructor { free_field_list($$); } <fields>

%%

/* A file is one or more records separated by CRLF.  Because a
   record may consist of a single empty field, empty input and
   a trailing CRLF both parse: they produce a blank record,
   which consumed_record skips.  The rule is left-recursive, so
   each record is reduced and consumed as soon as it is
   complete. */
file :
  consumed_record
| file CRLF consumed_record
;

/* A record can be constructed in two ways, but we want to
   run the same side effect for either case. We add an
   intermediate non-terminal symbol "consumed_record" just
   to perform the action. In library code, this would be a
   good place to send the record to a callback function. */

consumed_record :
  record {
	/* A record comprised of exactly one blank field is a
	   blank record (for example an empty line), which we
	   skip. */
	if (one_empty_field($1))
	{
		/* Bison runs a destructor only on symbols it
		   discards (error recovery, aborted parse), never
		   on right-hand-side values after a normal
		   reduction.  Nobody else will see this list, so
		   it has to be released here or it leaks. */
		free_field_list($1);
	}
	else
	{
		/* Send the record to the parser library.
		   NOTE(review): the list is deliberately not freed
		   on this path, on the assumption that
		   csv_parser_add_record() takes ownership of it.
		   If the library copies the fields instead, it (or
		   this action) must also call free_field_list(). */
		csv_parser_add_record($1);
	}
  }
;

/* A record is one or more comma-separated fields, collected
   into a singly linked list in input order. */
record :
  field.opt {
	/* Start a new list with the first field.  create_field()
	   takes ownership of the string and frees it itself when
	   the node cannot be allocated, so there is nothing left
	   to release before signalling memory exhaustion. */
	$$ = create_field($1);
	if (!$$) YYNOMEM;
  }
| record ',' field.opt {
	/* Build the node here rather than through
	   append_field_to_list(): that helper returns void, so an
	   allocation failure would silently drop the field and the
	   parse would carry on with a short record. */
	field_node_t *node = create_field($3);
	if (!node)
	{
		/* The field string was already freed by
		   create_field().  Bison does not reclaim the
		   right-hand-side values of the rule whose action
		   aborts, so the list built so far must be freed
		   by hand before bailing out. */
		free_field_list($1);
		YYNOMEM;
	}

	/* Walk to the terminating link and hook the new node in.
	   This is linear in the number of fields already in the
	   record. */
	field_node_t **link = &$1;
	while (*link)
	{
		link = &(*link)->next;
	}
	*link = node;
	$$ = $1;
  }
;

/* An optional field: either the text delivered by the scanner
   or, when nothing appears between separators, an empty
   string.  Either way the value is a string that is later
   passed to free().
   NOTE(review): this assumes the scanner returns FIELD text
   in memory obtained from malloc -- confirm in the scanner. */
field.opt :
  %empty { 
	/* one zeroed byte, i.e. a freshly allocated "" */
	$$ = calloc(1,1);
	if (!$$) YYNOMEM;
  }
| FIELD /* no action: the token's string becomes the value */
;

%%

/* Allocate a list node wrapping content.

   The node takes ownership of content, which is stored as-is
   (not copied).  Returns the new node with next set to NULL,
   or NULL when the node cannot be allocated; in that case
   content has already been freed, so the caller must not use
   or free it again. */
field_node_t *create_field(char *content)
{
	field_node_t *node = malloc(sizeof *node);

	if (node != NULL) {
		node->content = content;
		node->next = NULL;
		return node;
	}

	/* Out of memory.  We are not inside a parser action, so
	   YYNOMEM is unavailable; drop the string and let the
	   caller react to the NULL. */
	free(content);
	return NULL;
}

/* Append a new field holding content to the end of the list
   whose head pointer is *head; an empty list (*head == NULL)
   gets the new node as its head.

   Ownership of content passes to the list.  If the node cannot
   be allocated, content is freed by create_field() and the
   list is left untouched; the void return type gives the
   caller no way to notice that. */
void append_field_to_list(field_node_t **head, char *content)
{
	field_node_t *node = create_field(content);
	field_node_t **link;

	if (node == NULL) {
		return;
	}

	/* Advance to the link that terminates the list -- either
	   *head itself or the next pointer of the last node. */
	for (link = head; *link != NULL; link = &(*link)->next) {
		/* nothing */
	}
	*link = node;
}

/* Release every node of the list starting at fields, together
   with the string each node owns.  A NULL list is a no-op. */
void free_field_list(field_node_t *fields)
{
	while (fields != NULL) {
		field_node_t *doomed = fields;

		/* step forward before the node is destroyed */
		fields = fields->next;
		free(doomed->content);
		free(doomed);
	}
}

/* Return the number of nodes in the list starting at fields
   (0 for a NULL list). */
size_t count_fields(field_node_t *fields)
{
	size_t total = 0;

	for (const field_node_t *node = fields; node != NULL; node = node->next) {
		total++;
	}
	return total;
}

/* Return true when the list consists of exactly one field
   whose content is the empty string -- the shape of a blank
   record.  A NULL list, a longer list, or a NULL content
   pointer all yield false. */
bool one_empty_field(field_node_t *fields)
{
	if (fields == NULL || fields->next != NULL) {
		return false;
	}
	if (fields->content == NULL) {
		return false;
	}
	return fields->content[0] == '\0';
}

/* Error callback for the generated parser: forwards the
   message s to the CSV parser library and always returns 0.
   The parser state and scanner arguments are accepted only
   because the pure-parser calling convention passes them. */
int csverror(void *parser_state, void *scanner, const char *s)
{
	csv_parser_set_error(s);

	/* deliberately unused */
	(void)scanner;
	(void)parser_state;

	return 0;
}