/*
 * Copyright (C) 1997 - 2001 Loic Dachary
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */
#ifndef _webbase_h
#define _webbase_h

#include <khash.h>
#include <webbase_url.h>
#include <mysql.h>
#include <uri.h>

/*
 * Capacities of the fixed-size string fields of webbase_t (name, path
 * and mysql_host).  Each of those buffers is declared with one extra
 * element (LENGTH + 1).
 */
#define WEBBASE_NAME_LENGTH 		128
#define WEBBASE_PATH_LENGTH 		1024
#define WEBBASE_HOST_LENGTH 		256
/*
 * NOTE(review): not used in this header; presumably sizes message
 * buffers in the implementation - confirm.
 */
#define WEBBASE_MESSAGE_LENGTH 		2048

/*
 * Negative status value.  The replacement list is parenthesized so the
 * macro always expands to a single primary expression, whatever
 * operator the caller places next to it.
 */
#define WEBBASE_NOTFOUND		(-2)

/*
 * NOTE(review): value 0x4000; presumably a flag identifying the webbase
 * option group - confirm against the users of this macro.
 */
#define WEBBASE_OPTIONS		0x0004000

/*
 * Forward declaration of the server record type.  struct server is not
 * defined in this header; only a pointer to it is stored in webbase_t.
 */
typedef struct server server_t;

/*
 * Handle on a webbase database.  It groups the identification of the
 * database, the MySQL connection parameters, the locking parameters and
 * the MySQL client object itself.  A pointer to this structure is the
 * first argument of most functions declared in this header.
 */
typedef struct webbase {
  /* Public */
  /*
   * NOTE(review): presumably the option values parsed from the command
   * line, stored in a hash table - confirm.
   */
  hash_t* options;
  /* Name of the database */
  char name[WEBBASE_NAME_LENGTH + 1];
  /* Absolute path name of the database */
  char path[WEBBASE_PATH_LENGTH + 1];
  /* Hostname of the MySQL database */
  char mysql_host[WEBBASE_HOST_LENGTH + 1];
  /* Port to use to query the MySQL database */
  unsigned int mysql_port;
  /* Unix socket to use to query the MySQL database */
  char* mysql_unix_port;
  /* Absolute path name of the directory containing the database */
  char* dir;
  /* Pid of the current process */
  int pid;
  /* Number of seconds to wait when a resource is locked */
  int lock_wait;
  /* Number of retries on a locked resource */
  int lock_max_loop;
  /* Server map */
  server_t* servers;

  /*
   * NOTE(review): hash table whose content is not visible from this
   * header; presumably query related data - confirm.
   */
  hash_t* query;

  /*
   * Start record stored by value.
   * NOTE(review): presumably the defaults applied by
   * webbase_default_start - confirm.
   */
  webbase_url_start_t default_start;

  /* Private */
  /* MySQL client object (type MYSQL from mysql.h), embedded by value */
  MYSQL mysql;
} webbase_t;

/*
 * Command line option tables.  Each function receives a caller supplied
 * table <options> / <options_help> and returns a pointer to a table of
 * the same element type.
 * NOTE(review): presumably the webbase specific entries are added to the
 * table given in argument - confirm.
 * NOTE(review): struct option and struct option_help are not declared in
 * the text of this header; presumably they are provided by getopt.h and
 * a project header that must be included first - confirm.
 */
struct option* webbase_options(struct option options[]);
struct option_help* webbase_help_options(struct option_help options_help[]);
/*
 * Return a pointer to a webbase_t built from the command line
 * <argc>/<argv> and the option table <options>.
 * NOTE(review): the value returned on failure and whether the MySQL
 * connection is opened here are not visible from this header - confirm.
 */
webbase_t* webbase_alloc(int argc, char** argv, struct option options[]);
/*
 * Release <params>.
 * NOTE(review): presumably the counterpart of webbase_alloc - confirm.
 */
void webbase_free(webbase_t* params);
/*
 * Lock the resource named by <varint> and <varchar>, formatted as %d%s.
 * NOTE(review): the meaning of the returned int is not visible from this
 * header; the lock_wait and lock_max_loop fields of <base> presumably
 * control waiting and retries - confirm.
 */
int webbase_lock(webbase_t* base, int varint, char* varchar);
/*
 * Unlock the resource named by <varint> and <varchar>, formatted as
 * %d%s.  The arguments follow the same convention as webbase_lock.
 */
void webbase_unlock(webbase_t* base, int varint, char* varchar);
/*
 * Set the ECILA_IGNORE environment variable to <varint><varchar>,
 * formatted as %d%s.
 * NOTE(review): presumably makes later lock requests on that resource
 * be ignored - confirm.
 */
void webbase_lock_ignore(webbase_t* base, int varint, char* varchar);

/*
 * start
 *
 * Comma separated column lists of the start table.
 * WEBBASE_START_FIELDS names info as info+0 and ends with count,delay.
 * WEBBASE_START_FIELDS_INSERT names plain info and places delay right
 * after robot_delay, ending with hook_info,count.  The two lists hold
 * the same columns in a different order and are therefore not
 * interchangeable position by position.
 * NOTE(review): presumably the first list is used for SELECT and the
 * second for INSERT, and info+0 asks MySQL for the numeric value of the
 * column - confirm.
 */
#define WEBBASE_START_FIELDS "rowid,url,url_md5,info+0,url_max_size,size_hrefs,min,depth,level,timeout,loaded_delay,modified_delay,not_found_delay,timeout_delay,robot_delay,accept,filter,allow,disallow,regex_allow,regex_disallow,hook_info,count,delay"
#define WEBBASE_START_FIELDS_INSERT "rowid,url,url_md5,info,url_max_size,size_hrefs,min,depth,level,timeout,loaded_delay,modified_delay,not_found_delay,timeout_delay,robot_delay,delay,accept,filter,allow,disallow,regex_allow,regex_disallow,hook_info,count"

/*
 * Callback prototype used when walking the start table.
 * <params> is the user defined data, <start> the record being visited.
 * The callback returns nothing.
 */
typedef void (*webbase_walk_start_callback_t)(char* params, webbase_url_start_t* start);
/*
 * For each record of the start table that matches <where>, call <func>
 * with the user defined data <params>.
 * NOTE(review): <where> is presumably the text of an SQL WHERE clause -
 * confirm.
 */
void webbase_walk_start(webbase_t* base, char* where, webbase_walk_start_callback_t func, char* params);

/*
 * Release the internal representation of a start table record.
 * NOTE(review): whether <start> itself or only the data it owns is
 * released is not visible from this header - confirm.
 */
void webbase_start_free(webbase_url_start_t* start);
/*
 * Reset the internal representation of a start table record <start> so
 * that it is empty.
 */
void webbase_start_reset(webbase_url_start_t* start);
/*
 * Fill the fields of <start> that were not specified with default
 * values.
 * NOTE(review): the defaults presumably come from the default_start
 * field of <base> - confirm.
 */
void webbase_default_start(webbase_t* base, webbase_url_start_t* start);
/*
 * Write the internal representation <start> of a start table record to
 * the database.
 */
void webbase_update_start(webbase_t* base, webbase_url_start_t* start);
/*
 * Change the state of the start record <start> to <state> in the
 * database.
 * NOTE(review): the valid values of <state> are not defined in this
 * header - confirm where they are declared.
 */
void webbase_start_state(webbase_t* base, webbase_url_start_t* start, int state);
/*
 * Read the start record matching <url_md5> from the database and fill
 * the internal representation <start>.
 * NOTE(review): presumably returns <start> on success and a null
 * pointer when no record matches - confirm.
 */
webbase_url_start_t* webbase_get_start(webbase_t* base, unsigned char* url_md5, webbase_url_start_t* start);
/*
 * Read the start record matching <rowid> from the database and fill the
 * internal representation <start>.
 * NOTE(review): presumably returns <start> on success and a null
 * pointer when no record matches - confirm.
 */
webbase_url_start_t* webbase_get_start_rowid(webbase_t* base, int rowid, webbase_url_start_t* start);
/*
 * Read the record of the start table that matches <webbase_url>, using
 * a lookup in the url2start table.
 * NOTE(review): the other declarations of this header name the table
 * start2url; url2start is presumably the same table - confirm.
 * NOTE(review): ownership of the returned record is not visible from
 * this header - confirm.
 */
webbase_url_start_t* webbase_get_start_of_url(webbase_t* base, webbase_url_t* webbase_url);
/*
 * Change the state of the start record identified by <url_md5> so that
 * it can be crawled.
 */
void webbase_enable_start(webbase_t* base, unsigned char* url_md5);
/*
 * Merge the start record <from> into <to>.
 * NOTE(review): which fields of <to> are overwritten is not visible
 * from this header - confirm.
 */
void webbase_merge_start(webbase_url_start_t* to, webbase_url_start_t* from);

/*
 * url
 *
 * Comma separated column lists for url records.  In
 * WEBBASE_URL_FIELDS, info is named as info+0 and the mtime,
 * mtime_error and crawl columns are wrapped in unix_timestamp().
 * NOTE(review): WEBBASE_URL_COMPLETE_FIELDS presumably lists the
 * columns of the record referenced by complete_rowid - confirm.
 */
#define WEBBASE_URL_FIELDS "rowid,url,url_md5,info+0,code,unix_timestamp(mtime),unix_timestamp(mtime_error),tags,content_type,content_length,md5,complete_rowid,unix_timestamp(crawl),hookid,extract,title,language"
#define WEBBASE_URL_COMPLETE_FIELDS "keywords,description,base_url,relative,absolute,location"

/*
 * Callback prototype used when walking the url table.
 * <params> is the user defined data, <webbase_url> the record being
 * visited.  The callback returns nothing.
 */
typedef void (*webbase_walk_url_callback_t)(char* params, webbase_url_t* webbase_url);
/*
 * For each record of the url table that matches <where>, call <func>
 * with the user defined data <params>.
 *
 * <flag> takes the same values as the <flag> argument of
 * webbase_get_url.
 */
void webbase_walk_url(webbase_t* base, char* where, webbase_walk_url_callback_t func, char* params, int flag);

/*
 * Write the internal representation <webbase_url> of a url record to
 * the database.
 * NOTE(review): the meaning of the returned int is not visible from
 * this header; presumably the rowid of the record - confirm.
 */
int webbase_insert_url(webbase_t* base, webbase_url_t* webbase_url);
/*
 * Create a record for <url> in the database and return its rowid.
 */
int webbase_visited(webbase_t* base, char* url);
/*
 * Return the rowid of the record matching <url_md5> if it exists in the
 * database, 0 otherwise.
 */
int webbase_exists_url(webbase_t* base, unsigned char* url_md5);
/*
 * Values of the <flag> argument of webbase_get_url.
 * NOTE(review): presumably LIGHT reads only WEBBASE_URL_FIELDS and ALL
 * also reads WEBBASE_URL_COMPLETE_FIELDS - confirm.
 */
#define WEBBASE_GET_URL_LIGHT	0x01
#define WEBBASE_GET_URL_ALL	0x02
/*
 * Read the url record matching <url_md5> from the database into
 * <webbase_url>.
 * NOTE(review): presumably returns <webbase_url> on success and a null
 * pointer when no record matches - confirm.
 */
webbase_url_t* webbase_get_url(webbase_t* base, unsigned char* url_md5, webbase_url_t* webbase_url, int flag);
/*
 * Read the url record matching <rowid> from the database into
 * <webbase_url>.  <flag> takes the same values as in webbase_get_url.
 * NOTE(review): presumably returns <webbase_url> on success and a null
 * pointer when no record matches - confirm.
 */
webbase_url_t* webbase_get_url_rowid(webbase_t* base, int rowid, webbase_url_t* webbase_url, int flag);
/*
 * Read the url record matching <field> = <quote><value><quote> from the
 * database into <webbase_url>.  <value_length> is given separately from
 * <value>.
 * NOTE(review): <value_length> is presumably the length of <value> in
 * bytes, and <quote> the quoting string placed on both sides of the
 * value in the query - confirm.
 */
webbase_url_t* webbase_get_url_1(webbase_t* base, char* quote, char* field, unsigned char* value, int value_length, webbase_url_t* webbase_url, int flag);

/*
 * start2url
 */
/*
 * Add the record <start_rowid>,<url_rowid>,<level> to the start2url
 * table.
 * NOTE(review): the meaning of the returned int is not visible from
 * this header - confirm.
 */
int webbase_insert_start2url(webbase_t* base, int start_rowid, int url_rowid, int level);
/*
 * Retrieve the start2url record matching <start_rowid> if not null, or
 * <url_rowid> if not null.  <start_rowid>, <url_rowid> and <level> are
 * replaced with the actual values of the record.
 * NOTE(review): "not null" may refer to the pointer or to the value it
 * points to; the meaning of the returned int is not visible from this
 * header either - confirm both.
 */
int webbase_get_start2url(webbase_t* base, int* start_rowid, int* url_rowid, int* level);
/*
 * Return the number of rows of the start2url table that match the rowid
 * of <start> and <rowid>.
 */
int webbase_counted(webbase_t* base, webbase_url_start_t* start, int rowid);

#endif /* _webbase_h */
