/* 
   HTM2TXT v1.00 --- A small utility to make ASCII docs from HTML docs.
   Copyright (c) Henrik Stokseth, 1999.
*/

/* Capacity, in bytes, of the fixed-size string buffers used by this program. */
#define STRINGLENGTH 100
/* Target width, in characters, of a line of generated text. */
#define LINELENGTH 78

/* Boolean constants; left alone when TRUE has already been defined. */
#if !defined(TRUE)
#define TRUE 1
#define FALSE 0
#endif

#include <stdio.h>
#include <conio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

/* Streams shared by the whole program: HTML is read from infile, text is written to outfile. */
FILE *infile;
FILE *outfile;

/* Print the program banner and a one-line usage summary on standard output. */
void help()
{
  fputs("HTM2TXT v1.0 --- Copyright 1999, Henrik Stokseth.\n\n"
        "usage: htm2txt filename\n\n",
        stdout);
}

/*
 * Report a fatal error on standard output and terminate the program.
 *
 * errnum - error code; 1 means "error in the HTML file", every other value
 *          is reported as an undetermined error.  The code is also used as
 *          the exit status of the process.
 *
 * This function does not return.
 */
void error(int errnum)
{
  /* errnum is a signed int, so it has to be printed with %d (not %u) */
  printf("Error %d: ", errnum);
  switch(errnum)
  {
    case 1  : printf("Error in HTML file...\n"); break;
    default : printf("Undetermined error...\n"); break;
  }
  exit(errnum);
}

char *read_word(char *string)
{
  char process_char;
  int  finished = FALSE;
  int  pos;

  if(feof(infile))
  {
    string[0] = 0;
	return string;
  }

  while(!finished)
  {
    process_char = fgetc(infile);

    /* read html tag */
	if(process_char == '<')
	{
      string[0] = process_char;
	  for(pos=1;; pos++)
	  {
	    string[pos] = fgetc(infile);
        if(string[pos] == '>') break;
	  }
	  string[pos+1] = 0;
	  finished = TRUE;
	}

    /* ignore certain things */
	else if(process_char == '\n');
	else if(process_char == ' ');
	
	/* read word */
	else
	{
	  string[0] = process_char;
	  for(pos=1;; pos++)
	  {
	    string[pos] = fgetc(infile);

        if(feof(infile))        break;
		if(string[pos] == '<')  { fseek(infile, -1, SEEK_CUR); break;}
		if(string[pos] == ' ')  break;
		if(string[pos] == '\n') break;
	  }
	  string[pos] = 0;
	  finished = TRUE;
	}

  }

  return string;
}

/*
 * Tell whether a token is an HTML tag, i.e. whether its first character
 * is '<'.  Returns TRUE for a tag and FALSE for anything else.
 */
int is_html_tag(char *string)
{
  return (*string == '<') ? TRUE : FALSE;
}

void search_body()
{
  char tempstr[STRINGLENGTH];

  while(strncmp(strupr(read_word(tempstr)), "<BODY", 5)) if(feof(infile)) error(1);
}

/* Write `line` followed by a newline to outfile, then reset `line` to "". */
static void flush_line(char *line)
{
  fputs(line, outfile);
  fputc('\n', outfile);
  line[0] = 0;
}

/*
 * Convert the remainder of infile to plain text on outfile.
 *
 * Words are collected into a line buffer that is written out once the next
 * word would make the line longer than LINELENGTH characters.  Tags are
 * compared case-insensitively and must match exactly:
 *   <BR>, <P>, </P>  end the current line,
 *   <LI>             ends a non-empty line and starts a new one with "-",
 *   <HR>             ends the current line and draws a rule of LINELENGTH
 *                    dashes.
 * Every other tag is dropped.  Text still pending when the input ends is
 * written as a final line.
 */
void convert()
{
  char line[STRINGLENGTH] = "";
  char token[STRINGLENGTH] = "";
  int  count;

  /* feof() alone never becomes true after a read error, so test both */
  while(!feof(infile) && !ferror(infile))
  {
    /* read next */
    read_word(token);

    /* an empty token carries no text; do not turn it into a stray space */
    if(token[0] == 0) continue;

    if(is_html_tag(token))
    {
      /* upper-case once so the comparisons below ignore case */
      strupr(token);

      if(!strcmp(token, "<BR>") || !strcmp(token, "<P>") || !strcmp(token, "</P>"))
      {
        flush_line(line);
      }
      else if(!strcmp(token, "<LI>"))
      {
        if(strlen(line)) flush_line(line);
        strcpy(line, "-");
      }
      else if(!strcmp(token, "<HR>"))
      {
        flush_line(line);
        for(count=0; count<LINELENGTH; count++) fputc('-', outfile);
        fputc('\n', outfile);
      }
    }

    /* build line */
    else if(strlen(line)+strlen(token)+1 <= LINELENGTH)
    {
      if(strlen(line)) strcat(line, " ");
      strcat(line, token);
    }

    /* write line and start the next one with the word that did not fit */
    else
    {
      flush_line(line);
      strcpy(line, token);
    }
  }

  /* flush buffer */
  if(strlen(line)) flush_line(line);
}

/*
 * Convert the HTML file "<filename>.htm" into the text file "<filename>.txt".
 *
 * filename - base name without extension; together with the four-character
 *            extension it has to fit into a STRINGLENGTH-byte buffer.
 *
 * The input file is opened before the output file, so an existing .txt file
 * is not truncated when the .htm file is missing.  If the name is too long,
 * a file cannot be opened, or closing the output file fails, a message is
 * printed and the program exits with EXIT_FAILURE.
 */
void strip_html(char *filename)
{
  char tempstr[STRINGLENGTH] = "";

  /* ".htm" / ".txt" add four characters, plus one byte for the NUL */
  if(strlen(filename) + 4 >= STRINGLENGTH)
  {
    printf("Error: file name too long: %s\n", filename);
    exit(EXIT_FAILURE);
  }

  /* open input file */
  strcpy(tempstr, filename);
  strcat(tempstr, ".htm");
  infile = fopen(tempstr, "r");
  if(infile == NULL)
  {
    printf("Error: cannot open input file %s\n", tempstr);
    exit(EXIT_FAILURE);
  }

  /* open output file */
  strcpy(tempstr, filename);
  strcat(tempstr, ".txt");
  outfile = fopen(tempstr, "w");
  if(outfile == NULL)
  {
    printf("Error: cannot create output file %s\n", tempstr);
    fclose(infile);
    exit(EXIT_FAILURE);
  }

  /* search for html body */
  search_body();

  /* convert html body */
  convert();

  /* close files; fclose flushes, so a failure here means lost output */
  fclose(infile);
  if(fclose(outfile) != 0)
  {
    printf("Error: cannot write output file %s\n", tempstr);
    exit(EXIT_FAILURE);
  }
}

/*
 * Program entry point.
 *
 * With exactly one argument the named file is converted.  With any other
 * argument count the usage text is printed; running without arguments still
 * returns 0, while surplus arguments are treated as a usage error and
 * return EXIT_FAILURE instead of being ignored silently.
 */
int main(int argc, char *argv[])
{
  if(argc == 2)
  {
    strip_html(argv[1]);
    return 0;
  }

  help();

  return (argc > 2) ? EXIT_FAILURE : 0;
}
