/* RTF2HTML.c, Chuck Shotton - 6/21/93 */
/************************************************************************
* This program takes a stab at converting RTF (Rich Text Format) files
* into HTML. There are some limitations that keep RTF from being able to
* easily represent things like in-line images and anchors as styles. In
* particular, RTF styles apply to entire "paragraphs", so anchors or
* images in the middle of a text stream can't easily be represented by
* styles. The intent is to ultimately use something like embedded text
* color changes to represent these constructs.
*
* In the meantime, you can take existing Word documents, apply the
* correct style sheet, and convert them to HTML with this tool.
*
* AUTHOR: Chuck Shotton, UT-Houston Academic Computing,
* cshotton@oac.hsc.uth.tmc.edu
*
* Dmitry Potapov, CapitalSoft
* dpotapov@capitalsoft.com
*
* David Lippi, Comune di Prato, Italy
* d.lippi@comune.prato.it
*
* Gabriele Bartolini, Comune di Prato, Italy
* g.bartolini@comune.prato.it
*
* USAGE: rtf2html [rtf_filename]
*
* BEHAVIOR:
* rtf2html will open the specified RTF input file or read from
* standard input, writing converted HTML to standard output.
*
* NOTES:
* The RTF document must be formatted with a style sheet that has
* style numberings that conform to the style_mappings table
* defined in this source file. Characters are converted according
* to the ANSI Windows 1252 code or Macintosh.
*
* MODIFICATIONS:
* 6/21/93 : Chuck Shotton - created version 1.0.
* 11/26/98 : Dmitry Potapov - version 1.1 beta
* 05/07/04 : David Lippi, Gabriele Bartolini - version 1.2
*
* Copyright (C) 2004 Comune di Prato
*
* For copyright details, see the file COPYING in your distribution
* or the GNU General Public License (GPL) version 2 or later
*
*
************************************************************************/
/* Note: the source is formatted with 4-character tabs. */
#include
#include
#include
#include
#include "charset1252.h"
#include "charsetmac.h"
/* MSVC spells the case-insensitive string compare _stricmp; alias the
 * strcasecmp name to it so the rest of the file can use one spelling. */
#ifdef _MSC_VER
# define strcasecmp _stricmp
#endif
/* Boolean constants, defined only if nothing earlier defined TRUE.
 * Note that TRUE is -1 here, not 1, so compare flags against FALSE (or test
 * them for non-zero) rather than against a literal 1. */
#ifndef TRUE
#define TRUE -1
#define FALSE 0
#endif
/* Fixed size limits used throughout this file. */
#define MAX_LEVELS 40 /* number of nested in-line style groups (pairs of {}); sizes style_state[] */
#define MAX_RTF_TOKEN 40 /* size of the style_number buffer and of each style_mappings entry */
#define MAX_INLINE_STYLES 5 /* number of in-line styles (bold, italic, etc.); also the bit width of TStyleState.s */
/* Bit set recording which in-line styles are active.
 * The field is MAX_INLINE_STYLES bits wide, and code elsewhere in this file
 * tests it against (1 << ...) masks.
 * NOTE(review): presumably bit N stands for the StyleState whose value is N
 * -- confirm against RTF_SetStyle. */
typedef struct tag_StyleState
{
unsigned char s: MAX_INLINE_STYLES;
} TStyleState;
/* Style identifiers. The 14 enumerators appear in the same order as the 14
 * rows of the styles[] tag table, so an enumerator can serve as a row index.
 * The first five are the in-line styles (MAX_INLINE_STYLES of them), then two
 * pseudo styles (paragraph, line break), then the heading styles. */
typedef enum { s_plain, s_bold, s_italic, s_underline, s_hidden, /*in-line styles*/
s_para, s_br, /*pseudo style*/
s_h0, s_h1, s_h2, s_h3, s_h4, s_h5, s_h6 /*heading styles*/
} StyleState;
/* Table of HTML tag pairs, one row per StyleState enumerator in declaration
 * order: column 0 is the start tag, column 1 is the end tag.
 * NOTE(review): the string literals below look damaged. Most are empty, one
 * row has only a single initializer, and several literals are split across
 * physical lines, which is not valid C. Presumably the HTML tag text was
 * stripped from this copy of the file; restore the literals from a pristine
 * source before building. */
char *styles[][2] = { /*HTML Start and end tags for styles*/
{"", ""},
{"", ""},
{"", ""},
{"", ""},
{""},
{"\n", "\n"}, /* {"\n", "
\n"}, */
{"
\n",""},
{"", ""},
{"", "
"},
{"", "
"},
{"", "
"},
{"", "
"},
{"", "
"},
{"", "
"}
};
/* style_mappings maps the style numbers in an RTF style sheet into one of   */
/* the (currently) six paragraph-oriented HTML styles (i.e. heading 1        */
/* through 6). Additional styles for lists, etc. should be added here.       */
/* Style info ultimately should be read from some sort of config file into   */
/* these tables.                                                             */
#define MAX_NAME_LEN 40 /* size of the style_name buffer */
char style_name[MAX_NAME_LEN]; /* NOTE(review): presumably scratch space for a style name read from the style sheet -- confirm */
#define STYLE_NUMBER 7 /* number of entries in style_namings and style_mappings */
/* RTF style names recognized by the converter; entry 0 is the empty name, */
/* entries 1..6 are "heading 1" .. "heading 6".                            */
char *style_namings[STYLE_NUMBER] = {
"", "heading 1", "heading 2", "heading 3", "heading 4", "heading 5",
"heading 6"
};
/* One MAX_RTF_TOKEN-byte string per style_namings entry.                  */
/* NOTE(review): presumably holds the style number found in the document's */
/* style sheet for the same-index name -- confirm where it is filled in.   */
char style_mappings[STYLE_NUMBER][MAX_RTF_TOKEN];
char style_number[MAX_RTF_TOKEN]; /* NOTE(review): presumably the style number currently being parsed -- confirm */
/* RTF tokens that mean something to the parser. All others are ignored. */
/* t_start and t_end are sentinels bracketing the real tokens.           */
typedef enum {
t_start,
t_fonttbl, t_colortbl, t_stylesheet, t_info, t_s, t_b, t_ul, t_ulw,
t_uld, t_uldb, t_i, t_v, t_plain, t_par, t_pict, t_tab, t_bullet,
t_cell, t_row, t_line, t_endash, t_emdash, t_rquote,
t_end
} TokenIndex;
/* Spelling of each token, in the same order as TokenIndex so that        */
/* tokens[t_xxx] is the control word for t_xxx. The "###" entries occupy  */
/* the t_start and t_end sentinel slots. Keep both lists in step when     */
/* adding a token.                                                        */
char *tokens[] = {
"###",
"fonttbl", "colortbl", "stylesheet", "info", "s", "b", "ul", "ulw",
"uld", "uldb", "i", "v", "plain", "par", "pict", "tab", "bullet",
"cell", "row", "line", "endash", "emdash", "rquote",
"###"
};
/* File-scope parser and output state. */
/* style_state has one in-line style set per nesting level (MAX_LEVELS of   */
/* them); curr_style is the style set currently in effect.                  */
/* NOTE(review): presumably style_state is indexed by `level` -- confirm.   */
TStyleState style_state[MAX_LEVELS], curr_style;
short curr_heading; /* NOTE(review): presumably the heading style in effect -- confirm */
/* Function pointer taking (FILE*, char*, char*).                           */
/* NOTE(review): presumably the handler invoked for RTF control words; the  */
/* meaning of the two strings is not visible here -- confirm at its users.  */
void (*RTF_DoControl)(FILE*,char*,char*);
char isBody; /* set to TRUE by RTF_PutChar once the header has been written */
char* title; /* set by openfile to the input file name, or "STDIN"; written out by RTF_PutHeader */
//FILE* f;
short level, /*current {} nesting level*/
skip_to_level,/*{} level to which parsing should skip (used to skip */
/* font tables, style sheets, color tables, etc.) */
gobble, /*Flag set to indicate all input should be discarded; while set, RTF_PutStr and RTF_PutChar write nothing */
ignore_styles;/*Set to ignore inline style expansions after style use*/
/* Charset */
// NOTE(review): presumably points at the conversion table selected for the
// current document -- confirm where it is assigned.
unsigned char** charset_table;
#define CHARSET_DEFAULT 0 // Index of the default charset to use
#define CHARSET_NUMBER 2 // Number of charsets supported
#define CHARSET_MAX_LENGTH 20 // Max number of chars in the charset (NOTE(review): presumably the length of a charset name -- confirm)
// Keywords used in the RTF standard to declare the document charset.
// Same order as charset_variablename: entry i here names table i there.
unsigned char *charset[CHARSET_NUMBER] = {
"ansi",
"mac"
};
// Conversion table for each charset keyword above.
// NOTE(review): charset1252 and mac are not defined in this file; presumably
// they come from charset1252.h and charsetmac.h -- confirm.
unsigned char **charset_variablename[CHARSET_NUMBER] = {
charset1252,
mac
};
/**************************************/
/* Select the input stream for the conversion.
 *   filename - path of the RTF file to read, or NULL to read standard input.
 *   f        - receives the opened stream (or stdin).
 * Returns 1 on success. Returns 0 when the named file cannot be opened, after
 * writing an error message to stderr. On success the global `title` is set
 * to the file name, or to "STDIN" when reading standard input. */
int openfile (char * filename, FILE ** f)
{
    /* No file name: read from standard input. */
    if (filename == NULL) {
        *f = stdin;
        title = "STDIN";
        return 1;
    }

    *f = fopen(filename, "r");
    if (*f == NULL) {
        fprintf(stderr, "\nError: Input file %s not found.\n", filename);
        return 0;
    }

    title = filename;
    return 1;
}
/**************************************/
/* Close stream f. Returns whatever fclose returns: 0 on success, EOF on
 * failure. */
int closefile (FILE * f)
{
    int status = fclose(f);

    return status;
}
/**************************************/
/* Read the next character from f, silently skipping carriage returns and
 * line feeds. The value is returned as a char, so an end-of-file result from
 * fgetc comes back as (char)EOF. */
char RTF_GetChar( FILE* f )
{
    int c;

    for (;;) {
        c = fgetc(f);
        if (c != '\r' && c != '\n') {
            break;
        }
    }
    return (char)c;
}
/**************************************/
/* Push ch back onto stream f so that the next read returns it.
 * Returns ungetc's result converted to char. */
char RTF_UnGetChar(FILE* f, char ch)
{
    int pushed = ungetc(ch, f);

    return (char)pushed;
}
/**************************************/
/* Write string s to standard output verbatim (no HTML escaping).
 * Nothing is written while the global `gobble` flag is set. */
void RTF_PutStr(char* s)
{
    if (!gobble) {
        fputs(s, stdout);
    }
}
/**************************************/
/* Write the document header, which includes the global `title`, through
 * RTF_PutStr.
 * NOTE(review): every literal here is a bare "\n"; presumably the HTML markup
 * that should surround the title was lost from this copy -- confirm against
 * a pristine source. */
void RTF_PutHeader()
{
    /* Header pieces in output order. */
    char *pieces[] = { "\n", title, "\n", "\n", "\n" };
    size_t k;

    for (k = 0; k < sizeof pieces / sizeof pieces[0]; k++) {
        RTF_PutStr(pieces[k]);
    }
}
/**************************************/
/* Write one character of document text to standard output as HTML.
 *   ch - the character to emit.
 * Nothing is written while the global `gobble` flag is set. The first
 * character written triggers the document header (RTF_PutHeader) and sets
 * isBody so that it happens only once. The characters '<', '>' and '&' are
 * written as the entities &lt;, &gt; and &amp; so that document text cannot
 * be mistaken for markup; every other character is written unchanged. */
void RTF_PutChar(char ch)
{
    if (gobble) {
        return;
    }

    if (!isBody) {
        RTF_PutHeader();
        /* NOTE(review): presumably this literal once carried the tag that
         * opens the document body -- confirm against a pristine source. */
        RTF_PutStr("\n");
        isBody = TRUE;
    }

    switch (ch) {
    case '<':
        RTF_PutStr("&lt;");
        break;
    case '>':
        RTF_PutStr("&gt;");
        break;
    case '&':
        RTF_PutStr("&amp;");
        break;
    default:
        fputc(ch, stdout);
        break;
    }
}
/**************************************/
/* Turn off every in-line style recorded in *s.
 *   s - style set to clear; its bit field is zero on return.
 * For each bit set in s->s, the end tag of the matching row of styles[] is
 * written through RTF_PutStr.
 * NOTE(review): the loop was rebuilt from a damaged source line in which
 * everything between '<' and '>' had been lost (only "for(i=0;i",
 * "s & (1<" and "s=0;" survived) -- confirm against a pristine source. */
void RTF_PlainStyle (TStyleState* s)
{
    int i;

    for (i = 0; i < MAX_INLINE_STYLES; i++) {
        if (s->s & (1 << i)) {
            RTF_PutStr(styles[i][1]);
        }
    }
    s->s = 0;
}
/**************************************/
void RTF_SetStyle(TStyleState* s, StyleState style)
{
if( (!ignore_styles||(style==s_hidden)) && ((s->s&(1<