|
|
|
/* This is RTF to HTML converter, implemented as a text filter, generally.
|
|
|
|
Copyright (C) 2003 Valentin Lavrinenko, vlavrinenko@users.sourceforge.net
|
|
|
|
|
|
|
|
available at http://rtf2html.sf.net
|
|
|
|
|
|
|
|
Original available under the terms of the GNU LGPL2, and according
|
|
|
|
to those terms, relicensed under the GNU GPL2 for inclusion in Tellico */
|
|
|
|
|
|
|
|
/***************************************************************************
|
|
|
|
* *
|
|
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
|
|
* it under the terms of version 2 of the GNU General Public License as *
|
|
|
|
* published by the Free Software Foundation; *
|
|
|
|
* *
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
#include "rtf2html.h"
|
|
|
|
#include "rtf_table.h"
|
|
|
|
#include "rtf_tools.h"
|
|
|
|
#include "rtf_keyword.h"
|
|
|
|
#include "fmt_opts.h"
|
|
|
|
|
|
|
|
#include <cstdlib>
|
|
|
|
#include <stdexcept>
|
|
|
|
#include <fstream>
|
|
|
|
#include <iostream>
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
using Tellico::RTF2HTML;
|
|
|
|
using namespace rtf;
|
|
|
|
|
|
|
|
/* Keeps a copy of the RTF source text in m_text; nothing else is done at
   construction time. */
RTF2HTML::RTF2HTML(const TQString& text)
  : m_text(text)
{
}
|
|
|
|
|
|
|
|
/* Converts the RTF text stored in m_text into an HTML fragment.

   The input is scanned once, front to back.  Backslash-introduced control
   characters and keywords either emit HTML into the current paragraph
   buffer (par_html), update the current formatting state (cur_options), or
   feed the table being assembled.  '{' and '}' save and restore the
   formatting state on foStack.  Finished paragraphs are appended to the
   result, or to the current table cell while a table is open.

   Returns the accumulated HTML.  Truncated or malformed input (missing
   closing braces, a trailing backslash, an unmatched '}', colour indices
   outside the colour table, table keywords before any \trowd) is tolerated
   as far as this function controls the scan; rtf_keyword and skip_group
   advance the iterator on their own and are not bounded here. */
TQString RTF2HTML::toHTML() const {
  // Guard against ascii() handing back a null pointer: constructing a
  // std::string from a null pointer is undefined behaviour.
  const char* raw_text = m_text.ascii();
  std::string str_in = raw_text ? raw_text : "";

  std::string::iterator buf_in=str_in.begin(), buf_in_end=str_in.end();
  colorvect colortbl;
  fontmap fonttbl;
  std::string title;   // collected from \info, currently not emitted

  bool bAsterisk=false;
  fo_stack foStack;
  formatting_options cur_options;
  std::string html;
  html_text par_html(cur_options);

  /* CellDefs in rtf are really queer. We'll keep a list of them here
     and will give an iterator into this list to a row */
  table_cell_defs_list CellDefsList;
  // end() serves as the "no cell definitions yet" marker until \trowd is seen.
  table_cell_defs_list::iterator CurCellDefs=CellDefsList.end();
  table_cell_def* tcdCurCellDef=new table_cell_def;
  table_cell* tcCurCell=new table_cell;
  table_row* trCurRow=new table_row;
  table* tblCurTable=new table;
  int iLastRowLeft=0, iLastRowHeight=0;
  std::string t_str;

  bool bInTable=false;
  int iDocWidth=12240;   // updated by \paperw, not otherwise used here
  int iMarginLeft=1800;  // updated by \margl, not otherwise used here

  while (buf_in!=buf_in_end)
  {
    switch (*buf_in)
    {
    case '\\':
    {
      if (buf_in+1==buf_in_end)
      {
        // A lone backslash ends the input: there is no keyword to read.
        ++buf_in;
        break;
      }
      rtf_keyword kw(++buf_in);
      if (kw.is_control_char())
      {
        switch (kw.control_char())
        {
        case '\\': case '{': case '}':
          par_html.write(kw.control_char());
          break;
        case '\'':
        {
          // \'hh : two hex digits giving a character code.
          std::string stmp;
          while (stmp.size()<2 && buf_in!=buf_in_end)
            stmp+=*buf_in++;
          if (stmp.size()<2)
            break;   // input ended in the middle of the escape
          int code=std::strtol(stmp.c_str(), NULL, 16);
          switch (code)
          {
          case 167:
            par_html.write("&bull;");
            break;
          case 188:
            par_html.write("&hellip;");
            break;
          default:
            par_html.write((char)code);
          }
          break;
        }
        case '*':
          // \* marks a destination; the keyword that follows decides
          // which group gets skipped.
          bAsterisk=true;
          break;
        case '~':
          par_html.write("&nbsp;");
          break;
        case '\n':
          par_html.write("<br><br>");
          break;
        }
      }
      else if (bAsterisk)
      {
        bAsterisk=false;
        skip_group(buf_in);
      }
      else
      {
        switch (kw.keyword())
        {
        case rtf_keyword::rkw_filetbl:
        case rtf_keyword::rkw_stylesheet:
        case rtf_keyword::rkw_header:
        case rtf_keyword::rkw_footer: case rtf_keyword::rkw_headerf:
        case rtf_keyword::rkw_footerf: case rtf_keyword::rkw_pict:
        case rtf_keyword::rkw_object:
          // we'll skip such groups
          skip_group(buf_in);
          break;

        // document title
        case rtf_keyword::rkw_info:
        {
          int depth=1;
          bool in_title=false;
          // Consume the whole \info group, remembering the \title text.
          while (depth>0 && buf_in!=buf_in_end)
          {
            switch (*buf_in)
            {
            case '\\':
            {
              if (buf_in+1==buf_in_end)
              {
                ++buf_in;
                break;
              }
              rtf_keyword info_kw(++buf_in);
              if (info_kw.keyword()==rtf_keyword::rkw_title)
                in_title=true;
              break;
            }
            case '{':
              ++depth;
              ++buf_in;
              break;
            case '}':
              --depth;
              ++buf_in;
              in_title=false;
              break;
            default:
              if (in_title)
                title+=*buf_in;
              ++buf_in;
              break;
            }
          }
          break;
        }

        // color table
        case rtf_keyword::rkw_colortbl:
        {
          color clr;
          // Each ';' closes one colour entry; the group ends at '}'.
          while (buf_in!=buf_in_end && *buf_in!='}')
          {
            switch (*buf_in)
            {
            case '\\':
            {
              if (buf_in+1==buf_in_end)
              {
                ++buf_in;
                break;
              }
              rtf_keyword color_kw(++buf_in);
              switch (color_kw.keyword())
              {
              case rtf_keyword::rkw_red:
                clr.r=color_kw.parameter();
                break;
              case rtf_keyword::rkw_green:
                clr.g=color_kw.parameter();
                break;
              case rtf_keyword::rkw_blue:
                clr.b=color_kw.parameter();
                break;
              default: break;
              }
              break;
            }
            case ';':
              colortbl.push_back(clr);
              ++buf_in;
              break;
            default:
              ++buf_in;
              break;
            }
          }
          // Step over the closing '}' unless the input ended first.
          if (buf_in!=buf_in_end)
            ++buf_in;
          break;
        }

        // font table
        case rtf_keyword::rkw_fonttbl:
        {
          font fnt;
          int font_num=0;   // used as the key if an entry carries no \f
          bool full_name=false;
          bool in_font=false;
          // The table ends at the first '}' seen outside a font entry.
          while (buf_in!=buf_in_end && !(*buf_in=='}' && !in_font))
          {
            switch (*buf_in)
            {
            case '\\':
            {
              if (buf_in+1==buf_in_end)
              {
                ++buf_in;
                break;
              }
              rtf_keyword font_kw(++buf_in);
              if (font_kw.is_control_char() && font_kw.control_char()=='*')
              {
                skip_group(buf_in);
              }
              else
              {
                switch (font_kw.keyword())
                {
                case rtf_keyword::rkw_f:
                  font_num=font_kw.parameter();
                  break;
                case rtf_keyword::rkw_fprq:
                  fnt.pitch=font_kw.parameter();
                  break;
                case rtf_keyword::rkw_fcharset:
                  fnt.charset=font_kw.parameter();
                  break;
                case rtf_keyword::rkw_fnil:
                  fnt.family=font::ff_none;
                  break;
                case rtf_keyword::rkw_froman:
                  fnt.family=font::ff_serif;
                  break;
                case rtf_keyword::rkw_fswiss:
                  fnt.family=font::ff_sans_serif;
                  break;
                case rtf_keyword::rkw_fmodern:
                  fnt.family=font::ff_monospace;
                  break;
                case rtf_keyword::rkw_fscript:
                  fnt.family=font::ff_cursive;
                  break;
                case rtf_keyword::rkw_fdecor:
                  fnt.family=font::ff_fantasy;
                  break;
                default: break;
                }
              }
              break;
            }
            case '{':
              in_font=true;
              ++buf_in;
              break;
            case '}':
              in_font=false;
              fonttbl.insert(std::make_pair(font_num, fnt));
              fnt=font();
              full_name=false;
              ++buf_in;
              break;
            case ';':
              // The font name ends at the first ';'.
              full_name=true;
              ++buf_in;
              break;
            default:
              if (!full_name && in_font)
                fnt.name+=*buf_in;
              ++buf_in;
              break;
            }
          }
          // Step over the closing '}' unless the input ended first.
          if (buf_in!=buf_in_end)
            ++buf_in;
          break;
        }

        // special characters
        // HTML entities are used so that the output stays plain ASCII.
        case rtf_keyword::rkw_line: case rtf_keyword::rkw_softline:
          par_html.write("<br>");
          break;
        case rtf_keyword::rkw_tab:
          par_html.write("&nbsp;"); // maybe, this can be done better
          break;
        case rtf_keyword::rkw_enspace: case rtf_keyword::rkw_emspace:
          par_html.write("&nbsp;");
          break;
        case rtf_keyword::rkw_qmspace:
          par_html.write("&thinsp;");
          break;
        case rtf_keyword::rkw_endash:
          par_html.write("&ndash;");
          break;
        case rtf_keyword::rkw_emdash:
          par_html.write("&mdash;");
          break;
        case rtf_keyword::rkw_bullet:
          par_html.write("&bull;");
          break;
        case rtf_keyword::rkw_lquote:
          par_html.write("&lsquo;");
          break;
        case rtf_keyword::rkw_rquote:
          par_html.write("&rsquo;");
          break;
        case rtf_keyword::rkw_ldblquote:
          par_html.write("&ldquo;");
          break;
        case rtf_keyword::rkw_rdblquote:
          par_html.write("&rdquo;");
          break;

        // paragraph formatting
        case rtf_keyword::rkw_ql:
          cur_options.papAlign=formatting_options::align_left;
          break;
        case rtf_keyword::rkw_qr:
          cur_options.papAlign=formatting_options::align_right;
          break;
        case rtf_keyword::rkw_qc:
          cur_options.papAlign=formatting_options::align_center;
          break;
        case rtf_keyword::rkw_qj:
          cur_options.papAlign=formatting_options::align_justify;
          break;
        // The indent/spacing parameters are divided by 20 before storing.
        case rtf_keyword::rkw_fi:
          cur_options.papFirst=(int)rint(kw.parameter()/20);
          break;
        case rtf_keyword::rkw_li:
          cur_options.papLeft=(int)rint(kw.parameter()/20);
          break;
        case rtf_keyword::rkw_ri:
          cur_options.papRight=(int)rint(kw.parameter()/20);
          break;
        case rtf_keyword::rkw_sb:
          cur_options.papBefore=(int)rint(kw.parameter()/20);
          break;
        case rtf_keyword::rkw_sa:
          cur_options.papAfter=(int)rint(kw.parameter()/20);
          break;
        case rtf_keyword::rkw_pard:
          // Reset every paragraph property to its default.
          cur_options.papBefore=cur_options.papAfter=0;
          cur_options.papLeft=cur_options.papRight=0;
          cur_options.papFirst=0;
          cur_options.papAlign=formatting_options::align_left;
          cur_options.papInTbl=false;
          break;
        case rtf_keyword::rkw_par:
        case rtf_keyword::rkw_sect:
          t_str=cur_options.get_par_str()+par_html.str()
                +"&nbsp;"+par_html.close()+"</p>\n";
          if (!bInTable)
          {
            html+=t_str;
          }
          else if (cur_options.papInTbl)
          {
            tcCurCell->Text+=t_str;
          }
          else
          {
            // First paragraph outside the table: emit the table, then
            // start a fresh one.  The finished table is released here so
            // that it is not leaked when the pointer is reassigned.
            html+=tblCurTable->make()+t_str;
            bInTable=false;
            delete tblCurTable;
            tblCurTable=new table;
          }
          par_html.clear();
          break;

        // character formatting
        case rtf_keyword::rkw_super:
          cur_options.chpVAlign=
            kw.parameter()==0?formatting_options::va_normal
                             :formatting_options::va_sup;
          break;
        case rtf_keyword::rkw_sub:
          cur_options.chpVAlign=
            kw.parameter()==0?formatting_options::va_normal
                             :formatting_options::va_sub;
          break;
        case rtf_keyword::rkw_b:
          cur_options.chpBold=!(kw.parameter()==0);
          break;
        case rtf_keyword::rkw_i:
          cur_options.chpItalic=!(kw.parameter()==0);
          break;
        case rtf_keyword::rkw_ul:
          cur_options.chpUnderline=!(kw.parameter()==0);
          break;
        case rtf_keyword::rkw_ulnone:
          cur_options.chpUnderline=false;
          break;
        case rtf_keyword::rkw_fs:
          cur_options.chpFontSize=kw.parameter();
          break;
        case rtf_keyword::rkw_cf:
          // Ignore indices that fall outside the parsed colour table.
          if (kw.parameter()>=0
              && static_cast<std::size_t>(kw.parameter())<colortbl.size())
            cur_options.chpFColor=colortbl[kw.parameter()];
          break;
        case rtf_keyword::rkw_cb:
          // Ignore indices that fall outside the parsed colour table.
          if (kw.parameter()>=0
              && static_cast<std::size_t>(kw.parameter())<colortbl.size())
            cur_options.chpBColor=colortbl[kw.parameter()];
          break;
        case rtf_keyword::rkw_highlight:
          cur_options.chpHighlight=kw.parameter();
          break;
        case rtf_keyword::rkw_f:
          cur_options.chpFont=fonttbl[kw.parameter()];
          break;
        case rtf_keyword::rkw_plain:
          // Reset every character property to its default.
          cur_options.chpBold=cur_options.chpItalic
                             =cur_options.chpUnderline=false;
          cur_options.chpVAlign=formatting_options::va_normal;
          cur_options.chpFontSize=cur_options.chpHighlight=0;
          cur_options.chpFColor=cur_options.chpBColor=color();
          cur_options.chpFont=font();
          break;

        // table formatting
        case rtf_keyword::rkw_intbl:
          cur_options.papInTbl=true;
          break;
        case rtf_keyword::rkw_trowd:
          CurCellDefs=CellDefsList.insert(CellDefsList.end(),
                                          table_cell_defs());
          // fall through: \trowd also finishes a pending row, like \row
        case rtf_keyword::rkw_row:
          if (!trCurRow->Cells.empty())
          {
            // A row without any preceding \trowd still needs a valid
            // iterator, so give it an empty definition set.
            if (CurCellDefs==CellDefsList.end())
              CurCellDefs=CellDefsList.insert(CellDefsList.end(),
                                              table_cell_defs());
            trCurRow->CellDefs=CurCellDefs;
            // -1000 is compared against as "not set for this row";
            // inherit the value from the previous row in that case.
            if (trCurRow->Left==-1000)
              trCurRow->Left=iLastRowLeft;
            if (trCurRow->Height==-1000)
              trCurRow->Height=iLastRowHeight;
            tblCurTable->push_back(trCurRow);
            trCurRow=new table_row;
          }
          bInTable=true;
          break;
        case rtf_keyword::rkw_cell:
          t_str=cur_options.get_par_str()+par_html.str()
                +"&nbsp;"+par_html.close()+"</p>\n";
          tcCurCell->Text+=t_str;
          par_html.clear();
          trCurRow->Cells.push_back(tcCurCell);
          tcCurCell=new table_cell;
          break;
        case rtf_keyword::rkw_cellx:
          tcdCurCellDef->Right=kw.parameter();
          // \cellx before any \trowd: create the definition set on demand
          // instead of dereferencing an invalid iterator.
          if (CurCellDefs==CellDefsList.end())
            CurCellDefs=CellDefsList.insert(CellDefsList.end(),
                                            table_cell_defs());
          CurCellDefs->push_back(tcdCurCellDef);
          tcdCurCellDef=new table_cell_def;
          break;
        case rtf_keyword::rkw_trleft:
          trCurRow->Left=kw.parameter();
          iLastRowLeft=kw.parameter();
          break;
        case rtf_keyword::rkw_trrh:
          trCurRow->Height=kw.parameter();
          iLastRowHeight=kw.parameter();
          break;
        case rtf_keyword::rkw_clvmgf:
          tcdCurCellDef->FirstMerged=true;
          break;
        case rtf_keyword::rkw_clvmrg:
          tcdCurCellDef->Merged=true;
          break;
        // Each border keyword switches that border on and remembers it as
        // the one a following \brdrnone applies to.
        case rtf_keyword::rkw_clbrdrb:
          tcdCurCellDef->BorderBottom=true;
          tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderBottom);
          break;
        case rtf_keyword::rkw_clbrdrt:
          tcdCurCellDef->BorderTop=true;
          tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderTop);
          break;
        case rtf_keyword::rkw_clbrdrl:
          tcdCurCellDef->BorderLeft=true;
          tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderLeft);
          break;
        case rtf_keyword::rkw_clbrdrr:
          tcdCurCellDef->BorderRight=true;
          tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderRight);
          break;
        case rtf_keyword::rkw_brdrnone:
          if (tcdCurCellDef->ActiveBorder!=NULL)
          {
            *(tcdCurCellDef->ActiveBorder)=false;
          }
          break;
        case rtf_keyword::rkw_clvertalt:
          tcdCurCellDef->VAlign=table_cell_def::valign_top;
          break;
        case rtf_keyword::rkw_clvertalc:
          tcdCurCellDef->VAlign=table_cell_def::valign_center;
          break;
        case rtf_keyword::rkw_clvertalb:
          tcdCurCellDef->VAlign=table_cell_def::valign_bottom;
          break;

        // page formatting
        case rtf_keyword::rkw_paperw:
          iDocWidth=kw.parameter();
          break;
        case rtf_keyword::rkw_margl:
          iMarginLeft=kw.parameter();
          break;
        default: break;
        }
      }
      break;
    }
    case '{':
      // perform group opening actions here
      foStack.push(cur_options);
      ++buf_in;
      break;
    case '}':
      // perform group closing actions here
      // An unmatched '}' has nothing to restore; just step over it.
      if (!foStack.empty())
      {
        cur_options=foStack.top();
        foStack.pop();
      }
      ++buf_in;
      break;
    case 13:
    case 10:
      // Bare CR/LF in the input produce no output.
      ++buf_in;
      break;
    // Characters with a special meaning in HTML are written as entities.
    case '<':
      par_html.write("&lt;");
      ++buf_in;
      break;
    case '>':
      par_html.write("&gt;");
      ++buf_in;
      break;
    case '&':
      par_html.write("&amp;");
      ++buf_in;
      break;
    /* case ' ':
      par_html.write("&nbsp;");
      ++buf_in;
      break;*/
    default:
      par_html.write(*buf_in++);
    }
  }

  // Flush whatever is left in the paragraph buffer.
  // NOTE(review): a table that is still open when the input ends is not
  // emitted here (make() is only called from \par/\sect) - confirm intent.
  t_str=cur_options.get_par_str()+par_html.str()
        +"&nbsp;"+par_html.close()+"</p>\n";
  html+=t_str;

  delete tcCurCell;
  delete trCurRow;
  delete tblCurTable;
  delete tcdCurCellDef;

  return html;
}
|
|
|
|
|