/* * MD4C: Markdown parser for C * (http://github.com/mity/md4c) * * Copyright (c) 2016-2019 Martin Mitas * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include #include #include "md4c-html.h" #include "entity.h" #if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L /* C89/90 or old compilers in general may not understand "inline". 
*/ #if defined __GNUC__ #define inline __inline__ #elif defined _MSC_VER #define inline __inline #else #define inline #endif #endif #ifdef _WIN32 #define snprintf _snprintf #endif typedef struct MD_HTML_tag MD_HTML; struct MD_HTML_tag { void (*process_output)(const MD_CHAR*, MD_SIZE, void*); void* userdata; unsigned flags; int image_nesting_level; char escape_map[256]; }; #define NEED_HTML_ESC_FLAG 0x1 #define NEED_URL_ESC_FLAG 0x2 /***************************************** *** HTML rendering helper functions *** *****************************************/ #define ISDIGIT(ch) ('0' <= (ch) && (ch) <= '9') #define ISLOWER(ch) ('a' <= (ch) && (ch) <= 'z') #define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z') #define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch)) static inline void render_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size) { r->process_output(text, size, r->userdata); } /* Keep this as a macro. Most compiler should then be smart enough to replace * the strlen() call with a compile-time constant if the string is a C literal. */ #define RENDER_VERBATIM(r, verbatim) \ render_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim))) static void render_html_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size) { MD_OFFSET beg = 0; MD_OFFSET off = 0; /* Some characters need to be escaped in normal HTML text. */ #define NEED_HTML_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG) while(1) { /* Optimization: Use some loop unrolling. 
*/ while(off + 3 < size && !NEED_HTML_ESC(data[off+0]) && !NEED_HTML_ESC(data[off+1]) && !NEED_HTML_ESC(data[off+2]) && !NEED_HTML_ESC(data[off+3])) off += 4; while(off < size && !NEED_HTML_ESC(data[off])) off++; if(off > beg) render_verbatim(r, data + beg, off - beg); if(off < size) { switch(data[off]) { case '&': RENDER_VERBATIM(r, "&"); break; case '<': RENDER_VERBATIM(r, "<"); break; case '>': RENDER_VERBATIM(r, ">"); break; case '"': RENDER_VERBATIM(r, """); break; } off++; } else { break; } beg = off; } } static void render_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size) { static const MD_CHAR hex_chars[] = "0123456789ABCDEF"; MD_OFFSET beg = 0; MD_OFFSET off = 0; /* Some characters need to be escaped in URL attributes. */ #define NEED_URL_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG) while(1) { while(off < size && !NEED_URL_ESC(data[off])) off++; if(off > beg) render_verbatim(r, data + beg, off - beg); if(off < size) { char hex[3]; switch(data[off]) { case '&': RENDER_VERBATIM(r, "&"); break; default: hex[0] = '%'; hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf]; hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf]; render_verbatim(r, hex, 3); break; } off++; } else { break; } beg = off; } } static unsigned hex_val(char ch) { if('0' <= ch && ch <= '9') return ch - '0'; if('A' <= ch && ch <= 'Z') return ch - 'A' + 10; else return ch - 'a' + 10; } static void render_utf8_codepoint(MD_HTML* r, unsigned codepoint, void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE)) { static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd }; unsigned char utf8[4]; size_t n; if(codepoint <= 0x7f) { n = 1; utf8[0] = codepoint; } else if(codepoint <= 0x7ff) { n = 2; utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f); utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f); } else if(codepoint <= 0xffff) { n = 3; utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf); utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f); utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f); } else { n = 
4; utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7); utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f); utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f); utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f); } if(0 < codepoint && codepoint <= 0x10ffff) fn_append(r, (char*)utf8, (MD_SIZE)n); else fn_append(r, utf8_replacement_char, 3); } /* Translate entity to its UTF-8 equivalent, or output the verbatim one * if such entity is unknown (or if the translation is disabled). */ static void render_entity(MD_HTML* r, const MD_CHAR* text, MD_SIZE size, void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE)) { if(r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) { render_verbatim(r, text, size); return; } /* We assume UTF-8 output is what is desired. */ if(size > 3 && text[1] == '#') { unsigned codepoint = 0; if(text[2] == 'x' || text[2] == 'X') { /* Hexadecimal entity (e.g. "�")). */ MD_SIZE i; for(i = 3; i < size-1; i++) codepoint = 16 * codepoint + hex_val(text[i]); } else { /* Decimal entity (e.g. "&1234;") */ MD_SIZE i; for(i = 2; i < size-1; i++) codepoint = 10 * codepoint + (text[i] - '0'); } render_utf8_codepoint(r, codepoint, fn_append); return; } else { /* Named entity (e.g. " "). 
*/ const struct entity* ent; ent = entity_lookup(text, size); if(ent != NULL) { render_utf8_codepoint(r, ent->codepoints[0], fn_append); if(ent->codepoints[1]) render_utf8_codepoint(r, ent->codepoints[1], fn_append); return; } } fn_append(r, text, size); } static void render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr, void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE)) { int i; for(i = 0; attr->substr_offsets[i] < attr->size; i++) { MD_TEXTTYPE type = attr->substr_types[i]; MD_OFFSET off = attr->substr_offsets[i]; MD_SIZE size = attr->substr_offsets[i+1] - off; const MD_CHAR* text = attr->text + off; switch(type) { case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_verbatim); break; case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break; default: fn_append(r, text, size); break; } } } static void render_open_ol_block(MD_HTML* r, const MD_BLOCK_OL_DETAIL* det) { char buf[64]; if(det->start == 1) { RENDER_VERBATIM(r, "
    \n"); return; } snprintf(buf, sizeof(buf), "
      \n", det->start); RENDER_VERBATIM(r, buf); } static void render_open_li_block(MD_HTML* r, const MD_BLOCK_LI_DETAIL* det) { if(det->is_task) { RENDER_VERBATIM(r, "
    1. " "task_mark == 'x' || det->task_mark == 'X') RENDER_VERBATIM(r, " checked"); RENDER_VERBATIM(r, ">"); } else { RENDER_VERBATIM(r, "
    2. "); } } static void render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det) { RENDER_VERBATIM(r, "
      lang.text != NULL) {
              RENDER_VERBATIM(r, " class=\"language-");
              render_attribute(r, &det->lang, render_html_escaped);
              RENDER_VERBATIM(r, "\"");
          }
      
          RENDER_VERBATIM(r, ">");
      }
      
      /* Output the opening tag of a table cell: '<', the tag name given in
       * cell_type (a zero-terminated string) and finally the rest of the tag,
       * which carries the align attribute if the cell has an explicit alignment. */
      static void
      render_open_td_block(MD_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
      {
          const MD_CHAR* tag_tail;

          /* Any alignment other than left, center or right yields no attribute. */
          if(det->align == MD_ALIGN_LEFT)
              tag_tail = " align=\"left\">";
          else if(det->align == MD_ALIGN_CENTER)
              tag_tail = " align=\"center\">";
          else if(det->align == MD_ALIGN_RIGHT)
              tag_tail = " align=\"right\">";
          else
              tag_tail = ">";

          RENDER_VERBATIM(r, "<");
          RENDER_VERBATIM(r, cell_type);
          RENDER_VERBATIM(r, tag_tail);
      }
      
      /* Output the opening tag of a link. The href attribute is URL-escaped;
       * the title attribute is HTML-escaped and output only if the link has
       * a title. */
      static void
      render_open_a_span(MD_HTML* r, const MD_SPAN_A_DETAIL* det)
      {
          RENDER_VERBATIM(r, "<a href=\"");
          render_attribute(r, &det->href, render_url_escaped);

          if(det->title.text != NULL) {
              /* Close the href value and open the title one. */
              RENDER_VERBATIM(r, "\" title=\"");
              render_attribute(r, &det->title, render_html_escaped);
          }

          /* Close whichever attribute value is still open, and the tag. */
          RENDER_VERBATIM(r, "\">");
      }
      
      /* Output the beginning of an image tag: the URL-escaped src attribute
       * followed by the start of the alt attribute. The alt value is left open
       * on purpose; render_close_img_span() is what terminates it and the tag. */
      static void
      render_open_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
      {
          RENDER_VERBATIM(r, "<img src=\"");
          render_attribute(r, &det->src, render_url_escaped);

          RENDER_VERBATIM(r, "\" alt=\"");

          /* Remember we are inside of an image until the span is closed. */
          r->image_nesting_level++;
      }
      
      /* Finish the image tag started by render_open_img_span(): terminate the
       * still open attribute value, add the HTML-escaped title attribute if the
       * image has a title, and close the tag (as " />" when XHTML is requested). */
      static void
      render_close_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
      {
          const int has_title = (det->title.text != NULL);

          if(has_title) {
              RENDER_VERBATIM(r, "\" title=\"");
              render_attribute(r, &det->title, render_html_escaped);
          }

          if(r->flags & MD_HTML_FLAG_XHTML) {
              RENDER_VERBATIM(r, "\" />");
          } else {
              RENDER_VERBATIM(r, "\">");
          }

          /* Leave the image context entered by render_open_img_span(). */
          r->image_nesting_level--;
      }
      
      /* Output the opening tag of a wiki link, with the HTML-escaped link
       * target as the value of its attribute.
       *
       * NOTE(review): The tag and attribute names were lost from this line and
       * are restored here as in upstream MD4C; verify they match the closing
       * tag emitted by the span leave callback. */
      static void
      render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det)
      {
          RENDER_VERBATIM(r, "<x-wikilink data-target=\"");
          render_attribute(r, &det->target, render_html_escaped);

          RENDER_VERBATIM(r, "\">");
      }
      
      
      /**************************************
       ***  HTML renderer implementation  ***
       **************************************/
      
      static int
      enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
      {
          static const MD_CHAR* head[6] = { "

      ", "

      ", "

      ", "

      ", "

      ", "
      " }; MD_HTML* r = (MD_HTML*) userdata; switch(type) { case MD_BLOCK_DOC: /* noop */ break; case MD_BLOCK_QUOTE: RENDER_VERBATIM(r, "
      \n"); break; case MD_BLOCK_UL: RENDER_VERBATIM(r, "