summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kristaps Dzonsons <kristaps@bsd.lv> 2010-05-25 12:37:20 +0000
committer: Kristaps Dzonsons <kristaps@bsd.lv> 2010-05-25 12:37:20 +0000
commit: bd60b6e6dde1b45c862a7cb7092e5a6985629c1f (patch)
tree: 1b4eb8fbcedf0b30869618fd4adb0e76fcc58f91
parent: 67f57cab47c6371e6a42d8d6952b35485c7b0b85 (diff)
download: mandoc-bd60b6e6dde1b45c862a7cb7092e5a6985629c1f.tar.gz
Modified version of Ingo Schwarze's patch for hyphen-breaking.
Breakable hyphens are cued in the back-ends (with ASCII_HYPH) and acted upon in term.c or ignored in html.c. Also cleaned up XML decl printing (no need for extra vars).
-rw-r--r-- chars.c 1
-rw-r--r-- chars.h 2
-rw-r--r-- html.c 29
-rw-r--r-- libmandoc.h 1
-rw-r--r-- mandoc.c 28
-rw-r--r-- mandoc.h 4
-rw-r--r-- mdoc.c 4
-rw-r--r-- term.c 23
8 files changed, 69 insertions, 23 deletions
diff --git a/chars.c b/chars.c
index cb97f480..be527323 100644
--- a/chars.c
+++ b/chars.c
@@ -23,6 +23,7 @@
#include <stdlib.h>
#include <string.h>
+#include "mandoc.h"
#include "chars.h"
#define PRINT_HI 126
diff --git a/chars.h b/chars.h
index a5b21396..a18c2880 100644
--- a/chars.h
+++ b/chars.h
@@ -17,8 +17,6 @@
#ifndef CHARS_H
#define CHARS_H
-#define ASCII_NBRSP 31 /* non-breaking space */
-
__BEGIN_DECLS
enum chars {
diff --git a/html.c b/html.c
index 292e503d..5dca90a1 100644
--- a/html.c
+++ b/html.c
@@ -29,6 +29,7 @@
#include <string.h>
#include <unistd.h>
+#include "mandoc.h"
#include "out.h"
#include "chars.h"
#include "html.h"
@@ -296,11 +297,12 @@ print_encode(struct html *h, const char *p, int norecurse)
int len, nospace;
const char *seq;
enum roffdeco deco;
+ static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
nospace = 0;
for (; *p; p++) {
- sz = strcspn(p, "\\<>&");
+ sz = strcspn(p, rejs);
fwrite(p, 1, sz, stdout);
p += /* LINTED */
@@ -315,6 +317,15 @@ print_encode(struct html *h, const char *p, int norecurse)
} else if ('&' == *p) {
printf("&amp;");
continue;
+ } else if (ASCII_HYPH == *p) {
+ /*
+ * Note: "soft hyphens" aren't graphically
+ * displayed when not breaking the text; we want
+ * them to be displayed.
+ */
+ /*printf("&#173;");*/
+ putchar('-');
+ continue;
} else if ('\0' == *p)
break;
@@ -443,21 +454,9 @@ print_gen_decls(struct html *h)
static void
print_xmltype(struct html *h)
{
- const char *decl;
-
- switch (h->type) {
- case (HTML_XHTML_1_0_STRICT):
- decl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
- break;
- default:
- decl = NULL;
- break;
- }
-
- if (NULL == decl)
- return;
- printf("%s\n", decl);
+ if (HTML_XHTML_1_0_STRICT == h->type)
+ printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); /* keep the newline the removed "%s\n" emitted */
}
diff --git a/libmandoc.h b/libmandoc.h
index 638c8ebb..eb18dff4 100644
--- a/libmandoc.h
+++ b/libmandoc.h
@@ -30,6 +30,7 @@ time_t mandoc_a2time(int, const char *);
#define MTIME_MDOCDATE (1 << 2)
#define MTIME_ISO_8601 (1 << 3)
int mandoc_eos(const char *, size_t);
+int mandoc_hyph(const char *, const char *);
__END_DECLS
diff --git a/mandoc.c b/mandoc.c
index 43ccfb21..6801c589 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -340,3 +340,31 @@ mandoc_eos(const char *p, size_t sz)
return(0);
}
+
+
+int
+mandoc_hyph(const char *start, const char *c)
+{
+
+ /*
+ * Choose whether to break at a hyphenated character. We only
+ * do this if it's free-standing within a word.
+ *
+ * start: beginning of the text being examined.
+ * c:     position of the candidate character within that text
+ *        (the mdoc.c caller in this patch only passes a hyphen).
+ *
+ * Returns 1 if the position qualifies as a break point and 0
+ * otherwise.  Only the immediate neighbours c[-1] and c[1] are
+ * inspected; nothing is modified here.
+ */
+
+ /*
+ * Skip first/last character of buffer.  The c == start test is
+ * evaluated first, so the *(c - 1) reads further down are never
+ * reached for the first character of the text.
+ */
+ if (c == start || '\0' == *(c + 1))
+ return(0);
+ /*
+ * Skip first/last character of word: a tab or a space on either
+ * side means the character sits on a word boundary.
+ */
+ if ('\t' == *(c + 1) || '\t' == *(c - 1))
+ return(0);
+ if (' ' == *(c + 1) || ' ' == *(c - 1))
+ return(0);
+ /* Skip double invocations, i.e. a hyphen adjacent to another hyphen. */
+ if ('-' == *(c + 1) || '-' == *(c - 1))
+ return(0);
+ /* Skip escapes: a backslash directly before the character. */
+ if ('\\' == *(c - 1))
+ return(0);
+
+ return(1);
+}
diff --git a/mandoc.h b/mandoc.h
index 15952549..956b65bf 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -17,6 +17,10 @@
#ifndef MANDOC_H
#define MANDOC_H
+#define ASCII_NBRSP 31 /* non-breaking space */
+#define ASCII_HYPH 30 /* breakable hyphen */
+
+
__BEGIN_DECLS
enum mandocerr {
diff --git a/mdoc.c b/mdoc.c
index 6b14fdd5..fa633c55 100644
--- a/mdoc.c
+++ b/mdoc.c
@@ -563,6 +563,10 @@ mdoc_ptext(struct mdoc *m, int line, char *buf, int offs)
ws = NULL;
for (c = end = buf + offs; *c; c++) {
switch (*c) {
+ case '-':
+ if (mandoc_hyph(buf + offs, c))
+ *c = ASCII_HYPH;
+ break;
case ' ':
if (NULL == ws)
ws = c;
diff --git a/term.c b/term.c
index a41162c4..03d1ec5b 100644
--- a/term.c
+++ b/term.c
@@ -138,6 +138,7 @@ term_flushln(struct termp *p)
size_t vend; /* end of word visual position on output */
size_t bp; /* visual right border position */
int j; /* temporary loop index */
+ int jhy; /* last hyphen before line overflow */
size_t maxvis, mmax;
/*
@@ -190,20 +191,23 @@ term_flushln(struct termp *p)
*/
/* LINTED */
- for ( ; j < (int)p->col; j++) {
+ for (jhy = 0; j < (int)p->col; j++) {
if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
break;
- if (8 == p->buf[j])
- vend--;
- else
+ if (8 != p->buf[j]) {
+ if (vend > vis && vend < bp &&
+ ASCII_HYPH == p->buf[j])
+ jhy = j;
vend++;
+ } else
+ vend--;
}
/*
* Find out whether we would exceed the right margin.
* If so, break to the next line.
*/
- if (vend > bp && vis > 0) {
+ if (vend > bp && 0 == jhy && vis > 0) {
vend -= vis;
putchar('\n');
if (TERMP_NOBREAK & p->flags) {
@@ -231,6 +235,8 @@ term_flushln(struct termp *p)
/* Write out the [remaining] word. */
for ( ; i < (int)p->col; i++) {
+ if (vend > bp && jhy > 0 && i > jhy)
+ break;
if ('\t' == p->buf[i])
break;
if (' ' == p->buf[i]) {
@@ -256,7 +262,12 @@ term_flushln(struct termp *p)
p->viscol += vbl;
vbl = 0;
}
- putchar(p->buf[i]);
+
+ if (ASCII_HYPH == p->buf[i])
+ putchar('-');
+ else
+ putchar(p->buf[i]);
+
p->viscol += 1;
}
vend += vbl;