aboutsummaryrefslogtreecommitdiffstats
path: root/lib/parse/hyperlinks_test.go
blob: 00a067645b5e9bf84310a56c361f536f49cb4314 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
package parse_test

import (
	"io"
	"strings"
	"testing"

	"git.sr.ht/~rjarry/aerc/lib/parse"
)

func TestHyperlinks(t *testing.T) {
	tests := []struct {
		name  string
		text  string
		links []string
	}{
		{
			name:  "http-link",
			text:  "http://aerc-mail.org",
			links: []string{"http://aerc-mail.org"},
		},
		{
			name:  "https-link",
			text:  "https://aerc-mail.org",
			links: []string{"https://aerc-mail.org"},
		},
		{
			name:  "https-link-in-text",
			text:  "text https://aerc-mail.org more text",
			links: []string{"https://aerc-mail.org"},
		},
		{
			name:  "https-link-in-parenthesis",
			text:  "text (https://aerc-mail.org) more text",
			links: []string{"https://aerc-mail.org"},
		},
		{
			name:  "https-link-in-quotes",
			text:  "text \"https://aerc-mail.org\" more text",
			links: []string{"https://aerc-mail.org"},
		},
		{
			name:  "https-link-in-angle-brackets",
			text:  "text <https://aerc-mail.org> more text",
			links: []string{"https://aerc-mail.org"},
		},
		{
			name:  "https-link-in-html",
			text:  "<a href=\"https://aerc-mail.org\">",
			links: []string{"https://aerc-mail.org"},
		},
		{
			name:  "https-link-twice",
			text:  "text https://aerc-mail.org more text https://aerc-mail.org more text",
			links: []string{"https://aerc-mail.org"},
		},
		{
			name:  "https-link-markdown",
			text:  "text [https://aerc-mail.org](https://aerc-mail.org) more text",
			links: []string{"https://aerc-mail.org"},
		},
		{
			name:  "multiple-links",
			text:  "text https://aerc-mail.org more text http://git.sr.ht/~rjarry/aerc more text",
			links: []string{"https://aerc-mail.org", "http://git.sr.ht/~rjarry/aerc"},
		},
		{
			name:  "rfc",
			text:  "text http://www.ietf.org/rfc/rfc2396.txt more text",
			links: []string{"http://www.ietf.org/rfc/rfc2396.txt"},
		},
		{
			name:  "http-with-query-and-fragment",
			text:  "text <http://example.com:8042/over/there?name=ferret#nose> more text",
			links: []string{"http://example.com:8042/over/there?name=ferret#nose"},
		},
		{
			name:  "http-with-at",
			text:  "text http://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm more text",
			links: []string{"http://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm"},
		},
		{
			name:  "https-with-fragment",
			text:  "text https://www.ics.uci.edu/pub/ietf/uri/#Related more text",
			links: []string{"https://www.ics.uci.edu/pub/ietf/uri/#Related"},
		},
		{
			name:  "https-with-query",
			text:  "text https://www.example.com/index.php?id_sezione=360&sid=3a5ebc944f41daa6f849f730f1 more text",
			links: []string{"https://www.example.com/index.php?id_sezione=360&sid=3a5ebc944f41daa6f849f730f1"},
		},
		{
			name:  "https-onedrive",
			text:  "I have a link like this in an email (I deleted a few characters here-and-there for privacy) https://1drv.ms/w/s!Ap-KLfhNxS4fRt6tIvw?e=dW8WLO",
			links: []string{"https://1drv.ms/w/s!Ap-KLfhNxS4fRt6tIvw?e=dW8WLO"},
		},
		{
			name:  "email",
			text:  "You can reach me via the somewhat strange, but nonetheless valid, email foo@baz.com",
			links: []string{"mailto:foo@baz.com"},
		},
		{
			name:  "mailto",
			text:  "You can reach me via the somewhat strange, but nonetheless valid, email mailto:bar@fooz.fr. Thank you",
			links: []string{"mailto:bar@fooz.fr"},
		},
		{
			name:  "mailto-ipv6",
			text:  "You can reach me via the somewhat strange, but nonetheless valid, email mailto:~mpldr/list@[2001:db8::7]",
			links: []string{"mailto:~mpldr/list@[2001:db8::7]"},
		},
		{
			name:  "mailto-ipv6-query",
			text:  "You can reach me via the somewhat strange, but nonetheless valid, email mailto:~mpldr/list@[2001:db8::7]?subject=whazzup%3F",
			links: []string{"mailto:~mpldr/list@[2001:db8::7]?subject=whazzup%3F"},
		},
		{
			name:  "simple email in <a href>",
			text:  `<a href="mailto:a@abc.com" rel="noopener noreferrer">`,
			links: []string{"mailto:a@abc.com"},
		},
		{
			name:  "simple email in <a> body",
			text:  `<a href="#" rel="noopener noreferrer">a@abc.com</a><br/><p>more text</p>`,
			links: []string{"mailto:a@abc.com"},
		},
		{
			name:  "emails in <a> href and body",
			text:  `<a href="mailto:a@abc.com" rel="noopener noreferrer">b@abc.com</a><br/><p>more text</p>`,
			links: []string{"mailto:a@abc.com", "mailto:b@abc.com"},
		},
		{
			name:  "email in &lt;...&gt;",
			text:  `<div>01.02.2023, 10:11, "Firstname Lastname" &lt;a@abc.com&gt;:</div>`,
			links: []string{"mailto:a@abc.com"},
		},
	}

	for i, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// make sure reader is exact copy of input reader
			reader, parsedLinks := parse.HttpLinks(strings.NewReader(test.text))
			if _, err := io.ReadAll(reader); err != nil {
				t.Skipf("could not read text: %v", err)
			}

			// check correct parsed links
			if len(parsedLinks) != len(test.links) {
				t.Errorf("different number of links: got %d but expected %d", len(parsedLinks), len(test.links))
			}
			linkMap := make(map[string]struct{})
			for _, got := range parsedLinks {
				linkMap[got] = struct{}{}
			}
			for _, expected := range test.links {
				if _, ok := linkMap[expected]; !ok {
					t.Errorf("link[%d] not parsed: %s", i, expected)
				}
			}
		})
	}
}