blob: 2087a55cb62f0dd5b4c2e739090d78bd7a75e1b0 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
package parse
import (
"bufio"
"bytes"
"io"
"regexp"
"strings"
)
var (
	// submatch is a single capture group that matches one http:// or
	// https:// URL: scheme, host characters, a dot, a 1-10 character
	// top-level part ending on a word boundary, and an optional tail of
	// path/query/fragment characters.
	submatch = `(https?:\/\/[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,10}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*))`

	// httpRe matches a URL in one of four forms, tried in this order:
	// "url", (url), <url>, or the bare url. Each alternative embeds its own
	// copy of submatch, so a match fills exactly one of the capture groups
	// 1-4 with the URL stripped of its surrounding delimiters.
	httpRe = regexp.MustCompile(
		`"` + submatch + `"` +
			`|\(` + submatch + `\)` +
			`|<` + submatch + `>` +
			`|` + submatch,
	)
)
// HttpLinks reads r line by line, collects the http and https links it finds,
// and returns a reader holding a copy of every byte consumed from r together
// with the collected links.
//
// Lines that do not contain the substring "http" are skipped. Every other
// line is split on whitespace and each word is matched against httpRe, so at
// most one link is taken per word and any surrounding quotes, parentheses or
// angle brackets are stripped. Links are de-duplicated and returned in the
// order in which they first appear. The returned slice is never nil.
//
// Reading stops at the first error reported by r, including io.EOF. The
// signature has no error result, so the error is not reported; the returned
// reader then holds whatever was read up to that point.
func HttpLinks(r io.Reader) (io.Reader, []string) {
	var buf bytes.Buffer

	// bufio.Reader is used rather than bufio.Scanner because Scanner gives
	// up on any line longer than its 64 KiB token limit. That would drop
	// every later link and leave the tee'd copy in buf truncated.
	br := bufio.NewReader(io.TeeReader(r, &buf))

	seen := make(map[string]struct{})
	// Kept as an empty, non-nil slice so callers observe the same value as
	// before when no link is found.
	results := []string{}

	for {
		line, err := br.ReadString('\n')

		// ReadString may return data together with an error (for example a
		// last line without a trailing newline), so process the line first.
		if strings.Contains(line, "http") {
			for _, word := range strings.Fields(line) {
				groups := httpRe.FindStringSubmatch(word)
				if len(groups) == 0 {
					continue
				}
				// groups[0] is the whole match including delimiters; the
				// remaining entries are the capture groups holding the URL.
				for _, link := range groups[1:] {
					if link == "" {
						continue
					}
					link = strings.TrimSpace(link)
					if _, dup := seen[link]; dup {
						continue
					}
					seen[link] = struct{}{}
					results = append(results, link)
				}
			}
		}

		if err != nil {
			break
		}
	}

	return &buf, results
}
|