You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
1.9 KiB
Go
94 lines
1.9 KiB
Go
package main
|
|
|
|
import (
|
|
"crypto/tls"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// Function to scrape data from a URL
|
|
func scrapeData(url string) ([]string, error) {
|
|
client := &http.Client{
|
|
Timeout: 4 * time.Second,
|
|
Transport: &http.Transport{
|
|
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
|
},
|
|
}
|
|
|
|
resp, err := client.Get(url)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("error fetching URL: %s", resp.Status)
|
|
}
|
|
|
|
doc, err := html.Parse(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var title, metaTags, headers []string
|
|
|
|
var f func(*html.Node)
|
|
f = func(n *html.Node) {
|
|
if n.Type == html.ElementNode {
|
|
switch n.Data {
|
|
case "title":
|
|
if n.FirstChild != nil {
|
|
titleText := strings.TrimSpace(n.FirstChild.Data)
|
|
if titleText != "" {
|
|
title = append(title, titleText)
|
|
}
|
|
}
|
|
case "meta":
|
|
var name, content string
|
|
for _, attr := range n.Attr {
|
|
if attr.Key == "name" || attr.Key == "property" {
|
|
name = attr.Val
|
|
}
|
|
if attr.Key == "content" {
|
|
content = attr.Val
|
|
}
|
|
}
|
|
if (name == "keywords" || name == "description" ||
|
|
name == "og:site_name" || name == "og:description" || name == "og:title") &&
|
|
content != "" {
|
|
metaTags = append(metaTags, content)
|
|
}
|
|
case "h1", "h2", "h3":
|
|
if n.FirstChild != nil {
|
|
headerText := strings.TrimSpace(n.FirstChild.Data)
|
|
if headerText != "" {
|
|
headers = append(headers, headerText)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
f(c)
|
|
}
|
|
}
|
|
f(doc)
|
|
|
|
// Combine title, metaTags, and headers into a single result slice
|
|
var parts []string
|
|
if len(title) > 0 {
|
|
parts = append(parts, title...)
|
|
}
|
|
if len(metaTags) > 0 {
|
|
parts = append(parts, metaTags...)
|
|
}
|
|
if len(headers) > 0 {
|
|
parts = append(parts, headers...)
|
|
}
|
|
|
|
return parts, nil
|
|
}
|