You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

94 lines
1.9 KiB
Go

5 months ago
package main
import (
"crypto/tls"
"fmt"
"net/http"
"strings"
"time"
"golang.org/x/net/html"
)
// scrapeClient is the HTTP client shared by every scrapeData call. Reusing one
// client (and therefore one Transport) lets keep-alive connections be pooled
// across calls instead of being stranded in a throwaway Transport each time;
// IdleConnTimeout bounds how long an unused pooled connection stays open.
//
// NOTE(review): InsecureSkipVerify disables TLS certificate verification, so a
// man-in-the-middle can spoof responses. It is kept only to preserve existing
// behavior (e.g. hosts with self-signed or expired certificates) — confirm
// that this is intentional.
var scrapeClient = &http.Client{
	Timeout: 4 * time.Second,
	Transport: &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		IdleConnTimeout: 90 * time.Second,
	},
}

// nodeText returns the concatenation of every text node beneath n, in document
// order, with leading and trailing whitespace removed. Element and comment
// nodes contribute nothing themselves; only their text descendants do.
func nodeText(n *html.Node) string {
	var b strings.Builder
	var collect func(*html.Node)
	collect = func(cur *html.Node) {
		if cur.Type == html.TextNode {
			b.WriteString(cur.Data)
		}
		for c := cur.FirstChild; c != nil; c = c.NextSibling {
			collect(c)
		}
	}
	collect(n)
	return strings.TrimSpace(b.String())
}

// scrapeData fetches url and extracts descriptive text from the returned HTML.
//
// The result lists, in this order: the text of every <title> element, the
// content of every <meta> tag whose name/property is one of keywords,
// description, og:site_name, og:description or og:title (matched
// case-insensitively), and the text of every <h1>, <h2> and <h3> element.
// Empty values are skipped; the slice is nil when nothing was found.
//
// An error is returned when the request fails, when the response status is not
// 200 OK, or when the body cannot be parsed as HTML.
func scrapeData(url string) ([]string, error) {
	resp, err := scrapeClient.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("error fetching URL: %s", resp.Status)
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		return nil, err
	}

	// wantedMeta reports whether a meta name/property is one we collect.
	// Pages commonly write e.g. name="Description", so compare lowercased.
	wantedMeta := func(name string) bool {
		switch strings.ToLower(strings.TrimSpace(name)) {
		case "keywords", "description", "og:site_name", "og:description", "og:title":
			return true
		}
		return false
	}

	var title, metaTags, headers []string
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode {
			switch n.Data {
			case "title":
				if text := nodeText(n); text != "" {
					title = append(title, text)
				}
			case "meta":
				var name, content string
				for _, attr := range n.Attr {
					switch attr.Key {
					case "name", "property":
						name = attr.Val
					case "content":
						content = attr.Val
					}
				}
				if content != "" && wantedMeta(name) {
					metaTags = append(metaTags, content)
				}
			case "h1", "h2", "h3":
				// Headings often wrap their text in <a>, <span>, etc., so
				// gather all descendant text rather than reading only the
				// first child (which may be an element or a comment).
				if text := nodeText(n); text != "" {
					headers = append(headers, text)
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	// Combine title, metaTags, and headers into a single result slice.
	// Appending an empty slice is a no-op, so parts stays nil if all are empty.
	var parts []string
	parts = append(parts, title...)
	parts = append(parts, metaTags...)
	parts = append(parts, headers...)
	return parts, nil
}