diff --git a/.DS_Store b/.DS_Store
index dfb3019..b3b74c9 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.gitignore b/.gitignore
index dcd736f..d05df23 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,6 @@ sber-scrape-win-x86.exe
 .github/workflows/go.yml
 .DS_Store
 /bin*
-sber.csv
\ No newline at end of file
+sber.csv
+.html
+page.html
diff --git a/cmd/sber-scrape/main.go b/cmd/sber-scrape/main.go
index c839a8c..6f00435 100644
--- a/cmd/sber-scrape/main.go
+++ b/cmd/sber-scrape/main.go
@@ -14,14 +14,17 @@ import (
 
 var (
 	configPath string
-	urlFlag    string
+	mode       string
 	searchFlag string
+	urlFlag    string
 	parseURL   string
 )
 
 // ConfigFile
 func init() {
 	flag.StringVar(&configPath, "config-path", "config/config.toml", "path to config file")
+	// -m is forwarded to sparser.Start, which switches on "web" and "local".
+	flag.StringVar(&mode, "m", "web", "mode to run in. 'web' makes HTTP requests, while 'local' searches for .html file")
 	flag.StringVar(&searchFlag, "s", "", "search")
 	flag.StringVar(&urlFlag, "u", "", "parse url")
 }
@@ -51,7 +54,7 @@ func main() {
 	}
 
 	// Send a GET request to the URL and parse
-	if err := sparser.Start(config, parseURL); err != nil {
+	if err := sparser.Start(config, parseURL, mode); err != nil {
 		log.Fatal(err)
 	}
 }
diff --git a/internal/sparser/base.go b/internal/sparser/base.go
index 101484e..59051e1 100644
--- a/internal/sparser/base.go
+++ b/internal/sparser/base.go
@@ -8,16 +8,24 @@ import (
 	"sber-scrape/internal/store/sqlstore"
 )
 
-func Start(config *Config, url string) error {
+func Start(config *Config, url string, mode string) error {
 	store, db, err := chooseStore(config)
 	if err != nil {
 		log.Fatal(err)
 		return err
 	}
 	defer db.Close()
-	if err := GetHtml(url, store); err != nil {
-		log.Fatal(err)
+	switch mode {
+	case "web":
+		if err := GetHtml(url, store); err != nil {
+			log.Fatal(err)
+		}
+	case "local":
+		if err := GetLocalHtml("page.html", store); err != nil {
+			log.Fatal(err)
+		}
 	}
+
 	if store, ok := store.(*litestore.Store); ok {
 		log.Print("Preparing .csv file...")
 		CommaSeparated("sber", store)
diff 
--git a/internal/sparser/sparser.go b/internal/sparser/sparser.go
index 07a7355..74eeb5c 100644
--- a/internal/sparser/sparser.go
+++ b/internal/sparser/sparser.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"log"
 	"net/http"
+	"os"
 	"regexp"
 	"sber-scrape/internal/model"
 	"sber-scrape/internal/store"
@@ -93,3 +94,86 @@ func GetHtml(url string, store store.Store) error {
 	})
 	return nil
 }
+
+// GetLocalHtml parses a saved HTML page read from filename and stores every
+// product found in it.
+//
+// Each element matching ".item-block" is turned into a model.Product, passed
+// to store.Product().Create, and echoed to standard output. Numeric fields
+// are extracted best-effort: a value without digits is recorded as 0.
+//
+// It returns an error if the file cannot be opened, the document cannot be
+// parsed, or a product cannot be stored; processing stops at the first
+// product that fails to store.
+func GetLocalHtml(filename string, store store.Store) error {
+	file, err := os.Open(filename)
+	if err != nil {
+		return fmt.Errorf("opening %q: %w", filename, err)
+	}
+	defer file.Close()
+
+	doc, err := goquery.NewDocumentFromReader(file)
+	if err != nil {
+		return fmt.Errorf("parsing %q: %w", filename, err)
+	}
+
+	// Compiled once per call instead of once per extracted field.
+	nonDigits := regexp.MustCompile("[^0-9]+")
+
+	// toInt drops every non-digit from s and converts what is left. The
+	// conversion error is deliberately ignored: text without digits counts
+	// as 0, which keeps extraction best-effort when a field has no number.
+	toInt := func(s string) int {
+		n, _ := strconv.Atoi(nonDigits.ReplaceAllString(s, ""))
+		return n
+	}
+
+	var createErr error
+	doc.Find(".item-block").EachWithBreak(func(_ int, itemBlock *goquery.Selection) bool {
+		// text returns the text under selector with tabs removed and
+		// surrounding whitespace trimmed.
+		text := func(selector string) string {
+			raw := itemBlock.Find(selector).Text()
+			return strings.TrimSpace(strings.ReplaceAll(raw, "\t", ""))
+		}
+
+		itemTitle := text(".item-title")
+		itemPrice := toInt(text(".item-price"))
+		bonusAmount := toInt(text(".bonus-amount"))
+		bonusPercent := toInt(text(".bonus-percent"))
+		discount := toInt(text(".discount-percentage__value"))
+
+		// A missing attribute yields "", so the ID falls back to 0 and the
+		// link to the bare site root.
+		productLink := itemBlock.Find(".ddl_product_link")
+		productIDText, _ := productLink.Attr("data-product-id")
+		productID := toInt(productIDText)
+		href, _ := productLink.Attr("href")
+		link := "https://megamarket.ru" + href
+
+		p := &model.Product{
+			Title:        itemTitle,
+			Price:        itemPrice,
+			BonusAmount:  bonusAmount,
+			BonusPercent: bonusPercent,
+			Discount:     discount,
+			ProductID:    productID,
+			Link:         link,
+		}
+		if err := store.Product().Create(p); err != nil {
+			createErr = fmt.Errorf("storing product %d: %w", productID, err)
+			return false // stop iterating; the error is returned below
+		}
+
+		// Print the extracted data
+		fmt.Println("Title: ", itemTitle)
+		fmt.Println("Price: ", itemPrice)
+		fmt.Println("SBonuses: ", bonusAmount)
+		fmt.Println("SBonuses %: ", bonusPercent)
+		fmt.Println("Discount: ", discount)
+		fmt.Println("Product ID", productID)
+		fmt.Println("URL: ", link)
+		return true
+	})
+	return createErr
+}