// Files: platiparser/plati/goodscategory.go
// 2023-12-28 19:29:04 +03:00
//
// 233 lines, 6.7 KiB, Go

package plati
import (
"bytes"
"context"
"encoding/xml"
"fmt"
"io"
"log"
"math/rand"
"net/http"
"net/http/httputil"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
)
// GoodsRequest is the XML payload that GetGoods marshals and posts to the
// goods.asp endpoint. It serializes as a <digiseller.request> element whose
// children appear in field order.
type GoodsRequest struct {
	// XMLName fixes the root element name.
	XMLName xml.Name `xml:"digiseller.request"`

	// GuidAgent is sent as <guid_agent>; GetGoods fills it from the client.
	GuidAgent string `xml:"guid_agent"`

	// IDSection is sent as <id_section>.
	IDSection int `xml:"id_section"`

	// Lang is sent as <lang>.
	Lang string `xml:"lang"`

	// Encoding is sent as <encoding>; GetGoods always sets it to "utf-8".
	Encoding string `xml:"encoding"`

	// Page and Rows are sent as <page> and <rows>; presumably the page
	// number and page size — confirm against the API documentation.
	Page int `xml:"page"`
	Rows int `xml:"rows"`

	// Currency is sent as <currency>.
	Currency string `xml:"currency"`

	// Order is sent as <order>. There is no omitempty, so an unset value
	// still produces an empty element.
	Order string `xml:"order"`
}
// GoodsResponse mirrors the <digiseller.response> XML document that GetGoods
// decodes from the goods.asp reply. Every field is filled from the element
// (or attribute) named in its struct tag.
type GoodsResponse struct {
	XMLName xml.Name `xml:"digiseller.response"`

	// Retval and Retdesc presumably carry the API's result code and its
	// human-readable description — confirm against the API documentation.
	Retval  string `xml:"retval"`
	Retdesc string `xml:"retdesc"`

	// Section and paging information for the listing.
	IDSection   int    `xml:"id_section"`
	NameSection string `xml:"name_section"`
	Page        int    `xml:"page"`
	Order       string `xml:"order"`
	CntGoods    string `xml:"cnt_goods"` // decoded as text, unlike the int counters
	Pages       int    `xml:"pages"`

	// Rows is the <rows> element: Cnt is its "cnt" attribute and Row holds
	// one entry per <row> child.
	Rows struct {
		Cnt int `xml:"cnt,attr"`
		Row []struct {
			ID        string      `xml:"id,attr"`
			IDGoods   int         `xml:"id_goods"`
			NameGoods string      `xml:"name_goods"`
			Price     Stupidfloat `xml:"price"`
			Currency  string      `xml:"currency"`
			Discount  string      `xml:"discount"`
			Gift      string      `xml:"gift"`
			Reward    string      `xml:"reward"`

			IDSeller   int         `xml:"id_seller"`
			NameSeller string      `xml:"name_seller"`
			Rating     Stupidfloat `xml:"rating"`
			Summpay    Stupidfloat `xml:"summpay"`

			// SaleInfo is the <sale_info> element; all of its values are
			// kept as text.
			SaleInfo struct {
				CommonBasePrice string `xml:"common_base_price"`
				CommonPriceUsd  string `xml:"common_price_usd"`
				CommonPriceRur  string `xml:"common_price_rur"`
				CommonPriceEur  string `xml:"common_price_eur"`
				CommonPriceUah  string `xml:"common_price_uah"`
				SalePercent     string `xml:"sale_percent"`
			} `xml:"sale_info"`

			// Statistics is the <statistics> element of per-item counters.
			Statistics struct {
				CntSell                int `xml:"cnt_sell"`
				CntSellHidden          int `xml:"cnt_sell_hidden"`
				CntReturn              int `xml:"cnt_return"`
				CntReturnHidden        int `xml:"cnt_return_hidden"`
				CntGoodresponses       int `xml:"cnt_goodresponses"`
				CntGoodresponsesHidden int `xml:"cnt_goodresponses_hidden"`
				CntBadresponses        int `xml:"cnt_badresponses"`
				CntBadresponsesHidden  int `xml:"cnt_badresponses_hidden"`
			} `xml:"statistics"`
		} `xml:"row"`
	} `xml:"rows"`
}
// GetGoods posts a GoodsRequest for the given section to the plati.io XML
// endpoint and decodes the reply into a GoodsResponse.
//
// The request carries the client's agent GUID together with idSection, lang,
// page, rows and currency; the encoding is always "utf-8" and the order field
// is left empty. The outgoing request, including its body, is written to the
// standard logger.
//
// An error is returned when the payload cannot be marshalled, the request
// cannot be built or sent, the status is not 200 OK, or the body does not
// decode as a GoodsResponse.
func (c *Client) GetGoods(ctx context.Context, idSection int, lang string, page int, rows int, currency string) (*GoodsResponse, error) {
	reqBody, err := xml.Marshal(GoodsRequest{
		GuidAgent: c.guidAgent,
		IDSection: idSection,
		Lang:      lang,
		Encoding:  "utf-8",
		Page:      page,
		Rows:      rows,
		Currency:  currency,
	})
	if err != nil {
		return nil, fmt.Errorf("xml marshal GoodsRequest: %w", err)
	}

	// A failed constructor returns a nil request, which must not reach
	// DumpRequestOut or Do.
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://plati.io/xml/goods.asp", bytes.NewReader(reqBody))
	if err != nil {
		return nil, fmt.Errorf("new request: %w", err)
	}

	// Request logging is best-effort: a failed dump is reported but does not
	// abort the call.
	if dump, err := httputil.DumpRequestOut(req, true); err != nil {
		log.Printf("dump request: %v", err)
	} else {
		log.Printf("%s\n", string(dump))
	}

	resp, err := c.httpCli().Do(req)
	if err != nil {
		return nil, fmt.Errorf("http do: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("status code: %d", resp.StatusCode)
	}

	out := GoodsResponse{}
	if err := xml.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, fmt.Errorf("xml decode GoodsResponse: %w", err)
	}
	return &out, nil
}
// GetBlockGoodsCategory fetches one page of the plati.market goods-category
// HTML block and parses it into Good values.
//
// idC, idR, sort, page, rows and curr are placed verbatim into the query
// string as id_c, id_r, sort, page, rows and curr. A random rnd value is
// appended to every request (presumably as a cache buster). The request
// headers are written to the standard logger and the request is sent through
// doWithRetry expecting 200 OK.
//
// NOTE(review): lang is accepted but never used — the URL always sends
// lang=ru-RU. Left unchanged here so existing callers keep receiving the same
// page; confirm whether the parameter should be honoured.
func (c *Client) GetBlockGoodsCategory(ctx context.Context, idC int, idR int, sort string, page int, rows int, curr string, lang string) ([]*Good, error) {
	u := fmt.Sprintf("https://plati.market/asp/block_goods_category.asp?preorders=0&id_cb=0&id_c=%d&id_r=%d&sort=%s&page=%d&rows=%d&curr=%s&pp_only=false&lang=ru-RU&rnd=%f", idC, idR, sort, page, rows, curr, rand.Float32())

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, http.NoBody)
	if err != nil {
		return nil, fmt.Errorf("new request: %w", err)
	}

	// Request logging is best-effort: a failed dump is reported instead of
	// being silently overwritten, and does not abort the call.
	if dump, err := httputil.DumpRequestOut(req, false); err != nil {
		log.Printf("dump request: %v", err)
	} else {
		log.Printf("%s\n", string(dump))
	}

	resp, err := c.doWithRetry(req, http.StatusOK)
	if err != nil {
		return nil, fmt.Errorf("http do: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("status code: %d", resp.StatusCode)
	}

	goods, err := parseGoodsCategory(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("parse goods category: %w", err)
	}
	return goods, nil
}
// doWithRetry sends req with the client's HTTP client until a response with
// status expectedCode arrives, making at most five attempts.
//
// A transport error or an unexpected status triggers another attempt.
// Retrying stops early once the request's context is done, because further
// attempts would fail the same way. Before each retry the request body is
// rewound through req.GetBody when that is set, so a request with a payload
// resends it in full.
//
// On success the caller owns resp.Body and must close it. When the last
// attempt fails, the error reports the attempt count; for a status mismatch
// it also embeds a dump of the last response, and that response is returned
// with its body replaced by an in-memory copy.
func (c *Client) doWithRetry(req *http.Request, expectedCode int) (*http.Response, error) {
	const maxAttempts = 5

	for attempt := 1; ; attempt++ {
		if attempt > 1 && req.GetBody != nil {
			// The previous attempt consumed and closed the body.
			body, err := req.GetBody()
			if err != nil {
				return nil, fmt.Errorf("after %d attempts: rewind request body: %w", attempt-1, err)
			}
			req.Body = body
		}

		resp, err := c.httpCli().Do(req)
		retry := attempt < maxAttempts && req.Context().Err() == nil

		if err != nil {
			if retry {
				continue
			}
			return resp, fmt.Errorf("after %d attempts: %w", attempt, err)
		}
		if resp.StatusCode == expectedCode {
			return resp, nil
		}

		if retry {
			// Release this response now rather than deferring inside the
			// loop; draining first lets the connection be reused.
			_, _ = io.Copy(io.Discard, resp.Body)
			resp.Body.Close()
			continue
		}

		// The dump only enriches the error text, so a dump failure is
		// tolerated. DumpResponse swaps resp.Body for an in-memory copy on
		// success; the Close below releases the original body if that swap
		// did not happen and is a no-op otherwise.
		dump, _ := httputil.DumpResponse(resp, true)
		resp.Body.Close()
		return resp, fmt.Errorf("after %d attempts, status: %d, body: %s", attempt, resp.StatusCode, string(dump))
	}
}
// Good is one row scraped from the goods-category HTML table by
// parseGoodsCategory.
type Good struct {
	Name         string  // text of the first link in the product-title cell
	GoodLink     string  // href of that link
	Seller       string  // text of the first link in the product-merchant cell
	SellerLink   string  // href of that link
	SellerRating int     // integer parsed from the first <span> of the merchant cell
	Sold         int     // integer parsed from the product-sold cell
	Price        float64 // parsed from the first whitespace-separated token of the product-price cell
}
// parseGoodsCategory reads an HTML document from r and returns one Good per
// row matched by "table.goods-table-category>tbody>tr".
//
// Within a row, cells are recognised by CSS class:
//   - product-title: name and link come from the first <a>;
//   - product-merchant: seller name and link come from the first <a>, the
//     seller rating from the first <span>;
//   - product-sold: the cell text is the sold count;
//   - product-price: the first whitespace-separated token is the price.
//
// Numeric text is trimmed before parsing. A value that is missing or does
// not parse is logged and the field keeps its zero value; the row is still
// returned. The only error returned is a failure to parse the document, and
// the result is an empty, non-nil slice when no rows match.
func parseGoodsCategory(r io.Reader) ([]*Good, error) {
	doc, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		return nil, fmt.Errorf("goquery new document: %w", err)
	}

	goods := make([]*Good, 0)
	doc.Find("table.goods-table-category>tbody>tr").Each(func(_ int, tr *goquery.Selection) {
		good := &Good{}
		goods = append(goods, good)

		tr.Find("td").Each(func(_ int, td *goquery.Selection) {
			// The checks are independent on purpose: a cell carrying several
			// of these classes contributes to every matching field.
			if td.HasClass("product-title") {
				link := td.Find("a").First()
				good.Name = link.Text()
				good.GoodLink, _ = link.Attr("href") // a missing href leaves ""
			}

			if td.HasClass("product-merchant") {
				link := td.Find("a").First()
				good.Seller = link.Text()
				good.SellerLink, _ = link.Attr("href") // a missing href leaves ""

				ratingText := td.Find("span").First().Text()
				rating, err := strconv.Atoi(strings.TrimSpace(ratingText))
				if err != nil {
					log.Printf("cannot parse seller rating %s", ratingText)
				} else {
					good.SellerRating = rating
				}
			}

			if td.HasClass("product-sold") {
				sold, err := strconv.Atoi(strings.TrimSpace(td.Text()))
				if err != nil {
					log.Printf("cannot parse product sold %s", td.Text())
				} else {
					good.Sold = sold
				}
			}

			if td.HasClass("product-price") {
				// strings.Fields yields an empty slice for blank text, so
				// the first token must not be indexed unconditionally.
				tokens := strings.Fields(td.Text())
				if len(tokens) == 0 {
					log.Printf("cannot parse product price %s: %v", td.Text(), "empty cell")
					return
				}
				price, err := strconv.ParseFloat(tokens[0], 64)
				if err != nil {
					log.Printf("cannot parse product price %s: %v", td.Text(), err)
					return
				}
				good.Price = price
			}
		})
	})
	return goods, nil
}