From 3bbc6a0baaf0c4b47e8cdbfa44ed610929146a08 Mon Sep 17 00:00:00 2001 From: dilap54 Date: Wed, 27 Dec 2023 00:15:55 +0300 Subject: [PATCH] add parsing sumpays --- cmd/cli/main.go | 2 +- cmd/cli/plati.go | 38 +------- cmd/cli/sumpay.go | 90 +++++++++++++++++ gorm/digi_good.go | 21 ++++ internal/category/default.go | 41 ++++++++ .../20231226235948_digiseller_goods.sql | 21 ++++ plati/client.go | 14 +-- plati/goodscategory.go | 97 +++++++++++++++++++ plati/stupidfloat.go | 26 +++++ 9 files changed, 308 insertions(+), 42 deletions(-) create mode 100644 cmd/cli/sumpay.go create mode 100644 gorm/digi_good.go create mode 100644 internal/category/default.go create mode 100644 migrations/20231226235948_digiseller_goods.sql create mode 100644 plati/stupidfloat.go diff --git a/cmd/cli/main.go b/cmd/cli/main.go index 1b8f2b1..74d68f9 100644 --- a/cmd/cli/main.go +++ b/cmd/cli/main.go @@ -16,7 +16,7 @@ var commands = make([]*cli.Command, 0) func main() { s := &cli.App{ - Name: "ricapi", + Name: "platiparser", Commands: commands, Before: func(c *cli.Context) error { godotenv.Load(".env") diff --git a/cmd/cli/plati.go b/cmd/cli/plati.go index 5293998..243a268 100644 --- a/cmd/cli/plati.go +++ b/cmd/cli/plati.go @@ -6,11 +6,11 @@ import ( "fmt" "log" "os" - "strings" "time" "gitea.home.4it.me/dilap54/platiparser/gorm" "gitea.home.4it.me/dilap54/platiparser/healthbeat" + "gitea.home.4it.me/dilap54/platiparser/internal/category" "gitea.home.4it.me/dilap54/platiparser/plati" "gitea.home.4it.me/dilap54/platiparser/proxies" uuid "github.com/satori/go.uuid" @@ -27,8 +27,8 @@ var platiCommand = &cli.Command{ Action: func(c *cli.Context) error { timeStart := time.Now() - categories := openCategories("./categories.json").Content - categories = filterBySubstring("Gift", categories) + categories := category.OpenCategories("./categories.json").Content + categories = category.FilterBySubstring("Gift", categories) ctx := context.Background() @@ -101,22 +101,6 @@ func convertGoodsToGorm(cat 
*plati.Category, goods []*plati.Good) []*gorm.Good {
 	return out
 }
 
-func filterBySubstring(substring string, categories plati.Categories) plati.Categories {
-	out := make(plati.Categories, 0)
-	for _, c := range categories {
-		if len(c.Children) > 0 {
-			out = append(out, filterBySubstring(substring, c.Children)...)
-			continue
-		}
-
-		if strings.Contains(c.FlatName, substring) {
-			out = append(out, c)
-		}
-	}
-
-	return out
-}
-
 func printNames(categories plati.Categories) {
 	for _, c := range categories {
 		if len(c.Children) > 0 {
@@ -126,19 +110,3 @@ func printNames(categories plati.Categories) {
 		fmt.Printf("%s\n", c.FlatName)
 	}
 }
-
-func openCategories(fileName string) plati.CategoriesResponse {
-	f, err := os.OpenFile(fileName, os.O_RDONLY, 0400)
-	if err != nil {
-		panic(err)
-	}
-	defer f.Close()
-
-	out := plati.CategoriesResponse{}
-	if err := json.NewDecoder(f).Decode(&out); err != nil {
-		panic(err)
-	}
-	out.Content.FlatNames("")
-	out.Content.FixParentID(0)
-	return out
-}
diff --git a/cmd/cli/sumpay.go b/cmd/cli/sumpay.go
new file mode 100644
index 0000000..c659f84
--- /dev/null
+++ b/cmd/cli/sumpay.go
@@ -0,0 +1,102 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"time"
+
+	"gitea.home.4it.me/dilap54/platiparser/gorm"
+	"gitea.home.4it.me/dilap54/platiparser/internal/category"
+	"gitea.home.4it.me/dilap54/platiparser/plati"
+	"gitea.home.4it.me/dilap54/platiparser/proxies"
+	uuid "github.com/satori/go.uuid"
+	"github.com/urfave/cli/v2"
+)
+
+func init() {
+	commands = append(commands, sumpayCommand)
+}
+
+// sumpayCommand downloads the goods of every parent section of the "Gift"
+// categories, page by page, and stores each page in the digi_goods table.
+var sumpayCommand = &cli.Command{
+	Name: "sumpay",
+	Action: func(c *cli.Context) error {
+		const (
+			pageSize = 500 // rows requested per GetGoods call
+			maxPages = 100 // the page loop stops before this page number
+		)
+
+		categories := category.OpenCategories("./categories.json").Content
+		categories = category.FilterBySubstring("Gift", categories)
+
+		// Collect the distinct parent IDs of the matched categories.
+		parentIDs := map[int]struct{}{}
+		for _, cat := range categories {
+			parentIDs[cat.ParentID] = struct{}{}
+		}
+		sectionIDs := make([]int, 0, len(parentIDs))
+		for id := range parentIDs {
+			sectionIDs = append(sectionIDs, id)
+		}
+
+		ctx := context.Background()
+
+		db := gorm.GetDB()
+		platiCli := plati.New(proxies.Default())
+
+		for i, sectionID := range sectionIDs {
+			for page := 1; page < maxPages; page++ {
+				log.Printf("fetching goods [%d/%d], page %d for %d\n", i+1, len(sectionIDs), page, sectionID)
+				goods, err := platiCli.GetGoods(ctx, sectionID, "ru-RU", page, pageSize, "RUR")
+				if err != nil {
+					return fmt.Errorf("GetGoods: %w", err)
+				}
+
+				rows := len(goods.Rows.Row)
+				if rows > 0 {
+					log.Printf("inserting %d goods to DB", rows)
+					gormGoods := convertDigiGoodsToGorm(goods)
+					if err := db.Create(gormGoods).Error; err != nil {
+						return fmt.Errorf("db Create: %w", err)
+					}
+					log.Printf("inserted %d goods to DB", len(gormGoods))
+				}
+
+				// A short page is the last one. Stop only after it has been
+				// stored; breaking first would drop the tail of every section.
+				if rows < pageSize {
+					break
+				}
+			}
+		}
+
+		return nil
+	},
+}
+
+// convertDigiGoodsToGorm maps every row of goods to a DigiGood record with a
+// freshly generated UUID as its primary key.
+func convertDigiGoodsToGorm(goods *plati.GoodsResponse) []*gorm.DigiGood {
+	out := make([]*gorm.DigiGood, 0, len(goods.Rows.Row))
+
+	for _, g := range goods.Rows.Row {
+		gormGood := gorm.DigiGood{
+			ID:           uuid.NewV4().String(),
+			IDGoods:      g.IDGoods,
+			Name:         g.NameGoods,
+			IDSection:    goods.IDSection,
+			SellerID:     g.IDSeller,
+			Sellerrating: g.Rating.Float64(),
+			Sold:         g.Statistics.CntSell,
+			Returned:     g.Statistics.CntReturn,
+			Price:        g.Price.Float64(),
+			Sumpay:       g.Summpay.Float64(),
+			CreatedAt:    time.Now(),
+		}
+		out = append(out, &gormGood)
+	}
+
+	return out
+}
diff --git a/gorm/digi_good.go b/gorm/digi_good.go
new file mode 100644
index 0000000..5552a6c
--- /dev/null
+++ b/gorm/digi_good.go
@@ -0,0 +1,24 @@
+package gorm
+
+import "time"
+
+// DigiGood is one row of the digi_goods table: a snapshot of a single good
+// taken from a plati goods response.
+type DigiGood struct {
+	ID           string
+	IDGoods      int
+	Name         string
+	IDSection    int
+	SellerID     int
+	Sellerrating float64
+	Sold         int
+	Returned     int
+	Price        float64
+	Sumpay       float64
+	CreatedAt    time.Time
+}
+
+// TableName returns the name of the table that stores DigiGood rows.
+func (DigiGood) TableName() string {
+	return "digi_goods"
+}
diff --git a/internal/category/default.go b/internal/category/default.go
new file mode 100644
index 0000000..43d4cdd
--- /dev/null
+++ b/internal/category/default.go
@@ -0,0 +1,46 @@
+package category
+
+import (
+	"encoding/json"
+	"os"
+	"strings"
+
+	"gitea.home.4it.me/dilap54/platiparser/plati"
+)
+
+// FilterBySubstring returns the categories without children whose FlatName
+// contains substring, descending recursively into every category's Children.
+func FilterBySubstring(substring string, categories plati.Categories) plati.Categories {
+	out := make(plati.Categories, 0)
+	for _, c := range categories {
+		if len(c.Children) > 0 {
+			out = append(out, FilterBySubstring(substring, c.Children)...)
+			continue
+		}
+
+		if strings.Contains(c.FlatName, substring) {
+			out = append(out, c)
+		}
+	}
+
+	return out
+}
+
+// OpenCategories decodes the JSON file at fileName into a CategoriesResponse
+// and then calls FlatNames and FixParentID on its Content. It panics when the
+// file cannot be opened or decoded.
+func OpenCategories(fileName string) plati.CategoriesResponse {
+	f, err := os.Open(fileName)
+	if err != nil {
+		panic(err)
+	}
+	defer f.Close()
+
+	out := plati.CategoriesResponse{}
+	if err := json.NewDecoder(f).Decode(&out); err != nil {
+		panic(err)
+	}
+	out.Content.FlatNames("")
+	out.Content.FixParentID(0)
+	return out
+}
diff --git a/migrations/20231226235948_digiseller_goods.sql b/migrations/20231226235948_digiseller_goods.sql
new file mode 100644
index 0000000..e308a93
--- /dev/null
+++ b/migrations/20231226235948_digiseller_goods.sql
@@ -0,0 +1,21 @@
+-- +goose Up
+-- +goose StatementBegin
+CREATE TABLE digi_goods (
+    id VARCHAR NOT NULL PRIMARY KEY,
+    id_goods INTEGER NOT NULL,
+    name VARCHAR NOT NULL DEFAULT '',
+    id_section INTEGER NOT NULL,
+    seller_id INTEGER NOT NULL DEFAULT 0,
+    sellerrating DECIMAL(15, 2) NOT NULL DEFAULT 0,
+    sold INTEGER NOT NULL DEFAULT 0,
+    returned INTEGER NOT NULL DEFAULT 0,
+    price DECIMAL(15, 2) NOT NULL DEFAULT 0,
+    sumpay DECIMAL(15, 2) NOT NULL DEFAULT 0,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+-- +goose StatementEnd
+
+-- +goose Down
+-- +goose StatementBegin
+DROP TABLE digi_goods;
+-- +goose StatementEnd
diff --git a/plati/client.go b/plati/client.go
index aab8dfe..041e6d8 100644
--- a/plati/client.go
+++ b/plati/client.go
@@ -6,16 +6,18 @@ import (
 )
 
 type Client struct {
-	httpClis []*http.Client
-	token    string
-	sellerID int
+	httpClis  []*http.Client
+	token     string
+	sellerID  int
+	guidAgent string
 }
 
 func New(clients
[]*http.Client) *Client {
 	return &Client{
-		httpClis: clients,
-		token:    "7C731D89FED84B479B89F24F81BB8AF2",
-		sellerID: 1209592,
+		httpClis:  clients,
+		token:     "7C731D89FED84B479B89F24F81BB8AF2",
+		sellerID:  1209592,
+		guidAgent: "98232A21641B4DF0B13FE96A48CCAD9E",
 	}
 }
 
diff --git a/plati/goodscategory.go b/plati/goodscategory.go
index b2d5c07..8559c6e 100644
--- a/plati/goodscategory.go
+++ b/plati/goodscategory.go
@@ -1,7 +1,9 @@
 package plati
 
 import (
+	"bytes"
 	"context"
+	"encoding/xml"
 	"fmt"
 	"io"
 	"log"
@@ -14,6 +16,120 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
+// GoodsRequest is the XML body sent to goods.asp, marshalled as a
+// <digiseller.request> element.
+type GoodsRequest struct {
+	XMLName   xml.Name `xml:"digiseller.request"`
+	GuidAgent string   `xml:"guid_agent"`
+	IDSection int      `xml:"id_section"`
+	Lang      string   `xml:"lang"`
+	Encoding  string   `xml:"encoding"`
+	Page      int      `xml:"page"`
+	Rows      int      `xml:"rows"`
+	Currency  string   `xml:"currency"`
+	Order     string   `xml:"order"`
+}
+
+// GoodsResponse is the <digiseller.response> document returned by goods.asp.
+// Price, Rating and Summpay are decoded through Stupidfloat.
+type GoodsResponse struct {
+	XMLName     xml.Name `xml:"digiseller.response"`
+	Retval      string   `xml:"retval"`
+	Retdesc     string   `xml:"retdesc"`
+	IDSection   int      `xml:"id_section"`
+	NameSection string   `xml:"name_section"`
+	Page        string   `xml:"page"`
+	Order       string   `xml:"order"`
+	CntGoods    string   `xml:"cnt_goods"`
+	Pages       string   `xml:"pages"`
+	Rows        struct {
+		Cnt int `xml:"cnt,attr"`
+		Row []struct {
+			ID         string      `xml:"id,attr"`
+			IDGoods    int         `xml:"id_goods"`
+			NameGoods  string      `xml:"name_goods"`
+			Price      Stupidfloat `xml:"price"`
+			Currency   string      `xml:"currency"`
+			Discount   string      `xml:"discount"`
+			Gift       string      `xml:"gift"`
+			Reward     string      `xml:"reward"`
+			IDSeller   int         `xml:"id_seller"`
+			NameSeller string      `xml:"name_seller"`
+			Rating     Stupidfloat `xml:"rating"`
+			Summpay    Stupidfloat `xml:"summpay"`
+			SaleInfo   struct {
+				CommonBasePrice string `xml:"common_base_price"`
+				CommonPriceUsd  string `xml:"common_price_usd"`
+				CommonPriceRur  string `xml:"common_price_rur"`
+				CommonPriceEur  string `xml:"common_price_eur"`
+				CommonPriceUah  string `xml:"common_price_uah"`
+				SalePercent     string `xml:"sale_percent"`
+			} `xml:"sale_info"`
+			Statistics struct {
+				CntSell                int `xml:"cnt_sell"`
+				CntSellHidden          int `xml:"cnt_sell_hidden"`
+				CntReturn              int `xml:"cnt_return"`
+				CntReturnHidden        int `xml:"cnt_return_hidden"`
+				CntGoodresponses       int `xml:"cnt_goodresponses"`
+				CntGoodresponsesHidden int `xml:"cnt_goodresponses_hidden"`
+				CntBadresponses        int `xml:"cnt_badresponses"`
+				CntBadresponsesHidden  int `xml:"cnt_badresponses_hidden"`
+			} `xml:"statistics"`
+		} `xml:"row"`
+	} `xml:"rows"`
+}
+
+// GetGoods requests one page of the goods in section idSection from the
+// plati.io XML API and returns the decoded response.
+//
+// It returns an error when the request cannot be built or sent, when the
+// server answers with a status other than 200, or when the body cannot be
+// decoded as XML.
+func (c *Client) GetGoods(ctx context.Context, idSection int, lang string, page int, rows int, currency string) (*GoodsResponse, error) {
+	reqBody, err := xml.Marshal(GoodsRequest{
+		GuidAgent: c.guidAgent,
+		IDSection: idSection,
+		Lang:      lang,
+		Encoding:  "utf-8",
+		Page:      page,
+		Rows:      rows,
+		Currency:  currency,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("xml marshal GoodsRequest: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://plati.io/xml/goods.asp", bytes.NewReader(reqBody))
+	if err != nil {
+		return nil, fmt.Errorf("new request: %w", err)
+	}
+
+	// The dump is debug output only, so a failure to produce it is logged
+	// rather than returned.
+	if dump, err := httputil.DumpRequestOut(req, true); err != nil {
+		log.Printf("dump request: %v", err)
+	} else {
+		log.Printf("%s\n", dump)
+	}
+
+	resp, err := c.httpCli().Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("http do: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("status code: %d", resp.StatusCode)
+	}
+
+	out := GoodsResponse{}
+	if err := xml.NewDecoder(resp.Body).Decode(&out); err != nil {
+		return nil, fmt.Errorf("xml decode GoodsResponse: %w", err)
+	}
+
+	return &out, nil
+}
+
 func (c *Client) GetBlockGoodsCategory(ctx context.Context, idC int, idR int, sort string, page int, rows int, curr string, lang string) ([]*Good, error) {
 	u := fmt.Sprintf("https://plati.market/asp/block_goods_category.asp?preorders=0&id_cb=0&id_c=%d&id_r=%d&sort=%s&page=%d&rows=%d&curr=%s&pp_only=false&lang=ru-RU&rnd=%f", idC, idR, sort, page, rows, curr, rand.Float32())
 	req, err := http.NewRequestWithContext(ctx, "GET", u, http.NoBody)
diff
--git a/plati/stupidfloat.go b/plati/stupidfloat.go
new file mode 100644
index 0000000..bb57a36
--- /dev/null
+++ b/plati/stupidfloat.go
@@ -0,0 +1,41 @@
+package plati
+
+import (
+	"encoding/xml"
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// Stupidfloat is a float64 that can be decoded from XML text that uses a
+// comma as the decimal separator, e.g. "12,5".
+type Stupidfloat float64
+
+// Float64 returns f as a plain float64.
+func (f Stupidfloat) Float64() float64 {
+	return float64(f)
+}
+
+// UnmarshalXML decodes the element text into f, accepting "," as well as "."
+// as the decimal separator. An empty element decodes to 0. Text that is not a
+// number is returned as an error instead of silently turning into 0.
+func (f *Stupidfloat) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	var text string
+	if err := d.DecodeElement(&text, &start); err != nil {
+		return err
+	}
+
+	text = strings.TrimSpace(strings.ReplaceAll(text, ",", "."))
+	if text == "" {
+		*f = 0
+		return nil
+	}
+
+	value, err := strconv.ParseFloat(text, 64)
+	if err != nil {
+		return fmt.Errorf("parse %q as float: %w", text, err)
+	}
+
+	*f = Stupidfloat(value)
+	return nil
+}