init parser

This commit is contained in:
2023-12-20 00:36:49 +03:00
commit 872217d845
9 changed files with 477 additions and 0 deletions

1
categories.json Normal file

File diff suppressed because one or more lines are too long

72
cmd/cli/digi/digi.go Normal file
View File

@ -0,0 +1,72 @@
package main
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"strings"
"gitea.home.4it.me/dilap54/platiparser/plati"
)
// main downloads the category tree through the plati client, fetches the
// subcategories of every leaf category whose flat name starts with
// "/Ключи и пин-коды/Игры/", attaches them to the tree in place and writes the
// enriched tree to stdout as JSON. Progress is logged via the log package.
//
// Any request or encoding failure terminates the program through log.Fatal.
func main() {
	// Plan:
	//   - pull all subcategories named "steamgift"
	//     (misc/games is a subcategory as well)
	//   - pull all sections
	//   - for each of them, for each game, store the seller's nickname
	ctx := context.Background()
	platiCli := plati.New()

	categories, err := platiCli.GetCategories(ctx)
	if err != nil {
		log.Fatal(err)
	}

	// FlatName must be populated before filtering: filterCategories matches on it.
	categories.Content.FlatNames("")
	filtered := filterCategories("/Ключи и пин-коды/Игры/", categories.Content)

	//json.NewEncoder(os.Stdout).Encode(categories)
	for i, f := range filtered {
		// i+1 so the progress counter runs 1..N instead of 0..N-1.
		log.Printf("fetching [%d/%d] %s...\n", i+1, len(filtered), f.FlatName)
		subCategories, err := platiCli.GetSubCategories(ctx, f.ID)
		if err != nil {
			log.Fatal(err)
		}
		// f is a pointer into the original tree, so this assignment is visible
		// when the whole tree is encoded below.
		f.Children = subCategories.Content
	}

	if err := json.NewEncoder(os.Stdout).Encode(categories); err != nil {
		log.Fatal(err)
	}
}
// filterCategories walks the category tree depth-first and collects every leaf
// category (one without children) whose FlatName starts with prefix.
// Categories that have children are never returned themselves; only their
// descendants are inspected. The result is always a non-nil slice.
func filterCategories(prefix string, categories plati.Categories) plati.Categories {
	matched := make(plati.Categories, 0)
	for _, node := range categories {
		switch {
		case len(node.Children) > 0:
			// Inner node: descend, the node itself is not a candidate.
			matched = append(matched, filterCategories(prefix, node.Children)...)
		case strings.HasPrefix(node.FlatName, prefix):
			matched = append(matched, node)
		}
	}
	return matched
}
// printCategories prints one line per category to stdout in the form
// "<nameOfParent>/<name>", where name is the Value of the first Name entry
// (or empty when the category has no Name entries), and then recurses into
// the category's children using that path as the new parent.
func printCategories(nameOfParent string, categories []*plati.Category) {
	for _, node := range categories {
		var label string
		if len(node.Name) != 0 {
			label = node.Name[0].Value
		}
		fmt.Printf("%s/%s\n", nameOfParent, label)

		if len(node.Children) == 0 {
			continue
		}
		printCategories(nameOfParent+"/"+label, node.Children)
	}
}

74
cmd/cli/plati/plati.go Normal file
View File

@ -0,0 +1,74 @@
package main
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"strings"
"gitea.home.4it.me/dilap54/platiparser/plati"
)
// main loads the category tree from ./categories.json, keeps the leaf
// categories whose flat name contains "Gift", fetches the first page (100 rows,
// sorted by "cntSellDESC", prices in RUR) of goods for each of them and writes
// every result to stdout as one JSON document per category.
//
// Any request or encoding failure terminates the program through log.Fatal.
func main() {
	categories := openCategories("./categories.json").Content
	categories = filterBySubstring("Gift", categories)

	ctx := context.Background()
	platiCli := plati.New()
	// printNames(categories)

	out := json.NewEncoder(os.Stdout)
	for i, c := range categories {
		// i+1 so the progress counter runs 1..N instead of 0..N-1.
		log.Printf("fetching goods [%d/%d] for %s\n", i+1, len(categories), c.FlatName)
		goods, err := platiCli.GetBlockGoodsCategory(ctx, c.ID, c.ParentID, "cntSellDESC", 1, 100, "RUR", "ru-RU")
		if err != nil {
			log.Fatal(err)
		}
		if err := out.Encode(goods); err != nil {
			log.Fatal(err)
		}
	}
}
// filterBySubstring walks the category tree depth-first and collects every
// leaf category (one without children) whose FlatName contains substring.
// Categories that have children are never returned themselves; only their
// descendants are inspected. The result is always a non-nil slice.
func filterBySubstring(substring string, categories plati.Categories) plati.Categories {
	matched := make(plati.Categories, 0)
	for _, node := range categories {
		switch {
		case len(node.Children) > 0:
			// Inner node: descend, the node itself is not a candidate.
			matched = append(matched, filterBySubstring(substring, node.Children)...)
		case strings.Contains(node.FlatName, substring):
			matched = append(matched, node)
		}
	}
	return matched
}
// printNames prints the FlatName of every leaf category in the tree to stdout,
// one per line, visiting the tree depth-first. Categories that have children
// are not printed themselves.
func printNames(categories plati.Categories) {
	for _, node := range categories {
		if len(node.Children) == 0 {
			fmt.Printf("%s\n", node.FlatName)
			continue
		}
		printNames(node.Children)
	}
}
// openCategories reads a JSON-encoded plati.CategoriesResponse from fileName,
// then fills in the FlatName of every category (starting from an empty parent
// path) and the missing ParentID values (starting from parent 0).
//
// It panics when the file cannot be opened or its content cannot be decoded.
func openCategories(fileName string) plati.CategoriesResponse {
	file, err := os.Open(fileName)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	var resp plati.CategoriesResponse
	if decodeErr := json.NewDecoder(file).Decode(&resp); decodeErr != nil {
		panic(decodeErr)
	}

	resp.Content.FlatNames("")
	resp.Content.FixParentID(0)
	return resp
}

9
go.mod Normal file
View File

@ -0,0 +1,9 @@
module gitea.home.4it.me/dilap54/platiparser
go 1.21
require (
github.com/PuerkitoBio/goquery v1.8.1 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
golang.org/x/net v0.7.0 // indirect
)

35
go.sum Normal file
View File

@ -0,0 +1,35 @@
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

113
plati/categories.go Normal file
View File

@ -0,0 +1,113 @@
package plati
import (
"context"
"encoding/json"
"fmt"
"net/http"
)
// Category is a single node of the category tree as it appears in the JSON
// payloads decoded by this package.
type Category struct {
	// FlatName is the slash-separated path computed by Categories.FlatNames.
	FlatName string `json:"flat_name,omitempty"`
	ID       int    `json:"id"`
	Title    string `json:"title"`
	Level    int    `json:"level"`
	// ParentID is filled in by Categories.FixParentID when it is zero.
	ParentID int `json:"parent_id"`
	// Name holds localized names; only the first entry is used by FlatNames.
	Name []struct {
		Locale string `json:"locale"`
		Value  string `json:"value"`
	} `json:"name"`
	Children Categories `json:"children"`
	CanAdd   bool       `json:"can_add"`
}

// Categories is a list of category tree nodes.
type Categories []*Category

// FlatNames assigns FlatName to every category in the tree, recursively.
//
// A category's own path segment is the Value of its first Name entry (empty
// when there are no Name entries), prefixed with "<Title>/" when Title is not
// empty. FlatName is "<nameOfParent>/<segment>", and that value becomes the
// parent path for the category's children.
func (categories Categories) FlatNames(nameOfParent string) {
	for _, node := range categories {
		var segment string
		if len(node.Name) != 0 {
			segment = node.Name[0].Value
		}
		if node.Title != "" {
			segment = node.Title + "/" + segment
		}

		node.FlatName = fmt.Sprintf("%s/%s", nameOfParent, segment)

		if len(node.Children) == 0 {
			continue
		}
		node.Children.FlatNames(node.FlatName)
	}
}

// FixParentID sets ParentID to parentID on every category whose ParentID is
// zero, then recurses into the children using the category's own ID as their
// parent ID. Non-zero ParentID values are left untouched.
func (categories Categories) FixParentID(parentID int) {
	for idx := range categories {
		node := categories[idx]
		if node.ParentID == 0 {
			node.ParentID = parentID
		}
		node.Children.FixParentID(node.ID)
	}
}
// CategoriesResponse is the JSON envelope that GetCategories decodes from the
// platform categories endpoint. Content carries the category tree.
type CategoriesResponse struct {
// NOTE(review): Retval, Retdesc and Errors presumably carry the API's status
// code, status description and error details — confirm against the API docs.
Retval int `json:"retval"`
Retdesc any `json:"retdesc"`
Errors any `json:"errors"`
// Content is the list of top-level categories, each with nested Children.
Content Categories `json:"content"`
}
// GetCategories requests the category tree of the "plati" platform from the
// Digiseller dictionary API and decodes the JSON response.
//
// The request is bound to ctx. An error is returned when the request cannot be
// created or sent, when the server replies with a status other than 200 OK, or
// when the body cannot be decoded into a CategoriesResponse.
func (c *Client) GetCategories(ctx context.Context) (*CategoriesResponse, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.digiseller.ru/api/dictionary/platforms/categories/plati", http.NoBody)
	if err != nil {
		return nil, fmt.Errorf("new request: %w", err)
	}
	req.Header.Add("Accept", "application/json")

	resp, err := c.httpCli.Do(req)
	if err != nil {
		return nil, fmt.Errorf("http do: %w", err)
	}
	// dump, err := httputil.DumpResponse(resp, true)
	// log.Printf("%d %s", resp.StatusCode, string(dump))
	defer resp.Body.Close()

	// Reject error pages early instead of reporting them as JSON decode
	// failures (or, worse, decoding them into an empty result).
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status: %s", resp.Status)
	}

	out := CategoriesResponse{}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, fmt.Errorf("json decode: %w", err)
	}
	return &out, nil
}
// SubCategories is the JSON envelope that GetSubCategories decodes from the
// platform subcategories endpoint. Content carries the subcategory list.
type SubCategories struct {
// NOTE(review): Retval, Retdesc and Errors presumably carry the API's status
// code, status description and error details — confirm against the API docs.
Retval int `json:"retval"`
Retdesc any `json:"retdesc"`
Errors any `json:"errors"`
// Content is the list of subcategories returned for the requested category.
Content []*Category `json:"content"`
}
// GetSubCategories requests the subcategories of the category identified by
// categoryID from the Digiseller dictionary API and decodes the JSON response.
//
// The request is bound to ctx. An error is returned when the request cannot be
// created or sent, when the server replies with a status other than 200 OK, or
// when the body cannot be decoded into a SubCategories value.
func (c *Client) GetSubCategories(ctx context.Context, categoryID int) (*SubCategories, error) {
	endpoint := fmt.Sprintf("https://api.digiseller.ru/api/dictionary/platforms/subcategories/%d", categoryID)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, http.NoBody)
	if err != nil {
		return nil, fmt.Errorf("new request: %w", err)
	}
	req.Header.Add("Accept", "application/json")

	resp, err := c.httpCli.Do(req)
	if err != nil {
		return nil, fmt.Errorf("http do: %w", err)
	}
	// dump, err := httputil.DumpResponse(resp, true)
	// log.Printf("%d %s", resp.StatusCode, string(dump))
	defer resp.Body.Close()

	// Reject error pages early instead of reporting them as JSON decode
	// failures (or, worse, decoding them into an empty result).
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status: %s", resp.Status)
	}

	out := SubCategories{}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, fmt.Errorf("json decode: %w", err)
	}
	return &out, nil
}

17
plati/client.go Normal file
View File

@ -0,0 +1,17 @@
package plati
import "net/http"
// Client bundles the HTTP client used for all requests of this package with
// the credentials stored alongside it.
type Client struct {
	httpCli  *http.Client
	token    string
	sellerID int
}

// New returns a Client backed by a fresh zero-value http.Client and the
// built-in token and seller ID.
//
// NOTE(review): the token is hard-coded in source; if it is a real credential
// it should be moved out of the repository — confirm.
func New() *Client {
	cli := new(Client)
	cli.httpCli = &http.Client{}
	cli.token = "7C731D89FED84B479B89F24F81BB8AF2"
	cli.sellerID = 1209592
	return cli
}

103
plati/goodscategory.go Normal file
View File

@ -0,0 +1,103 @@
package plati
import (
"context"
"fmt"
"io"
"log"
"math/rand"
"net/http"
"net/http/httputil"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
)
// GetBlockGoodsCategory downloads one page of the plati.market goods listing
// for a category and parses it into a slice of Good.
//
// idC and idR are sent as the id_c and id_r query parameters, sort as sort,
// page and rows as the paging parameters, curr as the currency and lang as the
// language. An empty lang falls back to "ru-RU". A random rnd parameter is
// appended to every request. The outgoing request headers are logged.
//
// NOTE: sort, curr and lang are inserted into the URL without escaping, so
// callers must pass URL-safe values.
//
// An error is returned when the request cannot be created or sent, when the
// server replies with a status other than 200 OK, or when the HTML cannot be
// parsed.
func (c *Client) GetBlockGoodsCategory(ctx context.Context, idC int, idR int, sort string, page int, rows int, curr string, lang string) ([]*Good, error) {
	if lang == "" {
		// The URL used to hard-code this value; keep it as the default.
		lang = "ru-RU"
	}
	u := fmt.Sprintf("https://plati.market/asp/block_goods_category.asp?preorders=0&id_cb=0&id_c=%d&id_r=%d&sort=%s&page=%d&rows=%d&curr=%s&pp_only=false&lang=%s&rnd=%f", idC, idR, sort, page, rows, curr, lang, rand.Float32())

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, http.NoBody)
	if err != nil {
		return nil, fmt.Errorf("new request: %w", err)
	}

	// The dump is diagnostic only: a failure here must not abort the request,
	// but it should not be silently discarded either.
	if dump, dumpErr := httputil.DumpRequestOut(req, false); dumpErr != nil {
		log.Printf("dump request: %v", dumpErr)
	} else {
		log.Printf("%s\n", string(dump))
	}

	resp, err := c.httpCli.Do(req)
	if err != nil {
		return nil, fmt.Errorf("http do: %w", err)
	}
	// dump, err = httputil.DumpResponse(resp, true)
	// log.Printf("%d %s", resp.StatusCode, string(dump))
	defer resp.Body.Close()

	// An error page would otherwise be parsed as a listing with zero goods.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status: %s", resp.Status)
	}

	goods, err := parseGoodsCategory(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("parse goods category: %w", err)
	}
	return goods, nil
}
// Good is one product row extracted by parseGoodsCategory from a goods
// listing table.
type Good struct {
// Name is the text of the first link in the "product-title" cell.
Name string
// GoodLink is the href of the first link in the "product-title" cell.
GoodLink string
// Seller is the text of the first link in the "product-merchant" cell.
Seller string
// SellerLink is the href of the first link in the "product-merchant" cell.
SellerLink string
// SellerRating is parsed from the first span in the "product-merchant" cell.
SellerRating int
// Sold is parsed from the text of the "product-sold" cell.
Sold int
// Price is parsed from the first whitespace-separated token of the
// "product-price" cell.
Price float64
}
// parseGoodsCategory parses the HTML of a goods listing read from r and
// returns one Good per row matched by "table.goods-table-category>tbody>tr".
//
// Each row's td cells are recognised by CSS class:
//   - product-title:    Name and GoodLink from the first link
//   - product-merchant: Seller and SellerLink from the first link,
//     SellerRating from the first span
//   - product-sold:     Sold from the cell text
//   - product-price:    Price from the first whitespace-separated token
//
// A numeric field that cannot be parsed is logged and left at the value
// returned by strconv (zero for syntax errors); the row is still returned.
// The returned slice is never nil. An error is returned only when the document
// itself cannot be parsed.
func parseGoodsCategory(r io.Reader) ([]*Good, error) {
	doc, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		return nil, fmt.Errorf("goquery new document: %w", err)
	}

	goods := make([]*Good, 0)
	doc.Find("table.goods-table-category>tbody>tr").Each(func(_ int, tr *goquery.Selection) {
		good := &Good{}
		goods = append(goods, good)

		tr.Find("td").Each(func(_ int, td *goquery.Selection) {
			// Independent ifs rather than a switch: a cell may carry several
			// of these classes at once.
			if td.HasClass("product-title") {
				titleBlock := td.Find("a").First()
				good.Name = titleBlock.Text()
				good.GoodLink, _ = titleBlock.Attr("href") // missing href => ""
			}

			if td.HasClass("product-merchant") {
				sellerBlock := td.Find("a").First()
				good.Seller = sellerBlock.Text()
				good.SellerLink, _ = sellerBlock.Attr("href") // missing href => ""

				// Trim so that markup indentation around the number does not
				// break parsing.
				ratingText := strings.TrimSpace(td.Find("span").First().Text())
				sellerRating, err := strconv.ParseInt(ratingText, 10, 64)
				if err != nil {
					log.Printf("cannot parse seller rating %s", ratingText)
				}
				good.SellerRating = int(sellerRating)
			}

			if td.HasClass("product-sold") {
				soldText := strings.TrimSpace(td.Text())
				sold, err := strconv.ParseInt(soldText, 10, 64)
				if err != nil {
					log.Printf("cannot parse product sold %s", soldText)
				}
				good.Sold = int(sold)
			}

			if td.HasClass("product-price") {
				// The cell looks like "498 руб."; only the leading token is
				// numeric. Guard against an empty cell, where indexing the
				// Fields result would panic.
				priceFields := strings.Fields(td.Text())
				if len(priceFields) == 0 {
					log.Printf("cannot parse product price %s: empty cell", td.Text())
				} else {
					productPrice, err := strconv.ParseFloat(priceFields[0], 64)
					if err != nil {
						log.Printf("cannot parse product price %s: %v", td.Text(), err)
					}
					good.Price = productPrice
				}
			}
		})
	})
	return goods, nil
}

View File

@ -0,0 +1,53 @@
package plati
import (
"bytes"
"encoding/json"
"io"
"reflect"
"testing"
)
// Test_parseGoodsCategory feeds captured listing HTML to parseGoodsCategory
// and compares the parsed goods with the expected values field by field
// (via reflect.DeepEqual). On mismatch both sides are printed as JSON.
func Test_parseGoodsCategory(t *testing.T) {
	type testCase struct {
		name    string
		body    io.Reader
		want    []*Good
		wantErr bool
	}

	// Captured markup of a listing containing a single product row.
	const finalFantasyHTML = `<span class="GoodsBlock_oneline" id="GoodsBlock_1298"><a name=1298></a><div class="table_header clearfix"><h2 class="games-header"> <span><i class="platiru-loader" id="loader_1298"></i></span></h2></div><div><table class="goods-table goods-table-category"><thead><tr><th colspan="3" class="product-title">Название товара</td><th class="product-merchant"><div class="nowrap">Продавец <span class="rating_title">рейтинг</span></div></td><th class="product-sold">Продано</td><th class="product-price"><select onchange="return Goods_C('0',0, 1298,9901,'','cntSellDESC', 1, 100, this.value);"><option value=USD >USD</option><option value=RUR selected>RUB</option><option value=EUR >EUR</option><option value=UAH >UAH</option></select></div></tr></thead><td class="product-checkbox"><div><a class="product-to-notepad" onclick="return noteItem(4026129,0)"><i id="n4026129" class="unchecked" title="В закладки"></i></a></div></td><td class="product-chat"><div><i data-tooltip="m=chat" class="chat_online" onclick="return PopUp(197847, 0);"></i></a></div></td><td class="product-title"><div><a href="/itm/final-fantasy-type-0-hd-steam-gift-ru/4026129" title="FINAL FANTASY TYPE-0 HD (Steam Gift Россия)">FINAL FANTASY TYPE-0 HD (Steam Gift Россия)</a></div></td><td class="product-merchant" data-th='Продавец' class="product-merchant"><div><a href="/seller/igorderish/197847">IgorDeRish</a> <span>582</span></div></td><td class="product-sold" data-th='Продано' class="product-sold"><div>1</div></td><td data-th='Цена' class="product-price"><div class="product-price-inner"><div>498 </span>руб. </div></div></td></tr></table><div class="sort_wrapper"><div class="pages_nav" style="height:25px"></div></div></div>`

	cases := []testCase{
		{
			name: "final_fantasy",
			body: bytes.NewReader([]byte(finalFantasyHTML)),
			want: []*Good{
				{
					Name:         "FINAL FANTASY TYPE-0 HD (Steam Gift Россия)",
					GoodLink:     "/itm/final-fantasy-type-0-hd-steam-gift-ru/4026129",
					Seller:       "IgorDeRish",
					SellerLink:   "/seller/igorderish/197847",
					SellerRating: 582,
					Sold:         1,
					Price:        498.0,
				},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := parseGoodsCategory(tc.body)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("parseGoodsCategory() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			// Print as JSON: the slices hold pointers, which %v would show as
			// addresses.
			gotJson, _ := json.Marshal(got)
			wantJson, _ := json.Marshal(tc.want)
			t.Errorf("parseGoodsCategory() = %s, want %s", string(gotJson), string(wantJson))
		})
	}
}