mirror of
https://github.com/wasrusgen/zov-tech.git
synced 2026-06-03 15:04:50 +00:00
wb: relevance filter — discard anti-bot trash products (платья/обувь in fridge search)
WB sometimes responds with 1–2 unrelated products instead of an HTTP 429 status; for example, the query 'Haier холодильник' was returning 'Платье вечернее'. Fix: _is_relevant(product, query) checks that at least one significant query word (>= 3 characters) appears in the product name or brand. Products with no match are filtered out, and an empty result is returned if none remain. Tradeoff: a valid product may occasionally be rejected when the query is overly specific (e.g. an exact SKU), but that is acceptable — we fall through to the next query variant.
This commit is contained in:
parent
d84a53f91d
commit
03c95fe13a
@ -130,7 +130,27 @@ def _search_wb_one(query: str, limit: int, timeout: float, max_retries: int) ->
|
||||
log.info("WB no products for query=%r", query)
|
||||
return []
|
||||
|
||||
return [_build_item(p) for p in products[:limit]]
|
||||
# WB anti-bot иногда возвращает 1-2 нерелевантных товара (платья, обувь)
|
||||
# вместо 429. Фильтруем по релевантности к query.
|
||||
relevant = [p for p in products if _is_relevant(p, query)]
|
||||
if not relevant:
|
||||
log.info("WB: %d products but none relevant to query=%r (anti-bot trash)",
|
||||
len(products), query)
|
||||
return []
|
||||
|
||||
return [_build_item(p) for p in relevant[:limit]]
|
||||
|
||||
|
||||
def _is_relevant(product: dict, query: str) -> bool:
|
||||
"""Проверяем что товар реально соответствует поиску, а не WB anti-bot мусор."""
|
||||
name = (product.get("name") or "").lower()
|
||||
brand = (product.get("brand") or "").lower()
|
||||
q_words = [w.lower() for w in query.split() if len(w) >= 3]
|
||||
if not q_words:
|
||||
return True
|
||||
# Хотя бы 1 значимое слово запроса должно быть в name или brand
|
||||
matches = sum(1 for w in q_words if w in name or w in brand)
|
||||
return matches >= 1
|
||||
|
||||
log.warning("WB gave up after %d attempts for query=%r", max_retries + 1, query)
|
||||
return []
|
||||
|
||||
Loading…
Reference in New Issue
Block a user