From 03c95fe13a5f9747b76f2e29caa2b263ebaffd43 Mon Sep 17 00:00:00 2001 From: wasrusgen Date: Mon, 11 May 2026 23:02:37 +0300 Subject: [PATCH] =?UTF-8?q?wb:=20relevance=20filter=20=E2=80=94=20discard?= =?UTF-8?q?=20anti-bot=20trash=20products=20(=D0=BF=D0=BB=D0=B0=D1=82?= =?UTF-8?q?=D1=8C=D1=8F/=D0=BE=D0=B1=D1=83=D0=B2=D1=8C=20in=20fridge=20sea?= =?UTF-8?q?rch)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WB sometimes responds with 1-2 unrelated products instead of 429 status. Was returning 'Платье вечернее' on 'Haier холодильник' query. Fix: _is_relevant(product, query) checks that at least 1 significant query word (>=3 chars) appears in product name or brand. Discards full result if zero matches. Tradeoff: may sometimes reject valid product if query is overly specific (e.g. exact SKU). But that's OK — we fall through to next query variant. --- backend-py/app/parsers/wb.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/backend-py/app/parsers/wb.py b/backend-py/app/parsers/wb.py index 7a87664..afbe6c9 100644 --- a/backend-py/app/parsers/wb.py +++ b/backend-py/app/parsers/wb.py @@ -130,7 +130,27 @@ def _search_wb_one(query: str, limit: int, timeout: float, max_retries: int) -> log.info("WB no products for query=%r", query) return [] - return [_build_item(p) for p in products[:limit]] + # WB anti-bot иногда возвращает 1-2 нерелевантных товара (платья, обувь) + # вместо 429. Фильтруем по релевантности к query. 
+        relevant = [p for p in products if _is_relevant(p, query)]
+        if not relevant:
+            log.info("WB: %d products but none relevant to query=%r (anti-bot trash)",
+                     len(products), query)
+            return []
+
+        return [_build_item(p) for p in relevant[:limit]]
 
     log.warning("WB gave up after %d attempts for query=%r", max_retries + 1, query)
     return []
+
+
+def _is_relevant(product: dict, query: str) -> bool:
+    """Tell whether a product matches the query rather than being WB anti-bot junk."""
+    name = (product.get("name") or "").lower()
+    brand = (product.get("brand") or "").lower()
+    q_words = [w.lower() for w in query.split() if len(w) >= 3]
+    # No word of 3+ characters in the query: nothing to match on, keep the product.
+    if not q_words:
+        return True
+    # At least one significant query word must occur in the name or the brand.
+    return any(w in name or w in brand for w in q_words)