import base64
import json
import os
import re
from datetime import datetime

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook, load_workbook

# Internal category id -> value sent as "field_grouh" in the ad payload.
CATEGORY_TID_MAPPING = {
    1: 8, 2: 6, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 3,
    11: 3, 12: 6, 13: 4, 14: 6, 15: 6, 16: 2, 17: 6, 18: 6, 19: 6, 20: 6,
    21: 7, 22: 6, 23: 6, 24: 6, 25: 6, 26: 6, 27: 9, 28: 6, 29: 1, 30: 6,
}

# Internal category id -> (extra payload field name, value) that is added to
# the ad payload for categories listed here.
SUBCATEGORY_FIELD_MAPPING = {
    1: ("field_sanatnew", 13),
    2: ("field_khadamatnew", 9),
    3: ("field_amlaknew", 15),
    4: ("field_otomobilnew", 7),
    5: ("field_lavazemnew", 19),
    16: ("field_amozeshnew", 7),
    21: ("field_bazarkarnew", 3),
    27: ("field_ertebatnew", 15),
    29: ("field_computernew", 15),
}

# Category display name (as shown on the scraped page) -> internal category id.
# Built once at import time instead of on every lookup.
_CATEGORY_NAME_TO_ID = {
    "صنعت": 1,
    "خدمات": 2,
    "املاک": 3,
    "اتومبیل": 4,
    "لوازم": 5,
    "مواد اولیه": 6,
    "بسته بندی": 7,
    # NOTE(review): every other entry follows 1..30 in order, so 8 may have
    # been intended here; left unchanged -- confirm against the target site.
    "تجهیزات ساختمانی": 3,
    "لوازم یدکی": 9,
    "سوخت و مشتقات نفت": 10,
    "خدمات ساختمانی": 11,
    "خدمات صنعتی": 12,
    "خدمات خودرو": 13,
    "ایمنی و حفاظت": 14,
    "مواد شیمیایی": 15,
    "آموزش": 16,
    "حمل و نقل": 17,
    "تجهیزات صنعتی": 18,
    "خدمات مجالس": 19,
    "تاسیسات": 20,
    "بازار کار": 21,
    "تجهیزات پزشکی": 22,
    "ضایعات": 23,
    "آهن و فلزات": 24,
    "دکوراسیون": 25,
    "خدمات منزل": 26,
    "ارتباط": 27,
    "امور شرکتها": 28,
    "کامپیوتر": 29,
    "لوازم خانگی": 30,
}


def extract_category_name(soup):
    """Return the text of the first link whose href starts with "/category/".

    Args:
        soup: Parsed page (BeautifulSoup object).

    Returns:
        The stripped link text, or "" when no such link exists.
    """
    link = soup.find("a", href=re.compile(r"^/category/"))
    return link.text.strip() if link else ""


def get_category_id_from_name(name):
    """Translate a category display name into its internal category id.

    Args:
        name: Category name exactly as it appears in the lookup table.

    Returns:
        The matching id, or 1 when the name is unknown (including "").
    """
    return _CATEGORY_NAME_TO_ID.get(name, 1)


def extract_keywords(soup):
    """Return the page's keywords from ``<meta name="keywords">``.

    The ``content`` attribute is split on "," and each piece is stripped;
    empty pieces are dropped.

    Returns:
        A list of keyword strings; empty when the tag or its ``content``
        attribute is missing.
    """
    meta = soup.find("meta", attrs={"name": "keywords"})
    if not meta or "content" not in meta.attrs:
        return []
    return [k.strip() for k in meta["content"].split(",") if k.strip()]


def extract_clean_body(html):
    """Extract the ad description as plain text with normalised line breaks.

    The description is taken from the ``<p x-ref="content">`` element. Nested
    tags are unwrapped (their text is kept), ``<br>`` tags become newlines,
    every line is stripped, and runs of blank lines are reduced to one.

    Args:
        html: Raw HTML of the ad page.

    Returns:
        The cleaned text, or the placeholder "بدون شرح" when the element is
        not present.
    """
    soup = BeautifulSoup(html, "html.parser")
    p = soup.find("p", attrs={"x-ref": "content"})
    if not p:
        return "بدون شرح"

    # Drop inline markup but keep its text; <br> is handled separately below.
    for tag in p.find_all():
        if tag.name != "br":
            tag.unwrap()
    for br in p.find_all("br"):
        br.replace_with("\n")

    # Strip each line BEFORE collapsing blank runs: a line holding only
    # spaces would otherwise hide a run of 3+ newlines from the regex.
    lines = (line.strip() for line in p.get_text().strip().splitlines())
    return re.sub(r"\n{3,}", "\n\n", "\n".join(lines))
# Seconds to wait on the scraped site before giving up on a GET request.
_GET_TIMEOUT = 10
# Seconds to wait on the ad-insert API; larger because the payload may carry
# a base64-encoded image.
_POST_TIMEOUT = 30


def extract_phones_from_html(html):
    """Return the distinct 11-digit phone numbers found in direct-call links.

    Numbers are matched in URLs of the form
    ``/contact/directcall/<word>/<11 digits>``.

    Args:
        html: Raw HTML of the ad page.

    Returns:
        A list of unique numbers in order of first appearance. The order is
        significant: extract_ad_data uses the first entry as the phone number
        and the second as the WhatsApp number.
    """
    phone_regex = r'/contact/directcall/\w+/(\d{11})'
    phones = re.findall(phone_regex, html)
    # dict.fromkeys de-duplicates while keeping insertion order; a set would
    # make the phone/WhatsApp assignment vary between runs.
    return list(dict.fromkeys(phones))


def _fetch_image(soup):
    """Download the ad image and return ``(base64_text, filename)``.

    Returns ``(None, "")`` when the page has no ``<img class="object-cover">``
    with a ``src``, or when the image request does not succeed. Network
    errors raised by ``requests`` propagate to the caller.
    """
    # Local import keeps this block independent of the file's import header.
    from urllib.parse import urljoin

    img = soup.find("img", class_="object-cover")
    if not img or not img.get("src"):
        return None, ""

    # urljoin leaves absolute URLs untouched and correctly resolves
    # root-relative, relative and protocol-relative ("//host/..") sources.
    img_url = urljoin("https://www.istgah.com/", img["src"])
    resp = requests.get(img_url, timeout=_GET_TIMEOUT)
    if not resp.ok:
        # Do not upload an HTTP error page as if it were the picture.
        return None, ""

    img_b64 = base64.b64encode(resp.content).decode("utf-8")
    filename = os.path.basename(img_url.split("?")[0])
    if not filename or "." not in filename:
        filename = "default.jpg"
    return img_b64, filename


def extract_ad_data(url):
    """Scrape one ad page and build the payload for the ad-insert API.

    Args:
        url: Address of the ad detail page.

    Returns:
        A dict of payload fields (title, body, contact numbers, category
        fields, keywords and, when available, the base64-encoded image), or
        None when anything fails; the error is printed in that case.
    """
    try:
        r = requests.get(url, timeout=_GET_TIMEOUT)
        r.raise_for_status()
        html = r.text
        soup = BeautifulSoup(html, "html.parser")

        heading = soup.find("h1")
        title = heading.get_text(strip=True) if heading else "بدون عنوان"
        body = extract_clean_body(html)

        phones = extract_phones_from_html(html)
        contact = phones[0] if phones else ""
        whatsapp = phones[1] if len(phones) > 1 else ""

        name_tag = soup.find("div", class_="pb-3")
        name = name_tag.get_text(strip=True) if name_tag else "نامشخص"

        img_b64, filename = _fetch_image(soup)

        cat_id = get_category_id_from_name(extract_category_name(soup))
        field_group = CATEGORY_TID_MAPPING.get(cat_id, 8)

        kws_str = ", ".join(extract_keywords(soup))
        kws_str = kws_str[:255]  # keyword field length limit

        ad_data = {
            "title": title,
            "body": body,
            "field_noeagahi": "1",
            "field_grouh": field_group,
            "field_mogheiat": 13,
            "field_telephone": contact,
            "field_whatsapp": whatsapp,
            "field_address": "تهران",
            "field_naamshakhshya": name,
            "field_tagsapi2": kws_str,
        }

        if img_b64:
            ad_data["field_aks2"] = img_b64
            ad_data["field_aks2_filename"] = filename
            # NOTE(review): the MIME type is fixed even when the filename has
            # another extension (e.g. .png) -- confirm what the API expects.
            ad_data["field_aks2_filemime"] = "image/jpeg"

        if cat_id in SUBCATEGORY_FIELD_MAPPING:
            fld, tid = SUBCATEGORY_FIELD_MAPPING[cat_id]
            ad_data[fld] = tid

        return ad_data

    except Exception as e:
        # Boundary handler: one bad page must not stop a batch run, so the
        # failure is reported and signalled with None.
        print(f"❌ خطا در پردازش {url}: {e}")
        return None


def send_ad(ad_data):
    """POST an ad payload to the ad-insert API as JSON.

    Args:
        ad_data: Payload dict as produced by extract_ad_data.

    Returns:
        ``(status_code, response_text)`` on any HTTP response, or
        ``(0, error_message)`` when the request itself fails.
    """
    # NOTE(security): the API key is hard-coded in the URL; consider loading
    # it from configuration or the environment instead.
    api_url = "https://sabtha.com/apiiau/insert-ad?key=223456"
    try:
        resp = requests.post(api_url, json=ad_data, timeout=_POST_TIMEOUT)
        return resp.status_code, resp.text
    except Exception as e:
        return 0, str(e)


# File and report helpers (load/save links, report) can be added here if needed.

if __name__ == "__main__":
    # Sample run: scrape one ad and submit it.
    test_url = "https://www.istgah.com/detail/..."
    ad = extract_ad_data(test_url)
    if ad:
        status_code, response = send_ad(ad)
        print(f"Response Code: {status_code}")
        print(response)

# NOTE(review): the text below trailed the script in the original file. It is
# page-title residue, not code, and is preserved here only as a comment.
# چاپ برچسب پی وی سی | sabtha.com

خانه > کلیدواژه > چاپ برچسب پی وی سی

آگهی های ویژه