采集商品发布到wordpress

这是今天自己折腾的一个项目:从数据采集、Google 翻译到 WordPress 发布,全部用 Python 打通了。

最终代码如下:

1 import requests
2 import json
3 from bs4 import BeautifulSoup
4 import xlwt
5 from wordpress_xmlrpc import Client, WordPressPost, WordPressTerm
6 from wordpress_xmlrpc.methods.posts import GetPosts, NewPost
7 from wordpress_xmlrpc.methods.users import GetUserInfo
8 from wordpress_xmlrpc.methods import taxonomies
9 import csv
10
def getHTMLText(url):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.

    Returns:
        The response text on success. On a network failure, a timeout
        (30 seconds) or an HTTP error status, prints a message and returns
        ``0`` so that callers can test the result for truthiness.
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx answers into an exception so they take the failure path.
        r.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are handled here; KeyboardInterrupt and
        # genuine programming errors are allowed to propagate.
        print("Get HTML Text Failed!")
        return 0
    return r.text
19
20
def google_translate_EtoC(to_translate, from_language="en", to_language="ch-CN"):
    """Translate text by scraping the Google Translate mobile web page.

    Args:
        to_translate: Text to translate.
        from_language: Source language code, sent as the ``sl`` parameter.
        to_language: Language code sent as the ``hl`` parameter.

    Returns:
        The text of the first ``<div class="t0">`` element of the returned
        page, or ``""`` (after printing "Translation Failed!") when the page
        could not be fetched or contains no such element.
    """
    # Function-scope import so the block does not depend on a file-level import.
    from urllib.parse import quote

    # NOTE(review): the default "ch-CN" does not look like a valid language
    # code; "zh-CN" was presumably intended. Left unchanged to keep the
    # signature stable -- confirm and fix at the call sites or here.

    # Build the request URL. The text is percent-encoded so that characters
    # such as '&', '#', '+' or spaces cannot truncate or corrupt the query.
    base_url = "https://translate.google.cn/m?hl={}&sl={}&ie=UTF-8&q={}"
    url = base_url.format(to_language, from_language,
                          quote(str(to_translate), safe=""))

    # Fetch the page; getHTMLText returns a falsy value on failure.
    html = getHTMLText(url)
    if not html:
        print("Translation Failed!")
        return ""

    # Parse the page and pull out the translation result.
    soup = BeautifulSoup(html, "html.parser")
    try:
        result = soup.find_all("div", {"class": "t0"})[0].text
    except IndexError:
        # No result element found in the page.
        print("Translation Failed!")
        result = ""

    return result
39
# HTTP request headers sent with every product-data request below.
headers = {
    'Host': 'www.zalora.com.hk',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Accept': 'text/html, */*; q=0.01',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36'
    ),
    'DNT': '1',
    'Referer': 'http://example.com/',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8,ja;q=0.6',
}
51
# Workbook and sheet objects are created here, but nothing in this script
# writes to them or saves them.
file = xlwt.Workbook()
table = file.add_sheet('info', cell_overwrite_ok=True)
x = 0

# XML-RPC client used to publish the posts.
wp = Client('http://www.xxx.cc/xmlrpc.php', 'user', 'pass')

# NOTE(review): the loop below prepends `base_url` to every entry of
# urllist2.txt, but no module-level `base_url` is defined in this listing
# (the pasted line numbering skips a line here, so the assignment was
# presumably removed before publishing). The empty placeholder means the
# entries are used as-is; set it to the product-data URL prefix if the list
# holds only partial URLs.
base_url = ""

with open("urllist2.txt", "r") as f:
    # Iterate the file lazily, one URL entry per line.
    for line in f:
        # Drop the trailing newline (it would otherwise end up inside the
        # URL) and ignore blank lines.
        line = line.strip()
        if not line:
            continue
        url = base_url + line

        # Fetch the product JSON and pull out the fields the post needs.
        # Only fields that are actually published are read, so a product
        # lacking unrelated attributes or images cannot abort the run.
        try:
            res = requests.get(url, headers=headers, timeout=30)
            res.raise_for_status()
            data = json.loads(res.text)['data']
            name = data['product_name']
            category = data['bread_crumb'][2]['value']
            price = data['price'].replace("HK$", "")
            productDesc = data['short_description']
        except (requests.RequestException, ValueError, KeyError,
                IndexError, TypeError, AttributeError) as err:
            # One bad URL or unexpected payload should not stop the batch.
            print("Skipping {}: {!r}".format(url, err))
            continue

        # Publish the post.
        post = WordPressPost()
        post.title = google_translate_EtoC(name)
        post.content = category + price + google_translate_EtoC(productDesc)
        post.post_status = 'publish'
        post.terms_names = {
            'post_tag': ['test', 'firstpost'],
            'category': [' Titika', 'Titika']
        }

        wp.call(NewPost(post))

本文暂无标签