这是我今天折腾的一个个人项目:从数据采集、Google 翻译到 WordPress 发布,整个流程全部用 Python 打通了。
最终代码如下:
3 | from bs4 import BeautifulSoup |
5 | from wordpress_xmlrpc import Client, WordPressPost, WordPressTerm |
6 | from wordpress_xmlrpc.methods.posts import GetPosts, NewPost |
7 | from wordpress_xmlrpc.methods.users import GetUserInfo |
8 | from wordpress_xmlrpc.methods import taxonomies |
13 | r = requests.get(url, timeout=30) |
17 | print ( "Get HTML Text Failed!" ) |
21 | def google_translate_EtoC(to_translate, from_language= "en" , to_language= "ch-CN" ): |
24 | url = base_url.format(to_language, from_language, to_translate) |
27 | html = getHTMLText(url) |
29 | soup = BeautifulSoup(html, "html.parser" ) |
33 | result = soup.find_all( "div" , { "class" : "t0" })[0].text |
35 | print ( "Translation Failed!" ) |
40 | headers = { 'Host' : 'www.zalora.com.hk' , |
41 | 'Connection' : 'keep-alive' , |
42 | 'Cache-Control' : 'max-age=0' , |
43 | 'Accept' : 'text/html, */*; q=0.01' , |
44 | 'X-Requested-With' : 'XMLHttpRequest' , |
45 | 'User-Agent' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36' , |
48 | 'Accept-Encoding' : 'gzip, deflate, sdch' , |
49 | 'Accept-Language' : 'zh-CN,zh;q=0.8,ja;q=0.6' |
53 | table = file.add_sheet( 'info' , cell_overwrite_ok=True) |
57 | with open( "urllist2.txt" , "r" ) as f: |
58 | for line in f.readlines(): |
60 | res = requests.get(url, headers=headers) |
61 | json_data = json.loads(res.text) |
62 | sku = json_data[ 'data' ][ 'sku_config' ] |
63 | name = json_data[ 'data' ][ 'product_name' ] |
64 | catogery = json_data[ 'data' ][ 'bread_crumb' ][2][ 'value' ] |
65 | price = json_data[ 'data' ][ 'price' ].replace( "HK$" , "" ) |
66 | productDesc = json_data[ 'data' ][ 'short_description' ] |
67 | color = json_data[ 'data' ][ 'attributes' ][1][ 'value' ] |
68 | Care_label = json_data[ 'data' ][ 'attributes' ][2][ 'value' ] |
69 | model_body = json_data[ 'data' ][ 'size_attributes' ][0][ 'value' ] |
70 | model_garment = json_data[ 'data' ][ 'size_attributes' ][1][ 'value' ] |
71 | size = json_data[ 'data' ][ 'size_attributes' ][2][ 'value' ] |
72 | image = json_data[ 'data' ][ 'product_images' ][3][ 'product_image' ] |
73 | returnable = json_data[ 'data' ][ 'return_info_text' ] |
78 | post = WordPressPost() |
79 | post.title = google_translate_EtoC(name) |
80 | post.content = catogery+price+google_translate_EtoC(productDesc) |
81 | post.post_status = 'publish' |
83 | 'post_tag' : [ 'test' , 'firstpost' ], |
84 | 'category' : [ ' Titika' , 'Titika' ] |
87 | wp.call(NewPost(post)) |