# NOTE(review): `source` and `pool` are not defined in the visible lines;
# presumably they are supplied by enclosing code -- confirm.
# Parse the page markup held in `source` into an element tree.
dom = etree.HTML(source)
# First href of the canonical <link>; stored below under the `url` key.
# NOTE(review): the name `id` shadows the built-in of the same name.
id = dom.xpath('//link[@rel="canonical"]/@href')[0]
# First text node of the page <title>.
title = dom.xpath('//title/text()')[0]
# First text node of the <span class="unitPriceValue"> element.
price = dom.xpath('//span[@class="unitPriceValue"]/text()')[0]
# Every `<li><span class="label">LABEL</span>VALUE</li>` item found in the raw
# markup becomes one LABEL -> VALUE entry of the dict.
information = dict(re.compile('<li><span class="label">(.*?)</span>(.*?)</li>').findall(source))
# findall() returns a list whose elements are 2-tuples (label, value), e.g.:
#[('房屋户型', '3室2厅1厨3卫'), ('所在楼层', '高楼层 (共28层)'), ('建筑面积', '194.47㎡'), ('户型结构', '平层'), ('套内面积', '156.88㎡'), ('建筑类型', '板楼'), ('房屋朝向', '西南'), ('建筑结构', '钢混结构'), ('装修情况', '精装'), ('梯户比例', '两梯三户'), ('配备电梯', '有')]
information.update(title=title, price=price, url=id) # add the three values after the scraped label/value pairs
# print(information)
asyncio.ensure_future(save_to_database(information, pool=pool)) # schedule save_to_database() to store the record; the task is not awaited here
def _quote_identifier(name):
    """Return *name* as a backtick-quoted MySQL identifier."""
    # A literal backtick inside a quoted identifier is written as two.
    return '`' + str(name).replace('`', '``') + '`'


def _build_statements(table_name, information):
    """Build the CREATE TABLE / INSERT statements and the INSERT parameters."""
    columns = [_quote_identifier(key) for key in information]
    create_sql = 'CREATE TABLE IF NOT EXISTS %s (%s)' % (
        table_name,
        ', '.join(column + ' VARCHAR(255)' for column in columns),
    )
    # The INSERT is executed with parameters, so the driver applies
    # %-formatting to it: a literal '%' in a column name must be doubled.
    insert_sql = 'INSERT INTO %s (%s) VALUES (%s)' % (
        table_name,
        ', '.join(column.replace('%', '%%') for column in columns),
        ', '.join(['%s'] * len(columns)),
    )
    # The original stored every value through "%s" formatting, i.e. as text.
    values = [str(value) for value in information.values()]
    return create_sql, insert_sql, values


async def save_to_database(information, pool):
    """Store one record in the MySQL table ``TABLE_NAME`` using async I/O.

    If the table does not exist yet it is created with one ``VARCHAR(255)``
    column per key of *information*, and the insert is retried.

    Args:
        information: Mapping of column name to value describing one row.
        pool: Connection pool; ``pool.acquire()`` and ``conn.cursor()`` are
            used as async context managers.

    Returns:
        None. MySQL errors are printed rather than raised. An empty
        *information* is ignored, because it cannot form a valid statement.
    """
    if not information:
        return

    # TABLE_NAME is a module-level constant and is interpolated as-is; only
    # the scraped column names and values are treated as untrusted.
    create_sql, insert_sql, values = _build_statements(TABLE_NAME, information)
    no_such_table = 1146  # MySQL ER_NO_SUCH_TABLE

    # Insert the row asynchronously.
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            try:
                try:
                    await cur.execute(insert_sql, values)
                except aiomysql.Error as e:
                    if not e.args or e.args[0] != no_such_table:
                        raise
                    # First record: create the table, then retry. IF NOT
                    # EXISTS tolerates another task creating it concurrently.
                    await cur.execute(create_sql)
                    await cur.execute(insert_sql, values)
                # Needed when the connection is not in autocommit mode;
                # harmless when it is.
                await conn.commit()
                print('插入数据成功')
            except aiomysql.Error as e:
                if len(e.args) >= 2:
                    print('mysql error %s: %s' % (e.args[0], e.args[1]))
                else:
                    print('mysql error: %s' % (e,))
# Post Views: 8  (web page footer captured with the snippet; not part of the program)