dom = etree.HTML(source) id = dom.xpath('//link[@rel="canonical"]/@href')[0] title = dom.xpath('//title/text()')[0] price = dom.xpath('//span[@class="unitPriceValue"]/text()')[0] information = dict(re.compile('<li><span class="label">(.*?)</span>(.*?)</li>').findall(source)) #s = re.compile('<li><span class="label">(.*?)</span>(.*?)</li>').findall(source) ,s 位一个列表,每个元素位一个元组,每个元组有两个值。 #[('房屋户型', '3室2厅1厨3卫'), ('所在楼层', '高楼层 (共28层)'), ('建筑面积', '194.47㎡'), ('户型结构', '平层'), ('套内面积', '156.88㎡'), ('建筑类型', '板楼'), ('房屋朝向', '西南'), ('建筑结构', '钢混结构'), ('装修情况', '精装'), ('梯户比例', '两梯三户'), ('配备电梯', '有')] information.update(title=title, price=price, url=id) #把三者放入字典最后面 # print(information) asyncio.ensure_future(save_to_database(information, pool=pool)) #调用save_to_database()保持进数据库
# MySQL server error code for "Table '...' doesn't exist".
_ER_NO_SUCH_TABLE = 1146


def _quote_identifier(name):
    """Return *name* as a backtick-quoted MySQL identifier."""
    return '`' + str(name).replace('`', '``') + '`'


async def save_to_database(information, pool):
    """Insert one record into the MySQL table ``TABLE_NAME`` asynchronously.

    If the table does not exist yet it is created with one ``VARCHAR(255)``
    column per key of *information*, and the insert is retried.

    Args:
        information: Mapping of column name to value describing one row.
            Values are stored as their string representation.
        pool: Connection pool supporting ``async with pool.acquire()``.

    MySQL errors are reported on stdout instead of being raised, so a failed
    save does not break the caller.
    """
    if not information:
        # No columns: there is nothing to insert and no table can be created.
        return

    columns = [_quote_identifier(key) for key in information]
    values = [str(value) for value in information.values()]

    create_sql = 'CREATE TABLE IF NOT EXISTS %s (%s)' % (
        TABLE_NAME,
        ', '.join(column + ' VARCHAR(255)' for column in columns),
    )
    # Values are sent as query parameters so quotes or backslashes in the
    # data cannot break or inject SQL. The statement goes through
    # %-formatting in the driver, hence literal '%' in column names is doubled.
    insert_sql = 'INSERT INTO %s (%s) VALUES (%s)' % (
        TABLE_NAME,
        ', '.join(columns).replace('%', '%%'),
        ', '.join(['%s'] * len(columns)),
    )

    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            try:
                try:
                    await cur.execute(insert_sql, values)
                except aiomysql.Error as e:
                    # Only a missing table is recoverable here; anything
                    # else is handed to the outer handler.
                    if not e.args or e.args[0] != _ER_NO_SUCH_TABLE:
                        raise
                    # IF NOT EXISTS tolerates another task having created
                    # the table in the meantime.
                    await cur.execute(create_sql)
                    await cur.execute(insert_sql, values)
                # Persist the row; a no-op cost when autocommit is enabled.
                await conn.commit()
                print('插入数据成功')
            except aiomysql.Error as e:
                if len(e.args) >= 2:
                    print('mysql error %s: %s' % (e.args[0], e.args[1]))
                else:
                    print('mysql error: %s' % (e,))