Python 的 XPath 写法

# Snippet: parse one article page with lxml and collect its fields into a dict.
# NOTE(review): this is an excerpt from inside a function that is not shown here;
# `source`, `link`, the *Pattern names, `tostring` and `html_encode` come from
# elsewhere in the original program.
dom = etree.HTML(source)  # source is the HTML text of a response (response.text); per the original note it is defined in main
        # Fields extracted below; the patterns need to be adjusted to the target site.
        # Every `[0]` takes the first match and raises IndexError on an empty result list.
        articleId = re.compile(articleIdPattern).findall(link)[0]
        title = dom.xpath(titlePattern)[0]
        content = dom.xpath(contentPattern)[0]
        # Serialize the matched content node back to an HTML string (UTF-8 bytes -> str).
        # presumably `tostring` is lxml's tostring -- TODO confirm against the imports
        content = tostring(content, encoding="utf-8", pretty_print=True, method="html").decode("utf-8")
        # The category is pulled from the raw title BEFORE the title is cleaned on the
        # next line, so the order of these two statements matters.
        # assumes `title` is a string (e.g. a text() XPath result) -- TODO confirm
        category = re.compile(categoryPattern).findall(title)[0]
        # Remove every `_..._脚本之家` span (non-greedy match) from the title.
        title = re.sub(r'_.*?_脚本之家', '', title)
        content = html_encode(content)  # encode the HTML (html_encode is defined elsewhere)
        articleUrl = link
        crawlSign = 'Yes'
        publishSign = 'No'
        # Put all fields into a dict
        information = {}
        information.update(article_id=articleId, title=title, content=content, category=category, articleUrl=articleUrl,
                           crawlSign=crawlSign, publishSign=publishSign)
        # print(information)
        # Next step (code not included in this snippet): extract all image URLs into a
        # set(), and rewrite the image URLs in the source to point at the new folder.
Post Views: 95