Python之子域名查询

  1. 根据域名查询全部相关子域名(受搜索引擎限制,默认只查询2页,结果不超过20条)
  2. 并将查询到的子域名和子域名标题进行组装后返回

实现代码

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent


def __get_http_headers():
    """
    Build the HTTP request headers used for the search requests.

    A random browser User-Agent is requested from ``fake_useragent``. If that
    lookup raises, a fixed Chrome User-Agent string is used instead. The same
    ``Accept-Language`` value is sent in both cases so that the request looks
    the same to the server regardless of which User-Agent source was used.

    :return: dict with the ``User-Agent`` and ``Accept-Language`` headers
    """
    try:
        user_agent = UserAgent().random
    except Exception:
        # Deliberate catch-all: header generation is best-effort and must not
        # abort the lookup, whatever the User-Agent provider raises.
        user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36"
    return {
        "User-Agent": user_agent,
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7,zh-TW;q=0.6",
    }


def sub_domain_lookup(domain, resolve_count=2, timeout=10):
    """
    Search Bing for ``domain`` and collect the links found in result headings.

    Fetches ``resolve_count`` result pages from ``https://cn.bing.com/search``.
    On each page, every ``<h2>`` selected by the ``{"class": ""}`` filter that
    contains a link contributes one entry: the link target with its query
    string removed, mapped to the link text. A URL seen more than once keeps
    the title seen last.

    :param domain: search keyword sent as the ``q`` parameter (a domain name)
    :param resolve_count: number of result pages to fetch (default 2);
        values below 1 fetch nothing and yield an empty dict
    :param timeout: seconds to wait for each HTTP request (default 10)
    :return: dict mapping result URL (without query string) to link title
    :raises requests.exceptions.RequestException: if a request fails or
        times out
    """
    # The page offset below assumes 10 results per page, matching the
    # "2 pages, at most 20 results" limit this lookup was written for.
    results_per_page = 10
    domain_list = {}
    # range() excludes its stop value, so +1 is needed to really fetch
    # resolve_count pages (page numbers are 1-based).
    for page in range(1, resolve_count + 1):
        params = {
            "q": domain,
            "go": "搜索",
            "qs": "n",
            "form": "QBLH",
            # 1-based index of the first result on this page: 1, 11, 21, ...
            "first": (page - 1) * results_per_page + 1,
        }
        # NOTE(review): presumably asks Bing for up to 50 results per page;
        # confirm whether the server honours it. Overlapping pages are
        # harmless because results are keyed by URL.
        cookies = {
            "SRCHHPGUSR": "NRSLT=50"
        }
        res = requests.get(
            "https://cn.bing.com/search",
            headers=__get_http_headers(),
            params=params,
            cookies=cookies,
            timeout=timeout,  # without a timeout a stalled connection blocks forever
        )
        # Parse the raw response bytes and pick out the result headings.
        html = BeautifulSoup(res.content, "html.parser")
        for h2 in html.find_all("h2", {"class": ""}):
            link = h2.find("a")
            if link is None:
                continue
            href = link.get("href")
            if not href:
                # Anchor without a target: nothing usable to record.
                continue
            # Drop the query string so tracking parameters do not create
            # distinct keys for the same page.
            domain_list[href.split("?")[0]] = link.text
    return domain_list


if __name__ == "__main__":
    # Demo run when executed as a script: look up a sample domain and print
    # the resulting URL-to-title mapping.
    print(sub_domain_lookup(domain="baidu.com"))